Line data Source code
1 : //
2 : // block.c
3 : // cloudsync
4 : //
5 : // Block-level LWW CRDT support for text/blob fields.
6 : //
7 :
8 : #include <stdio.h>
9 : #include <string.h>
10 : #include <stdlib.h>
11 : #include "block.h"
12 : #include "utils.h"
13 : #include "fractional_indexing.h"
14 :
15 : // MARK: - Col name helpers -
16 :
17 167198 : bool block_is_block_colname(const char *col_name) {
18 167198 : if (!col_name) return false;
19 167198 : return strchr(col_name, BLOCK_SEPARATOR) != NULL;
20 167198 : }
21 :
22 412 : char *block_extract_base_colname(const char *col_name) {
23 412 : if (!col_name) return NULL;
24 412 : const char *sep = strchr(col_name, BLOCK_SEPARATOR);
25 412 : if (!sep) return cloudsync_string_dup(col_name);
26 412 : return cloudsync_string_ndup(col_name, (size_t)(sep - col_name));
27 412 : }
28 :
29 1348 : const char *block_extract_position_id(const char *col_name) {
30 1348 : if (!col_name) return NULL;
31 1348 : const char *sep = strchr(col_name, BLOCK_SEPARATOR);
32 1348 : if (!sep) return NULL;
33 1348 : return sep + 1;
34 1348 : }
35 :
36 995 : char *block_build_colname(const char *base_col, const char *position_id) {
37 995 : if (!base_col || !position_id) return NULL;
38 995 : size_t blen = strlen(base_col);
39 995 : size_t plen = strlen(position_id);
40 995 : char *result = (char *)cloudsync_memory_alloc(blen + 1 + plen + 1);
41 995 : if (!result) return NULL;
42 995 : memcpy(result, base_col, blen);
43 995 : result[blen] = BLOCK_SEPARATOR;
44 995 : memcpy(result + blen + 1, position_id, plen);
45 995 : result[blen + 1 + plen] = '\0';
46 995 : return result;
47 995 : }
48 :
49 : // MARK: - Text splitting -
50 :
51 224 : static block_list_t *block_list_create(void) {
52 224 : block_list_t *list = (block_list_t *)cloudsync_memory_zeroalloc(sizeof(block_list_t));
53 224 : return list;
54 : }
55 :
56 3060 : static bool block_list_append(block_list_t *list, const char *content, size_t content_len, const char *position_id) {
57 3060 : if (list->count >= list->capacity) {
58 329 : int new_cap = list->capacity ? list->capacity * 2 : 16;
59 329 : block_entry_t *new_entries = (block_entry_t *)cloudsync_memory_realloc(
60 329 : list->entries, (uint64_t)(new_cap * sizeof(block_entry_t)));
61 329 : if (!new_entries) return false;
62 329 : list->entries = new_entries;
63 329 : list->capacity = new_cap;
64 329 : }
65 3060 : block_entry_t *e = &list->entries[list->count];
66 3060 : e->content = cloudsync_string_ndup(content, content_len);
67 3060 : e->position_id = position_id ? cloudsync_string_dup(position_id) : NULL;
68 3060 : if (!e->content) return false;
69 3060 : list->count++;
70 3060 : return true;
71 3060 : }
72 :
73 224 : void block_list_free(block_list_t *list) {
74 224 : if (!list) return;
75 3284 : for (int i = 0; i < list->count; i++) {
76 3060 : if (list->entries[i].content) cloudsync_memory_free(list->entries[i].content);
77 3060 : if (list->entries[i].position_id) cloudsync_memory_free(list->entries[i].position_id);
78 3060 : }
79 224 : if (list->entries) cloudsync_memory_free(list->entries);
80 224 : cloudsync_memory_free(list);
81 224 : }
82 :
83 92 : block_list_t *block_list_create_empty(void) {
84 92 : return block_list_create();
85 : }
86 :
87 1348 : bool block_list_add(block_list_t *list, const char *content, const char *position_id) {
88 1348 : if (!list) return false;
89 1348 : return block_list_append(list, content, strlen(content), position_id);
90 1348 : }
91 :
92 132 : block_list_t *block_split(const char *text, const char *delimiter) {
93 132 : block_list_t *list = block_list_create();
94 132 : if (!list) return NULL;
95 :
96 132 : if (!text || !*text) {
97 : // Empty text produces a single empty block
98 5 : block_list_append(list, "", 0, NULL);
99 5 : return list;
100 : }
101 :
102 127 : size_t dlen = strlen(delimiter);
103 127 : if (dlen == 0) {
104 : // No delimiter: entire text is one block
105 0 : block_list_append(list, text, strlen(text), NULL);
106 0 : return list;
107 : }
108 :
109 127 : const char *start = text;
110 : const char *found;
111 1707 : while ((found = strstr(start, delimiter)) != NULL) {
112 1580 : size_t seg_len = (size_t)(found - start);
113 1580 : if (!block_list_append(list, start, seg_len, NULL)) {
114 0 : block_list_free(list);
115 0 : return NULL;
116 : }
117 1580 : start = found + dlen;
118 : }
119 : // Last segment (after last delimiter or entire string if no delimiter found)
120 127 : if (!block_list_append(list, start, strlen(start), NULL)) {
121 0 : block_list_free(list);
122 0 : return NULL;
123 : }
124 :
125 127 : return list;
126 132 : }
127 :
128 : // MARK: - Fractional indexing (via fractional-indexing submodule) -
129 :
130 : // Wrapper for calloc: fractional_indexing expects (count, size) but cloudsync_memory_zeroalloc takes a single size.
131 63 : static void *fi_calloc_wrapper(size_t count, size_t size) {
132 63 : return cloudsync_memory_zeroalloc((uint64_t)(count * size));
133 : }
134 :
135 193 : void block_init_allocator(void) {
136 193 : fractional_indexing_allocator alloc = {
137 : .malloc = (void *(*)(size_t))cloudsync_memory_alloc,
138 : .calloc = fi_calloc_wrapper,
139 : .free = cloudsync_memory_free
140 : };
141 193 : fractional_indexing_set_allocator(&alloc);
142 193 : }
143 :
144 93 : char *block_position_between(const char *before, const char *after) {
145 93 : return generate_key_between(before, after);
146 : }
147 :
148 42 : char **block_initial_positions(int count) {
149 42 : if (count <= 0) return NULL;
150 42 : return generate_n_keys_between(NULL, NULL, count);
151 42 : }
152 :
153 : // MARK: - Block diff -
154 :
155 91 : static block_diff_t *block_diff_create(void) {
156 91 : block_diff_t *diff = (block_diff_t *)cloudsync_memory_zeroalloc(sizeof(block_diff_t));
157 91 : return diff;
158 : }
159 :
160 132 : static bool block_diff_append(block_diff_t *diff, block_diff_type type, const char *position_id, const char *content) {
161 132 : if (diff->count >= diff->capacity) {
162 91 : int new_cap = diff->capacity ? diff->capacity * 2 : 16;
163 91 : block_diff_entry_t *new_entries = (block_diff_entry_t *)cloudsync_memory_realloc(
164 91 : diff->entries, (uint64_t)(new_cap * sizeof(block_diff_entry_t)));
165 91 : if (!new_entries) return false;
166 91 : diff->entries = new_entries;
167 91 : diff->capacity = new_cap;
168 91 : }
169 132 : block_diff_entry_t *e = &diff->entries[diff->count];
170 132 : e->type = type;
171 132 : e->position_id = cloudsync_string_dup(position_id);
172 132 : e->content = content ? cloudsync_string_dup(content) : NULL;
173 132 : diff->count++;
174 132 : return true;
175 132 : }
176 :
177 91 : void block_diff_free(block_diff_t *diff) {
178 91 : if (!diff) return;
179 223 : for (int i = 0; i < diff->count; i++) {
180 132 : if (diff->entries[i].position_id) cloudsync_memory_free(diff->entries[i].position_id);
181 132 : if (diff->entries[i].content) cloudsync_memory_free(diff->entries[i].content);
182 132 : }
183 91 : if (diff->entries) cloudsync_memory_free(diff->entries);
184 91 : cloudsync_memory_free(diff);
185 91 : }
186 :
187 : // Content-based matching diff algorithm:
188 : // 1. Build a consumed-set from old blocks
189 : // 2. For each new block, find the first unconsumed old block with matching content
190 : // 3. Matched blocks keep their position_id (UNCHANGED)
191 : // 4. Unmatched new blocks get new position_ids (ADDED)
192 : // 5. Unconsumed old blocks are REMOVED
193 : // Modified blocks are detected when content changed but position stayed (handled as MODIFIED)
194 91 : block_diff_t *block_diff(block_entry_t *old_blocks, int old_count,
195 : const char **new_parts, int new_count) {
196 91 : block_diff_t *diff = block_diff_create();
197 91 : if (!diff) return NULL;
198 :
199 : // Track which old blocks have been consumed
200 91 : bool *old_consumed = NULL;
201 91 : if (old_count > 0) {
202 91 : old_consumed = (bool *)cloudsync_memory_zeroalloc((uint64_t)(old_count * sizeof(bool)));
203 91 : if (!old_consumed) {
204 0 : block_diff_free(diff);
205 0 : return NULL;
206 : }
207 91 : }
208 :
209 : // For each new block, try to find a matching unconsumed old block
210 : // Use a simple forward scan to preserve ordering
211 91 : int old_scan = 0;
212 91 : char *last_position = NULL;
213 :
214 1493 : for (int ni = 0; ni < new_count; ni++) {
215 1402 : bool found = false;
216 :
217 : // Scan forward in old blocks for a content match
218 1497 : for (int oi = old_scan; oi < old_count; oi++) {
219 1404 : if (old_consumed[oi]) continue;
220 :
221 1404 : if (strcmp(old_blocks[oi].content, new_parts[ni]) == 0) {
222 : // Exact match — mark any skipped old blocks as REMOVED
223 1329 : for (int si = old_scan; si < oi; si++) {
224 20 : if (!old_consumed[si]) {
225 20 : block_diff_append(diff, BLOCK_DIFF_REMOVED, old_blocks[si].position_id, NULL);
226 20 : old_consumed[si] = true;
227 20 : }
228 20 : }
229 1309 : old_consumed[oi] = true;
230 1309 : old_scan = oi + 1;
231 1309 : last_position = old_blocks[oi].position_id;
232 1309 : found = true;
233 1309 : break;
234 : }
235 95 : }
236 :
237 1402 : if (!found) {
238 : // New block — needs a new position_id
239 93 : const char *next_pos = NULL;
240 : // Find the next unconsumed old block's position for the upper bound
241 93 : for (int oi = old_scan; oi < old_count; oi++) {
242 39 : if (!old_consumed[oi]) {
243 39 : next_pos = old_blocks[oi].position_id;
244 39 : break;
245 : }
246 0 : }
247 :
248 93 : char *new_pos = block_position_between(last_position, next_pos);
249 93 : if (new_pos) {
250 93 : block_diff_append(diff, BLOCK_DIFF_ADDED, new_pos, new_parts[ni]);
251 93 : last_position = diff->entries[diff->count - 1].position_id;
252 93 : cloudsync_memory_free(new_pos);
253 93 : }
254 93 : }
255 1402 : }
256 :
257 : // Mark remaining unconsumed old blocks as REMOVED
258 110 : for (int oi = old_scan; oi < old_count; oi++) {
259 19 : if (!old_consumed[oi]) {
260 19 : block_diff_append(diff, BLOCK_DIFF_REMOVED, old_blocks[oi].position_id, NULL);
261 19 : }
262 19 : }
263 :
264 91 : if (old_consumed) cloudsync_memory_free(old_consumed);
265 91 : return diff;
266 91 : }
267 :
268 : // MARK: - Materialization -
269 :
270 458 : char *block_materialize_text(const char **blocks, int count, const char *delimiter) {
271 458 : if (count == 0) return cloudsync_string_dup("");
272 458 : if (!delimiter) delimiter = BLOCK_DEFAULT_DELIMITER;
273 :
274 458 : size_t dlen = strlen(delimiter);
275 458 : size_t total = 0;
276 22691 : for (int i = 0; i < count; i++) {
277 22233 : total += strlen(blocks[i]);
278 22233 : if (i < count - 1) total += dlen;
279 22233 : }
280 :
281 458 : char *result = (char *)cloudsync_memory_alloc(total + 1);
282 458 : if (!result) return NULL;
283 :
284 458 : size_t offset = 0;
285 22691 : for (int i = 0; i < count; i++) {
286 22233 : size_t blen = strlen(blocks[i]);
287 22233 : memcpy(result + offset, blocks[i], blen);
288 22233 : offset += blen;
289 22233 : if (i < count - 1) {
290 21775 : memcpy(result + offset, delimiter, dlen);
291 21775 : offset += dlen;
292 21775 : }
293 22233 : }
294 458 : result[offset] = '\0';
295 :
296 458 : return result;
297 458 : }
|