Line data Source code
1 : //
2 : // block.c
3 : // cloudsync
4 : //
5 : // Block-level LWW CRDT support for text/blob fields.
6 : //
7 :
8 : #include <stdio.h>
9 : #include <string.h>
10 : #include <stdlib.h>
11 : #include "block.h"
12 : #include "utils.h"
13 : #include "fractional_indexing.h"
14 :
15 : // MARK: - Col name helpers -
16 :
17 167437 : bool block_is_block_colname(const char *col_name) {
18 167437 : if (!col_name) return false;
19 167437 : return strchr(col_name, BLOCK_SEPARATOR) != NULL;
20 167437 : }
21 :
22 412 : char *block_extract_base_colname(const char *col_name) {
23 412 : if (!col_name) return NULL;
24 412 : const char *sep = strchr(col_name, BLOCK_SEPARATOR);
25 412 : if (!sep) return cloudsync_string_dup(col_name);
26 412 : return cloudsync_string_ndup(col_name, (size_t)(sep - col_name));
27 412 : }
28 :
29 1354 : const char *block_extract_position_id(const char *col_name) {
30 1354 : if (!col_name) return NULL;
31 1354 : const char *sep = strchr(col_name, BLOCK_SEPARATOR);
32 1354 : if (!sep) return NULL;
33 1354 : return sep + 1;
34 1354 : }
35 :
36 1085 : char *block_build_colname(const char *base_col, const char *position_id) {
37 1085 : if (!base_col || !position_id) return NULL;
38 1085 : size_t blen = strlen(base_col);
39 1085 : size_t plen = strlen(position_id);
40 1085 : char *result = (char *)cloudsync_memory_alloc(blen + 1 + plen + 1);
41 1085 : if (!result) return NULL;
42 1085 : memcpy(result, base_col, blen);
43 1085 : result[blen] = BLOCK_SEPARATOR;
44 1085 : memcpy(result + blen + 1, position_id, plen);
45 1085 : result[blen + 1 + plen] = '\0';
46 1085 : return result;
47 1085 : }
48 :
49 : // MARK: - Text splitting -
50 :
51 232 : static block_list_t *block_list_create(void) {
52 232 : block_list_t *list = (block_list_t *)cloudsync_memory_zeroalloc(sizeof(block_list_t));
53 232 : return list;
54 : }
55 :
56 3083 : static bool block_list_append(block_list_t *list, const char *content, size_t content_len, const char *position_id) {
57 3083 : if (list->count >= list->capacity) {
58 337 : int new_cap = list->capacity ? list->capacity * 2 : 16;
59 337 : block_entry_t *new_entries = (block_entry_t *)cloudsync_memory_realloc(
60 337 : list->entries, (uint64_t)(new_cap * sizeof(block_entry_t)));
61 337 : if (!new_entries) return false;
62 337 : list->entries = new_entries;
63 337 : list->capacity = new_cap;
64 337 : }
65 3083 : block_entry_t *e = &list->entries[list->count];
66 3083 : e->content = cloudsync_string_ndup(content, content_len);
67 3083 : e->position_id = position_id ? cloudsync_string_dup(position_id) : NULL;
68 3083 : if (!e->content) return false;
69 3083 : list->count++;
70 3083 : return true;
71 3083 : }
72 :
73 232 : void block_list_free(block_list_t *list) {
74 232 : if (!list) return;
75 3315 : for (int i = 0; i < list->count; i++) {
76 3083 : if (list->entries[i].content) cloudsync_memory_free(list->entries[i].content);
77 3083 : if (list->entries[i].position_id) cloudsync_memory_free(list->entries[i].position_id);
78 3083 : }
79 232 : if (list->entries) cloudsync_memory_free(list->entries);
80 232 : cloudsync_memory_free(list);
81 232 : }
82 :
83 94 : block_list_t *block_list_create_empty(void) {
84 94 : return block_list_create();
85 : }
86 :
87 1354 : bool block_list_add(block_list_t *list, const char *content, const char *position_id) {
88 1354 : if (!list) return false;
89 1354 : return block_list_append(list, content, strlen(content), position_id);
90 1354 : }
91 :
92 138 : block_list_t *block_split(const char *text, const char *delimiter) {
93 138 : block_list_t *list = block_list_create();
94 138 : if (!list) return NULL;
95 :
96 138 : if (!text || !*text) {
97 : // Empty text produces a single empty block
98 5 : block_list_append(list, "", 0, NULL);
99 5 : return list;
100 : }
101 :
102 133 : size_t dlen = strlen(delimiter);
103 133 : if (dlen == 0) {
104 : // No delimiter: entire text is one block
105 0 : block_list_append(list, text, strlen(text), NULL);
106 0 : return list;
107 : }
108 :
109 133 : const char *start = text;
110 : const char *found;
111 1724 : while ((found = strstr(start, delimiter)) != NULL) {
112 1591 : size_t seg_len = (size_t)(found - start);
113 1591 : if (!block_list_append(list, start, seg_len, NULL)) {
114 0 : block_list_free(list);
115 0 : return NULL;
116 : }
117 1591 : start = found + dlen;
118 : }
119 : // Last segment (after last delimiter or entire string if no delimiter found)
120 133 : if (!block_list_append(list, start, strlen(start), NULL)) {
121 0 : block_list_free(list);
122 0 : return NULL;
123 : }
124 :
125 133 : return list;
126 138 : }
127 :
128 : // MARK: - Fractional indexing (via fractional-indexing submodule) -
129 :
130 : // Wrapper for malloc: fractional_indexing expects size_t (32-bit on WASM) but cloudsync_memory_alloc takes uint64_t.
131 : // A direct cast passes strict call_indirect type checking in WASM and triggers a RuntimeError.
132 437 : static void *fi_malloc_wrapper(size_t size) {
133 437 : return cloudsync_memory_alloc((uint64_t)size);
134 : }
135 :
136 : // Wrapper for calloc: fractional_indexing expects (count, size) but cloudsync_memory_zeroalloc takes a single size.
137 69 : static void *fi_calloc_wrapper(size_t count, size_t size) {
138 69 : return cloudsync_memory_zeroalloc((uint64_t)(count * size));
139 : }
140 :
141 232 : void block_init_allocator(void) {
142 232 : fractional_indexing_allocator alloc = {
143 : .malloc = fi_malloc_wrapper,
144 : .calloc = fi_calloc_wrapper,
145 : .free = cloudsync_memory_free
146 : };
147 232 : fractional_indexing_set_allocator(&alloc);
148 232 : }
149 :
150 95 : char *block_position_between(const char *before, const char *after) {
151 95 : return generate_key_between(before, after);
152 : }
153 :
154 46 : char **block_initial_positions(int count) {
155 46 : if (count <= 0) return NULL;
156 46 : return generate_n_keys_between(NULL, NULL, count);
157 46 : }
158 :
159 : // MARK: - Block diff -
160 :
161 93 : static block_diff_t *block_diff_create(void) {
162 93 : block_diff_t *diff = (block_diff_t *)cloudsync_memory_zeroalloc(sizeof(block_diff_t));
163 93 : return diff;
164 : }
165 :
166 136 : static bool block_diff_append(block_diff_t *diff, block_diff_type type, const char *position_id, const char *content) {
167 136 : if (diff->count >= diff->capacity) {
168 93 : int new_cap = diff->capacity ? diff->capacity * 2 : 16;
169 93 : block_diff_entry_t *new_entries = (block_diff_entry_t *)cloudsync_memory_realloc(
170 93 : diff->entries, (uint64_t)(new_cap * sizeof(block_diff_entry_t)));
171 93 : if (!new_entries) return false;
172 93 : diff->entries = new_entries;
173 93 : diff->capacity = new_cap;
174 93 : }
175 136 : block_diff_entry_t *e = &diff->entries[diff->count];
176 136 : e->type = type;
177 136 : e->position_id = cloudsync_string_dup(position_id);
178 136 : e->content = content ? cloudsync_string_dup(content) : NULL;
179 136 : diff->count++;
180 136 : return true;
181 136 : }
182 :
183 93 : void block_diff_free(block_diff_t *diff) {
184 93 : if (!diff) return;
185 229 : for (int i = 0; i < diff->count; i++) {
186 136 : if (diff->entries[i].position_id) cloudsync_memory_free(diff->entries[i].position_id);
187 136 : if (diff->entries[i].content) cloudsync_memory_free(diff->entries[i].content);
188 136 : }
189 93 : if (diff->entries) cloudsync_memory_free(diff->entries);
190 93 : cloudsync_memory_free(diff);
191 93 : }
192 :
193 : // Content-based matching diff algorithm:
194 : // 1. Build a consumed-set from old blocks
195 : // 2. For each new block, find the first unconsumed old block with matching content
196 : // 3. Matched blocks keep their position_id (UNCHANGED)
197 : // 4. Unmatched new blocks get new position_ids (ADDED)
198 : // 5. Unconsumed old blocks are REMOVED
199 : // Modified blocks are detected when content changed but position stayed (handled as MODIFIED)
200 93 : block_diff_t *block_diff(block_entry_t *old_blocks, int old_count,
201 : const char **new_parts, int new_count) {
202 93 : block_diff_t *diff = block_diff_create();
203 93 : if (!diff) return NULL;
204 :
205 : // Track which old blocks have been consumed
206 93 : bool *old_consumed = NULL;
207 93 : if (old_count > 0) {
208 93 : old_consumed = (bool *)cloudsync_memory_zeroalloc((uint64_t)(old_count * sizeof(bool)));
209 93 : if (!old_consumed) {
210 0 : block_diff_free(diff);
211 0 : return NULL;
212 : }
213 93 : }
214 :
215 : // For each new block, try to find a matching unconsumed old block
216 : // Use a simple forward scan to preserve ordering
217 93 : int old_scan = 0;
218 93 : char *last_position = NULL;
219 :
220 1501 : for (int ni = 0; ni < new_count; ni++) {
221 1408 : bool found = false;
222 :
223 : // Scan forward in old blocks for a content match
224 1509 : for (int oi = old_scan; oi < old_count; oi++) {
225 1414 : if (old_consumed[oi]) continue;
226 :
227 1414 : if (strcmp(old_blocks[oi].content, new_parts[ni]) == 0) {
228 : // Exact match — mark any skipped old blocks as REMOVED
229 1335 : for (int si = old_scan; si < oi; si++) {
230 22 : if (!old_consumed[si]) {
231 22 : block_diff_append(diff, BLOCK_DIFF_REMOVED, old_blocks[si].position_id, NULL);
232 22 : old_consumed[si] = true;
233 22 : }
234 22 : }
235 1313 : old_consumed[oi] = true;
236 1313 : old_scan = oi + 1;
237 1313 : last_position = old_blocks[oi].position_id;
238 1313 : found = true;
239 1313 : break;
240 : }
241 101 : }
242 :
243 1408 : if (!found) {
244 : // New block — needs a new position_id
245 95 : const char *next_pos = NULL;
246 : // Find the next unconsumed old block's position for the upper bound
247 95 : for (int oi = old_scan; oi < old_count; oi++) {
248 41 : if (!old_consumed[oi]) {
249 41 : next_pos = old_blocks[oi].position_id;
250 41 : break;
251 : }
252 0 : }
253 :
254 95 : char *new_pos = block_position_between(last_position, next_pos);
255 95 : if (new_pos) {
256 95 : block_diff_append(diff, BLOCK_DIFF_ADDED, new_pos, new_parts[ni]);
257 95 : last_position = diff->entries[diff->count - 1].position_id;
258 95 : cloudsync_memory_free(new_pos);
259 95 : }
260 95 : }
261 1408 : }
262 :
263 : // Mark remaining unconsumed old blocks as REMOVED
264 112 : for (int oi = old_scan; oi < old_count; oi++) {
265 19 : if (!old_consumed[oi]) {
266 19 : block_diff_append(diff, BLOCK_DIFF_REMOVED, old_blocks[oi].position_id, NULL);
267 19 : }
268 19 : }
269 :
270 93 : if (old_consumed) cloudsync_memory_free(old_consumed);
271 93 : return diff;
272 93 : }
273 :
274 : // MARK: - Materialization -
275 :
276 458 : char *block_materialize_text(const char **blocks, int count, const char *delimiter) {
277 458 : if (count == 0) return cloudsync_string_dup("");
278 458 : if (!delimiter) delimiter = BLOCK_DEFAULT_DELIMITER;
279 :
280 458 : size_t dlen = strlen(delimiter);
281 458 : size_t total = 0;
282 22691 : for (int i = 0; i < count; i++) {
283 22233 : total += strlen(blocks[i]);
284 22233 : if (i < count - 1) total += dlen;
285 22233 : }
286 :
287 458 : char *result = (char *)cloudsync_memory_alloc(total + 1);
288 458 : if (!result) return NULL;
289 :
290 458 : size_t offset = 0;
291 22691 : for (int i = 0; i < count; i++) {
292 22233 : size_t blen = strlen(blocks[i]);
293 22233 : memcpy(result + offset, blocks[i], blen);
294 22233 : offset += blen;
295 22233 : if (i < count - 1) {
296 21775 : memcpy(result + offset, delimiter, dlen);
297 21775 : offset += dlen;
298 21775 : }
299 22233 : }
300 458 : result[offset] = '\0';
301 :
302 458 : return result;
303 458 : }
|