Line data Source code
1 : //
2 : // pk.c
3 : // cloudsync
4 : //
5 : // Created by Marco Bambini on 21/08/24.
6 : //
7 :
8 : #include "pk.h"
9 : #include "utils.h"
10 :
11 : #ifndef SQLITE_CORE
12 : SQLITE_EXTENSION_INIT3
13 : #endif
14 :
15 : /*
16 :
17 : The pk_encode and pk_decode functions are designed to serialize and deserialize an array of values (sqlite_value structures)
18 : into a binary format that can be transmitted over a network or stored efficiently.
19 : These functions support all the data types natively supported by SQLite (integer, float, blob, text, and null)
20 : and ensure that the serialized data is platform-independent, particularly with respect to endianness.
21 :
22 : pk_encode
23 : =========
24 : The pk_encode function encodes an array of values into a contiguous memory buffer.
25 : This buffer can then be sent over a network or saved to a file, ensuring that the data can be reliably reconstructed later, regardless of the platform.
26 :
27 : Algorithm:
28 :
29 : * Number of Columns: The first byte of the buffer stores the number of columns (num_args), which is limited to 255 columns.
30 : * Type and Length Encoding: For each column:
31 : * The type of the column (e.g., integer, float, text) is encoded in a single byte. The first 3 bits represent the type, and the remaining 5 bits encode the number of bytes required for the integer or length information if applicable.
32 : * If the column is an integer or a blob/text type, additional bytes are written to the buffer to store the actual value or the length of the data.
33 : * Endianness handling: integers and lengths are written byte by byte, most significant byte first (big-endian, network byte order), so the serialized data is platform-independent.
34 : * Floating-point numbers are copied bit-for-bit into a 64-bit integer and then written with the same big-endian byte order.
35 : * Efficient Storage: By using only the minimum number of bytes required to represent integers and lengths, the solution optimizes storage space, reducing the size of the serialized buffer.
36 :
37 : Advantages:
38 :
39 : * Platform Independence: By converting all integers and floating-point values to network byte order, the serialized data can be transmitted between systems with different endianness.
40 : * Efficiency: The function encodes data into the smallest possible format, minimizing the memory footprint of the serialized data. This is particularly important for network transmission and storage.
41 : * Flexibility: Supports multiple data types (integer, float, text, blob, null) and variable-length data, making it suitable for a wide range of applications.
42 :
43 : pk_decode
44 : =========
45 : The pk_decode function decodes the buffer created by pk_encode back into an array of sqlite_value structures.
46 : This allows the original data to be reconstructed and used by the application.
47 :
48 : Algorithm:
49 :
50 : * Read Number of Columns: The function starts by reading the first byte to determine the number of columns in the buffer.
51 : * Type and Length Decoding: For each column:
52 : * The function reads the type byte to determine the column's data type and the number of bytes used to store length or integer values.
53 : * Depending on the type, the appropriate number of bytes is read from the buffer to reconstruct the integer, floating-point value, blob, or text data.
54 : * Endianness is handled by reassembling each value from its big-endian bytes (most significant byte first), independently of the host's byte order.
55 : * Memory Management: For blob and text data no memory is allocated; the callback receives a pointer into the input buffer together with the length, so the buffer must remain valid for as long as those pointers are in use.
56 :
57 : Advantages:
58 :
59 : * Correctness: By reversing the serialization process, the pk_decode function ensures that the original data can be accurately reconstructed.
60 : * Endianness Handling: The function handles endianness conversion during decoding, ensuring that data is correctly interpreted regardless of the platform on which it was serialized or deserialized.
61 : * Robustness: The function includes error handling to manage cases where the buffer is malformed or insufficient data is available, reducing the risk of corruption or crashes.
62 :
63 : Overall Advantages of the Solution
64 :
65 : * Portability: The serialized format is platform-independent, ensuring data can be transmitted across different architectures without compatibility issues.
66 : * Efficiency: The use of compact encoding for integers and lengths reduces the size of the serialized data, optimizing it for storage and transmission.
67 : * Versatility: The ability to handle multiple data types and variable-length data makes this solution suitable for complex data structures.
68 : * Simplicity: The functions are designed to be straightforward to use, with clear memory management responsibilities.
69 :
70 : */
71 :
72 : // Three bits are reserved for the type field, so only values in the 0..7 range can be used (8 values)
73 : // SQLITE already reserved values from 1 to 5
74 : // #define SQLITE_INTEGER 1
75 : // #define SQLITE_FLOAT 2
76 : // #define SQLITE_TEXT 3
77 : // #define SQLITE_BLOB 4
78 : // #define SQLITE_NULL 5
79 : #define SQLITE_NEGATIVE_INTEGER 0
80 : #define SQLITE_MAX_NEGATIVE_INTEGER 6
81 : #define SQLITE_NEGATIVE_FLOAT 7
82 :
83 : // MARK: - Decoding -
84 :
85 772148 : int pk_decode_bind_callback (void *xdata, int index, int type, int64_t ival, double dval, char *pval) {
86 : // default decode callback used to bind values to a sqlite3_stmt vm
87 :
88 772148 : sqlite3_stmt *vm = (sqlite3_stmt *)xdata;
89 772148 : int rc = SQLITE_OK;
90 772148 : switch (type) {
91 : case SQLITE_INTEGER:
92 261868 : rc = sqlite3_bind_int64(vm, index+1, ival);
93 261868 : break;
94 :
95 : case SQLITE_FLOAT:
96 1 : rc = sqlite3_bind_double(vm, index+1, dval);
97 1 : break;
98 :
99 : case SQLITE_NULL:
100 789 : rc = sqlite3_bind_null(vm, index+1);
101 789 : break;
102 :
103 : case SQLITE_TEXT:
104 412146 : rc = sqlite3_bind_text(vm, index+1, pval, (int)ival, SQLITE_STATIC);
105 412146 : break;
106 :
107 : case SQLITE_BLOB:
108 97344 : rc = sqlite3_bind_blob64(vm, index+1, (const void *)pval, (sqlite3_uint64)ival, SQLITE_STATIC);
109 97344 : break;
110 : }
111 :
112 772148 : return rc;
113 : }
114 :
115 8 : int pk_decode_print_callback (void *xdata, int index, int type, int64_t ival, double dval, char *pval) {
116 8 : switch (type) {
117 : case SQLITE_INTEGER:
118 4 : printf("%d\tINTEGER:\t%lld\n", index, (long long)ival);
119 4 : break;
120 :
121 : case SQLITE_FLOAT:
122 1 : printf("%d\tFLOAT:\t%.5f\n", index, dval);
123 1 : break;
124 :
125 : case SQLITE_NULL:
126 1 : printf("%d\tNULL\n", index);
127 1 : break;
128 :
129 : case SQLITE_TEXT:
130 1 : printf("%d\tTEXT:\t%s\n", index, pval);
131 1 : break;
132 :
133 : case SQLITE_BLOB:
134 1 : printf("%d\tBLOB:\t%lld bytes\n", index, (long long)ival);
135 1 : break;
136 : }
137 :
138 8 : return SQLITE_OK;
139 : }
140 :
// Reads the byte at offset *bseek of buffer and advances *bseek by one.
// No bounds checking is performed here.
uint8_t pk_decode_u8 (char *buffer, size_t *bseek) {
    const size_t pos = *bseek;
    *bseek = pos + 1;
    return (uint8_t)buffer[pos];
}
146 :
// Decodes nbytes big-endian bytes (most significant byte first) starting at offset *bseek
// and advances *bseek past them.
//
// buffer  encoded data
// bseek   in/out read offset into buffer
// nbytes  number of bytes to consume (0 yields 0)
//
// Returns the decoded value. No bounds checking is performed here.
int64_t pk_decode_int64 (char *buffer, size_t *bseek, size_t nbytes) {
    // accumulate in an unsigned type: left-shifting a signed value into or past the sign bit
    // is undefined behaviour, which an 8-byte payload with the top bit set would trigger
    uint64_t acc = 0;
    size_t pos = *bseek;

    for (size_t i = 0; i < nbytes; i++) {
        acc = (acc << 8) | (uint8_t)buffer[pos++];
    }

    *bseek = pos;
    return (int64_t)acc;
}
158 :
// Returns a pointer to the blen bytes located at offset *bseek of buffer and advances *bseek by blen.
// Nothing is copied: the returned pointer aliases buffer.
char *pk_decode_data (char *buffer, size_t *bseek, int32_t blen) {
    const size_t start = *bseek;
    *bseek = start + blen;
    return &buffer[start];
}
165 :
// Decodes a double stored as 8 big-endian bytes at offset *bseek and advances *bseek past them.
// The bytes are first reassembled into a 64-bit integer, whose bit pattern is then copied into the double.
double pk_decode_double (char *buffer, size_t *bseek) {
    const int64_t bits = pk_decode_int64(buffer, bseek, sizeof(int64_t));

    double result = 0;
    memcpy(&result, &bits, sizeof(bits));
    return result;
}
173 :
174 231747 : int pk_decode(char *buffer, size_t blen, int count, size_t *seek, int (*cb) (void *xdata, int index, int type, int64_t ival, double dval, char *pval), void *xdata) {
175 231747 : size_t bseek = (seek) ? *seek : 0;
176 231747 : if (count == -1) count = pk_decode_u8(buffer, &bseek);
177 :
178 1649915 : for (size_t i = 0; i < (size_t)count; i++) {
179 1418168 : uint8_t type_byte = (uint8_t)pk_decode_u8(buffer, &bseek);
180 1418168 : int type = (int)(type_byte & 0x07);
181 1418168 : size_t nbytes = (type_byte >> 3) & 0x1F;
182 :
183 1418168 : switch (type) {
184 : case SQLITE_MAX_NEGATIVE_INTEGER: {
185 2 : int64_t value = INT64_MIN;
186 2 : type = SQLITE_INTEGER;
187 2 : if (cb) if (cb(xdata, (int)i, type, value, 0.0, NULL) != SQLITE_OK) return -1;
188 : }
189 2 : break;
190 :
191 : case SQLITE_NEGATIVE_INTEGER:
192 : case SQLITE_INTEGER: {
193 423016 : int64_t value = pk_decode_int64(buffer, &bseek, nbytes);
194 423016 : if (type == SQLITE_NEGATIVE_INTEGER) {value = -value; type = SQLITE_INTEGER;}
195 423016 : if (cb) if (cb(xdata, (int)i, type, value, 0.0, NULL) != SQLITE_OK) return -1;
196 : }
197 423016 : break;
198 :
199 : case SQLITE_NEGATIVE_FLOAT:
200 : case SQLITE_FLOAT: {
201 161468 : double value = pk_decode_double(buffer, &bseek);
202 161468 : if (type == SQLITE_NEGATIVE_FLOAT) {value = -value; type = SQLITE_FLOAT;}
203 161468 : if (cb) if (cb(xdata, (int)i, type, 0, value, NULL) != SQLITE_OK) return -1;
204 : }
205 161468 : break;
206 :
207 : case SQLITE_TEXT:
208 : case SQLITE_BLOB: {
209 832891 : int64_t length = pk_decode_int64(buffer, &bseek, nbytes);
210 832891 : char *value = pk_decode_data(buffer, &bseek, (int32_t)length);
211 832891 : if (cb) if (cb(xdata, (int)i, type, length, 0.0, value) != SQLITE_OK) return -1;
212 : }
213 832891 : break;
214 :
215 : case SQLITE_NULL: {
216 791 : if (cb) if (cb(xdata, (int)i, type, 0, 0.0, NULL) != SQLITE_OK) return -1;
217 : }
218 791 : break;
219 : }
220 1418168 : }
221 :
222 231747 : if (seek) *seek = bseek;
223 231747 : return count;
224 231747 : }
225 :
// Decodes a primary-key buffer: the first byte holds the number of values, followed by the
// encoded values themselves.
//
// buffer  encoded primary key
// blen    size of buffer in bytes
// cb      per-value callback, forwarded to pk_decode
// xdata   opaque pointer forwarded to cb
//
// Returns the result of pk_decode, or -1 when buffer is NULL or empty.
int pk_decode_prikey (char *buffer, size_t blen, int (*cb) (void *xdata, int index, int type, int64_t ival, double dval, char *pval), void *xdata) {
    // the count byte must exist before it can be read
    if (!buffer || blen == 0) return -1;

    size_t bseek = 0;
    uint8_t count = pk_decode_u8(buffer, &bseek);
    return pk_decode(buffer, blen, count, &bseek, cb, xdata);
}
231 :
232 : // MARK: - Encoding -
233 :
// Returns how many bytes (1..8) the encoder uses to store value.
// Each size keeps the top bit of its first byte clear, so n bytes cover values up to 2^(8n-1) - 1.
// Zero and negative inputs yield 1.
size_t pk_encode_nbytes_needed (int64_t value) {
    // largest value representable with 1, 2, ... 7 bytes
    static const int64_t limits[] = {
        0x7F,
        0x7FFF,
        0x7FFFFF,
        0x7FFFFFFF,
        0x7FFFFFFFFF,
        0x7FFFFFFFFFFF,
        0x7FFFFFFFFFFFFF
    };
    const size_t nlimits = sizeof(limits) / sizeof(limits[0]);

    for (size_t i = 0; i < nlimits; i++) {
        if (value <= limits[i]) return i + 1;
    }

    // beyond the 7-byte range
    return nlimits + 1;
}
244 :
245 62976 : size_t pk_encode_size (sqlite3_value **argv, int argc, int reserved) {
246 : // estimate the required buffer size
247 62976 : size_t required = reserved;
248 : size_t nbytes;
249 : int64_t val, len;
250 :
251 1155624 : for (int i = 0; i < argc; i++) {
252 1092648 : switch (sqlite3_value_type(argv[i])) {
253 : case SQLITE_INTEGER:
254 373789 : val = sqlite3_value_int64(argv[i]);
255 373789 : if (val == INT64_MIN) {
256 1 : required += 1;
257 1 : break;
258 : }
259 373788 : if (val < 0) val = -val;
260 373788 : nbytes = pk_encode_nbytes_needed(val);
261 373788 : required += 1 + nbytes;
262 373788 : break;
263 : case SQLITE_FLOAT:
264 161467 : required += 1 + sizeof(int64_t);
265 161467 : break;
266 : case SQLITE_TEXT:
267 : case SQLITE_BLOB:
268 556593 : len = (int32_t)sqlite3_value_bytes(argv[i]);
269 556593 : nbytes = pk_encode_nbytes_needed(len);
270 556593 : required += 1 + len + nbytes;
271 556593 : break;
272 : case SQLITE_NULL:
273 799 : required += 1;
274 799 : break;
275 : }
276 1092648 : }
277 :
278 62976 : return required;
279 : }
280 :
// Stores value at offset bseek of buffer and returns the offset of the next free byte.
size_t pk_encode_u8 (char *buffer, size_t bseek, uint8_t value) {
    *(buffer + bseek) = value;
    return bseek + 1;
}
285 :
// Writes the low nbytes bytes of value at offset bseek of buffer in big-endian order
// (most significant byte first) and returns the offset of the next free byte.
size_t pk_encode_int64 (char *buffer, size_t bseek, int64_t value, size_t nbytes) {
    size_t remaining = nbytes;

    while (remaining > 0) {
        remaining--;
        // `remaining` is now the number of lower-order bytes still to be written after this one
        buffer[bseek] = (uint8_t)((value >> (8 * remaining)) & 0xFF);
        bseek++;
    }

    return bseek;
}
292 :
// Copies datalen bytes from data to offset bseek of buffer and returns the offset of the next free byte.
//
// buffer   destination buffer
// bseek    write offset into buffer
// data     source bytes; may be NULL when datalen is 0
// datalen  number of bytes to copy
size_t pk_encode_data (char *buffer, size_t bseek, char *data, size_t datalen) {
    // nothing to copy; also avoids handing memcpy a NULL source, which is undefined
    // behaviour even for a zero length
    if (datalen == 0) return bseek;

    memcpy(buffer + bseek, data, datalen);
    return bseek + datalen;
}
297 :
298 62976 : char *pk_encode (sqlite3_value **argv, int argc, char *b, bool is_prikey, size_t *bsize) {
299 62976 : size_t bseek = 0;
300 62976 : size_t blen = 0;
301 62976 : char *buffer = b;
302 :
303 : // in primary-key encoding the number of items must be explicitly added to the encoded buffer
304 62976 : if (is_prikey) {
305 : // 1 is the number of items in the serialization (always 1 byte so max 255 primary keys, even if there is an hard SQLite limit of 128)
306 14226 : blen = pk_encode_size(argv, argc, 1);
307 14226 : size_t blen_curr = *bsize;
308 14226 : buffer = (blen > blen_curr || b == NULL) ? cloudsync_memory_alloc((sqlite3_uint64)blen) : b;
309 14226 : if (!buffer) return NULL;
310 :
311 : // the first u8 value is the total number of items in the primary key(s)
312 14226 : bseek = pk_encode_u8(buffer, 0, argc);
313 14226 : }
314 :
315 1155624 : for (int i = 0; i < argc; i++) {
316 1092648 : int type = sqlite3_value_type(argv[i]);
317 1092648 : switch (type) {
318 : case SQLITE_INTEGER: {
319 373789 : int64_t value = sqlite3_value_int64(argv[i]);
320 373789 : if (value == INT64_MIN) {
321 1 : bseek = pk_encode_u8(buffer, bseek, SQLITE_MAX_NEGATIVE_INTEGER);
322 1 : break;
323 : }
324 373788 : if (value < 0) {value = -value; type = SQLITE_NEGATIVE_INTEGER;}
325 373788 : size_t nbytes = pk_encode_nbytes_needed(value);
326 373788 : uint8_t type_byte = (nbytes << 3) | type;
327 373788 : bseek = pk_encode_u8(buffer, bseek, type_byte);
328 373788 : bseek = pk_encode_int64(buffer, bseek, value, nbytes);
329 : }
330 373788 : break;
331 : case SQLITE_FLOAT: {
332 161467 : double value = sqlite3_value_double(argv[i]);
333 161467 : if (value < 0) {value = -value; type = SQLITE_NEGATIVE_FLOAT;}
334 : int64_t net_double;
335 161467 : memcpy(&net_double, &value, sizeof(int64_t));
336 161467 : bseek = pk_encode_u8(buffer, bseek, type);
337 161467 : bseek = pk_encode_int64(buffer, bseek, net_double, sizeof(int64_t));
338 : }
339 161467 : break;
340 : case SQLITE_TEXT:
341 : case SQLITE_BLOB: {
342 556593 : int32_t len = (int32_t)sqlite3_value_bytes(argv[i]);
343 556593 : size_t nbytes = pk_encode_nbytes_needed(len);
344 556593 : uint8_t type_byte = (nbytes << 3) | sqlite3_value_type(argv[i]);
345 556593 : bseek = pk_encode_u8(buffer, bseek, type_byte);
346 556593 : bseek = pk_encode_int64(buffer, bseek, len, nbytes);
347 556593 : bseek = pk_encode_data(buffer, bseek, (char *)sqlite3_value_blob(argv[i]), len);
348 : }
349 556593 : break;
350 : case SQLITE_NULL: {
351 799 : bseek = pk_encode_u8(buffer, bseek, SQLITE_NULL);
352 : }
353 799 : break;
354 : }
355 1092648 : }
356 :
357 62976 : if (bsize) *bsize = blen;
358 62976 : return buffer;
359 62976 : }
360 :
361 14226 : char *pk_encode_prikey (sqlite3_value **argv, int argc, char *b, size_t *bsize) {
362 14226 : return pk_encode(argv, argc, b, true, bsize);
363 : }
|