Line data Source code
1 : //
2 : // pk.c
3 : // cloudsync
4 : //
5 : // Created by Marco Bambini on 21/08/24.
6 : //
7 :
8 : #include "pk.h"
9 : #include "utils.h"
10 :
11 : #ifndef SQLITE_CORE
12 : SQLITE_EXTENSION_INIT3
13 : #endif
14 :
15 : /*
16 :
17 : The pk_encode and pk_decode functions are designed to serialize and deserialize an array of values (sqlite_value structures)
18 : into a binary format that can be transmitted over a network or stored efficiently.
19 : These functions support all the data types natively supported by SQLite (integer, float, blob, text, and null)
and ensure that the serialized data is platform-independent, particularly with respect to endianness.
21 :
22 : pk_encode
23 : =========
24 : The pk_encode function encodes an array of values into a contiguous memory buffer.
25 : This buffer can then be sent over a network or saved to a file, ensuring that the data can be reliably reconstructed later, regardless of the platform.
26 :
27 : Algorithm:
28 :
29 : * Number of Columns: The first byte of the buffer stores the number of columns (num_args), which is limited to 255 columns.
30 : * Type and Length Encoding: For each column:
* The type of the column (e.g., integer, float, text) is encoded in a single byte. The lowest 3 bits represent the type, and the upper 5 bits encode the number of bytes used to store the integer value or the length field, if applicable.
32 : * If the column is an integer or a blob/text type, additional bytes are written to the buffer to store the actual value or the length of the data.
* Endianness handling: integers and lengths are written one byte at a time, most significant byte first (big-endian, i.e. network byte order), making the serialized data platform-independent.
* Floating-point numbers are treated as 64-bit integers for endianness conversion.
35 : * Efficient Storage: By using only the minimum number of bytes required to represent integers and lengths, the solution optimizes storage space, reducing the size of the serialized buffer.
36 :
37 : Advantages:
38 :
39 : * Platform Independence: By converting all integers and floating-point values to network byte order, the serialized data can be transmitted between systems with different endianess.
40 : * Efficiency: The function encodes data into the smallest possible format, minimizing the memory footprint of the serialized data. This is particularly important for network transmission and storage.
41 : * Flexibility: Supports multiple data types (integer, float, text, blob, null) and variable-length data, making it suitable for a wide range of applications.
42 :
43 : pk_decode
44 : =========
45 : The pk_decode function decodes the buffer created by pk_encode back into an array of sqlite_value structures.
46 : This allows the original data to be reconstructed and used by the application.
47 :
48 : Algorithm:
49 :
50 : * Read Number of Columns: The function starts by reading the first byte to determine the number of columns in the buffer.
51 : * Type and Length Decoding: For each column:
52 : * The function reads the type byte to determine the column's data type and the number of bytes used to store length or integer values.
53 : * Depending on the type, the appropriate number of bytes is read from the buffer to reconstruct the integer, floating-point value, blob, or text data.
* Endianness is handled by reassembling the big-endian bytes into a host-order value one byte at a time.
* Memory Management: Blob and text data are not copied. The callback receives a pointer into the input buffer, so the data is valid only as long as that buffer is; no memory needs to be freed by the caller.
56 :
57 : Advantages:
58 :
* Correctness: By reversing the serialization process, the pk_decode function ensures that the original data can be accurately reconstructed.
* Endianness Handling: The function handles endianness conversion during decoding, ensuring that data is correctly interpreted regardless of the platform on which it was serialized or deserialized.
61 : * Robustness: The function includes error handling to manage cases where the buffer is malformed or insufficient data is available, reducing the risk of corruption or crashes.
62 :
63 : Overall Advantages of the Solution
64 :
65 : * Portability: The serialized format is platform-independent, ensuring data can be transmitted across different architectures without compatibility issues.
66 : * Efficiency: The use of compact encoding for integers and lengths reduces the size of the serialized data, optimizing it for storage and transmission.
67 : * Versatility: The ability to handle multiple data types and variable-length data makes this solution suitable for complex data structures.
68 : * Simplicity: The functions are designed to be straightforward to use, with clear memory management responsibilities.
69 :
70 : */
71 :
// Three bits are reserved for the type field, so only values in the 0..7 range can be used (8 values).
// SQLite already reserves the values from 1 to 5:
// #define SQLITE_INTEGER 1
// #define SQLITE_FLOAT 2
// #define SQLITE_TEXT 3
// #define SQLITE_BLOB 4
// #define SQLITE_NULL 5
// The three remaining values (0, 6 and 7) are used as encoder-specific tags.

// Negative integer: the encoder stores the magnitude (-value) and the decoder negates it back.
#define SQLITE_NEGATIVE_INTEGER 0
// INT64_MIN: it has no positive int64_t counterpart, so it is encoded as a bare type byte with no payload.
#define SQLITE_MAX_NEGATIVE_INTEGER 6
// Negative double: the encoder stores -value and the decoder negates it back.
#define SQLITE_NEGATIVE_FLOAT 7
82 :
83 : // MARK: - Decoding -
84 :
85 54884 : int pk_decode_bind_callback (void *xdata, int index, int type, int64_t ival, double dval, char *pval) {
86 : // default decode callback used to bind values to a sqlite3_stmt vm
87 :
88 54884 : sqlite3_stmt *vm = (sqlite3_stmt *)xdata;
89 54884 : int rc = SQLITE_OK;
90 54884 : switch (type) {
91 : case SQLITE_INTEGER:
92 18008 : rc = sqlite3_bind_int64(vm, index+1, ival);
93 18008 : break;
94 :
95 : case SQLITE_FLOAT:
96 1 : rc = sqlite3_bind_double(vm, index+1, dval);
97 1 : break;
98 :
99 : case SQLITE_NULL:
100 306 : rc = sqlite3_bind_null(vm, index+1);
101 306 : break;
102 :
103 : case SQLITE_TEXT:
104 28202 : rc = sqlite3_bind_text(vm, index+1, pval, (int)ival, SQLITE_STATIC);
105 28202 : break;
106 :
107 : case SQLITE_BLOB:
108 8367 : rc = sqlite3_bind_blob64(vm, index+1, (const void *)pval, (sqlite3_uint64)ival, SQLITE_STATIC);
109 8367 : break;
110 : }
111 :
112 54884 : return rc;
113 : }
114 :
115 8 : int pk_decode_print_callback (void *xdata, int index, int type, int64_t ival, double dval, char *pval) {
116 8 : switch (type) {
117 : case SQLITE_INTEGER:
118 4 : printf("%d\tINTEGER:\t%lld\n", index, (long long)ival);
119 4 : break;
120 :
121 : case SQLITE_FLOAT:
122 1 : printf("%d\tFLOAT:\t%.5f\n", index, dval);
123 1 : break;
124 :
125 : case SQLITE_NULL:
126 1 : printf("%d\tNULL\n", index);
127 1 : break;
128 :
129 : case SQLITE_TEXT:
130 1 : printf("%d\tTEXT:\t%s\n", index, pval);
131 1 : break;
132 :
133 : case SQLITE_BLOB:
134 1 : printf("%d\tBLOB:\t%lld bytes\n", index, (long long)ival);
135 1 : break;
136 : }
137 :
138 8 : return SQLITE_OK;
139 : }
140 :
/*
 * Reads one byte from buffer at offset *bseek and advances *bseek by one.
 * No bounds checking is performed here.
 */
uint8_t pk_decode_u8 (char *buffer, size_t *bseek) {
    const size_t pos = *bseek;
    *bseek = pos + 1;
    return (uint8_t)buffer[pos];
}
146 :
/*
 * Reads nbytes bytes from buffer at offset *bseek, interprets them as a
 * big-endian integer (most significant byte first) and advances *bseek by
 * nbytes. With nbytes == 0 nothing is read and 0 is returned.
 * No bounds checking is performed here.
 */
int64_t pk_decode_int64 (char *buffer, size_t *bseek, size_t nbytes) {
    // accumulate in an unsigned type: left-shifting a signed value into (or past)
    // the sign bit is undefined behavior, which an 8-byte payload whose first
    // byte is >= 0x80 would otherwise trigger
    uint64_t bits = 0;

    for (size_t i = 0; i < nbytes; i++) {
        bits = (bits << 8) | (uint8_t)buffer[*bseek];
        (*bseek)++;
    }

    // reinterpret the bit pattern as signed without relying on an
    // implementation-defined out-of-range conversion
    int64_t value;
    memcpy(&value, &bits, sizeof(value));
    return value;
}
158 :
/*
 * Returns a pointer to the data located at offset *bseek inside buffer and
 * advances *bseek by blen. The data is not copied.
 */
char *pk_decode_data (char *buffer, size_t *bseek, int32_t blen) {
    const size_t start = *bseek;
    *bseek = start + blen;
    return buffer + start;
}
165 :
/*
 * Reads an 8-byte big-endian value from buffer at offset *bseek (advancing
 * *bseek) and reinterprets its bit pattern as a double.
 */
double pk_decode_double (char *buffer, size_t *bseek) {
    const int64_t bits = pk_decode_int64(buffer, bseek, sizeof(int64_t));

    // copy the raw bits instead of casting: this is a reinterpretation, not a numeric conversion
    double result = 0;
    memcpy(&result, &bits, sizeof(int64_t));
    return result;
}
173 :
174 31282 : int pk_decode(char *buffer, size_t blen, int count, size_t *seek, int (*cb) (void *xdata, int index, int type, int64_t ival, double dval, char *pval), void *xdata) {
175 31282 : size_t bseek = (seek) ? *seek : 0;
176 31282 : if (count == -1) count = pk_decode_u8(buffer, &bseek);
177 :
178 733550 : for (size_t i = 0; i < (size_t)count; i++) {
179 702268 : uint8_t type_byte = (uint8_t)pk_decode_u8(buffer, &bseek);
180 702268 : int type = (int)(type_byte & 0x07);
181 702268 : size_t nbytes = (type_byte >> 3) & 0x1F;
182 :
183 702268 : switch (type) {
184 : case SQLITE_MAX_NEGATIVE_INTEGER: {
185 2 : int64_t value = INT64_MIN;
186 2 : type = SQLITE_INTEGER;
187 2 : if (cb) if (cb(xdata, (int)i, type, value, 0.0, NULL) != SQLITE_OK) return -1;
188 : }
189 2 : break;
190 :
191 : case SQLITE_NEGATIVE_INTEGER:
192 : case SQLITE_INTEGER: {
193 177784 : int64_t value = pk_decode_int64(buffer, &bseek, nbytes);
194 177784 : if (type == SQLITE_NEGATIVE_INTEGER) {value = -value; type = SQLITE_INTEGER;}
195 177784 : if (cb) if (cb(xdata, (int)i, type, value, 0.0, NULL) != SQLITE_OK) return -1;
196 : }
197 177784 : break;
198 :
199 : case SQLITE_NEGATIVE_FLOAT:
200 : case SQLITE_FLOAT: {
201 160512 : double value = pk_decode_double(buffer, &bseek);
202 160512 : if (type == SQLITE_NEGATIVE_FLOAT) {value = -value; type = SQLITE_FLOAT;}
203 160512 : if (cb) if (cb(xdata, (int)i, type, 0, value, NULL) != SQLITE_OK) return -1;
204 : }
205 160512 : break;
206 :
207 : case SQLITE_TEXT:
208 : case SQLITE_BLOB: {
209 363662 : int64_t length = pk_decode_int64(buffer, &bseek, nbytes);
210 363662 : char *value = pk_decode_data(buffer, &bseek, (int32_t)length);
211 363662 : if (cb) if (cb(xdata, (int)i, type, length, 0.0, value) != SQLITE_OK) return -1;
212 : }
213 363662 : break;
214 :
215 : case SQLITE_NULL: {
216 308 : if (cb) if (cb(xdata, (int)i, type, 0, 0.0, NULL) != SQLITE_OK) return -1;
217 : }
218 308 : break;
219 : }
220 702268 : }
221 :
222 31282 : if (seek) *seek = bseek;
223 31282 : return count;
224 31282 : }
225 :
/*
 * Decodes a primary-key buffer produced by pk_encode_prikey: the first byte
 * holds the number of encoded values, which are then decoded by pk_decode.
 *
 * Returns the number of decoded values, or -1 on error (including a NULL or
 * empty buffer, which cannot even hold the leading count byte).
 */
int pk_decode_prikey (char *buffer, size_t blen, int (*cb) (void *xdata, int index, int type, int64_t ival, double dval, char *pval), void *xdata) {
    if (!buffer || blen == 0) return -1;

    size_t bseek = 0;
    uint8_t count = pk_decode_u8(buffer, &bseek);
    return pk_decode(buffer, blen, count, &bseek, cb, xdata);
}
231 :
232 : // MARK: - Encoding -
233 :
/*
 * Returns how many bytes (1..8) the encoder uses to store value.
 * A width of n bytes is chosen when value fits in 8*n - 1 bits, so the
 * thresholds are 0x7F, 0x7FFF, ... ; anything above the 7-byte threshold
 * takes 8 bytes. Values <= 0x7F (including negative ones) yield 1.
 */
size_t pk_encode_nbytes_needed (int64_t value) {
    int64_t limit = 0x7F;

    for (size_t width = 1; width < sizeof(int64_t); width++) {
        if (value <= limit) return width;
        // next threshold: one more byte of all-ones below the current limit
        limit = (limit << 8) | 0xFF;
    }

    return sizeof(int64_t);
}
244 :
245 14862 : size_t pk_encode_size (sqlite3_value **argv, int argc, int reserved) {
246 : // estimate the required buffer size
247 14862 : size_t required = reserved;
248 : size_t nbytes;
249 : int64_t val, len;
250 :
251 695445 : for (int i = 0; i < argc; i++) {
252 680583 : switch (sqlite3_value_type(argv[i])) {
253 : case SQLITE_INTEGER:
254 178084 : val = sqlite3_value_int64(argv[i]);
255 178084 : if (val == INT64_MIN) {
256 1 : required += 1;
257 1 : break;
258 : }
259 178083 : if (val < 0) val = -val;
260 178083 : nbytes = pk_encode_nbytes_needed(val);
261 178083 : required += 1 + nbytes;
262 178083 : break;
263 : case SQLITE_FLOAT:
264 160511 : required += 1 + sizeof(int64_t);
265 160511 : break;
266 : case SQLITE_TEXT:
267 : case SQLITE_BLOB:
268 341672 : len = (int32_t)sqlite3_value_bytes(argv[i]);
269 341672 : nbytes = pk_encode_nbytes_needed(len);
270 341672 : required += 1 + len + nbytes;
271 341672 : break;
272 : case SQLITE_NULL:
273 316 : required += 1;
274 316 : break;
275 : }
276 680583 : }
277 :
278 14862 : return required;
279 : }
280 :
/*
 * Stores value at offset bseek inside buffer and returns the offset of the
 * next free byte (bseek + 1).
 */
size_t pk_encode_u8 (char *buffer, size_t bseek, uint8_t value) {
    buffer[bseek] = value;
    return bseek + 1;
}
285 :
/*
 * Writes the nbytes least significant bytes of value into buffer starting at
 * offset bseek, most significant byte first (big-endian), and returns the
 * offset just past the last written byte.
 */
size_t pk_encode_int64 (char *buffer, size_t bseek, int64_t value, size_t nbytes) {
    // walk the shift amount down from the most significant requested byte to 0
    for (size_t shift = 8 * nbytes; shift > 0; ) {
        shift -= 8;
        buffer[bseek++] = (uint8_t)((value >> shift) & 0xFF);
    }
    return bseek;
}
292 :
/*
 * Copies datalen bytes from data into buffer at offset bseek and returns the
 * offset just past the copied bytes (bseek + datalen).
 *
 * When datalen is 0 nothing is copied and data may be NULL (this is what
 * sqlite3_value_blob returns for a zero-length blob).
 */
size_t pk_encode_data (char *buffer, size_t bseek, char *data, size_t datalen) {
    // memcpy with a NULL source is undefined behavior even when the length is 0
    if (datalen > 0) memcpy(buffer + bseek, data, datalen);
    return bseek + datalen;
}
297 :
298 14862 : char *pk_encode (sqlite3_value **argv, int argc, char *b, bool is_prikey, size_t *bsize) {
299 14862 : size_t bseek = 0;
300 14862 : size_t blen = 0;
301 14862 : char *buffer = b;
302 :
303 : // in primary-key encoding the number of items must be explicitly added to the encoded buffer
304 14862 : if (is_prikey) {
305 : // 1 is the number of items in the serialization (always 1 byte so max 255 primary keys, even if there is an hard SQLite limit of 128)
306 10607 : blen = pk_encode_size(argv, argc, 1);
307 10607 : size_t blen_curr = *bsize;
308 10607 : buffer = (blen > blen_curr || b == NULL) ? cloudsync_memory_alloc((sqlite3_uint64)blen) : b;
309 10607 : if (!buffer) return NULL;
310 :
311 : // the first u8 value is the total number of items in the primary key(s)
312 10607 : bseek = pk_encode_u8(buffer, 0, argc);
313 10607 : }
314 :
315 695445 : for (int i = 0; i < argc; i++) {
316 680583 : int type = sqlite3_value_type(argv[i]);
317 680583 : switch (type) {
318 : case SQLITE_INTEGER: {
319 178084 : int64_t value = sqlite3_value_int64(argv[i]);
320 178084 : if (value == INT64_MIN) {
321 1 : bseek = pk_encode_u8(buffer, bseek, SQLITE_MAX_NEGATIVE_INTEGER);
322 1 : break;
323 : }
324 178083 : if (value < 0) {value = -value; type = SQLITE_NEGATIVE_INTEGER;}
325 178083 : size_t nbytes = pk_encode_nbytes_needed(value);
326 178083 : uint8_t type_byte = (nbytes << 3) | type;
327 178083 : bseek = pk_encode_u8(buffer, bseek, type_byte);
328 178083 : bseek = pk_encode_int64(buffer, bseek, value, nbytes);
329 : }
330 178083 : break;
331 : case SQLITE_FLOAT: {
332 160511 : double value = sqlite3_value_double(argv[i]);
333 160511 : if (value < 0) {value = -value; type = SQLITE_NEGATIVE_FLOAT;}
334 : int64_t net_double;
335 160511 : memcpy(&net_double, &value, sizeof(int64_t));
336 160511 : bseek = pk_encode_u8(buffer, bseek, type);
337 160511 : bseek = pk_encode_int64(buffer, bseek, net_double, sizeof(int64_t));
338 : }
339 160511 : break;
340 : case SQLITE_TEXT:
341 : case SQLITE_BLOB: {
342 341672 : int32_t len = (int32_t)sqlite3_value_bytes(argv[i]);
343 341672 : size_t nbytes = pk_encode_nbytes_needed(len);
344 341672 : uint8_t type_byte = (nbytes << 3) | sqlite3_value_type(argv[i]);
345 341672 : bseek = pk_encode_u8(buffer, bseek, type_byte);
346 341672 : bseek = pk_encode_int64(buffer, bseek, len, nbytes);
347 341672 : bseek = pk_encode_data(buffer, bseek, (char *)sqlite3_value_blob(argv[i]), len);
348 : }
349 341672 : break;
350 : case SQLITE_NULL: {
351 316 : bseek = pk_encode_u8(buffer, bseek, SQLITE_NULL);
352 : }
353 316 : break;
354 : }
355 680583 : }
356 :
357 14862 : if (bsize) *bsize = blen;
358 14862 : return buffer;
359 14862 : }
360 :
361 10607 : char *pk_encode_prikey (sqlite3_value **argv, int argc, char *b, size_t *bsize) {
362 10607 : return pk_encode(argv, argc, b, true, bsize);
363 : }
|