btree.c

00001 /*
00002  * issues
00003  * ======
00004  * (A) how does all of this compare to mmap
00005  * (B) the ser_key and ser_value stuff isn't implemented consistently. Either
00006  *     it's an important optimization and then the dirty bits should be set 
00007  *     correctly to prevent calling serialization functions unnecessarily
00008  *     often, or it doesn't make much of a difference and should be dropped
00009  *     altogether. 
00010  */
00011 #include <stdio.h>
00012 #include <stdlib.h>
00013 #include <string.h>
00014 #include <dlfcn.h>
00015 #include <errno.h>
00016 
00017 #include "btree.h"
00018 
00019 #include "vector.h"
00020 
00021 #define __BTREE_DEBUG
00022 
00023 #ifdef __BTREE_DEBUG
00024 #define printfdbg printf
00025 #define dump_node_dbg dump_node
00026 #else
00027 #define printfdbg(...) 
00028 #define dump_node_dbg(...)
00029 #endif /* __BTREE_DEBUG */
00030 
00031 
00032 #define HEADER_SIZE(tree) (5 * (tree)->order * sizeof(unsigned long))
00033 
00034 #define BTREE_NODE_DIRTY_HEADER      1
00035 #define BTREE_NODE_DIRTY_VALUE       2
00036 #define BTREE_NODE_DIRTY_KEY         4
00037 #define BTREE_NODE_DIRTY_KEYS        8
00038 #define BTREE_NODE_DIRTY_CHILD      16
00039 #define BTREE_NODE_DIRTY_PARENT     32
00040 #define BTREE_NODE_USE_VALUE_ADDR   64
00041 
00042 /* max. nodes to keep in memory */
00043 static int BTREE_NODE_CACHE_SIZE = 10;
00044 
00045 /****************************************************************************
00046  *                                                                          *
00047  *                       conveniences and utilities                         *
00048  *                                                                          *
00049  ****************************************************************************/
00050 
00051 #define CP_BYTE_ORDER_LITTLE_ENDIAN 1
00052 
00053 /* convert bytes to a int value */
00054 unsigned int mem2int(void *addr)
00055 {
00056     unsigned int res = 0;
00057 
00058 #ifdef CP_BYTE_ORDER_LITTLE_ENDIAN
00059     int i;
00060     unsigned char *p = addr;
00061     res = p[0];
00062     for (i = 1; i < sizeof(unsigned int); i++)
00063     {
00064         res <<= 8;
00065         res += p[i];
00066     }
00067 #else
00068     res = *(unsigned int *) addr;
00069 #endif /* CP_BYTE_ORDER_LITTLE_ENDIAN */
00070 
00071     return res;
00072 }
00073 
00074 /* convert an int value to big endian bytes */
00075 void int2mem(unsigned int value, void *addr)
00076 {
00077 #ifdef CP_BYTE_ORDER_LITTLE_ENDIAN
00078     int i;
00079     unsigned char *p = addr;
00080     for (i = sizeof(unsigned int); i > 0; i--)
00081     {
00082         p[i - 1] = value & 0xFF;
00083         value >>= 8;
00084     }
00085 #else
00086     unsigned int *p = addr;
00087     *p = value;
00088 #endif /* CP_BYTE_ORDER_LITTLE_ENDIAN */
00089 }
00090 
00091 /* convert bytes to a size_t value */
00092 size_t mem2size(void *addr)
00093 {
00094     size_t res = 0;
00095 
00096 #ifdef CP_BYTE_ORDER_LITTLE_ENDIAN
00097     int i;
00098     char *p = addr;
00099     res = p[0];
00100     for (i = 1; i < sizeof(size_t); i++)
00101     {
00102         res <<= 8;
00103         res += p[i];
00104     }
00105 #else
00106     res = *(size_t *) addr;
00107 #endif /* CP_BYTE_ORDER_LITTLE_ENDIAN */
00108 
00109     return res;
00110 }
00111 
00112 /* convert an size_t value to big endian bytes */
00113 void size2mem(size_t value, void *addr)
00114 {
00115 #ifdef CP_BYTE_ORDER_LITTLE_ENDIAN
00116     int i;
00117     char *p = addr;
00118     for (i = sizeof(size_t); i > 0; i--)
00119     {
00120         p[i - 1] = value & 0xFF;
00121         value >>= 8;
00122     }
00123 #else
00124     size_t *p = addr;
00125     *p = value;
00126 #endif /* CP_BYTE_ORDER_LITTLE_ENDIAN */
00127 }
00128 
00129 /* convert bytes to a long value */
00130 unsigned long mem2long(void *addr)
00131 {
00132     unsigned long res = 0;
00133 
00134 #ifdef CP_BYTE_ORDER_LITTLE_ENDIAN
00135     int i;
00136     unsigned char *p = addr;
00137     res = p[0];
00138     for (i = 1; i < sizeof(unsigned long); i++)
00139     {
00140         res <<= 8;
00141         res += p[i];
00142     }
00143 #else
00144     res = *(unsigned long *) addr;
00145 #endif /* CP_BYTE_ORDER_LITTLE_ENDIAN */
00146 
00147     return res;
00148 }
00149 
00150 /* convert a long value to big endian bytes */
00151 void long2mem(unsigned long value, void *addr)
00152 {
00153 #ifdef CP_BYTE_ORDER_LITTLE_ENDIAN
00154     int i;
00155     unsigned char *p = addr;
00156     for (i = sizeof(unsigned long); i > 0; i--)
00157     {
00158         p[i - 1] = value & 0xFF;
00159         value >>= 8;
00160     }
00161 #else
00162     unsigned long *p = addr;
00163     *p = value;
00164 #endif /* CP_BYTE_ORDER_LITTLE_ENDIAN */
00165 }
00166 
00167 /* read a platform independent 4 byte integer from a file. returns non-zero on 
00168  * error.
00169  */
00170 int cp_fread_int(FILE *fp, unsigned int *i)
00171 {
00172     unsigned char buf[4];
00173     size_t read;
00174 
00175     read = fread(buf, 4, 1, fp);
00176     if (read == 0) return -1;
00177 
00178 #ifdef CP_BYTE_ORDER_LITTLE_ENDIAN
00179     *i = buf[3] | (buf[2] << 8) | (buf[1] << 16) | (buf[0] << 24);
00180 #else
00181     *i = *(unsigned int *) buf;
00182 #endif /* CP_BYTE_ORDER_LITTLE_ENDIAN */
00183 
00184     return 0;
00185 }
00186 
00187 /* read a platform independent size_t object from a file. returns non-zero on 
00188  * error.
00189  */
00190 int cp_fread_size(FILE *fp, size_t *i)
00191 {
00192     unsigned char buf[sizeof(size_t)];
00193     size_t read;
00194 
00195     read = fread(buf, sizeof(size_t), 1, fp);
00196     if (read == 0) return -1;
00197 
00198 #ifdef CP_BYTE_ORDER_LITTLE_ENDIAN
00199     {
00200         int j;
00201         int roll = 8;
00202         *i = buf[sizeof(size_t) - 1];
00203         for (j = sizeof(size_t) - 2; j >= 0; j--)
00204         {
00205             *i |= buf[j] << roll;
00206             roll += 8;
00207         }
00208     }
00209 #else
00210     *i = *(size_t *) buf;
00211 #endif /* CP_BYTE_ORDER_LITTLE_ENDIAN */
00212 
00213     return 0;
00214 }
00215 
00216 /* read a platform independent long integer from a file. returns non-zero on 
00217  * error.
00218  */
00219 int cp_fread_long(FILE *fp, unsigned long *i)
00220 {
00221     unsigned char buf[sizeof(long)];
00222     size_t read;
00223 
00224     read = fread(buf, sizeof(long), 1, fp);
00225     if (read == 0) return -1;
00226 
00227 #ifdef CP_BYTE_ORDER_LITTLE_ENDIAN
00228     {
00229         int j;
00230         int roll = 8;
00231         *i = buf[sizeof(long) - 1];
00232         for (j = sizeof(long) - 2; j >= 0; j--)
00233         {
00234             *i |= buf[j] << roll;
00235             roll += 8;
00236         }
00237     }
00238 #else
00239     *i = *(unsigned long *) buf;
00240 #endif /* CP_BYTE_ORDER_LITTLE_ENDIAN */
00241 
00242     return 0;
00243 }
00244 
00245 /* write a platform independent 4 byte integer to a file. returns non-zero on 
00246  * error.
00247  */
00248 int cp_fwrite_int(FILE *fp, unsigned int i)
00249 {
00250     size_t write;
00251     unsigned char *buf;
00252 #ifdef CP_BYTE_ORDER_LITTLE_ENDIAN
00253     unsigned char swap[4];
00254     swap[3] = i & 255;
00255     swap[2] = (i >> 8) & 255;
00256     swap[1] = (i >> 16) & 255;
00257     swap[0] = (i >> 24) & 255;
00258     buf = swap;
00259 #else
00260     buf = (char *) &i;
00261 #endif /* CP_BYTE_ORDER_LITTLE_ENDIAN */
00262     write = fwrite(buf, 4, 1, fp);
00263     return write != 1;
00264 }
00265 
00266 /* write a platform independent long integer to a file. returns non-zero on 
00267  * error.
00268  */
00269 int cp_fwrite_long(FILE *fp, unsigned long i)
00270 {
00271     size_t write;
00272     unsigned char *buf;
00273 #ifdef CP_BYTE_ORDER_LITTLE_ENDIAN
00274     unsigned char swap[sizeof(long)];
00275     int j;
00276     int roll = 0;
00277     for (j = sizeof(long) - 1; j >= 0; j--)
00278     {
00279         swap[j] = (i >> roll) & 255;
00280         roll += 8;
00281     }
00282     buf = swap;
00283 #else
00284     buf = (char *) &i;
00285 #endif /* CP_BYTE_ORDER_LITTLE_ENDIAN */
00286     write = fwrite(buf, sizeof(long), 1, fp);
00287     return write != 1;
00288 }
00289 
00290 int cp_fwrite_int_array(FILE *fp, unsigned int *i, int count)
00291 {
00292     int rc;
00293 #ifdef CP_BYTE_ORDER_LITTLE_ENDIAN
00294     int k;
00295     char *tmp = (char *) malloc(sizeof(unsigned int) * count);
00296     if (tmp == NULL) return -1;
00297     for (k = 0; k < count; k++)
00298         int2mem(i[k], &tmp[k * sizeof(unsigned int)]);
00299     rc = fwrite(tmp, sizeof(unsigned int), count, fp);
00300     free(tmp);
00301 #else
00302     rc = fwrite(i, sizeof(unsigned int), count, fp);
00303 #endif /* CP_BYTE_ORDER_LITTLE_ENDIAN */
00304     return rc;
00305 }
00306 
00307 int cp_fread_long_array(FILE *fp, unsigned long *i, int count)
00308 {
00309     int rc;
00310 #ifdef CP_BYTE_ORDER_LITTLE_ENDIAN
00311     int k;
00312     char *tmp = (char *) malloc(sizeof(unsigned long) * count);
00313     if (tmp == NULL) return -1;
00314     rc = fread(tmp, sizeof(unsigned long), count, fp);
00315     for (k = 0; k < rc; k++)
00316         i[k] = mem2long(&tmp[k * sizeof(unsigned long)]);
00317     free(tmp);
00318 #else
00319     rc = fread(i, sizeof(unsigned long), count, fp);
00320 #endif /* CP_BYTE_ORDER_LITTLE_ENDIAN */
00321     return rc;
00322 
00323 }
00324 
00325 int cp_fwrite_long_array(FILE *fp, unsigned long *i, int count)
00326 {
00327     int rc;
00328 #ifdef CP_BYTE_ORDER_LITTLE_ENDIAN
00329     int k;
00330     char *tmp = (char *) malloc(sizeof(unsigned long) * count);
00331     if (tmp == NULL) return -1;
00332     for (k = 0; k < count; k++)
00333         long2mem(i[k], &tmp[k * sizeof(unsigned long)]);
00334     rc = fwrite(tmp, sizeof(unsigned long), count, fp);
00335     free(tmp);
00336 #else
00337     rc = fwrite(i, sizeof(unsigned long), count, fp);
00338 #endif /* CP_BYTE_ORDER_LITTLE_ENDIAN */
00339     return rc;
00340 }
00341 
00342 /****************************************************************************
00343  *                                                                          *
00344  *                       btree function implementations                     *
00345  *                                                                          *
00346  ****************************************************************************/
00347 
00348 #define FREE_BLOCK_HEADER_POSITION (sizeof(unsigned long) + sizeof(unsigned) + sizeof(unsigned))
00349     
00350 void *mem_block_desc_size(void *p)
00351 {
00352     mem_block_desc *desc = (mem_block_desc *) p;
00353     return &desc->size;
00354 }
00355 
00356 void *mem_block_desc_offset(void *p)
00357 {
00358     mem_block_desc *desc = (mem_block_desc *) p;
00359     return &desc->offset;
00360 }
00361 
00362 void DIE()
00363 {
00364     fflush(stdout);
00365     fflush(stderr);
00366     char *p = NULL;
00367     *p = 0;
00368 }
00369 
00370 #define FREE_BLOCK_HEADER_LEN (2 * sizeof(unsigned long))
00371 
00372 static int block_desc_total; //~~ 018
00373 static int chksum; //~~ 018
00374 
00375 void dump_mem_block_desc(void *entry)
00376 {
00377     mem_block_desc *m = entry;
00378     printf("[%p] %lX, %ld, %lX, %lX\n", entry, m->offset, m->size, m->prev, m->next);
00379     block_desc_total++; //~~ 018
00380     chksum += ((int) entry);
00381     if (m->offset == m->prev || m->offset == m->next)
00382     {
00383         printfdbg("free block descriptor {[%p] %lX, %ld, %lX, %lX} exhibits circularity\n", entry, m->offset, m->size, m->prev, m->next);
00384         DIE();
00385     }
00386 }
00387 
00388 
00389 int dump_mem_block_desc_entry(void *entry, void *multiple)
00390 {
00391     cp_index_map_node *node = entry;
00392 
00393     if (multiple)
00394     {
00395         int i;
00396         cp_vector *v = node->entry;
00397         for (i = 0; i < cp_vector_size(v); i++)
00398             dump_mem_block_desc(cp_vector_element_at(v, i));
00399     }
00400     else
00401         dump_mem_block_desc(node->entry);
00402 
00403     return 0;
00404 }
00405 
00406 unsigned long hash_block_desc(void *p)
00407 {
00408     mem_block_desc *m = (mem_block_desc *) p;
00409     return m->offset;
00410 }
00411 
00412 unsigned long compare_block_desc_offset(void *p, void *q)
00413 {
00414     mem_block_desc *m = (mem_block_desc *) p;
00415     mem_block_desc *n = (mem_block_desc *) q;
00416 
00417     return m->offset - n->offset;
00418 }
00419 
00420 cp_hashtable *blocks;
00421 
00422 int load_blocks(void *entry, void *prm)
00423 {
00424     cp_index_map_node *node = entry;
00425     mem_block_desc *block = node->entry;
00426     if (cp_hashtable_get(blocks, node->entry))
00427     {
00428         printfdbg("strangely, this entry already exists\n");
00429         DIE();
00430     }
00431     printfdbg("verify: loading block %lx, %d\n", block->offset, block->size);
00432     cp_hashtable_put(blocks, node->entry, node->entry);
00433     return 0;
00434 }
00435 
00436 void verify_free_blocks(cp_btree_file *file)
00437 {
00438     int i, rc, count;
00439     mem_block_desc curr;
00440     mem_block_desc *mem;
00441 
00442     blocks = 
00443         cp_hashtable_create_by_option(COLLECTION_MODE_NOSYNC, 
00444                                       10,
00445                                       (cp_hashfunction) hash_block_desc, 
00446                                       (cp_compare_fn) compare_block_desc_offset,
00447                                       NULL, NULL, NULL, NULL);
00448 
00449     cp_multimap_callback(file->free_block, load_blocks, NULL);
00450 
00451     fseek(file->fp, FREE_BLOCK_HEADER_POSITION, SEEK_SET);
00452     if ((rc = cp_fread_long_array(file->fp, (unsigned long *) &curr, 2)) != 2) 
00453     {
00454         printfdbg("error reading free block header\n");
00455         DIE();
00456     }
00457 
00458     while (curr.offset)
00459     {
00460         mem = cp_hashtable_remove(blocks, &curr);
00461         if (mem == NULL)
00462         {
00463             printfdbg("disk block %lx, %d lost\n", curr.offset, curr.size);
00464             DIE();
00465         }
00466         printf("verify: matched block %lx, %d\n", curr.offset, curr.size);
00467 
00468         if ((rc = fseek(file->fp, curr.offset, SEEK_SET))) 
00469         {
00470             printfdbg("error reading free block link\n");
00471             DIE();
00472         }
00473 
00474         if ((rc = cp_fread_long_array(file->fp, (unsigned long *) &curr, 2)) != 2)
00475         {
00476             printfdbg("error reading free block link\n");
00477             DIE();
00478         }
00479     } 
00480 
00481     if ((count = cp_hashtable_count(blocks)) > 0)
00482     {
00483         void **keys;
00484         
00485         printfdbg("lost free block entries in memory, dumping\n");
00486         keys = cp_hashtable_get_keys(blocks);
00487         for (i = 0; i < count; i++)
00488         {
00489             mem = cp_hashtable_get(blocks, keys[i]);
00490             dump_mem_block_desc(mem);
00491         }
00492 
00493         free(keys);
00494         DIE();
00495     }
00496 
00497     cp_hashtable_destroy(blocks);
00498 }
00499 
00500 void dump_free_block(cp_btree_file *file, char *msg)
00501 {
00502     cp_index *index = file->free_block_size_index;
00503     void *multiple = (void *) 1;
00504     int size_index, offset_index; //~~ 018
00505     int size_chksum, offset_chksum;
00506 
00507     printf("dump_free_block %s\n---------------\n", 
00508             msg ? msg : "");
00509 
00510 block_desc_total = 0;
00511 chksum = 0;
00512     cp_multimap_callback_by_index(file->free_block, 
00513                                   index, 
00514                                   dump_mem_block_desc_entry, 
00515                                   multiple); 
00516 size_index = block_desc_total;
00517 size_chksum = chksum;
00518 block_desc_total = 0;
00519 chksum = 0;
00520     printf("dump_free_block [offset index] %s\n---------------\n", 
00521             msg ? msg : "");
00522 cp_multimap_callback(file->free_block, dump_mem_block_desc_entry, NULL);
00523 offset_index = block_desc_total;
00524 offset_chksum = chksum;
00525 if (offset_index != size_index)
00526 {
00527     printf("error. error.\n");
00528     printf("size index has %d items\n", size_index);
00529     printf("offset index has %d items\n", offset_index);
00530     printf("\nerror. error.\n\ngoodbye.\n");
00531     DIE();
00532 }
00533 if (size_chksum != offset_chksum)
00534 {
00535     printf("error. error.\n");
00536     printf("size chksum:    0x%x\n", size_chksum);
00537     printf("offset chksum:  0x%x\n", offset_chksum);
00538     printf("\nerror. error.\n\ngoodbye.\n");
00539     DIE();
00540 }
00541 
00542 }
00543 
00544 void vdump_free_block(cp_btree_file *file, char *msg)
00545 {
00546     dump_free_block(file, msg);
00547 //  verify_free_blocks(file);
00548 }
00549 
00550 int merge_free_block(cp_btree_file *file, 
00551                      mem_block_desc *curr, 
00552                      mem_block_desc *mod)
00553 {
00554     unsigned long prev;
00555     mem_block_desc t;
00556     int rc;
00557 
00558 #ifdef DEBUG
00559     if (curr->offset == mod->offset)
00560     {
00561         DEBUGMSG("merge_free_block: offsets are the same merging %lX, %ld, %lX, %lX and %lX, %ld, %lX, %lX", curr->offset, curr->size, curr->prev, curr->next, mod->offset, mod->size, mod->prev, mod->next);
00562         return 1;
00563     }
00564 #endif
00565 printfdbg(" [MFB] curr: %lx, %d, %lx, %lx\n", curr->offset, curr->size, curr->prev, curr->next);
00566 printfdbg(" [MFB]  mod: %lx, %d, %lx, %lx\n", mod->offset, mod->size, mod->prev, mod->next);
00567 
00568     /* copy current free block pointer to new position */
00569     if (curr->next)
00570     {
00571         mem_block_desc *mnext;
00572         t.offset = curr->next;
00573         mnext = cp_multimap_get(file->free_block, &t);
00574         mnext->prev = mod->offset;
00575 
00576         memcpy(&t, mnext, sizeof(block_desc));
00577     }
00578     else
00579         memset(&t, 0, sizeof(block_desc));
00580     if ((rc = fseek(file->fp, mod->offset, SEEK_SET)))
00581     {
00582 #ifdef DEBUG
00583             cp_perror(CP_IO_ERROR, errno, 
00584                       "can\'t write adjusted free block information");
00585 #endif
00586             return -1;
00587     }
00588 printfdbg("%s:%d writing at %lx\n", __FILE__, __LINE__, mod->offset);
00589     if (cp_fwrite_long_array(file->fp, (unsigned long *) &t, 2) != 2)
00590     {
00591 #ifdef DEBUG
00592         cp_perror(CP_IO_ERROR, errno, 
00593                  "can\'t write adjusted free block information");
00594 #endif
00595             return -1;
00596     }
00597 
00598     t.offset = mod->offset;
00599     t.size = mod->size + curr->size;
00600     prev = curr->prev ? curr->prev : FREE_BLOCK_HEADER_POSITION;
00601 
00602     if ((rc = fseek(file->fp, prev, SEEK_SET)))
00603     {
00604 #ifdef DEBUG
00605             cp_perror(CP_IO_ERROR, errno, 
00606                       "can\'t write adjusted free block information");
00607 #endif
00608             return -1;
00609     }
00610 printfdbg("%s:%d writing at %lx\n", __FILE__, __LINE__, prev);
00611     if (cp_fwrite_long_array(file->fp, (unsigned long *) &t, 2) != 2)
00612     {
00613 #ifdef DEBUG
00614         cp_perror(CP_IO_ERROR, errno, 
00615                  "can\'t write adjusted free block information");
00616 #endif
00617             return -1;
00618     }
00619 
00620     return 0;
00621 }
00622 
00623 static int CHUNK_SIZE = 0;
00624 static void init_chunk_size()
00625 {
00626     int min = sizeof(block_desc);
00627     CHUNK_SIZE = 1;
00628     while (CHUNK_SIZE < min) CHUNK_SIZE <<= 1;
00629 printf("set chunk size to %d\n", CHUNK_SIZE);
00630 }
00631 
00632 static inline int round_up(int size)
00633 {
00634     return size + ((CHUNK_SIZE - (size % CHUNK_SIZE)) % CHUNK_SIZE);
00635 }
00636 
00637 int add_free_block(cp_btree_file *file, unsigned long addr, unsigned long plen)
00638 {
00639     int rc;
00640 //  cp_index_map_node *tmap;
00641     mem_block_desc *prev, *next;
00642     short unify_left, unify_right;
00643     mem_block_desc *desc;
00644     unsigned long len = round_up(plen);
00645 unsigned long writepos;
00646 printfdbg(" [AFB-1] \n");
00647 printfdbg("(-) fALLOC %lX %ld\n", addr, len);
00648 //printfdbg("add_free_block: offset %lX size %ld\n", addr, len);
00649 printfdbg("add_free_block: offset %lX size %ld (%ld)\n", addr, len, plen);
00650 {
00651 int error = 0;
00652 mem_block_desc t;
00653 t.offset = addr;
00654 
00655 prev = cp_multimap_find(file->free_block, &t, CP_OP_LT);
00656 if (prev && prev->offset + prev->size > addr)
00657 {
00658     printfdbg("new free block at %lX, %ld overlaps with (%lX, %ld, %lX, %lX)\n", addr, len, prev->offset, prev->size, prev->prev, prev->next);
00659     error = 1;
00660 }
00661 
00662 next = cp_multimap_find(file->free_block, &t, CP_OP_GT);
00663 if (next && addr + len > next->offset)
00664 {
00665     printfdbg("new free block at %lX, %ld overlaps with (%lX, %ld, %lX, %lX)\n", addr, len, next->offset, next->size, next->prev, next->next);
00666     error = 1;
00667 }
00668 
00669 if (error) DIE();
00670 }
00671     if (len < FREE_BLOCK_HEADER_LEN) return 0; //~~
00672 dump_free_block(file, " +++ add_free_block +++ ");
00673     desc = (mem_block_desc *) calloc(1, sizeof(mem_block_desc));
00674     if (desc == NULL) 
00675     {
00676 #ifdef DEBUG
00677         cp_error(CP_MEMORY_ALLOCATION_FAILURE, "can\'t allocate free block "
00678                  "descriptor");
00679 #endif
00680         return -1;
00681     }
00682     
00683 if (len > file->eof)
00684 {
00685     printfdbg("attempting to add invalid free block size %ld\n", len);
00686     DIE();
00687 }
00688 if (addr > file->eof)
00689 {
00690     printfdbg("attempting to add invalid free block offset %lX\n", addr);
00691     DIE();
00692 }
00693 
00694     desc->offset = addr;
00695     desc->size = len;
00696 //    desc->next = 0;
00697 //    desc->prev = 0;
00698 
00699     /* merge with adjacent free blocks if these exist. There are 3 possible
00700      * cases here:
00701      *        +---+---+       +------+
00702      * (I)    |   |xxx|  ==>  |      |    extend adjacent free block 
00703      *        +---+---+       +------+    on the left
00704      *
00705      *        +---+---+       +------+
00706      * (II)   |xxx|   |  ==>  |      |    extend adjacent free block 
00707      *        +---+---+       +------+    on the right
00708      *
00709      *        +---+---+---+       +---------+  
00710      * (III)  |   |xxx|   |  ==>  |         |  unify free blocks on the right
00711      *        +---+---+---+       +---------+  and left
00712      *
00713      * Luckily the problem is uni-dimensional. Otherwise it would be way more
00714      * complicated. 
00715      */
00716 //    tmap = cp_multimap_find(file->free_block, desc, CP_OP_LT);
00717 //  prev = tmap ? tmap->entry : NULL;
00718     prev = cp_multimap_find(file->free_block, desc, CP_OP_LT);
00719 //    tmap = cp_multimap_find(file->free_block, desc, CP_OP_GT);
00720 //  next = tmap ? tmap->entry : NULL;
00721     next = cp_multimap_find(file->free_block, desc, CP_OP_GT);
00722     unify_left = prev && prev->offset + prev->size == desc->offset;
00723     unify_right = next && desc->offset + desc->size == next->offset;
00724 
00725     if (unify_left && unify_right) /* case (III) */
00726     {
00727         unsigned long new_size = prev->size + desc->size + next->size;
00728 printfdbg(" [AFB-2] \n");
00729 printfdbg("add free block (2)\n");
00730         writepos = prev->prev > 0 ? prev->prev : FREE_BLOCK_HEADER_POSITION;
00731         writepos += sizeof(unsigned long);
00732 
00733         rc = fseek(file->fp, writepos, SEEK_SET);
00734         if (rc == -1)
00735         {
00736 #ifdef DEBUG
00737             cp_perror(CP_IO_ERROR, errno, 
00738                       "can\'t write adjusted free block information");
00739 #endif
00740             return -1;
00741         }
00742 printfdbg("%s:%d writing at %lx\n", __FILE__, __LINE__, writepos);
00743         cp_fwrite_long(file->fp, new_size);
00744 
00745         /* write updated offset for next block */
00746         if ((rc = fseek(file->fp, prev->offset, SEEK_SET)))
00747         {
00748 #ifdef DEBUG
00749             cp_perror(CP_IO_ERROR, errno, 
00750                       "can\'t write adjusted free block information");
00751 #endif
00752             return -1;
00753         }
00754 writepos = prev->offset;
00755         memcpy(desc, prev, sizeof(mem_block_desc));
00756         /* use desc as temp holder for re-indexing */
00757         desc->size = new_size;
00758         desc->next = next->next;
00759         /* after reindexing prev is updated to new values */
00760 printfdbg("REINDEX [1] (%lX, %ld, %lX, %lX) => (%lX, %ld, %lX, %lX)\n", (prev)->offset, (prev)->size, (prev)->prev, (prev)->next, (desc)->offset, (desc)->size, (desc)->prev, (desc)->next);
00761 dump_free_block(file, "[1]");
00762         cp_multimap_reindex(file->free_block, prev, desc); 
00763 //                          sizeof(mem_block_desc));
00764 dump_free_block(file, "---");
00765         if (next->next)
00766         {
00767             mem_block_desc t, *nnext;
00768             t.offset = next->next;
00769             nnext = cp_multimap_get(file->free_block, &t);
00770 printfdbg("%s:%d writing at %lx\n", __FILE__, __LINE__, writepos);
00771             if (cp_fwrite_long_array(file->fp, (unsigned long *) nnext, 2) < 0)
00772             {
00773 #ifdef DEBUG
00774                 cp_perror(CP_IO_ERROR, errno, 
00775                           "can\'t write adjusted free block information");
00776 #endif
00777                 return -1;
00778             }
00779             nnext->prev = desc->offset;
00780 //          nnext->prev = prev->offset;
00781         }
00782         else
00783         {
00784             block_desc fdesc;
00785 printfdbg("add free block (3)\n");
00786             memset(&fdesc, 0, sizeof(block_desc));
00787 printfdbg("%s:%d writing at %lx\n", __FILE__, __LINE__, prev->offset);
00788             if (cp_fwrite_long_array(file->fp, (unsigned long *) &fdesc, 2) < 0)
00789             {
00790 #ifdef DEBUG
00791                 cp_perror(CP_IO_ERROR, errno, 
00792                           "can\'t write adjusted free block information");
00793 #endif
00794                 return -1;
00795             }
00796         }
00797         cp_multimap_remove(file->free_block, next);
00798 //      free(prev); //~~
00799 //        free(desc);
00800         return 0;
00801     }
00802     else if (unify_left) /* case (I) */
00803     {
00804         unsigned long new_size = prev->size + desc->size;
00805         mem_block_desc empty;
00806 printfdbg(" [AFB-3] \n");
00807 printfdbg("add free block (4)\n");
00808         /* update link to next item on disk */
00809         if ((rc = fseek(file->fp, prev->offset, SEEK_SET)))
00810         {
00811 #ifdef DEBUG
00812             cp_perror(CP_IO_ERROR, errno, 
00813                       "can\'t write adjusted free block information");
00814 #endif
00815             return -1;
00816         }
00817 printfdbg("%s:%d writing at %lx\n", __FILE__, __LINE__, prev->offset);
00818         if (next == NULL)
00819         {
00820             memset(&empty, 0, sizeof(mem_block_desc));
00821             next = &empty;
00822         }
00823         if (cp_fwrite_long_array(file->fp, (unsigned long *) next, 2) <= 0)
00824         {
00825 #ifdef DEBUG
00826             cp_perror(CP_IO_ERROR, errno, 
00827                       "can\'t write adjusted free block information");
00828 #endif
00829             return -1;
00830         }
00831         writepos = prev->prev > 0 ? prev->prev : FREE_BLOCK_HEADER_POSITION;
00832         rc = fseek(file->fp, writepos, SEEK_SET);
00833         if (rc == -1)
00834         {
00835 #ifdef DEBUG
00836             cp_perror(CP_IO_ERROR, errno, 
00837                       "can\'t write adjusted free block information");
00838 #endif
00839             return -1;
00840         }
00841 
00842         /* use desc for re-indexing */
00843         memcpy(desc, prev, sizeof(mem_block_desc));
00844         desc->size = new_size;
00845 
00846 printfdbg("%s:%d writing at %lx\n", __FILE__, __LINE__, writepos);
00847         if ((rc = cp_fwrite_long_array(file->fp, (unsigned long *) desc, 2)) <= 0)
00848         {
00849 #ifdef DEBUG
00850             cp_perror(CP_IO_ERROR, errno, 
00851                       "can\'t write adjusted free block information");
00852 #endif
00853             return -1;
00854         }
00855 
00856 printfdbg("REINDEX [2] (%lX, %ld, %lX, %lX) => (%lX, %ld, %lX, %lX)\n", (prev)->offset, (prev)->size, (prev)->prev, (prev)->next, (desc)->offset, (desc)->size, (desc)->prev, (desc)->next);
00857 dump_free_block(file, "[2]");
00858         cp_multimap_reindex(file->free_block, prev, desc);
00859 //      cp_multimap_reindex(file->free_block, prev, desc, sizeof(mem_block_desc));
00860 dump_free_block(file, "---");
00861 //~~    free(prev);
00862 //      free(desc);
00863         return 0;
00864     }
00865     else if (unify_right) /* case (II) */
00866     {
00867         unsigned long new_size = desc->size + next->size;
00868 printfdbg(" [AFB-4] \n");
00869 printfdbg("add free block (5)\n");
00870 
00871         merge_free_block(file, next, desc);
00872     
00873 #if 0
00874         /* update link to next item on disk */
00875         if ((rc = fseek(file->fp, desc->offset, SEEK_SET)))
00876         {
00877 #ifdef DEBUG
00878             cp_perror(CP_IO_ERROR, errno, 
00879                       "can\'t write adjusted free block information");
00880 #endif
00881             return -1;
00882         }
00883         if (cp_fwrite_long_array(file->fp, (unsigned long *) next, 2) != 2)
00884         {
00885 #ifdef DEBUG
00886             cp_perror(CP_IO_ERROR, errno, 
00887                       "can\'t write adjusted free block information");
00888 #endif
00889             return -1;
00890         }
00891 
00892         if (next->prev > 0)
00893         {
00894             mprev = 
00895             rc = fseek(file->fp, next->prev, SEEK_SET);
00896 
00897             prev->next = desc->offset;
00898         }
00899         else
00900             rc = fseek(file->fp, FREE_BLOCK_HEADER_POSITION, SEEK_SET);
00901         if (rc == -1)
00902         {
00903 #ifdef DEBUG
00904             cp_perror(CP_IO_ERROR, errno, 
00905                       "can\'t write adjusted free block information");
00906 #endif
00907             return -1;
00908         }
00909         if (cp_fwrite_long_array(file->fp, (unsigned long *) prev, 2) != 2)
00910         {
00911 #ifdef DEBUG
00912             cp_perror(CP_IO_ERROR, errno, 
00913                       "can\'t write adjusted free block information");
00914 #endif
00915             return -1;
00916         }
00917         memcpy(desc, next, sizeof(mem_block_desc));
00918         desc->size = new_size;
00919         desc->offset = addr;
00920         cp_fwrite_long_array(file->fp, (unsigned long *) desc, 2);
00921 #endif
00922         desc->size = new_size;
00923         /* use desc for re-indexing */
00924 printfdbg("REINDEX [3] (%lX, %ld, %lX, %lX) => (%lX, %ld, %lX, %lX)\n", (next)->offset, (next)->size, (next)->prev, (next)->next, (desc)->offset, (desc)->size, (desc)->prev, (desc)->next);
00925 dump_free_block(file, "[3]");
00926         desc->prev = next->prev;
00927         desc->next = next->next;
00928         if (prev) prev->next = desc->offset;
00929         cp_multimap_reindex(file->free_block, next, desc);
00930 //      cp_multimap_reindex(file->free_block, next, desc, sizeof(mem_block_desc));
00931 dump_free_block(file, "---");
00932 //~~    free(next);
00933 //      free(desc);
00934         return 0;
00935     }
00936 printfdbg(" [AFB-5] \n");
00937 printfdbg("add free block (6)\n");
00938 
00939     /* if no unification is to be performed, the new block must be added to
00940      * the free block map in memory. The previous entry must be updated to 
00941      * point to the new block, and the new block must point to the next block
00942      * if one exists.
00943      */
00944     rc = fseek(file->fp, desc->offset, SEEK_SET);
00945     if (rc == -1)
00946     {
00947 #ifdef DEBUG
00948         cp_perror(CP_IO_ERROR, errno, 
00949                   "can\'t write adjusted free block information");
00950 #endif
00951         return -1;
00952     }
00953     if (next)
00954     {
00955         cp_fwrite_long_array(file->fp, (unsigned long *) next, 2);
00956         next->prev = desc->offset;
00957         desc->next = next->offset;
00958     }
00959     else
00960     {
00961         block_desc fdesc;
00962         memset(&fdesc, 0, sizeof(block_desc));
00963         cp_fwrite_long_array(file->fp, (unsigned long *) &fdesc, 2);
00964     }
00965 
00966     if (prev)
00967     {
00968         prev->next = desc->offset;
00969         desc->prev = prev->offset;
00970         rc = fseek(file->fp, prev->offset, SEEK_SET);
00971     }
00972     else
00973         rc = fseek(file->fp, FREE_BLOCK_HEADER_POSITION, SEEK_SET);
00974     if (rc == -1)
00975     {
00976 #ifdef DEBUG
00977         cp_perror(CP_IO_ERROR, errno, 
00978                   "can\'t write adjusted free block information");
00979 #endif
00980         return -1;
00981     }
00982     cp_fwrite_long_array(file->fp, (unsigned long *) desc, 2);
00983     
00984 printfdbg("[+] add_free_block (7) desc: %lX, %ld, %lX, %lX\n",
00985        desc->offset, desc->size, desc->next, desc->prev);
00986 if (next)
00987 {
00988 printfdbg("[+] add_free_block (7) BEFORE next: %lX, %ld, %lX, %lX\n",
00989        next->offset, next->size, next->prev, next->next);
00990 }
00991 if (prev)
00992 {
00993 printfdbg("[+] add_free_block (7) BEFORE prev: %lX, %ld, %lX, %lX\n",
00994        prev->offset, prev->size, prev->next, prev->prev);
00995 }
00996 
00997     cp_multimap_insert(file->free_block, desc, NULL);
00998 if (next)
00999 {
01000 printfdbg("[+] add_free_block (7) AFTER next: %lX, %ld, %lX, %lX\n",
01001        next->offset, next->size, next->prev, next->next);
01002 }
01003 if (prev)
01004 {
01005 printfdbg("[+] add_free_block (7) AFTER prev: %lX, %ld, %lX, %lX\n",
01006        prev->offset, prev->size, prev->next, prev->prev);
01007 }
01008 vdump_free_block(file, " --- add_free_block --- ");
01009 
01010     return 0;
01011 }
01012 
01013 /* pull out next and resize prev or pull it out altogether if necessary. 
01014  * Here's the scenario:
01015  *
01016  *  +---+---+---+
01017  *  |   |xxx|   |
01018  *  +---+---+---+
01019  *
01020  *  reallocate the block marked xxx. The blocks on the right and left are both
01021  *  free, but neither is big enough to accomodate the new allocation 
01022  *  requirement on its own. We want to update the free block state with the 
01023  *  minimal number of writes to disk.
01024  */ 
01025 int nab_free_block(cp_btree_file *file, 
01026                    mem_block_desc *prev, 
01027                    mem_block_desc *next, 
01028                    int new_len)
01029 {
01030     unsigned long size = next->offset - prev->offset + next->size;
01031     unsigned long prev_offset;
01032     block_desc updated_prev;
01033     mem_block_desc *nnext;
01034     mem_block_desc *pprev;
01035 
01036     if (next->next)
01037     {
01038         mem_block_desc t;
01039         t.offset = next->next;
01040         nnext = cp_multimap_get(file->free_block, &t);
01041     }
01042     else
01043         nnext = NULL;
01044 
01045     /* check if we can still use part of prev */
01046     if (new_len + sizeof(block_desc) >= size) /* nope */
01047     {
01048         if (nnext) 
01049         {   
01050             nnext->prev = prev->prev;
01051             updated_prev.offset = nnext->offset;
01052             updated_prev.size = new_len;
01053         }
01054         else
01055             memset(&updated_prev, 0, sizeof(block_desc));
01056     }
01057     else /* reuse remaining space on prev */
01058     {
01059         block_desc updated_next;
01060         mem_block_desc *updated_prev_entry;
01061 
01062         updated_prev.offset = prev->offset + new_len;
01063         updated_prev.size = size - new_len;
01064         if (nnext)
01065         {
01066             nnext->prev = updated_prev.offset;
01067             memcpy(&updated_next, nnext, sizeof(block_desc));
01068         }
01069         else
01070             memset(&updated_next, 0, sizeof(block_desc));
01071 
01072         if (fseek(file->fp, updated_prev.offset, SEEK_SET))
01073         {
01074 #ifdef DEBUG
01075             cp_perror(CP_IO_ERROR, errno, 
01076                 "can\'t write updated free block descriptor");
01077 #endif
01078             return -1;
01079         }
01080         if (cp_fwrite_long_array(file->fp, (unsigned long *) &updated_next, 2) != 2)
01081         {
01082 #ifdef DEBUG
01083             cp_perror(CP_IO_ERROR, errno, 
01084                 "can\'t write updated free block descriptor");
01085 #endif
01086             return -1;
01087         }
01088         updated_prev_entry = 
01089             (mem_block_desc *) malloc(sizeof(mem_block_desc));
01090         memcpy(updated_prev_entry, &updated_prev, sizeof(block_desc));
01091         updated_prev_entry->prev = prev->prev;
01092         updated_prev_entry->next = next->next;
01093 printfdbg("[+] nab_free_block: inserting %lX, %ld, %lX, %lX\n", updated_prev_entry->offset, updated_prev_entry->size, updated_prev_entry->prev, updated_prev_entry->next);
01094         cp_multimap_insert(file->free_block, updated_prev_entry, NULL);
01095     }
01096 
01097     if (prev->prev)
01098     {
01099         prev_offset = prev->prev;
01100         mem_block_desc t;
01101         t.offset = prev->prev;
01102         pprev = cp_multimap_get(file->free_block, &t);
01103         pprev->next = updated_prev.offset;
01104     }
01105     else
01106         prev_offset = FREE_BLOCK_HEADER_POSITION;
01107 
01108     if (fseek(file->fp, prev_offset, SEEK_SET))
01109     {
01110 #ifdef DEBUG
01111         cp_perror(CP_IO_ERROR, errno, 
01112             "can\'t write updated free block descriptor");
01113 #endif
01114         return -1;
01115     }
01116     if (cp_fwrite_long_array(file->fp, (unsigned long *) &updated_prev, 2) != 2)
01117     {
01118 #ifdef DEBUG
01119         cp_perror(CP_IO_ERROR, errno, 
01120             "can\'t write updated free block descriptor");
01121 #endif
01122         return -1;
01123     }
01124 
01125     cp_multimap_remove(file->free_block, prev);
01126     cp_multimap_remove(file->free_block, next);
01127 
01128     return 0;
01129 }
01130 
01131 mem_block_desc *mem_block_desc_cp(void *p);
01132 
01133 int update_free_block(cp_btree_file *file, 
01134                       mem_block_desc *old, 
01135                       block_desc *update)
01136 {
01137     int rc;
01138     mem_block_desc t;
01139     mem_block_desc *next = NULL;
01140     mem_block_desc *prev;
01141     mem_block_desc reindex;
01142     size_t written;
01143 
01144     if (old->next)
01145     {
01146         t.offset = old->next;
01147         next = cp_multimap_get(file->free_block, &t);
01148         if (next == NULL) 
01149         {
01150 #ifdef DEBUG
01151             cp_error(CP_CORRUPT_INDEX, 
01152                 "can\'t find next free block descriptor for %lX "
01153                 "expected at %lX" , old->offset, old->next);
01154             DIE(); //~~
01155 #endif
01156             return -1;
01157         }
01158         next->prev = update->offset;
01159     }
01160 
01161     if (fseek(file->fp, update->offset, SEEK_SET))
01162     {
01163 #ifdef DEBUG
01164         cp_perror(CP_IO_ERROR, errno, "can\'t locate offset %lX", 
01165                   update->offset);
01166 #endif
01167         return -1;
01168     }
01169     if (next)
01170         memcpy(&t, next, sizeof(block_desc));
01171     else
01172         memset(&t, 0, sizeof(block_desc));
01173     written = cp_fwrite_long_array(file->fp, (unsigned long *) &t, 2);
01174     if (written != 2)
01175     {
01176 #ifdef DEBUG
01177         cp_error(CP_IO_ERROR, "error writing free block descriptor");
01178 #endif
01179         return -1;
01180     }
01181 
01182     if (old->prev)
01183     {
01184         t.offset = old->prev;
01185         prev = cp_multimap_get(file->free_block, &t);
01186         if (prev == NULL)
01187         {
01188 #ifdef DEBUG
01189             cp_error(CP_CORRUPT_INDEX, 
01190                 "can\'t find previous free block descriptor for %lX "
01191                 "expected at offset %lX" , old->offset, old->prev);
01192 #endif
01193             return -1;
01194         }
01195         prev->next = update->offset;
01196         rc = fseek(file->fp, old->prev, SEEK_SET);
01197     }
01198     else
01199         rc = fseek(file->fp, FREE_BLOCK_HEADER_POSITION, SEEK_SET);
01200 
01201     if (rc == -1)
01202     {
01203 #ifdef DEBUG
01204         cp_error(CP_CORRUPT_INDEX, 
01205             "can\'t find previous free block descriptor for %lX expected at "
01206             " offset %lX", old->offset, 
01207                 old->prev ? old->prev : FREE_BLOCK_HEADER_POSITION);
01208 #endif
01209         return -1;
01210     }
01211     written = cp_fwrite_long_array(file->fp, (unsigned long *) update, 2);
01212     if (written != 2)
01213     {
01214 #ifdef DEBUG
01215         cp_error(CP_IO_ERROR, "error writing free block descriptor");
01216 #endif
01217         return -1;
01218     }
01219 
01220     reindex.offset = update->offset;
01221     reindex.size = update->size;
01222     reindex.prev = old->prev;
01223     reindex.next = old->next;
01224 printfdbg("REINDEX [4] (%lX, %ld, %lX, %lX) => (%lX, %ld, %lX, %lX)\n", (old)->offset, (old)->size, (old)->prev, (old)->next, (&reindex)->offset, (&reindex)->size, (&reindex)->prev, (&reindex)->next);
01225 dump_free_block(file, "[4]");
01226     cp_multimap_reindex(file->free_block, old, mem_block_desc_cp(&reindex));
01227 //  cp_multimap_reindex(file->free_block, old, &reindex, sizeof(mem_block_desc));
01228 //~~ free(old);
01229 vdump_free_block(file, "---");
01230 
01231     return rc;
01232 }
01233 
01234 int drop_free_block(cp_btree_file *file, mem_block_desc *block)
01235 {
01236     int rc = 0;
01237     block_desc prev_update;
01238     mem_block_desc *next, *prev;
01239     mem_block_desc t;
01240     int written;
01241 printfdbg("(+) fALLOC %lX %ld\n", block->offset, block->size);
01242 printfdbg("drop free block %lX, %ld, %lX, %lX\n", block->offset, block->size, block->prev, block->next);
01243     if (block->next)
01244     {
01245         t.offset = block->next;
01246         next = cp_multimap_get(file->free_block, &t);
01247         if (next == NULL) 
01248         {
01249 #ifdef DEBUG
01250             cp_error(CP_CORRUPT_INDEX, 
01251                 "can\'t find next free block descriptor for %lX "
01252                 "expected at %lX" , block->offset, block->next);
01253 #endif
01254             return -1;
01255         }
01256         next->prev = block->prev;
01257         memcpy(&prev_update, next, sizeof(block_desc));
01258     }
01259     else
01260         memset(&prev_update, 0, sizeof(block_desc));
01261 
01262     if (block->prev)
01263     {
01264         t.offset = block->prev;
01265         prev = cp_multimap_get(file->free_block, &t);
01266         if (prev == NULL) 
01267         {
01268 #ifdef DEBUG
01269             cp_error(CP_CORRUPT_INDEX, 
01270                 "can\'t find previous free block descriptor for %lX "
01271                 "expected at %lX" , block->offset, block->prev);
01272 #endif
01273             return -1;
01274         }
01275         prev->next = block->next;
01276 
01277         rc = fseek(file->fp, block->prev, SEEK_SET);
01278     }
01279         
01280     else
01281         rc = fseek(file->fp, FREE_BLOCK_HEADER_POSITION, SEEK_SET);
01282 
01283     if (rc == -1)
01284     {
01285 #ifdef DEBUG
01286         cp_error(CP_CORRUPT_INDEX, 
01287             "can\'t find previous free block descriptor for %lX expected at %lX"
01288             , block->offset, block->prev);
01289 #endif
01290         return -1;
01291     }
01292 
01293     written = cp_fwrite_long_array(file->fp, (unsigned long *) &prev_update, 2);
01294     if (written != 2)
01295     {
01296 #ifdef DEBUG
01297         cp_perror(CP_IO_ERROR, errno, "error writing free block descriptor at "
01298                     "offset %lX", 
01299                         block->prev ? block->prev : FREE_BLOCK_HEADER_POSITION);
01300 #endif
01301         return -1;
01302     }
01303 
01304     cp_multimap_remove(file->free_block, block);
01305 
01306     return 0;
01307 }
01308 
01309 /* cp_btree_refalloc - rerun file alloc. 
01310  * 
01311  * adjusts file usage for new size and writes. 1st case - new size is larger
01312  * than old size: if contiguous space available or at end of file, expands to
01313  * new size, otherwise adds old block to free list and writes at EOF. 2nd case
01314  * - adds superfluous space to free list and writes at current offset.
01315  */
01316 int cp_btree_refalloc(cp_btree_file *file, block_desc *block,
01317                       int new_len, void *buf)
01318 {
01319     int rc = 0;
01320     unsigned long eof; /* end of file  */
01321     unsigned long eob; /* end of block */
01322     int adjust_eof = 0;
01323     size_t written;
01324     block_desc roundblock, *pblock;
01325     int roundlen = round_up(new_len);
01326     roundblock.offset = block->offset;
01327     roundblock.size = round_up(block->size);
01328     pblock = &roundblock;
01329 
01330 printfdbg("REFALLOC: offset %lX, size %ld to size %d\n", block->offset, block->size, new_len);
01331 //    if (block->size > new_len)
01332     if (pblock->size > roundlen)
01333     {
01334         int err;
01335 printfdbg("REFALLOC (1)\n");
01336 //        if ((err = add_free_block(file, block->offset + new_len, 
01337 //                                  block->size - new_len)))
01338 printf("[AFB] offset %lX => %lX, releasing %d bytes\n", pblock->offset, pblock->offset + roundlen, pblock->size - roundlen);                                  
01339         if ((err = add_free_block(file, pblock->offset + roundlen, 
01340                                   pblock->size - roundlen)))
01341         {
01342 #ifdef DEBUG
01343             cp_error(err, "error reallocating block at offset %lX", 
01344                      block->offset);
01345 #endif
01346             return -1;
01347         }
01348 //      block->size = new_len;
01349     }
01350 //  else if (block->size < new_len)
01351     else if (pblock->size < roundlen)
01352     {
01353         /* to check for contiguous space, there are 3 possibilities:
01354          * 1. current block ends at EOF
01355          * 2. current block ends at a free block
01356          * 3. current block begins at the end of a free block.
01357          *
01358          * if none of these apply, check free blocks. If no free block is
01359          * available, write at EOF.
01360          */
01361     
01362 printfdbg("REFALLOC (2)\n");
01363 //      eob = block->offset + block->size;
01364         eob = pblock->offset + pblock->size;
01365         if (eob == file->eof) // case (1) - at EOF, just write
01366         {
01367 //          block->size = new_len;
01368             adjust_eof = 1;
01369         }
01370         else
01371         {
01372             short unify_left, unify_right;
01373             mem_block_desc *prev, *next;
01374 //          cp_index_map_node *tmap;
01375             if (file->free_block == NULL)
01376             {
01377                 file->free_block = 
01378                     cp_multimap_create_by_option(COLLECTION_MODE_NOSYNC | 
01379                                                  COLLECTION_MODE_DEEP, 
01380                                                  mem_block_desc_offset,
01381                                                  cp_hash_compare_long, 
01382                                                  NULL, free);
01383 
01384                 file->free_block_size_index = 
01385                     cp_multimap_create_index(file->free_block, CP_MULTIPLE, 
01386                                              mem_block_desc_size, 
01387                                              cp_hash_compare_long, &rc);
01388 #ifdef DEBUG
01389                 if (rc)
01390                     cp_error(rc, "creating free block end offset index");
01391 #endif
01392             }
01393 printfdbg("REFALLOC (2.1)\n");
01394             prev = cp_multimap_find(file->free_block, block, CP_OP_LT);
01395 //          tmap = cp_multimap_find(file->free_block, block, CP_OP_LT);
01396 //printfdbg("REFALLOC (2.2)\n");
01397 //          prev = tmap ? tmap->entry : NULL;
01398 printfdbg("REFALLOC (2.3)\n");
01399             next = cp_multimap_find(file->free_block, block, CP_OP_GT);
01400 //          tmap = cp_multimap_find(file->free_block, block, CP_OP_GT);
01401 //printfdbg("REFALLOC (2.4)\n");
01402 //          next = tmap ? tmap->entry : NULL;
01403 // printfdbg("REFALLOC (2.5)\n");
01404             unify_left = prev && prev->offset + prev->size == pblock->offset;
01405 //          unify_right = next && block->offset + block->size == next->offset;
01406             unify_right = next && pblock->offset + pblock->size == next->offset;
01407 //          if (unify_left && block->size + prev->size >= new_len)
01408             if (unify_left && pblock->size + prev->size >= roundlen)
01409             {
01410 printfdbg("REFALLOC (2.5.1)\n");
01411 //              if (block->size + prev->size <= new_len + sizeof(block_desc))
01412                 if (pblock->size + prev->size <= roundlen + sizeof(block_desc))
01413                 {
01414 printfdbg(" ( *) REFALLOC (2.5.1)\n");
01415                     block->offset = prev->offset;
01416                     drop_free_block(file, prev);
01417                 }
01418                 else
01419                 {
01420                     block_desc update;
01421 printfdbg(" (**) REFALLOC (2.5.1)\n");
01422 //                  block->offset = prev->offset + prev->size + block->size 
01423 //                                  - new_len;
01424                     block->offset = prev->offset + prev->size + pblock->size 
01425                                     - roundlen;
01426                     update.offset = prev->offset;
01427 //                  update.size = prev->size + block->size - new_len;
01428                     update.size = prev->size + pblock->size - roundlen;
01429                     update_free_block(file, prev, &update);
01430                 }
01431             }
01432 //          else if (unify_right && block->size + next->size >= new_len)
01433             else if (unify_right && pblock->size + next->size >= roundlen)
01434             {
01435 printfdbg("REFALLOC (2.5.2) block->size == %ld, next->size == %ld, new_len == %ld, desc %ld bytes\n", block->size, next->size, new_len, sizeof(block_desc));
01436 //              if (block->size + next->size <= new_len + sizeof(block_desc))
01437                 if (pblock->size + next->size <= roundlen + sizeof(block_desc))
01438 {
01439 printfdbg("REFALLOC (2.5.2.1)\n");
01440                     drop_free_block(file, next);
01441 }
01442                 else
01443                 {
01444                     block_desc update;
01445 printfdbg("REFALLOC (2.5.2.2)\n");
01446 //                  update.offset = block->offset + block->size + next->size - new_len;
01447 //                  update.offset = block->offset + new_len;                    
01448                     update.offset = block->offset + roundlen;                   
01449 //                  update.size = block->size + next->size - new_len;
01450                     update.size = pblock->size + next->size - roundlen;
01451                     update_free_block(file, next, &update);
01452                 }
01453             }
01454             else if (unify_right && unify_left && 
01455 //                   prev->size + block->size + next->size >= new_len)
01456                      prev->size + pblock->size + next->size >= roundlen)
01457             {
01458 printfdbg("REFALLOC (2.5.3)\n");
01459                 block->offset = prev->offset;
01460 //              nab_free_block(file, prev, next, new_len);
01461                 nab_free_block(file, prev, next, roundlen);
01462             }
01463             else
01464             {
01465 //              cp_index_map_node *tmap;
01466                 block_desc t;
01467                 cp_vector *v;
01468                 memset(&t, 0, sizeof(block_desc));
01469 printfdbg("REFALLOC (2.5.4)\n");
01470 //              t.size = new_len;
01471                 t.size = roundlen;
01472 vdump_free_block(file, "REFALLOC (2.5.4)");
01473                 v = cp_multimap_find_by_index(file->free_block, 
01474                             file->free_block_size_index, &t, CP_OP_GE);
01475 //              tmap = cp_multimap_find_by_index(file->free_block, 
01476 //                          file->free_block_size_index, &t, CP_OP_GE);
01477                 /* check if free block available */
01478                 if (v)
01479                 {
01480 //                  cp_vector *vmap = (cp_vector *) tmap->entry;
01481 printfdbg("REFALLOC (2.5.4.1)\n");
01482                     if (cp_vector_size(v))
01483 //                  if (cp_vector_size(vmap))
01484                     {
01485                         mem_block_desc *fb = (mem_block_desc *)
01486                             cp_vector_element_at(v, cp_vector_size(v) - 1);
01487 //                          cp_vector_element_at(vmap, cp_vector_size(vmap) - 1);
01488 printfdbg("REFALLOC (2.5.4.2)\n");
01489                         block->offset = fb->offset;
01490 //                      if (fb->size > new_len + sizeof(block_desc))
01491                         if (fb->size > roundlen + sizeof(block_desc))
01492                         {
01493                             block_desc nb;
01494 //                          nb.offset = fb->offset + new_len;
01495                             nb.offset = fb->offset + roundlen;
01496 //                          nb.size = fb->size - new_len;
01497                             nb.size = fb->size - roundlen;
01498                             update_free_block(file, fb, (block_desc *) &nb);
01499                         }
01500                         else
01501                             drop_free_block(file, fb);
01502                     }
01503                 }
01504                 else /* no tricks left */
01505                 {
01506 printfdbg("REFALLOC (2.5.4.3)\n");
01507                     block->offset = file->eof;
01508                     adjust_eof = 1;
01509                 }
01510             }
01511         }
01512 //      block->size = new_len;
01513     }
01514 //  else
01515     block->size = new_len;
01516 
01517 printfdbg("refalloc'ed to %lX\n", block->offset);
01518     if ((rc = fseek(file->fp, block->offset, SEEK_SET)))
01519     {
01520 #ifdef DEBUG
01521         cp_perror(CP_IO_ERROR, errno, "error refreshing block at offset %lX", 
01522                  block->offset);
01523 #endif
01524         return -1;
01525     }
01526 //  written = fwrite(buf, 1, actual_newlen, file->fp);
01527 //  if (written != actual_newlen) return -1;
01528     written = fwrite(buf, 1, new_len, file->fp);
01529     if (written != new_len) return -1;
01530 
01531 //  if (adjust_eof)
01532 //          file->eof += new_len - block->size;
01533     if (adjust_eof)
01534     {
01535         int diff = roundlen - new_len;
01536         if (diff > 0)
01537         {
01538             char *filler = (char *) calloc(diff, sizeof(char));
01539             memset(filler, -1, diff);
01540             written = fwrite(filler, 1, diff, file->fp);
01541             free(filler);
01542         }
01543         fseek(file->fp, 0, SEEK_END);
01544         file->eof = ftell(file->fp);
01545     }
01546 
01547     fflush(file->fp);
01548 printfdbg("(+) fALLOC %lX %ld\n", block->offset, new_len);
01549     return 0;
01550 }
01551 
01552 cp_btreenode *node_cache_fetch(cp_btree_file *file, unsigned long addr)
01553 {
01554     cp_btreenode *node;
01555 #if 0
01556 if (cp_hashlist_item_count(file->node_cache) >= file->cache_size)
01557 {
01558     printfdbg(" *** \n"
01559            " *** node cache exceeds max (%d): %d items\n"
01560            " *** \n", file->cache_size, 
01561            cp_hashlist_item_count(file->node_cache));
01562 }
01563 #endif
01564 //  cp_btreenode *node = 
01565     node = 
01566         cp_hashlist_remove_by_option(file->node_cache, 
01567                                      &addr, COLLECTION_MODE_NOSYNC);
01568     if (node)
01569     {
01570         cp_hashlist_append_by_option(file->node_cache, &node->address, node, 
01571                                      COLLECTION_MODE_NOSYNC);
01572     }
01573 
01574     return node;
01575 }
01576 
01577 void _node_cache_maintain(cp_btree_file *file)
01578 {
01579 }
01580 
01581 int dirty_node(cp_btreenode *node)
01582 {
01583     int i;
01584     if (node->nodrop || node->dirty_flag) return 1;
01585 
01586     for (i = 0; i < node->items; i++) 
01587         if (node->dirty[i]) return 1; 
01588 
01589     return 0;
01590 }
01591 
01592 void node_cache_maintain(cp_btree_file *file)
01593 {
01594     int curr = cp_hashlist_item_count(file->node_cache);
01595     int max = file->cache_size;
01596     cp_btreenode *node;
01597     cp_hashlist_iterator itr;
01598 cp_btreenode *chk;
01599 
01600     if (curr >= max)
01601     {
01602 printfdbg("node cache: %d / %d items, culling\n", curr, max);
01603         cp_hashlist_iterator_init(&itr, file->node_cache, COLLECTION_LOCK_NONE);
01604         do
01605         {
01606             node = cp_hashlist_iterator_curr_value(&itr);
01607 printfdbg("----------  node %lx - nodrop %d, dirty %d\n", node->address, node->nodrop, dirty_node(node));
01608 //          if (node->nodrop) continue;
01609 # if 0
01610 chk = cp_hashlist_iterator_curr_value(&itr);            
01611 if (chk != node)
01612 {
01613     printf("chk is %lx, node is %lx. problem.\n", node->address, chk->address);
01614     DIE();
01615 }
01616 #endif
01617             if (!dirty_node(node)) 
01618             {
01619                 cp_hashlist_iterator_remove(&itr);
01620                 if (--curr < max) break;
01621             }
01622 
01623 //          cp_hashlist_remove(file->node_cache, &node->address);
01624         }
01625         while ((cp_hashlist_iterator_next(&itr)));
01626     }
01627 }
01628 
01629 //      if (cp_hashlist_item_count(file->node_cache) == file->cache_size)
01630 //~~
01631 static void btree_set_root(cp_btree_file *tree, cp_btreenode *node);
01632 /*
01633  * here's what a node header looks like on the disk.
01634  *
01635  * +-+-----+----+------+-----+------+-----+...+------+-----+------+-----+
01636  * |n|Kaddr|Klen|k1addr|k1len|v1addr|v1len|   |kNaddr|kNlen|vNaddr|vNlen|
01637  * +-+-----+----+------+-----+------+-----+...+------+-----+------+-----+
01638  * +-+--+...+----+
01639  * |P|C1|   |CN+1|
01640  * +-+--+...+----+
01641  *
01642  * with a tree of order N+1, the parameters are as follows: 
01643  * 
01644  * o    n                  - number of items in this node
01645  * o    Kaddr              - address of the key block
01646  * o    Klen               - total length of the key block
01647  * o    k1addr .. kNaddr   - offsets of the keys k1 to kN within the key block
01648  * o    k1len  .. kNlen    - lengths of the keys k1 .. kN
01649  * o    v1addr .. vNaddr   - absolute offsets of the values v1 .. vN
01650  * o    v1len  .. vNlen    - lengths of the values v1 .. vN
01651  * o    P                 - address of parent node
01652  * o    C1 .. CN+1         - addresses of children nodes 1 .. N+1
01653  *
01654  * The key block resides elsewhere. The added complexity in this scheme serves
01655  * to allow a flexible key length. Assuming unlimited read sizes, retrieving 
01656  * the complete node header from the disk requires 2 reads: 
01657  * 
01658  * (1) read the node header: W*2 + (order - 1)*W*2*2 + W + order*W bytes
01659  * (2) read the key block: Klen bytes
01660  * 
01661  * The fixed key length implementation is simpler and faster, but does not 
01662  * allow flexible key sizes. 
01663  */
01664 cp_btreenode *cp_btree_read_node(cp_btree_file *file, unsigned long addr)
01665 {
01666     int i;
01667     int rc;
01668     size_t size;
01669     unsigned long l, item_count;
01670     block_desc key_address;
01671     int index;
01672     cp_btreenode *node = NULL;
01673     char *buf = NULL;
01674     char *keybuf = NULL;
01675 if (addr < 0 || addr > file->eof)
01676 {
01677     printfdbg("cp_btree_read_node: request for invalid file offset %lX\n", addr);
01678     DIE();
01679 }
01680     if (file == NULL || file->fp == NULL) return NULL;
01681 
01682 printfdbg("cp_btree_read_node[1]  addr %lX - trying root\n", addr);
01683     if (file->root && (addr == 0 || addr == file->root->address)) 
01684         return file->root;
01685 
01686     if (addr == 0) /* read root node */
01687         addr = file->root_addr;
01688 //  else if ((node = cp_hashlist_get(file->node_cache, &addr)))
01689     else if ((node = node_cache_fetch(file, addr))) 
01690 {
01691 printfdbg("cp_btree_read_node[2]  addr %lX - found in cache\n", addr);
01692         return node;
01693 }
01694 
01695 printfdbg("cp_btree_read_node[3]  addr %lX - reading from file\n", addr);
01696     /* (1) read node header */
01697     buf = (char *) malloc(file->node_header_len);
01698     if (buf == NULL) 
01699     {
01700         cp_error(CP_MEMORY_ALLOCATION_FAILURE, 
01701                  "can\'t allocate local storage reading %s", file->filename);
01702         goto READ_ERROR;
01703     }
01704 
01705     rc = fseek(file->fp, addr, SEEK_SET);
01706     if (rc) 
01707     {
01708         cp_error(CP_INVALID_FILE_OFFSET, "can\'t read node from %s:%lX", 
01709                  file->filename, addr);
01710         goto READ_ERROR;
01711     }
01712 
01713     size = fread(buf, file->node_header_len, 1, file->fp);
01714     if (size == 0) 
01715     {
01716         cp_error(CP_IO_ERROR, "error reading from %s:%lX", 
01717                  file->filename, addr);
01718         goto READ_ERROR;
01719     }
01720 
01721     /* 1st word in header is item count - keep for later */
01722     index = 0;
01723     item_count = mem2long(buf);
01724     index = sizeof(unsigned long);
01725     key_address.offset = mem2long(&buf[index]);
01726     index += sizeof(unsigned long);
01727     key_address.size = mem2long(&buf[index]);
01728 
01729     /* (2) read node key */
01730     keybuf = (char *) malloc(key_address.size);
01731     if (keybuf == NULL) 
01732     {    
01733         cp_error(CP_MEMORY_ALLOCATION_FAILURE, 
01734                  "can\'t allocate local storage reading %s", file->filename);
01735         goto READ_ERROR;
01736     }
01737     
01738     rc = fseek(file->fp, key_address.offset, SEEK_SET);
01739     if (rc) 
01740     {
01741         cp_error(CP_INVALID_FILE_OFFSET, "can\'t read node key from %s:%lX", 
01742                  file->filename, addr);
01743         goto READ_ERROR;
01744     }
01745     
01746     size = fread(keybuf, key_address.size, 1, file->fp);
01747     if (size == 0) 
01748     {
01749         cp_error(CP_IO_ERROR, "error reading key from %s:%lX " 
01750                               "reading key offset %lX", 
01751                  file->filename, addr, key_address.offset);
01752         goto READ_ERROR;
01753     }
01754     
01755     /* (3) set up in-memory node structure */
01756     node = (cp_btreenode *) calloc(1, sizeof(cp_btreenode));
01757 printf("[MEM] (read) allocating node %p\n", node);
01758     if (node == NULL) 
01759     {
01760         cp_error(CP_MEMORY_ALLOCATION_FAILURE, 
01761                  "can\'t allocate local storage reading %s", file->filename);
01762         goto READ_ERROR;
01763     }
01764 
01765     node->key = (void **) calloc(file->order - 1, sizeof(void *));
01766     if (node->key == NULL) 
01767     {
01768         cp_error(CP_MEMORY_ALLOCATION_FAILURE, 
01769                  "can\'t allocate local storage reading %s", file->filename);
01770         goto READ_ERROR;
01771     }
01772     node->ser_key = (cp_string **) calloc(file->order - 1, sizeof(cp_string *));
01773     if (node->ser_key == NULL) 
01774     {
01775         cp_error(CP_MEMORY_ALLOCATION_FAILURE, 
01776                  "can\'t allocate local storage reading %s", file->filename);
01777         goto READ_ERROR;
01778     }
01779 
01780     node->value = (void **) calloc(file->order - 1, sizeof(void *));
01781     if (node->value == NULL) 
01782     {
01783         cp_error(CP_MEMORY_ALLOCATION_FAILURE, 
01784                  "can\'t allocate local storage reading %s", file->filename);
01785         goto READ_ERROR;
01786     }
01787     node->ser_value = 
01788         (cp_string **) calloc(file->order - 1, sizeof(cp_string *));
01789     if (node->ser_value == NULL) 
01790     {
01791         cp_error(CP_MEMORY_ALLOCATION_FAILURE, 
01792                  "can\'t allocate local storage reading %s", file->filename);
01793         goto READ_ERROR;
01794     }
01795     node->value_address = 
01796         (block_desc *) calloc(file->order - 1, sizeof(block_desc));
01797     if (node->value_address == NULL) 
01798     {
01799         cp_error(CP_MEMORY_ALLOCATION_FAILURE, 
01800                  "can\'t allocate local storage reading %s", file->filename);
01801         goto READ_ERROR;
01802     }
01803 
01804     node->child_address = 
01805         (unsigned long *) calloc(file->order, sizeof(unsigned long));
01806     if (node->child_address == NULL)
01807     {
01808         cp_error(CP_MEMORY_ALLOCATION_FAILURE, 
01809                  "can\'t allocate local storage reading %s", file->filename);
01810         goto READ_ERROR;
01811     }
01812 
01813     node->dirty = 
01814         (short *) calloc(file->order, sizeof(short));
01815     if (node->child_address == NULL)
01816     {
01817         cp_error(CP_MEMORY_ALLOCATION_FAILURE, 
01818                  "can\'t allocate local storage reading %s", file->filename);
01819         goto READ_ERROR;
01820     }
01821 
01822     node->address = addr;
01823     node->items = (int) item_count;
01824     node->key_address = key_address;
01825 
01826     for (i = 0; i < node->items; i++)
01827     {
01828         unsigned long offset;
01829         index += sizeof(unsigned long);
01830         l = mem2long(&buf[index]);
01831         index += sizeof(unsigned long); 
01832         offset = mem2long(&buf[index]);
01833         node->ser_key[i] = cp_string_create(&keybuf[offset], l);
01834         if (node->ser_key[i] == NULL) goto READ_ERROR;
01835         node->key[i] = (*file->deserialize_key)(node->ser_key[i]->data, NULL);
01836         if (node->key[i] == NULL) goto READ_ERROR;
01837         
01838         index += sizeof(unsigned long);
01839         node->value_address[i].offset = mem2long(&buf[index]);
01840         index += sizeof(unsigned long); 
01841         node->value_address[i].size = mem2long(&buf[index]);
01842     }
01843 //    index += 4 * sizeof(unsigned long) * (file->order - 1 - node->items);
01844     index = (4 * file->order - 1) * sizeof(unsigned long);
01845     node->parent_address = mem2long(&buf[index]);
01846 
01847     for (i = 0; i < node->items + 1; i++)
01848     {
01849         index += sizeof(unsigned long);
01850         node->child_address[i] = mem2long(&buf[index]);
01851     }
01852     
01853     node->owner = file;
01854 
01855     if (addr == file->root_addr) 
01856         btree_set_root(file, node); 
01857 //      file->root = node;
01858     else
01859     {
01860         printfdbg("putting node %lX on cache\n", node->address);
01861         cp_hashlist_append(file->node_cache, &node->address, node);
01862         node_cache_maintain(file);
01863 //~~ 019 add this ^     
01864 //      if (cp_hashlist_item_count(file->node_cache) == file->cache_size)
01865 //          cp_hashlist_remove_head(file->node_cache);
01866     }
01867 
01868     if (buf) free(buf);
01869     if (keybuf) free(keybuf);
01870     return node;
01871 
01872 READ_ERROR:
01873 printfdbg("READ_ERROR\n");
01874 DIE();
01875     if (buf) free(buf);
01876     if (keybuf) free(keybuf);
01877     if (node)
01878     {
01879         if (node->ser_key) 
01880         {
01881             int i;
01882             for (i = 0; i < node->items; i++)
01883                 if (node->ser_key[i]) cp_string_destroy(node->ser_key[i]);
01884             free(node->ser_key);
01885         }
01886         if (node->value_address) free(node->value_address);
01887         free(node);
01888     }
01889     return NULL;
01890 }
01891 
01892 /* find a good spot for ``size'' bytes and write them there */
01893 unsigned long cp_btree_falloc(cp_btree_file *file, 
01894                               unsigned long size, 
01895                               char *buf, 
01896                               int *err)
01897 {
01898     unsigned long res = 0L;
01899     int update_eof = 0;
01900     int roundlen = round_up(size);
01901 //  int actual_size = size;
01902 //  size = round_up(size);
01903     *err = 0;
01904 printfdbg("FLC(0) %ld bytes\n", size);
01905 vdump_free_block(file, "falloc");
01906     if (file != NULL && size > 0)
01907     {
01908         int rc;
01909         mem_block_desc *target = NULL;
01910         mem_block_desc updated;
01911         memset(&updated, 0, sizeof(mem_block_desc));
01912 
01913         if (file->free_block)
01914         {
01915             cp_vector *v;
01916 //          cp_index_map_node *tmap;
01917             mem_block_desc m;
01918 printfdbg("FLC(1) free_block available\n");
01919             memset(&m, 0, sizeof(mem_block_desc));
01920 //            m.size = size;
01921             m.size = roundlen;
01922 printfdbg("FLC(1.05) looking for a chunk of size %lX or more\n", m.size);
01923 vdump_free_block(file, "FLC(1.05)");
01924             v = cp_multimap_find_by_index(file->free_block, 
01925                                              file->free_block_size_index,
01926 //          tmap = cp_multimap_find_by_index(file->free_block, 
01927 //                                           file->free_block_size_index,
01928                                              &m, CP_OP_GE);
01929 printfdbg("FLC(1.1) got free_block entry %p\n", v);
01930             if (v)
01931 //            if (tmap)
01932             {
01933 //              cp_vector *vmap = (cp_vector *) tmap->entry;
01934                 if (cp_vector_size(v))
01935 //              if (cp_vector_size(vmap))
01936                 {
01937                     target = (mem_block_desc *) 
01938                         cp_vector_element_at(v, cp_vector_size(v) - 1);
01939 printfdbg("FLC(1.1.1) got free_block entry %p\n", target);
01940 //                      cp_vector_element_at(vmap, cp_vector_size(vmap) - 1);
01941 printfdbg("cp_btree_falloc: got target %lX size %ld prev %lX next %lX\n", target->offset, target->size, target->prev, target->next);
01942 if (target->size > file->eof || target->size < size)
01943 {
01944     printfdbg("invalid block size %lX\n", target->size);
01945     DIE();
01946 }
01947 if (target->offset > file->eof)
01948 {
01949     char *p = 0;
01950     printfdbg("invalid block offset %lX\n", target->offset);
01951     *p = 1;
01952 }
01953                     res = target->offset;
01954 //                  if (target->size > size + sizeof(block_desc))
01955                     if (target->size > roundlen + sizeof(block_desc))
01956                     {
01957                         memcpy(&updated, target, sizeof(block_desc));
01958 //                      updated.size -= size;
01959                         updated.size -= roundlen;
01960 //                      updated.offset += size;
01961                         updated.offset += roundlen;
01962 
01963                         rc = update_free_block(file, target, (block_desc *) &updated);
01964                     }
01965                     else
01966                         rc = drop_free_block(file, target);
01967                 }
01968             }
01969         }
01970 
01971         if (target == NULL)
01972         {
01973             res = file->eof;
01974             update_eof = 1;
01975         }
01976 
01977         /* write buffer */
01978         if (fseek(file->fp, res, SEEK_SET))
01979         {
01980             *err = CP_IO_ERROR;
01981             return -1L;
01982         }
01983         if (fwrite(buf, size, 1, file->fp) == -1)
01984 //        if (fwrite(buf, actual_size, 1, file->fp) == -1)
01985         {
01986             *err = CP_IO_ERROR;
01987             return -1L;
01988         }
01989 
01990         if (update_eof)
01991 //      if (1)
01992         {
01993             int diff = roundlen - size;
01994             if (diff > 0)
01995             {
01996                 char *filler = (char *) calloc(diff, sizeof(char));
01997                 memset(filler, -1, diff);
01998                 fwrite(filler, 1, diff, file->fp);
01999                 free(filler);
02000             }
02001 
02002             fseek(file->fp, 0, SEEK_END);
02003             file->eof = ftell(file->fp);
02004         }
02005     }
02006     else
02007         *err = CP_INVALID_VALUE;
02008 
02009 //printfdbg("FALLOC: %ld bytes at offset %lX\n", size, res);
02010 printfdbg("(+) fALLOC %ld %lX\n", size, res);
02011 printfdbg("        eof is %lX\n", file->eof);
02012 //  if (res)
02013 //      fflush(file->fp);
02014 
02015     return res;
02016 }
02017 
02018 static int grab_value(cp_btree_file *file, cp_btreenode *node, int index)
02019 {
02020     int rc = 0;
02021     char *buf;
02022     cp_string *ser_value;
02023 //printfdbg("grab_value: node %p (%lX) index %d\n", node, node->address, index);
02024     if (node->value == NULL)
02025     {
02026         node->value = (void **) calloc(file->order - 1, sizeof(void *));
02027     }
02028     if (node->ser_value == NULL)
02029     {
02030         node->ser_value = 
02031             (cp_string **) calloc(file->order - 1, sizeof(cp_string *));
02032     }
02033     if (node->value[index]) return 0;
02034 if (node->value_address[index].size <= 0 || node->value_address[index].size > 0x100000) //~~ 018
02035 {
02036     printf("node %lx - value [%d] - value_addres size is %d\n", node->address, index, node->value_address[index].size);
02037     DIE();
02038 }
02039     buf = (char *) malloc(node->value_address[index].size);
02040     fseek(file->fp, node->value_address[index].offset, SEEK_SET);
02041     fread(buf, node->value_address[index].size, 1, file->fp);
02042 //  if (node->ser_value[index]) cp_string_free(node->ser_value[index]); //~~
02043     ser_value = 
02044         cp_string_create(buf, node->value_address[index].size);
02045     if (file->deserialize_value)
02046     {
02047 #if 1
02048         if (node->value[index] && 
02049             (file->mode & COLLECTION_MODE_DEEP) && 
02050             file->value_dtr)
02051             (*file->value_dtr)(node->value[index]);
02052 #endif
02053 
02054         node->value[index] = 
02055             (*file->deserialize_value)(ser_value->data, NULL);
02056     }
02057     cp_string_destroy(ser_value);
02058     free(buf);
02059     return rc;
02060 }
02061 
02062 static void * 
02063     btree_file_get_value(cp_btree_file *file, cp_btreenode *node, int index)
02064 {
02065 #if 1 //~~ 018
02066     if (node->value[index]) 
02067         return node->value[index];
02068 #endif
02069 
02070     grab_value(file, node, index);
02071     return node->value[index];
02072 }
02073 
02074 void dump_node(cp_btreenode *node)
02075 {
02076     int i;
02077     printf("DUMP NODE: offset %lX parent %lX %ld items\n", node->address, node->parent_address, node->items);
02078     for (i = 0; i < node->items; i++)
02079     {
02080         printf("%s %s => %s %s\n",
02081                node->dirty[i] & BTREE_NODE_DIRTY_KEY ? "." : " ",
02082                cp_string_tocstr(node->key[i]), 
02083                node->dirty[i] & BTREE_NODE_DIRTY_VALUE ? "." : " ",
02084                cp_string_tocstr(node->value[i]));
02085     }
02086 }
02087 
02088 
02089 
02090 
02091 
02092 
02093 
02094 
02095 
02096 int _compare_node(cp_btreenode *n, cp_btreenode *m)
02097 {
02098     return 0;
02099 }
02100 
02101 int compare_node(cp_btreenode *n, cp_btreenode *m)
02102 {
02103     int rc = n->items - m->items;
02104     printfdbg("compare_node: disk node \n");
02105     dump_node_dbg(n);
02106     printfdbg("compare_node: mem node \n");
02107     dump_node_dbg(m);
02108 
02109     if (rc == 0)
02110     {
02111         int i;
02112         int len = n->items;
02113         for (i = 0; i < len; i++)
02114             if ((rc = (*n->owner->cmp)(n->key[i], m->key[i])))
02115             {
02116                 printfdbg("compare_node: disk node [%s], mem node [%s]\n", cp_string_tocstr(n->key[i]), cp_string_tocstr(m->key[i]));
02117                 break;
02118             }
02119     }
02120 
02121     return rc;
02122 }
02123 
02124 void cp_btreenode_destroy(cp_btreenode *node);
02125 void verify_write(cp_btree_file *file, cp_btreenode *memnode)
02126 {
02127     int i;
02128     int rc;
02129     size_t size;
02130     unsigned long l, item_count;
02131     block_desc key_address;
02132     int index;
02133     cp_btreenode *node = NULL;
02134     char *buf = NULL;
02135     char *keybuf = NULL;
02136     unsigned long addr = memnode->address;
02137 printf("verify_write: checking node %x\n", memnode->address);
02138     /* (1) read node header */
02139     buf = (char *) malloc(file->node_header_len);
02140     if (buf == NULL) 
02141     {
02142         cp_error(CP_MEMORY_ALLOCATION_FAILURE, 
02143                  "can\'t allocate local storage reading %s", file->filename);
02144         goto READ_ERROR;
02145     }
02146 
02147     rc = fseek(file->fp, addr, SEEK_SET);
02148     if (rc) 
02149     {
02150         cp_error(CP_INVALID_FILE_OFFSET, "can\'t read node from %s:%lX", 
02151                  file->filename, addr);
02152         goto READ_ERROR;
02153     }
02154 
02155     size = fread(buf, file->node_header_len, 1, file->fp);
02156     if (size == 0) 
02157     {
02158         cp_error(CP_IO_ERROR, "error reading from %s:%lX", 
02159                  file->filename, addr);
02160         goto READ_ERROR;
02161     }
02162 
02163     /* 1st word in header is item count - keep for later */
02164     index = 0;
02165     item_count = mem2long(buf);
02166     index = sizeof(unsigned long);
02167     key_address.offset = mem2long(&buf[index]);
02168     index += sizeof(unsigned long);
02169     key_address.size = mem2long(&buf[index]);
02170 
02171     /* (2) read node key */
02172     keybuf = (char *) malloc(key_address.size);
02173     if (keybuf == NULL) 
02174     {    
02175         cp_error(CP_MEMORY_ALLOCATION_FAILURE, 
02176                  "can\'t allocate local storage reading %s", file->filename);
02177         goto READ_ERROR;
02178     }
02179     
02180     rc = fseek(file->fp, key_address.offset, SEEK_SET);
02181     if (rc) 
02182     {
02183         cp_error(CP_INVALID_FILE_OFFSET, "can\'t read node key from %s:%lX", 
02184                  file->filename, addr);
02185         goto READ_ERROR;
02186     }
02187     
02188     size = fread(keybuf, key_address.size, 1, file->fp);
02189     if (size == 0) 
02190     {
02191         cp_error(CP_IO_ERROR, "error reading key from %s:%lX", 
02192                  file->filename, addr);
02193         goto READ_ERROR;
02194     }
02195     
02196     /* (3) set up in-memory node structure */
02197     node = (cp_btreenode *) calloc(1, sizeof(cp_btreenode));
02198 printf("[MEM] (verify_write) allocating node %p\n", node);
02199     if (node == NULL) 
02200     {
02201         cp_error(CP_MEMORY_ALLOCATION_FAILURE, 
02202                  "can\'t allocate local storage reading %s", file->filename);
02203         goto READ_ERROR;
02204     }
02205 
02206     node->key = (void **) calloc(file->order - 1, sizeof(void *));
02207     if (node->key == NULL) 
02208     {
02209         cp_error(CP_MEMORY_ALLOCATION_FAILURE, 
02210                  "can\'t allocate local storage reading %s", file->filename);
02211         goto READ_ERROR;
02212     }
02213     node->ser_key = (cp_string **) calloc(file->order - 1, sizeof(cp_string *));
02214     if (node->ser_key == NULL) 
02215     {
02216         cp_error(CP_MEMORY_ALLOCATION_FAILURE, 
02217                  "can\'t allocate local storage reading %s", file->filename);
02218         goto READ_ERROR;
02219     }
02220 
02221     node->value = (void **) calloc(file->order - 1, sizeof(void *));
02222     if (node->value == NULL) 
02223     {
02224         cp_error(CP_MEMORY_ALLOCATION_FAILURE, 
02225                  "can\'t allocate local storage reading %s", file->filename);
02226         goto READ_ERROR;
02227     }
02228     node->ser_value = 
02229         (cp_string **) calloc(file->order - 1, sizeof(cp_string *));
02230     if (node->ser_value == NULL) 
02231     {
02232         cp_error(CP_MEMORY_ALLOCATION_FAILURE, 
02233                  "can\'t allocate local storage reading %s", file->filename);
02234         goto READ_ERROR;
02235     }
02236     node->value_address = 
02237         (block_desc *) calloc(file->order - 1, sizeof(block_desc));
02238     if (node->value_address == NULL) 
02239     {
02240         cp_error(CP_MEMORY_ALLOCATION_FAILURE, 
02241                  "can\'t allocate local storage reading %s", file->filename);
02242         goto READ_ERROR;
02243     }
02244 
02245     node->child_address = 
02246         (unsigned long *) calloc(file->order, sizeof(unsigned long));
02247     if (node->child_address == NULL)
02248     {
02249         cp_error(CP_MEMORY_ALLOCATION_FAILURE, 
02250                  "can\'t allocate local storage reading %s", file->filename);
02251         goto READ_ERROR;
02252     }
02253 
02254     node->dirty = 
02255         (short *) calloc(file->order, sizeof(short));
02256     if (node->child_address == NULL)
02257     {
02258         cp_error(CP_MEMORY_ALLOCATION_FAILURE, 
02259                  "can\'t allocate local storage reading %s", file->filename);
02260         goto READ_ERROR;
02261     }
02262 
02263     node->address = addr;
02264     node->items = (int) item_count;
02265     node->key_address = key_address;
02266 
02267     for (i = 0; i < node->items; i++)
02268     {
02269         unsigned long offset;
02270         index += sizeof(unsigned long);
02271         l = mem2long(&buf[index]);
02272         index += sizeof(unsigned long); 
02273         offset = mem2long(&buf[index]);
02274         node->ser_key[i] = cp_string_create(&keybuf[offset], l);
02275         if (node->ser_key[i] == NULL) goto READ_ERROR;
02276         node->key[i] = (*file->deserialize_key)(node->ser_key[i]->data, NULL);
02277         
02278         index += sizeof(unsigned long);
02279         node->value_address[i].offset = mem2long(&buf[index]);
02280         index += sizeof(unsigned long); 
02281         node->value_address[i].size = mem2long(&buf[index]);
02282 
02283         if (node->value_address[i].size == 0 ||
02284             node->value_address[i].offset == 0)
02285         {
02286             printfdbg("node %lx: null value [%d] on disk\n", node->address, i);
02287             DIE();
02288         }
02289     }
02290 //    index += 4 * sizeof(unsigned long) * (file->order - 1 - node->items);
02291     index = (4 * file->order - 1) * sizeof(unsigned long);
02292     node->parent_address = mem2long(&buf[index]);
02293 
02294     for (i = 0; i < node->items + 1; i++)
02295     {
02296         index += sizeof(unsigned long);
02297         node->child_address[i] = mem2long(&buf[index]);
02298     }
02299     
02300     node->owner = file;
02301 #if 0
02302     if (compare_node(node, memnode))
02303     {
02304         printfdbg("mem / disk mismatch after writing node at %lX\n", 
02305                node->address);
02306         DIE();
02307     }
02308 #endif
02309 
02310 READ_ERROR:
02311     if (buf) free(buf);
02312     if (keybuf) free(keybuf);
02313     if (node)
02314     {
02315         cp_btreenode_destroy(node);
02316 //      cp_btreenode_destroy(node);
02317 #if 0
02318         if (node->ser_key) 
02319         {
02320             int i;
02321             for (i = 0; i < node->items; i++)
02322                 if (node->ser_key[i]) cp_string_destroy(node->ser_key[i]);
02323             free(node->ser_key);
02324         }
02325         if (node->value_address) free(node->value_address);
02326         free(node);
02327 #endif
02328     }
02329 }
02330 
02331 
02332 void validate(cp_btreenode *node)
02333 {
02334     int i;
02335     
02336     printfdbg("validating node %lx - %d items\n", node->address, node->items);
02337     for (i = 0; i < node->items; i++)
02338     {
02339         if (node->value[i] == NULL && node->value_address[i].offset == 0)
02340         {
02341             printfdbg("emptiness on node %lx, index %d\n", node->address, i);
02342             DIE();
02343         }
02344     }
02345 }
02346 
02347 
02348 int cp_btree_write_node(cp_btree_file *file, cp_btreenode *node)
02349 {
02350     int rc;
02351     int i;
02352     char *header = NULL;
02353     cp_string *keybuf = NULL;
02354     unsigned long key_len, *offset;
02355     unsigned long key_addr, value_addr;
02356 printfdbg("W(1) node %lX: %d items\n", node->address, node->items);
02357 validate(node);
02358 // dump_node_dbg(node);
02359     /* new node - don't need to check dirty flags, just write */
02360     if (node->address == 0)
02361     {
02362 printfdbg("W(2)\n");
02363         header = (char *) calloc(1, HEADER_SIZE(file));
02364         long2mem(node->items, &header[0]);
02365         offset = (unsigned long *) &header[3 * sizeof(unsigned long)];
02366         key_len = 0;
02367         for (i = 0; i < node->items; i++)
02368         {
02369             unsigned long len;
02370             if (node->ser_key[i] != NULL) 
02371                 cp_string_destroy(node->ser_key[i]);
02372             node->ser_key[i] = (*file->serialize_key)(node->key[i]);
02373             len = cp_string_len(node->ser_key[i]);
02374             long2mem(len, offset++);
02375             long2mem(key_len, offset++);
02376             key_len += len;
02377 //          key_len += round_up(len);
02378             if (node->dirty[i] & BTREE_NODE_USE_VALUE_ADDR)
02379             {
02380                 value_addr = node->value_address[i].offset;
02381                 len = node->value_address[i].size;
02382             }   
02383             else
02384             {
02385                 if (node->ser_value[i] != NULL)
02386                     cp_string_destroy(node->ser_value[i]);
02387                 node->ser_value[i] = (*file->serialize_value)(node->value[i]);
02388                 len = cp_string_len(node->ser_value[i]);
02389                 value_addr = 
02390                     cp_btree_falloc(file, len,
02391                                     cp_string_data(node->ser_value[i]), &rc);
02392                 node->value_address[i].offset = value_addr;
02393                 node->value_address[i].size = len;
02394             }
02395 if (value_addr == 0 || len == 0)
02396 {
02397     printfdbg("value address == %lx, len == %lx\n", value_addr, len);
02398     DIE();
02399 }
02400             long2mem(value_addr, offset++);
02401             long2mem(len, offset++);
02402         }
02403 printfdbg("W(3)\n");
02404         // create key buffer
02405         keybuf = cp_string_dup(node->ser_key[0]);
02406         for (i = 1; i < node->items; i++)
02407             cp_string_cat(keybuf, node->ser_key[i]);
02408         // write key info
02409         key_addr = cp_btree_falloc(file, key_len, cp_string_data(keybuf), &rc);
02410         // write key address & len at offset 1, 2
02411         long2mem(key_addr, &header[sizeof(unsigned long)]);
02412         long2mem(key_len, &header[2 * sizeof(unsigned long)]);
02413         node->key_address.offset = key_addr;
02414         node->key_address.size = key_len;
02415         offset = 
02416             (unsigned long *) &header[(4 * file->order - 1) * sizeof(long)];
02417         // write parent address
02418         long2mem(node->parent_address, offset++);
02419         // write children addresses
02420         for (i = 0; i <= node->items; i++)
02421             long2mem(node->child_address[i], offset++);
02422         
02423 printfdbg("W(3.1) node [%lx]: writing key len %lx\n", node->address, key_len);
02424 printfdbg("W(3.5) falloc %d bytes\n", HEADER_SIZE(file));
02425         node->address = 
02426             cp_btree_falloc(file, HEADER_SIZE(file), header, &rc);
02427 
02428         cp_hashlist_append(file->node_cache, &node->address, node);
02429 printfdbg("W(4)\n");
02430         goto WRITE_DONE;
02431     }
02432 
02433 printfdbg("W(5)\n");
02434     for (i = 0; i < node->items; i++)
02435     {
02436 printfdbg("W(6)\n");
02437         if ((node->dirty[i] & BTREE_NODE_DIRTY_VALUE) &&
02438             !(node->dirty[i] & BTREE_NODE_USE_VALUE_ADDR))
02439         {
02440             node->dirty_flag |= BTREE_NODE_DIRTY_HEADER;
02441             if (node->ser_value[i]) cp_string_destroy(node->ser_value[i]); //~~ 018 ???
02442 //          node->ser_value[i] = (*file->serialize_value)(node->value[i]);
02443             node->ser_value[i] = 
02444                 (*file->serialize_value)(btree_file_get_value(file, node, i));
02445 printfdbg("W(7)\n");
02446             if (node->value_address[i].offset)
02447             {
02448 printfdbg("W(8)\n");
02449                 rc = cp_btree_refalloc(file, &node->value_address[i], 
02450                                        cp_string_len(node->ser_value[i]), 
02451                                        cp_string_data(node->ser_value[i]));
02452             }
02453             else
02454             {
02455 printfdbg("W(9)\n");
02456                 node->value_address[i].offset = 
02457                     cp_btree_falloc(file, cp_string_len(node->ser_value[i]),
02458                                     cp_string_data(node->ser_value[i]), &rc);
02459                 node->value_address[i].size = cp_string_len(node->ser_value[i]);
02460             }
02461         }
02462         if (node->dirty[i] & BTREE_NODE_DIRTY_CHILD)
02463             node->dirty_flag |= BTREE_NODE_DIRTY_HEADER;
02464     }
02465     // check child n + 1
02466     if (node->items && node->dirty[node->items] & BTREE_NODE_DIRTY_CHILD)
02467         node->dirty_flag |= BTREE_NODE_DIRTY_HEADER;
02468 
02469     if (node->dirty_flag & (BTREE_NODE_DIRTY_HEADER | BTREE_NODE_DIRTY_KEYS))
02470     {
02471 printfdbg("W(10)\n");
02472         header = (char *) calloc(1, HEADER_SIZE(file));
02473         long2mem(node->items, &header[0]);
02474         offset = (unsigned long *) &header[3 * sizeof(unsigned long)];
02475         key_len = 0;
02476         for (i = 0; i < node->items; i++)
02477         {
02478             unsigned long len;
02479             if (node->dirty[i] & BTREE_NODE_DIRTY_KEY)
02480             {
02481                 if (node->ser_key[i]) cp_string_destroy(node->ser_key[i]); //~~ 018 ???
02482                 node->ser_key[i] = (*file->serialize_key)(node->key[i]);
02483 printfdbg("W(10.5) serialize key %d\n", i);
02484             }
02485 printfdbg("W(10.6) node->ser_key[%d]: size %d, len %d\n", i, node->ser_key[i]->size, node->ser_key[i]->len);
02486             len = cp_string_len(node->ser_key[i]);
02487 printfdbg("W(10.7) node->ser_key[%d]: size %d, len %d\n", i, node->ser_key[i]->size, node->ser_key[i]->len);
02488             long2mem(len, offset++);
02489             long2mem(key_len, offset++);
02490             key_len += len;
02491             long2mem(node->value_address[i].offset, offset++);
02492             long2mem(node->value_address[i].size, offset++);
02493         }
02494         if (node->dirty_flag & BTREE_NODE_DIRTY_KEYS)
02495         {
02496 printfdbg("W(11)\n");
02497             // create key buffer
02498             keybuf = cp_string_dup(node->ser_key[0]);
02499             for (i = 1; i < node->items; i++)
02500                 cp_string_cat(keybuf, node->ser_key[i]);
02501             // write key info
02502 printf("W(11.1) before: keys at %lx - 0x%lx bytes\n", node->key_address.offset, node->key_address.size);
02503             rc = cp_btree_refalloc(file, &node->key_address, 
02504                                    cp_string_len(keybuf), 
02505                                    cp_string_data(keybuf));
02506 printf("W(11.2) after: keys at %lx - 0x%lx bytes\n", node->key_address.offset, node->key_address.size);
02507 //                                 key_len, cp_string_data(keybuf));
02508             // write key address & len at offset 1, 2
02509             long2mem(node->key_address.offset, &header[sizeof(unsigned long)]);
02510             long2mem(node->key_address.size, &header[2 * sizeof(unsigned long)]);
02511 printfdbg("W(11.3) node [%lx]: writing key len %lx\n", node->address, node->key_address.size);
02512         }
02513         else
02514         {
02515 // printfdbg("setting key address %lX, len %ld\n", node->key_address.offset, node->key_address.size);
02516             long2mem(node->key_address.offset, &header[sizeof(unsigned long)]);
02517             long2mem(node->key_address.size, &header[2 * sizeof(unsigned long)]);
02518         }
02519 
02520         offset = 
02521             (unsigned long *) &header[(4 * file->order - 1) * sizeof(long)];
02522         // write parent address
02523         long2mem(node->parent_address, offset++);
02524         // write children addresses
02525         for (i = 0; i <= node->items; i++)
02526             long2mem(node->child_address[i], offset++);
02527 
02528         rc = fseek(file->fp, node->address, SEEK_SET);
02529         rc = fwrite(header, file->node_header_len, 1, file->fp);
02530     }
02531 
02532 WRITE_DONE:
02533 printfdbg("W(12)\n");
02534     fflush(file->fp);
02535     if (keybuf) cp_string_destroy(keybuf);
02536     if (header) free(header);
02537     node->dirty_flag = 0;
02538     memset(node->dirty, 0, (node->items + 1) * sizeof(short));
02539 //verify_write(file, node);
02540 //printfdbg("W(13)\n");
02541 //cp_btree_file_print(file);
02542 //printfdbg("W(14)\n");
02543     return rc;
02544 }
02545 
02546 int cp_compare_mem_block_desc_mapping(cp_mapping *b1, cp_mapping *b2)
02547 {
02548     mem_block_desc *l1 = (mem_block_desc *) cp_mapping_value(b1);
02549     mem_block_desc *l2 = (mem_block_desc *) cp_mapping_value(b2); 
02550     return l1->size > l2->size ? 1 : l1->size < l2->size ? -1 : 0;
02551 }
02552 
02553 mem_block_desc *mem_block_desc_cp(void *p)
02554 {
02555     mem_block_desc *m = NULL;
02556 
02557     if (p)
02558     {
02559         m = (mem_block_desc *) malloc(sizeof(mem_block_desc));
02560         if (m)
02561             memcpy(m, p, sizeof(mem_block_desc));
02562     }
02563 
02564     return m;
02565 }
02566 
02567 void cp_btreenode_destroy(cp_btreenode *node)
02568 {
02569     int i;
02570 #ifdef DEBUG
02571     if (node == NULL) printfdbg("attempting to release NULL node\n");
02572 #endif
02573 printfdbg("[MEM] DESTROY NODE %p\n", node);
02574 dump_node_dbg(node);
02575 
02576     for (i = 0; i < node->items; i++)
02577     {
02578         if (node->owner->mode & COLLECTION_MODE_DEEP)
02579         {
02580             if (node->owner->key_dtr)
02581                 (*node->owner->key_dtr)(node->key[i]);
02582             if (node->owner->value_dtr)
02583                 (*node->owner->value_dtr)(node->value[i]);
02584             node->value[i] = NULL;
02585         }
02586         if (node->ser_key[i]) cp_string_destroy(node->ser_key[i]);
02587         if (node->ser_value[i]) cp_string_destroy(node->ser_value[i]);
02588     }
02589     free(node->key);
02590     free(node->value);
02591     free(node->ser_key);
02592     free(node->ser_value);
02593     free(node->value_address);
02594     free(node->child_address);
02595     free(node->dirty);
02596     free(node);
02597 }
02598 
02599 void btree_destroy_node(cp_btree_file *tree, cp_btreenode *node)
02600 {
02601     void *cached = 0;
02602     if (tree && tree->node_cache)
02603         cached = cp_hashlist_remove(tree->node_cache, &node->address);
02604 
02605     if (cached == NULL)
02606         cp_btreenode_destroy(node);
02607 }
02608 
02609 /* cp_btree_reconstruct_freeblocks - load free block list from disk
02610  *
02611  * the thing to keep in mind with these free blocks is that the in-memory 
02612  * structure is semantically confusing in relation to what's on the disk. 
02613  *
02614  * Here's how 2 free blocks would look on the disk:
02615  *
02616  *  <file header>
02617  *  +------+-------+-------+-------+
02618  *  | root | order | free1 | size1 | 
02619  *  +------+-------+-------+-------+ 
02620  *                     |    <--- total size1 bytes --->
02621  *                     |    +-------+-------+-.......-+
02622  *   offset free1 ---> +--->| free2 | size2 |         |
02623  *                          +-------+-------+ ........+
02624  *                              |
02625  *                    +---------+
02626  *                    |    <--- total size2 bytes --->
02627  *                    |    +-------+-------+-.......-+
02628  *  offset free2 ---> +--> |   0   | 0     |         | 
02629  *                         +-------+-------+ ........+
02630  *
02631  * In memory, the mem_block_desc describes an actual block, rather than an 
02632  * offset pointer to a block. Here's a rundown on the members:
02633  *
02634  * offset - the offset this block is actually at.
02635  * size - the size of this block.
02636  * prev - the offset of the block pointing to this block.
02637  * next - the offset of the next block.
02638  *
02639  * And here's how these 2 blocks would be represented in memory:
02640  *
02641  * desc1 = { free1, size1, 0, free2 };
02642  * desc2 = { free2, size2, free1, 0 };
02643  */
02644 int cp_btree_reconstruct_freeblocks(cp_btree_file *bf)
02645 {
02646     int rc = 0;
02647     mem_block_desc *curr;
02648     mem_block_desc *prev_desc = NULL;
02649 mem_block_desc *dbg = NULL;
02650     unsigned long prev = 0;
02651 
02652     /* the free block registry is a binary tree containing offset tables. 
02653      * Multiple free chunks may have the same size. These are stored in a 
02654      * ordered hash to allow removal by address once a chunk is put to use or 
02655      * resized. 
02656      */
02657     bf->free_block = 
02658         cp_multimap_create_by_option(COLLECTION_MODE_NOSYNC | 
02659                                      COLLECTION_MODE_DEEP, 
02660                                      mem_block_desc_offset,
02661                                      cp_hash_compare_long, 
02662                                      NULL, free);
02663 
02664     bf->free_block_size_index = 
02665         cp_multimap_create_index(bf->free_block, CP_MULTIPLE, 
02666                                  mem_block_desc_size, 
02667                                  cp_hash_compare_long, &rc);
02668 #ifdef DEBUG
02669     if (rc)
02670         cp_error(rc, "creating free block end offset index");
02671 #endif
02672 
02673     curr = (mem_block_desc *) calloc(1, sizeof(mem_block_desc));
02674     if (curr == NULL)
02675     {
02676         cp_error(CP_MEMORY_ALLOCATION_FAILURE, 
02677                  "can\'t allocate block descriptor");
02678         return -1;
02679     }
02680     if ((rc = cp_fread_long_array(bf->fp, (unsigned long *) curr, 2)) != 2) 
02681     {
02682         free(curr);
02683         return -1;
02684     }
02685     if (curr->offset == 0) 
02686     {
02687         free(curr);
02688         return 0;
02689     }
02690 
02691     do
02692     {
02693         if (prev_desc)
02694         {
02695             prev_desc->next = curr->offset;
02696             curr->prev = prev_desc->offset;
02697         }
02698 //        bf->last_free_block = prev = curr->offset;
02699 printfdbg("[+] reconstruct: insert free block %lX, %ld, %lX, %lX\n", curr->offset, curr->size, curr->prev, curr->next);
02700 if ((dbg = cp_multimap_get(bf->free_block, curr)) != NULL)
02701 {
02702     printfdbg("cycle detected: freeblock {%lX, %ld, %lX, %lX} already loaded\n", curr->offset, curr->size, curr->prev, curr->next);
02703     printfdbg("                    dbg = {%lX, %ld, %lX, %lX} already loaded\n", curr->offset, curr->size, curr->prev, curr->next);
02704     DIE();
02705 }
02706         cp_multimap_insert(bf->free_block, curr, NULL);
02707         prev_desc = curr;
02708         
02709         if ((rc = fseek(bf->fp, curr->offset, SEEK_SET))) return -1;
02710         curr = (mem_block_desc *) calloc(1, sizeof(mem_block_desc));
02711         if (curr == NULL)
02712         {
02713             cp_error(CP_MEMORY_ALLOCATION_FAILURE, 
02714                      "can\'t allocate block descriptor");
02715             return -1;
02716         }
02717         if ((rc = cp_fread_long_array(bf->fp, (unsigned long *) curr, 2)) != 2) 
02718         {
02719             free(curr);
02720             return -1;
02721         }
02722     } while (curr->offset);
02723 
02724     free(curr);
02725 
02726     return 0;
02727 }
02728 
02729 int cp_btree_file_destroy(cp_btree_file *file)
02730 {
02731     int rc = 0;
02732 
02733     if (file)
02734     {
02735         if ((rc = cp_btree_file_close(file))) //~~ return error
02736         {
02737 #ifdef DEBUG
02738             DEBUGMSG("error closing btree file %s\n", file->filename);
02739 #endif
02740         }
02741         if (file->filename) free(file->filename);
02742         free(file);
02743     }
02744 
02745     return rc;
02746 }
02747 
02748 /* retrieve_aux_fn returns
02749  *   1  if a function name of length 0 was found (ie NULL)
02750  *   0  if a function name of non-zero length was found and could be loaded
02751  *  -1  if a function name of non-zero length was found but could not be loaded
02752  */
02753 static int retrieve_aux_fn(void *handle, void **fnptr, char **buf)
02754 {
02755     size_t len;
02756     char *tmp;
02757     char *p = *buf;
02758     char tmpchar;
02759 
02760     len = mem2size(p);
02761     p += sizeof(size_t);
02762     if (len)
02763     {
02764         tmp = &p[len];
02765         tmpchar = *tmp;
02766         *tmp = '\0';
02767         *fnptr = dlsym(handle, p);
02768 // printf("link aux fn [%s]\n", p);
02769         if (*fnptr == NULL)
02770         {
02771             cp_error(CP_LOADFN_FAILED, "%s", dlerror());
02772             cp_error(CP_LOADFN_FAILED, 
02773                 "is the executable linked with --export-dynamic?");
02774             return -1;
02775         }
02776 
02777         *tmp = tmpchar;
02778         p += len;
02779     }
02780     else
02781         *fnptr = NULL;
02782 
02783     *buf = p;
02784 
02785     if (len == 0) return 1;
02786     return 0;
02787 }
02788 
02789 static int retrieve_aux_functions(cp_btree_file *file)
02790 {
02791     char *buf;
02792     char *p;
02793     char *tmp;
02794     char tmpchar;
02795     char sizebuf[sizeof(size_t)];
02796     size_t offset = FREE_BLOCK_HEADER_POSITION + 2 * sizeof(long);
02797     size_t len;
02798 
02799     if (fseek(file->fp, offset, SEEK_SET)) return -1;
02800     if (cp_fread_size(file->fp, &len)) return -1;
02801 
02802     buf = (char *) malloc(len + 1);
02803     if (fread(buf, len, 1, file->fp) != 1) goto AUX_ERR;
02804 
02805     p = buf;
02806     len = mem2size(p);
02807     p += sizeof(size_t);
02808     if (len)
02809     {
02810         tmp = &p[len];
02811         tmpchar = *tmp;
02812         *tmp = '\0';
02813         file->aux_lib = dlopen(p, RTLD_LAZY);
02814         if (file->aux_lib == NULL)
02815         {
02816             cp_error(CP_LOADLIB_FAILED, "%s", dlerror());
02817             goto AUX_ERR;
02818         }
02819         *tmp = tmpchar;
02820         p += len;
02821     }
02822     else
02823     {
02824         file->aux_lib = dlopen(NULL, RTLD_LAZY);
02825         if (file->aux_lib == NULL)
02826         {
02827             cp_error(CP_LOADLIB_FAILED, "%s", dlerror());
02828             goto AUX_ERR;
02829         }
02830     }
02831 
02832 // printf("retrieving cmp fn\n");
02833     if (retrieve_aux_fn(file->aux_lib, (void **) &file->cmp, &p)) 
02834         goto AUX_ERR;
02835 // printf("retrieving key copy fn\n");
02836     if (retrieve_aux_fn(file->aux_lib, (void **) &file->key_copy, &p) == -1) 
02837         goto AUX_ERR;
02838 // printf("retrieving key dtr fn\n");
02839     if (retrieve_aux_fn(file->aux_lib, (void **) &file->key_dtr, &p) == -1) 
02840         goto AUX_ERR;
02841 // printf("retrieving value copy fn\n");
02842     if (retrieve_aux_fn(file->aux_lib, (void **) &file->value_copy, &p) == -1) 
02843         goto AUX_ERR;
02844 // printf("retrieving value dtr fn\n");
02845     if (retrieve_aux_fn(file->aux_lib, (void **) &file->value_dtr, &p) == -1) 
02846         goto AUX_ERR;
02847 // printf("retrieving key ser fn\n");
02848     if (retrieve_aux_fn(file->aux_lib, (void **) &file->serialize_key, &p)) 
02849         goto AUX_ERR;
02850 // printf("retrieving key deser fn\n");
02851     if (retrieve_aux_fn(file->aux_lib, (void **) &file->deserialize_key, &p)) 
02852         goto AUX_ERR;
02853 // printf("retrieving value ser fn\n");
02854     if (retrieve_aux_fn(file->aux_lib, (void **) &file->serialize_value, &p)) 
02855         goto AUX_ERR;
02856 // printf("retrieving value deser fn\n");
02857     if (retrieve_aux_fn(file->aux_lib, (void **) &file->deserialize_value, &p)) 
02858         goto AUX_ERR;
02859 
02860     free(buf);
02861     return 0;
02862 
02863 AUX_ERR:
02864     if (buf) free(buf);
02865     return -1;
02866 }
02867 
02868 cp_btree_file *cp_btree_file_open(char *filename, int *rc)
02869 {
02870     cp_btree_file *bf; 
02871 
02872     if (CHUNK_SIZE == 0) init_chunk_size();
02873 
02874     bf = (cp_btree_file *) calloc(1, sizeof(cp_btree_file));
02875     if (bf == NULL)
02876     {
02877         *rc = CP_MEMORY_ALLOCATION_FAILURE;
02878         return NULL;
02879     }
02880 
02881     if ((bf->fp  = fopen(filename, "r+b")) == NULL) 
02882     {
02883         *rc = CP_FILE_NOT_FOUND;
02884         goto OPEN_ERROR;
02885     }
02886 
02887 //    bf->fd = fileno(bf->fp);
02888     bf->filename = strdup(filename);
02889     if (bf->filename == NULL)
02890     {
02891         *rc = CP_MEMORY_ALLOCATION_FAILURE;
02892         goto OPEN_ERROR;
02893     }
02894 
02895     if ((*rc = cp_fread_long(bf->fp, &bf->root_addr))) goto OPEN_ERROR;
02896     if ((*rc = cp_fread_int(bf->fp, &bf->order))) goto OPEN_ERROR;
02897     if ((*rc = cp_fread_int(bf->fp, &bf->mode))) goto OPEN_ERROR;
02898     bf->node_header_len = (bf->order * 5 ) * sizeof(unsigned long);
02899     if ((*rc = cp_btree_reconstruct_freeblocks(bf))) goto OPEN_ERROR;
02900     bf->node_cache = 
02901         cp_hashlist_create_by_option(COLLECTION_MODE_NOSYNC |
02902                                      COLLECTION_MODE_DEEP, 3, 
02903                                      cp_hash_long, cp_hash_compare_long, 
02904                                      NULL, NULL, NULL, 
02905                                      (cp_destructor_fn) cp_btreenode_destroy);
02906 
02907     bf->cache_size = BTREE_NODE_CACHE_SIZE;
02908 
02909     if ((*rc = retrieve_aux_functions(bf))) goto OPEN_ERROR;
02910 
02911     if ((*rc = fseek(bf->fp, 0, SEEK_END))) goto OPEN_ERROR;
02912     if ((bf->eof = ftell(bf->fp)) == (unsigned long) -1) goto OPEN_ERROR;
02913 
02914     if (bf->root_addr)
02915         bf->root = cp_btree_read_node(bf, bf->root_addr);
02916 
02917     *rc = 0;
02918     return bf;
02919 
02920 OPEN_ERROR:
02921     if (bf)
02922     {
02923         if (bf->filename) free(bf->filename);
02924         free(bf);
02925     }
02926     return NULL;
02927 }
02928 
02929 
02930 static int 
02931     load_auxilliary_functions(cp_btree_file *bf, char *aux_lib, char *cmp, 
02932                               char *key_copy, char *key_dtr, 
02933                               char *value_copy, char *value_dtr,
02934                               char *serialize_key, char *deserialize_key, 
02935                               char *serialize_value, char *deserialize_value)
02936 {
02937     bf->aux_lib = dlopen(aux_lib, RTLD_LAZY);
02938     if (bf->aux_lib == NULL)
02939     {
02940         cp_error(CP_LOADLIB_FAILED, 
02941             "can\'t lookup implementation functions: %s", dlerror());
02942         return -1;
02943     }
02944 
02945     bf->cmp = dlsym(bf->aux_lib, cmp);
02946     if (bf->cmp == NULL)
02947     {
02948         cp_error(CP_LOADFN_FAILED, 
02949             "can\'t lookup compare function: %s", dlerror());
02950         goto LOAD_ERROR;
02951     }
02952 
02953     if (key_copy)
02954     {
02955         bf->key_copy = dlsym(bf->aux_lib, key_copy);
02956         if (bf->key_copy == NULL)
02957         {
02958             cp_error(CP_LOADFN_FAILED, 
02959                 "can\'t lookup key copy function: %s", dlerror());
02960             goto LOAD_ERROR;
02961         }
02962     }
02963     else
02964         bf->key_copy = NULL;
02965 
02966     if (key_dtr)
02967     {
02968         bf->key_dtr = dlsym(bf->aux_lib, key_dtr);
02969         if (bf->key_dtr == NULL)
02970         {
02971             cp_error(CP_LOADFN_FAILED, 
02972                 "can\'t lookup key destructor function: %s", dlerror());
02973             goto LOAD_ERROR;
02974         }
02975     }
02976     else
02977         bf->key_dtr = NULL;
02978 
02979     if (value_copy)
02980     {
02981         bf->value_copy = dlsym(bf->aux_lib, value_copy);
02982         if (bf->value_copy == NULL)
02983         {
02984             cp_error(CP_LOADFN_FAILED, 
02985                 "can\'t lookup value copy function: %s", dlerror());
02986             goto LOAD_ERROR;
02987         }
02988     }
02989     else
02990         bf->value_copy = NULL;
02991 
02992     if (value_dtr)
02993     {
02994         bf->value_dtr = dlsym(bf->aux_lib, value_dtr);
02995         if (bf->value_dtr == NULL)
02996         {
02997             cp_error(CP_LOADFN_FAILED, 
02998                 "can\'t lookup value destructor function: %s", dlerror());
02999             goto LOAD_ERROR;
03000         }
03001     }
03002     else
03003         bf->value_dtr = NULL;
03004 
03005     bf->serialize_key = dlsym(bf->aux_lib, serialize_key);
03006     if (bf->serialize_key == NULL)
03007     {
03008         cp_error(CP_LOADFN_FAILED, 
03009             "can\'t lookup key serialization function: %s", dlerror());
03010         goto LOAD_ERROR;
03011     }
03012 
03013     bf->deserialize_key = dlsym(bf->aux_lib, deserialize_key);
03014     if (bf->deserialize_key == NULL)
03015     {
03016         cp_error(CP_LOADFN_FAILED, 
03017             "can\'t lookup key deserialization function: %s", dlerror());
03018         goto LOAD_ERROR;
03019     }
03020 
03021     bf->serialize_value = dlsym(bf->aux_lib, serialize_value);
03022     if (bf->serialize_value == NULL)
03023     {
03024         cp_error(CP_LOADFN_FAILED, 
03025             "can\'t lookup value serialization function: %s", dlerror());
03026         goto LOAD_ERROR;
03027     }
03028 
03029     bf->deserialize_value = dlsym(bf->aux_lib, deserialize_value);
03030     if (bf->deserialize_value == NULL)
03031     {
03032         cp_error(CP_LOADFN_FAILED, 
03033             "can\'t lookup value deserialization function: %s", dlerror());
03034         goto LOAD_ERROR;
03035     }
03036 
03037     return 0;
03038 
03039 LOAD_ERROR:
03040     if (aux_lib == NULL)
03041         cp_error(CP_LOADLIB_FAILED, 
03042             "is the executable linked with --export-dynamic?");
03043     return -1;
03044 }
03045 
03046 static void append_fn_name(cp_string *str, char *name)
03047 {
03048     char sizebuf[sizeof(size_t)];
03049     size_t len;
03050 
03051     if (name) 
03052     {
03053         len = strlen(name);
03054         size2mem(len, sizebuf);
03055         cp_string_cat_bin(str, sizebuf, sizeof(size_t));
03056         cp_string_cstrcat(str, name);
03057     }
03058     else
03059     {
03060         memset(sizebuf, 0, sizeof(size_t));
03061         cp_string_cat_bin(str, sizebuf, sizeof(size_t));
03062     }
03063 }
03064 
03065 /* auxilliary function names (compare function, serialization / deserialization
03066  * functions) are seralized as follows:
03067  *
03068  *  +-------+------+------+------+-----+------+------+-------+-------+
03069  *  | total | Nlib | lib* | Ncmp | cmp | Nkcp | kcp* | Nkdtr | kdtr* |
03070  *  +-------+------+------+------+-----+------+------+-------+-------+
03071  *  +------+------+-------+-------+-----+----+-----+----+-----+----+-----+---+
03072  *  | Nvcp | vcp* | Nvdtr | vdtr* | Nsk | sk | Ndk | dk | Nsv | sv | Ndv |dv |
03073  *  +------+------+-------+-------+-----+----+-----+----+-----+----+-----+---+
03074  *
03075  *  (items marked with an asterix may be absent)
03076  *
03077  *  total - total length of serialized auxillary function names in bytes
03078  *  Nlib  - length of auxillary library name 
03079  *  lib   - name of auxilliary library 
03080  *  Ncmp  - length of compare function name
03081  *  cmp   - compare function name
03082  *  Nkcp  - length of key copy function name
03083  *  kcp   - key copy function name
03084  *  Nkdtr - length of key destructor function name
03085  *  kdtr  - key destructor function name
03086  *  Nvcp  - length of value copy function name
03087  *  vcp   - value copy function name
03088  *  Nvdtr - length of value destructor function name
03089  *  vdtr  - value destructor function name
03090  *  Nsk   - length of key serialization function name
03091  *  sk    - key serialization function name
03092  *  Ndk   - length of key deserialization function name
03093  *  dk    - key deserialization function name
03094  *  Nsv   - length of value serialization function name
03095  *  sv    - value serialization function name
03096  *  Ndv   - length of value deserialization function name
03097  *  dv    - value deserialization function name
03098  */
03099 cp_string *
03100     serialize_aux_fn_names(char *aux_lib, char *cmp, char *key_copy, 
03101                            char *key_dtr, char *value_copy, char *value_dtr,
03102                            char *serialize_key, char *deserialize_key, 
03103                            char *serialize_value, char *deserialize_value)
03104 {
03105     cp_string *str;
03106     size_t len;
03107     char sizebuf[sizeof(size_t)];
03108 
03109     len = strlen(cmp) + strlen(serialize_key) + strlen(deserialize_key) + 
03110           strlen(serialize_value) + strlen(deserialize_value) + 
03111           11 * sizeof(size_t);
03112     if (aux_lib) len += strlen(aux_lib);
03113     if (key_copy) len += strlen(key_copy);
03114     if (key_dtr) len += strlen(key_dtr);
03115     if (value_copy) len += strlen(value_copy);
03116     if (value_dtr) len += strlen(value_dtr);
03117     str = cp_string_create_empty(len);
03118     len -= sizeof(size_t);
03119     size2mem(len, sizebuf);
03120     cp_string_cat_bin(str, sizebuf, sizeof(size_t));
03121 
03122     append_fn_name(str, aux_lib);
03123     append_fn_name(str, cmp);
03124     append_fn_name(str, key_copy);
03125     append_fn_name(str, key_dtr);
03126     append_fn_name(str, value_copy);
03127     append_fn_name(str, value_dtr);
03128     append_fn_name(str, serialize_key);
03129     append_fn_name(str, deserialize_key);
03130     append_fn_name(str, serialize_value);
03131     append_fn_name(str, deserialize_value);
03132 
03133     return str;
03134 }
03135 
03136 /* the file header looks like this:
03137  *
03138  *  +------+-------+-------+-------+
03139  *  | root | order | free1 | size1 | 
03140  *  +------+-------+-------+-------+ 
03141  */
03142 cp_btree_file *
03143     cp_btree_file_create(char *filename, unsigned int order, int mode, 
03144                          char *aux_lib, char *cmp, char *key_copy, 
03145                          char *key_dtr, char *value_copy, char *value_dtr,
03146                          char *serialize_key, char *deserialize_key, 
03147                          char *serialize_value, char *deserialize_value, 
03148                          int *rc)
03149 {
03150     block_desc x;
03151     cp_string *aux_names;
03152     cp_btree_file *bf;
03153 
03154     if (CHUNK_SIZE == 0) init_chunk_size();
03155 
03156     int dummy;
03157     if (rc == NULL) rc = &dummy;
03158 
03159     bf = (cp_btree_file *) calloc(1, sizeof(cp_btree_file));
03160     if (bf == NULL)
03161     {
03162         *rc = CP_MEMORY_ALLOCATION_FAILURE;
03163         goto CREATE_ERROR;
03164     }
03165 
03166     if (load_auxilliary_functions(bf, aux_lib, cmp, key_copy, key_dtr, 
03167                                   value_copy, value_dtr, serialize_key, 
03168                                   deserialize_key, serialize_value, 
03169                                   deserialize_value))
03170         goto CREATE_ERROR;
03171 
03172     if ((bf->fp = fopen(filename, "w+b")) == NULL) 
03173     {
03174         *rc = CP_FILE_NOT_FOUND;
03175         goto CREATE_ERROR;
03176     }
03177 //    bf->fd = fileno(bf->fp);
03178     bf->filename = strdup(filename);
03179     bf->order = order;
03180     bf->mode = mode;
03181     bf->node_header_len = (order * 5 ) * sizeof(unsigned long);
03182 
03183     if ((*rc = cp_fwrite_long(bf->fp, 0L))) goto CREATE_ERROR;
03184     if ((*rc = cp_fwrite_int(bf->fp, bf->order))) goto CREATE_ERROR;
03185     if ((*rc = cp_fwrite_int(bf->fp, bf->mode))) goto CREATE_ERROR;
03186     memset(&x, 0, sizeof(block_desc));
03187     if (cp_fwrite_long_array(bf->fp, (unsigned long *) &x, 2) != 2) 
03188         goto CREATE_ERROR;
03189     
03190     aux_names = 
03191         serialize_aux_fn_names(aux_lib, cmp, key_copy, key_dtr, value_copy, 
03192                                value_dtr, serialize_key, deserialize_key, 
03193                                serialize_value, deserialize_value);
03194     if ((*rc = fwrite(aux_names->data, aux_names->len, 1, bf->fp)) != 1)
03195         goto CREATE_ERROR;
03196 
03197     bf->eof = FREE_BLOCK_HEADER_POSITION + 2 * sizeof(long) + aux_names->len;
03198     cp_string_destroy(aux_names);
03199 
03200     /* the free block registry is a binary tree containing offset tables. 
03201      * Multiple free chunks may have the same size. These are stored in a 
03202      * ordered hash to allow removal by address once a chunk is put to use or 
03203      * resized. 
03204      */
03205     bf->free_block = 
03206         cp_multimap_create_by_option(COLLECTION_MODE_NOSYNC | 
03207                                      COLLECTION_MODE_DEEP, 
03208                                      mem_block_desc_offset,
03209                                      cp_hash_compare_long, 
03210                                      NULL, free);
03211     if (bf->free_block == NULL)
03212     {
03213         *rc = CP_MEMORY_ALLOCATION_FAILURE;
03214         goto CREATE_ERROR;
03215     }
03216 
03217     bf->free_block_size_index = 
03218         cp_multimap_create_index(bf->free_block, CP_MULTIPLE, 
03219                                  mem_block_desc_size, 
03220                                  cp_hash_compare_long, rc);
03221     if (bf->free_block_size_index == NULL) goto CREATE_ERROR;
03222 
03223     bf->node_cache = 
03224         cp_hashlist_create_by_option(COLLECTION_MODE_NOSYNC |
03225                                      COLLECTION_MODE_DEEP, 3, 
03226                                      cp_hash_long, cp_hash_compare_long, 
03227                                      NULL, NULL, NULL, 
03228                                      (cp_destructor_fn) cp_btreenode_destroy);
03229 
03230     bf->cache_size = BTREE_NODE_CACHE_SIZE;
03231 
03232     *rc = 0;
03233 
03234     return bf;
03235 
03236 CREATE_ERROR:
03237 #ifdef DEBUG
03238     perror("CREATE ERROR");
03239 #endif
03240     if (bf)
03241     {
03242         if (bf->aux_lib) dlclose(bf->aux_lib);
03243         if (bf->filename) free(bf->filename);
03244         if (bf->free_block) cp_multimap_destroy(bf->free_block);
03245         if (bf->free_block_size_index) free(bf->free_block_size_index);
03246         if (bf->node_cache) cp_hashlist_destroy(bf->node_cache);
03247     }
03248     return NULL;
03249 }
03250 
03251 void cp_btree_file_dump_node(cp_btree_file *file, long offset)
03252 {
03253     cp_btreenode *node = cp_btree_read_node(file, offset);
03254     if (node)
03255     {
03256         int i;
03257 
03258         for (i = 0; i < node->items; i++)
03259         {
03260             grab_value(file, node, i);
03261             printf("[%lX\t^%lX\t(%d)] %s => %s\n", offset, node->parent_address,i,  
03262                    cp_string_tocstr(node->key[i]), 
03263                    cp_string_tocstr(node->value[i]));
03264         }
03265 
03266         for (i = 0; i <= node->items; i++)
03267         {
03268             if (node->child_address[i])
03269                 cp_btree_file_dump_node(file, node->child_address[i]);
03270         }
03271     }
03272     else
03273         printf("couldn\'t read node at offset %lX\n", offset);
03274 }
03275 
03276 void cp_btree_file_dump(cp_btree_file *bf)
03277 {
03278     printf("------------------------------------------\n");
03279     printf("cp_btree_file [%s] order %d: %d items\n", bf->filename, bf->order, bf->items);
03280     if (bf->root_addr)
03281         cp_btree_file_dump_node(bf, bf->root_addr);
03282     printf("      ------- free block dump -------     \n");
03283     vdump_free_block(bf, NULL);
03284     printf("------------------------------------------\n");
03285 }
03286 
03287 static int bounce_counter;
03288 static int offset;
03289 
03290 void cp_btree_file_print_node(cp_btree_file *bf, unsigned long addr)
03291 {
03292     int i, j;
03293     int count;
03294     cp_btreenode *node = cp_btree_read_node(bf, addr);
03295     if (node == NULL) return;
03296 
03297 //  node->nodrop = 1;
03298 
03299     count = node->items;
03300     for (i = 0; i < count; i++)
03301     {
03302         offset++;
03303         if (node->child_address[i])
03304             cp_btree_file_print_node(bf, node->child_address[i]);
03305         /* pointer may be defunct by now. why not refcount? because it may
03306          * not be possible to keep the whole subtree in memory.
03307          */
03308         node = cp_btree_read_node(bf, addr);
03309 bounce_counter++;
03310         grab_value(bf, node, i);
03311         offset--;
03312         for (j = 0; j < offset; j++)
03313             printf("\t");
03314         printf("[%lX\t%lX\t(%d)] %s => %s\n", addr, node->parent_address, i, 
03315                cp_string_tocstr(node->key[i]), 
03316                cp_string_tocstr(node->value[i]));
03317 #if 0
03318 if (bounce_counter > bf->items) 
03319 {
03320     printfdbg("more items than there should be\n");
03321     DIE();
03322 }
03323 #endif
03324     }
03325     
03326     if (node->child_address[i])
03327     {
03328         offset++;
03329         cp_btree_file_print_node(bf, node->child_address[i]);
03330         offset--;
03331     }
03332 //  node->nodrop = 0;
03333 }
03334 
03335 void cp_btree_file_print(cp_btree_file *bf)
03336 {
03337 bounce_counter = 0;
03338 offset = 0;
03339     printf("\n------------------------------------------\n");
03340     printf("cp_btree_file [%s] order %d: %d items\n", bf->filename, bf->order, bf->items);
03341     if (bf->root_addr)
03342         cp_btree_file_print_node(bf, bf->root_addr);
03343     printf("      ------- free block dump -------     \n");
03344     vdump_free_block(bf, NULL);
03345     printf("------------------------------------------\n");
03346 }
03347 
03348 int cp_btree_file_close(cp_btree_file *bf)
03349 {
03350     int rc = 0;
03351     if (bf->root) 
03352     {
03353         void *found = NULL;
03354         if (bf->node_cache) 
03355             found = cp_hashlist_remove(bf->node_cache, &bf->root->address);
03356         if (found == NULL)
03357             cp_btreenode_destroy(bf->root);
03358         bf->root = NULL;
03359     }
03360 
03361     if (bf->node_cache) 
03362     {
03363         cp_hashlist_destroy(bf->node_cache);
03364         bf->node_cache = NULL;
03365     }
03366 
03367     if (bf->free_block) 
03368     {
03369         cp_multimap_destroy(bf->free_block);
03370         bf->free_block = NULL;
03371     }
03372 
03373     if (bf->filename)
03374     {
03375         free(bf->filename);
03376         bf->filename = NULL;
03377     }
03378 
03379     if (bf->aux_lib) 
03380     {
03381         dlclose(bf->aux_lib);
03382         bf->aux_lib = NULL;
03383     }
03384 
03385     if (bf->fp)
03386     {
03387         rc = fclose(bf->fp);
03388         bf->fp = NULL;
03389     }
03390 
03391     return rc;
03392 }
03393 
03394 cp_btreenode *cp_btreenode_alloc(cp_btree_file *file, unsigned long parent)
03395 {
03396     int order = file->order;
03397     cp_btreenode *node = (cp_btreenode *) calloc(1, sizeof(cp_btreenode));
03398     if (node == NULL) goto ALLOC_ERROR;
03399     node->key = (void **) calloc(order - 1, sizeof(void *));
03400     if (node->key == NULL) goto ALLOC_ERROR;
03401     node->ser_key = (cp_string **) calloc(order - 1, sizeof(cp_string *));
03402     if (node->ser_key == NULL) goto ALLOC_ERROR;
03403     node->value = (void **) calloc(order - 1, sizeof(void *));
03404     if (node->value == NULL) goto ALLOC_ERROR;
03405     node->ser_value = (cp_string **) calloc(order - 1, sizeof(cp_string *));
03406     if (node->ser_value == NULL) goto ALLOC_ERROR;
03407     node->value_address = (block_desc *) calloc(order - 1, sizeof(block_desc));
03408     if (node->value_address == NULL) goto ALLOC_ERROR;
03409     node->dirty = (short *) calloc(order, sizeof(short));
03410     if (node->dirty == NULL) goto ALLOC_ERROR;
03411     node->child_address = (unsigned long *)calloc(order, sizeof(unsigned long));
03412     if (node->child_address == NULL) goto ALLOC_ERROR;
03413 
03414     node->parent_address = parent;
03415     node->owner = file;
03416 printf("[MEM] allocating node %p\n", node);
03417     return node;
03418 
03419 ALLOC_ERROR:
03420     if (node)
03421     {
03422         if (node->ser_key) free(node->ser_key);
03423         if (node->ser_value) free(node->ser_value);
03424         if (node->value_address) free(node->value_address);
03425         if (node->dirty) free(node->dirty);
03426         if (node->child_address) free(node->child_address);
03427         free(node);
03428     }
03429     return NULL;
03430 }
03431 
03432     
03433 cp_string *cp_serialize_cstr(void *cstr)
03434 {
03435     char *str = cstr;
03436     size_t len = strlen(str);
03437     cp_string *res = cp_string_create_empty(len + sizeof(size_t));
03438     size2mem(len, res->data);
03439     res->len = sizeof(size_t);
03440     cp_string_cstrcat(res, str);
03441     return res;
03442 }
03443 
03444 void *cp_deserialize_cstr(void *data, size_t *size)
03445 {
03446     char *buf = data;
03447     char *res = NULL;
03448     size_t len = mem2size(buf);
03449     res = (char *) malloc(len + 1);
03450     memcpy(res, &buf[sizeof(size_t)], len);
03451     res[len] = '\0';
03452     if (size) *size = len + sizeof(size_t);
03453     return res;
03454 }
03455 
03456 cp_string *cp_string_serialize(void *cpstr)
03457 {
03458     cp_string *str = (cp_string *) cpstr;
03459     cp_string *res = 
03460         cp_string_create_empty(cp_string_len(str) + sizeof(size_t));
03461     size2mem(cp_string_len(str), res->data);
03462     res->len = sizeof(size_t);
03463     cp_string_cat(res, str);
03464     return res;
03465 }
03466 
03467 void *cp_string_deserialize(void *data, size_t *size)
03468 {
03469     char *buf = data;
03470     cp_string *res = NULL;
03471     size_t len = mem2size(buf);
03472     res = cp_string_create(&buf[sizeof(size_t)], len);
03473 printf("[MEM] deserialize %p [%s]\n", res, cp_string_tocstr(res));
03474     if (size) *size = len + sizeof(size_t);
03475     return res;
03476 }
03477 
03478 /* locate_mapping - run a binary search to find where a new mapping should be 
03479  * put 
03480  */
03481 static cp_btreenode *
03482     btree_locate_mapping(cp_btree_file *tree, void *key, int *idx, int *exists)
03483 {
03484     cp_btreenode *curr = tree->root;
03485     int min, max;
03486     int pos = 0, cmp = 0;
03487 
03488     while (curr)
03489     {
03490         min = -1;
03491         max = curr->items;
03492         while (max > min + 1)
03493         {
03494             pos = (min + max) / 2;
03495             if ((cmp = (*tree->cmp)(key, curr->key[pos])) == 0)
03496             {
03497                 *idx = pos;
03498                 *exists = 1;
03499                 return curr;
03500             }
03501             if (cmp > 0)
03502                 min = pos;
03503             else
03504                 max = pos;
03505         }
03506         if (curr->child_address[max] == 0)
03507         {
03508             *idx = max;
03509             *exists = 0;
03510             break;
03511         }
03512         curr = cp_btree_read_node(tree, curr->child_address[max]);
03513     }
03514 
03515     return curr;
03516 }
03517 
03518 static void btree_set_root(cp_btree_file *tree, cp_btreenode *node)
03519 {
03520     unsigned long addr;
03521     /* put old root node on the cache */
03522     if (tree->root != NULL) 
03523         cp_hashlist_append (tree->node_cache, &tree->root->address, tree->root);
03524 
03525     addr = node ? node->address : 0;
03526     tree->root = node;
03527     tree->root_addr = addr;
03528     /* remove the current root with no COLLECTION_MODE_DEEP bit so that it 
03529      * doesn't mysteriously disappear later if it's removed from the cache
03530      */
03531     if (node)
03532         cp_hashlist_remove_by_option(tree->node_cache, &node->address, 
03533                                      COLLECTION_MODE_NOSYNC);
03534     fseek(tree->fp, 0, SEEK_SET);
03535     cp_fwrite_long(tree->fp, addr);
03536 }
03537 
03538 static void cp_btreenode_set_key(cp_btreenode *node, int index, void *key)
03539 {
03540     cp_btree_file *tree = node->owner;
03541 #ifdef DEBUG
03542     if (index < 0 || index >= tree->order - 1)
03543     {
03544         cp_error(CP_INVALID_VALUE, 
03545             "attempting to set key at index %d on tree of order %d", 
03546             index, node->owner->order);
03547         return;
03548     }
03549 #endif
03550 #if 0
03551     if (node->key[index] != NULL)
03552         if ((tree->mode & COLLECTION_MODE_DEEP))
03553 {
03554 printf("[%lX] dropping key %d: [%s]\n", node->address, index, cp_string_tocstr(node->key[index]));
03555             if (tree->key_dtr) (*tree->key_dtr)(node->key[index]);
03556 }
03557 #endif
03558     if ((tree->mode & COLLECTION_MODE_COPY) && tree->key_copy)
03559         node->key[index] = (*tree->key_copy)(key);
03560     else
03561         node->key[index] = key;
03562 
03563     node->dirty_flag |= BTREE_NODE_DIRTY_KEYS;
03564     node->dirty[index] |= BTREE_NODE_DIRTY_KEY;
03565 }
03566 
03567 static void 
03568     cp_btreenode_set_value_impl(cp_btreenode *node, 
03569                                 int index, 
03570                                 void *value, 
03571                                 int deep)
03572 {
03573     cp_btree_file *tree = node->owner;
03574 #ifdef DEBUG
03575     if (index < 0 || index >= tree->order - 1)
03576     {
03577         cp_error(CP_INVALID_VALUE, 
03578             "attempting to set key at index %d on tree of order %d", 
03579             index, node->owner->order);
03580         return;
03581     }
03582 #endif
03583 #if 1
03584     if ((node->value[index] != NULL) && (node->items > index))
03585     {
03586         if (value == node->value[index])
03587         {
03588             printf("setting value %p to itself - problem - exiting\n", value);
03589             DIE();
03590         }
03591         //~~ COLLECTION_MODE_MULTIPLE_VALUES ? 
03592 //~~ 018        if (tree->mode & COLLECTION_MODE_DEEP)
03593         if ((deep) && (tree->mode & COLLECTION_MODE_DEEP))
03594             if (tree->value_dtr) (*tree->value_dtr)(node->value[index]);
03595     }
03596 #endif
03597 
03598     if ((tree->mode & COLLECTION_MODE_COPY) && tree->value_copy)
03599         node->value[index] = (*tree->value_copy)(value);
03600     else
03601         node->value[index] = value;
03602 
03603     node->dirty[index] |= BTREE_NODE_DIRTY_VALUE;
03604 }
03605 
03606 static void cp_btreenode_set_value(cp_btreenode *node, int index, void *value)
03607 {
03608     cp_btreenode_set_value_impl(node, index, value, 0);
03609 }
03610 
03611 static void cp_btreenode_set_value_shallow(cp_btreenode *node, int index, void *value)
03612 {
03613     cp_btreenode_set_value_impl(node, index, value, 0);
03614 }
03615     
03616 static void 
03617     cp_btreenode_set_parent(cp_btreenode *node, unsigned long addr)
03618 {
03619     node->parent_address = addr;
03620     node->dirty_flag |= BTREE_NODE_DIRTY_HEADER | BTREE_NODE_DIRTY_PARENT;
03621 }
03622 
03623 static void 
03624     cp_btreenode_set_child(cp_btreenode *node, int index, unsigned long addr)
03625 {
03626 printfdbg("node %lX set child %d => %lX\n", node->address, index, addr);
03627     node->child_address[index] = addr;
03628     node->dirty[index] |= BTREE_NODE_DIRTY_CHILD;
03629 }
03630 
03631 static void cp_btreenode_set_items(cp_btreenode *node, int items)
03632 {
03633 int i;
03634 if (items < node->items)
03635 {
03636     for (i = items; i < node->items; i++)
03637     {
03638         if (node->ser_key[i] != NULL)
03639             *(char *) 0 = 0;
03640         if (node->ser_value[i] != NULL)
03641             *(char *) 0 = 0;
03642     }
03643 }
03644     node->items = items;
03645     node->dirty_flag |= BTREE_NODE_DIRTY_HEADER;
03646 }
03647 
03648 /* btreenode_shift
03649  *
03650  *  +---+---+---+---+---+           +---+---+---+---+---+
03651  *  | A | B | C | D | - |    =>     | A | - | B | C | D |
03652  *  +---+---+---+---+---+           +---+---+---+---+---+
03653  *  
03654  */
03655 static void btreenode_shift(cp_btreenode *node, int index)
03656 {
03657     int len = node->items - index;
03658 printfdbg("btreenode_shift %lX index %d\n", node->address, index);
03659     memmove(&node->key[index + 1], &node->key[index], len * sizeof(void *));
03660     if (node->ser_key[index]) //~~ 018 ???
03661     {
03662         cp_string_destroy(node->ser_key[index]);
03663         node->ser_key[index] = NULL;
03664     }
03665     memmove(&node->ser_key[index + 1], &node->ser_key[index], len * sizeof(cp_string *));
03666 //  node->ser_key[index] = NULL;
03667     memmove(&node->value[index + 1], &node->value[index], len * sizeof(void *));
03668     if (node->ser_value[index]) //~~ 018 ???
03669     {
03670         cp_string_destroy(node->ser_value[index]);
03671         node->ser_value[index] = NULL;
03672     }
03673     memmove(&node->ser_value[index + 1], &node->ser_value[index], len * sizeof(cp_string *));
03674 //  node->ser_value[index] = NULL;
03675     memmove(&node->value_address[index + 1], &node->value_address[index], len * sizeof(block_desc));
03676     memmove(&node->child_address[index + 1], &node->child_address[index + 0], 
03677             (len + 1) * sizeof(unsigned long));
03678     memmove(&node->dirty[index + 1], &node->dirty[index], len * sizeof(short));
03679 #if 0
03680     //~~ 018
03681     node->key[index] = NULL;
03682     node->value[index] = NULL;
03683     memset(&node->value_address[index], 0, sizeof(block_desc));
03684     //~~ 018
03685 #endif
03686 
03687     for (index++ ; index <= node->items; index++)
03688     {
03689         if (node->value[index] == NULL)
03690             node->dirty[index] |= BTREE_NODE_USE_VALUE_ADDR;
03691         node->dirty[index] |= BTREE_NODE_DIRTY_KEY | BTREE_NODE_DIRTY_VALUE |
03692                               BTREE_NODE_DIRTY_CHILD;
03693     }
03694     node->dirty[index] |= BTREE_NODE_DIRTY_CHILD;
03695     node->dirty_flag |= BTREE_NODE_DIRTY_HEADER | BTREE_NODE_DIRTY_KEYS;
03696 }
03697 
03698 /* btreenode_unshift
03699  *
03700  *  +---+---+---+---+---+           +---+---+---+---+---+
03701  *  | A | B | C | D | E |    =>     | A | C | D | E | - |
03702  *  +---+---+---+---+---+           +---+---+---+---+---+
03703  *  
03704  */
03705 static void btreenode_unshift(cp_btreenode *node, int index)
03706 {
03707     int len = node->items - index - 1;
03708     int i;
03709     if (len == 0) return;
03710 printfdbg("btreenode_unshift %lX index %d\n", node->address, index);
03711     memmove(&node->key[index], &node->key[index + 1], len * sizeof(void *));
03712     if (node->ser_key[index]) //~~ 018 ???
03713     {
03714         cp_string_destroy(node->ser_key[index]);
03715         node->ser_key[index] = NULL;
03716     }
03717     memmove(&node->ser_key[index], &node->ser_key[index + 1], len * sizeof(cp_string *));
03718     node->ser_key[node->items - 1] = NULL;
03719     memmove(&node->value[index], &node->value[index + 1], len * sizeof(void *));
03720     if (node->ser_value[index]) //~~ 018 ???
03721     {
03722         cp_string_destroy(node->ser_value[index]);
03723         node->ser_value[index] = NULL;
03724     }
03725     memmove(&node->ser_value[index], &node->ser_value[index + 1], len * sizeof(cp_string *));
03726     memmove(&node->value_address[index], &node->value_address[index + 1], len * sizeof(block_desc));
03727     memmove(&node->child_address[index + 1], &node->child_address[index + 2], 
03728             len * sizeof(unsigned long));
03729     memmove(&node->dirty[index], &node->dirty[index + 1], len * sizeof(short));
03730     node->ser_value[node->items - 1] = NULL;
03731 #if 0
03732     //~~ 018
03733     node->key[node->items - 1] = NULL;
03734     node->value[node->items - 1] = NULL;
03735     memset(&node->value_address[node->items - 1], 0, sizeof(block_desc));
03736     //~~ 018
03737 #endif
03738     for (i = index; i < node->items - 1; i++)
03739     {
03740         if (node->value[i] == NULL)
03741             node->dirty[i] |= BTREE_NODE_USE_VALUE_ADDR;
03742         node->dirty[i] |= BTREE_NODE_DIRTY_KEY | 
03743                           BTREE_NODE_DIRTY_VALUE;
03744         node->dirty[i + 1] |= BTREE_NODE_DIRTY_CHILD;
03745     }
03746     node->dirty_flag |= BTREE_NODE_DIRTY_HEADER | BTREE_NODE_DIRTY_KEYS;
03747 }
03748 
03749 static void btree_split(cp_btree_file *tree, 
03750                         unsigned long parent_address,
03751                         cp_btreenode *node, 
03752                         void *key, 
03753                         void *value, 
03754                         block_desc *desc);
03755 
03756 /* On Splitting
03757  * ------------
03758  *
03759  *  if O is the order of the tree (number of children per node), i is the 
03760  *  index where the insertion would occur if the node were not full, find m, 
03761  *  the key to be moved up to the parent node. 
03762  *  
03763  *  let k = (O - 1) / 2 
03764  *
03765  * 
03766  *   O = 2, k = 0             O = 3, k = 1             O = 4, k = 1 
03767  *      +---+                  +---+---+              +---+---+---+ 
03768  *      | 0 |                  | 0 | 1 |              | 0 | 1 | 2 | 
03769  *      +---+                  +---+---+              +---+---+---+ 
03770  *  
03771  *  i = 0   m = 0, i         i = 0   m = 0           i = 0   m = 0, 1  
03772  *  i = 1   m = i, 0         i = 1   m = i           i = 1   m = i, 1 
03773  *                           i = 2   m = 1           i = 2   m = 1, i 
03774  *                                                   i = 3   m = 1, 2 
03775  * 
03776  * 
03777  *       O = 5, k = 2               O = 6, k = 2 
03778  *    +---+---+---+---+         +---+---+---+---+---+ 
03779  *    | 0 | 1 | 2 | 3 |         | 0 | 1 | 2 | 3 | 4 | 
03780  *    +---+---+---+---+         +---+---+---+---+---+ 
03781  * 
03782  *     i = 0   m = 1              i = 0   m = 1, 2 
03783  *     i = 1   m = 1              i = 1   m = 1, 2 
03784  *     i = 2   m = i              i = 2   m = i, 2 
03785  *     i = 3   m = 2              i = 3   m = 2, i 
03786  *     i = 4   m = 2              i = 4   m = 2, 3 
03787  *                                i = 5   m = 2, 3 
03788  *
03789  *
03790  *  It is immediately apparent that for all cases, we may choose
03791  *
03792  *  m = k - 1    i < k     (a)
03793  *      i        i = k     (b)
03794  *      k        i > k     (c)
03795  *
03796  * -------------------------------------------------------------------------
03797  * btree_inner_split copies about half of the existing node's content into a 
03798  * newly created node. 
03799  * 
03800  * btree_split runs after a node has been split. One mapping moves up to the 
03801  * parent node, and a link to the newly created node is added.
03802  */
03803 static void btree_inner_split(cp_btree_file *tree, 
03804                               cp_btreenode *node, 
03805                               int index, 
03806                               void *key, 
03807                               void *value, 
03808                               block_desc *desc,
03809                               unsigned long child)
03810 {
03811     void *swapkey, *swapval;
03812     block_desc swap_tmpl;
03813     block_desc *swapdesc = NULL;
03814     cp_string *swapkey_ser = NULL, *swapval_ser = NULL;
03815     int m; /* the index moving up the tree */
03816     int k = (tree->order - 1) / 2; /* minimum fill factor */
03817     int j; 
03818 unsigned long sibladdr; //~~
03819 printfdbg("S(1) k = %d\n", k);
03820 printfdbg("S(1) parent_address = %lX\n", node->parent_address);
03821 #if 0
03822 if (value == NULL)
03823 {
03824     printfdbg("btree_inner_split: null value\n");
03825     DIE();
03826 }
03827 #endif
03828     cp_btreenode *sibl = cp_btreenode_alloc(tree, node->parent_address);
03829     if (index < k) /* case (a) */
03830     {
03831 printfdbg("S(2)\n");
03832 if (index < k - 1 && value == NULL)
03833 {
03834     printfdbg("null value problem in inner split\n");
03835 //~~    DIE();
03836 }
03837         m = k - 1;
03838         swapkey = node->key[m];
03839         swapval = node->value[m];
03840         swapkey_ser = node->ser_key[m]; node->ser_key[m] = NULL;
03841         swapval_ser = node->ser_value[m]; node->ser_value[m] = NULL;
03842 if (swapkey_ser) cp_string_destroy(swapkey_ser);
03843 if (swapval_ser) cp_string_destroy(swapval_ser);
03844 
03845         swap_tmpl = node->value_address[m];
03846         swapdesc = &swap_tmpl;
03847 
03848         sibl->items = tree->order - k - 1;
03849         memcpy(&sibl->key[0], &node->key[k], 
03850                sibl->items * sizeof(void *));
03851         memcpy(&sibl->ser_key[0], &node->ser_key[k],
03852                sibl->items * sizeof(cp_string *));
03853         memcpy(&sibl->value[0], &node->value[k], 
03854                sibl->items * sizeof(void *));
03855         memcpy(&sibl->value_address[0], &node->value_address[k],
03856                sibl->items * sizeof(block_desc));
03857         memcpy(&sibl->ser_value[0], &node->ser_value[k],
03858                sibl->items * sizeof(cp_string *));
03859         memset(&node->ser_key[k], 0, sibl->items * sizeof(cp_string *));
03860         memset(&node->ser_value[k], 0, sibl->items * sizeof(cp_string *));
03861 //~~        memset(&node->value_address[k], 0, sibl->items * sizeof(block_desc));
03862 
03863         for (j = 0; j < sibl->items; j++)
03864             sibl->dirty[j] |= BTREE_NODE_USE_VALUE_ADDR;
03865         memcpy(&sibl->child_address[0], &node->child_address[k], 
03866                (sibl->items + 1) * sizeof(unsigned long));
03867         cp_btreenode_set_items(node, node->items - sibl->items);
03868 //      node->items -= sibl->items;
03869         if (index < k - 1)
03870             btreenode_shift(node, index);
03871             
03872         cp_btreenode_set_key(node, index, key);
03873         cp_btreenode_set_value(node, index, value);
03874         if (desc != NULL)
03875         {
03876             memcpy(&node->value_address[index], desc, sizeof(block_desc));
03877             node->dirty[index] |= BTREE_NODE_USE_VALUE_ADDR;
03878         }
03879         else
03880             memset(&node->value_address[index], 0, sizeof(block_desc));
03881         cp_btreenode_set_child(node, index + 1, child);
03882         if (value) node->dirty[index] &= ~BTREE_NODE_USE_VALUE_ADDR;
03883     }
03884     else if (index == k) /* case (b) */
03885     {
03886 printfdbg("S(3)\n");
03887         m = index;
03888         swapkey = key;
03889         swapval = value;
03890         swapdesc = desc;
03891 
03892         sibl->items = tree->order - k - 1;
03893         memcpy(&sibl->key[0], &node->key[k], 
03894                sibl->items * sizeof(void *));
03895         memcpy(&sibl->ser_key[0], &node->ser_key[k], 
03896                sibl->items * sizeof(cp_string *));
03897         memcpy(&sibl->value[0], &node->value[k], 
03898                sibl->items * sizeof(void *));
03899         memcpy(&sibl->ser_value[0], &node->ser_value[k], 
03900                sibl->items * sizeof(cp_string *));
03901         memcpy(&sibl->value_address[0], &node->value_address[k],
03902                sibl->items * sizeof(block_desc));
03903         memset(&node->ser_key[k], 0, sibl->items * sizeof(cp_string *));
03904         memset(&node->ser_value[k], 0, sibl->items * sizeof(cp_string *));
03905         for (j = 0; j < sibl->items; j++)
03906             sibl->dirty[j] |= BTREE_NODE_USE_VALUE_ADDR;
03907         memcpy(&sibl->child_address[1], &node->child_address[k + 1], 
03908                sibl->items * sizeof(unsigned long));
03909         cp_btreenode_set_child(sibl, 0, child);
03910         cp_btreenode_set_items(node, node->items - sibl->items);
03911 //      node->items -= sibl->items;
03912     }
03913     else /* index > k  --  case (c) */
03914     {
03915         int siblindex = index - (k + 1);
03916         m = k;
03917         swapkey = node->key[m];
03918         swapval = node->value[m];
03919         swapkey_ser = node->ser_key[m]; node->ser_key[m] = NULL;
03920         swapval_ser = node->ser_value[m]; node->ser_value[m] = NULL;
03921 if (swapkey_ser) cp_string_destroy(swapkey_ser);
03922 if (swapval_ser) cp_string_destroy(swapval_ser);
03923 // 019
03924         memcpy(&swap_tmpl, &node->value_address[m], sizeof(block_desc));
03925         swapdesc = &swap_tmpl;
03926 printfdbg("swapdesc: %lx, %lx\n", swap_tmpl.offset, swap_tmpl.size);
03927 //      swapdesc = &node->value_address[m];
03928 printfdbg("S(4) m = %d\n", m);
03929 
03930         sibl->items = tree->order - k - 2;
03931 printfdbg("S(4) sibl->items = %d\n", sibl->items);
03932 
03933         memcpy(&sibl->key[0], &node->key[k + 1], 
03934                sibl->items * sizeof(void *));
03935         memcpy(&sibl->ser_key[0], &node->ser_key[k + 1], 
03936                sibl->items * sizeof(cp_string *));
03937         memcpy(&sibl->value[0], &node->value[k + 1],
03938                sibl->items * sizeof(void *));
03939         memcpy(&sibl->ser_value[0], &node->ser_value[k + 1],
03940                sibl->items * sizeof(cp_string *));
03941         memcpy(&sibl->value_address[0], &node->value_address[k + 1],
03942                sibl->items * sizeof(block_desc));
03943         memset(&node->ser_key[k + 1], 0, sibl->items * sizeof(cp_string *));
03944         memset(&node->ser_value[k + 1], 0, sibl->items * sizeof(cp_string *));
03945         for (j = 0; j < sibl->items; j++)
03946             sibl->dirty[j] |= BTREE_NODE_USE_VALUE_ADDR;
03947 
03948         memcpy(&sibl->child_address[0], &node->child_address[k + 1], 
03949                (sibl->items + 1) * sizeof(unsigned long));
03950 printfdbg("shift sibl (%d): index %d\n", sibl->address, siblindex); 
03951         if (siblindex < sibl->items)
03952             btreenode_shift(sibl, siblindex);
03953 
03954         cp_btreenode_set_key(sibl, siblindex, key);
03955         cp_btreenode_set_value(sibl, siblindex, value);
03956         if (desc != NULL)
03957         {
03958             memcpy(&sibl->value_address[siblindex], desc, sizeof(block_desc));
03959             sibl->dirty[siblindex] |= BTREE_NODE_USE_VALUE_ADDR;
03960         }
03961         else if (value) 
03962             sibl->dirty[siblindex] &= ~BTREE_NODE_USE_VALUE_ADDR;
03963         // 019
03964         cp_btreenode_set_child(sibl, siblindex + 1, child);
03965         cp_btreenode_set_items(sibl, sibl->items + 1);
03966 //      sibl->items++;
03967         cp_btreenode_set_items(node, node->items - sibl->items);
03968 //      node->items -= sibl->items;
03969     }
03970     cp_btree_write_node(tree, node);
03971 printfdbg("before write: sibl->address: %lX\n", sibl->address);
03972 printfdbg("sibl->parent: %lX\n", sibl->parent_address);
03973     cp_btree_write_node(tree, sibl);
03974     sibl->nodrop = 1;
03975     node->nodrop = 1;
03976 printfdbg("after write: sibl->address: %lX\n", sibl->address);
03977 sibladdr = sibl->address;
03978     for (j = 0; j <= sibl->items; j++) /* reassign parent */
03979         if (sibl->child_address[j]) 
03980         {
03981             cp_btreenode *cj = cp_btree_read_node(tree, sibl->child_address[j]);
03982             cp_btreenode_set_parent(cj, sibladdr);
03983 //          cp_btreenode_set_parent(cj, sibl->address);
03984 printfdbg("S(4.5): updating parent on node at %lX\n", cj->address);
03985             cp_btree_write_node(tree, cj);
03986         }
03987 printfdbg("S(5): BEFORE SPLIT P/2\n");
03988 //  cp_btree_file_dump(tree);
03989     btree_split(tree, node->parent_address, sibl, swapkey, swapval, swapdesc);
03990     sibl->nodrop = 0;
03991     node->nodrop = 0;
03992 #if 0
03993     if (node->parent_address == 0) 
03994     {
03995         cp_btreenode_set_parent(node, sibl->parent_address);
03996 printfdbg("node->parent: %lX\n", node->parent_address);
03997         cp_btree_write_node(tree, node);
03998     }
03999 #endif
04000 printfdbg("S(6): AFTER SPLIT P/2\n");
04001 //  cp_btree_file_dump(tree);
04002 }
04003 
04004 /* btree_split - complete the split operation by adding the given mapping to
04005  * the parent node
04006  */
04007 static void btree_split(cp_btree_file *tree, 
04008                         unsigned long parent_address,
04009                         cp_btreenode *node, 
04010                         void *key, 
04011                         void *value, 
04012                         block_desc *desc)
04013 {
04014     cp_btreenode *parent;
04015     int index = 0;
04016 #if 0
04017 if (value == NULL)
04018 {
04019     printfdbg("btree_split: null value\n");
04020     DIE();
04021 }
04022 #endif
04023 
04024 printfdbg("BS(1) node %lX\n", node->address);
04025     /* (1) split at root */
04026     if (parent_address == 0) // || parent_address == tree->root_addr)
04027     {
04028 printfdbg("BS(2)\n");
04029         parent = cp_btreenode_alloc(tree, 0);
04030         if (parent == NULL) return; //~~ emit error
04031         cp_btreenode_set_child(parent, 0, tree->root_addr);
04032         cp_btreenode_set_child(parent, 1, node->address);
04033         cp_btreenode_set_key(parent, 0, key);
04034         cp_btreenode_set_value(parent, 0, value);
04035         if (desc)
04036         {
04037             parent->value_address[0] = *desc;
04038             parent->dirty[0] |= BTREE_NODE_USE_VALUE_ADDR;
04039         }
04040         parent->items = 1;
04041 printfdbg("BS(2.1) --- write new root node ---\n");
04042         cp_btree_write_node(tree, parent);
04043 printfdbg("BS(2.1.2) new root node got address %lX\n", parent->address);
04044         cp_btreenode_set_parent(tree->root, parent->address); 
04045 // printfdbg("BS(2.1) --- write old root node ---\n");
04046         cp_btree_write_node(tree, tree->root);
04047         btree_set_root(tree, parent);
04048         cp_btreenode_set_parent(node, parent->address);
04049 printfdbg("BS(2.1) --- write new node ---\n");
04050         cp_btree_write_node(tree, node);
04051         return;
04052     }
04053 
04054 printfdbg("BS(3)\n");
04055     parent = cp_btree_read_node(tree, parent_address);
04056 
04057     /* (2) find where new key fits in */
04058     while (index < parent->items && 
04059            (*tree->cmp)(key, parent->key[index]) > 0) index++;
04060 
04061 #if 0
04062     //~~ 019
04063     if (value == NULL && desc != NULL)
04064     {
04065         parent->value_address[index] = *desc;
04066         parent->dirty[index] |= BTREE_NODE_USE_VALUE_ADDR;
04067     }
04068 #endif
04069 
04070 printfdbg("BS(4)\n");
04071     if (parent->items == tree->order - 1) /* (3) split again */
04072     {
04073         btree_inner_split(tree, parent, index, key, value, desc, node->address);
04074         return;
04075     }
04076 
04077 printfdbg("BS(5)\n");
04078     /* (4) insert new node */
04079     if (index < parent->items)
04080         btreenode_shift(parent, index);
04081     
04082     cp_btreenode_set_key(parent, index, key);
04083     //~~ 018 what's this?! same issue with bullshit value
04084     parent->value[index] = NULL;
04085     cp_btreenode_set_value(parent, index, value);
04086     cp_btreenode_set_child(parent, index + 1, node->address);
04087     cp_btreenode_set_items(parent, parent->items + 1);
04088     if (desc)
04089     {
04090         parent->value_address[index] = *desc;
04091         parent->dirty[index] |= BTREE_NODE_USE_VALUE_ADDR;
04092     }
04093     else
04094     {
04095         parent->dirty[index] &= ~BTREE_NODE_USE_VALUE_ADDR;
04096         memset(&parent->value_address[index], 0, sizeof(block_desc));
04097     }
04098 printfdbg("BS(6)\n");
04099     cp_btree_write_node(tree, parent);
04100     cp_btreenode_set_parent(node, parent->address);
04101 printfdbg("BS(7)\n");
04102     cp_btree_write_node(tree, node);
04103 }
04104 
04105 static void btreenode_unify(cp_btree_file *tree, cp_btreenode *node);
04106 
04107 /* On Unification
04108  *
04109  * btreenode_unify - borrow left
04110  *
04111  *             +---+---+---+---+                      +---+---+---+---+
04112  *             | A | Q | T |   |                      | A | M | T |   |
04113  *             +---+---+---+---+                      +---+---+---+---+
04114  *                /     \               =>               /     \
04115  * +---+---+---+---+  +---+---+---+---+   +---+---+---+---+  +---+---+---+---+
04116  * | E | J | M | - |  | R |   |   |   |   | E | J |   |   |  | Q | R |   |   |
04117  * +---+---+---+---+  +---+---+---+---+   +---+---+---+---+  +---+---+---+---+
04118  *
04119  * btreenode_unify - borrow right
04120  *
04121  *             +---+---+---+---+                      +---+---+---+---+
04122  *             | A | J | T |   |                      | A | M | T |   |
04123  *             +---+---+---+---+                      +---+---+---+---+
04124  *                /     \               =>               /     \
04125  * +---+---+---+---+  +---+---+---+---+   +---+---+---+---+  +---+---+---+---+
04126  * | E | - | - | - |  | M | Q | R |   |   | E | J |   |   |  | Q | R |   |   |
04127  * +---+---+---+---+  +---+---+---+---+   +---+---+---+---+  +---+---+---+---+
04128  *
04129  * 
04130  * left_unify 
04131  * 
04132  *               +---+---+---+---+                   +---+---+---+---+
04133  *               | A | J | T |   |                   | A | T |   |   |
04134  *               +---+---+---+---+                   +---+---+---+---+
04135  *                  /     \                   =>        /
04136  *   +---+---+---+---+   +---+---+---+---+            +---+---+---+---+
04137  *   | E | - | - | - |   | M | Q |   |   |            | E | J | M | Q |
04138  *   +---+---+---+---+   +---+---+---+---+            +---+---+---+---+
04139  *
04140  *   if the parent node goes below k = (order - 1) / 2 items, btreenode_unify
04141  *   is called on the parent.
04142  */
04143 static void left_unify(cp_btree_file *tree, 
04144                        cp_btreenode *surr, /* parent */
04145                        cp_btreenode *node, /* left node */
04146                        cp_btreenode *sibl, /* right node */
04147                        int idx)
04148 {
04149     int i;
04150     int lim;
04151 //  cp_btreenode *surr; //, *sibl; 
04152     unsigned long addr = node->address;
04153 //  if (node == NULL) node = cp_btree_read_node(tree, addr);
04154 //  surr = cp_btree_read_node(tree, node->parent_address);
04155 //  sibl = cp_btree_read_node(tree, surr->child_address[idx + 1]);
04156 printfdbg("left_unify %lX index %d node %lX %d items\n", addr, idx, node->address, node->items);
04157 printfdbg("LU(0) node: %lX %d items\n"
04158        "      surr: %lX %d items\n"
04159        "      sibl: %lX %d items\n", node->address, node->items, 
04160        surr->address, surr->items, sibl->address, sibl->items);
04161 printfdbg("left unify: move entry [%s => %s] from parent (%lX) to node (%lX)\n", 
04162     cp_string_tocstr(surr->key[idx]), surr->value[idx] ? cp_string_tocstr(surr->value[idx]) : "N/A", surr->address, node->address);
04163 printfdbg("left   (node):\n"); dump_node_dbg(node);
04164 printfdbg("right  (sibl):\n"); dump_node_dbg(sibl);
04165 printfdbg("parent (surr):\n"); dump_node_dbg(surr);
04166 //~~ 018 what's this?! tricky - set_value calls dtr, defunct value must be null!
04167     cp_btreenode_set_key(node, node->items, surr->key[idx]);
04168     node->ser_key[node->items] = surr->ser_key[idx]; surr->ser_key[idx] = NULL;
04169     node->value[node->items] = NULL;
04170     cp_btreenode_set_value(node, node->items, surr->value[idx]);
04171     node->value_address[node->items] = surr->value_address[idx];
04172     node->ser_value[node->items] = surr->ser_value[idx]; surr->ser_value[idx] = NULL;
04173     node->dirty[idx] |= BTREE_NODE_USE_VALUE_ADDR;
04174 //  node->key[node->items] = surr->key[idx];
04175 //  node->value[node->items] = surr->value[idx];
04176     if (idx + 2 == tree->order)
04177         cp_btreenode_set_child(surr, idx + 1, 0);
04178     else
04179         cp_btreenode_set_child(surr, idx + 1, surr->child_address[idx + 2]);    
04180 //  surr->child[idx + 1] = surr->child[idx + 2]; 
04181     btreenode_unshift(surr, idx);
04182     cp_btreenode_set_items(surr, surr->items - 1);
04183 printfdbg("left_unify: write parent (%lX)\n", surr->address);
04184 #if 0
04185     if (surr->items == 0)
04186     {
04187         add_free_block(tree, surr->address, HEADER_SIZE(tree)); 
04188         add_free_block(tree, surr->key_address.offset, surr->key_address.size);
04189     }
04190     else 
04191 #endif
04192     if (surr->items) 
04193         cp_btree_write_node(tree, surr); //~~ ?
04194     
04195 //  surr->items--;
04196     cp_btreenode_set_items(node, node->items + 1);  
04197 //  node->items++;
04198 printfdbg("left_unify: moving %d items from %lX to %lX (%d items => %d)\n", sibl->items, sibl->address, node->address, node->items, node->items + sibl->items);
04199 if (node->items + sibl->items < 0 || 
04200     node->items + sibl->items > tree->order - 1)
04201 {
04202     printfdbg("BAD ITEMS COUNTS\n");
04203     DIE();
04204 }
04205 //if (sibl->items <= 0)
04206 if (sibl->items < 0)
04207 {
04208     printfdbg("SIBL HAS NO ITEMS (%d)\n", sibl->items);
04209     DIE();
04210 }
04211 //  if (sibl->items)
04212     {
04213     memcpy(&node->key[node->items], &sibl->key[0], 
04214            sibl->items * sizeof(void *));
04215     memcpy(&node->ser_key[node->items], &sibl->ser_key[0], 
04216            sibl->items * sizeof(cp_string *));
04217     memset(&sibl->ser_key[0], 0, sibl->items * sizeof(cp_string *));
04218     memcpy(&node->value[node->items], &sibl->value[0], 
04219            sibl->items * sizeof(void *));
04220     memcpy(&node->ser_value[node->items], &sibl->ser_value[0], 
04221            sibl->items * sizeof(cp_string *));
04222     memset(&sibl->ser_value[0], 0, sibl->items * sizeof(cp_string *));
04223     memcpy(&node->value_address[node->items], &sibl->value_address[0], 
04224            sibl->items * sizeof(block_desc));
04225     }
04226     for (i = 0; i <= sibl->items; i++)
04227     {
04228         node->dirty[node->items + i] |= BTREE_NODE_DIRTY_KEY;
04229         cp_btreenode_set_child(node, node->items + i, sibl->child_address[i]);
04230     }
04231 //  memcpy(&node->child_address[node->items], &sibl->child_address[0], 
04232 //         (sibl->items + 1) * sizeof(cp_btreenode *));
04233            
04234 if (node->items + sibl->items  >= tree->order)
04235 {
04236     printf("too many items to unify. ciao\n");
04237     DIE();
04238 }
04239 
04240     for (i = 0; i < sibl->items; i++) 
04241         node->dirty[node->items - 1 + i] |= BTREE_NODE_USE_VALUE_ADDR;
04242 
04243     cp_btreenode_set_items(node, node->items + sibl->items);
04244 
04245 printfdbg("left_unify: write node (%lX)\n", node->address);
04246     cp_btree_write_node(tree, node);
04247 //  node->items += sibl->items;
04248 
04249 #if 0
04250     for (i = 0; i < sibl->items; i++) 
04251         node->dirty[node->items - 1 + i] |= BTREE_NODE_USE_VALUE_ADDR;
04252 #endif
04253 
04254     for (i = 0; i <= sibl->items; i++) 
04255     {
04256         if (sibl->child_address[i]) 
04257         {
04258             cp_btreenode *child = 
04259                 cp_btree_read_node(tree, sibl->child_address[i]);
04260             cp_btreenode_set_parent(child, node->address);
04261             cp_btree_write_node(tree, child);
04262 //          sibl->child[i]->parent = node;
04263         }
04264 //  }
04265     }
04266 
04267     add_free_block(tree, sibl->key_address.offset, sibl->key_address.size);
04268     add_free_block(tree, sibl->address, HEADER_SIZE(tree));
04269     cp_btreenode_set_items(sibl, 0); //~~ 018
04270     btree_destroy_node(tree, sibl);
04271 //~~ add free block
04272     btreenode_unify(tree, surr);
04273 }
04274 
04275 short is_root(cp_btreenode *node)
04276 {
04277     return node == node->owner->root || node->address == 0 || node->address == node->owner->root_addr;
04278 }
04279 
04280 /* node may have less than k items - if so, unify with other nodes */
04281 static void btreenode_unify(cp_btree_file *tree, cp_btreenode *node)
04282 {
04283     cp_btreenode *surr, *sibl;
04284     int i, k;
04285     int idx = 0; /* prevents gcc warning */
04286     int found;
04287 printfdbg("btreenode_unify node %p (%lX)\n", node, node->address);
04288     /* handle root node */
04289     if (is_root(node))
04290     {
04291 printfdbg("btreenode_unify node %p (%lX) is current root\n", node, node->address);
04292         if (node->items > 0) 
04293         {
04294             cp_btree_write_node(tree, node);
04295             return;
04296         }
04297         if (node->child_address[0])
04298         {
04299             surr = cp_btree_read_node(tree, node->child_address[0]);
04300             cp_btreenode_set_parent(surr, 0);
04301 printfdbg("btreenode_unify (0) write child %lX\n", surr->address);
04302             cp_btree_write_node(tree, surr);
04303             btree_set_root(tree, surr);
04304 //          cp_btreenode_destroy(node); //~~ ?
04305             //~~ add free block
04306         }
04307         else
04308             btree_set_root(tree, NULL);
04309 
04310         add_free_block(tree, node->key_address.offset, node->key_address.size);
04311         add_free_block(tree, node->address, HEADER_SIZE(tree));
04312         btree_destroy_node(tree, node);
04313         return;
04314     }
04315 
04316     k = (tree->order - 1) / 2;
04317 printfdbg("BU(1) k == %d\n", k);
04318     if (node->items >= k) /* btree_file condition met - no problem */
04319     {
04320         cp_btree_write_node(tree, node);
04321         return; 
04322     }
04323 printfdbg("BU(2) node has %d items\n", node->items);
04324 
04325     /* try to borrow a mapping from siblings */
04326     surr = cp_btree_read_node(tree, node->parent_address);
04327 printfdbg("BU(2.1) parent: %lX %d items\n", surr->address, surr->items);
04328     for (i = 0; i <= surr->items; i++)
04329         if (surr->child_address[i] == node->address)
04330         {
04331             idx = i;
04332             break;
04333         }
04334     /* idx now holds the current node's index in the parent's child[] array */
04335 printfdbg("BU(3) idx == %d\n", idx);
04336     
04337     found = 0;
04338     /* try borrow from left sibling */
04339     if (idx > 0 && surr->child_address[idx - 1]) 
04340     {
04341         sibl = cp_btree_read_node(tree, surr->child_address[idx - 1]);
04342 printfdbg("BU(4) sibl (%d items) at %lX\n", sibl->items, sibl->address);
04343         if (sibl->items > k)
04344         {
04345 printfdbg("BU(5) borrowing from sibl at %lX\n", sibl->address);
04346 printfdbg("SIBL:\n"); dump_node_dbg(sibl);
04347 printfdbg("SURR:\n"); dump_node_dbg(surr);
04348 printfdbg("NODE:\n"); dump_node_dbg(node);
04349             found = 1;
04350             btreenode_shift(node, 0);
04351             cp_btreenode_set_key(node, 0, surr->key[idx - 1]);
04352             node->ser_key[0] = surr->ser_key[idx - 1]; surr->ser_key[idx - 1] = NULL;
04353             cp_btreenode_set_value(node, 0, surr->value[idx - 1]);
04354             node->ser_value[0] = surr->ser_value[idx - 1]; surr->ser_value[idx - 1] = NULL;
04355             node->value_address[0] = surr->value_address[idx - 1];
04356 //          node->key[0] = surr->key[idx - 1];
04357 //          node->value[0] = surr->value[idx - 1];
04358             cp_btreenode_set_key(surr, idx - 1, sibl->key[sibl->items - 1]);
04359             surr->ser_key[idx - 1] = sibl->ser_key[sibl->items - 1]; sibl->ser_key[sibl->items - 1] = NULL;
04360             cp_btreenode_set_value_shallow(surr, idx - 1, sibl->value[sibl->items - 1]);
04361             surr->ser_value[idx - 1] = sibl->ser_value[sibl->items - 1]; sibl->ser_value[sibl->items - 1] = NULL;
04362             memcpy(&surr->value_address[idx - 1], &sibl->value_address[sibl->items - 1], sizeof(block_desc));
04363             surr->dirty[idx - 1] |= BTREE_NODE_USE_VALUE_ADDR;
04364 
04365             cp_btree_write_node(tree, surr);
04366 //          surr->key[idx - 1] = sibl->key[sibl->items - 1];
04367 //          surr->value[idx - 1] = sibl->value[sibl->items - 1];
04368             cp_btreenode_set_items(node, node->items + 1);          
04369 //          node->items++;
04370             cp_btreenode_set_child(node, 0, sibl->child_address[sibl->items]);
04371 //          node->child[0] = sibl->child[sibl->items];
04372             if (node->child_address[0])
04373             {
04374                 cp_btreenode *child = 
04375                     cp_btree_read_node(tree, node->child_address[0]);
04376                 cp_btreenode_set_parent(child, node->address);
04377 printfdbg("btreenode_unify (1) write child %lX\n", child->address);
04378                 cp_btree_write_node(tree, child);
04379 //              node->child[0]->parent = node; 
04380             }
04381 printfdbg("btreenode_unify (2) write node %lX\n", node->address);
04382             cp_btree_write_node(tree, node);
04383             cp_btreenode_set_items(sibl, sibl->items - 1);
04384 printfdbg("btreenode_unify (3) write sibl %lX\n", sibl->address);
04385             cp_btree_write_node(tree, sibl);
04386 //          sibl->items--;
04387             
04388         }
04389     }
04390 
04391     /* try borrow from right sibling */
04392     if (!found && (idx < surr->items && surr->child_address[idx + 1]))
04393     {
04394         sibl = cp_btree_read_node(tree, surr->child_address[idx + 1]);
04395 printfdbg("BU(6) sibl (%d items) at %lX\n", sibl->items, sibl->address);
04396         if (sibl->items > k)
04397         {
04398 printfdbg("BU(7) borrowing from sibl at %lX\n", sibl->address);
04399 printfdbg("SIBL:\n"); dump_node_dbg(sibl);
04400 printfdbg("SURR:\n"); dump_node_dbg(surr);
04401 printfdbg("NODE:\n"); dump_node_dbg(node);
04402             found = 1;
04403             cp_btreenode_set_key(node, node->items, surr->key[idx]);
04404             node->ser_key[node->items] = surr->ser_key[idx]; surr->ser_key[idx] = NULL;
04405             cp_btreenode_set_value(node, node->items, surr->value[idx]);
04406             node->ser_value[node->items] = surr->ser_value[idx]; surr->ser_value[idx] = NULL;
04407             node->value_address[node->items] = surr->value_address[idx];
04408 //          node->key[node->items] = surr->key[idx];
04409 //          node->value[node->items] = surr->value[idx];
04410             cp_btreenode_set_key(surr, idx, sibl->key[0]);
04411             surr->ser_key[idx] = sibl->ser_key[0]; sibl->ser_key[0] = NULL;
04412             cp_btreenode_set_value(surr, idx, sibl->value[0]);
04413             surr->ser_value[idx] = sibl->ser_value[0]; sibl->ser_value[0] = NULL;
04414             surr->value_address[idx] = sibl->value_address[0];
04415 printfdbg("btreenode_unify (4) write parent %lX\n", surr->address);
04416             cp_btree_write_node(tree, surr);
04417 //          surr->key[idx] = sibl->key[0];
04418 //          surr->value[idx] = sibl->value[0];
04419             cp_btreenode_set_items(node, node->items + 1);
04420 //          node->items++;
04421             cp_btreenode_set_child(node, node->items, sibl->child_address[0]);
04422 //          node->child[node->items] = sibl->child[0];
04423             if (node->child_address[node->items])
04424             {
04425                 cp_btreenode *child = 
04426                     cp_btree_read_node(tree, node->child_address[node->items]);
04427                 cp_btreenode_set_parent(child, node->address);
04428 printfdbg("btreenode_unify (5) write child %lX\n", child->address);
04429                 cp_btree_write_node(tree, child);
04430 //              node->child[node->items]->parent = node;
04431             }
04432 printfdbg("btreenode_unify (6) write node %lX\n", node->address);
04433             cp_btree_write_node(tree, node);
04434             cp_btreenode_set_child(sibl, 0, sibl->child_address[1]);
04435 //          sibl->child[0] = sibl->child[1];
04436             btreenode_unshift(sibl, 0);
04437             cp_btreenode_set_items(sibl, sibl->items - 1);
04438 //          sibl->items--;
04439 printfdbg("btreenode_unify (7) write sibl %lX\n", sibl->address);
04440             cp_btree_write_node(tree, sibl);            
04441         }
04442     }
04443 
04444     if (!found)  /* siblings too small - unify */
04445     {
04446 printfdbg("BU(8) node %lX - %d items: can\'t borrow, performing left_unify\n", node->address, node->items);
04447         cp_btree_write_node(tree, node); //~~ 018 - test if necessary
04448         if (idx > 0 && surr->child_address[idx - 1])
04449 {
04450             cp_btreenode *left = 
04451                 cp_btree_read_node(tree, surr->child_address[idx - 1]);
04452 printfdbg("BU(9) case 1 - unify with node at %lX\n", surr->child_address[idx - 1]);
04453             
04454             left_unify(tree, surr, left, node, idx - 1);
04455 }
04456         else if (idx < surr->items && surr->child_address[idx + 1])
04457 {
04458             cp_btreenode *right = 
04459                 cp_btree_read_node(tree, surr->child_address[idx + 1]);
04460 printfdbg("BU(10) case 2 - unify with node at %lX\n", node->address);
04461             left_unify(tree, surr, node, right, idx);
04462 }
04463     }
04464 }
04465 
04466 /* btree_find_mapping - performs a binary search on tree nodes to find the 
04467  * mapping for the given key. On average this saves comparisons for tree orders
04468  * above 8 more or less.
04469  *
04470  * The node containing the mapping is returned and *idx is set to the index
04471  * of the requested mapping. 
04472  */
04473 static cp_btreenode *
04474     btree_find_mapping(cp_btree_file *tree, void *key, int *idx)
04475 {
04476     cp_btreenode *curr = tree->root;
04477     int min, max;
04478     int pos = 0, cmp = 0;
04479 
04480 //printfdbg("F(1)\n");
04481     while (curr)
04482     {
04483         min = -1;
04484         max = curr->items;
04485 //printfdbg("F(2) curr: %p min: %d max: %d\n", curr, min, max);
04486         while (max > min + 1)
04487         {
04488             pos = (min + max) / 2;
04489 //printfdbg("comparing key %p, curr->key[%d] -> %p\n", key, pos, curr->key[pos]);
04490 //printfdbg("[%s], [%s]\n", cp_string_tocstr(key), cp_string_tocstr(curr->key[pos]));
04491             if ((cmp = (*tree->cmp)(key, curr->key[pos])) == 0)
04492             {
04493                 *idx = pos;
04494                 return curr;
04495             }
04496             if (cmp > 0)
04497                 min = pos;
04498             else
04499                 max = pos;
04500         }
04501         if (curr->child_address[max])
04502             curr = cp_btree_read_node(tree, curr->child_address[max]);
04503         else
04504             curr = NULL;
04505     }
04506 
04507 //printfdbg("F(3): %p\n", curr);
04508     return curr;
04509 }
04510 
04511 static cp_btreenode *
04512     create_root_node(cp_btree_file *file, void *key, void *value)
04513 {
04514     cp_btreenode *p = cp_btreenode_alloc(file, 0);
04515 
04516     p->items = 1;
04517     cp_btreenode_set_key(p, 0, key);
04518     cp_btreenode_set_value(p, 0, value);
04519     cp_btree_write_node(file, p);
04520 
04521     file->root = p;
04522     file->root_addr = p->address;
04523     fseek(file->fp, 0, SEEK_SET);
04524     cp_fwrite_long(file->fp, p->address);
04525 
04526     return p;
04527 }
04528 
04529 /****************************************************************************
04530  *                                                                          *
04531  *                      btree interface implementation                      *
04532  *                                                                          *
04533  ****************************************************************************/
04534 
04535 void *cp_btree_file_insert(cp_btree_file *tree, void *key, void *value)
04536 {
04537     int index = 0;
04538     int exists = 0;
04539     cp_btreenode *node;
04540     void *res = NULL;
04541 
04542     if (tree->root == NULL)
04543     {
04544         tree->items = 1;
04545         return create_root_node(tree, key, value);
04546     }
04547 
04548 printfdbg("(1)\n");
04549     /* (1) find where new key fits in */
04550     node = btree_locate_mapping(tree, key, &index, &exists);
04551 
04552     if (exists) /* (2) replace */
04553     {
04554         cp_btreenode_set_key(node, index, key);
04555         cp_btreenode_set_value(node, index, value);
04556 
04557 printfdbg("(2)\n");
04558         if (node->items == 0) node->items = 1;
04559         goto INSERT_DONE;
04560     }
04561         
04562 printfdbg("(3)\n");
04563     /* inserting in this node */
04564     tree->items++;
04565     if (node->items == tree->order - 1) /* (4) node full, split */
04566     {
04567 printfdbg("(4)\n");
04568         btree_inner_split(tree, node, index, key, value, NULL, 0);
04569         goto INSERT_DONE;
04570     }
04571 
04572 printfdbg("(5) node == %p root == %p\n", node, tree->root);
04573     /* (3) add to this node */
04574     if (index < node->items) /* shift unless inserting at end */
04575 {
04576         btreenode_shift(node, index);
04577 printfdbg("(6) index == %d dirty[%d] == %d\n", index, index, node->dirty[index]);
04578 }   
04579     /* copying handled by caller (in cp_btree_file_insert) */
04580 printfdbg("(7) index == %d\n", index);
04581     cp_btreenode_set_key(node, index, key);
04582     cp_btreenode_set_value(node, index, value);
04583     memset(&node->value_address[index], 0, sizeof(block_desc));
04584 printfdbg("(7.5) key: %s value: %s\n", cp_string_tocstr(key), cp_string_tocstr(value));
04585     cp_btreenode_set_items(node, node->items + 1);
04586 //  node->items++;
04587     cp_btree_write_node(tree, node);
04588 printfdbg("(8)\n");
04589 
04590 INSERT_DONE:
04591     node_cache_maintain(tree);
04592     return res;
04593 }
04594 
04595 void *cp_btree_file_get(cp_btree_file *tree, void *key)
04596 {
04597     void *res = NULL;
04598     int idx;
04599     cp_btreenode *node;
04600 
04601 //  if (cp_btree_txlock(tree, COLLECTION_LOCK_READ)) return NULL;
04602 
04603 printfdbg("cp_btree_file_get: invoking btree_find_mapping\n");
04604     node = btree_find_mapping(tree, key, &idx);
04605 printfdbg("cp_btree_file_get: btree_find_mapping returned\n");
04606     if (node) 
04607     {
04608         res = node->value[idx];
04609         if (res == NULL)
04610         {
04611 printfdbg("cp_btree_file_get: invoking grab_value\n");
04612             grab_value(tree, node, idx);
04613 printfdbg("cp_btree_file_get: grab_value returned\n");
04614             res = node->value[idx];
04615         }
04616     }
04617 
04618 //  cp_btree_file_txunlock(tree);
04619     
04620     node_cache_maintain(tree);
04621     return res;
04622 }
04623 
04624 void *cp_btree_file_delete(cp_btree_file *tree, void *key)
04625 {
04626     int i;
04627     cp_btreenode *curr;
04628     void *res = NULL;
04629     
04630 //  if (cp_btree_file_txlock(tree, COLLECTION_LOCK_WRITE)) return NULL;
04631 printfdbg("DELETE (0) %s\n", cp_string_tocstr(key));
04632     /* find mapping to remove */
04633     curr = btree_find_mapping(tree, key, &i);
04634     if (curr)
04635     {
04636 printfdbg("DELETE (1) %lX index %d (%d items)\n", curr->address, i, curr->items);
04637         res = btree_file_get_value(tree, curr, i);
04638 //      res = curr->value[i];
04639 #if 1
04640             if (tree->mode & COLLECTION_MODE_DEEP)
04641             {
04642 printf("[%lX] dropping key %d: [%s => %s]\n", curr->address, i, cp_string_tocstr(curr->key[i]), cp_string_tocstr(curr->value[i]));
04643                 if (tree->key_dtr) (*tree->key_dtr)(curr->key[i]);
04644                 if (tree->value_dtr) (*tree->value_dtr)(curr->value[i]);
04645                 curr->value[i] = NULL; //~~ 018
04646             }
04647 #endif
04648         if (curr->ser_key[i]) { cp_string_destroy(curr->ser_key[i]); curr->ser_key[i] = NULL; }
04649         if (curr->ser_value[i]) { cp_string_destroy(curr->ser_value[i]); curr->ser_value[i] = NULL; }
04650         if (curr->child_address[i + 1]) /* not leaf */
04651         {
04652             cp_btreenode *leaf = 
04653                 cp_btree_read_node(tree, curr->child_address[i + 1]);
04654             while (leaf->child_address[0])
04655                 leaf = cp_btree_read_node(tree, leaf->child_address[0]);
04656 printfdbg("DELETE (2) got leaf %lX (%d items)\n", leaf->address, leaf->items);
04657 
04658             cp_btreenode_set_key(curr, i, leaf->key[0]);
04659             cp_btreenode_set_value(curr, i, leaf->value[0]);
04660             curr->ser_key[i] = leaf->ser_key[0]; leaf->ser_key[0] = NULL;
04661             curr->ser_value[i] = leaf->ser_value[0]; leaf->ser_value[0] = NULL;
04662 #if 1 //~~ 018
04663             if (leaf->value_address[0].offset) //~~
04664             {
04665                 memcpy(&curr->value_address[i], &leaf->value_address[0].offset, sizeof(block_desc));
04666                 curr->dirty[0] |= BTREE_NODE_USE_VALUE_ADDR;
04667             }
04668 #endif          
04669 printfdbg("DELETE (2.1) write node %lX (%d items)\n", curr->address, curr->items);
04670             cp_btree_write_node(tree, curr);
04671 //          curr->key[i] = leaf->key[0];
04672 //          curr->value[i] = leaf->value[0];
04673 printfdbg("DELETE (2.2) unshift leaf %lX (%d items)\n", leaf->address, leaf->items);
04674             btreenode_unshift(leaf, 0);
04675             cp_btreenode_set_items(leaf, leaf->items - 1);
04676 //          if (leaf->items)
04677 //              cp_btree_write_node(tree, leaf);
04678 //          leaf->items--;
04679 printfdbg("DELETE (2.3) unify leaf %lX (%d items)\n", leaf->address, leaf->items);
04680             btreenode_unify(tree, leaf);
04681         }
04682         else /* leaf-like */
04683         {
04684 printfdbg("DELETE (3) unshift curr (%lX, %d items) index %d\n", curr->address, curr->items, i);
04685 #if 0
04686             if (tree->mode & COLLECTION_MODE_DEEP)
04687             {
04688                 if (tree->key_dtr) (*tree->key_dtr)(curr->key[i]);
04689                 if (tree->value_dtr) (*tree->value_dtr)(curr->value[i]);
04690             }
04691 #endif
04692             //~~ check on ser_key etc - should be free'd here?
04693             btreenode_unshift(curr, i);
04694             cp_btreenode_set_items(curr, curr->items - 1);
04695 //          curr->items--;
04696 printfdbg("DELETE (3) write curr (%lX, %d items)\n", curr->address, curr->items);
04697 //          if (curr->items) //~~
04698 //              cp_btree_write_node(tree, curr);
04699 printfdbg("DELETE (3) unify curr (%lX, %d items)\n", curr->address, curr->items);
04700             btreenode_unify(tree, curr);
04701         }
04702         tree->items--;
04703     }
04704 
04705 //  cp_btree_file_txunlock(tree);
04706     node_cache_maintain(tree);
04707     return res;
04708 }
04709 
04710 void *cp_btree_file_find(cp_btree_file *file, void *key, cp_op op)
04711 {
04712     void *value = NULL;
04713     return value;
04714 }
04715 
04716 int cp_btreenode_callback(cp_btree_file *f, unsigned long addr, 
04717                           cp_callback_fn fn, void *prm)
04718 {
04719     int res = 0;
04720     int i, count;
04721     cp_btreenode *node = cp_btree_read_node(f, addr);
04722     cp_mapping mapping;
04723 
04724     if (node == NULL) return 0;
04725 
04726     count = node->items;
04727     for (i = 0; i < count; i++)
04728     {
04729         if (node->child_address[i])
04730         {
04731             res = cp_btreenode_callback(f, node->child_address[i], fn, prm);
04732             if (res) return res;
04733         }
04734 
04735         /* pointer may be defunct by now. why not refcount? because it may
04736          * not be possible to keep the whole subtree in memory.
04737          */
04738         node = cp_btree_read_node(f, addr);
04739         grab_value(f, node, i);
04740         mapping.key = node->key[i];
04741         mapping.value = node->value[i];
04742         res = (*fn)(&mapping, prm);
04743         if (res) return res;
04744     }
04745     
04746     if (node->child_address[i])
04747         res = cp_btreenode_callback(f, node->child_address[i], fn, prm);
04748 
04749     return res;
04750 }
04751 
04752 int cp_btree_file_callback(cp_btree_file *file, cp_callback_fn fn, void *prm)
04753 {
04754     int res;
04755 
04756     if (file->root_addr)
04757     {
04758         if ((res = cp_btreenode_callback(file, file->root_addr, fn, prm)))
04759             return res;
04760     }
04761 
04762     return 0;
04763 }
04764 
04765 int cp_btree_file_count(cp_btree_file *file)
04766 {
04767     return file->items;
04768 }
04769 

Generated on Mon Dec 5 23:00:21 2011 for cprops by  doxygen 1.4.7