btree.h

00001 #ifndef _CP_BTREE_H
00002 #define _CP_BTREE_H
00003 
00004 /* WARNING - EXPERIMENTAL
00005  *
00006  * this code is EXPERIMENTAL. It will be CHANGED in a future release. You 
00007  * SHOULD NOT rely on the current cp_btree implementation for anything 
00008  * IMPORTANT, and here's why: 
00009  *
00010  *  o  The current implementation does not do full error checking.
00011  *  o  Concurrency is not handled at all. 
00012  *  o  There is no recovery mechanism. 
00013  *  o  Updates to the file often require multiple writes. The current 
00014  *     implementation has no fallback mechanism for cases where any of these
00015  *     writes fails. Such a failure will result in a corrupt file that can no
00016  *     longer be used and will probably cause a program that tries to to crash.
00017  *     This also means that if your application crashes for whatever reason 
00018  *     while updating a cp_btree in a different thread, this could result in 
00019  *     an incomplete update, i.e. a corrupt file.
00020  *  o  The current file format is not final. Future releases may not be 
00021  *     backwards compatible with files created with this release. 
00022  *  o  The association of data structure operations to disk storage will be
00023  *     replaced in a future release. In the current implementation memory and
00024  *     disk representations are tightly bound. This makes it difficult to 
00025  *     debug, optimize or extend. 
00026  */
00027 #include <stdio.h>
00028 #include "util.h"
00029 #include "str.h"
00030 #include "hashlist.h"
00031 #include "multimap.h"
00032 
00033 /* cp_btree_file.h - a file persistent BTree implementation 
00034  *
00035  * The following sketch describes a btree with 2 free blocks. 
00036  *
00037  *  <file header>
00038  *  +------+-------+------+-------+-------+--------------+
00039  *  | root | order | mode | free1 | size1 | aux fn names |
00040  *  +------+-------+------+-------+-------+--------------+ 
00041  *                            |
00042  *                     +------+
00043  *                     |    <--- total size1 bytes --->
00044  *                     |    +-------+-------+-.......-+
00045  *   offset free1 ---> +--->| free2 | size2 |         |
00046  *                          +-------+-------+ ........+
00047  *                              |
00048  *                    +---------+
00049  *                    |    <--- total size2 bytes --->
00050  *                    |    +-------+-------+-.......-+
00051  *  offset free2 ---> +--> |   0   | 0     |         | 
00052  *                         +-------+-------+ ........+
00053  *
00054  * auxilliary function names are formatted as follows:
00055  *
00056  *  +-------+------+------+------+-----+------+------+-------+-------+
00057  *  | total | Nlib | lib* | Ncmp | cmp | Nkcp | kcp* | Nkdtr | kdtr* |
00058  *  +-------+------+------+------+-----+------+------+-------+-------+
00059  *  +------+------+-------+-------+-----+----+-----+----+-----+----+-----+---+
00060  *  | Nvcp | vcp* | Nvdtr | vdtr* | Nsk | sk | Ndk | dk | Nsv | sv | Ndv |dv |
00061  *  +------+------+-------+-------+-----+----+-----+----+-----+----+-----+---+
00062  *
00063  *  (items marked with an asterix may be absent)
00064  *
00065  *  total - total length of serialized auxillary function names in bytes
00066  *  Nlib  - length of auxillary library name 
00067  *  lib   - name of auxilliary library 
00068  *  Ncmp  - length of compare function name
00069  *  cmp   - compare function name
00070  *  Nkcp  - length of key copy function name
00071  *  kcp   - key copy function name
00072  *  Nkdtr - length of key destructor function name
00073  *  kdtr  - key destructor function name
00074  *  Nvcp  - length of value copy function name
00075  *  vcp   - value copy function name
00076  *  Nvdtr - length of value destructor function name
00077  *  vdtr  - value destructor function name
00078  *  Nsk   - length of key serialization function name
00079  *  sk    - key serialization function name
00080  *  Ndk   - length of key deserialization function name
00081  *  dk    - key deserialization function name
00082  *  Nsv   - length of value serialization function name
00083  *  sv    - value serialization function name
00084  *  Ndv   - length of value deserialization function name
00085  *  dv    - value deserialization function name
00086  */
00087 
00088 unsigned int mem2int(void *addr);
00089 unsigned long mem2long(void *addr);
00090 int cp_fread_int(FILE *fp, unsigned int *i);
00091 int cp_fread_long(FILE *fp, unsigned long *i);
00092 int cp_fwrite_int(FILE *fp, unsigned int i);
00093 int cp_fwrite_long(FILE *fp, unsigned long i);
00094 
00095 
00096 /****************************************************************************
00097  *                                                                          *
00098  *                             btree definitions                            *
00099  *                                                                          *
00100  ****************************************************************************/
00101 
00102 /* block descriptor on the disk */
00103 typedef struct _block_desc
00104 {
00105     unsigned long offset;
00106     unsigned long size;
00107 } block_desc;
00108 
00109 /* block descriptor in memory */
00110 typedef struct _mem_block_desc
00111 {
00112     unsigned long offset;
00113     unsigned long size;
00114     unsigned long prev; 
00115     unsigned long next; 
00116 } mem_block_desc;
00117 
00118 void *mem_block_desc_size(void *p);
00119 void *mem_block_desc_offset(void *p);
00120 
00121 struct _cp_btree_file;
00122 
00123 /*
00124  * here's what a node header looks like on the disk.
00125  *
00126  * +-+-----+----+------+-----+------+-----+...+------+-----+------+-----+
00127  * |n|Kaddr|Klen|k1addr|k1len|v1addr|v1len|   |kNaddr|kNlen|vNaddr|vNlen|
00128  * +-+-----+----+------+-----+------+-----+...+------+-----+------+-----+
00129  * +-+--+...+----+
00130  * |P|C1|   |CN+1|
00131  * +-+--+...+----+
00132  *
00133  * with a tree of order N+1, the parameters are as follows: 
00134  * 
00135  * o    n                  - number of items in this node
00136  * o    Kaddr              - address of the key block
00137  * o    Klen               - total length of the key block
00138  * o    k1addr .. kNaddr   - offsets of the keys k1 to kN within the key block
00139  * o    k1len  .. kNlen    - lengths of the keys k1 .. kN
00140  * o    v1addr .. vNaddr   - absolute offsets of the values v1 .. vN
00141  * o    v1len  .. vNlen    - lengths of the values v1 .. vN
00142  * o    P                  - address of parent node
00143  * o    C1 .. CN+1         - addresses of children nodes 1 .. N+1
00144  *
00145  * The key block resides elsewhere. The added complexity in this scheme serves
00146  * to allow a flexible key length. Assuming unlimited read sizes, retrieving 
00147  * the complete node header from the disk requires 2 reads: 
00148  * 
00149  * (1) read the node header: W*2 + (order - 1)*W*2*2 + W + order*W bytes
00150  * (2) read the key block: Klen bytes
00151  * 
00152  * The fixed key length implementation is simpler and faster, but does not 
00153  * allow flexible key sizes. 
00154  */
00155 typedef struct _cp_btreenode
00156 {
00157     int items;
00158 
00159     unsigned long address;
00160     void **key;
00161     cp_string **ser_key;
00162     block_desc key_address;
00163 
00164     void **value;
00165     cp_string **ser_value;
00166     block_desc *value_address;
00167     short *dirty;
00168     short dirty_flag;
00169 
00170     short nodrop; /* do not remove from cache */
00171 
00172     unsigned long *child_address;
00173     unsigned long parent_address;
00174 
00175     struct _cp_btree_file *owner;
00176 } cp_btreenode;
00177 
00178 
00179 typedef struct _cp_btree_file
00180 {
00181     char *filename;
00182     FILE *fp;
00183     char *ctl_filename;
00184     FILE *ctl;
00185 //    int fd;
00186 
00187     long root_addr;
00188     unsigned int order;
00189     unsigned long node_header_len;
00190     cp_multimap *free_block;
00191     cp_index *free_block_size_index;
00192 //    unsigned long last_free_block;
00193     unsigned long eof;
00194 
00195     cp_btreenode *root;
00196     int cache_size;
00197     cp_hashlist *node_cache;
00198 
00199     int items;
00200 
00201     int mode; 
00202 
00203     void *aux_lib;                          
00205     cp_compare_fn cmp;                      
00206     cp_copy_fn key_copy;                    
00207     cp_destructor_fn key_dtr;               
00208     cp_copy_fn value_copy;                  
00209     cp_destructor_fn value_dtr;             
00211     cp_serialize_fn serialize_key;          
00212     cp_deserialize_fn deserialize_key;      
00213     cp_serialize_fn serialize_value;        
00214     cp_deserialize_fn deserialize_value;    
00215 } cp_btree_file;
00216 
00217 cp_btreenode *cp_btree_read_node(cp_btree_file *file, unsigned long addr);
00218 int cp_btree_write_node(cp_btree_file *file, cp_btreenode *node);
00219 cp_btree_file *cp_btree_file_open(char *filename, int *err);
00220 cp_btree_file *
00221     cp_btree_file_create(char *filename, unsigned int order, int mode, 
00222                          char *aux_lib, char *cmp, char *key_copy, 
00223                          char *key_dtr, char *value_copy, char *value_dtr,
00224                          char *serialize_key, char *deserialize_key, 
00225                          char *serialize_value, char *deserialize_value, 
00226                          int *err);
00227 void cp_btree_file_dump(cp_btree_file *bf);
00228 int cp_btree_file_close(cp_btree_file *bf);
00229 
00230 void *cp_btree_file_get(cp_btree_file *tree, void *key);
00231 
00232 #if 0
00233 typedef struct _write_desc
00234 {
00235     struct _write_desc *next;
00236     unsigned long offset;
00237     unsigned long size;
00238     void *data;
00239 } write_desc;
00240 
00241 typedef struct _file_transaction
00242 {
00243     unsigned long id;
00244     write_desc *op;
00245 } file_transaction;
00246 #endif
00247 
00248 #endif /* _CP_BTREE_H
00249 */

Generated on Mon Dec 5 23:00:21 2011 for cprops by  doxygen 1.4.7