bit_sequence.c

00001 #include "bit_sequence.h"
00002 
00003 #include <stdlib.h>
00004 #include <string.h>
00005 #include <stdio.h>
00006 
00007 cp_bit_sequence *cp_bit_sequence_create(int length, unsigned char *bits)
00008 {
00009     int bytecount;
00010     cp_bit_sequence *seq = (cp_bit_sequence *) malloc(sizeof(cp_bit_sequence));
00011     if (seq == NULL) return NULL;
00012 
00013     bytecount = (length + 7) >> 3;
00014     seq->bits = (unsigned char *) malloc(bytecount);
00015     if (seq->bits == NULL)
00016     {
00017         free(seq);
00018         return NULL;
00019     }
00020 
00021     if (bits != NULL)
00022         memcpy(seq->bits, bits, bytecount);
00023 
00024     seq->length = length;
00025 
00026     return seq;
00027 }
00028 
00029 /*
00030  * initialize from a string that looks like "1010101" - convenience function 
00031  * for debugging.
00032  */
00033 cp_bit_sequence *cstr_to_bit_sequence(char *str)
00034 {
00035     int i, pos, len, bytecount, bit;
00036     cp_bit_sequence *seq;
00037 
00038     seq = (cp_bit_sequence *) malloc(sizeof(cp_bit_sequence));
00039     if (seq == NULL) return NULL;
00040 
00041     len = strlen(str);
00042     bytecount = (len + 7) / 8;
00043     
00044     seq->bits = (unsigned char *) calloc(1, bytecount);
00045     if (seq->bits == NULL)
00046     {
00047         free(seq);
00048         return NULL;
00049     }
00050 
00051     seq->length = len;
00052     for (i = 0, pos = 0, bit = 0x80; i < len; i++)
00053     {
00054         if (str[i] == '1')
00055             seq->bits[pos] |= bit;
00056         bit >>= 1;
00057         if (bit == 0)
00058         {
00059             bit = 0x80;
00060             pos++;
00061         }
00062     }
00063         
00064     return seq;
00065 }
00066 
00067 void cp_bit_sequence_destroy(cp_bit_sequence *seq)
00068 {
00069     if (seq)
00070     {
00071         free(seq->bits);
00072         free(seq);
00073     }
00074 }
00075 
00076 void cp_bit_sequence_dump(cp_bit_sequence *seq)
00077 {
00078     int i, pos;
00079 
00080     printf("bit sequence: %d bits\n", seq->length);
00081 
00082     for (i = 0, pos = 0; i < seq->length; i++)
00083     {
00084         if (seq->bits[pos] & (0x80 >> (i % 8))) 
00085             printf("1");
00086         else
00087             printf("0");
00088 
00089         if (((i + 1) % 8) == 0) pos++;
00090     }
00091 
00092     printf("\n");
00093 }
00094 
00095 char *cp_bit_sequence_to_string(cp_bit_sequence *seq)
00096 {
00097     int i, pos;
00098     char *res = (char *) malloc(seq->length + 1);
00099     if (res == NULL) return NULL;
00100     
00101     for (i = 0, pos = 0; i < seq->length; i++)
00102     {
00103         if (seq->bits[pos] & (0x80 >> (i % 8))) 
00104             res[i] = '1';
00105         else
00106             res[i] = '0';
00107 
00108         if (((i + 1) % 8) == 0) pos++;
00109     }
00110 
00111     res[i] = '\0';
00112 
00113     return res;
00114 }
00115 
00116 cp_bit_sequence *cp_bit_sequence_dup(cp_bit_sequence *seq)
00117 {
00118     int bytecount;
00119     cp_bit_sequence *dup;
00120    
00121     dup = (cp_bit_sequence *) malloc(sizeof(cp_bit_sequence));
00122     if (dup == NULL) return NULL;
00123 
00124     bytecount = BYTECOUNT(seq);
00125     dup->bits = (unsigned char *) malloc(bytecount);
00126     if (dup->bits == NULL)
00127     {
00128         free(dup);
00129         return NULL;
00130     }
00131 
00132     memcpy(dup->bits, seq->bits, bytecount);
00133     dup->length = seq->length;
00134 
00135     return dup;
00136 }
00137 
00138 cp_bit_sequence *cp_bit_sequence_cpy(cp_bit_sequence *dst, cp_bit_sequence *src)
00139 {
00140     int bytecount_src, bytecount_dst;
00141 
00142     bytecount_src = BYTECOUNT(src);
00143     bytecount_dst = BYTECOUNT(dst);
00144 
00145     if (bytecount_src > bytecount_dst)
00146     {
00147         if (dst->length > 0)
00148             free(dst->bits);
00149 
00150         dst->bits = (unsigned char *) malloc(bytecount_src);
00151         if (dst->bits == NULL) return NULL;
00152     }
00153 
00154     memcpy(dst->bits, src->bits, bytecount_dst);
00155     src->length = dst->length;
00156 
00157     return dst;
00158 }
00159 
00160 static unsigned char bstr_mask[] = 
00161 { 
00162     0x80, // 10000000
00163     0xC0, // 11000000 
00164     0xE0, // 11100000 
00165     0xF0, // 11110000 
00166     0xF8, // 11111000 
00167     0xFC, // 11111100 
00168     0xFE, // 11111110 
00169     0xFF  // 11111111
00170 };
00171 
00172 cp_bit_sequence *cp_bit_sequence_cat(cp_bit_sequence *head, cp_bit_sequence *tail)
00173 {
00174     int old_bytecount = BYTECOUNT(head);
00175     int pos = head->length;
00176     int cpos = old_bytecount << 3;
00177     int diff = cpos - pos;
00178     int newlen = head->length + tail->length;
00179     int new_bytecount = (newlen + 7) >> 3;
00180 
00181     int in_place = head == tail;
00182     if (in_place)
00183         tail = cp_bit_sequence_dup(tail);
00184 
00185     if (new_bytecount > old_bytecount)
00186     {
00187         unsigned char *p = (unsigned char *) malloc(new_bytecount);
00188         if (p == NULL) return NULL;
00189 
00190         memcpy(p, head->bits, old_bytecount);
00191         free(head->bits);
00192         head->bits = p;
00193     }
00194 
00195     if (diff == 0) // we're on a character boundary
00196     {
00197         memcpy(&head->bits[old_bytecount], tail->bits, BYTECOUNT(tail));
00198     }
00199     else
00200     {
00201         int i;
00202 
00203         /* example:
00204          *   head      tail               
00205          *   101      10100001 
00206          *      ^---- pos: head->length = 3  diff = 5
00207          */
00208         int stretch = new_bytecount - old_bytecount;
00209         head->bits[old_bytecount - 1] &= bstr_mask[(head->length % 8) - 1];
00210         for (i = 0; i < stretch; i++)
00211         {
00212             head->bits[old_bytecount - 1 + i] |= tail->bits[i] >> (8 - diff);
00213             head->bits[old_bytecount + i] = tail->bits[i] << diff;
00214         }
00215         if (stretch < BYTECOUNT(tail))
00216             head->bits[old_bytecount + stretch - 1] |= tail->bits[stretch] >> (8 - diff);
00217     }
00218 
00219     head->length = newlen;
00220 
00221     if (in_place)
00222         cp_bit_sequence_destroy(tail);
00223 
00224     return head;
00225 }
00226 
00227 //~~ optimize later: we want hardware CLZ instructions where available - gcc has builtins
00228 #ifndef CP_CLZ_CHAR
00229 #define CP_CLZ_CHAR(x) cp_clz_char_slow(x)
00230 #endif
00231 
00232 int cp_clz_char_slow(unsigned char x)
00233 {
00234     int bit = 0x80;
00235     int lz = 0;
00236     for (lz = 0, bit = 0x80; bit > 0 && ((bit & x) == 0); lz++, bit >>= 1);
00237     return lz;
00238 }
00239 
00240 //~~ optimize later: better to compare sizeof(long long) bytes at a time
00241 int cp_bit_sequence_cmp(cp_bit_sequence *a, cp_bit_sequence *b, int *rpos)
00242 {
00243     int cmp, pos;
00244     int ca, cb, len;
00245     int atail, btail;
00246     int acmp, bcmp, shift;
00247 
00248     ca = BYTECOUNT(a);
00249     atail = ca * 8 - a->length;
00250     if (atail > 0) ca--;
00251 
00252     cb = BYTECOUNT(b);
00253     btail = cb * 8 - b->length;
00254     if (btail > 0) cb--;
00255 
00256     len = ca < cb ? ca : cb;
00257 
00258     for (pos = 0; pos < len; pos++)
00259         if (a->bits[pos] != b->bits[pos]) 
00260             break;
00261 
00262     if (pos < len)
00263     {
00264         if (rpos != NULL)
00265             *rpos = pos * 8 + CP_CLZ_CHAR(a->bits[pos] ^ b->bits[pos]);
00266 
00267         return a->bits[pos] - b->bits[pos];
00268     }
00269 
00270     acmp = bcmp = -1;
00271     shift = 0;
00272 
00273     if (len < ca) 
00274         acmp = a->bits[len];
00275     else if (atail > 0)
00276     {
00277         acmp = a->bits[len] & bstr_mask[7 - atail];
00278         shift = atail;
00279     }
00280 
00281     if (len < cb)
00282         bcmp = b->bits[len];
00283     else if (btail > 0)
00284     {
00285         bcmp = b->bits[len] & bstr_mask[7 - btail];
00286 #define BMAX(a, b) ((a) > (b) ? (a) : (b))
00287         shift = BMAX(shift, btail);
00288     }
00289 
00290     if (acmp != -1 && bcmp != -1)
00291     {
00292         if (shift)
00293         {
00294             acmp >>= shift;
00295             bcmp >>= shift;
00296         }
00297 
00298         if (acmp == bcmp)
00299         {
00300             len = a->length - b->length;
00301             if (rpos != NULL)
00302                 *rpos = len < 0 ? a->length : b->length;
00303             return len;
00304         }
00305         else
00306         {
00307             if (rpos != NULL)
00308                 *rpos = (pos * 8 + CP_CLZ_CHAR(acmp ^ bcmp)) - shift;
00309             return (acmp - bcmp) << shift;
00310         }
00311     }
00312 
00313     if (rpos != NULL)
00314         *rpos = pos * 8; // + CP_CLZ_CHAR(a->bits[pos] ^ b->bits[pos]);
00315 
00316     return 0; // a->bits[pos] - b->bits[pos];
00317 }
00318 
00319 #if 0 //~~ 
00320     if (ca > sizeof(long) && cb > sizeof(long))
00321     {
00322         unsigned long *lpa = (unsigned long *) a->bits;
00323         unsigned long *lpb = (unsigned long *) b->bits;
00324 
00325         while (1)
00326         {
00327             cmp = *lpa - *lpb;
00328             if (cmp != 0) break;
00329             ca -= sizeof(long);
00330             cb -= sizeof(long);
00331             if (ca < sizeof(long) || cb < sizeof(long)) break;
00332             pos += sizeof(long) * 8;
00333         } 
00334 
00335         if (cmp != 0)
00336         {
00337             unsigned long *t = *lpa ^ *lpb;
00338             
00339         }
00340             
00341     }
00342 #endif
00343 
00344 /*
00345  * shift bits left by the specified count, resulting in a shorter bit sequence
00346  */
00347 int cp_bit_sequence_shift_left(cp_bit_sequence *seq, int count)
00348 {
00349     int i;
00350     int bytes = count / 8;
00351     int bits = count - bytes * 8;
00352     int len = BYTECOUNT(seq) - bytes;
00353 
00354     if (len > 0)
00355         memmove(seq->bits, &seq->bits[bytes], len);
00356 
00357     if (bits > 0)
00358     {
00359         seq->bits[0] <<= bits;
00360         for (i = 1; i < len; i++)
00361         {
00362             seq->bits[i - 1] |= seq->bits[i] >> (8 - bits);
00363             seq->bits[i] <<= bits;
00364         }
00365     }
00366 
00367     seq->length -= count;
00368 
00369     return seq->length;
00370 }
00371 

Generated on Mon Dec 5 23:00:21 2011 for cprops by  doxygen 1.4.7