#include #include #include #include #include #include #include #include #define DEPTH 8 #define RANGE 256 typedef struct _tree { void *branches[RANGE]; int index; } tree; typedef struct _linkedlist { int index; struct _linkedlist *next; } linkedlist; typedef struct _pair { int index; int length; int start; } pair; int ormask = 256; unsigned char flags = 0; int file_index = 1; FILE *out; int recentoff = -1; int lwm = 0; int sort_by_start(void *a, void *b); int sort_by_length(void *a, void *b); void add_string(tree *t, char *string, int index, int size, int depth); void setbits(int value, int length) { int mask; mask = 1<<(length-1); while (mask) { if (ormask == 1) { fseek(out, file_index, SEEK_SET); fwrite(&flags, 1, 1, out); fseek(out, 0, SEEK_END); file_index = ftell(out); //printf("file_index: %d\n", file_index); flags = 0; fwrite(&flags, 1, 1, out); ormask = 128; } else ormask >>= 1; if (value&mask) flags |= ormask; mask >>= 1; } } void setgamma(int gamma) { int mask = 1, tmp, len = 0; if (gamma < 2) { printf("Error: gamma passed %d\n", gamma); exit(1); } tmp = gamma; while (tmp != 1) { tmp >>= 1; mask <<= 1; len++; } //printf("len(gamma) == %d\n", len*2); gamma = gamma&(mask-1); mask >>= 1; while (len > 1) { len--; tmp = (((gamma&mask)>>len)<<1) | 1; setbits(tmp, 2); mask >>= 1; } setbits((gamma&1)<<1, 2); } void merge_sort(void *array, int count, int size, int cf(void *a, void *b)) { int i, join, actualsize; int left, middle; char *a = (char *)array; char tmp_item[size]; for (i = 0; i < count; i += 2) { if (i < count - 1) { if (cf(&a[i * size], &a[(i + 1) * size]) > 0) { memcpy(tmp_item, &a[i * size], size); memcpy(&a[i * size], &a[(i+1) * size], size); memcpy(&a[(i+1) * size], tmp_item, size); } } } for (i = 4; i < count*2; i *= 2) { for (join = 0; join < count; join += i) { actualsize = count - join; if (actualsize > i) actualsize = i; left = join; middle = join + i/2; while (left < middle && middle < join+actualsize) { if (cf(&a[left * size], &a[middle * size]) > 0) { memcpy(tmp_item, &a[middle * size], size); memmove(&a[(left+1) * size], &a[left * size], size*(middle-left)); memcpy(&a[left * size], tmp_item, size); middle++; } left++; } } } } void search(tree *t, char *file, int size, pair *points) { tree *tmp; linkedlist *ltmp; int i, k, len, idx; int s1, s2, s3; add_string(t, file, 0, size, DEPTH); points[0].index = 0; points[0].length = 1; for (i = 1; i < size; i++) { tmp = t; len = 0; idx = i; for (k = 0; i+len < size && k < DEPTH; k++) { if (!tmp->branches[(unsigned int)file[i+k]]) { if (!len) len = 1; break; } len++; tmp = tmp->branches[(unsigned int)file[i+k]]; idx = tmp->index; } if (i+len < size && k == DEPTH) { s1 = i+len; s2 = 0; s3 = len; ltmp = tmp->branches[(unsigned int)file[i+k]]; while(ltmp && i+len < size) { if (file[i+len] == file[ltmp->index+len] && strncmp(&file[s1], &file[ltmp->index+s3], s2) == 0) { idx = ltmp->index; len++; s2++; while (i+len < size && file[i+len] == file[idx+len]) { len++; s2++; } } ltmp = ltmp->next; } } points[i].length = len; points[i].index = idx; points[i].start = i; add_string(t, &file[i], i, size - i, DEPTH); } merge_sort(points, size, sizeof(pair), sort_by_length); } void add_string(tree *t, char *string, int index, int size, int depth) { linkedlist *l; int i; for (i = 0; i < size && i < depth; i++) { if (t->branches[(unsigned int)string[i]] == NULL) { t->branches[(unsigned int)string[i]] = malloc(sizeof(tree)); memset(((tree *)t->branches[(unsigned int)string[i]])->branches, 0, sizeof(void *)*RANGE); } t = t->branches[(unsigned int)string[i]]; t->index = index; } if (i < size) { l = malloc(sizeof(linkedlist)); l->index = index; l->next = t->branches[(unsigned int)string[i]]; t->index = index; t->branches[(unsigned int)string[i]] = l; } } int bitmask_fill(pair *points, pair *touse, int size) { int touse_len, idx, len, i, j; unsigned int bitmask[(size+31)>>5]; memset(bitmask, 0, ((size+31)>>5)*sizeof(unsigned int)); touse_len = 0; len = points[0].length; for (i = 0; i < size; i++) { idx = points[i].start; len = points[i].length; if (idx) { if (!(bitmask[idx>>5]&(1<<(idx&31))) && !(bitmask[(idx+len-1)>>5]&(1<<((idx+len-1)&31)))) { touse[touse_len++] = points[i]; for (j = 0; j < len; j++) bitmask[(idx+j)>>5] |= (1<<((idx+j)&31)); } else { if (!(bitmask[idx>>5]&(1<<(idx&31)))) { len = 1; while (!(bitmask[(idx+len)>>5]&(1<<((idx+len)&31)))) len++; points[i].length = len; touse[touse_len++] = points[i]; for (j = 0; j < len; j++) bitmask[(idx+j)>>5] |= (1<<((idx+j)&31)); } } } } merge_sort(touse, touse_len, sizeof(pair), sort_by_start); return touse_len; } int sort_by_length(void *a, void *b) { pair *left = (pair *) a; pair *right = (pair *) b; return right->length - left->length; } int sort_by_index(void *a, void *b) { pair *left = (pair *) a; pair *right = (pair *) b; return left->index - right->index; } int sort_by_start(void *a, void *b) { pair *left = (pair *) a; pair *right = (pair *) b; return left->start - right->start; } void raw(char c) { setbits(0, 1); fwrite(&c, 1, 1, out); lwm = 0; } void zero(void) { setbits(64+32+16, 7); lwm = 0; } void onenear(int idx) { setbits(64+32+16+idx, 7); lwm = 0; } void twoorthree(int off, int three) { unsigned char c = off<<1; if (three) c |= 1; setbits(4+2, 3); fwrite(&c, 1, 1, out); recentoff = off; lwm = 1; } void recent(int length) { setbits(8, 4); setgamma(length); lwm = 1; } void variable(int shift, char *string, int off, int length, int start) { int orglength, i; unsigned char c; orglength = length; if (off >= 32000) length--; if (off >= 1280) length--; if (off < 128) length -= 2; if (length > 1) { setbits(2, 2); setgamma((off>>8)+shift); c = off&255; fwrite(&c, 1, 1, out); setgamma(length); recentoff = off; lwm = 1; } else { for (i = 0; i < orglength; i++) { if (off < 16) { onenear(off); } else if (!string[i]) { zero(); } else { raw(string[i]); } } } } void encode(char *outfile, char *file, pair *touse, int touse_len) { int i, idx, off, len; char c; out = fopen(outfile, "wb"); if (!out) { printf("Error opening output.\n"); exit(2); } fwrite(&file[0], 1, 1, out); fwrite(&flags, 1, 1, out); for (i = 0; i < touse_len; i++) { idx = touse[i].index; off = touse[i].start - idx; len = touse[i].length; if (len > 1 && len < 4 && off > 0 && off < 128) { twoorthree(off, len == 3); } else if (len > 1 && lwm == 0) { if (off == recentoff) { recent(len); } else { variable(3, &file[touse[i].start], off, len, touse[i].start); } } else if (len > 1) { variable(2, &file[touse[i].start], off, len, touse[i].start); } else if (off > 0 && off < 16) { onenear(off); } else if (file[touse[i].start] == 0) { zero(); } else { raw(file[touse[i].start]); } } setbits(4+2, 3); c = 0; fwrite(&c, 1, 1, out); fseek(out, file_index, SEEK_SET); fwrite(&flags, 1, 1, out); fclose(out); } int main(int argc, char *argv[]) { int in, size; char *file; int touse_len; tree t; pair *points, *touse; if (argc == 3) { in = open(argv[1], O_RDONLY | O_BINARY); size = lseek(in, 0, SEEK_END); file = mmap(NULL, size, PROT_READ, MAP_SHARED, in, 0); memset(t.branches, 0, sizeof(void *)*RANGE); points = malloc(size*sizeof(pair)); search(&t, file, size, points); touse = malloc(size*sizeof(pair)); touse_len = bitmask_fill(points, touse, size); encode(argv[2], file, touse, touse_len); munmap(file, size); close(in); } return 0; }