
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define DEPTH 8
#define RANGE 256

typedef struct _tree {
	void *branches[RANGE];
	int index;
} tree;

typedef struct _linkedlist {
	int index;
	struct _linkedlist *next;
} linkedlist;

typedef struct _pair {
	int index;
	int length;
	int start;
} pair;

int ormask = 256;
unsigned char flags = 0;
int file_index = 1;
FILE *out;
int recentoff = -1;
int lwm = 0;

int sort_by_start(void *a, void *b);
int sort_by_length(void *a, void *b);
void add_string(tree *t, char *string, int index, int size, int depth);

void setbits(int value, int length)
{
	int mask;

	mask = 1<<(length-1);
	while (mask) {
		if (ormask == 1) {
			fseek(out, file_index, SEEK_SET);
			fwrite(&flags, 1, 1, out);
			fseek(out, 0, SEEK_END);
			file_index = ftell(out);
			//printf("file_index: %d\n", file_index);
			flags = 0;
			fwrite(&flags, 1, 1, out);
			ormask = 128;
		} else
			ormask >>= 1;
		if (value&mask)
			flags |= ormask;
		mask >>= 1;
	}
}

void setgamma(int gamma)
{
	int mask = 1, tmp, len = 0;
	if (gamma < 2) {
		printf("Error: gamma passed %d\n", gamma);
		exit(1);
	}
	tmp = gamma;
	while (tmp != 1)
	{
		tmp >>= 1;
		mask <<= 1;
		len++;
	}
	//printf("len(gamma) == %d\n", len*2);
	gamma = gamma&(mask-1);
	mask >>= 1;
	while (len > 1)
	{
		len--;
		tmp = (((gamma&mask)>>len)<<1) | 1;
		setbits(tmp, 2);
		mask >>= 1;
	}
	setbits((gamma&1)<<1, 2);
}

void merge_sort(void *array, int count, int size, int cf(void *a, void *b))
{
	int i, join, actualsize;
	int left, middle;
	char *a = (char *)array;
	char tmp_item[size];

	for (i = 0; i < count; i += 2)
	{
		if (i < count - 1)
		{
			if (cf(&a[i * size], &a[(i + 1) * size]) > 0) {
				memcpy(tmp_item, &a[i * size], size);
				memcpy(&a[i * size], &a[(i+1) * size], size);
				memcpy(&a[(i+1) * size], tmp_item, size);
			}
		}
	}
	for (i = 4; i < count*2; i *= 2)
	{
		for (join = 0; join < count; join += i)
		{
			actualsize = count - join;
			if (actualsize > i)
				actualsize = i;
			left = join;
			middle = join + i/2;
			while (left < middle && middle < join+actualsize) {
				if (cf(&a[left * size], &a[middle * size]) > 0) {
					memcpy(tmp_item, &a[middle * size], size);
					memmove(&a[(left+1) * size], &a[left * size], size*(middle-left));
					memcpy(&a[left * size], tmp_item, size);
					middle++;
				}
				left++;
			}
		}
	}
}

void search(tree *t, char *file, int size, pair *points)
{
	tree *tmp;
	linkedlist *ltmp;
	int i, k, len, idx;
	int s1, s2, s3;

	add_string(t, file, 0, size, DEPTH);
	points[0].index = 0;
	points[0].length = 1;
	for (i = 1; i < size; i++)
	{
		tmp = t;
		len = 0;
		idx = i;
		for (k = 0; i+len < size && k < DEPTH; k++)
		{
			if (!tmp->branches[(unsigned int)file[i+k]])
			{
				if (!len)
					len = 1;
				break;
			}
			len++;
			tmp = tmp->branches[(unsigned int)file[i+k]];
			idx = tmp->index;
		}
		if (i+len < size && k == DEPTH) {
			s1 = i+len;
			s2 = 0;
			s3 = len;
			ltmp = tmp->branches[(unsigned int)file[i+k]];
			while(ltmp && i+len < size) {
				if (file[i+len] == file[ltmp->index+len] && strncmp(&file[s1], &file[ltmp->index+s3], s2) == 0) {
					idx = ltmp->index;
					len++;
					s2++;
					while (i+len < size &&
							file[i+len] == file[idx+len])
					{
						len++;
						s2++;
					}
				}
				ltmp = ltmp->next;
			}
		}
		points[i].length = len;
		points[i].index = idx;
		points[i].start = i;
		add_string(t, &file[i], i, size - i, DEPTH);
	}
	merge_sort(points, size, sizeof(pair), sort_by_length);
}

void add_string(tree *t, char *string, int index, int size, int depth)
{
	linkedlist *l;
	int i;

	for (i = 0; i < size && i < depth; i++)
	{
		if (t->branches[(unsigned int)string[i]] == NULL) {
			t->branches[(unsigned int)string[i]] = malloc(sizeof(tree));
			memset(((tree *)t->branches[(unsigned int)string[i]])->branches, 0, sizeof(void *)*RANGE);
		}
		t = t->branches[(unsigned int)string[i]];
		t->index = index;
	}

	if (i < size) {
		l = malloc(sizeof(linkedlist));
		l->index = index;
		l->next = t->branches[(unsigned int)string[i]];
		t->index = index;
		t->branches[(unsigned int)string[i]] = l;
	}
}

int bitmask_fill(pair *points, pair *touse, int size)
{
	int touse_len, idx, len, i, j;
	unsigned int bitmask[(size+31)>>5];
	memset(bitmask, 0, ((size+31)>>5)*sizeof(unsigned int));

	touse_len = 0;
	len = points[0].length;
	for (i = 0; i < size; i++)
	{
		idx = points[i].start;
		len = points[i].length;
		if (idx) {
			if (!(bitmask[idx>>5]&(1<<(idx&31))) &&
				!(bitmask[(idx+len-1)>>5]&(1<<((idx+len-1)&31)))) {
				touse[touse_len++] = points[i];
				for (j = 0; j < len; j++)
					bitmask[(idx+j)>>5] |= (1<<((idx+j)&31));
			} else {
				if (!(bitmask[idx>>5]&(1<<(idx&31)))) {
					len = 1;
					while (!(bitmask[(idx+len)>>5]&(1<<((idx+len)&31))))
						len++;
					points[i].length = len;
					touse[touse_len++] = points[i];
					for (j = 0; j < len; j++)
						bitmask[(idx+j)>>5] |= (1<<((idx+j)&31));
				}
			}
		}
	}
	merge_sort(touse, touse_len, sizeof(pair), sort_by_start);
	return touse_len;
}

int sort_by_length(void *a, void *b)
{
	pair *left = (pair *) a;
	pair *right = (pair *) b;
	return right->length - left->length;
}

int sort_by_index(void *a, void *b)
{
	pair *left = (pair *) a;
	pair *right = (pair *) b;
	return left->index - right->index;
}

int sort_by_start(void *a, void *b)
{
	pair *left = (pair *) a;
	pair *right = (pair *) b;
	return left->start - right->start;
}

void raw(char c)
{
	setbits(0, 1);
	fwrite(&c, 1, 1, out);
	lwm = 0;
}

void zero(void)
{
	setbits(64+32+16, 7);
	lwm = 0;
}

void onenear(int idx)
{
	setbits(64+32+16+idx, 7);
	lwm = 0;
}

void twoorthree(int off, int three)
{
	unsigned char c = off<<1;
	if (three)
		c |= 1;
	setbits(4+2, 3);
	fwrite(&c, 1, 1, out);
	recentoff = off;
	lwm = 1;
}

void recent(int length)
{
	setbits(8, 4);
	setgamma(length);
	lwm = 1;
}

void variable(int shift, char *string, int off, int length, int start)
{
	int orglength, i;
	unsigned char c;

	orglength = length;
	if (off >= 32000)
		length--;
	if (off >= 1280)
		length--;
	if (off < 128)
		length -= 2;
	if (length > 1) {
		setbits(2, 2);
		setgamma((off>>8)+shift);
		c = off&255;
		fwrite(&c, 1, 1, out);
		setgamma(length);
		recentoff = off;
		lwm = 1;
	} else {
		for (i = 0; i < orglength; i++)
		{
			if (off < 16) {
				onenear(off);
			} else if (!string[i]) {
				zero();
			} else {
				raw(string[i]);
			}
		}
	}
}

void encode(char *outfile, char *file, pair *touse, int touse_len)
{
	int i, idx, off, len;
	char c;

	out = fopen(outfile, "wb");
	if (!out) {
		printf("Error opening output.\n");
		exit(2);
	}
	fwrite(&file[0], 1, 1, out);
	fwrite(&flags, 1, 1, out);
	for (i = 0; i < touse_len; i++)
	{
		idx = touse[i].index;
		off = touse[i].start - idx;
		len = touse[i].length;
		if (len > 1 && len < 4 && off > 0 && off < 128) {
			twoorthree(off, len == 3);
		} else if (len > 1 && lwm == 0) {
			if (off == recentoff) {
				recent(len);
			} else {
				variable(3, &file[touse[i].start], off, len, touse[i].start);
			}
		} else if (len > 1) {
			variable(2, &file[touse[i].start], off, len, touse[i].start);
		} else if (off > 0 && off < 16) {
			onenear(off);
		} else if (file[touse[i].start] == 0) {
			zero();
		} else {
			raw(file[touse[i].start]);
		}
	}
	setbits(4+2, 3);
	c = 0;
	fwrite(&c, 1, 1, out);
	fseek(out, file_index, SEEK_SET);
	fwrite(&flags, 1, 1, out);
	fclose(out);
}

int main(int argc, char *argv[])
{
	int in, size;
	char *file;
	int touse_len;
	tree t;
	pair *points, *touse;

	if (argc == 3) {
		in = open(argv[1], O_RDONLY | O_BINARY);
		size = lseek(in, 0, SEEK_END);
		file = mmap(NULL, size, PROT_READ, MAP_SHARED, in, 0);
		memset(t.branches, 0, sizeof(void *)*RANGE);
		points = malloc(size*sizeof(pair));
		search(&t, file, size, points);
		touse = malloc(size*sizeof(pair));
		touse_len = bitmask_fill(points, touse, size);
		encode(argv[2], file, touse, touse_len);

		munmap(file, size);
		close(in);
	}
	return 0;
}
