# gzip-compatible compression filter.

implement Filter;

include "sys.m";
	sys:	Sys;

include "filter.m";

GZMAGIC1:	con byte 16r1f;
GZMAGIC2:	con byte 16r8b;

GZDEFLATE:	con byte 8;

GZFTEXT:	con byte 1 << 0;		# file is text
GZFHCRC:	con byte 1 << 1;		# crc of header included
GZFEXTRA:	con byte 1 << 2;		# extra header included
GZFNAME:	con byte 1 << 3;		# name of file included
GZFCOMMENT:	con byte 1 << 4;		# header comment included
GZFMASK:	con (byte 1 << 5) - byte 1;	# mask of specified bits

GZXFAST:	con byte 2;			# used fast algorithm little compression
GZXBEST:	con byte 4;			# used maximum compression algorithm

GZOSFAT:	con byte 0;			# FAT file system
GZOSAMIGA:	con byte 1;			# Amiga
GZOSVMS:	con byte 2;			# VMS or OpenVMS
GZOSUNIX:	con byte 3;			# Unix
GZOSVMCMS:	con byte 4;			# VM/CMS
GZOSATARI:	con byte 5;			# Atari TOS
GZOSHPFS:	con byte 6;			# HPFS file system
GZOSMAC:	con byte 7;			# Macintosh
GZOSZSYS:	con byte 8;			# Z-System
GZOSCPM:	con byte 9;			# CP/M
GZOSTOPS20:	con byte 10;			# TOPS-20
GZOSNTFS:	con byte 11;			# NTFS file system
GZOSQDOS:	con byte 12;			# QDOS
GZOSACORN:	con byte 13;			# Acorn RISCOS
GZOSUNK:	con byte 255;

GZCRCPOLY:	con int 16redb88320;
GZOSINFERNO:	con GZOSUNIX;


LZstate: adt
{
	hist:		array of byte;		# [HistSize];
	epos:		int;			# end of history buffer
	pos:		int;			# current location in history buffer
	eof:		int;
	hash:		array of int;		# [Nhash] hash chains
	nexts:		array of int;		# [MaxOff]
	me:		int;			# pos in hash chains
	dot:		int;			# dawn of time in history
	prevlen:	int;			# lazy matching state
	prevoff:	int;
	maxchars:	int;			# compressor tuning
	maxdefer:	int;

	crctab: array of int;
	crc:		int;
	tot:		int;
	headers:	int;

	outbuf:		array of byte;		# current output buffer;
	out:		int;			# current position in the output buffer
	bits:		int;			# bit shift register
	nbits:		int;

	verbose:	int;
	debug:		int;

	lzb:		ref LZblock;
	slop:		array of byte;
	dlitlentab:	array of Huff;		# [Nlitlen]
	dofftab:	array of Huff;		# [Noff];
	hlitlentab:	array of Huff;		# [Nlitlen];
	dyncode:	ref Dyncode;
	hdyncode:	ref Dyncode;
	c:		chan of ref Rq;
	rc:		chan of int;
};

#
# lempel-ziv compressed block
#
LZblock: adt
{
	litlen:		array of byte;			# [MaxUncBlock+1];
	off:		array of int;			# [MaxUncBlock+1];
	litlencount:	array of int;			# [Nlitlen];
	offcount:	array of int;			# [Noff];
	entries:	int;				# entries in litlen & off tables
	bytes:		int;				# consumed from the input
	excost:		int;				# cost of encoding extra len & off bits
};

#
# encoding of dynamic huffman trees
#
Dyncode: adt
{
	nlit:		int;
	noff:		int;
	nclen:		int;
	ncode:		int;
	codetab:	array of Huff;		# [Nclen];
	codes:		array of byte;		# [Nlitlen+Noff];
	codeaux:	array of byte;		# [Nlitlen+Noff];
};

#
# huffman code table
#
Huff: adt
{
	bits:		int;				# length of the code
	encode:		int;				# the code
};

DeflateBlock:	con 64*1024-258-1;
DeflateOut:	con 258+10;


DeflateUnc:	con 0;			# uncompressed block
DeflateFix:	con 1;			# fixed huffman codes
DeflateDyn:	con 2;			# dynamic huffman codes

DeflateEob:	con 256;		# end of block code in lit/len book

LenStart:	con 257;		# start of length codes in litlen
Nlitlen:	con 288;		# number of litlen codes
Noff:		con 30;			# number of offset codes
Nclen:		con 19;			# number of codelen codes

MaxLeaf:	con Nlitlen;
MaxHuffBits:	con 15;			# max bits in a huffman code
ChainMem:	con 2 * MaxHuffBits * (MaxHuffBits + 1);

MaxUncBlock:	con 64*1024-1;		# maximum size of uncompressed block

MaxOff:		con 32*1024;
MinMatch:	con 3;			# shortest match possible
MaxMatch:	con 258;		# longest match possible
MinMatchMaxOff:	con 4096;		# max profitable offset for small match;
					#  assumes 8 bits for len; 5+10 for offset
HistSlop:	con 4096;		# slop for fewer calls to lzcomp
HistSize:	con MaxOff + 2*HistSlop;

Hshift:		con 4;			# nice compromise between space & time
Nhash:		con 1<<(Hshift*MinMatch);
Hmask:		con Nhash-1;

MaxOffCode:	con 256;		# biggest offset looked up in direct table

EstLitBits:	con 8;
EstLenBits:	con 4;
EstOffBits:	con 5;

# conversion from len to code word
lencode := array[MaxMatch] of int;

#
# conversion from off to code word
# off <= MaxOffCode ? offcode[off] : bigoffcode[(off-1) >> 7]
#
offcode := array[MaxOffCode + 1] of int;
bigoffcode := array[256] of int;

# litlen code words LenStart-285 extra bits
litlenbase := array[Nlitlen-LenStart] of int;
litlenextra := array[Nlitlen-LenStart] of
{
	0, 0, 0,
	0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
	2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
	4, 5, 5, 5, 5, 0, 0, 0
};

# offset code word extra bits
offbase := array[Noff] of int;
offextra := array[] of
{
	0,  0,  0,  0,  1,  1,  2,  2,  3,  3,
	4,  4,  5,  5,  6,  6,  7,  7,  8,  8,
	9,  9,  10, 10, 11, 11, 12, 12, 13, 13,
	0,  0,
};

# order code lengths
clenorder := array[Nclen] of
{
        16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
};

# static huffman tables
litlentab : array of Huff;
offtab : array of Huff;
hofftab : array of Huff;

# bit reversal for brain dead endian swap in huffman codes
revtab: array of byte;

init()
{
	sys = load Sys Sys->PATH;

	bitcount := array[MaxHuffBits] of int;
	i, j, ci, n: int;

	# byte reverse table
	revtab = array[256] of byte;
	for(i=0; i<256; i++){
		revtab[i] = byte 0;
		for(j=0; j<8; j++)
			if(i & (1<<j))
				revtab[i] |= byte 16r80 >> j;
	}

	litlentab = array[Nlitlen] of Huff;
	offtab = array[Noff] of Huff;
	hofftab = array[Noff] of { * => Huff(0, 0) };

	# static Litlen bit lengths
	for(i=0; i<144; i++)
		litlentab[i].bits = 8;
	for(i=144; i<256; i++)
		litlentab[i].bits = 9;
	for(i=256; i<280; i++)
		litlentab[i].bits = 7;
	for(i=280; i<Nlitlen; i++)
		litlentab[i].bits = 8;

	for(i = 0; i < 10; i++)
		bitcount[i] = 0;
	bitcount[8] += 144 - 0;
	bitcount[9] += 256 - 144;
	bitcount[7] += 280 - 256;
	bitcount[8] += Nlitlen - 280;

	hufftabinit(litlentab, Nlitlen, bitcount, 9);

	# static offset bit lengths
	for(i = 0; i < Noff; i++)
		offtab[i].bits = 5;

	for(i = 0; i < 5; i++)
		bitcount[i] = 0;
	bitcount[5] = Noff;

	hufftabinit(offtab, Noff, bitcount, 5);

	bitcount[0] = 0;
	bitcount[1] = 0;
	mkprecode(hofftab, bitcount, 2, MaxHuffBits);

	# conversion tables for lens & offs to codes
	ci = 0;
	for(i = LenStart; i < 286; i++){
		n = ci + (1 << litlenextra[i - LenStart]);
		litlenbase[i - LenStart] = ci;
		for(; ci < n; ci++)
			lencode[ci] = i;
	}
	# patch up special case for len MaxMatch
	lencode[MaxMatch-MinMatch] = 285;
	litlenbase[285-LenStart] = MaxMatch-MinMatch;

	ci = 1;
	for(i = 0; i < 16; i++){
		n = ci + (1 << offextra[i]);
		offbase[i] = ci;
		for(; ci < n; ci++)
			offcode[ci] = i;
	}

	ci = (LenStart - 1) >> 7;
	for(; i < 30; i++){
		n = ci + (1 << (offextra[i] - 7));
		offbase[i] = (ci << 7) + 1;
		for(; ci < n; ci++)
			bigoffcode[ci] = i;
	}
}

start(param: string): chan of ref Rq
{
	# param contains flags:
	# [0-9] - compression level
	# v verbose
	# d debug
	lz := ref LZstate;
	level := 6;
	lz.verbose = lz.debug = 0;
	lz.headers = 0;
	lz.crc = lz.tot = 0;
	# XXX could also put filename and modification time in param
	for (i := 0; i < len param; i++) {
		case param[i] {
		'0' to '9' =>
			level = param[i] - '0';
		'v' =>
			lz.verbose = 1;
		'h' =>
			lz.headers = 1;
		'd' =>
			lz.debug = 1;
		}
	}
	
	lz.hist = array[HistSize] of byte;
	lz.hash = array[Nhash] of int;
	lz.nexts = array[MaxOff] of int;
	lz.slop = array[2*MaxMatch] of byte;
	lz.dlitlentab = array[Nlitlen] of Huff;
	lz.dofftab = array[Noff] of Huff;
	lz.hlitlentab = array[Nlitlen] of Huff;

	lz.lzb = ref LZblock;
	lzb := lz.lzb;
	lzb.litlen = array[MaxUncBlock+1] of byte;
	lzb.off = array[MaxUncBlock+1] of int;
	lzb.litlencount = array[Nlitlen] of int;
	lzb.offcount = array[Noff] of int;

	lz.dyncode = ref Dyncode;
	lz.dyncode.codetab =array[Nclen] of Huff;
	lz.dyncode.codes =array[Nlitlen+Noff] of byte;
	lz.dyncode.codeaux = array[Nlitlen+Noff] of byte;
	lz.hdyncode = ref Dyncode;
	lz.hdyncode.codetab =array[Nclen] of Huff;
	lz.hdyncode.codes =array[Nlitlen+Noff] of byte;
	lz.hdyncode.codeaux = array[Nlitlen+Noff] of byte;

	for(i = 0; i < MaxOff; i++)
		lz.nexts[i] = 0;
	for(i = 0; i < Nhash; i++)
		lz.hash[i] = 0;
	lz.pos = 0;
	lz.epos = 0;
	lz.prevlen = MinMatch - 1;
	lz.prevoff = 0;
	lz.eof = 0;
	lz.me = 4 * MaxOff;
	lz.dot = lz.me;
	lz.bits = 0;
	lz.nbits = 0;
	if(level < 5) {
		lz.maxchars = 1;
		lz.maxdefer = 0;
	} else if(level == 9) {
		lz.maxchars = 4000;
		lz.maxdefer = MaxMatch;
	} else {
		lz.maxchars = 200;
		lz.maxdefer = MaxMatch / 4;
	}
	if (lz.headers)
		lz.crctab = mkcrctab(GZCRCPOLY);
	lz.c = chan of ref Rq;
	lz.rc = chan of int;
	spawn deflate(lz);
	return lz.c;
}

# return (eof, nbytes)
fillbuf(lz: ref LZstate, buf: array of byte): (int, int)
{
	n := 0;
	while (n < len buf) {
		lz.c <-= ref Rq.Fill(buf[n:], lz.rc);
		nr := <-lz.rc;
		if (nr == -1)
			exit;
		if (nr == 0)
			return (1, n);
		n += nr;
	}
	return (0, n);
}

deflate(lz: ref LZstate)
{
	lz.c <-= ref Rq.Start(sys->pctl(0, nil));

	if (lz.headers)
		header(lz);
	buf := array[DeflateBlock] of byte;
	out := array[DeflateBlock + DeflateOut] of byte;
	eof := 0;
	for (;;) {
		nslop := lz.epos - lz.pos;
		nbuf := 0;
		if (!eof) {
			(eof, nbuf) = fillbuf(lz, buf);
			inblock(lz, buf[0:nbuf]);
		}
		if(eof && nbuf == 0 && nslop == 0) {
			if(lz.nbits) {
				out[0] = byte lz.bits;
				lz.nbits = 0;
				lz.c <-= ref Rq.Result(out[0:1], lz.rc);
				if (<-lz.rc == -1)
					exit;
				continue;
			}
			if (lz.headers)
				footer(lz);
			lz.c <-= ref Rq.Finished(nil);
			exit;
		}

		lz.outbuf = out;

		if(nslop > 2*MaxMatch) {
			lz.c <-= ref Rq.Error(sys->sprint("slop too large: %d", nslop));
			exit;
		}
		lz.slop[0:] = lz.hist[lz.pos:lz.epos];	# memmove(slop, lz.pos, nslop);
	
		lzb := lz.lzb;
		for(i := 0; i < Nlitlen; i++)
			lzb.litlencount[i] = 0;
		for(i = 0; i < Noff; i++)
			lzb.offcount[i] = 0;
		lzb.litlencount[DeflateEob]++;
	
		lzb.bytes = 0;
		lzb.entries = 0;
		lzb.excost = 0;
		lz.eof = 0;
	
		n := 0;
		while(n < nbuf || eof && !lz.eof){
			if(!lz.eof) {
				if(lz.pos >= MaxOff + HistSlop) {
					lz.pos -= MaxOff + HistSlop;
					lz.epos -= MaxOff + HistSlop;
					lz.hist[:] = lz.hist[MaxOff + HistSlop: MaxOff + HistSlop + lz.epos];
				}
				m := HistSlop - (lz.epos - lz.pos);
				if(lz.epos + m > HistSize) {
					lz.c <-= ref Rq.Error("read too long");
					exit;
				}
				if(m >= nbuf - n) {
					m = nbuf - n;
					lz.eof = eof;
				}
				lz.hist[lz.epos:] = buf[n:n+m];
				n += m;
				lz.epos += m;
			}
			lzcomp(lz, lzb, lz.epos - lz.pos);
		}
	
		lz.outbuf = out;
		lz.out = 0;
	
		nunc := lzb.bytes;
		if(nunc < nslop)
			nslop = nunc;
	
		mkprecode(lz.dlitlentab, lzb.litlencount, Nlitlen, MaxHuffBits);
		mkprecode(lz.dofftab, lzb.offcount, Noff, MaxHuffBits);
			
		ndyn := huffcodes(lz.dyncode, lz.dlitlentab, lz.dofftab)
			+ bitcost(lz.dlitlentab, lzb.litlencount, Nlitlen)
			+ bitcost(lz.dofftab, lzb.offcount, Noff)
			+ lzb.excost;
	
		litcount := array[Nlitlen] of int;
		for(i = 0; i < Nlitlen; i++)
			litcount[i] = 0;
		for(i = 0; i < nslop; i++)
			litcount[int lz.slop[i]]++;
		for(i = 0; i < nunc-nslop; i++)
			litcount[int buf[i]]++;
		litcount[DeflateEob]++;
	
		mkprecode(lz.hlitlentab, litcount, Nlitlen, MaxHuffBits);
		nhuff := huffcodes(lz.hdyncode, lz.hlitlentab, hofftab)
			+ bitcost(lz.hlitlentab, litcount, Nlitlen);
	
		nfix := bitcost(litlentab, lzb.litlencount, Nlitlen)
			+ bitcost(offtab, lzb.offcount, Noff)
			+ lzb.excost;
	
		lzput(lz, lz.eof && lz.pos == lz.epos, 1);
	
		if(lz.verbose) {
			lz.c <-= ref Rq.Info(sys->sprint("block: %d bytes %d entries %d extra bits",
						nunc, lzb.entries, lzb.excost));
			lz.c <-= ref Rq.Info(sys->sprint("\tuncompressed %d fixed %d dynamic %d huffman %d",
				(nunc + 4) * 8, nfix, ndyn, nhuff));
		}
	
		if((nunc + 4) * 8 < ndyn && (nunc + 4) * 8 < nfix && (nunc + 4) * 8 < nhuff) {
			lzput(lz, DeflateUnc, 2);
			lzflushbits(lz);
	
			lz.outbuf[lz.out++] = byte(nunc);
			lz.outbuf[lz.out++] = byte(nunc >> 8);
			lz.outbuf[lz.out++] = byte(~nunc);
			lz.outbuf[lz.out++] = byte(~nunc >> 8);
	
			lz.outbuf[lz.out:] = lz.slop[:nslop];
			lz.out += nslop;
			lz.outbuf[lz.out:] = buf[:nunc - nslop];
			lz.out += nunc - nslop;
		} else if(ndyn < nfix && ndyn < nhuff) {
			lzput(lz, DeflateDyn, 2);
	
			wrdyncode(lz, lz.dyncode);
			wrblock(lz, lzb.entries, lzb.litlen, lzb.off, lz.dlitlentab, lz.dofftab);
			lzput(lz, lz.dlitlentab[DeflateEob].encode, lz.dlitlentab[DeflateEob].bits);
		} else if(nhuff < nfix){
			lzput(lz, DeflateDyn, 2);
	
			wrdyncode(lz, lz.hdyncode);
			for(i = 0; i < len lzb.off; i++)
				lzb.off[i] = 0;
	
			wrblock(lz, nslop, lz.slop, lzb.off, lz.hlitlentab, hofftab);
			wrblock(lz, nunc-nslop, buf, lzb.off, lz.hlitlentab, hofftab);
			lzput(lz, lz.hlitlentab[DeflateEob].encode, lz.hlitlentab[DeflateEob].bits);
		} else {
			lzput(lz, DeflateFix, 2);
	
			wrblock(lz, lzb.entries, lzb.litlen, lzb.off, litlentab, offtab);
			lzput(lz, litlentab[DeflateEob].encode, litlentab[DeflateEob].bits);
		}

		lz.c <-= ref Rq.Result(out[0:lz.out], lz.rc);
		if (<-lz.rc == -1)
			exit;
	}
}

header(lz: ref LZstate)
{
	buf := array[20] of byte;
	i := 0;
	buf[i++] = byte GZMAGIC1;
	buf[i++] = byte GZMAGIC2;
	buf[i++] = byte GZDEFLATE;

	flags := 0;
	#if(file != nil)
	#	flags |= GZFNAME;
	buf[i++] = byte flags;

	mtime := 0;
	buf[i++] = byte(mtime);
	buf[i++] = byte(mtime>>8);
	buf[i++] = byte(mtime>>16);
	buf[i++] = byte(mtime>>24);

	buf[i++] = byte 0;
	buf[i++] = byte GZOSINFERNO;

	#if((flags & GZFNAME) == GZFNAME){
	#	bout.puts(file);
	#	bout.putb(byte 0);
	#}
	lz.c <-= ref Rq.Result(buf[0:i], lz.rc);
	if (<-lz.rc == -1)
		exit;
}

footer(lz: ref LZstate)
{
	buf := array[8] of byte;
	i := 0;
	buf[i++] = byte(lz.crc);
	buf[i++] = byte(lz.crc>>8);
	buf[i++] = byte(lz.crc>>16);
	buf[i++] = byte(lz.crc>>24);

	buf[i++] = byte(lz.tot);
	buf[i++] = byte(lz.tot>>8);
	buf[i++] = byte(lz.tot>>16);
	buf[i++] = byte(lz.tot>>24);
	lz.c <-= ref Rq.Result(buf[0:i], lz.rc);
	if (<-lz.rc == -1)
		exit;
}

lzput(lz: ref LZstate, bits, nbits: int): int
{
	bits = (bits << lz.nbits) | lz.bits;
	for(nbits += lz.nbits; nbits >= 8; nbits -= 8){
		lz.outbuf[lz.out++] = byte bits;
		bits >>= 8;
	}
	lz.bits = bits;
	lz.nbits = nbits;
	return 0;
}

lzflushbits(lz: ref LZstate): int
{
	if(lz.nbits & 7)
		lzput(lz, 0, 8 - (lz.nbits & 7));
	return 0;
}

#
# write out a block of n samples,
# given lz encoding and counts for huffman tables
# todo: inline lzput
#
wrblock(lz: ref LZstate, n: int, litlen: array of byte, off: array of int, litlentab, offtab: array of Huff): int
{
	for(i := 0; i < n; i++) {
		offset := off[i];
		lit := int litlen[i];
		if(lz.debug) {
			if(offset == 0)
				lz.c <-= ref Rq.Info(sys->sprint("\tlit %.2ux %c", lit, lit));
			else
				lz.c <-= ref Rq.Info(sys->sprint("\t<%d, %d>", offset, lit + MinMatch));
		}
		if(offset == 0)
			lzput(lz, litlentab[lit].encode, litlentab[lit].bits);
		else {
			c := lencode[lit];
			lzput(lz, litlentab[c].encode, litlentab[c].bits);
			c -= LenStart;
			if(litlenextra[c])
				lzput(lz, lit - litlenbase[c], litlenextra[c]);

			if(offset <= MaxOffCode)
				c = offcode[offset];
			else
				c = bigoffcode[(offset - 1) >> 7];
			lzput(lz, offtab[c].encode, offtab[c].bits);
			if(offextra[c])
				lzput(lz, offset - offbase[c], offextra[c]);
		}
	}

	return n;
}

lzcomp(lz: ref LZstate, lzb: ref LZblock, max: int)
{
	q, s, es, t: int;
	you, m: int;

#	hashcheck(lz, "start");

	hist := lz.hist;
	nexts := lz.nexts;
	hash := lz.hash;
	me := lz.me;

	p := lz.pos;
	ep := lz.epos;
	if(p + max < ep)
		ep = p + max;
	if(lz.prevlen != MinMatch - 1)
		p++;

	#
	# hash in the links for any hanging link positions,
	# and calculate the hash for the current position.
	#
	n := MinMatch;
	if(n > ep - p)
		n = ep - p;
	h := 0;
	for(i := 0; i < n - 1; i++) {
		m = me - ((MinMatch-1) - i);
		if(m < lz.dot)
			continue;
		s = p - (me - m);
		if(s < 0)
			s += MaxOff + HistSlop;
		h = hashit(s, hist);
		for(you = hash[h]; me - you < me - m; you = nexts[you & (MaxOff-1)])
			;
		if(you == m)
			continue;
		nexts[m & (MaxOff-1)] = hash[h];
		hash[h] = m;
	}
	for(i = 0; i < n; i++)
		h = ((h << Hshift) ^ int hist[p+i]) & Hmask;

	#
	# me must point to the index in the next/prev arrays
	# corresponding to p's position in the history
	#
	entries := lzb.entries;
	litlencount := lzb.litlencount;
	offcount := lzb.offcount;
	litlen := lzb.litlen;
	off := lzb.off;
	prevlen := lz.prevlen;
	prevoff := lz.prevoff;
	maxdefer := lz.maxdefer;
	maxchars := lz.maxchars;
	excost := 0;
	for(;;) {
		es = p + MaxMatch;
		if(es > ep) {
			if(!lz.eof || ep != lz.epos || p >= ep)
				break;
			es = ep;
		}

		#
		# look for the longest, closest string which
		# matches what we are going to send.  the clever
		# part here is looking for a string 1 longer than
		# are previous best match.
		#
		runlen := prevlen;
		m = 0;
		chars := maxchars;
	matchloop:
		for(you = hash[h]; me-you <= MaxOff && chars > 0; you = nexts[you & (MaxOff-1)]) {
			s = p + runlen;
			if(s >= es)
				break;
			t = s - me + you;
			if(t - runlen < 0)
				t += MaxOff + HistSlop;
			for(; s >= p; s--) {
				if(hist[s] != hist[t]) {
					chars -= p + runlen - s + 1;
					continue matchloop;
				}
				t--;
			}

			#
			# we have a new best match.
			# extend it to it's maximum length
			#
			t += runlen + 2;
			s += runlen + 2;
			for(; s < es; s++) {
				if(hist[s] != hist[t])
					break;
				t++;
			}
			runlen = s - p;
			m = you;
			if(s == es)
				break;
			if(runlen > 7)
				chars >>= 1;
			chars -= runlen;
		}

		#
		# back out of small matches too far in the past
		#
		if(runlen == MinMatch && me - m >= MinMatchMaxOff) {
			runlen = MinMatch - 1;
			m = 0;
		}

		#
		# record the encoding and increment counts for huffman trees
		# if we get a match, defer selecting it until we check for
		# a longer match at the next position.
		#
		if(prevlen >= runlen && prevlen != MinMatch - 1) {
			#
			# old match at least as good; use that one
			#
			n = prevlen - MinMatch;
			litlen[entries] = byte n;
			n = lencode[n];
			litlencount[n]++;
			excost += litlenextra[n - LenStart];

			off[entries++] = prevoff;
			if(prevoff <= MaxOffCode)
				n = offcode[prevoff];
			else
				n = bigoffcode[(prevoff - 1) >> 7];
			offcount[n]++;
			excost += offextra[n];

			runlen = prevlen - 1;
			prevlen = MinMatch - 1;
		} else if(runlen == MinMatch - 1) {
			#
			# no match; just put out the literal
			#
			n = int hist[p];
			litlen[entries] = byte n;
			litlencount[n]++;
			off[entries++] = 0;
			runlen = 1;
		} else {
			if(prevlen != MinMatch - 1) {
				#
				# longer match now. output previous literal,
				# update current match, and try again
				#
				n = int hist[p - 1];
				litlen[entries] = byte n;
				litlencount[n]++;
				off[entries++] = 0;
			}

			prevoff = me - m;

			if(runlen < maxdefer) {
				prevlen = runlen;
				runlen = 1;
			} else {
				n = runlen - MinMatch;
				litlen[entries] = byte n;
				n = lencode[n];
				litlencount[n]++;
				excost += litlenextra[n - LenStart];

				off[entries++] = prevoff;
				if(prevoff <= MaxOffCode)
					n = offcode[prevoff];
				else
					n = bigoffcode[(prevoff - 1) >> 7];
				offcount[n]++;
				excost += offextra[n];
				prevlen = MinMatch - 1;
			}
		}

		#
		# update the hash for the newly matched data
		# this is constructed so the link for the old
		# match in this position must at the end of a chain,
		# and will expire when this match is added, ie it will
		# never be examined for by the match loop.
		# add to the hash chain only if we have the real hash data.
		#
		for(q = p + runlen; p != q; p++) {
			if(p + MinMatch <= ep) {
				nexts[me & (MaxOff-1)] = hash[h];
				hash[h] = me;
				if(p + MinMatch < ep)
					h = ((h << Hshift) ^ int hist[p + MinMatch]) & Hmask;
			}
			me++;
		}
	}

	#
	# we can just store away the lazy state and
	# pick it up next time.  the last block will have eof
	# so we won't have any pending matches
	# however, we need to correct for how much we've encoded
	#
	if(prevlen != MinMatch - 1)
		p--;

	lzb.excost += excost;
	lzb.bytes += p - lz.pos;
	lzb.entries = entries;

	lz.pos = p;
	lz.me = me;
	lz.prevlen = prevlen;
	lz.prevoff = prevoff;

#	hashcheck(lz, "stop");
}

#
# check all the hash list invariants are really satisfied
#
hashcheck(lz: ref LZstate, where: string)
{
	s, age, a, you: int;

	nexts := lz.nexts;
	hash := lz.hash;
	me := lz.me;
	start := lz.pos;
	if(lz.prevlen != MinMatch-1)
		start++;
	found := array [MaxOff] of byte;
	for(i := 0; i < MaxOff; i++)
		found[i] = byte 0;
	for(i = 0; i < Nhash; i++) {
		age = 0;
		for(you = hash[i]; me-you <= MaxOff; you = nexts[you & (MaxOff-1)]) {
			a = me - you;
			if(a < age)
				fatal(lz, sys->sprint("%s: out of order links age %d a %d me %d you %d",
					where, age, a, me, you));

			age = a;

			s = start - a;
			if(s < 0)
				s += MaxOff + HistSlop;

			if(hashit(s, lz.hist) != i)
				fatal(lz, sys->sprint("%s: bad hash chain you %d me %d s %d start %d chain %d hash %d %d %d",
					where, you, me, s, start, i, hashit(s - 1, lz.hist), hashit(s, lz.hist), hashit(s + 1, lz.hist)));

			if(found[you & (MaxOff - 1)] != byte 0)
				fatal(lz, where + ": found link again");
			found[you & (MaxOff - 1)] = byte 1;
		}
	}

	for(you = me - (MaxOff-1); you != me; you++)
		found[you & (MaxOff - 1)] = byte 1;

	for(i = 0; i < MaxOff; i++){
		if(found[i] == byte 0 && nexts[i] != 0)
			fatal(lz, sys->sprint("%s: link not found: max %d at %d", where, me & (MaxOff-1), i));
	}
}

hashit(p: int, hist: array of byte): int
{
	h := 0;
	for(ep := p + MinMatch; p < ep; p++)
		h = ((h << Hshift) ^ int hist[p]) & Hmask;
	return h;
}

#
# make up the dynamic code tables, and return the number of bits
# needed to transmit them.
#
huffcodes(dc: ref Dyncode, littab, offtab: array of Huff): int
{
	i, n, m, c, nlit, noff, ncode, nclen: int;

	codetab := dc.codetab;
	codes := dc.codes;
	codeaux := dc.codeaux;

	#
	# trim the sizes of the tables
	#
	for(nlit = Nlitlen; nlit > 257 && littab[nlit-1].bits == 0; nlit--)
		;
	for(noff = Noff; noff > 1 && offtab[noff-1].bits == 0; noff--)
		;

	#
	# make the code-length code
	#
	for(i = 0; i < nlit; i++)
		codes[i] = byte littab[i].bits;
	for(i = 0; i < noff; i++)
		codes[i + nlit] = byte offtab[i].bits;

	#
	# run-length compress the code-length code
	#
	excost := 0;
	c = 0;
	ncode = nlit+noff;
	for(i = 0; i < ncode; ) {
		n = i + 1;
		v := codes[i];
		while(n < ncode && v == codes[n])
			n++;
		n -= i;
		i += n;
		if(v == byte 0) {
			while(n >= 11) {
				m = n;
				if(m > 138)
					m = 138;
				codes[c] = byte 18;
				codeaux[c++] = byte(m - 11);
				n -= m;
				excost += 7;
			}
			if(n >= 3) {
				codes[c] = byte 17;
				codeaux[c++] = byte(n - 3);
				n = 0;
				excost += 3;
			}
		}
		while(n--) {
			codes[c++] = v;
			while(n >= 3) {
				m = n;
				if(m > 6)
					m = 6;
				codes[c] = byte 16;
				codeaux[c++] = byte(m - 3);
				n -= m;
				excost += 3;
			}
		}
	}

	codecount := array[Nclen] of {* => 0};
	for(i = 0; i < c; i++)
		codecount[int codes[i]]++;
	mkprecode(codetab, codecount, Nclen, 7);

	for(nclen = Nclen; nclen > 4 && codetab[clenorder[nclen-1]].bits == 0; nclen--)
		;

	dc.nlit = nlit;
	dc.noff = noff;
	dc.nclen = nclen;
	dc.ncode = c;

	return 5 + 5 + 4 + nclen * 3 + bitcost(codetab, codecount, Nclen) + excost;
}

wrdyncode(out: ref LZstate, dc: ref Dyncode)
{
	#
	# write out header, then code length code lengths,
	# and code lengths
	#
	lzput(out, dc.nlit-257, 5);
	lzput(out, dc.noff-1, 5);
	lzput(out, dc.nclen-4, 4);

	codetab := dc.codetab;
	for(i := 0; i < dc.nclen; i++)
		lzput(out, codetab[clenorder[i]].bits, 3);

	codes := dc.codes;
	codeaux := dc.codeaux;
	c := dc.ncode;
	for(i = 0; i < c; i++){
		v := int codes[i];
		lzput(out, codetab[v].encode, codetab[v].bits);
		if(v >= 16){
			if(v == 16)
				lzput(out, int codeaux[i], 2);
			else if(v == 17)
				lzput(out, int codeaux[i], 3);
			else # v == 18
				lzput(out, int codeaux[i], 7);
		}
	}
}

bitcost(tab: array of Huff, count: array of int, n: int): int
{
	tot := 0;
	for(i := 0; i < n; i++)
		tot += count[i] * tab[i].bits;
	return tot;
}

hufftabinit(tab: array of Huff, n: int, bitcount: array of int, nbits: int)
{
	nc := array[MaxHuffBits + 1] of int;

	code := 0;
	for(bits := 1; bits <= nbits; bits++) {
		code = (code + bitcount[bits-1]) << 1;
		nc[bits] = code;
	}

	for(i := 0; i < n; i++) {
		bits = tab[i].bits;
		if(bits != 0) {
			code = nc[bits]++ << (16 - bits);
			tab[i].encode = int(revtab[code >> 8]) | (int(revtab[code & 16rff]) << 8);
		}
	}
}

Chain: adt
{
	count:		int;				# occurances of everything in the chain
	leaf:		int;				# leaves to the left of chain, or leaf value
	col:		byte;				# ref count for collecting unused chains
	gen:		byte;				# need to generate chains for next lower level
	up:		int;				# Chain up in the lists
};

Chains: adt
{
	lists:		array of int;			# [MaxHuffBits * 2]
	chains:		array of Chain;			# [ChainMem]
	nleaf:		int;				# number of leaves
	free:		int;
	col:		byte;
	nlists:		int;
};

Nil:	con -1;

#
# fast, low space overhead algorithm for max depth huffman type codes
#
# J. Katajainen, A. Moffat and A. Turpin, "A fast and space-economical
# algorithm for length-limited coding," Proc. Intl. Symp. on Algorithms
# and Computation, Cairns, Australia, Dec. 1995, Lecture Notes in Computer
# Science, Vol 1004, J. Staples, P. Eades, N. Katoh, and A. Moffat, eds.,
# pp 12-21, Springer Verlag, New York, 1995.
#
mkprecode(tab: array of Huff, count: array of int, n, maxbits: int)
{
	cs := ref Chains(array[MaxHuffBits * 2] of int, array[MaxLeaf+ChainMem] of Chain, 0, 0, byte 0, 0);
	bits: int;

	for(i := 0; i < n; i++){
		tab[i].bits = 0;
		tab[i].encode = 0;
	}

	#
	# set up the sorted list of leaves
	#
	m := 0;
	for(i = 0; i < n; i++) {
		if(count[i] != 0){
			cs.chains[m].count = count[i];
			cs.chains[m].leaf = i;
			m++;
		}
	}
	if(m < 2) {
		if(m != 0) {
			m = cs.chains[0].leaf;
			tab[m].bits = 1;
			tab[m].encode = 0;
		}
		return;
	}
	cs.nleaf = m;
	csorts(cs.chains, 0, m);

	cs.free = cs.nleaf + 2;
	cs.col = byte 1;

	#
	# initialize chains for each list
	#
	c := cs.chains;
	cl := cs.nleaf;
	c[cl].count = cs.chains[0].count;
	c[cl].leaf = 1;
	c[cl].col = cs.col;
	c[cl].up = Nil;
	c[cl].gen = byte 0;
	c[cl + 1] = c[cl];
	c[cl + 1].leaf = 2;
	c[cl + 1].count = cs.chains[1].count;
	for(i = 0; i < maxbits; i++){
		cs.lists[i * 2] = cl;
		cs.lists[i * 2 + 1] = cl + 1;
	}

	cs.nlists = 2 * maxbits;
	m = 2 * m - 2;
	for(i = 2; i < m; i++)
		nextchain(cs, cs.nlists - 2);

	bitcount := array[MaxHuffBits + 1] of int;
	bits = 0;
	bitcount[0] = cs.nleaf;
	for(cl = cs.lists[2 * maxbits - 1]; cl != Nil; cl = c[cl].up) {
		m = c[cl].leaf;
		for(i = 0; i < m; i++)
			tab[cs.chains[i].leaf].bits++;
		bitcount[bits++] -= m;
		bitcount[bits] = m;
	}

	hufftabinit(tab, n, bitcount, bits);
}

#
# calculate the next chain on the list
# we can always toss out the old chain
#
nextchain(cs: ref Chains, clist: int)
{
	i, nleaf, sumc: int;

	oc := cs.lists[clist + 1];
	cs.lists[clist] = oc;
	if(oc == Nil)
		return;

	#
	# make sure we have all chains needed to make sumc
	# note it is possible to generate only one of these,
	# use twice that value for sumc, and then generate
	# the second if that preliminary sumc would be chosen.
	# however, this appears to be slower on current tests
	#
	chains := cs.chains;
	if(chains[oc].gen != byte 0) {
		nextchain(cs, clist - 2);
		nextchain(cs, clist - 2);
		chains[oc].gen = byte 0;
	}

	#
	# pick up the chain we're going to add;
	# collect unused chains no free ones are left
	#
	for(c := cs.free; ; c++) {
		if(c >= ChainMem) {
			cs.col++;
			for(i = 0; i < cs.nlists; i++)
				for(c = cs.lists[i]; c != Nil; c = chains[c].up)
					chains[c].col = cs.col;
			c = cs.nleaf;
		}
		if(chains[c].col != cs.col)
			break;
	}

	#
	# pick the cheapest of
	# 1) the next package from the previous list
	# 2) the next leaf
	#
	nleaf = chains[oc].leaf;
	sumc = 0;
	if(clist > 0 && cs.lists[clist-1] != Nil)
		sumc = chains[cs.lists[clist-2]].count + chains[cs.lists[clist-1]].count;
	if(sumc != 0 && (nleaf >= cs.nleaf || chains[nleaf].count > sumc)) {
		chains[c].count = sumc;
		chains[c].leaf = chains[oc].leaf;
		chains[c].up = cs.lists[clist-1];
		chains[c].gen = byte 1;
	} else if(nleaf >= cs.nleaf) {
		cs.lists[clist + 1] = Nil;
		return;
	} else {
		chains[c].leaf = nleaf + 1;
		chains[c].count = chains[nleaf].count;
		chains[c].up = chains[oc].up;
		chains[c].gen = byte 0;
	}
	cs.free = c + 1;

	cs.lists[clist + 1] = c;
	chains[c].col = cs.col;
}

chaincmp(chain: array of Chain, ai, bi: int): int
{
	ac := chain[ai].count;
	bc := chain[bi].count;
	if(ac < bc)
		return -1;
	if(ac > bc)
		return 1;
	ac = chain[ai].leaf;
	bc = chain[bi].leaf;
	if(ac > bc)
		return -1;
	return ac < bc;
}

pivot(chain: array of Chain, a, n: int): int
{
	j := n/6;
	pi := a + j;	# 1/6
	j += j;
	pj := pi + j;	# 1/2
	pk := pj + j;	# 5/6
	if(chaincmp(chain, pi, pj) < 0) {
		if(chaincmp(chain, pi, pk) < 0) {
			if(chaincmp(chain, pj, pk) < 0)
				return pj;
			return pk;
		}
		return pi;
	}
	if(chaincmp(chain, pj, pk) < 0) {
		if(chaincmp(chain, pi, pk) < 0)
			return pi;
		return pk;
	}
	return pj;
}

csorts(chain: array of Chain, a, n: int)
{
	j, pi, pj, pn: int;

	while(n > 1) {
		if(n > 10)
			pi = pivot(chain, a, n);
		else
			pi = a + (n>>1);

		t := chain[pi];
		chain[pi] = chain[a];
		chain[a] = t;
		pi = a;
		pn = a + n;
		pj = pn;
		for(;;) {
			do
				pi++;
			while(pi < pn && chaincmp(chain, pi, a) < 0);
			do
				pj--;
			while(pj > a && chaincmp(chain, pj, a) > 0);
			if(pj < pi)
				break;
			t = chain[pi];
			chain[pi] = chain[pj];
			chain[pj] = t;
		}
		t = chain[a];
		chain[a] = chain[pj];
		chain[pj] = t;
		j = pj - a;

		n = n-j-1;
		if(j >= n) {
			csorts(chain, a, j);
			a += j+1;
		} else {
			csorts(chain, a + (j+1), n);
			n = j;
		}
	}
}

mkcrctab(poly: int): array of int
{
	crctab := array[256] of int;
	for(i := 0; i < 256; i++){
		crc := i;
		for(j := 0; j < 8; j++){
			c := crc & 1;
			crc = (crc >> 1) & 16r7fffffff;
			if(c)
				crc ^= poly;
		}
		crctab[i] = crc;
	}
	return crctab;
}

inblock(lz: ref LZstate, buf: array of byte)
{
	crc := lz.crc;
	n := len buf;
	crc ^= int 16rffffffff;
	for(i := 0; i < n; i++)
		crc = lz.crctab[int(byte crc ^ buf[i])] ^ ((crc >> 8) & 16r00ffffff);
	lz.crc = crc ^ int 16rffffffff;
	lz.tot += n;
}

fatal(lz: ref LZstate, s: string)
{
	lz.c <-= ref Rq.Error(s);
	exit;
}