implement UBFa; # # UBF(A) data encoding interpreter # include "sys.m"; sys: Sys; include "bufio.m"; bufio: Bufio; Iobuf: import bufio; include "ubfa.m"; Syntax: exception(string); Badwrite: exception; dict: array of list of string; dictlock: chan of int; init(m: Bufio) { sys = load Sys Sys->PATH; bufio = m; dict = array[74] of list of string; dictlock = chan[1] of int; } uvatom(s: string): ref UValue.Atom { return ref UValue.Atom(uniq(s)); } uvint(i: int): ref UValue.Int { return ref UValue.Int(i); } uvbig(i: big): ref UValue.Int { return ref UValue.Int(int i); } uvbinary(a: array of byte): ref UValue.Binary { return ref UValue.Binary(a); } uvstring(s: string): ref UValue.String { return ref UValue.String(s); } uvtuple(a: array of ref UValue): ref UValue.Tuple { return ref UValue.Tuple(a); } uvlist(l: list of ref UValue): ref UValue.List { return ref UValue.List(l); } uvtag(s: string, o: ref UValue): ref UValue.Tag { return ref UValue.Tag(uniq(s), o); } # needed only to avoid O(n) len s.s Stack: adt { s: list of ref UValue; n: int; new: fn(): ref Stack; pop: fn(s: self ref Stack): ref UValue raises(Syntax); push: fn(s: self ref Stack, o: ref UValue); }; Stack.new(): ref Stack { return ref Stack(nil, 0); } Stack.pop(s: self ref Stack): ref UValue raises(Syntax) { if(--s.n < 0 || s.s == nil) raise Syntax("parse stack underflow"); v := hd s.s; s.s = tl s.s; return v; } Stack.push(s: self ref Stack, o: ref UValue) { s.s = o :: s.s; s.n++; } Parse: adt { input: ref Iobuf; stack: ref Stack; reg: array of ref UValue; getb: fn(nil: self ref Parse): int raises(Syntax); unget: fn(nil: self ref Parse); }; Parse.getb(p: self ref Parse): int raises(Syntax) { c := p.input.getb(); if(c < 0){ if(c == Bufio->EOF) raise Syntax("unexpected end-of-file"); raise Syntax(sys->sprint("read error: %r")); } return c; } Parse.unget(p: self ref Parse) { p.input.ungetb(); } uniq(s: string): string { if(s == nil) return ""; dictlock <-= 1; h := 0; for(i:=0; i>24) & 16rFF) | g; } h = (h & Sys->Maxint)%len dict; for(l := dict[h]; l != nil; l = tl l) if(hd l == s){ s = hd l; # share space break; } if(l == nil) dict[h] = s :: dict[h]; <-dictlock; return s; } writeubf(out: ref Iobuf, obj: ref UValue): int { { # write it out, put final '$' if(out != nil) writeobj(out, obj); putc(out, '$'); return 0; }exception{ Badwrite => return -1; } } readubf(input: ref Iobuf): (ref UValue, string) { { return (getobj(ref Parse(input, Stack.new(), array[256] of ref UValue)), nil); }exception e{ Syntax => return (nil, sys->sprint("ubf error: offset %bd: %s", input.offset(), e)); } } UValue.isatom(o: self ref UValue): int { return tagof o == tagof UValue.Atom; } UValue.isstring(o: self ref UValue): int { return tagof o == tagof UValue.String; } UValue.isint(o: self ref UValue): int { return tagof o == tagof UValue.Int; } UValue.islist(o: self ref UValue): int { return tagof o == tagof UValue.List; } UValue.istuple(o: self ref UValue): int { return tagof o == tagof UValue.Tuple; } UValue.isbinary(o: self ref UValue): int { return tagof o == tagof UValue.Binary; } UValue.istag(o: self ref UValue): int { return tagof o == tagof UValue.Tag; } UValue.isop(o: self ref UValue, op: string, arity: int): int { pick r := o { Tuple => if(len r.a > 0 && (arity <= 0 || len r.a == arity)) pick s := r.a[0] { Atom => return s.name == op; String => return s.s == op; } } return 0; } UValue.op(o: self ref UValue, arity: int): string { pick r := o { Tuple => if(len r.a > 0 && (arity <= 0 || len r.a == arity)) pick s := r.a[0] { Atom => return s.name; String => return s.s; } } return nil; } UValue.args(o: self ref UValue, arity: int): array of ref UValue { pick r := o { Tuple => if(len r.a > 0 && (arity <= 0 || len r.a == arity)) return r.a[1:]; } return nil; } UValue.els(o: self ref UValue): list of ref UValue { pick r := o { List => return r.l; } return nil; } UValue.val(o: self ref UValue): int { pick r := o { Int => return r.value; } return 0; } UValue.objtag(o: self ref UValue): string { pick r := o { Tag => return r.name; } return nil; } UValue.obj(o: self ref UValue): ref UValue { pick r := o { Tag => return r.o; } return o; } UValue.binary(o: self ref UValue): array of byte { pick r := o { Atom => return array of byte r.name; String => return array of byte r.s; Binary => return r.a; } return nil; } UValue.text(o: self ref UValue): string { pick r := o { Atom => return r.name; String => return r.s; Int => return string r.value; Tuple => s := "{"; for(i := 0; i < len r.a; i++) s += " "+r.a[i].text(); return s+"}"; List => s := "["; for(l := r.l; l != nil; l = tl l) s += " "+(hd l).text(); return s+"]"; Binary => s := "<<"; for(i := 0; i < len r.a; i++) s += sys->sprint(" %.2ux", int r.a[i]); return s+">>"; Tag => return "{'$TYPE', "+r.name+", "+r.o.text()+"}"; * => return "unknown"; } } UValue.eq(o: self ref UValue, v: ref UValue): int { if(v == nil) return 0; if(o == v) return 1; pick r := o { Atom => pick s := v { Atom => return r.name == s.name; } return 0; String => pick s := v { String => return r.s == s.s; } return 0; Int => pick s := v { Int => return r.value == s.value; } return 0; Tuple => pick s := v { Tuple => if(len r.a != len s.a) return 0; for(i := 0; i < len r.a; i++) if(!r.a[i].eq(s.a[i])) return 0; return 1; } return 0; List => pick s := v { List => l1 := r.l; l2 := s.l; while(l1 != nil && l2 != nil){ if(!(hd l1).eq(hd l2)) return 0; l1 = tl l1; l2 = tl l2; } return l1 == l2; } return 0; Binary => pick s := v { Binary => if(len r.a != len s.a) return 0; for(i := 0; i < len r.a; i++) if(r.a[i] != s.a[i]) return 0; return 1; } return 0; Tag => pick s := v { Tag => return r.name == s.name && r.o.eq(s.o); } return 0; * => raise "ubf: bad object"; # can't happen } } S: con byte 1; special := array[256] of { '\n' or '\r' or '\t' or ' ' or ',' => S, '}' => S, '$' => S, '>' => S, '#' => S, '&' => S, '"' => S, '\'' => S, '{' => S, '~' => S, '-' => S, '0' to '9' => S, '%' => S, '`' => S, * => byte 0 }; getobj(p: ref Parse): ref UValue raises(Syntax) { { for(;;){ case p.getb() { '\n' or '\r' or '\t' or ' ' or ',' => ; # white space '%' => while((c := p.getb()) != '%'){ if(c == '\\'){ # do comments really use \? c = p.getb(); if(c != '\\' && c != '%') raise Syntax("invalid escape in comment"); } } '}' => a := array[p.stack.n] of ref UValue; for(i := len a; --i >= 0;) a[i] = p.stack.pop(); return ref UValue.Tuple(a); '$' => if(p.stack.n != 1) raise Syntax("unbalanced stack: size "+string p.stack.n); return p.stack.pop(); '>' => r := p.getb(); if(special[r] == S) raise Syntax("invalid register name"); p.reg[r] = p.stack.pop(); '`' => t := uniq(readdelimitedstring(p, '`')); p.stack.push(ref UValue.Tag(t, p.stack.pop())); * => p.unget(); p.stack.push(readobj(p)); } } }exception{ Syntax => raise; } } readobj(p: ref Parse): ref UValue raises(Syntax) { { case c := p.getb() { '#' => return ref UValue.List(nil); '&' => a := p.stack.pop(); b := p.stack.pop(); pick r := b { List => return ref UValue.List(a :: r.l); # not changed in place: might be shared register value * => raise Syntax("can't make cons with cdr "+b.text()); } '"' => return ref UValue.String(readdelimitedstring(p, c)); '\'' => return ref UValue.Atom(uniq(readdelimitedstring(p, c))); '{' => obj := getobj(ref Parse(p.input, Stack.new(), p.reg)); if(!obj.istuple()) raise Syntax("expected tuple: obj"); return obj; '~' => o := p.stack.pop(); if(!o.isint()) raise Syntax("expected Int before ~"); n := o.val(); if(n < 0) raise Syntax("negative length for binary"); a := array[n] of byte; n = p.input.read(a, len a); if(n != len a){ if(n != Bufio->ERROR) sys->werrstr("short read"); raise Syntax(sys->sprint("cannot read binary data: %r")); } if(p.getb() != '~') raise Syntax("missing closing ~"); return ref UValue.Binary(a); '-' or '0' to '9' => p.unget(); return ref UValue.Int(int readinteger(p)); * => if(p.reg[c] != nil) return p.reg[c]; p.unget(); # point to error raise Syntax(sys->sprint("invalid start character/undefined register #%.2ux",c)); } }exception{ Syntax => raise; } } readdelimitedstring(p: ref Parse, delim: int): string raises(Syntax) { { s := ""; while((c := p.input.getc()) != delim){ # note: we'll use UTF-8 if(c < 0){ if(c == Bufio->ERROR) raise Syntax(sys->sprint("read error: %r")); raise Syntax("unexpected end of file"); } if(c == '\\'){ c = p.getb(); if(c != '\\' && c != delim) raise Syntax("invalid escape"); } s[len s] = c; } return s; }exception{ Syntax => raise; } } readinteger(p: ref Parse): big raises(Syntax) { sign := 1; c := p.getb(); if(c == '-'){ sign = -1; c = p.getb(); if(!(c >= '0' && c <= '9')) raise Syntax("expected integer literal"); } for(n := big 0; c >= '0' && c <= '9'; c = p.getb()){ n = n*big 10 + big((c-'0')*sign); if(n > big Sys->Maxint || n < big(-Sys->Maxint-1)) raise Syntax("integer overflow"); } p.unget(); return n; } writeobj(out: ref Iobuf, o: ref UValue) raises(Badwrite) { { pick r := o { Atom => writedelimitedstring(out, r.name, '\''); String => writedelimitedstring(out, r.s, '"'); Int => puts(out, string r.value); Tuple => # { el * } putc(out, '{'); for(i := 0; i < len r.a; i++){ if(i != 0) putc(out, ' '); writeobj(out, r.a[i]); } putc(out, '}'); List => # # eN & eN-1 & ... & e0 & putc(out, '#'); # put them out in reverse order, each followed by '&' rl: list of ref UValue; for(l := r.l; l != nil; l = tl l) rl = hd l :: rl; for(; rl != nil; rl = tl rl){ writeobj(out, hd rl); putc(out, '&'); } Binary => # Int ~data~ puts(out, string len r.a); putc(out, '~'); if(out.write(r.a, len r.a) != len r.a) raise Badwrite; putc(out, '~'); Tag => # obj `tag` writeobj(out, r.o); writedelimitedstring(out, r.name, '`'); * => raise "ubf: unknown object"; # can't happen } }exception{ Badwrite => raise; } } writedelimitedstring(out: ref Iobuf, s: string, d: int) raises(Badwrite) { { putc(out, d); for(i := 0; i < len s; i++){ c := s[i]; if(c == d || c == '\\') putc(out, '\\'); putc(out, c); } putc(out, d); }exception{ Badwrite => raise; } } puts(out: ref Iobuf, s: string) raises(Badwrite) { if(out.puts(s) == Bufio->ERROR) raise Badwrite; } putc(out: ref Iobuf, c: int) raises(Badwrite) { if(out.putc(c) == Bufio->ERROR) raise Badwrite; }