implement Yacc; include "sys.m"; sys: Sys; print, fprint, sprint: import sys; UTFmax: import Sys; include "bufio.m"; bufio: Bufio; Iobuf: import bufio; include "draw.m"; Yacc: module { init: fn(ctxt: ref Draw->Context, argv: list of string); }; Arg: adt { argv: list of string; c: int; opts: string; init: fn(argv: list of string): ref Arg; opt: fn(arg: self ref Arg): int; arg: fn(arg: self ref Arg): string; }; PARSER: con "/lib/yaccpar"; OFILE: con "tab.b"; FILEU: con "output"; FILED: con "tab.m"; FILEDEBUG: con "debug"; # the following are adjustable # according to memory size ACTSIZE: con 30000; NSTATES: con 2000; TEMPSIZE: con 2000; SYMINC: con 50; # increase for non-term or term RULEINC: con 50; # increase for max rule length prodptr[i] PRODINC: con 100; # increase for productions prodptr WSETINC: con 50; # increase for working sets wsets STATEINC: con 200; # increase for states statemem NAMESIZE: con 50; NTYPES: con 63; ISIZE: con 400; PRIVATE: con 16rE000; # unicode private use # relationships which must hold: # TEMPSIZE >= NTERMS + NNONTERM + 1 # TEMPSIZE >= NSTATES # NTBASE: con 8r10000; ERRCODE: con 8190; ACCEPTCODE: con 8191; YYLEXUNK: con 3; TOKSTART: con 4; #index of first defined token # no, left, right, binary assoc. NOASC, LASC, RASC, BASC: con iota; # flags for state generation DONE, MUSTDO, MUSTLOOKAHEAD: con iota; # flags for a rule having an action, and being reduced ACTFLAG: con 16r4; REDFLAG: con 16r8; # output parser flags YYFLAG1: con -1000; # parse tokens IDENTIFIER, MARK, TERM, LEFT, RIGHT, BINARY, PREC, LCURLY, IDENTCOLON, NUMBER, START, TYPEDEF, TYPENAME, MODULE: con PRIVATE+iota; ENDFILE: con 0; EMPTY: con 1; WHOKNOWS: con 0; OK: con 1; NOMORE: con -1000; # macros for getting associativity and precedence levels ASSOC(i: int): int { return i & 3; } PLEVEL(i: int): int { return (i >> 4) & 16r3f; } TYPE(i: int): int { return (i >> 10) & 16r3f; } # macros for setting associativity and precedence levels SETASC(i, j: int): int { return i | j; } SETPLEV(i, j: int): int { return i | (j << 4); } SETTYPE(i, j: int): int { return i | (j << 10); } # I/O descriptors stderr: ref Sys->FD; fdefine: ref Iobuf; # file for module definition fdebug: ref Iobuf; # y.debug for strings for debugging ftable: ref Iobuf; # y.tab.c file finput: ref Iobuf; # input file foutput: ref Iobuf; # y.output file CodeData, CodeMod, CodeAct: con iota; NCode: con 8192; Code: adt { kind: int; data: array of byte; ndata: int; next: cyclic ref Code; }; codehead: ref Code; codetail: ref Code; modname: string; # name of module # communication variables between various I/O routines infile: string; # input file name numbval: int; # value of an input number tokname: string; # input token name, slop for runes and 0 # structure declarations Lkset: type array of int; Pitem: adt { prod: array of int; off: int; # offset within the production first: int; # first term or non-term in item prodno: int; # production number for sorting }; Item: adt { pitem: Pitem; look: Lkset; }; Symb: adt { name: string; value: int; }; Wset: adt { pitem: Pitem; flag: int; ws: Lkset; }; # storage of names parser := PARSER; yydebug: string; # storage of types ntypes: int; # number of types defined typeset := array[NTYPES] of string;# pointers to type tags # token information ntokens := 0; # number of tokens tokset: array of Symb; toklev: array of int; # vector with the precedence of the terminals # nonterminal information nnonter := -1; # the number of nonterminals nontrst: array of Symb; start: int; # start symbol # state information nstate := 0; # number of states pstate := array[NSTATES+2] of int; # index into statemem to the descriptions of the states statemem : array of Item; tystate := array[NSTATES] of int; # contains type information about the states tstates : array of int; # states generated by terminal gotos ntstates : array of int; # states generated by nonterminal gotos mstates := array[NSTATES] of {* => 0}; # chain of overflows of term/nonterm generation lists lastred: int; # number of last reduction of a state defact := array[NSTATES] of int; # default actions of states # lookahead set information lkst: array of Lkset; nolook := 0; # flag to turn off lookahead computations tbitset := 0; # size of lookahead sets clset: Lkset; # temporary storage for lookahead computations # working set information wsets: array of Wset; cwp: int; # storage for action table amem: array of int; # action table storage memp: int; # next free action table position indgo := array[NSTATES] of int; # index to the stored goto table # temporary vector, indexable by states, terms, or ntokens temp1 := array[TEMPSIZE] of int; # temporary storage, indexed by terms + ntokens or states lineno := 1; # current input line number fatfl := 1; # if on, error is fatal nerrors := 0; # number of errors # assigned token type values extval := 0; ytabc := OFILE; # name of y.tab.c # grammar rule information nprod := 1; # number of productions prdptr: array of array of int; # pointers to descriptions of productions levprd: array of int; # precedence levels for the productions rlines: array of int; # line number for this rule # statistics collection variables zzgoent := 0; zzgobest := 0; zzacent := 0; zzexcp := 0; zzclose := 0; zzrrconf := 0; zzsrconf := 0; zzstate := 0; # optimizer arrays yypgo: array of array of int; optst: array of array of int; ggreed: array of int; pgo: array of int; maxspr: int; # maximum spread of any entry maxoff: int; # maximum offset into a array maxa: int; # storage for information about the nonterminals pres: array of array of array of int; # vector of pointers to productions yielding each nonterminal pfirst: array of Lkset; pempty: array of int; # vector of nonterminals nontrivially deriving e # random stuff picked out from between functions indebug := 0; # debugging flag for cpfir pidebug := 0; # debugging flag for putitem gsdebug := 0; # debugging flag for stagen cldebug := 0; # debugging flag for closure pkdebug := 0; # debugging flag for apack g2debug := 0; # debugging for go2gen adb := 0; # debugging for callopt Resrv : adt { name: string; value: int; }; resrv := array[] of { Resrv("binary", BINARY), Resrv("module", MODULE), Resrv("left", LEFT), Resrv("nonassoc", BINARY), Resrv("prec", PREC), Resrv("right", RIGHT), Resrv("start", START), Resrv("term", TERM), Resrv("token", TERM), Resrv("type", TYPEDEF),}; zznewstate := 0; init(nil: ref Draw->Context, argv: list of string) { sys = load Sys Sys->PATH; bufio = load Bufio Bufio->PATH; stderr = sys->fildes(2); setup(argv); # initialize and read productions tbitset = (ntokens+32)/32; cpres(); # make table of which productions yield a given nonterminal cempty(); # make a table of which nonterminals can match the empty string cpfir(); # make a table of firsts of nonterminals stagen(); # generate the states yypgo = array[nnonter+1] of array of int; optst = array[nstate] of array of int; output(); # write the states and the tables go2out(); hideprod(); summary(); callopt(); others(); bufio->flush(); } setup(argv: list of string) { j, ty: int; ytab := 0; vflag := 0; dflag := 0; stem := 0; stemc := "y"; foutput = nil; fdefine = nil; fdebug = nil; arg := Arg.init(argv); while(c := arg.opt()){ case c{ 'v' or 'V' => vflag++; 'D' => yydebug = arg.arg(); 'd' => dflag++; 'o' => ytab++; ytabc = arg.arg(); 's' => stem++; stemc = arg.arg(); * => usage(); } } argv = arg.argv; if(len argv != 1) usage(); infile = hd argv; finput = bufio->open(infile, Bufio->OREAD); if(finput == nil) error("cannot open '"+infile+"'"); openup(stemc, dflag, vflag, ytab, ytabc); defin(0, "$end"); extval = PRIVATE; # tokens start in unicode 'private use' defin(0, "error"); defin(1, "$accept"); defin(0, "$unk"); i := 0; for(t := gettok(); t != MARK && t != ENDFILE; ) case t { ';' => t = gettok(); START => if(gettok() != IDENTIFIER) error("bad %%start construction"); start = chfind(1, tokname); t = gettok(); TYPEDEF => if(gettok() != TYPENAME) error("bad syntax in %%type"); ty = numbval; for(;;) { t = gettok(); case t { IDENTIFIER => if((t=chfind(1, tokname)) < NTBASE) { j = TYPE(toklev[t]); if(j != 0 && j != ty) error("type redeclaration of token "+ tokset[t].name); else toklev[t] = SETTYPE(toklev[t], ty); } else { j = nontrst[t-NTBASE].value; if(j != 0 && j != ty) error("type redeclaration of nonterminal "+ nontrst[t-NTBASE].name); else nontrst[t-NTBASE].value = ty; } continue; ',' => continue; ';' => t = gettok(); } break; } MODULE => cpymodule(); t = gettok(); LEFT or BINARY or RIGHT or TERM => # nonzero means new prec. and assoc. lev := t-TERM; if(lev) i++; ty = 0; # get identifiers so defined t = gettok(); # there is a type defined if(t == TYPENAME) { ty = numbval; t = gettok(); } for(;;) { case t { ',' => t = gettok(); continue; ';' => break; IDENTIFIER => j = chfind(0, tokname); if(j >= NTBASE) error(tokname+" defined earlier as nonterminal"); if(lev) { if(ASSOC(toklev[j])) error("redeclaration of precedence of "+tokname); toklev[j] = SETASC(toklev[j], lev); toklev[j] = SETPLEV(toklev[j], i); } if(ty) { if(TYPE(toklev[j])) error("redeclaration of type of "+tokname); toklev[j] = SETTYPE(toklev[j],ty); } t = gettok(); if(t == NUMBER) { tokset[j].value = numbval; t = gettok(); } continue; } break; } LCURLY => cpycode(); t = gettok(); * => error("syntax error"); } if(t == ENDFILE) error("unexpected EOF before %%"); if(modname == nil) error("missing %module specification"); moreprod(); prdptr[0] = array[4] of { NTBASE, # added production start, # if start is 0, we will overwrite with the lhs of the first rule 1, 0 }; nprod = 1; curprod := array[RULEINC] of int; t = gettok(); if(t != IDENTCOLON) error("bad syntax on first rule"); if(!start) prdptr[0][1] = chfind(1, tokname); # read rules # put into prdptr array in the format # target # followed by id's of terminals and non-terminals # followd by -nprod while(t != MARK && t != ENDFILE) { mem := 0; # process a rule rlines[nprod] = lineno; if(t == '|') curprod[mem++] = prdptr[nprod-1][0]; else if(t == IDENTCOLON) { curprod[mem] = chfind(1, tokname); if(curprod[mem] < NTBASE) error("token illegal on LHS of grammar rule"); mem++; } else error("illegal rule: missing semicolon or | ?"); # read rule body t = gettok(); for(;;){ while(t == IDENTIFIER) { curprod[mem] = chfind(1, tokname); if(curprod[mem] < NTBASE) levprd[nprod] = toklev[curprod[mem]]; mem++; if(mem >= len curprod){ ncurprod := array[mem+RULEINC] of int; ncurprod[0:] = curprod; curprod = ncurprod; } t = gettok(); } if(t == PREC) { if(gettok() != IDENTIFIER) error("illegal %%prec syntax"); j = chfind(2, tokname); if(j >= NTBASE) error("nonterminal "+nontrst[j-NTBASE].name+" illegal after %%prec"); levprd[nprod] = toklev[j]; t = gettok(); } if(t != '=') break; levprd[nprod] |= ACTFLAG; addcode(CodeAct, "\n"+string nprod+"=>"); cpyact(curprod, mem); # action within rule... if((t=gettok()) == IDENTIFIER) { # make it a nonterminal j = chfind(1, "$$"+string nprod); # # the current rule will become rule number nprod+1 # enter null production for action # moreprod(); prdptr[nprod] = array[2] of {j, -nprod}; # update the production information levprd[nprod+1] = levprd[nprod] & ~ACTFLAG; levprd[nprod] = ACTFLAG; nprod++; rlines[nprod] = lineno; # make the action appear in the original rule curprod[mem++] = j; if(mem >= len curprod){ ncurprod := array[mem+RULEINC] of int; ncurprod[0:] = curprod; curprod = ncurprod; } } } while(t == ';') t = gettok(); curprod[mem++] = -nprod; # check that default action is reasonable if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[curprod[0]-NTBASE].value) { # no explicit action, LHS has value tempty := curprod[1]; if(tempty < 0) error("must return a value, since LHS has a type"); else if(tempty >= NTBASE) tempty = nontrst[tempty-NTBASE].value; else tempty = TYPE(toklev[tempty]); if(tempty != nontrst[curprod[0]-NTBASE].value) error("default action causes potential type clash"); else{ addcodec(CodeAct, '\n'); addcode(CodeAct, string nprod); addcode(CodeAct, "=>\nyyval."); addcode(CodeAct, typeset[tempty]); addcode(CodeAct, " = yys[yyp+1].yyv."); addcode(CodeAct, typeset[tempty]); addcodec(CodeAct, ';'); } } moreprod(); prdptr[nprod] = array[mem] of int; prdptr[nprod][0:] = curprod[:mem]; nprod++; moreprod(); levprd[nprod] = 0; } # # end of all rules # dump out the prefix code # ftable.puts("implement "); ftable.puts(modname); ftable.puts(";\n"); dumpcode(CodeMod); dumpmod(); dumpcode(CodeAct); ftable.puts("YYEOFCODE: con 1;\n"); ftable.puts("YYERRCODE: con 2;\n"); ftable.puts("YYMAXDEPTH: con 150;\n"); ftable.puts("yyval: YYSTYPE;\n"); # # copy any postfix code # if(t == MARK) { ftable.puts("\n#line\t"); ftable.puts(string lineno); ftable.puts("\t\""); ftable.puts(infile); ftable.puts("\"\n"); while((c=finput.getc()) != Bufio->EOF) ftable.putc(c); } finput.close(); } # # allocate enough room to hold another production # moreprod() { n := len prdptr; if(nprod < n) return; n += PRODINC; aprod := array[n] of array of int; aprod[0:] = prdptr; prdptr = aprod; alevprd := array[n] of int; alevprd[0:] = levprd; levprd = alevprd; arlines := array[n] of int; arlines[0:] = rlines; rlines = arlines; } # # define s to be a terminal if t=0 # or a nonterminal if t=1 # defin(nt: int, s: string): int { val := 0; if(nt) { nnonter++; if(nnonter >= len nontrst){ anontrst := array[nnonter + SYMINC] of Symb; anontrst[0:] = nontrst; nontrst = anontrst; } nontrst[nnonter] = Symb(s, 0); return NTBASE + nnonter; } # must be a token ntokens++; if(ntokens >= len tokset){ atokset := array[ntokens + SYMINC] of Symb; atokset[0:] = tokset; tokset = atokset; atoklev := array[ntokens + SYMINC] of int; atoklev[0:] = toklev; toklev = atoklev; } tokset[ntokens].name = s; toklev[ntokens] = 0; # establish value for token # single character literal if(s[0] == ' ' && len s == 1+1){ val = s[1]; }else if(s[0] == ' ' && s[1] == '\\') { # escape sequence if(len s == 2+1) { # single character escape sequence case s[2] { '\'' => val = '\''; '"' => val = '"'; '\\' => val = '\\'; 'a' => val = '\a'; 'b' => val = '\b'; 'n' => val = '\n'; 'r' => val = '\r'; 't' => val = '\t'; 'v' => val = '\v'; * => error("invalid escape "+s[1:3]); } }else if(s[2] == 'u' && len s == 2+1+4) { # \unnnn sequence val = 0; s = s[3:]; while(s != ""){ c := s[0]; if(c >= '0' && c <= '9') c -= '0'; else if(c >= 'a' && c <= 'f') c -= 'a' - 10; else if(c >= 'A' && c <= 'F') c -= 'A' - 10; else error("illegal \\unnnn construction"); val = val * 16 + c; s = s[1:]; } if(val == 0) error("'\\u0000' is illegal"); }else error("unknown escape"); }else val = extval++; tokset[ntokens].value = val; return ntokens; } peekline := 0; gettok(): int { i, match, c: int; tokname = ""; for(;;){ reserve := 0; lineno += peekline; peekline = 0; c = finput.getc(); while(c == ' ' || c == '\n' || c == '\t' || c == '\v') { if(c == '\n') lineno++; c = finput.getc(); } # skip comment if(c != '#') break; lineno += skipcom(); } case c { Bufio->EOF => return ENDFILE; '{' => finput.ungetc(); return '='; '<' => # get, and look up, a type name (union member name) i = 0; while((c=finput.getc()) != '>' && c != Bufio->EOF && c != '\n') tokname[i++] = c; if(c != '>') error("unterminated < ... > clause"); for(i=1; i<=ntypes; i++) if(typeset[i] == tokname) { numbval = i; return TYPENAME; } ntypes++; numbval = ntypes; typeset[numbval] = tokname; return TYPENAME; '"' or '\'' => match = c; tokname[0] = ' '; i = 1; for(;;) { c = finput.getc(); if(c == '\n' || c == Bufio->EOF) error("illegal or missing ' or \"" ); if(c == '\\') { tokname[i++] = '\\'; c = finput.getc(); } else if(c == match) return IDENTIFIER; tokname[i++] = c; } '%' => case c = finput.getc(){ '%' => return MARK; '=' => return PREC; '{' => return LCURLY; } getword(c); # find a reserved word for(c=0; c < len resrv; c++) if(tokname == resrv[c].name) return resrv[c].value; error("invalid escape, or illegal reserved word: "+tokname); '0' to '9' => numbval = c - '0'; while(isdigit(c = finput.getc())) numbval = numbval*10 + c-'0'; finput.ungetc(); return NUMBER; * => if(isword(c) || c=='.' || c=='$') getword(c); else return c; } # look ahead to distinguish IDENTIFIER from IDENTCOLON c = finput.getc(); while(c == ' ' || c == '\t'|| c == '\n' || c == '\v' || c == '/') { if(c == '\n') peekline++; # look for comments if(c == '#') peekline += skipcom(); c = finput.getc(); } if(c == ':') return IDENTCOLON; finput.ungetc(); return IDENTIFIER; } getword(c: int) { i := 0; while(isword(c) || isdigit(c) || c == '-' || c=='.' || c=='$') { tokname[i++] = c; c = finput.getc(); } finput.ungetc(); } # # determine the type of a symbol # fdtype(t: int): int { v : int; s: string; if(t >= NTBASE) { v = nontrst[t-NTBASE].value; s = nontrst[t-NTBASE].name; } else { v = TYPE(toklev[t]); s = tokset[t].name; } if(v <= 0) error("must specify type for "+s); return v; } chfind(t: int, s: string): int { if(s[0] == ' ') t = 0; for(i:=0; i<=ntokens; i++) if(s == tokset[i].name) return i; for(i=0; i<=nnonter; i++) if(s == nontrst[i].name) return NTBASE+i; # cannot find name if(t > 1) error(s+" should have been defined earlier"); return defin(t, s); } # # saves module definition in Code # cpymodule() { if(gettok() != IDENTIFIER) error("bad %%module construction"); if(modname != nil) error("duplicate %%module construction"); modname = tokname; level := 0; for(;;) { if((c:=finput.getc()) == Bufio->EOF) error("EOF encountered while processing %%module"); case c { '\n' => lineno++; '{' => level++; if(level == 1) continue; '}' => level--; # we are finished copying if(level == 0) return; } addcodec(CodeMod, c); } } # # saves code between %{ and %} # cpycode() { c := finput.getc(); if(c == '\n') { c = finput.getc(); lineno++; } addcode(CodeData, "\n#line\t" + string lineno + "\t\"" + infile + "\"\n"); while(c != Bufio->EOF) { if(c == '%') { if((c=finput.getc()) == '}') return; addcodec(CodeData, '%'); } addcodec(CodeData, c); if(c == '\n') lineno++; c = finput.getc(); } error("eof before %%}"); } addcode(k: int, s: string) { for(i := 0; i < len s; i++) addcodec(k, s[i]); } addcodec(k, c: int) { if(codehead == nil || k != codetail.kind || codetail.ndata >= NCode){ cd := ref Code(k, array[NCode+UTFmax] of byte, 0, nil); if(codehead == nil) codehead = cd; else codetail.next = cd; codetail = cd; } codetail.ndata += sys->char2byte(c, codetail.data, codetail.ndata); } dumpcode(til: int) { for(; codehead != nil; codehead = codehead.next){ if(codehead.kind == til) return; if(ftable.write(codehead.data, codehead.ndata) != codehead.ndata) error("can't write output file"); } } # # write out the module declaration and any token info # dumpmod() { if(fdefine != nil) { fdefine.puts(modname); fdefine.puts(": module {\n"); } ftable.puts(modname); ftable.puts(": module {\n"); for(; codehead != nil; codehead = codehead.next){ if(codehead.kind != CodeMod) break; if(ftable.write(codehead.data, codehead.ndata) != codehead.ndata) error("can't write output file"); if(fdefine != nil && fdefine.write(codehead.data, codehead.ndata) != codehead.ndata) error("can't write define file"); } for(i:=TOKSTART; i<=ntokens; i++) { # non-literals c := tokset[i].name[0]; if(c != ' ' && c != '$') { s := tokset[i].name+": con "+string tokset[i].value+";\n"; ftable.puts(s); if(fdefine != nil) fdefine.puts(s); } } if(fdefine != nil) fdefine.puts("};\n"); ftable.puts("\n};\n"); if(fdebug != nil) { fdebug.puts("yytoknames = array[] of {\n"); for(i=1; i<=ntokens; i++) { if(tokset[i].name != nil) fdebug.puts("\t\""+chcopy(tokset[i].name)+"\",\n"); else fdebug.puts("\t\"\",\n"); } fdebug.puts("};\n"); } } # # skip over comments # skipcom is called after reading a '#' # skipcom(): int { c := finput.getc(); while(c != Bufio->EOF) { if(c == '\n') return 1; c = finput.getc(); } error("EOF inside comment"); return 0; } # # copy limbo action to the next ; or closing } # cpyact(curprod: array of int, max: int) { addcode(CodeAct, "\n#line\t"); addcode(CodeAct, string lineno); addcode(CodeAct, "\t\""); addcode(CodeAct, infile); addcode(CodeAct, "\"\n"); brac := 0; loop: for(;;){ c := finput.getc(); swt: case c { ';' => if(brac == 0) { addcodec(CodeAct, c); return; } '{' => brac++; '$' => s := 1; tok := -1; c = finput.getc(); # type description if(c == '<') { finput.ungetc(); if(gettok() != TYPENAME) error("bad syntax on $ clause"); tok = numbval; c = finput.getc(); } if(c == '$') { addcode(CodeAct, "yyval"); # put out the proper tag... if(ntypes) { if(tok < 0) tok = fdtype(curprod[0]); addcode(CodeAct, "."+typeset[tok]); } continue loop; } if(c == '-') { s = -s; c = finput.getc(); } j := 0; if(isdigit(c)) { while(isdigit(c)) { j = j*10 + c-'0'; c = finput.getc(); } finput.ungetc(); j = j*s; if(j >= max) error("Illegal use of $" + string j); }else if(isword(c) || c == '_' || c == '.') { # look for $name finput.ungetc(); if(gettok() != IDENTIFIER) error("$ must be followed by an identifier"); tokn := chfind(2, tokname); fnd := -1; if((c = finput.getc()) != '#') finput.ungetc(); else if(gettok() != NUMBER) error("# must be followed by number"); else fnd = numbval; for(j=1; j= max) error("$name or $name#number not found"); }else{ addcodec(CodeAct, '$'); if(s < 0) addcodec(CodeAct, '-'); finput.ungetc(); continue loop; } addcode(CodeAct, "yys[yypt-" + string(max-j-1) + "].yyv"); # put out the proper tag if(ntypes) { if(j <= 0 && tok < 0) error("must specify type of $" + string j); if(tok < 0) tok = fdtype(curprod[j]); addcodec(CodeAct, '.'); addcode(CodeAct, typeset[tok]); } continue loop; '}' => brac--; if(brac) break; addcodec(CodeAct, c); return; '#' => # a comment addcodec(CodeAct, c); c = finput.getc(); while(c != Bufio->EOF) { if(c == '\n') { lineno++; break swt; } addcodec(CodeAct, c); c = finput.getc(); } error("EOF inside comment"); '\''or '"' => # character string or constant match := c; addcodec(CodeAct, c); while(c = finput.getc()) { if(c == '\\') { addcodec(CodeAct, c); c = finput.getc(); if(c == '\n') lineno++; } else if(c == match) break swt; if(c == '\n') error("newline in string or char const."); addcodec(CodeAct, c); } error("EOF in string or character constant"); Bufio->EOF => error("action does not terminate"); '\n' => lineno++; } addcodec(CodeAct, c); } } openup(stem: string, dflag, vflag, ytab: int, ytabc: string) { buf: string; if(vflag) { buf = stem + "." + FILEU; foutput = bufio->create(buf, Bufio->OWRITE, 8r666); if(foutput == nil) error("can't create " + buf); } if(yydebug != nil) { buf = stem + "." + FILEDEBUG; fdebug = bufio->create(buf, Bufio->OWRITE, 8r666); if(fdebug == nil) error("can't create " + buf); } if(dflag) { buf = stem + "." + FILED; fdefine = bufio->create(buf, Bufio->OWRITE, 8r666); if(fdefine == nil) error("can't create " + buf); } if(ytab == 0) buf = stem + "." + OFILE; else buf = ytabc; ftable = bufio->create(buf, Bufio->OWRITE, 8r666); if(ftable == nil) error("can't create file " + buf); } # # return a pointer to the name of symbol i # symnam(i: int): string { s: string; if(i >= NTBASE) s = nontrst[i-NTBASE].name; else s = tokset[i].name; if(s[0] == ' ') s = s[1:]; return s; } # # write out error comment # error(s: string) { nerrors++; fprint(stderr, "\n fatal error: %s, %s:%d\n", s, infile, lineno); if(!fatfl) return; summary(); exit; # exits("error"); } # # set elements 0 through n-1 to c # aryfil(v: array of int, n, c: int) { for(i:=0; i= NTBASE && pempty[prd[p]-NTBASE] == WHOKNOWS) break; # production can be derived if(p == np) { pempty[prd[0]-NTBASE] = OK; continue more; } } break; } # now, look at the nonterminals, to see if they are all OK for(i=0; i<=nnonter; i++) { # the added production rises or falls as the start symbol ... if(i == 0) continue; if(pempty[i] != OK) { fatfl = 0; error("nonterminal " + nontrst[i].name + " never derives any token string"); } } if(nerrors) { summary(); exit; #exits("error"); } # now, compute the pempty array, to see which nonterminals derive the empty string # set pempty to WHOKNOWS aryfil(pempty, nnonter+1, WHOKNOWS); # loop as long as we keep finding empty nonterminals again: for(;;){ next: for(i=1; i 0}; # states generated by terminal gotos ntstates = array[nnonter+1] of {* => 0};# states generated by nonterminal gotos amem = array[ACTSIZE] of {* => 0}; memp = 0; clset = mkset(); pstate[0] = pstate[1] = 0; aryfil(clset, tbitset, 0); putitem(Pitem(prdptr[0], 0, 0, 0), clset); tystate[0] = MUSTDO; nstate = 1; pstate[2] = pstate[1]; # # now, the main state generation loop # first pass generates all of the states # later passes fix up lookahead # could be sped up a lot by remembering # results of the first pass rather than recomputing # first := 1; for(more := 1; more; first = 0){ more = 0; for(i:=0; i 0) { # terminal symbol if(ch < NTBASE) { setbit(clset, ch); break; } # nonterminal symbol setunion(clset, pfirst[ch-NTBASE]); if(!pempty[ch-NTBASE]) break; } if(ch <= 0) setunion(clset, wsets[v].ws); } # # now loop over productions derived from c # curres := pres[c - NTBASE]; n := len curres; # initially fill the sets nexts: for(s := 0; s < n; s++) { prd := curres[s]; # # put these items into the closure # is the item there # for(v=0; v= len wsets){ awsets := array[cwp + WSETINC] of Wset; awsets[0:] = wsets; wsets = awsets; } wsets[cwp].pitem = Pitem(prd, 0, prd[0], -prd[len prd-1]); wsets[cwp].flag = 1; wsets[cwp].ws = mkset(); if(!nolook) { work = 1; wsets[cwp].ws[0:] = clset; } cwp++; } } } # have computed closure; flags are reset; return if(cldebug && foutput != nil) { foutput.puts("\nState " + string i + ", nolook = " + string nolook + "\n"); for(u:=0; u p1; l--) { if(statemem[l].pitem.prodno < statemem[l-1].pitem.prodno || statemem[l].pitem.prodno == statemem[l-1].pitem.prodno && statemem[l].pitem.off < statemem[l-1].pitem.off) { s := statemem[l]; statemem[l] = statemem[l-1]; statemem[l-1] = s; }else break; } } size1 := p2 - p1; # size of state if(c >= NTBASE) i := ntstates[c-NTBASE]; else i = tstates[c]; look: for(; i != 0; i = mstates[i]) { # get ith state q1 := pstate[i]; q2 := pstate[i+1]; size2 := q2 - q1; if(size1 != size2) continue; k = p1; for(l = q1; l < q2; l++) { if(statemem[l].pitem.prod != statemem[k].pitem.prod || statemem[l].pitem.off != statemem[k].pitem.off) continue look; k++; } # found it pstate[nstate+1] = pstate[nstate]; # delete last state # fix up lookaheads if(nolook) return i; k = p1; for(l = q1; l < q2; l++) { if(setunion(statemem[l].look, statemem[k].look)) tystate[i] = MUSTDO; k++; } return i; } # state is new zznewstate++; if(nolook) error("yacc state/nolook error"); pstate[nstate+2] = p2; if(nstate+1 >= NSTATES) error("too many states"); if(c >= NTBASE) { mstates[nstate] = ntstates[c-NTBASE]; ntstates[c-NTBASE] = nstate; } else { mstates[nstate] = tstates[c]; tstates[c] = nstate; } tystate[nstate] = MUSTDO; return nstate++; } putitem(p: Pitem, set: Lkset) { p.off++; p.first = p.prod[p.off]; if(pidebug && foutput != nil) foutput.puts("putitem(" + writem(p) + "), state " + string nstate + "\n"); j := pstate[nstate+1]; if(j >= len statemem){ asm := array[j + STATEINC] of Item; asm[0:] = statemem; statemem = asm; } statemem[j].pitem = p; if(!nolook){ s := mkset(); s[0:] = set; statemem[j].look = s; } j++; pstate[nstate+1] = j; } # # creates output string for item pointed to by pp # writem(pp: Pitem): string { i: int; p := pp.prod; q := chcopy(nontrst[prdptr[pp.prodno][0]-NTBASE].name) + ": "; npi := pp.off; pi := p == prdptr[pp.prodno]; for(;;){ c := ' '; if(pi == npi) c = '.'; q[len q] = c; i = p[pi++]; if(i <= 0) break; q += chcopy(symnam(i)); } # an item calling for a reduction i = p[npi]; if(i < 0) q += " (" + string -i + ")"; return q; } # # pack state i from temp1 into amem # apack(p: array of int, n: int): int { # # we don't need to worry about checking because # we will only look at entries known to be there... # eliminate leading and trailing 0's # off := 0; for(pp := 0; pp <= n && p[pp] == 0; pp++) off--; # no actions if(pp > n) return 0; for(; n > pp && p[n] == 0; n--) ; p = p[pp:n+1]; # now, find a place for the elements from p to q, inclusive r := len amem - len p; nextk: for(rr := 0; rr <= r; rr++) { qq := rr; for(pp = 0; pp < len p; pp++) { if(p[pp] != 0) if(p[pp] != amem[qq] && amem[qq] != 0) continue nextk; qq++; } # we have found an acceptable k if(pkdebug && foutput != nil) foutput.puts("off = " + string(off+rr) + ", k = " + string rr + "\n"); qq = rr; for(pp = 0; pp < len p; pp++) { if(p[pp]) { if(qq > memp) memp = qq; amem[qq] = p[pp]; } qq++; } if(pkdebug && foutput != nil) { for(pp = 0; pp <= memp; pp += 10) { foutput.putc('\t'); for(qq = pp; qq <= pp+9; qq++) foutput.puts(string amem[qq] + " "); foutput.putc('\n'); } } return off + rr; } error("no space in action table"); return 0; } # # print the output for the states # output() { c, u, v: int; ftable.puts("yyexca := array[] of {"); if(fdebug != nil) fdebug.puts("yystates = array [] of {\n"); noset := mkset(); # output the stuff for state i for(i:=0; i 1 && c < NTBASE && temp1[c] == 0) { for(v=u; v NTBASE && temp1[(c -= NTBASE) + ntokens] == 0) temp1[c+ntokens] = amem[indgo[i]+c]; } if(i == 1) temp1[1] = ACCEPTCODE; # now, we have the shifts; look at the reductions lastred = 0; for(u=0; u 0) continue; lastred = -c; us := wsets[u].ws; for(k:=0; k<=ntokens; k++) { if(!bitset(us, k)) continue; if(temp1[k] == 0) temp1[k] = c; else if(temp1[k] < 0) { # reduce/reduce conflict if(foutput != nil) foutput.puts( "\n" + string i + ": reduce/reduce conflict (red'ns " + string -temp1[k] + " and " + string lastred + " ) on " + symnam(k)); if(-temp1[k] > lastred) temp1[k] = -lastred; zzrrconf++; } else # potential shift/reduce conflict precftn(lastred, k, i); } } wract(i); } if(fdebug != nil) fdebug.puts("};\n"); ftable.puts("};\n"); ftable.puts("YYNPROD: con " + string nprod + ";\n"); ftable.puts("YYPRIVATE: con " + string PRIVATE + ";\n"); ftable.puts("yytoknames: array of string;\n"); ftable.puts("yystates: array of string;\n"); if(yydebug != nil){ ftable.puts("include \"y.debug\";\n"); ftable.puts("yydebug: con " + yydebug + ";\n"); }else{ ftable.puts("yydebug: con 0;\n"); } } # # decide a shift/reduce conflict by precedence. # r is a rule number, t a token number # the conflict is in state s # temp1[t] is changed to reflect the action # precftn(r, t, s: int) { action: int; lp := levprd[r]; lt := toklev[t]; if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) { # conflict if(foutput != nil) foutput.puts( "\n" + string s + ": shift/reduce conflict (shift " + string temp1[t] + "(" + string PLEVEL(lt) + "), red'n " + string r + "(" + string PLEVEL(lp) + ")) on " + symnam(t)); zzsrconf++; return; } if(PLEVEL(lt) == PLEVEL(lp)) action = ASSOC(lt); else if(PLEVEL(lt) > PLEVEL(lp)) action = RASC; # shift else action = LASC; # reduce case action{ BASC => # error action temp1[t] = ERRCODE; LASC => # reduce temp1[t] = -r; } } # # output state i # temp1 has the actions, lastred the default # wract(i: int) { p, p1: int; # find the best choice for lastred lastred = 0; ntimes := 0; for(j:=0; j<=ntokens; j++) { if(temp1[j] >= 0) continue; if(temp1[j]+lastred == 0) continue; # count the number of appearances of temp1[j] count := 0; tred := -temp1[j]; levprd[tred] |= REDFLAG; for(p=0; p<=ntokens; p++) if(temp1[p]+tred == 0) count++; if(count > ntimes) { lastred = tred; ntimes = count; } } # # for error recovery, arrange that, if there is a shift on the # error recovery token, `error', that the default be the error action # if(temp1[2] > 0) lastred = 0; # clear out entries in temp1 which equal lastred # count entries in optst table n := 0; for(p=0; p<=ntokens; p++) { p1 = temp1[p]; if(p1+lastred == 0) temp1[p] = p1 = 0; if(p1 > 0 && p1 != ACCEPTCODE && p1 != ERRCODE) n++; } wrstate(i); defact[i] = lastred; flag := 0; os := array[n*2] of int; n = 0; for(p=0; p<=ntokens; p++) { if((p1=temp1[p]) != 0) { if(p1 < 0) { p1 = -p1; } else if(p1 == ACCEPTCODE) { p1 = -1; } else if(p1 == ERRCODE) { p1 = 0; } else { os[n++] = p; os[n++] = p1; zzacent++; continue; } if(flag++ == 0) ftable.puts("-1, " + string i + ",\n"); ftable.puts("\t" + string p + ", " + string p1 + ",\n"); zzexcp++; } } if(flag) { defact[i] = -2; ftable.puts("\t-2, " + string lastred + ",\n"); } optst[i] = os; } # # writes state i # wrstate(i: int) { j0, j1, u: int; pp, qq: int; if(fdebug != nil) { if(lastred) { fdebug.puts(" nil, #" + string i + "\n"); } else { fdebug.puts(" \""); qq = pstate[i+1]; for(pp=pstate[i]; pp 0) { if(j1 == ACCEPTCODE) foutput.puts("accept"); else if(j1 == ERRCODE) foutput.puts("error"); else foutput.puts("shift "+string j1); } else foutput.puts("reduce " + string -j1 + " (src line " + string rlines[-j1] + ")"); } # output the final production if(lastred) foutput.puts("\n\t. reduce " + string lastred + " (src line " + string rlines[lastred] + ")\n\n"); else foutput.puts("\n\t. error\n\n"); # now, output nonterminal actions j1 = ntokens; for(j0 = 1; j0 <= nnonter; j0++) { j1++; if(temp1[j1]) foutput.puts("\t" + symnam(j0+NTBASE) + " goto " + string temp1[j1] + "\n"); } } # # output the gotos for the nontermninals # go2out() { for(i := 1; i <= nnonter; i++) { go2gen(i); # find the best one to make default best := -1; times := 0; # is j the most frequent for(j := 0; j < nstate; j++) { if(tystate[j] == 0) continue; if(tystate[j] == best) continue; # is tystate[j] the most frequent count := 0; cbest := tystate[j]; for(k := j; k < nstate; k++) if(tystate[k] == cbest) count++; if(count > times) { best = cbest; times = count; } } # best is now the default entry zzgobest += times-1; n := 0; for(j = 0; j < nstate; j++) if(tystate[j] != 0 && tystate[j] != best) n++; goent := array[2*n+1] of int; n = 0; for(j = 0; j < nstate; j++) if(tystate[j] != 0 && tystate[j] != best) { goent[n++] = j; goent[n++] = tystate[j]; zzgoent++; } # now, the default zzgoent++; goent[n] = best; yypgo[i] = goent; } } # # output the gotos for nonterminal c # go2gen(c: int) { i, cc, p, q: int; # first, find nonterminals with gotos on c aryfil(temp1, nnonter+1, 0); temp1[c] = 1; work := 1; while(work) { work = 0; for(i=0; i= 0 && temp1[cc] != 0) { # thus, the left side of production i does too cc = prdptr[i][0]-NTBASE; if(temp1[cc] == 0) { work = 1; temp1[cc] = 1; } } } } # now, we have temp1[c] = 1 if a goto on c in closure of cc if(g2debug && foutput != nil) { foutput.puts(nontrst[c].name); foutput.puts(": gotos on "); for(i=0; i<=nnonter; i++) if(temp1[i]){ foutput.puts(nontrst[i].name); foutput.putc(' '); } foutput.putc('\n'); } # now, go through and put gotos into tystate aryfil(tystate, nstate, 0); for(i=0; i= NTBASE) { # goto on c is possible if(temp1[cc-NTBASE]) { tystate[i] = amem[indgo[i]+c]; break; } } } } } # # in order to free up the mem and amem arrays for the optimizer, # and still be able to output yyr1, etc., after the sizes of # the action array is known, we hide the nonterminals # derived by productions in levprd. # hideprod() { j := 0; levprd[0] = 0; for(i:=1; i j) j = v[p]; if(v[p] < k) k = v[p]; } # nontrivial situation if(k <= j) { # j is now the range # j -= k; # call scj if(k > maxoff) maxoff = k; } tystate[i] = q + 2*j; if(j > maxspr) maxspr = j; } # initialize ggreed table ggreed = array[nnonter+1] of int; for(i = 1; i <= nnonter; i++) { ggreed[i] = 1; j = 0; # minimum entry index is always 0 v = yypgo[i]; q = len v - 1; for(p = 0; p < q ; p += 2) { ggreed[i] += 2; if(v[p] > j) j = v[p]; } ggreed[i] = ggreed[i] + 2*j; if(j > maxoff) maxoff = j; } # now, prepare to put the shift actions into the amem array for(i = 0; i < ACTSIZE; i++) amem[i] = 0; maxa = 0; for(i = 0; i < nstate; i++) { if(tystate[i] == 0 && adb > 1) ftable.puts("State " + string i + ": null\n"); indgo[i] = YYFLAG1; } while((i = nxti()) != NOMORE) if(i >= 0) stin(i); else gin(-i); # print amem array if(adb > 2) for(p = 0; p <= maxa; p += 10) { ftable.puts(string p + " "); for(i = 0; i < 10; i++) ftable.puts(string amem[p+i] + " "); ftable.putc('\n'); } aoutput(); osummary(); } # # finds the next i # nxti(): int { max := 0; maxi := 0; for(i := 1; i <= nnonter; i++) if(ggreed[i] >= max) { max = ggreed[i]; maxi = -i; } for(i = 0; i < nstate; i++) if(tystate[i] >= max) { max = tystate[i]; maxi = i; } if(max == 0) return NOMORE; return maxi; } gin(i: int) { s: int; # enter gotos on nonterminal i into array amem ggreed[i] = 0; q := yypgo[i]; nq := len q - 1; # now, find amem place for it nextgp: for(p := 0; p < ACTSIZE; p++) { if(amem[p]) continue; for(r := 0; r < nq; r += 2) { s = p + q[r] + 1; if(s > maxa){ maxa = s; if(maxa >= ACTSIZE) error("a array overflow"); } if(amem[s]) continue nextgp; } # we have found amem spot amem[p] = q[nq]; if(p > maxa) maxa = p; for(r = 0; r < nq; r += 2) { s = p + q[r] + 1; amem[s] = q[r+1]; } pgo[i] = p; if(adb > 1) ftable.puts("Nonterminal " + string i + ", entry at " + string pgo[i] + "\n"); return; } error("cannot place goto " + string i + "\n"); } stin(i: int) { s: int; tystate[i] = 0; # enter state i into the amem array q := optst[i]; nq := len q; # find an acceptable place nextn: for(n := -maxoff; n < ACTSIZE; n++) { flag := 0; for(r := 0; r < nq; r += 2) { s = q[r] + n; if(s < 0 || s > ACTSIZE) continue nextn; if(amem[s] == 0) flag++; else if(amem[s] != q[r+1]) continue nextn; } # check the position equals another only if the states are identical for(j:=0; j 1) ftable.puts("State " + string i + ": entry at " + string n + " equals state " + string j + "\n"); return; } # we have some disagreement continue nextn; } } for(r = 0; r < nq; r += 2) { s = q[r] + n; if(s > maxa) maxa = s; if(amem[s] != 0 && amem[s] != q[r+1]) error("clobber of a array, pos'n " + string s + ", by " + string q[r+1] + ""); amem[s] = q[r+1]; } indgo[i] = n; if(adb > 1) ftable.puts("State " + string i + ": entry at " + string indgo[i] + "\n"); return; } error("Error; failure to place state " + string i + "\n"); } # # this version is for limbo # write out the optimized parser # aoutput() { ftable.puts("YYLAST:\tcon "+string (maxa+1)+";\n"); arout("yyact", amem, maxa+1); arout("yypact", indgo, nstate); arout("yypgo", pgo, nnonter+1); } # # put out other arrays, copy the parsers # others() { finput = bufio->open(parser, Bufio->OREAD); if(finput == nil) error("cannot find parser " + parser); arout("yyr1", levprd, nprod); aryfil(temp1, nprod, 0); # #yyr2 is the number of rules for each production # for(i:=1; i= 0 && j < 256) { if(temp1[j]) { print("yacc bug -- cant have 2 different Ts with same value\n"); print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name); nerrors++; } temp1[j] = i; if(j > c) c = j; } } for(i = 0; i <= c; i++) if(temp1[i] == 0) temp1[i] = YYLEXUNK; arout("yytok1", temp1, c+1); # table 2 has PRIVATE-PRIVATE+256 aryfil(temp1, 256, 0); c = 0; for(i=1; i<=ntokens; i++) { j = tokset[i].value - PRIVATE; if(j >= 0 && j < 256) { if(temp1[j]) { print("yacc bug -- cant have 2 different Ts with same value\n"); print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name); nerrors++; } temp1[j] = i; if(j > c) c = j; } } arout("yytok2", temp1, c+1); # table 3 has everything else ftable.puts("yytok3 := array[] of {\n"); c = 0; for(i=1; i<=ntokens; i++) { j = tokset[i].value; if(j >= 0 && j < 256) continue; if(j >= PRIVATE && j < 256+PRIVATE) continue; ftable.puts(sprint("%4d,%4d,", j, i)); c++; if(c%5 == 0) ftable.putc('\n'); } ftable.puts(sprint("%4d\n};\n", 0)); # copy parser text while((c=finput.getc()) != Bufio->EOF) { if(c == '$') { if((c = finput.getc()) != 'A') ftable.putc('$'); else { # copy actions dumpcode(-1); c = finput.getc(); } } ftable.putc(c); } ftable.close(); } arout(s: string, v: array of int, n: int) { ftable.puts(s+" := array[] of {"); for(i := 0; i < n; i++) { if(i%10 == 0) ftable.putc('\n'); ftable.puts(sprint("%4d", v[i])); ftable.putc(','); } ftable.puts("\n};\n"); } # # output the summary on y.output # summary() { if(foutput != nil) { foutput.puts("\n" + string ntokens + " terminals, " + string nnonter + " nonterminals\n"); foutput.puts("" + string nprod + " grammar rules, " + string nstate + "/" + string NSTATES + " states\n"); foutput.puts("" + string zzsrconf + " shift/reduce, " + string zzrrconf + " reduce/reduce conflicts reported\n"); foutput.puts("" + string len wsets + " working sets used\n"); foutput.puts("memory: parser " + string memp + "/" + string ACTSIZE + "\n"); foutput.puts(string (zzclose - 2*nstate) + " extra closures\n"); foutput.puts(string zzacent + " shift entries, " + string zzexcp + " exceptions\n"); foutput.puts(string zzgoent + " goto entries\n"); foutput.puts(string zzgobest + " entries saved by goto default\n"); } if(zzsrconf != 0 || zzrrconf != 0) { print("\nconflicts: "); if(zzsrconf) print("%d shift/reduce", zzsrconf); if(zzsrconf && zzrrconf) print(", "); if(zzrrconf) print("%d reduce/reduce", zzrrconf); print("\n"); } if(fdefine != nil) fdefine.close(); } # # write optimizer summary # osummary() { if(foutput == nil) return; i := 0; for(p := maxa; p >= 0; p--) if(amem[p] == 0) i++; foutput.puts("Optimizer space used: output " + string (maxa+1) + "/" + string ACTSIZE + "\n"); foutput.puts(string(maxa+1) + " table entries, " + string i + " zero\n"); foutput.puts("maximum spread: " + string maxspr + ", maximum offset: " + string maxoff + "\n"); } # # copies and protects "'s in q # chcopy(q: string): string { s := ""; j := 0; for(i := 0; i < len q; i++) { if(q[i] == '"') { s += q[j:i] + "\\"; j = i; } } return s + q[j:i]; } usage() { fprint(stderr, "usage: yacc [-vd] [-Dn] [-o output] [-s stem] file\n"); exit; } bitset(set: Lkset, bit: int): int { return set[bit>>5] & (1<<(bit&31)); } setbit(set: Lkset, bit: int): int { return set[bit>>5] |= (1<<(bit&31)); } mkset(): Lkset { return array[tbitset] of {* => 0}; } # # set a to the union of a and b # return 1 if b is not a subset of a, 0 otherwise # setunion(a, b: array of int): int { sub := 0; for(i:=0; i= '0' && c <= '9'; } isword(c: int): int { return c >= 16ra0 || c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z'; } mktemp(t: string): string { return t; } # # arg processing # Arg.init(argv: list of string): ref Arg { if(argv != nil) argv = tl argv; return ref Arg(argv, 0, ""); } Arg.opt(arg: self ref Arg): int { opts := arg.opts; if(opts != ""){ arg.c = opts[0]; arg.opts = opts[1:]; return arg.c; } argv := arg.argv; if(argv == nil) return arg.c = 0; opts = hd argv; if(len opts < 2 || opts[0] != '-') return arg.c = 0; arg.argv = tl argv; if(opts == "--") return arg.c = 0; arg.opts = opts[2:]; return arg.c = opts[1]; } Arg.arg(arg: self ref Arg): string { s := arg.opts; arg.opts = ""; if(s != "") return s; argv := arg.argv; if(argv == nil) return ""; arg.argv = tl argv; return hd argv; }