# HTML module implementation: turns a byte array of HTML text into an array of
# ref Lex tokens (lex) and renders a token back to text for debugging
# (lex2string).
#
# NOTE(review): this copy of the file is damaged.  All original line breaks
# are gone, and text between a '<' and the following '>' has been lost in
# several places -- see the fragments `if(j"){`, `for(k:=j; k' && quote == 0)`,
# `while(i'z')` and `for(i:=0; i return 1;` below.  Because '#' comments run to
# end of line, the `# ignore nulls` and `# for debugging` comments now also
# cover everything after them on the same physical line.  Restore the file
# from a clean copy before changing any code; the notes here describe only
# what is still readable.
#
# Stringtab      adt pairing a name (string) with a val (int).
# chartab        Stringtab table from character-entity names to character
#                values; ampersand() searches it with lookup(chartab, s).
# htmlstringtab  Stringtab table from lower-case tag names to the T*
#                constants; lex2string() scans it linearly to map a tag value
#                back to its name.
#                NOTE(review): "strike" is listed before "samp", which breaks
#                the alphabetical order of the other entries.  lookup()'s body
#                is not visible here; if it relies on sorted order, confirm
#                that this entry is still found.
#
# lex(b, keepwh): array of ref Lex
#   Loads Sys on first use.  Tokens are collected in `a`, which is grown by
#   100 slots whenever it is full.  When the next byte is '<', gettag() returns
#   the tag text and the index after it.  For an end tag (s[1] == '/') rbra is
#   set to RBRA and, if the previous token is Data, one trailing '\r' and then
#   one trailing '\n' are removed from its text; for any other tag a '\r' and
#   then a '\n' immediately following it in the input are skipped.  How keepwh
#   is used is not visible in the surviving text.
#
# (fragment, presumably gettag -- header lost)
#   Copies characters into s until a '>' seen outside quotes, expanding '&'
#   through ampersand() and tracking the open quote character (" or ').
#   Returns (s, i).
#
# ampersand(b, i): (int, int)
#   Returns a character value and an index into b.  Returns ('?', i) when i is
#   at or past the end of b.  A '#' at b[i] starts a separate branch whose body
#   is lost.  Otherwise a name is collected into s and looked up in chartab:
#   Notfound yields ('&', starti+1), a match yields (char, i+1).
#
# lex2string(l): string
#   Debugging aid.  A Data token is rendered as its text in single quotes.
#   Any other token is rendered as '<', then '/' if tag >= RBRA (RBRA is
#   subtracted first), then the upper-cased name of the matching htmlstringtab
#   entry if there is one, then each attribute as " name='value'", then '>'.
implement HTML; include "sys.m"; include "html.m"; sys: Sys; Stringtab: adt { name: string; val: int; }; chartab:= array[] of { Stringtab ("AElig", 'Æ'), ("Aacute", 'Á'), ("Acirc", 'Â'), ("Agrave", 'À'), ("Aring", 'Å'), ("Atilde", 'Ã'), ("Auml", 'Ä'), ("Ccedil", 'Ç'), ("ETH", 'Ð'), ("Eacute", 'É'), ("Ecirc", 'Ê'), ("Egrave", 'È'), ("Euml", 'Ë'), ("Iacute", 'Í'), ("Icirc", 'Î'), ("Igrave", 'Ì'), ("Iuml", 'Ï'), ("Ntilde", 'Ñ'), ("Oacute", 'Ó'), ("Ocirc", 'Ô'), ("Ograve", 'Ò'), ("Oslash", 'Ø'), ("Otilde", 'Õ'), ("Ouml", 'Ö'), ("THORN", 'Þ'), ("Uacute", 'Ú'), ("Ucirc", 'Û'), ("Ugrave", 'Ù'), ("Uuml", 'Ü'), ("Yacute", 'Ý'), ("aElig", 'æ'), ("aacute", 'á'), ("acirc", 'â'), ("agrave", 'à'), ("alpha", 'α'), ("amp", '&'), ("aring", 'å'), ("atilde", 'ã'), ("auml", 'ä'), ("beta", 'β'), ("ccedil", 'ç'), ("cdots", '⋯'), ("chi", 'χ'), ("copy", '©'), ("ddots", '⋱'), ("delta", 'δ'), ("eacute", 'é'), ("ecirc", 'ê'), ("egrave", 'è'), ("emdash", '—'), ("emsp", ' '), ("endash", '–'), ("ensp", ' '), ("epsilon", 'ε'), ("eta", 'η'), ("eth", 'ð'), ("euml", 'ë'), ("gamma", 'γ'), ("gt", '>'), ("iacute", 'í'), ("icirc", 'î'), ("igrave", 'ì'), ("iota", 'ι'), ("iuml", 'ï'), ("kappa", 'κ'), ("lambda", 'λ'), ("ldots", '…'), ("lt", '<'), ("mu", 'μ'), ("nbsp", ' '), ("ntilde", 'ñ'), ("nu", 'ν'), ("oacute", 'ó'), ("ocirc", 'ô'), ("ograve", 'ò'), ("omega", 'ω'), ("omicron", 'ο'), ("oslash", 'ø'), ("otilde", 'õ'), ("ouml", 'ö'), ("phi", 'φ'), ("pi", 'π'), ("psi", 'ψ'), ("quad", ' '), ("quot", '"'), ("reg", '®'), ("rho", 'ρ'), ("shy", '­'), ("sigma", 'σ'), ("sp", ' '), ("szlig", 'ß'), ("tau", 'τ'), ("theta", 'θ'), ("thinsp", ' '), ("thorn", 'þ'), ("trade", '™'), ("uacute", 'ú'), ("ucirc", 'û'), ("ugrave", 'ù'), ("upsilon", 'υ'), ("uuml", 'ü'), ("varepsilon", '∈'), ("varphi", 'ϕ'), ("varpi", 'ϖ'), ("varrho", 'ϱ'), ("vdots", '⋮'), ("vsigma", 'ς'), ("vtheta", 'ϑ'), ("xi", 'ξ'), ("yacute", 'ý'), ("yuml", 'ÿ'), ("zeta", 'ζ'), }; htmlstringtab := array[] of { Stringtab ("a", Ta), ("address", Taddress), 
("applet", Tapplet), ("area", Tarea), ("att_footer", Tatt_footer), ("b", Tb), ("base", Tbase), ("basefont", Tbasefont), ("big", Tbig), ("blink", Tblink), ("blockquote", Tblockquote), ("body", Tbody), ("bq", Tbq), ("br", Tbr), ("caption", Tcaption), ("center", Tcenter), ("cite", Tcite), ("code", Tcode), ("col", Tcol), ("colgroup", Tcolgroup), ("dd", Tdd), ("dfn", Tdfn), ("dir", Tdir), ("div", Tdiv), ("dl", Tdl), ("dt", Tdt), ("em", Tem), ("font", Tfont), ("form", Tform), ("h1", Th1), ("h2", Th2), ("h3", Th3), ("h4", Th4), ("h5", Th5), ("h6", Th6), ("head", Thead), ("hr", Thr), ("html", Thtml), ("i", Ti), ("img", Timg), ("input", Tinput), ("isindex", Tisindex), ("item", Titem), ("kbd", Tkbd), ("li", Tli), ("link", Tlink), ("map", Tmap), ("menu", Tmenu), ("meta", Tmeta), ("ol", Tol), ("option", Toption), ("p", Tp), ("param", Tparam), ("pre", Tpre), ("q", Tq), ("strike", Tstrike), ("samp", Tsamp), ("script", Tscript), ("select", Tselect), ("small", Tsmall), ("strong", Tstrong), ("style", Tstyle), ("sub", Tsub), ("sup", Tsup), ("t", Tt), ("table", Ttable), ("tbody", Ttbody), ("td", Ttd), ("textarea", Ttextarea), ("textflow", Ttextflow), ("tfoot", Ttfoot), ("th", Tth), ("thead", Tthead), ("title", Ttitle), ("tr", Ttr), ("tt", Ttt), ("u", Tu), ("ul", Tul), ("var", Tvar) }; lex(b: array of byte, keepwh: int): array of ref Lex { if(sys == nil) sys = load Sys Sys->PATH; a: array of ref Lex; ai := 0; i := 0; for(;;){ j := i; Whitespace: for(;;){ # ignore nulls if(j"){ j += 3; i = j; continue Whitespace; } j++; } continue Whitespace; } break; } if(j == len b) break; if(ai == len a){ na := array[len a + 100] of ref Lex; if(a != nil) na[0:] = a; a = na; } s: string; if(int b[j] == '<'){ (s, i) = gettag(b, j); rbra := 0; j = 1; # SGML parsing rule: record end immediately following start tag is ignored; # record end immediately preceding end tag is ignored if(len s>1 && s[1]=='/'){ rbra = RBRA; j = 2; if(ai > 0 && a[ai-1].tag == Data) { ps := a[ai-1].text; z := len ps - 1; if(z >= 
0 && ps[z] == '\r') { a[ai-1].text = ps[0:z]; z--; } if(z >= 0 && ps[z] == '\n') a[ai-1].text = ps[0:z]; } } else { if(i < len b && int b[i] == '\r') i++; if(i < len b && int b[i] == '\n') i++; } for(k:=j; k' && quote == 0){ s[j++] = '>'; break; } if(c == '&') (c, i) = ampersand(b, i); if(quote) { if(quote == c) quote = 0; } else if(c == '"' || c == '\'') quote = c; s[j++] = c; } return (s, i); } ampersand(b: array of byte, i: int): (int, int) { starti := i; c := 0; if(i >= len b) return ('?', i); if(int b[i] == '#'){ i++; while(i'z') { if(k == 0) return ('&', starti+1); else break; } s[k++] = int b[i]; i++; } char := lookup(chartab, s); if(char == Notfound) return ('&', starti+1); return (char, i+1); } lowercase(s: string): string { l := ""; for(i:=0; i return 1; Data => return 0; } } return 0; } # for debugging lex2string(l: ref Lex): string { ans := ""; tag := l.tag; if(tag == HTML->Data) ans = "'" + l.text + "'"; else { ans = "<"; if(tag >= RBRA) { tag -= RBRA; ans = ans + "/"; } for(i := 0; i < len htmlstringtab; i++) if(tag == htmlstringtab[i].val) { ans = ans + uppercase(htmlstringtab[i].name); break; } for(al := l.attr; al != nil; al = tl al) { a := hd al; ans = ans + " " + a.name + "='" + a.value + "'"; } ans = ans + ">"; } return ans; }