implement HTML;
include "sys.m";
include "html.m";
sys: Sys;
# One row of a name-to-integer table; used below for both the
# character-entity table and the tag-name table.
Stringtab: adt
{
name: string;	# key: entity name or tag name
val: int;	# value: character code point or tag constant
};
# Character entity names (the text between '&' and ';') and the
# character each one stands for.
#
# Entries are kept in ascending byte order of name (upper case sorts
# before lower case), so the table is usable by an ordered search as
# well as a linear one; keep it sorted when adding entries.
#
# Invisible and spacing characters are written as \u escapes so that
# they survive editors and tools that normalise white space.
chartab:= array[] of { Stringtab
	("AElig", 'Æ'),
	("Aacute", 'Á'),
	("Acirc", 'Â'),
	("Agrave", 'À'),
	("Aring", 'Å'),
	("Atilde", 'Ã'),
	("Auml", 'Ä'),
	("Ccedil", 'Ç'),
	("ETH", 'Ð'),
	("Eacute", 'É'),
	("Ecirc", 'Ê'),
	("Egrave", 'È'),
	("Euml", 'Ë'),
	("Iacute", 'Í'),
	("Icirc", 'Î'),
	("Igrave", 'Ì'),
	("Iuml", 'Ï'),
	("Ntilde", 'Ñ'),
	("Oacute", 'Ó'),
	("Ocirc", 'Ô'),
	("Ograve", 'Ò'),
	("Oslash", 'Ø'),
	("Otilde", 'Õ'),
	("Ouml", 'Ö'),
	("THORN", 'Þ'),
	("Uacute", 'Ú'),
	("Ucirc", 'Û'),
	("Ugrave", 'Ù'),
	("Uuml", 'Ü'),
	("Yacute", 'Ý'),
	("aElig", 'æ'),	# non-standard spelling, kept for compatibility
	("aacute", 'á'),
	("acirc", 'â'),
	("aelig", 'æ'),	# the standard HTML name
	("agrave", 'à'),
	("alpha", 'α'),
	("amp", '&'),
	("aring", 'å'),
	("atilde", 'ã'),
	("auml", 'ä'),
	("beta", 'β'),
	("ccedil", 'ç'),
	("cdots", '⋯'),
	("chi", 'χ'),
	("copy", '©'),
	("ddots", '⋱'),
	("delta", 'δ'),
	("eacute", 'é'),
	("ecirc", 'ê'),
	("egrave", 'è'),
	("emdash", '—'),
	("emsp", '\u2003'),	# EM SPACE
	("endash", '–'),
	("ensp", '\u2002'),	# EN SPACE
	("epsilon", 'ε'),
	("eta", 'η'),
	("eth", 'ð'),
	("euml", 'ë'),
	("gamma", 'γ'),
	("gt", '>'),
	("iacute", 'í'),
	("icirc", 'î'),
	("igrave", 'ì'),
	("iota", 'ι'),
	("iuml", 'ï'),
	("kappa", 'κ'),
	("lambda", 'λ'),
	("ldots", '…'),
	("lt", '<'),
	("mu", 'μ'),
	("nbsp", '\u00a0'),	# NO-BREAK SPACE
	("ntilde", 'ñ'),
	("nu", 'ν'),
	("oacute", 'ó'),
	("ocirc", 'ô'),
	("ograve", 'ò'),
	("omega", 'ω'),
	("omicron", 'ο'),
	("oslash", 'ø'),
	("otilde", 'õ'),
	("ouml", 'ö'),
	("phi", 'φ'),
	("pi", 'π'),
	("psi", 'ψ'),
	# NOTE(review): "quad" and "sp" are not standard HTML entities; they are
	# left as an ordinary space here -- confirm whether a wider space was intended.
	("quad", ' '),
	("quot", '"'),
	("reg", '®'),
	("rho", 'ρ'),
	("shy", '\u00ad'),	# SOFT HYPHEN
	("sigma", 'σ'),
	("sp", ' '),
	("szlig", 'ß'),
	("tau", 'τ'),
	("theta", 'θ'),
	("thinsp", '\u2009'),	# THIN SPACE
	("thorn", 'þ'),
	("trade", '™'),
	("uacute", 'ú'),
	("ucirc", 'û'),
	("ugrave", 'ù'),
	("upsilon", 'υ'),
	("uuml", 'ü'),
	("varepsilon", '∈'),
	("varphi", 'ϕ'),
	("varpi", 'ϖ'),
	("varrho", 'ϱ'),
	("vdots", '⋮'),
	("vsigma", 'ς'),
	("vtheta", 'ϑ'),
	("xi", 'ξ'),
	("yacute", 'ý'),
	("yuml", 'ÿ'),
	("zeta", 'ζ'),
};
# Tag names (lower case) and the tag constant each one maps to.
#
# Entries are kept in ascending byte order of name so the table can be
# used by an ordered search as well as a linear one; keep it sorted
# when adding entries.  "strike" used to sit between "q" and "samp",
# which an ordered search would never reach.
# NOTE(review): the name lookup routine is not visible from here --
# confirm nothing indexes this table by position.
htmlstringtab := array[] of { Stringtab
	("a", Ta),
	("address", Taddress),
	("applet", Tapplet),
	("area", Tarea),
	("att_footer", Tatt_footer),
	("b", Tb),
	("base", Tbase),
	("basefont", Tbasefont),
	("big", Tbig),
	("blink", Tblink),
	("blockquote", Tblockquote),
	("body", Tbody),
	("bq", Tbq),
	("br", Tbr),
	("caption", Tcaption),
	("center", Tcenter),
	("cite", Tcite),
	("code", Tcode),
	("col", Tcol),
	("colgroup", Tcolgroup),
	("dd", Tdd),
	("dfn", Tdfn),
	("dir", Tdir),
	("div", Tdiv),
	("dl", Tdl),
	("dt", Tdt),
	("em", Tem),
	("font", Tfont),
	("form", Tform),
	("h1", Th1),
	("h2", Th2),
	("h3", Th3),
	("h4", Th4),
	("h5", Th5),
	("h6", Th6),
	("head", Thead),
	("hr", Thr),
	("html", Thtml),
	("i", Ti),
	("img", Timg),
	("input", Tinput),
	("isindex", Tisindex),
	("item", Titem),
	("kbd", Tkbd),
	("li", Tli),
	("link", Tlink),
	("map", Tmap),
	("menu", Tmenu),
	("meta", Tmeta),
	("ol", Tol),
	("option", Toption),
	("p", Tp),
	("param", Tparam),
	("pre", Tpre),
	("q", Tq),
	("samp", Tsamp),
	("script", Tscript),
	("select", Tselect),
	("small", Tsmall),
	("strike", Tstrike),
	("strong", Tstrong),
	("style", Tstyle),
	("sub", Tsub),
	("sup", Tsup),
	("t", Tt),
	("table", Ttable),
	("tbody", Ttbody),
	("td", Ttd),
	("textarea", Ttextarea),
	("textflow", Ttextflow),
	("tfoot", Ttfoot),
	("th", Tth),
	("thead", Tthead),
	("title", Ttitle),
	("tr", Ttr),
	("tt", Ttt),
	("u", Tu),
	("ul", Tul),
	("var", Tvar)
};
# lex: scan the HTML bytes in b and return them as an array of Lex tokens.
#
# What the surviving text shows:
#  - the Sys module is loaded on first use;
#  - the result array a is grown by 100 slots whenever it is full;
#  - scanning stops when the cursor reaches len b;
#  - a '<' byte hands over to gettag(b, j), which returns the tag text
#    and the index at which to resume;
#  - for an end tag (text starting "</") rbra is set to RBRA and a trailing
#    '\r' and then a trailing '\n' are trimmed from the text of the
#    immediately preceding Data token;
#  - for any other tag, one '\r' and then one '\n' directly after the
#    tag are skipped.
#
# NOTE(review): this span is damaged.  The whitespace loop and the loop that
# begins "for(k:=j; k" have lost text (apparently everything between a '<'
# and a '>' was stripped), so the handling of keepwh, the rest of lex, and
# the start of the following function are missing.  The closing lines, which
# return (s, i), look like the tail of a tag reader (presumably gettag) that
# tracks quoting and expands '&' through ampersand().  Restore from version
# control before changing any code here.
lex(b: array of byte, keepwh: int): array of ref Lex
{
if(sys == nil)
sys = load Sys Sys->PATH;
a: array of ref Lex;
ai := 0;
i := 0;
for(;;){
j := i;
Whitespace:
for(;;){
# ignore nulls
if(j"){
j += 3;
i = j;
continue Whitespace;
}
j++;
}
continue Whitespace;
}
break;
}
if(j == len b)
break;
if(ai == len a){
na := array[len a + 100] of ref Lex;
if(a != nil)
na[0:] = a;
a = na;
}
s: string;
if(int b[j] == '<'){
(s, i) = gettag(b, j);
rbra := 0;
j = 1;
# SGML parsing rule: record end immediately following start tag is ignored;
# record end immediately preceding end tag is ignored
if(len s>1 && s[1]=='/'){
rbra = RBRA;
j = 2;
if(ai > 0 && a[ai-1].tag == Data) {
ps := a[ai-1].text;
z := len ps - 1;
if(z >= 0 && ps[z] == '\r') {
a[ai-1].text = ps[0:z];
z--;
}
if(z >= 0 && ps[z] == '\n')
a[ai-1].text = ps[0:z];
}
}
else {
if(i < len b && int b[i] == '\r')
i++;
if(i < len b && int b[i] == '\n')
i++;
}
for(k:=j; k' && quote == 0){
s[j++] = '>';
break;
}
if(c == '&')
(c, i) = ampersand(b, i);
if(quote) {
if(quote == c)
quote = 0;
}
else if(c == '"' || c == '\'')
quote = c;
s[j++] = c;
}
return (s, i);
}
# ampersand: decode one character reference in b starting at index i.
# Presumably i is the index just after the '&' -- confirm against callers.
#
# Returns (character, index at which the caller should resume):
#  - ('?', i) when i is already at or past the end of b;
#  - ('&', starti+1), where starti is the value of i on entry, when no
#    name could be collected or the name is not in chartab;
#  - (char, i+1) when the collected name s is found in chartab; the +1
#    skips one byte after the name (presumably the ';' -- confirm).
# A '#' at b[i] selects a numeric form whose handling is not visible here.
#
# NOTE(review): this function is damaged.  The text between "while(i" and
# "'z')" has been lost (apparently everything between a '<' and a '>' was
# stripped), taking with it the numeric branch and the declarations of
# s and k.  Restore from version control before changing any code here.
ampersand(b: array of byte, i: int): (int, int)
{
starti := i;
c := 0;
if(i >= len b)
return ('?', i);
if(int b[i] == '#'){
i++;
while(i'z') {
if(k == 0)
return ('&', starti+1);
else
break;
}
s[k++] = int b[i];
i++;
}
char := lookup(chartab, s);
if(char == Notfound)
return ('&', starti+1);
return (char, i+1);
}
# lowercase: takes a string and returns a string, accumulating its result
# in l; presumably a case-folded copy of s -- the loop body is not visible.
#
# NOTE(review): this span is damaged.  The for-loop header is cut off after
# "i" (apparently everything between a '<' and a '>' was stripped), and the
# lines that follow it ("return 1; Data => return 0; ...") look like the
# tail of a different function that ends in a case statement and returns
# an int.  Restore from version control before changing any code here.
lowercase(s: string): string
{
l := "";
for(i:=0; i
return 1;
Data =>
return 0;
}
}
return 0;
}
# for debugging: render a single token as readable text.
# Data tokens come back as their text in single quotes.  Any other token
# comes back as <NAME attr='value' ...>, with a leading '/' before the
# name when the tag value is at or above RBRA.
lex2string(l: ref Lex): string
{
	kind := l.tag;

	# Character data: just quote the text.
	if(kind == HTML->Data)
		return "'" + l.text + "'";

	out := "<";
	if(kind >= RBRA){
		# strip the RBRA offset so the value can be matched in the table
		out += "/";
		kind -= RBRA;
	}

	# Find the first table row carrying this tag value; a value with no
	# row contributes no name at all.
	n := len htmlstringtab;
	idx := 0;
	while(idx < n && htmlstringtab[idx].val != kind)
		idx++;
	if(idx < n)
		out += uppercase(htmlstringtab[idx].name);

	# Append every attribute as name='value'.
	for(rest := l.attr; rest != nil; rest = tl rest){
		at := hd rest;
		out += " " + at.name + "='" + at.value + "'";
	}
	return out + ">";
}