implement Build;

include "common.m";

# Module handles used throughout this file (local copies from CU).
# init() fills them in: some are loaded directly, the rest are copied
# from the CharonUtils instance it is given.
sys: Sys;
CU: CharonUtils;
	ByteSource, CImage, ImageCache, color: import CU;
D: Draw;
	Point, Rect, Image: import D;
S: String;
T: StringIntTab;
C: Ctype;
LX: Lex;
	RBRA, Token, TokenSource: import LX;
U: Url;
	ParsedUrl: import U;
J: Script;

# Character class table; init() sets it from C->ctype.
ctype: array of byte;

# Character-class strings; notwhitespace is the same set with a leading '^'.
whitespace : con " \t\n\r";
notwhitespace : con "^ \t\n\r";

# Attribute-value lookup tables.
# These tables must be sorted by their string key
# (presumably because StringIntTab lookup does a binary search -- confirm).

# values for alignment attributes
align_tab := array[] of { T->StringInt
	("baseline", int Abaseline),
	("bottom", int Abottom),
	("center", int Acenter),
	("char", int Achar),
	("justify", int Ajustify),
	("left", int Aleft),
	("middle", int Amiddle),
	("right", int Aright),
	("top", int Atop),
};

# form field kinds
input_tab := array[] of { T->StringInt
	("button", Fbutton),
	("checkbox", Fcheckbox),
	("file", Ffile),
	("hidden", Fhidden),
	("image", Fimage),
	("password", Fpassword),
	("radio", Fradio),
	("reset", Freset),
	("submit", Fsubmit),
	("text", Ftext),
};

# "all" clears on both sides
clear_tab := array[] of { T->StringInt
	("all", IFcleft|IFcright),
	("left", IFcleft),
	("right", IFcright),
};

# frame scrolling flags
fscroll_tab := array[] of { T->StringInt
	("auto", FRhscrollauto|FRvscrollauto),
	("no", FRnoscroll),
	("yes", FRhscroll|FRvscroll),
};

# blockbrk[tag] is break info for a block level element, or one
# of a few others that get the same treatment re ending open paragraphs
# and requiring a line break / vertical space before them.
# If we want a line of space before the given element, SPBefore is OR'd in.
# If we want a line of space after the given element, SPAfter is OR'd in.
# Flag bits stored per tag in blockbrk.  Any non-zero entry makes getitems
# call addbrk and close an open paragraph; SPBefore is tested for start
# tags and SPAfter for end tags to decide whether addbrk is asked for space.
SPBefore: con byte 2;
SPAfter: con byte 4;
BL: con byte 1;
BLBA: con BL|SPBefore|SPAfter;

# Indexed by start-tag id; getitems indexes it with tag-RBRA for end tags.
blockbrk := array[LX->Numtags] of {
	LX->Taddress => BLBA,
	LX->Tblockquote => BLBA,
	LX->Tcenter => BL,
	LX->Tdir => BLBA,
	LX->Tdiv => BL,
	LX->Tdd => BL,
	LX->Tdl => BLBA,
	LX->Tdt => BL,
	LX->Tform => BLBA,
	# headings and tables get breaks added manually
	LX->Th1 => BL,
	LX->Th2 => BL,
	LX->Th3 => BL,
	LX->Th4 => BL,
	LX->Th5 => BL,
	LX->Th6 => BL,
	LX->Thr => BL,
	LX->Tisindex => BLBA,
	LX->Tli => BL,
	LX->Tmenu => BLBA,
	LX->Tol => BLBA,
	LX->Tp => BLBA,
	LX->Tpre => BLBA,
	LX->Tul => BLBA,
	LX->Txmp => BLBA,
	* => byte 0
};

# attrinfo is information about attributes.
# The AGEN value means that the attribute is generic (applies to almost all elements)
AGEN: con byte 1;
attrinfo := array[LX->Numattrs] of {
	LX->Aid => AGEN,
	LX->Aclass => AGEN,
	LX->Astyle => AGEN,
	LX->Atitle => AGEN,
	LX->Aonabort => AGEN,
	LX->Aonblur => AGEN,
	LX->Aonchange => AGEN,
	LX->Aonclick => AGEN,
	LX->Aondblclick => AGEN,
	LX->Aonerror => AGEN,
	LX->Aonfocus => AGEN,
	LX->Aonkeypress => AGEN,
	LX->Aonkeyup => AGEN,
	LX->Aonload => AGEN,
	LX->Aonmousedown => AGEN,
	LX->Aonmousemove => AGEN,
	LX->Aonmouseout => AGEN,
	LX->Aonmouseover => AGEN,
	LX->Aonmouseup => AGEN,
	LX->Aonreset => AGEN,
	LX->Aonselect => AGEN,
	LX->Aonsubmit => AGEN,
	LX->Aonunload => AGEN,
	* => byte 0
};

# Some constants
FRKIDMARGIN: con 6;	# default margin around kid frames
IMGHSPACE: con 0;	# default hspace for images (0 matches IE, Netscape)
IMGVSPACE: con 0;	# default vspace for images
FLTIMGHSPACE: con 2;	# default hspace for float images
TABSP: con 2;		# default cellspacing for tables
TABPAD: con 2;		# default cell padding for tables
LISTTAB: con 1;		# number of tabs to indent lists
BQTAB: con 1;		# number of tabs to indent blockquotes
HRSZ: con 2;		# thickness of horizontal rules
SUBOFF: con 4;		# vertical offset for subscripts
SUPOFF: con 6;		# vertical offset for superscripts
# non-breaking space character (U+00A0); spelled as an escape so that it
# cannot be mistaken for, or silently converted to, an ASCII space
NBSP: con '\u00a0';

# Debug and warning switches; getitems refreshes both from (CU->config).dbg.
dbg := 0;
warn := 0;

# Set up the module handles used by this file.
# Records cu in CU, loads Sys, Draw, String, StringIntTab and Url, and
# copies the C, J and LX handles plus the ctype table from cu.
# Only the Url load is checked: U->init() is called when U is non-nil.
init(cu: CharonUtils)
{
	CU = cu;
	sys = load Sys Sys->PATH;
	D = load Draw Draw->PATH;
	S = load String String->PATH;
	T = load StringIntTab StringIntTab->PATH;
	U = load Url Url->PATH;
	if(U != nil)
		U->init();
	C = cu->C;
	J = cu->J;
	LX = cu->LX;
	ctype = C->ctype;
}

# Make an ItemSource that reads tokens for frame f from bs.
# Assume f has been reset, and then had any values from HTTP headers
# filled in (e.g., base, chset).
# mtype is the media type: for anything other than CU->TextHtml the
# initial Pstate has IFwrap cleared, literal set and FntT pushed as its
# font style.
# Returns the new ItemSource with a one-element Pstate stack.
ItemSource.new(bs: ref ByteSource, f: ref Layout->Frame, mtype: int) : ref ItemSource
{
	di := f.doc;
	ts := TokenSource.new(bs, di.chset, mtype);
	psstk := list of { Pstate.new() };
	if(mtype != CU->TextHtml) {
		ps := hd psstk;
		ps.curstate &= ~IFwrap;
		ps.literal = 1;
		pushfontstyle(ps, FntT);
	}
	return ref ItemSource(ts, mtype, di, f, psstk, 0, 0, 0, 0, nil, nil, nil, nil, nil, nil, nil);
}

ItemSource.getitems(is: self ref ItemSource) : ref Item
{
	dbg = int (CU->config).dbg['h'];
	warn = (int (CU->config).dbg['w']) || dbg;
	doscripts := (CU->config).doscripts;
	psstk := is.psstk;
	ps := hd psstk;		# ps is always same as hd psstk
	curtab: ref Table = nil;	# curtab is always same as hd is.tabstk
	if(is.tabstk != nil)
		curtab = hd is.tabstk;
	toks := is.toks;
	is.toks = nil;
	tokslen := len toks;
	toki := 0;
	di := is.doc;
TokLoop:
	for(;; toki++) {
		if(toki >= tokslen) {
			outerps := lastps(psstk);
			if(outerps.items.next != nil)
				break;
			toks = is.ts.gettoks();
			tokslen = len toks;
			if(dbg)
				sys->print("build: got %d tokens from token source\n", tokslen);
			if(tokslen == 0)
				break;
			toki = 0;
		}
		tok := toks[toki];
		if(dbg > 1)
			sys->print("build: curstate %ux, token %s\n", ps.curstate, tok.tostring());
		tag := tok.tag;
		brk := byte 0;
		brksp := 0;
		if(tag < LX->Numtags) {
			brk = blockbrk[tag];
			if((brk&SPBefore) != byte 0)
				brksp = 1;
		}
		else if(tag < LX->Numtags+RBRA) {
			brk = blockbrk[tag-RBRA];
			if((brk&SPAfter) != byte 0)
				brksp = 1;
		}
		if(brk != byte 0) {
			addbrk(ps, brksp, 0);
			if(ps.inpar) {
				popjust(ps);
				ps.inpar = 0;
			}
		}
		# check common case first (Data), then case statement on tag
		if(tag == LX->Data) {
			# Lexing didn't pay attention to SGML record boundary rules:
			# \n after
start tag or before end tag to be discarded. # (Lex has already discarded all \r's). # Some pages assume this doesn't happen in
text, # so we won't do it if literal is true. # BUG: won't discard \n before a start tag that begins # the next bufferful of tokens. s := tok.text; if(!ps.literal) { i := 0; j := len s; if(toki > 0) { pt := toks[toki-1].tag; # IE and Netscape both ignore this rule (contrary to spec) # if previous tag was img if(pt < LX->Numtags && pt != LX->Timg && j>0 && s[0]=='\n') i++; } if(toki < tokslen-1) { nt := toks[toki+1].tag; if(nt >= RBRA && nt < LX->Numtags+RBRA && j>i && s[j-1]=='\n') j--; } if(i>0 || jdrop(s, whitespace); if(s != "") ps.skipwhite = 0; } if(s != "") addtext(ps, s); } else case tag { # Some abbrevs used in following DTD comments # %text = #PCDATA # | TT | I | B | U | STRIKE | BIG | SMALL | SUB | SUP # | EM | STRONG | DFN | CODE | SAMP | KBD | VAR | CITE # | A | IMG | APPLET | FONT | BASEFONT | BR | SCRIPT | MAP # | INPUT | SELECT | TEXTAREA # %block = P | UL | OL | DIR | MENU | DL | PRE | DL | DIV | CENTER # | BLOCKQUOTE | FORM | ISINDEX | HR | TABLE # %flow = (%text | %block)* # %body.content = (%heading | %text | %block | ADDRESS)* # # Anchors are not supposed to be nested, but you sometimes see # href anchors inside destination anchors. 
LX->Ta => if(ps.curanchor != 0) { if(warn) sys->print("warning: nested or missing \n"); ps.curanchor = 0; } name := aval(tok, LX->Aname); href := aurlval(tok, LX->Ahref, nil, di.base); target := astrval(tok, LX->Atarget, di.target); ga := getgenattr(tok); evl : list of Lex->Attr = nil; if(ga != nil) { evl = ga.events; if(evl != nil && doscripts) di.hasscripts = 1; } # ignore rel, rev, and title attrs if(href != nil) { di.anchors = ref Anchor(++is.nanchors, name, href, target, evl, 0) :: di.anchors; ps.curanchor = is.nanchors; ps.curfg = di.link; ps.fgstk = ps.curfg :: ps.fgstk; # underline, too ps.ulstk = ULunder :: ps.ulstk; ps.curul = ULunder; } if(name != nil) { # add a null item to be destination brkstate := ps.curstate & IFbrk; additem(ps, Item.newspacer(ISPnull, 0), tok); ps.curstate |= brkstate; # not quite right di.dests = ref DestAnchor(++is.nanchors, name, ps.lastit) :: di.dests; } LX->Ta+RBRA => if(ps.curanchor != 0) { if(ps.fgstk != nil) { ps.fgstk = tl ps.fgstk; if(ps.fgstk == nil) ps.curfg = di.text; else ps.curfg = hd ps.fgstk; } ps.curanchor = 0; if(ps.ulstk != nil) { ps.ulstk = tl ps.ulstk; if(ps.ulstk == nil) ps.curul = ULnone; else ps.curul = hd ps.ulstk; } } # # We can't do applets, so ignore PARAMS, and let # the %text contents appear for the alternative rep LX->Tapplet or LX->Tapplet+RBRA => if(warn && tag == LX->Tapplet) sys->print("warning: