implement Man2html; include "sys.m"; stderr: ref Sys->FD; sys: Sys; print, fprint, sprint: import sys; include "bufio.m"; include "draw.m"; include "daytime.m"; dt: Daytime; include "string.m"; str: String; include "arg.m"; Man2html: module { init: fn(ctxt: ref Draw->Context, args: list of string); }; Runeself: con 16r80; false, true: con iota; Troffspec: adt { name: string; value: string; }; tspec := array [] of { Troffspec ("ff", "ff"), ("fi", "fi"), ("fl", "fl"), ("Fi", "ffi"), ("ru", "_"), ("em", "—"), ("14", "¼"), ("12", "½"), ("co", "©"), ("de", "°"), ("dg", "¡"), ("fm", "´"), ("rg", "®"), # ("bu", "*"), ("bu", "•"), ("sq", "¤"), ("hy", "-"), ("pl", "+"), ("mi", "-"), ("mu", "×"), ("di", "÷"), ("eq", "="), ("==", "=="), (">=", ">="), ("<=", "<="), ("!=", "!="), ("+-", "±"), ("no", "¬"), ("sl", "/"), ("ap", "&"), ("~=", "~="), ("pt", "oc"), ("gr", "GRAD"), ("->", "->"), ("<-", "<-"), ("ua", "^"), ("da", "v"), ("is", "Integral"), ("pd", "DIV"), ("if", "oo"), ("sr", "-/"), ("sb", "(~"), ("sp", "~)"), ("cu", "U"), ("ca", "(^)"), ("ib", "(="), ("ip", "=)"), ("mo", "C"), ("es", "Ø"), ("aa", "´"), ("ga", "`"), ("ci", "O"), ("L1", "Lucent"), ("sc", "§"), ("dd", "++"), ("lh", "<="), ("rh", "=>"), ("lt", "("), ("rt", ")"), ("lc", "|"), ("rc", "|"), ("lb", "("), ("rb", ")"), ("lf", "|"), ("rf", "|"), ("lk", "|"), ("rk", "|"), ("bv", "|"), ("ts", "s"), ("br", "|"), ("or", "|"), ("ul", "_"), ("rn", " "), ("*p", "PI"), ("**", "*"), }; Entity: adt { name: string; value: int; }; Entities: array of Entity; Entities = array[] of { Entity( "¡", '¡' ), Entity( "¢", '¢' ), Entity( "£", '£' ), Entity( "¤", '¤' ), Entity( "¥", '¥' ), Entity( "¦", '¦' ), Entity( "§", '§' ), Entity( "¨", '¨' ), Entity( "©", '©' ), Entity( "ª", 'ª' ), Entity( "«", '«' ), Entity( "¬", '¬' ), Entity( "", '' ), Entity( "®", '®' ), Entity( "¯", '¯' ), Entity( "°", '°' ), Entity( "±", '±' ), Entity( "²", '²' ), Entity( "³", '³' ), Entity( "´", '´' ), Entity( "µ", 'µ' ), Entity( "¶", '¶' ), Entity( "·", '·' 
), Entity( "¸", '¸' ), Entity( "¹", '¹' ), Entity( "º", 'º' ), Entity( "»", '»' ), Entity( "¼", '¼' ), Entity( "½", '½' ), Entity( "¾", '¾' ), Entity( "¿", '¿' ), Entity( "À", 'À' ), Entity( "Á", 'Á' ), Entity( "Â", 'Â' ), Entity( "Ã", 'Ã' ), Entity( "Ä", 'Ä' ), Entity( "Å", 'Å' ), Entity( "Æ", 'Æ' ), Entity( "Ç", 'Ç' ), Entity( "È", 'È' ), Entity( "É", 'É' ), Entity( "Ê", 'Ê' ), Entity( "Ë", 'Ë' ), Entity( "Ì", 'Ì' ), Entity( "Í", 'Í' ), Entity( "Î", 'Î' ), Entity( "Ï", 'Ï' ), Entity( "Ð", 'Ð' ), Entity( "Ñ", 'Ñ' ), Entity( "Ò", 'Ò' ), Entity( "Ó", 'Ó' ), Entity( "Ô", 'Ô' ), Entity( "Õ", 'Õ' ), Entity( "Ö", 'Ö' ), Entity( "&215;", '×' ), Entity( "Ø", 'Ø' ), Entity( "Ù", 'Ù' ), Entity( "Ú", 'Ú' ), Entity( "Û", 'Û' ), Entity( "Ü", 'Ü' ), Entity( "Ý", 'Ý' ), Entity( "Þ", 'Þ' ), Entity( "ß", 'ß' ), Entity( "à", 'à' ), Entity( "á", 'á' ), Entity( "â", 'â' ), Entity( "ã", 'ã' ), Entity( "ä", 'ä' ), Entity( "å", 'å' ), Entity( "æ", 'æ' ), Entity( "ç", 'ç' ), Entity( "è", 'è' ), Entity( "é", 'é' ), Entity( "ê", 'ê' ), Entity( "ë", 'ë' ), Entity( "ì", 'ì' ), Entity( "í", 'í' ), Entity( "î", 'î' ), Entity( "ï", 'ï' ), Entity( "ð", 'ð' ), Entity( "ñ", 'ñ' ), Entity( "ò", 'ò' ), Entity( "ó", 'ó' ), Entity( "ô", 'ô' ), Entity( "õ", 'õ' ), Entity( "ö", 'ö' ), Entity( "&247;", '÷' ), Entity( "ø", 'ø' ), Entity( "ù", 'ù' ), Entity( "ú", 'ú' ), Entity( "û", 'û' ), Entity( "ü", 'ü' ), Entity( "ý", 'ý' ), Entity( "þ", 'þ' ), Entity( "ÿ", 'ÿ' ), # ÿ Entity( "SPACE;", ' ' ), Entity( "RS;", '\n' ), Entity( "RE;", '\r' ), Entity( """, '"' ), Entity( "&", '&' ), Entity( "<", '<' ), Entity( ">", '>' ), Entity( "CAP-DELTA", 'Δ' ), Entity( "ALPHA", 'α' ), Entity( "BETA", 'β' ), Entity( "DELTA", 'δ' ), Entity( "EPSILON", 'ε' ), Entity( "THETA", 'θ' ), Entity( "MU", 'μ' ), Entity( "PI", 'π' ), Entity( "TAU", 'τ' ), Entity( "CHI", 'χ' ), Entity( "<-", '←' ), Entity( "^", '↑' ), Entity( "->", '→' ), Entity( "v", '↓' ), Entity( "!=", '≠' ), Entity( "<=", '≤' ), Entity( nil, 0 ), }; Hit: adt { 
glob: string; chap: string; mtype: string; page: string; }; Lnone, Lordered, Lunordered, Ldef, Lother: con iota; # list types Chaps: adt { name: string; primary: int; }; Types: adt { name: string; desc: string; }; # having two separate flags here allows for inclusion of old-style formatted pages # under a new-style three-level tree Oldstyle: adt { names: int; # two-level directory tree? fmt: int; # old internal formats: e.g., "B" font means "L"; name in .TH in all caps }; Href: adt { title: string; chap: string; mtype: string; man: string; }; # per-thread global data Global: adt { bufio: Bufio; bin: ref Bufio->Iobuf; bout: ref Bufio->Iobuf; topname: string; # name of the top level categories in the manual chaps: array of Chaps; # names of top-level partitions of this manual types: array of Types; # names of second-level partitions oldstyle: Oldstyle; mantitle: string; mandir: string; thisone: Hit; # man page we're displaying mtime: int; # last modification time of thisone href: Href; # hrefs of components of this man page hits: array of Hit; nhits: int; list_type: int; pm: string; # proprietary marking def_goobie: string; # deferred goobie sop: int; # output at start of paragraph? sol: int; # input at start of line? broken: int; # output at a break? fill: int; # in fill mode? pre: int; # in PRE block? example: int; # an example active? ipd: int; # emit inter-paragraph distance? indents: int; hangingdt: int; curfont: string; # current font prevfont: string; # previous font lastc: int; # previous char from input scanner def_sm: int; # amount of deferred "make smaller" request mk_href_chap: fn(g: self ref Global, chap: string); mk_href_man: fn(g: self ref Global, man: string, oldstyle: int); mk_href_mtype: fn(g: self ref Global, chap, mtype: string); dobreak: fn(g: self ref Global); print: fn(g: self ref Global, s: string); softbr: fn(g: self ref Global): string; softp: fn(g: self ref Global): string; }; header := "
"; initial := ""; trailer := ""; usage() { sys->fprint(stderr, "Usage: man2html [-h header] [-i initialtext] [-t trailer] file [section]\n"); raise "fail:usage"; } init(nil: ref Draw->Context, args: list of string) { sys = load Sys Sys->PATH; stderr = sys->fildes(2); str = load String String->PATH; dt = load Daytime Daytime->PATH; arg := load Arg Arg->PATH; arg->init(args); arg->setusage("man2html [-h header] [-t trailer] file [section]"); while((o := arg->opt()) != 0) case o { 'h' => header = arg->earg(); 't' => trailer = arg->earg(); * => arg->usage(); } args = arg->argv(); if(args == nil) arg->usage(); arg = nil; g := Global_init(); page := hd args; args = tl args; section := "1"; if(args != nil) section = hd args; hit := Hit ("", "man", section, page); domanpage(g, hit); g.print(trailer+"\n"); g.bufio->g.bout.flush(); } # remove markup from a string # doesn't handle nested/quoted delimiters demark(s: string): string { t: string; clean := true; for (i := 0; i < len s; i++) { case s[i] { '<' => clean = false; '>' => clean = true; * => if (clean) t[len t] = s[i]; } } return t; } # # Convert an individual man page to HTML and output. # domanpage(g: ref Global, man: Hit) { file := man.page; g.bin = g.bufio->open(file, Bufio->OREAD); g.bout = g.bufio->fopen(sys->fildes(1), Bufio->OWRITE); if (g.bin == nil) { fprint(stderr, "Cannot open %s: %r\n", file); return; } (err, info) := sys->fstat(g.bin.fd); if (! err) { g.mtime = info.mtime; } g.thisone = man; while ((p := getnext(g)) != nil) { c := p[0]; if (c == '.' && g.sol) { if (g.pre) { g.print(""); g.pre = false; } dogoobie(g, false); dohangingdt(g); } else if (g.def_goobie != nil || g.def_sm != 0) { g.bufio->g.bin.ungetc(); dogoobie(g, true); } else if (c == '\n') { g.print(p); dohangingdt(g); } else g.print(p); } if (g.pm != nil) { g.print(""); if (! 
g.broken) g.print("\n"); g.sop = true; g.fill = false; g.broken = true; g.example = true; } g_fi(g: ref Global) { if (g.fill) return; g.fill = true; g.print("\n"); g.broken = true; g.sop = true; } g_ft(g: ref Global, argl: list of string) { font: string; arg: string; if (argl == nil) arg = "P"; else arg = hd argl; if (g.curfont != nil) g.print(sprint("%s>", g.curfont)); case arg { "2" or "I" => font = "I"; "3" or "B" => font = "B"; "5" or "L" => font = "TT"; "P" => font = g.prevfont; * => font = nil; } g.prevfont = g.curfont; g.curfont = font; if (g.curfont != nil) if (g.fill) g.print(sprint("<%s>", g.curfont)); else g.print(sprint("<%s style=\"white-space: pre\">", g.curfont)); } # level == 1 is a .HP; level == 3 is a .TP g_HP_TP(g: ref Global, level: int) { case g.list_type { Ldef => if (g.hangingdt != 0) g.print("
\n"); g.broken = true; g.sop = true; g.pre = true; } g_PD(g: ref Global, argl: list of string) { if (len argl == 1 && hd argl == "0") g.ipd = false; else g.ipd = true; } g_PM(g: ref Global, argl: list of string) { code := "P"; if (argl != nil) code = hd argl; case code { * => # includes "1" and "P" g.pm = "Lucent Technologies - Proprietary\n" + "
Use pursuant to Company Instructions.\n"; "2" or "RS" => g.pm = "Lucent Technologies - Proprietary (Restricted)\n" + "
Solely for authorized persons having a need to know\n" + "
pursuant to Company Instructions.\n"; "3" or "RG" => g.pm = "Lucent Technologies - Proprietary (Registered)\n" + "
Solely for authorized persons having a need to know\n" + "
and subject to cover sheet instructions.\n"; "4" or "CP" => g.pm = "SEE PROPRIETARY NOTICE ON COVER PAGE\n"; "5" or "CR" => g.pm = "Copyright xxxx Lucent Technologies\n" + # should fill in the year from the date register "
All Rights Reserved.\n"; "6" or "UW" => g.pm = "THIS DOCUMENT CONTAINS PROPRIETARY INFORMATION OF\n" + "
LUCENT TECHNOLOGIES INC. AND IS NOT TO BE DISCLOSED OR USED EXCEPT IN\n" + "
ACCORDANCE WITH APPLICABLE AGREEMENTS.\n" + "
Unpublished & Not for Publication\n"; } } g_PP(g: ref Global) { closel(g); reset_font(g); p := g.softp(); if (p != nil) g.print(p); g.sop = true; g.broken = true; } g_RE(g: ref Global) { g.print("
"); g.print("
"); g.print("
")); g.print(sprint("%s(%s)", g.thisone.page, g.thisone.mtype)); g.print(" | \n"); g.print(sprint("Rev: %s |
"; else return "
"; } # # get (remainder of) a line # getline(g: ref Global): string { line := ""; while ((token := getnext(g)) != "\n") { if (token == nil) return line; line += token; } return line+"\n"; } # # Get next logical character. Expand it with escapes. # getnext(g: ref Global): string { iob := g.bufio; Iobuf: import iob; font: string; token: string; bin := g.bin; g.sol = (g.lastc == '\n'); c := bin.getc(); if (c < 0) return nil; g.lastc = c; if (c >= Runeself) { for (i := 0; i < len Entities; i++) if (Entities[i].value == c) return Entities[i].name; return sprint("%d;", c); } case c { '<' => return "<"; '>' => return ">"; '\\' => c = bin.getc(); if (c < 0) return nil; g.lastc = c; case c { ' ' => return " "; # chars to ignore '|' or '&' or '^' => return getnext(g); # ignore arg 'k' => nil = bin.getc(); return getnext(g); # defined strings '*' => case bin.getc() { 'R' => return "®"; } return getnext(g); # special chars '(' => token[0] = bin.getc(); token[1] = bin.getc(); for (i := 0; i < len tspec; i++) if (token == tspec[i].name) return tspec[i].value; return "¿"; 'c' => c = bin.getc(); if (c < 0) return nil; else if (c == '\n') { g.lastc = c; g.sol = true; token[0] = bin.getc(); return token; } # DEBUG: should there be a "return xxx" here? 'e' => return "\\"; 'f' => g.lastc = c = bin.getc(); if (c < 0) return nil; case c { '2' or 'I' => font = "I"; '3' or 'B' => font = "B"; '5' or 'L' => font = "TT"; 'P' => font = g.prevfont; * => # includes '1' and 'R' font = nil; } # There are serious problems with this. We don't know the fonts properly at this stage. # g.prevfont = g.curfont; # g.curfont = font; # if (g.prevfont != nil) # token = sprint("%s>", g.prevfont); # if (g.curfont != nil) # token += sprint("<%s>", g.curfont); if (token == nil) return ""; # looks odd but it avoids inserting a space in
text return token; 's' => sign := '+'; size := 0; relative := false; getsize: for (;;) { c = bin.getc(); if (c < 0) return nil; case c { '+' => relative = true; '-' => sign = '-'; relative = true; '0' to '9' => size = size * 10 + (c - '0'); * => bin.ungetc(); break getsize; } g.lastc = c; } if (size == 0) token = ""; else if (relative) token = sprint("", sign, size); else token = sprint("", size); return token; } } token[0] = c; return token; } # # Return strings before and after the left-most instance of separator; # (s, nil) if no match or separator is last char in s. # split(s: string, sep: int): (string, string) { for (i := 0; i < len s; i++) if (s[i] == sep) return (s[:i], s[i+1:]); # s[len s:] is a valid slice, with value == nil return (s, nil); } Global_init(): ref Global { g := ref Global; g.bufio = load Bufio Bufio->PATH; g.chaps = array[20] of Chaps; g.types = array[20] of Types; g.mantitle = ""; g.href.title = g.mantitle; # ?? g.mtime = 0; g.nhits = 0; g.oldstyle.names = false; g.oldstyle.fmt = false; g.topname = "System"; g.list_type = Lnone; g.def_sm = 0; g.hangingdt = 0; g.indents = 0; g.sop = true; g.broken = true; g.ipd = true; g.fill = true; g.example = false; g.pre = false; g.lastc = '\n'; return g; } Global.mk_href_chap(g: self ref Global, chap: string) { if (chap != nil) g.href.chap = sprint("%s", g.mandir, chap, chap); } Global.mk_href_man(g: self ref Global, man: string, oldstyle: int) { rman := man; if (oldstyle) rman = str->tolower(man); # compensate for tradition of putting titles in all CAPS g.href.man = sprint("%s", g.mandir, rman, man); } Global.mk_href_mtype(g: self ref Global, chap, mtype: string) { g.href.mtype = sprint("%s", g.mandir, chap, mtype, mtype); } # We assume that anything >= Runeself is already in UTF. 
#
# httpunesc: return a copy of s with HTML character references decoded.
#
# Two forms are recognised, both of which must end in ';':
#	&#NNN;	decimal reference.  Values 161-255 are taken from the
#		Entities table, which is indexed by (value - 161) and so
#		relies on the table starting with those characters in order;
#		any other value is appended as the character code itself.
#	&name;	reference whose text between '&' and ';' equals the name
#		field of an Entities entry.
# Anything else, including an '&' with no later ';', is copied through
# unchanged.  "&;" drops the '&' and keeps the ';'.
#
httpunesc(s: string): string
{
	t := "";
	for (i := 0; i < len s; i++) {
		c := s[i];
		if (c == '&' && i + 1 < len s) {
			# splitl returns (text before ';', text from ';' on);
			# rem is nil when there is no ';' left in the string.
			(char, rem) := str->splitl(s[i+1:], ";");
			if (rem != nil) {
				if (char == nil)
					continue;
				if (char[0] == '#' && len char > 1) {
					c = int char[1:];
					# skip the reference text and its terminating ';'
					# (the loop's i++ then steps past the ';' itself)
					i += len char + 1;
					if (c < 256 && c >= 161) {
						t[len t] = Entities[c-161].value;
						continue;
					}
					# out-of-table value: fall through and append c as is
				} else {
					for (j := 0; j < len Entities; j++)
						if (Entities[j].name == char)
							break;
					if (j < len Entities) {
						# consume the name and the ';' that ends it
						i += len char + 1;
						t[len t] = Entities[j].value;
						continue;
					}
					# unknown name: the '&' is copied literally below
				}
			}
			# no terminating ';': not a reference, keep the '&' and
			# carry on rather than discarding the rest of the string
		}
		t[len t] = c;
	}
	return t;
}

#
# title: write the page preamble to g's output: the global header text,
# a line containing the page title t with markup removed by demark, and
# the global initial text.
# search is accepted but not yet used.
# NOTE(review): several literals below are empty or nearly so and look as
# if markup was lost from them; confirm against the upstream source.
#
title(g: ref Global, t: string, search: int)
{
	if(search)
		;	# not yet used
	g.print(header+"\n");
	g.print(sprint("Inferno's %s \n", demark(t)));
	g.print("\n");
	g.print(""+initial+"\n");
}