implement Man2html;
include "sys.m";
stderr: ref Sys->FD;
sys: Sys;
print, fprint, sprint: import sys;
include "bufio.m";
include "draw.m";
include "daytime.m";
dt: Daytime;
include "string.m";
str: String;
# Module interface. init is the only function declared here; it receives
# the command-line arguments as a list of strings.
Man2html: module
{
init: fn(ctxt: ref Draw->Context, args: list of string);
};
# Input characters at or above Runeself are not copied through as-is:
# getnext looks them up in Entities and returns the entity string instead.
Runeself: con 16r80;

# Integer truth values used for the flag fields in this file.
false: con 0;
true: con 1;
# One troff special-character mapping: `name' is the two-character code
# that follows \( in the input, `value' is the text emitted in its place.
Troffspec: adt {
	name, value: string;
};
# Replacement text for troff \(xx special characters, searched linearly by
# getnext.  The values are emitted directly into the HTML output, so they
# must themselves be valid HTML text:
#	- '<', '>' and '&' are written as &lt; &gt; &amp; (a raw '<' would also
#	  be taken as the start of a tag by demark);
#	- non-ASCII characters are written as numeric character references,
#	  matching what getnext does for non-ASCII input characters.
tspec := array [] of { Troffspec
	# ligatures and rules
	("ff", "ff"),
	("fi", "fi"),
	("fl", "fl"),
	("Fi", "ffi"),
	("ru", "_"),
	("em", "&#8212;"),	# em dash
	# fractions and common symbols
	("14", "&#188;"),	# one quarter
	("12", "&#189;"),	# one half
	("co", "&#169;"),	# copyright
	("de", "&#176;"),	# degree
	("dg", "&#161;"),	# inverted exclamation mark
	("fm", "&#180;"),	# acute accent
	("rg", "&#174;"),	# registered
#	("bu", "*"),
	("bu", "&#8226;"),	# bullet
	("sq", "&#164;"),	# currency sign
	# arithmetic and relations
	("hy", "-"),
	("pl", "+"),
	("mi", "-"),
	("mu", "&#215;"),	# multiplication sign
	("di", "&#247;"),	# division sign
	("eq", "="),
	("==", "=="),
	(">=", "&gt;="),
	("<=", "&lt;="),
	("!=", "!="),
	("+-", "&#177;"),	# plus-minus
	("no", "&#172;"),	# not sign
	("sl", "/"),
	("ap", "&amp;"),
	("~=", "~="),
	("pt", "oc"),
	("gr", "GRAD"),
	("->", "-&gt;"),
	("<-", "&lt;-"),
	("ua", "^"),
	("da", "v"),
	("is", "Integral"),
	("pd", "DIV"),
	("if", "oo"),
	("sr", "-/"),
	# set notation
	("sb", "(~"),
	("sp", "~)"),
	("cu", "U"),
	("ca", "(^)"),
	("ib", "(="),
	("ip", "=)"),
	("mo", "C"),
	("es", "&#216;"),	# O with stroke
	# accents and miscellany
	("aa", "&#180;"),	# acute accent
	("ga", "`"),
	("ci", "O"),
	("L1", "Lucent"),
	("sc", "&#167;"),	# section sign
	("dd", "++"),
	("lh", "&lt;="),
	("rh", "=&gt;"),
	# bracket-building pieces
	("lt", "("),
	("rt", ")"),
	("lc", "|"),
	("rc", "|"),
	("lb", "("),
	("rb", ")"),
	("lf", "|"),
	("rf", "|"),
	("lk", "|"),
	("rk", "|"),
	("bv", "|"),
	("ts", "s"),
	("br", "|"),
	("or", "|"),
	("ul", "_"),
	("rn", " "),
	("*p", "PI"),
	("**", "*"),
};
# One row of the Entities table: `name' is the entity text and `value' is
# the character code it stands for.  getnext maps value -> name;
# httpunesc maps name -> value.
Entity: adt {
name: string;
value: int;
};
# Character/entity table.
#
# The first 95 rows MUST stay in code-point order starting at 161:
# httpunesc indexes the table directly as Entities[c-161] for 161 <= c < 256.
# getnext returns `name' verbatim into the HTML output for any input
# character >= Runeself, so the names of those rows have to be complete
# character references ("&#NNN;"), not the raw characters themselves.
# The final (nil, 0) row is an end marker.
Entities: array of Entity;
Entities = array[] of {
	Entity( "&#161;", '¡' ),
	Entity( "&#162;", '¢' ),
	Entity( "&#163;", '£' ),
	Entity( "&#164;", '¤' ),
	Entity( "&#165;", '¥' ),
	Entity( "&#166;", '¦' ),
	Entity( "&#167;", '§' ),
	Entity( "&#168;", '¨' ),
	Entity( "&#169;", '©' ),
	Entity( "&#170;", 'ª' ),
	Entity( "&#171;", '«' ),
	Entity( "&#172;", '¬' ),
	Entity( "&#173;", 16rAD ),	# soft hyphen; written numerically because the glyph is invisible
	Entity( "&#174;", '®' ),
	Entity( "&#175;", '¯' ),
	Entity( "&#176;", '°' ),
	Entity( "&#177;", '±' ),
	Entity( "&#178;", '²' ),
	Entity( "&#179;", '³' ),
	Entity( "&#180;", '´' ),
	Entity( "&#181;", 'µ' ),
	Entity( "&#182;", '¶' ),
	Entity( "&#183;", '·' ),
	Entity( "&#184;", '¸' ),
	Entity( "&#185;", '¹' ),
	Entity( "&#186;", 'º' ),
	Entity( "&#187;", '»' ),
	Entity( "&#188;", '¼' ),
	Entity( "&#189;", '½' ),
	Entity( "&#190;", '¾' ),
	Entity( "&#191;", '¿' ),
	Entity( "&#192;", 'À' ),
	Entity( "&#193;", 'Á' ),
	Entity( "&#194;", 'Â' ),
	Entity( "&#195;", 'Ã' ),
	Entity( "&#196;", 'Ä' ),
	Entity( "&#197;", 'Å' ),
	Entity( "&#198;", 'Æ' ),
	Entity( "&#199;", 'Ç' ),
	Entity( "&#200;", 'È' ),
	Entity( "&#201;", 'É' ),
	Entity( "&#202;", 'Ê' ),
	Entity( "&#203;", 'Ë' ),
	Entity( "&#204;", 'Ì' ),
	Entity( "&#205;", 'Í' ),
	Entity( "&#206;", 'Î' ),
	Entity( "&#207;", 'Ï' ),
	Entity( "&#208;", 'Ð' ),
	Entity( "&#209;", 'Ñ' ),
	Entity( "&#210;", 'Ò' ),
	Entity( "&#211;", 'Ó' ),
	Entity( "&#212;", 'Ô' ),
	Entity( "&#213;", 'Õ' ),
	Entity( "&#214;", 'Ö' ),
	Entity( "&#215;", '×' ),	# was "&215;" (missing '#')
	Entity( "&#216;", 'Ø' ),
	Entity( "&#217;", 'Ù' ),
	Entity( "&#218;", 'Ú' ),
	Entity( "&#219;", 'Û' ),
	Entity( "&#220;", 'Ü' ),
	Entity( "&#221;", 'Ý' ),
	Entity( "&#222;", 'Þ' ),
	Entity( "&#223;", 'ß' ),
	Entity( "&#224;", 'à' ),
	Entity( "&#225;", 'á' ),
	Entity( "&#226;", 'â' ),
	Entity( "&#227;", 'ã' ),
	Entity( "&#228;", 'ä' ),
	Entity( "&#229;", 'å' ),
	Entity( "&#230;", 'æ' ),
	Entity( "&#231;", 'ç' ),
	Entity( "&#232;", 'è' ),
	Entity( "&#233;", 'é' ),
	Entity( "&#234;", 'ê' ),
	Entity( "&#235;", 'ë' ),
	Entity( "&#236;", 'ì' ),
	Entity( "&#237;", 'í' ),
	Entity( "&#238;", 'î' ),
	Entity( "&#239;", 'ï' ),
	Entity( "&#240;", 'ð' ),
	Entity( "&#241;", 'ñ' ),
	Entity( "&#242;", 'ò' ),
	Entity( "&#243;", 'ó' ),
	Entity( "&#244;", 'ô' ),
	Entity( "&#245;", 'õ' ),
	Entity( "&#246;", 'ö' ),
	Entity( "&#247;", '÷' ),	# was "&247;" (missing '#')
	Entity( "&#248;", 'ø' ),
	Entity( "&#249;", 'ù' ),
	Entity( "&#250;", 'ú' ),
	Entity( "&#251;", 'û' ),
	Entity( "&#252;", 'ü' ),
	Entity( "&#253;", 'ý' ),
	Entity( "&#254;", 'þ' ),
	Entity( "&#255;", 'ÿ' ),	# &yuml;

	# ASCII characters with entity spellings.
	# NOTE(review): the "&#" prefix on the next three names is restored on
	# the assumption that it was stripped together with the other entity
	# text in this table - confirm against the upstream source.
	Entity( "&#SPACE;", ' ' ),
	Entity( "&#RS;", '\n' ),
	Entity( "&#RE;", '\r' ),
	Entity( "&quot;", '"' ),
	Entity( "&amp;", '&' ),
	Entity( "&lt;", '<' ),
	Entity( "&gt;", '>' ),

	# Greek letters and arrows/relations with plain-text names.
	Entity( "CAP-DELTA", 'Δ' ),
	Entity( "ALPHA", 'α' ),
	Entity( "BETA", 'β' ),
	Entity( "DELTA", 'δ' ),
	Entity( "EPSILON", 'ε' ),
	Entity( "THETA", 'θ' ),
	Entity( "MU", 'μ' ),
	Entity( "PI", 'π' ),
	Entity( "TAU", 'τ' ),
	Entity( "CHI", 'χ' ),
	Entity( "<-", '←' ),
	Entity( "^", '↑' ),
	Entity( "->", '→' ),
	Entity( "v", '↓' ),
	Entity( "!=", '≠' ),
	Entity( "<=", '≤' ),
	Entity( nil, 0 ),
};
# Identifies one manual page.  init builds the value it passes to
# domanpage as Hit("", "man", section, page), and domanpage opens
# `page' as the input file.  Field order matters: values are
# constructed positionally.
Hit: adt {
	glob, chap, mtype, page: string;
};
# Values for Global.list_type; Global_init starts it at Lnone.
Lnone, Lordered, Lunordered, Ldef, Lother: con iota; # list types
# Element type of Global.chaps (the top-level partitions of the manual).
# NOTE(review): `primary' is not set or read in the visible part of the
# file; presumably a boolean flag - confirm against its users.
Chaps: adt {
name: string;
primary: int;
};
# Element type of Global.types (the second-level partitions of the
# manual): a name and its description.
Types: adt {
	name, desc: string;
};
# Two independent flags, so that old-style formatted pages can be included
# under a new-style three-level tree:
#	names	two-level directory tree?
#	fmt	old internal formats: e.g., "B" font means "L"; name in .TH in all caps
Oldstyle: adt {
	names, fmt: int;
};
# Link text for the components of the current page.  The chap, man and
# mtype fields are filled in by Global.mk_href_chap, Global.mk_href_man
# and Global.mk_href_mtype; title is set from mantitle in Global_init.
Href: adt {
	title, chap, mtype, man: string;
};
# per-thread global data
# All conversion state lives in one of these; Global_init allocates it and
# sets the initial values, and it is passed to every worker function as `g'.
Global: adt {
# Bufio module handle (loaded in Global_init) and the two streams opened
# by domanpage: bin is the man page file, bout is standard output.
bufio: Bufio;
bin: ref Bufio->Iobuf;
bout: ref Bufio->Iobuf;
topname: string; # name of the top level categories in the manual
chaps: array of Chaps; # names of top-level partitions of this manual
types: array of Types; # names of second-level partitions
oldstyle: Oldstyle;
# mantitle starts out empty; mandir is the prefix the mk_href_* functions
# format into their links.
mantitle: string;
mandir: string;
thisone: Hit; # man page we're displaying
mtime: int; # last modification time of thisone
href: Href; # hrefs of components of this man page
# NOTE(review): hits/nhits are only initialised (nhits = 0) in the visible
# code; presumably a list of matching pages and its length - confirm.
hits: array of Hit;
nhits: int;
# one of Lnone, Lordered, Lunordered, Ldef, Lother
list_type: int;
pm: string; # proprietary marking
def_goobie: string; # deferred goobie
# The following are integer flags holding true/false.
sop: int; # output at start of paragraph?
sol: int; # input at start of line?
broken: int; # output at a break?
fill: int; # in fill mode?
pre: int; # in PRE block?
example: int; # an example active?
ipd: int; # emit inter-paragraph distance?
# Both start at 0 in Global_init.
indents: int;
hangingdt: int;
curfont: string; # current font
prevfont: string; # previous font
lastc: int; # previous char from input scanner
def_sm: int; # amount of deferred "make smaller" request
# mk_href_* store a formatted link in g.href.chap / .man / .mtype.
mk_href_chap: fn(g: self ref Global, chap: string);
mk_href_man: fn(g: self ref Global, man: string, oldstyle: int);
mk_href_mtype: fn(g: self ref Global, chap, mtype: string);
dobreak: fn(g: self ref Global);
print: fn(g: self ref Global, s: string);
softbr: fn(g: self ref Global): string;
softp: fn(g: self ref Global): string;
};
# Print the command synopsis on standard error and terminate by raising
# "fail:usage".  Does not return.
usage()
{
	# fprint is imported from sys at the top of the file
	fprint(stderr, "Usage: man2html file [section]\n");
	raise "fail:usage";
}
# Report a module that could not be loaded and terminate by raising
# "fail:load".  Does not return.
cantload(path: string)
{
	fprint(stderr, "man2html: cannot load %s: %r\n", path);
	raise "fail:load";
}

# Command entry point: man2html file [section]
#
# args is the usual argument list with the command name first.  `file' is
# the manual page to convert; `section' defaults to "1".  The page is
# converted by domanpage, which writes to file descriptor 1, and the
# output buffer is flushed before returning.
#
# Every module the program depends on is checked right after loading, so
# a missing module produces a diagnostic here instead of a nil-module
# exception somewhere in the middle of the conversion.
init(nil: ref Draw->Context, args: list of string)
{
	sys = load Sys Sys->PATH;
	stderr = sys->fildes(2);

	str = load String String->PATH;
	if(str == nil)
		cantload(String->PATH);
	dt = load Daytime Daytime->PATH;
	if(dt == nil)
		cantload(Daytime->PATH);

	g := Global_init();
	# Global_init loads Bufio into g.bufio without checking the result
	if(g.bufio == nil)
		cantload(Bufio->PATH);

	# drop the command name
	if(args != nil)
		args = tl args;
	if(args == nil)
		usage();

	page := hd args;
	args = tl args;
	section := "1";
	if(args != nil)
		section = hd args;

	hit := Hit ("", "man", section, page);
	domanpage(g, hit);
	g.bufio->g.bout.flush();
}
# Strip markup from s: everything from a '<' up to and including the next
# '>' is dropped, and the remaining characters are returned in order.
# The '<' and '>' characters themselves are never copied.
# Nested or quoted delimiters are not handled.
demark(s: string): string
{
	out: string;
	intag := false;
	for (i := 0; i < len s; i++) {
		c := s[i];
		if (c == '<')
			intag = true;
		else if (c == '>')
			intag = false;
		else if (!intag)
			out[len out] = c;	# assigning at index len out appends
	}
	return out;
}
#
# Convert an individual man page to HTML and output.
#
domanpage(g: ref Global, man: Hit)
{
file := man.page;
g.bin = g.bufio->open(file, Bufio->OREAD);
g.bout = g.bufio->fopen(sys->fildes(1), Bufio->OWRITE);
if (g.bin == nil) {
fprint(stderr, "Cannot open %s: %r\n", file);
return;
}
(err, info) := sys->fstat(g.bin.fd);
if (! err) {
g.mtime = info.mtime;
}
g.thisone = man;
while ((p := getnext(g)) != nil) {
c := p[0];
if (c == '.' && g.sol) {
if (g.pre) {
g.print("");
g.pre = false;
}
dogoobie(g, false);
dohangingdt(g);
} else if (g.def_goobie != nil || g.def_sm != 0) {
g.bufio->g.bin.ungetc();
dogoobie(g, true);
} else if (c == '\n') {
g.print(p);
dohangingdt(g);
} else
g.print(p);
}
if (g.pm != nil) {
g.print("
"); if (! g.broken) g.print("\n"); g.sop = true; g.fill = false; g.broken = true; g.example = true; } g_fi(g: ref Global) { if (g.fill) return; g.fill = true; g.print("\n"); g.broken = true; g.sop = true; } g_ft(g: ref Global, argl: list of string) { font: string; arg: string; if (argl == nil) arg = "P"; else arg = hd argl; if (g.curfont != nil) g.print(sprint("%s>", g.curfont)); case arg { "2" or "I" => font = "I"; "3" or "B" => font = "B"; "5" or "L" => font = "TT"; "P" => font = g.prevfont; * => font = nil; } g.prevfont = g.curfont; g.curfont = font; if (g.curfont != nil) if (g.fill) g.print(sprint("<%s>", g.curfont)); else g.print(sprint("<%s style=\"white-space: pre\">", g.curfont)); } # level == 1 is a .HP; level == 3 is a .TP g_HP_TP(g: ref Global, level: int) { case g.list_type { Ldef => if (g.hangingdt != 0) g.print("
\n"); g.broken = true; g.sop = true; g.pre = true; } g_PD(g: ref Global, argl: list of string) { if (len argl == 1 && hd argl == "0") g.ipd = false; else g.ipd = true; } g_PM(g: ref Global, argl: list of string) { code := "P"; if (argl != nil) code = hd argl; case code { * => # includes "1" and "P" g.pm = "Lucent Technologies - Proprietary\n" + "
Use pursuant to Company Instructions.\n"; "2" or "RS" => g.pm = "Lucent Technologies - Proprietary (Restricted)\n" + "
Solely for authorized persons having a need to know\n" + "
pursuant to Company Instructions.\n"; "3" or "RG" => g.pm = "Lucent Technologies - Proprietary (Registered)\n" + "
Solely for authorized persons having a need to know\n" + "
and subject to cover sheet instructions.\n"; "4" or "CP" => g.pm = "SEE PROPRIETARY NOTICE ON COVER PAGE\n"; "5" or "CR" => g.pm = "Copyright xxxx Lucent Technologies\n" + # should fill in the year from the date register "
All Rights Reserved.\n"; "6" or "UW" => g.pm = "THIS DOCUMENT CONTAINS PROPRIETARY INFORMATION OF\n" + "
LUCENT TECHNOLOGIES INC. AND IS NOT TO BE DISCLOSED OR USED EXCEPT IN\n" + "
ACCORDANCE WITH APPLICABLE AGREEMENTS.\n" + "
Unpublished & Not for Publication\n"; } } g_PP(g: ref Global) { closel(g); reset_font(g); p := g.softp(); if (p != nil) g.print(p); g.sop = true; g.broken = true; } g_RE(g: ref Global) { g.print("
"); g.print("
"); g.print("
")); g.print(sprint("%s(%s)", g.thisone.page, g.thisone.mtype)); g.print(" | \n"); g.print(sprint("Rev: %s |
"; else return "
"; } # # get (remainder of) a line # getline(g: ref Global): string { line := ""; while ((token := getnext(g)) != "\n") { if (token == nil) return line; line += token; } return line+"\n"; } # # Get next logical character. Expand it with escapes. # getnext(g: ref Global): string { iob := g.bufio; Iobuf: import iob; font: string; token: string; bin := g.bin; g.sol = (g.lastc == '\n'); c := bin.getc(); if (c < 0) return nil; g.lastc = c; if (c >= Runeself) { for (i := 0; i < len Entities; i++) if (Entities[i].value == c) return Entities[i].name; return sprint("%d;", c); } case c { '<' => return "<"; '>' => return ">"; '\\' => c = bin.getc(); if (c < 0) return nil; g.lastc = c; case c { ' ' => return " "; # chars to ignore '|' or '&' or '^' => return getnext(g); # ignore arg 'k' => nil = bin.getc(); return getnext(g); # defined strings '*' => case bin.getc() { 'R' => return "®"; } return getnext(g); # special chars '(' => token[0] = bin.getc(); token[1] = bin.getc(); for (i := 0; i < len tspec; i++) if (token == tspec[i].name) return tspec[i].value; return "¿"; 'c' => c = bin.getc(); if (c < 0) return nil; else if (c == '\n') { g.lastc = c; g.sol = true; token[0] = bin.getc(); return token; } # DEBUG: should there be a "return xxx" here? 'e' => return "\\"; 'f' => g.lastc = c = bin.getc(); if (c < 0) return nil; case c { '2' or 'I' => font = "I"; '3' or 'B' => font = "B"; '5' or 'L' => font = "TT"; 'P' => font = g.prevfont; * => # includes '1' and 'R' font = nil; } # There are serious problems with this. We don't know the fonts properly at this stage. # g.prevfont = g.curfont; # g.curfont = font; # if (g.prevfont != nil) # token = sprint("%s>", g.prevfont); # if (g.curfont != nil) # token += sprint("<%s>", g.curfont); if (token == nil) return ""; # looks odd but it avoids inserting a space in
text return token; 's' => sign := '+'; size := 0; relative := false; getsize: for (;;) { c = bin.getc(); if (c < 0) return nil; case c { '+' => relative = true; '-' => sign = '-'; relative = true; '0' to '9' => size = size * 10 + (c - '0'); * => bin.ungetc(); break getsize; } g.lastc = c; } if (size == 0) token = ""; else if (relative) token = sprint("", sign, size); else token = sprint("", size); return token; } } token[0] = c; return token; } # # Return strings before and after the left-most instance of separator; # (s, nil) if no match or separator is last char in s. # split(s: string, sep: int): (string, string) { for (i := 0; i < len s; i++) if (s[i] == sep) return (s[:i], s[i+1:]); # s[len s:] is a valid slice, with value == nil return (s, nil); } Global_init(): ref Global { g := ref Global; g.bufio = load Bufio Bufio->PATH; g.chaps = array[20] of Chaps; g.types = array[20] of Types; g.mantitle = ""; g.href.title = g.mantitle; # ?? g.mtime = 0; g.nhits = 0; g.oldstyle.names = false; g.oldstyle.fmt = false; g.topname = "System"; g.list_type = Lnone; g.def_sm = 0; g.hangingdt = 0; g.indents = 0; g.sop = true; g.broken = true; g.ipd = true; g.fill = true; g.example = false; g.pre = false; g.lastc = '\n'; return g; } Global.mk_href_chap(g: self ref Global, chap: string) { if (chap != nil) g.href.chap = sprint("%s", g.mandir, chap, chap); } Global.mk_href_man(g: self ref Global, man: string, oldstyle: int) { rman := man; if (oldstyle) rman = str->tolower(man); # compensate for tradition of putting titles in all CAPS g.href.man = sprint("%s", g.mandir, rman, man); } Global.mk_href_mtype(g: self ref Global, chap, mtype: string) { g.href.mtype = sprint("%s", g.mandir, chap, mtype, mtype); } # We assume that anything >= Runeself is already in UTF. 
#
# Decode entity references in s and return the result.
#
# A reference is '&', a non-empty name, and a terminating ';':
#	&#number;	the number is converted with int; values 161..255 are
#			taken from the Entities table (which is ordered by code
#			point from 161), any other value is appended as that
#			character code
#	&name;		name is looked up in Entities; if it is not found the
#			'&' is copied literally and scanning continues after it
# An '&' that is not followed by a name and a ';' is copied literally.
#
httpunesc(s: string): string
{
	t := "";
	for (i := 0; i < len s; i++) {
		c := s[i];
		if (c == '&' && i + 1 < len s) {
			# splitl splits just before the first ';', so rest is nil
			# when there is no terminator and name is nil for "&;".
			(name, rest) := str->splitl(s[i+1:], ";");
			if (rest != nil && name != nil) {
				if (name[0] == '#' && len name > 1) {
					c = int name[1:];
					# Step over the name AND its ';'.  i is at the '&';
					# the loop's own i++ accounts for that character.
					i += len name + 1;
					if (c < 256 && c >= 161) {
						t[len t] = Entities[c-161].value;
						continue;
					}
					# other values fall through and c is appended below
				} else {
					for (j := 0; j < len Entities; j++)
						if (Entities[j].name == name)
							break;
					if (j < len Entities) {
						i += len name + 1;	# name plus ';'
						t[len t] = Entities[j].value;
						continue;
					}
				}
			}
		}
		t[len t] = c;
	}
	return t;
}

# Print the opening lines of the output document; the title line contains
# t with its markup removed by demark.  `search' is accepted but unused.
# NOTE(review): the strings below carry no HTML tags, which looks like
# markup was lost from this copy of the file - confirm against upstream
# before relying on this output.
title(g: ref Global, t: string, search: int)
{
	if(search)
		;	# not yet used
	g.print("\n");
	g.print(sprint("Inferno's %s \n", demark(t)));
	g.print("\n");
	g.print("\n");
}