#include #include #include #include /* * caveat: * this stuff is only meant to work for ascii databases */ typedef struct Fid Fid; typedef struct Fs Fs; typedef struct Quick Quick; typedef struct Match Match; typedef struct Search Search; enum { OPERM = 0x3, /* mask of all permission types in open mode */ Nfidhash = 32, /* * qids */ Qroot = 1|CHDIR, Qsearch = 2, Qstats = 3, }; /* * boyer-moore quick string matching */ struct Quick { char *pat; char *up; /* match string for upper case of pat */ int len; /* of pat (and up) -1; used for fast search */ uchar jump[256]; /* jump index table */ int miss; /* amount to jump if we falsely match the last char */ }; extern void quickmk(Quick*, char*, int); extern void quickfree(Quick*); extern char* quicksearch(Quick*, char*, char*); /* * exact matching of a search string */ struct Match { Match *next; char *pat; /* null-terminated search string */ char *up; /* upper case of pat */ int len; /* length of both pat and up */ int (*op)(Match*, char*, char*); /* method for this partiticular search */ }; struct Search { Quick quick; /* quick match */ Match *match; /* exact matches */ int skip; /* number of matches to skip */ }; extern char* searchsearch(Search*, char*, char*, int*); extern Search* searchparse(char*, char*); extern void searchfree(Search*); struct Fid { Lock; Fid *next; Fid **last; uint fid; int ref; /* number of fcalls using the fid */ int attached; /* fid has beed attached or cloned and not clunked */ int open; Qid qid; Search *search; /* search patterns */ char *where; /* current location in the database */ int n; /* number of bytes left in found item */ }; void dostat(int, char*); void* emalloc(uint); void fatal(char*, ...); Match* mkmatch(Match*, int(*)(Match*, char*, char*), char*); Match* mkstrmatch(Match*, char*); char* nextsearch(char*, char*, char**, char**); int strlook(Match*, char*, char*); char* strndup(char*, int); int tolower(int); int toupper(int); char* urlunesc(char*, char*); void usage(void); struct Fs { Lock; /* for fids */ Fid *hash[Nfidhash]; }; extern void fsrun(Fs*, int); extern Fid* getfid(Fs*, uint); extern Fid* mkfid(Fs*, uint); extern void putfid(Fs*, Fid*); extern char* fsnop(Fs*, Fcall*); extern char* fssession(Fs*, Fcall*); extern char* fsattach(Fs*, Fcall*); extern char* fsclone(Fs*, Fcall*); extern char* fswalk(Fs*, Fcall*); extern char* fsopen(Fs*, Fcall*); extern char* fscreate(Fs*, Fcall*); extern char* fsread(Fs*, Fcall*); extern char* fswrite(Fs*, Fcall*); extern char* fsclunk(Fs*, Fcall*); extern char* fsremove(Fs*, Fcall*); extern char* fsstat(Fs*, Fcall*); extern char* fswstat(Fs*, Fcall*); char *(*fcalls[])(Fs*, Fcall*) = { [Tsession] fssession, [Tnop] fsnop, [Tattach] fsattach, [Tclone] fsclone, [Twalk] fswalk, [Topen] fsopen, [Tcreate] fscreate, [Tread] fsread, [Twrite] fswrite, [Tclunk] fsclunk, [Tremove] fsremove, [Tstat] fsstat, [Twstat] fswstat }; char Eperm[] = "permission denied"; char Enotdir[] = "not a directory"; char Enotexist[] = "file does not exist"; char Eisopen[] = "file already open for I/O"; char Einuse[] = "fid is already in use"; char Enofid[] = "no such fid"; char Enotopen[] = "file is not open"; char Ebadsearch[] = "bad search string"; Fs fs; char *database; char *edatabase; void main(int argc, char **argv) { Dir d; char buf[12], *mnt, *srv; int fd, p[2], n; mnt = "/tmp"; srv = nil; ARGBEGIN{ case 's': srv = ARGF(); mnt = nil; break; case 'm': mnt = ARGF(); break; }ARGEND if(argc != 1) usage(); fd = open(argv[0], OREAD); if(fd < 0 || dirfstat(fd, &d) < 0) fatal("can't open %s: %r", argv[0]); n = d.length; if(n == 0) fatal("zero length database %s", argv[0]); database = emalloc(n); if(read(fd, database, n) != n) fatal("can't read %s: %r", argv[0]); close(fd); edatabase = database + n; if(pipe(p) < 0) fatal("pipe failed"); switch(rfork(RFPROC|RFMEM|RFNOTEG|RFNAMEG)){ case 0: fsrun(&fs, p[0]); exits(nil); case -1: fatal("fork failed"); } if(mnt == nil){ if(srv == nil) usage(); fd = create(srv, OWRITE, 0666); if(fd < 0){ remove(srv); fd = create(srv, OWRITE, 0666); if(fd < 0){ close(p[1]); fatal("create of %s failed", srv); } } sprint(buf, "%d", p[1]); if(write(fd, buf, strlen(buf)) < 0){ close(p[1]); fatal("writing %s", srv); } close(p[1]); exits(nil); } if(mount(p[1], mnt, MREPL, "") < 0){ close(p[1]); fatal("mount failed"); } close(p[1]); exits(nil); } /* * execute the search * do a quick match, * isolate the line in which the occured, * and try all of the exact matches */ char* searchsearch(Search *search, char *where, char *end, int *np) { Match *m; char *s, *e; *np = 0; if(search == nil || where == nil) return nil; for(;;){ s = quicksearch(&search->quick, where, end); if(s == nil) return nil; e = memchr(s, '\n', end - s); if(e == nil) e = end; else e++; while(s > where && s[-1] != '\n') s--; for(m = search->match; m != nil; m = m->next){ if((*m->op)(m, s, e) == 0) break; } if(m == nil){ if(search->skip > 0) search->skip--; else{ *np = e - s; return s; } } where = e; } return nil; } /* * parse a search string of the form * tag=val&tag1=val1... */ Search* searchparse(char *search, char *esearch) { Search *s; Match *m, *next, **last; char *tag, *val, *p; int ok; s = emalloc(sizeof *s); s->match = nil; /* * acording to the http spec, * repeated search queries are ingored. * the last search given is performed on the original object */ while((p = memchr(s, '?', esearch - search)) != nil){ search = p + 1; } while(search < esearch){ search = nextsearch(search, esearch, &tag, &val); if(tag == nil) continue; ok = 0; if(strcmp(tag, "skip") == 0){ s->skip = strtoul(val, &p, 10); if(*p == 0) ok = 1; }else if(strcmp(tag, "search") == 0){ s->match = mkstrmatch(s->match, val); ok = 1; } free(tag); free(val); if(!ok){ searchfree(s); return nil; } } if(s->match == nil){ free(s); return nil; } /* * order the matches by probability of occurance * first cut is just by length */ for(ok = 0; !ok; ){ ok = 1; last = &s->match; for(m = *last; m && m->next; m = *last){ if(m->next->len > m->len){ next = m->next; m->next = next->next; next->next = m; *last = next; ok = 0; } last = &m->next; } } /* * convert the best search into a fast lookup */ m = s->match; s->match = m->next; quickmk(&s->quick, m->pat, 1); free(m->pat); free(m->up); free(m); return s; } void searchfree(Search *s) { Match *m, *next; if(s == nil) return; for(m = s->match; m; m = next){ next = m->next; free(m->pat); free(m->up); free(m); } quickfree(&s->quick); free(s); } char* nextsearch(char *search, char *esearch, char **tagp, char **valp) { char *tag, *val; *tagp = nil; *valp = nil; for(tag = search; search < esearch && *search != '='; search++) ; if(search == esearch) return search; tag = urlunesc(tag, search); search++; for(val = search; search < esearch && *search != '&'; search++) ; val = urlunesc(val, search); if(search != esearch) search++; *tagp = tag; *valp = val; return search; } Match* mkstrmatch(Match *m, char *pat) { char *s; for(s = pat; *s; s++){ if(*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'){ *s = 0; m = mkmatch(m, strlook, pat); pat = s + 1; }else *s = tolower(*s); } return mkmatch(m, strlook, pat); } Match* mkmatch(Match *next, int (*op)(Match*, char*, char*), char *pat) { Match *m; char *p; int n; n = strlen(pat); if(n == 0) return next; m = emalloc(sizeof *m); m->op = op; m->len = n; m->pat = strdup(pat); m->up = strdup(pat); for(p = m->up; *p; p++) *p = toupper(*p); for(p = m->pat; *p; p++) *p = tolower(*p); m->next = next; return m; } int strlook(Match *m, char *str, char *e) { char *pat, *up, *s; int c, pc, fc, fuc, n; n = m->len; fc = m->pat[0]; fuc = m->up[0]; for(; str + n <= e; str++){ c = *str; if(c != fc && c != fuc) continue; s = str + 1; up = m->up + 1; for(pat = m->pat + 1; pc = *pat; pat++){ c = *s; if(c != pc && c != *up) break; up++; s++; } if(pc == 0) return 1; } return 0; } /* * boyer-moore style pattern matching * implements an exact match for ascii * however, if mulitbyte upper-case and lower-case * characters differ in length or in more than one byte, * it only implements an approximate match */ void quickmk(Quick *q, char *spat, int ignorecase) { char *pat, *up; uchar *j; int ep, ea, cp, ca, i, c, n; /* * allocate the machine */ n = strlen(spat); if(n == 0){ q->pat = nil; q->up = nil; q->len = -1; return; } pat = emalloc(2* n + 2); q->pat = pat; up = pat; if(ignorecase) up = pat + n + 1; q->up = up; while(c = *spat++){ if(ignorecase){ *up++ = toupper(c); c = tolower(c); } *pat++ = c; } pat = q->pat; up = q->up; pat[n] = up[n] = '\0'; /* * make the skipping table */ if(n > 255) n = 255; j = q->jump; memset(j, n, 256); n--; q->len = n; for(i = 0; i <= n; i++){ j[(uchar)pat[i]] = n - i; j[(uchar)up[i]] = n - i; } /* * find the minimum safe amount to skip * if we match the last char but not the whole pat */ ep = pat[n]; ea = up[n]; for(i = n - 1; i >= 0; i--){ cp = pat[i]; ca = up[i]; if(cp == ep || cp == ea || ca == ep || ca == ea) break; } q->miss = n - i; } void quickfree(Quick *q) { if(q->pat != nil) free(q->pat); q->pat = nil; } char * quicksearch(Quick *q, char *s, char *e) { char *pat, *up, *m, *ee; uchar *j; int len, n, c, mc; len = q->len; if(len < 0) return s; j = q->jump; pat = q->pat; up = q->up; s += len; ee = e - (len * 4 + 4); while(s < e){ /* * look for a match on the last char */ while(s < ee && (n = j[(uchar)*s])){ s += n; s += j[(uchar)*s]; s += j[(uchar)*s]; s += j[(uchar)*s]; } if(s >= e) return nil; while(n = j[(uchar)*s]){ s += n; if(s >= e) return nil; } /* * does the string match? */ m = s - len; for(n = 0; c = pat[n]; n++){ mc = *m++; if(c != mc && mc != up[n]) break; } if(!c) return s - len; s += q->miss; } return nil; } void fsrun(Fs *fs, int fd) { Fcall rpc; char *err, *buf; int n; buf = emalloc(MAXRPC); for(;;){ /* * reading from a pipe or a network device * will give an error after a few eof reads * however, we cannot tell the difference * between a zero-length read and an interrupt * on the processes writing to us, * so we wait for the error */ n = read(fd, buf, MAXRPC); if(n == 0) continue; if(n < 0) fatal("mount read"); rpc.data = buf + MAXMSG; if(convM2S(buf, &rpc, n) == 0) continue; /* * flushes are way too hard. * a reply to the original message seems to work */ if(rpc.type == Tflush) continue; else if(rpc.type >= Tmax || !fcalls[rpc.type]) err = "bad fcall type"; else err = (*fcalls[rpc.type])(fs, &rpc); if(err){ rpc.type = Rerror; strncpy(rpc.ename, err, ERRLEN); }else{ rpc.type++; } n = convS2M(&rpc, buf); if(write(fd, buf, n) != n) fatal("mount write"); } } Fid* mkfid(Fs *fs, uint fid) { Fid *f; int h; lock(fs); h = fid % Nfidhash; for(f = fs->hash[h]; f; f = f->next){ if(f->fid == fid){ unlock(fs); return nil; } } f = emalloc(sizeof *f); f->next = fs->hash[h]; if(f->next != nil) f->next->last = &f->next; f->last = &fs->hash[h]; fs->hash[h] = f; f->fid = fid; f->ref = 1; f->attached = 1; f->open = 0; unlock(fs); return f; } Fid* getfid(Fs *fs, uint fid) { Fid *f; int h; lock(fs); h = fid % Nfidhash; for(f = fs->hash[h]; f; f = f->next){ if(f->fid == fid){ if(f->attached == 0) break; f->ref++; unlock(fs); return f; } } unlock(fs); return nil; } void putfid(Fs *fs, Fid *f) { lock(fs); f->ref--; if(f->ref == 0 && f->attached == 0){ *f->last = f->next; if(f->next != nil) f->next->last = f->last; if(f->search != nil) searchfree(f->search); free(f); } unlock(fs); } char* fsnop(Fs *, Fcall *) { return nil; } char* fssession(Fs *, Fcall *rpc) { memset(rpc->authid, 0, sizeof(rpc->authid)); memset(rpc->authdom, 0, sizeof(rpc->authdom)); memset(rpc->chal, 0, sizeof(rpc->chal)); return nil; } char* fsattach(Fs *fs, Fcall *rpc) { Fid *f; f = mkfid(fs, rpc->fid); if(f == nil) return Einuse; lock(f); f->open = 0; f->qid.path = Qroot; f->qid.vers = 0; rpc->qid = f->qid; unlock(f); putfid(fs, f); return nil; } char* fsclone(Fs *fs, Fcall *rpc) { Fid *f, *nf; Qid qid; if(rpc->fid == rpc->newfid) return Einuse; f = getfid(fs, rpc->fid); if(f == nil) return Enofid; lock(f); qid = f->qid; unlock(f); putfid(fs, f); nf = mkfid(fs, rpc->newfid); if(nf == nil) return Einuse; lock(nf); nf->qid = qid; nf->open = 0; unlock(nf); putfid(fs, nf); return nil; } char* fswalk(Fs *fs, Fcall *rpc) { Fid *f; f = getfid(fs, rpc->fid); if(f == nil) return Enofid; lock(f); if(f->qid.path != Qroot){ unlock(f); putfid(fs, f); return Enotdir; } if(strcmp(rpc->name, "search") == 0) f->qid.path = Qsearch; else if(strcmp(rpc->name, "stats") == 0) f->qid.path = Qstats; else{ unlock(f); putfid(fs, f); return Enotexist; } rpc->qid = f->qid; f->open = 0; unlock(f); putfid(fs, f); return nil; } char * fsopen(Fs *fs, Fcall *rpc) { Fid *f; int mode; f = getfid(fs, rpc->fid); if(f == nil) return Enofid; lock(f); if(f->open){ unlock(f); putfid(fs, f); return Eisopen; } mode = rpc->mode & OPERM; if(mode == OEXEC || f->qid.path == Qroot && (mode == OWRITE || mode == ORDWR)){ unlock(f); putfid(fs, f); return Eperm; } f->open = 1; f->where = nil; f->n = 0; f->search = nil; rpc->qid = f->qid; unlock(f); putfid(fs, f); return nil; } char * fscreate(Fs *, Fcall *) { return Eperm; } char* fsread(Fs *fs, Fcall *rpc) { Fid *f; int n, off, count, len; f = getfid(fs, rpc->fid); if(f == nil) return Enofid; if(!f->open){ unlock(f); putfid(fs, f); return Enotopen; } count = rpc->count; off = rpc->offset; rpc->count = 0; if(f->qid.path == Qroot){ /* * legal reads of directories must read * an integral number of dir structures */ if(count > DIRLEN) count = DIRLEN; dostat(Qsearch, rpc->data); rpc->count = count; if(off > 0) rpc->count = 0; unlock(f); putfid(fs, f); return nil; } if(f->qid.path == Qstats){ len = 0; }else{ for(len = 0; len < count; len += n){ if(f->where == nil || f->search == nil) break; if(f->n == 0) f->where = searchsearch(f->search, f->where, edatabase, &f->n); n = f->n; if(n != 0){ if(n > count-len) n = count-len; memmove(rpc->data+len, f->where, n); f->where += n; f->n -= n; } } } unlock(f); putfid(fs, f); rpc->count = len; return nil; } char* fswrite(Fs *fs, Fcall *rpc) { Fid *f; f = getfid(fs, rpc->fid); if(f == nil) return Enofid; lock(f); if(!f->open || f->qid.path != Qsearch){ unlock(f); putfid(fs, f); return Enotopen; } if(f->search != nil) searchfree(f->search); f->search = searchparse(rpc->data, rpc->data + rpc->count); if(f->search == nil){ unlock(f); putfid(fs, f); return Ebadsearch; } f->where = database; f->n = 0; unlock(f); putfid(fs, f); return nil; } char * fsclunk(Fs *fs, Fcall *rpc) { Fid *f; f = getfid(fs, rpc->fid); if(f != nil){ f->attached = 0; putfid(fs, f); } return nil; } char * fsremove(Fs *, Fcall *) { return Eperm; } char * fsstat(Fs *fs, Fcall *rpc) { Fid *f; f = getfid(fs, rpc->fid); if(f == nil) return Enofid; dostat(f->qid.path, rpc->stat); putfid(fs, f); return nil; } char * fswstat(Fs *, Fcall *) { return Eperm; } void dostat(int path, char *buf) { Dir d; switch(path){ case Qroot: strncpy(d.name, ".", NAMELEN); d.mode = CHDIR|0555; break; case Qsearch: strncpy(d.name, "search", NAMELEN); d.mode = 0666; break; case Qstats: strncpy(d.name, "stats", NAMELEN); d.mode = 0666; break; } d.qid.path = path; d.qid.vers = 0; d.length = 0; strncpy(d.uid, "none", NAMELEN); strncpy(d.gid, "none", NAMELEN); d.atime = d.mtime = time(nil); convD2M(&d, buf); } char * urlunesc(char *s, char *e) { char *t, *v; int c, n; v = emalloc((e - s) + 1); for(t = v; s < e; s++){ c = *s; if(c == '%'){ if(s + 2 >= e) break; n = s[1]; if(n >= '0' && n <= '9') n = n - '0'; else if(n >= 'A' && n <= 'F') n = n - 'A' + 10; else if(n >= 'a' && n <= 'f') n = n - 'a' + 10; else break; c = n; n = s[2]; if(n >= '0' && n <= '9') n = n - '0'; else if(n >= 'A' && n <= 'F') n = n - 'A' + 10; else if(n >= 'a' && n <= 'f') n = n - 'a' + 10; else break; s += 2; c = c * 16 + n; } *t++ = c; } *t = 0; return v; } int toupper(int c) { if(c >= 'a' && c <= 'z') c += 'A' - 'a'; return c; } int tolower(int c) { if(c >= 'A' && c <= 'Z') c += 'a' - 'A'; return c; } void fatal(char *fmt, ...) { va_list arg; char buf[1024]; write(2, "searchfs: ", 8); va_start(arg, fmt); doprint(buf, buf+1024, fmt, arg); va_end(arg); write(2, buf, strlen(buf)); write(2, "\n", 1); exits(fmt); } void * emalloc(uint n) { void *p; p = malloc(n); if(p == nil) fatal("out of memory"); memset(p, 0, n); return p; } void usage(void) { fprint(2, "usage: searchfs [-m mountpoint] [-s srvfile] database\n"); exits("usage"); }