#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "../port/error.h"

/* counters handed to incref() in newproc() to mint pids and note ids */
Ref	pidalloc;
Ref	noteidalloc;

/*
 * Process table bookkeeping.  The embedded Lock guards the
 * free list and the pid hash (see newproc, pidhash, procindex).
 */
struct Procalloc
{
	Lock;
	Proc*	ht[128];	/* pid hash buckets, chained through Proc.pidhash */
	Proc*	arena;		/* base of the Proc array set up by procinit0() */
	Proc*	free;		/* free Procs, chained through Proc.qnext */
}procalloc;

/* free list of Waitq records; not referenced by the code visible in this file */
struct
{
	Lock;
	Waitq*	free;
}waitqalloc;

/*
 * One run queue: a singly linked list of Procs chained through
 * Proc.rnext.  ready() appends at tail, runproc() unlinks.
 */
typedef struct
{
	Lock;
	Proc*	head;
	Proc*	tail;
	int	n;		/* number of Procs on this queue */
} Schedq;

int	nrdy;			/* total Procs queued on all run queues */
Schedq	runq[Nrq];		/* indexed by Proc.priority; lock(runq) guards them all */

/* printable names, indexed by Proc.state (see dumpaproc, runproc) */
char *statename[] =
{			/* BUG: generate automatically */
	"Dead",
	"Moribund",
	"Ready",
	"Scheding",
	"Running",
	"Queueing",
	"QueueingR",
	"QueueingW",
	"Wakeme",
	"Broken",
	"Stopped",
	"Rendez",
};

static void pidhash(Proc*);
static void pidunhash(Proc*);

/*
 * Always splhi()'ed.
 *
 * Per-processor scheduler entry.  The label saved in m->sched is
 * the target of the gotolabel(&m->sched) calls in sched() and
 * sleep(), so the code below runs both on the first call and
 * every time a process gives up this processor.  In the latter
 * case up is still the departing process and is dealt with
 * according to its state before a new one is picked by sched().
 */
void
schedinit(void)		/* never returns */
{
	setlabel(&m->sched);
	if(up) {
		m->proc = 0;
		switch(up->state) {
		case Running:
			/* still runnable: put it back on a run queue */
			ready(up);
			break;
		case Moribund:
			up->state = Dead;
			/*
			 * Holding locks from pexit:
			 *	procalloc
			 *	palloc
			 */
			mmurelease(up);

			/* return the Proc to the free list, then drop pexit's locks */
			up->qnext = procalloc.free;
			procalloc.free = up;

			unlock(&palloc);
			unlock(&procalloc);
			break;
		}
		/* mach == 0 is what runproc() and canpage() test for */
		up->mach = 0;
		up = 0;
	}
	sched();
}

/*
 * If changing this routine, look also at sleep(). It
 * contains a copy of the guts of sched().
*/ void sched(void) { if(up) { splhi(); /* statistics */ m->cs++; procsave(up); if(setlabel(&up->sched)) { procrestore(up); spllo(); return; } gotolabel(&m->sched); } up = runproc(); up->state = Running; up->mach = MACHP(m->machno); m->proc = up; mmuswitch(up); gotolabel(&up->sched); } int anyready(void) { return nrdy; } int anyhigher(void) { Schedq *rq; if(nrdy == 0) return 0; for(rq = &runq[Nrq-1]; rq > &runq[up->priority]; rq--) if(rq->head != nil) return 1; return 0; } enum { Squantum = (HZ+Nrq-1)/Nrq, }; void ready(Proc *p) { int s, pri; Schedq *rq; s = splhi(); if(p->fixedpri){ pri = p->basepri; } else { /* history counts */ if(p->state == Running){ p->rt++; pri = ((p->art + (p->rt<<1))>>2)/Squantum; } else { p->art = (p->art + (p->rt<<1))>>2; p->rt = 0; pri = p->art/Squantum; } pri = p->basepri - pri; if(pri < 0) pri = 0; /* the only intersection between the classes is at PriNormal */ if(pri < PriNormal && p->basepri > PriNormal) pri = PriNormal; /* stick at low priority any process waiting for a lock */ if(p->lockwait) pri = PriLock; } p->priority = pri; rq = &runq[p->priority]; lock(runq); p->rnext = 0; if(rq->tail) rq->tail->rnext = p; else rq->head = p; rq->tail = p; rq->n++; nrdy++; p->readytime = m->ticks; p->state = Ready; unlock(runq); splx(s); } Proc* runproc(void) { Schedq *rq, *xrq; Proc *p, *l; ulong rt; loop: /* * find a process that last ran on this processor (affinity), * or one that hasn't moved in a while (load balancing). 
*/ spllo(); for(;;){ if((++(m->fairness) & 0x3) == 0){ /* * once in a while, run process that's been waiting longest * regardless of movetime */ rt = 0xffffffff; xrq = nil; for(rq = runq; rq < &runq[Nrq]; rq++){ p = rq->head; if(p == 0) continue; if(p->readytime < rt){ xrq = rq; rt = p->readytime; } } if(xrq != nil){ rq = xrq; p = rq->head; if(p != nil && p->wired == nil) p->movetime = 0; goto found; } } else { /* * get highest priority process that this * processor can run given affinity constraints */ for(rq = &runq[Nrq-1]; rq >= runq; rq--){ p = rq->head; if(p == 0) continue; for(; p; p = p->rnext){ if(p->mp == MACHP(m->machno) || p->movetime < MACHP(0)->ticks) goto found; } } } idlehands(); } found: splhi(); if(!canlock(runq)) goto loop; l = 0; for(p = rq->head; p; p = p->rnext){ if(p->mp == MACHP(m->machno) || p->movetime < MACHP(0)->ticks) break; l = p; } /* * p->mach==0 only when process state is saved */ if(p == 0 || p->mach){ unlock(runq); goto loop; } if(p->rnext == 0) rq->tail = l; if(l) l->rnext = p->rnext; else rq->head = p->rnext; rq->n--; nrdy--; if(p->state != Ready) print("runproc %s %lud %s\n", p->text, p->pid, statename[p->state]); unlock(runq); p->state = Scheding; if(p->mp != MACHP(m->machno)) p->movetime = MACHP(0)->ticks + HZ/10; p->mp = MACHP(m->machno); return p; } int canpage(Proc *p) { int ok = 0; splhi(); lock(runq); /* Only reliable way to see if we are Running */ if(p->mach == 0) { p->newtlb = 1; ok = 1; } unlock(runq); spllo(); return ok; } Proc* newproc(void) { Proc *p; lock(&procalloc); for(;;) { if(p = procalloc.free) break; unlock(&procalloc); resrcwait("no procs"); lock(&procalloc); } procalloc.free = p->qnext; unlock(&procalloc); p->state = Scheding; p->psstate = "New"; p->mach = 0; p->qnext = 0; p->nchild = 0; p->nwait = 0; p->waitq = 0; p->pgrp = 0; p->egrp = 0; p->fgrp = 0; p->rgrp = 0; p->pdbg = 0; p->fpstate = FPinit; p->kp = 0; p->procctl = 0; p->notepending = 0; p->mp = 0; p->movetime = 0; p->wired = 0; p->ureg = 0; 
p->error[0] = '\0'; free(p->args); p->args = nil; p->nargs = 0; memset(p->seg, 0, sizeof p->seg); p->pid = incref(&pidalloc); pidhash(p); p->noteid = incref(¬eidalloc); if(p->pid==0 || p->noteid==0) panic("pidalloc"); if(p->kstack == 0) p->kstack = smalloc(KSTACK); return p; } /* * wire this proc to a machine */ void procwired(Proc *p, int bm) { Proc *pp; int i; char nwired[MAXMACH]; Mach *wm; if(bm < 0){ /* pick a machine to wire to */ memset(nwired, 0, sizeof(nwired)); p->wired = 0; pp = proctab(0); for(i=0; iwired; if(wm && pp->pid) nwired[wm->machno]++; } bm = 0; for(i=0; iwired = MACHP(bm); p->movetime = 0xffffffff; p->mp = p->wired; } void procinit0(void) /* bad planning - clashes with devproc.c */ { Proc *p; int i; procalloc.free = xalloc(conf.nproc*sizeof(Proc)); if(procalloc.free == nil) panic("cannot allocate %d procs\n", conf.nproc); procalloc.arena = procalloc.free; p = procalloc.free; for(i=0; iqnext = p+1; p->qnext = 0; } /* * sleep if a condition is not true. Another process will * awaken us after it sets the condition. When we awaken * the condition may no longer be true. * * we lock both the process and the rendezvous to keep r->p * and p->r synchronized. */ void sleep(Rendez *r, int (*f)(void*), void *arg) { int s; s = splhi(); lock(r); lock(&up->rlock); if(r->p){ print("double sleep %lud %lud\n", r->p->pid, up->pid); dumpstack(); } /* * Wakeup only knows there may be something to do by testing * r->p in order to get something to lock on. * Flush that information out to memory in case the sleep is * committed. */ r->p = up; if((*f)(arg) || up->notepending){ /* * if condition happened or a note is pending * never mind */ r->p = nil; unlock(&up->rlock); unlock(r); } else { /* * now we are committed to * change state and call scheduler */ up->state = Wakeme; up->r = r; /* statistics */ m->cs++; procsave(up); if(setlabel(&up->sched)) { /* * here when the process is awakened */ procrestore(up); spllo(); } else { /* * here to go to sleep (i.e. 
stop Running) */ unlock(&up->rlock); unlock(r); gotolabel(&m->sched); } } if(up->notepending) { up->notepending = 0; splx(s); error(Eintr); } splx(s); } int tfn(void *arg) { return MACHP(0)->ticks >= up->twhen || up->tfn(arg); } void tsleep(Rendez *r, int (*fn)(void*), void *arg, int ms) { ulong when; Proc *f, **l; /* avoid overflows at the cost of precision */ if(ms >= 1000000) when = ms/(1000/HZ); else when = MS2TK(ms); when += MACHP(0)->ticks; lock(&talarm); /* take out of list if checkalarm didn't */ if(up->trend) { l = &talarm.list; for(f = *l; f; f = f->tlink) { if(f == up) { *l = up->tlink; break; } l = &f->tlink; } } /* insert in increasing time order */ l = &talarm.list; for(f = *l; f; f = f->tlink) { if(f->twhen >= when) break; l = &f->tlink; } up->trend = r; up->twhen = when; up->tfn = fn; up->tlink = *l; *l = up; unlock(&talarm); if(waserror()){ up->twhen = 0; nexterror(); } sleep(r, tfn, arg); up->twhen = 0; poperror(); } /* * Expects that only one process can call wakeup for any given Rendez. * We hold both locks to ensure that r->p and p->r remain consistent. * Richard Miller has a better solution that doesn't require both to * be held simultaneously, but I'm a paranoid - presotto. */ Proc* wakeup(Rendez *r) { Proc *p, *rv; int s; rv = 0; s = splhi(); lock(r); p = r->p; if(p != nil){ lock(&p->rlock); if(p->state != Wakeme || p->r != r) panic("wakeup: state"); r->p = nil; p->r = nil; ready(p); rv = p; unlock(&p->rlock); } unlock(r); splx(s); return rv; } /* * if waking a sleeping process, this routine must hold both * p->rlock and r->lock. However, it can't know them in * the same order as wakeup causing a possible lock ordering * deadlock. We break the deadlock by giving up the p->rlock * lock if we can't get the r->lock and retrying. 
*/ int postnote(Proc *p, int dolock, char *n, int flag) { int s, ret; Rendez *r; Proc *d, **l; if(dolock) qlock(&p->debug); if(flag != NUser && (p->notify == 0 || p->notified)) p->nnote = 0; ret = 0; if(p->nnote < NNOTE) { strcpy(p->note[p->nnote].msg, n); p->note[p->nnote++].flag = flag; ret = 1; } p->notepending = 1; if(dolock) qunlock(&p->debug); /* this loop is to avoid lock ordering problems. */ for(;;){ s = splhi(); lock(&p->rlock); r = p->r; /* waiting for a wakeup? */ if(r == nil) break; /* no */ /* try for the second lock */ if(canlock(r)){ if(p->state != Wakeme || r->p != p) panic("postnote: state %d %d %d", r->p != p, p->r != r, p->state); p->r = nil; r->p = nil; ready(p); unlock(r); break; } /* give other process time to get out of critical section and try again */ unlock(&p->rlock); splx(s); sched(); } unlock(&p->rlock); splx(s); if(p->state != Rendezvous) return ret; /* Try and pull out of a rendezvous */ lock(p->rgrp); if(p->state == Rendezvous) { p->rendval = ~0; l = &REND(p->rgrp, p->rendtag); for(d = *l; d; d = d->rendhash) { if(d == p) { *l = p->rendhash; break; } l = &d->rendhash; } ready(p); } unlock(p->rgrp); return ret; } /* * weird thing: keep at most NBROKEN around */ #define NBROKEN 4 struct { QLock; int n; Proc *p[NBROKEN]; }broken; void addbroken(Proc *p) { qlock(&broken); if(broken.n == NBROKEN) { ready(broken.p[0]); memmove(&broken.p[0], &broken.p[1], sizeof(Proc*)*(NBROKEN-1)); --broken.n; } broken.p[broken.n++] = p; qunlock(&broken); p->state = Broken; p->psstate = 0; sched(); } void unbreak(Proc *p) { int b; qlock(&broken); for(b=0; b < broken.n; b++) if(broken.p[b] == p) { broken.n--; memmove(&broken.p[b], &broken.p[b+1], sizeof(Proc*)*(NBROKEN-(b+1))); ready(p); break; } qunlock(&broken); } int freebroken(void) { int i, n; qlock(&broken); n = broken.n; for(i=0; ialarm = 0; /* nil out all the resources under lock (free later) */ qlock(&up->debug); fgrp = up->fgrp; up->fgrp = nil; egrp = up->egrp; up->egrp = nil; rgrp = up->rgrp; 
up->rgrp = nil; pgrp = up->pgrp; up->pgrp = nil; dot = up->dot; up->dot = nil; qunlock(&up->debug); if(fgrp) closefgrp(fgrp); if(egrp) closeegrp(egrp); if(rgrp) closergrp(rgrp); if(dot) cclose(dot); if(pgrp) closepgrp(pgrp); /* * if not a kernel process and have a parent, * do some housekeeping. */ if(up->kp == 0) { p = up->parent; if(p == 0) { if(exitstr == 0) exitstr = "unknown"; panic("boot process died: %s", exitstr); } while(waserror()) ; wq = smalloc(sizeof(Waitq)); poperror(); readnum(0, wq->w.pid, NUMSIZE, up->pid, NUMSIZE); utime = up->time[TUser] + up->time[TCUser]; stime = up->time[TSys] + up->time[TCSys]; readnum(0, &wq->w.time[TUser*12], NUMSIZE, TK2MS(utime), NUMSIZE); readnum(0, &wq->w.time[TSys*12], NUMSIZE, TK2MS(stime), NUMSIZE); readnum(0, &wq->w.time[TReal*12], NUMSIZE, TK2MS(MACHP(0)->ticks - up->time[TReal]), NUMSIZE); if(exitstr && exitstr[0]){ n = sprint(wq->w.msg, "%s %lud:", up->text, up->pid); strncpy(wq->w.msg+n, exitstr, ERRLEN-n); wq->w.msg[ERRLEN-1] = 0; } else wq->w.msg[0] = '\0'; lock(&p->exl); /* * If my parent is no longer alive, or if there would be more * than 128 zombie child processes for my parent, then don't * leave a wait record behind. This helps prevent badly * written daemon processes from accumulating lots of wait * records. 
*/ if(p->pid == up->parentpid && p->state != Broken && p->nwait < 128) { p->nchild--; p->time[TCUser] += utime; p->time[TCSys] += stime; wq->next = p->waitq; p->waitq = wq; p->nwait++; wakeup(&p->waitr); unlock(&p->exl); } else { unlock(&p->exl); free(wq); } } if(!freemem) addbroken(up); qlock(&up->seglock); es = &up->seg[NSEG]; for(s = up->seg; s < es; s++) { if(*s) { putseg(*s); *s = 0; } } qunlock(&up->seglock); lock(&up->exl); /* Prevent my children from leaving waits */ pidunhash(up); up->pid = 0; wakeup(&up->waitr); unlock(&up->exl); for(f = up->waitq; f; f = next) { next = f->next; free(f); } /* release debuggers */ qlock(&up->debug); if(up->pdbg) { wakeup(&up->pdbg->sleep); up->pdbg = 0; } qunlock(&up->debug); /* Sched must not loop for these locks */ lock(&procalloc); lock(&palloc); up->state = Moribund; sched(); panic("pexit"); } int haswaitq(void *x) { Proc *p; p = (Proc *)x; return p->waitq != 0; } ulong pwait(Waitmsg *w) { ulong cpid; Waitq *wq; if(!canqlock(&up->qwaitr)) error(Einuse); if(waserror()) { qunlock(&up->qwaitr); nexterror(); } lock(&up->exl); if(up->nchild == 0 && up->waitq == 0) { unlock(&up->exl); error(Enochild); } unlock(&up->exl); sleep(&up->waitr, haswaitq, up); lock(&up->exl); wq = up->waitq; up->waitq = wq->next; up->nwait--; unlock(&up->exl); qunlock(&up->qwaitr); poperror(); if(w) memmove(w, &wq->w, sizeof(Waitmsg)); cpid = atoi(wq->w.pid); free(wq); return cpid; } Proc* proctab(int i) { return &procalloc.arena[i]; } void dumpaproc(Proc *p) { ulong bss; char *s; if(p == 0) return; bss = 0; if(p->seg[BSEG]) bss = p->seg[BSEG]->top; s = p->psstate; if(s == 0) s = statename[p->state]; print("%3lud:%10s pc %8lux dbgpc %8lux %8s (%s) ut %ld st %ld bss %lux qpc %lux\n", p->pid, p->text, p->pc, dbgpc(p), s, statename[p->state], p->time[0], p->time[1], bss, p->qpc); } void procdump(void) { int i; Proc *p; if(up) print("up %lud\n", up->pid); else print("no current process\n"); for(i=0; istate == Dead) continue; dumpaproc(p); prflush(); } 
} /* * wait till all processes have flushed their mmu * state about segement s */ void procflushseg(Segment *s) { int i, ns, nm, nwait; Proc *p; /* * tell all processes with this * segment to flush their mmu's */ nwait = 0; for(i=0; istate == Dead) continue; for(ns = 0; ns < NSEG; ns++) if(p->seg[ns] == s){ p->newtlb = 1; for(nm = 0; nm < conf.nmach; nm++){ if(MACHP(nm)->proc == p){ MACHP(nm)->flushmmu = 1; nwait++; } } break; } } if(nwait == 0) return; /* * wait for all processors to take a clock interrupt * and flush their mmu's */ for(nm = 0; nm < conf.nmach; nm++) if(MACHP(nm) != m) while(MACHP(nm)->flushmmu) sched(); } void scheddump(void) { Proc *p; Schedq *rq; for(rq = &runq[Nrq-1]; rq >= runq; rq--){ if(rq->head == 0) continue; print("rq%ld:", rq-runq); for(p = rq->head; p; p = p->rnext) print(" %lud(%lud, %lud)", p->pid, m->ticks - p->readytime, MACHP(0)->ticks - p->movetime); print("\n"); delay(150); } print("nrdy %d\n", nrdy); } void kproc(char *name, void (*func)(void *), void *arg) { Proc *p; static Pgrp *kpgrp; p = newproc(); p->psstate = "kproc"; p->procmode = 0644; p->kp = 1; p->fpsave = up->fpsave; p->scallnr = up->scallnr; p->s = up->s; p->nerrlab = 0; p->slash = up->slash; p->dot = up->dot; incref(p->dot); memmove(p->note, up->note, sizeof(p->note)); p->nnote = up->nnote; p->notified = 0; p->lastnote = up->lastnote; p->notify = up->notify; p->ureg = 0; p->dbgreg = 0; p->basepri = PriKproc; p->priority = p->basepri; kprocchild(p, func, arg); strcpy(p->user, eve); if(kpgrp == 0) kpgrp = newpgrp(); p->pgrp = kpgrp; incref(kpgrp); strcpy(p->text, name); p->nchild = 0; p->parent = 0; memset(p->time, 0, sizeof(p->time)); p->time[TReal] = MACHP(0)->ticks; ready(p); /* * since the bss/data segments are now shareable, * any mmu info about this process is now stale * and has to be discarded. */ p->newtlb = 1; flushmmu(); } /* * called splhi() by notify(). See comment in notify for the * reasoning. 
*/ void procctl(Proc *p) { char *state; ulong s; switch(p->procctl) { case Proc_exitbig: spllo(); pexit("Killed: Insufficient physical memory", 1); case Proc_exitme: spllo(); /* pexit has locks in it */ pexit("Killed", 1); case Proc_traceme: if(p->nnote == 0) return; /* No break */ case Proc_stopme: p->procctl = 0; state = p->psstate; p->psstate = "Stopped"; /* free a waiting debugger */ s = spllo(); qlock(&p->debug); if(p->pdbg) { wakeup(&p->pdbg->sleep); p->pdbg = 0; } qunlock(&p->debug); splhi(); p->state = Stopped; sched(); p->psstate = state; splx(s); return; } } #include "errstr.h" void error(char *err) { spllo(); strncpy(up->error, err, ERRLEN); nexterror(); } void nexterror(void) { gotolabel(&up->errlab[--up->nerrlab]); } void exhausted(char *resource) { char buf[ERRLEN]; sprint(buf, "no free %s", resource); iprint("%s\n", buf); error(buf); } void killbig(void) { int i; Segment *s; ulong l, max; Proc *p, *ep, *kp; max = 0; kp = 0; ep = procalloc.arena+conf.nproc; for(p = procalloc.arena; p < ep; p++) { if(p->state == Dead || p->kp) continue; l = 0; for(i=1; iseg[i]; if(s != 0) l += s->top - s->base; } if(l > max) { kp = p; max = l; } } kp->procctl = Proc_exitbig; for(i = 0; i < NSEG; i++) { s = kp->seg[i]; if(s != 0 && canqlock(&s->lk)) { mfreeseg(s, s->base, (s->top - s->base)/BY2PG); qunlock(&s->lk); } } print("%lud: %s killed because no swap configured\n", kp->pid, kp->text); } /* * change ownership to 'new' of all processes owned by 'old'. Used when * eve changes. 
*/ void renameuser(char *old, char *new) { Proc *p, *ep; ep = procalloc.arena+conf.nproc; for(p = procalloc.arena; p < ep; p++) if(strcmp(old, p->user) == 0) memmove(p->user, new, NAMELEN); } /* * time accounting called by clock() splhi'd */ void accounttime(void) { Proc *p; int n; static int nrun; p = m->proc; if(p) { nrun++; p->time[p->insyscall]++; } /* only one processor gets to compute system load averages */ if(m->machno != 0) return; /* calculate decaying load average */ n = nrun; nrun = 0; n = (nrdy+n)*1000; m->load = (m->load*19+n)/20; } static void pidhash(Proc *p) { int h; h = p->pid % nelem(procalloc.ht); lock(&procalloc); p->pidhash = procalloc.ht[h]; procalloc.ht[h] = p; unlock(&procalloc); } static void pidunhash(Proc *p) { int h; Proc **l; h = p->pid % nelem(procalloc.ht); lock(&procalloc); for(l = &procalloc.ht[h]; *l != nil; l = &(*l)->pidhash) if(*l == p){ *l = p->pidhash; break; } unlock(&procalloc); } int procindex(int pid) { Proc *p; int h; int s; s = -1; h = pid % nelem(procalloc.ht); lock(&procalloc); for(p = procalloc.ht[h]; p != nil; p = p->pidhash) if(p->pid == pid){ s = p - procalloc.arena; break; } unlock(&procalloc); return s; }