#include "all.h"
#include "mem.h"
#include "io.h"
#include "ureg.h"

/*
 * task state segment. Plan 9 ignores all the task switching goo and just
 * uses the tss for esp0 and ss0 on gates into the kernel, interrupts,
 * and exceptions. The rest is completely ignored.
 *
 * This means that we only need one tss in the whole system.
 */
typedef struct Tss Tss;
struct Tss
{
	ulong	backlink;	/* unused */
	ulong	sp0;		/* pl0 stack pointer */
	ulong	ss0;		/* pl0 stack selector */
	ulong	sp1;		/* pl1 stack pointer */
	ulong	ss1;		/* pl1 stack selector */
	ulong	sp2;		/* pl2 stack pointer */
	ulong	ss2;		/* pl2 stack selector */
	ulong	cr3;		/* page table descriptor */
	ulong	eip;		/* instruction pointer */
	ulong	eflags;		/* processor flags */
	ulong	eax;		/* general (hah?) registers */
	ulong	ecx;
	ulong	edx;
	ulong	ebx;
	ulong	esp;
	ulong	ebp;
	ulong	esi;
	ulong	edi;
	ulong	es;		/* segment selectors */
	ulong	cs;
	ulong	ss;
	ulong	ds;
	ulong	fs;
	ulong	gs;
	ulong	ldt;		/* local descriptor table */
	ulong	iomap;		/* io map base */
};
Tss tss;

/*
 * segment descriptor initializers
 */
//#define DATASEGM(p) { 0xFFFF, SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW }
/*
 * NOTE(review): non-stock DATASEGM — expand-down (SEGE) with a tiny limit
 * instead of the flat 4GB segment commented out above.  Presumably this is
 * meant to fault accesses near address 0 (null-pointer traps), but it is a
 * deliberate deviation from the standard Plan 9 descriptor — confirm the
 * intent against mem.h/l.s before touching it.
 */
#define DATASEGM(p)	{ 1, SEGG|SEGB|(0<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW|SEGE }
#define EXECSEGM(p)	{ 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
#define CALLGATE(s,o,p)	{ ((o)&0xFFFF)|((s)<<16), (o)&0xFFFF0000|SEGP|SEGPL(p)|SEGCG }
#define D16SEGM(p)	{ 0xFFFF, (0x0<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW }
#define E16SEGM(p)	{ 0xFFFF, (0x0<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
#define TSSSEGM(b,p)	{ ((b)<<16)|sizeof(Tss),\
			  ((b)&0xFF000000)|(((b)>>16)&0xFF)|SEGTSS|SEGPL(p)|SEGP }

/*
 * global descriptor table describing all segments
 */
Segdesc gdt[] =
{
[NULLSEG]	{ 0, 0},	/* null descriptor */
[KDSEG]		DATASEGM(0),	/* kernel data/stack */
[KESEG]		EXECSEGM(0),	/* kernel code */
[UDSEG]		DATASEGM(3),	/* user data/stack */
[UESEG]		EXECSEGM(3),	/* user code */
[TSSSEG]	TSSSEGM(0,0),	/* tss segment */
};

static struct {
	ulong	va;
	ulong	pa;
} ktoppg;	/* prototype top
level page table * containing kernel mappings */ static ulong *kpt; /* 2nd level page tables for kernel mem */ #define ROUNDUP(s,v) (((s)+(v-1))&~(v-1)) /* * offset of virtual address into * top level page table */ #define TOPOFF(v) (((ulong)(v))>>(2*PGSHIFT-2)) /* * offset of virtual address into * bottom level page table */ #define BTMOFF(v) ((((ulong)(v))>>(PGSHIFT))&(WD2PG-1)) /* * Change current page table and the stack to use for exceptions * (traps & interrupts). The exception stack comes from the tss. * Since we use only one tss, (we hope) there's no need for a * puttr(). */ static void taskswitch(ulong pagetbl, ulong stack) { tss.ss0 = KDSEL; tss.sp0 = stack; tss.ss1 = KDSEL; tss.sp1 = stack; tss.ss2 = KDSEL; tss.sp2 = stack; tss.cr3 = pagetbl; putcr3(pagetbl); } /* * Create a prototype page map that maps all of memory into * kernel (KZERO) space. This is the default map. It is used * whenever the processor is not running a process or whenever running * a process which does not yet have its own map. */ void mmuinit(void) { ulong i, nkpt, npage, x, y, *top, nbytes, pgsz, flag; /* * set up the global descriptor table. we make the tss entry here * since it requires arithmetic on an address and hence cannot * be a compile or link time constant. */ x = (ulong)&tss; gdt[TSSSEG].d0 = (x<<16)|sizeof(Tss); gdt[TSSSEG].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP; putgdt(gdt, sizeof gdt); /* * set up system page tables. * map all of physical memory to start at KZERO. * leave a map entry for a user area. 
*/ /* * allocate top level table */ top = ialloc(BY2PG, BY2PG); ktoppg.va = (ulong)top; ktoppg.pa = PADDR(ktoppg.va); flag = PTEVALID|PTEKERNEL|PTEWRITE; pgsz = BY2PG; if(m->cpuiddx & 0x08){ putcr4(getcr4()|0x10); pgsz = 4*MB; flag |= PTESIZE; } /* map all memory to KZERO */ npage = mconf.topofmem/pgsz; if(pgsz == BY2PG){ nbytes = PGROUND(npage*BY2WD); /* words of page map */ nkpt = nbytes/BY2PG; /* pages of page map */ kpt = ialloc(nbytes, BY2PG); for(i = 0; i < npage; i++) kpt[i] = (0+i*pgsz)|flag; y = PADDR((ulong)kpt); } else{ nkpt = npage; y = 0; } x = TOPOFF(KZERO); for(i = 0; i < nkpt; i++) top[x+i] = (y+i*pgsz)|flag; /* * set up the task segment */ memset(&tss, 0, sizeof(tss)); taskswitch(ktoppg.pa, pgsz + (ulong)m); puttr(TSSSEL);/**/ } /* * used to map a page into 4 meg - BY2PG for confinit(). tpt is the temporary * page table set up by l.s. */ enum{ Pteoff = 4*MB-BY2PG, }; ulong* mapaddr(ulong paddr) { ulong base; ulong off; static ulong *pte, top; extern ulong tpt[]; if(pte == 0){ top = getcr3(); pte = (ulong*)KADDR(top-BY2PG)+(Pteoff>>PGSHIFT); } off = paddr&(BY2PG-1); base = paddr-off; *pte = base|PTEVALID|PTEKERNEL|PTEWRITE; /**/ putcr3(top); return (ulong*)(KZERO+Pteoff+off); } #define PDX(va) ((((ulong)(va))>>22) & 0x03FF) #define PTX(va) ((((ulong)(va))>>12) & 0x03FF) #define PPN(x) ((x)&~(BY2PG-1)) ulong* mmuwalk(ulong* pdb, ulong va, int level, int create) { ulong pa, *table; /* * Walk the page-table pointed to by pdb and return a pointer * to the entry for virtual address va at the requested level. * If the entry is invalid and create isn't requested then bail * out early. Otherwise, for the 2nd level walk, allocate a new * page-table page and register it in the 1st level. 
*/ //print("mmuwalk(%p, %p, %d, %d)\n", pdb, va, level, create); table = &pdb[PDX(va)]; if(!(*table & PTEVALID) && create == 0){ print("pte not valid\n"); return 0; } switch(level){ default: return 0; case 1: return table; case 2: if(*table & PTESIZE) panic("mmuwalk2: va 0x%ux entry 0x%ux\n", va, *table); if(!(*table & PTEVALID)){ pa = PADDR(ialloc(BY2PG, BY2PG)); *table = pa|PTEWRITE|PTEVALID; } table = KADDR(PPN(*table)); //print(" table -> %p %p\n", table, &table[PTX(va)]); return &table[PTX(va)]; } } #define ROUND(s, sz) (((s)+((sz)-1))&~((sz)-1)) enum{ WD2PG = 4096/4, BY2XPG = 4096*1024, /* bytes per big page */ VPTSIZE = BY2XPG, KMAPSIZE = BY2XPG, VMAPSIZE = 0x10000000-VPTSIZE-KMAPSIZE, VPT = KZERO-VPTSIZE, KMAP = VPT-KMAPSIZE, VMAP = KMAP-VMAPSIZE, }; static Lock vmaplock; static int findhole(ulong *a, int n, int count) { int have, i; have = 0; for(i=0; i= count) return i+1 - have; } return -1; } /* * Look for free space in the vmap. */ static ulong vmapalloc(ulong size) { int i, n, o; ulong *vpdb, *pdb; int vpdbsize; pdb = (ulong*)ktoppg.va; vpdb = &pdb[PDX(VMAP)]; vpdbsize = VMAPSIZE/(4*MB); if(size >= 4*MB){ n = (size+4*MB-1) / (4*MB); if((o = findhole(vpdb, vpdbsize, n)) != -1) return VMAP + o*4*MB; return 0; } n = (size+BY2PG-1) / BY2PG; for(i=0; icpuiddx & 0x08) && (getcr4() & 0x10)) pse = 1; else pse = 0; for(off=0; off= 4MB and processor can do it. */ if(pse && (pa+off)%(4*MB) == 0 && (va+off)%(4*MB) == 0 && (size-off) >= 4*MB){ *table = (pa+off)|flag|PTESIZE|PTEVALID; pgsz = 4*MB; }else{ pte = mmuwalk(pdb, va+off, 2, 1); if(*pte&PTEVALID) panic("vmap: va=%#.8lux pa=%#.8lux pte=%#.8lux", va+off, pa+off, *pte); *pte = (pa+off)|flag|PTEVALID; pgsz = BY2PG; } } return 0; } void* vmap(ulong pa, int size) { int osize; ulong o, va, *pdb; /* * might be asking for less than a page. 
	 */
	osize = size;
	/* page-align pa and grow size to cover the requested span */
	o = pa & (BY2PG-1);
	pa -= o;
	size += o;
	size = ROUND(size, BY2PG);

	if(pa == 0){
		print("vmap pa=0 pc=%#.8lux\n", getcallerpc(&pa));
		return nil;
	}
	ilock(&vmaplock);
	pdb = (ulong*)ktoppg.va;
	/* reserve virtual space, then install uncached writable mappings */
	if((va = vmapalloc(size)) == 0
	|| pdbmap(pdb, pa|PTEUNCACHED|PTEWRITE, va, size) < 0){
		iunlock(&vmaplock);
		return 0;
	}
	iunlock(&vmaplock);
	USED(osize);
//	print(" vmap %#.8lux %d => %#.8lux\n", pa+o, osize, va+o);
	/* return the caller's original offset within the first page */
	return (void*)(va + o);
}

/* compatibility wrapper: old interface, third argument ignored */
ulong
upamalloc(ulong pa, int size, int)
{
	return (ulong)vmap(pa, size);
}