/* * Copyright © 1999 Vita Nuova Limited * * this doesn't attempt to implement 387 floating-point properties * that aren't visible in the Inferno environment. in particular, * all arithmetic is done in double precision, not extended precision. * furthermore, the FP trap status isn't updated. */ #ifdef TEST #include #include #include #include "fpi.h" #include "tst.h" #else #include #include "ureg.h" #include "fpi.h" #include "../port/lib.h" #include "mem.h" #include "dat.h" #include "fns.h" #include "io.h" #endif #define fabs Fabs typedef struct FPI FPI; struct FPI { char* name; void (*f)(Ureg*, int, void*, Internal*, Internal*); int dstf; }; enum { RndNearest = 0, RndDown, RndUp, Rnd0, C0 = 1<<8, C1 = 1<<9, C2 = 1<<10, C3 = 1<<14, }; int fpemudebug = 0; static Internal fpconst[7] = { /* indexed by op&7 */ /* s, e, l, h */ {0, 0x3FF, 0x00000000, 0x08000000}, /* 1 */ {0, 0x400, 0x0BCD1B8A, 0x0D49A784}, /* l2t */ {0, 0x3FF, 0x095C17F0, 0x0B8AA3B2}, /* l2e */ {0, 0x400, 0x022168C2, 0x0C90FDAA}, /* pi */ {0, 0x3FD, 0x04FBCFF7, 0x09A209A8}, /* lg2 */ {0, 0x3FE, 0x07D1CF79, 0x0B17217F}, /* ln2 */ {0, 0x1, 0x00000000, 0x00000000}, /* z */ }; static Internal *fpstk(int i); #define ST(x) (*fpstk((x))) #define I387 (up->env->fpu) /* BUG: check fetch (not worthwhile in Inferno) */ #define getubyte(a) (*(uchar*)(a)) #define getuword(a) (*(ushort*)(a)) #define getulong(a) (*(ulong*)(a)) static void popfp(void) { ushort *s; s = &I387.status; *s = (*s & ~0x3800) | ((*s + 0x0800) & 0x3800); } static void pushfp(void) { ushort *s; s = &I387.status; *s = (*s & ~0x3800) | ((*s + 0x3800) & 0x3800); } static Internal * fpstk(int i) { return (Internal*)I387.istack[(i+(I387.status>>11))&7]; } static void fldc(Ureg*, int op, void*, Internal*, Internal *d) { *d = fpconst[op&7]; } static void fabs(Ureg*, int, void*, Internal*, Internal *d) { d->s = 0; } static void fchs(Ureg*, int, void*, Internal*, Internal *d) { d->s ^= 1; } static void fadd(Ureg*, int, void*, Internal *s, Internal *d) { Internal l, r; l = *s; r = *d; (l.s == r.s? fpiadd: fpisub)(&l, &r, d); } static void fsub(Ureg*, int, void*, Internal *s, Internal *d) { Internal l, r; l = *s; r = *d; l.s ^= 1; (l.s == r.s? fpiadd: fpisub)(&l, &r, d); } static void fsubr(Ureg*, int, void*, Internal *s, Internal *d) { Internal l, r; l = *s; r = *d; r.s ^= 1; (l.s == r.s? fpiadd: fpisub)(&r, &l, d); } static void fmul(Ureg*, int, void*, Internal *s, Internal *d) { Internal l, r; l = *s; r = *d; fpimul(&l, &r, d); } static void fdiv(Ureg*, int, void*, Internal *s, Internal *d) { Internal l, r; l = *s; r = *d; fpidiv(&l, &r, d); } static void fdivr(Ureg*, int, void*, Internal *s, Internal *d) { Internal l, r; l = *s; r = *d; fpidiv(&r, &l, d); } static void fcom(Ureg*, int, void*, Internal *s, Internal *d) { int i; ushort *p; p = &I387.status; if(IsWeird(s) || IsWeird(d)){ *p |= C0|C2|C3; /* BUG: should trap if not masked */ return; } *p &= ~(C0|C2|C3); i = fpicmp(d, s); if(i < 0) *p |= C0; else if(i == 0) *p |= C3; } static void fpush(Ureg*, int op, void*, Internal*, Internal*) { Internal *p; p = &ST(op & 7); pushfp(); ST(0) = *p; } static void fmov(Ureg*, int, void*, Internal *s, Internal *d) { *d = *s; } static void fmovr(Ureg*, int, void*, Internal *s, Internal *d) { *s = *d; } static void fxch(Ureg*, int, void*, Internal *s, Internal *d) { Internal t; t = *s; *s = *d; *d = t; } static void frstor(Ureg*, int, void *s, Internal*, Internal*) { validaddr(s, 108, 0); memmove(&I387, s, 108); } static void fsave(Ureg*, int, void *d, Internal*, Internal*) { validaddr(d, 108, 1); memmove(d, &I387, 108); I387.control = 0x037F; I387.status = 0; I387.tag = 0; } static void fstsw(Ureg*, int, void *d, Internal*, Internal*) { validaddr(d, 2, 1); *(short*)d = I387.status; } static void fldenv(Ureg*, int, void *s, Internal*, Internal*) { validaddr(s, 28, 0); memmove(&I387, s, 28); } static void fldcw(Ureg*, int, void *s, Internal*, Internal*) { validaddr(s, 2, 0); I387.control = *(short*)s; } static void fstenv(Ureg*, int, void *d, Internal*, Internal*) { validaddr(d, 4*7, 1); memmove(d, &I387, 4*7); } static void fstcw(Ureg*, int, void *d, Internal*, Internal*) { validaddr(d, 2, 1); *(short*)d = I387.control; } static void fincstp(Ureg*, int, void*, Internal*, Internal*) { popfp(); } static void fdecstp(Ureg*, int, void*, Internal*, Internal*) { pushfp(); } static void fscale(Ureg*, int, void*, Internal *s, Internal *d) { Word w; fpii2w(&w, s); /* should truncate towards zero ... */ d->e += w; } static void fstswax(Ureg *ur, int, void*, Internal*, Internal*) { ur->ax = (ur->ax & ~0xFFFF) | (I387.status & 0xFFFF); } static void ftst(Ureg*, int, void*, Internal*, Internal *d) { ushort *p; p = &I387.status; if(IsWeird(d)){ *p |= C0|C2|C3; return; } *p &= ~(C0|C2|C3); fpinormalise(d); if(IsZero(d)) *p |= C3; else if(d->s) *p |=C0; } static void frndint(Ureg*, int, void*, Internal*, Internal *d) { fpiround(d); /* BUG: doesn't look at rounding mode */ } static void fnop(Ureg*, int, void*, Internal*, Internal*) { } enum { Fpop1= 1<<0, Fpop2 = 1<<1, Fload = 1<<2, }; /* * %e - effective address - Mod R/M value * %f - floating point register F0-F7 - from Mod R/M register */ static void fload(Ureg*, int, void*, Internal*, Internal*); static void fstore(Ureg*, int, void*, Internal*, Internal*); #define X(a,b) (((a)<<2)|(b)) static FPI optab1[4][4] = { /* normal mod r/m operand */ [0] { [0] {"FLDENV %e", fldenv, 0}, [1] {"FLDCW %e", fldcw, 0}, [2] {"FSTENV %e", fstenv, 0}, [3] {"FSTCW %e", fstcw, 0}, }, [1] { [1] {"FMOVX %e,F0", nil, Fload}, [3] {"FMOVXP F0,%e", nil, Fpop1}, }, [2] { [0] {"FRSTOR %e", frstor, 0}, [2] {"FSAVE %e", fsave, 0}, [3] {"FSTSW %e", fstsw, 0}, }, [3] { [0] {"FMOVB %e", nil, 0}, [1] {"FMOVV %e,F0", nil, Fload}, [2] {"FMOVBP %e", nil, Fpop1}, [3] {"FMOVVP F0,%e", nil, Fpop1}, }, }; #undef X static FPI optab2a[1<<3] = { /* A=0 */ [0] {"FADDx %e,F0", fadd, 0}, [1] {"FMULx %e,F0", fmul, 0}, [2] {"FCOMx %e,F0", fcom, 0}, [3] {"FCOMxP %e,F0", fcom, Fpop1}, [4] {"FSUBx %e,F0", fsub, 0}, [5] {"FSUBRx %e,F0", fsubr, 0}, /* ?? */ [6] {"FDIVx %e,F0", fdiv, 0}, [7] {"FDIVRx %e,F0", fdivr, 0}, /* ?? */ }; static FPI optab2b[1<<2] = { /* A=1, B=0,2,3 */ [0] {"FMOVx %e,F0", fload, Fload}, [2] {"FMOVx F0,%e", fstore, 0}, [3] {"FMOVxP F0,%e", fstore, Fpop1}, }; #define X(d,P,B) ((d<<4)|(P<<3)|B) static FPI optab3a[1<<5] = { /* A=0 */ [X(0,0,0)] {"FADDD %f,F0", fadd, 0}, [X(1,0,0)] {"FADDD F0,%f", fadd, 0}, [X(1,1,0)] {"FADDDP F0,%f", fadd, Fpop1}, [X(0,0,1)] {"FMULD %f,F0", fmul, 0}, [X(1,0,1)] {"FMULD F0,%f", fmul, 0}, [X(1,1,1)] {"FMULDP F0,%f", fmul, Fpop1}, [X(0,0,2)] {"FCOMD %f,F0", fcom, 0}, [X(0,0,3)] {"FCOMDP %f,F0", fcom, Fpop1}, [X(1,1,3)] {"FCOMDPP", fcom, Fpop1|Fpop2}, [X(0,0,4)] {"FSUBD %f,F0", fsub, 0}, [X(1,0,4)] {"FSUBRD F0,%f", fsubr, 0}, [X(1,1,4)] {"FSUBRDP F0,%f", fsubr, Fpop1}, [X(0,0,5)] {"FSUBRD %f,F0", fsubr, 0}, [X(1,0,5)] {"FSUBD F0,%f", fsub, 0}, [X(1,1,5)] {"FSUBDP F0,%f", fsub, Fpop1}, [X(0,1,5)] {"FUCOMPP", fcom, Fpop1|Fpop2}, [X(0,0,6)] {"FDIVD %f,F0", fdiv, 0}, [X(1,0,6)] {"FDIVRD F0,%f", fdivr, 0}, [X(1,1,6)] {"FDIVRDP F0,%f", fdivr, Fpop1}, [X(0,0,7)] {"FDIVRD %f,F0", fdivr, 0}, [X(1,0,7)] {"FDIVD F0,%f", fdiv, 0}, [X(1,1,7)] {"FDIVDP F0,%f", fdiv, Fpop1}, }; static FPI optab3b[1<<5] = { /* A=1 */ [X(0,0,0)] {"FMOVD %f,F0", fmov, Fload}, [X(0,0,1)] {"FXCHD %f,F0", fxch, 0}, [X(0,0,2)] {"FNOP", fnop, 0}, /* F0 only */ [X(1,0,0)] {"FFREED %f", fnop, 0}, [X(1,0,2)] {"FMOVD F0,%f", fmovr, 0}, [X(1,0,3)] {"FMOVDP F0,%f", fmovr, Fpop1}, [X(1,1,4)] {"FSTSW AX", fstswax, 0}, [X(1,0,4)] {"FUCOMD %f,F0", fcom, 0}, [X(1,0,5)] {"FUCOMDP %f,F0", fcom, Fpop1}, }; #undef X static FPI optab4[1<<6] = { [0x00] {"FCHS", fchs, 0}, [0x01] {"FABS", fabs, 0}, [0x04] {"FTST", ftst, 0}, [0x05] {"FXAM", nil, 0}, [0x08] {"FLD1", fldc, Fload}, [0x09] {"FLDL2T", fldc, Fload}, [0x0a] {"FLDL2E", fldc, Fload}, [0x0b] {"FLDPI", fldc, Fload}, [0x0c] {"FLDLG2", fldc, Fload}, [0x0d] {"FLDLN2", fldc, Fload}, [0x0e] {"FLDZ", fldc, Fload}, [0x10] {"F2XM1", nil, 0}, [0x11] {"FYL2X", nil, 0}, [0x12] {"FPTAN", nil, 0}, [0x13] {"FPATAN", nil, 0}, [0x14] {"FXTRACT", nil, 0}, [0x15] {"FPREM1", nil, 0}, [0x16] {"FDECSTP", fdecstp, 0}, [0x17] {"FINCSTP", fincstp, 0}, [0x18] {"FPREM", nil, 0}, [0x19] {"FYL2XP1", nil, 0}, [0x1a] {"FSQRT", nil, 0}, [0x1b] {"FSINCOS", nil, 0}, [0x1c] {"FRNDINT", frndint, 0}, [0x1d] {"FSCALE", fscale, 0}, [0x1e] {"FSIN", nil, 0}, [0x1f] {"FCOS", nil, 0}, }; static void loadr32(void *s, Internal *d) { validaddr(s, 4, 0); fpis2i(d, s); } static void loadi32(void *s, Internal *d) { validaddr(s, 4, 0); fpiw2i(d, s); } static void loadr64(void *s, Internal *d) { validaddr(s, 8, 0); fpid2i(d, s); } static void loadi16(void *s, Internal *d) { Word w; validaddr(s, 2, 0); w = *(short*)s; fpiw2i(d, &w); } static void (*loadf[4])(void*, Internal*) ={ loadr32, loadi32, loadr64, loadi16 }; static void storer32(Internal s, void *d) { validaddr(d, 4, 1); fpii2s(d, &s); } static void storei32(Internal s, void *d) { validaddr(d, 4, 1); fpii2w(d, &s); } static void storer64(Internal s, void *d) { validaddr(d, 8, 1); fpii2d(d, &s); } static void storei16(Internal s, void *d) { Word w; validaddr(d, 2, 1); fpii2w(&w, &s); if((short)w != w) ; /* overflow */ *(short*)d = w; } static void (*storef[4])(Internal, void*) ={ storer32, storei32, storer64, storei16 }; static void fload(Ureg*, int op, void *mem, Internal*, Internal *d) { (*loadf[(op>>9)&3])(mem, d); } static void fstore(Ureg*, int op, void *mem, Internal *s, Internal*) { (*storef[(op>>9)&3])(*s, mem); } #define REG(x) (*(ulong*)(((char*)ur)+roff[(x)])) static int roff[] = { offsetof(Ureg, ax), offsetof(Ureg, cx), offsetof(Ureg, dx), offsetof(Ureg, bx), offsetof(Ureg, ecode), /* ksp */ offsetof(Ureg, bp), offsetof(Ureg, si), offsetof(Ureg, di), }; static long getdisp(Ureg *ur, int mod, int rm) { uchar c; long disp; if(mod > 2) return 0; disp = 0; if(mod == 1) { c = getubyte(ur->pc++); if(c&0x80) disp = c|(~0<<8); else disp = c; } else if(mod == 2 || rm == 5) { disp = getulong(ur->pc); ur->pc += 4; } if(mod || rm != 5) disp += REG(rm); /* base */ return disp; } static ulong modrm(Ureg *ur, uchar c) { uchar rm, mod; int reg; ulong base; mod = (c>>6)&3; rm = c&7; if(mod == 3) /* register */ error("sys: fpemu: invalid addr mode"); /* no 16-bit mode */ if(rm == 4) { /* scummy sib byte */ c = getubyte(ur->pc++); reg = (c>>3)&0x07; /* index */ base = getdisp(ur, mod, c&7); if(reg != 4) base += (REG(reg) << (c>>6)); /* index */ if(fpemudebug>1) print("ur=#%lux sib=#%x reg=%d mod=%d base=%d basev=#%lux sp=%lux\n", ur, c, reg, mod, c&7, base, ur->usp); return base; } if(rm == 5 && mod == 0){ ur->pc += 4; return getulong(ur->pc-4); } return getdisp(ur, mod, rm); } static void * ea(Ureg *ur, uchar op) { ulong addr; addr = modrm(ur, op); I387.operand = addr; if(fpemudebug>1) print("EA=#%lux\n", addr); return (void*)addr; } void fpi387(Ureg *ur) { int op, i; ulong pc; FPenv *ufp; FPI *fp; Internal tmp, *s, *d; void *mem; char buf[60]; ur->ecode = (ulong)&ur->sp; /* BUG: TEMPORARY compensation for incorrect Ureg for kernel mode */ ufp = &up->env->fpu; /* because all the state is in Osenv, it need not be saved/restored */ if(ufp->fpistate != FPACTIVE) { ufp->fpistate = FPACTIVE; ufp->control = 0x037f; ufp->status = 0; ufp->tag = 0; ufp->oselector = 0x17; } while((op = getubyte(ur->pc)) >= 0xd8 && op <= 0xdf || op == 0x9B){ if(op == 0x9B){ /* WAIT */ ur->pc++; continue; } if(ufp->control & ufp->status & 0x3F) ufp->status |= 0x8000; else ufp->status &= 0x7FFF; pc = ur->pc; op = (op<<8) | getubyte(pc+1); ufp->selector = ur->cs; ufp->r4 = op-0xD800; ur->pc += 2; mem = nil; s = nil; d = nil; /* decode op, following table 10.2.4 in i486 handbook */ i = op & 0xFFE0; if(i == 0xD9E0){ fp = &optab4[op&0x1F]; s = &ST(0); if(fp->dstf & Fload) pushfp(); d = &ST(0); } else if(i == 0xDBE0){ i = op & 0x1F; if(i == 2){ /* FCLEX */ ufp->status &= 0x7f00; continue; } else if(i == 3){ /* FINIT */ ufp->control = 0x037f; ufp->status = 0; ufp->tag = 0; continue; } fp = nil; } else if((op & 0xF8C0) == 0xD8C0){ i = ((op>>6)&030)|((op>>3)&7); if(op & (1<<8)){ fp = &optab3b[i]; s = &ST(op&7); if(fp->dstf & Fload) pushfp(); d = &ST(0); } else { fp = &optab3a[i]; i = op & 7; if(op & (1<<10)){ s = &ST(0); d = &ST(i); }else{ s = &ST(i); d = &ST(0); } } } else if((op & 0xF920) == 0xD920){ mem = ea(ur, op&0xFF); fp = &optab1[(op>>9)&3][(op>>3)&3]; } else { mem = ea(ur, op&0xFF); if(op & (1<<8)){ /* load/store */ fp = &optab2b[(op>>3)&7]; if(fp->dstf & Fload){ pushfp(); d = &ST(0); } else s = &ST(0); } else { /* mem OP reg */ fp = &optab2a[(op>>3)&7]; (*loadf[(op>>9)&3])(mem, &tmp); s = &tmp; d = &ST(0); } } if(fp == nil || fp->f == nil){ if(fp == nil || fp->name == nil) snprint(buf, sizeof(buf), "sys: fp: pc=%lux invalid fp 0x%.4x", pc, op); else snprint(buf, sizeof(buf), "sys: fp: pc=%lux unimp fp 0x%.4x (%s)", pc, op, fp->name); error(buf); } if(fpemudebug) print("%8.8lux %.4x %s\n", pc, op, fp->name); (*fp->f)(ur, op, mem, s, d); if(fp->dstf & Fpop1){ popfp(); if(fp->dstf & Fpop2) popfp(); } if(anyhigher()) sched(); } }