/*
 * intel 10gbe pcie boot driver
 * copyright © 2007—2013, coraid, inc.
 */
#include "u.h"
#include "lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"
#include "etherif.h"

/*
 * register offsets.  each is a byte offset divided by 4 so that it
 * can be used directly as an index into the u32int register window
 * (Ctlr.reg).
 */
enum{
	/* general */
	Ctrl		= 0x00000/4,	/* Device Control */
	Status		= 0x00008/4,	/* Device Status */
	Ctrlext		= 0x00018/4,	/* Extended Device Control */
	Esdp		= 0x00020/4,	/* extended sdp control */
	Esodp		= 0x00028/4,	/* extended od sdp control */
	Ledctl		= 0x00200/4,	/* led control */
	Tcptimer	= 0x0004c/4,	/* tcp timer */
	Ecc		= 0x110b0/4,	/* errata ecc control magic */

	/* nvm */
	Eec		= 0x10010/4,	/* eeprom/flash control */
	Eerd		= 0x10014/4,	/* eeprom read */
	Fla		= 0x1001c/4,	/* flash access */
	Flop		= 0x1013c/4,	/* flash opcode */
	Grc		= 0x10200/4,	/* general rx control */

	/* interrupt */
	Icr		= 0x00800/4,	/* interrupt cause read */
	Ics		= 0x00808/4,	/* " set */
	Ims		= 0x00880/4,	/* " mask read/set */
	Imc		= 0x00888/4,	/* " mask clear */
	Iac		= 0x00810/4,	/* " auto clear */
	Iam		= 0x00890/4,	/* " auto mask enable */
	Itr		= 0x00820/4,	/* " throttling rate (0-19) */
	Ivar		= 0x00900/4,	/* " vector allocation regs. */
	/* msi interrupt */
	Msixt		= 0x0000/4,	/* msix table (bar3) */
	Msipba		= 0x2000/4,	/* msix pending bit array (bar3) */
	Pbacl		= 0x11068/4,	/* pba clear */
	Gpie		= 0x00898/4,	/* general purpose int enable */

	/* flow control */
	Pfctop		= 0x03008/4,	/* priority flow ctl type opcode */
	Fcttv		= 0x03200/4,	/* " transmit timer value (0-3) */
	Fcrtl		= 0x03220/4,	/* " rx threshold low (0-7) +8n */
	Fcrth		= 0x03260/4,	/* " rx threshold high (0-7) +8n */
	Rcrtv		= 0x032a0/4,	/* " refresh value threshold */
	Tfcs		= 0x0ce00/4,	/* " tx status */

	/* rx dma */
	Rbal		= 0x01000/4,	/* rx desc base low (0-63) +0x40n */
	Rbah		= 0x01004/4,	/* " high */
	Rdlen		= 0x01008/4,	/* " length */
	Rdh		= 0x01010/4,	/* " head */
	Rdt		= 0x01018/4,	/* " tail */
	Rxdctl		= 0x01028/4,	/* " control */

	Srrctl		= 0x02100/4,	/* split and replication rx ctl. */
	Dcarxctl	= 0x02200/4,	/* rx dca control */
	Rdrxctl		= 0x02f00/4,	/* rx dma control */
	Rxpbsize	= 0x03c00/4,	/* rx packet buffer size */
	Rxctl		= 0x03000/4,	/* rx control */
	Dropen		= 0x03d04/4,	/* drop enable control */

	/* rx */
	Rxcsum		= 0x05000/4,	/* rx checksum control */
	Rfctl		= 0x04008/4,	/* rx filter control */
	Mta		= 0x05200/4,	/* multicast table array (0-127) */
	Ral		= 0x05400/4,	/* rx address low */
	Rah		= 0x05404/4,
	Psrtype		= 0x05480/4,	/* packet split rx type. */
	Vfta		= 0x0a000/4,	/* vlan filter table array. */
	Fctrl		= 0x05080/4,	/* filter control */
	Vlnctrl		= 0x05088/4,	/* vlan control */
	Msctctrl	= 0x05090/4,	/* multicast control */
	Mrqc		= 0x05818/4,	/* multiple rx queues cmd */
	Vmdctl		= 0x0581c/4,	/* vmdq control */
	Imir		= 0x05a80/4,	/* immediate irq rx (0-7) */
	Imirext		= 0x05aa0/4,	/* immediate irq rx ext */
	Imirvp		= 0x05ac0/4,	/* immediate irq vlan priority */
	Reta		= 0x05c00/4,	/* redirection table */
	Rssrk		= 0x05c80/4,	/* rss random key */

	/* tx */
	Tdbal		= 0x06000/4,	/* tx desc base low +0x40n */
	Tdbah		= 0x06004/4,	/* " high */
	Tdlen		= 0x06008/4,	/* " len */
	Tdh		= 0x06010/4,	/* " head */
	Tdt		= 0x06018/4,	/* " tail */
	Txdctl		= 0x06028/4,	/* " control */
	Tdwbal		= 0x06038/4,	/* " write-back address low */
	Tdwbah		= 0x0603c/4,

	Dtxctl		= 0x04a80/4,	/* tx dma control !82598 */
	Tdcatxctrl	= 0x07200/4,	/* tx dca register (0-15) */
	Tipg		= 0x0cb00/4,	/* tx inter-packet gap */
	Txpbsize	= 0x0cc00/4,	/* tx packet-buffer size (0-15) */

	/* mac */
	Hlreg0		= 0x04240/4,	/* highlander control reg 0 */
	Hlreg1		= 0x04244/4,	/* highlander control reg 1 (ro) */
	Msca		= 0x0425c/4,	/* mdi signal cmd & addr */
	Msrwd		= 0x04260/4,	/* mdi single rw data */
	Mhadd		= 0x04268/4,	/* mac addr high & max frame */
	Pcss1		= 0x04288/4,	/* xgxs status 1 */
	Pcss2		= 0x0428c/4,
	Xpcss		= 0x04290/4,	/* 10gb-x pcs status */
	Serdesc		= 0x04298/4,	/* serdes control */
	Macs		= 0x0429c/4,	/* fifo control & report */
	Autoc		= 0x042a0/4,	/* autodetect control & status */
	Links		= 0x042a4/4,	/* link status */
	Autoc2		= 0x042a8/4,
};

/* bit definitions (and shift counts) for the registers above */
enum{
	/* Ctrl */
	Rst		= 1<<26,	/* full nic reset */

	/* Txdctl */
	Ten		= 1<<25,

	/* Dtxctl */
	Den		= 1<<0,

	/* Fctrl */
	Rfce		= 1<<15,	/* rcv flow control enable */
	Dpf		= 1<<13,	/* discard pause frames */
	Bam		= 1<<10,	/* broadcast accept mode */
	Upe		= 1<<9,		/* unicast promiscuous */
	Mpe		= 1<<8,		/* multicast promiscuous */

	/* Rxdctl */
	Pthresh		= 0,		/* prefetch threshold shift in bits */
	Hthresh		= 8,		/* host buffer minimum threshold " */
	Wthresh		= 16,		/* writeback threshold */
	Renable		= 1<<25,

	/* Rxctl */
	Rxen		= 1<<0,
	Dmbyps		= 1<<1,

	/* Rdrxctl */
	Rdmt½		= 0,
	Rdmt¼		= 1,
	Rdmt⅛		= 2,

	/* Rxcsum */
	Ippcse		= 1<<12,	/* ip payload checksum enable */

	/* Eerd */
	EEstart		= 1<<0,		/* Start Read */
	EEdone		= 1<<1,		/* Read done */

	/* interrupts */
	Irx0		= 1<<0,		/* driver defined */
	Itx0		= 1<<1,		/* driver defined */
	Lsc		= 1<<20,	/* link status change */
	Ioc		= 1<<31,	/* other cause */

	/* Links */
	Lnkup		= 1<<30,
	Lnkspd8		= 1<<29,
	Lnkspd9		= 3<<28,

	/* Hlreg0 */
	Txcrcen		= 1<<0,
	Jumboen		= 1<<2,

	/* Ivar */
	Ivtx		= 1|1<<7,	/* transmit interrupt */
	Ivrx		= 0|1<<7,	/* receive interrupt */
};

typedef struct Ctlr Ctlr;
typedef struct Ctlrtype Ctlrtype;
typedef struct Rd Rd;
typedef struct Rbpool Rbpool;
typedef struct Stat Stat;
typedef struct Td Td;

/* supported controller families; used as an index into cttab */
enum {
	i82598,
	i82599,
	x540,
	Nctlrtype,
};

/* per-family parameters */
struct Ctlrtype {
	int	type;
	int	mtu;
	int	flag;
	char	*name;
};

/* Ctlrtype.flag */
enum {
	Fphyoc		= 1<<0,		/* phy link needs other cause interrupt */
	Fsplitivar	= 1<<1,		/* tx and rx use different ivar entries */
	Fphyspd		= 1<<2,		/* phy speed useful (part supports <10gbe) */
	Ftxctl		= 1<<3,		/* part has txctl register */
};

/* real mtu is 12k.  use standard 9k to save memory */
static Ctlrtype cttab[Nctlrtype] = {
	{ i82598,	9*1024,	Fsplitivar|Fphyoc,	"i82598", },
	{ i82599,	9*1024,	Fphyspd|Ftxctl,		"i82599", },
	{ x540,		9*1024,	Fphyspd|Ftxctl,		"x540", },
};

/* rx descriptor status */
enum{
	Pif	= 1<<7,		/* past exact filter (sic) */
	Ipcs	= 1<<6,		/* ip checksum calculated */
	L4cs	= 1<<5,		/* layer 4 */
	Tcpcs	= 1<<4,		/* tcp checksum calculated */
	Vp	= 1<<3,		/* 802.1q packet matched vet */
	Ixsm	= 1<<2,		/* ignore checksum */
	Reop	= 1<<1,		/* end of packet */
	Rdd	= 1<<0,		/* descriptor done */
};

/* receive descriptor, as laid out in the descriptor ring */
struct Rd {
	u32int	addr[2];
	u16int	length;
	u16int	cksum;
	uchar	status;
	uchar	errors;
	u16int	vlan;
};

enum{
	/* Td cmd */
	Rs	= 1<<3,
	Ic	= 1<<2,
	Ifcs	= 1<<1,
	Teop	= 1<<0,

	/* Td status */
	Tdd	= 1<<0,
};

/* transmit descriptor, as laid out in the descriptor ring */
struct Td {
	u32int	addr[2];
	u16int	length;
	uchar	cso;
	uchar	cmd;
	uchar	status;
	uchar	css;
	u16int	vlan;
};

/* Ctlr.flag */
enum{
	Factive		= 1<<0,
	Fstarted	= 1<<1,
};

/* per-controller driver state */
struct Ctlr {
	Pcidev	*p;
	u32int	*reg;		/* register window, indexed by the offsets above */
	uchar	flag;
	int	pool;
	int	nrd, ntd, nrb, rbsz;

	Lock	tlock;		/* held by transmit() */

	uint	im;		/* software copy of the interrupt mask */
	Lock	imlock;		/* guards im and writes to Ims */

	char	*alloc;		/* single allocation backing rdba, tdba, rb and tb */

	Rd	*rdba;		/* rx descriptor ring */
	Block	**rb;		/* Block attached to each rx descriptor */
	uint	rdt, rdfree;
	uint	rdh;

	Td	*tdba;		/* tx descriptor ring */
	uint	tdh, tdt;
	Block	**tb;		/* Block attached to each tx descriptor */

	uchar	ra[Eaddrlen];	/* station address read from the eeprom */
	uchar	mta[128];
	int	type;		/* index into cttab */
};

/* tweakable parameters */
enum{
	Nrd	= 32,		/* rx descriptors; Next() requires a power of 2 */
	Ntd	= 32,		/* tx descriptors; Next() requires a power of 2 */
	Nctlr	= 4,
};

static	Ctlr	*ctlrtab[Nctlr];
static	Lock	rblock[Nctlr];
static	Block	*rbpool[Nctlr];
static	int	nctlr;

/* return the printable name of c's controller family */
char*
cname(Ctlr *c)
{
	return cttab[c->type].name;
}

/* add the bits in i to the interrupt mask and enable them in hardware */
static void
im(Ctlr *c, int i)
{
	ilock(&c->imlock);
	c->im |= i;
	c->reg[Ims] = c->im;
	iunlock(&c->imlock);
}

/* successor of ring index x; m is the ring size minus one */
#define Next(x, m)	(((x)+1) & (m))

/*
 * reclaim transmitted descriptors.  starting with the slot after tdh,
 * free the Block of every descriptor the hardware has marked done and
 * clear its status.  returns the index of the last slot reclaimed
 * (tdh itself if none were).
 */
static int
cleanup(Ctlr *c, int tdh)
{
	Block *b;
	uint m, n;

	m = c->ntd-1;
	while(c->tdba[n = Next(tdh, m)].status&Tdd){
		tdh = n;
		b = c->tb[tdh];
		c->tb[tdh] = 0;
		freeb(b);
		c->tdba[tdh].status = 0;
	}
	return tdh;
}

/*
 * move up to 8 packets from the interface's ring buffer onto the tx
 * descriptor ring and advance the hardware tail.  if the descriptor
 * ring is full, enable the tx interrupt instead.
 */
static void
transmit(Ether *e)
{
	uint i, m, tdt, tdh;
	Ctlr *c;
	Block *b;
	Td *t;
	RingBuf *tb;

	c = e->ctlr;
	ilock(&c->tlock);
	tdh = c->tdh = cleanup(c, c->tdh);
	tdt = c->tdt;
	m = c->ntd-1;
	for(i = 0; i<8; i++){
		if(Next(tdt, m) == tdh){
			/* ring full: ask for an interrupt when space frees up */
			im(c, Itx0);
			break;
		}
		tb = e->tb + e->ti;
		if(tb->owner != Interface)
			break;
		b = fromringbuf(e);
		tb->owner = Host;
		e->ti = NEXT(e->ti, e->ntb);
		t = c->tdba+tdt;
		t->addr[0] = PCIWADDR(b->rp);
		t->length = BLEN(b);
		t->cmd = Rs|Ifcs|Teop;
		c->tb[tdt] = b;
		tdt = Next(tdt, m);
	}
	if(i){
		c->tdt = tdt;
		/* descriptors must be visible before the tail moves */
		coherence();
		c->reg[Tdt] = tdt;
	}
	iunlock(&c->tlock);
}

/*
 * disable the receiver, release any Blocks still attached to the rx
 * ring, program filtering, buffer size and the descriptor ring
 * registers, then re-enable the receiver.
 */
static void
rxinit(Ctlr *c)
{
	Block *b;
	int i;

	c->reg[Rxctl] &= ~Rxen;
	/* NOTE(review): loop bound restored after markup stripping ("inrd") */
	for(i = 0; i<c->nrd; i++){
		b = c->rb[i];
		c->rb[i] = 0;
		if(b)
			freeb(b);
	}
	c->rdfree = 0;

	c->reg[Fctrl] |= Bam;
	/*
	 * NOTE(review): Ipcs is declared with the rx descriptor status
	 * bits; Ippcse is the constant declared for Rxcsum.  confirm
	 * which bit is intended here.
	 */
	c->reg[Rxcsum] |= Ipcs;
	c->reg[Srrctl] = (c->rbsz+1023)/1024;
	c->reg[Mhadd] = c->rbsz<<16;
	c->reg[Hlreg0] |= Jumboen;

	c->reg[Rbal] = PCIWADDR(c->rdba);
	c->reg[Rbah] = 0;
	c->reg[Rdlen] = c->nrd*sizeof(Rd);
	c->reg[Rdh] = 0;
	c->reg[Rdt] = c->rdt = 0;

	c->reg[Rdrxctl] = Rdmt¼;
	/*
	 * NOTE(review): everything after "8<" on this statement was lost
	 * to markup stripping and has been reconstructed; confirm the
	 * threshold values against the upstream driver.
	 */
	c->reg[Rxdctl] = 8<<Wthresh | 8<<Pthresh | 4<<Hthresh | Renable;
	c->reg[Rxctl] |= Rxen|Dmbyps;
}

/*
 * attach a fresh receive buffer to every free rx descriptor from the
 * current tail up to (but not including) the slot before rdh, then
 * publish the new tail to the hardware.
 */
static void
replenish(Ctlr *c, uint rdh)
{
	uint rdt, m, i;
	Rd *r;
	Block *b;

	m = c->nrd-1;
	i = 0;
	for(rdt = c->rdt; Next(rdt, m) != rdh; rdt = Next(rdt, m)){
		r = c->rdba+rdt;
		/* same size the hardware was told about in rxinit (Srrctl) */
		b = allocb(c->rbsz);
		c->rb[rdt] = b;
		r->addr[0] = PCIWADDR(b->rp);
		r->addr[1] = 0;		/* Pciwaddrh(b->rp); */
		r->status = 0;
		c->rdfree++;
		i++;
	}
	if(i){
		/* descriptors must be visible before the tail moves */
		coherence();
		c->reg[Rdt] = c->rdt = rdt;
	}
}

/*
 * refill the rx ring, then copy every completed packet into the
 * interface's ring buffer and free its Block.
 */
static void
rx(Ether *e)
{
	Ctlr *c;
	Block *b;
	Rd *r;
	uint m;

	c = e->ctlr;
	m = c->nrd-1;
	replenish(c, c->rdh);
	for(;;){
		r = c->rdba+c->rdh;
		if(!(r->status&Rdd))
			return;
		b = c->rb[c->rdh];
		c->rb[c->rdh] = 0;
		b->wp += r->length;
		toringbuf(e, b->rp, BLEN(b));
		freeb(b);
		c->rdfree--;
		c->rdh = Next(c->rdh, m);
	}
}

/*
 * mask all interrupts and reset the nic, then clear state that the
 * reset leaves behind.  returns 0 on success, -1 if the reset bit
 * has not cleared after 100 polls at 1ms intervals.
 */
static int
detach(Ctlr *c)
{
	int i;

	c->reg[Imc] = ~0;
	c->reg[Ctrl] |= Rst;
	for(i = 0; i < 100; i++){
		delay(1);
		if((c->reg[Ctrl]&Rst) == 0)
			break;
	}
	if(i == 100)
		return -1;

	/* errata */
	delay(50);
	c->reg[Ecc] &= ~(1<<21|1<<18|1<<9|1<<6);

	/*
	 * not cleared by reset; kill it manually.
	 * the Ral/Rah pairs are 8 bytes (two registers) apart, so entry
	 * i lives at Rah+2*i.  the loop previously wrote Rah itself on
	 * every pass and never reached entries 1-15.
	 */
	for(i = 1; i<16; i++)
		c->reg[Rah+2*i] &= ~(1<<31);
	for(i = 0; i<128; i++)
		c->reg[Mta+i] = 0;
	/* NOTE(review): starts at 1, so Vfta[0] is not cleared — confirm */
	for(i = 1; i<640; i++)
		c->reg[Vfta+i] = 0;
	return 0;
}

/* Ether detach hook: reset the controller, ignoring the result */
static void
shutdown(Ether *e)
{
	detach(e->ctlr);
}

/*
 * read eeprom word i.  ≤ 20ms
 * NOTE(review): spins until EEdone is set; there is no timeout.
 */
static ushort
eeread(Ctlr *c, int i)
{
	c->reg[Eerd] = EEstart|i<<2;
	while((c->reg[Eerd]&EEdone) == 0)
		;
	return c->reg[Eerd]>>16;
}

/*
 * validate the eeprom signature and checksum, then load the station
 * address into c->ra.  returns 0 on success, -1 on a bad eeprom.
 */
static int
eeload(Ctlr *c)
{
	ushort u, v, p, l, i, j;

	if((eeread(c, 0)&0xc0) != 0x40)
		return -1;

	/* sum the first 0x40 words ... */
	u = 0;
	for(i = 0; i < 0x40; i++)
		u += eeread(c, i);
	/* ... and every section reached through pointer words 3-0xe */
	for(i = 3; i < 0xf; i++){
		if(c->type == x540 && (i == 4 || i == 5))
			continue;
		p = eeread(c, i);
		l = eeread(c, p++);
		if((int)p+l+1 > 0xffff)
			continue;
		for(j = p; j < p+l; j++)
			u += eeread(c, j);
	}
	if(u != 0xbaba)
		return -1;

	/* Status bit 3 selects which pointer word locates the address */
	if(c->reg[Status]&1<<3)
		u = eeread(c, 10);
	else
		u = eeread(c, 9);
	u++;
	/*
	 * NOTE(review): the loop header and the eeread call were lost to
	 * markup stripping ("ira[i++]") and have been reconstructed;
	 * confirm the word index (u + i/2) against the upstream driver.
	 */
	for(i = 0; i<Eaddrlen;){
		v = eeread(c, u + i/2);
		c->ra[i++] = v;
		c->ra[i++] = v>>8;
	}
	/* offset the last byte by the two-bit field at Status bits 2-3 */
	c->ra[5] += (c->reg[Status]&0xc)>>2;
	return 0;
}

/*
 * reset the controller, load the station address from the eeprom and
 * program address filter 0, flow control, interrupt routing and
 * interrupt throttling.  returns 0 on success, -1 on failure.
 */
static int
reset(Ctlr *c)
{
	uchar *p;
	int i;

	if(detach(c)){
		print("%s: reset timeout\n", cname(c));
		return -1;
	}
	if(eeload(c)){
		print("%s: eeprom failure\n", cname(c));
		return -1;
	}
	p = c->ra;
	c->reg[Ral] = p[3]<<24|p[2]<<16|p[1]<<8|p[0];
	c->reg[Rah] = p[5]<<8|p[4]|1<<31;

	c->reg[Ctrlext] |= 1<<16;

	/* make some guesses for flow control */
	c->reg[Fcrtl] = 0x10000|1<<31;
	c->reg[Fcrth] = 0x40000|1<<31;
	c->reg[Rcrtv] = 0x6000;

	/* configure interrupt mapping (don't ask) */
	if(cttab[c->type].flag & Fsplitivar){
		c->reg[Ivar+0] = Ivrx;
		c->reg[Ivar+64/4] = Ivtx;
	}else
		c->reg[Ivar+0] = Ivtx<<8 | Ivrx;

	/*
	 * interrupt throttling goes here.
	 * NOTE(review): the loop bound was lost to markup stripping
	 * ("ireg[i]"); Itr+20 is taken from the (0-19) range noted on
	 * the Itr register definition.
	 */
	for(i = Itr; i<Itr+20; i++)
		c->reg[i] = 128;		/* ¼µs intervals */
	c->reg[Itr+Itx0] = 256;
	return 0;
}

/*
 * release any Blocks still attached to the tx ring, clear the ring,
 * program the descriptor ring registers and enable the transmitter.
 */
static void
txinit(Ctlr *c)
{
	Block *b;
	int i;

	/*
	 * NOTE(review): the rest of this statement and the following
	 * loop header were lost to markup stripping ("16<ntd") and have
	 * been reconstructed; confirm the threshold values.
	 */
	c->reg[Txdctl] = 16<<Wthresh | 16<<Pthresh;
	for(i = 0; i<c->ntd; i++){
		b = c->tb[i];
		c->tb[i] = 0;
		if(b)
			freeb(b);
	}
	memset(c->tdba, 0, c->ntd*sizeof(Td));
	c->reg[Tdbal] = PCIWADDR(c->tdba);
	c->reg[Tdbah] = 0;
	c->reg[Tdlen] = c->ntd*sizeof(Td);
	c->reg[Tdh] = 0;
	c->reg[Tdt] = 0;
	/* cleanup() examines the slot after tdh, so start one behind 0 */
	c->tdh = c->ntd-1;
	c->tdt = 0;
	if(cttab[c->type].flag & Ftxctl)
		c->reg[Dtxctl] |= Den;
	c->reg[Txdctl] |= Ten;
}

/*
 * Ether attach hook: allocate the descriptor rings and Block tables
 * in one chunk, initialise rx and tx, fill the rx ring and enable the
 * rx interrupt.
 */
static void
attach(Ether *e)
{
	Ctlr *c;
	int t;

	c = e->ctlr;
	c->nrd = Nrd;
	c->ntd = Ntd;
	/* each ring gets 255 bytes of slack for the 256-byte roundup */
	t = c->nrd*sizeof *c->rdba+255;
	t += c->ntd*sizeof *c->tdba+255;
	t += (c->ntd+c->nrd)*sizeof(Block*);
	c->alloc = malloc(t);
	if(c->alloc == 0){
		/* don't hand the nic descriptor rings at address 0 */
		print("%s: no memory for descriptor rings\n", cname(c));
		return;
	}
	/*
	 * NOTE(review): rxinit and txinit read c->rb[] and c->tb[] before
	 * anything is stored there, so this relies on malloc returning
	 * zeroed memory — confirm for this environment.
	 */
	c->rdba = (Rd*)ROUNDUP((uintptr)c->alloc, 256);
	c->tdba = (Td*)ROUNDUP((uintptr)(c->rdba+c->nrd), 256);
	c->rb = (Block**)(c->tdba+c->ntd);
	c->tb = (Block**)(c->rb+c->nrd);

	rxinit(c);
	txinit(c);

	c->rdh = 0;
	replenish(c, c->rdh);
	im(c, Irx0);
}

/*
 * interrupt handler: mask everything, service rx and tx causes until
 * none of the enabled causes remain pending, then restore the mask
 * that was in force on entry.
 *
 * NOTE(review): transmit() can call im(), which takes imlock while it
 * is already held here, and the mask bit im() sets is overwritten by
 * the restore at the end — confirm this is intended.
 */
static void
interrupt(Ureg*, void *v)
{
	Ether *e;
	Ctlr *c;
	int icr, im;

	e = v;
	c = e->ctlr;
	ilock(&c->imlock);
	c->reg[Imc] = ~0;
	im = c->im;
	while(icr = c->reg[Icr]&c->im){
		if(icr&Lsc){
			/* link status change: nothing to do */
		}
		if(icr&Irx0)
			rx(e);
		if(icr&Itx0)
			transmit(e);
	}
	c->reg[Ims] = c->im = im;
	iunlock(&c->imlock);
}

/*
 * if the subsystem vendor id is 0x1b52 and the device id reads as 1,
 * take the device id from the upper half of the subsystem id word.
 */
static void
hbafixup(Pcidev *p)
{
	uint i;

	i = pcicfgr32(p, PciSVID);
	if((i & 0xffff) == 0x1b52 && p->did == 1)
		p->did = i>>16;
}

/*
 * walk the pci bus for supported intel parts, map and reset each one
 * and record it in ctlrtab.  stops once ctlrtab is full.
 */
static void
scan(void)
{
	char *name;
	ulong io, type, mem;
	Ctlr *c;
	Pcidev *p;

	p = 0;
	while(p = pcimatch(p, 0x8086, 0)){
		hbafixup(p);
		switch(p->did){
		case 0x10c6:		/* 82598 af dual port */
		case 0x10c7:		/* 82598 af single port */
		case 0x10b6:		/* 82598 backplane */
		case 0x10dd:		/* 82598 at cx4 */
		case 0x10ec:		/* 82598 at cx4 */
			type = i82598;
			break;
		case 0x10f7:		/* 82599 kx/kx4 */
		case 0x10f8:		/* 82599 backplane */
		case 0x10f9:		/* 82599 cx4 */
		case 0x10fb:		/* 82599 sfi/sfp+ */
		case 0x10fc:		/* 82599 xaui */
		case 0x151c:		/* 82599 base t kx/kx4 “niantic” */
			type = i82599;
			break;
		case 0x1528:		/* x540-at2 “twinville” */
			type = x540;
			break;
		default:
			continue;
		}
		name = cttab[type].name;
		if(nctlr == nelem(ctlrtab)){
			print("%s: too many controllers\n", name);
			return;
		}
		io = p->mem[0].bar&~0xf;
		mem = upamalloc(io, p->mem[0].size, 0);
		if(mem == 0){
			print("%s: cant map %#p\n", name, p->mem[0].bar);
			continue;
		}
		c = malloc(sizeof *c);
		if(c == 0){
			print("%s: no memory for controller\n", name);
			continue;
		}
		c->p = p;
		c->reg = (u32int*)KADDR(mem);
		c->rbsz = 2048;
		c->type = type;
		if(reset(c)){
			print("%s: cant reset\n", name);
			/* the register mapping from upamalloc is not released */
			free(c);
			continue;
		}
		pcisetbme(p);
		c->pool = nctlr;
		ctlrtab[nctlr++] = c;
	}
}

/*
 * probe entry point.  scans the bus on the first call, then binds e
 * to the first controller that is not yet active and matches e->port
 * (any controller if e->port is 0).  returns 0 on success, -1 if no
 * controller is available.
 */
int
i82598pnp(Ether *e)
{
	Ctlr *c;
	int i;
	static int once;

	if(once == 0){
		scan();
		once = 1;
	}
	/*
	 * NOTE(review): the loop bound and the ctlrtab lookup were lost
	 * to markup stripping ("iflag") and have been reconstructed.
	 */
	for(i = 0; i<nctlr; i++){
		c = ctlrtab[i];
		if(c->flag&Factive)
			continue;
		if(e->port == 0 || e->port == (ulong)c->reg)
			goto found;
	}
	return -1;
found:
	c->flag |= Factive;
	e->ctlr = c;
	e->port = (uintptr)c->reg;
	e->irq = c->p->intl;
	e->tbdf = c->p->tbdf;
	e->mbps = 10000;
	memmove(e->ea, c->ra, Eaddrlen);
	e->attach = attach;
	e->interrupt = interrupt;
	e->transmit = transmit;
	e->detach = shutdown;
	return 0;
}