diff -uprN vblade-15/aoe.c vblade-15-aio/aoe.c --- vblade-15/aoe.c 2008-03-07 20:22:16.000000000 +0000 +++ vblade-15-aio/aoe.c 2008-04-19 22:31:21.000000000 +0100 @@ -8,6 +8,9 @@ #include #include #include +#include +#include +#include #include "dat.h" #include "fns.h" @@ -22,6 +25,11 @@ char config[Nconfig]; int nconfig = 0; int maxscnt = 2; char *ifname; +int queuepipe[2]; +int pktlen[Nplaces], pending[Nplaces]; +Ata *pkt[Nplaces]; +Ataregs regs[Nplaces]; +struct aiocb aiocb[Nplaces]; void aoead(int fd) // advertise the virtual blade @@ -78,32 +86,52 @@ getlba(uchar *p) } int -aoeata(Ata *p, int pktlen) // do ATA reqeust +aoeata(int place) // do ATA reqeust { - Ataregs r; - int len = 60; int n; + int len = 60; // minimum ethernet packet size - r.lba = getlba(p->lba); - r.sectors = p->sectors; - r.feature = p->err; - r.cmd = p->cmd; - if (atacmd(&r, (uchar *)(p+1), maxscnt*512, pktlen - sizeof(*p)) < 0) { - p->h.flags |= Error; - p->h.error = BadArg; + regs[place].lba = getlba(pkt[place]->lba); + regs[place].sectors = pkt[place]->sectors; + regs[place].feature = pkt[place]->err; + regs[place].cmd = pkt[place]->cmd; + n = atacmd(regs + place, (uchar *)(pkt[place] + 1), maxscnt*512, + pktlen[place] - sizeof(Ata), aiocb + place); + if (n < 0) { + pkt[place]->h.flags |= Error; + pkt[place]->h.error = BadArg; return len; + } else if (n > 0) { + pending[place] = 1; + return 0; + } + if (!(pkt[place]->aflag & Write) && (n = pkt[place]->sectors)) { + n -= regs[place].sectors; + len = sizeof (Ata) + (n*512); } - if (!(p->aflag & Write)) - if ((n = p->sectors)) { - n -= r.sectors; + pkt[place]->sectors = regs[place].sectors; + pkt[place]->err = regs[place].err; + pkt[place]->cmd = regs[place].status; + return len; +} + +int aoeatacomplete(int place, int pktlen) +{ + int n; + int len = 60; // minimum ethernet packet size + atacmdcomplete(regs + place, aiocb + place); + if (!(pkt[place]->aflag & Write) && (n = pkt[place]->sectors)) { + n -= regs[place].sectors; len = sizeof (Ata) + (n*512); } - p->sectors = r.sectors; - p->err = r.err; - p->cmd = r.status; + pkt[place]->sectors = regs[place].sectors; + pkt[place]->err = regs[place].err; + pkt[place]->cmd = regs[place].status; + pending[place] = 0; return len; } + #define QCMD(x) ((x)->vercmd & 0xf) // yes, this makes unnecessary copies. @@ -156,8 +184,9 @@ confcmd(Conf *p, int payload) // process } void -doaoe(Aoehdr *p, int n) +doaoe(int place) { + Aoehdr *p = (Aoehdr *) pkt[place]; int len; enum { // config query header size CHDR_SIZ = sizeof(Conf) - sizeof(((Conf *)0)->data), @@ -165,14 +194,16 @@ doaoe(Aoehdr *p, int n) switch (p->cmd) { case ATAcmd: - if (n < sizeof(Ata)) + if (pktlen[place] < sizeof(Ata)) + return; + len = aoeata(place); + if (len == 0) return; - len = aoeata((Ata*)p, n); break; case Config: - if (n < CHDR_SIZ) + if (pktlen[place] < CHDR_SIZ) return; - len = confcmd((Conf *)p, n - CHDR_SIZ); + len = confcmd((Conf *)p, pktlen[place] - CHDR_SIZ); if (len == 0) return; break; @@ -193,25 +224,129 @@ doaoe(Aoehdr *p, int n) } void +doaoecomplete(int place) +{ + Aoehdr *p = (Aoehdr *) pkt[place]; + int len = aoeatacomplete(place, pktlen[place]); + memmove(p->dst, p->src, 6); + memmove(p->src, mac, 6); + p->maj = htons(shelf); + p->min = slot; + p->flags |= Resp; + if (putpkt(sfd, (uchar *) p, len) == -1) { + perror("write to network"); + exit(1); + } + +} + +// allocate the buffer so that the ata data area +// is page aligned for o_direct on linux + +void * +bufalloc(void **buf, long len) +{ + long psize; + unsigned long n; + + psize = sysconf(_SC_PAGESIZE); + if (psize == -1) { + perror("sysconf"); + exit(EXIT_FAILURE); + } + n = len/psize + 3; + *buf = malloc(psize * n); + if (!*buf) { + perror("malloc"); + exit(EXIT_FAILURE); + } + n = (unsigned long) *buf; + n += psize * 2; + n &= ~(psize - 1); + return (void *) (n - sizeof (Ata)); +} + +void +sigio(int signo) +{ + const char dummy = 0; + write(queuepipe[1], &dummy, 1); +} + +void aoe(void) { Aoehdr *p; - uchar *buf; - int n, sh; + char dummy; + int n, place, sh; enum { bufsz = 1<<16, }; - - buf = malloc(bufsz); + sigset_t mask, oldmask; + struct sigaction sigact; + struct pollfd pollfds[2]; + void *freeme[Nplaces]; + + for (n = 0; n < Nplaces; n++) { + pkt[n] = bufalloc(freeme + n, bufsz); + pending[n] = 0; + } aoead(sfd); + pipe(queuepipe); + fcntl(queuepipe[0], F_SETFL, O_NONBLOCK); + fcntl(queuepipe[1], F_SETFL, O_NONBLOCK); + + sigemptyset(&sigact.sa_mask); + sigact.sa_flags = 0; + sigact.sa_sigaction = (void *) sigio; + sigaction(SIGIO, &sigact, NULL); + + sigemptyset(&mask); + sigaddset(&mask, SIGIO); + sigprocmask(SIG_BLOCK, &mask, &oldmask); + + pollfds[0].fd = queuepipe[0]; + pollfds[1].fd = sfd; + pollfds[0].events = pollfds[1].events = POLLIN; + for (;;) { - n = getpkt(sfd, buf, bufsz); - if (n < 0) { + sigprocmask(SIG_SETMASK, &oldmask, NULL); + n = poll(pollfds, 2, 1000); + sigprocmask(SIG_BLOCK, &mask, NULL); + + if (n < 0 && errno != EINTR) { + perror("poll"); + continue; + } else if (n == 0 || pollfds[0].revents & POLLIN) { + while(read(queuepipe[0], &dummy, 1) > 0); + for (place = 0; place < Nplaces; place++) { + if (!pending[place]) + continue; + if (aio_error(aiocb + place) == EINPROGRESS) + continue; + doaoecomplete(place); + pollfds[1].events = POLLIN; + } + } + + if ((pollfds[1].revents & POLLIN) == 0) + continue; + + for (place = 0; pending[place] && place < Nplaces; place++); + if (place >= Nplaces) { + pollfds[1].events = 0; + continue; + } + + pktlen[place] = getpkt(sfd, (uchar *) pkt[place], bufsz); + if (pktlen[place] < 0) { + if (errno == EINTR) + continue; perror("read network"); exit(1); } - if (n < sizeof(Aoehdr)) + if (pktlen[place] < sizeof(Aoehdr)) continue; - p = (Aoehdr *) buf; + p = (Aoehdr *) pkt[place]; if (ntohs(p->type) != 0x88a2) continue; if (p->flags & Resp) @@ -223,9 +358,10 @@ aoe(void) continue; if (nmasks && !maskok(p->src)) continue; - doaoe(p, n); + doaoe(place); } - free(buf); + for (place = 0; place < Nplaces; place++) + free(freeme[place]); } void @@ -317,7 +453,7 @@ main(int argc, char **argv) } if (s.st_mode & (S_IWUSR|S_IWGRP|S_IWOTH)) omode = O_RDWR; - bfd = open(argv[3], omode); + bfd = opendisk(argv[3], omode); if (bfd == -1) { perror("open"); exit(1); diff -uprN vblade-15/ata.c vblade-15-aio/ata.c --- vblade-15/ata.c 2008-03-07 20:22:16.000000000 +0000 +++ vblade-15-aio/ata.c 2008-04-19 22:12:32.000000000 +0100 @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include "dat.h" #include "fns.h" @@ -98,7 +100,7 @@ atainit(void) * check for that. */ int -atacmd(Ataregs *p, uchar *dp, int ndp, int payload) // do the ata cmd +atacmd(Ataregs *p, uchar *dp, int ndp, int payload, struct aiocb *aiocb) // do the ata cmd { vlong lba; ushort *ip; @@ -155,14 +157,29 @@ atacmd(Ataregs *p, uchar *dp, int ndp, i return 0; } if (p->cmd == 0x20 || p->cmd == 0x24) - n = getsec(bfd, dp, lba, p->sectors); + n = getsec(bfd, dp, lba, p->sectors, aiocb); else { // packet should be big enough to contain the data if (payload < 512 * p->sectors) return -1; - n = putsec(bfd, dp, lba, p->sectors); + n = putsec(bfd, dp, lba, p->sectors, aiocb); } - n /= 512; + if (n < 0) { + p->err = ABRT; + p->status = ERR|DRDY; + p->lba += n; + p->sectors -= n; + return 0; + } + return 1; // callback expected +} + + +int +atacmdcomplete(Ataregs *p, struct aiocb *aiocb) // complete the ata cmd +{ + int n; + n = aio_return(aiocb) / 512; if (n != p->sectors) { p->err = ABRT; p->status = ERR; @@ -173,4 +190,3 @@ atacmd(Ataregs *p, uchar *dp, int ndp, i p->sectors -= n; return 0; } - diff -uprN vblade-15/dat.h vblade-15-aio/dat.h --- vblade-15/dat.h 2008-03-07 20:22:16.000000000 +0000 +++ vblade-15-aio/dat.h 2008-04-19 16:34:35.000000000 +0100 @@ -111,6 +111,8 @@ enum { Nconfig = 1024, Bufcount = 16, + + Nplaces = 32, }; int shelf, slot; diff -uprN vblade-15/fns.h vblade-15-aio/fns.h --- vblade-15/fns.h 2008-03-07 20:22:16.000000000 +0000 +++ vblade-15-aio/fns.h 2008-04-17 00:11:36.000000000 +0100 @@ -15,14 +15,16 @@ int maskok(uchar *); // ata.c void atainit(void); -int atacmd(Ataregs *, uchar *, int, int); +int atacmd(Ataregs *, uchar *, int, int, struct aiocb *); +int atacmdcomplete(Ataregs *, struct aiocb *); // os specific int dial(char *); int getea(int, char *, uchar *); -int putsec(int, uchar *, vlong, int); -int getsec(int, uchar *, vlong, int); +int opendisk(const char *, int); +int putsec(int, uchar *, vlong, int, struct aiocb *); +int getsec(int, uchar *, vlong, int, struct aiocb *); int putpkt(int, uchar *, int); int getpkt(int, uchar *, int); vlong getsize(int); diff -uprN vblade-15/freebsd.c vblade-15-aio/freebsd.c --- vblade-15/freebsd.c 2008-03-07 20:22:16.000000000 +0000 +++ vblade-15-aio/freebsd.c 2008-04-19 22:24:57.000000000 +0100 @@ -241,19 +241,40 @@ getea(int s, char *eth, uchar *ea) return(0); } - int -getsec(int fd, uchar *place, vlong lba, int nsec) +opendisk(const char *disk, int omode) { - return pread(fd, place, nsec * 512, lba * 512); + return open(disk, omode); } int -putsec(int fd, uchar *place, vlong lba, int nsec) -{ - return pwrite(fd, place, nsec * 512, lba * 512); +getsec(int fd, uchar *place, vlong lba, int nsec, struct aiocb *aiocb) +{ + bzero((char *) aiocb, sizeof(struct aiocb)); + aiocb->aio_fildes = fd; + aiocb->aio_buf = place; + aiocb->aio_nbytes = nsec * 512; + aiocb->aio_offset = lba * 512; + aiocb->aio_sigevent.sigev_notify = SIGEV_SIGNAL; + aiocb->aio_sigevent.sigev_signo = SIGIO; + aiocb->aio_sigevent.sigev_value.sival_ptr = aiocb; + return aio_read(aiocb); } +int +putsec(int fd, uchar *place, vlong lba, int nsec, struct aiocb *aiocb) +{ + bzero((char *) aiocb, sizeof(struct aiocb)); + aiocb->aio_fildes = fd; + aiocb->aio_buf = place; + aiocb->aio_nbytes = nsec * 512; + aiocb->aio_offset = lba * 512; + aiocb->aio_sigevent.sigev_notify = SIGEV_SIGNAL; + aiocb->aio_sigevent.sigev_signo = SIGIO; + aiocb->aio_sigevent.sigev_value.sival_ptr = aiocb; + return aio_write(aiocb); +} + static int pktn = 0; static uchar *pktbp = NULL; diff -uprN vblade-15/linux.c vblade-15-aio/linux.c --- vblade-15/linux.c 2008-03-07 20:22:16.000000000 +0000 +++ vblade-15-aio/linux.c 2008-04-19 22:23:51.000000000 +0100 @@ -1,5 +1,6 @@ // linux.c: low level access routines for Linux #include "config.h" +#define _GNU_SOURCE #include #include #include @@ -22,6 +23,9 @@ #include #include #include +#include +#include +#include #include "dat.h" #include "fns.h" @@ -29,8 +33,6 @@ int getindx(int, char *); int getea(int, char *, uchar *); - - int dial(char *eth) // get us a raw connection to an interface { @@ -76,7 +78,7 @@ getea(int s, char *name, uchar *ea) struct ifreq xx; int n; - strcpy(xx.ifr_name, name); + strcpy(xx.ifr_name, name); n = ioctl(s, SIOCGIFHWADDR, &xx); if (n == -1) { perror("Can't get hw addr"); @@ -102,17 +104,37 @@ getmtu(int s, char *name) } int -getsec(int fd, uchar *place, vlong lba, int nsec) +opendisk(const char *disk, int omode) +{ + return open(disk, omode|O_DIRECT); +} + +int +getsec(int fd, uchar *place, vlong lba, int nsec, struct aiocb *aiocb) { - lseek(fd, lba * 512, 0); - return read(fd, place, nsec * 512); + bzero((char *) aiocb, sizeof(struct aiocb)); + aiocb->aio_fildes = fd; + aiocb->aio_buf = place; + aiocb->aio_nbytes = nsec * 512; + aiocb->aio_offset = lba * 512; + aiocb->aio_sigevent.sigev_notify = SIGEV_SIGNAL; + aiocb->aio_sigevent.sigev_signo = SIGIO; + aiocb->aio_sigevent.sigev_value.sival_ptr = aiocb; + return aio_read(aiocb); } int -putsec(int fd, uchar *place, vlong lba, int nsec) +putsec(int fd, uchar *place, vlong lba, int nsec, struct aiocb *aiocb) { - lseek(fd, lba * 512, 0); - return write(fd, place, nsec * 512); + bzero((char *) aiocb, sizeof(struct aiocb)); + aiocb->aio_fildes = fd; + aiocb->aio_buf = place; + aiocb->aio_nbytes = nsec * 512; + aiocb->aio_offset = lba * 512; + aiocb->aio_sigevent.sigev_notify = SIGEV_SIGNAL; + aiocb->aio_sigevent.sigev_signo = SIGIO; + aiocb->aio_sigevent.sigev_value.sival_ptr = aiocb; + return aio_write(aiocb); } int diff -uprN vblade-15/linux.h vblade-15-aio/linux.h --- vblade-15/linux.h 2008-03-07 20:22:16.000000000 +0000 +++ vblade-15-aio/linux.h 2008-04-16 23:03:07.000000000 +0100 @@ -6,6 +6,6 @@ typedef long long vlong; int dial(char *); int getindx(int, char *); int getea(int, char *, uchar *); -int getsec(int, uchar *, vlong, int); -int putsec(int, uchar *, vlong, int); +int getsec(int, uchar *, vlong, int, struct aiocb *); +int putsec(int, uchar *, vlong, int, struct aiocb *); vlong getsize(int); diff -uprN vblade-15/makefile vblade-15-aio/makefile --- vblade-15/makefile 2008-03-07 20:22:16.000000000 +0000 +++ vblade-15-aio/makefile 2008-04-16 19:09:46.000000000 +0100 @@ -13,7 +13,7 @@ CFLAGS += -Wall -g -O2 CC = gcc vblade: $O - ${CC} -o vblade $O + ${CC} -lrt -o vblade $O aoe.o : aoe.c config.h dat.h fns.h makefile ${CC} ${CFLAGS} -c $<