Browse Source

Import upstream version 16

Ed L. Cashin 17 years ago
parent
commit
e462139da2
11 changed files with 773 additions and 17 deletions
  1. 15 0
      HACKING
  2. 12 0
      NEWS
  3. 4 3
      README
  4. 16 9
      aoe.c
  5. 7 3
      ata.c
  6. 31 0
      contrib/vblade-15-aio.2.README
  7. 536 0
      contrib/vblade-15-aio.2.diff
  8. 11 0
      contrib/vblade-15-socketfilter.2.README
  9. 139 0
      contrib/vblade-15-socketfilter.2.diff
  10. 1 1
      dat.h
  11. 1 1
      fns.h

+ 15 - 0
HACKING

@@ -10,3 +10,18 @@ Patches should be clean (to the point and easy to read) and should do
 one thing.  Send multiple patches if necessary.  Patches should be
 one thing.  Send multiple patches if necessary.  Patches should be
 generated with "diff -uprN" if possible, and should be designed to be
 generated with "diff -uprN" if possible, and should be designed to be
 applied with "patch -p1".
 applied with "patch -p1".
+
+When possible, the best way to submit a patch is by sending it to the
+aoetools-discuss list.  You can subscribe at the aoetools project web
+page on sourceforge.net.
+
+When you send your patch, here are some things to cover:
+
+  * What version of vblade did you use to generate the patch?
+    (Hopefully it was the latest.)
+
+  * What was your motivation for creating the patch?  That is, what
+    problem does it solve?
+
+  * What testing did you perform to ensure that your patch did not
+    introduce bugs and accomplished what you intended?

+ 12 - 0
NEWS

@@ -1,4 +1,16 @@
 -*- change-log -*-
 -*- change-log -*-
+2008-05-07 Ed L. Cashin <ecashin@coraid.com>
+	add Chris Webb's AIO patch to the contributions
+	add Chris Webb's BPF patch to the contributions
+	vblade-16
+
+2008-02-20 Ed L. Cashin <ecashin@coraid.com>
+	require the amount of data we use, not the amount ethernet requires
+	make sure the packet length agrees with the config query length
+	make sure the packet length agrees with the amount to write
+	remove newline embedded in fw version field of ATA dev ID response
+	vblade-15
+	
 2006-11-20 Sam Hopkins <sah@coraid.com>
 2006-11-20 Sam Hopkins <sah@coraid.com>
 	apply contrib jumbo patch to standard distribution
 	apply contrib jumbo patch to standard distribution
 	add jumbo configuration app. note in README
 	add jumbo configuration app. note in README

+ 4 - 3
README

@@ -7,9 +7,10 @@ seekable file available over an ethernet local area network (LAN) via
 the ATA over Ethernet (AoE) protocol.
 the ATA over Ethernet (AoE) protocol.
 
 
 The seekable file is typically a block device like /dev/md0 but even
 The seekable file is typically a block device like /dev/md0 but even
-regular files will work.  When vblade exports the block storage over
-AoE it becomes a storage target.  Another host on the same LAN can
-access the storage if it has a compatible aoe kernel driver.
+regular files will work.  Sparse files can be especially convenient.
+When vblade exports the block storage over AoE it becomes a storage
+target.  Another host on the same LAN can access the storage if it has
+a compatible aoe kernel driver.
 
 
 BUILDING
 BUILDING
 --------
 --------

+ 16 - 9
aoe.c

@@ -78,7 +78,7 @@ getlba(uchar *p)
 }
 }
 
 
 int
 int
-aoeata(Ata *p)	// do ATA reqeust
+aoeata(Ata *p, int pktlen)	// do ATA reqeust
 {
 {
 	Ataregs r;
 	Ataregs r;
 	int len = 60;
 	int len = 60;
@@ -88,7 +88,7 @@ aoeata(Ata *p)	// do ATA reqeust
 	r.sectors = p->sectors;
 	r.sectors = p->sectors;
 	r.feature = p->err;
 	r.feature = p->err;
 	r.cmd = p->cmd;
 	r.cmd = p->cmd;
-	if (atacmd(&r, (uchar *)(p+1), maxscnt*512) < 0) {
+	if (atacmd(&r, (uchar *)(p+1), maxscnt*512, pktlen - sizeof(*p)) < 0) {
 		p->h.flags |= Error;
 		p->h.flags |= Error;
 		p->h.error = BadArg;
 		p->h.error = BadArg;
 		return len;
 		return len;
@@ -109,13 +109,13 @@ aoeata(Ata *p)	// do ATA reqeust
 // yes, this makes unnecessary copies.
 // yes, this makes unnecessary copies.
 
 
 int
 int
-confcmd(Conf *p)	// process conf request
+confcmd(Conf *p, int payload)	// process conf request
 {
 {
 	int len;
 	int len;
 
 
 	len = ntohs(p->len);
 	len = ntohs(p->len);
 	if (QCMD(p) != Qread)
 	if (QCMD(p) != Qread)
-	if (len > Nconfig)
+	if (len > Nconfig || len > payload)
 		return 0;	// if you can't play nice ...
 		return 0;	// if you can't play nice ...
 	switch (QCMD(p)) {
 	switch (QCMD(p)) {
 	case Qtest:
 	case Qtest:
@@ -156,16 +156,23 @@ confcmd(Conf *p)	// process conf request
 }
 }
 
 
 void
 void
-doaoe(Aoehdr *p)
+doaoe(Aoehdr *p, int n)
 {
 {
 	int len;
 	int len;
+	enum {	// config query header size
+		CHDR_SIZ = sizeof(Conf) - sizeof(((Conf *)0)->data),
+	};
 
 
 	switch (p->cmd) {
 	switch (p->cmd) {
 	case ATAcmd:
 	case ATAcmd:
-		len = aoeata((Ata*)p);
+		if (n < sizeof(Ata))
+			return;
+		len = aoeata((Ata*)p, n);
 		break;
 		break;
 	case Config:
 	case Config:
-		len = confcmd((Conf *)p);
+		if (n < CHDR_SIZ)
+			return;
+		len = confcmd((Conf *)p, n - CHDR_SIZ);
 		if (len == 0)
 		if (len == 0)
 			return;
 			return;
 		break;
 		break;
@@ -202,7 +209,7 @@ aoe(void)
 			perror("read network");
 			perror("read network");
 			exit(1);
 			exit(1);
 		}
 		}
-		if (n < 60)
+		if (n < sizeof(Aoehdr))
 			continue;
 			continue;
 		p = (Aoehdr *) buf;
 		p = (Aoehdr *) buf;
 		if (ntohs(p->type) != 0x88a2)
 		if (ntohs(p->type) != 0x88a2)
@@ -216,7 +223,7 @@ aoe(void)
 			continue;
 			continue;
 		if (nmasks && !maskok(p->src))
 		if (nmasks && !maskok(p->src))
 			continue;
 			continue;
-		doaoe(p);
+		doaoe(p, n);
 	}
 	}
 	free(buf);
 	free(buf);
 }
 }

+ 7 - 3
ata.c

@@ -86,7 +86,7 @@ atainit(void)
 	char buf[64];
 	char buf[64];
 
 
 	setfld(ident, 27, 40, "Coraid EtherDrive vblade");
 	setfld(ident, 27, 40, "Coraid EtherDrive vblade");
-	sprintf(buf, "V%d\n", VBLADE_VERSION);
+	sprintf(buf, "V%d", VBLADE_VERSION);
 	setfld(ident, 23, 8, buf);
 	setfld(ident, 23, 8, buf);
 	setfld(ident, 10, 20, "SSN HERE");
 	setfld(ident, 10, 20, "SSN HERE");
 }
 }
@@ -98,7 +98,7 @@ atainit(void)
  * check for that.
  * check for that.
  */
  */
 int
 int
-atacmd(Ataregs *p, uchar *dp, int ndp)		// do the ata cmd
+atacmd(Ataregs *p, uchar *dp, int ndp, int payload) // do the ata cmd
 {
 {
 	vlong lba;
 	vlong lba;
 	ushort *ip;
 	ushort *ip;
@@ -156,8 +156,12 @@ atacmd(Ataregs *p, uchar *dp, int ndp)		// do the ata cmd
 	}
 	}
 	if (p->cmd == 0x20 || p->cmd == 0x24)
 	if (p->cmd == 0x20 || p->cmd == 0x24)
 		n = getsec(bfd, dp, lba, p->sectors);
 		n = getsec(bfd, dp, lba, p->sectors);
-	else
+	else {
+		// packet should be big enough to contain the data
+		if (payload < 512 * p->sectors)
+			return -1;
 		n = putsec(bfd, dp, lba, p->sectors);
 		n = putsec(bfd, dp, lba, p->sectors);
+	}
 	n /= 512;
 	n /= 512;
 	if (n != p->sectors) {
 	if (n != p->sectors) {
 		p->err = ABRT;
 		p->err = ABRT;

+ 31 - 0
contrib/vblade-15-aio.2.README

@@ -0,0 +1,31 @@
+This proof-of-concept patch modifies vblade to access the underlying block
+device using POSIX asynchronous IO (AIO) rather than using normal blocking
+read() and write(). AIO allows vblade to receive and queue several ATA
+read/write commands at once, returning the response to the client
+asynchronously as each IO operation completes. It should be most beneficial
+for devices which experience very non-sequential IO. An AIO-enabled vblade is
+also a good starting point if you want to generalise vblade to export multiple
+devices without the complexity and overhead of a multithreaded approach.
+
+The patch implements AIO support for both Linux and FreeBSD, but I have not
+tested the FreeBSD support and would therefore be especially interested to
+hear success/failure reports for compiling and running AIO vblade on FreeBSD.
+A SIGIO handler which writes a single byte to a pipe is used to notify the
+main poll() loop that AIO operations have completed and are ready to return to
+the client. Running oprofile on a box with a heavily loaded loopback
+vblade-aio suggests that it spends an inordinate amount of time in the signal
+handler. Some method of poll()ing directly on the AIO events at the same time
+as the socket fd could cut this overhead out completely.
+
+More generally, experimenting on Linux with standard O_DIRECT vblade and
+O_DIRECT vblade-aio on a loopback interface with MTU 9000 suggests that the
+performance difference on a single RAID1-backed block device is fairly small:
+swamped by the performance of the network and the underlying block device.
+However, the POSIX AIO in glibc librt is emulated in userspace threads rather
+than using the kernel AIO api. A kernel-backed POSIX AIO implementation should
+perform better, especially for multiple access to a single block device.
+
+I would be delighted to hear any feedback and experiences from people running
+vblade together with this patch.
+
+Chris Webb <chris@arachsys.com>, 2008-04-21.

+ 536 - 0
contrib/vblade-15-aio.2.diff

@@ -0,0 +1,536 @@
+diff -uprN vblade-15/aoe.c vblade-15-aio/aoe.c
+--- vblade-15/aoe.c	2008-03-07 20:22:16.000000000 +0000
++++ vblade-15-aio/aoe.c	2008-04-19 22:31:21.000000000 +0100
+@@ -8,6 +8,9 @@
+ #include <sys/stat.h>
+ #include <fcntl.h>
+ #include <netinet/in.h>
++#include <errno.h>
++#include <aio.h>
++#include <poll.h>
+ #include "dat.h"
+ #include "fns.h"
+ 
+@@ -22,6 +25,11 @@ char config[Nconfig];
+ int nconfig = 0;
+ int maxscnt = 2;
+ char *ifname;
++int queuepipe[2];
++int pktlen[Nplaces], pending[Nplaces];
++Ata *pkt[Nplaces];
++Ataregs regs[Nplaces];
++struct aiocb aiocb[Nplaces];
+ 
+ void
+ aoead(int fd)			// advertise the virtual blade
+@@ -78,32 +86,52 @@ getlba(uchar *p)
+ }
+ 
+ int
+-aoeata(Ata *p, int pktlen)	// do ATA reqeust
++aoeata(int place)	// do ATA reqeust
+ {
+-	Ataregs r;
+-	int len = 60;
+ 	int n;
++	int len = 60; // minimum ethernet packet size
+ 
+-	r.lba = getlba(p->lba);
+-	r.sectors = p->sectors;
+-	r.feature = p->err;
+-	r.cmd = p->cmd;
+-	if (atacmd(&r, (uchar *)(p+1), maxscnt*512, pktlen - sizeof(*p)) < 0) {
+-		p->h.flags |= Error;
+-		p->h.error = BadArg;
++	regs[place].lba = getlba(pkt[place]->lba);
++	regs[place].sectors = pkt[place]->sectors;
++	regs[place].feature = pkt[place]->err;
++	regs[place].cmd = pkt[place]->cmd;
++	n = atacmd(regs + place, (uchar *)(pkt[place] + 1), maxscnt*512,
++				pktlen[place] - sizeof(Ata), aiocb + place);
++	if (n < 0) {
++		pkt[place]->h.flags |= Error;
++		pkt[place]->h.error = BadArg;
+ 		return len;
++	} else if (n > 0) {
++		pending[place] = 1;
++		return 0;
++	}
++	if (!(pkt[place]->aflag & Write) && (n = pkt[place]->sectors)) {
++		n -= regs[place].sectors;
++		len = sizeof (Ata) + (n*512);
+ 	}
+-	if (!(p->aflag & Write))
+-	if ((n = p->sectors)) {
+-		n -= r.sectors;
++	pkt[place]->sectors = regs[place].sectors;
++	pkt[place]->err = regs[place].err;
++	pkt[place]->cmd = regs[place].status;
++	return len;
++}
++
++int aoeatacomplete(int place, int pktlen)
++{
++	int n;
++	int len = 60; // minimum ethernet packet size
++	atacmdcomplete(regs + place, aiocb + place);
++	if (!(pkt[place]->aflag & Write) && (n = pkt[place]->sectors)) {
++		n -= regs[place].sectors;
+ 		len = sizeof (Ata) + (n*512);
+ 	}
+-	p->sectors = r.sectors;
+-	p->err = r.err;
+-	p->cmd = r.status;
++	pkt[place]->sectors = regs[place].sectors;
++	pkt[place]->err = regs[place].err;
++	pkt[place]->cmd = regs[place].status;
++	pending[place] = 0;
+ 	return len;
+ }
+ 
++
+ #define QCMD(x) ((x)->vercmd & 0xf)
+ 
+ // yes, this makes unnecessary copies.
+@@ -156,8 +184,9 @@ confcmd(Conf *p, int payload)	// process
+ }
+ 
+ void
+-doaoe(Aoehdr *p, int n)
++doaoe(int place)
+ {
++	Aoehdr *p = (Aoehdr *) pkt[place];
+ 	int len;
+ 	enum {	// config query header size
+ 		CHDR_SIZ = sizeof(Conf) - sizeof(((Conf *)0)->data),
+@@ -165,14 +194,16 @@ doaoe(Aoehdr *p, int n)
+ 
+ 	switch (p->cmd) {
+ 	case ATAcmd:
+-		if (n < sizeof(Ata))
++		if (pktlen[place] < sizeof(Ata))
++			return;
++		len = aoeata(place);
++		if (len == 0)
+ 			return;
+-		len = aoeata((Ata*)p, n);
+ 		break;
+ 	case Config:
+-		if (n < CHDR_SIZ)
++		if (pktlen[place] < CHDR_SIZ)
+ 			return;
+-		len = confcmd((Conf *)p, n - CHDR_SIZ);
++		len = confcmd((Conf *)p, pktlen[place] - CHDR_SIZ);
+ 		if (len == 0)
+ 			return;
+ 		break;
+@@ -193,25 +224,129 @@ doaoe(Aoehdr *p, int n)
+ }
+ 
+ void
++doaoecomplete(int place)
++{
++	Aoehdr *p = (Aoehdr *) pkt[place];
++	int len = aoeatacomplete(place, pktlen[place]);
++	memmove(p->dst, p->src, 6);
++	memmove(p->src, mac, 6);
++	p->maj = htons(shelf);
++	p->min = slot;
++	p->flags |= Resp;
++	if (putpkt(sfd, (uchar *) p, len) == -1) {
++		perror("write to network");
++		exit(1);
++	}
++
++}
++
++// allocate the buffer so that the ata data area
++// is page aligned for o_direct on linux
++
++void *
++bufalloc(void **buf, long len)
++{
++	long psize;
++	unsigned long n;
++
++	psize = sysconf(_SC_PAGESIZE);
++	if (psize == -1) {
++		perror("sysconf");
++		exit(EXIT_FAILURE);
++	}
++	n = len/psize + 3;
++	*buf = malloc(psize * n);
++	if (!*buf) {
++		perror("malloc");
++		exit(EXIT_FAILURE);
++	}
++	n = (unsigned long) *buf;
++	n += psize * 2;
++	n &= ~(psize - 1);
++	return (void *) (n - sizeof (Ata));
++}
++
++void
++sigio(int signo) 
++{
++	const char dummy = 0;
++	write(queuepipe[1], &dummy, 1);
++}
++
++void
+ aoe(void)
+ {
+ 	Aoehdr *p;
+-	uchar *buf;
+-	int n, sh;
++	char dummy;
++	int n, place, sh;
+ 	enum { bufsz = 1<<16, };
+-
+-	buf = malloc(bufsz);
++	sigset_t mask, oldmask;
++	struct sigaction sigact;
++	struct pollfd pollfds[2];
++	void *freeme[Nplaces];
++
++	for (n = 0; n < Nplaces; n++) {
++		pkt[n] = bufalloc(freeme + n, bufsz);
++		pending[n] = 0;
++	}
+ 	aoead(sfd);
+ 
++	pipe(queuepipe);
++	fcntl(queuepipe[0], F_SETFL, O_NONBLOCK);
++	fcntl(queuepipe[1], F_SETFL, O_NONBLOCK);
++
++	sigemptyset(&sigact.sa_mask);
++	sigact.sa_flags = 0;
++	sigact.sa_sigaction = (void *) sigio;
++	sigaction(SIGIO, &sigact, NULL);
++
++	sigemptyset(&mask);
++	sigaddset(&mask, SIGIO);
++	sigprocmask(SIG_BLOCK, &mask, &oldmask);
++
++	pollfds[0].fd = queuepipe[0];
++	pollfds[1].fd = sfd;
++	pollfds[0].events = pollfds[1].events = POLLIN;
++
+ 	for (;;) {
+-		n = getpkt(sfd, buf, bufsz);
+-		if (n < 0) {
++		sigprocmask(SIG_SETMASK, &oldmask, NULL);
++		n = poll(pollfds, 2, 1000);
++		sigprocmask(SIG_BLOCK, &mask, NULL);
++
++		if (n < 0 && errno != EINTR) {
++			perror("poll");
++			continue;
++		} else if (n == 0 || pollfds[0].revents & POLLIN) {
++			while(read(queuepipe[0], &dummy, 1) > 0);
++			for (place = 0; place < Nplaces; place++) {
++				if (!pending[place])
++					continue;
++				if (aio_error(aiocb + place) == EINPROGRESS)
++					continue;
++				doaoecomplete(place);
++				pollfds[1].events = POLLIN;
++			}
++		}
++
++		if ((pollfds[1].revents & POLLIN) == 0)
++			continue;
++			
++		for (place = 0; pending[place] && place < Nplaces; place++);
++		if (place >= Nplaces) {
++			pollfds[1].events = 0;
++			continue;
++		}
++
++		pktlen[place] = getpkt(sfd, (uchar *) pkt[place], bufsz);
++		if (pktlen[place] < 0) {
++			if (errno == EINTR)
++				continue;
+ 			perror("read network");
+ 			exit(1);
+ 		}
+-		if (n < sizeof(Aoehdr))
++		if (pktlen[place] < sizeof(Aoehdr))
+ 			continue;
+-		p = (Aoehdr *) buf;
++		p = (Aoehdr *) pkt[place];
+ 		if (ntohs(p->type) != 0x88a2)
+ 			continue;
+ 		if (p->flags & Resp)
+@@ -223,9 +358,10 @@ aoe(void)
+ 			continue;
+ 		if (nmasks && !maskok(p->src))
+ 			continue;
+-		doaoe(p, n);
++		doaoe(place);
+ 	}
+-	free(buf);
++	for (place = 0; place < Nplaces; place++)
++		free(freeme[place]);
+ }
+ 
+ void
+@@ -317,7 +453,7 @@ main(int argc, char **argv)
+ 	}
+ 	if (s.st_mode & (S_IWUSR|S_IWGRP|S_IWOTH))
+ 		omode = O_RDWR;
+-	bfd = open(argv[3], omode);
++	bfd = opendisk(argv[3], omode);
+ 	if (bfd == -1) {
+ 		perror("open");
+ 		exit(1);
+diff -uprN vblade-15/ata.c vblade-15-aio/ata.c
+--- vblade-15/ata.c	2008-03-07 20:22:16.000000000 +0000
++++ vblade-15-aio/ata.c	2008-04-19 22:12:32.000000000 +0100
+@@ -3,6 +3,8 @@
+ #include <string.h>
+ #include <stdio.h>
+ #include <sys/types.h>
++#include <errno.h>
++#include <aio.h>
+ #include "dat.h"
+ #include "fns.h"
+ 
+@@ -98,7 +100,7 @@ atainit(void)
+  * check for that.
+  */
+ int
+-atacmd(Ataregs *p, uchar *dp, int ndp, int payload) // do the ata cmd
++atacmd(Ataregs *p, uchar *dp, int ndp, int payload, struct aiocb *aiocb) // do the ata cmd
+ {
+ 	vlong lba;
+ 	ushort *ip;
+@@ -155,14 +157,29 @@ atacmd(Ataregs *p, uchar *dp, int ndp, i
+ 		return 0;
+ 	}
+ 	if (p->cmd == 0x20 || p->cmd == 0x24)
+-		n = getsec(bfd, dp, lba, p->sectors);
++		n = getsec(bfd, dp, lba, p->sectors, aiocb);
+ 	else {
+ 		// packet should be big enough to contain the data
+ 		if (payload < 512 * p->sectors)
+ 			return -1;
+-		n = putsec(bfd, dp, lba, p->sectors);
++		n = putsec(bfd, dp, lba, p->sectors, aiocb);
+ 	}
+-	n /= 512;
++	if (n < 0) {
++		p->err = ABRT;
++		p->status = ERR|DRDY;
++		p->lba += n;
++		p->sectors -= n;
++		return 0;
++	}
++	return 1; // callback expected
++}
++
++
++int
++atacmdcomplete(Ataregs *p, struct aiocb *aiocb) // complete the ata cmd
++{
++	int n;
++	n = aio_return(aiocb) / 512;
+ 	if (n != p->sectors) {
+ 		p->err = ABRT;
+ 		p->status = ERR;
+@@ -173,4 +190,3 @@ atacmd(Ataregs *p, uchar *dp, int ndp, i
+ 	p->sectors -= n;
+ 	return 0;
+ }
+-
+diff -uprN vblade-15/dat.h vblade-15-aio/dat.h
+--- vblade-15/dat.h	2008-03-07 20:22:16.000000000 +0000
++++ vblade-15-aio/dat.h	2008-04-19 16:34:35.000000000 +0100
+@@ -111,6 +111,8 @@ enum {
+ 	Nconfig = 1024,
+ 
+ 	Bufcount = 16,
++
++	Nplaces = 32,
+ };
+ 
+ int	shelf, slot;
+diff -uprN vblade-15/fns.h vblade-15-aio/fns.h
+--- vblade-15/fns.h	2008-03-07 20:22:16.000000000 +0000
++++ vblade-15-aio/fns.h	2008-04-17 00:11:36.000000000 +0100
+@@ -15,14 +15,16 @@ int	maskok(uchar *);
+ // ata.c
+ 
+ void	atainit(void);
+-int	atacmd(Ataregs *, uchar *, int, int);
++int	atacmd(Ataregs *, uchar *, int, int, struct aiocb *);
++int	atacmdcomplete(Ataregs *, struct aiocb *);
+ 
+ // os specific
+ 
+ int	dial(char *);
+ int	getea(int, char *, uchar *);
+-int	putsec(int, uchar *, vlong, int);
+-int	getsec(int, uchar *, vlong, int);
++int	opendisk(const char *, int);
++int	putsec(int, uchar *, vlong, int, struct aiocb *);
++int	getsec(int, uchar *, vlong, int, struct aiocb *);
+ int	putpkt(int, uchar *, int);
+ int	getpkt(int, uchar *, int);
+ vlong	getsize(int);
+diff -uprN vblade-15/freebsd.c vblade-15-aio/freebsd.c
+--- vblade-15/freebsd.c	2008-03-07 20:22:16.000000000 +0000
++++ vblade-15-aio/freebsd.c	2008-04-19 22:24:57.000000000 +0100
+@@ -241,19 +241,40 @@ getea(int s, char *eth, uchar *ea)
+ 	return(0);
+ }
+ 
+-
+ int
+-getsec(int fd, uchar *place, vlong lba, int nsec)
++opendisk(const char *disk, int omode)
+ {
+-	return pread(fd, place, nsec * 512, lba * 512);
++	return open(disk, omode);
+ }
+ 
+ int
+-putsec(int fd, uchar *place, vlong lba, int nsec)
+-{
+-	return pwrite(fd, place, nsec * 512, lba * 512);
++getsec(int fd, uchar *place, vlong lba, int nsec, struct aiocb *aiocb)
++{       
++        bzero((char *) aiocb, sizeof(struct aiocb));
++        aiocb->aio_fildes = fd;
++        aiocb->aio_buf = place;
++        aiocb->aio_nbytes = nsec * 512;
++        aiocb->aio_offset = lba * 512;
++        aiocb->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
++        aiocb->aio_sigevent.sigev_signo = SIGIO;
++        aiocb->aio_sigevent.sigev_value.sival_ptr = aiocb;
++        return aio_read(aiocb);
+ }
+ 
++int
++putsec(int fd, uchar *place, vlong lba, int nsec, struct aiocb *aiocb)
++{       
++        bzero((char *) aiocb, sizeof(struct aiocb));
++        aiocb->aio_fildes = fd;
++        aiocb->aio_buf = place;
++        aiocb->aio_nbytes = nsec * 512;
++        aiocb->aio_offset = lba * 512;
++        aiocb->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
++        aiocb->aio_sigevent.sigev_signo = SIGIO;
++        aiocb->aio_sigevent.sigev_value.sival_ptr = aiocb;
++        return aio_write(aiocb);
++}       
++
+ static int pktn = 0;
+ static uchar *pktbp = NULL;
+ 
+diff -uprN vblade-15/linux.c vblade-15-aio/linux.c
+--- vblade-15/linux.c	2008-03-07 20:22:16.000000000 +0000
++++ vblade-15-aio/linux.c	2008-04-19 22:23:51.000000000 +0100
+@@ -1,5 +1,6 @@
+ // linux.c: low level access routines for Linux
+ #include "config.h"
++#define _GNU_SOURCE
+ #include <sys/socket.h>
+ #include <stdio.h>
+ #include <string.h>
+@@ -22,6 +23,9 @@
+ #include <netinet/in.h>
+ #include <linux/fs.h>
+ #include <sys/stat.h>
++#include <fcntl.h>
++#include <errno.h>
++#include <aio.h>
+ 
+ #include "dat.h"
+ #include "fns.h"
+@@ -29,8 +33,6 @@
+ int	getindx(int, char *);
+ int	getea(int, char *, uchar *);
+ 
+-
+-
+ int
+ dial(char *eth)		// get us a raw connection to an interface
+ {
+@@ -76,7 +78,7 @@ getea(int s, char *name, uchar *ea)
+ 	struct ifreq xx;
+ 	int n;
+ 
+-        strcpy(xx.ifr_name, name);
++	strcpy(xx.ifr_name, name);
+ 	n = ioctl(s, SIOCGIFHWADDR, &xx);
+ 	if (n == -1) {
+ 		perror("Can't get hw addr");
+@@ -102,17 +104,37 @@ getmtu(int s, char *name)
+ }
+ 
+ int
+-getsec(int fd, uchar *place, vlong lba, int nsec)
++opendisk(const char *disk, int omode)
++{
++	return open(disk, omode|O_DIRECT);
++}
++
++int
++getsec(int fd, uchar *place, vlong lba, int nsec, struct aiocb *aiocb)
+ {
+-	lseek(fd, lba * 512, 0);
+-	return read(fd, place, nsec * 512);
++	bzero((char *) aiocb, sizeof(struct aiocb));
++	aiocb->aio_fildes = fd;
++	aiocb->aio_buf = place;
++	aiocb->aio_nbytes = nsec * 512;
++	aiocb->aio_offset = lba * 512;
++	aiocb->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
++	aiocb->aio_sigevent.sigev_signo = SIGIO;
++	aiocb->aio_sigevent.sigev_value.sival_ptr = aiocb;
++	return aio_read(aiocb);
+ }
+ 
+ int
+-putsec(int fd, uchar *place, vlong lba, int nsec)
++putsec(int fd, uchar *place, vlong lba, int nsec, struct aiocb *aiocb)
+ {
+-	lseek(fd, lba * 512, 0);
+-	return write(fd, place, nsec * 512);
++	bzero((char *) aiocb, sizeof(struct aiocb));
++	aiocb->aio_fildes = fd;
++	aiocb->aio_buf = place;
++	aiocb->aio_nbytes = nsec * 512;
++	aiocb->aio_offset = lba * 512;
++	aiocb->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
++	aiocb->aio_sigevent.sigev_signo = SIGIO;
++	aiocb->aio_sigevent.sigev_value.sival_ptr = aiocb;
++	return aio_write(aiocb);
+ }
+ 
+ int
+diff -uprN vblade-15/linux.h vblade-15-aio/linux.h
+--- vblade-15/linux.h	2008-03-07 20:22:16.000000000 +0000
++++ vblade-15-aio/linux.h	2008-04-16 23:03:07.000000000 +0100
+@@ -6,6 +6,6 @@ typedef long long vlong;
+ int	dial(char *);
+ int	getindx(int, char *);
+ int	getea(int, char *, uchar *);
+-int	getsec(int, uchar *, vlong, int);
+-int	putsec(int, uchar *, vlong, int);
++int	getsec(int, uchar *, vlong, int, struct aiocb *);
++int	putsec(int, uchar *, vlong, int, struct aiocb *);
+ vlong	getsize(int);
+diff -uprN vblade-15/makefile vblade-15-aio/makefile
+--- vblade-15/makefile	2008-03-07 20:22:16.000000000 +0000
++++ vblade-15-aio/makefile	2008-04-16 19:09:46.000000000 +0100
+@@ -13,7 +13,7 @@ CFLAGS += -Wall -g -O2
+ CC = gcc
+ 
+ vblade: $O
+-	${CC} -o vblade $O
++	${CC} -lrt -o vblade $O
+ 
+ aoe.o : aoe.c config.h dat.h fns.h makefile
+ 	${CC} ${CFLAGS} -c $<

+ 11 - 0
contrib/vblade-15-socketfilter.2.README

@@ -0,0 +1,11 @@
+This patch uses the Berkeley Packet Filter (BPF) feature so that the
+kernel only hands packets to a vblade process when they match its
+shelf and slot address.
+
+Without this patch, each vblade process would be woken up only to
+discard an AoE packet destined for a different vblade running on the
+same host.
+
+This patch currently introduces a build dependency on the headers from
+libpcap.  Usually a Linux distribution will include these headers in a
+package with a name something like "libpcap-dev".

+ 139 - 0
contrib/vblade-15-socketfilter.2.diff

@@ -0,0 +1,139 @@
+diff -uNpr vblade-15-orig/freebsd.c vblade-15-lsf/freebsd.c
+--- vblade-15-orig/freebsd.c	2008-03-07 20:22:16.000000000 +0000
++++ vblade-15-lsf/freebsd.c	2008-04-30 10:45:11.000000000 +0100
+@@ -54,40 +54,44 @@ dial(char *eth)
+ 
+ 	/* packet filter for bpf */
+ 	struct bpf_insn bpf_insns[] = {
+-	  /* Load the type into register */
+-	  BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 12),
+-	  /* Does it match AoE Type (0x88a2)? No, goto INVALID */
+-	  BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0x88a2, 0, 10),
+-	  /* Load the flags into register */
+-	  BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 14),
+-	  /* Check to see if the Resp flag is set */
+-	  BPF_STMT(BPF_ALU+BPF_AND+BPF_K, Resp),
+-	  /* Yes, goto INVALID */
+-	  BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, 0, 7),
+-	  /* Load the command into register */
+-	  BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 19),
+-	  /* Is this a ATAcmd? No, goto VALID */
+-	  BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ATAcmd, 0, 4),
+-	  /* Load the shelf number into register */
+-	  BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 16),
+-	  /* Does it match shelf number? No, goto INVALID */
+-	  BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (u_int) shelf, 0, 3),
+-	  /* Load the slot number into register */
+-	  BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 18),
+-	  /* Does it match shelf number? No, goto INVALID */
+-	  BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (u_int) slot, 0, 1),
+-	  /* VALID: return -1 (allow the packet to be read) */
+-	  BPF_STMT(BPF_RET+BPF_K, (u_int)-1),
+-	  /* INVALID: return 0 (ignore the packet) */
+-	  BPF_STMT(BPF_RET+BPF_K, 0),
++		/* Load the type into register */
++		BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 12),
++		/* Does it match AoE Type (0x88a2)? No, goto INVALID */
++		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0x88a2, 0, 12),
++		/* Load the flags into register */
++		BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 14),
++		/* Check to see if the Resp flag is set */
++		BPF_STMT(BPF_ALU+BPF_AND+BPF_K, Resp),
++		/* Yes, goto INVALID */
++		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, 0, 9),
++		/* Load the shelf number into register */
++		BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 16),
++		/* Does it match shelf number? No, goto CHECKBROADCAST */
++		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (u_int) shelf, 0, 2),
++		/* Load the slot number into register */
++		BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 18),
++		/* Does it match slot number? Yes, goto VALID */
++		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (u_int) slot, 4, 0),
++		/* CHECKBROADCAST: is (shelf, slot) == (0xffff, 0xff)? */
++		/* Load the shelf number into register */
++		BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 16),
++		/* Is it 0xffff? No, goto INVALID */
++		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (u_int) 0xffff, 0, 3),
++		/* Load the slot number into register */
++		BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 18),
++		/* Is it 0xff? No, goto INVALID */
++		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (u_int) 0xff, 0, 1),
++		/* VALID: return -1 (allow the packet to be read) */
++		BPF_STMT(BPF_RET+BPF_K, (u_int) -1),
++		/* INVALID: return 0 (ignore the packet) */
++		BPF_STMT(BPF_RET+BPF_K, 0),
+ 	};
+ 
+ 	struct bpf_program bpf_program = {
+-	  sizeof(bpf_insns)/sizeof(struct bpf_insn),
+-	  bpf_insns
++		sizeof(bpf_insns)/sizeof(struct bpf_insn),
++		bpf_insns
+ 	};
+-
+-
++	
+ 	strncpy(device, BPF_DEV, sizeof BPF_DEV);
+ 
+ 	/* find a bpf device we can use, check /dev/bpf[0-9] */
+diff -uNpr vblade-15-orig/linux.c vblade-15-lsf/linux.c
+--- vblade-15-orig/linux.c	2008-03-07 20:22:16.000000000 +0000
++++ vblade-15-lsf/linux.c	2008-04-30 10:46:04.000000000 +0100
+@@ -22,6 +22,7 @@
+ #include <netinet/in.h>
+ #include <linux/fs.h>
+ #include <sys/stat.h>
++#include <pcap-bpf.h>
+ 
+ #include "dat.h"
+ #include "fns.h"
+@@ -54,6 +55,49 @@ dial(char *eth)		// get us a raw connect
+ 		perror("bind funky");
+ 		return -1;
+ 	}
++
++	/* bpf packet filter for socket */
++	struct bpf_insn bpf_insns[] = {
++		/* Load the type into register */
++		BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 12),
++		/* Does it match AoE Type (0x88a2)? No, goto INVALID */
++		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0x88a2, 0, 12),
++		/* Load the flags into register */
++		BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 14),
++		/* Check to see if the Resp flag is set */
++		BPF_STMT(BPF_ALU+BPF_AND+BPF_K, Resp),
++		/* Yes, goto INVALID */
++		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, 0, 9),
++		/* Load the shelf number into register */
++		BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 16),
++		/* Does it match shelf number? No, goto CHECKBROADCAST */
++		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (u_int) shelf, 0, 2),
++		/* Load the slot number into register */
++		BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 18),
++		/* Does it match slot number? Yes, goto VALID */
++		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (u_int) slot, 4, 0),
++		/* CHECKBROADCAST: is (shelf, slot) == (0xffff, 0xff)? */
++		/* Load the shelf number into register */
++		BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 16),
++		/* Is it 0xffff? No, goto INVALID */
++		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (u_int) 0xffff, 0, 3),
++		/* Load the slot number into register */
++		BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 18),
++		/* Is it 0xff? No, goto INVALID */
++		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (u_int) 0xff, 0, 1),
++		/* VALID: return -1 (allow the packet to be read) */
++		BPF_STMT(BPF_RET+BPF_K, (u_int) -1),
++		/* INVALID: return 0 (ignore the packet) */
++		BPF_STMT(BPF_RET+BPF_K, 0),
++	};
++
++	struct bpf_program bpf_program = {
++		sizeof(bpf_insns)/sizeof(struct bpf_insn),
++		bpf_insns
++	};
++
++	setsockopt(s, SOL_SOCKET, SO_ATTACH_FILTER, &bpf_program, sizeof(bpf_program));
++
+ 	return s;
+ }
+ 

+ 1 - 1
dat.h

@@ -6,7 +6,7 @@
  */
  */
 
 
 enum {
 enum {
-	VBLADE_VERSION		= 12,
+	VBLADE_VERSION		= 16,
 
 
 	// Firmware version
 	// Firmware version
 	FWV			= 0x4000 + VBLADE_VERSION,
 	FWV			= 0x4000 + VBLADE_VERSION,

+ 1 - 1
fns.h

@@ -15,7 +15,7 @@ int	maskok(uchar *);
 // ata.c
 // ata.c
 
 
 void	atainit(void);
 void	atainit(void);
-int	atacmd(Ataregs *, uchar *, int);
+int	atacmd(Ataregs *, uchar *, int, int);
 
 
 // os specific
 // os specific