vblade-17-aio.2.diff 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538
  1. diff -uprN vblade-17.orig/aoe.c vblade-17/aoe.c
  2. --- vblade-17.orig/aoe.c 2008-06-09 10:53:07.000000000 -0400
  3. +++ vblade-17/aoe.c 2008-06-09 11:05:23.000000000 -0400
  4. @@ -8,6 +8,9 @@
  5. #include <sys/stat.h>
  6. #include <fcntl.h>
  7. #include <netinet/in.h>
  8. +#include <errno.h>
  9. +#include <aio.h>
  10. +#include <poll.h>
  11. #include "dat.h"
  12. #include "fns.h"
  13. @@ -22,6 +25,11 @@ char config[Nconfig];
  14. int nconfig = 0;
  15. int maxscnt = 2;
  16. char *ifname;
  17. +int queuepipe[2];
  18. +int pktlen[Nplaces], pending[Nplaces];
  19. +Ata *pkt[Nplaces];
  20. +Ataregs regs[Nplaces];
  21. +struct aiocb aiocb[Nplaces];
  22. void
  23. aoead(int fd) // advertise the virtual blade
  24. @@ -78,32 +86,52 @@ getlba(uchar *p)
  25. }
  26. int
  27. -aoeata(Ata *p, int pktlen) // do ATA reqeust
  28. +aoeata(int place) // do ATA request
  29. {
  30. - Ataregs r;
  31. - int len = 60;
  32. int n;
  33. + int len = 60; // minimum ethernet packet size
  34. - r.lba = getlba(p->lba);
  35. - r.sectors = p->sectors;
  36. - r.feature = p->err;
  37. - r.cmd = p->cmd;
  38. - if (atacmd(&r, (uchar *)(p+1), maxscnt*512, pktlen - sizeof(*p)) < 0) {
  39. - p->h.flags |= Error;
  40. - p->h.error = BadArg;
  41. + regs[place].lba = getlba(pkt[place]->lba);
  42. + regs[place].sectors = pkt[place]->sectors;
  43. + regs[place].feature = pkt[place]->err;
  44. + regs[place].cmd = pkt[place]->cmd;
  45. + n = atacmd(regs + place, (uchar *)(pkt[place] + 1), maxscnt*512,
  46. + pktlen[place] - sizeof(Ata), aiocb + place);
  47. + if (n < 0) {
  48. + pkt[place]->h.flags |= Error;
  49. + pkt[place]->h.error = BadArg;
  50. return len;
  51. + } else if (n > 0) {
  52. + pending[place] = 1;
  53. + return 0;
  54. + }
  55. + if (!(pkt[place]->aflag & Write) && (n = pkt[place]->sectors)) {
  56. + n -= regs[place].sectors;
  57. + len = sizeof (Ata) + (n*512);
  58. }
  59. - if (!(p->aflag & Write))
  60. - if ((n = p->sectors)) {
  61. - n -= r.sectors;
  62. + pkt[place]->sectors = regs[place].sectors;
  63. + pkt[place]->err = regs[place].err;
  64. + pkt[place]->cmd = regs[place].status;
  65. + return len;
  66. +}
  67. +
  68. +int aoeatacomplete(int place, int pktlen)
  69. +{
  70. + int n;
  71. + int len = 60; // minimum ethernet packet size
  72. + atacmdcomplete(regs + place, aiocb + place);
  73. + if (!(pkt[place]->aflag & Write) && (n = pkt[place]->sectors)) {
  74. + n -= regs[place].sectors;
  75. len = sizeof (Ata) + (n*512);
  76. }
  77. - p->sectors = r.sectors;
  78. - p->err = r.err;
  79. - p->cmd = r.status;
  80. + pkt[place]->sectors = regs[place].sectors;
  81. + pkt[place]->err = regs[place].err;
  82. + pkt[place]->cmd = regs[place].status;
  83. + pending[place] = 0;
  84. return len;
  85. }
  86. +
  87. #define QCMD(x) ((x)->vercmd & 0xf)
  88. // yes, this makes unnecessary copies.
  89. @@ -156,8 +184,9 @@ confcmd(Conf *p, int payload) // process
  90. }
  91. void
  92. -doaoe(Aoehdr *p, int n)
  93. +doaoe(int place)
  94. {
  95. + Aoehdr *p = (Aoehdr *) pkt[place];
  96. int len;
  97. enum { // config query header size
  98. CHDR_SIZ = sizeof(Conf) - sizeof(((Conf *)0)->data),
  99. @@ -165,14 +194,16 @@ doaoe(Aoehdr *p, int n)
  100. switch (p->cmd) {
  101. case ATAcmd:
  102. - if (n < sizeof(Ata))
  103. + if (pktlen[place] < sizeof(Ata))
  104. + return;
  105. + len = aoeata(place);
  106. + if (len == 0)
  107. return;
  108. - len = aoeata((Ata*)p, n);
  109. break;
  110. case Config:
  111. - if (n < CHDR_SIZ)
  112. + if (pktlen[place] < CHDR_SIZ)
  113. return;
  114. - len = confcmd((Conf *)p, n - CHDR_SIZ);
  115. + len = confcmd((Conf *)p, pktlen[place] - CHDR_SIZ);
  116. if (len == 0)
  117. return;
  118. break;
  119. @@ -193,25 +224,129 @@ doaoe(Aoehdr *p, int n)
  120. }
  121. void
  122. +doaoecomplete(int place)
  123. +{
  124. + Aoehdr *p = (Aoehdr *) pkt[place];
  125. + int len = aoeatacomplete(place, pktlen[place]);
  126. + memmove(p->dst, p->src, 6);
  127. + memmove(p->src, mac, 6);
  128. + p->maj = htons(shelf);
  129. + p->min = slot;
  130. + p->flags |= Resp;
  131. + if (putpkt(sfd, (uchar *) p, len) == -1) {
  132. + perror("write to network");
  133. + exit(1);
  134. + }
  135. +
  136. +}
  137. +
  138. +// allocate the buffer so that the ATA data area
  139. +// is page aligned for O_DIRECT on Linux
  140. +
  141. +void *
  142. +bufalloc(void **buf, long len) // *buf receives the raw malloc pointer (the one to free); returns the packet pointer
  143. +{
  144. + long psize;
  145. + unsigned long n;
  146. +
  147. + psize = sysconf(_SC_PAGESIZE);
  148. + if (psize == -1) {
  149. + perror("sysconf");
  150. + exit(EXIT_FAILURE);
  151. + }
  152. + n = len/psize + 3; // pages covering len, plus slack for the alignment shift below
  153. + *buf = malloc(psize * n);
  154. + if (!*buf) {
  155. + perror("malloc");
  156. + exit(EXIT_FAILURE);
  157. + }
  158. + n = (unsigned long) *buf; // from here on n is an address, not a page count
  159. + n += psize * 2; // step two pages in so that rounding down lands past the start of the allocation
  160. + n &= ~(psize - 1); // round down to a page boundary (assumes psize is a power of two)
  161. + return (void *) (n - sizeof (Ata)); // header ends on the boundary, so the data after the Ata header is page aligned; assumes sizeof (Ata) < psize
  162. +}
  163. +
  164. +void
  165. +sigio(int signo) // SIGIO handler: makes queuepipe[0] readable so the poll() in aoe() wakes up
  166. +{
  167. + const char dummy = 0; // the byte value is never inspected; aoe() only drains the pipe
  168. + write(queuepipe[1], &dummy, 1); // aoe() sets O_NONBLOCK on this end; the result is deliberately ignored
  169. +}
  170. +
  171. +void
  172. aoe(void)
  173. {
  174. Aoehdr *p;
  175. - uchar *buf;
  176. - int n, sh;
  177. + char dummy;
  178. + int n, place, sh;
  179. enum { bufsz = 1<<16, };
  180. -
  181. - buf = malloc(bufsz);
  182. + sigset_t mask, oldmask;
  183. + struct sigaction sigact;
  184. + struct pollfd pollfds[2];
  185. + void *freeme[Nplaces];
  186. +
  187. + for (n = 0; n < Nplaces; n++) {
  188. + pkt[n] = bufalloc(freeme + n, bufsz);
  189. + pending[n] = 0;
  190. + }
  191. aoead(sfd);
  192. + pipe(queuepipe);
  193. + fcntl(queuepipe[0], F_SETFL, O_NONBLOCK);
  194. + fcntl(queuepipe[1], F_SETFL, O_NONBLOCK);
  195. +
  196. + sigemptyset(&sigact.sa_mask);
  197. + sigact.sa_flags = 0; // SA_SIGINFO is not set, so sigaction() takes the handler from sa_handler
  198. + sigact.sa_handler = sigio; // sigio has the one-argument handler signature; no cast needed
  199. + sigaction(SIGIO, &sigact, NULL);
  200. +
  201. + sigemptyset(&mask);
  202. + sigaddset(&mask, SIGIO);
  203. + sigprocmask(SIG_BLOCK, &mask, &oldmask);
  204. +
  205. + pollfds[0].fd = queuepipe[0];
  206. + pollfds[1].fd = sfd;
  207. + pollfds[0].events = pollfds[1].events = POLLIN;
  208. +
  209. for (;;) {
  210. - n = getpkt(sfd, buf, bufsz);
  211. - if (n < 0) {
  212. + sigprocmask(SIG_SETMASK, &oldmask, NULL);
  213. + n = poll(pollfds, 2, 1000);
  214. + sigprocmask(SIG_BLOCK, &mask, NULL);
  215. +
  216. + if (n < 0 && errno != EINTR) {
  217. + perror("poll");
  218. + continue;
  219. + } else if (n == 0 || pollfds[0].revents & POLLIN) {
  220. + while(read(queuepipe[0], &dummy, 1) > 0);
  221. + for (place = 0; place < Nplaces; place++) {
  222. + if (!pending[place])
  223. + continue;
  224. + if (aio_error(aiocb + place) == EINPROGRESS)
  225. + continue;
  226. + doaoecomplete(place);
  227. + pollfds[1].events = POLLIN;
  228. + }
  229. + }
  230. +
  231. + if ((pollfds[1].revents & POLLIN) == 0)
  232. + continue;
  233. +
  234. + for (place = 0; place < Nplaces && pending[place]; place++); // bound check first: never read pending[Nplaces]
  235. + if (place >= Nplaces) { // every slot has an AIO request in flight
  236. + pollfds[1].events = 0; // stop polling the socket until a completion frees a slot
  237. + continue;
  238. + }
  239. +
  240. + pktlen[place] = getpkt(sfd, (uchar *) pkt[place], bufsz);
  241. + if (pktlen[place] < 0) {
  242. + if (errno == EINTR)
  243. + continue;
  244. perror("read network");
  245. exit(1);
  246. }
  247. - if (n < sizeof(Aoehdr))
  248. + if (pktlen[place] < sizeof(Aoehdr))
  249. continue;
  250. - p = (Aoehdr *) buf;
  251. + p = (Aoehdr *) pkt[place];
  252. if (ntohs(p->type) != 0x88a2)
  253. continue;
  254. if (p->flags & Resp)
  255. @@ -223,9 +358,10 @@ aoe(void)
  256. continue;
  257. if (nmasks && !maskok(p->src))
  258. continue;
  259. - doaoe(p, n);
  260. + doaoe(place);
  261. }
  262. - free(buf);
  263. + for (place = 0; place < Nplaces; place++)
  264. + free(freeme[place]);
  265. }
  266. void
  267. @@ -317,7 +453,7 @@ main(int argc, char **argv)
  268. }
  269. if (s.st_mode & (S_IWUSR|S_IWGRP|S_IWOTH))
  270. omode = O_RDWR;
  271. - bfd = open(argv[3], omode);
  272. + bfd = opendisk(argv[3], omode);
  273. if (bfd == -1) {
  274. perror("open");
  275. exit(1);
  276. diff -uprN vblade-17.orig/ata.c vblade-17/ata.c
  277. --- vblade-17.orig/ata.c 2008-06-09 10:53:07.000000000 -0400
  278. +++ vblade-17/ata.c 2008-06-09 11:05:23.000000000 -0400
  279. @@ -3,6 +3,8 @@
  280. #include <string.h>
  281. #include <stdio.h>
  282. #include <sys/types.h>
  283. +#include <errno.h>
  284. +#include <aio.h>
  285. #include "dat.h"
  286. #include "fns.h"
  287. @@ -98,7 +100,7 @@ atainit(void)
  288. * check for that.
  289. */
  290. int
  291. -atacmd(Ataregs *p, uchar *dp, int ndp, int payload) // do the ata cmd
  292. +atacmd(Ataregs *p, uchar *dp, int ndp, int payload, struct aiocb *aiocb) // do the ata cmd
  293. {
  294. vlong lba;
  295. ushort *ip;
  296. @@ -155,14 +157,29 @@ atacmd(Ataregs *p, uchar *dp, int ndp, i
  297. return 0;
  298. }
  299. if (p->cmd == 0x20 || p->cmd == 0x24)
  300. - n = getsec(bfd, dp, lba, p->sectors);
  301. + n = getsec(bfd, dp, lba, p->sectors, aiocb);
  302. else {
  303. // packet should be big enough to contain the data
  304. if (payload < 512 * p->sectors)
  305. return -1;
  306. - n = putsec(bfd, dp, lba, p->sectors);
  307. + n = putsec(bfd, dp, lba, p->sectors, aiocb);
  308. }
  309. - n /= 512;
  310. + if (n < 0) { // aio_read/aio_write could not queue the request
  311. + // nothing was transferred: leave p->lba and p->sectors as requested;
  312. + // adjusting them by the -1 return value would skew the reply length built in aoeata
  313. + p->err = ABRT;
  314. + p->status = ERR|DRDY;
  315. + return 0; // 0 = finished without a pending AIO; the caller builds the reply right away
  316. + }
  317. + return 1; // callback expected
  318. +}
  319. +
  320. +
  321. +int
  322. +atacmdcomplete(Ataregs *p, struct aiocb *aiocb) // complete the ata cmd
  323. +{
  324. + int n;
  325. + n = aio_return(aiocb) / 512;
  326. if (n != p->sectors) {
  327. p->err = ABRT;
  328. p->status = ERR;
  329. @@ -173,4 +190,3 @@ atacmd(Ataregs *p, uchar *dp, int ndp, i
  330. p->sectors -= n;
  331. return 0;
  332. }
  333. -
  334. diff -uprN vblade-17.orig/dat.h vblade-17/dat.h
  335. --- vblade-17.orig/dat.h 2008-06-09 10:53:07.000000000 -0400
  336. +++ vblade-17/dat.h 2008-06-09 11:05:23.000000000 -0400
  337. @@ -111,6 +111,8 @@ enum {
  338. Nconfig = 1024,
  339. Bufcount = 16,
  340. +
  341. + Nplaces = 32,
  342. };
  343. int shelf, slot;
  344. diff -uprN vblade-17.orig/fns.h vblade-17/fns.h
  345. --- vblade-17.orig/fns.h 2008-06-09 10:53:07.000000000 -0400
  346. +++ vblade-17/fns.h 2008-06-09 11:07:21.000000000 -0400
  347. @@ -15,7 +15,8 @@ int maskok(uchar *);
  348. // ata.c
  349. void atainit(void);
  350. -int atacmd(Ataregs *, uchar *, int, int);
  351. +int atacmd(Ataregs *, uchar *, int, int, struct aiocb *);
  352. +int atacmdcomplete(Ataregs *, struct aiocb *);
  353. // bpf.c
  354. @@ -26,8 +27,9 @@ void free_bpf_program(void *);
  355. int dial(char *);
  356. int getea(int, char *, uchar *);
  357. -int putsec(int, uchar *, vlong, int);
  358. -int getsec(int, uchar *, vlong, int);
  359. +int opendisk(const char *, int);
  360. +int putsec(int, uchar *, vlong, int, struct aiocb *);
  361. +int getsec(int, uchar *, vlong, int, struct aiocb *);
  362. int putpkt(int, uchar *, int);
  363. int getpkt(int, uchar *, int);
  364. vlong getsize(int);
  365. diff -uprN vblade-17.orig/freebsd.c vblade-17/freebsd.c
  366. --- vblade-17.orig/freebsd.c 2008-06-09 10:53:07.000000000 -0400
  367. +++ vblade-17/freebsd.c 2008-06-09 11:05:23.000000000 -0400
  368. @@ -209,19 +209,40 @@ getea(int s, char *eth, uchar *ea)
  369. return(0);
  370. }
  371. -
  372. int
  373. -getsec(int fd, uchar *place, vlong lba, int nsec)
  374. +opendisk(const char *disk, int omode)
  375. {
  376. - return pread(fd, place, nsec * 512, lba * 512);
  377. + return open(disk, omode);
  378. }
  379. int
  380. -putsec(int fd, uchar *place, vlong lba, int nsec)
  381. -{
  382. - return pwrite(fd, place, nsec * 512, lba * 512);
  383. +getsec(int fd, uchar *place, vlong lba, int nsec, struct aiocb *aiocb)
  384. +{
  385. + bzero((char *) aiocb, sizeof(struct aiocb));
  386. + aiocb->aio_fildes = fd;
  387. + aiocb->aio_buf = place;
  388. + aiocb->aio_nbytes = nsec * 512;
  389. + aiocb->aio_offset = lba * 512;
  390. + aiocb->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
  391. + aiocb->aio_sigevent.sigev_signo = SIGIO;
  392. + aiocb->aio_sigevent.sigev_value.sival_ptr = aiocb;
  393. + return aio_read(aiocb);
  394. }
  395. +int
  396. +putsec(int fd, uchar *place, vlong lba, int nsec, struct aiocb *aiocb)
  397. +{
  398. + bzero((char *) aiocb, sizeof(struct aiocb));
  399. + aiocb->aio_fildes = fd;
  400. + aiocb->aio_buf = place;
  401. + aiocb->aio_nbytes = nsec * 512;
  402. + aiocb->aio_offset = lba * 512;
  403. + aiocb->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
  404. + aiocb->aio_sigevent.sigev_signo = SIGIO;
  405. + aiocb->aio_sigevent.sigev_value.sival_ptr = aiocb;
  406. + return aio_write(aiocb);
  407. +}
  408. +
  409. static int pktn = 0;
  410. static uchar *pktbp = NULL;
  411. diff -uprN vblade-17.orig/linux.c vblade-17/linux.c
  412. --- vblade-17.orig/linux.c 2008-06-09 10:53:07.000000000 -0400
  413. +++ vblade-17/linux.c 2008-06-09 11:05:23.000000000 -0400
  414. @@ -1,5 +1,6 @@
  415. // linux.c: low level access routines for Linux
  416. #include "config.h"
  417. +#define _GNU_SOURCE
  418. #include <sys/socket.h>
  419. #include <stdio.h>
  420. #include <string.h>
  421. @@ -22,6 +23,9 @@
  422. #include <netinet/in.h>
  423. #include <linux/fs.h>
  424. #include <sys/stat.h>
  425. +#include <fcntl.h>
  426. +#include <errno.h>
  427. +#include <aio.h>
  428. #include "dat.h"
  429. #include "fns.h"
  430. @@ -29,8 +33,6 @@
  431. int getindx(int, char *);
  432. int getea(int, char *, uchar *);
  433. -
  434. -
  435. int
  436. dial(char *eth) // get us a raw connection to an interface
  437. {
  438. @@ -84,7 +86,7 @@ getea(int s, char *name, uchar *ea)
  439. struct ifreq xx;
  440. int n;
  441. - strcpy(xx.ifr_name, name);
  442. + strcpy(xx.ifr_name, name);
  443. n = ioctl(s, SIOCGIFHWADDR, &xx);
  444. if (n == -1) {
  445. perror("Can't get hw addr");
  446. @@ -110,17 +112,37 @@ getmtu(int s, char *name)
  447. }
  448. int
  449. -getsec(int fd, uchar *place, vlong lba, int nsec)
  450. +opendisk(const char *disk, int omode)
  451. +{
  452. + return open(disk, omode|O_DIRECT);
  453. +}
  454. +
  455. +int
  456. +getsec(int fd, uchar *place, vlong lba, int nsec, struct aiocb *aiocb)
  457. {
  458. - lseek(fd, lba * 512, 0);
  459. - return read(fd, place, nsec * 512);
  460. + bzero((char *) aiocb, sizeof(struct aiocb));
  461. + aiocb->aio_fildes = fd;
  462. + aiocb->aio_buf = place;
  463. + aiocb->aio_nbytes = nsec * 512;
  464. + aiocb->aio_offset = lba * 512;
  465. + aiocb->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
  466. + aiocb->aio_sigevent.sigev_signo = SIGIO;
  467. + aiocb->aio_sigevent.sigev_value.sival_ptr = aiocb;
  468. + return aio_read(aiocb);
  469. }
  470. int
  471. -putsec(int fd, uchar *place, vlong lba, int nsec)
  472. +putsec(int fd, uchar *place, vlong lba, int nsec, struct aiocb *aiocb)
  473. {
  474. - lseek(fd, lba * 512, 0);
  475. - return write(fd, place, nsec * 512);
  476. + bzero((char *) aiocb, sizeof(struct aiocb));
  477. + aiocb->aio_fildes = fd;
  478. + aiocb->aio_buf = place;
  479. + aiocb->aio_nbytes = nsec * 512;
  480. + aiocb->aio_offset = lba * 512;
  481. + aiocb->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
  482. + aiocb->aio_sigevent.sigev_signo = SIGIO;
  483. + aiocb->aio_sigevent.sigev_value.sival_ptr = aiocb;
  484. + return aio_write(aiocb);
  485. }
  486. int
  487. diff -uprN vblade-17.orig/linux.h vblade-17/linux.h
  488. --- vblade-17.orig/linux.h 2008-06-09 10:53:07.000000000 -0400
  489. +++ vblade-17/linux.h 2008-06-09 11:05:23.000000000 -0400
  490. @@ -6,6 +6,6 @@ typedef long long vlong;
  491. int dial(char *);
  492. int getindx(int, char *);
  493. int getea(int, char *, uchar *);
  494. -int getsec(int, uchar *, vlong, int);
  495. -int putsec(int, uchar *, vlong, int);
  496. +int getsec(int, uchar *, vlong, int, struct aiocb *);
  497. +int putsec(int, uchar *, vlong, int, struct aiocb *);
  498. vlong getsize(int);
  499. diff -uprN vblade-17.orig/makefile vblade-17/makefile
  500. --- vblade-17.orig/makefile 2008-06-09 10:53:07.000000000 -0400
  501. +++ vblade-17/makefile 2008-06-09 11:05:23.000000000 -0400
  502. @@ -13,7 +13,7 @@ CFLAGS += -Wall -g -O2
  503. CC = gcc
  504. vblade: $O
  505. - ${CC} -o vblade $O
  506. + ${CC} -o vblade $O -lrt
  507. aoe.o : aoe.c config.h dat.h fns.h makefile
  508. ${CC} ${CFLAGS} -c $<