io.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605
  1. /*
  2. * This program is free software; you can redistribute it and/or modify
  3. * it under the terms of the GNU General Public License as published by
  4. * the Free Software Foundation; either version 2 of the License, or
  5. * (at your option) any later version.
  6. * Please read the file COPYING, README and AUTHORS for more information.
  7. *
  8. * I/O abstraction interface.
  9. * Copyright (c) 2005 Florian Westphal (westphal@foo.fh-furtwangen.de)
  10. *
  11. */
  12. #include "portab.h"
  13. static char UNUSED id[] = "$Id: io.c,v 1.16 2006/07/23 23:11:44 alex Exp $";
  #include <assert.h>
  #include <limits.h>
  #include <stdlib.h>
  #include <string.h>
  #include <sys/time.h>
  #include <sys/types.h>
  #include <unistd.h>
  #include <fcntl.h>
  #include "array.h"
  #include "io.h"
  #include "log.h"
  24. /* Enables extra debug messages in event add/delete/callback code. */
  25. /* #define DEBUG_IO */
  /*
   * Per-descriptor event record. Records are stored in the io_events array
   * and looked up by file descriptor number.
   */
  typedef struct {
      void (*callback)(int, short);   /* handler, called as callback(fd, events); NULL if unset */
      short what;                     /* event mask currently registered for the descriptor */
  } io_event;

  /*
   * Static initializer for an io_event: no callback, no events registered.
   * Must list exactly one value per struct member.
   */
  #define INIT_IOEVENT { NULL, 0 }

  /* Event flag used by the dispatchers to signal an error condition on a descriptor. */
  #define IO_ERROR 4
  /*
   * Select exactly one I/O backend: epoll when available, otherwise kqueue,
   * otherwise fall back to select().
   */
  #if defined(HAVE_EPOLL_CREATE)
  # define IO_USE_EPOLL 1
  #elif defined(HAVE_KQUEUE)
  # define IO_USE_KQUEUE 1
  #else
  # define IO_USE_SELECT 1
  #endif
  41. static bool library_initialized;
  42. #ifdef IO_USE_EPOLL
  43. #include <sys/epoll.h>
  44. static int io_masterfd;
  45. static bool io_event_change_epoll(int fd, short what, const int action);
  46. static int io_dispatch_epoll(struct timeval *tv);
  47. #endif
  48. #ifdef IO_USE_KQUEUE
  49. #include <sys/types.h>
  50. #include <sys/event.h>
  51. static array io_evcache;
  52. static int io_masterfd;
  53. static int io_dispatch_kqueue(struct timeval *tv);
  54. static bool io_event_change_kqueue(int, short, const int action);
  55. #endif
  56. #ifdef IO_USE_SELECT
  57. #include "defines.h" /* for conn.h */
  58. #include "conn.h" /* for CONN_IDX (needed by resolve.h) */
  59. #include "resolve.h" /* for RES_STAT (needed by conf.h) */
  60. #include "conf.h" /* for Conf_MaxConnections */
  61. static fd_set readers;
  62. static fd_set writers;
  63. static int select_maxfd; /* the select() interface sucks badly */
  64. static int io_dispatch_select(struct timeval *tv);
  65. #endif
  66. static array io_events;
  67. static void io_docallback PARAMS((int fd, short what));
  68. static io_event *
  69. io_event_get(int fd)
  70. {
  71. io_event *i;
  72. assert(fd >= 0);
  73. i = (io_event *) array_get(&io_events, sizeof(io_event), (size_t) fd);
  74. assert(i != NULL);
  75. return i;
  76. }
  77. bool
  78. io_library_init(unsigned int eventsize)
  79. {
  80. #if defined(IO_USE_EPOLL) || defined(IO_USE_KQUEUE)
  81. bool ret;
  82. #endif
  83. #ifdef IO_USE_EPOLL
  84. int ecreate_hint = (int)eventsize;
  85. if (ecreate_hint <= 0)
  86. ecreate_hint = 128;
  87. #endif
  88. if (library_initialized)
  89. return true;
  90. #ifdef IO_USE_SELECT
  91. #ifdef FD_SETSIZE
  92. if (eventsize >= FD_SETSIZE)
  93. eventsize = FD_SETSIZE - 1;
  94. #endif
  95. #endif
  96. if ((eventsize > 0) && !array_alloc(&io_events, sizeof(io_event), (size_t)eventsize))
  97. eventsize = 0;
  98. #ifdef IO_USE_EPOLL
  99. io_masterfd = epoll_create(ecreate_hint);
  100. Log(LOG_INFO,
  101. "IO subsystem: epoll (hint size %d, initial maxfd %u, masterfd %d).",
  102. ecreate_hint, eventsize, io_masterfd);
  103. ret = io_masterfd >= 0;
  104. if (ret) library_initialized = true;
  105. return ret;
  106. #endif
  107. #ifdef IO_USE_SELECT
  108. Log(LOG_INFO, "IO subsystem: select (initial maxfd %u).",
  109. eventsize);
  110. FD_ZERO(&readers);
  111. FD_ZERO(&writers);
  112. #ifdef FD_SETSIZE
  113. if (Conf_MaxConnections >= (int)FD_SETSIZE) {
  114. Log(LOG_WARNING,
  115. "MaxConnections (%d) exceeds limit (%u), changed MaxConnections to %u.",
  116. Conf_MaxConnections, FD_SETSIZE, FD_SETSIZE - 1);
  117. Conf_MaxConnections = FD_SETSIZE - 1;
  118. }
  119. #else
  120. Log(LOG_WARNING,
  121. "FD_SETSIZE undefined, don't know how many descriptors select() can handle on your platform ...");
  122. #endif /* FD_SETSIZE */
  123. library_initialized = true;
  124. return true;
  125. #endif /* SELECT */
  126. #ifdef IO_USE_KQUEUE
  127. io_masterfd = kqueue();
  128. Log(LOG_INFO,
  129. "IO subsystem: kqueue (initial maxfd %u, masterfd %d)",
  130. eventsize, io_masterfd);
  131. ret = io_masterfd >= 0;
  132. if (ret) library_initialized = true;
  133. return ret;
  134. #endif
  135. }
  136. void
  137. io_library_shutdown(void)
  138. {
  139. #ifdef IO_USE_SELECT
  140. FD_ZERO(&readers);
  141. FD_ZERO(&writers);
  142. #else
  143. close(io_masterfd); /* kqueue, epoll */
  144. io_masterfd = -1;
  145. #endif
  146. #ifdef IO_USE_KQUEUE
  147. array_free(&io_evcache);
  148. #endif
  149. library_initialized = false;
  150. }
  151. bool
  152. io_event_setcb(int fd, void (*cbfunc) (int, short))
  153. {
  154. io_event *i = io_event_get(fd);
  155. if (!i)
  156. return false;
  157. i->callback = cbfunc;
  158. return true;
  159. }
  160. bool
  161. io_event_create(int fd, short what, void (*cbfunc) (int, short))
  162. {
  163. bool ret;
  164. io_event *i;
  165. assert(fd >= 0);
  166. #ifdef IO_USE_SELECT
  167. #ifdef FD_SETSIZE
  168. if (fd >= FD_SETSIZE) {
  169. Log(LOG_ERR,
  170. "fd %d exceeds FD_SETSIZE (%u) (select can't handle more file descriptors)",
  171. fd, FD_SETSIZE);
  172. return false;
  173. }
  174. #endif /* FD_SETSIZE */
  175. #endif /* IO_USE_SELECT */
  176. i = (io_event *) array_alloc(&io_events, sizeof(io_event), (size_t) fd);
  177. if (!i) {
  178. Log(LOG_WARNING,
  179. "array_alloc failed: could not allocate space for %d io_event structures",
  180. fd);
  181. return false;
  182. }
  183. i->callback = cbfunc;
  184. i->what = 0;
  185. #ifdef IO_USE_EPOLL
  186. ret = io_event_change_epoll(fd, what, EPOLL_CTL_ADD);
  187. #endif
  188. #ifdef IO_USE_KQUEUE
  189. ret = io_event_change_kqueue(fd, what, EV_ADD|EV_ENABLE);
  190. #endif
  191. #ifdef IO_USE_SELECT
  192. ret = io_event_add(fd, what);
  193. #endif
  194. if (ret) i->what = what;
  195. return ret;
  196. }
  #ifdef IO_USE_EPOLL
  /*
   * Apply an epoll_ctl() action (add, modify, delete) for a descriptor.
   * IO_WANTREAD maps to EPOLLIN|EPOLLPRI, IO_WANTWRITE to EPOLLOUT.
   * Returns true if epoll_ctl() succeeded.
   */
  static bool
  io_event_change_epoll(int fd, short what, const int action)
  {
      struct epoll_event request = { 0, {0} };

      request.data.fd = fd;
      request.events = (what & IO_WANTREAD) ? (EPOLLIN | EPOLLPRI) : 0;
      if (what & IO_WANTWRITE)
          request.events |= EPOLLOUT;

      return 0 == epoll_ctl(io_masterfd, action, fd, &request);
  }
  #endif
  #ifdef IO_USE_KQUEUE
  /*
   * Hand all cached kevent changes to the kernel.
   * Returns true if the cache was empty or kevent() accepted the changes.
   * The cache is only emptied after a successful kevent() call.
   */
  static bool
  io_event_kqueue_commit_cache(void)
  {
      struct kevent *pending;
      int count = (int) array_length(&io_evcache, sizeof (struct kevent));

      if (count == 0)     /* nothing to do */
          return true;

      assert(count > 0);
      if (count < 0) {
          array_free(&io_evcache);
          return false;
      }

      pending = array_start(&io_evcache);
      assert(pending != NULL);

      if (kevent(io_masterfd, pending, count, NULL, 0, NULL) != 0)
          return false;

      array_trunc(&io_evcache);
      return true;
  }

  /*
   * Queue a kevent change for the read and/or write filter of a descriptor.
   *
   * Each change is appended to the cache. If appending fails, the change is
   * submitted to the kernel directly instead. The write filter is only
   * handled when the read filter (if requested) went through. Once the cache
   * holds 100 or more entries it is committed.
   *
   * Returns true if every requested change was queued or applied.
   */
  static bool
  io_event_change_kqueue(int fd, short what, const int action)
  {
      const short wanted[2] = { IO_WANTREAD, IO_WANTWRITE };
      const short filters[2] = { EVFILT_READ, EVFILT_WRITE };
      struct kevent kev;
      bool ok = true;
      int n;

      for (n = 0; ok && n < 2; n++) {
          if (!(what & wanted[n]))
              continue;

          EV_SET(&kev, fd, filters[n], action, 0, 0, 0);
          ok = array_catb(&io_evcache, (char*) &kev, sizeof (kev));
          if (!ok)
              ok = kevent(io_masterfd, &kev, 1, NULL, 0, NULL) == 0;
      }

      if (array_length(&io_evcache, sizeof kev) >= 100)
          io_event_kqueue_commit_cache();

      return ok;
  }
  #endif
  253. bool
  254. io_event_add(int fd, short what)
  255. {
  256. io_event *i = io_event_get(fd);
  257. if (!i) return false;
  258. if (i->what == what) return true;
  259. #ifdef DEBUG_IO
  260. Log(LOG_DEBUG, "io_event_add(): fd %d (arg: %d), what %d.", i->fd, fd, what);
  261. #endif
  262. i->what |= what;
  263. #ifdef IO_USE_EPOLL
  264. return io_event_change_epoll(fd, i->what, EPOLL_CTL_MOD);
  265. #endif
  266. #ifdef IO_USE_KQUEUE
  267. return io_event_change_kqueue(fd, what, EV_ADD | EV_ENABLE);
  268. #endif
  269. #ifdef IO_USE_SELECT
  270. if (fd > select_maxfd)
  271. select_maxfd = fd;
  272. if (what & IO_WANTREAD)
  273. FD_SET(fd, &readers);
  274. if (what & IO_WANTWRITE)
  275. FD_SET(fd, &writers);
  276. return true;
  277. #endif
  278. }
  #ifndef O_NONBLOCK
  #define O_NONBLOCK O_NDELAY
  #endif

  /*
   * Switch a descriptor to non-blocking mode, keeping its other status flags.
   * Returns true on success, false if reading or writing the flags failed.
   */
  bool
  io_setnonblock(int fd)
  {
      int mode = fcntl(fd, F_GETFL);

      if (mode == -1)
          return false;

      return 0 == fcntl(fd, F_SETFL, mode | O_NONBLOCK);
  }
  291. bool
  292. io_close(int fd)
  293. {
  294. io_event *i;
  295. #ifdef IO_USE_SELECT
  296. FD_CLR(fd, &writers);
  297. FD_CLR(fd, &readers);
  298. if (fd == select_maxfd) {
  299. while (select_maxfd>0) {
  300. --select_maxfd; /* find largest fd */
  301. i = io_event_get(select_maxfd);
  302. if (i && i->callback) break;
  303. }
  304. }
  305. #endif
  306. i = io_event_get(fd);
  307. #ifdef IO_USE_KQUEUE
  308. if (array_length(&io_evcache, sizeof (struct kevent))) /* pending data in cache? */
  309. io_event_kqueue_commit_cache();
  310. /* both kqueue and epoll remove fd from all sets automatically on the last close
  311. * of the descriptor. since we don't know if this is the last close we'll have
  312. * to remove the set explicitly. */
  313. if (i) {
  314. io_event_change_kqueue(fd, i->what, EV_DELETE);
  315. io_event_kqueue_commit_cache();
  316. }
  317. #endif
  318. #ifdef IO_USE_EPOLL
  319. io_event_change_epoll(fd, 0, EPOLL_CTL_DEL);
  320. #endif
  321. if (i) {
  322. i->callback = NULL;
  323. i->what = 0;
  324. }
  325. return close(fd) == 0;
  326. }
  327. bool
  328. io_event_del(int fd, short what)
  329. {
  330. io_event *i = io_event_get(fd);
  331. #ifdef DEBUG_IO
  332. Log(LOG_DEBUG, "io_event_del(): trying to delete eventtype %d on fd %d", what, fd);
  333. #endif
  334. if (!i) return false;
  335. i->what &= ~what;
  336. #ifdef IO_USE_EPOLL
  337. return io_event_change_epoll(fd, i->what, EPOLL_CTL_MOD);
  338. #endif
  339. #ifdef IO_USE_KQUEUE
  340. return io_event_change_kqueue(fd, what, EV_DISABLE);
  341. #endif
  342. #ifdef IO_USE_SELECT
  343. if (what & IO_WANTWRITE)
  344. FD_CLR(fd, &writers);
  345. if (what & IO_WANTREAD)
  346. FD_CLR(fd, &readers);
  347. return true;
  348. #endif
  349. }
  #ifdef IO_USE_SELECT
  /*
   * Wait for activity with select() and run the callbacks of all ready
   * descriptors.
   *
   * select() works on copies of the watched sets, so the callbacks may
   * change the registrations while the results are being walked. The walk
   * stops as soon as every event reported by select() has been accounted for.
   *
   * Returns the result of select(): number of ready events, 0 on timeout,
   * negative on error.
   */
  static int
  io_dispatch_select(struct timeval *tv)
  {
      fd_set ready_read = readers;
      fd_set ready_write = writers;
      int nready, remaining, fd;
      short events;

      nready = select(select_maxfd + 1, &ready_read, &ready_write, NULL, tv);
      if (nready <= 0)
          return nready;

      remaining = nready;
      for (fd = 0; remaining > 0 && fd <= select_maxfd; fd++) {
          events = 0;

          if (FD_ISSET(fd, &ready_read)) {
              events = IO_WANTREAD;
              remaining--;
          }
          if (FD_ISSET(fd, &ready_write)) {
              events |= IO_WANTWRITE;
              remaining--;
          }

          if (events)
              io_docallback(fd, events);
      }

      return nready;
  }
  #endif
  #ifdef IO_USE_EPOLL
  /*
   * Wait for activity with epoll_wait() and run the callbacks of all ready
   * descriptors.
   *
   * tv - maximum time to wait; NULL waits indefinitely. The value is
   *      converted to the millisecond timeout epoll_wait() expects, rounding
   *      microseconds up so a short non-zero wait does not become a busy
   *      poll. Negative, non-normalized or too large values fall back to a
   *      one second timeout.
   *
   * Events are fetched in batches of 100. After a full batch the descriptor
   * is polled again without waiting to pick up the remaining events.
   *
   * Returns the number of events dispatched, 0 on timeout, or the negative
   * epoll_wait() result if the very first call failed.
   */
  static int
  io_dispatch_epoll(struct timeval *tv)
  {
      enum { MAX_EVENTS = 100 };
      struct epoll_event epoll_ev[MAX_EVENTS];
      int i, ret, total = 0;
      int timeout;    /* milliseconds */
      short type;

      if (tv == NULL) {
          timeout = -1;
      } else if (tv->tv_sec < 0 || tv->tv_usec < 0
                 || tv->tv_usec >= 1000000
                 || tv->tv_sec > (INT_MAX / 1000) - 1) {
          timeout = 1000;
      } else {
          timeout = (int)tv->tv_sec * 1000
                    + (int)((tv->tv_usec + 999) / 1000);
      }

      do {
          ret = epoll_wait(io_masterfd, epoll_ev, MAX_EVENTS, timeout);
          if (ret <= 0) {
              /* do not let a late error eat into the count of events
               * that were already dispatched */
              return (total > 0) ? total : ret;
          }
          total += ret;

          for (i = 0; i < ret; i++) {
              type = 0;
              if (epoll_ev[i].events & (EPOLLERR | EPOLLHUP))
                  type = IO_ERROR;
              if (epoll_ev[i].events & (EPOLLIN | EPOLLPRI))
                  type |= IO_WANTREAD;
              if (epoll_ev[i].events & EPOLLOUT)
                  type |= IO_WANTWRITE;

              io_docallback(epoll_ev[i].data.fd, type);
          }

          timeout = 0;    /* further batches must not block */
      } while (ret == MAX_EVENTS);

      return total;
  }
  #endif
  #ifdef IO_USE_KQUEUE
  /*
   * Submit cached changes, wait for activity with kevent() and run the
   * callbacks of all ready descriptors.
   *
   * tv - maximum time to wait; NULL waits indefinitely.
   *
   * Events are fetched in batches of 100. After a full batch the queue is
   * polled again without waiting. Events flagged EV_EOF or EV_ERROR, and
   * events with an unknown filter, are reported to the callback as IO_ERROR.
   *
   * Returns the number of events dispatched, 0 on timeout, or the negative
   * kevent() result if the very first call failed.
   */
  static int
  io_dispatch_kqueue(struct timeval *tv)
  {
      int i, total = 0, ret;
      struct kevent kev[100];
      struct kevent *newevents;
      struct timespec ts;
      struct timespec *timeout = NULL;
      int newevents_len;

      if (tv != NULL) {
          ts.tv_sec = tv->tv_sec;
          ts.tv_nsec = tv->tv_usec * 1000;
          timeout = &ts;
      }

      do {
          newevents_len = (int) array_length(&io_evcache, sizeof (struct kevent));
          newevents = (newevents_len > 0) ? array_start(&io_evcache) : NULL;
          assert(newevents_len >= 0);
          if (newevents_len < 0)
              newevents_len = 0;
  #ifdef DEBUG
          if (newevents_len)
              assert(newevents != NULL);
  #endif
          ret = kevent(io_masterfd, newevents, newevents_len, kev,
                       100, timeout);
          /* the cached changes were consumed unless kevent() failed */
          if ((newevents_len > 0) && ret != -1)
              array_trunc(&io_evcache);

          if (ret <= 0) {
              /* do not let a late error eat into the count of events
               * that were already dispatched */
              return (total > 0) ? total : ret;
          }
          total += ret;

          for (i = 0; i < ret; i++) {
              /* EV_ERROR is reported in flags (e.g. for a rejected
               * change), not as a filter value */
              if (kev[i].flags & (EV_EOF | EV_ERROR)) {
  #ifdef DEBUG
                  LogDebug("kev.flags has EV_EOF or EV_ERROR set (filter %d, fd %d), setting IO_ERROR",
                           (int)kev[i].filter, (int)kev[i].ident);
  #endif
                  io_docallback((int)kev[i].ident, IO_ERROR);
                  continue;
              }

              switch (kev[i].filter) {
              case EVFILT_READ:
                  io_docallback((int)kev[i].ident, IO_WANTREAD);
                  break;
              case EVFILT_WRITE:
                  io_docallback((int)kev[i].ident, IO_WANTWRITE);
                  break;
              default:
  #ifdef DEBUG
                  LogDebug("Unknown kev.filter number %d for fd %d",
                           (int)kev[i].filter, (int)kev[i].ident);
  #endif
                  io_docallback((int)kev[i].ident, IO_ERROR);
                  break;
              }
          }

          /* further batches must not block */
          ts.tv_sec = 0;
          ts.tv_nsec = 0;
          timeout = &ts;
      } while (ret == 100);

      return total;
  }
  #endif
  /*
   * Wait for I/O events for at most the time given in tv and run the
   * registered callbacks, using the backend selected at compile time.
   * Returns whatever the backend dispatcher returns.
   */
  int
  io_dispatch(struct timeval *tv)
  {
  #if defined(IO_USE_SELECT)
      return io_dispatch_select(tv);
  #elif defined(IO_USE_KQUEUE)
      return io_dispatch_kqueue(tv);
  #elif defined(IO_USE_EPOLL)
      return io_dispatch_epoll(tv);
  #endif
  }
  484. /* call the callback function inside the struct matching fd */
  485. static void
  486. io_docallback(int fd, short what)
  487. {
  488. io_event *i;
  489. #ifdef DEBUG_IO
  490. Log(LOG_DEBUG, "doing callback for fd %d, what %d", fd, what);
  491. #endif
  492. i = io_event_get(fd);
  493. if (i->callback) { /* callback might be NULL if a previous callback function
  494. called io_close on this fd */
  495. i->callback(fd, (what & IO_ERROR) ? i->what : what);
  496. }
  497. /* if error indicator is set, we return the event(s) that were registered */
  498. }