tree.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600
  1. /* $Id: tree.c 1801 2007-04-14 22:08:33Z aturner $ */
  2. /*
  3. * Copyright (c) 2001-2007 Aaron Turner.
  4. * All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. *
  10. * 1. Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * 2. Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in the
  14. * documentation and/or other materials provided with the distribution.
  15. * 3. Neither the names of the copyright owners nor the names of its
  16. * contributors may be used to endorse or promote products derived from
  17. * this software without specific prior written permission.
  18. *
  19. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
  20. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21. * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  22. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  23. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  24. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
  25. * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  26. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
  27. * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  28. * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  29. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. */
  31. #include "config.h"
  32. #include "defines.h"
  33. #include "common.h"
  34. #include <stdio.h>
  35. #include <stdlib.h>
  36. #include <string.h>
  37. #include "tree.h"
  38. #include "tcpprep.h"
  39. #include "tcpprep_opts.h"
  40. extern tcpr_data_tree_t treeroot;
  41. extern tcpprep_opt_t options;
  42. #ifdef DEBUG
  43. extern int debug;
  44. #endif
  45. /* static buffer used by tree_print*() functions */
  46. char tree_print_buff[TREEPRINTBUFFLEN];
  47. static tcpr_tree_t *new_tree();
  48. static tcpr_tree_t *packet2tree(const u_char *);
  49. static char *tree_print(tcpr_data_tree_t *);
  50. static char *tree_printnode(const char *, const tcpr_tree_t *);
  51. static void tree_buildcidr(tcpr_data_tree_t *, tcpr_buildcidr_t *);
  52. static int tree_checkincidr(tcpr_data_tree_t *, tcpr_buildcidr_t *);
  53. RB_PROTOTYPE(tcpr_data_tree_s, tcpr_tree_s, node, tree_comp)
  54. RB_GENERATE(tcpr_data_tree_s, tcpr_tree_s, node, tree_comp)
  55. /*
  56. * used with rbwalk to walk a tree and generate cidr_t * cidrdata.
  57. * is smart enough to prevent dupes. void * arg is cast to bulidcidr_t
  58. */
  59. void
  60. tree_buildcidr(tcpr_data_tree_t *treeroot, tcpr_buildcidr_t * bcdata)
  61. {
  62. tcpr_tree_t *node = NULL;
  63. tcpr_cidr_t *newcidr = NULL;
  64. unsigned long network = 0;
  65. unsigned long mask = ~0; /* turn on all bits */
  66. dbg(1, "Running: tree_buildcidr()");
  67. RB_FOREACH(node, tcpr_data_tree_s, treeroot) {
  68. /* we only check types that are vaild */
  69. if (bcdata->type != DIR_ANY) /* don't check if we're adding ANY */
  70. if (bcdata->type != node->type) /* no match, exit early */
  71. return;
  72. /*
  73. * in cases of leaves and last visit add to cidrdata if
  74. * necessary
  75. */
  76. dbgx(4, "Checking if %s exists in cidrdata...", get_addr2name4(node->ip, RESOLVE));
  77. if (!check_ip_cidr(options.cidrdata, node->ip)) { /* if we exist, abort */
  78. dbgx(3, "Node %s doesn't exist... creating.",
  79. get_addr2name4(node->ip, RESOLVE));
  80. newcidr = new_cidr();
  81. newcidr->masklen = bcdata->masklen;
  82. network = node->ip & (mask << (32 - bcdata->masklen));
  83. dbgx(3, "Using network: %s",
  84. get_addr2name4(network, RESOLVE));
  85. newcidr->network = network;
  86. add_cidr(&options.cidrdata, &newcidr);
  87. }
  88. }
  89. }
  90. /*
  91. * uses rbwalk to check to see if a given ip address of a given type in the
  92. * tree is inside any of the cidrdata
  93. *
  94. */
  95. static int
  96. tree_checkincidr(tcpr_data_tree_t *treeroot, tcpr_buildcidr_t * bcdata)
  97. {
  98. tcpr_tree_t *node = NULL;
  99. RB_FOREACH(node, tcpr_data_tree_s, treeroot) {
  100. /* we only check types that are vaild */
  101. if (bcdata->type != DIR_ANY) /* don't check if we're adding ANY */
  102. if (bcdata->type != node->type) /* no match, exit early */
  103. return 0;
  104. /*
  105. * in cases of leaves and last visit add to cidrdata if
  106. * necessary
  107. */
  108. if (check_ip_cidr(options.cidrdata, node->ip)) { /* if we exist, abort */
  109. return 1;
  110. }
  111. }
  112. return 0;
  113. }
  114. /*
  115. * processes the tree using rbwalk / tree2cidr to generate a CIDR
  116. * used for 2nd pass, router mode
  117. *
  118. * returns > 0 for success (the mask len), 0 for fail
  119. */
  120. int
  121. process_tree()
  122. {
  123. int mymask = 0;
  124. tcpr_buildcidr_t *bcdata;
  125. dbg(1, "Running: process_tree()");
  126. bcdata = (tcpr_buildcidr_t *)safe_malloc(sizeof(tcpr_buildcidr_t));
  127. for (mymask = options.max_mask; mymask <= options.min_mask; mymask++) {
  128. dbgx(1, "Current mask: %u", mymask);
  129. /* set starting vals */
  130. bcdata->type = DIR_SERVER;
  131. bcdata->masklen = mymask;
  132. /* build cidrdata with servers */
  133. tree_buildcidr(&treeroot, bcdata);
  134. /* calculate types of all IP's */
  135. tree_calculate(&treeroot);
  136. /* try to find clients in cidrdata */
  137. bcdata->type = DIR_CLIENT;
  138. if (! tree_checkincidr(&treeroot, bcdata)) { /* didn't find any clients in cidrdata */
  139. return (mymask); /* success! */
  140. }
  141. else {
  142. destroy_cidr(options.cidrdata); /* clean up after our mess */
  143. options.cidrdata = NULL;
  144. }
  145. }
  146. /* we failed to find a vaild cidr list */
  147. return (0);
  148. }
  149. /*
  150. * processes rbdata to build cidrdata based upon the
  151. * given type (SERVER, CLIENT, UNKNOWN) using the given masklen
  152. *
  153. * is smart enough to prevent dupes
  154. void
  155. tcpr_tree_to_cidr(const int masklen, const int type)
  156. {
  157. }
  158. */
  159. /*
  160. * Checks to see if an IP is client or server by finding it in the tree
  161. * returns TCPR_DIR_C2S or TCPR_DIR_S2C or -1 on error
  162. * if mode = UNKNOWN, then abort on unknowns
  163. * if mode = CLIENT, then unknowns become clients
  164. * if mode = SERVER, then unknowns become servers
  165. */
  166. tcpr_dir_t
  167. check_ip_tree(const int mode, const unsigned long ip)
  168. {
  169. tcpr_tree_t *node = NULL, *finder = NULL;
  170. finder = new_tree();
  171. finder->ip = ip;
  172. node = RB_FIND(tcpr_data_tree_s, &treeroot, finder);
  173. if (node == NULL && mode == DIR_UNKNOWN)
  174. errx(1, "%s (%lu) is an unknown system... aborting.!\n"
  175. "Try a different auto mode (-n router|client|server)",
  176. get_addr2name4(ip, RESOLVE), ip);
  177. #ifdef DEBUG
  178. if (node->type == DIR_SERVER) {
  179. dbgx(1, "Server: %s", get_addr2name4(ip, RESOLVE));
  180. }
  181. else if (node->type == DIR_CLIENT) {
  182. dbgx(1, "Client: %s", get_addr2name4(ip, RESOLVE));
  183. }
  184. else {
  185. dbgx(1, "Unknown: %s", get_addr2name4(ip, RESOLVE));
  186. }
  187. #endif
  188. /*
  189. * FIXME: Is this logic correct? I think this might be backwards :(
  190. */
  191. /* return node type if we found the node, else return the default (mode) */
  192. if (node != NULL) {
  193. if (node->type == DIR_SERVER) {
  194. return TCPR_DIR_C2S;
  195. } else if (node->type == DIR_CLIENT) {
  196. return TCPR_DIR_S2C;
  197. }
  198. }
  199. else {
  200. if (mode == DIR_SERVER) {
  201. return TCPR_DIR_C2S;
  202. } else if (mode == DIR_CLIENT) {
  203. return TCPR_DIR_S2C;
  204. }
  205. }
  206. err(1, "Uh, we shouldn't of gotten here.");
  207. return(-1);
  208. }
  209. /*
  210. * adds an entry to the tree (phase 1 of auto mode)
  211. */
  212. void
  213. add_tree(const unsigned long ip, const u_char * data)
  214. {
  215. tcpr_tree_t *node = NULL, *newnode = NULL;
  216. newnode = packet2tree(data);
  217. assert(ip == newnode->ip);
  218. if (newnode->type == DIR_UNKNOWN) {
  219. /* couldn't figure out if packet was client or server */
  220. dbgx(2, "%s (%lu) unknown client/server",
  221. get_addr2name4(newnode->ip, RESOLVE), newnode->ip);
  222. }
  223. /* try to find a simular entry in the tree */
  224. node = RB_FIND(tcpr_data_tree_s, &treeroot, newnode);
  225. dbgx(3, "%s", tree_printnode("add_tree", node));
  226. /* new entry required */
  227. if (node == NULL) {
  228. /* increment counters */
  229. if (newnode->type == DIR_SERVER) {
  230. newnode->server_cnt++;
  231. }
  232. else if (newnode->type == DIR_CLIENT) {
  233. newnode->client_cnt++;
  234. }
  235. /* insert it in */
  236. RB_INSERT(tcpr_data_tree_s, &treeroot, newnode);
  237. }
  238. else {
  239. /* we found something, so update it */
  240. dbgx(2, " node: %p\nnewnode: %p", node, newnode);
  241. dbgx(3, "%s", tree_printnode("update node", node));
  242. /* increment counter */
  243. if (newnode->type == DIR_SERVER) {
  244. node->server_cnt++;
  245. }
  246. else if (newnode->type == DIR_CLIENT) {
  247. /* temp debug code */
  248. node->client_cnt++;
  249. }
  250. /* didn't insert it, so free it */
  251. free(newnode);
  252. }
  253. dbg(2, "------- START NEXT -------");
  254. dbgx(3, "%s", tree_print(&treeroot));
  255. }
  256. /*
  257. * calculates wether an IP is a client, server, or unknown for each node in the tree
  258. */
  259. void
  260. tree_calculate(tcpr_data_tree_t *treeroot)
  261. {
  262. tcpr_tree_t *node;
  263. dbg(1, "Running tree_calculate()");
  264. RB_FOREACH(node, tcpr_data_tree_s, treeroot) {
  265. dbgx(4, "Processing %s", get_addr2name4(node->ip, RESOLVE));
  266. if ((node->server_cnt > 0) || (node->client_cnt > 0)) {
  267. /* type based on: server >= (client*ratio) */
  268. if ((double)node->server_cnt >= (double)node->client_cnt * options.ratio) {
  269. node->type = DIR_SERVER;
  270. dbgx(3, "Setting %s to server",
  271. get_addr2name4(node->ip, RESOLVE));
  272. }
  273. else {
  274. node->type = DIR_CLIENT;
  275. dbgx(3, "Setting %s to client",
  276. get_addr2name4(node->ip, RESOLVE));
  277. }
  278. }
  279. else { /* IP had no client or server connections */
  280. node->type = DIR_UNKNOWN;
  281. dbgx(3, "Setting %s to unknown",
  282. get_addr2name4(node->ip, RESOLVE));
  283. }
  284. }
  285. }
  286. /*
  287. * tree_comp(), called by rbsearch compares two treees and returns:
  288. * 1 = first > second
  289. * -1 = first < second
  290. * 0 = first = second
  291. * based upon the ip address stored
  292. *
  293. */
  294. int
  295. tree_comp(tcpr_tree_t *t1, tcpr_tree_t *t2)
  296. {
  297. if (t1->ip > t2->ip) {
  298. dbgx(2, "%s > %s", get_addr2name4(t1->ip, RESOLVE),
  299. get_addr2name4(t2->ip, RESOLVE));
  300. return 1;
  301. }
  302. if (t1->ip < t2->ip) {
  303. dbgx(2, "%s < %s", get_addr2name4(t1->ip, RESOLVE),
  304. get_addr2name4(t2->ip, RESOLVE));
  305. return -1;
  306. }
  307. dbgx(2, "%s = %s", get_addr2name4(t1->ip, RESOLVE),
  308. get_addr2name4(t2->ip, RESOLVE));
  309. return 0;
  310. }
  311. /*
  312. * creates a new TREE * with reasonable defaults
  313. */
  314. static tcpr_tree_t *
  315. new_tree()
  316. {
  317. tcpr_tree_t *node;
  318. node = (tcpr_tree_t *)safe_malloc(sizeof(tcpr_tree_t));
  319. memset(node, '\0', sizeof(tcpr_tree_t));
  320. node->server_cnt = 0;
  321. node->client_cnt = 0;
  322. node->type = DIR_UNKNOWN;
  323. node->masklen = -1;
  324. node->ip = 0;
  325. return (node);
  326. }
  327. /*
  328. * returns a struct of TREE * from a packet header
  329. * and sets the type to be SERVER or CLIENT or UNKNOWN
  330. * if it's an undefined packet, we return -1 for the type
  331. * the u_char * data should be the data that is passed by pcap_dispatch()
  332. */
  333. tcpr_tree_t *
  334. packet2tree(const u_char * data)
  335. {
  336. tcpr_tree_t *node = NULL;
  337. eth_hdr_t *eth_hdr = NULL;
  338. ipv4_hdr_t ip_hdr;
  339. tcp_hdr_t tcp_hdr;
  340. udp_hdr_t udp_hdr;
  341. icmpv4_hdr_t icmp_hdr;
  342. dnsv4_hdr_t dnsv4_hdr;
  343. node = new_tree();
  344. eth_hdr = (eth_hdr_t *) (data);
  345. /* prevent issues with byte alignment, must memcpy */
  346. memcpy(&ip_hdr, (data + TCPR_ETH_H), TCPR_IPV4_H);
  347. /* copy over the source mac */
  348. strncpy((char *)node->mac, (char *)eth_hdr->ether_shost, 6);
  349. /* copy over the source ip */
  350. node->ip = ip_hdr.ip_src.s_addr;
  351. /*
  352. * TCP
  353. */
  354. if (ip_hdr.ip_p == IPPROTO_TCP) {
  355. dbgx(3, "%s uses TCP... ",
  356. get_addr2name4(ip_hdr.ip_src.s_addr, RESOLVE));
  357. /* memcpy it over to prevent alignment issues */
  358. memcpy(&tcp_hdr, (data + TCPR_ETH_H + (ip_hdr.ip_hl * 4)),
  359. TCPR_TCP_H);
  360. /* ftp-data is going to skew our results so we ignore it */
  361. if (tcp_hdr.th_sport == 20) {
  362. return (node);
  363. }
  364. /* set TREE->type based on TCP flags */
  365. if (tcp_hdr.th_flags == TH_SYN) {
  366. node->type = DIR_CLIENT;
  367. dbg(3, "is a client");
  368. }
  369. else if (tcp_hdr.th_flags == (TH_SYN | TH_ACK)) {
  370. node->type = DIR_SERVER;
  371. dbg(3, "is a server");
  372. }
  373. else {
  374. dbg(3, "is an unknown");
  375. }
  376. /*
  377. * UDP
  378. */
  379. }
  380. else if (ip_hdr.ip_p == IPPROTO_UDP) {
  381. /* memcpy over to prevent alignment issues */
  382. memcpy(&udp_hdr, (data + TCPR_ETH_H + (ip_hdr.ip_hl * 4)),
  383. TCPR_UDP_H);
  384. dbgx(3, "%s uses UDP... ",
  385. get_addr2name4(ip_hdr.ip_src.s_addr, RESOLVE));
  386. switch (ntohs(udp_hdr.uh_dport)) {
  387. case 0x0035: /* dns */
  388. /* prevent memory alignment issues */
  389. memcpy(&dnsv4_hdr,
  390. (data + TCPR_ETH_H + (ip_hdr.ip_hl * 4) + TCPR_UDP_H),
  391. TCPR_DNS_H);
  392. if (dnsv4_hdr.flags & DNS_QUERY_FLAG) {
  393. /* bit set, response */
  394. node->type = DIR_SERVER;
  395. dbg(3, "is a dns server");
  396. }
  397. else {
  398. /* bit not set, query */
  399. node->type = DIR_CLIENT;
  400. dbg(3, "is a dns client");
  401. }
  402. return (node);
  403. break;
  404. default:
  405. break;
  406. }
  407. switch (ntohs(udp_hdr.uh_sport)) {
  408. case 0x0035: /* dns */
  409. /* prevent memory alignment issues */
  410. memcpy(&dnsv4_hdr,
  411. (data + TCPR_ETH_H + (ip_hdr.ip_hl * 4) + TCPR_UDP_H),
  412. TCPR_DNS_H);
  413. if ((dnsv4_hdr.flags & 0x7FFFF) ^ DNS_QUERY_FLAG) {
  414. /* bit set, response */
  415. node->type = DIR_SERVER;
  416. dbg(3, "is a dns server");
  417. }
  418. else {
  419. /* bit not set, query */
  420. node->type = DIR_CLIENT;
  421. dbg(3, "is a dns client");
  422. }
  423. return (node);
  424. break;
  425. default:
  426. dbgx(3, "unknown UDP protocol: %hu->%hu", udp_hdr.uh_sport,
  427. udp_hdr.uh_dport);
  428. break;
  429. }
  430. /*
  431. * ICMP
  432. */
  433. }
  434. else if (ip_hdr.ip_p == IPPROTO_ICMP) {
  435. /* prevent alignment issues */
  436. memcpy(&icmp_hdr, (data + TCPR_ETH_H + (ip_hdr.ip_hl * 4)),
  437. TCPR_ICMPV4_H);
  438. dbgx(3, "%s uses ICMP... ",
  439. get_addr2name4(ip_hdr.ip_src.s_addr, RESOLVE));
  440. /*
  441. * if port unreachable, then source == server, dst == client
  442. */
  443. if ((icmp_hdr.icmp_type == ICMP_UNREACH) &&
  444. (icmp_hdr.icmp_code == ICMP_UNREACH_PORT)) {
  445. node->type = DIR_SERVER;
  446. dbg(3, "is a server with a closed port");
  447. }
  448. }
  449. return (node);
  450. }
  451. /*
  452. * prints out a node of the tree to stderr
  453. */
  454. static char *
  455. tree_printnode(const char *name, const tcpr_tree_t *node)
  456. {
  457. memset(&tree_print_buff, '\0', TREEPRINTBUFFLEN);
  458. if (node == NULL) {
  459. snprintf(tree_print_buff, TREEPRINTBUFFLEN, "%s node is null", name);
  460. }
  461. else {
  462. snprintf(tree_print_buff, TREEPRINTBUFFLEN,
  463. "-- %s: %p\nIP: %s\nMask: %d\nSrvr: %d\nClnt: %d\n",
  464. name, (void *)node, get_addr2name4(node->ip, RESOLVE),
  465. node->masklen, node->server_cnt, node->client_cnt);
  466. if (node->type == DIR_SERVER) {
  467. strlcat(tree_print_buff, "Type: Server\n--\n", TREEPRINTBUFFLEN);
  468. }
  469. else {
  470. strlcat(tree_print_buff, "Type: Client\n--", TREEPRINTBUFFLEN);
  471. }
  472. }
  473. return (tree_print_buff);
  474. }
  475. /*
  476. * prints out the entire tree
  477. */
  478. static char *
  479. tree_print(tcpr_data_tree_t *treeroot)
  480. {
  481. tcpr_tree_t *node = NULL;
  482. memset(&tree_print_buff, '\0', TREEPRINTBUFFLEN);
  483. RB_FOREACH(node, tcpr_data_tree_s, treeroot) {
  484. tree_printnode("my node", node);
  485. }
  486. return (tree_print_buff);
  487. }
  488. /*
  489. Local Variables:
  490. mode:c
  491. indent-tabs-mode:nil
  492. c-basic-offset:4
  493. End:
  494. */