readcdf.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606
  1. /*-
  2. * Copyright (c) 2008 Christos Zoulas
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  15. * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  16. * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  18. * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  19. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  20. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  21. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  22. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  23. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  24. * POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. #include "file.h"
  27. #ifndef lint
  28. FILE_RCSID("@(#)$File: readcdf.c,v 1.53 2015/04/09 20:01:41 christos Exp $")
  29. #endif
  30. #include <assert.h>
  31. #include <stdlib.h>
  32. #include <unistd.h>
  33. #include <string.h>
  34. #include <time.h>
  35. #include <ctype.h>
  36. #include "cdf.h"
  37. #include "magic.h"
  /*
   * __arraycount(a): number of elements in the array `a'.  `a' must be a
   * real array, not a pointer.  Defined here only if it is not already
   * defined.  The argument is parenthesized so that any array-valued
   * expression can be passed safely.
   */
  #ifndef __arraycount
  #define __arraycount(a) (sizeof(a) / sizeof((a)[0]))
  #endif

  /* NOTMIME(ms): true when none of the MAGIC_MIME bits is set in ms->flags. */
  #define NOTMIME(ms) (((ms)->flags & MAGIC_MIME) == 0)
  /*
   * Pattern -> result pair used by the substring lookup tables below.
   * Every table ends with a { NULL, NULL } sentinel.
   */
  struct nv {
      const char *pattern;
      const char *mime;
  };

  /* Application-name fragments mapped to an "application/" MIME subtype. */
  static const struct nv app2mime[] = {
      { "Word",                       "msword", },
      { "Excel",                      "vnd.ms-excel", },
      { "Powerpoint",                 "vnd.ms-powerpoint", },
      { "Crystal Reports",            "x-rpt", },
      { "Advanced Installer",         "vnd.ms-msi", },
      { "InstallShield",              "vnd.ms-msi", },
      { "Microsoft Patch Compiler",   "vnd.ms-msi", },
      { "NAnt",                       "vnd.ms-msi", },
      { "Windows Installer",          "vnd.ms-msi", },
      { NULL,                         NULL, },
  };

  /* Directory-entry name fragments mapped to an "application/" MIME subtype. */
  static const struct nv name2mime[] = {
      { "WordDocument",       "msword", },
      { "PowerPoint",         "vnd.ms-powerpoint", },
      { "DigitalSignature",   "vnd.ms-msi", },
      { NULL,                 NULL, },
  };

  /* Directory-entry name fragments mapped to a human-readable description. */
  static const struct nv name2desc[] = {
      { "WordDocument",       "Microsoft Office Word",},
      { "PowerPoint",         "Microsoft PowerPoint", },
      { "DigitalSignature",   "Microsoft Installer", },
      { NULL,                 NULL, },
  };
  /*
   * Class-id -> result pair.  The class id is held as two 64-bit words and
   * each table ends with an entry whose result string is NULL.
   */
  struct cv {
      uint64_t clsid[2];
      const char *mime;
  };

  /* Class ids mapped to an "application/" MIME subtype. */
  static const struct cv clsid2mime[] = {
      { { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, "x-msi", },
      { { 0, 0 }, NULL, },
  };

  /* Class ids mapped to a human-readable description. */
  static const struct cv clsid2desc[] = {
      { { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, "MSI Installer", },
      { { 0, 0 }, NULL, },
  };
  87. private const char *
  88. cdf_clsid_to_mime(const uint64_t clsid[2], const struct cv *cv)
  89. {
  90. size_t i;
  91. for (i = 0; cv[i].mime != NULL; i++) {
  92. if (clsid[0] == cv[i].clsid[0] && clsid[1] == cv[i].clsid[1])
  93. return cv[i].mime;
  94. }
  95. #ifdef CDF_DEBUG
  96. fprintf(stderr, "unknown mime %" PRIx64 ", %" PRIx64 "\n", clsid[0],
  97. clsid[1]);
  98. #endif
  99. return NULL;
  100. }
  101. private const char *
  102. cdf_app_to_mime(const char *vbuf, const struct nv *nv)
  103. {
  104. size_t i;
  105. const char *rv = NULL;
  106. #ifdef USE_C_LOCALE
  107. locale_t old_lc_ctype, c_lc_ctype;
  108. c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0);
  109. assert(c_lc_ctype != NULL);
  110. old_lc_ctype = uselocale(c_lc_ctype);
  111. assert(old_lc_ctype != NULL);
  112. #endif
  113. for (i = 0; nv[i].pattern != NULL; i++)
  114. if (strcasestr(vbuf, nv[i].pattern) != NULL) {
  115. rv = nv[i].mime;
  116. break;
  117. }
  118. #ifdef CDF_DEBUG
  119. fprintf(stderr, "unknown app %s\n", vbuf);
  120. #endif
  121. #ifdef USE_C_LOCALE
  122. (void)uselocale(old_lc_ctype);
  123. freelocale(c_lc_ctype);
  124. #endif
  125. return rv;
  126. }
  127. private int
  128. cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info,
  129. size_t count, const cdf_directory_t *root_storage)
  130. {
  131. size_t i;
  132. cdf_timestamp_t tp;
  133. struct timespec ts;
  134. char buf[64];
  135. const char *str = NULL;
  136. const char *s;
  137. int len;
  138. if (!NOTMIME(ms) && root_storage)
  139. str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
  140. clsid2mime);
  141. for (i = 0; i < count; i++) {
  142. cdf_print_property_name(buf, sizeof(buf), info[i].pi_id);
  143. switch (info[i].pi_type) {
  144. case CDF_NULL:
  145. break;
  146. case CDF_SIGNED16:
  147. if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf,
  148. info[i].pi_s16) == -1)
  149. return -1;
  150. break;
  151. case CDF_SIGNED32:
  152. if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf,
  153. info[i].pi_s32) == -1)
  154. return -1;
  155. break;
  156. case CDF_UNSIGNED32:
  157. if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf,
  158. info[i].pi_u32) == -1)
  159. return -1;
  160. break;
  161. case CDF_FLOAT:
  162. if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
  163. info[i].pi_f) == -1)
  164. return -1;
  165. break;
  166. case CDF_DOUBLE:
  167. if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
  168. info[i].pi_d) == -1)
  169. return -1;
  170. break;
  171. case CDF_LENGTH32_STRING:
  172. case CDF_LENGTH32_WSTRING:
  173. len = info[i].pi_str.s_len;
  174. if (len > 1) {
  175. char vbuf[1024];
  176. size_t j, k = 1;
  177. if (info[i].pi_type == CDF_LENGTH32_WSTRING)
  178. k++;
  179. s = info[i].pi_str.s_buf;
  180. for (j = 0; j < sizeof(vbuf) && len--; s += k) {
  181. if (*s == '\0')
  182. break;
  183. if (isprint((unsigned char)*s))
  184. vbuf[j++] = *s;
  185. }
  186. if (j == sizeof(vbuf))
  187. --j;
  188. vbuf[j] = '\0';
  189. if (NOTMIME(ms)) {
  190. if (vbuf[0]) {
  191. if (file_printf(ms, ", %s: %s",
  192. buf, vbuf) == -1)
  193. return -1;
  194. }
  195. } else if (str == NULL && info[i].pi_id ==
  196. CDF_PROPERTY_NAME_OF_APPLICATION) {
  197. str = cdf_app_to_mime(vbuf, app2mime);
  198. }
  199. }
  200. break;
  201. case CDF_FILETIME:
  202. tp = info[i].pi_tp;
  203. if (tp != 0) {
  204. char tbuf[64];
  205. if (tp < 1000000000000000LL) {
  206. cdf_print_elapsed_time(tbuf,
  207. sizeof(tbuf), tp);
  208. if (NOTMIME(ms) && file_printf(ms,
  209. ", %s: %s", buf, tbuf) == -1)
  210. return -1;
  211. } else {
  212. char *c, *ec;
  213. cdf_timestamp_to_timespec(&ts, tp);
  214. c = cdf_ctime(&ts.tv_sec, tbuf);
  215. if (c != NULL &&
  216. (ec = strchr(c, '\n')) != NULL)
  217. *ec = '\0';
  218. if (NOTMIME(ms) && file_printf(ms,
  219. ", %s: %s", buf, c) == -1)
  220. return -1;
  221. }
  222. }
  223. break;
  224. case CDF_CLIPBOARD:
  225. break;
  226. default:
  227. return -1;
  228. }
  229. }
  230. if (!NOTMIME(ms)) {
  231. if (str == NULL)
  232. return 0;
  233. if (file_printf(ms, "application/%s", str) == -1)
  234. return -1;
  235. }
  236. return 1;
  237. }
  238. private int
  239. cdf_file_catalog(struct magic_set *ms, const cdf_header_t *h,
  240. const cdf_stream_t *sst)
  241. {
  242. cdf_catalog_t *cat;
  243. size_t i;
  244. char buf[256];
  245. cdf_catalog_entry_t *ce;
  246. if (NOTMIME(ms)) {
  247. if (file_printf(ms, "Microsoft Thumbs.db [") == -1)
  248. return -1;
  249. if (cdf_unpack_catalog(h, sst, &cat) == -1)
  250. return -1;
  251. ce = cat->cat_e;
  252. /* skip first entry since it has a , or paren */
  253. for (i = 1; i < cat->cat_num; i++)
  254. if (file_printf(ms, "%s%s",
  255. cdf_u16tos8(buf, ce[i].ce_namlen, ce[i].ce_name),
  256. i == cat->cat_num - 1 ? "]" : ", ") == -1) {
  257. free(cat);
  258. return -1;
  259. }
  260. free(cat);
  261. } else {
  262. if (file_printf(ms, "application/CDFV2") == -1)
  263. return -1;
  264. }
  265. return 1;
  266. }
  267. private int
  268. cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h,
  269. const cdf_stream_t *sst, const cdf_directory_t *root_storage)
  270. {
  271. cdf_summary_info_header_t si;
  272. cdf_property_info_t *info;
  273. size_t count;
  274. int m;
  275. if (cdf_unpack_summary_info(sst, h, &si, &info, &count) == -1)
  276. return -1;
  277. if (NOTMIME(ms)) {
  278. const char *str;
  279. if (file_printf(ms, "Composite Document File V2 Document")
  280. == -1)
  281. return -1;
  282. if (file_printf(ms, ", %s Endian",
  283. si.si_byte_order == 0xfffe ? "Little" : "Big") == -1)
  284. return -2;
  285. switch (si.si_os) {
  286. case 2:
  287. if (file_printf(ms, ", Os: Windows, Version %d.%d",
  288. si.si_os_version & 0xff,
  289. (uint32_t)si.si_os_version >> 8) == -1)
  290. return -2;
  291. break;
  292. case 1:
  293. if (file_printf(ms, ", Os: MacOS, Version %d.%d",
  294. (uint32_t)si.si_os_version >> 8,
  295. si.si_os_version & 0xff) == -1)
  296. return -2;
  297. break;
  298. default:
  299. if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os,
  300. si.si_os_version & 0xff,
  301. (uint32_t)si.si_os_version >> 8) == -1)
  302. return -2;
  303. break;
  304. }
  305. if (root_storage) {
  306. str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
  307. clsid2desc);
  308. if (str) {
  309. if (file_printf(ms, ", %s", str) == -1)
  310. return -2;
  311. }
  312. }
  313. }
  314. m = cdf_file_property_info(ms, info, count, root_storage);
  315. free(info);
  316. return m == -1 ? -2 : m;
  317. }
  318. #ifdef notdef
  /*
   * Format the two-word class id uuid as lower-case hex groups of
   * 8-4-4-4-12 digits into buf (at most len bytes, always NUL-terminated
   * when len > 0) and return buf.
   *
   * Group layout: uuid[0] bits 63..32, 31..16, 15..0, then uuid[1] bits
   * 63..48 and 47..0.  The last mask is exactly 48 bits wide so that the
   * fifth group never picks up bits that belong to the fourth.
   */
  private char *
  format_clsid(char *buf, size_t len, const uint64_t uuid[2]) {
      snprintf(buf, len, "%.8" PRIx64 "-%.4" PRIx64 "-%.4" PRIx64 "-%.4"
          PRIx64 "-%.12" PRIx64,
          (uuid[0] >> 32) & (uint64_t)0x00000000ffffffffULL,
          (uuid[0] >> 16) & (uint64_t)0x000000000000ffffULL,
          (uuid[0] >>  0) & (uint64_t)0x000000000000ffffULL,
          (uuid[1] >> 48) & (uint64_t)0x000000000000ffffULL,
          (uuid[1] >>  0) & (uint64_t)0x0000ffffffffffffULL);
      return buf;
  }
  330. #endif
  331. private int
  332. cdf_file_catalog_info(struct magic_set *ms, const cdf_info_t *info,
  333. const cdf_header_t *h, const cdf_sat_t *sat, const cdf_sat_t *ssat,
  334. const cdf_stream_t *sst, const cdf_dir_t *dir, cdf_stream_t *scn)
  335. {
  336. int i;
  337. if ((i = cdf_read_user_stream(info, h, sat, ssat, sst,
  338. dir, "Catalog", scn)) == -1)
  339. return i;
  340. #ifdef CDF_DEBUG
  341. cdf_dump_catalog(&h, &scn);
  342. #endif
  343. if ((i = cdf_file_catalog(ms, h, scn)) == -1)
  344. return -1;
  345. return i;
  346. }
  347. private struct sinfo {
  348. const char *name;
  349. const char *mime;
  350. const char *sections[5];
  351. const int types[5];
  352. } sectioninfo[] = {
  353. { "Encrypted", "encrypted",
  354. {
  355. "EncryptedPackage", NULL, NULL, NULL, NULL,
  356. },
  357. {
  358. CDF_DIR_TYPE_USER_STREAM, 0, 0, 0, 0,
  359. },
  360. },
  361. { "QuickBooks", "quickbooks",
  362. {
  363. #if 0
  364. "TaxForms", "PDFTaxForms", "modulesInBackup",
  365. #endif
  366. "mfbu_header", NULL, NULL, NULL, NULL,
  367. },
  368. {
  369. #if 0
  370. CDF_DIR_TYPE_USER_STORAGE,
  371. CDF_DIR_TYPE_USER_STORAGE,
  372. CDF_DIR_TYPE_USER_STREAM,
  373. #endif
  374. CDF_DIR_TYPE_USER_STREAM,
  375. 0, 0, 0, 0
  376. },
  377. },
  378. };
  379. private int
  380. cdf_file_dir_info(struct magic_set *ms, const cdf_dir_t *dir)
  381. {
  382. size_t sd, j;
  383. for (sd = 0; sd < __arraycount(sectioninfo); sd++) {
  384. const struct sinfo *si = &sectioninfo[sd];
  385. for (j = 0; si->sections[j]; j++) {
  386. if (cdf_find_stream(dir, si->sections[j], si->types[j])
  387. <= 0) {
  388. #ifdef CDF_DEBUG
  389. fprintf(stderr, "Can't read %s\n",
  390. si->sections[j]);
  391. #endif
  392. break;
  393. }
  394. }
  395. if (si->sections[j] != NULL)
  396. continue;
  397. if (NOTMIME(ms)) {
  398. if (file_printf(ms, "CDFV2 %s", si->name) == -1)
  399. return -1;
  400. } else {
  401. if (file_printf(ms, "application/CDFV2-%s",
  402. si->mime) == -1)
  403. return -1;
  404. }
  405. return 1;
  406. }
  407. return -1;
  408. }
  409. protected int
  410. file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf,
  411. size_t nbytes)
  412. {
  413. cdf_info_t info;
  414. cdf_header_t h;
  415. cdf_sat_t sat, ssat;
  416. cdf_stream_t sst, scn;
  417. cdf_dir_t dir;
  418. int i;
  419. const char *expn = "";
  420. const cdf_directory_t *root_storage;
  421. info.i_fd = fd;
  422. info.i_buf = buf;
  423. info.i_len = nbytes;
  424. if (ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION))
  425. return 0;
  426. if (cdf_read_header(&info, &h) == -1)
  427. return 0;
  428. #ifdef CDF_DEBUG
  429. cdf_dump_header(&h);
  430. #endif
  431. if ((i = cdf_read_sat(&info, &h, &sat)) == -1) {
  432. expn = "Can't read SAT";
  433. goto out0;
  434. }
  435. #ifdef CDF_DEBUG
  436. cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h));
  437. #endif
  438. if ((i = cdf_read_ssat(&info, &h, &sat, &ssat)) == -1) {
  439. expn = "Can't read SSAT";
  440. goto out1;
  441. }
  442. #ifdef CDF_DEBUG
  443. cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h));
  444. #endif
  445. if ((i = cdf_read_dir(&info, &h, &sat, &dir)) == -1) {
  446. expn = "Can't read directory";
  447. goto out2;
  448. }
  449. if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst,
  450. &root_storage)) == -1) {
  451. expn = "Cannot read short stream";
  452. goto out3;
  453. }
  454. #ifdef CDF_DEBUG
  455. cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir);
  456. #endif
  457. #ifdef notdef
  458. if (root_storage) {
  459. if (NOTMIME(ms)) {
  460. char clsbuf[128];
  461. if (file_printf(ms, "CLSID %s, ",
  462. format_clsid(clsbuf, sizeof(clsbuf),
  463. root_storage->d_storage_uuid)) == -1)
  464. return -1;
  465. }
  466. }
  467. #endif
  468. if ((i = cdf_read_user_stream(&info, &h, &sat, &ssat, &sst, &dir,
  469. "FileHeader", &scn)) != -1) {
  470. #define HWP5_SIGNATURE "HWP Document File"
  471. if (scn.sst_dirlen >= sizeof(HWP5_SIGNATURE) - 1
  472. && memcmp(scn.sst_tab, HWP5_SIGNATURE,
  473. sizeof(HWP5_SIGNATURE) - 1) == 0) {
  474. if (NOTMIME(ms)) {
  475. if (file_printf(ms,
  476. "Hangul (Korean) Word Processor File 5.x") == -1)
  477. return -1;
  478. } else {
  479. if (file_printf(ms, "application/x-hwp") == -1)
  480. return -1;
  481. }
  482. i = 1;
  483. goto out5;
  484. } else {
  485. free(scn.sst_tab);
  486. scn.sst_tab = NULL;
  487. scn.sst_len = 0;
  488. scn.sst_dirlen = 0;
  489. }
  490. }
  491. if ((i = cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir,
  492. &scn)) == -1) {
  493. if (errno != ESRCH) {
  494. expn = "Cannot read summary info";
  495. goto out4;
  496. }
  497. i = cdf_file_catalog_info(ms, &info, &h, &sat, &ssat, &sst,
  498. &dir, &scn);
  499. if (i > 0)
  500. goto out4;
  501. i = cdf_file_dir_info(ms, &dir);
  502. if (i < 0)
  503. expn = "Cannot read section info";
  504. goto out4;
  505. }
  506. #ifdef CDF_DEBUG
  507. cdf_dump_summary_info(&h, &scn);
  508. #endif
  509. if ((i = cdf_file_summary_info(ms, &h, &scn, root_storage)) < 0)
  510. expn = "Can't expand summary_info";
  511. if (i == 0) {
  512. const char *str = NULL;
  513. cdf_directory_t *d;
  514. char name[__arraycount(d->d_name)];
  515. size_t j, k;
  516. for (j = 0; str == NULL && j < dir.dir_len; j++) {
  517. d = &dir.dir_tab[j];
  518. for (k = 0; k < sizeof(name); k++)
  519. name[k] = (char)cdf_tole2(d->d_name[k]);
  520. str = cdf_app_to_mime(name,
  521. NOTMIME(ms) ? name2desc : name2mime);
  522. }
  523. if (NOTMIME(ms)) {
  524. if (str != NULL) {
  525. if (file_printf(ms, "%s", str) == -1)
  526. return -1;
  527. i = 1;
  528. }
  529. } else {
  530. if (str == NULL)
  531. str = "vnd.ms-office";
  532. if (file_printf(ms, "application/%s", str) == -1)
  533. return -1;
  534. i = 1;
  535. }
  536. }
  537. out5:
  538. free(scn.sst_tab);
  539. out4:
  540. free(sst.sst_tab);
  541. out3:
  542. free(dir.dir_tab);
  543. out2:
  544. free(ssat.sat_tab);
  545. out1:
  546. free(sat.sat_tab);
  547. out0:
  548. if (i == -1) {
  549. if (NOTMIME(ms)) {
  550. if (file_printf(ms,
  551. "Composite Document File V2 Document") == -1)
  552. return -1;
  553. if (*expn)
  554. if (file_printf(ms, ", %s", expn) == -1)
  555. return -1;
  556. } else {
  557. if (file_printf(ms, "application/CDFV2-unknown") == -1)
  558. return -1;
  559. }
  560. i = 1;
  561. }
  562. return i;
  563. }