readcdf.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522
  1. /*-
  2. * Copyright (c) 2008 Christos Zoulas
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  15. * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  16. * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  18. * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  19. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  20. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  21. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  22. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  23. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  24. * POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. #include "file.h"
  27. #ifndef lint
  28. FILE_RCSID("@(#)$File: readcdf.c,v 1.48 2014/09/10 18:41:51 christos Exp $")
  29. #endif
  30. #include <assert.h>
  31. #include <stdlib.h>
  32. #include <unistd.h>
  33. #include <string.h>
  34. #include <time.h>
  35. #include <ctype.h>
  36. #include "cdf.h"
  37. #include "magic.h"
  /* True when none of the MAGIC_MIME flag bits are set on the magic set. */
  #define NOTMIME(ms) (((ms)->flags & MAGIC_MIME) == 0)

  /*
   * Pattern -> string lookup tables, each closed by a { NULL, NULL } sentinel.
   * The patterns are matched as case-insensitive substrings by
   * cdf_app_to_mime().
   *
   *   app2mime   application-name pattern   -> MIME subtype
   *   name2mime  directory-entry pattern    -> MIME subtype
   *   name2desc  directory-entry pattern    -> human readable description
   *
   * Order matters: the first matching row wins.
   */
  static const struct nv {
      const char *pattern;
      const char *mime;
  } app2mime[] = {
      { .pattern = "Word",                     .mime = "msword" },
      { .pattern = "Excel",                    .mime = "vnd.ms-excel" },
      { .pattern = "Powerpoint",               .mime = "vnd.ms-powerpoint" },
      { .pattern = "Crystal Reports",          .mime = "x-rpt" },
      { .pattern = "Advanced Installer",       .mime = "vnd.ms-msi" },
      { .pattern = "InstallShield",            .mime = "vnd.ms-msi" },
      { .pattern = "Microsoft Patch Compiler", .mime = "vnd.ms-msi" },
      { .pattern = "NAnt",                     .mime = "vnd.ms-msi" },
      { .pattern = "Windows Installer",        .mime = "vnd.ms-msi" },
      { .pattern = NULL,                       .mime = NULL },
  }, name2mime[] = {
      { .pattern = "WordDocument",             .mime = "msword" },
      { .pattern = "PowerPoint",               .mime = "vnd.ms-powerpoint" },
      { .pattern = "DigitalSignature",         .mime = "vnd.ms-msi" },
      { .pattern = NULL,                       .mime = NULL },
  }, name2desc[] = {
      { .pattern = "WordDocument",             .mime = "Microsoft Office Word" },
      { .pattern = "PowerPoint",               .mime = "Microsoft PowerPoint" },
      { .pattern = "DigitalSignature",         .mime = "Microsoft Installer" },
      { .pattern = NULL,                       .mime = NULL },
  };
  /*
   * Class-id lookup tables.  A class id is held as two 64-bit words; each
   * table ends with a row whose string is NULL.
   *
   *   clsid2mime  class id -> MIME subtype
   *   clsid2desc  class id -> human readable description
   */
  static const struct cv {
      uint64_t clsid[2];
      const char *mime;
  } clsid2mime[] = {
      {
          .clsid = { 0x00000000000c1084ULL, 0x46000000000000c0ULL },
          .mime = "x-msi",
      },
      {
          .clsid = { 0, 0 },
          .mime = NULL,
      },
  }, clsid2desc[] = {
      {
          .clsid = { 0x00000000000c1084ULL, 0x46000000000000c0ULL },
          .mime = "MSI Installer",
      },
      {
          .clsid = { 0, 0 },
          .mime = NULL,
      },
  };
  84. private const char *
  85. cdf_clsid_to_mime(const uint64_t clsid[2], const struct cv *cv)
  86. {
  87. size_t i;
  88. for (i = 0; cv[i].mime != NULL; i++) {
  89. if (clsid[0] == cv[i].clsid[0] && clsid[1] == cv[i].clsid[1])
  90. return cv[i].mime;
  91. }
  92. return NULL;
  93. }
  94. private const char *
  95. cdf_app_to_mime(const char *vbuf, const struct nv *nv)
  96. {
  97. size_t i;
  98. const char *rv = NULL;
  99. #ifdef USE_C_LOCALE
  100. locale_t old_lc_ctype, c_lc_ctype;
  101. c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0);
  102. assert(c_lc_ctype != NULL);
  103. old_lc_ctype = uselocale(c_lc_ctype);
  104. assert(old_lc_ctype != NULL);
  105. #endif
  106. for (i = 0; nv[i].pattern != NULL; i++)
  107. if (strcasestr(vbuf, nv[i].pattern) != NULL) {
  108. rv = nv[i].mime;
  109. break;
  110. }
  111. #ifdef USE_C_LOCALE
  112. (void)uselocale(old_lc_ctype);
  113. freelocale(c_lc_ctype);
  114. #endif
  115. return rv;
  116. }
  117. private int
  118. cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info,
  119. size_t count, const cdf_directory_t *root_storage)
  120. {
  121. size_t i;
  122. cdf_timestamp_t tp;
  123. struct timespec ts;
  124. char buf[64];
  125. const char *str = NULL;
  126. const char *s;
  127. int len;
  128. if (!NOTMIME(ms) && root_storage)
  129. str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
  130. clsid2mime);
  131. for (i = 0; i < count; i++) {
  132. cdf_print_property_name(buf, sizeof(buf), info[i].pi_id);
  133. switch (info[i].pi_type) {
  134. case CDF_NULL:
  135. break;
  136. case CDF_SIGNED16:
  137. if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf,
  138. info[i].pi_s16) == -1)
  139. return -1;
  140. break;
  141. case CDF_SIGNED32:
  142. if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf,
  143. info[i].pi_s32) == -1)
  144. return -1;
  145. break;
  146. case CDF_UNSIGNED32:
  147. if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf,
  148. info[i].pi_u32) == -1)
  149. return -1;
  150. break;
  151. case CDF_FLOAT:
  152. if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
  153. info[i].pi_f) == -1)
  154. return -1;
  155. break;
  156. case CDF_DOUBLE:
  157. if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
  158. info[i].pi_d) == -1)
  159. return -1;
  160. break;
  161. case CDF_LENGTH32_STRING:
  162. case CDF_LENGTH32_WSTRING:
  163. len = info[i].pi_str.s_len;
  164. if (len > 1) {
  165. char vbuf[1024];
  166. size_t j, k = 1;
  167. if (info[i].pi_type == CDF_LENGTH32_WSTRING)
  168. k++;
  169. s = info[i].pi_str.s_buf;
  170. for (j = 0; j < sizeof(vbuf) && len--; s += k) {
  171. if (*s == '\0')
  172. break;
  173. if (isprint((unsigned char)*s))
  174. vbuf[j++] = *s;
  175. }
  176. if (j == sizeof(vbuf))
  177. --j;
  178. vbuf[j] = '\0';
  179. if (NOTMIME(ms)) {
  180. if (vbuf[0]) {
  181. if (file_printf(ms, ", %s: %s",
  182. buf, vbuf) == -1)
  183. return -1;
  184. }
  185. } else if (str == NULL && info[i].pi_id ==
  186. CDF_PROPERTY_NAME_OF_APPLICATION) {
  187. str = cdf_app_to_mime(vbuf, app2mime);
  188. }
  189. }
  190. break;
  191. case CDF_FILETIME:
  192. tp = info[i].pi_tp;
  193. if (tp != 0) {
  194. char tbuf[64];
  195. if (tp < 1000000000000000LL) {
  196. cdf_print_elapsed_time(tbuf,
  197. sizeof(tbuf), tp);
  198. if (NOTMIME(ms) && file_printf(ms,
  199. ", %s: %s", buf, tbuf) == -1)
  200. return -1;
  201. } else {
  202. char *c, *ec;
  203. cdf_timestamp_to_timespec(&ts, tp);
  204. c = cdf_ctime(&ts.tv_sec, tbuf);
  205. if (c != NULL &&
  206. (ec = strchr(c, '\n')) != NULL)
  207. *ec = '\0';
  208. if (NOTMIME(ms) && file_printf(ms,
  209. ", %s: %s", buf, c) == -1)
  210. return -1;
  211. }
  212. }
  213. break;
  214. case CDF_CLIPBOARD:
  215. break;
  216. default:
  217. return -1;
  218. }
  219. }
  220. if (!NOTMIME(ms)) {
  221. if (str == NULL)
  222. return 0;
  223. if (file_printf(ms, "application/%s", str) == -1)
  224. return -1;
  225. }
  226. return 1;
  227. }
  228. private int
  229. cdf_file_catalog(struct magic_set *ms, const cdf_header_t *h,
  230. const cdf_stream_t *sst)
  231. {
  232. cdf_catalog_t *cat;
  233. size_t i;
  234. char buf[256];
  235. cdf_catalog_entry_t *ce;
  236. if (NOTMIME(ms)) {
  237. if (file_printf(ms, "Microsoft Thumbs.db [") == -1)
  238. return -1;
  239. if (cdf_unpack_catalog(h, sst, &cat) == -1)
  240. return -1;
  241. ce = cat->cat_e;
  242. /* skip first entry since it has a , or paren */
  243. for (i = 1; i < cat->cat_num; i++)
  244. if (file_printf(ms, "%s%s",
  245. cdf_u16tos8(buf, ce[i].ce_namlen, ce[i].ce_name),
  246. i == cat->cat_num - 1 ? "]" : ", ") == -1) {
  247. free(cat);
  248. return -1;
  249. }
  250. free(cat);
  251. } else {
  252. if (file_printf(ms, "application/CDFV2") == -1)
  253. return -1;
  254. }
  255. return 1;
  256. }
  257. private int
  258. cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h,
  259. const cdf_stream_t *sst, const cdf_directory_t *root_storage)
  260. {
  261. cdf_summary_info_header_t si;
  262. cdf_property_info_t *info;
  263. size_t count;
  264. int m;
  265. if (cdf_unpack_summary_info(sst, h, &si, &info, &count) == -1)
  266. return -1;
  267. if (NOTMIME(ms)) {
  268. const char *str;
  269. if (file_printf(ms, "Composite Document File V2 Document")
  270. == -1)
  271. return -1;
  272. if (file_printf(ms, ", %s Endian",
  273. si.si_byte_order == 0xfffe ? "Little" : "Big") == -1)
  274. return -2;
  275. switch (si.si_os) {
  276. case 2:
  277. if (file_printf(ms, ", Os: Windows, Version %d.%d",
  278. si.si_os_version & 0xff,
  279. (uint32_t)si.si_os_version >> 8) == -1)
  280. return -2;
  281. break;
  282. case 1:
  283. if (file_printf(ms, ", Os: MacOS, Version %d.%d",
  284. (uint32_t)si.si_os_version >> 8,
  285. si.si_os_version & 0xff) == -1)
  286. return -2;
  287. break;
  288. default:
  289. if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os,
  290. si.si_os_version & 0xff,
  291. (uint32_t)si.si_os_version >> 8) == -1)
  292. return -2;
  293. break;
  294. }
  295. if (root_storage) {
  296. str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
  297. clsid2desc);
  298. if (str) {
  299. if (file_printf(ms, ", %s", str) == -1)
  300. return -2;
  301. }
  302. }
  303. }
  304. m = cdf_file_property_info(ms, info, count, root_storage);
  305. free(info);
  306. return m == -1 ? -2 : m;
  307. }
  #ifdef notdef
  /*
   * Format the 128-bit class id `uuid' into `buf' (capacity `len' bytes,
   * terminating NUL included) as five dash-separated lowercase hexadecimal
   * groups of 8, 4, 4, 4 and 12 digits, and return `buf'.
   *
   * Groups 1-3 come from uuid[0] (bits 63..32, 31..16, 15..0); groups 4-5
   * come from uuid[1] (bits 63..48, 47..0).  The last mask is exactly 48
   * bits so that it does not overlap the fourth group.
   */
  private char *
  format_clsid(char *buf, size_t len, const uint64_t uuid[2]) {
      (void)snprintf(buf, len, "%.8" PRIx64 "-%.4" PRIx64 "-%.4" PRIx64 "-%.4"
          PRIx64 "-%.12" PRIx64,
          (uuid[0] >> 32) & (uint64_t)0x00000000ffffffffULL,
          (uuid[0] >> 16) & (uint64_t)0x000000000000ffffULL,
          (uuid[0] >>  0) & (uint64_t)0x000000000000ffffULL,
          (uuid[1] >> 48) & (uint64_t)0x000000000000ffffULL,
          (uuid[1] >>  0) & (uint64_t)0x0000ffffffffffffULL);
      return buf;
  }
  #endif
  321. protected int
  322. file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf,
  323. size_t nbytes)
  324. {
  325. cdf_info_t info;
  326. cdf_header_t h;
  327. cdf_sat_t sat, ssat;
  328. cdf_stream_t sst, scn;
  329. cdf_dir_t dir;
  330. int i;
  331. const char *expn = "";
  332. const char *corrupt = "corrupt: ";
  333. const cdf_directory_t *root_storage;
  334. info.i_fd = fd;
  335. info.i_buf = buf;
  336. info.i_len = nbytes;
  337. if (ms->flags & MAGIC_APPLE)
  338. return 0;
  339. if (cdf_read_header(&info, &h) == -1)
  340. return 0;
  341. #ifdef CDF_DEBUG
  342. cdf_dump_header(&h);
  343. #endif
  344. if ((i = cdf_read_sat(&info, &h, &sat)) == -1) {
  345. expn = "Can't read SAT";
  346. goto out0;
  347. }
  348. #ifdef CDF_DEBUG
  349. cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h));
  350. #endif
  351. if ((i = cdf_read_ssat(&info, &h, &sat, &ssat)) == -1) {
  352. expn = "Can't read SSAT";
  353. goto out1;
  354. }
  355. #ifdef CDF_DEBUG
  356. cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h));
  357. #endif
  358. if ((i = cdf_read_dir(&info, &h, &sat, &dir)) == -1) {
  359. expn = "Can't read directory";
  360. goto out2;
  361. }
  362. if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst,
  363. &root_storage)) == -1) {
  364. expn = "Cannot read short stream";
  365. goto out3;
  366. }
  367. #ifdef CDF_DEBUG
  368. cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir);
  369. #endif
  370. #ifdef notdef
  371. if (root_storage) {
  372. if (NOTMIME(ms)) {
  373. char clsbuf[128];
  374. if (file_printf(ms, "CLSID %s, ",
  375. format_clsid(clsbuf, sizeof(clsbuf),
  376. root_storage->d_storage_uuid)) == -1)
  377. return -1;
  378. }
  379. }
  380. #endif
  381. if ((i = cdf_read_user_stream(&info, &h, &sat, &ssat, &sst, &dir,
  382. "FileHeader", &scn)) != -1) {
  383. #define HWP5_SIGNATURE "HWP Document File"
  384. if (scn.sst_dirlen >= sizeof(HWP5_SIGNATURE) - 1
  385. && memcmp(scn.sst_tab, HWP5_SIGNATURE,
  386. sizeof(HWP5_SIGNATURE) - 1) == 0) {
  387. if (NOTMIME(ms)) {
  388. if (file_printf(ms,
  389. "Hangul (Korean) Word Processor File 5.x") == -1)
  390. return -1;
  391. } else {
  392. if (file_printf(ms, "application/x-hwp") == -1)
  393. return -1;
  394. }
  395. i = 1;
  396. goto out5;
  397. } else {
  398. free(scn.sst_tab);
  399. scn.sst_tab = NULL;
  400. scn.sst_len = 0;
  401. scn.sst_dirlen = 0;
  402. }
  403. }
  404. if ((i = cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir,
  405. &scn)) == -1) {
  406. if (errno == ESRCH) {
  407. if ((i = cdf_read_catalog(&info, &h, &sat, &ssat, &sst,
  408. &dir, &scn)) == -1) {
  409. corrupt = expn;
  410. if ((i = cdf_read_encrypted_package(&info, &h,
  411. &sat, &ssat, &sst, &dir, &scn)) == -1)
  412. expn = "No summary info";
  413. else {
  414. expn = "Encrypted";
  415. i = -1;
  416. }
  417. goto out4;
  418. }
  419. #ifdef CDF_DEBUG
  420. cdf_dump_catalog(&h, &scn);
  421. #endif
  422. if ((i = cdf_file_catalog(ms, &h, &scn))
  423. < 0)
  424. expn = "Can't expand catalog";
  425. } else {
  426. expn = "Cannot read summary info";
  427. }
  428. goto out4;
  429. }
  430. #ifdef CDF_DEBUG
  431. cdf_dump_summary_info(&h, &scn);
  432. #endif
  433. if ((i = cdf_file_summary_info(ms, &h, &scn, root_storage)) < 0)
  434. expn = "Can't expand summary_info";
  435. if (i == 0) {
  436. const char *str = NULL;
  437. cdf_directory_t *d;
  438. char name[__arraycount(d->d_name)];
  439. size_t j, k;
  440. for (j = 0; str == NULL && j < dir.dir_len; j++) {
  441. d = &dir.dir_tab[j];
  442. for (k = 0; k < sizeof(name); k++)
  443. name[k] = (char)cdf_tole2(d->d_name[k]);
  444. str = cdf_app_to_mime(name,
  445. NOTMIME(ms) ? name2desc : name2mime);
  446. }
  447. if (NOTMIME(ms)) {
  448. if (str != NULL) {
  449. if (file_printf(ms, "%s", str) == -1)
  450. return -1;
  451. i = 1;
  452. }
  453. } else {
  454. if (str == NULL)
  455. str = "vnd.ms-office";
  456. if (file_printf(ms, "application/%s", str) == -1)
  457. return -1;
  458. i = 1;
  459. }
  460. }
  461. out5:
  462. free(scn.sst_tab);
  463. out4:
  464. free(sst.sst_tab);
  465. out3:
  466. free(dir.dir_tab);
  467. out2:
  468. free(ssat.sat_tab);
  469. out1:
  470. free(sat.sat_tab);
  471. out0:
  472. if (i == -1) {
  473. if (NOTMIME(ms)) {
  474. if (file_printf(ms,
  475. "Composite Document File V2 Document") == -1)
  476. return -1;
  477. if (*expn)
  478. if (file_printf(ms, ", %s%s", corrupt, expn) == -1)
  479. return -1;
  480. } else {
  481. if (file_printf(ms, "application/CDFV2-%s",
  482. *corrupt ? "corrupt" : "encrypted") == -1)
  483. return -1;
  484. }
  485. i = 1;
  486. }
  487. return i;
  488. }