cdf.c 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105
  1. /*-
  2. * Copyright (c) 2008 Christos Zoulas
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  15. * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  16. * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  18. * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  19. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  20. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  21. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  22. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  23. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  24. * POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. /*
  27. * Parse composite document files, the format used in Microsoft Office
  28. * document files before they switched to zipped xml.
  29. * Info from: http://sc.openoffice.org/compdocfileformat.pdf
  30. */
  31. #include "file.h"
  32. #ifndef lint
  33. FILE_RCSID("@(#)$File: cdf.c,v 1.17 2009/02/03 20:27:51 christos Exp $")
  34. #endif
  35. #include <assert.h>
  36. #ifdef CDF_DEBUG
  37. #include <err.h>
  38. #endif
  39. #include <stdlib.h>
  40. #include <unistd.h>
  41. #include <string.h>
  42. #include <time.h>
  43. #include <ctype.h>
  44. #ifndef EFTYPE
  45. #define EFTYPE EINVAL
  46. #endif
  47. #include "cdf.h"
  48. #ifndef __arraycount
  49. #define __arraycount(a) (sizeof(a) / sizeof(a[0]))
  50. #endif
  51. #ifdef CDF_DEBUG
  52. #define DPRINTF(a) printf a
  53. #else
  54. #define DPRINTF(a)
  55. #endif
  /*
   * Host byte-order probe.  cdf_read_header() stores the bytes 1,2,3,4 into
   * cdf_bo.s; NEED_SWAP is true when the host reads them back as 0x01020304.
   * The CDF_TOLE* macros byte-swap their argument only when NEED_SWAP is
   * true and otherwise just cast it to the matching unsigned width.
   */
  static union {
      char s[4];
      uint32_t u;
  } cdf_bo;

  #define NEED_SWAP ((uint32_t)0x01020304 == cdf_bo.u)

  #define CDF_TOLE2(x) (NEED_SWAP ? cdf_tole2(x) : (uint16_t)(x))
  #define CDF_TOLE4(x) (NEED_SWAP ? cdf_tole4(x) : (uint32_t)(x))
  #define CDF_TOLE8(x) (NEED_SWAP ? cdf_tole8(x) : (uint64_t)(x))
  /*
   * Reverse the two bytes of a 16-bit value.
   */
  uint16_t
  cdf_tole2(uint16_t sv)
  {
      unsigned int lo = sv & 0xffU;
      unsigned int hi = (sv >> 8) & 0xffU;

      return (uint16_t)((lo << 8) | hi);
  }
  /*
   * Reverse the four bytes of a 32-bit value.
   */
  uint32_t
  cdf_tole4(uint32_t sv)
  {
      return ((sv & (uint32_t)0x000000ff) << 24) |
          ((sv & (uint32_t)0x0000ff00) << 8) |
          ((sv & (uint32_t)0x00ff0000) >> 8) |
          ((sv & (uint32_t)0xff000000) >> 24);
  }
  /*
   * Reverse the eight bytes of a 64-bit value.
   */
  uint64_t
  cdf_tole8(uint64_t sv)
  {
      uint64_t rv = 0;
      unsigned int n;

      /* Peel bytes off the low end of sv and push them onto rv. */
      for (n = 0; n < 8; n++) {
          rv = (rv << 8) | (sv & 0xff);
          sv >>= 8;
      }
      return rv;
  }
  /*
   * Copy sizeof(a) bytes from buf + len into a scalar (CDF_UNPACK) or an
   * array (CDF_UNPACKA) and advance len past them.  Both macros rely on
   * variables named "buf" and "len" being in scope at the point of use.
   */
  #define CDF_UNPACK(a) \
      ((void)memcpy(&(a), &buf[len], sizeof(a)), len += sizeof(a))
  #define CDF_UNPACKA(a) \
      ((void)memcpy((a), &buf[len], sizeof(a)), len += sizeof(a))
  115. void
  116. cdf_swap_header(cdf_header_t *h)
  117. {
  118. size_t i;
  119. h->h_magic = CDF_TOLE8(h->h_magic);
  120. h->h_uuid[0] = CDF_TOLE8(h->h_uuid[0]);
  121. h->h_uuid[1] = CDF_TOLE8(h->h_uuid[1]);
  122. h->h_revision = CDF_TOLE2(h->h_revision);
  123. h->h_version = CDF_TOLE2(h->h_version);
  124. h->h_byte_order = CDF_TOLE2(h->h_byte_order);
  125. h->h_sec_size_p2 = CDF_TOLE2(h->h_sec_size_p2);
  126. h->h_short_sec_size_p2 = CDF_TOLE2(h->h_short_sec_size_p2);
  127. h->h_num_sectors_in_sat = CDF_TOLE4(h->h_num_sectors_in_sat);
  128. h->h_secid_first_directory = CDF_TOLE4(h->h_secid_first_directory);
  129. h->h_min_size_standard_stream =
  130. CDF_TOLE4(h->h_min_size_standard_stream);
  131. h->h_secid_first_sector_in_short_sat =
  132. CDF_TOLE4(h->h_secid_first_sector_in_short_sat);
  133. h->h_num_sectors_in_short_sat =
  134. CDF_TOLE4(h->h_num_sectors_in_short_sat);
  135. h->h_secid_first_sector_in_master_sat =
  136. CDF_TOLE4(h->h_secid_first_sector_in_master_sat);
  137. h->h_num_sectors_in_master_sat =
  138. CDF_TOLE4(h->h_num_sectors_in_master_sat);
  139. for (i = 0; i < __arraycount(h->h_master_sat); i++)
  140. h->h_master_sat[i] = CDF_TOLE4(h->h_master_sat[i]);
  141. }
  142. void
  143. cdf_unpack_header(cdf_header_t *h, char *buf)
  144. {
  145. size_t i;
  146. size_t len = 0;
  147. CDF_UNPACK(h->h_magic);
  148. CDF_UNPACKA(h->h_uuid);
  149. CDF_UNPACK(h->h_revision);
  150. CDF_UNPACK(h->h_version);
  151. CDF_UNPACK(h->h_byte_order);
  152. CDF_UNPACK(h->h_sec_size_p2);
  153. CDF_UNPACK(h->h_short_sec_size_p2);
  154. CDF_UNPACKA(h->h_unused0);
  155. CDF_UNPACK(h->h_num_sectors_in_sat);
  156. CDF_UNPACK(h->h_secid_first_directory);
  157. CDF_UNPACKA(h->h_unused1);
  158. CDF_UNPACK(h->h_min_size_standard_stream);
  159. CDF_UNPACK(h->h_secid_first_sector_in_short_sat);
  160. CDF_UNPACK(h->h_num_sectors_in_short_sat);
  161. CDF_UNPACK(h->h_secid_first_sector_in_master_sat);
  162. CDF_UNPACK(h->h_num_sectors_in_master_sat);
  163. for (i = 0; i < __arraycount(h->h_master_sat); i++)
  164. CDF_UNPACK(h->h_master_sat[i]);
  165. }
  166. void
  167. cdf_swap_dir(cdf_directory_t *d)
  168. {
  169. d->d_namelen = CDF_TOLE2(d->d_namelen);
  170. d->d_left_child = CDF_TOLE4(d->d_left_child);
  171. d->d_right_child = CDF_TOLE4(d->d_right_child);
  172. d->d_storage = CDF_TOLE4(d->d_storage);
  173. d->d_storage_uuid[0] = CDF_TOLE8(d->d_storage_uuid[0]);
  174. d->d_storage_uuid[1] = CDF_TOLE8(d->d_storage_uuid[1]);
  175. d->d_flags = CDF_TOLE4(d->d_flags);
  176. d->d_created = CDF_TOLE8(d->d_created);
  177. d->d_modified = CDF_TOLE8(d->d_modified);
  178. d->d_stream_first_sector = CDF_TOLE4(d->d_stream_first_sector);
  179. d->d_size = CDF_TOLE4(d->d_size);
  180. }
  181. void
  182. cdf_swap_class(cdf_classid_t *d)
  183. {
  184. d->cl_dword = CDF_TOLE4(d->cl_dword);
  185. d->cl_word[0] = CDF_TOLE2(d->cl_word[0]);
  186. d->cl_word[1] = CDF_TOLE2(d->cl_word[1]);
  187. }
  188. void
  189. cdf_unpack_dir(cdf_directory_t *d, char *buf)
  190. {
  191. size_t len = 0;
  192. CDF_UNPACKA(d->d_name);
  193. CDF_UNPACK(d->d_namelen);
  194. CDF_UNPACK(d->d_type);
  195. CDF_UNPACK(d->d_color);
  196. CDF_UNPACK(d->d_left_child);
  197. CDF_UNPACK(d->d_right_child);
  198. CDF_UNPACK(d->d_storage);
  199. CDF_UNPACKA(d->d_storage_uuid);
  200. CDF_UNPACK(d->d_flags);
  201. CDF_UNPACK(d->d_created);
  202. CDF_UNPACK(d->d_modified);
  203. CDF_UNPACK(d->d_stream_first_sector);
  204. CDF_UNPACK(d->d_size);
  205. CDF_UNPACK(d->d_unused0);
  206. }
  207. int
  208. cdf_read_header(int fd, cdf_header_t *h)
  209. {
  210. (void)memcpy(cdf_bo.s, "\01\02\03\04", 4);
  211. char buf[512];
  212. if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1)
  213. return -1;
  214. if (read(fd, buf, sizeof(buf)) != sizeof(buf))
  215. return -1;
  216. cdf_unpack_header(h, buf);
  217. cdf_swap_header(h);
  218. if (h->h_magic != CDF_MAGIC) {
  219. DPRINTF(("Bad magic 0x%x != 0x$x\n", h->h_magic, CDF_MAGIC));
  220. errno = EFTYPE;
  221. return -1;
  222. }
  223. return 0;
  224. }
  225. ssize_t
  226. cdf_read_sector(int fd, void *buf, size_t offs, size_t len,
  227. const cdf_header_t *h, cdf_secid_t id)
  228. {
  229. assert((size_t)CDF_SEC_SIZE(h) == len);
  230. if (lseek(fd, (off_t)CDF_SEC_POS(h, id), SEEK_SET) == (off_t)-1)
  231. return -1;
  232. return read(fd, ((char *)buf) + offs, len);
  233. }
  234. ssize_t
  235. cdf_read_short_sector(const cdf_stream_t *sst, void *buf, size_t offs,
  236. size_t len, const cdf_header_t *h, cdf_secid_t id)
  237. {
  238. assert((size_t)CDF_SHORT_SEC_SIZE(h) == len);
  239. (void)memcpy(((char *)buf) + offs,
  240. ((const char *)sst->sst_tab) + CDF_SHORT_SEC_POS(h, id), len);
  241. return len;
  242. }
  243. /*
  244. * Read the sector allocation table.
  245. */
  246. int
  247. cdf_read_sat(int fd, cdf_header_t *h, cdf_sat_t *sat)
  248. {
  249. size_t i, j, k;
  250. size_t ss = CDF_SEC_SIZE(h);
  251. cdf_secid_t *msa, mid;
  252. for (i = 0; i < __arraycount(h->h_master_sat); i++)
  253. if (h->h_master_sat[i] == CDF_SECID_FREE)
  254. break;
  255. sat->sat_len = (h->h_num_sectors_in_master_sat + i);
  256. if ((sat->sat_tab = calloc(sat->sat_len, ss)) == NULL)
  257. return -1;
  258. for (i = 0; i < __arraycount(h->h_master_sat); i++) {
  259. if (h->h_master_sat[i] < 0)
  260. break;
  261. if (cdf_read_sector(fd, sat->sat_tab, ss * i, ss, h,
  262. h->h_master_sat[i]) != (ssize_t)ss) {
  263. DPRINTF(("Reading sector %d", h->h_master_sat[i]));
  264. goto out1;
  265. }
  266. }
  267. if ((msa = calloc(1, ss)) == NULL)
  268. goto out1;
  269. mid = h->h_secid_first_sector_in_master_sat;
  270. for (j = 0; j < h->h_num_sectors_in_master_sat; j++) {
  271. if (j >= CDF_LOOP_LIMIT) {
  272. DPRINTF(("Reading master sector loop limit"));
  273. errno = EFTYPE;
  274. goto out2;
  275. }
  276. if (cdf_read_sector(fd, msa, 0, ss, h, mid) != (ssize_t)ss) {
  277. DPRINTF(("Reading master sector %d", mid));
  278. goto out2;
  279. }
  280. for (k = 0; k < (ss / sizeof(mid)) - 1; k++, i++)
  281. if (cdf_read_sector(fd, sat->sat_tab, ss * i, ss, h,
  282. CDF_TOLE4(msa[k])) != (ssize_t)ss) {
  283. DPRINTF(("Reading sector %d",
  284. CDF_TOLE4(msa[k])));
  285. goto out2;
  286. }
  287. mid = CDF_TOLE4(msa[(ss / sizeof(mid)) - 1]);
  288. }
  289. free(msa);
  290. return 0;
  291. out2:
  292. free(msa);
  293. out1:
  294. free(sat->sat_tab);
  295. return -1;
  296. }
  297. size_t
  298. cdf_count_chain(const cdf_header_t *h, const cdf_sat_t *sat,
  299. cdf_secid_t sid)
  300. {
  301. size_t i, j, s = CDF_SEC_SIZE(h) / sizeof(cdf_secid_t);
  302. cdf_secid_t maxsector = (cdf_secid_t)(sat->sat_len * s);
  303. DPRINTF(("Chain:"));
  304. for (j = i = 0; sid >= 0; i++, j++) {
  305. DPRINTF((" %d", sid));
  306. if (j >= CDF_LOOP_LIMIT) {
  307. DPRINTF(("Counting chain loop limit"));
  308. errno = EFTYPE;
  309. return (size_t)-1;
  310. }
  311. if (sid > maxsector) {
  312. DPRINTF(("Sector %d > %d\n", sid, maxsector));
  313. errno = EFTYPE;
  314. return (size_t)-1;
  315. }
  316. sid = CDF_TOLE4(sat->sat_tab[sid]);
  317. }
  318. DPRINTF(("\n"));
  319. return i;
  320. }
  321. int
  322. cdf_read_long_sector_chain(int fd, const cdf_header_t *h, const cdf_sat_t *sat,
  323. cdf_secid_t sid, size_t len, cdf_stream_t *scn)
  324. {
  325. size_t ss = CDF_SEC_SIZE(h), i, j;
  326. ssize_t nr;
  327. scn->sst_len = cdf_count_chain(h, sat, sid);
  328. scn->sst_dirlen = len;
  329. if (scn->sst_len == (size_t)-1)
  330. return -1;
  331. scn->sst_tab = calloc(scn->sst_len, ss);
  332. if (scn->sst_tab == NULL)
  333. return -1;
  334. for (j = i = 0; sid >= 0; i++, j++) {
  335. if ((nr = cdf_read_sector(fd, scn->sst_tab, i * ss, ss, h,
  336. sid)) != (ssize_t)ss) {
  337. if (i == scn->sst_len - 1 && nr > 0) {
  338. /* Last sector might be truncated */
  339. return 0;
  340. }
  341. DPRINTF(("Reading long sector chain %d", sid));
  342. goto out;
  343. }
  344. sid = CDF_TOLE4(sat->sat_tab[sid]);
  345. if (j >= CDF_LOOP_LIMIT) {
  346. DPRINTF(("Read long sector chain loop limit"));
  347. errno = EFTYPE;
  348. goto out;
  349. }
  350. }
  351. return 0;
  352. out:
  353. free(scn->sst_tab);
  354. return (size_t)-1;
  355. }
  356. int
  357. cdf_read_short_sector_chain(const cdf_header_t *h,
  358. const cdf_sat_t *ssat, const cdf_stream_t *sst,
  359. cdf_secid_t sid, size_t len, cdf_stream_t *scn)
  360. {
  361. size_t ss = CDF_SHORT_SEC_SIZE(h), i, j;
  362. scn->sst_len = cdf_count_chain(h, ssat, sid);
  363. scn->sst_dirlen = len;
  364. if (scn->sst_len == (size_t)-1)
  365. return -1;
  366. scn->sst_tab = calloc(scn->sst_len, ss);
  367. if (scn->sst_tab == NULL)
  368. return -1;
  369. for (j = i = 0; sid >= 0; i++, j++) {
  370. if (j >= CDF_LOOP_LIMIT) {
  371. DPRINTF(("Read short sector chain loop limit"));
  372. errno = EFTYPE;
  373. goto out;
  374. }
  375. if (cdf_read_short_sector(sst, scn->sst_tab, i * ss, ss, h,
  376. sid) != (ssize_t)ss) {
  377. DPRINTF(("Reading short sector chain %d", sid));
  378. goto out;
  379. }
  380. sid = CDF_TOLE4(ssat->sat_tab[sid]);
  381. }
  382. return 0;
  383. out:
  384. free(scn->sst_tab);
  385. return (size_t)-1;
  386. }
  387. int
  388. cdf_read_sector_chain(int fd, const cdf_header_t *h, const cdf_sat_t *sat,
  389. const cdf_sat_t *ssat, const cdf_stream_t *sst,
  390. cdf_secid_t sid, size_t len, cdf_stream_t *scn)
  391. {
  392. if (len < h->h_min_size_standard_stream)
  393. return cdf_read_short_sector_chain(h, ssat, sst, sid, len,
  394. scn);
  395. else
  396. return cdf_read_long_sector_chain(fd, h, sat, sid, len, scn);
  397. }
  398. int
  399. cdf_read_dir(int fd, const cdf_header_t *h, const cdf_sat_t *sat,
  400. cdf_dir_t *dir)
  401. {
  402. size_t i, j;
  403. size_t ss = CDF_SEC_SIZE(h), ns, nd;
  404. char *buf;
  405. cdf_secid_t sid = h->h_secid_first_directory;
  406. ns = cdf_count_chain(h, sat, sid);
  407. if (ns == (size_t)-1)
  408. return -1;
  409. nd = ss / CDF_DIRECTORY_SIZE;
  410. dir->dir_len = ns * nd;
  411. dir->dir_tab = calloc(dir->dir_len, sizeof(dir->dir_tab[0]));
  412. if (dir->dir_tab == NULL)
  413. return -1;
  414. if ((buf = malloc(ss)) == NULL) {
  415. free(dir->dir_tab);
  416. return -1;
  417. }
  418. for (j = i = 0; i < ns; i++, j++) {
  419. if (j >= CDF_LOOP_LIMIT) {
  420. DPRINTF(("Read dir loop limit"));
  421. errno = EFTYPE;
  422. goto out;
  423. }
  424. if (cdf_read_sector(fd, buf, 0, ss, h, sid) != (ssize_t)ss) {
  425. DPRINTF(("Reading directory sector %d", sid));
  426. goto out;
  427. }
  428. for (j = 0; j < nd; j++) {
  429. cdf_unpack_dir(&dir->dir_tab[i * nd + j],
  430. &buf[j * CDF_DIRECTORY_SIZE]);
  431. }
  432. sid = CDF_TOLE4(sat->sat_tab[sid]);
  433. }
  434. if (NEED_SWAP)
  435. for (i = 0; i < dir->dir_len; i++)
  436. cdf_swap_dir(&dir->dir_tab[i]);
  437. free(buf);
  438. return 0;
  439. out:
  440. free(dir->dir_tab);
  441. free(buf);
  442. return -1;
  443. }
  444. int
  445. cdf_read_ssat(int fd, const cdf_header_t *h, const cdf_sat_t *sat,
  446. cdf_sat_t *ssat)
  447. {
  448. size_t i, j;
  449. size_t ss = CDF_SEC_SIZE(h);
  450. cdf_secid_t sid = h->h_secid_first_sector_in_short_sat;
  451. ssat->sat_len = cdf_count_chain(h, sat, sid);
  452. if (ssat->sat_len == (size_t)-1)
  453. return -1;
  454. ssat->sat_tab = calloc(ssat->sat_len, ss);
  455. if (ssat->sat_tab == NULL)
  456. return -1;
  457. for (j = i = 0; sid >= 0; i++, j++) {
  458. if (j >= CDF_LOOP_LIMIT) {
  459. DPRINTF(("Read short sat sector loop limit"));
  460. errno = EFTYPE;
  461. goto out;
  462. }
  463. if (cdf_read_sector(fd, ssat->sat_tab, i * ss, ss, h, sid) !=
  464. (ssize_t)ss) {
  465. DPRINTF(("Reading short sat sector %d", sid));
  466. goto out;
  467. }
  468. sid = CDF_TOLE4(sat->sat_tab[sid]);
  469. }
  470. return 0;
  471. out:
  472. free(ssat->sat_tab);
  473. return -1;
  474. }
  475. int
  476. cdf_read_short_stream(int fd, const cdf_header_t *h, const cdf_sat_t *sat,
  477. const cdf_dir_t *dir, cdf_stream_t *scn)
  478. {
  479. size_t i;
  480. const cdf_directory_t *d;
  481. for (i = 0; i < dir->dir_len; i++)
  482. if (dir->dir_tab[i].d_type == CDF_DIR_TYPE_ROOT_STORAGE)
  483. break;
  484. if (i == dir->dir_len) {
  485. DPRINTF(("Cannot find root storage node\n"));
  486. errno = EFTYPE;
  487. return -1;
  488. }
  489. d = &dir->dir_tab[i];
  490. /* If the it is not there, just fake it; some docs don't have it */
  491. if (d->d_stream_first_sector < 0) {
  492. scn->sst_tab = NULL;
  493. scn->sst_len = 0;
  494. return 0;
  495. }
  496. return cdf_read_long_sector_chain(fd, h, sat,
  497. d->d_stream_first_sector, d->d_size, scn);
  498. }
  /*
   * Compare l characters of the byte string d with the 16-bit string s
   * (each element passed through CDF_TOLE2).  Returns 0 when all l
   * positions match, otherwise the difference at the first mismatch.
   */
  static int
  cdf_namecmp(const char *d, const uint16_t *s, size_t l)
  {
      size_t pos;

      for (pos = 0; pos < l; pos++) {
          if (d[pos] != CDF_TOLE2(s[pos]))
              return (unsigned char)d[pos] - CDF_TOLE2(s[pos]);
      }
      return 0;
  }
  507. int
  508. cdf_read_summary_info(int fd, const cdf_header_t *h,
  509. const cdf_sat_t *sat, const cdf_sat_t *ssat, const cdf_stream_t *sst,
  510. const cdf_dir_t *dir, cdf_stream_t *scn)
  511. {
  512. size_t i;
  513. const cdf_directory_t *d;
  514. static const char name[] = "\05SummaryInformation";
  515. for (i = 0; i < dir->dir_len; i++)
  516. if (dir->dir_tab[i].d_type == CDF_DIR_TYPE_USER_STREAM &&
  517. cdf_namecmp(name, dir->dir_tab[i].d_name, sizeof(name))
  518. == 0)
  519. break;
  520. if (i == dir->dir_len) {
  521. DPRINTF(("Cannot find summary information section\n"));
  522. errno = EFTYPE;
  523. return -1;
  524. }
  525. d = &dir->dir_tab[i];
  526. return cdf_read_sector_chain(fd, h, sat, ssat, sst,
  527. d->d_stream_first_sector, d->d_size, scn);
  528. }
  529. int
  530. cdf_read_property_info(const cdf_stream_t *sst, uint32_t offs,
  531. cdf_property_info_t **info, size_t *count, size_t *maxcount)
  532. {
  533. const cdf_section_header_t *shp;
  534. cdf_section_header_t sh;
  535. const uint32_t *p, *q, *e;
  536. int16_t s16;
  537. int32_t s32;
  538. uint32_t u32;
  539. int64_t s64;
  540. uint64_t u64;
  541. cdf_timestamp_t tp;
  542. size_t i, o, nelements, j;
  543. cdf_property_info_t *inp;
  544. shp = (const void *)((const char *)sst->sst_tab + offs);
  545. sh.sh_len = CDF_TOLE4(shp->sh_len);
  546. sh.sh_properties = CDF_TOLE4(shp->sh_properties);
  547. DPRINTF(("section len: %d properties %d\n", sh.sh_len,
  548. sh.sh_properties));
  549. if (*maxcount) {
  550. *maxcount += sh.sh_properties;
  551. inp = realloc(*info, *maxcount * sizeof(*inp));
  552. } else {
  553. *maxcount = sh.sh_properties;
  554. inp = malloc(*maxcount * sizeof(*inp));
  555. }
  556. if (inp == NULL)
  557. goto out;
  558. *info = inp;
  559. inp += *count;
  560. *count += sh.sh_properties;
  561. p = (const void *)((const char *)sst->sst_tab + offs + sizeof(sh));
  562. e = (const void *)(((const char *)shp) + sh.sh_len);
  563. for (i = 0; i < sh.sh_properties; i++) {
  564. q = (const uint32_t *)((const char *)p +
  565. CDF_TOLE4(p[(i << 1) + 1])) - 2;
  566. if (q > e) {
  567. DPRINTF(("Ran of the end %p > %p\n", q, e));
  568. goto out;
  569. }
  570. inp[i].pi_id = CDF_TOLE4(p[i << 1]);
  571. inp[i].pi_type = CDF_TOLE4(q[0]);
  572. DPRINTF(("%d) id=%x type=%x offs=%x\n", i, inp[i].pi_id,
  573. inp[i].pi_type, (const char *)q - (const char *)p));
  574. if (inp[i].pi_type & CDF_VECTOR) {
  575. nelements = CDF_TOLE4(q[1]);
  576. o = 2;
  577. } else {
  578. nelements = 1;
  579. o = 1;
  580. }
  581. if (inp[i].pi_type & (CDF_ARRAY|CDF_BYREF|CDF_RESERVED))
  582. goto unknown;
  583. switch (inp[i].pi_type & CDF_TYPEMASK) {
  584. case CDF_EMPTY:
  585. break;
  586. case CDF_SIGNED16:
  587. if (inp[i].pi_type & CDF_VECTOR)
  588. goto unknown;
  589. (void)memcpy(&s16, &q[o], sizeof(s16));
  590. inp[i].pi_s16 = CDF_TOLE2(s16);
  591. break;
  592. case CDF_SIGNED32:
  593. if (inp[i].pi_type & CDF_VECTOR)
  594. goto unknown;
  595. (void)memcpy(&s32, &q[o], sizeof(s32));
  596. inp[i].pi_s32 = CDF_TOLE4(s32);
  597. break;
  598. case CDF_BOOL:
  599. case CDF_UNSIGNED32:
  600. if (inp[i].pi_type & CDF_VECTOR)
  601. goto unknown;
  602. (void)memcpy(&u32, &q[o], sizeof(u32));
  603. inp[i].pi_u32 = CDF_TOLE4(u32);
  604. break;
  605. case CDF_SIGNED64:
  606. if (inp[i].pi_type & CDF_VECTOR)
  607. goto unknown;
  608. (void)memcpy(&s64, &q[o], sizeof(s64));
  609. inp[i].pi_s64 = CDF_TOLE4(s64);
  610. break;
  611. case CDF_UNSIGNED64:
  612. if (inp[i].pi_type & CDF_VECTOR)
  613. goto unknown;
  614. (void)memcpy(&u64, &q[o], sizeof(u64));
  615. inp[i].pi_u64 = CDF_TOLE4(u64);
  616. break;
  617. case CDF_LENGTH32_STRING:
  618. if (nelements > 1) {
  619. size_t nelem = inp - *info;
  620. *maxcount += nelements;
  621. inp = realloc(*info, *maxcount * sizeof(*inp));
  622. if (inp == NULL)
  623. goto out;
  624. *info = inp;
  625. inp = *info + nelem;
  626. }
  627. DPRINTF(("nelements = %d\n", nelements));
  628. for (j = 0; j < nelements; j++, i++) {
  629. uint32_t l = CDF_TOLE4(q[o]);
  630. inp[i].pi_str.s_len = l;
  631. inp[i].pi_str.s_buf = (const char *)(&q[o+1]);
  632. DPRINTF(("l = %d, r = %d, s = %s\n", l,
  633. CDF_ROUND(l, sizeof(l)),
  634. inp[i].pi_str.s_buf));
  635. l = 4 + CDF_ROUND(l, sizeof(l));
  636. o += l >> 2;
  637. }
  638. i--;
  639. break;
  640. case CDF_FILETIME:
  641. if (inp[i].pi_type & CDF_VECTOR)
  642. goto unknown;
  643. (void)memcpy(&tp, &q[o], sizeof(tp));
  644. inp[i].pi_tp = CDF_TOLE8(tp);
  645. break;
  646. case CDF_CLIPBOARD:
  647. if (inp[i].pi_type & CDF_VECTOR)
  648. goto unknown;
  649. break;
  650. default:
  651. unknown:
  652. DPRINTF(("Don't know how to deal with %x\n",
  653. inp[i].pi_type));
  654. goto out;
  655. }
  656. }
  657. return 0;
  658. out:
  659. free(*info);
  660. return -1;
  661. }
  662. int
  663. cdf_unpack_summary_info(const cdf_stream_t *sst, cdf_summary_info_header_t *ssi,
  664. cdf_property_info_t **info, size_t *count)
  665. {
  666. size_t i, maxcount;
  667. const cdf_summary_info_header_t *si = sst->sst_tab;
  668. const cdf_section_declaration_t *sd = (const void *)
  669. ((const char *)sst->sst_tab + CDF_SECTION_DECLARATION_OFFSET);
  670. ssi->si_byte_order = CDF_TOLE2(si->si_byte_order);
  671. ssi->si_os_version = CDF_TOLE2(si->si_os_version);
  672. ssi->si_os = CDF_TOLE2(si->si_os);
  673. ssi->si_class = si->si_class;
  674. cdf_swap_class(&ssi->si_class);
  675. ssi->si_count = CDF_TOLE2(si->si_count);
  676. *count = 0;
  677. maxcount = 0;
  678. *info = NULL;
  679. for (i = 0; i < CDF_TOLE4(si->si_count); i++) {
  680. if (i >= CDF_LOOP_LIMIT) {
  681. DPRINTF(("Unpack summary info loop limit"));
  682. errno = EFTYPE;
  683. return -1;
  684. }
  685. if (cdf_read_property_info(sst, CDF_TOLE4(sd->sd_offset),
  686. info, count, &maxcount) == -1)
  687. return -1;
  688. }
  689. return 0;
  690. }
  691. int
  692. cdf_print_classid(char *buf, size_t buflen, const cdf_classid_t *id)
  693. {
  694. return snprintf(buf, buflen, "%.8x-%.4x-%.4x-%.2x%.2x-"
  695. "%.2x%.2x%.2x%.2x%.2x%.2x", id->cl_dword, id->cl_word[0],
  696. id->cl_word[1], id->cl_two[0], id->cl_two[1], id->cl_six[0],
  697. id->cl_six[1], id->cl_six[2], id->cl_six[3], id->cl_six[4],
  698. id->cl_six[5]);
  699. }
  700. static const struct {
  701. uint32_t v;
  702. const char *n;
  703. } vn[] = {
  704. { CDF_PROPERTY_CODE_PAGE, "Code page" },
  705. { CDF_PROPERTY_TITLE, "Title" },
  706. { CDF_PROPERTY_SUBJECT, "Subject" },
  707. { CDF_PROPERTY_AUTHOR, "Author" },
  708. { CDF_PROPERTY_KEYWORDS, "Keywords" },
  709. { CDF_PROPERTY_COMMENTS, "Comments" },
  710. { CDF_PROPERTY_TEMPLATE, "Template" },
  711. { CDF_PROPERTY_LAST_SAVED_BY, "Last Saved By" },
  712. { CDF_PROPERTY_REVISION_NUMBER, "Revision Number" },
  713. { CDF_PROPERTY_TOTAL_EDITING_TIME, "Total Editing Time" },
  714. { CDF_PROPERTY_LAST_PRINTED, "Last Printed" },
  715. { CDF_PROPERTY_CREATE_TIME, "Create Time/Date" },
  716. { CDF_PROPERTY_LAST_SAVED_TIME, "Last Saved Time/Date" },
  717. { CDF_PROPERTY_NUMBER_OF_PAGES, "Number of Pages" },
  718. { CDF_PROPERTY_NUMBER_OF_WORDS, "Number of Words" },
  719. { CDF_PROPERTY_NUMBER_OF_CHARACTERS, "Number of Characters" },
  720. { CDF_PROPERTY_THUMBNAIL, "Thumbnail" },
  721. { CDF_PROPERTY_NAME_OF_APPLICATION, "Name of Creating Application" },
  722. { CDF_PROPERTY_SECURITY, "Security" },
  723. { CDF_PROPERTY_LOCALE_ID, "Locale ID" },
  724. };
  725. int
  726. cdf_print_property_name(char *buf, size_t bufsiz, uint32_t p)
  727. {
  728. size_t i;
  729. for (i = 0; i < __arraycount(vn); i++)
  730. if (vn[i].v == p)
  731. return snprintf(buf, bufsiz, "%s", vn[i].n);
  732. return snprintf(buf, bufsiz, "0x%x", p);
  733. }
  734. int
  735. cdf_print_elapsed_time(char *buf, size_t bufsiz, cdf_timestamp_t ts)
  736. {
  737. size_t len = 0;
  738. int days, hours, mins, secs;
  739. ts /= CDF_TIME_PREC;
  740. secs = ts % 60;
  741. ts /= 60;
  742. mins = ts % 60;
  743. ts /= 60;
  744. hours = ts % 24;
  745. ts /= 24;
  746. days = ts;
  747. if (days) {
  748. len += snprintf(buf + len, bufsiz - len, "%dd+", days);
  749. if (len >= bufsiz)
  750. return len;
  751. }
  752. if (days || hours) {
  753. len += snprintf(buf + len, bufsiz - len, "%.2d:", hours);
  754. if (len >= bufsiz)
  755. return len;
  756. }
  757. len += snprintf(buf + len, bufsiz - len, "%.2d:", mins);
  758. if (len >= bufsiz)
  759. return len;
  760. len += snprintf(buf + len, bufsiz - len, "%.2d", secs);
  761. return len;
  762. }
  763. #ifdef CDF_DEBUG
  764. void
  765. cdf_dump_header(const cdf_header_t *h)
  766. {
  767. size_t i;
  768. #define DUMP(a, b) printf("%40.40s = " a "\n", # b, h->h_ ## b)
  769. DUMP("%d", revision);
  770. DUMP("%d", version);
  771. DUMP("0x%x", byte_order);
  772. DUMP("%d", sec_size_p2);
  773. DUMP("%d", short_sec_size_p2);
  774. DUMP("%d", num_sectors_in_sat);
  775. DUMP("%d", secid_first_directory);
  776. DUMP("%d", min_size_standard_stream);
  777. DUMP("%d", secid_first_sector_in_short_sat);
  778. DUMP("%d", num_sectors_in_short_sat);
  779. DUMP("%d", secid_first_sector_in_master_sat);
  780. DUMP("%d", num_sectors_in_master_sat);
  781. for (i = 0; i < __arraycount(h->h_master_sat); i++) {
  782. if (h->h_master_sat[i] == CDF_SECID_FREE)
  783. break;
  784. printf("%35.35s[%.3zu] = %d\n",
  785. "master_sat", i, h->h_master_sat[i]);
  786. }
  787. }
  788. void
  789. cdf_dump_sat(const char *prefix, const cdf_header_t *h, const cdf_sat_t *sat)
  790. {
  791. size_t i, j, s = CDF_SEC_SIZE(h) / sizeof(cdf_secid_t);
  792. for (i = 0; i < sat->sat_len; i++) {
  793. printf("%s[%zu]:\n", prefix, i);
  794. for (j = 0; j < s; j++) {
  795. printf("%5d, ", CDF_TOLE4(sat->sat_tab[s * i + j]));
  796. if ((j + 1) % 10 == 0)
  797. printf("\n");
  798. }
  799. printf("\n");
  800. }
  801. }
  /*
   * Debug helper: hex-dump len bytes starting at v to stdout, 16 bytes per
   * row.  Each row starts with its offset in hex; every complete row is
   * followed by its 16 bytes as text, with non-printable bytes shown
   * as '.'.  A trailing partial row is printed in hex only.
   */
  void
  cdf_dump(void *v, size_t len)
  {
      size_t i, j;
      unsigned char *p = v;
      /* 16 characters plus the terminator, so none of them is lost */
      char abuf[17];

      printf("%.4x: ", 0U);
      for (i = 0, j = 0; i < len; i++, p++) {
          printf("%.2x ", *p);
          abuf[j++] = isprint(*p) ? *p : '.';
          if (j == 16) {
              j = 0;
              abuf[16] = '\0';
              /* i is a size_t, so use the matching length modifier */
              printf("%s\n%.4zx: ", abuf, i + 1);
          }
      }
      printf("\n");
  }
  820. void
  821. cdf_dump_stream(const cdf_header_t *h, const cdf_stream_t *sst)
  822. {
  823. size_t ss = sst->sst_dirlen < h->h_min_size_standard_stream ?
  824. CDF_SHORT_SEC_SIZE(h) : CDF_SEC_SIZE(h);
  825. cdf_dump(sst->sst_tab, ss * sst->sst_len);
  826. }
  827. void
  828. cdf_dump_dir(int fd, const cdf_header_t *h, const cdf_sat_t *sat,
  829. const cdf_sat_t *ssat, const cdf_stream_t *sst,
  830. const cdf_dir_t *dir)
  831. {
  832. size_t i, j;
  833. cdf_directory_t *d;
  834. char name[__arraycount(d->d_name)];
  835. cdf_stream_t scn;
  836. struct timespec ts;
  837. static const char *types[] = { "empty", "user storage",
  838. "user stream", "lockbytes", "property", "root storage" };
  839. for (i = 0; i < dir->dir_len; i++) {
  840. d = &dir->dir_tab[i];
  841. for (j = 0; j < sizeof(name); j++)
  842. name[j] = (char)CDF_TOLE2(d->d_name[j]);
  843. printf("Directory %zu: %s\n", i, name);
  844. if (d->d_type < __arraycount(types))
  845. printf("Type: %s\n", types[d->d_type]);
  846. else
  847. printf("Type: %d\n", d->d_type);
  848. printf("Color: %s\n", d->d_color ? "black" : "red");
  849. printf("Left child: %d\n", d->d_left_child);
  850. printf("Right child: %d\n", d->d_right_child);
  851. printf("Flags: 0x%x\n", d->d_flags);
  852. cdf_timestamp_to_timespec(&ts, d->d_created);
  853. printf("Created %s", ctime(&ts.tv_sec));
  854. cdf_timestamp_to_timespec(&ts, d->d_modified);
  855. printf("Modified %s", ctime(&ts.tv_sec));
  856. printf("Stream %d\n", d->d_stream_first_sector);
  857. printf("Size %d\n", d->d_size);
  858. switch (d->d_type) {
  859. case CDF_DIR_TYPE_USER_STORAGE:
  860. printf("Storage: %d\n", d->d_storage);
  861. break;
  862. case CDF_DIR_TYPE_USER_STREAM:
  863. if (sst == NULL)
  864. break;
  865. if (cdf_read_sector_chain(fd, h, sat, ssat, sst,
  866. d->d_stream_first_sector, d->d_size, &scn) == -1) {
  867. warn("Can't read stream for %s at %d len %d",
  868. name, d->d_stream_first_sector, d->d_size);
  869. break;
  870. }
  871. cdf_dump_stream(h, &scn);
  872. free(scn.sst_tab);
  873. break;
  874. default:
  875. break;
  876. }
  877. }
  878. }
  879. void
  880. cdf_dump_property_info(const cdf_property_info_t *info, size_t count)
  881. {
  882. cdf_timestamp_t tp;
  883. struct timespec ts;
  884. char buf[64];
  885. size_t i;
  886. for (i = 0; i < count; i++) {
  887. cdf_print_property_name(buf, sizeof(buf), info[i].pi_id);
  888. printf("%zu) %s: ", i, buf);
  889. switch (info[i].pi_type) {
  890. case CDF_SIGNED16:
  891. printf("signed 16 [%hd]\n", info[i].pi_s16);
  892. break;
  893. case CDF_SIGNED32:
  894. printf("signed 32 [%d]\n", info[i].pi_s32);
  895. break;
  896. case CDF_UNSIGNED32:
  897. printf("unsigned 32 [%u]\n", info[i].pi_u32);
  898. break;
  899. case CDF_LENGTH32_STRING:
  900. printf("string %u [%.*s]\n", info[i].pi_str.s_len,
  901. info[i].pi_str.s_len, info[i].pi_str.s_buf);
  902. break;
  903. case CDF_FILETIME:
  904. tp = info[i].pi_tp;
  905. if (tp < 1000000000000000LL) {
  906. cdf_print_elapsed_time(buf, sizeof(buf), tp);
  907. printf("timestamp %s\n", buf);
  908. } else {
  909. cdf_timestamp_to_timespec(&ts, tp);
  910. printf("timestamp %s", ctime(&ts.tv_sec));
  911. }
  912. break;
  913. case CDF_CLIPBOARD:
  914. printf("CLIPBOARD %u\n", info[i].pi_u32);
  915. break;
  916. default:
  917. DPRINTF(("Don't know how to deal with %x\n",
  918. info[i].pi_type));
  919. break;
  920. }
  921. }
  922. }
  923. void
  924. cdf_dump_summary_info(const cdf_header_t *h, const cdf_stream_t *sst)
  925. {
  926. char buf[128];
  927. cdf_summary_info_header_t ssi;
  928. cdf_property_info_t *info;
  929. size_t count;
  930. (void)&h;
  931. if (cdf_unpack_summary_info(sst, &ssi, &info, &count) == -1)
  932. return;
  933. printf("Endian: %x\n", ssi.si_byte_order);
  934. printf("Os Version %d.%d\n", ssi.si_os_version & 0xff,
  935. ssi.si_os_version >> 8);
  936. printf("Os %d\n", ssi.si_os);
  937. cdf_print_classid(buf, sizeof(buf), &ssi.si_class);
  938. printf("Class %s\n", buf);
  939. printf("Count %d\n", ssi.si_count);
  940. cdf_dump_property_info(info, count);
  941. free(info);
  942. }
  943. #endif
  944. #ifdef TEST
  945. int
  946. main(int argc, char *argv[])
  947. {
  948. int fd, i;
  949. cdf_header_t h;
  950. cdf_sat_t sat, ssat;
  951. cdf_stream_t sst, scn;
  952. cdf_dir_t dir;
  953. if (argc < 2) {
  954. (void)fprintf(stderr, "Usage: %s <filename>\n", getprogname());
  955. return -1;
  956. }
  957. for (i = 1; i < argc; i++) {
  958. if ((fd = open(argv[1], O_RDONLY)) == -1)
  959. err(1, "Cannot open `%s'", argv[1]);
  960. if (cdf_read_header(fd, &h) == -1)
  961. err(1, "Cannot read header");
  962. #ifdef CDF_DEBUG
  963. cdf_dump_header(&h);
  964. #endif
  965. if (cdf_read_sat(fd, &h, &sat) == -1)
  966. err(1, "Cannot read sat");
  967. #ifdef CDF_DEBUG
  968. cdf_dump_sat("SAT", &h, &sat);
  969. #endif
  970. if (cdf_read_ssat(fd, &h, &sat, &ssat) == -1)
  971. err(1, "Cannot read ssat");
  972. #ifdef CDF_DEBUG
  973. cdf_dump_sat("SSAT", &h, &ssat);
  974. #endif
  975. if (cdf_read_dir(fd, &h, &sat, &dir) == -1)
  976. err(1, "Cannot read dir");
  977. if (cdf_read_short_stream(fd, &h, &sat, &dir, &sst) == -1)
  978. err(1, "Cannot read short stream");
  979. #ifdef CDF_DEBUG
  980. cdf_dump_stream(&h, &sst);
  981. #endif
  982. #ifdef CDF_DEBUG
  983. cdf_dump_dir(fd, &h, &sat, &ssat, &sst, &dir);
  984. #endif
  985. if (cdf_read_summary_info(fd, &h, &sat, &ssat, &sst, &dir,
  986. &scn) == -1)
  987. err(1, "Cannot read summary info");
  988. #ifdef CDF_DEBUG
  989. cdf_dump_summary_info(&h, &scn);
  990. #endif
  991. (void)close(fd);
  992. }
  993. return 0;
  994. }
  995. #endif