Index: git/src/cdf.c =================================================================== --- git.orig/src/cdf.c 2012-02-29 20:13:05.341191429 +0100 +++ git/src/cdf.c 2012-02-29 20:13:19.726010338 +0100 @@ -24,15 +24,18 @@ * POSSIBILITY OF SUCH DAMAGE. */ /* - * Parse composite document files, the format used in Microsoft Office - * document files before they switched to zipped xml. + * Parse Composite Document Files, the format used in Microsoft Office + * document files before they switched to zipped XML. * Info from: http://sc.openoffice.org/compdocfileformat.pdf + * + * N.B. This is the "Composite Document File" format, and not the + * "Compound Document Format", nor the "Channel Definition Format". */ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: cdf.c,v 1.36 2010/01/22 20:56:26 christos Exp $") +FILE_RCSID("@(#)$File: cdf.c,v 1.49 2012/02/20 20:04:37 christos Exp $") #endif #include @@ -54,10 +57,6 @@ #include "cdf.h" -#ifndef __arraycount -#define __arraycount(a) (sizeof(a) / sizeof(a[0])) -#endif - #ifdef CDF_DEBUG #define DPRINTF(a) printf a, fflush(stdout) #else @@ -71,19 +70,21 @@ #define NEED_SWAP (cdf_bo.u == (uint32_t)0x01020304) -#define CDF_TOLE8(x) ((uint64_t)(NEED_SWAP ? cdf_tole8(x) : (uint64_t)(x))) -#define CDF_TOLE4(x) ((uint32_t)(NEED_SWAP ? cdf_tole4(x) : (uint32_t)(x))) -#define CDF_TOLE2(x) ((uint16_t)(NEED_SWAP ? cdf_tole2(x) : (uint16_t)(x))) +#define CDF_TOLE8(x) ((uint64_t)(NEED_SWAP ? _cdf_tole8(x) : (uint64_t)(x))) +#define CDF_TOLE4(x) ((uint32_t)(NEED_SWAP ? _cdf_tole4(x) : (uint32_t)(x))) +#define CDF_TOLE2(x) ((uint16_t)(NEED_SWAP ? _cdf_tole2(x) : (uint16_t)(x))) +#define CDF_GETUINT32(x, y) cdf_getuint32(x, y) + /* * swap a short */ -uint16_t -cdf_tole2(uint16_t sv) +static uint16_t +_cdf_tole2(uint16_t sv) { uint16_t rv; - uint8_t *s = (uint8_t *)(void *)&sv; - uint8_t *d = (uint8_t *)(void *)&rv; + uint8_t *s = (uint8_t *)(void *)&sv; + uint8_t *d = (uint8_t *)(void *)&rv; d[0] = s[1]; d[1] = s[0]; return rv; @@ -92,12 +93,12 @@ /* * swap an int */ -uint32_t -cdf_tole4(uint32_t sv) +static uint32_t +_cdf_tole4(uint32_t sv) { uint32_t rv; - uint8_t *s = (uint8_t *)(void *)&sv; - uint8_t *d = (uint8_t *)(void *)&rv; + uint8_t *s = (uint8_t *)(void *)&sv; + uint8_t *d = (uint8_t *)(void *)&rv; d[0] = s[3]; d[1] = s[2]; d[2] = s[1]; @@ -108,12 +109,12 @@ /* * swap a quad */ -uint64_t -cdf_tole8(uint64_t sv) +static uint64_t +_cdf_tole8(uint64_t sv) { uint64_t rv; - uint8_t *s = (uint8_t *)(void *)&sv; - uint8_t *d = (uint8_t *)(void *)&rv; + uint8_t *s = (uint8_t *)(void *)&sv; + uint8_t *d = (uint8_t *)(void *)&rv; d[0] = s[7]; d[1] = s[6]; d[2] = s[5]; @@ -125,11 +126,41 @@ return rv; } +/* + * grab a uint32_t from a possibly unaligned address, and return it in + * the native host order. + */ +static uint32_t +cdf_getuint32(const uint8_t *p, size_t offs) +{ + uint32_t rv; + (void)memcpy(&rv, p + offs * sizeof(uint32_t), sizeof(rv)); + return CDF_TOLE4(rv); +} + #define CDF_UNPACK(a) \ (void)memcpy(&(a), &buf[len], sizeof(a)), len += sizeof(a) #define CDF_UNPACKA(a) \ (void)memcpy((a), &buf[len], sizeof(a)), len += sizeof(a) +uint16_t +cdf_tole2(uint16_t sv) +{ + return CDF_TOLE2(sv); +} + +uint32_t +cdf_tole4(uint32_t sv) +{ + return CDF_TOLE4(sv); +} + +uint64_t +cdf_tole8(uint64_t sv) +{ + return CDF_TOLE8(sv); +} + void cdf_swap_header(cdf_header_t *h) { @@ -231,17 +262,18 @@ } static int -cdf_check_stream_offset(const cdf_stream_t *sst, const void *p, size_t tail, - int line) +cdf_check_stream_offset(const cdf_stream_t *sst, const cdf_header_t *h, + const void *p, size_t tail, int line) { const char *b = (const char *)sst->sst_tab; const char *e = ((const char *)p) + tail; (void)&line; - if (e >= b && (size_t)(e - b) < sst->sst_dirlen * sst->sst_len) + if (e >= b && (size_t)(e - b) < CDF_SEC_SIZE(h) * sst->sst_len) return 0; - DPRINTF(("%d: offset begin %p end %p %zu >= %zu [%zu %zu]\n", - line, b, e, (size_t)(e - b), sst->sst_dirlen * sst->sst_len, - sst->sst_dirlen, sst->sst_len)); + DPRINTF(("%d: offset begin %p end %p %" SIZE_T_FORMAT "u" + " >= %" SIZE_T_FORMAT "u [%" SIZE_T_FORMAT "u %" + SIZE_T_FORMAT "u]\n", line, b, e, (size_t)(e - b), + CDF_SEC_SIZE(h) * sst->sst_len, CDF_SEC_SIZE(h), sst->sst_len)); errno = EFTYPE; return -1; } @@ -284,7 +316,8 @@ cdf_unpack_header(h, buf); cdf_swap_header(h); if (h->h_magic != CDF_MAGIC) { - DPRINTF(("Bad magic 0x%llx != 0x%llx\n", + DPRINTF(("Bad magic 0x%" INT64_T_FORMAT "x != 0x%" + INT64_T_FORMAT "x\n", (unsigned long long)h->h_magic, (unsigned long long)CDF_MAGIC)); goto out; @@ -309,18 +342,27 @@ cdf_read_sector(const cdf_info_t *info, void *buf, size_t offs, size_t len, const cdf_header_t *h, cdf_secid_t id) { - assert((size_t)CDF_SEC_SIZE(h) == len); - return cdf_read(info, (off_t)CDF_SEC_POS(h, id), - ((char *)buf) + offs, len); + size_t ss = CDF_SEC_SIZE(h); + size_t pos = CDF_SEC_POS(h, id); + assert(ss == len); + return cdf_read(info, (off_t)pos, ((char *)buf) + offs, len); } ssize_t cdf_read_short_sector(const cdf_stream_t *sst, void *buf, size_t offs, size_t len, const cdf_header_t *h, cdf_secid_t id) { - assert((size_t)CDF_SHORT_SEC_SIZE(h) == len); + size_t ss = CDF_SHORT_SEC_SIZE(h); + size_t pos = CDF_SHORT_SEC_POS(h, id); + assert(ss == len); + if (pos > CDF_SEC_SIZE(h) * sst->sst_len) { + DPRINTF(("Out of bounds read %" SIZE_T_FORMAT "u > %" + SIZE_T_FORMAT "u\n", + pos, CDF_SEC_SIZE(h) * sst->sst_len)); + return -1; + } (void)memcpy(((char *)buf) + offs, - ((const char *)sst->sst_tab) + CDF_SHORT_SEC_POS(h, id), len); + ((const char *)sst->sst_tab) + pos, len); return len; } @@ -340,16 +382,18 @@ break; #define CDF_SEC_LIMIT (UINT32_MAX / (4 * ss)) - if (h->h_num_sectors_in_master_sat > CDF_SEC_LIMIT / nsatpersec || + if ((nsatpersec > 0 && + h->h_num_sectors_in_master_sat > CDF_SEC_LIMIT / nsatpersec) || i > CDF_SEC_LIMIT) { - DPRINTF(("Number of sectors in master SAT too big %u %zu\n", - h->h_num_sectors_in_master_sat, i)); + DPRINTF(("Number of sectors in master SAT too big %u %" + SIZE_T_FORMAT "u\n", h->h_num_sectors_in_master_sat, i)); errno = EFTYPE; return -1; } sat->sat_len = h->h_num_sectors_in_master_sat * nsatpersec + i; - DPRINTF(("sat_len = %zu ss = %zu\n", sat->sat_len, ss)); + DPRINTF(("sat_len = %" SIZE_T_FORMAT "u ss = %" SIZE_T_FORMAT "u\n", + sat->sat_len, ss)); if ((sat->sat_tab = CAST(cdf_secid_t *, calloc(sat->sat_len, ss))) == NULL) return -1; @@ -385,8 +429,8 @@ if (sec < 0) goto out; if (i >= sat->sat_len) { - DPRINTF(("Out of bounds reading MSA %u >= %u", - i, sat->sat_len)); + DPRINTF(("Out of bounds reading MSA %" SIZE_T_FORMAT + "u >= %" SIZE_T_FORMAT "u", i, sat->sat_len)); errno = EFTYPE; goto out2; } @@ -459,7 +503,8 @@ } if (i >= scn->sst_len) { DPRINTF(("Out of bounds reading long sector chain " - "%u > %u\n", i, scn->sst_len)); + "%" SIZE_T_FORMAT "u > %" SIZE_T_FORMAT "u\n", i, + scn->sst_len)); errno = EFTYPE; goto out; } @@ -504,7 +549,8 @@ } if (i >= scn->sst_len) { DPRINTF(("Out of bounds reading short sector chain " - "%u > %u\n", i, scn->sst_len)); + "%" SIZE_T_FORMAT "u > %" SIZE_T_FORMAT "u\n", + i, scn->sst_len)); errno = EFTYPE; goto out; } @@ -550,7 +596,7 @@ nd = ss / CDF_DIRECTORY_SIZE; dir->dir_len = ns * nd; - dir->dir_tab = CAST(cdf_directory_t *, + dir->dir_tab = CAST(cdf_directory_t *, calloc(dir->dir_len, sizeof(dir->dir_tab[0]))); if (dir->dir_tab == NULL) return -1; @@ -612,7 +658,8 @@ } if (i >= ssat->sat_len) { DPRINTF(("Out of bounds reading short sector chain " - "%u > %u\n", i, ssat->sat_len)); + "%" SIZE_T_FORMAT "u > %" SIZE_T_FORMAT "u\n", i, + ssat->sat_len)); errno = EFTYPE; goto out; } @@ -649,7 +696,7 @@ if (d->d_stream_first_sector < 0) goto out; - return cdf_read_long_sector_chain(info, h, sat, + return cdf_read_long_sector_chain(info, h, sat, d->d_stream_first_sector, d->d_size, scn); out: scn->sst_tab = NULL; @@ -693,19 +740,19 @@ } int -cdf_read_property_info(const cdf_stream_t *sst, uint32_t offs, - cdf_property_info_t **info, size_t *count, size_t *maxcount) +cdf_read_property_info(const cdf_stream_t *sst, const cdf_header_t *h, + uint32_t offs, cdf_property_info_t **info, size_t *count, size_t *maxcount) { const cdf_section_header_t *shp; cdf_section_header_t sh; - const uint32_t *p, *q, *e; + const uint8_t *p, *q, *e; int16_t s16; int32_t s32; uint32_t u32; int64_t s64; uint64_t u64; cdf_timestamp_t tp; - size_t i, o, nelements, j; + size_t i, o, o4, nelements, j; cdf_property_info_t *inp; if (offs > UINT32_MAX / 4) { @@ -714,7 +761,7 @@ } shp = CAST(const cdf_section_header_t *, (const void *) ((const char *)sst->sst_tab + offs)); - if (cdf_check_stream_offset(sst, shp, sizeof(*shp), __LINE__) == -1) + if (cdf_check_stream_offset(sst, h, shp, sizeof(*shp), __LINE__) == -1) goto out; sh.sh_len = CDF_TOLE4(shp->sh_len); #define CDF_SHLEN_LIMIT (UINT32_MAX / 8) @@ -744,32 +791,34 @@ *info = inp; inp += *count; *count += sh.sh_properties; - p = CAST(const uint32_t *, (const void *) + p = CAST(const uint8_t *, (const void *) ((const char *)(const void *)sst->sst_tab + offs + sizeof(sh))); - e = CAST(const uint32_t *, (const void *) + e = CAST(const uint8_t *, (const void *) (((const char *)(const void *)shp) + sh.sh_len)); - if (cdf_check_stream_offset(sst, e, 0, __LINE__) == -1) + if (cdf_check_stream_offset(sst, h, e, 0, __LINE__) == -1) goto out; for (i = 0; i < sh.sh_properties; i++) { - q = (const uint32_t *)(const void *) - ((const char *)(const void *)p + - CDF_TOLE4(p[(i << 1) + 1])) - 2; + size_t ofs = CDF_GETUINT32(p, (i << 1) + 1); + q = (const uint8_t *)(const void *) + ((const char *)(const void *)p + ofs + - 2 * sizeof(uint32_t)); if (q > e) { DPRINTF(("Ran of the end %p > %p\n", q, e)); goto out; } - inp[i].pi_id = CDF_TOLE4(p[i << 1]); - inp[i].pi_type = CDF_TOLE4(q[0]); - DPRINTF(("%d) id=%x type=%x offs=%x\n", i, inp[i].pi_id, - inp[i].pi_type, (const char *)q - (const char *)p)); + inp[i].pi_id = CDF_GETUINT32(p, i << 1); + inp[i].pi_type = CDF_GETUINT32(q, 0); + DPRINTF(("%" SIZE_T_FORMAT "u) id=%x type=%x offs=0x%tx,0x%x\n", + i, inp[i].pi_id, inp[i].pi_type, q - p, offs)); if (inp[i].pi_type & CDF_VECTOR) { - nelements = CDF_TOLE4(q[1]); + nelements = CDF_GETUINT32(q, 1); o = 2; } else { nelements = 1; o = 1; } + o4 = o * sizeof(uint32_t); if (inp[i].pi_type & (CDF_ARRAY|CDF_BYREF|CDF_RESERVED)) goto unknown; switch (inp[i].pi_type & CDF_TYPEMASK) { @@ -779,34 +828,48 @@ case CDF_SIGNED16: if (inp[i].pi_type & CDF_VECTOR) goto unknown; - (void)memcpy(&s16, &q[o], sizeof(s16)); + (void)memcpy(&s16, &q[o4], sizeof(s16)); inp[i].pi_s16 = CDF_TOLE2(s16); break; case CDF_SIGNED32: if (inp[i].pi_type & CDF_VECTOR) goto unknown; - (void)memcpy(&s32, &q[o], sizeof(s32)); + (void)memcpy(&s32, &q[o4], sizeof(s32)); inp[i].pi_s32 = CDF_TOLE4((uint32_t)s32); break; case CDF_BOOL: case CDF_UNSIGNED32: if (inp[i].pi_type & CDF_VECTOR) goto unknown; - (void)memcpy(&u32, &q[o], sizeof(u32)); + (void)memcpy(&u32, &q[o4], sizeof(u32)); inp[i].pi_u32 = CDF_TOLE4(u32); break; case CDF_SIGNED64: if (inp[i].pi_type & CDF_VECTOR) goto unknown; - (void)memcpy(&s64, &q[o], sizeof(s64)); + (void)memcpy(&s64, &q[o4], sizeof(s64)); inp[i].pi_s64 = CDF_TOLE8((uint64_t)s64); break; case CDF_UNSIGNED64: if (inp[i].pi_type & CDF_VECTOR) goto unknown; - (void)memcpy(&u64, &q[o], sizeof(u64)); + (void)memcpy(&u64, &q[o4], sizeof(u64)); inp[i].pi_u64 = CDF_TOLE8((uint64_t)u64); break; + case CDF_FLOAT: + if (inp[i].pi_type & CDF_VECTOR) + goto unknown; + (void)memcpy(&u32, &q[o4], sizeof(u32)); + u32 = CDF_TOLE4(u32); + memcpy(&inp[i].pi_f, &u32, sizeof(inp[i].pi_f)); + break; + case CDF_DOUBLE: + if (inp[i].pi_type & CDF_VECTOR) + goto unknown; + (void)memcpy(&u64, &q[o4], sizeof(u64)); + u64 = CDF_TOLE8((uint64_t)u64); + memcpy(&inp[i].pi_d, &u64, sizeof(inp[i].pi_d)); + break; case CDF_LENGTH32_STRING: case CDF_LENGTH32_WSTRING: if (nelements > 1) { @@ -822,24 +885,30 @@ *info = inp; inp = *info + nelem; } - DPRINTF(("nelements = %d\n", nelements)); + DPRINTF(("nelements = %" SIZE_T_FORMAT "u\n", + nelements)); for (j = 0; j < nelements; j++, i++) { - uint32_t l = CDF_TOLE4(q[o]); + uint32_t l = CDF_GETUINT32(q, o); inp[i].pi_str.s_len = l; - inp[i].pi_str.s_buf = - (const char *)(const void *)(&q[o+1]); - DPRINTF(("l = %d, r = %d, s = %s\n", l, + inp[i].pi_str.s_buf = (const char *) + (const void *)(&q[o4 + sizeof(l)]); + DPRINTF(("l = %d, r = %" SIZE_T_FORMAT + "u, s = %s\n", l, CDF_ROUND(l, sizeof(l)), inp[i].pi_str.s_buf)); - l = 4 + (uint32_t)CDF_ROUND(l, sizeof(l)); - o += l >> 2; + if (l & 1) + l++; + o += l >> 1; + if (q + o >= e) + goto out; + o4 = o * sizeof(uint32_t); } i--; break; case CDF_FILETIME: if (inp[i].pi_type & CDF_VECTOR) goto unknown; - (void)memcpy(&tp, &q[o], sizeof(tp)); + (void)memcpy(&tp, &q[o4], sizeof(tp)); inp[i].pi_tp = CDF_TOLE8((uint64_t)tp); break; case CDF_CLIPBOARD: @@ -850,7 +919,7 @@ unknown: DPRINTF(("Don't know how to deal with %x\n", inp[i].pi_type)); - goto out; + break; } } return 0; @@ -860,8 +929,8 @@ } int -cdf_unpack_summary_info(const cdf_stream_t *sst, cdf_summary_info_header_t *ssi, - cdf_property_info_t **info, size_t *count) +cdf_unpack_summary_info(const cdf_stream_t *sst, const cdf_header_t *h, + cdf_summary_info_header_t *ssi, cdf_property_info_t **info, size_t *count) { size_t i, maxcount; const cdf_summary_info_header_t *si = @@ -870,8 +939,8 @@ CAST(const cdf_section_declaration_t *, (const void *) ((const char *)sst->sst_tab + CDF_SECTION_DECLARATION_OFFSET)); - if (cdf_check_stream_offset(sst, si, sizeof(*si), __LINE__) == -1 || - cdf_check_stream_offset(sst, sd, sizeof(*sd), __LINE__) == -1) + if (cdf_check_stream_offset(sst, h, si, sizeof(*si), __LINE__) == -1 || + cdf_check_stream_offset(sst, h, sd, sizeof(*sd), __LINE__) == -1) return -1; ssi->si_byte_order = CDF_TOLE2(si->si_byte_order); ssi->si_os_version = CDF_TOLE2(si->si_os_version); @@ -888,9 +957,10 @@ errno = EFTYPE; return -1; } - if (cdf_read_property_info(sst, CDF_TOLE4(sd->sd_offset), - info, count, &maxcount) == -1) + if (cdf_read_property_info(sst, h, CDF_TOLE4(sd->sd_offset), + info, count, &maxcount) == -1) { return -1; + } } return 0; } @@ -1015,13 +1085,14 @@ size_t i, j, s = size / sizeof(cdf_secid_t); for (i = 0; i < sat->sat_len; i++) { - (void)fprintf(stderr, "%s[%zu]:\n%.6d: ", prefix, i, i * s); + (void)fprintf(stderr, "%s[%" SIZE_T_FORMAT "u]:\n%.6" + SIZE_T_FORMAT "u: ", prefix, i, i * s); for (j = 0; j < s; j++) { (void)fprintf(stderr, "%5d, ", CDF_TOLE4(sat->sat_tab[s * i + j])); if ((j + 1) % 10 == 0) - (void)fprintf(stderr, "\n%.6d: ", - i * s + j + 1); + (void)fprintf(stderr, "\n%.6" SIZE_T_FORMAT + "u: ", i * s + j + 1); } (void)fprintf(stderr, "\n"); } @@ -1040,7 +1111,8 @@ if (j == 16) { j = 0; abuf[15] = '\0'; - (void)fprintf(stderr, "%s\n%.4x: ", abuf, i + 1); + (void)fprintf(stderr, "%s\n%.4" SIZE_T_FORMAT "x: ", + abuf, i + 1); } } (void)fprintf(stderr, "\n"); @@ -1072,7 +1144,8 @@ d = &dir->dir_tab[i]; for (j = 0; j < sizeof(name); j++) name[j] = (char)CDF_TOLE2(d->d_name[j]); - (void)fprintf(stderr, "Directory %zu: %s\n", i, name); + (void)fprintf(stderr, "Directory %" SIZE_T_FORMAT "u: %s\n", + i, name); if (d->d_type < __arraycount(types)) (void)fprintf(stderr, "Type: %s\n", types[d->d_type]); else @@ -1083,9 +1156,9 @@ (void)fprintf(stderr, "Right child: %d\n", d->d_right_child); (void)fprintf(stderr, "Flags: 0x%x\n", d->d_flags); cdf_timestamp_to_timespec(&ts, d->d_created); - (void)fprintf(stderr, "Created %s", ctime(&ts.tv_sec)); + (void)fprintf(stderr, "Created %s", cdf_ctime(&ts.tv_sec)); cdf_timestamp_to_timespec(&ts, d->d_modified); - (void)fprintf(stderr, "Modified %s", ctime(&ts.tv_sec)); + (void)fprintf(stderr, "Modified %s", cdf_ctime(&ts.tv_sec)); (void)fprintf(stderr, "Stream %d\n", d->d_stream_first_sector); (void)fprintf(stderr, "Size %d\n", d->d_size); switch (d->d_type) { @@ -1107,7 +1180,7 @@ default: break; } - + } } @@ -1121,7 +1194,7 @@ for (i = 0; i < count; i++) { cdf_print_property_name(buf, sizeof(buf), info[i].pi_id); - (void)fprintf(stderr, "%zu) %s: ", i, buf); + (void)fprintf(stderr, "%" SIZE_T_FORMAT "u) %s: ", i, buf); switch (info[i].pi_type) { case CDF_NULL: break; @@ -1137,17 +1210,25 @@ (void)fprintf(stderr, "unsigned 32 [%u]\n", info[i].pi_u32); break; + case CDF_FLOAT: + (void)fprintf(stderr, "float [%g]\n", + info[i].pi_f); + break; + case CDF_DOUBLE: + (void)fprintf(stderr, "double [%g]\n", + info[i].pi_d); + break; case CDF_LENGTH32_STRING: (void)fprintf(stderr, "string %u [%.*s]\n", info[i].pi_str.s_len, info[i].pi_str.s_len, info[i].pi_str.s_buf); break; case CDF_LENGTH32_WSTRING: - (void)fprintf(stderr, "string %u [", + (void)fprintf(stderr, "string %u [", info[i].pi_str.s_len); for (j = 0; j < info[i].pi_str.s_len - 1; j++) (void)fputc(info[i].pi_str.s_buf[j << 1], stderr); - (void)fprintf(stderr, "]\n"); + (void)fprintf(stderr, "]\n"); break; case CDF_FILETIME: tp = info[i].pi_tp; @@ -1157,7 +1238,7 @@ } else { cdf_timestamp_to_timespec(&ts, tp); (void)fprintf(stderr, "timestamp %s", - ctime(&ts.tv_sec)); + cdf_ctime(&ts.tv_sec)); } break; case CDF_CLIPBOARD: @@ -1181,7 +1262,7 @@ size_t count; (void)&h; - if (cdf_unpack_summary_info(sst, &ssi, &info, &count) == -1) + if (cdf_unpack_summary_info(sst, h, &ssi, &info, &count) == -1) return; (void)fprintf(stderr, "Endian: %x\n", ssi.si_byte_order); (void)fprintf(stderr, "Os Version %d.%d\n", ssi.si_os_version & 0xff, Index: git/src/cdf.h =================================================================== --- git.orig/src/cdf.h 2012-02-29 20:13:05.341191429 +0100 +++ git/src/cdf.h 2012-02-29 20:13:19.726010338 +0100 @@ -24,20 +24,35 @@ * POSSIBILITY OF SUCH DAMAGE. */ /* - * Info from: http://sc.openoffice.org/compdocfileformat.pdf + * Parse Composite Document Files, the format used in Microsoft Office + * document files before they switched to zipped XML. + * Info from: http://sc.openoffice.org/compdocfileformat.pdf + * + * N.B. This is the "Composite Document File" format, and not the + * "Compound Document Format", nor the "Channel Definition Format". */ #ifndef _H_CDF_ #define _H_CDF_ +#ifdef WIN32 +#include +#define timespec timeval +#define tv_nsec tv_usec +#endif +#ifdef __DJGPP__ +#define timespec timeval +#define tv_nsec tv_usec +#endif + typedef int32_t cdf_secid_t; #define CDF_LOOP_LIMIT 10000 #define CDF_SECID_NULL 0 #define CDF_SECID_FREE -1 -#define CDF_SECID_END_OF_CHAIN -2 -#define CDF_SECID_SECTOR_ALLOCATION_TABLE -3 +#define CDF_SECID_END_OF_CHAIN -2 +#define CDF_SECID_SECTOR_ALLOCATION_TABLE -3 #define CDF_SECID_MASTER_SECTOR_ALLOCATION_TABLE -4 typedef struct { @@ -61,15 +76,15 @@ cdf_secid_t h_master_sat[436/4]; } cdf_header_t; -#define CDF_SEC_SIZE(h) (1 << (h)->h_sec_size_p2) +#define CDF_SEC_SIZE(h) ((size_t)(1 << (h)->h_sec_size_p2)) #define CDF_SEC_POS(h, secid) (CDF_SEC_SIZE(h) + (secid) * CDF_SEC_SIZE(h)) -#define CDF_SHORT_SEC_SIZE(h) (1 << (h)->h_short_sec_size_p2) +#define CDF_SHORT_SEC_SIZE(h) ((size_t)(1 << (h)->h_short_sec_size_p2)) #define CDF_SHORT_SEC_POS(h, secid) ((secid) * CDF_SHORT_SEC_SIZE(h)) -typedef int32_t cdf_dirid_t; +typedef int32_t cdf_dirid_t; #define CDF_DIRID_NULL -1 -typedef int64_t cdf_timestamp_t; +typedef int64_t cdf_timestamp_t; #define CDF_BASE_YEAR 1601 #define CDF_TIME_PREC 10000000 @@ -78,11 +93,11 @@ uint16_t d_namelen; uint8_t d_type; #define CDF_DIR_TYPE_EMPTY 0 -#define CDF_DIR_TYPE_USER_STORAGE 1 -#define CDF_DIR_TYPE_USER_STREAM 2 -#define CDF_DIR_TYPE_LOCKBYTES 3 -#define CDF_DIR_TYPE_PROPERTY 4 -#define CDF_DIR_TYPE_ROOT_STORAGE 5 +#define CDF_DIR_TYPE_USER_STORAGE 1 +#define CDF_DIR_TYPE_USER_STREAM 2 +#define CDF_DIR_TYPE_LOCKBYTES 3 +#define CDF_DIR_TYPE_PROPERTY 4 +#define CDF_DIR_TYPE_ROOT_STORAGE 5 uint8_t d_color; #define CDF_DIR_COLOR_READ 0 #define CDF_DIR_COLOR_BLACK 1 @@ -91,8 +106,8 @@ cdf_dirid_t d_storage; uint64_t d_storage_uuid[2]; uint32_t d_flags; - cdf_timestamp_t d_created; - cdf_timestamp_t d_modified; + cdf_timestamp_t d_created; + cdf_timestamp_t d_modified; cdf_secid_t d_stream_first_sector; uint32_t d_size; uint32_t d_unused0; @@ -154,7 +169,9 @@ int32_t _pi_s32; uint64_t _pi_u64; int64_t _pi_s64; - cdf_timestamp_t _pi_tp; + cdf_timestamp_t _pi_tp; + float _pi_f; + double _pi_d; struct { uint32_t s_len; const char *s_buf; @@ -166,6 +183,8 @@ #define pi_s32 pi_val._pi_s32 #define pi_u16 pi_val._pi_u16 #define pi_s16 pi_val._pi_s16 +#define pi_f pi_val._pi_f +#define pi_d pi_val._pi_d #define pi_tp pi_val._pi_tp #define pi_str pi_val._pi_str } cdf_property_info_t; @@ -174,13 +193,13 @@ /* Variant type definitions */ #define CDF_EMPTY 0x00000000 -#define CDF_NULL 0x00000001 +#define CDF_NULL 0x00000001 #define CDF_SIGNED16 0x00000002 #define CDF_SIGNED32 0x00000003 #define CDF_FLOAT 0x00000004 #define CDF_DOUBLE 0x00000005 #define CDF_CY 0x00000006 -#define CDF_DATE 0x00000007 +#define CDF_DATE 0x00000007 #define CDF_BSTR 0x00000008 #define CDF_DISPATCH 0x00000009 #define CDF_ERROR 0x0000000a @@ -191,7 +210,7 @@ #define CDF_SIGNED8 0x00000010 #define CDF_UNSIGNED8 0x00000011 #define CDF_UNSIGNED16 0x00000012 -#define CDF_UNSIGNED32 0x00000013 +#define CDF_UNSIGNED32 0x00000013 #define CDF_SIGNED64 0x00000014 #define CDF_UNSIGNED64 0x00000015 #define CDF_INT 0x00000016 @@ -226,7 +245,7 @@ #define CDF_PROPERTY_SUBJECT 0x00000003 #define CDF_PROPERTY_AUTHOR 0x00000004 #define CDF_PROPERTY_KEYWORDS 0x00000005 -#define CDF_PROPERTY_COMMENTS 0x00000006 +#define CDF_PROPERTY_COMMENTS 0x00000006 #define CDF_PROPERTY_TEMPLATE 0x00000007 #define CDF_PROPERTY_LAST_SAVED_BY 0x00000008 #define CDF_PROPERTY_REVISION_NUMBER 0x00000009 @@ -276,19 +295,20 @@ cdf_sat_t *); int cdf_read_short_stream(const cdf_info_t *, const cdf_header_t *, const cdf_sat_t *, const cdf_dir_t *, cdf_stream_t *); -int cdf_read_property_info(const cdf_stream_t *, uint32_t, +int cdf_read_property_info(const cdf_stream_t *, const cdf_header_t *, uint32_t, cdf_property_info_t **, size_t *, size_t *); int cdf_read_summary_info(const cdf_info_t *, const cdf_header_t *, const cdf_sat_t *, const cdf_sat_t *, const cdf_stream_t *, const cdf_dir_t *, cdf_stream_t *); -int cdf_unpack_summary_info(const cdf_stream_t *, cdf_summary_info_header_t *, - cdf_property_info_t **, size_t *); +int cdf_unpack_summary_info(const cdf_stream_t *, const cdf_header_t *, + cdf_summary_info_header_t *, cdf_property_info_t **, size_t *); int cdf_print_classid(char *, size_t, const cdf_classid_t *); int cdf_print_property_name(char *, size_t, uint32_t); int cdf_print_elapsed_time(char *, size_t, cdf_timestamp_t); uint16_t cdf_tole2(uint16_t); uint32_t cdf_tole4(uint32_t); uint64_t cdf_tole8(uint64_t); +char *cdf_ctime(const time_t *); #ifdef CDF_DEBUG void cdf_dump_header(const cdf_header_t *); Index: git/src/cdf_time.c =================================================================== --- git.orig/src/cdf_time.c 2012-02-29 20:13:05.341191429 +0100 +++ git/src/cdf_time.c 2012-02-29 20:13:19.726010338 +0100 @@ -27,7 +27,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: cdf_time.c,v 1.8 2009/06/20 20:47:30 christos Exp $") +FILE_RCSID("@(#)$File: cdf_time.c,v 1.10 2011/02/10 17:03:16 christos Exp $") #endif #include @@ -45,12 +45,6 @@ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; -#ifdef __DJGPP__ -#define timespec timeval -#define tv_nsec tv_usec -#endif - - /* * Return the number of days between jan 01 1601 and jan 01 of year. */ @@ -127,7 +121,7 @@ tm.tm_year = (int)(CDF_BASE_YEAR + (t / 365)); rdays = cdf_getdays(tm.tm_year); - t -= rdays; + t -= rdays - 1; tm.tm_mday = cdf_getday(tm.tm_year, (int)t); tm.tm_mon = cdf_getmonth(tm.tm_year, (int)t); tm.tm_wday = 0; @@ -171,6 +165,18 @@ return 0; } +char * +cdf_ctime(const time_t *sec) +{ + static char ctbuf[26]; + char *ptr = ctime(sec); + if (ptr != NULL) + return ptr; + (void)snprintf(ctbuf, sizeof(ctbuf), "*Bad* 0x%16.16llx\n", + (long long)*sec); + return ctbuf; +} + #ifdef TEST int @@ -182,7 +188,7 @@ char *p, *q; cdf_timestamp_to_timespec(&ts, tst); - p = ctime(&ts.tv_sec); + p = cdf_ctime(&ts.tv_sec); if ((q = strchr(p, '\n')) != NULL) *q = '\0'; if (strcmp(ref, p) != 0) Index: git/src/readcdf.c =================================================================== --- git.orig/src/readcdf.c 2012-02-29 20:13:05.341191429 +0100 +++ git/src/readcdf.c 2012-02-29 20:13:19.726010338 +0100 @@ -26,7 +26,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: readcdf.c,v 1.22 2010/01/20 01:36:55 christos Exp $") +FILE_RCSID("@(#)$File: readcdf.c,v 1.28 2012/02/17 05:27:45 christos Exp $") #endif #include @@ -44,241 +44,275 @@ cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info, size_t count) { - size_t i; - cdf_timestamp_t tp; - struct timespec ts; - char buf[64]; - const char *str = "vnd.ms-office"; - const char *s; - int len; - - for (i = 0; i < count; i++) { - cdf_print_property_name(buf, sizeof(buf), info[i].pi_id); - switch (info[i].pi_type) { - case CDF_NULL: - break; - case CDF_SIGNED16: - if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf, - info[i].pi_s16) == -1) - return -1; - break; - case CDF_SIGNED32: - if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf, - info[i].pi_s32) == -1) - return -1; - break; - case CDF_UNSIGNED32: - if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf, - info[i].pi_u32) == -1) - return -1; - break; - case CDF_LENGTH32_STRING: - case CDF_LENGTH32_WSTRING: - len = info[i].pi_str.s_len; - if (len > 1) { - char vbuf[1024]; - size_t j, k = 1; - - if (info[i].pi_type == CDF_LENGTH32_WSTRING) - k++; - s = info[i].pi_str.s_buf; - for (j = 0; j < sizeof(vbuf) && len--; - j++, s += k) { - if (*s == '\0') - break; - if (isprint((unsigned char)*s)) - vbuf[j] = *s; - } - if (j == sizeof(vbuf)) - --j; - vbuf[j] = '\0'; - if (NOTMIME(ms)) { - if (vbuf[0]) { - if (file_printf(ms, ", %s: %s", - buf, vbuf) == -1) - return -1; - } - } else if (info[i].pi_id == - CDF_PROPERTY_NAME_OF_APPLICATION) { - if (strstr(vbuf, "Word")) - str = "msword"; - else if (strstr(vbuf, "Excel")) - str = "vnd.ms-excel"; - else if (strstr(vbuf, "Powerpoint")) - str = "vnd.ms-powerpoint"; - else if (strstr(vbuf, - "Crystal Reports")) - str = "x-rpt"; - } - } - break; - case CDF_FILETIME: - tp = info[i].pi_tp; - if (tp != 0) { - if (tp < 1000000000000000LL) { - char tbuf[64]; - cdf_print_elapsed_time(tbuf, - sizeof(tbuf), tp); - if (NOTMIME(ms) && file_printf(ms, - ", %s: %s", buf, tbuf) == -1) - return -1; - } else { - char *c, *ec; - cdf_timestamp_to_timespec(&ts, tp); - c = ctime(&ts.tv_sec); - if ((ec = strchr(c, '\n')) != NULL) - *ec = '\0'; - - if (NOTMIME(ms) && file_printf(ms, - ", %s: %s", buf, c) == -1) - return -1; - } - } - break; - case CDF_CLIPBOARD: - break; - default: - return -1; - } - } - if (!NOTMIME(ms)) { - if (file_printf(ms, "application/%s", str) == -1) - return -1; - } - return 1; + size_t i; + cdf_timestamp_t tp; + struct timespec ts; + char buf[64]; + const char *str = NULL; + const char *s; + int len; + + for (i = 0; i < count; i++) { + cdf_print_property_name(buf, sizeof(buf), info[i].pi_id); + switch (info[i].pi_type) { + case CDF_NULL: + break; + case CDF_SIGNED16: + if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf, + info[i].pi_s16) == -1) + return -1; + break; + case CDF_SIGNED32: + if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf, + info[i].pi_s32) == -1) + return -1; + break; + case CDF_UNSIGNED32: + if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf, + info[i].pi_u32) == -1) + return -1; + break; + case CDF_FLOAT: + if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf, + info[i].pi_f) == -1) + return -1; + break; + case CDF_DOUBLE: + if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf, + info[i].pi_d) == -1) + return -1; + break; + case CDF_LENGTH32_STRING: + case CDF_LENGTH32_WSTRING: + len = info[i].pi_str.s_len; + if (len > 1) { + char vbuf[1024]; + size_t j, k = 1; + + if (info[i].pi_type == CDF_LENGTH32_WSTRING) + k++; + s = info[i].pi_str.s_buf; + for (j = 0; j < sizeof(vbuf) && len--; + j++, s += k) { + if (*s == '\0') + break; + if (isprint((unsigned char)*s)) + vbuf[j] = *s; + } + if (j == sizeof(vbuf)) + --j; + vbuf[j] = '\0'; + if (NOTMIME(ms)) { + if (vbuf[0]) { + if (file_printf(ms, ", %s: %s", + buf, vbuf) == -1) + return -1; + } + } else if (info[i].pi_id == + CDF_PROPERTY_NAME_OF_APPLICATION) { + if (strstr(vbuf, "Word")) + str = "msword"; + else if (strstr(vbuf, "Excel")) + str = "vnd.ms-excel"; + else if (strstr(vbuf, "Powerpoint")) + str = "vnd.ms-powerpoint"; + else if (strstr(vbuf, + "Crystal Reports")) + str = "x-rpt"; + } + } + break; + case CDF_FILETIME: + tp = info[i].pi_tp; + if (tp != 0) { + if (tp < 1000000000000000LL) { + char tbuf[64]; + cdf_print_elapsed_time(tbuf, + sizeof(tbuf), tp); + if (NOTMIME(ms) && file_printf(ms, + ", %s: %s", buf, tbuf) == -1) + return -1; + } else { + char *c, *ec; + cdf_timestamp_to_timespec(&ts, tp); + c = cdf_ctime(&ts.tv_sec); + if ((ec = strchr(c, '\n')) != NULL) + *ec = '\0'; + + if (NOTMIME(ms) && file_printf(ms, + ", %s: %s", buf, c) == -1) + return -1; + } + } + break; + case CDF_CLIPBOARD: + break; + default: + return -1; + } + } + if (!NOTMIME(ms)) { + if (str == NULL) + return 0; + if (file_printf(ms, "application/%s", str) == -1) + return -1; + } + return 1; } private int -cdf_file_summary_info(struct magic_set *ms, const cdf_stream_t *sst) +cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h, + const cdf_stream_t *sst) { - cdf_summary_info_header_t si; - cdf_property_info_t *info; - size_t count; - int m; - - if (cdf_unpack_summary_info(sst, &si, &info, &count) == -1) - return -1; - - if (NOTMIME(ms)) { - if (file_printf(ms, "CDF V2 Document") == -1) - return -1; - - if (file_printf(ms, ", %s Endian", - si.si_byte_order == 0xfffe ? "Little" : "Big") == -1) - return -1; - switch (si.si_os) { - case 2: - if (file_printf(ms, ", Os: Windows, Version %d.%d", - si.si_os_version & 0xff, - (uint32_t)si.si_os_version >> 8) == -1) - return -1; - break; - case 1: - if (file_printf(ms, ", Os: MacOS, Version %d.%d", - (uint32_t)si.si_os_version >> 8, - si.si_os_version & 0xff) == -1) - return -1; - break; - default: - if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os, - si.si_os_version & 0xff, - (uint32_t)si.si_os_version >> 8) == -1) - return -1; - break; - } - } + cdf_summary_info_header_t si; + cdf_property_info_t *info; + size_t count; + int m; + + if (cdf_unpack_summary_info(sst, h, &si, &info, &count) == -1) + return -1; + + if (NOTMIME(ms)) { + if (file_printf(ms, "Composite Document File V2 Document") + == -1) + return -1; + + if (file_printf(ms, ", %s Endian", + si.si_byte_order == 0xfffe ? "Little" : "Big") == -1) + return -2; + switch (si.si_os) { + case 2: + if (file_printf(ms, ", Os: Windows, Version %d.%d", + si.si_os_version & 0xff, + (uint32_t)si.si_os_version >> 8) == -1) + return -2; + break; + case 1: + if (file_printf(ms, ", Os: MacOS, Version %d.%d", + (uint32_t)si.si_os_version >> 8, + si.si_os_version & 0xff) == -1) + return -2; + break; + default: + if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os, + si.si_os_version & 0xff, + (uint32_t)si.si_os_version >> 8) == -1) + return -2; + break; + } + } - m = cdf_file_property_info(ms, info, count); - free(info); + m = cdf_file_property_info(ms, info, count); + free(info); - return m; + return m == -1 ? -2 : m; } protected int file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf, size_t nbytes) { - cdf_info_t info; - cdf_header_t h; - cdf_sat_t sat, ssat; - cdf_stream_t sst, scn; - cdf_dir_t dir; - int i; - const char *expn = ""; - const char *corrupt = "corrupt: "; - - info.i_fd = fd; - info.i_buf = buf; - info.i_len = nbytes; - if (ms->flags & MAGIC_APPLE) - return 0; - if (cdf_read_header(&info, &h) == -1) - return 0; + cdf_info_t info; + cdf_header_t h; + cdf_sat_t sat, ssat; + cdf_stream_t sst, scn; + cdf_dir_t dir; + int i; + const char *expn = ""; + const char *corrupt = "corrupt: "; + + info.i_fd = fd; + info.i_buf = buf; + info.i_len = nbytes; + if (ms->flags & MAGIC_APPLE) + return 0; + if (cdf_read_header(&info, &h) == -1) + return 0; #ifdef CDF_DEBUG - cdf_dump_header(&h); + cdf_dump_header(&h); #endif - if ((i = cdf_read_sat(&info, &h, &sat)) == -1) { - expn = "Can't read SAT"; - goto out0; - } + if ((i = cdf_read_sat(&info, &h, &sat)) == -1) { + expn = "Can't read SAT"; + goto out0; + } #ifdef CDF_DEBUG - cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h)); + cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h)); #endif - if ((i = cdf_read_ssat(&info, &h, &sat, &ssat)) == -1) { - expn = "Can't read SSAT"; - goto out1; - } + if ((i = cdf_read_ssat(&info, &h, &sat, &ssat)) == -1) { + expn = "Can't read SSAT"; + goto out1; + } #ifdef CDF_DEBUG - cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h)); + cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h)); #endif - if ((i = cdf_read_dir(&info, &h, &sat, &dir)) == -1) { - expn = "Can't read directory"; - goto out2; - } - - if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst)) == -1) { - expn = "Cannot read short stream"; - goto out3; - } + if ((i = cdf_read_dir(&info, &h, &sat, &dir)) == -1) { + expn = "Can't read directory"; + goto out2; + } + + if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst)) == -1) { + expn = "Cannot read short stream"; + goto out3; + } #ifdef CDF_DEBUG - cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir); + cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir); #endif - if ((i = cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir, - &scn)) == -1) { - if (errno == ESRCH) { - corrupt = expn; - expn = "No summary info"; - } else { - expn = "Cannot read summary info"; - } - goto out4; - } + if ((i = cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir, + &scn)) == -1) { + if (errno == ESRCH) { + corrupt = expn; + expn = "No summary info"; + } else { + expn = "Cannot read summary info"; + } + goto out4; + } #ifdef CDF_DEBUG - cdf_dump_summary_info(&h, &scn); + cdf_dump_summary_info(&h, &scn); #endif - if ((i = cdf_file_summary_info(ms, &scn)) == -1) - expn = "Can't expand summary_info"; - free(scn.sst_tab); + if ((i = cdf_file_summary_info(ms, &h, &scn)) < 0) + expn = "Can't expand summary_info"; + if (i == 0) { + const char *str = "vnd.ms-office"; + cdf_directory_t *d; + char name[__arraycount(d->d_name)]; + size_t j, k; + for (j = 0; j < dir.dir_len; j++) { + d = &dir.dir_tab[j]; + for (k = 0; k < sizeof(name); k++) + name[k] = (char)cdf_tole2(d->d_name[k]); + if (strstr(name, "WordDocument") == 0) { + str = "msword"; + break; + } + } + if (file_printf(ms, "application/%s", str) == -1) + return -1; + i = 1; + } + free(scn.sst_tab); out4: - free(sst.sst_tab); + free(sst.sst_tab); out3: - free(dir.dir_tab); + free(dir.dir_tab); out2: - free(ssat.sat_tab); + free(ssat.sat_tab); out1: - free(sat.sat_tab); + free(sat.sat_tab); out0: - if (i != 1) { - if (file_printf(ms, "CDF V2 Document") == -1) - return -1; - if (*expn) - if (file_printf(ms, ", %s%s", corrupt, expn) == -1) - return -1; - i = 1; - } - return i; + if (i != 1) { + if (i == -1) + if (file_printf(ms, "Composite Document File V2 Document") + == -1) + return -1; + if (*expn) + if (file_printf(ms, ", %s%s", corrupt, expn) == -1) + return -1; + i = 1; + } + return i; } Index: git/src/file.h =================================================================== --- git.orig/src/file.h 2012-02-29 20:13:24.457968395 +0100 +++ git/src/file.h 2012-02-29 20:13:35.100966457 +0100 @@ -74,6 +74,10 @@ #endif #define public +#ifndef __arraycount +#define __arraycount(a) (sizeof(a) / sizeof(a[0])) +#endif + #ifndef __GNUC_PREREQ__ #ifdef __GNUC__ #define __GNUC_PREREQ__(x, y) \