made apply cleanly based on the following commits: commit 27a14bc7ba285a0a5ebfdb55e54001aa11932b08 Author: Christos Zoulas Date: Wed Jun 4 17:36:34 2014 +0000 Correctly compute the truncated pascal string size (Francisco Alonso and Jan Kaluza at RedHat) commit 2f0eeb07ba633f1d915f78a50b22808123b38ea0 Author: Christos Zoulas Date: Wed Dec 22 18:14:05 2010 +0000 support for various formats of pascal strings. commit 57e4574e062e538b16b225e822ece6ca0ce539b8 Author: Christos Zoulas Date: Wed Dec 22 19:09:10 2010 +0000 don't undo our initialization diff --git a/src/softmagic.c b/src/softmagic.c index 9ba500b..6d69419 100644 --- a/src/softmagic.c +++ b/src/softmagic.c @@ -800,10 +800,18 @@ mconvert(struct magic_set *ms, struct magic *m, int flip) return 1; } case FILE_PSTRING: { - char *ptr1 = p->s, *ptr2 = ptr1 + 1; - size_t len = *p->s; + size_t sz = file_pstring_length_size(m); + char *ptr1 = p->s, *ptr2 = ptr1 + sz; + size_t len = file_pstring_get_length(m, ptr1); - if (len >= sizeof(p->s)) - len = sizeof(p->s) - 1; + if (len >= sizeof(p->s)) { + /* + * The size of the pascal string length (sz) + * is 1, 2, or 4. We need at least 1 byte for NUL + * termination, but we've already truncated the + * string by p->s, so we need to deduct sz. + */ + len = sizeof(p->s) - sz; + } while (len--) *ptr1++ = *ptr2++; *ptr1 = '\0'; diff --git a/doc/magic.man b/doc/magic.man index 8486645..299bb8d 100644 --- a/doc/magic.man +++ b/doc/magic.man @@ -71,8 +71,22 @@ characters in the magic match both lower and upper case characters in the target, whereas upper case characters in the magic only match uppercase characters in the target. .It Dv pstring -A Pascal-style string where the first byte is interpreted as the an +A Pascal-style string where the first byte/short/int is interpreted as the an unsigned length. +The length defaults to byte and can be specified as a modifier. +The following modifiers are supported: +.Bl -tag -compact -width B +.It B +A byte length (default). +.It H +A 2 byte big endian length. +.It h +A 2 byte big little length. +.It L +A 4 byte big endian length. +.It l +A 4 byte big little length. +.El The string is not NUL terminated. .It Dv date A four-byte value interpreted as a UNIX date. diff --git a/src/apprentice.c b/src/apprentice.c index 40d547b..1120a69 100644 --- a/src/apprentice.c +++ b/src/apprentice.c @@ -932,6 +932,11 @@ string_modifier_check(struct magic_set *ms, struct magic *m) if ((ms->flags & MAGIC_CHECK) == 0) return 0; + if (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0) { + file_magwarn(ms, + "'/BHhLl' modifiers are only allowed for pascal strings\n"); + return -1; + } switch (m->type) { case FILE_BESTRING16: case FILE_LESTRING16: @@ -1308,7 +1308,7 @@ parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp, ++l; } m->str_range = 0; - m->str_flags = 0; + m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0; m->num_mask = 0; if ((op = get_op(*l)) != -1) { if (!IS_STRING(m->type)) { @@ -1362,6 +1362,32 @@ parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp, case CHAR_TEXTTEST: m->str_flags |= STRING_TEXTTEST; break; + case CHAR_PSTRING_1_LE: + if (m->type != FILE_PSTRING) + goto bad; + m->str_flags |= PSTRING_1_LE; + break; + case CHAR_PSTRING_2_BE: + if (m->type != FILE_PSTRING) + goto bad; + m->str_flags |= PSTRING_2_BE; + break; + case CHAR_PSTRING_2_LE: + if (m->type != FILE_PSTRING) + goto bad; + m->str_flags |= PSTRING_2_LE; + break; + case CHAR_PSTRING_4_BE: + if (m->type != FILE_PSTRING) + goto bad; + m->str_flags |= PSTRING_4_BE; + break; + case CHAR_PSTRING_4_LE: + if (m->type != FILE_PSTRING) + goto bad; + m->str_flags |= PSTRING_4_LE; + break; + bad: default: if (ms->flags & MAGIC_CHECK) file_magwarn(ms, @@ -1990,7 +1990,7 @@ out: *p = '\0'; m->vallen = CAST(unsigned char, (p - origp)); if (m->type == FILE_PSTRING) - m->vallen++; + m->vallen += file_pstring_length_size(m); return s; } @@ -2379,3 +2379,40 @@ bs1(struct magic *m) m->num_mask = swap8(m->num_mask); } } + +protected size_t +file_pstring_length_size(const struct magic *m) +{ + switch (m->str_flags & PSTRING_LEN) { + case PSTRING_1_LE: + return 1; + case PSTRING_2_LE: + case PSTRING_2_BE: + return 2; + case PSTRING_4_LE: + case PSTRING_4_BE: + return 4; + default: + abort(); /* Impossible */ + return 1; + } +} +protected size_t +file_pstring_get_length(const struct magic *m, const char *s) +{ + switch (m->str_flags & PSTRING_LEN) { + case PSTRING_1_LE: + return *s; + case PSTRING_2_LE: + return (s[1] << 8) | s[0]; + case PSTRING_2_BE: + return (s[0] << 8) | s[1]; + case PSTRING_4_LE: + return (s[3] << 24) | (s[2] << 16) | (s[1] << 8) | s[0]; + case PSTRING_4_BE: + return (s[0] << 24) | (s[1] << 16) | (s[2] << 8) | s[3]; + default: + abort(); /* Impossible */ + return 1; + } +} diff --git a/src/file.h b/src/file.h index 25cd3a5..c84749f 100644 --- a/src/file.h +++ b/src/file.h @@ -285,6 +285,14 @@ struct magic { #define REGEX_OFFSET_START BIT(4) #define STRING_TEXTTEST BIT(5) #define STRING_BINTEST BIT(6) +#define PSTRING_1_BE BIT(7) +#define PSTRING_1_LE BIT(7) +#define PSTRING_2_BE BIT(8) +#define PSTRING_2_LE BIT(9) +#define PSTRING_4_BE BIT(10) +#define PSTRING_4_LE BIT(11) +#define PSTRING_LEN \ + (PSTRING_1_BE|PSTRING_2_LE|PSTRING_2_BE|PSTRING_4_LE|PSTRING_4_BE) #define CHAR_COMPACT_WHITESPACE 'W' #define CHAR_COMPACT_OPTIONAL_WHITESPACE 'w' #define CHAR_IGNORE_LOWERCASE 'c' @@ -292,6 +292,12 @@ struct magic { #define CHAR_REGEX_OFFSET_START 's' #define CHAR_TEXTTEST 't' #define CHAR_BINTEST 'b' +#define CHAR_PSTRING_1_BE 'B' +#define CHAR_PSTRING_1_LE 'B' +#define CHAR_PSTRING_2_BE 'H' +#define CHAR_PSTRING_2_LE 'h' +#define CHAR_PSTRING_4_BE 'L' +#define CHAR_PSTRING_4_LE 'l' #define STRING_IGNORE_CASE (STRING_IGNORE_LOWERCASE|STRING_IGNORE_UPPERCASE) #define STRING_DEFAULT_RANGE 100 @@ -400,6 +400,8 @@ protected ssize_t sread(int, void *, size_t, int); protected int file_check_mem(struct magic_set *, unsigned int); protected int file_looks_utf8(const unsigned char *, size_t, unichar *, size_t *); +protected size_t file_pstring_length_size(const struct magic *); +protected size_t file_pstring_get_length(const struct magic *, const char *); #ifdef __EMX__ protected int file_os2_apptype(struct magic_set *, const char *, const void *, size_t); diff --git a/src/softmagic.c b/src/softmagic.c index a565989..0f15227 100644 --- a/src/softmagic.c +++ b/src/softmagic.c @@ -169,6 +169,8 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, continue; } + if ((e = handle_annotation(ms, m)) != 0) + return e; /* * If we are going to print something, we'll need to print * a blank before we print something else. @@ -176,8 +176,6 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, if (*m->desc) { need_separator = 1; printed_something = 1; - if ((e = handle_annotation(ms, m)) != 0) - return e; if (print_sep(ms, firstline) == -1) return -1; } @@ -252,13 +252,13 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, ms->c.li[cont_level].got_match = 0; break; } + if ((e = handle_annotation(ms, m)) != 0) + return e; /* * If we are going to print something, * make sure that we have a separator first. */ if (*m->desc) { - if ((e = handle_annotation(ms, m)) != 0) - return e; if (!printed_something) { printed_something = 1; if (print_sep(ms, firstline) @@ -450,7 +450,7 @@ mprint(struct magic_set *ms, struct magic *m) return -1; t = ms->offset + strlen(p->s); if (m->type == FILE_PSTRING) - t++; + t += file_pstring_length_size(m); } break; @@ -615,7 +615,7 @@ moffset(struct magic_set *ms, struct magic *m) p->s[strcspn(p->s, "\n")] = '\0'; t = CAST(uint32_t, (ms->offset + strlen(p->s))); if (m->type == FILE_PSTRING) - t++; + t += file_pstring_length_size(m); return t; } diff --git a/src/apprentice.c b/src/apprentice.c index 1120a69..c77f679 100644 --- a/src/apprentice.c +++ b/src/apprentice.c @@ -1314,7 +1314,6 @@ parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp, } m->str_range = 0; m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0; - m->num_mask = 0; if ((op = get_op(*l)) != -1) { if (!IS_STRING(m->type)) { uint64_t val; @@ -2402,6 +2402,8 @@ bs1(struct magic *m) m->in_offset = swap4((uint32_t)m->in_offset); m->lineno = swap4((uint32_t)m->lineno); if (IS_STRING(m->type)) { + if (m->type == FILE_PSTRING) + printf("flags! %d\n", m->str_flags); m->str_range = swap4(m->str_range); m->str_flags = swap4(m->str_flags); }