Browse Source

Import upstream version 5.00

Christos Zoulas 15 years ago
parent
commit
c664f497c4

+ 126 - 30
ChangeLog

@@ -1,9 +1,105 @@
+2008-12-12 15:50  Christos Zoulas <christos@zoulas.com>
+
+	* fix initial offset calculation for non 4K sector files
+
+	* add loop limits to avoid DoS attacks by constructing
+	  looping sector references.
+
+2008-12-03 13:05  Christos Zoulas <christos@zoulas.com>
+
+	* fix memory botches on cdf file parsing.
+
+	* exit with non-zero value for any error, not just for the last
+	  file processed.
+
+2008-11-09 20:42  Charles Longeau <chl@tuxfamily.org>
+
+	* Replace all str{cpy,cat} functions with strl{cpy,cat}
+	* Ensure that strl{cpy,cat} are included in libmagic,
+	  as needed.
+
+2008-11-06 18:18  Christos Zoulas <christos@zoulas.com>
+
+	* Handle ID3 format files.
+
+2008-11-06 23:00  Reuben Thomas <rrt@sc3d.org>
+
+	* Fix --mime, --mime-type and --mime-encoding under new scheme.
+
+	* Rename "ascii" to "text" and add "encoding" test.
+
+	* Return a precise ("utf-16le" or "utf-16be") MIME charset for
+	  UTF-16.
+
+	* Fix error in comment caused by automatic indentation adding
+	  words!
+
+2008-11-06 10:35  Christos Zoulas <christos@astron.com>
+
+	* use memchr instead of strchr because the string
+	  might not be NUL terminated (Scott MacVicar)
+
+2008-11-03 07:31  Reuben Thomas <rrt@sc3d.org>
+
+	* Fix a printf with a non-literal format string.
+
+	* Fix formatting and punctuation of help for "--apple".
+
+2008-10-30 11:00  Reuben Thomas <rrt@sc3d.org>
+
+	* Correct word counts in comments of struct magic.
+
+	* Fix handle_annotation to allow both Apple and MIME types to be
+	  printed, and to return correct code if MIME type is
+	  printed (1, not 0) or if there's an error (-1 not 1).
+
+	* Fix output of charset for MIME type (precede with semi-colon;
+	  fixes Debian bug #501460).
+
+	* Fix potential attacks via conversion specifications in magic
+	  strings.
+
+	* Add a FIXME for Debian bug #488562 (magic files should be
+	  read in a defined order, by sorting the names).
+
+2008-10-18 16:45  Christos Zoulas <christos@astron.com>
+
+	* Added APPLE file creator/type
+
+2008-10-12 10:20  Christos Zoulas <christos@astron.com>
+
+	* Added CDF parsing
+
+2008-10-09 16:40  Christos Zoulas <christos@astron.com>
+
+	* filesystem and msdos patches (Joerg Jenderek)
+
+2008-10-09 13:20  Christos Zoulas <christos@astron.com>
+
+	* correct --exclude documentation issues: remove troff and fortran
+	  and rename "token" to "tokens". (Randy McMurchy)
+
+2008-10-01 10:30  Christos Zoulas <christos@astron.com>
+
+	* Read ~/.magic in addition to the default magic file not instead
+	  of, as documented in the man page.
+
+2008-09-10 21:30  Reuben Thomas  <rrt@sc3d.org>
+
+	* Comment out graphviz patterns, as they match too many files.
+
 2008-08-30 12:54  Christos Zoulas <christos@astron.com>
 
 	* Don't eat trailing \n in magic entries.
 
 	* Cast defines to allow compilation using a c++ compiler.
 
+2008-08-25 23:56  Reuben Thomas  <rrt@sc3d.org>
+
+	* Add text/x-lua MIME type for Lua scripts.
+
+	* Escape { in regex in graphviz patterns.
+
 2008-07-26 00:59  Reuben Thomas  <rrt@sc3d.org>
 
 	* Add MIME types for special files.
@@ -55,22 +151,22 @@
 
 2008-05-06 00:13  Robert Byrnes  <byrnes@wildpumpkin.net>
 
-        * src/Makefile.am:
+	* src/Makefile.am:
 	  Ensure that getopt_long and [v]asprintf are included in libmagic,
 	  as needed.
 
 	  Remove unnecessary EXTRA_DIST.
 
-        * src/Makefile.in:
+	* src/Makefile.in:
 	  Rerun automake.
 
-        * src/vasprintf.c (dispatch):
+	* src/vasprintf.c (dispatch):
 	  Fix variable precision bug: be sure to step past '*'.
 
-        * src/vasprintf.c (core):
+	* src/vasprintf.c (core):
 	  Remove unreachable code.
 
-        * src/apprentice.c (set_test_type):
+	* src/apprentice.c (set_test_type):
 	  Add cast to avoid compiler warning.
 
 2008-04-22 23:45  Christos Zoulas  <christos@astron.com>
@@ -81,12 +177,12 @@
 
 2008-04-04 11:00  Christos Zoulas  <christos@astron.com>
 
-        * >= <= is not supported, so fix the magic and warn about it.
+	* >= <= is not supported, so fix the magic and warn about it.
 	  reported by: Thien-Thi Nguyen <ttn@gnuvola.org>
 
 2008-03-27 16:16  Robert Byrnes  <byrnes@wildpumpkin.net>
 
-        * src/readelf.c (donote):
+	* src/readelf.c (donote):
 	  ELF core file command name/line bug fixes and enhancements:
 
 	  Try larger offsets first to avoid false matches
@@ -112,7 +208,7 @@
 	* Clarify UTF-8 BOM message (Reuben Thomas)
 
 	* Add HTML comment to token list in names.h
-	
+
 2007-02-04 15:50 Christos Zoulas <christos@astron.com>
 
 	* Debian fixes (Reuben Thomas)
@@ -152,7 +248,7 @@
 
 2007-10-28 20:48 Christos Zoulas <christos@astron.com>
 
- 	* float and double magic support (Behan Webster) 
+ 	* float and double magic support (Behan Webster)
 
 2007-10-28 20:48 Christos Zoulas <christos@astron.com>
 
@@ -199,7 +295,7 @@
 	  be easily parsed:
 	      mimetype [charset=character-set] [encoding=encoding-mime-type]
 
-	  Remove spurious extra text from some MIME type printouts 
+	  Remove spurious extra text from some MIME type printouts
 	  (mostly in is_tar).
 
 	  Fix one case where -i produced nothing at all (for a 1-byte file,
@@ -229,7 +325,7 @@
 2007-03-15 10:51 Christos Zoulas <christos@astron.com>
 
 	* fix fortran and nroff reversed tests (Dmitry V. Levin)
-	
+
 	* fix exclude option (Dmitry V. Levin)
 
 2007-02-08 17:30 Christos Zoulas <christos@astron.com>
@@ -248,7 +344,7 @@
 	* Add exclude flag.
 
 2007-01-18 05:29 Anon Ymous <do@not.spam.me>
-	
+
 	* Move the "type" detection code from parse() into its own table
 	  driven routine.  This avoids maintaining multiple lists in
 	  file.h.
@@ -256,7 +352,7 @@
 	* Add an optional conditional field (just before the type field).
 	  This code is wrapped in "#ifdef ENABLE_CONDITIONALS" as it is
 	  likely to go away.
-	
+
 2007-01-16 23:24 Anon Ymous <do@not.spam.me>
 
 	* Fix an initialization bug in check_mem().
@@ -327,7 +423,7 @@
 2006-12-08 16:32 Christos Zoulas <christos@astron.com>
 
 	* store and print the line number of the magic
-	  entry for debugging.         
+	  entry for debugging.
 
 	* if the magic entry did not print anything,
 	  don't treat it as a match
@@ -342,7 +438,7 @@
 	  file_softmagic.
 
 2006-11-25 13:35 Christos Zoulas <christos@astron.com>
-	
+
 	* Don't store the current offset in the magic
 	  struct, because it needs to be restored and
 	  it was not done properly all the time. Bug
@@ -432,7 +528,7 @@
 	* Look for note sections in non executables.
 
 2005-09-20 13:33 Christos Zoulas <christos@astron.com>
-	
+
 	* Don't print SVR4 Style in core files multiple times
 	    (Radek Vokál)
 
@@ -443,9 +539,9 @@
 2005-08-18 09:53 Christos Zoulas <christos@astron.com>
 
 	* Remove erroneous mention of /etc/magic in the file man page
-	  This is gentoo bug 101639. (Mike Frysinger) 
+	  This is gentoo bug 101639. (Mike Frysinger)
 
-	* Cross-compile support and detection (Mike Frysinger) 
+	* Cross-compile support and detection (Mike Frysinger)
 
 2005-08-12 10:17 Christos Zoulas <christos@astron.com>
 
@@ -477,20 +573,20 @@
 	* Avoid NULL pointer dereference in time conversion.
 
 2005-03-06 00:00  Joerg Walter <jwalt@mail.garni.ch>
-	
+
 	* Add indirect magic offset support, and search mode.
 
 2005-01-12 00:00  Stepan Kasal  <kasal@ucw.cz>
 
-        * src/ascmagic.c (file_ascmagic): Fix three bugs about text files:
-          If a CRLF text file happens to have CR at offset HOWMANY - 1
-          (currently 0xffff), it should not be counted as CR line
-          terminator.
-          If a line has length exactly MAXLINELEN, it should not yet be
-          treated as a ``very long line'', as MAXLINELEN is ``longest sane
-          line length''.
-          With CRLF, the line length was not computed correctly, and even
-          lines of length MAXLINELEN - 1 were treated as ``very long''.
+	* src/ascmagic.c (file_ascmagic): Fix three bugs about text files:
+	  If a CRLF text file happens to have CR at offset HOWMANY - 1
+	  (currently 0xffff), it should not be counted as CR line
+	  terminator.
+	  If a line has length exactly MAXLINELEN, it should not yet be
+	  treated as a ``very long line'', as MAXLINELEN is ``longest sane
+	  line length''.
+	  With CRLF, the line length was not computed correctly, and even
+	  lines of length MAXLINELEN - 1 were treated as ``very long''.
 
 2004-12-07 14:15  Christos Zoulas  <christos@astron.com>
 
@@ -525,12 +621,12 @@
 
 	* Remove 3rd and 4th copyright clause; approved by Ian Darwin.
 
-	* Fix small memory leaks; caught by: Tamas Sarlos 
+	* Fix small memory leaks; caught by: Tamas Sarlos
 	    <stamas@csillag.ilab.sztaki.hu>
 
 2004-07-24 16:33  Christos Zoulas  <christos@astron.com>
 
-	* magic.mime update Danny Milosavljevic <danny.milo@gmx.net> 
+	* magic.mime update Danny Milosavljevic <danny.milo@gmx.net>
 
 	* FreeBSD version update Oliver Eikemeier <eikemeier@fillmore-labs.com>
 

+ 29 - 27
README

@@ -1,5 +1,5 @@
 ** README for file(1) Command **
-@(#) $File: README,v 1.40 2008/04/23 03:45:20 christos Exp $
+@(#) $File: README,v 1.41 2008/12/02 16:34:46 christos Exp $
 
 E-mail: christos@astron.com
 Mailing List: file@mx.gw.com
@@ -48,33 +48,35 @@ in magic(5) format please, to the maintainer, Christos Zoulas.
 
 COPYING - read this first.
 README - read this second (you are currently reading this file).
-PORTING - read this only if the program won't compile.
-Makefile - read this next, adapt it as needed (particularly
-	the location of the old existing file command and
-	the man page layouts), type "make" to compile, 
-	"make try" to try it out against your old version.
-	Expect some diffs, particularly since your original
-	file(1) may not grok the embedded-space ("\ ") in
-	the current magic file, or may even not use the
-	magic file.
-apprentice.c - parses /etc/magic to learn magic
-ascmagic.c - third & last set of tests, based on hardwired assumptions.
-core - not included in distribution due to mailer limitations.
-debug.c - includes -c printout routine
-file.1 - man page for the command
-magic.4 - man page for the magic file, courtesy Guy Harris.
+INSTALL - read on how to install
+
+src/apprentice.c - parses /etc/magic to learn magic
+src/apptype.c - used for OS/2 specific application type magic
+src/asprintf.c - replacement for OS's that don't have it.
+src/ascmagic.c - third & last set of tests, based on hardwired assumptions.
+src/cdf.c - parser for Microsoft Compound Document Files
+src/cdf_time.c - time converter for CDF.
+src/compress.c - handles decompressing files to look inside.
+src/encoding.c - handles unicode encodings
+src/file.c - the main program
+src/file.h - header file
+src/fsmagic.c - first set of tests the program runs, based on filesystem info
+src/funcs.c - utility functions
+src/getopt_long.c - replacement for OS's that don't have it.
+src/is_tar.c, tar.h - knows about tarchives (courtesy John Gilmore).
+src/names.h - header file for ascmagic.c
+src/magic.c - the libmagic api
+src/print.c - print results, errors, warnings.
+src/readcdf.c - CDF wrapper.
+src/readelf.[ch] - Stand-alone elf parsing code.
+src/softmagic.c - 2nd set of tests, based on /etc/magic
+src/strlcat.c - replacement for OS's that don't have it.
+src/strlcpy.c - replacement for OS's that don't have it.
+src/vasprintf.c - replacement for OS's that don't have it.
+doc/file.1 - man page for the command
+doc/magic.4 - man page for the magic file, courtesy Guy Harris.
 	Install as magic.4 on USG and magic.5 on V7 or Berkeley; cf Makefile.
-file.c - main program
-file.h - header file
-fsmagic.c - first set of tests the program runs, based on filesystem info
-is_tar.c, tar.h - knows about tarchives (courtesy John Gilmore).
-magdir - directory of /etc/magic pieces
-	magdir/Makefile - ADJUST THIS FOR YOUR CONFIGURATION
-names.h - header file for ascmagic.c
-softmagic.c - 2nd set of tests, based on /etc/magic
-readelf.[ch] - Stand-alone elf parsing code.
-compress.c - on-the-fly decompression.
-print.c - print results, errors, warnings.
+Magdir - directory of /etc/magic pieces
 
 ------------------------------------------------------------------------------
 

+ 6 - 0
TODO

@@ -1,3 +1,9 @@
+Fix output so that tests for MIME and APPLE flags are not needed all
+over the place, and actual output is only done in one place. This
+needs a design. Suggestion: push possible outputs on to a list, then
+pick the last-pushed (most specific, one hopes) value at the end, or
+use a default if the list is empty.
+
 Continue to squash all magic bugs. See Debian BTS for a good source.
 
 Store arbitrarily long strings, for example for %s patterns, so that

+ 12 - 0
config.h.in

@@ -78,6 +78,12 @@
 /* Define to 1 if you have the <string.h> header file. */
 #undef HAVE_STRING_H
 
+/* Define to 1 if you have the `strlcat' function. */
+#undef HAVE_STRLCAT
+
+/* Define to 1 if you have the `strlcpy' function. */
+#undef HAVE_STRLCPY
+
 /* Define to 1 if you have the `strndup' function. */
 #undef HAVE_STRNDUP
 
@@ -90,6 +96,12 @@
 /* Define to 1 if `st_rdev' is member of `struct stat'. */
 #undef HAVE_STRUCT_STAT_ST_RDEV
 
+/* Define to 1 if `tm_gmtoff' is member of `struct tm'. */
+#undef HAVE_STRUCT_TM_TM_GMTOFF
+
+/* Define to 1 if `tm_zone' is member of `struct tm'. */
+#undef HAVE_STRUCT_TM_TM_ZONE
+
 /* Define to 1 if you have the <sys/mman.h> header file. */
 #undef HAVE_SYS_MMAN_H
 

+ 216 - 11
configure

@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.61 for file 4.26.
+# Generated by GNU Autoconf 2.61 for file 5.00.
 #
 # Report bugs to <christos@astron.com>.
 #
@@ -728,8 +728,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
 # Identity of this package.
 PACKAGE_NAME='file'
 PACKAGE_TARNAME='file'
-PACKAGE_VERSION='4.26'
-PACKAGE_STRING='file 4.26'
+PACKAGE_VERSION='5.00'
+PACKAGE_STRING='file 5.00'
 PACKAGE_BUGREPORT='christos@astron.com'
 
 # Factoring default headers for most tests.
@@ -1395,7 +1395,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures file 4.26 to adapt to many kinds of systems.
+\`configure' configures file 5.00 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1465,7 +1465,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of file 4.26:";;
+     short | recursive ) echo "Configuration of file 5.00:";;
    esac
   cat <<\_ACEOF
 
@@ -1572,7 +1572,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-file configure 4.26
+file configure 5.00
 generated by GNU Autoconf 2.61
 
 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1586,7 +1586,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by file $as_me 4.26, which was
+It was created by file $as_me 5.00, which was
 generated by GNU Autoconf 2.61.  Invocation command line was
 
   $ $0 $@
@@ -2276,7 +2276,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='file'
- VERSION='4.26'
+ VERSION='5.00'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -22166,6 +22166,209 @@ _ACEOF
 
 fi
 
+{ echo "$as_me:$LINENO: checking for struct tm.tm_gmtoff" >&5
+echo $ECHO_N "checking for struct tm.tm_gmtoff... $ECHO_C" >&6; }
+if test "${ac_cv_member_struct_tm_tm_gmtoff+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+$ac_includes_default
+int
+main ()
+{
+static struct tm ac_aggr;
+if (ac_aggr.tm_gmtoff)
+return 0;
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest.$ac_objext; then
+  ac_cv_member_struct_tm_tm_gmtoff=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+$ac_includes_default
+int
+main ()
+{
+static struct tm ac_aggr;
+if (sizeof ac_aggr.tm_gmtoff)
+return 0;
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest.$ac_objext; then
+  ac_cv_member_struct_tm_tm_gmtoff=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_cv_member_struct_tm_tm_gmtoff=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_member_struct_tm_tm_gmtoff" >&5
+echo "${ECHO_T}$ac_cv_member_struct_tm_tm_gmtoff" >&6; }
+if test $ac_cv_member_struct_tm_tm_gmtoff = yes; then
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_STRUCT_TM_TM_GMTOFF 1
+_ACEOF
+
+
+fi
+{ echo "$as_me:$LINENO: checking for struct tm.tm_zone" >&5
+echo $ECHO_N "checking for struct tm.tm_zone... $ECHO_C" >&6; }
+if test "${ac_cv_member_struct_tm_tm_zone+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+$ac_includes_default
+int
+main ()
+{
+static struct tm ac_aggr;
+if (ac_aggr.tm_zone)
+return 0;
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest.$ac_objext; then
+  ac_cv_member_struct_tm_tm_zone=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+$ac_includes_default
+int
+main ()
+{
+static struct tm ac_aggr;
+if (sizeof ac_aggr.tm_zone)
+return 0;
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest.$ac_objext; then
+  ac_cv_member_struct_tm_tm_zone=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_cv_member_struct_tm_tm_zone=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_member_struct_tm_tm_zone" >&5
+echo "${ECHO_T}$ac_cv_member_struct_tm_tm_zone" >&6; }
+if test $ac_cv_member_struct_tm_tm_zone = yes; then
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_STRUCT_TM_TM_ZONE 1
+_ACEOF
+
+
+fi
+
 { echo "$as_me:$LINENO: checking for tm_zone in struct tm" >&5
 echo $ECHO_N "checking for tm_zone in struct tm... $ECHO_C" >&6; }
 if test "${ac_cv_struct_tm_zone+set}" = set; then
@@ -23853,7 +24056,9 @@ done
 
 
 
-for ac_func in getopt_long asprintf vasprintf
+
+
+for ac_func in getopt_long asprintf vasprintf strlcpy strlcat
 do
 as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
 { echo "$as_me:$LINENO: checking for $ac_func" >&5
@@ -24469,7 +24674,7 @@ exec 6>&1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by file $as_me 4.26, which was
+This file was extended by file $as_me 5.00, which was
 generated by GNU Autoconf 2.61.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -24522,7 +24727,7 @@ Report bugs to <bug-autoconf@gnu.org>."
 _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF
 ac_cs_version="\\
-file config.status 4.26
+file config.status 5.00
 configured by $0, generated by GNU Autoconf 2.61,
   with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
 

+ 4 - 2
configure.ac

@@ -1,5 +1,5 @@
 dnl Process this file with autoconf to produce a configure script.
-AC_INIT(file, 4.26, christos@astron.com)
+AC_INIT(file, 5.00, christos@astron.com)
 AM_INIT_AUTOMAKE
 AM_CONFIG_HEADER(config.h)
 
@@ -75,6 +75,8 @@ AC_TYPE_OFF_T
 AC_TYPE_SIZE_T
 AC_CHECK_MEMBERS([struct stat.st_rdev])
 
+AC_STRUCT_TM
+AC_CHECK_MEMBERS([struct tm.tm_gmtoff, struct tm.tm_zone])
 AC_STRUCT_TIMEZONE_DAYLIGHT
 AC_SYS_LARGEFILE
 AC_FUNC_FSEEKO
@@ -139,7 +141,7 @@ dnl Checks for functions
 AC_CHECK_FUNCS(mmap strerror strndup strtoul mbrtowc mkstemp utimes utime wcwidth strtof)
 
 dnl Provide implementation of some required functions if necessary
-AC_REPLACE_FUNCS(getopt_long asprintf vasprintf)
+AC_REPLACE_FUNCS(getopt_long asprintf vasprintf strlcpy strlcat)
 
 dnl Checks for libraries
 AC_CHECK_LIB(z,gzopen)

+ 76 - 77
doc/file.man

@@ -1,5 +1,5 @@
-.\" $File: file.man,v 1.73 2008/02/19 17:58:00 rrt Exp $
-.Dd February 19, 2008
+.\" $File: file.man,v 1.79 2008/11/06 22:49:08 rrt Exp $
+.Dd October 9, 2008
 .Dt FILE __CSECTION__
 .Os
 .Sh NAME
@@ -41,12 +41,12 @@ characters and is probably safe to read on an
 terminal),
 .Em executable
 (the file contains the result of compiling a program
-in a form understandable to some 
+in a form understandable to some
 .Dv UNIX
 kernel or another),
 or
 .Em data
-meaning anything else (data is usually 
+meaning anything else (data is usually
 .Sq binary
 or non-printable).
 Exceptions are well-known file formats (core files, tar archives)
@@ -54,13 +54,13 @@ that are known to contain binary data.
 When modifying magic files or the program itself, make sure to
 .Em "preserve these keywords" .
 Users depend on knowing that all the readable files in a directory
-have the word 
-.Dq text
+have the word
+.Sq text
 printed.
-Don't do as Berkeley did and change 
-.Dq shell commands text
-to 
-.Dq shell script .
+Don't do as Berkeley did and change
+.Sq shell commands text
+to
+.Sq shell script .
 .Pp
 The filesystem tests are based on examining the return from a
 .Xr stat 2
@@ -78,16 +78,16 @@ The magic tests are used to check for files with data in
 particular fixed formats.
 The canonical example of this is a binary executable (compiled program)
 .Dv a.out
-file, whose format is defined in 
+file, whose format is defined in
 .In elf.h ,
 .In a.out.h
 and possibly
 .In exec.h
 in the standard include directory.
-These files have a 
+These files have a
 .Sq "magic number"
 stored in a particular place
-near the beginning of the file that tells the 
+near the beginning of the file that tells the
 .Dv UNIX operating system
 that the file is a binary executable, and which of several types thereof.
 The concept of a
@@ -116,11 +116,11 @@ ranges and sequences of bytes that constitute printable text
 in each set.
 If a file passes any of these tests, its character set is reported.
 ASCII, ISO-8859-x, UTF-8, and extended-ASCII files are identified
-as 
-.Dq text
+as
+.Sq text
 because they will be mostly readable on nearly any terminal;
-UTF-16 and EBCDIC are only 
-.Dq character data
+UTF-16 and EBCDIC are only
+.Sq character data
 because, while
 they contain text, it is text that will require translation
 before it can be read.
@@ -144,19 +144,19 @@ For example, the keyword
 .Em .br
 indicates that the file is most likely a
 .Xr troff 1
-input file, just as the keyword 
+input file, just as the keyword
 .Em struct
 indicates a C program.
 These tests are less reliable than the previous
 two groups, so they are performed last.
 The language test routines also test for some miscellany
-(such as 
+(such as
 .Xr tar 1
 archives).
 .Pp
 Any file that cannot be identified as having been written
 in any of the character sets listed above is simply said to be
-.Dq data .
+.Sq data .
 .Sh OPTIONS
 .Bl -tag -width indent
 .It Fl b , -brief
@@ -177,40 +177,41 @@ from the list of tests made to determine the file type. Valid test names
 are:
 .Bl -tag -width
 .It apptype
-Check for
 .Dv EMX
 application type (only on EMX).
-.It ascii
-Check for various types of ascii files.
+.It text
+Various types of text files (this test will try to guess the text encoding, irrespective of the setting of the
+.Sq encoding
+option).
+.It encoding
+Different text encodings for soft magic tests.
+.It tokens
+Looks for known tokens inside text files.
+.It cdf
+Prints details of Compound Document Files.
 .It compress
-Don't look for, or inside compressed files.
+Checks for, and looks inside, compressed files.
 .It elf
-Don't print elf details.
-.It fortran
-Don't look for fortran sequences inside ascii files.
+Prints ELF file details.
 .It soft
-Don't consult magic files.
+Consults magic files.
 .It tar
-Don't examine tar files.
-.It token
-Don't look for known tokens inside ascii files.
-.It troff
-Don't look for troff sequences inside ascii files.
+Examines tar files.
 .El
 .It Fl f , -files-from Ar namefile
-Read the names of the files to be examined from 
+Read the names of the files to be examined from
 .Ar namefile
-(one per line) 
+(one per line)
 before the argument list.
-Either 
+Either
 .Ar namefile
 or at least one filename argument must be present;
-to test the standard input, use 
+to test the standard input, use
 .Sq -
 as a filename argument.
 .It Fl F , -separator Ar separator
 Use the specified string as the separator between the filename and the
-file result returned. Defaults to 
+file result returned. Defaults to
 .Sq \&: .
 .It Fl h , -no-dereference
 option causes symlinks not to be followed
@@ -221,17 +222,15 @@ is not defined.
 .It Fl i , -mime
 Causes the file command to output mime type strings rather than the more
 traditional human readable ones. Thus it may say
-.Dq text/plain charset=us-ascii
+.Sq text/plain; charset=us-ascii
 rather than
-.Dq ASCII text .
+.Sq ASCII text .
 In order for this option to work, file changes the way
 it handles files recognized by the command itself (such as many of the
 text file types, directories etc), and makes use of an alternative
-.Dq magic
+.Sq magic
 file.
-(See
-.Dq FILES
-section, below).
+(See the FILES section, below).
 .It Fl -mime-type , -mime-encoding
 Like
 .Fl i ,
@@ -239,10 +238,10 @@ but print only the specified element(s).
 .It Fl k , -keep-going
 Don't stop at the first match, keep going. Subsequent matches will be
 have the string
-.Dq "\[rs]012\- "
+.Sq "\[rs]012\- "
 prepended.
 (If you want a newline, see the
-.Dq "\-r"
+.Sq "\-r"
 option.)
 .It Fl L , -dereference
 option causes symlinks to be followed, as the like-named option in
@@ -324,7 +323,7 @@ will not attempt to open
 .Pa $HOME/.magic .
 .Nm
 adds
-.Dq .mgc
+.Sq .mgc
 to the value of this variable as appropriate.
 The environment variable
 .Dv POSIXLY_CORRECT
@@ -347,47 +346,47 @@ options.
 .Sh STANDARDS CONFORMANCE
 This program is believed to exceed the System V Interface Definition
 of FILE(CMD), as near as one can determine from the vague language
-contained therein. 
+contained therein.
 Its behavior is mostly compatible with the System V program of the same name.
 This version knows more magic, however, so it will produce
-different (albeit more accurate) output in many cases. 
+different (albeit more accurate) output in many cases.
 .\" URL: http://www.opengroup.org/onlinepubs/009695399/utilities/file.html
 .Pp
-The one significant difference 
+The one significant difference
 between this version and System V
 is that this version treats any white space
 as a delimiter, so that spaces in pattern strings must be escaped.
 For example,
-.Bd -literal -offset indent 
+.Bd -literal -offset indent
 >10	string	language impress\ 	(imPRESS data)
 .Ed
 .Pp
 in an existing magic file would have to be changed to
-.Bd -literal -offset indent 
+.Bd -literal -offset indent
 >10	string	language\e impress	(imPRESS data)
 .Ed
 .Pp
 In addition, in this version, if a pattern string contains a backslash,
 it must be escaped.
 For example
-.Bd -literal -offset indent 
+.Bd -literal -offset indent
 0	string		\ebegindata	Andrew Toolkit document
 .Ed
 .Pp
 in an existing magic file would have to be changed to
-.Bd -literal -offset indent 
+.Bd -literal -offset indent
 0	string		\e\ebegindata	Andrew Toolkit document
 .Ed
 .Pp
 SunOS releases 3.2 and later from Sun Microsystems include a
-.Nm 
+.Nm
 command derived from the System V one, but with some extensions.
 My version differs from Sun's only in minor ways.
-It includes the extension of the 
+It includes the extension of the
 .Sq &
 operator, used as,
 for example,
-.Bd -literal -offset indent 
+.Bd -literal -offset indent
 >16	long&0x7fffffff	>0		not stripped
 .Ed
 .Sh MAGIC DIRECTORY
@@ -395,7 +394,7 @@ The magic file entries have been collected from various sources,
 mainly USENET, and contributed by various authors.
 Christos Zoulas (address below) will collect additional
 or corrected magic file entries.
-A consolidation of magic file entries 
+A consolidation of magic file entries
 will be distributed periodically.
 .Pp
 The order of entries in the magic file is significant.
@@ -405,14 +404,14 @@ If your old
 .Nm
 command uses a magic file,
 keep the old magic file around for comparison purposes
-(rename it to 
+(rename it to
 .Pa __MAGIC__.orig ).
 .Sh EXAMPLES
-.Bd -literal -offset indent 
+.Bd -literal -offset indent
 $ file file.c file /dev/{wd0a,hda}
 file.c:   C program text
 file:     ELF 32-bit LSB executable, Intel 80386, version 1 (SYSV),
-          dynamically linked (uses shared libs), stripped
+	  dynamically linked (uses shared libs), stripped
 /dev/wd0a: block special (0/0)
 /dev/hda: block special (3/0)
 
@@ -441,9 +440,9 @@ file:        application/x-executable
 
 .Ed
 .Sh HISTORY
-There has been a 
-.Nm 
-command in every 
+There has been a
+.Nm
+command in every
 .Dv UNIX since at least Research Version 4
 (man page dated November, 1973).
 The System V version introduced one significant major change:
@@ -466,7 +465,7 @@ Primary development and maintenance from 1990 to the present by
 Christos Zoulas (christos@astron.com).
 .Pp
 Altered by Chris Lowth, chris@lowth.com, 2000:
-Handle the 
+Handle the
 .Fl i
 option to output mime type strings, using an alternative
 magic file and internal logic.
@@ -480,7 +479,7 @@ support and merge MIME and non-MIME magic, support directories as well
 as files of magic, apply many bug fixes and improve the build system.
 .Pp
 The list of contributors to the
-.Dq magic
+.Sq magic
 directory (magic files)
 is too long to include here.
 You know who you are; thank you.
@@ -512,10 +511,10 @@ files.
 The support for text files (primarily for programming languages)
 is simplistic, inefficient and requires recompilation to update.
 .Pp
-The list of keywords in 
+The list of keywords in
 .Dv ascmagic
 probably belongs in the Magic file.
-This could be done by using some keyword like 
+This could be done by using some keyword like
 .Sq *
 for the offset value.
 .Pp
@@ -523,20 +522,20 @@ Complain about conflicts in the magic file entries.
 Make a rule that the magic entries sort based on file offset rather
 than position within the magic file?
 .Pp
-The program should provide a way to give an estimate 
-of 
-.Dq how good
+The program should provide a way to give an estimate
+of
+.Sq how good
 a guess is.
-We end up removing guesses (e.g. 
-.Dq From\ 
+We end up removing guesses (e.g.
+.Sq "From\ "
 as first 5 chars of file) because
-they are not as good as other guesses (e.g. 
-.Dq Newsgroups:
+they are not as good as other guesses (e.g.
+.Sq Newsgroups:
 versus
-.Dq Return-Path:
+.Sq Return-Path:
 ).
 Still, if the others don't pan out, it should be possible to use the
-first guess.  
+first guess.
 .Pp
 This manual page, and particularly this section, is too long.
 .Sh RETURN CODE

+ 3 - 10
doc/libmagic.man

@@ -1,4 +1,4 @@
-.\" $File: libmagic.man,v 1.18 2008/02/28 22:24:46 rrt Exp $
+.\" $File: libmagic.man,v 1.19 2008/10/06 20:16:04 christos Exp $
 .\"
 .\" Copyright (c) Christos Zoulas 2003.
 .\" All Rights Reserved.
@@ -25,7 +25,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd November 15, 2006
+.Dd October 6, 2008
 .Dt MAGIC 3
 .Os
 .Sh NAME
@@ -200,11 +200,8 @@ before any magic queries can performed.
 .Pp
 The default database file is named by the MAGIC environment variable.  If
 that variable is not set, the default database file name is __MAGIC__.
-.Pp
 .Fn magic_load
 adds
-.Dq .mime
-and/or
 .Dq .mgc
 to the database filename as appropriate.
 .Sh RETURN VALUES
@@ -237,11 +234,7 @@ when
 .Dv MAGIC_PRESERVE_ATIME
 is set.
 .Sh FILES
-.Bl -tag -width __MAGIC__.mime.mgc -compact
-.It Pa __MAGIC__.mime
-The non-compiled default magic mime database.
-.It Pa __MAGIC__.mime.mgc
-The compiled default magic mime database.
+.Bl -tag -width __MAGIC__.mgc -compact
 .It Pa __MAGIC__
 The non-compiled default magic database.
 .It Pa __MAGIC__.mgc

+ 15 - 4
doc/magic.man

@@ -1,4 +1,4 @@
-.\" $File: magic.man,v 1.57 2008/08/30 09:50:20 christos Exp $
+.\" $File: magic.man,v 1.59 2008/11/06 23:22:53 christos Exp $
 .Dd August 30, 2008
 .Dt MAGIC __FSECTION__
 .Os
@@ -84,6 +84,8 @@ local time rather than UTC.
 .It Dv qldate
 An eight-byte value interpreted as a UNIX-style date, but interpreted as
 local time rather than UTC.
+.It Dv beid3
+A 32-bit ID3 length in big-endian byte order.
 .It Dv beshort
 A two-byte value in big-endian byte order.
 .It Dv belong
@@ -110,6 +112,8 @@ interpreted as a UNIX-style date, but interpreted as local time rather
 than UTC.
 .It Dv bestring16
 A two-byte unicode (UCS16) string in big-endian byte order.
+.It Dv leid3
+A 32-bit ID3 length in little-endian byte order.
 .It Dv leshort
 A two-byte value in little-endian byte order.
 .It Dv lelong
@@ -145,6 +149,8 @@ interpreted as a UNIX date.
 A four-byte value in middle-endian (PDP-11) byte order,
 interpreted as a UNIX-style date, but interpreted as local time rather
 than UTC.
+.It Dv indirect
+Starting at the given offset, consult the magic database again.
 .It Dv regex
 A regular expression match in extended POSIX regular expression syntax
 (like egrep). Regular expressions can take exponential time to
@@ -290,6 +296,11 @@ added before it: multiple matches are normally separated by a single
 space.
 .El
 .Pp
+A 4+4 character APPLE creator and type can be specified as:
+.Bd -literal -offset indent 
+!:apple	CREATYPE
+.Ed
+.Pp
 A MIME type is given on a separate line, which must be the next
 non-blank or comment line after the magic line that identifies the
 file type, and has the following format:
@@ -361,12 +372,12 @@ the file.
 The value at that offset is read, and is used again as an offset
 in the file.
 Indirect offsets are of the form:
-.Em (( x [.[bslBSL]][+\-][ y ]) .
+.Em (( x [.[bislBISLm]][+\-][ y ]) .
 The value of
 .Em x
 is used as an offset in the file.
-A byte, short or long is read at that offset depending on the
-.Em [bslBSLm]
+A byte, id3 length, short or long is read at that offset depending on the
+.Em [bislBISLm]
 type specifier.
 The capitalized types interpret the number as a big endian
 value, whereas the small letter versions interpret the number as a little

+ 23 - 0
magic/Magdir/animation

@@ -325,6 +325,7 @@
 
 # MP2, M1A
 0       beshort&0xFFFE  0xFFFC         MPEG ADTS, layer II, v1
+!:mime	audio/mpeg
 # rates
 >2      byte&0xF0       0x10           \b,  32 kbps
 >2      byte&0xF0       0x20           \b,  48 kbps
@@ -399,6 +400,7 @@
 
 # MP3, M2A
 0       beshort&0xFFFE  0xFFF2         MPEG ADTS, layer III, v2
+!:mime	audio/mpeg
 # rate
 >2      byte&0xF0       0x10           \b,   8 kbps
 >2      byte&0xF0       0x20           \b,  16 kbps
@@ -790,3 +792,24 @@
 0       belong             0x00000001
 >4      byte&0x1F	   0x07
 !:mime	video/h264
+
+# Type: Bink Video
+# URL:  http://wiki.multimedia.cx/index.php?title=Bink_Container
+# From: <hoehle@users.sourceforge.net>  2008-07-18
+0	string		BIK	Bink Video
+>3	regex		=[a-z]	rev.%s
+#>4	ulelong		x	size %d
+>20	ulelong		x	\b, %d
+>24	ulelong		x	\bx%d
+>8	ulelong		x	\b, %d frames
+>32	ulelong		x	at rate %d/
+>28	ulelong		>1	\b%d
+>40	ulelong		=0	\b, no audio
+>40	ulelong		!0	\b, %d audio track
+>>40	ulelong		!1	\bs
+# follow properties of the first audio track only
+>>48	uleshort	x	%dHz
+>>51	byte&0x20	0	mono
+>>51	byte&0x20	!0	stereo
+#>>51	byte&0x10	0	FFT
+#>>51	byte&0x10	!0	DCT

+ 8 - 37
magic/Magdir/audio

@@ -286,43 +286,14 @@
 # SGI SoundTrack <mpruett@sgi.com>
 0	string		_SGI_SoundTrack		SGI SoundTrack project file
 # ID3 version 2 tags <waschk@informatik.uni-rostock.de>
-0	string		ID3	Audio file with ID3 version 2.
-# ??? Normally such a file is an MP3 file, but this will give false positives
-!:mime	audio/mpeg
->3	ubyte	<0xff	\b%d
-#>4	ubyte	<0xff	\b%d tag
->2584	string	fLaC		\b, FLAC encoding
->>2588	byte&0x7f		>0		\b, unknown version
->>2588	byte&0x7f		0		\b
-# some common bits/sample values
->>>2600	beshort&0x1f0		0x030		\b, 4 bit
->>>2600	beshort&0x1f0		0x050		\b, 6 bit
->>>2600	beshort&0x1f0		0x070		\b, 8 bit
->>>2600	beshort&0x1f0		0x0b0		\b, 12 bit
->>>2600	beshort&0x1f0		0x0f0		\b, 16 bit
->>>2600	beshort&0x1f0		0x170		\b, 24 bit
->>>2600	byte&0xe		0x0		\b, mono
->>>2600	byte&0xe		0x2		\b, stereo
->>>2600	byte&0xe		0x4		\b, 3 channels
->>>2600	byte&0xe		0x6		\b, 4 channels
->>>2600	byte&0xe		0x8		\b, 5 channels
->>>2600	byte&0xe		0xa		\b, 6 channels
->>>2600	byte&0xe		0xc		\b, 7 channels
->>>2600	byte&0xe		0xe		\b, 8 channels
-# some common sample rates
->>>2597	belong&0xfffff0		0x0ac440	\b, 44.1 kHz
->>>2597	belong&0xfffff0		0x0bb800	\b, 48 kHz
->>>2597	belong&0xfffff0		0x07d000	\b, 32 kHz
->>>2597	belong&0xfffff0		0x056220	\b, 22.05 kHz
->>>2597	belong&0xfffff0		0x05dc00	\b, 24 kHz
->>>2597	belong&0xfffff0		0x03e800	\b, 16 kHz
->>>2597	belong&0xfffff0		0x02b110	\b, 11.025 kHz
->>>2597	belong&0xfffff0		0x02ee00	\b, 12 kHz
->>>2597	belong&0xfffff0		0x01f400	\b, 8 kHz
->>>2597	belong&0xfffff0		0x177000	\b, 96 kHz
->>>2597	belong&0xfffff0		0x0fa000	\b, 64 kHz
->>>2601	byte&0xf		>0		\b, >4G samples
->2584	string	!fLaC		\b, MP3 encoding
+0	string		ID3	Audio file with ID3 version 2
+>3	byte		x	\b.%d
+>4	byte		x	\b.%d
+>>5	byte		&0x80	\b, unsynchronized frames
+>>5	byte		&0x40	\b, extended header
+>>5	byte		&0x20	\b, experimental
+>>5	byte		&0x10	\b, footer present
+>(6.I)	indirect	x	\b, contains: 
 
 # NSF (NES sound file) magic
 0	string		NESM\x1a	NES Sound File

+ 11 - 9
magic/Magdir/cafebabe

@@ -12,16 +12,18 @@
 # (and use as a hack). Let's not use 18, because the Mach-O people
 # might add another one or two as time goes by...
 #
-0	beshort		0xcafe
->2	beshort         0xbabe
+0	belong		0xcafebabe
 !:mime	application/x-java-applet
->>2	belong		>30		compiled Java class data,
->>>6	beshort		x	        version %d.
->>>4	beshort		x       	\b%d
->>4	belong		1		Mach-O fat file with 1 architecture
->>4	belong		>1
->>>4	belong		<20		Mach-O fat file with %ld architectures
->2	beshort		0xd00d		JAR compressed with pack200,
+>4	belong		>30		compiled Java class data,
+>>6	beshort		x	        version %d.
+>>4	beshort		x       	\b%d
+
+0	belong		0xcafebabe
+>4	belong		1		Mach-O fat file with 1 architecture
+>4	belong		>1
+>>4	belong		<20		Mach-O fat file with %ld architectures
+
+0	belong		0xcafed00d	JAR compressed with pack200,
->>5	byte		x		version %d.
->>4	byte		x		\b%d
+>5	byte		x		version %d.
+>4	byte		x		\b%d
 !:mime	application/x-java-pack200

+ 6 - 0
magic/Magdir/compress

@@ -11,6 +11,7 @@
 # standard unix compress
 0	string		\037\235	compress'd data
 !:mime	application/x-compress
+!:apple	LZIVZIVU
 >2	byte&0x80	>0		block compressed
 >2	byte&0x1f	x		%d bits
 
@@ -76,6 +77,11 @@
 !:mime	application/x-bzip2
 >3	byte		>47		\b, block size = %c00k
 
+# lzip
+0	string		LZIP		lzip compressed data
+!:mime	application/x-lzip
+>4	byte		x		\b, version: %d
+
 # squeeze and crunch
 # Michael Haardt <michael@cantor.informatik.rwth-aachen.de>
 0	beshort		0x76FF		squeezed data,

+ 4 - 2
magic/Magdir/elf

@@ -143,11 +143,13 @@
 >>18	leshort		91		picoJava,
 >>18	leshort		92		OpenRISC,
 >>18	leshort		93		ARC Cores Tangent-A5,
->>18	leshort		0x3426		OpenRISC (obsolete),
->>18	leshort		0x8472		OpenRISC (obsolete),
 >>18	leshort		94		Tensilica Xtensa,
 >>18	leshort		97		NatSemi 32k,
 >>18	leshort		106		Analog Devices Blackfin,
+>>18	leshort		113		Altera Nios II,
+>>18	leshort		0xae		META,
+>>18	leshort		0x3426		OpenRISC (obsolete),
+>>18	leshort		0x8472		OpenRISC (obsolete),
 >>18	leshort		0x9026		Alpha (unofficial),
 >>20	lelong		0		invalid version
 >>20	lelong		1		version 1

+ 8 - 7
magic/Magdir/epoc

@@ -1,10 +1,11 @@
-
 #------------------------------------------------------------------------------
-# Epoc 32 : file(1) magic for Epoc Documents [psion/osaris
+# EPOC : file(1) magic for EPOC documents [Psion Series 5/Osaris/Geofox 1]
 # Stefan Praszalowicz (hpicollo@worldnet.fr)
-#0	lelong		0x10000037	Epoc32
+# Useful information for improving this file can be found at:
+# http://software.frodo.looijaard.name/psiconv/formats/Index.html
+0	lelong		0x10000037
 >4	lelong		0x1000006D
->>8	lelong		0x1000007F	Word
->>8	lelong		0x10000088	Sheet
->>8	lelong		0x1000007D	Sketch
->>8	lelong		0x10000085	TextEd
+>>8	lelong		0x1000007F	Psion Word
+>>8	lelong		0x10000088	Psion Sheet
+>>8	lelong		0x1000007D	Psion Sketch
+>>8	lelong		0x10000085	Psion TextEd

+ 169 - 104
magic/Magdir/filesystems

@@ -104,9 +104,32 @@
 >>>346	string	des\ Betriebssystems	
 >>>>366	string	Betriebssystem\ nicht\ vorhanden	\b, Microsoft Windows XP MBR (german)
 >>>>>0x1B8	ulelong	>0				\b, Serial 0x%-.4x
->0x145	string	Default:\ F				\b, FREE-DOS MBR
+#>0x145	string	Default:\ F				\b, FREE-DOS MBR
+#>0x14B	string	Default:\ F				\b, FREE-DOS 1.0 MBR
+>0x145	search/7	Default:\ F			\b, FREE-DOS MBR
+#>>313		string	F0\ .\ .\ .			
+#>>>322		string	disk\ 1				
+#>>>>382	string	FAT3				
 >64	string	no\ active\ partition\ found	
 >>96	string	read\ error\ while\ reading\ drive	\b, FREE-DOS Beta 0.9 MBR
+# Ranish Partition Manager http://www.ranish.com/part/
+>387	search/4	\0\ Error!\r			
+>>378	search/7	Virus! 				
+>>>397	search/4	Booting\ 			
+>>>>408	search/4	HD1/\0	 			\b, Ranish MBR (
+>>>>>416	string	Writing\ changes...		\b2.37
+>>>>>>438	ubyte		x			\b,0x%x dots
+>>>>>>440	ubyte		>0			\b,virus check
+>>>>>>441	ubyte		>0			\b,partition %c
+#2.38,2.42,2.44
+>>>>>416	string	!Writing\ changes...		\b
+>>>>>>418	ubyte	1				\bvirus check,
+>>>>>>419	ubyte	x				\b0x%x seconds
+>>>>>>420	ubyte&0x0F	>0			\b,partition
+>>>>>>>420	ubyte&0x0F	<5			\b %x
+>>>>>>>420	ubyte&0x0F	0Xf			\b ask
+>>>>>420	ubyte		x			\b)
+#
 >271	string	Operating\ system\ loading 		
 >>296	string	error\r					\b, SYSLINUX MBR (2.10)
 # http://www.acronis.de/
@@ -124,18 +147,20 @@
 >0x40	string	SBML				
 # label with 11 characters of FAT 12 bit filesystem
 >>43	string	SMART\ BTMGR			
->>>430	string	SBMK\ Bad!\r			
->>>>3	string	SBM				\b, Smart Boot Manager
->>>>>6	string	>\0                             \b, version %s
+>>>430	string	SBMK\ Bad!\r			\b, Smart Boot Manager
+# OEM-ID not always "SBM"
+#>>>>3	string	SBM				
+>>>>6	string	>\0                             \b, version %s
 >382	string	XOSLLOADXCF			\b, eXtended Operating System Loader
 >6	string	LILO				\b, LInux i386 boot LOader
 >>120	string	LILO				\b, version 22.3.4 SuSe
 >>172	string	LILO				\b, version 22.5.8 Debian
-# updated by Joerg Jenderek
+# updated by Joerg Jenderek at Oct 2008
 # variables according to grub-0.97/stage1/stage1.S or
 # http://www.gnu.org/software/grub/manual/grub.html#Embedded-data
 # usual values are marked with comments to get only informations of strange GRUB loaders
->0		ulelong		0x009048EB	
+>342		search/60	\0Geom\0	
+#>0		ulelong		x		%x=0x009048EB ,	0x2a9048EB  0
 >>0x41		ubyte		<2		
 >>>0x3E		ubyte		>2		\b; GRand Unified Bootloader
 # 0x3 for 0.5.95,0.93,0.94,0.96 0x4 for 1.90 
@@ -178,15 +203,14 @@
 >3	string	BCDL				
 >>498	string	BCDL\ \ \ \ BIN			\b, Bootable CD Loader (1.50Z)
 # mbr partion table entries
-# OEM-ID not Microsoft,SYSLINUX,or MTOOLs
+# OEM-ID does not contain MicroSoft,NEWLDR,DOS,SYSLINUX,or MTOOLs
 >3			string		!MS
 >>3			string		!SYSLINUX
 >>>3			string		!MTOOL
+>>>>3			string		!NEWLDR
+>>>>>5			string		!DOS
 # not FAT (32 bit)
->>>>82			string		!FAT32	
-#not IO.SYS
->>>>>472		string		!IO\ \ \ \ \ \ SYS
->>>>>>480		string		!IO\ \ \ \ \ \ SYS
+>>>>>>82		string		!FAT32
 #not Linux kernel
 >>>>>>>514		string		!HdrS
 #not BeOS
@@ -272,6 +296,11 @@
 >>>>>>>>>(1.b+11)	ubyte	0xb			
 >>>>>>>>>>(1.b+12)	ubyte	0x56			
 >>>>>>>>>>(1.b+13)	ubyte	0xb4			\b, mkdosfs boot message display
+>214	string	Please\ try\ to\ install\ FreeDOS\ 	\b, DOS Emulator boot message display
+#>>244	string	from\ dosemu-freedos-*-bin.tgz\r	
+#>>>170	string	Sorry,\ could\ not\ load\ an\ 		
+#>>>>195	string	operating\ system.\r\n		
+#
 >103	string	This\ is\ not\ a\ bootable\ disk.\ 	
 >>132	string	Please\ insert\ a\ bootable\ 		
 >>>157	string	floppy\ and\r\n				
@@ -374,12 +403,22 @@
 >430	string	Datentr\204ger\ entfernen\xFF\r\n	
 >>454	string	Medienfehler\xFF\r\n			
 >>>469	string	Neustart:\ Taste\ dr\201cken\r		\b, Microsoft Windows XP Bootloader (4.german)
->>>>368		ubyte&0xDF	>0			
->>>>>368	string		x 			%-.5s
->>>>>>373	ubyte&0xDF	>0			
->>>>>>>373	string		x 			\b%-.3s
->>>>>376	ubyte&0xDF	>0			
->>>>>>376	string		x 			\b.%-.3s
+>>>>379		string		\0			
+>>>>>368	ubyte&0xDF	>0			
+>>>>>>368	string		x 			%-.5s
+>>>>>>>373	ubyte&0xDF	>0			
+>>>>>>>>373	string		x 			\b%-.3s
+>>>>>>376	ubyte&0xDF	>0			
+>>>>>>>376	string		x 			\b.%-.3s
+# variant
+>>>>417		ubyte&0xDF	>0			
+>>>>>417	string		x			%-.5s
+>>>>>>422	ubyte&0xDF	>0			
+>>>>>>>422	string		x 			\b%-.3s
+>>>>>425	ubyte&0xDF	>0			
+>>>>>>425	string		>\ 			\b.%-.3s
+#
+
 #>3	string	NTFS\ \ \ \ 				
 >389	string	Fehler\ beim\ Lesen\ 
 >>407	string	des\ Datentr\204gers
@@ -567,12 +606,27 @@
 >>>489	string	Any\ key\ to\ retry			\b, DR-DOS Bootloader
 >>471	string	Cannot\ load\ DOS\ 			
 >>487	string	press\ key\ to\ retry			\b, Open-DOS Bootloader
+#??
 >444	string	KERNEL\ \ SYS					
 >>314	string	BOOT\ error!				\b, FREE-DOS Bootloader
 >499	string	KERNEL\ \ SYS				
 >>305	string	BOOT\ err!\0				\b, Free-DOS Bootloader
 >449	string	KERNEL\ \ SYS				
 >>319	string	BOOT\ error!				\b, FREE-DOS 0.5 Bootloader
+#
+>449	string	Loading\ FreeDOS			
+>>0x1AF		ulelong		>0			\b, FREE-DOS 0.95,1.0 Bootloader
+>>>497		ubyte&0xDF	>0			
+>>>>497		string		x 			\b %-.6s
+>>>>>503	ubyte&0xDF	>0			
+>>>>>>503	string		x 			\b%-.1s
+>>>>>>>504	ubyte&0xDF	>0			
+>>>>>>>>504	string		x 			\b%-.1s
+>>>>505		ubyte&0xDF	>0			
+>>>>>505	string		x 			\b.%-.3s
+#
+>331	string	Error!.0				\b, FREE-DOS 1.0 bootloader
+#
 >125	string	Loading\ FreeDOS...\r			
 >>311	string	BOOT\ error!\r				\b, FREE-DOS bootloader
 >>>441		ubyte&0xDF	>0			
@@ -706,13 +760,7 @@
 #it also hangs with another message ("NF").
 >>>>>492		string		RENF		\b, FAT (12 bit)
 >>>>>495		string		RENF		\b, FAT (16 bit)
-# added by Joerg Jenderek
-# http://syslinux.zytor.com/iso.php
-0	ulelong	0x7c40eafa		isolinux Loader
-# http://syslinux.zytor.com/pxe.php
-0	ulelong	0x007c05ea		pxelinux Loader
-0	ulelong	0x60669c66		pxelinux Loader
-# loader end
+# x86 bootloader end
 # updated by Joerg Jenderek at Sep 2007
 >3	ubyte	0			
 #no active flag
@@ -732,6 +780,7 @@
 # older drives may use Near JuMP instruction E9 xx xx
 >0		lelong&0x009000EB	0x009000EB 
 >0		lelong&0x000000E9	0x000000E9 
+# minimal short forward jump found 03cx??
 # maximal short forward jump is 07fx
 >1		ubyte			<0xff	\b, code offset 0x%x
 # mtools-3.9.8/msdos.h
@@ -740,91 +789,92 @@
 >>11		uleshort&0x000f	x		
 >>>11		uleshort	<32769		
 >>>>11		uleshort	>31		
->>>>>3		string		>\0		\b, OEM-ID "%8.8s"
+>>>>>21		ubyte&0xf0	0xF0		
+>>>>>>3		string		>\0		\b, OEM-ID "%8.8s"
 #http://mirror.href.com/thestarman/asm/debug/debug2.htm#IHC
->>>>>>8		string		IHC		\b cached by Windows 9M
->>>>>11		uleshort	>512		\b, Bytes/sector %u
-#>>>>>11	uleshort	=512		\b, Bytes/sector %u=512 (usual)
->>>>>11		uleshort	<512		\b, Bytes/sector %u
->>>>>13		ubyte		>1		\b, sectors/cluster %u
-#>>>>>13	ubyte		=1		\b, sectors/cluster %u (usual on Floppies)
->>>>>14		uleshort	>32		\b, reserved sectors %u
-#>>>>>14	uleshort	=32		\b, reserved sectors %u (usual Fat32)
-#>>>>>14	uleshort	>1		\b, reserved sectors %u
-#>>>>>14	uleshort	=1		\b, reserved sectors %u (usual FAT12,FAT16)
->>>>>14		uleshort	<1		\b, reserved sectors %u
->>>>>16		ubyte		>2		\b, FATs %u
-#>>>>>16	ubyte		=2		\b, FATs %u (usual)
->>>>>16		ubyte		=1		\b, FAT  %u
->>>>>16		ubyte		>0
->>>>>17		uleshort	>0		\b, root entries %u
-#>>>>>17	uleshort	=0		\b, root entries %u=0 (usual Fat32)
->>>>>19		uleshort	>0		\b, sectors %u (volumes <=32 MB) 
-#>>>>>19	uleshort	=0		\b, sectors %u=0 (usual Fat32)
->>>>>21		ubyte		>0xF0		\b, Media descriptor 0x%x
-#>>>>>21	ubyte		=0xF0		\b, Media descriptor 0x%x (usual floppy)
->>>>>21		ubyte		<0xF0		\b, Media descriptor 0x%x
->>>>>22		uleshort	>0		\b, sectors/FAT %u
-#>>>>>22	uleshort	=0		\b, sectors/FAT %u=0 (usual Fat32)
->>>>>26		ubyte		>2		\b, heads %u
-#>>>>>26	ubyte		=2		\b, heads %u (usual floppy)
->>>>>26		ubyte		=1		\b, heads %u
+>>>>>>>8	string		IHC		\b cached by Windows 9M
+>>>>>>11	uleshort	>512		\b, Bytes/sector %u
+#>>>>>>11	uleshort	=512		\b, Bytes/sector %u=512 (usual)
+>>>>>>11	uleshort	<512		\b, Bytes/sector %u
+>>>>>>13	ubyte		>1		\b, sectors/cluster %u
+#>>>>>>13	ubyte		=1		\b, sectors/cluster %u (usual on Floppies)
+>>>>>>14	uleshort	>32		\b, reserved sectors %u
+#>>>>>>14	uleshort	=32		\b, reserved sectors %u (usual Fat32)
+#>>>>>>14	uleshort	>1		\b, reserved sectors %u
+#>>>>>>14	uleshort	=1		\b, reserved sectors %u (usual FAT12,FAT16)
+>>>>>>14	uleshort	<1		\b, reserved sectors %u
+>>>>>>16	ubyte		>2		\b, FATs %u
+#>>>>>>16	ubyte		=2		\b, FATs %u (usual)
+>>>>>>16	ubyte		=1		\b, FAT  %u
+>>>>>>16	ubyte		>0
+>>>>>>17	uleshort	>0		\b, root entries %u
+#>>>>>>17	uleshort	=0		\b, root entries %u=0 (usual Fat32)
+>>>>>>19	uleshort	>0		\b, sectors %u (volumes <=32 MB) 
+#>>>>>>19	uleshort	=0		\b, sectors %u=0 (usual Fat32)
+>>>>>>21	ubyte		>0xF0		\b, Media descriptor 0x%x
+#>>>>>>21	ubyte		=0xF0		\b, Media descriptor 0x%x (usual floppy)
+>>>>>>21	ubyte		<0xF0		\b, Media descriptor 0x%x
+>>>>>>22	uleshort	>0		\b, sectors/FAT %u
+#>>>>>>22	uleshort	=0		\b, sectors/FAT %u=0 (usual Fat32)
+>>>>>>26	ubyte		>2		\b, heads %u
+#>>>>>>26	ubyte		=2		\b, heads %u (usual floppy)
+>>>>>>26	ubyte		=1		\b, heads %u
 #skip for Digital Research DOS (version 3.41) 1440 kB Bootdisk
->>>>>38		ubyte		!0x70		
->>>>>>28	ulelong		>0		\b, hidden sectors %u
-#>>>>>>28	ulelong		=0		\b, hidden sectors %u (usual floppy)
->>>>>>32	ulelong		>0		\b, sectors %u (volumes > 32 MB) 
-#>>>>>>32	ulelong		=0		\b, sectors %u (volumes > 32 MB)
+>>>>>>38	ubyte		!0x70		
+>>>>>>>28	ulelong		>0		\b, hidden sectors %u
+#>>>>>>>28	ulelong		=0		\b, hidden sectors %u (usual floppy)
+>>>>>>>32	ulelong		>0		\b, sectors %u (volumes > 32 MB) 
+#>>>>>>>32	ulelong		=0		\b, sectors %u (volumes > 32 MB)
 # FAT<32 specific 
->>>>>82		string		!FAT32
-#>>>>>>36	ubyte		0x80		\b, physical drive 0x%x=0x80 (usual harddisk)
-#>>>>>>36	ubyte		0		\b, physical drive 0x%x=0 (usual floppy)
->>>>>>36	ubyte		!0x80		
->>>>>>>36	ubyte		!0		\b, physical drive 0x%x
->>>>>>37	ubyte		>0		\b, reserved 0x%x
-#>>>>>>37	ubyte		=0		\b, reserved 0x%x
->>>>>>38	ubyte		>0x29		\b, dos < 4.0 BootSector (0x%x)
->>>>>>38	ubyte		<0x29		\b, dos < 4.0 BootSector (0x%x)
->>>>>>38	ubyte		=0x29
->>>>>>>39	ulelong		x		\b, serial number 0x%x
->>>>>>>43	string		<NO\ NAME	\b, label: "%11.11s"
->>>>>>>43	string		>NO\ NAME	\b, label: "%11.11s"
->>>>>>>43	string		=NO\ NAME	\b, unlabeled
->>>>>>54	string		FAT		\b, FAT
->>>>>>>54	string		FAT12		\b (12 bit)
->>>>>>>54	string		FAT16		\b (16 bit)
+>>>>>>82	string		!FAT32
+#>>>>>>>36	ubyte		0x80		\b, physical drive 0x%x=0x80 (usual harddisk)
+#>>>>>>>36	ubyte		0		\b, physical drive 0x%x=0 (usual floppy)
+>>>>>>>36	ubyte		!0x80		
+>>>>>>>>36	ubyte		!0		\b, physical drive 0x%x
+>>>>>>>37	ubyte		>0		\b, reserved 0x%x
+#>>>>>>>37	ubyte		=0		\b, reserved 0x%x
+>>>>>>>38	ubyte		>0x29		\b, dos < 4.0 BootSector (0x%x)
+>>>>>>>38	ubyte		<0x29		\b, dos < 4.0 BootSector (0x%x)
+>>>>>>>38	ubyte		=0x29
+>>>>>>>>39	ulelong		x		\b, serial number 0x%x
+>>>>>>>>43	string		<NO\ NAME	\b, label: "%11.11s"
+>>>>>>>>43	string		>NO\ NAME	\b, label: "%11.11s"
+>>>>>>>>43	string		=NO\ NAME	\b, unlabeled
+>>>>>>>54	string		FAT		\b, FAT
+>>>>>>>>54	string		FAT12		\b (12 bit)
+>>>>>>>>54	string		FAT16		\b (16 bit)
 # FAT32 specific
->>>>>82		string		FAT32		\b, FAT (32 bit)
->>>>>>36	ulelong		x		\b, sectors/FAT %u
->>>>>>40	uleshort	>0		\b, extension flags %u
-#>>>>>>40	uleshort	=0		\b, extension flags %u
->>>>>>42	uleshort	>0		\b, fsVersion %u
-#>>>>>>42	uleshort	=0		\b, fsVersion %u (usual)
->>>>>>44	ulelong		>2		\b, rootdir cluster %u
-#>>>>>>44	ulelong		=2		\b, rootdir cluster %u
-#>>>>>>44	ulelong		=1		\b, rootdir cluster %u
->>>>>>48	uleshort	>1		\b, infoSector %u
-#>>>>>>48	uleshort	=1		\b, infoSector %u (usual)
->>>>>>48	uleshort	<1		\b, infoSector %u
->>>>>>50	uleshort	>6		\b, Backup boot sector %u
-#>>>>>>50	uleshort	=6		\b, Backup boot sector %u (usual) 
->>>>>>50	uleshort	<6		\b, Backup boot sector %u
->>>>>>54	ulelong		>0		\b, reserved1 0x%x
->>>>>>58	ulelong		>0		\b, reserved2 0x%x
->>>>>>62	ulelong		>0		\b, reserved3 0x%x
+>>>>>>82	string		FAT32		\b, FAT (32 bit)
+>>>>>>>36	ulelong		x		\b, sectors/FAT %u
+>>>>>>>40	uleshort	>0		\b, extension flags %u
+#>>>>>>>40	uleshort	=0		\b, extension flags %u
+>>>>>>>42	uleshort	>0		\b, fsVersion %u
+#>>>>>>>42	uleshort	=0		\b, fsVersion %u (usual)
+>>>>>>>44	ulelong		>2		\b, rootdir cluster %u
+#>>>>>>>44	ulelong		=2		\b, rootdir cluster %u
+#>>>>>>>44	ulelong		=1		\b, rootdir cluster %u
+>>>>>>>48	uleshort	>1		\b, infoSector %u
+#>>>>>>>48	uleshort	=1		\b, infoSector %u (usual)
+>>>>>>>48	uleshort	<1		\b, infoSector %u
+>>>>>>>50	uleshort	>6		\b, Backup boot sector %u
+#>>>>>>>50	uleshort	=6		\b, Backup boot sector %u (usual) 
+>>>>>>>50	uleshort	<6		\b, Backup boot sector %u
+>>>>>>>54	ulelong		>0		\b, reserved1 0x%x
+>>>>>>>58	ulelong		>0		\b, reserved2 0x%x
+>>>>>>>62	ulelong		>0		\b, reserved3 0x%x
 # same structure as FAT1X 
->>>>>>64	ubyte		>0x80		\b, physical drive 0x%x
-#>>>>>>64	ubyte		=0x80		\b, physical drive 0x%x=80 (usual harddisk)
->>>>>>64	ubyte&0x7F	>0		\b, physical drive 0x%x
-#>>>>>>64	ubyte		=0		\b, physical drive 0x%x=0 (usual floppy)
->>>>>>65	ubyte		>0		\b, reserved 0x%x
->>>>>>66	ubyte		>0x29		\b, dos < 4.0 BootSector (0x%x)
->>>>>>66	ubyte		<0x29		\b, dos < 4.0 BootSector (0x%x)
->>>>>>66	ubyte		=0x29
->>>>>>>67	ulelong		x		\b, serial number 0x%x
->>>>>>>71	string		<NO\ NAME	\b, label: "%11.11s"
->>>>>>71	string		>NO\ NAME	\b, label: "%11.11s"
->>>>>>71	string		=NO\ NAME	\b, unlabeled
+>>>>>>>64	ubyte		>0x80		\b, physical drive 0x%x
+#>>>>>>>64	ubyte		=0x80		\b, physical drive 0x%x=80 (usual harddisk)
+>>>>>>>64	ubyte&0x7F	>0		\b, physical drive 0x%x
+#>>>>>>>64	ubyte		=0		\b, physical drive 0x%x=0 (usual floppy)
+>>>>>>>65	ubyte		>0		\b, reserved 0x%x
+>>>>>>>66	ubyte		>0x29		\b, dos < 4.0 BootSector (0x%x)
+>>>>>>>66	ubyte		<0x29		\b, dos < 4.0 BootSector (0x%x)
+>>>>>>>66	ubyte		=0x29
+>>>>>>>>67	ulelong		x		\b, serial number 0x%x
+>>>>>>>>71	string		<NO\ NAME	\b, label: "%11.11s"
+>>>>>>>71	string		>NO\ NAME	\b, label: "%11.11s"
+>>>>>>>71	string		=NO\ NAME	\b, unlabeled
 ### FATs end
 >0x200	lelong	0x82564557		\b, BSD disklabel
 # FATX 
@@ -854,6 +904,13 @@
 
 0x18b	string	OS/2	OS/2 Boot Manager
 
+# updated by Joerg Jenderek at Oct 2008!!
+# http://syslinux.zytor.com/iso.php
+0	ulelong	0x7c40eafa		isolinux Loader
+# http://syslinux.zytor.com/pxe.php
+0	ulelong	0x007c05ea		pxelinux Loader
+0	ulelong	0x60669c66		pxelinux Loader
+
 # added by Joerg Jenderek
 # In the second sector (+0x200) are variables according to grub-0.97/stage2/asm.S or
 # grub-1.94/kern/i386/pc/startup.S
@@ -1324,6 +1381,14 @@
 >0x10024        belong          x               (blocksize %d,
 >0x10060        string          >\0             lockproto %s)
 
+# BTRFS
+0x10040         string          _BHRfS_M        BTRFS Filesystem
+>0x1012b        string          >\0             (label "%s",
+>0x10090        lelong          x               sectorsize %d,
+>0x10094        lelong          x               nodesize %d,
+>0x10098        lelong          x               leafsize %d)
+
+
 # dvdisaster's .ecc
 # From: "Nelson A. de Oliveira" <naoliv@gmail.com>
 0	string	*dvdisaster*	dvdisaster error correction file

+ 8 - 5
magic/Magdir/graphviz

@@ -1,7 +1,10 @@
-
 #------------------------------------------------------------------------------
 # graphviz:  file(1) magic for http://www.graphviz.org/
-0	regex/100	[\r\n\t\ ]*graph[\r\n\t\ ]*.*\\{	graphviz graph text
-!:mime	text/vnd.graphviz
-0	regex/100	[\r\n\t\ ]*digraph[\r\n\t\ ]*.*\\{	graphviz digraph text
-!:mime	text/vnd.graphviz
+
+# FIXME: These patterns match too generally. For example, the first
+# line matches a LaTeX file containing the word "graph" (with a {
+# following later) and the second line matches this file.
+#0	regex/100	[\r\n\t\ ]*graph[\r\n\t\ ]+.*\\{	graphviz graph text
+#!:mime	text/vnd.graphviz
+#0	regex/100	[\r\n\t\ ]*digraph[\r\n\t\ ]+.*\\{	graphviz digraph text
+#!:mime	text/vnd.graphviz

+ 5 - 0
magic/Magdir/images

@@ -110,6 +110,7 @@
 # GIF
 0	string		GIF8		GIF image data
 !:mime	image/gif
+!:apple	8BIMGIFf
 >4	string		7a		\b, version 8%s,
 >4	string		9a		\b, version 8%s,
 >6	leshort		>0		%hd x
@@ -600,3 +601,7 @@
 # Wavelet Scalar Quantization format used in gray-scale fingerprint images
 # From Tano M Fotang <mfotang@quanteq.com>
 0	string	\xff\xa0\xff\xa8\x00	Wavelet Scalar Quantization image data
+
+# JPEG 2000 Code Stream Bitmap
+# From Petr Splichal <psplicha@redhat.com>
+0	string	\xFF\x4F\xFF\x51\x00	JPEG-2000 Code Stream Bitmap data

+ 1 - 0
magic/Magdir/jpeg

@@ -10,6 +10,7 @@
 #
 0	beshort		0xffd8		JPEG image data
 !:mime	image/jpeg
+!:apple	8BIMJPEG
 !:strength +1
 >6	string		JFIF		\b, JFIF standard
 # The following added by Erik Rossen <rossen@freesurf.ch> 1999-09-06

+ 1 - 1
magic/Magdir/mach

@@ -4,7 +4,7 @@
 # Java ByteCode, so they are both handled in the file "cafebabe".
 # The "feedface" ones are handled herein.
 #------------------------------------------------------------
-0	lelong&0xfeffffff	0xfeedface	Mach-O
+0	lelong&0xfffffffe	0xfeedface	Mach-O
 >0	byte		0xcf		64-bit
 >12	lelong		1		object
 >12	lelong		2		executable

+ 3 - 0
magic/Magdir/macintosh

@@ -11,6 +11,8 @@
 # Stuffit archives are the de facto standard of compression for Macintosh
 # files obtained from most archives. (franklsm@tuns.ca)
 0	string		SIT!			StuffIt Archive (data)
+!:mime	application/x-stuffit
+!:apple	SIT!SIT!
 >2	string		x			: %s
 0	string		SITD			StuffIt Deluxe (data)
 >2	string		x			: %s
@@ -20,6 +22,7 @@
 # Newer StuffIt archives (grant@netbsd.org)
 0	string		StuffIt			StuffIt Archive
 !:mime	application/x-stuffit
+!:apple	SIT!SIT!
 #>162	string		>0			: %s
 
 # Macintosh Applications and Installation binaries (franklsm@tuns.ca)

+ 32 - 17
magic/Magdir/msdos

@@ -4,15 +4,15 @@
 #
 
 # .BAT files (Daniel Quinlan, quinlan@yggdrasil.com)
-# updated by Joerg Jenderek
+# updated by Joerg Jenderek at Oct 2008
 0	string	@			
->1	string/cB	\ echo\ off	MS-DOS batch file text
+>1	string/cB	\ echo\ off	DOS batch file text
 !:mime	text/x-msdos-batch
->1	string/cB	echo\ off	MS-DOS batch file text
+>1	string/cB	echo\ off	DOS batch file text
 !:mime	text/x-msdos-batch
->1	string/cB	rem\ 		MS-DOS batch file text
+>1	string/cB	rem\ 		DOS batch file text
 !:mime	text/x-msdos-batch
->1	string/cB	set\ 		MS-DOS batch file text
+>1	string/cB	set\ 		DOS batch file text
 !:mime	text/x-msdos-batch
 
 
@@ -285,8 +285,9 @@
 # Uncommenting only the first two lines will cover about 2/3 of COM files,
 # but it isn't feasible to match all COM files since there must be at least
 # two dozen different one-byte "magics".
-#0	byte		0xe9		DOS executable (COM)
-#>0x1FE leshort		0xAA55		\b, boot code
+# test too generic ?
+0	byte		0xe9		DOS executable (COM)
+>0x1FE leshort		0xAA55		\b, boot code
 >6	string		SFX\ of\ LHarc	(%s)
 0	belong	0xffffffff		DOS executable (device driver)
 #CMD640X2.SYS
@@ -309,25 +310,38 @@
 >>77	string	>\x40			
 >>>77	string	<\x5B			
 >>>>77	string	x			\b, name: %.8s
-#0	byte		0x8c		DOS executable (COM)
-# 0xeb conflicts with "sequent" magic
-#0	byte		0xeb		DOS executable (COM)
-#>0x1FE leshort		0xAA55		\b, boot code
-#>85	string		UPX		\b, UPX compressed
-#>4	string		\ $ARX		\b, ARX self-extracting archive
-#>4	string		\ $LHarc	\b, LHarc self-extracting archive
-#>0x20e string		SFX\ by\ LARC	\b, LARC self-extracting archive
+# test too generic ?
+0	byte		0x8c		DOS executable (COM)
+# updated by Joerg Jenderek at Oct 2008
+0	ulelong		0xffff10eb	DR-DOS executable (COM)
+# byte 0xeb conflicts with "sequent" magic leshort 0xn2eb
+0	ubeshort&0xeb8d	>0xeb00		
+# DR-DOS STACKER.COM SCREATE.SYS missed
+>0	byte		0xeb		DOS executable (COM)
+>>0x1FE leshort		0xAA55		\b, boot code
+>>85	string		UPX		\b, UPX compressed
+>>4	string		\ $ARX		\b, ARX self-extracting archive
+>>4	string		\ $LHarc	\b, LHarc self-extracting archive
+>>0x20e string		SFX\ by\ LARC	\b, LARC self-extracting archive
+# updated by Joerg Jenderek at Oct 2008
 #0	byte		0xb8		COM executable
+0	uleshort&0x80ff	0x00b8		
 # modified by Joerg Jenderek
->1	lelong		!0x21cd4cff	for DOS
+>1	lelong		!0x21cd4cff	COM executable for DOS
 # http://syslinux.zytor.com/comboot.php
 # (32-bit COMBOOT) programs *.C32 contain 32-bit code and run in flat-memory 32-bit protected mode
 # start with assembler instructions mov eax,21cd4cffh
->1	lelong		0x21cd4cff	(32-bit COMBOOT)
+0	uleshort&0xc0ff	0xc0b8		
+>1	lelong		0x21cd4cff	COM executable (32-bit COMBOOT)
 0	string	\x81\xfc		
 >4	string	\x77\x02\xcd\x20\xb9	
 >>36	string	UPX!			FREE-DOS executable (COM), UPX compressed
 252	string Must\ have\ DOS\ version DR-DOS executable (COM)
+# added by Joerg Jenderek at Oct 2008
+# GRR search is not working
+#34	search/2	UPX!		FREE-DOS executable (COM), UPX compressed
+34	string	UPX!			FREE-DOS executable (COM), UPX compressed
+35	string	UPX!			FREE-DOS executable (COM), UPX compressed
 # GRR search is not working
 #2	search/28	\xcd\x21	COM executable for MS-DOS
 #WHICHFAT.cOM
@@ -564,6 +578,7 @@
 # From Stuart Caie <kyzer@4u.net> (developer of cabextract)
 # Microsoft Cabinet files
 0	string		MSCF\0\0\0\0	Microsoft Cabinet archive data
+!:mime application/vnd.ms-cab-compressed
 >8	lelong		x		\b, %u bytes
 >28	leshort		1		\b, 1 file
 >28	leshort		>1		\b, %u files

+ 0 - 1
magic/Magdir/perl

@@ -23,7 +23,6 @@
 # by Dmitry V. Levin and Alexey Tourbin
 # check the first line
 0	search/1	package
-0	regex		\^package[\ \t]+[A-Za-z_]
 >0	regex		\^package[\ \t]+[0-9A-Za-z_:]+\ *;	Perl5 module source text
 # not 'p', check other lines
 0	search/1	!p

+ 2 - 0
magic/Magdir/printer

@@ -6,6 +6,7 @@
 # PostScript, updated by Daniel Quinlan (quinlan@yggdrasil.com)
 0	string		%!		PostScript document text
 !:mime	application/postscript
+!:apple	ASPSTEXT
 >2	string		PS-Adobe-	conforming
 >>11	string		>\0		DSC level %.3s
 >>>15	string		EPS		\b, type %s
@@ -16,6 +17,7 @@
 # Some PCs have the annoying habit of adding a ^D as a document separator
 0	string		\004%!		PostScript document text
 !:mime	application/postscript
+!:apple	ASPSTEXT
 >3	string		PS-Adobe-	conforming
 >>12	string		>\0		DSC level %.3s
 >>>16	string		EPS		\b, type %s

+ 17 - 0
magic/Magdir/timezone

@@ -6,6 +6,23 @@
 # this should work on Linux, SunOS, and maybe others
 # Added new official magic number for recent versions of the Olson code
 0	string	TZif	timezone data
+>4	byte	0	\b, old version
+>4	byte	>0	\b, version %c
+>20	belong	0	\b, no gmt time flags
+>20	belong	1	\b, 1 gmt time flag
+>20	belong	>1	\b, %d gmt time flags
+>24	belong	0	\b, no std time flags
+>24	belong	1	\b, 1 std time flag
+>24	belong	>1	\b, %d std time flags
+>28	belong	0	\b, no leap seconds
+>28	belong	1	\b, 1 leap second
+>28	belong  >1	\b, %d leap seconds
+>32	belong	0	\b, no transition times
+>32	belong	1	\b, 1 transition time
+>32	belong  >1	\b, %d transition times
+>40	belong	0	\b, no abbreviation chars
+>40	belong	1	\b, 1 abbreviation char
+>40	belong	>1	\b, %d abbreviation chars
 0	string	\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1\0	old timezone data
 0	string	\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\2\0	old timezone data
 0	string  \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\3\0	old timezone data

+ 5 - 0
magic/Magdir/wireless

@@ -0,0 +1,5 @@
+#------------------------------------------------------------------------------
+# wireless-regdb:        file(1) magic for CRDA wireless-regdb file format
+#
+0	string	RGDB	CRDA wireless regulatory database file
+>4	belong	19	(Version 1)

+ 9 - 0
magic/Magdir/xwindows

@@ -23,3 +23,12 @@
 >24	long	x				%ldx
 >28	long	1008				YUV422]
 >28	long	1000				RGB24]
+
+# Xcursor data
+# X11 mouse cursor format defined in libXcursor, see
+# http://www.x.org/archive/X11R6.8.1/doc/Xcursor.3.html
+# http://cgit.freedesktop.org/xorg/lib/libXcursor/tree/include/X11/Xcursor/Xcursor.h
+0	string		Xcur		Xcursor data
+!:mime	image/x-xcursor
+>10	leshort		x		version %hd
+>>8	leshort		x		\b.%hd

+ 2 - 1
magic/Makefile.am

@@ -1,5 +1,5 @@
 #
-# $File: Makefile.am,v 1.43 2008/08/08 08:24:06 christos Exp $
+# $File: Makefile.am,v 1.44 2009/01/28 02:11:20 christos Exp $
 #
 MAGIC_FRAGMENT_BASE = Magdir
 MAGIC_FRAGMENT_DIR = $(top_srcdir)/magic/$(MAGIC_FRAGMENT_BASE)
@@ -209,6 +209,7 @@ $(MAGIC_FRAGMENT_DIR)/vxl \
 $(MAGIC_FRAGMENT_DIR)/warc \
 $(MAGIC_FRAGMENT_DIR)/weak \
 $(MAGIC_FRAGMENT_DIR)/windows \
+$(MAGIC_FRAGMENT_DIR)/wireless \
 $(MAGIC_FRAGMENT_DIR)/wordprocessors \
 $(MAGIC_FRAGMENT_DIR)/xdelta \
 $(MAGIC_FRAGMENT_DIR)/xenix \

+ 2 - 1
magic/Makefile.in

@@ -163,7 +163,7 @@ top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
 
 #
-# $File: Makefile.am,v 1.43 2008/08/08 08:24:06 christos Exp $
+# $File: Makefile.am,v 1.44 2009/01/28 02:11:20 christos Exp $
 #
 MAGIC_FRAGMENT_BASE = Magdir
 MAGIC_FRAGMENT_DIR = $(top_srcdir)/magic/$(MAGIC_FRAGMENT_BASE)
@@ -371,6 +371,7 @@ $(MAGIC_FRAGMENT_DIR)/vxl \
 $(MAGIC_FRAGMENT_DIR)/warc \
 $(MAGIC_FRAGMENT_DIR)/weak \
 $(MAGIC_FRAGMENT_DIR)/windows \
+$(MAGIC_FRAGMENT_DIR)/wireless \
 $(MAGIC_FRAGMENT_DIR)/wordprocessors \
 $(MAGIC_FRAGMENT_DIR)/xdelta \
 $(MAGIC_FRAGMENT_DIR)/xenix \

+ 2 - 2
src/Makefile.am

@@ -8,9 +8,9 @@ AM_CPPFLAGS = -DMAGIC='"$(MAGIC)"'
 AM_CFLAGS = @WARNINGS@
 
 libmagic_la_SOURCES = magic.c apprentice.c softmagic.c ascmagic.c \
-	compress.c is_tar.c readelf.c print.c fsmagic.c \
+	encoding.c compress.c is_tar.c readelf.c print.c fsmagic.c \
 	funcs.c file.h names.h patchlevel.h readelf.h tar.h apptype.c \
-	file_opts.h elfclass.h mygetopt.h
+	file_opts.h elfclass.h mygetopt.h cdf.c cdf_time.c readcdf.c cdf.h
 libmagic_la_LDFLAGS = -no-undefined -version-info 1:0:0
 libmagic_la_LIBADD = $(LTLIBOBJS)
 

+ 13 - 5
src/Makefile.in

@@ -37,7 +37,8 @@ host_triplet = @host@
 bin_PROGRAMS = file$(EXEEXT)
 subdir = src
 DIST_COMMON = $(include_HEADERS) $(srcdir)/Makefile.am \
-	$(srcdir)/Makefile.in asprintf.c getopt_long.c vasprintf.c
+	$(srcdir)/Makefile.in asprintf.c getopt_long.c strlcat.c \
+	strlcpy.c vasprintf.c
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
 	$(top_srcdir)/configure.ac
@@ -58,8 +59,9 @@ libLTLIBRARIES_INSTALL = $(INSTALL)
 LTLIBRARIES = $(lib_LTLIBRARIES)
 libmagic_la_DEPENDENCIES = $(LTLIBOBJS)
 am_libmagic_la_OBJECTS = magic.lo apprentice.lo softmagic.lo \
-	ascmagic.lo compress.lo is_tar.lo readelf.lo print.lo \
-	fsmagic.lo funcs.lo apptype.lo
+	ascmagic.lo encoding.lo compress.lo is_tar.lo readelf.lo \
+	print.lo fsmagic.lo funcs.lo apptype.lo cdf.lo cdf_time.lo \
+	readcdf.lo
 libmagic_la_OBJECTS = $(am_libmagic_la_OBJECTS)
 libmagic_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
@@ -201,9 +203,9 @@ include_HEADERS = magic.h
 AM_CPPFLAGS = -DMAGIC='"$(MAGIC)"'
 AM_CFLAGS = @WARNINGS@
 libmagic_la_SOURCES = magic.c apprentice.c softmagic.c ascmagic.c \
-	compress.c is_tar.c readelf.c print.c fsmagic.c \
+	encoding.c compress.c is_tar.c readelf.c print.c fsmagic.c \
 	funcs.c file.h names.h patchlevel.h readelf.h tar.h apptype.c \
-	file_opts.h elfclass.h mygetopt.h
+	file_opts.h elfclass.h mygetopt.h cdf.c cdf_time.c readcdf.c cdf.h
 
 libmagic_la_LDFLAGS = -no-undefined -version-info 1:0:0
 libmagic_la_LIBADD = $(LTLIBOBJS)
@@ -311,17 +313,23 @@ distclean-compile:
 
 @AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/asprintf.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/getopt_long.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/strlcat.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/strlcpy.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/vasprintf.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/apprentice.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/apptype.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ascmagic.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cdf.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cdf_time.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compress.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/encoding.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/file.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fsmagic.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/funcs.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/is_tar.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/magic.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/print.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/readcdf.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/readelf.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/softmagic.Plo@am__quote@
 

+ 90 - 27
src/apprentice.c

@@ -30,6 +30,11 @@
  */
 
 #include "file.h"
+
+#ifndef	lint
+FILE_RCSID("@(#)$File: apprentice.c,v 1.147 2009/02/03 20:27:51 christos Exp $")
+#endif	/* lint */
+
 #include "magic.h"
 #include "patchlevel.h"
 #include <stdlib.h>
@@ -40,18 +45,11 @@
 #include <assert.h>
 #include <ctype.h>
 #include <fcntl.h>
-#include <sys/stat.h>
-#include <sys/param.h>
 #ifdef QUICK
 #include <sys/mman.h>
 #endif
-#include <sys/types.h>
 #include <dirent.h>
 
-#ifndef	lint
-FILE_RCSID("@(#)$File: apprentice.c,v 1.140 2008/07/20 04:02:15 christos Exp $")
-#endif	/* lint */
-
 #define	EATAB {while (isascii((unsigned char) *l) && \
 		      isspace((unsigned char) *l))  ++l;}
 #define LOWCASE(l) (isupper((unsigned char) (l)) ? \
@@ -106,7 +104,7 @@ private void bs1(struct magic *);
 private uint16_t swap2(uint16_t);
 private uint32_t swap4(uint32_t);
 private uint64_t swap8(uint64_t);
-private void mkdbname(const char *, char **, int);
+private char *mkdbname(struct magic_set *, const char *, int);
 private int apprentice_map(struct magic_set *, struct magic **, uint32_t *,
     const char *);
 private int apprentice_compile(struct magic_set *, struct magic **, uint32_t *,
@@ -115,8 +113,8 @@ private int check_format_type(const char *, int);
 private int check_format(struct magic_set *, struct magic *);
 private int get_op(char);
 private int parse_mime(struct magic_set *, struct magic_entry *, const char *);
-private int parse_strength(struct magic_set *, struct magic_entry *,
-    const char *);
+private int parse_strength(struct magic_set *, struct magic_entry *, const char *);
+private int parse_apple(struct magic_set *, struct magic_entry *, const char *);
 
 
 private size_t maxmagic = 0;
@@ -131,6 +129,7 @@ private struct {
 } bang[] = {
 #define	DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
 	DECLARE_FIELD(mime),
+	DECLARE_FIELD(apple),
 	DECLARE_FIELD(strength),
 #undef	DECLARE_FIELD
 	{ NULL, 0, NULL }
@@ -215,6 +214,9 @@ static const struct type_tbl_s {
 	{ XX("double"),		FILE_DOUBLE,		FILE_FMT_DOUBLE },
 	{ XX("bedouble"),	FILE_BEDOUBLE,		FILE_FMT_DOUBLE },
 	{ XX("ledouble"),	FILE_LEDOUBLE,		FILE_FMT_DOUBLE },
+	{ XX("leid3"),		FILE_LEID3,		FILE_FMT_NUM },
+	{ XX("beid3"),		FILE_BEID3,		FILE_FMT_NUM },
+	{ XX("indirect"),	FILE_INDIRECT,		FILE_FMT_NONE },
 	{ XX_NULL,		FILE_INVALID,		FILE_FMT_NONE },
 # undef XX
 # undef XX_NULL
@@ -589,7 +591,8 @@ set_test_type(struct magic *mstart, struct magic *m)
 	case FILE_REGEX:
 	case FILE_SEARCH:
 		/* binary test if pattern is not text */
-		if (file_looks_utf8(m->value.us, m->vallen, NULL, NULL) <= 0)
+		if (file_looks_utf8(m->value.us, (size_t)m->vallen, NULL,
+		    NULL) <= 0)
 			mstart->flag |= BINTEST;
 		break;
 	case FILE_DEFAULT:
@@ -704,6 +707,8 @@ apprentice_load(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
 		(void)fprintf(stderr, "%s\n", usg_hdr);
 
 	/* load directory or file */
+        /* FIXME: Read file names and sort them to prevent
+           non-determinism. See Debian bug #488562. */
 	if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) {
 		dir = opendir(fn);
 		if (dir) {
@@ -868,6 +873,7 @@ file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
 		case FILE_REGEX:
 		case FILE_SEARCH:
 		case FILE_DEFAULT:
+		case FILE_INDIRECT:
 			break;
 		default:
 			if (ms->flags & MAGIC_CHECK)
@@ -1184,6 +1190,12 @@ parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp,
 			case 'G':
 				m->in_type = FILE_BEDOUBLE;
 				break;
+			case 'i':
+				m->in_type = FILE_LEID3;
+				break;
+			case 'I':
+				m->in_type = FILE_BEID3;
+				break;
 			default:
 				if (ms->flags & MAGIC_CHECK)
 					file_magwarn(ms,
@@ -1473,6 +1485,38 @@ out:
 }
 
 /*
+ * Parse an Apple CREATOR/TYPE annotation from the magic file and store it
+ * in the entry's last magic line (me->mp[cont_count - 1], or mp[0]).
+ * Returns 0 on success, -1 if a type is already set or none was found.
+ */
+private int
+parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line)
+{
+	size_t i;
+	const char *l = line;
+	struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
+
+	if (m->apple[0] != '\0') {
+		/* Report the APPLE type already present, not the MIME type */
+		file_magwarn(ms, "Current entry already has a APPLE type `%.8s',"
+		    " new type `%s'", m->apple, l);
+		return -1;
+	}
+
+	EATAB;
+	/* Copy type characters; stop once the fixed-size field is full */
+	for (i = 0; *l && ((isascii((unsigned char)*l) && isalnum((unsigned char)*l))
+	     || strchr("-+/.", *l)) && i < sizeof(m->apple); m->apple[i++] = *l++)
+		continue;
+	if (i == sizeof(m->apple) && *l) {
+		if (ms->flags & MAGIC_CHECK)
+			file_magwarn(ms, "APPLE type `%s' truncated %zu",
+			    line, i);
+	}
+	return i > 0 ? 0 : -1;
+}
+
+/*
  * parse a MIME annotation line from magic file, put into magic[index - 1]
  * if valid
  */
@@ -1490,10 +1534,8 @@ parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line)
 	}	
 
 	EATAB;
-	for (i = 0;
-	     *l && ((isascii((unsigned char)*l) && isalnum((unsigned char)*l))
-	     || strchr("-+/.", *l)) && i < sizeof(m->mimetype);
-	     m->mimetype[i++] = *l++)
+	for (i = 0; *l && ((isascii((unsigned char)*l) && isalnum((unsigned char)*l))
+	     || strchr("-+/.", *l)) && i < sizeof(m->mimetype); m->mimetype[i++] = *l++)
 		continue;
 	if (i == sizeof(m->mimetype)) {
 		m->desc[sizeof(m->mimetype) - 1] = '\0';
@@ -2014,7 +2056,7 @@ apprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
 	char *dbname = NULL;
 	void *mm = NULL;
 
-	mkdbname(fn, &dbname, 0);
+	dbname = mkdbname(ms, fn, 0);
 	if (dbname == NULL)
 		goto error2;
 
@@ -2111,7 +2153,7 @@ apprentice_compile(struct magic_set *ms, struct magic **magicp,
 	char *dbname;
 	int rv = -1;
 
-	mkdbname(fn, &dbname, 1);
+	dbname = mkdbname(ms, fn, 1);
 
 	if (dbname == NULL) 
 		goto out;
@@ -2149,24 +2191,45 @@ private const char ext[] = ".mgc";
 /*
- * make a dbname
+ * Build the name of the compiled magic database for fn: ext (".mgc") is
+ * appended unless fn already ends in it.  If strip is set, everything up
+ * to the last '/' is dropped first.  Returns a string allocated by
+ * asprintf, or NULL if that allocation fails.
  */
-private void
-mkdbname(const char *fn, char **buf, int strip)
+private char *
+mkdbname(struct magic_set *ms, const char *fn, int strip)
 {
 	const char *p;
+	char *buf;
+	size_t len, stem;
+
 	if (strip) {
 		if ((p = strrchr(fn, '/')) != NULL)
 			fn = ++p;
 	}
 
-	if ((p = strstr(fn, ext)) != NULL && p[sizeof(ext) - 1] == '\0')
-		*buf = strdup(fn);
-	else
-		(void)asprintf(buf, "%s%s", fn, ext);
+	/* stem is the length of fn without a trailing ".mgc", if present */
+	len = stem = strlen(fn);
+	if (len >= sizeof(ext) - 1 &&
+	    strcmp(fn + len - (sizeof(ext) - 1), ext) == 0)
+		stem = len - (sizeof(ext) - 1);
 
-	if (buf && *buf && strlen(*buf) > MAXPATHLEN) {
-		free(*buf);
-		*buf = NULL;
+	/* Compatibility with old code that looked in .mime */
+	if (ms->flags & MAGIC_MIME) {
+		if (asprintf(&buf, "%.*s.mime%s", (int)stem, fn, ext) < 0)
+			return NULL;
+		if (access(buf, R_OK) != -1) {
+			ms->flags &= MAGIC_MIME_TYPE;
+			return buf;
+		}
+		free(buf);
 	}
+	/* asprintf leaves buf undefined on failure, so report NULL instead */
+	if (asprintf(&buf, "%.*s%s", (int)stem, fn, ext) < 0)
+		return NULL;
+
+	/* Compatibility with old code that looked in .mime */
+	if (strstr(fn, ".mime") != NULL)
+		ms->flags &= MAGIC_MIME_TYPE;
+	return buf;
 }
 
 /*

+ 4 - 6
src/apptype.c

@@ -26,15 +26,13 @@
 
 #include "file.h"
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-
 #ifndef	lint
-FILE_RCSID("@(#)$File: apptype.c,v 1.7 2007/01/12 17:38:27 christos Exp $")
+FILE_RCSID("@(#)$File: apptype.c,v 1.10 2009/02/03 20:27:51 christos Exp $")
 #endif /* lint */
 
+#include <stdlib.h>
+#include <string.h>
+
 #ifdef __EMX__
 #include <io.h>
 #define INCL_DOSSESMGR

+ 76 - 480
src/ascmagic.c

@@ -2,7 +2,7 @@
  * Copyright (c) Ian F. Darwin 1986-1995.
  * Software written by Ian F. Darwin and others;
  * maintained 1995-present by Christos Zoulas and others.
- * 
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -12,7 +12,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- *  
+ *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -31,14 +31,15 @@
  *
  * Extensively modified by Eric Fischer <enf@pobox.com> in July, 2000,
  * to handle character codes other than ASCII on a unified basis.
- *
- * Joerg Wunsch <joerg@freebsd.org> wrote the original support for 8-bit
- * international characters, now subsumed into this file.
  */
 
 #include "file.h"
+
+#ifndef	lint
+FILE_RCSID("@(#)$File: ascmagic.c,v 1.75 2009/02/03 20:27:51 christos Exp $")
+#endif	/* lint */
+
 #include "magic.h"
-#include <stdio.h>
 #include <string.h>
 #include <memory.h>
 #include <ctype.h>
@@ -48,39 +49,71 @@
 #endif
 #include "names.h"
 
-#ifndef	lint
-FILE_RCSID("@(#)$File: ascmagic.c,v 1.64 2008/07/16 18:00:57 christos Exp $")
-#endif	/* lint */
-
 #define MAXLINELEN 300	/* longest sane line length */
 #define ISSPC(x) ((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' \
 		  || (x) == 0x85 || (x) == '\f')
 
-private int looks_ascii(const unsigned char *, size_t, unichar *, size_t *);
-private int looks_utf8_with_BOM(const unsigned char *, size_t, unichar *,
-    size_t *);
-private int looks_ucs16(const unsigned char *, size_t, unichar *, size_t *);
-private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *);
-private int looks_extended(const unsigned char *, size_t, unichar *, size_t *);
-private void from_ebcdic(const unsigned char *, size_t, unsigned char *);
 private int ascmatch(const unsigned char *, const unichar *, size_t);
 private unsigned char *encode_utf8(unsigned char *, size_t, unichar *, size_t);
+private size_t trim_nuls(const unsigned char *, size_t);
 
+/*
+ * Return nbytes with trailing NUL bytes of buf discounted (undoing the
+ * NUL-termination done by process()); never reduces it below one byte.
+ */
+private size_t
+trim_nuls(const unsigned char *buf, size_t nbytes)
+{
+	while (nbytes > 1 && buf[nbytes - 1] == '\0')
+		nbytes--;
+
+	return nbytes;
+}
 
 protected int
 file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
 {
-	size_t i;
-	unsigned char *nbuf = NULL, *utf8_buf = NULL, *utf8_end;
-	unichar *ubuf = NULL;	
-	size_t ulen, mlen;
-	const struct names *p;
-	int rv = -1;
-	int mime = ms->flags & MAGIC_MIME;
+	unichar *ubuf = NULL;
+	size_t ulen;
+	int rv = 1;
 
 	const char *code = NULL;
 	const char *code_mime = NULL;
 	const char *type = NULL;
+
+	if (ms->flags & MAGIC_APPLE)
+		return 0;
+
+	nbytes = trim_nuls(buf, nbytes);
+
+	/* If file doesn't look like any sort of text, give up. */
+	if (file_encoding(ms, buf, nbytes, &ubuf, &ulen, &code, &code_mime,
+	    &type) == 0) {
+		rv = 0;
+		goto done;
+	}
+
+	rv = file_ascmagic_with_encoding(ms, buf, nbytes, ubuf, ulen, code, 
+	    type);
+
+ done:
+	if (ubuf)
+		free(ubuf);
+
+	return rv;
+}
+
+protected int
+file_ascmagic_with_encoding(struct magic_set *ms, const unsigned char *buf,
+    size_t nbytes, unichar *ubuf, size_t ulen, const char *code,
+    const char *type)
+{
+	unsigned char *utf8_buf = NULL, *utf8_end;
+	size_t mlen, i;
+	const struct names *p;
+	int rv = -1;
+	int mime = ms->flags & MAGIC_MIME;
+
 	const char *subtype = NULL;
 	const char *subtype_mime = NULL;
 
@@ -96,82 +129,20 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
 	size_t last_line_end = (size_t)-1;
 	int has_long_lines = 0;
 
-	/*
-	 * Undo the NUL-termination kindly provided by process()
-	 * but leave at least one byte to look at
-	 */
-	while (nbytes > 1 && buf[nbytes - 1] == '\0')
-		nbytes--;
-
-	if ((nbuf = CAST(unsigned char *, calloc((size_t)1,
-	    (nbytes + 1) * sizeof(nbuf[0])))) == NULL)
-		goto done;
-	if ((ubuf = CAST(unichar *, calloc((size_t)1,
-	    (nbytes + 1) * sizeof(ubuf[0])))) == NULL)
-		goto done;
+	if (ms->flags & MAGIC_APPLE)
+		return 0;
 
-	/*
-	 * Then try to determine whether it's any character code we can
-	 * identify.  Each of these tests, if it succeeds, will leave
-	 * the text converted into one-unichar-per-character Unicode in
-	 * ubuf, and the number of characters converted in ulen.
-	 */
-	if (looks_ascii(buf, nbytes, ubuf, &ulen)) {
-		code = "ASCII";
-		code_mime = "us-ascii";
-		type = "text";
-	} else if (looks_utf8_with_BOM(buf, nbytes, ubuf, &ulen) > 0) {
-		code = "UTF-8 Unicode (with BOM)";
-		code_mime = "utf-8";
-		type = "text";
-	} else if (file_looks_utf8(buf, nbytes, ubuf, &ulen) > 1) {
-		code = "UTF-8 Unicode";
-		code_mime = "utf-8";
-		type = "text";
-	} else if ((i = looks_ucs16(buf, nbytes, ubuf, &ulen)) != 0) {
-		if (i == 1)
-			code = "Little-endian UTF-16 Unicode";
-		else
-			code = "Big-endian UTF-16 Unicode";
-
-		type = "character data";
-		code_mime = "utf-16";    /* is this defined? */
-	} else if (looks_latin1(buf, nbytes, ubuf, &ulen)) {
-		code = "ISO-8859";
-		type = "text";
-		code_mime = "iso-8859-1"; 
-	} else if (looks_extended(buf, nbytes, ubuf, &ulen)) {
-		code = "Non-ISO extended-ASCII";
-		type = "text";
-		code_mime = "unknown";
-	} else {
-		from_ebcdic(buf, nbytes, nbuf);
-
-		if (looks_ascii(nbuf, nbytes, ubuf, &ulen)) {
-			code = "EBCDIC";
-			type = "character data";
-			code_mime = "ebcdic";
-		} else if (looks_latin1(nbuf, nbytes, ubuf, &ulen)) {
-			code = "International EBCDIC";
-			type = "character data";
-			code_mime = "ebcdic";
-		} else {
-			rv = 0;
-			goto done;  /* doesn't look like text at all */
-		}
-	}
+	nbytes = trim_nuls(buf, nbytes);
 
+	/* If we have fewer than 2 bytes, give up. */
 	if (nbytes <= 1) {
 		rv = 0;
 		goto done;
 	}
 
 	/* Convert ubuf to UTF-8 and try text soft magic */
-	/* If original was ASCII or UTF-8, could use nbuf instead of
-	   re-converting. */
 	/* malloc size is a conservative overestimate; could be
-	   re-converting improved, or at least realloced after
-	   re-converting conversion. */
+	   improved, or at least realloced after conversion. */
 	mlen = ulen * 6;
 	if ((utf8_buf = CAST(unsigned char *, malloc(mlen))) == NULL) {
 		file_oomem(ms, mlen);
@@ -179,10 +150,11 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
 	}
 	if ((utf8_end = encode_utf8(utf8_buf, mlen, ubuf, ulen)) == NULL)
 		goto done;
-	if (file_softmagic(ms, utf8_buf, utf8_end - utf8_buf, TEXTTEST) != 0) {
-		rv = 1;
+	if ((rv = file_softmagic(ms, utf8_buf, (size_t)(utf8_end - utf8_buf),
+	    TEXTTEST)) != 0)
 		goto done;
-	}
+	else
+		rv = -1;
 
 	/* look for tokens from names.h - this is expensive! */
 	if ((ms->flags & MAGIC_NO_CHECK_TOKENS) != 0)
@@ -255,41 +227,30 @@ subtype_identified:
 	if (seen_cr && nbytes < HOWMANY)
 		n_cr++;
 
+	if (strcmp(type, "binary") == 0) {
+		rv = 0;
+		goto done;
+	}
 	if (mime) {
-		if (mime & MAGIC_MIME_TYPE) {
+		if ((mime & MAGIC_MIME_TYPE) != 0) {
 			if (subtype_mime) {
-				if (file_printf(ms, subtype_mime) == -1)
+				if (file_printf(ms, "%s", subtype_mime) == -1)
 					goto done;
 			} else {
 				if (file_printf(ms, "text/plain") == -1)
 					goto done;
 			}
 		}
-
-		if ((mime == 0 || mime == MAGIC_MIME) && code_mime) {
-			if ((mime & MAGIC_MIME_TYPE) &&
-			    file_printf(ms, " charset=") == -1)
-				goto done;
-			if (file_printf(ms, code_mime) == -1)
-				goto done;
-		}
-
-		if (mime == MAGIC_MIME_ENCODING)
-			file_printf(ms, "binary");
 	} else {
-		if (file_printf(ms, code) == -1)
+		if (file_printf(ms, "%s", code) == -1)
 			goto done;
 
 		if (subtype) {
-			if (file_printf(ms, " ") == -1)
-				goto done;
-			if (file_printf(ms, subtype) == -1)
+			if (file_printf(ms, " %s", subtype) == -1)
 				goto done;
 		}
 
-		if (file_printf(ms, " ") == -1)
-			goto done;
-		if (file_printf(ms, type) == -1)
+		if (file_printf(ms, " %s", type) == -1)
 			goto done;
 
 		if (has_long_lines)
@@ -305,7 +266,7 @@ subtype_identified:
 			if (file_printf(ms, ", with") == -1)
 				goto done;
 
-			if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0)			{
+			if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) {
 				if (file_printf(ms, " no") == -1)
 					goto done;
 			} else {
@@ -348,10 +309,6 @@ subtype_identified:
 	}
 	rv = 1;
 done:
-	if (nbuf)
-		free(nbuf);
-	if (ubuf)
-		free(ubuf);
 	if (utf8_buf)
 		free(utf8_buf);
 
@@ -375,144 +332,6 @@ ascmatch(const unsigned char *s, const unichar *us, size_t ulen)
 }
 
 /*
- * This table reflects a particular philosophy about what constitutes
- * "text," and there is room for disagreement about it.
- *
- * Version 3.31 of the file command considered a file to be ASCII if
- * each of its characters was approved by either the isascii() or
- * isalpha() function.  On most systems, this would mean that any
- * file consisting only of characters in the range 0x00 ... 0x7F
- * would be called ASCII text, but many systems might reasonably
- * consider some characters outside this range to be alphabetic,
- * so the file command would call such characters ASCII.  It might
- * have been more accurate to call this "considered textual on the
- * local system" than "ASCII."
- *
- * It considered a file to be "International language text" if each
- * of its characters was either an ASCII printing character (according
- * to the real ASCII standard, not the above test), a character in
- * the range 0x80 ... 0xFF, or one of the following control characters:
- * backspace, tab, line feed, vertical tab, form feed, carriage return,
- * escape.  No attempt was made to determine the language in which files
- * of this type were written.
- *
- *
- * The table below considers a file to be ASCII if all of its characters
- * are either ASCII printing characters (again, according to the X3.4
- * standard, not isascii()) or any of the following controls: bell,
- * backspace, tab, line feed, form feed, carriage return, esc, nextline.
- *
- * I include bell because some programs (particularly shell scripts)
- * use it literally, even though it is rare in normal text.  I exclude
- * vertical tab because it never seems to be used in real text.  I also
- * include, with hesitation, the X3.64/ECMA-43 control nextline (0x85),
- * because that's what the dd EBCDIC->ASCII table maps the EBCDIC newline
- * character to.  It might be more appropriate to include it in the 8859
- * set instead of the ASCII set, but it's got to be included in *something*
- * we recognize or EBCDIC files aren't going to be considered textual.
- * Some old Unix source files use SO/SI (^N/^O) to shift between Greek
- * and Latin characters, so these should possibly be allowed.  But they
- * make a real mess on VT100-style displays if they're not paired properly,
- * so we are probably better off not calling them text.
- *
- * A file is considered to be ISO-8859 text if its characters are all
- * either ASCII, according to the above definition, or printing characters
- * from the ISO-8859 8-bit extension, characters 0xA0 ... 0xFF.
- *
- * Finally, a file is considered to be international text from some other
- * character code if its characters are all either ISO-8859 (according to
- * the above definition) or characters in the range 0x80 ... 0x9F, which
- * ISO-8859 considers to be control characters but the IBM PC and Macintosh
- * consider to be printing characters.
- */
-
-#define F 0   /* character never appears in text */
-#define T 1   /* character appears in plain ASCII text */
-#define I 2   /* character appears in ISO-8859 text */
-#define X 3   /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
-
-private char text_chars[256] = {
-	/*                  BEL BS HT LF    FF CR    */
-	F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F,  /* 0x0X */
-        /*                              ESC          */
-	F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F,  /* 0x1X */
-	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x2X */
-	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x3X */
-	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x4X */
-	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x5X */
-	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x6X */
-	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F,  /* 0x7X */
-	/*            NEL                            */
-	X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X,  /* 0x8X */
-	X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,  /* 0x9X */
-	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xaX */
-	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xbX */
-	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xcX */
-	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xdX */
-	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xeX */
-	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I   /* 0xfX */
-};
-
-private int
-looks_ascii(const unsigned char *buf, size_t nbytes, unichar *ubuf,
-    size_t *ulen)
-{
-	size_t i;
-
-	*ulen = 0;
-
-	for (i = 0; i < nbytes; i++) {
-		int t = text_chars[buf[i]];
-
-		if (t != T)
-			return 0;
-
-		ubuf[(*ulen)++] = buf[i];
-	}
-
-	return 1;
-}
-
-private int
-looks_latin1(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
-{
-	size_t i;
-
-	*ulen = 0;
-
-	for (i = 0; i < nbytes; i++) {
-		int t = text_chars[buf[i]];
-
-		if (t != T && t != I)
-			return 0;
-
-		ubuf[(*ulen)++] = buf[i];
-	}
-
-	return 1;
-}
-
-private int
-looks_extended(const unsigned char *buf, size_t nbytes, unichar *ubuf,
-    size_t *ulen)
-{
-	size_t i;
-
-	*ulen = 0;
-
-	for (i = 0; i < nbytes; i++) {
-		int t = text_chars[buf[i]];
-
-		if (t != T && t != I && t != X)
-			return 0;
-
-		ubuf[(*ulen)++] = buf[i];
-	}
-
-	return 1;
-}
-
-/*
  * Encode Unicode string as UTF-8, returning pointer to character
  * after end of string, or NULL if an invalid character is found.
  */
@@ -568,226 +387,3 @@ encode_utf8(unsigned char *buf, size_t len, unichar *ubuf, size_t ulen)
 
 	return buf;
 }
-
-/*
- * Decide whether some text looks like UTF-8. Returns:
- *     -1: invalid UTF-8 (stray/missing continuation byte, or lead byte 0xfe/0xff)
- *      0: uses odd control characters, so doesn't look like text
- *      1: 7-bit text
- *      2: definitely UTF-8 text (valid high-bit set bytes)
- * A sequence cut off by the end of buf just ends the scan (not an error);
- * 5- and 6-byte forms are accepted and overlong forms are not rejected.
- * If ubuf is non-NULL on entry, text is decoded into ubuf, *ulen;
- * ubuf must be big enough (at most nbytes entries are written)!
- */
-protected int
-file_looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
-{
-	size_t i;
-	int n;
-	unichar c;
-	int gotone = 0, ctrl = 0;	/* saw a multi-byte char / saw a non-text 7-bit char */
-
-	if (ubuf)
-		*ulen = 0;	/* ulen is only touched when ubuf is supplied */
-
-	for (i = 0; i < nbytes; i++) {
-		if ((buf[i] & 0x80) == 0) {	   /* 0xxxxxxx is plain ASCII */
-			/*
-			 * Even if the whole file is valid UTF-8 sequences,
-			 * still reject it if it uses weird control characters.
-			 */
-
-			if (text_chars[buf[i]] != T)
-				ctrl = 1;	/* remembered; scanning continues */
-
-			if (ubuf)
-				ubuf[(*ulen)++] = buf[i];
-		} else if ((buf[i] & 0x40) == 0) { /* 10xxxxxx never 1st byte */
-			return -1;
-		} else {			   /* 11xxxxxx begins UTF-8 */
-			int following;	/* number of continuation bytes expected */
-
-			if ((buf[i] & 0x20) == 0) {		/* 110xxxxx */
-				c = buf[i] & 0x1f;
-				following = 1;
-			} else if ((buf[i] & 0x10) == 0) {	/* 1110xxxx */
-				c = buf[i] & 0x0f;
-				following = 2;
-			} else if ((buf[i] & 0x08) == 0) {	/* 11110xxx */
-				c = buf[i] & 0x07;
-				following = 3;
-			} else if ((buf[i] & 0x04) == 0) {	/* 111110xx */
-				c = buf[i] & 0x03;
-				following = 4;
-			} else if ((buf[i] & 0x02) == 0) {	/* 1111110x */
-				c = buf[i] & 0x01;
-				following = 5;
-			} else	/* 0xfe and 0xff are never valid lead bytes */
-				return -1;
-
-			for (n = 0; n < following; n++) {
-				i++;
-				if (i >= nbytes)	/* sequence truncated by end of buffer */
-					goto done;	/* the partial character is dropped */
-
-				if ((buf[i] & 0x80) == 0 || (buf[i] & 0x40))	/* not 10xxxxxx */
-					return -1;
-
-				c = (c << 6) + (buf[i] & 0x3f);	/* append 6 payload bits */
-			}
-
-			if (ubuf)
-				ubuf[(*ulen)++] = c;
-			gotone = 1;
-		}
-	}
-done:
-	return ctrl ? 0 : (gotone ? 2 : 1);	/* control characters outrank valid UTF-8 */
-}
-
-/*
- * Decide whether some text looks like UTF-8 with BOM. If there is no BOM
- * (or nothing follows it, since more than 3 bytes are required), return -1;
- * otherwise return the result of file_looks_utf8 on the rest of the text.
- */
-private int
-looks_utf8_with_BOM(const unsigned char *buf, size_t nbytes, unichar *ubuf,
-    size_t *ulen)	/* ubuf, *ulen are passed straight through */
-{
-	if (nbytes > 3 && buf[0] == 0xef && buf[1] == 0xbb && buf[2] == 0xbf)	/* EF BB BF */
-		return file_looks_utf8(buf + 3, nbytes - 3, ubuf, ulen);	/* BOM itself is skipped */
-	else
-		return -1;	/* *ulen is left untouched on this path */
-}
-
-private int	/* 0: not UCS-16 text, 1: little-endian, 2: big-endian */
-looks_ucs16(const unsigned char *buf, size_t nbytes, unichar *ubuf,
-    size_t *ulen)	/* ubuf, *ulen: receive the decoded 16-bit units */
-{
-	int bigend;
-	size_t i;
-
-	if (nbytes < 2)	/* too short to hold a byte-order mark */
-		return 0;
-
-	if (buf[0] == 0xff && buf[1] == 0xfe)	/* BOM stored low byte first */
-		bigend = 0;
-	else if (buf[0] == 0xfe && buf[1] == 0xff)	/* BOM stored high byte first */
-		bigend = 1;
-	else
-		return 0;	/* no BOM; note *ulen has not been written yet */
-
-	*ulen = 0;
-
-	for (i = 2; i + 1 < nbytes; i += 2) {	/* a trailing odd byte is ignored */
-		/* XXX fix to properly handle chars > 65536 */
-
-		if (bigend)
-			ubuf[(*ulen)++] = buf[i + 1] + 256 * buf[i];
-		else
-			ubuf[(*ulen)++] = buf[i] + 256 * buf[i + 1];
-
-		if (ubuf[*ulen - 1] == 0xfffe)	/* the unit just stored */
-			return 0;
-		if (ubuf[*ulen - 1] < 128 &&
-		    text_chars[(size_t)ubuf[*ulen - 1]] != T)	/* 7-bit values must be text */
-			return 0;
-	}
-
-	return 1 + bigend;
-}
-
-#undef F
-#undef T
-#undef I
-#undef X
-
-/*
- * This table maps each EBCDIC character to an (8-bit extended) ASCII
- * character, as specified in the rationale for the dd(1) command in
- * draft 11.2 (September, 1991) of the POSIX P1003.2 standard.
- *
- * Unfortunately it does not seem to correspond exactly to any of the
- * five variants of EBCDIC documented in IBM's _Enterprise Systems
- * Architecture/390: Principles of Operation_, SA22-7201-06, Seventh
- * Edition, July, 1999, pp. I-1 - I-4.
- *
- * Fortunately, though, all versions of EBCDIC, including this one, agree
- * on most of the printing characters that also appear in (7-bit) ASCII.
- * Of these, only '|', '!', '~', '^', '[', and ']' are in question at all.
- *
- * Fortunately too, there is general agreement that codes 0x00 through
- * 0x3F represent control characters, 0x41 a nonbreaking space, and the
- * remainder printing characters.
- *
- * This is sufficient to allow us to identify EBCDIC text and to distinguish
- * between old-style and internationalized examples of text.
- */
-
-private unsigned char ebcdic_to_ascii[] = {
-  0,   1,   2,   3, 156,   9, 134, 127, 151, 141, 142,  11,  12,  13,  14,  15,
- 16,  17,  18,  19, 157, 133,   8, 135,  24,  25, 146, 143,  28,  29,  30,  31,
-128, 129, 130, 131, 132,  10,  23,  27, 136, 137, 138, 139, 140,   5,   6,   7,
-144, 145,  22, 147, 148, 149, 150,   4, 152, 153, 154, 155,  20,  21, 158,  26,
-' ', 160, 161, 162, 163, 164, 165, 166, 167, 168, 213, '.', '<', '(', '+', '|',
-'&', 169, 170, 171, 172, 173, 174, 175, 176, 177, '!', '$', '*', ')', ';', '~',
-'-', '/', 178, 179, 180, 181, 182, 183, 184, 185, 203, ',', '%', '_', '>', '?',
-186, 187, 188, 189, 190, 191, 192, 193, 194, '`', ':', '#', '@', '\'','=', '"',
-195, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 196, 197, 198, 199, 200, 201,
-202, 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', '^', 204, 205, 206, 207, 208,
-209, 229, 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 210, 211, 212, '[', 214, 215,
-216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, ']', 230, 231,
-'{', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 232, 233, 234, 235, 236, 237,
-'}', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 238, 239, 240, 241, 242, 243,
-'\\',159, 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 244, 245, 246, 247, 248, 249,
-'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 250, 251, 252, 253, 254, 255
-};
-
-#ifdef notdef
-/*
- * The following EBCDIC-to-ASCII table may relate more closely to reality,
- * or at least to modern reality.  It comes from
- *
- *   http://ftp.s390.ibm.com/products/oe/bpxqp9.html
- *
- * and maps the characters of EBCDIC code page 1047 (the code used for
- * Unix-derived software on IBM's 390 systems) to the corresponding
- * characters from ISO 8859-1.
- *
- * If this table is used instead of the above one, some of the special
- * cases for the NEL character can be taken out of the code.
- */
-
-private unsigned char ebcdic_1047_to_8859[] = {
-0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F,
-0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F,
-0x80,0x81,0x82,0x83,0x84,0x85,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07,
-0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A,
-0x20,0xA0,0xE2,0xE4,0xE0,0xE1,0xE3,0xE5,0xE7,0xF1,0xA2,0x2E,0x3C,0x28,0x2B,0x7C,
-0x26,0xE9,0xEA,0xEB,0xE8,0xED,0xEE,0xEF,0xEC,0xDF,0x21,0x24,0x2A,0x29,0x3B,0x5E,
-0x2D,0x2F,0xC2,0xC4,0xC0,0xC1,0xC3,0xC5,0xC7,0xD1,0xA6,0x2C,0x25,0x5F,0x3E,0x3F,
-0xF8,0xC9,0xCA,0xCB,0xC8,0xCD,0xCE,0xCF,0xCC,0x60,0x3A,0x23,0x40,0x27,0x3D,0x22,
-0xD8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xAB,0xBB,0xF0,0xFD,0xFE,0xB1,
-0xB0,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xAA,0xBA,0xE6,0xB8,0xC6,0xA4,
-0xB5,0x7E,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xA1,0xBF,0xD0,0x5B,0xDE,0xAE,
-0xAC,0xA3,0xA5,0xB7,0xA9,0xA7,0xB6,0xBC,0xBD,0xBE,0xDD,0xA8,0xAF,0x5D,0xB4,0xD7,
-0x7B,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xAD,0xF4,0xF6,0xF2,0xF3,0xF5,
-0x7D,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xB9,0xFB,0xFC,0xF9,0xFA,0xFF,
-0x5C,0xF7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xB2,0xD4,0xD6,0xD2,0xD3,0xD5,
-0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xB3,0xDB,0xDC,0xD9,0xDA,0x9F
-};
-#endif
-
-/*
- * Translate buf[0 ... nbytes-1] via ebcdic_to_ascii[] into out[0 ... nbytes-1].
- */
-private void
-from_ebcdic(const unsigned char *buf, size_t nbytes, unsigned char *out)
-{
-	size_t i;
-
-	for (i = 0; i < nbytes; i++) {	/* byte-for-byte, so out needs nbytes bytes */
-		out[i] = ebcdic_to_ascii[buf[i]];	/* table is indexed by the EBCDIC code */
-	}
-}

+ 5 - 1
src/asprintf.c

@@ -26,7 +26,11 @@
  * SUCH DAMAGE.
  */
 
-#include <stdarg.h>
+#include "file.h"
+
+#ifndef lint
+FILE_RCSID("@(#)$File: asprintf.c,v 1.3 2009/02/03 20:27:51 christos Exp $")
+#endif
 
 int vasprintf(char **ptr, const char *format_string, va_list vargs);
 

File diff suppressed because it is too large
+ 1105 - 0
src/cdf.c


+ 298 - 0
src/cdf.h

@@ -0,0 +1,298 @@
+/*-
+ * Copyright (c) 2008 Christos Zoulas
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ * Info from: http://sc.openoffice.org/compdocfileformat.pdf 
+ */
+
+#ifndef _H_CDF_
+#define _H_CDF_
+
+typedef int32_t cdf_secid_t;
+
+#define CDF_LOOP_LIMIT					10000
+
+#define CDF_SECID_NULL					0
+#define CDF_SECID_FREE					-1
+#define	CDF_SECID_END_OF_CHAIN				-2
+#define	CDF_SECID_SECTOR_ALLOCATION_TABLE		-3
+#define CDF_SECID_MASTER_SECTOR_ALLOCATION_TABLE	-4
+
+typedef struct {	/* file header; the field sizes add up to 512 bytes */
+	uint64_t	h_magic;	/* presumably compared with CDF_MAGIC -- confirm in cdf.c */
+#define CDF_MAGIC	0xE11AB1A1E011CFD0LL
+	uint64_t	h_uuid[2];
+	uint16_t	h_revision;
+	uint16_t	h_version;
+	uint16_t	h_byte_order;
+	uint16_t	h_sec_size_p2;	/* log2 of the sector size, see CDF_SEC_SIZE() */
+	uint16_t	h_short_sec_size_p2;	/* log2 of the short-sector size, see CDF_SHORT_SEC_SIZE() */
+	uint8_t		h_unused0[10];
+	uint32_t	h_num_sectors_in_sat;
+	uint32_t	h_secid_first_directory;	/* NOTE(review): uint32_t, unlike the other sector ids (cdf_secid_t) */
+	uint8_t		h_unused1[4];
+	uint32_t	h_min_size_standard_stream;
+	cdf_secid_t	h_secid_first_sector_in_short_sat;
+	uint32_t	h_num_sectors_in_short_sat;
+	cdf_secid_t	h_secid_first_sector_in_master_sat;
+	uint32_t	h_num_sectors_in_master_sat;
+	cdf_secid_t	h_master_sat[436/4];	/* 109 entries */
+} cdf_header_t;
+
+#define CDF_SEC_SIZE(h)	(1 << (h)->h_sec_size_p2)	/* int shift: undefined once the exponent reaches the width of int, so validate the header first */
+#define CDF_SEC_POS(h, secid) (CDF_SEC_SIZE(h) + (secid) * CDF_SEC_SIZE(h))	/* byte position of sector secid; sector 0 starts one sector size into the file */
+#define CDF_SHORT_SEC_SIZE(h)	(1 << (h)->h_short_sec_size_p2)	/* same int-shift caveat as CDF_SEC_SIZE() */
+#define CDF_SHORT_SEC_POS(h, secid) ((secid) * CDF_SHORT_SEC_SIZE(h))	/* byte position of short sector secid; no leading offset is added */
+
+typedef int32_t	cdf_dirid_t;
+#define CDF_DIRID_NULL	-1
+
+typedef int64_t	cdf_timestamp_t;
+#define CDF_BASE_YEAR	1601
+#define CDF_TIME_PREC	10000000
+
+typedef struct {	/* one directory entry; the field sizes add up to 128 bytes */
+	uint16_t	d_name[32];	/* 16-bit units; NOTE(review): presumably UTF-16 -- confirm */
+	uint16_t	d_namelen;	/* NOTE(review): unit (bytes vs. characters) not visible here -- confirm in cdf.c */
+	uint8_t		d_type;	/* one of the CDF_DIR_TYPE_* values below */
+#define CDF_DIR_TYPE_EMPTY		0
+#define	CDF_DIR_TYPE_USER_STORAGE	1
+#define	CDF_DIR_TYPE_USER_STREAM	2
+#define	CDF_DIR_TYPE_LOCKBYTES		3
+#define	CDF_DIR_TYPE_PROPERTY		4
+#define	CDF_DIR_TYPE_ROOT_STORAGE	5
+	uint8_t		d_color;	/* one of the CDF_DIR_COLOR_* values below */
+#define CDF_DIR_COLOR_READ	0	/* NOTE(review): "READ" is presumably a typo for "RED" -- name kept for compatibility */
+#define CDF_DIR_COLOR_BLACK	1
+	cdf_dirid_t	d_left_child;
+	cdf_dirid_t	d_right_child;
+	cdf_dirid_t	d_storage;
+	uint64_t	d_storage_uuid[2];
+	uint32_t	d_flags;
+	cdf_timestamp_t	d_created;	/* see cdf_timestamp_to_timespec() for conversion */
+	cdf_timestamp_t	d_modified;
+	cdf_secid_t	d_stream_first_sector;
+	uint32_t	d_size;
+	uint32_t	d_unused0;
+} cdf_directory_t;
+
+#define CDF_DIRECTORY_SIZE	128	/* equals the summed field sizes of cdf_directory_t */
+
+typedef struct {	/* a sector allocation table */
+	cdf_secid_t *sat_tab;	/* array of sector ids */
+	size_t sat_len;	/* NOTE(review): unit (entries vs. sectors) not visible here -- confirm in cdf.c */
+} cdf_sat_t;
+
+typedef struct {	/* a table of directory entries */
+	cdf_directory_t *dir_tab;	/* array of entries */
+	size_t dir_len;	/* presumably the number of entries in dir_tab -- confirm in cdf.c */
+} cdf_dir_t;
+
+typedef struct {	/* contents of a stream */
+	void *sst_tab;	/* untyped data buffer */
+	size_t sst_len;	/* NOTE(review): unit (bytes vs. sectors) not visible here -- confirm in cdf.c */
+	size_t sst_dirlen;	/* NOTE(review): meaning not visible in this header -- confirm in cdf.c */
+} cdf_stream_t;
+
+typedef struct {	/* class identifier; the field sizes add up to 16 bytes */
+	uint32_t	cl_dword;
+	uint16_t	cl_word[2];
+	uint8_t		cl_two[2];
+	uint8_t		cl_six[6];	/* NOTE(review): the 4-2-2-2-6 split looks like a GUID layout -- confirm */
+} cdf_classid_t;
+
+typedef struct {	/* summary-information header; the field sizes add up to 28 bytes */
+	uint16_t	si_byte_order;
+	uint16_t	si_zero;
+	uint16_t	si_os_version;
+	uint16_t	si_os;
+	cdf_classid_t	si_class;
+	uint32_t	si_count;	/* presumably the number of section declarations that follow -- confirm */
+} cdf_summary_info_header_t;
+
+#define CDF_SECTION_DECLARATION_OFFSET 0x1c	/* 28: the summed field sizes of cdf_summary_info_header_t */
+
+typedef struct {	/* the field sizes add up to 20 bytes */
+	cdf_classid_t	sd_class;
+	uint32_t	sd_offset;	/* presumably the byte offset of the section in the stream -- confirm in cdf.c */
+} cdf_section_declaration_t;
+
+typedef struct {	/* the field sizes add up to 8 bytes */
+	uint32_t	sh_len;	/* presumably the section length in bytes -- confirm in cdf.c */
+	uint32_t	sh_properties;	/* presumably the number of properties in the section -- confirm */
+} cdf_section_header_t;
+
+typedef struct {	/* one decoded property: id, type tag and value */
+	uint32_t	pi_id;	/* presumably one of the CDF_PROPERTY_* ids -- confirm */
+	uint32_t	pi_type;	/* presumably a CDF_* variant type selecting the pi_val member -- confirm */
+	union {
+		uint16_t	_pi_u16;
+		int16_t		_pi_s16;
+		uint32_t	_pi_u32;
+		int32_t		_pi_s32;
+		uint64_t	_pi_u64;
+		int64_t		_pi_s64;
+		cdf_timestamp_t	_pi_tp;
+		struct {
+			uint32_t s_len;	/* length that goes with s_buf */
+			const char *s_buf;	/* NOTE(review): ownership and NUL termination are not visible here -- confirm */
+		} _pi_str;
+	} pi_val;
+#define pi_u64	pi_val._pi_u64	/* the pi_* macros are shorthands for the union members */
+#define pi_s64	pi_val._pi_s64
+#define pi_u32	pi_val._pi_u32
+#define pi_s32	pi_val._pi_s32
+#define pi_u16	pi_val._pi_u16
+#define pi_s16	pi_val._pi_s16
+#define pi_tp	pi_val._pi_tp
+#define pi_str	pi_val._pi_str
+} cdf_property_info_t;
+
+#define CDF_ROUND(val, by)     (((val) + (by) - 1) & ~((by) - 1))	/* round val up to a multiple of by; the mask only works when by is a power of two */
+
+/* Variant type definitions */
+#define CDF_EMPTY		0x00000000
+#define	CDF_NULL		0x00000001
+#define CDF_SIGNED16		0x00000002
+#define CDF_SIGNED32		0x00000003
+#define CDF_FLOAT		0x00000004
+#define CDF_DOUBLE		0x00000005
+#define CDF_CY			0x00000006
+#define	CDF_DATE		0x00000007
+#define CDF_BSTR		0x00000008
+#define CDF_DISPATCH		0x00000009
+#define CDF_ERROR		0x0000000a
+#define CDF_BOOL		0x0000000b
+#define CDF_VARIANT		0x0000000c
+#define CDF_UNKNOWN		0x0000000d
+#define CDF_DECIMAL		0x0000000e
+#define CDF_SIGNED8		0x00000010
+#define CDF_UNSIGNED8		0x00000011
+#define CDF_UNSIGNED16		0x00000012
+#define	CDF_UNSIGNED32		0x00000013
+#define CDF_SIGNED64		0x00000014
+#define CDF_UNSIGNED64		0x00000015
+#define CDF_INT			0x00000016
+#define CDF_UINT		0x00000017
+#define CDF_VOID		0x00000018
+#define CDF_HRESULT		0x00000019
+#define CDF_PTR			0x0000001a
+#define CDF_SAFEARRAY		0x0000001b
+#define CDF_CARRAY		0x0000001c
+#define CDF_USERDEFINED		0x0000001d
+#define CDF_LENGTH32_STRING	0x0000001e
+#define CDF_LENGTH32_WSTRING	0x0000001f
+#define CDF_FILETIME		0x00000040
+#define CDF_BLOB		0x00000041
+#define CDF_STREAM		0x00000042
+#define CDF_STORAGE		0x00000043
+#define CDF_STREAMED_OBJECT	0x00000044
+#define CDF_STORED_OBJECT	0x00000045
+#define CDF_BLOB_OBJECT		0x00000046
+#define CDF_CLIPBOARD		0x00000047
+#define CDF_CLSID		0x00000048
+#define CDF_VECTOR		0x00001000
+#define CDF_ARRAY		0x00002000
+#define CDF_BYREF		0x00004000
+#define CDF_RESERVED		0x00008000
+#define CDF_ILLEGAL		0x0000ffff
+#define CDF_ILLEGALMASKED	0x00000fff
+#define CDF_TYPEMASK		0x00000fff
+
+#define CDF_PROPERTY_CODE_PAGE			0x00000001
+#define CDF_PROPERTY_TITLE			0x00000002
+#define CDF_PROPERTY_SUBJECT			0x00000003
+#define CDF_PROPERTY_AUTHOR			0x00000004
+#define CDF_PROPERTY_KEYWORDS			0x00000005
+#define CDF_PROPERTY_COMMENTS 			0x00000006
+#define CDF_PROPERTY_TEMPLATE			0x00000007
+#define CDF_PROPERTY_LAST_SAVED_BY		0x00000008
+#define CDF_PROPERTY_REVISION_NUMBER		0x00000009
+#define CDF_PROPERTY_TOTAL_EDITING_TIME		0x0000000a
+#define CDF_PROPERTY_LAST_PRINTED		0X0000000b
+#define CDF_PROPERTY_CREATE_TIME		0x0000000c
+#define CDF_PROPERTY_LAST_SAVED_TIME		0x0000000d
+#define CDF_PROPERTY_NUMBER_OF_PAGES		0x0000000e
+#define CDF_PROPERTY_NUMBER_OF_WORDS		0x0000000f
+#define CDF_PROPERTY_NUMBER_OF_CHARACTERS	0x00000010
+#define CDF_PROPERTY_THUMBNAIL			0x00000011
+#define CDF_PROPERTY_NAME_OF_APPLICATION	0x00000012
+#define CDF_PROPERTY_SECURITY			0x00000013
+#define CDF_PROPERTY_LOCALE_ID			0x80000000
+
+struct timespec;
+int cdf_timestamp_to_timespec(struct timespec *, cdf_timestamp_t);
+int cdf_timespec_to_timestamp(cdf_timestamp_t *, const struct timespec *);
+int cdf_read_header(int, cdf_header_t *);
+void cdf_swap_header(cdf_header_t *);
+void cdf_unpack_header(cdf_header_t *, char *);
+void cdf_swap_dir(cdf_directory_t *);
+void cdf_unpack_dir(cdf_directory_t *, char *);
+void cdf_swap_class(cdf_classid_t *);
+ssize_t cdf_read_sector(int, void *, size_t, size_t, const cdf_header_t *,
+    cdf_secid_t);
+ssize_t cdf_read_short_sector(const cdf_stream_t *, void *, size_t, size_t,
+    const cdf_header_t *, cdf_secid_t);
+int cdf_read_sat(int, cdf_header_t *, cdf_sat_t *);
+size_t cdf_count_chain(const cdf_header_t *, const cdf_sat_t *,
+    cdf_secid_t);
+int cdf_read_long_sector_chain(int, const cdf_header_t *,
+    const cdf_sat_t *, cdf_secid_t, size_t, cdf_stream_t *);
+int cdf_read_short_sector_chain(const cdf_header_t *, const cdf_sat_t *,
+    const cdf_stream_t *, cdf_secid_t, size_t, cdf_stream_t *);
+int cdf_read_sector_chain(int, const cdf_header_t *,
+    const cdf_sat_t *, const cdf_sat_t *, const cdf_stream_t *, cdf_secid_t,
+    size_t, cdf_stream_t *);
+int cdf_read_dir(int, const cdf_header_t *, const cdf_sat_t *, cdf_dir_t *);
+int cdf_read_ssat(int, const cdf_header_t *, const cdf_sat_t *, cdf_sat_t *);
+int cdf_read_short_stream(int, const cdf_header_t *, const cdf_sat_t *,
+    const cdf_dir_t *, cdf_stream_t *);
+int cdf_read_property_info(const cdf_stream_t *, uint32_t,
+    cdf_property_info_t **, size_t *, size_t *);
+int cdf_read_summary_info(int, const cdf_header_t *, const cdf_sat_t *,
+    const cdf_sat_t *, const cdf_stream_t *, const cdf_dir_t *,
+    cdf_stream_t *);
+int cdf_unpack_summary_info(const cdf_stream_t *, cdf_summary_info_header_t *,
+    cdf_property_info_t **, size_t *);
+int cdf_print_classid(char *, size_t, const cdf_classid_t *);
+int cdf_print_property_name(char *, size_t, uint32_t);
+int cdf_print_elapsed_time(char *, size_t, cdf_timestamp_t);
+uint16_t cdf_tole2(uint16_t);
+uint32_t cdf_tole4(uint32_t);
+uint64_t cdf_tole8(uint64_t);
+
+#ifdef CDF_DEBUG
+void cdf_dump_header(const cdf_header_t *);
+void cdf_dump_sat(const char *, const cdf_header_t *, const cdf_sat_t *);
+void cdf_dump(void *, size_t);
+void cdf_dump_stream(const cdf_header_t *, const cdf_stream_t *);
+void cdf_dump_dir(int, const cdf_header_t *, const cdf_sat_t *,
+    const cdf_sat_t *, const cdf_stream_t *, const cdf_dir_t *);
+void cdf_dump_property_info(const cdf_property_info_t *, size_t);
+void cdf_dump_summary_info(const cdf_header_t *, const cdf_stream_t *);
+#endif
+
+
+#endif /* _H_CDF_ */

+ 182 - 0
src/cdf_time.c

@@ -0,0 +1,182 @@
+/*-
+ * Copyright (c) 2008 Christos Zoulas
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "file.h"
+
+#ifndef lint
+FILE_RCSID("@(#)$File: cdf_time.c,v 1.5 2009/02/03 20:27:51 christos Exp $")
+#endif
+
+#include <time.h>
+#ifdef TEST
+#include <err.h>
+#endif
+#include <string.h>
+
+#include "cdf.h"
+
+#define isleap(y) ((((y) % 4) == 0) && \
+    ((((y) % 100) != 0) || (((y) % 400) == 0)))
+
+static const int mdays[] = {
+    31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
+};
+
+/*
+ * Days from Jan 01 1601 (CDF_BASE_YEAR) to Jan 01 of year; 0 if year <= 1601.
+ */
+static int
+cdf_getdays(int year)
+{
+	int days = 0;
+	int y;
+
+	for (y = CDF_BASE_YEAR; y < year; y++)	/* one step per elapsed year */
+		days += isleap(y) + 365;	/* isleap() yields 1 or 0 */
+		
+	return days;
+}
+
+/*
+ * Return the zero-based day within the month for zero-based day-of-year days.
+ */
+static int
+cdf_getday(int year, int days)
+{
+	size_t m;
+
+	for (m = 0; m < sizeof(mdays) / sizeof(mdays[0]); m++) {
+		int sub = mdays[m] + (m == 1 && isleap(year));	/* length of month m, with Feb 29 */
+		if (days < sub)	/* the day falls inside month m */
+			return days;	/* NOTE(review): zero-based, while tm_mday is documented as 1..31 -- confirm */
+		days -= sub;
+	}
+	return days;	/* days ran past December: the remainder is returned */
+}
+
+/*
+ * Return the 0...11 month number for the zero-based day-of-year days.
+ */
+static int
+cdf_getmonth(int year, int days)
+{
+	size_t m;
+
+	for (m = 0; m < sizeof(mdays) / sizeof(mdays[0]); m++) {
+		days -= mdays[m];	/* days remaining past the end of month m */
+		if (m == 1 && isleap(year))
+			days--;		/* February 29th */
+		if (days < 0)		/* same month boundary as cdf_getday() uses */
+			return (int)m;
+	}
+	return (int)m;	/* days ran past December: returns 12 */
+}
+
+int	/* 0 on success, -1 with errno = EINVAL when mktime() returns -1 */
+cdf_timestamp_to_timespec(struct timespec *ts, cdf_timestamp_t t)
+{	/* splits t into struct tm fields, then lets mktime() build tv_sec */
+	struct tm tm;
+#ifdef HAVE_STRUCT_TM_TM_ZONE
+	static char UTC[] = "UTC";
+#endif
+
+	/* Unit is 100's of nanoseconds */
+	ts->tv_nsec = (t % CDF_TIME_PREC) * 100;
+
+	t /= CDF_TIME_PREC;	/* t is now in seconds */
+	tm.tm_sec = t % 60;
+	t /= 60;	/* minutes */
+
+	tm.tm_min = t % 60;
+	t /= 60;	/* hours */
+
+	tm.tm_hour = t % 24;
+	t /= 24;	/* whole days since Jan 01 CDF_BASE_YEAR */
+
+	/* XXX: Approx: leap days are ignored, so the year can come out one too high */
+	tm.tm_year = CDF_BASE_YEAR + (t / 365);
+
+	int rdays = cdf_getdays(tm.tm_year);	/* days up to Jan 01 of that year */
+	t -= rdays;	/* day of year; negative when the year estimate overshot */
+	tm.tm_mday = cdf_getday(tm.tm_year, t);	/* NOTE(review): zero-based value stored in the 1-based tm_mday -- confirm */
+	tm.tm_mon = cdf_getmonth(tm.tm_year, t);
+	tm.tm_wday = 0;
+	tm.tm_yday = 0;
+	tm.tm_isdst = 0;
+#ifdef HAVE_STRUCT_TM_TM_GMTOFF
+	tm.tm_gmtoff = 0;
+#endif
+#ifdef HAVE_STRUCT_TM_TM_ZONE
+	tm.tm_zone = UTC;
+#endif
+	tm.tm_year -= 1900;	/* struct tm counts years from 1900 */
+	ts->tv_sec = mktime(&tm);	/* NOTE(review): mktime() interprets tm as local time, not UTC -- confirm intent */
+	if (ts->tv_sec == -1) {
+		errno = EINVAL;
+		return -1;
+	}
+	return 0;
+}
+
+int	/* currently a stub: always returns 0 and never stores to *t */
+cdf_timespec_to_timestamp(cdf_timestamp_t *t, const struct timespec *ts)
+{
+	(void)&t;	/* mark the parameters as used while the body is disabled */
+	(void)&ts;
+#ifdef notyet	/* NOTE(review): unfinished draft, does not compile as written */
+	struct tm tm;
+	if (gmtime_r(&ts->ts_sec, &tm) == NULL) {	/* NOTE(review): the member is tv_sec, not ts_sec */
+		errno = EINVAL;
+		return -1;
+	}
+	*t = (ts->ts_nsec / 100) * CDF_TIME_PREC;	/* NOTE(review): the member is tv_nsec; the scaling looks wrong too */
+	*t = tm.tm_sec;	/* NOTE(review): overwrites the value stored on the previous line */
+	*t += tm.tm_min * 60;
+	*t += tm.tm_hour * 60 * 60;
+	*t += tm.tm_mday * 60 * 60 * 24;	/* month and year are not accounted for */
+#endif
+	return 0;
+}
+
+
+#ifdef TEST	/* self-test: convert one known timestamp and compare with ctime() text */
+int
+main(int argc, char *argv[])
+{
+	struct timespec ts;
+	static const cdf_timestamp_t tst = 0x01A5E403C2D59C00ULL;	/* sample input */
+	static const char *ref = "Sat Apr 23 01:30:00 1977";	/* expected ctime() text, newline stripped */
+	char *p, *q;
+
+	cdf_timestamp_to_timespec(&ts, tst);
+	p = ctime(&ts.tv_sec);
+	if ((q = strchr(p, '\n')) != NULL)	/* ctime() output ends in a newline */
+		*q = '\0';
+	if (strcmp(ref, p) != 0)
+		errx(1, "Error date %s != %s", ref, p);	/* errx() appends the newline itself */
+	return 0;
+}
+#endif

+ 7 - 8
src/compress.c

@@ -33,15 +33,18 @@
  *					    using method, return sizeof new
  */
 #include "file.h"
+
+#ifndef lint
+FILE_RCSID("@(#)$File: compress.c,v 1.61 2009/02/03 20:27:51 christos Exp $")
+#endif
+
 #include "magic.h"
-#include <stdio.h>
 #include <stdlib.h>
 #ifdef HAVE_UNISTD_H
 #include <unistd.h>
 #endif
 #include <string.h>
 #include <errno.h>
-#include <sys/types.h>
 #include <sys/ioctl.h>
 #ifdef HAVE_SYS_WAIT_H
 #include <sys/wait.h>
@@ -54,11 +57,6 @@
 #include <zlib.h>
 #endif
 
-
-#ifndef lint
-FILE_RCSID("@(#)$File: compress.c,v 1.57 2008/07/16 18:00:57 christos Exp $")
-#endif
-
 private const struct {
 	const char magic[8];
 	size_t maglen;
@@ -77,6 +75,7 @@ private const struct {
 	{ "PK\3\4",   4, { "gzip", "-cdq", NULL }, 1 },		/* pkzipped, */
 					    /* ...only first file examined */
 	{ "BZh",      3, { "bzip2", "-cd", NULL }, 1 },		/* bzip2-ed */
+	{ "LZIP",     4, { "lzip", "-cdq", NULL }, 1 },
 };
 
 private size_t ncompr = sizeof(compr) / sizeof(compr[0]);
@@ -237,7 +236,7 @@ file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
 	char buf[4096];
 	int r, tfd;
 
-	(void)strcpy(buf, "/tmp/file.XXXXXX");
+	(void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof buf);
 #ifndef HAVE_MKSTEMP
 	{
 		char *ptr = mktemp(buf);

+ 484 - 0
src/encoding.c

@@ -0,0 +1,484 @@
+/*
+ * Copyright (c) Ian F. Darwin 1986-1995.
+ * Software written by Ian F. Darwin and others;
+ * maintained 1995-present by Christos Zoulas and others.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice immediately at the beginning of the file, without modification,
+ *    this list of conditions, and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Encoding -- determine the character encoding of a text file.
+ *
+ * Joerg Wunsch <joerg@freebsd.org> wrote the original support for 8-bit
+ * international characters.
+ */
+
+#include "file.h"
+
+#ifndef	lint
+FILE_RCSID("@(#)$File: encoding.c,v 1.3 2009/02/03 20:27:51 christos Exp $")
+#endif	/* lint */
+
+#include "magic.h"
+#include <string.h>
+#include <memory.h>
+#include <stdlib.h>
+
+
+private int looks_ascii(const unsigned char *, size_t, unichar *, size_t *);
+private int looks_utf8_with_BOM(const unsigned char *, size_t, unichar *,
+    size_t *);
+private int looks_ucs16(const unsigned char *, size_t, unichar *, size_t *);
+private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *);
+private int looks_extended(const unsigned char *, size_t, unichar *, size_t *);
+private void from_ebcdic(const unsigned char *, size_t, unsigned char *);
+
+/*
+ * Try to determine whether text is in some character code we can
+ * identify.  Each of these tests, if it succeeds, will leave
+ * the text converted into one-unichar-per-character Unicode in
+ * ubuf, and the number of characters converted in ulen.
+ *
+ * Outputs:
+ *   *ubuf       NULL, or a calloc()ed array of nbytes + 1 unichars; it
+ *               is not released here.
+ *   *ulen       number of unichars stored in *ubuf by the last test run.
+ *   *code,
+ *   *code_mime  written only when an encoding is recognized.
+ *   *type       "text", or "binary" when no test matched.
+ *
+ * Returns 0 when the data does not look like text in any known
+ * encoding, 1 otherwise.  An allocation failure is reported through
+ * file_oomem() and also returns 1, with *ubuf == NULL and *ulen == 0.
+ * NOTE(review): callers presumably detect that case through the error
+ * recorded by file_oomem() -- confirm.
+ */
+protected int
+file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, unichar **ubuf, size_t *ulen, const char **code, const char **code_mime, const char **type)
+{
+	size_t mlen;
+	int rv = 1, ucs_type;
+	unsigned char *nbuf = NULL;
+
+	/*
+	 * Give the outputs defined values before anything can fail, so
+	 * that a caller never reads an indeterminate pointer or length
+	 * after an out-of-memory return.
+	 */
+	*ubuf = NULL;
+	*ulen = 0;
+	*type = "text";
+
+	/* scratch buffer for the EBCDIC -> ASCII translation below */
+	mlen = (nbytes + 1) * sizeof(nbuf[0]);
+	if ((nbuf = CAST(unsigned char *, calloc((size_t)1, mlen))) == NULL) {
+		file_oomem(ms, mlen);
+		goto done;
+	}
+	/* one unichar per input byte is the most any test can produce */
+	mlen = (nbytes + 1) * sizeof((*ubuf)[0]);
+	if ((*ubuf = CAST(unichar *, calloc((size_t)1, mlen))) == NULL) {
+		file_oomem(ms, mlen);
+		goto done;
+	}
+
+	/* The tests run from the most specific encoding to the least. */
+	if (looks_ascii(buf, nbytes, *ubuf, ulen)) {
+		*code = "ASCII";
+		*code_mime = "us-ascii";
+	} else if (looks_utf8_with_BOM(buf, nbytes, *ubuf, ulen) > 0) {
+		*code = "UTF-8 Unicode (with BOM)";
+		*code_mime = "utf-8";
+	} else if (file_looks_utf8(buf, nbytes, *ubuf, ulen) > 1) {
+		/* > 1: only data with real multi-byte sequences counts */
+		*code = "UTF-8 Unicode";
+		*code_mime = "utf-8";
+	} else if ((ucs_type = looks_ucs16(buf, nbytes, *ubuf, ulen)) != 0) {
+		if (ucs_type == 1) {
+			*code = "Little-endian UTF-16 Unicode";
+			*code_mime = "utf-16le";
+		} else {
+			*code = "Big-endian UTF-16 Unicode";
+			*code_mime = "utf-16be";
+		}
+	} else if (looks_latin1(buf, nbytes, *ubuf, ulen)) {
+		*code = "ISO-8859";
+		*code_mime = "iso-8859-1";
+	} else if (looks_extended(buf, nbytes, *ubuf, ulen)) {
+		*code = "Non-ISO extended-ASCII";
+		*code_mime = "unknown-8bit";
+	} else {
+		/* Last resort: translate from EBCDIC and test again. */
+		from_ebcdic(buf, nbytes, nbuf);
+
+		if (looks_ascii(nbuf, nbytes, *ubuf, ulen)) {
+			*code = "EBCDIC";
+			*code_mime = "ebcdic";
+		} else if (looks_latin1(nbuf, nbytes, *ubuf, ulen)) {
+			*code = "International EBCDIC";
+			*code_mime = "ebcdic";
+		} else { /* Doesn't look like text at all */
+			rv = 0;
+			*type = "binary";
+		}
+	}
+
+ done:
+	/* free(NULL) is a no-op, so no guard is needed */
+	free(nbuf);
+
+	return rv;
+}
+
+/*
+ * This table reflects a particular philosophy about what constitutes
+ * "text," and there is room for disagreement about it.
+ *
+ * Version 3.31 of the file command considered a file to be ASCII if
+ * each of its characters was approved by either the isascii() or
+ * isalpha() function.  On most systems, this would mean that any
+ * file consisting only of characters in the range 0x00 ... 0x7F
+ * would be called ASCII text, but many systems might reasonably
+ * consider some characters outside this range to be alphabetic,
+ * so the file command would call such characters ASCII.  It might
+ * have been more accurate to call this "considered textual on the
+ * local system" than "ASCII."
+ *
+ * It considered a file to be "International language text" if each
+ * of its characters was either an ASCII printing character (according
+ * to the real ASCII standard, not the above test), a character in
+ * the range 0x80 ... 0xFF, or one of the following control characters:
+ * backspace, tab, line feed, vertical tab, form feed, carriage return,
+ * escape.  No attempt was made to determine the language in which files
+ * of this type were written.
+ *
+ *
+ * The table below considers a file to be ASCII if all of its characters
+ * are either ASCII printing characters (again, according to the X3.4
+ * standard, not isascii()) or any of the following controls: bell,
+ * backspace, tab, line feed, form feed, carriage return, esc, nextline.
+ *
+ * I include bell because some programs (particularly shell scripts)
+ * use it literally, even though it is rare in normal text.  I exclude
+ * vertical tab because it never seems to be used in real text.  I also
+ * include, with hesitation, the X3.64/ECMA-43 control nextline (0x85),
+ * because that's what the dd EBCDIC->ASCII table maps the EBCDIC newline
+ * character to.  It might be more appropriate to include it in the 8859
+ * set instead of the ASCII set, but it's got to be included in *something*
+ * we recognize or EBCDIC files aren't going to be considered textual.
+ * Some old Unix source files use SO/SI (^N/^O) to shift between Greek
+ * and Latin characters, so these should possibly be allowed.  But they
+ * make a real mess on VT100-style displays if they're not paired properly,
+ * so we are probably better off not calling them text.
+ *
+ * A file is considered to be ISO-8859 text if its characters are all
+ * either ASCII, according to the above definition, or printing characters
+ * from the ISO-8859 8-bit extension, characters 0xA0 ... 0xFF.
+ *
+ * Finally, a file is considered to be international text from some other
+ * character code if its characters are all either ISO-8859 (according to
+ * the above definition) or characters in the range 0x80 ... 0x9F, which
+ * ISO-8859 considers to be control characters but the IBM PC and Macintosh
+ * consider to be printing characters.
+ */
+
+#define F 0   /* character never appears in text */
+#define T 1   /* character appears in plain ASCII text */
+#define I 2   /* character appears in ISO-8859 text */
+#define X 3   /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
+
+/*
+ * Classification of every possible byte value: the table is indexed
+ * by the byte itself and each entry is one of F, T, I or X above.
+ * Each row covers sixteen consecutive values, as labelled on its right.
+ */
+private char text_chars[256] = {
+	/*                  BEL BS HT LF    FF CR    */
+	F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F,  /* 0x0X */
+	/*                              ESC          */
+	F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F,  /* 0x1X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x2X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x3X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x4X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x5X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x6X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F,  /* 0x7X */
+	/*            NEL                            */
+	X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X,  /* 0x8X */
+	X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,  /* 0x9X */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xaX */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xbX */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xcX */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xdX */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xeX */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I   /* 0xfX */
+};
+
+/*
+ * Return 1 when every byte of buf is classified T (plain ASCII text)
+ * by text_chars, 0 as soon as one is not.  Accepted bytes are copied
+ * to ubuf one per unichar and counted in *ulen, which therefore holds
+ * the number of bytes accepted so far even on the 0 return.
+ */
+private int
+looks_ascii(const unsigned char *buf, size_t nbytes, unichar *ubuf,
+    size_t *ulen)
+{
+	size_t pos;
+
+	for (*ulen = 0, pos = 0; pos < nbytes; pos++) {
+		const unsigned char ch = buf[pos];
+
+		if (text_chars[ch] == T) {
+			ubuf[*ulen] = ch;
+			++*ulen;
+			continue;
+		}
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Return 1 when every byte of buf is classified T or I by text_chars
+ * (plain ASCII text or ISO-8859 text), 0 at the first byte that is
+ * neither.  Accepted bytes are copied to ubuf and counted in *ulen.
+ */
+private int
+looks_latin1(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
+{
+	size_t k = 0;
+
+	*ulen = 0;
+
+	while (k < nbytes) {
+		switch (text_chars[buf[k]]) {
+		case T:
+		case I:
+			ubuf[(*ulen)++] = buf[k];
+			break;
+		default:
+			return 0;
+		}
+		k++;
+	}
+
+	return 1;
+}
+
+/*
+ * Return 1 when every byte of buf is classified T, I or X by
+ * text_chars, 0 at the first byte with any other classification.
+ * Accepted bytes are copied to ubuf and counted in *ulen.
+ */
+private int
+looks_extended(const unsigned char *buf, size_t nbytes, unichar *ubuf,
+    size_t *ulen)
+{
+	size_t n;
+
+	*ulen = 0;
+
+	for (n = 0; n < nbytes; n++) {
+		const int cls = text_chars[buf[n]];
+
+		if (!(cls == T || cls == I || cls == X))
+			return 0;
+
+		ubuf[*ulen] = buf[n];
+		*ulen += 1;
+	}
+
+	return 1;
+}
+
+/*
+ * Classify a buffer as UTF-8.  The result is one of:
+ *
+ *     -1: the bytes are not valid UTF-8
+ *      0: a 7-bit byte that text_chars does not mark as text was seen
+ *      1: nothing but 7-bit text
+ *      2: at least one complete multi-byte sequence was decoded
+ *
+ * When ubuf is non-NULL the decoded characters are stored in ubuf and
+ * counted in *ulen; the caller must supply a big enough buffer.
+ * A multi-byte sequence cut short by the end of the buffer is not an
+ * error: scanning simply stops there and the partial character is
+ * dropped.
+ */
+protected int
+file_looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
+{
+	size_t pos = 0;
+	int saw_multibyte = 0, saw_ctrl = 0;
+
+	if (ubuf != NULL)
+		*ulen = 0;
+
+	while (pos < nbytes) {
+		const unsigned char lead = buf[pos++];
+		unichar ch;
+		int trail;
+
+		if (lead < 0x80) {		/* 0xxxxxxx: plain ASCII */
+			/*
+			 * Odd control characters disqualify the data
+			 * even when every sequence is well formed.
+			 */
+			if (text_chars[lead] != T)
+				saw_ctrl = 1;
+
+			if (ubuf != NULL)
+				ubuf[(*ulen)++] = lead;
+			continue;
+		}
+
+		if (lead < 0xc0)		/* 10xxxxxx cannot come first */
+			return -1;
+
+		/* 11xxxxxx: the lead byte fixes the sequence length */
+		if (lead < 0xe0) {		/* 110xxxxx */
+			ch = lead & 0x1f;
+			trail = 1;
+		} else if (lead < 0xf0) {	/* 1110xxxx */
+			ch = lead & 0x0f;
+			trail = 2;
+		} else if (lead < 0xf8) {	/* 11110xxx */
+			ch = lead & 0x07;
+			trail = 3;
+		} else if (lead < 0xfc) {	/* 111110xx */
+			ch = lead & 0x03;
+			trail = 4;
+		} else if (lead < 0xfe) {	/* 1111110x */
+			ch = lead & 0x01;
+			trail = 5;
+		} else
+			return -1;
+
+		for (; trail > 0; trail--, pos++) {
+			/* buffer ends in the middle of a character */
+			if (pos >= nbytes)
+				goto done;
+
+			/* every trailing byte must be 10xxxxxx */
+			if ((buf[pos] & 0xc0) != 0x80)
+				return -1;
+
+			ch = (ch << 6) + (buf[pos] & 0x3f);
+		}
+
+		if (ubuf != NULL)
+			ubuf[(*ulen)++] = ch;
+		saw_multibyte = 1;
+	}
+done:
+	if (saw_ctrl)
+		return 0;
+	return saw_multibyte ? 2 : 1;
+}
+
+/*
+ * UTF-8 detection for text that starts with a byte-order mark.
+ * Unless buf holds the three-byte BOM (EF BB BF) followed by at least
+ * one more byte the answer is -1; otherwise it is whatever
+ * file_looks_utf8() reports for the bytes after the BOM.
+ */
+private int
+looks_utf8_with_BOM(const unsigned char *buf, size_t nbytes, unichar *ubuf,
+    size_t *ulen)
+{
+	/* the length test comes first so buf[0..2] are only read if present */
+	if (nbytes <= 3)
+		return -1;
+	if (buf[0] != 0xef || buf[1] != 0xbb || buf[2] != 0xbf)
+		return -1;
+
+	return file_looks_utf8(buf + 3, nbytes - 3, ubuf, ulen);
+}
+
+/*
+ * Detect 16-bit Unicode text introduced by a byte-order mark.
+ * Returns 0 when buf does not start with FF FE or FE FF, or when a
+ * decoded unit is 0xfffe or is below 128 and not classified T by
+ * text_chars; otherwise 1 for little-endian and 2 for big-endian data.
+ * Each 16-bit unit after the mark is stored in ubuf and counted in
+ * *ulen; a trailing odd byte is ignored.
+ */
+private int
+looks_ucs16(const unsigned char *buf, size_t nbytes, unichar *ubuf,
+    size_t *ulen)
+{
+	int be;
+	size_t off;
+
+	if (nbytes < 2)
+		return 0;
+
+	/* The first two bytes select the byte order. */
+	switch (buf[0]) {
+	case 0xff:
+		if (buf[1] != 0xfe)
+			return 0;
+		be = 0;
+		break;
+	case 0xfe:
+		if (buf[1] != 0xff)
+			return 0;
+		be = 1;
+		break;
+	default:
+		return 0;
+	}
+
+	*ulen = 0;
+
+	for (off = 2; off + 1 < nbytes; off += 2) {
+		const unsigned char first = buf[off];
+		const unsigned char second = buf[off + 1];
+		unichar ch;
+
+		/* XXX units are stored one by one; chars > 65536 unhandled */
+		if (be)
+			ch = second + 256 * first;
+		else
+			ch = first + 256 * second;
+
+		ubuf[(*ulen)++] = ch;
+
+		if (ch == 0xfffe)
+			return 0;
+		if (ch < 128 && text_chars[(size_t)ch] != T)
+			return 0;
+	}
+
+	return be ? 2 : 1;
+}
+
+#undef F
+#undef T
+#undef I
+#undef X
+
+/*
+ * This table maps each EBCDIC character to an (8-bit extended) ASCII
+ * character, as specified in the rationale for the dd(1) command in
+ * draft 11.2 (September, 1991) of the POSIX P1003.2 standard.
+ *
+ * Unfortunately it does not seem to correspond exactly to any of the
+ * five variants of EBCDIC documented in IBM's _Enterprise Systems
+ * Architecture/390: Principles of Operation_, SA22-7201-06, Seventh
+ * Edition, July, 1999, pp. I-1 - I-4.
+ *
+ * Fortunately, though, all versions of EBCDIC, including this one, agree
+ * on most of the printing characters that also appear in (7-bit) ASCII.
+ * Of these, only '|', '!', '~', '^', '[', and ']' are in question at all.
+ *
+ * Fortunately too, there is general agreement that codes 0x00 through
+ * 0x3F represent control characters, 0x41 a nonbreaking space, and the
+ * remainder printing characters.
+ *
+ * This is sufficient to allow us to identify EBCDIC text and to distinguish
+ * between old-style and internationalized examples of text.
+ */
+
+/*
+ * One entry per EBCDIC byte value (16 rows of 16); from_ebcdic()
+ * indexes this table directly with each input byte.
+ */
+private unsigned char ebcdic_to_ascii[] = {
+  0,   1,   2,   3, 156,   9, 134, 127, 151, 141, 142,  11,  12,  13,  14,  15,
+ 16,  17,  18,  19, 157, 133,   8, 135,  24,  25, 146, 143,  28,  29,  30,  31,
+128, 129, 130, 131, 132,  10,  23,  27, 136, 137, 138, 139, 140,   5,   6,   7,
+144, 145,  22, 147, 148, 149, 150,   4, 152, 153, 154, 155,  20,  21, 158,  26,
+' ', 160, 161, 162, 163, 164, 165, 166, 167, 168, 213, '.', '<', '(', '+', '|',
+'&', 169, 170, 171, 172, 173, 174, 175, 176, 177, '!', '$', '*', ')', ';', '~',
+'-', '/', 178, 179, 180, 181, 182, 183, 184, 185, 203, ',', '%', '_', '>', '?',
+186, 187, 188, 189, 190, 191, 192, 193, 194, '`', ':', '#', '@', '\'','=', '"',
+195, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 196, 197, 198, 199, 200, 201,
+202, 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', '^', 204, 205, 206, 207, 208,
+209, 229, 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 210, 211, 212, '[', 214, 215,
+216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, ']', 230, 231,
+'{', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 232, 233, 234, 235, 236, 237,
+'}', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 238, 239, 240, 241, 242, 243,
+'\\',159, 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 244, 245, 246, 247, 248, 249,
+'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 250, 251, 252, 253, 254, 255
+};
+
+#ifdef notdef
+/*
+ * The following EBCDIC-to-ASCII table may relate more closely to reality,
+ * or at least to modern reality.  It comes from
+ *
+ *   http://ftp.s390.ibm.com/products/oe/bpxqp9.html
+ *
+ * and maps the characters of EBCDIC code page 1047 (the code used for
+ * Unix-derived software on IBM's 390 systems) to the corresponding
+ * characters from ISO 8859-1.
+ *
+ * If this table is used instead of the above one, some of the special
+ * cases for the NEL character can be taken out of the code.
+ *
+ * The table is compiled only when notdef is defined; it is laid out as
+ * 16 rows of 16 entries, one entry per EBCDIC byte value.
+ */
+
+private unsigned char ebcdic_1047_to_8859[] = {
+0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F,
+0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F,
+0x80,0x81,0x82,0x83,0x84,0x85,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07,
+0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A,
+0x20,0xA0,0xE2,0xE4,0xE0,0xE1,0xE3,0xE5,0xE7,0xF1,0xA2,0x2E,0x3C,0x28,0x2B,0x7C,
+0x26,0xE9,0xEA,0xEB,0xE8,0xED,0xEE,0xEF,0xEC,0xDF,0x21,0x24,0x2A,0x29,0x3B,0x5E,
+0x2D,0x2F,0xC2,0xC4,0xC0,0xC1,0xC3,0xC5,0xC7,0xD1,0xA6,0x2C,0x25,0x5F,0x3E,0x3F,
+0xF8,0xC9,0xCA,0xCB,0xC8,0xCD,0xCE,0xCF,0xCC,0x60,0x3A,0x23,0x40,0x27,0x3D,0x22,
+0xD8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xAB,0xBB,0xF0,0xFD,0xFE,0xB1,
+0xB0,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xAA,0xBA,0xE6,0xB8,0xC6,0xA4,
+0xB5,0x7E,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xA1,0xBF,0xD0,0x5B,0xDE,0xAE,
+0xAC,0xA3,0xA5,0xB7,0xA9,0xA7,0xB6,0xBC,0xBD,0xBE,0xDD,0xA8,0xAF,0x5D,0xB4,0xD7,
+0x7B,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xAD,0xF4,0xF6,0xF2,0xF3,0xF5,
+0x7D,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xB9,0xFB,0xFC,0xF9,0xFA,0xFF,
+0x5C,0xF7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xB2,0xD4,0xD6,0xD2,0xD3,0xD5,
+0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xB3,0xDB,0xDC,0xD9,0xDA,0x9F
+};
+#endif
+
+/*
+ * Translate nbytes bytes of EBCDIC from buf into out[], looking each
+ * byte up in ebcdic_to_ascii.  out must have room for nbytes bytes.
+ */
+private void
+from_ebcdic(const unsigned char *buf, size_t nbytes, unsigned char *out)
+{
+	const unsigned char *src = buf;
+	const unsigned char *const end = buf + nbytes;
+	unsigned char *dst = out;
+
+	while (src != end)
+		*dst++ = ebcdic_to_ascii[*src++];
+}

+ 99 - 86
src/file.c

@@ -2,7 +2,7 @@
  * Copyright (c) Ian F. Darwin 1986-1995.
  * Software written by Ian F. Darwin and others;
  * maintained 1995-present by Christos Zoulas and others.
- * 
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -12,7 +12,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- *  
+ *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -30,15 +30,16 @@
  */
 
 #include "file.h"
+
+#ifndef	lint
+FILE_RCSID("@(#)$File: file.c,v 1.130 2009/02/03 20:27:51 christos Exp $")
+#endif	/* lint */
+
 #include "magic.h"
 
-#include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <string.h>
-#include <sys/types.h>
-#include <sys/param.h>	/* for MAXPATHLEN */
-#include <sys/stat.h>
 #ifdef RESTORE_TIME
 # if (__COHERENT__ >= 0x420)
 #  include <sys/utime.h>
@@ -73,11 +74,6 @@ int getopt_long(int argc, char * const *argv, const char *optstring, const struc
 
 #include "patchlevel.h"
 
-#ifndef	lint
-FILE_RCSID("@(#)$File: file.c,v 1.121 2008/07/03 15:48:18 christos Exp $")
-#endif	/* lint */
-
-
 #ifdef S_IFLNK
 #define SYMLINKFLAG "Lh"
 #else
@@ -87,7 +83,7 @@ FILE_RCSID("@(#)$File: file.c,v 1.121 2008/07/03 15:48:18 christos Exp $")
 # define USAGE  "Usage: %s [-bcik" SYMLINKFLAG "nNrsvz0] [-e test] [-f namefile] [-F separator] [-m magicfiles] file...\n       %s -C -m magicfiles\n"
 
 #ifndef MAXPATHLEN
-#define	MAXPATHLEN	512
+#define	MAXPATHLEN	1024
 #endif
 
 private int 		/* Global command-line options 		*/
@@ -96,21 +92,45 @@ private int 		/* Global command-line options 		*/
 	nobuffer = 0,   /* Do not buffer stdout 		*/
 	nulsep = 0;	/* Append '\0' to the separator		*/
 
-private const char *magicfile = 0;	/* where the magic is	*/
 private const char *default_magicfile = MAGIC;
 private const char *separator = ":";	/* Default field separator	*/
+private	const char hmagic[] = "/.magic";
+private const struct option long_options[] = {
+#define OPT(shortname, longname, opt, doc)      \
+    {longname, opt, NULL, shortname},
+#define OPT_LONGONLY(longname, opt, doc)        \
+    {longname, opt, NULL, 0},
+#include "file_opts.h"
+#undef OPT
+#undef OPT_LONGONLY
+    {0, 0, NULL, 0}
+};
+#define OPTSTRING	"bcCde:f:F:hikLm:nNprsvz0"
 
-private char *progname;		/* used throughout 		*/
+private const struct {
+	const char *name;
+	int value;
+} nv[] = {
+	{ "apptype",	MAGIC_NO_CHECK_APPTYPE },
+	{ "ascii",	MAGIC_NO_CHECK_ASCII },
+	{ "cdf",	MAGIC_NO_CHECK_CDF },
+	{ "compress",	MAGIC_NO_CHECK_COMPRESS },
+	{ "elf",	MAGIC_NO_CHECK_ELF },
+	{ "encoding",	MAGIC_NO_CHECK_ENCODING },
+	{ "soft",	MAGIC_NO_CHECK_SOFT },
+	{ "tar",	MAGIC_NO_CHECK_TAR },
+	{ "tokens",	MAGIC_NO_CHECK_TOKENS },
+};
 
-private struct magic_set *magic;
+private char *progname;		/* used throughout 		*/
 
-private void unwrap(char *);
 private void usage(void);
 private void help(void);
-
 int main(int, char *[]);
-private void process(const char *, int);
-private void load(const char *, int);
+
+private int unwrap(struct magic_set *, const char *);
+private int process(struct magic_set *ms, const char *, int);
+private struct magic_set *load(const char *, int);
 
 
 /*
@@ -122,36 +142,12 @@ main(int argc, char *argv[])
 	int c;
 	size_t i;
 	int action = 0, didsomefiles = 0, errflg = 0;
-	int flags = 0;
+	int flags = 0, e = 0;
 	char *home, *usermagic;
-	struct stat sb;
-	static const char hmagic[] = "/.magic";
-#define OPTSTRING	"bcCde:f:F:hikLm:nNprsvz0"
+	struct magic_set *magic = NULL;
+	char magicpath[2 * MAXPATHLEN + 2];
 	int longindex;
-	static const struct option long_options[] =
-	{
-#define OPT(shortname, longname, opt, doc)      \
-    {longname, opt, NULL, shortname},
-#define OPT_LONGONLY(longname, opt, doc)        \
-    {longname, opt, NULL, 0},
-#include "file_opts.h"
-#undef OPT
-#undef OPT_LONGONLY
-    {0, 0, NULL, 0}
-};
-
-	static const struct {
-		const char *name;
-		int value;
-	} nv[] = {
-		{ "apptype",	MAGIC_NO_CHECK_APPTYPE },
-		{ "ascii",	MAGIC_NO_CHECK_ASCII },
-		{ "compress",	MAGIC_NO_CHECK_COMPRESS },
-		{ "elf",	MAGIC_NO_CHECK_ELF },
-		{ "soft",	MAGIC_NO_CHECK_SOFT },
-		{ "tar",	MAGIC_NO_CHECK_TAR },
-		{ "tokens",	MAGIC_NO_CHECK_TOKENS },
-	};
+	const char *magicfile;		/* where the magic is	*/
 
 	/* makes islower etc work for other langs */
 	(void)setlocale(LC_CTYPE, "");
@@ -171,14 +167,12 @@ main(int argc, char *argv[])
 		magicfile = usermagic;
 	else
 		if ((home = getenv("HOME")) != NULL) {
-			if ((usermagic = malloc(strlen(home)
-			    + sizeof(hmagic))) != NULL) {
-				(void)strcpy(usermagic, home);
-				(void)strcat(usermagic, hmagic);
-				if (stat(usermagic, &sb)<0) 
-					free(usermagic);
-				else
-					magicfile = usermagic;
+			(void)snprintf(magicpath, sizeof(magicpath), "%s%s",
+			     home, hmagic);
+			if (access(magicpath, R_OK) == 0) {
+				(void)snprintf(magicpath, sizeof(magicpath),
+				    "%s%s:%s", home, hmagic, magicfile);
+				magicfile = magicpath;
 			}
 		}
 
@@ -194,9 +188,12 @@ main(int argc, char *argv[])
 				help();
 				break;
 			case 10:
-				flags |= MAGIC_MIME_TYPE;
+				flags |= MAGIC_APPLE;
 				break;
 			case 11:
+				flags |= MAGIC_MIME_TYPE;
+				break;
+			case 12:
 				flags |= MAGIC_MIME_ENCODING;
 				break;
 			}
@@ -226,12 +223,14 @@ main(int argc, char *argv[])
 			else
 				flags |= nv[i].value;
 			break;
-			
+
 		case 'f':
 			if(action)
 				usage();
-			load(magicfile, flags);
-			unwrap(optarg);
+			if (magic == NULL)
+				if ((magic = load(magicfile, flags)) == NULL)
+					return 1;
+			e |= unwrap(magic, optarg);
 			++didsomefiles;
 			break;
 		case 'F':
@@ -289,10 +288,18 @@ main(int argc, char *argv[])
 	if (errflg) {
 		usage();
 	}
+	if (e)
+		return e;
 
 	switch(action) {
 	case FILE_CHECK:
 	case FILE_COMPILE:
+		/*
+		 * Don't try to check/compile ~/.magic unless we explicitly
+		 * ask for it.
+		 */
+		if (magicfile == magicpath)
+			magicfile = default_magicfile;
 		magic = magic_open(flags|MAGIC_CHECK);
 		if (magic == NULL) {
 			(void)fprintf(stderr, "%s: %s\n", progname,
@@ -304,18 +311,19 @@ main(int argc, char *argv[])
 		if (c == -1) {
 			(void)fprintf(stderr, "%s: %s\n", progname,
 			    magic_error(magic));
-			return -1;
+			return 1;
 		}
 		return 0;
 	default:
-		load(magicfile, flags);
+		if (magic == NULL)
+			if ((magic = load(magicfile, flags)) == NULL)
+				return 1;
 		break;
 	}
 
 	if (optind == argc) {
-		if (!didsomefiles) {
+		if (!didsomefiles)
 			usage();
-		}
 	}
 	else {
 		size_t j, wid, nw;
@@ -332,42 +340,43 @@ main(int argc, char *argv[])
 			bflag = optind >= argc - 1;
 		}
 		for (; optind < argc; optind++)
-			process(argv[optind], wid);
+			e |= process(magic, argv[optind], wid);
 	}
 
-	c = magic->haderr ? 1 : 0;
-	magic_close(magic);
-	return c;
+	if (magic)
+		magic_close(magic);
+	return e;
 }
 
 
-private void
+private struct magic_set *
 /*ARGSUSED*/
-load(const char *m, int flags)
+load(const char *magicfile, int flags)
 {
-	if (magic || m == NULL)
-		return;
-	magic = magic_open(flags);
+	struct magic_set *magic = magic_open(flags);
 	if (magic == NULL) {
 		(void)fprintf(stderr, "%s: %s\n", progname, strerror(errno));
-		exit(1);
+		return NULL;
 	}
 	if (magic_load(magic, magicfile) == -1) {
 		(void)fprintf(stderr, "%s: %s\n",
 		    progname, magic_error(magic));
-		exit(1);
+		magic_close(magic);
+		return NULL;
 	}
+	return magic;
 }
 
 /*
  * unwrap -- read a file of filenames, do each one.
  */
-private void
-unwrap(char *fn)
+private int
+unwrap(struct magic_set *ms, const char *fn)
 {
 	char buf[MAXPATHLEN];
 	FILE *f;
 	int wid = 0, cwid;
+	int e = 0;
 
 	if (strcmp("-", fn) == 0) {
 		f = stdin;
@@ -376,7 +385,7 @@ unwrap(char *fn)
 		if ((f = fopen(fn, "r")) == NULL) {
 			(void)fprintf(stderr, "%s: Cannot open `%s' (%s).\n",
 			    progname, fn, strerror(errno));
-			exit(1);
+			return 1;
 		}
 
 		while (fgets(buf, sizeof(buf), f) != NULL) {
@@ -391,19 +400,20 @@ unwrap(char *fn)
 
 	while (fgets(buf, sizeof(buf), f) != NULL) {
 		buf[strcspn(buf, "\n")] = '\0';
-		process(buf, wid);
+		e |= process(ms, buf, wid);
 		if(nobuffer)
 			(void)fflush(stdout);
 	}
 
 	(void)fclose(f);
+	return e;
 }
 
 /*
  * Called for each input file on the command line (or in a list of files)
  */
-private void
-process(const char *inname, int wid)
+private int
+process(struct magic_set *ms, const char *inname, int wid)
 {
 	const char *type;
 	int std_in = strcmp(inname, "-") == 0;
@@ -418,11 +428,14 @@ process(const char *inname, int wid)
 		    (int) (nopad ? 0 : (wid - file_mbswidth(inname))), "");
 	}
 
-	type = magic_file(magic, std_in ? NULL : inname);
-	if (type == NULL)
-		(void)printf("ERROR: %s\n", magic_error(magic));
-	else
+	type = magic_file(ms, std_in ? NULL : inname);
+	if (type == NULL) {
+		(void)printf("ERROR: %s\n", magic_error(ms));
+		return 1;
+	} else {
 		(void)printf("%s\n", type);
+		return 0;
+	}
 }
 
 size_t
@@ -475,9 +488,9 @@ help(void)
 "Determine type of FILEs.\n"
 "\n", stderr);
 #define OPT(shortname, longname, opt, doc)      \
-        fprintf(stderr, "  -%c, --" longname doc, shortname);
+	fprintf(stderr, "  -%c, --" longname doc, shortname);
 #define OPT_LONGONLY(longname, opt, doc)        \
-        fprintf(stderr, "      --" longname doc);
+	fprintf(stderr, "      --" longname doc);
 #include "file_opts.h"
 #undef OPT
 #undef OPT_LONGONLY

+ 42 - 18
src/file.h

@@ -2,7 +2,7 @@
  * Copyright (c) Ian F. Darwin 1986-1995.
  * Software written by Ian F. Darwin and others;
  * maintained 1995-present by Christos Zoulas and others.
- * 
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -12,7 +12,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- *  
+ *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -27,7 +27,7 @@
  */
 /*
  * file.h - definitions for file(1) program
- * @(#)$File: file.h,v 1.108 2008/07/16 18:00:57 christos Exp $
+ * @(#)$File: file.h,v 1.118 2009/02/03 20:27:51 christos Exp $
  */
 
 #ifndef __file_h__
@@ -48,6 +48,7 @@
 #endif
 #include <regex.h>
 #include <sys/types.h>
+#include <sys/param.h>
 /* Do this here and now, because struct stat gets re-defined on solaris */
 #include <sys/stat.h>
 #include <stdarg.h>
@@ -103,8 +104,8 @@
 #define MAXstring 32		/* max leng of "string" types */
 
 #define MAGICNO		0xF11E041C
-#define VERSIONNO	6
-#define FILE_MAGICSIZE	(32 * 6)
+#define VERSIONNO	7
+#define FILE_MAGICSIZE	200
 
 #define	FILE_LOAD	0
 #define FILE_CHECK	1
@@ -122,7 +123,7 @@ union VALUETYPE {
 	unsigned char us[MAXstring];
 	float f;
 	double d;
-}; 
+};
 
 struct magic {
 	/* Word 1 */
@@ -134,7 +135,7 @@ struct magic {
 #define UNSIGNED	0x08	/* comparison is unsigned */
 #define NOSPACE		0x10	/* suppress space character before output */
 #define BINTEST		0x20	/* test is for a binary type (set only
-                                   for top-level tests) */
+				   for top-level tests) */
 #define TEXTTEST	0	/* for passing to file_softmagic */
 
 	uint8_t factor;
@@ -183,7 +184,10 @@ struct magic {
 #define				FILE_DOUBLE	36
 #define				FILE_BEDOUBLE	37
 #define				FILE_LEDOUBLE	38
-#define				FILE_NAMES_SIZE	39/* size of array to contain all names */
+#define				FILE_BEID3	39
+#define				FILE_LEID3	40
+#define				FILE_INDIRECT	41
+#define				FILE_NAMES_SIZE	42/* size of array to contain all names */
 
 #define IS_STRING(t) \
 	((t) == FILE_STRING || \
@@ -209,7 +213,7 @@ struct magic {
 #else
 	uint8_t dummy;
 #endif
-	uint8_t factor_op;	
+	uint8_t factor_op;
 #define		FILE_FACTOR_OP_PLUS	'+'
 #define		FILE_FACTOR_OP_MINUS	'-'
 #define		FILE_FACTOR_OP_TIMES	'*'
@@ -257,11 +261,13 @@ struct magic {
 #define str_range _u._s._count
 #define str_flags _u._s._flags
 	/* Words 9-16 */
-	union VALUETYPE value;		/* either number or string */
-	/* Words 17..31 */
+	union VALUETYPE value;	/* either number or string */
+	/* Words 17-24 */
 	char desc[MAXDESC];	/* description */
-	/* Words 32..47 */
+	/* Words 25-32 */
 	char mimetype[MAXDESC]; /* MIME type */
+	/* Words 33-34 */
+	char apple[8];
 };
 
 #define BIT(A)   (1 << (A))
@@ -302,7 +308,7 @@ struct level_info {
 	int last_match;
 	int last_cond;	/* used for error checking by parse() */
 #endif
-} *li;
+};
 struct magic_set {
 	struct mlist *mlist;
 	struct cont {
@@ -315,8 +321,9 @@ struct magic_set {
 	} o;
 	uint32_t offset;
 	int error;
-	int flags;
-	int haderr;
+	int flags;			/* Control magic tests. */
+	int event_flags;		/* Note things that happened. */
+#define 		EVENT_HAD_ERR		0x01
 	const char *file;
 	size_t line;			/* current magic line number */
 
@@ -348,11 +355,19 @@ protected int file_printf(struct magic_set *, const char *, ...)
 protected int file_reset(struct magic_set *);
 protected int file_tryelf(struct magic_set *, int, const unsigned char *,
     size_t);
+protected int file_trycdf(struct magic_set *, int, const unsigned char *,
+    size_t);
 protected int file_zmagic(struct magic_set *, int, const char *,
     const unsigned char *, size_t);
 protected int file_ascmagic(struct magic_set *, const unsigned char *, size_t);
+protected int file_ascmagic_with_encoding(struct magic_set *,
+    const unsigned char *, size_t, unichar *, size_t, const char *,
+    const char *);
+protected int file_encoding(struct magic_set *, const unsigned char *, size_t,
+    unichar **, size_t *, const char **, const char **, const char **);
 protected int file_is_tar(struct magic_set *, const unsigned char *, size_t);
-protected int file_softmagic(struct magic_set *, const unsigned char *, size_t, int);
+protected int file_softmagic(struct magic_set *, const unsigned char *, size_t,
+    int);
 protected struct mlist *file_apprentice(struct magic_set *, const char *, int);
 protected uint64_t file_signextend(struct magic_set *, struct magic *,
     uint64_t);
@@ -397,6 +412,13 @@ int vasprintf(char **, const char *, va_list);
 int asprintf(char **ptr, const char *format_string, ...);
 #endif
 
+#ifndef HAVE_STRLCPY
+size_t strlcpy(char *dst, const char *src, size_t siz);
+#endif
+#ifndef HAVE_STRLCAT
+size_t strlcat(char *dst, const char *src, size_t siz);
+#endif
+
 #if defined(HAVE_MMAP) && defined(HAVE_SYS_MMAN_H) && !defined(QUICK)
 #define QUICK
 #endif
@@ -407,12 +429,14 @@ int asprintf(char **ptr, const char *format_string, ...);
 
 #ifndef __cplusplus
 #ifdef __GNUC__
-static const char *rcsid(const char *) __attribute__((__used__));
-#endif
+#define FILE_RCSID(id) \
+static const char rcsid[] __attribute__((__used__)) = id;
+#else
 #define FILE_RCSID(id) \
 static const char *rcsid(const char *p) { \
 	return rcsid(p = id); \
 }
+#endif
 #else
 #define FILE_RCSID(id)
 #endif

+ 1 - 0
src/file_opts.h

@@ -28,6 +28,7 @@ OPT('f', "files-from", 1, " FILE      read the filenames to be examined from FIL
 OPT('F', "separator", 1, " STRING     use string as separator instead of `:'\n")
 OPT('i', "mime", 0, "                 output MIME type strings (--mime-type and\n"
     "                               --mime-encoding)\n")
+OPT_LONGONLY("apple", 0, "                output the Apple CREATOR/TYPE\n")
 OPT_LONGONLY("mime-type", 0, "            output the MIME type\n")
 OPT_LONGONLY("mime-encoding", 0, "        output the MIME encoding\n")
 OPT('k', "keep-going", 0, "           don't stop at the first match\n")

+ 70 - 57
src/fsmagic.c

@@ -30,13 +30,17 @@
  */
 
 #include "file.h"
+
+#ifndef	lint
+FILE_RCSID("@(#)$File: fsmagic.c,v 1.59 2009/02/03 20:27:51 christos Exp $")
+#endif	/* lint */
+
 #include "magic.h"
 #include <string.h>
 #ifdef HAVE_UNISTD_H
 #include <unistd.h>
 #endif
 #include <stdlib.h>
-#include <sys/stat.h>
 /* Since major is a function on SVR4, we cannot use `ifndef major'.  */
 #ifdef MAJOR_IN_MKDEV
 # include <sys/mkdev.h>
@@ -56,10 +60,6 @@
 #endif
 #undef HAVE_MAJOR
 
-#ifndef	lint
-FILE_RCSID("@(#)$File: fsmagic.c,v 1.52 2008/07/25 23:59:01 rrt Exp $")
-#endif	/* lint */
-
 private int
 bad_link(struct magic_set *ms, int err, char *buf)
 {
@@ -84,6 +84,21 @@ bad_link(struct magic_set *ms, int err, char *buf)
 	return 1;
 }
 
+/*
+ * Print the MIME form of a file-system object.
+ *
+ * When the MIME type is requested, "application/<str>" is printed; when the
+ * MIME encoding is requested, "binary" is printed.  If both are requested
+ * the two parts are joined with "; charset=".
+ *
+ * Returns 0 on success and -1 as soon as any file_printf() call fails.
+ */
+private int
+handle_mime(struct magic_set *ms, int mime, const char *str)
+{
+	const int want_type = (mime & MAGIC_MIME_TYPE) != 0;
+	const int want_encoding = (mime & MAGIC_MIME_ENCODING) != 0;
+
+	if (want_type && file_printf(ms, "application/%s", str) == -1)
+		return -1;
+
+	/* Nothing more to print unless the encoding was asked for. */
+	if (!want_encoding)
+		return 0;
+
+	/* The separator is only needed when a type precedes the encoding. */
+	if (want_type && file_printf(ms, "; charset=") == -1)
+		return -1;
+
+	if (file_printf(ms, "binary") == -1)
+		return -1;
+
+	return 0;
+}
+
 protected int
 file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
 {
@@ -95,6 +110,8 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
 	struct stat tstatbuf;
 #endif
 
+	if (ms->flags & MAGIC_APPLE)
+		return 0;
 	if (fn == NULL)
 		return 0;
 
@@ -140,11 +157,10 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
 	
 	switch (sb->st_mode & S_IFMT) {
 	case S_IFDIR:
-		if ((mime & MAGIC_MIME_TYPE) &&
-		    file_printf(ms, "application/x-directory")
-		    == -1)
-			return -1;
-		if (!mime && file_printf(ms, "directory") == -1)
+		if (mime) {
+			if (handle_mime(ms, mime, "x-directory") == -1)
+				return -1;
+		} else if (file_printf(ms, "directory") == -1)
 			return -1;
 		return 1;
 #ifdef S_IFCHR
@@ -156,20 +172,20 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
 		 */
 		if ((ms->flags & MAGIC_DEVICES) != 0)
 			break;
-		if ((mime & MAGIC_MIME_TYPE) &&
-		    file_printf(ms, "application/x-character-device")
-		    == -1)
-			return -1;
-		if (!mime) {
+		if (mime) {
+			if (handle_mime(ms, mime, "x-character-device") == -1)
+				return -1;
+		} else {
 #ifdef HAVE_STAT_ST_RDEV
 # ifdef dv_unit
 			if (file_printf(ms, "character special (%d/%d/%d)",
-					major(sb->st_rdev), dv_unit(sb->st_rdev),
+			    major(sb->st_rdev), dv_unit(sb->st_rdev),
 					dv_subunit(sb->st_rdev)) == -1)
 				return -1;
 # else
 			if (file_printf(ms, "character special (%ld/%ld)",
-					(long) major(sb->st_rdev), (long) minor(sb->st_rdev)) == -1)
+			    (long)major(sb->st_rdev), (long)minor(sb->st_rdev))
+			    == -1)
 				return -1;
 # endif
 #else
@@ -188,11 +204,10 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
 		 */
 		if ((ms->flags & MAGIC_DEVICES) != 0)
 			break;
-		if ((mime & MAGIC_MIME_TYPE) &&
-		    file_printf(ms, "application/x-block-device")
-		    == -1)
-			return -1;
-		if (!mime) {
+		if (mime) {
+			if (handle_mime(ms, mime, "x-block-device") == -1)
+				return -1;
+		} else {
 #ifdef HAVE_STAT_ST_RDEV
 # ifdef dv_unit
 			if (file_printf(ms, "block special (%d/%d/%d)",
@@ -216,21 +231,19 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
 	case S_IFIFO:
 		if((ms->flags & MAGIC_DEVICES) != 0)
 			break;
-		if ((mime & MAGIC_MIME_TYPE) &&
-		    file_printf(ms, "application/x-fifo")
-		    == -1)
-			return -1;
-		if (!mime && file_printf(ms, "fifo (named pipe)") == -1)
+		if (mime) {
+			if (handle_mime(ms, mime, "x-fifo") == -1)
+				return -1;
+		} else if (file_printf(ms, "fifo (named pipe)") == -1)
 			return -1;
 		return 1;
 #endif
 #ifdef	S_IFDOOR
 	case S_IFDOOR:
-		if ((mime & MAGIC_MIME_TYPE) &&
-		    file_printf(ms, "application/x-door")
-		    == -1)
-			return -1;
-		if (!mime && file_printf(ms, "door") == -1)
+		if (mime) {
+			if (handle_mime(ms, mime, "x-door") == -1)
+				return -1;
+		} else if (file_printf(ms, "door") == -1)
 			return -1;
 		return 1;
 #endif
@@ -242,11 +255,10 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
 				fn);
 			    return -1;
 			}
-			if ((mime & MAGIC_MIME_TYPE) &&
-			    file_printf(ms, "application/x-symlink")
-			    == -1)
-				return -1;
-			if (!mime && file_printf(ms,
+			if (mime) {
+				if (handle_mime(ms, mime, "x-symlink") == -1)
+					return -1;
+			} else if (file_printf(ms,
 			    "unreadable symlink `%s' (%s)", fn,
 			    strerror(errno)) == -1)
 				return -1;
@@ -271,18 +283,20 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
 						    "path too long: `%s'", buf);
 						return -1;
 					}
-					if ((mime & MAGIC_MIME_TYPE) &&
-					    file_printf(ms, "application/x-path-too-long")
-					    == -1)
-						return -1;
-					if (!mime && file_printf(ms,
+					if (mime) {
+						if (handle_mime(ms, mime,
+						    "x-path-too-long") == -1)
+							return -1;
+					} else if (file_printf(ms,
 					    "path too long: `%s'", fn) == -1)
 						return -1;
 					return 1;
 				}
-				(void)strcpy(buf2, fn);  /* take dir part */
+				/* take dir part */
+				(void)strlcpy(buf2, fn, sizeof buf2);
 				buf2[tmp - fn + 1] = '\0';
-				(void)strcat(buf2, buf); /* plus (rel) link */
+				/* plus (rel) link */
+				(void)strlcat(buf2, buf, sizeof buf2);
 				tmp = buf2;
 			}
 			if (stat(tmp, &tstatbuf) < 0)
@@ -297,11 +311,10 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
 			ms->flags |= MAGIC_SYMLINK;
 			return p != NULL ? 1 : -1;
 		} else { /* just print what it points to */
-			if ((mime & MAGIC_MIME_TYPE) &&
-			    file_printf(ms, "application/x-symlink")
-			    == -1)
-				return -1;
-			if (!mime && file_printf(ms, "symbolic link to `%s'",
+			if (mime) {
+				if (handle_mime(ms, mime, "x-symlink") == -1)
+					return -1;
+			} else if (file_printf(ms, "symbolic link to `%s'",
 			    buf) == -1)
 				return -1;
 		}
@@ -310,11 +323,10 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
 #ifdef	S_IFSOCK
 #ifndef __COHERENT__
 	case S_IFSOCK:
-		if ((mime & MAGIC_MIME_TYPE) &&
-		    file_printf(ms, "application/x-socket")
-		    == -1)
-			return -1;
-		if (!mime && file_printf(ms, "socket") == -1)
+		if (mime) {
+			if (handle_mime(ms, mime, "x-socket") == -1)
+				return -1;
+		} else if (file_printf(ms, "socket") == -1)
 			return -1;
 		return 1;
 #endif
@@ -340,9 +352,10 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
 	 * when we read the file.)
 	 */
 	if ((ms->flags & MAGIC_DEVICES) == 0 && sb->st_size == 0) {
-		if ((!mime || (mime & MAGIC_MIME_TYPE)) &&
-		    file_printf(ms, mime ? "application/x-empty" :
-		    "empty") == -1)
+		if (mime) {
+			if (handle_mime(ms, mime, "x-empty") == -1)
+				return -1;
+		} else if (file_printf(ms, "empty") == -1)
 			return -1;
 		return 1;
 	}

+ 116 - 45
src/funcs.c

@@ -1,7 +1,7 @@
 /*
  * Copyright (c) Christos Zoulas 2003.
  * All Rights Reserved.
- * 
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -11,7 +11,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- *  
+ *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -25,6 +25,11 @@
  * SUCH DAMAGE.
  */
 #include "file.h"
+
+#ifndef	lint
+FILE_RCSID("@(#)$File: funcs.c,v 1.51 2008/11/07 18:57:28 christos Exp $")
+#endif	/* lint */
+
 #include "magic.h"
 #include <stdarg.h>
 #include <stdlib.h>
@@ -40,10 +45,6 @@
 #include <limits.h>
 #endif
 
-#ifndef	lint
-FILE_RCSID("@(#)$File: funcs.c,v 1.44 2008/07/16 18:00:57 christos Exp $")
-#endif	/* lint */
-
 #ifndef SIZE_MAX
 #define SIZE_MAX	((size_t)~0)
 #endif
@@ -97,17 +98,17 @@ file_error_core(struct magic_set *ms, int error, const char *f, va_list va,
     uint32_t lineno)
 {
 	/* Only the first error is ok */
-	if (ms->haderr)
+	if (ms->event_flags & EVENT_HAD_ERR)
 		return;
 	if (lineno != 0) {
 		free(ms->o.buf);
 		ms->o.buf = NULL;
 		file_printf(ms, "line %u: ", lineno);
 	}
-        file_vprintf(ms, f, va);
+	file_vprintf(ms, f, va);
 	if (error > 0)
 		file_printf(ms, " (%s)", strerror(error));
-	ms->haderr++;
+	ms->event_flags |= EVENT_HAD_ERR;
 	ms->error = error;
 }
 
@@ -157,9 +158,16 @@ protected int
 file_buffer(struct magic_set *ms, int fd, const char *inname, const void *buf,
     size_t nb)
 {
-	int m;
+	int m = 0, rv = 0, looks_text = 0;
 	int mime = ms->flags & MAGIC_MIME;
 	const unsigned char *ubuf = CAST(const unsigned char *, buf);
+	unichar *u8buf = NULL;
+	size_t ulen;
+	const char *code = NULL;
+	const char *code_mime = "binary";
+	const char *type = NULL;
+
+
 
 	if (nb == 0) {
 		if ((!mime || (mime & MAGIC_MIME_TYPE)) &&
@@ -175,6 +183,11 @@ file_buffer(struct magic_set *ms, int fd, const char *inname, const void *buf,
 		return 1;
 	}
 
+	if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) {
+		looks_text = file_encoding(ms, ubuf, nb, &u8buf, &ulen,
+		    &code, &code_mime, &type);
+	}
+
 #ifdef __EMX__
 	if ((ms->flags & MAGIC_NO_CHECK_APPTYPE) == 0 && inname) {
 		switch (file_os2_apptype(ms, inname, buf, nb)) {
@@ -189,41 +202,96 @@ file_buffer(struct magic_set *ms, int fd, const char *inname, const void *buf,
 #endif
 
 	/* try compression stuff */
-	if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) != 0 ||
-	    (m = file_zmagic(ms, fd, inname, ubuf, nb)) == 0) {
-	    /* Check if we have a tar file */
-	    if ((ms->flags & MAGIC_NO_CHECK_TAR) != 0 ||
-		(m = file_is_tar(ms, ubuf, nb)) == 0) {
-		/* try tests in /etc/magic (or surrogate magic file) */
-		if ((ms->flags & MAGIC_NO_CHECK_SOFT) != 0 ||
-		    (m = file_softmagic(ms, ubuf, nb, BINTEST)) == 0) {
-		    /* try known keywords, check whether it is ASCII */
-		    if ((ms->flags & MAGIC_NO_CHECK_ASCII) != 0 ||
-			(m = file_ascmagic(ms, ubuf, nb)) == 0) {
-			/* abandon hope, all ye who remain here */
-			if ((!mime || (mime & MAGIC_MIME_TYPE)) &&
-			    file_printf(ms, mime ? "application/octet-stream" :
-				"data") == -1)
-				return -1;
-			m = 1;
-		    }
+	if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) == 0)
+		if ((m = file_zmagic(ms, fd, inname, ubuf, nb)) != 0) {
+			if ((ms->flags & MAGIC_DEBUG) != 0)
+				(void)fprintf(stderr, "zmagic %d\n", m);
+			goto done;
 		}
-	    }
-	}
+
+	/* Check if we have a tar file */
+	if ((ms->flags & MAGIC_NO_CHECK_TAR) == 0)
+		if ((m = file_is_tar(ms, ubuf, nb)) != 0) {
+			if ((ms->flags & MAGIC_DEBUG) != 0)
+				(void)fprintf(stderr, "tar %d\n", m);
+			goto done;
+		}
+
+	/* Check if we have a CDF file */
+	if ((ms->flags & MAGIC_NO_CHECK_CDF) == 0)
+		if ((m = file_trycdf(ms, fd, ubuf, nb)) != 0) {
+			if ((ms->flags & MAGIC_DEBUG) != 0)
+				(void)fprintf(stderr, "cdf %d\n", m);
+			goto done;
+		}
+
+	/* try soft magic tests */
+	if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0)
+		if ((m = file_softmagic(ms, ubuf, nb, BINTEST)) != 0) {
+			if ((ms->flags & MAGIC_DEBUG) != 0)
+				(void)fprintf(stderr, "softmagic %d\n", m);
 #ifdef BUILTIN_ELF
-	if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && m == 1 &&
-	    nb > 5 && fd != -1) {
-		/*
-		 * We matched something in the file, so this *might*
-		 * be an ELF file, and the file is at least 5 bytes
-		 * long, so if it's an ELF file it has at least one
-		 * byte past the ELF magic number - try extracting
-		 * information from the ELF headers that cannot easily
-		 * be extracted with rules in the magic file.
-		 */
-		(void)file_tryelf(ms, fd, ubuf, nb);
-	}
+			if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && m == 1 &&
+			    nb > 5 && fd != -1) {
+				/*
+				 * We matched something in the file, so this
+				 * *might* be an ELF file, and the file is at
+				 * least 5 bytes long, so if it's an ELF file
+				 * it has at least one byte past the ELF magic
+				 * number - try extracting information from the
+				 * ELF headers that cannot easily be
+				 * extracted with rules in the magic file.
+				 */
+				if ((m = file_tryelf(ms, fd, ubuf, nb)) != 0)
+					if ((ms->flags & MAGIC_DEBUG) != 0)
+						(void)fprintf(stderr,
+						    "elf %d\n", m);
+			}
 #endif
+			goto done;
+		}
+
+	/* try text properties (and possibly text tokens) */
+	if ((ms->flags & MAGIC_NO_CHECK_TEXT) == 0) {
+
+		if ((m = file_ascmagic(ms, ubuf, nb)) != 0) {
+			if ((ms->flags & MAGIC_DEBUG) != 0)
+				(void)fprintf(stderr, "ascmagic %d\n", m);
+			goto done;
+		}
+
+		/* try to discover text encoding */
+		if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) {
+			if (looks_text == 0)
+				if ((m = file_ascmagic_with_encoding( ms, ubuf,
+				    nb, u8buf, ulen, code, type)) != 0) {
+					if ((ms->flags & MAGIC_DEBUG) != 0)
+						(void)fprintf(stderr,
+						    "ascmagic/enc %d\n", m);
+					goto done;
+				}
+		}
+	}
+
+	/* give up */
+	m = 1;
+	if ((!mime || (mime & MAGIC_MIME_TYPE)) &&
+	    file_printf(ms, mime ? "application/octet-stream" : "data") == -1) {
+	    rv = -1;
+	}
+ done:
+	if ((ms->flags & MAGIC_MIME_ENCODING) != 0) {
+		if (ms->flags & MAGIC_MIME_TYPE)
+			if (file_printf(ms, "; charset=") == -1)
+				rv = -1;
+		if (file_printf(ms, "%s", code_mime) == -1)
+			rv = -1;
+	}
+	if (u8buf)
+		free(u8buf);
+	if (rv)
+		return rv;
+
 	return m;
 }
 #endif
@@ -236,7 +304,7 @@ file_reset(struct magic_set *ms)
 		return -1;
 	}
 	ms->o.buf = NULL;
-	ms->haderr = 0;
+	ms->event_flags &= ~EVENT_HAD_ERR;
 	ms->error = -1;
 	return 0;
 }
@@ -255,12 +323,15 @@ file_getbuffer(struct magic_set *ms)
 	char *pbuf, *op, *np;
 	size_t psize, len;
 
-	if (ms->haderr)
+	if (ms->event_flags & EVENT_HAD_ERR)
 		return NULL;
 
 	if (ms->flags & MAGIC_RAW)
 		return ms->o.buf;
 
+	if (ms->o.buf == NULL)
+		return NULL;
+
 	/* * 4 is for octal representation, + 1 is for NUL */
 	len = strlen(ms->o.buf);
 	if (len > (SIZE_MAX - 1) / 4) {
@@ -315,7 +386,7 @@ file_getbuffer(struct magic_set *ms)
 
 	for (np = ms->o.pbuf, op = ms->o.buf; *op; op++) {
 		if (isprint((unsigned char)*op)) {
-			*np++ = *op;	
+			*np++ = *op;
 		} else {
 			OCTALIFY(np, op);
 		}

+ 6 - 4
src/getopt_long.c

@@ -29,14 +29,16 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
+#include "file.h"
+
+#ifndef	lint
+FILE_RCSID("@(#)$File: getopt_long.c,v 1.5 2009/02/03 20:27:51 christos Exp $")
+#endif	/* lint */
+
 #include <assert.h>
 #ifdef HAVE_ERR_H
 #include <err.h>
 #else
-#include <stdio.h>
 #define warnx printf
 #endif
 #include <errno.h>

+ 10 - 9
src/is_tar.c

@@ -38,16 +38,16 @@
  */
 
 #include "file.h"
+
+#ifndef lint
+FILE_RCSID("@(#)$File: is_tar.c,v 1.36 2009/02/03 20:27:51 christos Exp $")
+#endif
+
 #include "magic.h"
 #include <string.h>
 #include <ctype.h>
-#include <sys/types.h>
 #include "tar.h"
 
-#ifndef lint
-FILE_RCSID("@(#)$File: is_tar.c,v 1.31 2008/02/04 20:51:17 christos Exp $")
-#endif
-
 #define	isodigit(c)	( ((c) >= '0') && ((c) <= '7') )
 
 private int is_tar(const unsigned char *, size_t);
@@ -66,16 +66,17 @@ file_is_tar(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
 	 * Do the tar test first, because if the first file in the tar
 	 * archive starts with a dot, we can confuse it with an nroff file.
 	 */
-	int tar = is_tar(buf, nbytes);
+	int tar;
 	int mime = ms->flags & MAGIC_MIME;
 
-	if (tar < 1 || tar > 3)
+	if ((ms->flags & MAGIC_APPLE) != 0)
 		return 0;
 
-	if (mime == MAGIC_MIME_ENCODING)
+	tar = is_tar(buf, nbytes);
+	if (tar < 1 || tar > 3)
 		return 0;
 
-	if (file_printf(ms, mime ? "application/x-tar" :
+	if (file_printf(ms, "%s", mime ? "application/x-tar" :
 	    tartype[tar - 1]) == -1)
 		return -1;
 	return 1;

+ 12 - 14
src/magic.c

@@ -26,15 +26,16 @@
  */
 
 #include "file.h"
+
+#ifndef	lint
+FILE_RCSID("@(#)$File: magic.c,v 1.59 2009/02/03 20:27:51 christos Exp $")
+#endif	/* lint */
+
 #include "magic.h"
 
-#include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <string.h>
-#include <sys/types.h>
-#include <sys/param.h>	/* for MAXPATHLEN */
-#include <sys/stat.h>
 #ifdef QUICK
 #include <sys/mman.h>
 #endif
@@ -64,10 +65,6 @@
 
 #include "patchlevel.h"
 
-#ifndef	lint
-FILE_RCSID("@(#)$File: magic.c,v 1.54 2008/07/25 23:30:32 rrt Exp $")
-#endif	/* lint */
-
 #ifndef PIPE_BUF
 /* Get the PIPE_BUF from pathconf */
 #ifdef _PC_PIPE_BUF
@@ -116,7 +113,7 @@ magic_open(int flags)
 	if ((ms->c.li = CAST(struct level_info *, malloc(len))) == NULL)
 		goto free;
 
-	ms->haderr = 0;
+	ms->event_flags = 0;
 	ms->error = -1;
 	ms->mlist = NULL;
 	ms->file = "unknown";
@@ -229,7 +226,7 @@ close_and_restore(const struct magic_set *ms, const char *name, int fd,
 #elif defined(HAVE_UTIME_H) || defined(HAVE_SYS_UTIME_H)
 		struct utimbuf  utbuf;
 
-		(void)memset(utbuf, 0, sizeof(utbuf));
+		(void)memset(&utbuf, 0, sizeof(utbuf));
 		utbuf.actime = sb->st_atime;
 		utbuf.modtime = sb->st_mtime;
 		(void) utime(name, &utbuf); /* don't care if loses */
@@ -302,8 +299,9 @@ file_or_fd(struct magic_set *ms, const char *inname, int fd)
 		if ((fd = open(inname, flags)) < 0) {
 #ifdef __CYGWIN__
 			/* FIXME: Do this with EXEEXT from autotools */
-			char *tmp = alloca(strlen(inname) + 5);
-			(void)strcat(strcpy(tmp, inname), ".exe");
+			size_t len = strlen(inname) + 5;
+			char *tmp = alloca(len);
+			(void)strlcat(strlcpy(tmp, inname, len), ".exe", len);
 			if ((fd = open(tmp, flags)) < 0) {
 #endif
 				if (unreadable_info(ms, sb.st_mode,
@@ -385,13 +383,13 @@ magic_buffer(struct magic_set *ms, const void *buf, size_t nb)
 public const char *
 magic_error(struct magic_set *ms)
 {
-	return ms->haderr ? ms->o.buf : NULL;
+	return (ms->event_flags & EVENT_HAD_ERR) ? ms->o.buf : NULL;
 }
 
 public int
 magic_errno(struct magic_set *ms)
 {
-	return ms->haderr ? ms->error : 0;
+	return (ms->event_flags & EVENT_HAD_ERR) ? ms->error : 0;
 }
 
 public int

+ 13 - 7
src/magic.h

@@ -1,7 +1,7 @@
 /*
  * Copyright (c) Christos Zoulas 2003.
  * All Rights Reserved.
- * 
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -11,7 +11,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- *  
+ *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -34,21 +34,27 @@
 #define	MAGIC_SYMLINK		0x000002 /* Follow symlinks */
 #define	MAGIC_COMPRESS		0x000004 /* Check inside compressed files */
 #define	MAGIC_DEVICES		0x000008 /* Look at the contents of devices */
-#define	MAGIC_MIME_TYPE		0x000010 /* Return only the MIME type */
+#define	MAGIC_MIME_TYPE		0x000010 /* Return the MIME type */
 #define	MAGIC_CONTINUE		0x000020 /* Return all matches */
 #define	MAGIC_CHECK		0x000040 /* Print warnings to stderr */
 #define	MAGIC_PRESERVE_ATIME	0x000080 /* Restore access time on exit */
-#define	MAGIC_RAW		0x000100 /* Don't translate unprint chars */
+#define	MAGIC_RAW		0x000100 /* Don't translate unprintable chars */
 #define	MAGIC_ERROR		0x000200 /* Handle ENOENT etc as real errors */
-#define	MAGIC_MIME_ENCODING	0x000400 /* Return only the MIME encoding */
+#define	MAGIC_MIME_ENCODING	0x000400 /* Return the MIME encoding */
 #define MAGIC_MIME		(MAGIC_MIME_TYPE|MAGIC_MIME_ENCODING)
+#define	MAGIC_APPLE		0x000800 /* Return the Apple creator and type */
 #define	MAGIC_NO_CHECK_COMPRESS	0x001000 /* Don't check for compressed files */
 #define	MAGIC_NO_CHECK_TAR	0x002000 /* Don't check for tar files */
 #define	MAGIC_NO_CHECK_SOFT	0x004000 /* Don't check magic entries */
 #define	MAGIC_NO_CHECK_APPTYPE	0x008000 /* Don't check application type */
 #define	MAGIC_NO_CHECK_ELF	0x010000 /* Don't check for elf details */
-#define	MAGIC_NO_CHECK_ASCII	0x020000 /* Don't check for ascii files */
-#define	MAGIC_NO_CHECK_TOKENS	0x100000 /* Don't check ascii/tokens */
+#define	MAGIC_NO_CHECK_TEXT	0x020000 /* Don't check for text files */
+#define	MAGIC_NO_CHECK_CDF	0x040000 /* Don't check for cdf files */
+#define	MAGIC_NO_CHECK_TOKENS	0x100000 /* Don't check tokens */
+#define MAGIC_NO_CHECK_ENCODING 0x200000 /* Don't check text encodings */
+
+/* Defined for backwards compatibility (renamed) */
+#define	MAGIC_NO_CHECK_ASCII	MAGIC_NO_CHECK_TEXT
 
 /* Defined for backwards compatibility; do nothing */
 #define	MAGIC_NO_CHECK_FORTRAN	0x000000 /* Don't check ascii/fortran */

+ 6 - 3
src/patchlevel.h

@@ -1,11 +1,14 @@
-#define	FILE_VERSION_MAJOR	4
-#define	patchlevel		26
+#define	FILE_VERSION_MAJOR	5
+#define	patchlevel		0
 
 /*
  * Patchlevel file for Ian Darwin's MAGIC command.
- * $File: patchlevel.h,v 1.70 2008/08/30 10:01:01 christos Exp $
+ * $File: patchlevel.h,v 1.71 2009/01/21 19:09:42 christos Exp $
  *
  * $Log: patchlevel.h,v $
+ * Revision 1.71  2009/01/21 19:09:42  christos
+ * file 5.0
+ *
  * Revision 1.70  2008/08/30 10:01:01  christos
  * file 4.26
  *

+ 8 - 8
src/print.c

@@ -30,8 +30,11 @@
  */
 
 #include "file.h"
-#include <stdio.h>
-#include <errno.h>
+
+#ifndef lint
+FILE_RCSID("@(#)$File: print.c,v 1.66 2009/02/03 20:27:51 christos Exp $")
+#endif  /* lint */
+
 #include <string.h>
 #include <stdarg.h>
 #include <stdlib.h>
@@ -40,10 +43,6 @@
 #endif
 #include <time.h>
 
-#ifndef lint
-FILE_RCSID("@(#)$File: print.c,v 1.63 2008/02/17 19:28:54 rrt Exp $")
-#endif  /* lint */
-
 #define SZOF(a)	(sizeof(a) / sizeof(a[0]))
 
 #ifndef COMPILE_ONLY
@@ -64,7 +63,8 @@ file_mdump(struct magic *m)
 		if (m->in_op & FILE_OPINVERSE)
 			(void) fputc('~', stderr);
 		(void) fprintf(stderr, "%c%u),",
-			       ((m->in_op & FILE_OPS_MASK) < SZOF(optyp)) ? 
+			       ((size_t)(m->in_op & FILE_OPS_MASK) <
+			       SZOF(optyp)) ? 
 					optyp[m->in_op & FILE_OPS_MASK] : '?',
 				m->in_offset);
 	}
@@ -93,7 +93,7 @@ file_mdump(struct magic *m)
 			(void) fprintf(stderr, "/%u", m->str_range);
 	}
 	else {
-		if ((m->mask_op & FILE_OPS_MASK) < SZOF(optyp))
+		if ((size_t)(m->mask_op & FILE_OPS_MASK) < SZOF(optyp))
 			(void) fputc(optyp[m->mask_op & FILE_OPS_MASK], stderr);
 		else
 			(void) fputc('?', stderr);

+ 256 - 0
src/readcdf.c

@@ -0,0 +1,256 @@
+/*-
+ * Copyright (c) 2008 Christos Zoulas
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "file.h"
+
+#ifndef lint
+FILE_RCSID("@(#)$File: readcdf.c,v 1.11 2009/02/03 20:27:51 christos Exp $")
+#endif
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <time.h>
+#include <ctype.h>
+
+#include "cdf.h"
+#include "magic.h"
+
+#define NOTMIME(ms) (((ms)->flags & MAGIC_MIME) == 0)
+
+/*
+ * Print the properties unpacked from a CDF summary-information stream.
+ *
+ * For non-MIME output every property is appended as ", <name>: <value>".
+ * For MIME output nothing is printed per property; the "name of
+ * application" property is only inspected to choose a more specific
+ * subtype (the default is "vnd.ms-office"), and a single
+ * "application/<subtype>" string is printed after the loop.
+ *
+ * Returns 1 on success, or -1 if printing fails or a property has a type
+ * this function does not handle.
+ */
+private int
+cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info,
+    size_t count)
+{
+	size_t i;
+	cdf_timestamp_t tp;
+	struct timespec ts;
+	char buf[64];
+	const char *str = "vnd.ms-office";
+	const char *s;
+	int len;
+
+	for (i = 0; i < count; i++) {
+		cdf_print_property_name(buf, sizeof(buf), info[i].pi_id);
+		switch (info[i].pi_type) {
+		case CDF_SIGNED16:
+			if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf,
+			    info[i].pi_s16) == -1)
+				return -1;
+			break;
+		case CDF_SIGNED32:
+			if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf,
+			    info[i].pi_s32) == -1)
+				return -1;
+			break;
+		case CDF_UNSIGNED32:
+			if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf,
+			    info[i].pi_u32) == -1)
+				return -1;
+			break;
+		case CDF_LENGTH32_STRING:
+			len = info[i].pi_str.s_len;
+			/* Strings of length 0 or 1 are not reported. */
+			if (len > 1) {
+				s = info[i].pi_str.s_buf;
+				if (NOTMIME(ms)) {
+					if (file_printf(ms, ", %s: %.*s", buf,
+					    len, s) == -1)
+						return -1;
+				} else if (info[i].pi_id ==
+					CDF_PROPERTY_NAME_OF_APPLICATION) {
+					/*
+					 * NOTE(review): strstr() assumes s is
+					 * NUL-terminated -- confirm against
+					 * the code that unpacks pi_str.
+					 */
+					if (strstr(s, "Word"))
+						str = "msword";
+					else if (strstr(s, "Excel"))
+						str = "vnd.ms-excel";
+					else if (strstr(s, "Powerpoint"))
+						str = "vnd.ms-powerpoint";
+				}
+			}
+			break;
+		case CDF_FILETIME:
+			tp = info[i].pi_tp;
+			if (tp != 0) {
+				/*
+				 * Values below this threshold are printed as
+				 * an elapsed time, larger ones as a date.
+				 */
+				if (tp < 1000000000000000LL) {
+					char tbuf[64];
+					cdf_print_elapsed_time(tbuf,
+					    sizeof(tbuf), tp);
+					if (NOTMIME(ms) && file_printf(ms,
+					    ", %s: %s", buf, tbuf) == -1)
+						return -1;
+				} else {
+					char *c, *ec;
+					cdf_timestamp_to_timespec(&ts, tp);
+					c = ctime(&ts.tv_sec);
+					/*
+					 * ctime() can return NULL for a time
+					 * it cannot represent; skip this
+					 * property rather than hand NULL to
+					 * strchr().
+					 */
+					if (c == NULL)
+						break;
+					/* Drop ctime()'s trailing newline. */
+					if ((ec = strchr(c, '\n')) != NULL)
+						*ec = '\0';
+
+					if (NOTMIME(ms) && file_printf(ms,
+					    ", %s: %s", buf, c) == -1)
+						return -1;
+				}
+			}
+			break;
+		case CDF_CLIPBOARD:
+			break;
+		default:
+			file_error(ms, 0, "Internal parsing error");
+			return -1;
+		}
+	}
+	if (!NOTMIME(ms)) {
+		if (file_printf(ms, "application/%s", str) == -1)
+			return -1;
+	}
+	return 1;
+}
+
+/*
+ * Unpack a CDF summary-information stream and print what it describes.
+ *
+ * For non-MIME output this prints "CDF V2 Document", the byte order and the
+ * originating OS with its version, then hands the unpacked properties to
+ * cdf_file_property_info().
+ *
+ * Returns 0 if the stream's byte-order mark is not 0xfffe, -1 on error,
+ * and otherwise the result of cdf_file_property_info().
+ *
+ * The property array produced by cdf_unpack_summary_info() is released on
+ * every path that follows a successful unpack.
+ */
+private int
+cdf_file_summary_info(struct magic_set *ms, const cdf_stream_t *sst)
+{
+	cdf_summary_info_header_t si;
+	cdf_property_info_t *info;
+	size_t count;
+	int m = -1;
+
+	if (cdf_unpack_summary_info(sst, &si, &info, &count) == -1) {
+		/*
+		 * NOTE(review): this reads si after a failed unpack;
+		 * presumably the header is filled in before the failure is
+		 * reported -- confirm in cdf_unpack_summary_info().
+		 */
+		if (si.si_byte_order != 0xfffe)
+			return 0;
+		else
+			return -1;
+	}
+
+	if (si.si_byte_order != 0xfffe) {
+		m = 0;
+		goto out;
+	}
+
+	if (NOTMIME(ms)) {
+		if (file_printf(ms, "CDF V2 Document") == -1)
+			goto out;
+
+		if (file_printf(ms, ", %s Endian",
+		    si.si_byte_order == 0xfffe ?  "Little" : "Big") == -1)
+			goto out;
+		switch (si.si_os) {
+		case 2:
+			if (file_printf(ms, ", Os: Windows, Version %d.%d",
+			    si.si_os_version & 0xff, si.si_os_version >> 8)
+			    == -1)
+				goto out;
+			break;
+		case 1:
+			if (file_printf(ms, ", Os: MacOS, Version %d.%d",
+			    si.si_os_version >> 8, si.si_os_version & 0xff)
+			    == -1)
+				goto out;
+			break;
+		default:
+			if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os,
+			    si.si_os_version & 0xff, si.si_os_version >> 8)
+			    == -1)
+				goto out;
+			break;
+		}
+	}
+
+	m = cdf_file_property_info(ms, info, count);
+out:
+	/* Single exit so the property array is not leaked on early failure. */
+	free(info);
+
+	return m;
+}
+
+/*
+ * Try to identify the file open on fd as a CDF document and print its
+ * summary information.
+ *
+ * Returns 0 when MAGIC_APPLE is set, when the CDF header cannot be read, or
+ * when the file has no readable summary information; -1 when one of the
+ * CDF structures cannot be read (an error is recorded with file_error());
+ * otherwise the result of cdf_file_summary_info().
+ *
+ * buf and nbytes are unused; all data is read through fd.
+ */
+protected int
+file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf,
+    size_t nbytes)
+{
+	cdf_header_t h;
+	cdf_sat_t sat, ssat;
+	cdf_stream_t sst, scn;
+	cdf_dir_t dir;
+	int i;
+	/* Reference the unused parameters to silence compiler warnings. */
+	(void)&nbytes;
+	(void)&buf;
+
+	if (ms->flags & MAGIC_APPLE)
+		return 0;
+	if (cdf_read_header(fd, &h) == -1)
+		return 0;
+#ifdef CDF_DEBUG
+	cdf_dump_header(&h);
+#endif
+
+	if (cdf_read_sat(fd, &h, &sat) == -1) {
+		file_error(ms, errno, "Can't read SAT");
+		return -1;
+	}
+#ifdef CDF_DEBUG
+	cdf_dump_sat("SAT", &h, &sat);
+#endif
+
+	/*
+	 * From here on each failure jumps to the label that frees exactly
+	 * the tables read so far; i carries the return value.
+	 */
+	if ((i = cdf_read_ssat(fd, &h, &sat, &ssat)) == -1) {
+		file_error(ms, errno, "Can't read SSAT");
+		goto out1;
+	}
+#ifdef CDF_DEBUG
+	cdf_dump_sat("SSAT", &h, &ssat);
+#endif
+
+	if ((i = cdf_read_dir(fd, &h, &sat, &dir)) == -1) {
+		file_error(ms, errno, "Can't read directory");
+		goto out2;
+	}
+
+	if ((i = cdf_read_short_stream(fd, &h, &sat, &dir, &sst)) == -1) {
+		file_error(ms, errno, "Cannot read short stream");
+		goto out3;
+	}
+
+#ifdef CDF_DEBUG
+	cdf_dump_dir(fd, &h, &sat, &ssat, &sst, &dir);
+#endif
+	if ((i = cdf_read_summary_info(fd, &h, &sat, &ssat, &sst, &dir, &scn))
+	    == -1) {
+		/* Some files don't have summary info! */
+#ifdef notyet
+		file_error(ms, errno, "Can't read summary_info");
+#else
+		/* Treat a missing summary as "no match", not as an error. */
+		i = 0;
+#endif
+		goto out4;
+	}
+#ifdef CDF_DEBUG
+	cdf_dump_summary_info(&h, &scn);
+#endif
+	if ((i = cdf_file_summary_info(ms, &scn)) == -1)
+		file_error(ms, errno, "Can't expand summary_info");
+	free(scn.sst_tab);
+out4:
+	free(sst.sst_tab);
+out3:
+	free(dir.dir_tab);
+out2:
+	free(ssat.sat_tab);
+out1:
+	free(sat.sat_tab);
+	return i;
+}

+ 15 - 12
src/readelf.c

@@ -26,6 +26,10 @@
  */
 #include "file.h"
 
+#ifndef lint
+FILE_RCSID("@(#)$File: readelf.c,v 1.81 2008/11/04 16:38:28 christos Exp $")
+#endif
+
 #ifdef BUILTIN_ELF
 #include <string.h>
 #include <ctype.h>
@@ -37,10 +41,6 @@
 #include "readelf.h"
 #include "magic.h"
 
-#ifndef lint
-FILE_RCSID("@(#)$File: readelf.c,v 1.76 2008/07/16 18:00:57 christos Exp $")
-#endif
-
 #ifdef	ELFCORE
 private int dophn_core(struct magic_set *, int, int, int, off_t, int, size_t,
     off_t, int *);
@@ -875,7 +875,7 @@ doshn(struct magic_set *ms, int clazz, int swap, int fd, off_t off, int num,
 
 			noff = 0;
 			for (;;) {
-				if (noff >= (size_t)xsh_size)
+				if (noff >= (off_t)xsh_size)
 					break;
 				noff = donote(ms, nbuf, (size_t)noff,
 				    (size_t)xsh_size, clazz, swap, 4,
@@ -907,8 +907,9 @@ doshn(struct magic_set *ms, int clazz, int swap, int fd, off_t off, int num,
 			for (;;) {
 				Elf32_Cap cap32;
 				Elf64_Cap cap64;
-				char cbuf[MAX(sizeof cap32, sizeof cap64)];
-				if ((coff += xcap_sizeof) >= (size_t)xsh_size)
+				char cbuf[/*CONSTCOND*/
+				    MAX(sizeof cap32, sizeof cap64)];
+				if ((coff += xcap_sizeof) >= (off_t)xsh_size)
 					break;
 				if (read(fd, cbuf, (size_t)xcap_sizeof) !=
 				    (ssize_t)xcap_sizeof) {
@@ -929,7 +930,8 @@ doshn(struct magic_set *ms, int clazz, int swap, int fd, off_t off, int num,
 					if (file_printf(ms,
 					    ", with unknown capability "
 					    "0x%llx = 0x%llx",
-					    xcap_tag, xcap_val) == -1)
+					    (unsigned long long)xcap_tag,
+					    (unsigned long long)xcap_val) == -1)
 						return -1;
 					break;
 				}
@@ -976,11 +978,12 @@ doshn(struct magic_set *ms, int clazz, int swap, int fd, off_t off, int num,
 			if (cap_hw1)
 				if (file_printf(ms,
 				    " unknown hardware capability 0x%llx",
-				    cap_hw1) == -1)
+				    (unsigned long long)cap_hw1) == -1)
 					return -1;
 		} else {
 			if (file_printf(ms,
-			    " hardware capability 0x%llx", cap_hw1) == -1)
+			    " hardware capability 0x%llx",
+			    (unsigned long long)cap_hw1) == -1)
 				return -1;
 		}
 	}
@@ -996,7 +999,7 @@ doshn(struct magic_set *ms, int clazz, int swap, int fd, off_t off, int num,
 		if (cap_sf1)
 			if (file_printf(ms,
 			    ", with unknown software capability 0x%llx",
-			    cap_sf1) == -1)
+			    (unsigned long long)cap_sf1) == -1)
 				return -1;
 	}
 	return 0;
@@ -1138,7 +1141,7 @@ file_tryelf(struct magic_set *ms, int fd, const unsigned char *buf,
 	Elf64_Ehdr elf64hdr;
 	uint16_t type;
 
-	if (ms->flags & MAGIC_MIME)
+	if (ms->flags & (MAGIC_MIME|MAGIC_APPLE))
 		return 0;
 	/*
 	 * ELF executables have multiple section headers in arbitrary

+ 273 - 95
src/softmagic.c

@@ -2,7 +2,7 @@
  * Copyright (c) Ian F. Darwin 1986-1995.
  * Software written by Ian F. Darwin and others;
  * maintained 1995-present by Christos Zoulas and others.
- * 
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -12,7 +12,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- *  
+ *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -30,6 +30,11 @@
  */
 
 #include "file.h"
+
+#ifndef	lint
+FILE_RCSID("@(#)$File: softmagic.c,v 1.133 2008/11/07 22:50:37 christos Exp $")
+#endif	/* lint */
+
 #include "magic.h"
 #include <string.h>
 #include <ctype.h>
@@ -37,33 +42,25 @@
 #include <time.h>
 
 
-#ifndef	lint
-FILE_RCSID("@(#)$File: softmagic.c,v 1.120 2008/07/28 17:25:21 christos Exp $")
-#endif	/* lint */
-
 private int match(struct magic_set *, struct magic *, uint32_t,
     const unsigned char *, size_t, int);
 private int mget(struct magic_set *, const unsigned char *,
     struct magic *, size_t, unsigned int);
 private int magiccheck(struct magic_set *, struct magic *);
 private int32_t mprint(struct magic_set *, struct magic *);
+private int32_t moffset(struct magic_set *, struct magic *);
 private void mdebug(uint32_t, const char *, size_t);
 private int mcopy(struct magic_set *, union VALUETYPE *, int, int,
     const unsigned char *, uint32_t, size_t, size_t);
 private int mconvert(struct magic_set *, struct magic *);
 private int print_sep(struct magic_set *, int);
+private int handle_annotation(struct magic_set *, struct magic *);
 private void cvt_8(union VALUETYPE *, const struct magic *);
 private void cvt_16(union VALUETYPE *, const struct magic *);
 private void cvt_32(union VALUETYPE *, const struct magic *);
 private void cvt_64(union VALUETYPE *, const struct magic *);
 
 /*
- * Macro to give description string according to whether we want plain
- * text or MIME type
- */
-#define MAGIC_DESC ((ms->flags & MAGIC_MIME) ? m->mimetype : m->desc)
-
-/*
  * softmagic - lookup one file in parsed, in-memory copy of database
  * Passed the name and FILE * of one file to be typed.
  */
@@ -114,15 +111,16 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
 	uint32_t magindex = 0;
 	unsigned int cont_level = 0;
 	int need_separator = 0;
-	int returnval = 0; /* if a match is found it is set to 1*/
+	int returnval = 0, e; /* if a match is found it is set to 1*/
 	int firstline = 1; /* a flag to print X\n  X\n- X */
 	int printed_something = 0;
+	int print = (ms->flags & (MAGIC_MIME|MAGIC_APPLE)) == 0;
 
 	if (file_check_mem(ms, cont_level) == -1)
 		return -1;
 
 	for (magindex = 0; magindex < nmagic; magindex++) {
-		int flush;
+		int flush = 0;
 		struct magic *m = &magic[magindex];
 
 		if ((m->flag & BINTEST) != mode) {
@@ -137,11 +135,16 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
 		ms->line = m->lineno;
 
 		/* if main entry matches, print it... */
-		flush = !mget(ms, s, m, nbytes, cont_level);
-		if (flush) {
-			if (m->reln == '!')
-				flush = 0;
-		} else {	
+		switch (mget(ms, s, m, nbytes, cont_level)) {
+		case -1:
+			return -1;
+		case 0:
+			flush = m->reln != '!';
+			break;
+		default:
+			if (m->type == FILE_INDIRECT)
+				returnval = 1;
+				
 			switch (magiccheck(ms, m)) {
 			case -1:
 				return -1;
@@ -149,11 +152,13 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
 				flush++;
 				break;
 			default:
+				flush = 0;
 				break;
 			}
+			break;
 		}
 		if (flush) {
-			/* 
+			/*
 			 * main entry didn't match,
 			 * flush its continuations
 			 */
@@ -167,16 +172,21 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
 		 * If we are going to print something, we'll need to print
 		 * a blank before we print something else.
 		 */
-		if (*MAGIC_DESC) {
+		if (*m->desc) {
 			need_separator = 1;
 			printed_something = 1;
+			if ((e = handle_annotation(ms, m)) != 0)
+				return e;
 			if (print_sep(ms, firstline) == -1)
 				return -1;
 		}
 
-		if ((ms->c.li[cont_level].off = mprint(ms, m)) == -1)
+
+		if (print && mprint(ms, m) == -1)
 			return -1;
 
+		ms->c.li[cont_level].off = moffset(ms, m);
+
 		/* and any continuations that match */
 		if (file_check_mem(ms, ++cont_level) == -1)
 			return -1;
@@ -208,10 +218,21 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
 					continue;
 			}
 #endif
-			flush = !mget(ms, s, m, nbytes, cont_level);
-			if (flush && m->reln != '!')
-				continue;
-				
+			switch (mget(ms, s, m, nbytes, cont_level)) {
+			case -1:
+				return -1;
+			case 0:
+				if (m->reln != '!')
+					continue;
+				flush = 1;
+				break;
+			default:
+				if (m->type == FILE_INDIRECT)
+					returnval = 1;
+				flush = 0;
+				break;
+			}
+
 			switch (flush ? 1 : magiccheck(ms, m)) {
 			case -1:
 				return -1;
@@ -234,8 +255,10 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
 				 * If we are going to print something,
 				 * make sure that we have a separator first.
 				 */
-				if (*MAGIC_DESC) {
+				if (*m->desc) {
 					printed_something = 1;
+					if ((e = handle_annotation(ms, m)) != 0)
+						return e;
 					if (print_sep(ms, firstline) == -1)
 						return -1;
 				}
@@ -248,14 +271,18 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
 				/* space if previous printed */
 				if (need_separator
 				    && ((m->flag & NOSPACE) == 0)
-				    && *MAGIC_DESC) {
-					if (file_printf(ms, " ") == -1)
+				    && *m->desc) {
+					if (print &&
+					    file_printf(ms, " ") == -1)
 						return -1;
 					need_separator = 0;
 				}
-				if ((ms->c.li[cont_level].off = mprint(ms, m)) == -1)
+				if (print && mprint(ms, m) == -1)
 					return -1;
-				if (*MAGIC_DESC)
+
+				ms->c.li[cont_level].off = moffset(ms, m);
+
+				if (*m->desc)
 					need_separator = 1;
 
 				/*
@@ -270,11 +297,12 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
 		}
 		if (printed_something) {
 			firstline = 0;
-			returnval = 1;
+			if (print)
+				returnval = 1;
 		}
 		if ((ms->flags & MAGIC_CONTINUE) == 0 && printed_something) {
-			return 1; /* don't keep searching */
-		}			
+			return returnval; /* don't keep searching */
+		}
 	}
 	return returnval;  /* This is hit if -k is set or there is no match */
 }
@@ -285,7 +313,7 @@ check_fmt(struct magic_set *ms, struct magic *m)
 	regex_t rx;
 	int rc;
 
-	if (strchr(MAGIC_DESC, '%') == NULL)
+	if (strchr(m->desc, '%') == NULL)
 		return 0;
 
 	rc = regcomp(&rx, "%[-0-9\\.]*s", REG_EXTENDED|REG_NOSUB);
@@ -295,7 +323,7 @@ check_fmt(struct magic_set *ms, struct magic *m)
 		file_magerror(ms, "regex error %d, (%s)", rc, errmsg);
 		return -1;
 	} else {
-		rc = regexec(&rx, MAGIC_DESC, 0, 0, 0);
+		rc = regexec(&rx, m->desc, 0, 0, 0);
 		regfree(&rx);
 		return !rc;
 	}
@@ -328,7 +356,7 @@ mprint(struct magic_set *ms, struct magic *m)
 	float vf;
 	double vd;
 	int64_t t = 0;
- 	char *buf;
+ 	char buf[128];
 	union VALUETYPE *p = &ms->ms_value;
 
   	switch (m->type) {
@@ -338,13 +366,13 @@ mprint(struct magic_set *ms, struct magic *m)
 		case -1:
 			return -1;
 		case 1:
-			if (asprintf(&buf, "%c", (unsigned char)v) < 0)
-				return -1;
-			if (file_printf(ms, MAGIC_DESC, buf) == -1)
+			(void)snprintf(buf, sizeof(buf), "%c",
+			    (unsigned char)v);
+			if (file_printf(ms, m->desc, buf) == -1)
 				return -1;
 			break;
 		default:
-			if (file_printf(ms, MAGIC_DESC, (unsigned char) v) == -1)
+			if (file_printf(ms, m->desc, (unsigned char) v) == -1)
 				return -1;
 			break;
 		}
@@ -359,13 +387,14 @@ mprint(struct magic_set *ms, struct magic *m)
 		case -1:
 			return -1;
 		case 1:
-			if (asprintf(&buf, "%hu", (unsigned short)v) < 0)
-				return -1;
-			if (file_printf(ms, MAGIC_DESC, buf) == -1)
+			(void)snprintf(buf, sizeof(buf), "%hu",
+			    (unsigned short)v);
+			if (file_printf(ms, m->desc, buf) == -1)
 				return -1;
 			break;
 		default:
-			if (file_printf(ms, MAGIC_DESC, (unsigned short) v) == -1)
+			if (
+			    file_printf(ms, m->desc, (unsigned short) v) == -1)
 				return -1;
 			break;
 		}
@@ -381,13 +410,12 @@ mprint(struct magic_set *ms, struct magic *m)
 		case -1:
 			return -1;
 		case 1:
-			if (asprintf(&buf, "%u", (uint32_t)v) < 0)
-				return -1;
-			if (file_printf(ms, MAGIC_DESC, buf) == -1)
+			(void)snprintf(buf, sizeof(buf), "%u", (uint32_t)v);
+			if (file_printf(ms, m->desc, buf) == -1)
 				return -1;
 			break;
 		default:
-			if (file_printf(ms, MAGIC_DESC, (uint32_t) v) == -1)
+			if (file_printf(ms, m->desc, (uint32_t) v) == -1)
 				return -1;
 			break;
 		}
@@ -398,7 +426,7 @@ mprint(struct magic_set *ms, struct magic *m)
   	case FILE_BEQUAD:
   	case FILE_LEQUAD:
 		v = file_signextend(ms, m, p->q);
-		if (file_printf(ms, MAGIC_DESC, (uint64_t) v) == -1)
+		if (file_printf(ms, m->desc, (uint64_t) v) == -1)
 			return -1;
 		t = ms->offset + sizeof(int64_t);
   		break;
@@ -408,14 +436,14 @@ mprint(struct magic_set *ms, struct magic *m)
   	case FILE_BESTRING16:
   	case FILE_LESTRING16:
 		if (m->reln == '=' || m->reln == '!') {
-			if (file_printf(ms, MAGIC_DESC, m->value.s) == -1)
+			if (file_printf(ms, m->desc, m->value.s) == -1)
 				return -1;
 			t = ms->offset + m->vallen;
 		}
 		else {
 			if (*m->value.s == '\0')
 				p->s[strcspn(p->s, "\n")] = '\0';
-			if (file_printf(ms, MAGIC_DESC, p->s) == -1)
+			if (file_printf(ms, m->desc, p->s) == -1)
 				return -1;
 			t = ms->offset + strlen(p->s);
 			if (m->type == FILE_PSTRING)
@@ -427,7 +455,7 @@ mprint(struct magic_set *ms, struct magic *m)
 	case FILE_BEDATE:
 	case FILE_LEDATE:
 	case FILE_MEDATE:
-		if (file_printf(ms, MAGIC_DESC, file_fmttime(p->l, 1)) == -1)
+		if (file_printf(ms, m->desc, file_fmttime(p->l, 1)) == -1)
 			return -1;
 		t = ms->offset + sizeof(time_t);
 		break;
@@ -436,7 +464,7 @@ mprint(struct magic_set *ms, struct magic *m)
 	case FILE_BELDATE:
 	case FILE_LELDATE:
 	case FILE_MELDATE:
-		if (file_printf(ms, MAGIC_DESC, file_fmttime(p->l, 0)) == -1)
+		if (file_printf(ms, m->desc, file_fmttime(p->l, 0)) == -1)
 			return -1;
 		t = ms->offset + sizeof(time_t);
 		break;
@@ -444,8 +472,8 @@ mprint(struct magic_set *ms, struct magic *m)
 	case FILE_QDATE:
 	case FILE_BEQDATE:
 	case FILE_LEQDATE:
-		if (file_printf(ms, MAGIC_DESC, file_fmttime((uint32_t)p->q, 1))
-		    == -1)
+		if (file_printf(ms, m->desc, file_fmttime((uint32_t)p->q,
+		    1)) == -1)
 			return -1;
 		t = ms->offset + sizeof(uint64_t);
 		break;
@@ -453,8 +481,8 @@ mprint(struct magic_set *ms, struct magic *m)
 	case FILE_QLDATE:
 	case FILE_BEQLDATE:
 	case FILE_LEQLDATE:
-		if (file_printf(ms, MAGIC_DESC, file_fmttime((uint32_t)p->q, 0))
-		    == -1)
+		if (file_printf(ms, m->desc, file_fmttime((uint32_t)p->q,
+		    0)) == -1)
 			return -1;
 		t = ms->offset + sizeof(uint64_t);
 		break;
@@ -467,13 +495,12 @@ mprint(struct magic_set *ms, struct magic *m)
 		case -1:
 			return -1;
 		case 1:
-			if (asprintf(&buf, "%g", vf) < 0)
-				return -1;
-			if (file_printf(ms, MAGIC_DESC, buf) == -1)
+			(void)snprintf(buf, sizeof(buf), "%g", vf);
+			if (file_printf(ms, m->desc, buf) == -1)
 				return -1;
 			break;
 		default:
-			if (file_printf(ms, MAGIC_DESC, vf) == -1)
+			if (file_printf(ms, m->desc, vf) == -1)
 				return -1;
 			break;
 		}
@@ -488,13 +515,12 @@ mprint(struct magic_set *ms, struct magic *m)
 		case -1:
 			return -1;
 		case 1:
-			if (asprintf(&buf, "%g", vd) < 0)
-				return -1;
-			if (file_printf(ms, MAGIC_DESC, buf) == -1)
+			(void)snprintf(buf, sizeof(buf), "%g", vd);
+			if (file_printf(ms, m->desc, buf) == -1)
 				return -1;
 			break;
 		default:
-			if (file_printf(ms, MAGIC_DESC, vd) == -1)
+			if (file_printf(ms, m->desc, vd) == -1)
 				return -1;
 			break;
 		}
@@ -510,7 +536,7 @@ mprint(struct magic_set *ms, struct magic *m)
 			file_oomem(ms, ms->search.rm_len);
 			return -1;
 		}
-		rval = file_printf(ms, MAGIC_DESC, cp);
+		rval = file_printf(ms, m->desc, cp);
 		free(cp);
 
 		if (rval == -1)
@@ -524,7 +550,7 @@ mprint(struct magic_set *ms, struct magic *m)
 	}
 
 	case FILE_SEARCH:
-	  	if (file_printf(ms, MAGIC_DESC, m->value.s) == -1)
+	  	if (file_printf(ms, m->desc, m->value.s) == -1)
 			return -1;
 		if ((m->str_flags & REGEX_OFFSET_START))
 			t = ms->search.offset;
@@ -533,18 +559,118 @@ mprint(struct magic_set *ms, struct magic *m)
 		break;
 
 	case FILE_DEFAULT:
-	  	if (file_printf(ms, MAGIC_DESC, m->value.s) == -1)
+	  	if (file_printf(ms, m->desc, m->value.s) == -1)
 			return -1;
 		t = ms->offset;
 		break;
 
+	case FILE_INDIRECT:
+		t = ms->offset;
+		break;
+
 	default:
 		file_magerror(ms, "invalid m->type (%d) in mprint()", m->type);
 		return -1;
 	}
-	return(t);
+	return (int32_t)t;
 }
 
+/*
+ * Compute the offset just past the value described by magic entry `m',
+ * based on the current match position in `ms'.  Unlike mprint() this
+ * prints nothing.
+ *
+ * Returns the new offset; returns 0 for an unrecognised m->type.
+ *
+ * Side effect: for string types compared with a relation other than
+ * '=' or '!', and with an empty test value, the fetched string in
+ * ms->ms_value is truncated at its first newline (the same truncation
+ * mprint() performs before printing).
+ */
+private int32_t
+moffset(struct magic_set *ms, struct magic *m)
+{
+	switch (m->type) {
+	case FILE_BYTE:
+		return (int32_t)(ms->offset + sizeof(char));
+
+	case FILE_SHORT:
+	case FILE_BESHORT:
+	case FILE_LESHORT:
+		return (int32_t)(ms->offset + sizeof(short));
+
+	case FILE_LONG:
+	case FILE_BELONG:
+	case FILE_LELONG:
+	case FILE_MELONG:
+		return (int32_t)(ms->offset + sizeof(int32_t));
+
+	case FILE_QUAD:
+	case FILE_BEQUAD:
+	case FILE_LEQUAD:
+		return (int32_t)(ms->offset + sizeof(int64_t));
+
+	case FILE_STRING:
+	case FILE_PSTRING:
+	case FILE_BESTRING16:
+	case FILE_LESTRING16:
+		if (m->reln == '=' || m->reln == '!')
+			return (int32_t)(ms->offset + m->vallen);
+		else {
+			union VALUETYPE *p = &ms->ms_value;
+			uint32_t t;
+
+			/* an empty test value means "up to end of line" */
+			if (*m->value.s == '\0')
+				p->s[strcspn(p->s, "\n")] = '\0';
+			t = ms->offset + strlen(p->s);
+			/* one extra byte is counted for pstring */
+			if (m->type == FILE_PSTRING)
+				t++;
+			return (int32_t)t;
+		}
+
+	/*
+	 * The date and ldate families are four-byte values in the file
+	 * being examined (see magic(5)); the width of the host's time_t
+	 * is irrelevant and would skip 4 bytes too many when time_t is
+	 * 64 bits wide.
+	 */
+	case FILE_DATE:
+	case FILE_BEDATE:
+	case FILE_LEDATE:
+	case FILE_MEDATE:
+	case FILE_LDATE:
+	case FILE_BELDATE:
+	case FILE_LELDATE:
+	case FILE_MELDATE:
+		return (int32_t)(ms->offset + sizeof(uint32_t));
+
+	case FILE_QDATE:
+	case FILE_BEQDATE:
+	case FILE_LEQDATE:
+	case FILE_QLDATE:
+	case FILE_BEQLDATE:
+	case FILE_LEQLDATE:
+		return (int32_t)(ms->offset + sizeof(uint64_t));
+
+	case FILE_FLOAT:
+	case FILE_BEFLOAT:
+	case FILE_LEFLOAT:
+		return (int32_t)(ms->offset + sizeof(float));
+
+	case FILE_DOUBLE:
+	case FILE_BEDOUBLE:
+	case FILE_LEDOUBLE:
+		return (int32_t)(ms->offset + sizeof(double));
+
+	case FILE_REGEX:
+		/* either the start of the match or the byte after it */
+		if ((m->str_flags & REGEX_OFFSET_START) != 0)
+			return (int32_t)ms->search.offset;
+		else
+			return (int32_t)(ms->search.offset +
+			    ms->search.rm_len);
+
+	case FILE_SEARCH:
+		if ((m->str_flags & REGEX_OFFSET_START) != 0)
+			return (int32_t)ms->search.offset;
+		else
+			return (int32_t)(ms->search.offset + m->vallen);
+
+	case FILE_DEFAULT:
+	case FILE_INDIRECT:
+		/* nothing is consumed by these types */
+		return (int32_t)ms->offset;
+
+	default:
+		return 0;
+	}
+}
 
 #define DO_CVT(fld, cast) \
 	if (m->num_mask) \
@@ -806,6 +932,7 @@ mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir,
 			const char *c;
 			const char *last;	/* end of search region */
 			const char *buf;	/* start of search region */
+			const char *end;
 			size_t lines;
 
 			if (s == NULL) {
@@ -814,10 +941,10 @@ mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir,
 				return 0;
 			}
 			buf = (const char *)s + offset;
-			last = (const char *)s + nbytes;
+			end = last = (const char *)s + nbytes;
 			/* mget() guarantees buf <= last */
 			for (lines = linecnt, b = buf;
-			     lines && ((b = strchr(c = b, '\n')) || (b = strchr(c, '\r')));
+			     lines && ((b = memchr(c = b, '\n', end - b)) || (b = memchr(c, '\r', end - c)));
 			     lines--, b++) {
 				last = b;
 				if (b[0] == '\r' && b[1] == '\n')
@@ -825,7 +952,7 @@ mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir,
 			}
 			if (lines)
 				last = (const char *)s + nbytes;
-			
+
 			ms->search.s = buf;
 			ms->search.s_len = last - buf;
 			ms->search.offset = offset;
@@ -838,13 +965,13 @@ mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir,
 			const unsigned char *esrc = s + nbytes;
 			char *dst = p->s;
 			char *edst = &p->s[sizeof(p->s) - 1];
-			
+
 			if (type == FILE_BESTRING16)
 				src++;
-			
+
 			/* check for pointer overflow */
 			if (src < s) {
-				file_magerror(ms, "invalid offset %zu in mcopy()",
+				file_magerror(ms, "invalid offset %u in mcopy()",
 				    offset);
 				return -1;
 			}
@@ -904,7 +1031,9 @@ mget(struct magic_set *ms, const unsigned char *s,
 
 	if ((ms->flags & MAGIC_DEBUG) != 0) {
 		mdebug(offset, (char *)(void *)p, sizeof(union VALUETYPE));
+#ifndef COMPILE_ONLY
 		file_mdump(m);
+#endif
 	}
 
 	if (m->flag & INDIR) {
@@ -929,9 +1058,11 @@ mget(struct magic_set *ms, const unsigned char *s,
 				off = q->l;
 				break;
 			case FILE_BELONG:
+			case FILE_BEID3:
 				off = (int32_t)((q->hl[0]<<24)|(q->hl[1]<<16)|
 						 (q->hl[2]<<8)|(q->hl[3]));
 				break;
+			case FILE_LEID3:
 			case FILE_LELONG:
 				off = (int32_t)((q->hl[3]<<24)|(q->hl[2]<<16)|
 						 (q->hl[1]<<8)|(q->hl[0]));
@@ -1119,6 +1250,7 @@ mget(struct magic_set *ms, const unsigned char *s,
 				offset = ~offset;
 			break;
 		case FILE_BELONG:
+		case FILE_BEID3:
 			if (nbytes < (offset + 4))
 				return 0;
 			if (off) {
@@ -1189,6 +1321,7 @@ mget(struct magic_set *ms, const unsigned char *s,
 				offset = ~offset;
 			break;
 		case FILE_LELONG:
+		case FILE_LEID3:
 			if (nbytes < (offset + 4))
 				return 0;
 			if (off) {
@@ -1365,8 +1498,21 @@ mget(struct magic_set *ms, const unsigned char *s,
 			break;
 		}
 
-		if (m->flag & INDIROFFADD)
+		switch (m->in_type) {
+		case FILE_LEID3:
+		case FILE_BEID3:
+			offset = ((((offset >>  0) & 0x7f) <<  0) |
+				 (((offset >>  8) & 0x7f) <<  7) |
+				 (((offset >> 16) & 0x7f) << 14) |
+				 (((offset >> 24) & 0x7f) << 21)) + 10;
+			break;
+		default:
+			break;
+		}
+
+		if (m->flag & INDIROFFADD) {
 			offset += ms->c.li[cont_level-1].off;
+		}
 		if (mcopy(ms, p, m->type, 0, s, offset, nbytes, count) == -1)
 			return -1;
 		ms->offset = offset;
@@ -1374,7 +1520,9 @@ mget(struct magic_set *ms, const unsigned char *s,
 		if ((ms->flags & MAGIC_DEBUG) != 0) {
 			mdebug(offset, (char *)(void *)p,
 			    sizeof(union VALUETYPE));
+#ifndef COMPILE_ONLY
 			file_mdump(m);
+#endif
 		}
 	}
 
@@ -1384,14 +1532,14 @@ mget(struct magic_set *ms, const unsigned char *s,
 		if (nbytes < (offset + 1)) /* should alway be true */
 			return 0;
 		break;
-		
+
 	case FILE_SHORT:
 	case FILE_BESHORT:
 	case FILE_LESHORT:
 		if (nbytes < (offset + 2))
 			return 0;
 		break;
-		
+
 	case FILE_LONG:
 	case FILE_BELONG:
 	case FILE_LELONG:
@@ -1410,7 +1558,7 @@ mget(struct magic_set *ms, const unsigned char *s,
 		if (nbytes < (offset + 4))
 			return 0;
 		break;
-		
+
 	case FILE_DOUBLE:
 	case FILE_BEDOUBLE:
 	case FILE_LEDOUBLE:
@@ -1430,6 +1578,15 @@ mget(struct magic_set *ms, const unsigned char *s,
 			return 0;
 		break;
 
+	case FILE_INDIRECT:
+	  	if ((ms->flags & (MAGIC_MIME|MAGIC_APPLE)) == 0 &&
+		    file_printf(ms, m->desc) == -1)
+			return -1;
+		if (nbytes < offset)
+			return 0;
+		return file_softmagic(ms, s + offset, nbytes - offset,
+		    BINTEST);
+
 	case FILE_DEFAULT:	/* nothing to check */
 	default:
 		break;
@@ -1460,7 +1617,7 @@ file_strncmp(const char *s1, const char *s2, size_t len, uint32_t flags)
 	if (0L == flags) { /* normal string: do it fast */
 		while (len-- > 0)
 			if ((v = *b++ - *a++) != '\0')
-				break; 
+				break;
 	}
 	else { /* combine the others */
 		while (len-- > 0) {
@@ -1474,8 +1631,8 @@ file_strncmp(const char *s1, const char *s2, size_t len, uint32_t flags)
 				if ((v = toupper(*b++) - *a++) != '\0')
 					break;
 			}
-			else if ((flags & STRING_COMPACT_BLANK) && 
-			    isspace(*a)) { 
+			else if ((flags & STRING_COMPACT_BLANK) &&
+			    isspace(*a)) {
 				a++;
 				if (isspace(*b++)) {
 					while (isspace(*b))
@@ -1570,26 +1727,27 @@ magiccheck(struct magic_set *ms, struct magic *m)
 		case 'x':
 			matched = 1;
 			break;
-	
+
 		case '!':
 			matched = fv != fl;
 			break;
-	
+
 		case '=':
 			matched = fv == fl;
 			break;
-	
+
 		case '>':
 			matched = fv > fl;
 			break;
-	
+
 		case '<':
 			matched = fv < fl;
 			break;
-	
+
 		default:
 			matched = 0;
-			file_magerror(ms, "cannot happen with float: invalid relation `%c'", m->reln);
+			file_magerror(ms, "cannot happen with float: invalid relation `%c'",
+			    m->reln);
 			return -1;
 		}
 		return matched;
@@ -1603,23 +1761,23 @@ magiccheck(struct magic_set *ms, struct magic *m)
 		case 'x':
 			matched = 1;
 			break;
-	
+
 		case '!':
 			matched = dv != dl;
 			break;
-	
+
 		case '=':
 			matched = dv == dl;
 			break;
-	
+
 		case '>':
 			matched = dv > dl;
 			break;
-	
+
 		case '<':
 			matched = dv < dl;
 			break;
-	
+
 		default:
 			matched = 0;
 			file_magerror(ms, "cannot happen with double: invalid relation `%c'", m->reln);
@@ -1727,6 +1885,8 @@ magiccheck(struct magic_set *ms, struct magic *m)
 			return -1;
 		break;
 	}
+	case FILE_INDIRECT:
+		return 1;
 	default:
 		file_magerror(ms, "invalid type %d in magiccheck()", m->type);
 		return -1;
@@ -1817,12 +1977,30 @@ magiccheck(struct magic_set *ms, struct magic *m)
 }
 
 private int
+handle_annotation(struct magic_set *ms, struct magic *m)
+{
+	if (ms->flags & MAGIC_APPLE) {
+		if (file_printf(ms, "%.8s", m->apple) == -1)
+			return -1;
+		return 1;
+	}
+	if ((ms->flags & MAGIC_MIME_TYPE) && m->mimetype[0]) {
+		if (file_printf(ms, "%s", m->mimetype) == -1)
+			return -1;
+		return 1;
+	}
+	return 0;
+}
+
+private int
 print_sep(struct magic_set *ms, int firstline)
 {
+	if (ms->flags & MAGIC_MIME)
+		return 0;
 	if (firstline)
 		return 0;
 	/*
-	 * we found another match 
+	 * we found another match
 	 * put a newline and '-' to do some simple formatting
 	 */
 	return file_printf(ms, "\n- ");

+ 57 - 0
src/strlcat.c

@@ -0,0 +1,57 @@
+/*	$OpenBSD: strlcat.c,v 1.13 2005/08/08 08:05:37 espie Exp $	*/
+
+/*
+ * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* OPENBSD ORIGINAL: lib/libc/string/strlcat.c */
+
+#include <sys/types.h>
+#include <string.h>
+
+/*
+ * Appends src to string dst of size siz (unlike strncat, siz is the
+ * full size of dst, not space left).  At most siz-1 characters
+ * will be copied.  Always NUL terminates (unless siz <= strlen(dst)).
+ * Returns strlen(src) + MIN(siz, strlen(initial dst)).
+ * If retval >= siz, truncation occurred.
+ */
+size_t
+strlcat(char *dst, const char *src, size_t siz)
+{
+	char *d = dst;
+	const char *s = src;
+	size_t n = siz;
+	size_t dlen;
+
+	/* Find the end of dst and adjust bytes left but don't go past end */
+	/* (the scan examines at most siz bytes of dst) */
+	while (n-- != 0 && *d != '\0')
+		d++;
+	dlen = d - dst;
+	/* n becomes the room left in dst, terminator included */
+	n = siz - dlen;
+
+	/* no NUL within the first siz bytes: leave dst untouched */
+	if (n == 0)
+		return(dlen + strlen(s));
+	while (*s != '\0') {
+		/* keep the last free byte for the NUL written below */
+		if (n != 1) {
+			*d++ = *s;
+			n--;
+		}
+		/* s always advances, so (s - src) ends up as strlen(src) */
+		s++;
+	}
+	*d = '\0';
+
+	return(dlen + (s - src));	/* count does not include NUL */
+}

+ 53 - 0
src/strlcpy.c

@@ -0,0 +1,53 @@
+/*	$OpenBSD: strlcpy.c,v 1.10 2005/08/08 08:05:37 espie Exp $	*/
+
+/*
+ * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* OPENBSD ORIGINAL: lib/libc/string/strlcpy.c */
+
+#include <sys/types.h>
+#include <string.h>
+
+/*
+ * Copy src to string dst of size siz.  At most siz-1 characters
+ * will be copied.  Always NUL terminates (unless siz == 0).
+ * Returns strlen(src); if retval >= siz, truncation occurred.
+ */
+size_t
+strlcpy(char *dst, const char *src, size_t siz)
+{
+	char *d = dst;
+	const char *s = src;
+	size_t n = siz;
+
+	/* Copy as many bytes as will fit */
+	/* (--n sets one byte aside for the NUL; siz of 0 or 1 copies nothing) */
+	if (n != 0 && --n != 0) {
+		do {
+			/* stop once the NUL itself has been copied; n stays non-zero */
+			if ((*d++ = *s++) == 0)
+				break;
+		} while (--n != 0);
+	}
+
+	/* Not enough room in dst, add NUL and traverse rest of src */
+	if (n == 0) {
+		if (siz != 0)
+			*d = '\0';		/* NUL-terminate dst */
+		/* leaves s one past src's NUL, as the copy loop does on success */
+		while (*s++)
+			;
+	}
+
+	/* s is one past the NUL on both paths, hence the - 1 */
+	return(s - src - 1);	/* count does not include NUL */
+}

+ 5 - 4
src/vasprintf.c

@@ -105,12 +105,13 @@ A buffer overflow can only occur if your sprintf() do strange things or when
 you use strange formats.
 
 */
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
+#include "file.h"
+
+#ifndef	lint
+FILE_RCSID("@(#)$File: vasprintf.c,v 1.7 2009/02/03 20:27:52 christos Exp $")
+#endif	/* lint */
 
 #include <assert.h>
-#include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
 #include <stdarg.h>