Browse Source

Import upstream version 5.00

Christos Zoulas 10 years ago
parent
commit
c664f497c4

+ 126 - 30
ChangeLog

@@ -1,9 +1,105 @@
+2008-12-12 15:50  Christos Zoulas <christos@zoulas.com>
+
+	* fix initial offset calculation for non 4K sector files
+
+	* add loop limits to avoid DoS attacks by constructing
+	  looping sector references.
+
+2008-12-03 13:05  Christos Zoulas <christos@zoulas.com>
+
+	* fix memory botches on cdf file parsing.
+
+	* exit with non-zero value for any error, not just for the last
+	  file processed.
+
+2008-11-09 20:42  Charles Longeau <chl@tuxfamily.org>
+
+	* Replace all str{cpy,cat} functions with strl{cpy,cat}
+	* Ensure that strl{cpy,cat} are included in libmagic,
+	  as needed.
+
+2008-11-06 18:18  Christos Zoulas <christos@zoulas.com>
+
+	* Handle ID3 format files.
+
+2008-11-06 23:00  Reuben Thomas <rrt@sc3d.org>
+
+	* Fix --mime, --mime-type and --mime-encoding under new scheme.
+
+	* Rename "ascii" to "text" and add "encoding" test.
+
+	* Return a precise ("utf-16le" or "utf-16be") MIME charset for
+	  UTF-16.
+
+	* Fix error in comment caused by automatic indentation adding
+	  words!
+
+2008-11-06 10:35  Christos Zoulas <christos@astron.com>
+
+	* use memchr instead of strchr because the string
+	  might not be NUL terminated (Scott MacVicar)
+
+2008-11-03 07:31  Reuben Thomas <rrt@sc3d.org>
+
+	* Fix a printf with a non-literal format string.
+
+	* Fix formatting and punctuation of help for "--apple".
+
+2008-10-30 11:00  Reuben Thomas <rrt@sc3d.org>
+
+	* Correct word counts in comments of struct magic.
+
+	* Fix handle_annotation to allow both Apple and MIME types to be
+	  printed, and to return correct code if MIME type is
+	  printed (1, not 0) or if there's an error (-1 not 1).
+
+	* Fix output of charset for MIME type (precede with semi-colon;
+	  fixes Debian bug #501460).
+
+	* Fix potential attacks via conversion specifications in magic
+	  strings.
+
+	* Add a FIXME for Debian bug #488562 (magic files should be
+	  read in a defined order, by sorting the names).
+
+2008-10-18 16:45  Christos Zoulas <christos@astron.com>
+
+	* Added APPLE file creator/type
+
+2008-10-12 10:20  Christos Zoulas <christos@astron.com>
+
+	* Added CDF parsing
+
+2008-10-09 16:40  Christos Zoulas <christos@astron.com>
+
+	* filesystem and msdos patches (Joerg Jenderek)
+
+2008-10-09 13:20  Christos Zoulas <christos@astron.com>
+
+	* correct --exclude documentation issues: remove troff and fortran
+	  and rename "token" to "tokens". (Randy McMurchy)
+
+2008-10-01 10:30  Christos Zoulas <christos@astron.com>
+
+	* Read ~/.magic in addition to the default magic file not instead
+	  of, as documented in the man page.
+
+2008-09-10 21:30  Reuben Thomas  <rrt@sc3d.org>
+
+	* Comment out graphviz patterns, as they match too many files.
+
 2008-08-30 12:54  Christos Zoulas <christos@astron.com>
 
 	* Don't eat trailing \n in magic entries.
 
 	* Cast defines to allow compilation using a c++ compiler.
 
+2008-08-25 23:56  Reuben Thomas  <rrt@sc3d.org>
+
+	* Add text/x-lua MIME type for Lua scripts.
+
+	* Escape { in regex in graphviz patterns.
+
 2008-07-26 00:59  Reuben Thomas  <rrt@sc3d.org>
 
 	* Add MIME types for special files.
@@ -55,22 +151,22 @@
 
 2008-05-06 00:13  Robert Byrnes  <byrnes@wildpumpkin.net>
 
-        * src/Makefile.am:
+	* src/Makefile.am:
 	  Ensure that getopt_long and [v]asprintf are included in libmagic,
 	  as needed.
 
 	  Remove unnecessary EXTRA_DIST.
 
-        * src/Makefile.in:
+	* src/Makefile.in:
 	  Rerun automake.
 
-        * src/vasprintf.c (dispatch):
+	* src/vasprintf.c (dispatch):
 	  Fix variable precision bug: be sure to step past '*'.
 
-        * src/vasprintf.c (core):
+	* src/vasprintf.c (core):
 	  Remove unreachable code.
 
-        * src/apprentice.c (set_test_type):
+	* src/apprentice.c (set_test_type):
 	  Add cast to avoid compiler warning.
 
 2008-04-22 23:45  Christos Zoulas  <christos@astron.com>
@@ -81,12 +177,12 @@
 
 2008-04-04 11:00  Christos Zoulas  <christos@astron.com>
 
-        * >= <= is not supported, so fix the magic and warn about it.
+	* >= <= is not supported, so fix the magic and warn about it.
 	  reported by: Thien-Thi Nguyen <ttn@gnuvola.org>
 
 2008-03-27 16:16  Robert Byrnes  <byrnes@wildpumpkin.net>
 
-        * src/readelf.c (donote):
+	* src/readelf.c (donote):
 	  ELF core file command name/line bug fixes and enhancements:
 
 	  Try larger offsets first to avoid false matches
@@ -112,7 +208,7 @@
 	* Clarify UTF-8 BOM message (Reuben Thomas)
 
 	* Add HTML comment to token list in names.h
-	
+
 2007-02-04 15:50 Christos Zoulas <christos@astron.com>
 
 	* Debian fixes (Reuben Thomas)
@@ -152,7 +248,7 @@
 
 2007-10-28 20:48 Christos Zoulas <christos@astron.com>
 
- 	* float and double magic support (Behan Webster) 
+ 	* float and double magic support (Behan Webster)
 
 2007-10-28 20:48 Christos Zoulas <christos@astron.com>
 
@@ -199,7 +295,7 @@
 	  be easily parsed:
 	      mimetype [charset=character-set] [encoding=encoding-mime-type]
 
-	  Remove spurious extra text from some MIME type printouts 
+	  Remove spurious extra text from some MIME type printouts
 	  (mostly in is_tar).
 
 	  Fix one case where -i produced nothing at all (for a 1-byte file,
@@ -229,7 +325,7 @@
 2007-03-15 10:51 Christos Zoulas <christos@astron.com>
 
 	* fix fortran and nroff reversed tests (Dmitry V. Levin)
-	
+
 	* fix exclude option (Dmitry V. Levin)
 
 2007-02-08 17:30 Christos Zoulas <christos@astron.com>
@@ -248,7 +344,7 @@
 	* Add exclude flag.
 
 2007-01-18 05:29 Anon Ymous <do@not.spam.me>
-	
+
 	* Move the "type" detection code from parse() into its own table
 	  driven routine.  This avoids maintaining multiple lists in
 	  file.h.
@@ -256,7 +352,7 @@
 	* Add an optional conditional field (just before the type field).
 	  This code is wrapped in "#ifdef ENABLE_CONDITIONALS" as it is
 	  likely to go away.
-	
+
 2007-01-16 23:24 Anon Ymous <do@not.spam.me>
 
 	* Fix an initialization bug in check_mem().
@@ -327,7 +423,7 @@
 2006-12-08 16:32 Christos Zoulas <christos@astron.com>
 
 	* store and print the line number of the magic
-	  entry for debugging.         
+	  entry for debugging.
 
 	* if the magic entry did not print anything,
 	  don't treat it as a match
@@ -342,7 +438,7 @@
 	  file_softmagic.
 
 2006-11-25 13:35 Christos Zoulas <christos@astron.com>
-	
+
 	* Don't store the current offset in the magic
 	  struct, because it needs to be restored and
 	  it was not done properly all the time. Bug
@@ -432,7 +528,7 @@
 	* Look for note sections in non executables.
 
 2005-09-20 13:33 Christos Zoulas <christos@astron.com>
-	
+
 	* Don't print SVR4 Style in core files multiple times
 	    (Radek Vokál)
 
@@ -443,9 +539,9 @@
 2005-08-18 09:53 Christos Zoulas <christos@astron.com>
 
 	* Remove erroneous mention of /etc/magic in the file man page
-	  This is gentoo bug 101639. (Mike Frysinger) 
+	  This is gentoo bug 101639. (Mike Frysinger)
 
-	* Cross-compile support and detection (Mike Frysinger) 
+	* Cross-compile support and detection (Mike Frysinger)
 
 2005-08-12 10:17 Christos Zoulas <christos@astron.com>
 
@@ -477,20 +573,20 @@
 	* Avoid NULL pointer dereference in time conversion.
 
 2005-03-06 00:00  Joerg Walter <jwalt@mail.garni.ch>
-	
+
 	* Add indirect magic offset support, and search mode.
 
 2005-01-12 00:00  Stepan Kasal  <kasal@ucw.cz>
 
-        * src/ascmagic.c (file_ascmagic): Fix three bugs about text files:
-          If a CRLF text file happens to have CR at offset HOWMANY - 1
-          (currently 0xffff), it should not be counted as CR line
-          terminator.
-          If a line has length exactly MAXLINELEN, it should not yet be
-          treated as a ``very long line'', as MAXLINELEN is ``longest sane
-          line length''.
-          With CRLF, the line length was not computed correctly, and even
-          lines of length MAXLINELEN - 1 were treated as ``very long''.
+	* src/ascmagic.c (file_ascmagic): Fix three bugs about text files:
+	  If a CRLF text file happens to have CR at offset HOWMANY - 1
+	  (currently 0xffff), it should not be counted as CR line
+	  terminator.
+	  If a line has length exactly MAXLINELEN, it should not yet be
+	  treated as a ``very long line'', as MAXLINELEN is ``longest sane
+	  line length''.
+	  With CRLF, the line length was not computed correctly, and even
+	  lines of length MAXLINELEN - 1 were treated as ``very long''.
 
 2004-12-07 14:15  Christos Zoulas  <christos@astron.com>
 
@@ -525,12 +621,12 @@
 
 	* Remove 3rd and 4th copyright clause; approved by Ian Darwin.
 
-	* Fix small memory leaks; caught by: Tamas Sarlos 
+	* Fix small memory leaks; caught by: Tamas Sarlos
 	    <stamas@csillag.ilab.sztaki.hu>
 
 2004-07-24 16:33  Christos Zoulas  <christos@astron.com>
 
-	* magic.mime update Danny Milosavljevic <danny.milo@gmx.net> 
+	* magic.mime update Danny Milosavljevic <danny.milo@gmx.net>
 
 	* FreeBSD version update Oliver Eikemeier <eikemeier@fillmore-labs.com>
 

+ 29 - 27
README

@@ -1,5 +1,5 @@
 ** README for file(1) Command **
-@(#) $File: README,v 1.40 2008/04/23 03:45:20 christos Exp $
+@(#) $File: README,v 1.41 2008/12/02 16:34:46 christos Exp $
 
 E-mail: christos@astron.com
 Mailing List: file@mx.gw.com
@@ -48,33 +48,35 @@ in magic(5) format please, to the maintainer, Christos Zoulas.
 
 COPYING - read this first.
 README - read this second (you are currently reading this file).
-PORTING - read this only if the program won't compile.
-Makefile - read this next, adapt it as needed (particularly
-	the location of the old existing file command and
-	the man page layouts), type "make" to compile, 
-	"make try" to try it out against your old version.
-	Expect some diffs, particularly since your original
-	file(1) may not grok the embedded-space ("\ ") in
-	the current magic file, or may even not use the
-	magic file.
-apprentice.c - parses /etc/magic to learn magic
-ascmagic.c - third & last set of tests, based on hardwired assumptions.
-core - not included in distribution due to mailer limitations.
-debug.c - includes -c printout routine
-file.1 - man page for the command
-magic.4 - man page for the magic file, courtesy Guy Harris.
+INSTALL - read on how to install
+
+src/apprentice.c - parses /etc/magic to learn magic
+src/apptype.c - used for OS/2 specific application type magic
+src/asprintf.c - replacement for OS's that don't have it.
+src/ascmagic.c - third & last set of tests, based on hardwired assumptions.
+src/cdf.c - parser for Microsoft Compound Document Files
+src/cdf_time.c - time converter for CDF.
+src/compress.c - handles decompressing files to look inside.
+src/encoding.c - handles unicode encodings
+src/file.c - the main program
+src/file.h - header file
+src/fsmagic.c - first set of tests the program runs, based on filesystem info
+src/funcs.c - utility functions
+src/getopt_long.c - replacement for OS's that don't have it.
+src/is_tar.c, tar.h - knows about tarchives (courtesy John Gilmore).
+src/names.h - header file for ascmagic.c
+src/magic.c - the libmagic api
+src/print.c - print results, errors, warnings.
+src/readcdf.c - CDF wrapper.
+src/readelf.[ch] - Stand-alone elf parsing code.
+src/softmagic.c - 2nd set of tests, based on /etc/magic
+src/strlcat.c - replacement for OS's that don't have it.
+src/strlcpy.c - replacement for OS's that don't have it.
+src/vasprintf.c - replacement for OS's that don't have it.
+doc/file.1 - man page for the command
+doc/magic.4 - man page for the magic file, courtesy Guy Harris.
 	Install as magic.4 on USG and magic.5 on V7 or Berkeley; cf Makefile.
-file.c - main program
-file.h - header file
-fsmagic.c - first set of tests the program runs, based on filesystem info
-is_tar.c, tar.h - knows about tarchives (courtesy John Gilmore).
-magdir - directory of /etc/magic pieces
-	magdir/Makefile - ADJUST THIS FOR YOUR CONFIGURATION
-names.h - header file for ascmagic.c
-softmagic.c - 2nd set of tests, based on /etc/magic
-readelf.[ch] - Stand-alone elf parsing code.
-compress.c - on-the-fly decompression.
-print.c - print results, errors, warnings.
+Magdir - directory of /etc/magic pieces
 
 ------------------------------------------------------------------------------
 

+ 6 - 0
TODO

@@ -1,3 +1,9 @@
+Fix output so that tests for MIME and APPLE flags are not needed all
+over the place, and actual output is only done in one place. This
+needs a design. Suggestion: push possible outputs on to a list, then
+pick the last-pushed (most specific, one hopes) value at the end, or
+use a default if the list is empty.
+
 Continue to squash all magic bugs. See Debian BTS for a good source.
 
 Store arbitrarily long strings, for example for %s patterns, so that

+ 12 - 0
config.h.in

@@ -78,6 +78,12 @@
 /* Define to 1 if you have the <string.h> header file. */
 #undef HAVE_STRING_H
 
+/* Define to 1 if you have the `strlcat' function. */
+#undef HAVE_STRLCAT
+
+/* Define to 1 if you have the `strlcpy' function. */
+#undef HAVE_STRLCPY
+
 /* Define to 1 if you have the `strndup' function. */
 #undef HAVE_STRNDUP
 
@@ -90,6 +96,12 @@
 /* Define to 1 if `st_rdev' is member of `struct stat'. */
 #undef HAVE_STRUCT_STAT_ST_RDEV
 
+/* Define to 1 if `tm_gmtoff' is member of `struct tm'. */
+#undef HAVE_STRUCT_TM_TM_GMTOFF
+
+/* Define to 1 if `tm_zone' is member of `struct tm'. */
+#undef HAVE_STRUCT_TM_TM_ZONE
+
 /* Define to 1 if you have the <sys/mman.h> header file. */
 #undef HAVE_SYS_MMAN_H
 

+ 216 - 11
configure

@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.61 for file 4.26.
+# Generated by GNU Autoconf 2.61 for file 5.00.
 #
 # Report bugs to <christos@astron.com>.
 #
@@ -728,8 +728,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
 # Identity of this package.
 PACKAGE_NAME='file'
 PACKAGE_TARNAME='file'
-PACKAGE_VERSION='4.26'
-PACKAGE_STRING='file 4.26'
+PACKAGE_VERSION='5.00'
+PACKAGE_STRING='file 5.00'
 PACKAGE_BUGREPORT='christos@astron.com'
 
 # Factoring default headers for most tests.
@@ -1395,7 +1395,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures file 4.26 to adapt to many kinds of systems.
+\`configure' configures file 5.00 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1465,7 +1465,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of file 4.26:";;
+     short | recursive ) echo "Configuration of file 5.00:";;
    esac
   cat <<\_ACEOF
 
@@ -1572,7 +1572,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-file configure 4.26
+file configure 5.00
 generated by GNU Autoconf 2.61
 
 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1586,7 +1586,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by file $as_me 4.26, which was
+It was created by file $as_me 5.00, which was
 generated by GNU Autoconf 2.61.  Invocation command line was
 
   $ $0 $@
@@ -2276,7 +2276,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='file'
- VERSION='4.26'
+ VERSION='5.00'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -22166,6 +22166,209 @@ _ACEOF
 
 fi
 
+{ echo "$as_me:$LINENO: checking for struct tm.tm_gmtoff" >&5
+echo $ECHO_N "checking for struct tm.tm_gmtoff... $ECHO_C" >&6; }
+if test "${ac_cv_member_struct_tm_tm_gmtoff+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+$ac_includes_default
+int
+main ()
+{
+static struct tm ac_aggr;
+if (ac_aggr.tm_gmtoff)
+return 0;
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest.$ac_objext; then
+  ac_cv_member_struct_tm_tm_gmtoff=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+$ac_includes_default
+int
+main ()
+{
+static struct tm ac_aggr;
+if (sizeof ac_aggr.tm_gmtoff)
+return 0;
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest.$ac_objext; then
+  ac_cv_member_struct_tm_tm_gmtoff=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_cv_member_struct_tm_tm_gmtoff=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_member_struct_tm_tm_gmtoff" >&5
+echo "${ECHO_T}$ac_cv_member_struct_tm_tm_gmtoff" >&6; }
+if test $ac_cv_member_struct_tm_tm_gmtoff = yes; then
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_STRUCT_TM_TM_GMTOFF 1
+_ACEOF
+
+
+fi
+{ echo "$as_me:$LINENO: checking for struct tm.tm_zone" >&5
+echo $ECHO_N "checking for struct tm.tm_zone... $ECHO_C" >&6; }
+if test "${ac_cv_member_struct_tm_tm_zone+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+$ac_includes_default
+int
+main ()
+{
+static struct tm ac_aggr;
+if (ac_aggr.tm_zone)
+return 0;
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest.$ac_objext; then
+  ac_cv_member_struct_tm_tm_zone=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+$ac_includes_default
+int
+main ()
+{
+static struct tm ac_aggr;
+if (sizeof ac_aggr.tm_zone)
+return 0;
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest.$ac_objext; then
+  ac_cv_member_struct_tm_tm_zone=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_cv_member_struct_tm_tm_zone=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_member_struct_tm_tm_zone" >&5
+echo "${ECHO_T}$ac_cv_member_struct_tm_tm_zone" >&6; }
+if test $ac_cv_member_struct_tm_tm_zone = yes; then
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_STRUCT_TM_TM_ZONE 1
+_ACEOF
+
+
+fi
+
 { echo "$as_me:$LINENO: checking for tm_zone in struct tm" >&5
 echo $ECHO_N "checking for tm_zone in struct tm... $ECHO_C" >&6; }
 if test "${ac_cv_struct_tm_zone+set}" = set; then
@@ -23853,7 +24056,9 @@ done
 
 
 
-for ac_func in getopt_long asprintf vasprintf
+
+
+for ac_func in getopt_long asprintf vasprintf strlcpy strlcat
 do
 as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
 { echo "$as_me:$LINENO: checking for $ac_func" >&5
@@ -24469,7 +24674,7 @@ exec 6>&1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by file $as_me 4.26, which was
+This file was extended by file $as_me 5.00, which was
 generated by GNU Autoconf 2.61.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -24522,7 +24727,7 @@ Report bugs to <bug-autoconf@gnu.org>."
 _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF
 ac_cs_version="\\
-file config.status 4.26
+file config.status 5.00
 configured by $0, generated by GNU Autoconf 2.61,
   with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
 

+ 4 - 2
configure.ac

@@ -1,5 +1,5 @@
 dnl Process this file with autoconf to produce a configure script.
-AC_INIT(file, 4.26, christos@astron.com)
+AC_INIT(file, 5.00, christos@astron.com)
 AM_INIT_AUTOMAKE
 AM_CONFIG_HEADER(config.h)
 
@@ -75,6 +75,8 @@ AC_TYPE_OFF_T
 AC_TYPE_SIZE_T
 AC_CHECK_MEMBERS([struct stat.st_rdev])
 
+AC_STRUCT_TM
+AC_CHECK_MEMBERS([struct tm.tm_gmtoff, struct tm.tm_zone])
 AC_STRUCT_TIMEZONE_DAYLIGHT
 AC_SYS_LARGEFILE
 AC_FUNC_FSEEKO
@@ -139,7 +141,7 @@ dnl Checks for functions
 AC_CHECK_FUNCS(mmap strerror strndup strtoul mbrtowc mkstemp utimes utime wcwidth strtof)
 
 dnl Provide implementation of some required functions if necessary
-AC_REPLACE_FUNCS(getopt_long asprintf vasprintf)
+AC_REPLACE_FUNCS(getopt_long asprintf vasprintf strlcpy strlcat)
 
 dnl Checks for libraries
 AC_CHECK_LIB(z,gzopen)

+ 76 - 77
doc/file.man

@@ -1,5 +1,5 @@
-.\" $File: file.man,v 1.73 2008/02/19 17:58:00 rrt Exp $
-.Dd February 19, 2008
+.\" $File: file.man,v 1.79 2008/11/06 22:49:08 rrt Exp $
+.Dd October 9, 2008
 .Dt FILE __CSECTION__
 .Os
 .Sh NAME
@@ -41,12 +41,12 @@ characters and is probably safe to read on an
 terminal),
 .Em executable
 (the file contains the result of compiling a program
-in a form understandable to some 
+in a form understandable to some
 .Dv UNIX
 kernel or another),
 or
 .Em data
-meaning anything else (data is usually 
+meaning anything else (data is usually
 .Sq binary
 or non-printable).
 Exceptions are well-known file formats (core files, tar archives)
@@ -54,13 +54,13 @@ that are known to contain binary data.
 When modifying magic files or the program itself, make sure to
 .Em "preserve these keywords" .
 Users depend on knowing that all the readable files in a directory
-have the word 
-.Dq text
+have the word
+.Sq text
 printed.
-Don't do as Berkeley did and change 
-.Dq shell commands text
-to 
-.Dq shell script .
+Don't do as Berkeley did and change
+.Sq shell commands text
+to
+.Sq shell script .
 .Pp
 The filesystem tests are based on examining the return from a
 .Xr stat 2
@@ -78,16 +78,16 @@ The magic tests are used to check for files with data in
 particular fixed formats.
 The canonical example of this is a binary executable (compiled program)
 .Dv a.out
-file, whose format is defined in 
+file, whose format is defined in
 .In elf.h ,
 .In a.out.h
 and possibly
 .In exec.h
 in the standard include directory.
-These files have a 
+These files have a
 .Sq "magic number"
 stored in a particular place
-near the beginning of the file that tells the 
+near the beginning of the file that tells the
 .Dv UNIX operating system
 that the file is a binary executable, and which of several types thereof.
 The concept of a
@@ -116,11 +116,11 @@ ranges and sequences of bytes that constitute printable text
 in each set.
 If a file passes any of these tests, its character set is reported.
 ASCII, ISO-8859-x, UTF-8, and extended-ASCII files are identified
-as 
-.Dq text
+as
+.Sq text
 because they will be mostly readable on nearly any terminal;
-UTF-16 and EBCDIC are only 
-.Dq character data
+UTF-16 and EBCDIC are only
+.Sq character data
 because, while
 they contain text, it is text that will require translation
 before it can be read.
@@ -144,19 +144,19 @@ For example, the keyword
 .Em .br
 indicates that the file is most likely a
 .Xr troff 1
-input file, just as the keyword 
+input file, just as the keyword
 .Em struct
 indicates a C program.
 These tests are less reliable than the previous
 two groups, so they are performed last.
 The language test routines also test for some miscellany
-(such as 
+(such as
 .Xr tar 1
 archives).
 .Pp
 Any file that cannot be identified as having been written
 in any of the character sets listed above is simply said to be
-.Dq data .
+.Sq data .
 .Sh OPTIONS
 .Bl -tag -width indent
 .It Fl b , -brief
@@ -177,40 +177,41 @@ from the list of tests made to determine the file type. Valid test names
 are:
 .Bl -tag -width
 .It apptype
-Check for
 .Dv EMX
 application type (only on EMX).
-.It ascii
-Check for various types of ascii files.
+.It text
+Various types of text files (this test will try to guess the text encoding, irrespective of the setting of the
+.Sq encoding
+option).
+.It encoding
+Different text encodings for soft magic tests.
+.It tokens
+Looks for known tokens inside text files.
+.It cdf
+Prints details of Compound Document Files.
 .It compress
-Don't look for, or inside compressed files.
+Checks for, and looks inside, compressed files.
 .It elf
-Don't print elf details.
-.It fortran
-Don't look for fortran sequences inside ascii files.
+Prints ELF file details.
 .It soft
-Don't consult magic files.
+Consults magic files.
 .It tar
-Don't examine tar files.
-.It token
-Don't look for known tokens inside ascii files.
-.It troff
-Don't look for troff sequences inside ascii files.
+Examines tar files.
 .El
 .It Fl f , -files-from Ar namefile
-Read the names of the files to be examined from 
+Read the names of the files to be examined from
 .Ar namefile
-(one per line) 
+(one per line)
 before the argument list.
-Either 
+Either
 .Ar namefile
 or at least one filename argument must be present;
-to test the standard input, use 
+to test the standard input, use
 .Sq -
 as a filename argument.
 .It Fl F , -separator Ar separator
 Use the specified string as the separator between the filename and the
-file result returned. Defaults to 
+file result returned. Defaults to
 .Sq \&: .
 .It Fl h , -no-dereference
 option causes symlinks not to be followed
@@ -221,17 +222,15 @@ is not defined.
 .It Fl i , -mime
 Causes the file command to output mime type strings rather than the more
 traditional human readable ones. Thus it may say
-.Dq text/plain charset=us-ascii
+.Sq text/plain; charset=us-ascii
 rather than
-.Dq ASCII text .
+.Sq ASCII text .
 In order for this option to work, file changes the way
 it handles files recognized by the command itself (such as many of the
 text file types, directories etc), and makes use of an alternative
-.Dq magic
+.Sq magic
 file.
-(See
-.Dq FILES
-section, below).
+(See the FILES section, below).
 .It Fl -mime-type , -mime-encoding
 Like
 .Fl i ,
@@ -239,10 +238,10 @@ but print only the specified element(s).
 .It Fl k , -keep-going
 Don't stop at the first match, keep going. Subsequent matches will
 have the string
-.Dq "\[rs]012\- "
+.Sq "\[rs]012\- "
 prepended.
 (If you want a newline, see the
-.Dq "\-r"
+.Sq "\-r"
 option.)
 .It Fl L , -dereference
 option causes symlinks to be followed, as the like-named option in
@@ -324,7 +323,7 @@ will not attempt to open
 .Pa $HOME/.magic .
 .Nm
 adds
-.Dq .mgc
+.Sq .mgc
 to the value of this variable as appropriate.
 The environment variable
 .Dv POSIXLY_CORRECT
@@ -347,47 +346,47 @@ options.
 .Sh STANDARDS CONFORMANCE
 This program is believed to exceed the System V Interface Definition
 of FILE(CMD), as near as one can determine from the vague language
-contained therein. 
+contained therein.
 Its behavior is mostly compatible with the System V program of the same name.
 This version knows more magic, however, so it will produce
-different (albeit more accurate) output in many cases. 
+different (albeit more accurate) output in many cases.
 .\" URL: http://www.opengroup.org/onlinepubs/009695399/utilities/file.html
 .Pp
-The one significant difference 
+The one significant difference
 between this version and System V
 is that this version treats any white space
 as a delimiter, so that spaces in pattern strings must be escaped.
 For example,
-.Bd -literal -offset indent 
+.Bd -literal -offset indent
 >10	string	language impress\ 	(imPRESS data)
 .Ed
 .Pp
 in an existing magic file would have to be changed to
-.Bd -literal -offset indent 
+.Bd -literal -offset indent
 >10	string	language\e impress	(imPRESS data)
 .Ed
 .Pp
 In addition, in this version, if a pattern string contains a backslash,
 it must be escaped.
 For example
-.Bd -literal -offset indent 
+.Bd -literal -offset indent
 0	string		\ebegindata	Andrew Toolkit document
 .Ed
 .Pp
 in an existing magic file would have to be changed to
-.Bd -literal -offset indent 
+.Bd -literal -offset indent
 0	string		\e\ebegindata	Andrew Toolkit document
 .Ed
 .Pp
 SunOS releases 3.2 and later from Sun Microsystems include a
-.Nm 
+.Nm
 command derived from the System V one, but with some extensions.
 My version differs from Sun's only in minor ways.
-It includes the extension of the 
+It includes the extension of the
 .Sq &
 operator, used as,
 for example,
-.Bd -literal -offset indent 
+.Bd -literal -offset indent
 >16	long&0x7fffffff	>0		not stripped
 .Ed
 .Sh MAGIC DIRECTORY
@@ -395,7 +394,7 @@ The magic file entries have been collected from various sources,
 mainly USENET, and contributed by various authors.
 Christos Zoulas (address below) will collect additional
 or corrected magic file entries.
-A consolidation of magic file entries 
+A consolidation of magic file entries
 will be distributed periodically.
 .Pp
 The order of entries in the magic file is significant.
@@ -405,14 +404,14 @@ If your old
 .Nm
 command uses a magic file,
 keep the old magic file around for comparison purposes
-(rename it to 
+(rename it to
 .Pa __MAGIC__.orig ).
 .Sh EXAMPLES
-.Bd -literal -offset indent 
+.Bd -literal -offset indent
 $ file file.c file /dev/{wd0a,hda}
 file.c:   C program text
 file:     ELF 32-bit LSB executable, Intel 80386, version 1 (SYSV),
-          dynamically linked (uses shared libs), stripped
+	  dynamically linked (uses shared libs), stripped
 /dev/wd0a: block special (0/0)
 /dev/hda: block special (3/0)
 
@@ -441,9 +440,9 @@ file:        application/x-executable
 
 .Ed
 .Sh HISTORY
-There has been a 
-.Nm 
-command in every 
+There has been a
+.Nm
+command in every
 .Dv UNIX since at least Research Version 4
 (man page dated November, 1973).
 The System V version introduced one significant major change:
@@ -466,7 +465,7 @@ Primary development and maintenance from 1990 to the present by
 Christos Zoulas (christos@astron.com).
 .Pp
 Altered by Chris Lowth, chris@lowth.com, 2000:
-Handle the 
+Handle the
 .Fl i
 option to output mime type strings, using an alternative
 magic file and internal logic.
@@ -480,7 +479,7 @@ support and merge MIME and non-MIME magic, support directories as well
 as files of magic, apply many bug fixes and improve the build system.
 .Pp
 The list of contributors to the
-.Dq magic
+.Sq magic
 directory (magic files)
 is too long to include here.
 You know who you are; thank you.
@@ -512,10 +511,10 @@ files.
 The support for text files (primarily for programming languages)
 is simplistic, inefficient and requires recompilation to update.
 .Pp
-The list of keywords in 
+The list of keywords in
 .Dv ascmagic
 probably belongs in the Magic file.
-This could be done by using some keyword like 
+This could be done by using some keyword like
 .Sq *
 for the offset value.
 .Pp
@@ -523,20 +522,20 @@ Complain about conflicts in the magic file entries.
 Make a rule that the magic entries sort based on file offset rather
 than position within the magic file?
 .Pp
-The program should provide a way to give an estimate 
-of 
-.Dq how good
+The program should provide a way to give an estimate
+of
+.Sq how good
 a guess is.
-We end up removing guesses (e.g. 
-.Dq From\ 
+We end up removing guesses (e.g.
+.Sq From\
 as first 5 chars of file) because
-they are not as good as other guesses (e.g. 
-.Dq Newsgroups:
+they are not as good as other guesses (e.g.
+.Sq Newsgroups:
 versus
-.Dq Return-Path:
+.Sq Return-Path:
 ).
 Still, if the others don't pan out, it should be possible to use the
-first guess.  
+first guess.
 .Pp
 This manual page, and particularly this section, is too long.
 .Sh RETURN CODE

+ 3 - 10
doc/libmagic.man

@@ -1,4 +1,4 @@
-.\" $File: libmagic.man,v 1.18 2008/02/28 22:24:46 rrt Exp $
+.\" $File: libmagic.man,v 1.19 2008/10/06 20:16:04 christos Exp $
 .\"
 .\" Copyright (c) Christos Zoulas 2003.
 .\" All Rights Reserved.
@@ -25,7 +25,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd November 15, 2006
+.Dd October 6, 2008
 .Dt MAGIC 3
 .Os
 .Sh NAME
@@ -200,11 +200,8 @@ before any magic queries can performed.
 .Pp
 The default database file is named by the MAGIC environment variable.  If
 that variable is not set, the default database file name is __MAGIC__.
-.Pp
 .Fn magic_load
 adds
-.Dq .mime
-and/or
 .Dq .mgc
 to the database filename as appropriate.
 .Sh RETURN VALUES
@@ -237,11 +234,7 @@ when
 .Dv MAGIC_PRESERVE_ATIME
 is set.
 .Sh FILES
-.Bl -tag -width __MAGIC__.mime.mgc -compact
-.It Pa __MAGIC__.mime
-The non-compiled default magic mime database.
-.It Pa __MAGIC__.mime.mgc
-The compiled default magic mime database.
+.Bl -tag -width __MAGIC__.mgc -compact
 .It Pa __MAGIC__
 The non-compiled default magic database.
 .It Pa __MAGIC__.mgc

+ 15 - 4
doc/magic.man

@@ -1,4 +1,4 @@
-.\" $File: magic.man,v 1.57 2008/08/30 09:50:20 christos Exp $
+.\" $File: magic.man,v 1.59 2008/11/06 23:22:53 christos Exp $
 .Dd August 30, 2008
 .Dt MAGIC __FSECTION__
 .Os
@@ -84,6 +84,8 @@ local time rather than UTC.
 .It Dv qldate
 An eight-byte value interpreted as a UNIX-style date, but interpreted as
 local time rather than UTC.
+.It Dv beid3
+A 32-bit ID3 length in big-endian byte order.
 .It Dv beshort
 A two-byte value in big-endian byte order.
 .It Dv belong
@@ -110,6 +112,8 @@ interpreted as a UNIX-style date, but interpreted as local time rather
 than UTC.
 .It Dv bestring16
 A two-byte unicode (UCS16) string in big-endian byte order.
+.It Dv leid3
+A 32-bit ID3 length in little-endian byte order.
 .It Dv leshort
 A two-byte value in little-endian byte order.
 .It Dv lelong
@@ -145,6 +149,8 @@ interpreted as a UNIX date.
 A four-byte value in middle-endian (PDP-11) byte order,
 interpreted as a UNIX-style date, but interpreted as local time rather
 than UTC.
+.It Dv indirect
+Starting at the given offset, consult the magic database again.
 .It Dv regex
 A regular expression match in extended POSIX regular expression syntax
 (like egrep). Regular expressions can take exponential time to
@@ -290,6 +296,11 @@ added before it: multiple matches are normally separated by a single
 space.
 .El
 .Pp
+An APPLE 4+4 character creator and type can be specified as:
+.Bd -literal -offset indent 
+!:apple	CREATYPE
+.Ed
+.Pp
 A MIME type is given on a separate line, which must be the next
 non-blank or comment line after the magic line that identifies the
 file type, and has the following format:
@@ -361,12 +372,12 @@ the file.
 The value at that offset is read, and is used again as an offset
 in the file.
 Indirect offsets are of the form:
-.Em (( x [.[bslBSL]][+\-][ y ]) .
+.Em (( x [.[bislBISLm]][+\-][ y ]) .
 The value of
 .Em x
 is used as an offset in the file.
-A byte, short or long is read at that offset depending on the
-.Em [bslBSLm]
+A byte, id3 length, short or long is read at that offset depending on the
+.Em [bislBISLm]
 type specifier.
 The capitalized types interpret the number as a big endian
 value, whereas the small letter versions interpret the number as a little

+ 23 - 0
magic/Magdir/animation

@@ -325,6 +325,7 @@
 
 # MP2, M1A
 0       beshort&0xFFFE  0xFFFC         MPEG ADTS, layer II, v1
+!:mime	audio/mpeg
 # rates
 >2      byte&0xF0       0x10           \b,  32 kbps
 >2      byte&0xF0       0x20           \b,  48 kbps
@@ -399,6 +400,7 @@
 
 # MP3, M2A
 0       beshort&0xFFFE  0xFFF2         MPEG ADTS, layer III, v2
+!:mime	audio/mpeg
 # rate
 >2      byte&0xF0       0x10           \b,   8 kbps
 >2      byte&0xF0       0x20           \b,  16 kbps
@@ -790,3 +792,24 @@
 0       belong             0x00000001
 >4      byte&0x1F	   0x07
 !:mime	video/h264
+
+# Type: Bink Video
+# URL:  http://wiki.multimedia.cx/index.php?title=Bink_Container
+# From: <hoehle@users.sourceforge.net>  2008-07-18
+0	string		BIK	Bink Video
+>3	regex		=[a-z]	rev.%s
+#>4	ulelong		x	size %d
+>20	ulelong		x	\b, %d
+>24	ulelong		x	\bx%d
+>8	ulelong		x	\b, %d frames
+>32	ulelong		x	at rate %d/
+>28	ulelong		>1	\b%d
+>40	ulelong		=0	\b, no audio
+>40	ulelong		!0	\b, %d audio track
+>>40	ulelong		!1	\bs
+# follow properties of the first audio track only
+>>48	uleshort	x	%dHz
+>>51	byte&0x20	0	mono
+>>51	byte&0x20	!0	stereo
+#>>51	byte&0x10	0	FFT
+#>>51	byte&0x10	!0	DCT

+ 8 - 37
magic/Magdir/audio

@@ -286,43 +286,14 @@
 # SGI SoundTrack <mpruett@sgi.com>
 0	string		_SGI_SoundTrack		SGI SoundTrack project file
 # ID3 version 2 tags <waschk@informatik.uni-rostock.de>
-0	string		ID3	Audio file with ID3 version 2.
-# ??? Normally such a file is an MP3 file, but this will give false positives
-!:mime	audio/mpeg
->3	ubyte	<0xff	\b%d
-#>4	ubyte	<0xff	\b%d tag
->2584	string	fLaC		\b, FLAC encoding
->>2588	byte&0x7f		>0		\b, unknown version
->>2588	byte&0x7f		0		\b
-# some common bits/sample values
->>>2600	beshort&0x1f0		0x030		\b, 4 bit
->>>2600	beshort&0x1f0		0x050		\b, 6 bit
->>>2600	beshort&0x1f0		0x070		\b, 8 bit
->>>2600	beshort&0x1f0		0x0b0		\b, 12 bit
->>>2600	beshort&0x1f0		0x0f0		\b, 16 bit
->>>2600	beshort&0x1f0		0x170		\b, 24 bit
->>>2600	byte&0xe		0x0		\b, mono
->>>2600	byte&0xe		0x2		\b, stereo
->>>2600	byte&0xe		0x4		\b, 3 channels
->>>2600	byte&0xe		0x6		\b, 4 channels
->>>2600	byte&0xe		0x8		\b, 5 channels
->>>2600	byte&0xe		0xa		\b, 6 channels
->>>2600	byte&0xe		0xc		\b, 7 channels
->>>2600	byte&0xe		0xe		\b, 8 channels
-# some common sample rates
->>>2597	belong&0xfffff0		0x0ac440	\b, 44.1 kHz
->>>2597	belong&0xfffff0		0x0bb800	\b, 48 kHz
->>>2597	belong&0xfffff0		0x07d000	\b, 32 kHz
->>>2597	belong&0xfffff0		0x056220	\b, 22.05 kHz
->>>2597	belong&0xfffff0		0x05dc00	\b, 24 kHz
->>>2597	belong&0xfffff0		0x03e800	\b, 16 kHz
->>>2597	belong&0xfffff0		0x02b110	\b, 11.025 kHz
->>>2597	belong&0xfffff0		0x02ee00	\b, 12 kHz
->>>2597	belong&0xfffff0		0x01f400	\b, 8 kHz
->>>2597	belong&0xfffff0		0x177000	\b, 96 kHz
->>>2597	belong&0xfffff0		0x0fa000	\b, 64 kHz
->>>2601	byte&0xf		>0		\b, >4G samples
->2584	string	!fLaC		\b, MP3 encoding
+0	string		ID3	Audio file with ID3 version 2
+>3	byte		x	\b.%d
+>4	byte		x	\b.%d
+>>5	byte		&0x80	\b, unsynchronized frames
+>>5	byte		&0x40	\b, extended header
+>>5	byte		&0x20	\b, experimental
+>>5	byte		&0x10	\b, footer present
+>(6.I)	indirect	x	\b, contains: 
 
 # NSF (NES sound file) magic
 0	string		NESM\x1a	NES Sound File

+ 11 - 9
magic/Magdir/cafebabe

@@ -12,16 +12,18 @@
 # (and use as a hack). Let's not use 18, because the Mach-O people
 # might add another one or two as time goes by...
 #
-0	beshort		0xcafe
->2	beshort         0xbabe
+0	belong		0xcafebabe
 !:mime	application/x-java-applet
->>2	belong		>30		compiled Java class data,
->>>6	beshort		x	        version %d.
->>>4	beshort		x       	\b%d
->>4	belong		1		Mach-O fat file with 1 architecture
->>4	belong		>1
->>>4	belong		<20		Mach-O fat file with %ld architectures
->2	beshort		0xd00d		JAR compressed with pack200,
+>4	belong		>30		compiled Java class data,
+>>6	beshort		x	        version %d.
+>>4	beshort		x       	\b%d
+
+0	belong		0xcafebabe
+>4	belong		1		Mach-O fat file with 1 architecture
+>4	belong		>1
+>>4	belong		<20		Mach-O fat file with %ld architectures
+
+0	belong		0xcafed00d	JAR compressed with pack200,
 >5	byte		x		version %d.
 >4	byte		x		\b%d
 !:mime	application/x-java-pack200

+ 6 - 0
magic/Magdir/compress

@@ -11,6 +11,7 @@
 # standard unix compress
 0	string		\037\235	compress'd data
 !:mime	application/x-compress
+!:apple	LZIVZIVU
 >2	byte&0x80	>0		block compressed
 >2	byte&0x1f	x		%d bits
 
@@ -76,6 +77,11 @@
 !:mime	application/x-bzip2
 >3	byte		>47		\b, block size = %c00k
 
+# lzip
+0	string		LZIP		lzip compressed data
+!:mime application/x-lzip
+>4	byte		x		\b, version: %d
+
 # squeeze and crunch
 # Michael Haardt <michael@cantor.informatik.rwth-aachen.de>
 0	beshort		0x76FF		squeezed data,

+ 4 - 2
magic/Magdir/elf

@@ -143,11 +143,13 @@
 >>18	leshort		91		picoJava,
 >>18	leshort		92		OpenRISC,
 >>18	leshort		93		ARC Cores Tangent-A5,
->>18	leshort		0x3426		OpenRISC (obsolete),
->>18	leshort		0x8472		OpenRISC (obsolete),
 >>18	leshort		94		Tensilica Xtensa,
 >>18	leshort		97		NatSemi 32k,
 >>18	leshort		106		Analog Devices Blackfin,
+>>18	leshort		113		Altera Nios II,
+>>18	leshort		0xae		META,
+>>18	leshort		0x3426		OpenRISC (obsolete),
+>>18	leshort		0x8472		OpenRISC (obsolete),
 >>18	leshort		0x9026		Alpha (unofficial),
 >>20	lelong		0		invalid version
 >>20	lelong		1		version 1

+ 8 - 7
magic/Magdir/epoc

@@ -1,10 +1,11 @@
-
 #------------------------------------------------------------------------------
-# Epoc 32 : file(1) magic for Epoc Documents [psion/osaris
+# EPOC : file(1) magic for EPOC documents [Psion Series 5/Osaris/Geofox 1]
 # Stefan Praszalowicz (hpicollo@worldnet.fr)
-#0	lelong		0x10000037	Epoc32
+# Useful information for improving this file can be found at:
+# http://software.frodo.looijaard.name/psiconv/formats/Index.html
+0	lelong		0x10000037
 >4	lelong		0x1000006D
->>8	lelong		0x1000007F	Word
->>8	lelong		0x10000088	Sheet
->>8	lelong		0x1000007D	Sketch
->>8	lelong		0x10000085	TextEd
+>>8	lelong		0x1000007F	Psion Word
+>>8	lelong		0x10000088	Psion Sheet
+>>8	lelong		0x1000007D	Psion Sketch
+>>8	lelong		0x10000085	Psion TextEd

+ 169 - 104
magic/Magdir/filesystems

@@ -104,9 +104,32 @@
 >>>346	string	des\ Betriebssystems	
 >>>>366	string	Betriebssystem\ nicht\ vorhanden	\b, Microsoft Windows XP MBR (german)
 >>>>>0x1B8	ulelong	>0				\b, Serial 0x%-.4x
->0x145	string	Default:\ F				\b, FREE-DOS MBR
+#>0x145	string	Default:\ F				\b, FREE-DOS MBR
+#>0x14B	string	Default:\ F				\b, FREE-DOS 1.0 MBR
+>0x145	search/7	Default:\ F			\b, FREE-DOS MBR
+#>>313		string	F0\ .\ .\ .			
+#>>>322		string	disk\ 1				
+#>>>>382	string	FAT3				
 >64	string	no\ active\ partition\ found	
 >>96	string	read\ error\ while\ reading\ drive	\b, FREE-DOS Beta 0.9 MBR
+# Ranish Partition Manager http://www.ranish.com/part/
+>387	search/4	\0\ Error!\r			
+>>378	search/7	Virus! 				
+>>>397	search/4	Booting\ 			
+>>>>408	search/4	HD1/\0	 			\b, Ranish MBR (
+>>>>>416	string	Writing\ changes...		\b2.37
+>>>>>>438	ubyte		x			\b,0x%x dots
+>>>>>>440	ubyte		>0			\b,virus check
+>>>>>>441	ubyte		>0			\b,partition %c
+#2.38,2.42,2.44
+>>>>>416	string	!Writing\ changes...		\b
+>>>>>>418	ubyte	1				\bvirus check,
+>>>>>>419	ubyte	x				\b0x%x seconds
+>>>>>>420	ubyte&0x0F	>0			\b,partition
+>>>>>>>420	ubyte&0x0F	<5			\b %x
+>>>>>>>420	ubyte&0x0F	0Xf			\b ask
+>>>>>420	ubyte		x			\b)
+#
 >271	string	Operating\ system\ loading 		
 >>296	string	error\r					\b, SYSLINUX MBR (2.10)
 # http://www.acronis.de/
@@ -124,18 +147,20 @@
 >0x40	string	SBML				
 # label with 11 characters of FAT 12 bit filesystem
 >>43	string	SMART\ BTMGR			
->>>430	string	SBMK\ Bad!\r			
->>>>3	string	SBM				\b, Smart Boot Manager
->>>>>6	string	>\0                             \b, version %s
+>>>430	string	SBMK\ Bad!\r			\b, Smart Boot Manager
+# OEM-ID not always "SBM"
+#>>>>3	strings	SBM				
+>>>>6	string	>\0                             \b, version %s
 >382	string	XOSLLOADXCF			\b, eXtended Operating System Loader
 >6	string	LILO				\b, LInux i386 boot LOader
 >>120	string	LILO				\b, version 22.3.4 SuSe
 >>172	string	LILO				\b, version 22.5.8 Debian
-# updated by Joerg Jenderek
+# updated by Joerg Jenderek at Oct 2008
 # variables according to grub-0.97/stage1/stage1.S or
 # http://www.gnu.org/software/grub/manual/grub.html#Embedded-data
 # usual values are marked with comments to get information only on strange GRUB loaders
->0		ulelong		0x009048EB	
+>342		search/60	\0Geom\0	
+#>0		ulelong		x		%x=0x009048EB ,	0x2a9048EB  0
 >>0x41		ubyte		<2		
 >>>0x3E		ubyte		>2		\b; GRand Unified Bootloader
 # 0x3 for 0.5.95,0.93,0.94,0.96 0x4 for 1.90 
@@ -178,15 +203,14 @@
 >3	string	BCDL				
 >>498	string	BCDL\ \ \ \ BIN			\b, Bootable CD Loader (1.50Z)
 # mbr partition table entries
-# OEM-ID not Microsoft,SYSLINUX,or MTOOLs
+# OEM-ID does not contain MicroSoft,NEWLDR,DOS,SYSLINUX,or MTOOLs
 >3			string		!MS
 >>3			string		!SYSLINUX
 >>>3			string		!MTOOL
+>>>>3			string		!NEWLDR
+>>>>>5			string		!DOS
 # not FAT (32 bit)
->>>>82			string		!FAT32	
-#not IO.SYS
->>>>>472		string		!IO\ \ \ \ \ \ SYS
->>>>>>480		string		!IO\ \ \ \ \ \ SYS
+>>>>>>82		string		!FAT32
 #not Linux kernel
 >>>>>>>514		string		!HdrS
 #not BeOS
@@ -272,6 +296,11 @@
 >>>>>>>>>(1.b+11)	ubyte	0xb			
 >>>>>>>>>>(1.b+12)	ubyte	0x56			
 >>>>>>>>>>(1.b+13)	ubyte	0xb4			\b, mkdosfs boot message display
+>214	string	Please\ try\ to\ install\ FreeDOS\ 	\b, DOS Emulator boot message display
+#>>244	string	from\ dosemu-freedos-*-bin.tgz\r	
+#>>>170	string	Sorry,\ could\ not\ load\ an\ 		
+#>>>>195	string	operating\ system.\r\n		
+#
 >103	string	This\ is\ not\ a\ bootable\ disk.\ 	
 >>132	string	Please\ insert\ a\ bootable\ 		
 >>>157	string	floppy\ and\r\n				
@@ -374,12 +403,22 @@
 >430	string	Datentr\204ger\ entfernen\xFF\r\n	
 >>454	string	Medienfehler\xFF\r\n			
 >>>469	string	Neustart:\ Taste\ dr\201cken\r		\b, Microsoft Windows XP Bootloader (4.german)
->>>>368		ubyte&0xDF	>0			
->>>>>368	string		x 			%-.5s
->>>>>>373	ubyte&0xDF	>0			
->>>>>>>373	string		x 			\b%-.3s
->>>>>376	ubyte&0xDF	>0			
->>>>>>376	string		x 			\b.%-.3s
+>>>>379		string		\0			
+>>>>>368	ubyte&0xDF	>0			
+>>>>>>368	string		x 			%-.5s
+>>>>>>>373	ubyte&0xDF	>0			
+>>>>>>>>373	string		x 			\b%-.3s
+>>>>>>376	ubyte&0xDF	>0			
+>>>>>>>376	string		x 			\b.%-.3s
+# variant
+>>>>417		ubyte&0xDF	>0			
+>>>>>417	string		x			%-.5s
+>>>>>>422	ubyte&0xDF	>0			
+>>>>>>>422	string		x 			\b%-.3s
+>>>>>425	ubyte&0xDF	>0			
+>>>>>>425	string		>\ 			\b.%-.3s
+#
+
 #>3	string	NTFS\ \ \ \ 				
 >389	string	Fehler\ beim\ Lesen\ 
 >>407	string	des\ Datentr\204gers
@@ -567,12 +606,27 @@
 >>>489	string	Any\ key\ to\ retry			\b, DR-DOS Bootloader
 >>471	string	Cannot\ load\ DOS\ 			
 >>487	string	press\ key\ to\ retry			\b, Open-DOS Bootloader
+#??
 >444	string	KERNEL\ \ SYS					
 >>314	string	BOOT\ error!				\b, FREE-DOS Bootloader
 >499	string	KERNEL\ \ SYS				
 >>305	string	BOOT\ err!\0				\b, Free-DOS Bootloader
 >449	string	KERNEL\ \ SYS				
 >>319	string	BOOT\ error!				\b, FREE-DOS 0.5 Bootloader
+#
+>449	string	Loading\ FreeDOS			
+>>0x1AF		ulelong		>0			\b, FREE-DOS 0.95,1.0 Bootloader
+>>>497		ubyte&0xDF	>0			
+>>>>497		string		x 			\b %-.6s
+>>>>>503	ubyte&0xDF	>0			
+>>>>>>503	string		x 			\b%-.1s
+>>>>>>>504	ubyte&0xDF	>0			
+>>>>>>>>504	string		x 			\b%-.1s
+>>>>505		ubyte&0xDF	>0			
+>>>>>505	string		x 			\b.%-.3s
+#
+>331	string	Error!.0				\b, FREE-DOS 1.0 bootloader
+#
 >125	string	Loading\ FreeDOS...\r			
 >>311	string	BOOT\ error!\r				\b, FREE-DOS bootloader
 >>>441		ubyte&0xDF	>0			
@@ -706,13 +760,7 @@
 #it also hangs with another message ("NF").
 >>>>>492		string		RENF		\b, FAT (12 bit)
 >>>>>495		string		RENF		\b, FAT (16 bit)
-# added by Joerg Jenderek
-# http://syslinux.zytor.com/iso.php
-0	ulelong	0x7c40eafa		isolinux Loader
-# http://syslinux.zytor.com/pxe.php
-0	ulelong	0x007c05ea		pxelinux Loader
-0	ulelong	0x60669c66		pxelinux Loader
-# loader end
+# x86 bootloader end
 # updated by Joerg Jenderek at Sep 2007
 >3	ubyte	0			
 #no active flag
@@ -732,6 +780,7 @@
 # older drives may use Near JuMP instruction E9 xx xx
 >0		lelong&0x009000EB	0x009000EB 
 >0		lelong&0x000000E9	0x000000E9 
+# minimal short forward jump found 03cx??
 # maximal short forward jump is 07fx
 >1		ubyte			<0xff	\b, code offset 0x%x
 # mtools-3.9.8/msdos.h
@@ -740,91 +789,92 @@
 >>11		uleshort&0x000f	x		
 >>>11		uleshort	<32769		
 >>>>11		uleshort	>31		
->>>>>3		string		>\0		\b, OEM-ID "%8.8s"
+>>>>>21		ubyte&0xf0	0xF0		
+>>>>>>3		string		>\0		\b, OEM-ID "%8.8s"
 #http://mirror.href.com/thestarman/asm/debug/debug2.htm#IHC
->>>>>>8		string		IHC		\b cached by Windows 9M
->>>>>11		uleshort	>512		\b, Bytes/sector %u
-#>>>>>11	uleshort	=512		\b, Bytes/sector %u=512 (usual)
->>>>>11		uleshort	<512		\b, Bytes/sector %u
->>>>>13		ubyte		>1		\b, sectors/cluster %u
-#>>>>>13	ubyte		=1		\b, sectors/cluster %u (usual on Floppies)
->>>>>14		uleshort	>32		\b, reserved sectors %u
-#>>>>>14	uleshort	=32		\b, reserved sectors %u (usual Fat32)
-#>>>>>14	uleshort	>1		\b, reserved sectors %u
-#>>>>>14	uleshort	=1		\b, reserved sectors %u (usual FAT12,FAT16)
->>>>>14		uleshort	<1		\b, reserved sectors %u
->>>>>16		ubyte		>2		\b, FATs %u
-#>>>>>16	ubyte		=2		\b, FATs %u (usual)
->>>>>16		ubyte		=1		\b, FAT  %u
->>>>>16		ubyte		>0
->>>>>17		uleshort	>0		\b, root entries %u
-#>>>>>17	uleshort	=0		\b, root entries %u=0 (usual Fat32)
->>>>>19		uleshort	>0		\b, sectors %u (volumes <=32 MB) 
-#>>>>>19	uleshort	=0		\b, sectors %u=0 (usual Fat32)
->>>>>21		ubyte		>0xF0		\b, Media descriptor 0x%x
-#>>>>>21	ubyte		=0xF0		\b, Media descriptor 0x%x (usual floppy)
->>>>>21		ubyte		<0xF0		\b, Media descriptor 0x%x
->>>>>22		uleshort	>0		\b, sectors/FAT %u
-#>>>>>22	uleshort	=0		\b, sectors/FAT %u=0 (usual Fat32)
->>>>>26		ubyte		>2		\b, heads %u
-#>>>>>26	ubyte		=2		\b, heads %u (usual floppy)
->>>>>26		ubyte		=1		\b, heads %u
+>>>>>>>8	string		IHC		\b cached by Windows 9M
+>>>>>>11	uleshort	>512		\b, Bytes/sector %u
+#>>>>>>11	uleshort	=512		\b, Bytes/sector %u=512 (usual)
+>>>>>>11	uleshort	<512		\b, Bytes/sector %u
+>>>>>>13	ubyte		>1		\b, sectors/cluster %u
+#>>>>>>13	ubyte		=1		\b, sectors/cluster %u (usual on Floppies)
+>>>>>>14	uleshort	>32		\b, reserved sectors %u
+#>>>>>>14	uleshort	=32		\b, reserved sectors %u (usual Fat32)
+#>>>>>>14	uleshort	>1		\b, reserved sectors %u
+#>>>>>>14	uleshort	=1		\b, reserved sectors %u (usual FAT12,FAT16)
+>>>>>>14	uleshort	<1		\b, reserved sectors %u
+>>>>>>16	ubyte		>2		\b, FATs %u
+#>>>>>>16	ubyte		=2		\b, FATs %u (usual)
+>>>>>>16	ubyte		=1		\b, FAT  %u
+>>>>>>16	ubyte		>0
+>>>>>>17	uleshort	>0		\b, root entries %u
+#>>>>>>17	uleshort	=0		\b, root entries %u=0 (usual Fat32)
+>>>>>>19	uleshort	>0		\b, sectors %u (volumes <=32 MB) 
+#>>>>>>19	uleshort	=0		\b, sectors %u=0 (usual Fat32)
+>>>>>>21	ubyte		>0xF0		\b, Media descriptor 0x%x
+#>>>>>>21	ubyte		=0xF0		\b, Media descriptor 0x%x (usual floppy)
+>>>>>>21	ubyte		<0xF0		\b, Media descriptor 0x%x
+>>>>>>22	uleshort	>0		\b, sectors/FAT %u
+#>>>>>>22	uleshort	=0		\b, sectors/FAT %u=0 (usual Fat32)
+>>>>>>26	ubyte		>2		\b, heads %u
+#>>>>>>26	ubyte		=2		\b, heads %u (usual floppy)
+>>>>>>26	ubyte		=1		\b, heads %u
 #skip for Digital Research DOS (version 3.41) 1440 kB Bootdisk
->>>>>38		ubyte		!0x70		
->>>>>>28	ulelong		>0		\b, hidden sectors %u
-#>>>>>>28	ulelong		=0		\b, hidden sectors %u (usual floppy)
->>>>>>32	ulelong		>0		\b, sectors %u (volumes > 32 MB) 
-#>>>>>>32	ulelong		=0		\b, sectors %u (volumes > 32 MB)
+>>>>>>38	ubyte		!0x70		
+>>>>>>>28	ulelong		>0		\b, hidden sectors %u
+#>>>>>>>28	ulelong		=0		\b, hidden sectors %u (usual floppy)
+>>>>>>>32	ulelong		>0		\b, sectors %u (volumes > 32 MB) 
+#>>>>>>>32	ulelong		=0		\b, sectors %u (volumes > 32 MB)
 # FAT<32 specific 
->>>>>82		string		!FAT32
-#>>>>>>36	ubyte		0x80		\b, physical drive 0x%x=0x80 (usual harddisk)
-#>>>>>>36	ubyte		0		\b, physical drive 0x%x=0 (usual floppy)
->>>>>>36	ubyte		!0x80		
->>>>>>>36	ubyte		!0		\b, physical drive 0x%x
->>>>>>37	ubyte		>0		\b, reserved 0x%x
-#>>>>>>37	ubyte		=0		\b, reserved 0x%x
->>>>>>38	ubyte		>0x29		\b, dos < 4.0 BootSector (0x%x)
->>>>>>38	ubyte		<0x29		\b, dos < 4.0 BootSector (0x%x)
->>>>>>38	ubyte		=0x29
->>>>>>>39	ulelong		x		\b, serial number 0x%x
->>>>>>>43	string		<NO\ NAME	\b, label: "%11.11s"
->>>>>>>43	string		>NO\ NAME	\b, label: "%11.11s"
->>>>>>>43	string		=NO\ NAME	\b, unlabeled
->>>>>>54	string		FAT		\b, FAT
->>>>>>>54	string		FAT12		\b (12 bit)
->>>>>>>54	string		FAT16		\b (16 bit)
+>>>>>>82	string		!FAT32
+#>>>>>>>36	ubyte		0x80		\b, physical drive 0x%x=0x80 (usual harddisk)
+#>>>>>>>36	ubyte		0		\b, physical drive 0x%x=0 (usual floppy)
+>>>>>>>36	ubyte		!0x80		
+>>>>>>>>36	ubyte		!0		\b, physical drive 0x%x
+>>>>>>>37	ubyte		>0		\b, reserved 0x%x
+#>>>>>>>37	ubyte		=0		\b, reserved 0x%x
+>>>>>>>38	ubyte		>0x29		\b, dos < 4.0 BootSector (0x%x)
+>>>>>>>38	ubyte		<0x29		\b, dos < 4.0 BootSector (0x%x)
+>>>>>>>38	ubyte		=0x29
+>>>>>>>>39	ulelong		x		\b, serial number 0x%x
+>>>>>>>>43	string		<NO\ NAME	\b, label: "%11.11s"
+>>>>>>>>43	string		>NO\ NAME	\b, label: "%11.11s"
+>>>>>>>>43	string		=NO\ NAME	\b, unlabeled
+>>>>>>>54	string		FAT		\b, FAT
+>>>>>>>>54	string		FAT12		\b (12 bit)
+>>>>>>>>54	string		FAT16		\b (16 bit)
 # FAT32 specific
->>>>>82		string		FAT32		\b, FAT (32 bit)
->>>>>>36	ulelong		x		\b, sectors/FAT %u
->>>>>>40	uleshort	>0		\b, extension flags %u
-#>>>>>>40	uleshort	=0		\b, extension flags %u
->>>>>>42	uleshort	>0		\b, fsVersion %u
-#>>>>>>42	uleshort	=0		\b, fsVersion %u (usual)
->>>>>>44	ulelong		>2		\b, rootdir cluster %u
-#>>>>>>44	ulelong		=2		\b, rootdir cluster %u
-#>>>>>>44	ulelong		=1		\b, rootdir cluster %u
->>>>>>48	uleshort	>1		\b, infoSector %u
-#>>>>>>48	uleshort	=1		\b, infoSector %u (usual)
->>>>>>48	uleshort	<1		\b, infoSector %u
->>>>>>50	uleshort	>6		\b, Backup boot sector %u
-#>>>>>>50	uleshort	=6		\b, Backup boot sector %u (usual) 
->>>>>>50	uleshort	<6		\b, Backup boot sector %u
->>>>>>54	ulelong		>0		\b, reserved1 0x%x
->>>>>>58	ulelong		>0		\b, reserved2 0x%x
->>>>>>62	ulelong		>0		\b, reserved3 0x%x
+>>>>>>82	string		FAT32		\b, FAT (32 bit)
+>>>>>>>36	ulelong		x		\b, sectors/FAT %u
+>>>>>>>40	uleshort	>0		\b, extension flags %u
+#>>>>>>>40	uleshort	=0		\b, extension flags %u
+>>>>>>>42	uleshort	>0		\b, fsVersion %u
+#>>>>>>>42	uleshort	=0		\b, fsVersion %u (usual)
+>>>>>>>44	ulelong		>2		\b, rootdir cluster %u
+#>>>>>>>44	ulelong		=2		\b, rootdir cluster %u
+#>>>>>>>44	ulelong		=1		\b, rootdir cluster %u
+>>>>>>>48	uleshort	>1		\b, infoSector %u
+#>>>>>>>48	uleshort	=1		\b, infoSector %u (usual)
+>>>>>>>48	uleshort	<1		\b, infoSector %u
+>>>>>>>50	uleshort	>6		\b, Backup boot sector %u
+#>>>>>>>50	uleshort	=6		\b, Backup boot sector %u (usual) 
+>>>>>>>50	uleshort	<6		\b, Backup boot sector %u
+>>>>>>>54	ulelong		>0		\b, reserved1 0x%x
+>>>>>>>58	ulelong		>0		\b, reserved2 0x%x
+>>>>>>>62	ulelong		>0		\b, reserved3 0x%x
 # same structure as FAT1X 
->>>>>>64	ubyte		>0x80		\b, physical drive 0x%x
-#>>>>>>64	ubyte		=0x80		\b, physical drive 0x%x=80 (usual harddisk)
->>>>>>64	ubyte&0x7F	>0		\b, physical drive 0x%x
-#>>>>>>64	ubyte		=0		\b, physical drive 0x%x=0 (usual floppy)
->>>>>>65	ubyte		>0		\b, reserved 0x%x
->>>>>>66	ubyte		>0x29		\b, dos < 4.0 BootSector (0x%x)
->>>>>>66	ubyte		<0x29		\b, dos < 4.0 BootSector (0x%x)
->>>>>>66	ubyte		=0x29
->>>>>>>67	ulelong		x		\b, serial number 0x%x
->>>>>>>71	string		<NO\ NAME	\b, label: "%11.11s"
->>>>>>71	string		>NO\ NAME	\b, label: "%11.11s"
->>>>>>71	string		=NO\ NAME	\b, unlabeled
+>>>>>>>64	ubyte		>0x80		\b, physical drive 0x%x
+#>>>>>>>64	ubyte		=0x80		\b, physical drive 0x%x=80 (usual harddisk)
+>>>>>>>64	ubyte&0x7F	>0		\b, physical drive 0x%x
+#>>>>>>>64	ubyte		=0		\b, physical drive 0x%x=0 (usual floppy)
+>>>>>>>65	ubyte		>0		\b, reserved 0x%x
+>>>>>>>66	ubyte		>0x29		\b, dos < 4.0 BootSector (0x%x)
+>>>>>>>66	ubyte		<0x29		\b, dos < 4.0 BootSector (0x%x)
+>>>>>>>66	ubyte		=0x29
+>>>>>>>>67	ulelong		x		\b, serial number 0x%x
+>>>>>>>>71	string		<NO\ NAME	\b, label: "%11.11s"
+>>>>>>>71	string		>NO\ NAME	\b, label: "%11.11s"
+>>>>>>>71	string		=NO\ NAME	\b, unlabeled
 ### FATs end
 >0x200	lelong	0x82564557		\b, BSD disklabel
 # FATX 
@@ -854,6 +904,13 @@
 
 0x18b	string	OS/2	OS/2 Boot Manager
 
+# updated by Joerg Jenderek at Oct 2008!!
+# http://syslinux.zytor.com/iso.php
+0	ulelong	0x7c40eafa		isolinux Loader
+# http://syslinux.zytor.com/pxe.php
+0	ulelong	0x007c05ea		pxelinux Loader
+0	ulelong	0x60669c66		pxelinux Loader
+
 # added by Joerg Jenderek
 # In the second sector (+0x200) are variables according to grub-0.97/stage2/asm.S or
 # grub-1.94/kern/i386/pc/startup.S
@@ -1324,6 +1381,14 @@
 >0x10024        belong          x               (blocksize %d,
 >0x10060        string          >\0             lockproto %s)
 
+# BTRFS
+0x10040         string          _BHRfS_M        BTRFS Filesystem
+>0x1012b        string          >\0             (label "%s",
+>0x10090        lelong          x               sectorsize %d,
+>0x10094        lelong          x               nodesize %d,
+>0x10098        lelong          x               leafsize %d)
+
+
 # dvdisaster's .ecc
 # From: "Nelson A. de Oliveira" <naoliv@gmail.com>
 0	string	*dvdisaster*	dvdisaster error correction file

+ 8 - 5
magic/Magdir/graphviz

@@ -1,7 +1,10 @@
-
 #------------------------------------------------------------------------------
 # graphviz:  file(1) magic for http://www.graphviz.org/
-0	regex/100	[\r\n\t\ ]*graph[\r\n\t\ ]*.*\\{	graphviz graph text
-!:mime	text/vnd.graphviz
-0	regex/100	[\r\n\t\ ]*digraph[\r\n\t\ ]*.*\\{	graphviz digraph text
-!:mime	text/vnd.graphviz
+
+# FIXME: These patterns match too generally. For example, the first
+# line matches a LaTeX file containing the word "graph" (with a {
+# following later) and the second line matches this file.
+#0	regex/100	[\r\n\t\ ]*graph[\r\n\t\ ]+.*\\{	graphviz graph text
+#!:mime	text/vnd.graphviz
+#0	regex/100	[\r\n\t\ ]*digraph[\r\n\t\ ]+.*\\{	graphviz digraph text
+#!:mime	text/vnd.graphviz

+ 5 - 0
magic/Magdir/images

@@ -110,6 +110,7 @@
 # GIF
 0	string		GIF8		GIF image data
 !:mime	image/gif
+!:apple	8BIMGIFf
 >4	string		7a		\b, version 8%s,
 >4	string		9a		\b, version 8%s,
 >6	leshort		>0		%hd x
@@ -600,3 +601,7 @@
 # Wavelet Scalar Quantization format used in gray-scale fingerprint images
 # From Tano M Fotang <mfotang@quanteq.com>
 0	string	\xff\xa0\xff\xa8\x00	Wavelet Scalar Quantization image data
+
+# JPEG 2000 Code Stream Bitmap
+# From Petr Splichal <psplicha@redhat.com>
+0	string	\xFF\x4F\xFF\x51\x00	JPEG-2000 Code Stream Bitmap data

+ 1 - 0
magic/Magdir/jpeg

@@ -10,6 +10,7 @@
 #
 0	beshort		0xffd8		JPEG image data
 !:mime	image/jpeg
+!:apple	8BIMJPEG
 !:strength +1
 >6	string		JFIF		\b, JFIF standard
 # The following added by Erik Rossen <rossen@freesurf.ch> 1999-09-06

+ 1 - 1
magic/Magdir/mach

@@ -4,7 +4,7 @@
 # Java ByteCode, so they are both handled in the file "cafebabe".
 # The "feedface" ones are handled herein.
 #------------------------------------------------------------
-0	lelong&0xfeffffff	0xfeedface	Mach-O
+0	lelong&0xfffffffe	0xfeedface	Mach-O
 >0	byte		0xcf		64-bit
 >12	lelong		1		object
 >12	lelong		2		executable

+ 3 - 0
magic/Magdir/macintosh

@@ -11,6 +11,8 @@
 # Stuffit archives are the de facto standard of compression for Macintosh
 # files obtained from most archives. (franklsm@tuns.ca)
 0	string		SIT!			StuffIt Archive (data)
+!:mime	application/x-stuffit
+!:apple	SIT!SIT!
 >2	string		x			: %s
 0	string		SITD			StuffIt Deluxe (data)
 >2	string		x			: %s
@@ -20,6 +22,7 @@
 # Newer StuffIt archives (grant@netbsd.org)
 0	string		StuffIt			StuffIt Archive
 !:mime	application/x-stuffit
+!:apple	SIT!SIT!
 #>162	string		>0			: %s
 
 # Macintosh Applications and Installation binaries (franklsm@tuns.ca)

+ 32 - 17
magic/Magdir/msdos

@@ -4,15 +4,15 @@
 #
 
 # .BAT files (Daniel Quinlan, quinlan@yggdrasil.com)
-# updated by Joerg Jenderek
+# updated by Joerg Jenderek at Oct 2008
 0	string	@			
->1	string/cB	\ echo\ off	MS-DOS batch file text
+>1	string/cB	\ echo\ off	DOS batch file text
 !:mime	text/x-msdos-batch
->1	string/cB	echo\ off	MS-DOS batch file text
+>1	string/cB	echo\ off	DOS batch file text
 !:mime	text/x-msdos-batch
->1	string/cB	rem\ 		MS-DOS batch file text
+>1	string/cB	rem\ 		DOS batch file text
 !:mime	text/x-msdos-batch
->1	string/cB	set\ 		MS-DOS batch file text
+>1	string/cB	set\ 		DOS batch file text
 !:mime	text/x-msdos-batch
 
 
@@ -285,8 +285,9 @@
 # Uncommenting only the first two lines will cover about 2/3 of COM files,
 # but it isn't feasible to match all COM files since there must be at least
 # two dozen different one-byte "magics".
-#0	byte		0xe9		DOS executable (COM)
-#>0x1FE leshort		0xAA55		\b, boot code
+# test too generic ?
+0	byte		0xe9		DOS executable (COM)
+>0x1FE leshort		0xAA55		\b, boot code
 >6	string		SFX\ of\ LHarc	(%s)
 0	belong	0xffffffff		DOS executable (device driver)
 #CMD640X2.SYS
@@ -309,25 +310,38 @@
 >>77	string	>\x40			
 >>>77	string	<\x5B			
 >>>>77	string	x			\b, name: %.8s
-#0	byte		0x8c		DOS executable (COM)
-# 0xeb conflicts with "sequent" magic
-#0	byte		0xeb		DOS executable (COM)
-#>0x1FE leshort		0xAA55		\b, boot code
-#>85	string		UPX		\b, UPX compressed
-#>4	string		\ $ARX		\b, ARX self-extracting archive
-#>4	string		\ $LHarc	\b, LHarc self-extracting archive
-#>0x20e string		SFX\ by\ LARC	\b, LARC self-extracting archive
+# test too generic ?
+0	byte		0x8c		DOS executable (COM)
+# updated by Joerg Jenderek at Oct 2008
+0	ulelong		0xffff10eb	DR-DOS executable (COM)
+# byte 0xeb conflicts with "sequent" magic leshort 0xn2eb
+0	ubeshort&0xeb8d	>0xeb00		
+# DR-DOS STACKER.COM SCREATE.SYS missed
+>0	byte		0xeb		DOS executable (COM)
+>>0x1FE leshort		0xAA55		\b, boot code
+>>85	string		UPX		\b, UPX compressed
+>>4	string		\ $ARX		\b, ARX self-extracting archive
+>>4	string		\ $LHarc	\b, LHarc self-extracting archive
+>>0x20e string		SFX\ by\ LARC	\b, LARC self-extracting archive
+# updated by Joerg Jenderek at Oct 2008
 #0	byte		0xb8		COM executable
+0	uleshort&0x80ff	0x00b8		
 # modified by Joerg Jenderek
->1	lelong		!0x21cd4cff	for DOS
+>1	lelong		!0x21cd4cff	COM executable for DOS
 # http://syslinux.zytor.com/comboot.php
 # (32-bit COMBOOT) programs *.C32 contain 32-bit code and run in flat-memory 32-bit protected mode
 # start with assembler instructions mov eax,21cd4cffh
->1	lelong		0x21cd4cff	(32-bit COMBOOT)
+0	uleshort&0xc0ff	0xc0b8		
+>1	lelong		0x21cd4cff	COM executable (32-bit COMBOOT)
 0	string	\x81\xfc		
 >4	string	\x77\x02\xcd\x20\xb9	
 >>36	string	UPX!			FREE-DOS executable (COM), UPX compressed
 252	string Must\ have\ DOS\ version DR-DOS executable (COM)
+# added by Joerg Jenderek at Oct 2008
+# GRR search is not working
+#34	search/2	UPX!		FREE-DOS executable (COM), UPX compressed
+34	string	UPX!			FREE-DOS executable (COM), UPX compressed
+35	string	UPX!			FREE-DOS executable (COM), UPX compressed
 # GRR search is not working
 #2	search/28	\xcd\x21	COM executable for MS-DOS
 #WHICHFAT.cOM
@@ -564,6 +578,7 @@
 # From Stuart Caie <kyzer@4u.net> (developer of cabextract)
 # Microsoft Cabinet files
 0	string		MSCF\0\0\0\0	Microsoft Cabinet archive data
+!:mime application/vnd.ms-cab-compressed
 >8	lelong		x		\b, %u bytes
 >28	leshort		1		\b, 1 file
 >28	leshort		>1		\b, %u files

+ 0 - 1
magic/Magdir/perl

@@ -23,7 +23,6 @@
 # by Dmitry V. Levin and Alexey Tourbin
 # check the first line
 0	search/1	package
-0	regex		\^package[\ \t]+[A-Za-z_]
 >0	regex		\^package[\ \t]+[0-9A-Za-z_:]+\ *;	Perl5 module source text
 # not 'p', check other lines
 0	search/1	!p

+ 2 - 0
magic/Magdir/printer

@@ -6,6 +6,7 @@
 # PostScript, updated by Daniel Quinlan (quinlan@yggdrasil.com)
 0	string		%!		PostScript document text
 !:mime	application/postscript
+!:apple	ASPSTEXT
 >2	string		PS-Adobe-	conforming
 >>11	string		>\0		DSC level %.3s
 >>>15	string		EPS		\b, type %s
@@ -16,6 +17,7 @@
 # Some PCs have the annoying habit of adding a ^D as a document separator
 0	string		\004%!		PostScript document text
 !:mime	application/postscript
+!:apple	ASPSTEXT
 >3	string		PS-Adobe-	conforming
 >>12	string		>\0		DSC level %.3s
 >>>16	string		EPS		\b, type %s

+ 17 - 0
magic/Magdir/timezone

@@ -6,6 +6,23 @@
 # this should work on Linux, SunOS, and maybe others
 # Added new official magic number for recent versions of the Olson code
 0	string	TZif	timezone data
+>4	byte	0	\b, old version
+>4	byte	>0	\b, version %c
+>20	belong	0	\b, no gmt time flags
+>20	belong	1	\b, 1 gmt time flag
+>20	belong	>1	\b, %d gmt time flags
+>24	belong	0	\b, no std time flags
+>24	belong	1	\b, 1 std time flag
+>24	belong	>1	\b, %d std time flags
+>28	belong	0	\b, no leap seconds
+>28	belong	1	\b, 1 leap second
+>28	belong  >1	\b, %d leap seconds
+>32	belong	0	\b, no transition times
+>32	belong	1	\b, 1 transition time
+>32	belong  >1	\b, %d transition times
+>40	belong	0	\b, no abbreviation chars
+>40	belong	1	\b, 1 abbreviation char
+>40	belong	>1	\b, %d abbreviation chars
 0	string	\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1\0	old timezone data
 0	string	\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\2\0	old timezone data
 0	string  \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\3\0	old timezone data

+ 5 - 0
magic/Magdir/wireless

@@ -0,0 +1,5 @@
+#------------------------------------------------------------------------------
+# wireless-regdb:        file(1) magic for CRDA wireless-regdb file format
+#
+0	string	RGDB	CRDA wireless regulatory database file
+>4	belong	19	(Version 1)

+ 9 - 0
magic/Magdir/xwindows

@@ -23,3 +23,12 @@
 >24	long	x				%ldx
 >28	long	1008				YUV422]
 >28	long	1000				RGB24]
+
+# Xcursor data
+# X11 mouse cursor format defined in libXcursor, see
+# http://www.x.org/archive/X11R6.8.1/doc/Xcursor.3.html
+# http://cgit.freedesktop.org/xorg/lib/libXcursor/tree/include/X11/Xcursor/Xcursor.h
+0	string		Xcur		Xcursor data
+!:mime	image/x-xcursor
+>10	leshort		x		version %hd
+>>8	leshort		x		\b.%hd

+ 2 - 1
magic/Makefile.am

@@ -1,5 +1,5 @@
 #
-# $File: Makefile.am,v 1.43 2008/08/08 08:24:06 christos Exp $
+# $File: Makefile.am,v 1.44 2009/01/28 02:11:20 christos Exp $
 #
 MAGIC_FRAGMENT_BASE = Magdir
 MAGIC_FRAGMENT_DIR = $(top_srcdir)/magic/$(MAGIC_FRAGMENT_BASE)
@@ -209,6 +209,7 @@ $(MAGIC_FRAGMENT_DIR)/vxl \
 $(MAGIC_FRAGMENT_DIR)/warc \
 $(MAGIC_FRAGMENT_DIR)/weak \
 $(MAGIC_FRAGMENT_DIR)/windows \
+$(MAGIC_FRAGMENT_DIR)/wireless \
 $(MAGIC_FRAGMENT_DIR)/wordprocessors \
 $(MAGIC_FRAGMENT_DIR)/xdelta \
 $(MAGIC_FRAGMENT_DIR)/xenix \

+ 2 - 1
magic/Makefile.in

@@ -163,7 +163,7 @@ top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
 
 #
-# $File: Makefile.am,v 1.43 2008/08/08 08:24:06 christos Exp $
+# $File: Makefile.am,v 1.44 2009/01/28 02:11:20 christos Exp $
 #
 MAGIC_FRAGMENT_BASE = Magdir
 MAGIC_FRAGMENT_DIR = $(top_srcdir)/magic/$(MAGIC_FRAGMENT_BASE)
@@ -371,6 +371,7 @@ $(MAGIC_FRAGMENT_DIR)/vxl \
 $(MAGIC_FRAGMENT_DIR)/warc \
 $(MAGIC_FRAGMENT_DIR)/weak \
 $(MAGIC_FRAGMENT_DIR)/windows \
+$(MAGIC_FRAGMENT_DIR)/wireless \
 $(MAGIC_FRAGMENT_DIR)/wordprocessors \
 $(MAGIC_FRAGMENT_DIR)/xdelta \
 $(MAGIC_FRAGMENT_DIR)/xenix \

+ 2 - 2
src/Makefile.am

@@ -8,9 +8,9 @@ AM_CPPFLAGS = -DMAGIC='"$(MAGIC)"'
 AM_CFLAGS = @WARNINGS@
 
 libmagic_la_SOURCES = magic.c apprentice.c softmagic.c ascmagic.c \
-	compress.c is_tar.c readelf.c print.c fsmagic.c \
+	encoding.c compress.c is_tar.c readelf.c print.c fsmagic.c \
 	funcs.c file.h names.h patchlevel.h readelf.h tar.h apptype.c \
-	file_opts.h elfclass.h mygetopt.h
+	file_opts.h elfclass.h mygetopt.h cdf.c cdf_time.c readcdf.c cdf.h
 libmagic_la_LDFLAGS = -no-undefined -version-info 1:0:0
 libmagic_la_LIBADD = $(LTLIBOBJS)
 

+ 13 - 5
src/Makefile.in

@@ -37,7 +37,8 @@ host_triplet = @host@
 bin_PROGRAMS = file$(EXEEXT)
 subdir = src
 DIST_COMMON = $(include_HEADERS) $(srcdir)/Makefile.am \
-	$(srcdir)/Makefile.in asprintf.c getopt_long.c vasprintf.c
+	$(srcdir)/Makefile.in asprintf.c getopt_long.c strlcat.c \
+	strlcpy.c vasprintf.c
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
 	$(top_srcdir)/configure.ac
@@ -58,8 +59,9 @@ libLTLIBRARIES_INSTALL = $(INSTALL)
 LTLIBRARIES = $(lib_LTLIBRARIES)
 libmagic_la_DEPENDENCIES = $(LTLIBOBJS)
 am_libmagic_la_OBJECTS = magic.lo apprentice.lo softmagic.lo \
-	ascmagic.lo compress.lo is_tar.lo readelf.lo print.lo \
-	fsmagic.lo funcs.lo apptype.lo
+	ascmagic.lo encoding.lo compress.lo is_tar.lo readelf.lo \
+	print.lo fsmagic.lo funcs.lo apptype.lo cdf.lo cdf_time.lo \
+	readcdf.lo
 libmagic_la_OBJECTS = $(am_libmagic_la_OBJECTS)
 libmagic_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
@@ -201,9 +203,9 @@ include_HEADERS = magic.h
 AM_CPPFLAGS = -DMAGIC='"$(MAGIC)"'
 AM_CFLAGS = @WARNINGS@
 libmagic_la_SOURCES = magic.c apprentice.c softmagic.c ascmagic.c \
-	compress.c is_tar.c readelf.c print.c fsmagic.c \
+	encoding.c compress.c is_tar.c readelf.c print.c fsmagic.c \
 	funcs.c file.h names.h patchlevel.h readelf.h tar.h apptype.c \
-	file_opts.h elfclass.h mygetopt.h
+	file_opts.h elfclass.h mygetopt.h cdf.c cdf_time.c readcdf.c cdf.h
 
 libmagic_la_LDFLAGS = -no-undefined -version-info 1:0:0
 libmagic_la_LIBADD = $(LTLIBOBJS)
@@ -311,17 +313,23 @@ distclean-compile:
 
 @AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/asprintf.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/getopt_long.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/strlcat.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/strlcpy.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/vasprintf.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/apprentice.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/apptype.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ascmagic.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cdf.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cdf_time.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compress.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/encoding.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/file.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fsmagic.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/funcs.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/is_tar.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/magic.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/print.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/readcdf.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/readelf.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/softmagic.Plo@am__quote@
 

+ 90 - 27
src/apprentice.c

@@ -30,6 +30,11 @@
  */
 
 #include "file.h"
+
+#ifndef	lint
+FILE_RCSID("@(#)$File: apprentice.c,v 1.147 2009/02/03 20:27:51 christos Exp $")
+#endif	/* lint */
+
 #include "magic.h"
 #include "patchlevel.h"
 #include <stdlib.h>
@@ -40,18 +45,11 @@
 #include <assert.h>
 #include <ctype.h>
 #include <fcntl.h>
-#include <sys/stat.h>
-#include <sys/param.h>
 #ifdef QUICK
 #include <sys/mman.h>
 #endif
-#include <sys/types.h>
 #include <dirent.h>
 
-#ifndef	lint
-FILE_RCSID("@(#)$File: apprentice.c,v 1.140 2008/07/20 04:02:15 christos Exp $")
-#endif	/* lint */
-
 #define	EATAB {while (isascii((unsigned char) *l) && \
 		      isspace((unsigned char) *l))  ++l;}
 #define LOWCASE(l) (isupper((unsigned char) (l)) ? \
@@ -106,7 +104,7 @@ private void bs1(struct magic *);
 private uint16_t swap2(uint16_t);
 private uint32_t swap4(uint32_t);
 private uint64_t swap8(uint64_t);
-private void mkdbname(const char *, char **, int);
+private char *mkdbname(struct magic_set *, const char *, int);
 private int apprentice_map(struct magic_set *, struct magic **, uint32_t *,
     const char *);
 private int apprentice_compile(struct magic_set *, struct magic **, uint32_t *,
@@ -115,8 +113,8 @@ private int check_format_type(const char *, int);
 private int check_format(struct magic_set *, struct magic *);
 private int get_op(char);
 private int parse_mime(struct magic_set *, struct magic_entry *, const char *);
-private int parse_strength(struct magic_set *, struct magic_entry *,
-    const char *);
+private int parse_strength(struct magic_set *, struct magic_entry *, const char *);
+private int parse_apple(struct magic_set *, struct magic_entry *, const char *);
 
 
 private size_t maxmagic = 0;
@@ -131,6 +129,7 @@ private struct {
 } bang[] = {
 #define	DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
 	DECLARE_FIELD(mime),
+	DECLARE_FIELD(apple),
 	DECLARE_FIELD(strength),
 #undef	DECLARE_FIELD
 	{ NULL, 0, NULL }
@@ -215,6 +214,9 @@ static const struct type_tbl_s {
 	{ XX("double"),		FILE_DOUBLE,		FILE_FMT_DOUBLE },
 	{ XX("bedouble"),	FILE_BEDOUBLE,		FILE_FMT_DOUBLE },
 	{ XX("ledouble"),	FILE_LEDOUBLE,		FILE_FMT_DOUBLE },
+	{ XX("leid3"),		FILE_LEID3,		FILE_FMT_NUM },
+	{ XX("beid3"),		FILE_BEID3,		FILE_FMT_NUM },
+	{ XX("indirect"),	FILE_INDIRECT,		FILE_FMT_NONE },
 	{ XX_NULL,		FILE_INVALID,		FILE_FMT_NONE },
 # undef XX
 # undef XX_NULL
@@ -589,7 +591,8 @@ set_test_type(struct magic *mstart, struct magic *m)
 	case FILE_REGEX:
 	case FILE_SEARCH:
 		/* binary test if pattern is not text */
-		if (file_looks_utf8(m->value.us, m->vallen, NULL, NULL) <= 0)
+		if (file_looks_utf8(m->value.us, (size_t)m->vallen, NULL,
+		    NULL) <= 0)
 			mstart->flag |= BINTEST;
 		break;
 	case FILE_DEFAULT:
@@ -704,6 +707,8 @@ apprentice_load(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
 		(void)fprintf(stderr, "%s\n", usg_hdr);
 
 	/* load directory or file */
+        /* FIXME: Read file names and sort them to prevent
+           non-determinism. See Debian bug #488562. */
 	if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) {
 		dir = opendir(fn);
 		if (dir) {
@@ -868,6 +873,7 @@ file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
 		case FILE_REGEX:
 		case FILE_SEARCH:
 		case FILE_DEFAULT:
+		case FILE_INDIRECT:
 			break;
 		default:
 			if (ms->flags & MAGIC_CHECK)
@@ -1184,6 +1190,12 @@ parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp,
 			case 'G':
 				m->in_type = FILE_BEDOUBLE;
 				break;
+			case 'i':
+				m->in_type = FILE_LEID3;
+				break;
+			case 'I':
+				m->in_type = FILE_BEID3;
+				break;
 			default:
 				if (ms->flags & MAGIC_CHECK)
 					file_magwarn(ms,
@@ -1473,6 +1485,38 @@ out:
 }
 
 /*
+ * Parse an Apple CREATOR/TYPE annotation from a magic file and store it in
+ * the apple field of me->mp[cont_count - 1] (me->mp[0] if cont_count is 0).
+ * Returns 0 if at least one character was stored, -1 if the entry already
+ * has an APPLE type or no acceptable character was found.
+ */
+private int
+parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line)
+{
+	size_t i;
+	const char *l = line;
+	struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
+
+	if (m->apple[0] != '\0') {
+		/* Report the existing APPLE type, not the MIME type */
+		file_magwarn(ms, "Current entry already has a APPLE type `%.8s',"
+		    " new type `%s'", m->apple, l);
+		return -1;
+	}
+
+	EATAB;
+	for (i = 0; *l && ((isascii((unsigned char)*l) && isalnum((unsigned char)*l))
+	     || strchr("-+/.", *l)) && i < sizeof(m->apple); m->apple[i++] = *l++)
+		continue;
+	if (i == sizeof(m->apple) && *l) {
+		if (ms->flags & MAGIC_CHECK)
+			file_magwarn(ms, "APPLE type `%s' truncated %zu",
+			    line, i);
+	}
+	return i > 0 ? 0 : -1;
+}
+
+/*
  * parse a MIME annotation line from magic file, put into magic[index - 1]
  * if valid
  */
@@ -1490,10 +1534,8 @@ parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line)
 	}	
 
 	EATAB;
-	for (i = 0;
-	     *l && ((isascii((unsigned char)*l) && isalnum((unsigned char)*l))
-	     || strchr("-+/.", *l)) && i < sizeof(m->mimetype);
-	     m->mimetype[i++] = *l++)
+	for (i = 0; *l && ((isascii((unsigned char)*l) && isalnum((unsigned char)*l))
+	     || strchr("-+/.", *l)) && i < sizeof(m->mimetype); m->mimetype[i++] = *l++)
 		continue;
 	if (i == sizeof(m->mimetype)) {
 		m->desc[sizeof(m->mimetype) - 1] = '\0';
@@ -2014,7 +2056,7 @@ apprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
 	char *dbname = NULL;
 	void *mm = NULL;
 
-	mkdbname(fn, &dbname, 0);
+	dbname = mkdbname(ms, fn, 0);
 	if (dbname == NULL)
 		goto error2;
 
@@ -2111,7 +2153,7 @@ apprentice_compile(struct magic_set *ms, struct magic **magicp,
 	char *dbname;
 	int rv = -1;
 
-	mkdbname(fn, &dbname, 1);
+	dbname = mkdbname(ms, fn, 1);
 
 	if (dbname == NULL) 
 		goto out;
@@ -2149,24 +2191,45 @@ private const char ext[] = ".mgc";
 /*
  * make a dbname
  */
-private void
-mkdbname(const char *fn, char **buf, int strip)
+private char *
+mkdbname(struct magic_set *ms, const char *fn, int strip)
 {
-	const char *p;
+	const char *p, *q;
+	char *buf;
+
 	if (strip) {
 		if ((p = strrchr(fn, '/')) != NULL)
 			fn = ++p;
 	}
 
-	if ((p = strstr(fn, ext)) != NULL && p[sizeof(ext) - 1] == '\0')
-		*buf = strdup(fn);
-	else
-		(void)asprintf(buf, "%s%s", fn, ext);
+	q = fn + strlen(fn);
+	/* Walk backwards over fn and ext together, looking for a .mgc suffix */
+	for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--)
+		if (*p != *q)
+			break;
 
-	if (buf && *buf && strlen(*buf) > MAXPATHLEN) {
-		free(*buf);
-		*buf = NULL;
+	/* Did not find .mgc; q may even be before fn, so recompute the end */
+	if (p >= ext)
+		q = fn + strlen(fn);
+
+	q++;
+	/* Compatibility with old code that looked in .mime */
+	if (ms->flags & MAGIC_MIME) {
+		if (asprintf(&buf, "%.*s.mime%s", (int)(q - fn), fn, ext) < 0)
+			return NULL;	/* buf is indeterminate on failure */
+		if (access(buf, R_OK) != -1) {
+			ms->flags &= MAGIC_MIME_TYPE;
+			return buf;
+		}
+		free(buf);
 	}
+	if (asprintf(&buf, "%.*s%s", (int)(q - fn), fn, ext) < 0)
+		return NULL;	/* callers treat NULL as failure */
+
+	/* Compatibility with old code that looked in .mime (test fn, not ext) */
+	if (strstr(fn, ".mime") != NULL)
+		ms->flags &= MAGIC_MIME_TYPE;
+	return buf;
 }
 
 /*

+ 4 - 6
src/apptype.c

@@ -26,15 +26,13 @@
 
 #include "file.h"
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-
 #ifndef	lint
-FILE_RCSID("@(#)$File: apptype.c,v 1.7 2007/01/12 17:38:27 christos Exp $")
+FILE_RCSID("@(#)$File: apptype.c,v 1.10 2009/02/03 20:27:51 christos Exp $")
 #endif /* lint */
 
+#include <stdlib.h>
+#include <string.h>
+
 #ifdef __EMX__
 #include <io.h>
 #define INCL_DOSSESMGR

+ 76 - 480
src/ascmagic.c

@@ -2,7 +2,7 @@
  * Copyright (c) Ian F. Darwin 1986-1995.
  * Software written by Ian F. Darwin and others;
  * maintained 1995-present by Christos Zoulas and others.
- * 
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -12,7 +12,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- *  
+ *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -31,14 +31,15 @@
  *
  * Extensively modified by Eric Fischer <enf@pobox.com> in July, 2000,
  * to handle character codes other than ASCII on a unified basis.
- *
- * Joerg Wunsch <joerg@freebsd.org> wrote the original support for 8-bit
- * international characters, now subsumed into this file.
  */
 
 #include "file.h"
+
+#ifndef	lint
+FILE_RCSID("@(#)$File: ascmagic.c,v 1.75 2009/02/03 20:27:51 christos Exp $")
+#endif	/* lint */
+
 #include "magic.h"
-#include <stdio.h>
 #include <string.h>
 #include <memory.h>
 #include <ctype.h>
@@ -48,39 +49,71 @@
 #endif
 #include "names.h"
 
-#ifndef	lint
-FILE_RCSID("@(#)$File: ascmagic.c,v 1.64 2008/07/16 18:00:57 christos Exp $")
-#endif	/* lint */
-
 #define MAXLINELEN 300	/* longest sane line length */
 #define ISSPC(x) ((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' \
 		  || (x) == 0x85 || (x) == '\f')
 
-private int looks_ascii(const unsigned char *, size_t, unichar *, size_t *);
-private int looks_utf8_with_BOM(const unsigned char *, size_t, unichar *,
-    size_t *);
-private int looks_ucs16(const unsigned char *, size_t, unichar *, size_t *);
-private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *);
-private int looks_extended(const unsigned char *, size_t, unichar *, size_t *);
-private void from_ebcdic(const unsigned char *, size_t, unsigned char *);
 private int ascmatch(const unsigned char *, const unichar *, size_t);
 private unsigned char *encode_utf8(unsigned char *, size_t, unichar *, size_t);
+private size_t trim_nuls(const unsigned char *, size_t);
 
+/*
+ * Strip the trailing NUL bytes that process() appended to the buffer,
+ * always keeping at least one byte so there is something to look at
+ */
+private size_t
+trim_nuls(const unsigned char *buf, size_t nbytes)
+{
+	for (; nbytes > 1; nbytes--)
+		if (buf[nbytes - 1] != '\0')
+			break;
+	return nbytes;
+}
 
 protected int
 file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
 {
-	size_t i;
-	unsigned char *nbuf = NULL, *utf8_buf = NULL, *utf8_end;
-	unichar *ubuf = NULL;	
-	size_t ulen, mlen;
-	const struct names *p;
-	int rv = -1;
-	int mime = ms->flags & MAGIC_MIME;
+	unichar *ubuf = NULL;	/* handed to file_encoding(); freed at done */
+	size_t ulen;		/* handed to file_encoding() along with ubuf */
+	int rv = 1;
 
 	const char *code = NULL;
 	const char *code_mime = NULL;
 	const char *type = NULL;
+
+	if (ms->flags & MAGIC_APPLE)	/* return 0 at once in MAGIC_APPLE mode */
+		return 0;
+
+	nbytes = trim_nuls(buf, nbytes);	/* drop trailing NULs, keep >= 1 byte */
+
+	/* If file doesn't look like any sort of text, give up. */
+	if (file_encoding(ms, buf, nbytes, &ubuf, &ulen, &code, &code_mime,
+	    &type) == 0) {
+		rv = 0;
+		goto done;
+	}
+
+	rv = file_ascmagic_with_encoding(ms, buf, nbytes, ubuf, ulen, code, 
+	    type);	/* code_mime is not passed on */
+
+ done:
+	if (ubuf)
+		free(ubuf);
+
+	return rv;
+}
+
+protected int
+file_ascmagic_with_encoding(struct magic_set *ms, const unsigned char *buf,
+    size_t nbytes, unichar *ubuf, size_t ulen, const char *code,
+    const char *type)
+{
+	unsigned char *utf8_buf = NULL, *utf8_end;
+	size_t mlen, i;
+	const struct names *p;
+	int rv = -1;
+	int mime = ms->flags & MAGIC_MIME;
+
 	const char *subtype = NULL;
 	const char *subtype_mime = NULL;
 
@@ -96,82 +129,20 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
 	size_t last_line_end = (size_t)-1;
 	int has_long_lines = 0;
 
-	/*
-	 * Undo the NUL-termination kindly provided by process()
-	 * but leave at least one byte to look at
-	 */
-	while (nbytes > 1 && buf[nbytes - 1] == '\0')
-		nbytes--;
-
-	if ((nbuf = CAST(unsigned char *, calloc((size_t)1,
-	    (nbytes + 1) * sizeof(nbuf[0])))) == NULL)
-		goto done;
-	if ((ubuf = CAST(unichar *, calloc((size_t)1,
-	    (nbytes + 1) * sizeof(ubuf[0])))) == NULL)
-		goto done;
+	if (ms->flags & MAGIC_APPLE)
+		return 0;
 
-	/*
-	 * Then try to determine whether it's any character code we can
-	 * identify.  Each of these tests, if it succeeds, will leave
-	 * the text converted into one-unichar-per-character Unicode in
-	 * ubuf, and the number of characters converted in ulen.
-	 */
-	if (looks_ascii(buf, nbytes, ubuf, &ulen)) {
-		code = "ASCII";
-		code_mime = "us-ascii";
-		type = "text";
-	} else if (looks_utf8_with_BOM(buf, nbytes, ubuf, &ulen) > 0) {
-		code = "UTF-8 Unicode (with BOM)";
-		code_mime = "utf-8";
-		type = "text";
-	} else if (file_looks_utf8(buf, nbytes, ubuf, &ulen) > 1) {
-		code = "UTF-8 Unicode";
-		code_mime = "utf-8";
-		type = "text";
-	} else if ((i = looks_ucs16(buf, nbytes, ubuf, &ulen)) != 0) {
-		if (i == 1)
-			code = "Little-endian UTF-16 Unicode";
-		else
-			code = "Big-endian UTF-16 Unicode";
-
-		type = "character data";
-		code_mime = "utf-16";    /* is this defined? */
-	} else if (looks_latin1(buf, nbytes, ubuf, &ulen)) {
-		code = "ISO-8859";
-		type = "text";
-		code_mime = "iso-8859-1"; 
-	} else if (looks_extended(buf, nbytes, ubuf, &ulen)) {
-		code = "Non-ISO extended-ASCII";
-		type = "text";
-		code_mime = "unknown";
-	} else {
-		from_ebcdic(buf, nbytes, nbuf);
-
-		if (looks_ascii(nbuf, nbytes, ubuf, &ulen)) {
-			code = "EBCDIC";
-			type = "character data";
-			code_mime = "ebcdic";
-		} else if (looks_latin1(nbuf, nbytes, ubuf, &ulen)) {
-			code = "International EBCDIC";
-			type = "character data";
-			code_mime = "ebcdic";
-		} else {
-			rv = 0;
-			goto done;  /* doesn't look like text at all */
-		}
-	}
+	nbytes = trim_nuls(buf, nbytes);
 
+	/* If we have fewer than 2 bytes, give up. */
 	if (nbytes <= 1) {
 		rv = 0;
 		goto done;
 	}
 
 	/* Convert ubuf to UTF-8 and try text soft magic */
-	/* If original was ASCII or UTF-8, could use nbuf instead of
-	   re-converting. */
 	/* malloc size is a conservative overestimate; could be
-	   re-converting improved, or at least realloced after
-	   re-converting conversion. */
+	   improved, or at least realloced after conversion. */
 	mlen = ulen * 6;
 	if ((utf8_buf = CAST(unsigned char *, malloc(mlen))) == NULL) {
 		file_oomem(ms, mlen);
@@ -179,10 +150,11 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
 	}
 	if ((utf8_end = encode_utf8(utf8_buf, mlen, ubuf, ulen)) == NULL)
 		goto done;
-	if (file_softmagic(ms, utf8_buf, utf8_end - utf8_buf, TEXTTEST) != 0) {
-		rv = 1;
+	if ((rv = file_softmagic(ms, utf8_buf, (size_t)(utf8_end - utf8_buf),
+	    TEXTTEST)) != 0)
 		goto done;
-	}
+	else
+		rv = -1;
 
 	/* look for tokens from names.h - this is expensive! */
 	if ((ms->flags & MAGIC_NO_CHECK_TOKENS) != 0)
@@ -255,41 +227,30 @@ subtype_identified:
 	if (seen_cr && nbytes < HOWMANY)
 		n_cr++;
 
+	if (strcmp(type, "binary") == 0) {
+		rv = 0;
+		goto done;
+	}
 	if (mime) {
-		if (mime & MAGIC_MIME_TYPE) {
+		if ((mime & MAGIC_MIME_TYPE) != 0) {
 			if (subtype_mime) {
-				if (file_printf(ms, subtype_mime) == -1)
+				if (file_printf(ms, "%s", subtype_mime) == -1)
 					goto done;
 			} else {
 				if (file_printf(ms, "text/plain") == -1)
 					goto done;
 			}
 		}
-
-		if ((mime == 0 || mime == MAGIC_MIME) && code_mime) {
-			if ((mime & MAGIC_MIME_TYPE) &&
-			    file_printf(ms, " charset=") == -1)
-				goto done;
-			if (file_printf(ms, code_mime) == -1)
-				goto done;
-		}
-
-		if (mime == MAGIC_MIME_ENCODING)
-			file_printf(ms, "binary");
 	} else {
-		if (file_printf(ms, code) == -1)
+		if (file_printf(ms, "%s", code) == -1)
 			goto done;
 
 		if (subtype) {
-			if (file_printf(ms, " ") == -1)
-				goto done;
-			if (file_printf(ms, subtype) == -1)
+			if (file_printf(ms, " %s", subtype) == -1)
 				goto done;
 		}
 
-		if (file_printf(ms, " ") == -1)
-			goto done;
-		if (file_printf(ms, type) == -1)
+		if (file_printf(ms, " %s", type) == -1)
 			goto done;
 
 		if (has_long_lines)
@@ -305,7 +266,7 @@ subtype_identified:
 			if (file_printf(ms, ", with") == -1)
 				goto done;
 
-			if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0)			{
+			if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) {
 				if (file_printf(ms, " no") == -1)
 					goto done;
 			} else {
@@ -348,10 +309,6 @@ subtype_identified:
 	}
 	rv = 1;
 done:
-	if (nbuf)
-		free(nbuf);
-	if (ubuf)
-		free(ubuf);
 	if (utf8_buf)