Quellcode durchsuchen

Merge upstream version 5.28

Christoph Biedl vor 8 Jahren
Ursprung
Commit
4648dc3f1a
90 geänderte Dateien mit 5300 neuen und 1297 gelöschten Zeilen
  1. 71 0
      ChangeLog
  2. 32 5
      README
  3. 6 0
      config.h.in
  4. 51 12
      configure
  5. 22 4
      configure.ac
  6. 11 2
      doc/file.man
  7. 5 4
      doc/libmagic.man
  8. 2 2
      magic/Magdir/android
  9. 3 1
      magic/Magdir/animation
  10. 41 11
      magic/Magdir/apple
  11. 247 53
      magic/Magdir/archive
  12. 43 2
      magic/Magdir/audio
  13. 65 0
      magic/Magdir/ber
  14. 178 0
      magic/Magdir/bioinformatics
  15. 9 6
      magic/Magdir/c-lang
  16. 7 4
      magic/Magdir/cafebabe
  17. 77 0
      magic/Magdir/coff
  18. 5 3
      magic/Magdir/commands
  19. 2 1
      magic/Magdir/compress
  20. 558 80
      magic/Magdir/console
  21. 91 0
      magic/Magdir/coverage
  22. 79 2
      magic/Magdir/database
  23. 116 0
      magic/Magdir/der
  24. 21 23
      magic/Magdir/elf
  25. 31 29
      magic/Magdir/filesystems
  26. 16 0
      magic/Magdir/finger
  27. 36 0
      magic/Magdir/flif
  28. 42 4
      magic/Magdir/fonts
  29. 4 2
      magic/Magdir/fortran
  30. 19 13
      magic/Magdir/hitachi-sh
  31. 351 26
      magic/Magdir/images
  32. 16 4
      magic/Magdir/intel
  33. 9 2
      magic/Magdir/java
  34. 27 4
      magic/Magdir/lisp
  35. 46 11
      magic/Magdir/mach
  36. 6 3
      magic/Magdir/macintosh
  37. 21 0
      magic/Magdir/microfocus
  38. 30 2
      magic/Magdir/misctools
  39. 43 3
      magic/Magdir/modem
  40. 132 47
      magic/Magdir/msdos
  41. 27 6
      magic/Magdir/msvc
  42. 134 80
      magic/Magdir/msx
  43. 18 1
      magic/Magdir/netbsd
  44. 2 2
      magic/Magdir/os2
  45. 24 0
      magic/Magdir/pc88
  46. 77 0
      magic/Magdir/pc98
  47. 4 4
      magic/Magdir/perl
  48. 23 0
      magic/Magdir/polyml
  49. 10 4
      magic/Magdir/psdbms
  50. 5 5
      magic/Magdir/python
  51. 22 5
      magic/Magdir/sendmail
  52. 6 6
      magic/Magdir/sgml
  53. 8 15
      magic/Magdir/sinclair
  54. 26 13
      magic/Magdir/sql
  55. 16 2
      magic/Magdir/terminfo
  56. 54 0
      magic/Magdir/vacuum-cleaner
  57. 240 3
      magic/Magdir/windows
  58. 182 92
      magic/Magdir/wordprocessors
  59. 25 0
      magic/Magdir/x68000
  60. 21 2
      magic/Magdir/xenix
  61. 15 2
      magic/Makefile.am
  62. 15 2
      magic/Makefile.in
  63. 0 13
      python/README
  64. 73 17
      python/magic.py
  65. 22 10
      python/setup.py
  66. 2 2
      src/Makefile.am
  67. 22 21
      src/Makefile.in
  68. 82 40
      src/apprentice.c
  69. 8 10
      src/ascmagic.c
  70. 80 49
      src/cdf.c
  71. 2 1
      src/cdf.h
  72. 407 193
      src/compress.c
  73. 384 0
      src/der.c
  74. 28 0
      src/der.h
  75. 58 0
      src/dprintf.c
  76. 39 19
      src/file.c
  77. 17 8
      src/file.h
  78. 28 28
      src/file_opts.h
  79. 17 0
      src/fmtcheck.c
  80. 20 6
      src/funcs.c
  81. 17 9
      src/magic.c
  82. 0 125
      src/magic.h
  83. 1 0
      src/magic.h.in
  84. 2 1
      src/print.c
  85. 11 3
      src/readcdf.c
  86. 199 20
      src/readelf.c
  87. 36 0
      src/readelf.h
  88. 218 111
      src/softmagic.c
  89. 1 1
      tests/Makefile.am
  90. 1 1
      tests/Makefile.in

+ 71 - 0
ChangeLog

@@ -1,3 +1,74 @@
+2016-06-13  19:40  Christos Zoulas <christos@zoulas.com>
+
+	* release 5.28
+	* fix leak on allocation failure
+
+2016-06-01   1:20  Christos Zoulas <christos@zoulas.com>
+
+	* PR/555: Avoid overflow for offset > nbytes
+	* PR/550: Segv on DER parsing:
+	    - use the correct variable for length
+	    - set offset to 0 on failure.
+
+2016-05-13  12:00  Christos Zoulas <christos@zoulas.com>
+	
+	* release 5.27
+
+2016-04-18   9:35  Christos Zoulas <christos@zoulas.com>
+	
+	* Errors comparing DER entries or computing offsets
+	  are just indications of malformed non-DER files.
+	  Don't print them.
+	* Offset comparison was off-by-one.
+	* Fix compression code (Werner Fink)
+	* Put new bytes constant in the right file (not the generated one)
+
+2016-04-16  18:34  Christos Zoulas <christos@zoulas.com>
+	
+	* release 5.26
+
+2016-03-31  13:50  Christos Zoulas <christos@zoulas.com>
+
+	* make the number of bytes read from files configurable.
+
+2016-03-21  13:40  Christos Zoulas <christos@zoulas.com>
+
+	* Add bounds checks for DER code (discovered by Thomas Jarosch)
+	* Change indirect recursion limit to indirect use count and
+	  bump from 15 to 50 to prevent abuse.
+
+2016-03-13  20:39  Christos Zoulas <christos@zoulas.com>
+
+	* Add -00 which prints filename\0description\0
+
+2016-03-01  13:28  Christos Zoulas <christos@zoulas.com>
+
+	* Fix ID3 indirect parsing
+
+2016-01-19  10:18  Christos Zoulas <christos@zoulas.com>
+
+	* add DER parsing capability
+
+2015-11-13  10:35  Christos Zoulas <christos@zoulas.com>
+
+	* provide dprintf(3) for the OS's that don't have it.
+
+2015-11-11  16:25  Christos Zoulas <christos@zoulas.com>
+
+	* redo the compression code to report decompression errors
+
+2015-11-10  23:25  Christos Zoulas <christos@zoulas.com>
+
+	* REG_STARTEND code is not working as expected, delete it.
+
+2015-11-09  16:05  Christos Zoulas <christos@zoulas.com>
+
+	* Add zlib support if we have it.
+
+2015-11-05  11:22  Christos Zoulas <christos@zoulas.com>
+
+	* PR/492: compression forking was broken with magic_buffer.
+
 2015-09-16   9:50  Christos Zoulas <christos@zoulas.com>
 	
 	* release 5.25

+ 32 - 5
README

@@ -1,6 +1,6 @@
 ## README for file(1) Command ##
 
-    @(#) $File: README,v 1.49 2015/01/02 20:23:04 christos Exp $
+    @(#) $File: README,v 1.50 2016/04/16 22:40:54 christos Exp $
 
 Mailing List: file@mx.gw.com  
 Mailing List archives: http://mx.gw.com/pipermail/file/  
@@ -67,17 +67,41 @@ in magic(5) format please, to the maintainer, Christos Zoulas.
 COPYING - read this first.  
 README - read this second (you are currently reading this file).  
 INSTALL - read on how to install
+src/localtime_r.c
+src/magic.c
+src/magic.h
+src/mygetopt.h
+src/newtest2.c
+src/newtest3.c
+src/pread.c
+src/print.c
+src/readcdf.c
+src/readelf.c
+src/readelf.h
+src/regex.c
+src/regex2.c
+src/softmagic.c
+src/strcasestr.c
+src/strlcat.c
+src/strlcpy.c
+src/strndup.c
+src/tar.h
+src/teststrchr.c
+src/vasprintf.c
+src/x.c
 src/apprentice.c - parses /etc/magic to learn magic  
-src/asctime_r.c - replacement for OS's that don't have it.  
 src/apptype.c - used for OS/2 specific application type magic  
-src/asprintf.c - replacement for OS's that don't have it.  
 src/ascmagic.c - third & last set of tests, based on hardwired assumptions.  
 src/asctime_r.c - replacement for OS's that don't have it.  
 src/asprintf.c - replacement for OS's that don't have it.  
+src/asctime_r.c - replacement for OS's that don't have it.  
+src/asprintf.c - replacement for OS's that don't have it.  
 src/cdf.[ch] - parser for Microsoft Compound Document Files  
 src/cdf_time.c - time converter for CDF.  
 src/compress.c - handles decompressing files to look inside.  
 src/ctime_r.c - replacement for OS's that don't have it.  
+src/der.[ch] - parser for Distinguished Encoding Rules
+src/dprintf.c - replacement for OS's that don't have it.
 src/elfclass.h - common code for elf 32/64.
 src/encoding.c - handles unicode encodings  
 src/file.c - the main program  
@@ -88,10 +112,13 @@ src/fsmagic.c - first set of tests the program runs, based on filesystem info
 src/funcs.c - utility functions  
 src/getline.c - replacement for OS's that don't have it.  
 src/getopt_long.c - replacement for OS's that don't have it.  
-src/is_tar.c, tar.h - knows about tarchives (courtesy John Gilmore).  
-src/names.h - header file for ascmagic.c  
+src/gmtime_r.c - replacement for OS's that don't have it.  
+src/is_tar.c, tar.h - knows about Tape ARchive format (courtesy John Gilmore).  
+src/localtime_r.c - replacement for OS's that don't have it.  
 src/magic.h.in - source file for magic.h
+src/mygetopt.h - replacement for OS's that don't have it.  
 src/magic.c - the libmagic api  
+src/names.h - header file for ascmagic.c  
 src/pread.c - replacement for OS's that don't have it.  
 src/print.c - print results, errors, warnings.  
 src/readcdf.c - CDF wrapper.  

+ 6 - 0
config.h.in

@@ -32,6 +32,9 @@
 /* Define to 1 if you have the <dlfcn.h> header file. */
 #undef HAVE_DLFCN_H
 
+/* Define to 1 if you have the `dprintf' function. */
+#undef HAVE_DPRINTF
+
 /* Define to 1 if you have the <err.h> header file. */
 #undef HAVE_ERR_H
 
@@ -325,6 +328,9 @@
 # endif
 #endif
 
+/* Enable zlib compression support */
+#undef ZLIBSUPPORT
+
 /* Enable large inode numbers on Mac OS X 10.5.  */
 #ifndef _DARWIN_USE_64_BIT_INODE
 # define _DARWIN_USE_64_BIT_INODE 1

+ 51 - 12
configure

@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for file 5.25.
+# Generated by GNU Autoconf 2.69 for file 5.28.
 #
 # Report bugs to <christos@astron.com>.
 #
@@ -590,8 +590,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='file'
 PACKAGE_TARNAME='file'
-PACKAGE_VERSION='5.25'
-PACKAGE_STRING='file 5.25'
+PACKAGE_VERSION='5.28'
+PACKAGE_STRING='file 5.28'
 PACKAGE_BUGREPORT='christos@astron.com'
 PACKAGE_URL=''
 
@@ -766,6 +766,7 @@ enable_option_checking
 enable_silent_rules
 enable_elf
 enable_elf_core
+enable_zlib
 enable_fsect_man5
 enable_dependency_tracking
 enable_static
@@ -1327,7 +1328,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures file 5.25 to adapt to many kinds of systems.
+\`configure' configures file 5.28 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1397,7 +1398,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of file 5.25:";;
+     short | recursive ) echo "Configuration of file 5.28:";;
    esac
   cat <<\_ACEOF
 
@@ -1409,6 +1410,7 @@ Optional Features:
   --disable-silent-rules  verbose build output (undo: "make V=0")
   --disable-elf            disable builtin ELF support
   --disable-elf-core       disable ELF core file support
+  --disable-zlib          disable zlib compression support [default=auto]
   --enable-fsect-man5      enable file formats in man section 5
   --enable-dependency-tracking
                           do not reject slow dependency extractors
@@ -1507,7 +1509,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-file configure 5.25
+file configure 5.28
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2163,7 +2165,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by file $as_me 5.25, which was
+It was created by file $as_me 5.28, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -3029,7 +3031,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='file'
- VERSION='5.25'
+ VERSION='5.28'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -3218,6 +3220,16 @@ $as_echo "#define ELFCORE 1" >>confdefs.h
 fi
 
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for zlib support" >&5
+$as_echo_n "checking for zlib support... " >&6; }
+# Check whether --enable-zlib was given.
+if test "${enable_zlib+set}" = set; then :
+  enableval=$enable_zlib;
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $enable_zlib" >&5
+$as_echo "$enable_zlib" >&6; }
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for file formats in man section 5" >&5
 $as_echo_n "checking for file formats in man section 5... " >&6; }
 # Check whether --enable-fsect-man5 was given.
@@ -12812,7 +12824,8 @@ fi
 
 done
 
-for ac_header in zlib.h
+if test "$enable_zlib" != "no"; then
+  for ac_header in zlib.h
 do :
   ac_fn_c_check_header_mongrel "$LINENO" "zlib.h" "ac_cv_header_zlib_h" "$ac_includes_default"
 if test "x$ac_cv_header_zlib_h" = xyes; then :
@@ -12824,6 +12837,7 @@ fi
 
 done
 
+fi
 ac_fn_c_check_type "$LINENO" "sig_t" "ac_cv_type_sig_t" "
 #ifdef HAVE_SIGNAL_H
 #include <signal.h>
@@ -14385,9 +14399,23 @@ esac
 
 fi
 
+ac_fn_c_check_func "$LINENO" "dprintf" "ac_cv_func_dprintf"
+if test "x$ac_cv_func_dprintf" = xyes; then :
+  $as_echo "#define HAVE_DPRINTF 1" >>confdefs.h
+
+else
+  case " $LIBOBJS " in
+  *" dprintf.$ac_objext "* ) ;;
+  *) LIBOBJS="$LIBOBJS dprintf.$ac_objext"
+ ;;
+esac
+
+fi
+
 
 
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for gzopen in -lz" >&5
+if test "$enable_zlib" != "no"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for gzopen in -lz" >&5
 $as_echo_n "checking for gzopen in -lz... " >&6; }
 if ${ac_cv_lib_z_gzopen+:} false; then :
   $as_echo_n "(cached) " >&6
@@ -14432,6 +14460,7 @@ _ACEOF
 
 fi
 
+fi
 if test "$MINGW" = 1; then
   { $as_echo "$as_me:${as_lineno-$LINENO}: checking for regexec in -lgnurx" >&5
 $as_echo_n "checking for regexec in -lgnurx... " >&6; }
@@ -14491,6 +14520,16 @@ else
 fi
 
 
+if test "$enable_zlib" = "yes"; then
+  if test "$ac_cv_header_zlib_h$ac_cv_lib_z_gzopen" != "yesyes"; then
+    as_fn_error $? "zlib support requested but not found" "$LINENO" 5
+  fi
+elif  test "$ac_cv_header_zlib_h$ac_cv_lib_z_gzopen" = "yesyes"; then
+
+$as_echo "#define ZLIBSUPPORT 1" >>confdefs.h
+
+fi
+
 ac_config_files="$ac_config_files Makefile src/Makefile magic/Makefile tests/Makefile doc/Makefile python/Makefile"
 
 cat >confcache <<\_ACEOF
@@ -15036,7 +15075,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by file $as_me 5.25, which was
+This file was extended by file $as_me 5.28, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -15102,7 +15141,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-file config.status 5.25
+file config.status 5.28
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 

+ 22 - 4
configure.ac

@@ -1,5 +1,5 @@
 dnl Process this file with autoconf to produce a configure script.
-AC_INIT([file],[5.25],[christos@astron.com])
+AC_INIT([file],[5.28],[christos@astron.com])
 AM_INIT_AUTOMAKE([subdir-objects foreign])
 m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
 
@@ -34,6 +34,11 @@ fi], [
   AC_DEFINE([ELFCORE], 1, [Define for ELF core file support])
 ])
 
+AC_MSG_CHECKING(for zlib support)
+AC_ARG_ENABLE(zlib,
+[AS_HELP_STRING([--disable-zlib], [disable zlib compression support @<:@default=auto@:>@])])
+AC_MSG_RESULT($enable_zlib)
+
 AC_MSG_CHECKING(for file formats in man section 5)
 AC_ARG_ENABLE(fsect-man5,
 [  --enable-fsect-man5      enable file formats in man section 5],
@@ -84,7 +89,9 @@ AC_CHECK_HEADERS(stdint.h fcntl.h locale.h stdint.h inttypes.h unistd.h)
 AC_CHECK_HEADERS(stddef.h utime.h wchar.h wctype.h limits.h)
 AC_CHECK_HEADERS(getopt.h err.h xlocale.h signal.h)
 AC_CHECK_HEADERS(sys/mman.h sys/stat.h sys/types.h sys/utime.h sys/time.h)
-AC_CHECK_HEADERS(zlib.h)
+if test "$enable_zlib" != "no"; then
+  AC_CHECK_HEADERS(zlib.h)
+fi
 AC_CHECK_TYPE([sig_t],[AC_DEFINE([HAVE_SIG_T],1,[Have sig_t type])],,[
 #ifdef HAVE_SIGNAL_H
 #include <signal.h>
@@ -145,10 +152,12 @@ dnl Checks for functions
 AC_CHECK_FUNCS(strerror strndup strtoul mkstemp mkostemp utimes utime wcwidth strtof newlocale uselocale freelocale setlocale)
 
 dnl Provide implementation of some required functions if necessary
-AC_REPLACE_FUNCS(getopt_long asprintf vasprintf strlcpy strlcat getline ctime_r asctime_r localtime_r gmtime_r pread strcasestr fmtcheck)
+AC_REPLACE_FUNCS(getopt_long asprintf vasprintf strlcpy strlcat getline ctime_r asctime_r localtime_r gmtime_r pread strcasestr fmtcheck dprintf)
 
 dnl Checks for libraries
-AC_CHECK_LIB(z,gzopen)
+if test "$enable_zlib" != "no"; then
+  AC_CHECK_LIB(z, gzopen)
+fi
 if test "$MINGW" = 1; then
   AC_CHECK_LIB(gnurx,regexec,,AC_MSG_ERROR([libgnurx is required to build file(1) with MinGW]))
 fi
@@ -156,5 +165,14 @@ fi
 dnl See if we are cross-compiling
 AM_CONDITIONAL(IS_CROSS_COMPILE, test "$cross_compiling" = yes)
 
+dnl Final sanity checks
+if test "$enable_zlib" = "yes"; then
+  if test "$ac_cv_header_zlib_h$ac_cv_lib_z_gzopen" != "yesyes"; then
+    AC_MSG_ERROR([zlib support requested but not found])
+  fi
+elif  test "$ac_cv_header_zlib_h$ac_cv_lib_z_gzopen" = "yesyes"; then
+  AC_DEFINE([ZLIBSUPPORT], 1, [Enable zlib compression support])
+fi
+
 AC_CONFIG_FILES([Makefile src/Makefile magic/Makefile tests/Makefile doc/Makefile python/Makefile])
 AC_OUTPUT

+ 11 - 2
doc/file.man

@@ -1,5 +1,5 @@
-.\" $File: file.man,v 1.118 2015/09/11 17:24:09 christos Exp $
-.Dd September 11, 2015
+.\" $File: file.man,v 1.121 2016/06/07 22:09:20 rrt Exp $
+.Dd March 13, 2016
 .Dt FILE __CSECTION__
 .Os
 .Sh NAME
@@ -213,6 +213,9 @@ Prints ELF file details.
 Consults magic files.
 .It tar
 Examines tar files.
+.It text
+A synonym for
+.Sq ascii .
 .El
 .It Fl Fl extension 
 Print a slash-separated list of valid extensions for the file type found.
@@ -317,6 +320,7 @@ Set various parameter limits.
 .It Li elf_phnum Ta 128 Ta max ELF program sections processed
 .It Li elf_shnum Ta 32768 Ta max ELF sections processed
 .It Li regex Ta 8192 Ta length limit for regex searches
+.It Li bytes Ta 1048576 Ta max number of bytes to read from file
 .El
 .It Fl r , Fl Fl raw
 Don't translate unprintable characters to \eooo.
@@ -358,6 +362,11 @@ Nice to
 .Xr cut 1
 the output.
 This does not affect the separator, which is still printed.
+.Pp
+If this option is repeated more than once, then 
+.Nm
+prints just the filename followed by a NUL followed by the description
+(or ERROR: text) followed by a second NUL for each entry.
 .It Fl -help
 Print a help message and exit.
 .El

+ 5 - 4
doc/libmagic.man

@@ -1,4 +1,4 @@
-.\" $File: libmagic.man,v 1.38 2015/09/11 17:24:09 christos Exp $
+.\" $File: libmagic.man,v 1.40 2016/03/31 17:51:12 christos Exp $
 .\"
 .\" Copyright (c) Christos Zoulas 2003.
 .\" All Rights Reserved.
@@ -225,7 +225,7 @@ It returns 0 on success and \-1 on failure.
 .Pp
 The
 .Fn magic_compile
-function can be used to compile the the colon
+function can be used to compile the colon
 separated list of database files passed in as
 .Ar filename ,
 or
@@ -251,7 +251,7 @@ for the default database.
 .Pp
 The
 .Fn magic_load
-function must be used to load the the colon
+function must be used to load the colon
 separated list of database files passed in as
 .Ar filename ,
 or
@@ -282,7 +282,7 @@ The
 .Fn magic_getparam
 and
 .Fn magic_setparam
-allow getting and setting various limits related to the the magic
+allow getting and setting various limits related to the magic
 library.
 .Bl -column "MAGIC_PARAM_ELF_PHNUM_MAX" "size_t" "Default" -offset indent
 .It Sy "Parameter" Ta Sy "Type" Ta Sy "Default"
@@ -292,6 +292,7 @@ library.
 .It Li MAGIC_PARAM_ELF_PHNUM_MAX Ta size_t Ta 128
 .It Li MAGIC_PARAM_ELF_SHNUM_MAX Ta size_t Ta 32768
 .It Li MAGIC_PARAM_REGEX_MAX Ta size_t Ta 8192
+.It Li MAGIC_PARAM_BYTES_MAX Ta size_t Ta 1048576
 .El
 .Pp
 The

+ 2 - 2
magic/Magdir/android

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------
-# $File: android,v 1.8 2015/03/19 18:04:37 christos Exp $
+# $File: android,v 1.9 2016/01/11 21:19:18 christos Exp $
 # Various android related magic entries
 #------------------------------------------------------------
 
@@ -128,7 +128,7 @@
 # partition size in blocks ?
 #>>>>0x22	ulelong				x			\b*%d
 
-# Android bootimg format
+# Android sparse img format
 # From https://android.googlesource.com/\
 # platform/system/core/+/master/libsparse/sparse_format.h
 0		lelong	0xed26ff3a		Android sparse image

+ 3 - 1
magic/Magdir/animation

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: animation,v 1.56 2014/10/23 23:12:51 christos Exp $
+# $File: animation,v 1.57 2015/11/29 22:11:07 christos Exp $
 # animation:  file(1) magic for animation/movie formats
 #
 # animation formats
@@ -76,6 +76,8 @@
 >8	string		da2b		\b, DMB MAF, ext da2a, with 3GPP timed text, DID, TVA, REL, IPMP
 >8	string		da3a		\b, DMB MAF aud with HE-AAC aud, JPG/PNG/MNG images
 >8	string		da3b		\b, DMB MAF, ext da3a w/ BIFS, 3GPP, DID, TVA, REL, IPMP
+>8	string		dash		\b, MPEG v4 system, Dynamic Adaptive Streaming over HTTP
+!:mime	video/mp4
 >8	string		dmb1		\b, DMB MAF supporting all the components defined in the spec
 >8	string		dmpf		\b, Digital Media Project
 >8	string		drc1		\b, Dirac (wavelet compression), encap in ISO base media (MP4)

+ 41 - 11
magic/Magdir/apple

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: apple,v 1.31 2015/08/29 07:10:35 christos Exp $
+# $File: apple,v 1.32 2015/12/04 20:40:10 christos Exp $
 # apple:  file(1) magic for Apple file formats
 #
 0	search/1/t	FiLeStArTfIlEsTaRt	binscii (apple ][) text
@@ -65,18 +65,48 @@
 # Eric Fischer <enf@pobox.com>
 
 # AppleWorks word processor:
-#
-# This matches the standard tab stops for an AppleWorks file, but if
-# a file has a tab stop set in the first four columns this will fail.
-#
+# URL: https://en.wikipedia.org/wiki/AppleWorks
+# Reference: http://www.gno.org/pub/apple2/doc/apple/filetypes/ftn.1a.xxxx
+# Update: Joerg Jenderek 
+# NOTE:
 # The "O" is really the magic number, but that's so common that it's
 # necessary to check the tab stops that follow it to avoid false positives.
-
-4       string          O====   AppleWorks word processor data
->85     byte&0x01       >0      \b, zoomed
->90     byte&0x01       >0      \b, paginated
->92     byte&0x01       >0      \b, with mail merge
-#>91    byte            x       \b, left margin %d
+# and/or look for unused bits of booleans bytes like zoom, paginated, mail merge
+# the newer AppleWorks is from claris with extension CWK
+4	string		O	
+# test for unused bits of zoom- , paginated-boolean bytes
+>84	ubequad		^0x00Fe00000000Fe00		
+# look for tabstop definitions "=" no tab, "|" no tab
+# "<" left tab,"^" center tab,">" right tab, "." decimal tab,
+# unofficial "!" other , "\x8a" other
+# official only if SFMinVers is nonzero
+>>5	regex/s	[=.<>|!^\x8a]{79}	AppleWorks Word Processor
+# AppleWorks Word Processor File (Apple II)
+# ./apple (version 5.25) labeled the entry as "AppleWorks word processor data"
+# application/x-appleworks is mime type for claris version with cwk extension
+!:mime	application/x-appleworks3
+# http://home.earthlink.net/~hughhood/appleiiworksenvoy/
+# ('p' + 1-byte ProDOS File Type + 2-byte ProDOS Aux Type')
+# $70 $1A $F8 $FF is this the apple type ?
+#:apple pdospøÿ
+!:ext awp
+# minimum version needed to read these files. SFMinVers (0 , 30~3.0 )
+>>>183	ubyte		30	3.0
+>>>183	ubyte		!30	
+>>>>183	ubyte		!0	0x%x
+# usual tabstop start sequence "=====<" 
+>>>5	string		x	\b, tabstop ruler "%6.6s"
+# tabstop ruler
+#>>>5	string		>\0	\b, tabstops "%-79s"
+# zoom switch
+>>>85	  byte&0x01	>0	\b, zoomed
+# whether paginated
+>>>90	  byte&0x01	>0	\b, paginated
+# contains any mail-merge commands
+>>>92	  byte&0x01	>0	\b, with mail merge
+# left margin in 1/10 inches ( normally 0 or 10 )
+>>>91	ubyte		>0	
+>>>>91	ubyte		x	\b, %d/10 inch left margin
 
 # AppleWorks database:
 #

+ 247 - 53
magic/Magdir/archive

@@ -1,5 +1,5 @@
 #------------------------------------------------------------------------------
-# $File: archive,v 1.91 2015/09/16 13:49:33 christos Exp $
+# $File: archive,v 1.103 2016/05/05 17:07:40 christos Exp $
 # archive:  file(1) magic for archive formats (see also "msdos" for self-
 #           extracting compressed archives)
 #
@@ -246,7 +246,15 @@
 # BA
 # TODO: idarc says "bytes 0-2 == bytes 3-5"
 # TTComp
-0	string	\0\6 TTComp archive data
+# URL: http://fileformats.archiveteam.org/wiki/TTComp_archive
+# Update: Joerg Jenderek
+# GRR: line below is too general as it matches also Panorama database "TCDB 2003-10 demo.pan", others
+0	string	\0\6 
+# look for first keyword of Panorama database *.pan
+>12	search/261	DESIGN	
+# skip keyword with low entropy
+>12	default		x	TTComp archive, binary, 4K dictionary
+# (version 5.25) labeled the above entry as "TTComp archive data"
 # ESP, could this conflict with Easy Software Products' (e.g.ESP ghostscript) documentation?
 0	string	ESP ESP archive data
 # ZPack
@@ -544,55 +552,212 @@
 >>0x36	string		>\0			fstype %.8s
 
 # LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
-2	string		-lh0-		LHarc 1.x/ARX archive data [lh0]
-!:mime	application/x-lharc
-2	string		-lh1-		LHarc 1.x/ARX archive data [lh1]
-!:mime	application/x-lharc
-2	string		-lz4-		LHarc 1.x archive data [lz4]
-!:mime	application/x-lharc
-2	string		-lz5-		LHarc 1.x archive data [lz5]
-!:mime	application/x-lharc
+# Update: Joerg Jenderek 
+# URL: https://en.wikipedia.org/wiki/LHA_(file_format)
+# Reference: http://web.archive.org/web/20021005080911/http://www.osirusoft.com/joejared/lzhformat.html
+#
+#	check and display information of lharc (LHa,PMarc) file
+0	name				lharc-file
+# check 1st character of method id like -lz4- -lh5- or -pm2-
+>2	string		-
+# check 5th character of method id
+>>6	string		-		
+# check header level 0 1 2 3
+>>>20	ubyte		<4		
+# check 2nd, 3rd and 4th character of method id
+>>>>3	regex		\^(lh[0-9a-ex]|lz[s2-8]|pm[012]|pc1)		\b 
+!:mime	application/x-lzh-compressed
+# creator type "LHA "
+!:apple	????LHA 
+# display archive type name like "LHa/LZS archive data" or "LArc archive"
+>>>>>2	string		-lz		\b 
+!:ext	lzs
+# already known  -lzs- -lz4- -lz5- with old names
+>>>>>>2	string	-lzs		LHa/LZS archive data
+>>>>>>3	regex	\^lz[45]	LHarc 1.x archive data
+# missing -lz?- with wikipedia names
+>>>>>>3	regex	\^lz[2378]	LArc archive
+# display archive type name like "LHa (2.x) archive data"
+>>>>>2	string		-lh		\b 
+# already known -lh0- -lh1- -lh2- -lh3-  -lh4- -lh5- -lh6- -lh7- -lhd- variants with old names
+>>>>>>3	regex		\^lh[01]	LHarc 1.x/ARX archive data
+# LHice archiver uses ".ICE" as name extension instead of the usual one ".lzh"
+# FOOBAR archiver uses ".foo" as name extension instead of the usual one
+# "Florain Orjanov's and Olga Bachetska's ARchiver" not found at the moment
+>>>>>>>2	string	-lh1		\b 
+!:ext lha/lzh/ice
+>>>>>>3	regex		\^lh[23d]	LHa 2.x? archive data
+>>>>>>3	regex		\^lh[7]		LHa (2.x)/LHark archive data
+>>>>>>3	regex		\^lh[456]	LHa (2.x) archive data
+>>>>>>>2	string	-lh5		\b 
+# https://en.wikipedia.org/wiki/BIOS
+# Some mainboard BIOS like Award use LHa compression. So archives with unusual extensions are found like
+# bios.rom , kd7_v14.bin, 1010.004, ...
+!:ext lha/lzh/rom/bin
+# missing -lh?- variants (Joe Jared)
+>>>>>>3	regex		\^lh[89a-ce]	LHa (Joe Jared) archive
+# UNLHA32 2.67a
+>>>>>>2	string		-lhx		LHa (UNLHA32) archive
+# lha archives with standard file name extensions ".lha" ".lzh"
+>>>>>>3	regex		!\^(lh1|lh5)	\b 
+!:ext lha/lzh
+# this should not happen if all -lh variants are described
+>>>>>>2	default		x		LHa (unknown) archive
+#!:ext	lha
+# PMarc
+>>>>>3	regex		\^pm[012]	PMarc archive data
+!:ext pma
+# append method id without leading and trailing minus character
+>>>>>3	string		x		[%3.3s]
+>>>>>>0	use	lharc-header
+#
+#	check and display information of lharc header
+0	name				lharc-header
+# header size 0x4 , 0x1b-0x61
+>0	ubyte		x		
+# compressed data size != compressed file size
+#>7	ulelong		x		\b, data size %d
+# attribute: 0x2~?? 0x10~symlink|target 0x20~normal 
+#>19	ubyte		x		\b, 19_0x%x
+# level identifier 0 1 2 3
+#>20	ubyte		x		\b, level %d
+# time stamp
+#>15		ubelong	x		DATE 0x%8.8x
+# OS ID for level 1
+>20	ubyte		1		
+# 0x20 types found for *.rom files
+>>(21.b+24)	ubyte	<0x21		\b, 0x%x OS
+# ascii type like M for MSDOS
+>>(21.b+24)	ubyte	>0x20		\b, '%c' OS
+# OS ID for level 2
+>20	ubyte		2		
+#>>23	ubyte		x		\b, OS ID 0x%x
+>>23	ubyte		<0x21		\b, 0x%x OS
+>>23	ubyte		>0x20		\b, '%c' OS
+# filename only for level 0 and 1
+>20	ubyte		<2		
+# length of filename
+>>21		ubyte	>0		\b, with
+# filename
+>>>21		pstring	x		"%s"
+#
+#2	string		-lh0-		LHarc 1.x/ARX archive data [lh0]
+#!:mime	application/x-lharc
+2	string		-lh0-		
+>0	use	lharc-file
+#2	string		-lh1-		LHarc 1.x/ARX archive data [lh1]
+#!:mime	application/x-lharc
+2	string		-lh1-		
+>0	use	lharc-file
+# NEW -lz2- ... -lz8-
+2	string		-lz2-		
+>0	use	lharc-file
+2	string		-lz3-		
+>0	use	lharc-file
+2	string		-lz4-		
+>0	use	lharc-file
+2	string		-lz5-		
+>0	use	lharc-file
+2	string		-lz7-		
+>0	use	lharc-file
+2	string		-lz8-		
+>0	use	lharc-file
 #	[never seen any but the last; -lh4- reported in comp.compression:]
-2	string		-lzs-		LHa/LZS archive data [lzs]
-!:mime	application/x-lha
-2	string		-lh\40-		LHa 2.x? archive data [lh ]
-!:mime	application/x-lha
-2	string		-lhd-		LHa 2.x? archive data [lhd]
-!:mime	application/x-lha
-2	string		-lh2-		LHa 2.x? archive data [lh2]
-!:mime	application/x-lha
-2	string		-lh3-		LHa 2.x? archive data [lh3]
-!:mime	application/x-lha
-2	string		-lh4-		LHa (2.x) archive data [lh4]
-!:mime	application/x-lha
-2	string		-lh5-		LHa (2.x) archive data [lh5]
-!:mime	application/x-lha
-2	string		-lh6-		LHa (2.x) archive data [lh6]
-!:mime	application/x-lha
-2	string		-lh7-		LHa (2.x)/LHark archive data [lh7]
-!:mime	application/x-lha
->20	byte		x		- header level %d
+#2	string		-lzs-		LHa/LZS archive data [lzs]
+2	string		-lzs-		
+>0	use	lharc-file
+# According to wikipedia and others such a version does not exist
+#2	string		-lh\40-		LHa 2.x? archive data [lh ]
+#2	string		-lhd-		LHa 2.x? archive data [lhd]
+2	string		-lhd-		
+>0	use	lharc-file
+#2	string		-lh2-		LHa 2.x? archive data [lh2]
+2	string		-lh2-		
+>0	use	lharc-file
+#2	string		-lh3-		LHa 2.x? archive data [lh3]
+2	string		-lh3-		
+>0	use	lharc-file
+#2	string		-lh4-		LHa (2.x) archive data [lh4]
+2	string		-lh4-		
+>0	use	lharc-file
+#2	string		-lh5-		LHa (2.x) archive data [lh5]
+2	string		-lh5-		
+>0	use	lharc-file
+#2	string		-lh6-		LHa (2.x) archive data [lh6]
+2	string		-lh6-		
+>0	use	lharc-file
+#2	string		-lh7-		LHa (2.x)/LHark archive data [lh7]
+2	string		-lh7-		
+# !:mime	application/x-lha
+# >20	byte		x		- header level %d
+>0	use	lharc-file
+# NEW -lh8- ... -lhe- , -lhx-
+2	string		-lh8-		
+>0	use	lharc-file
+2	string		-lh9-		
+>0	use	lharc-file
+2	string		-lha-		
+>0	use	lharc-file
+2	string		-lhb-		
+>0	use	lharc-file
+2	string		-lhc-		
+>0	use	lharc-file
+2	string		-lhe-		
+>0	use	lharc-file
+2	string		-lhx-		
+>0	use	lharc-file
 # taken from idarc [JW]
 2   string      -lZ         PUT archive data
-2   string      -lz         LZS archive data
+# already done by LHarc magics
+# this should never happen if all sub types of LZS archive are identified 
+#2   string      -lz         LZS archive data
 2   string      -sw1-       Swag archive data
 
-# RAR archiver (Greg Roelofs, newt@uchicago.edu)
-0	string		Rar!		RAR archive data,
+0	name		rar-file-header
+>24	byte		15		\b, v1.5
+>24	byte		20		\b, v2.0
+>24	byte		29		\b, v4
+>15	byte		0		\b, os: MS-DOS
+>15	byte		1		\b, os: OS/2
+>15	byte		2		\b, os: Win32
+>15	byte		3		\b, os: Unix
+>15	byte		4		\b, os: Mac OS
+>15	byte		5		\b, os: BeOS
+
+0	name		rar-archive-header
+>3	leshort&0x1ff	>0		\b, flags:
+>>3	leshort		&0x01		ArchiveVolume
+>>3	leshort		&0x02		Commented
+>>3	leshort		&0x04		Locked
+>>3	leshort		&0x10		NewVolumeNaming
+>>3	leshort		&0x08		Solid
+>>3	leshort		&0x20		Authenticated
+>>3	leshort		&0x40		RecoveryRecordPresent
+>>3	leshort		&0x80		EncryptedBlockHeader
+>>3	leshort		&0x100		FirstVolume
+
+# RAR (Roshal Archive) archive
+0	string		Rar!\x1a\7\0		RAR archive data
+!:mime	application/x-rar
+!:ext	rar/cbr
+# file header
+>(0xc.l+9)	byte	0x74
+>>(0xc.l+7)	use	rar-file-header
+# subblock seems to share information with file header
+>(0xc.l+9)	byte	0x7a
+>>(0xc.l+7)	use	rar-file-header
+>9		byte	0x73
+>>7		use	rar-archive-header
+
+0	string		Rar!\x1a\7\1\0		RAR archive data, v5
 !:mime	application/x-rar
->44	byte		x		v%0x,
->10	byte		>0		flags:
->>10	byte		&0x01		Archive volume,
->>10	byte		&0x02		Commented,
->>10	byte		&0x04		Locked,
->>10	byte		&0x08		Solid,
->>10	byte		&0x20		Authenticated,
->35	byte		0		os: MS-DOS
->35	byte		1		os: OS/2
->35	byte		2		os: Win32
->35	byte		3		os: Unix
-# some old version? idarc says:
-0   string      RE\x7e\x5e  RAR archive data
+!:ext	rar
+
+# Very old RAR archive
+# http://jasonblanks.com/wp-includes/images/papers/KnowyourarchiveRAR.pdf
+0	string		RE\x7e\x5e  RAR archive data (<v1.5)
+!:mime	application/x-rar
+!:ext	rar/cbr
 
 # SQUISH archiver (Greg Roelofs, newt@uchicago.edu)
 0	string		SQSH		squished archive data (Acorn RISCOS)
@@ -604,9 +769,12 @@
 # PKZIP multi-volume archive
 0	string		PK\x07\x08PK\x03\x04	Zip multi-volume archive data, at least PKZIP v2.50 to extract
 !:mime	application/zip
+!:ext zip/cbz
 
 # Zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
 0	string		PK\005\006	Zip archive data (empty)
+!:mime application/zip
+!:ext zip/cbz
 0	string		PK\003\004
 
 # Specialised zip formats which start with a member named 'mimetype'
@@ -723,6 +891,14 @@
 >(26.s+30)	leshort	0xcafe		Java archive data (JAR)
 !:mime	application/java-archive
 
+# iOS App
+>(26.s+30)	leshort	!0xcafe
+>>26		string	!\x8\0\0\0mimetype
+>>>30		string	Payload/
+>>>>38		search/64       .app/   iOS App
+!:mime application/x-ios-app
+
+
 # Generic zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
 #   Next line excludes specialized formats:
 >(26.s+30)	leshort	!0xcafe
@@ -764,12 +940,24 @@
 0       string  \0\ \ \ \ \ \ \ \ \ \ \ \0\0    LBR archive data
 #
 # PMA (CP/M derivative of LHA)
+# Update: Joerg Jenderek 
+# URL: https://en.wikipedia.org/wiki/LHA_(file_format)
 #
-2       string          -pm0-           PMarc archive data [pm0]
-2       string          -pm1-           PMarc archive data [pm1]
-2       string          -pm2-           PMarc archive data [pm2]
+#2       string          -pm0-           PMarc archive data [pm0]
+2	string		-pm0-		
+>0	use	lharc-file
+#2       string          -pm1-           PMarc archive data [pm1]
+2	string		-pm1-		
+>0	use	lharc-file
+#2       string          -pm2-           PMarc archive data [pm2]
+2	string		-pm2-		
+>0	use	lharc-file
 2       string          -pms-           PMarc SFX archive (CP/M, DOS)
+#!:mime	application/x-foobar-exec
+!:ext com
 5       string          -pc1-           PopCom compressed executable (CP/M)
+#!:mime	application/x-
+#!:ext com
 
 # From Rafael Laboissiere <rafael@laboissiere.net>
 # The Project Revision Control System (see
@@ -802,6 +990,9 @@
 # Felix von Leitner <felix-file@fefe.de>
 0	string	d8:announce	BitTorrent file
 !:mime	application/x-bittorrent
+# Durval Menezes, <jmgthbfile at durval dot com>
+0	string	d13:announce-list	BitTorrent file
+!:mime	application/x-bittorrent
 
 # Atari MSA archive - Teemu Hukkanen <tjhukkan@iki.fi>
 0	beshort 0x0e0f		Atari MSA archive data
@@ -889,19 +1080,16 @@
 # From "Nelson A. de Oliveira" <naoliv@gmail.com>
 0	string	MPQ\032		MoPaQ (MPQ) archive
 
-# From: Dirk Jagdmann <doj@cubic.org>
-# xar archive format: http://code.google.com/p/xar/
-0	string	xar!		xar archive
->6	beshort	x		- version %d
-
 # From: "Nelson A. de Oliveira" <naoliv@gmail.com>
 # .kgb
 0	string KGB_arch		KGB Archiver file
 >10	string x		with compression level %.1s
 
 # xar (eXtensible ARchiver) archive
+# xar archive format: http://code.google.com/p/xar/
 # From: "David Remahl" <dremahl@apple.com>
 0	string	xar!		xar archive
+!:mime	application/x-xar
 #>4	beshort	x		header size %d
 >6	beshort	x		version %d,
 #>8	quad	x		compressed TOC: %d,
@@ -975,3 +1163,9 @@
 >0xE08	search/7776		\x55\xAA	
 >>&-512	indirect		x		\b; contains 
 
+# Google Chrome extensions
+# https://developer.chrome.com/extensions/crx
+# https://developer.chrome.com/extensions/hosting
+0	string	Cr24	Google Chrome extension
+!:mime	application/x-chrome-extension
+>4	ulong	x	\b, version %u

+ 43 - 2
magic/Magdir/audio

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: audio,v 1.73 2015/03/15 23:21:42 christos Exp $
+# $File: audio,v 1.75 2016/02/08 17:30:11 christos Exp $
 # audio:  file(1) magic for sound formats (see also "iff")
 #
 # Jan Nicolai Langfeldt (janl@ifi.uio.no), Dan Quinlan (quinlan@yggdrasil.com),
@@ -585,7 +585,7 @@
 0	string		SC68\ Music-file\ /\ (c)\ (BeN)jami	sc68 Atari ST music
 
 # musepak support From: "Jiri Pejchal" <jiri.pejchal@gmail.com>
-0       string          MP+     Musepack audio
+0       string          MP+     Musepack audio (MP+)
 !:mime	audio/x-musepack
 >3      byte            255     \b, SV pre8
 >3      byte&0xF        0x6     \b, SV 6
@@ -619,6 +619,9 @@
 >>27    byte            114     \b, Beta 1.14
 >>27    byte            115     \b, Alpha 1.15
 
+0       string          MPCK    Musepack audio (MPCK)
+!:mime	audio/x-musepack
+
 # IMY
 # from http://filext.com/detaillist.php?extdetail=IMY
 # http://cellphones.about.com/od/cellularfaqs/f/rf_imelody.htm
@@ -715,3 +718,41 @@
 0	string	ZBOT
 >4	byte	0xc5	GVOX Encore music, version < 5.0
 
+# Summary:	Garmin Voice Processing Module (WAVE audios)
+# From:		Joerg Jenderek
+# URL:		http://www.garmin.com/
+# Reference:	http://turboccc.wikispaces.com/share/view/28622555
+# NOTE:		there exist 2 other Garmin VPM formats
+0		string	AUDIMG		
+# skip text files starting with string "AUDIMG"
+>13		ubyte		<13	Garmin Voice Processing Module
+!:mime	audio/x-vpm-wav-garmin
+!:ext	vpm
+# 3 bytes indicating the voice version (200,220)
+>>6		string		x	\b, version %3.3s
+# day of release (01-31)
+>>12		ubyte		x	\b, %.2d
+# month of release (01-12)
+>>13		ubyte		x	\b.%.2d
+# year of release (like 2006, 2007, 2008)
+>>14		uleshort	x	\b.%.4d
+# hour of release (0-23)
+>>11		ubyte		x	%.2d
+# minute of release (0-59)
+>>10		ubyte		x	\b:%.2d
+# second of release (0-59)
+>>9		ubyte		x	\b:%.2d
+# if you select a language like german on your garmin device
+# you can only select voice modules with corresponding language byte ID like 1
+>>18		ubyte		x	\b, language ID %d
+# pointer to 1st audio WAV sample
+>>16		uleshort	>0	
+>>>(16.s)	ulelong		>0	\b, at offset 0x%x
+# WAV length
+>>>>(16.s+4)	ulelong		>0	%d Bytes
+# look for magic
+>>>>>(&-8.l)	string		RIFF	
+# determine type by ./riff
+>>>>>>&-4	indirect	x	\b 
+# 2 - ~ 131 WAV samples following same way
+

+ 65 - 0
magic/Magdir/ber

@@ -0,0 +1,65 @@
+
+#------------------------------------------------------------------------------
+# $File: ber,v 1.1 2016/06/05 00:21:30 christos Exp $
+# ber:  file(1) magic for several BER formats used in the mobile
+# telecommunications industry (Georg Sauthoff)
+
+# The file formats are standardized by the GSMA (GSM association).
+# They are specified via ASN.1 schemas and some prose. Basic encoding
+# rules (BER) is the used encoding. The formats are used for exchanging
+# call data records (CDRs) between mobile operators and associated
+# parties for roaming clearing purposes and fraud detection.
+
+# The magic file covers:
+
+# - TAP files (TD.57) - CDR batches and notifications
+# - RAP files (TD.32) - return batches and acknowledgements
+# - NRT files (TD.35) - CDR batches for 'near real time' processing
+
+#
+# TAP 3 Files
+# TAP -> Transferred Account Procedure
+# cf. http://www.gsma.com/newsroom/wp-content/uploads/TD.57-v32.31.pdf
+# TransferBatch short tag
+0	byte	0x61
+# BatchControlInfo short tag
+>&1	search/b5	\x64
+# Sender long tag #TAP 3.x (BER encoded)
+>>&1	search/b8	\x5f\x81\x44
+# <SpecificationVersionNumber>3</><ReleaseVersionNumber> block
+>>>&64	search/b64	\x5f\x81\x49\x01\x03\x5f\x81\x3d\x01
+>>>>&0	byte	x	TAP 3.%d Batch (TD.57, Transferred Account)
+
+# Notification short tag
+0	byte	0x62
+# Sender long tag
+>2	search/b8	\x5f\x81\x44
+# <SpecificationVersionNumber>3</><ReleaseVersionNumber> block
+>>&64	search/b64	\x5f\x81\x49\x01\x03\x5f\x81\x3d\x01
+>>>&0	byte	x	TAP 3.%d Notification (TD.57, Transferred Account)
+
+
+# NRT Files
+# NRT a.k.a. NRTRDE
+0	byte	0x61
+# <SpecificationVersionNumber>2</><ReleaseVersionNumber> block
+>&1	search/b8 \x5f\x29\x01\x02\x5f\x25\x01
+>>&0	byte	x	NRT 2.%d (TD.35, Near Real Time Roaming Data Exchange)
+
+# RAP Files
+# cf. http://www.gsma.com/newsroom/wp-content/uploads/TD.32-v6.11.pdf
+# Long ReturnBatch tag
+0	string	\x7f\x84\x16
+# Long RapBatchControlInfo tag
+>&1	search/b8	\x7f\x84\x19
+# <SpecificationVersionNumber>3</><ReleaseVersionNumber> block
+>>&64	search/b64	\x5f\x81\x49\x01\x03\x5f\x81\x3d\x01
+# <RapSpecificationVersionNumber>1</><RapReleaseVersionNumber> block
+>>>&1	string/b	\x5f\x84\x20\x01\x01\x5f\x84\x1f\x01
+>>>>&0	byte	x	RAP 1.%d Batch (TD.32, Returned Account Procedure),
+>>>&0	byte	x	TAP 3.%d
+
+# Long Acknowledgement tag
+0	string \x7f\x84\x17
+# Long Sender tag
+>&1	search/b5	\x5f\x81\x44	RAP Acknowledgement (TD.32, Returned Account Procedure)

+ 178 - 0
magic/Magdir/bioinformatics

@@ -0,0 +1,178 @@
+
+#------------------------------------------------------------------------------
+# $File: bioinformatics,v 1.2 2016/02/14 15:53:53 christos Exp $
+# bioinformatics:  file(1) magic for Bioinformatics file formats
+
+###############################################################################
+# BGZF (Blocked GNU Zip Format) - gzip compatible, but also indexable
+# used by SAMtools bgzip/tabix (http://samtools.sourceforge.net/tabix.shtml)
+###############################################################################
+0	string		\037\213
+>3	byte		&0x04
+>>12	string		BC
+>>>14	leshort		&0x02	Blocked GNU Zip Format (BGZF; gzip compatible)
+>>>>16	leshort		x	\b, block length %d
+!:mime	application/x-gzip
+
+
+###############################################################################
+# Tabix index file 
+# used by SAMtools bgzip/tabix (http://samtools.sourceforge.net/tabix.shtml)
+###############################################################################
+0	string	TBI\1		SAMtools TBI (Tabix index format)
+>0x04	lelong	=1		\b, with %d reference sequence
+>0x04	lelong	>1		\b, with %d reference sequences
+>0x08	lelong	&0x10000	\b, using half-closed-half-open coordinates (BED style)
+>0x08	lelong	^0x10000	
+>>0x08	lelong	=0		\b, using closed and one based coordinates (GFF style)
+>>0x08	lelong	=1		\b, using SAM format
+>>0x08	lelong	=2		\b, using VCF format
+>0x0c	lelong	x		\b, sequence name column: %d
+>0x10	lelong	x		\b, region start column: %d
+>0x08	lelong	=0		
+>>0x14	lelong	x		\b, region end column: %d
+>0x18	byte	x		\b, comment character: %c
+>0x1c	lelong	x		\b, skip line count: %d
+
+
+###############################################################################
+# BAM (Binary Sequence Alignment/Map format) 
+# used by SAMtools (http://samtools.sourceforge.net/SAM1.pdf) 
+# data is normally present only within compressed BGZF blocks (CDATA), so use file -z to examine it
+###############################################################################
+0	string	BAM\1	SAMtools BAM (Binary Sequence Alignment/Map)
+>0x04	lelong	>0	
+>>&0x00 regex	=^[@]HD\t.*VN:		\b, with SAM header
+>>>&0	regex	=[0-9.]+		\b version %s
+>>&(0x04)	lelong	>0	\b, with %d reference sequences
+
+
+###############################################################################
+# BAI (BAM indexing format)
+# used by SAMtools (http://samtools.sourceforge.net/SAM1.pdf) 
+###############################################################################
+0		string	BAI\1	SAMtools BAI (BAM indexing format)
+>0x04		lelong	>0	\b, with %d reference sequences
+
+
+###############################################################################
+# CRAM (Binary Sequence Alignment/Map format) 
+###############################################################################
+0	string	CRAM	CRAM
+>0x04	byte	>-1	version %d.
+>0x05	byte	>-1	\b%d
+>0x06	string	>\0	(identified as %s)
+
+
+###############################################################################
+# BCF (Binary Call Format), version 1
+# used by SAMtools & VCFtools (http://vcftools.sourceforge.net/bcf.pdf)
+# data is normally present only within compressed BGZF blocks (CDATA), so use file -z to examine it
+###############################################################################
+0		string	   BCF\4    
+# length of seqnm data in bytes is positive
+>&0x00		lelong	  >0	
+# length of smpl data in bytes is positive
+>>&(&-0x04)	lelong	  >0			SAMtools BCF (Binary Call Format)
+# length of meta in bytes
+>>>&(&-0x04)	lelong	  >0	
+# have meta text string
+>>>>&0x00	search	  ##samtoolsVersion=
+>>>>>&0x00	string	  x			\b, generated by SAMtools version %s
+
+
+###############################################################################
+# BCF (Binary Call Format), version 2.1
+# used by SAMtools (http://samtools.github.io/hts-specs/BCFv2_qref.pdf)
+# data is normally present only within compressed BGZF blocks (CDATA), so use file -z to examine it
+###############################################################################
+0		string	   BCF\2\1    Binary Call Format (BCF) version 2.1
+# length of header text
+>&0x00		lelong	  >0	
+# have header string
+>>&0x00 search	  ##samtoolsVersion=
+>>>&0x00	string	  x			\b, generated by SAMtools version %s
+
+
+###############################################################################
+# BCF (Binary Call Format), version 2.2
+# used by SAMtools (http://samtools.github.io/hts-specs/BCFv2_qref.pdf)
+# data is normally present only within compressed BGZF blocks (CDATA), so use file -z to examine it
+###############################################################################
+0		string	   BCF\2\2    Binary Call Format (BCF) version 2.2
+# length of header text
+>&0x00		lelong	  >0	
+# have header string
+>>&0x00 search	  ##samtoolsVersion=
+>>>&0x00	string	  x			\b, generated by SAMtools version %s
+
+###############################################################################
+# VCF (Variant Call Format)
+# used by VCFtools (http://vcftools.sourceforge.net/)
+###############################################################################
+0      search	   ##fileformat=VCFv	Variant Call Format (VCF)
+>&0    string	   x			\b version %s
+
+###############################################################################
+# FASTQ
+# used by MAQ (http://maq.sourceforge.net/fastq.shtml)
+###############################################################################
+# XXX Broken?
+# @<seqname>
+#0	regex	=^@[A-Za-z0-9_.:-]+\?\n	
+# <seq>
+#>&1	regex	=^[A-Za-z\n.~]++
+# +[<seqname>]
+#>>&1	regex	=^[A-Za-z0-9_.:-]*\?\n	
+# <qual>
+#>>>&1	regex	=^[!-~\n]+\n		FASTQ
+
+###############################################################################
+# FASTA
+# used by FASTA (http://fasta.bioch.virginia.edu/fasta_www2/fasta_guide.pdf)
+###############################################################################
+#0	byte	0x3e
+# q>0	regex	=^[>][!-~\t\ ]+$	
+# Amino Acid codes: [A-IK-Z*-]+
+#>>1	regex	!=[!-'Jj;:=?@^`|~\\]		FASTA
+# IUPAC codes/gaps: [ACGTURYKMSWBDHVNX-]+
+# not in IUPAC codes/gaps: [EFIJLOPQZ]
+#>>>1	regex	!=[EFIJLOPQZefijlopqz]		\b, with IUPAC nucleotide codes
+#>>>1	regex	=^[EFIJLOPQZefijlopqz]+$	\b, with Amino Acid codes
+
+###############################################################################
+# SAM (Sequence Alignment/Map format) 
+# used by SAMtools (http://samtools.sourceforge.net/SAM1.pdf) 
+###############################################################################
+# Short-cut version to recognise SAM files with (optional) header at beginning
+###############################################################################
+0      string	   @HD\t	
+>4     search	   VN:		Sequence Alignment/Map (SAM), with header
+>>&0   regex	   [0-9.]+	\b version %s
+###############################################################################
+# Longer version to recognise SAM alignment lines using (many) regexes
+###############################################################################
+# SAM Alignment QNAME
+0		regex	=^[!-?A-~]{1,255}(\t[^\t]+){11}		
+# SAM Alignment FLAG
+>0		regex	=^([^\t]+\t){1}[0-9]{1,5}\t		
+# SAM Alignment RNAME
+>>0		regex	=^([^\t]+\t){2}\\*|[^*=]*\t		
+# SAM Alignment POS
+>>>0		regex	=^([^\t]+\t){3}[0-9]{1,9}\t		
+# SAM Alignment MAPQ
+>>>>0		regex	=^([^\t]+\t){4}[0-9]{1,3}\t		
+# SAM Alignment CIGAR
+>>>>>0		regex	=\t\\*|([0-9]+[MIDNSHPX=])+)\t		
+# SAM Alignment RNEXT
+>>>>>>0		regex	=\t(\\*|=|[!-()+->?-~][!-~]*)\t		
+# SAM Alignment PNEXT
+>>>>>>>0	regex	=^([^\t]+\t){7}[0-9]{1,9}\t		
+# SAM Alignment TLEN
+>>>>>>>>0	regex	=\t[+-]{0,1}[0-9]{1,9}\t.*\t		
+# SAM Alignment SEQ
+>>>>>>>>>0	regex	=^([^\t]+\t){9}(\\*|[A-Za-z=.]+)\t	
+# SAM Alignment QUAL
+>>>>>>>>>>0	regex	=^([^\t]+\t){10}[!-~]+	Sequence Alignment/Map (SAM)
+>>>>>>>>>>>0	regex	=^[@]HD\t.*VN:		\b, with header
+>>>>>>>>>>>>&0	regex	=[0-9.]+		\b version %s

+ 9 - 6
magic/Magdir/c-lang

@@ -1,7 +1,8 @@
 #------------------------------------------------------------------------------
-# $File: c-lang,v 1.20 2015/07/27 14:33:10 christos Exp $
+# $File: c-lang,v 1.23 2016/05/21 14:28:27 christos Exp $
 # c-lang:  file(1) magic for C and related languages programs
 #
+# The strength is to beat standard HTML
 
 # BCPL
 0	search/8192	"libhdr"	BCPL source text
@@ -11,6 +12,7 @@
 
 # C
 0	regex	\^#include	C source text
+!:strength +25
 !:mime	text/x-c
 0	regex	\^char[\ \t\n]+	C source text
 !:mime	text/x-c
@@ -30,19 +32,20 @@
 # C++
 # The strength of these rules is increased so they beat the C rules above
 0	regex	\^template[\ \t]+<.*>[\ \t\n]+	C++ source text
-!:strength + 5
+!:strength + 30
 !:mime	text/x-c++
 0	regex	\^virtual[\ \t\n]+		C++ source text
-!:strength + 5
+!:strength + 30
 !:mime	text/x-c++
 0	regex	\^class[\ \t\n]+		C++ source text
-!:strength + 5
+# But class is reduced to avoid beating php (Jens Schleusener)
+!:strength + 13
 !:mime	text/x-c++
 0	regex	\^public:		C++ source text
-!:strength + 5
+!:strength + 30
 !:mime	text/x-c++
 0	regex	\^private:		C++ source text
-!:strength + 5
+!:strength + 30
 !:mime	text/x-c++
 
 # From: Mikhail Teterin <mi@aldan.algebra.com> 

+ 7 - 4
magic/Magdir/cafebabe

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: cafebabe,v 1.20 2015/05/29 14:21:58 christos Exp $
+# $File: cafebabe,v 1.21 2015/10/15 20:56:51 christos Exp $
 # Cafe Babes unite!
 #
 # Since Java bytecode and Mach-O universal binaries have the same magic number,
@@ -58,12 +58,15 @@
 >>4	belong		<20		Mach-O universal binary with %d architectures:
 !:mime application/x-mach-binary
 >>>8	use		mach-o		\b
->>>28	use		mach-o		\b
 >>4	belong		2
->>>48	use		mach-o		\b
+>>>28	use		mach-o		\b
 >>4	belong		3
->>>68	use		mach-o		\b
+>>>48	use		mach-o		\b
 >>4	belong		4
+>>>68	use		mach-o		\b
+>>4	belong		5
 >>>88	use		mach-o		\b
+>>4	belong		6
+>>>108	use		mach-o		\b
 
 ### MACH-O END ###

+ 77 - 0
magic/Magdir/coff

@@ -0,0 +1,77 @@
+
+#------------------------------------------------------------------------------
+# $File: coff,v 1.1 2015/09/30 20:32:35 christos Exp $
+# coff: file(1) magic for Common Object Files not specific to known cpu types or manufacturers
+#
+# COFF
+#
+# by Joerg Jenderek at Oct 2015
+# https://en.wikipedia.org/wiki/COFF
+# https://de.wikipedia.org/wiki/Common_Object_File_Format
+# http://www.delorie.com/djgpp/doc/coff/filhdr.html
+
+# display name+variables+flags of Common Object Files Format (32bit)
+# Maybe used also in adi,att3b,clipper,hitachi-sh,hp,ibm6000,intel,
+# mips,motorola,msdos,osf1,sharc,varied.out,vax
+0	name				display-coff
+# test for unused flag bits (0x8000,0x0800,0x0400,0x0200,0x0080) in f_flags
+>18	uleshort&0x8E80	0		
+>>0	clear		x
+# f_magic - magic number
+# DJGPP, 80386 COFF executable, MS Windows COFF Intel 80386 object file (./intel)
+>>0	uleshort	0x014C		Intel 80386
+# Hitachi SH big-endian COFF (./hitachi-sh)
+>>0	uleshort	0x0500		Hitachi SH big-endian
+# Hitachi SH little-endian COFF (./hitachi-sh)
+>>0	uleshort	0x0550		Hitachi SH little-endian
+# executable (RISC System/6000 V3.1) or obj module (./ibm6000)
+#>>0	uleshort	0x01DF		
+# TODO for other COFFs
+#>>0	uleshort	0xABCD		COFF_TEMPLATE
+>>0	default		x
+>>>0	uleshort	x		type 0x%04x
+>>0	uleshort	x		COFF
+# F_EXEC flag bit
+>>18	leshort		^0x0002		object file
+#!:mime	application/x-coff
+#!:ext cof/o/obj/lib
+>>18	leshort		&0x0002		executable
+#!:mime	application/x-coffexec
+# F_RELFLG flag bit,static object
+>>18	leshort		&0x0001		\b, no relocation info
+# F_LNNO flag bit
+>>18	leshort		&0x0004		\b, no line number info
+# F_LSYMS flag bit
+>>18	leshort		&0x0008		\b, stripped
+>>18	leshort		^0x0008		\b, not stripped
+# flags in other COFF versions
+#0x0010    F_FDPR_PROF 
+#0x0020    F_FDPR_OPTI
+#0x0040    F_DSA
+# F_AR32WR flag bit
+#>>>18	leshort		&0x0100		\b, 32 bit little endian
+#0x1000    F_DYNLOAD 
+#0x2000    F_SHROBJ
+#0x4000    F_LOADONLY
+# f_nscns - number of sections
+>>2	uleshort	<2		\b, %d section
+>>2	uleshort	>1		\b, %d sections
+# f_timdat - file time & date stamp only for little endian
+#>>4	date		x		\b, %s
+# f_symptr - symbol table pointer, only for not stripped
+>>8	ulelong		>0		\b, symbol offset=0x%x
+# f_nsyms - number of symbols, only for not stripped
+>>12	ulelong		>0		\b, %d symbols
+# f_opthdr - optional header size 
+>>16	uleshort	>0		\b, optional header size %d
+# at offset 20 can be optional header, extra bytes FILHSZ-20 because
+# do not rely on sizeof(FILHDR) to give the correct size for header.
+# or first section header
+# additional variables for other COFF files
+# >20	beshort		0407		(impure)
+# >20	beshort		0410		(pure)
+# >20	beshort		0413		(demand paged)
+# >20	beshort		0421		(standalone)
+# >22	leshort		>0		- version %d
+# >168	string		.lowmem		Apple toolbox
+

+ 5 - 3
magic/Magdir/commands

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: commands,v 1.52 2015/06/04 19:16:55 christos Exp $
+# $File: commands,v 1.54 2016/04/19 13:40:02 christos Exp $
 # commands:  file(1) magic for various shells and interpreters
 #
 #0	string/w	:			shell archive or script for antique kernel text
@@ -98,8 +98,10 @@
 !:mime	text/x-php
 # Smarty compiled template, http://www.smarty.net/
 # Elan Ruusamae <glen@delfi.ee>
-0	string	=<?php\ /*\ Smarty\ version	Smarty compiled template
->24	regex	[0-9.]+				\b, version %s
+0	string	=<?php
+>5	regex	[\ \n]
+>>6	string	/*\ Smarty\ version		Smarty compiled template
+>>>24	regex	[0-9.]+				\b, version %s
 !:mime	text/x-php
 
 0	string		Zend\x00		PHP script Zend Optimizer data

+ 2 - 1
magic/Magdir/compress

@@ -1,5 +1,5 @@
 #------------------------------------------------------------------------------
-# $File: compress,v 1.64 2015/07/27 15:41:09 christos Exp $
+# $File: compress,v 1.65 2015/12/04 20:48:03 christos Exp $
 # compress:  file(1) magic for pure-compression formats (no archives)
 #
 # compress, gzip, pack, compact, huf, squeeze, crunch, freeze, yabba, etc.
@@ -188,6 +188,7 @@
 >6	byte		x			version %d
 >7	byte		x			\b.%d
 !:mime	application/x-7z-compressed
+!:ext 7z/cb7
 
 # Type: LZMA
 0	lelong&0xffffff	=0x5d

+ 558 - 80
magic/Magdir/console

@@ -1,53 +1,135 @@
 
 #------------------------------------------------------------------------------
-# $File: console,v 1.20 2015/03/15 23:21:42 christos Exp $
+# $File: console,v 1.26 2016/06/12 15:20:37 christos Exp $
 # Console game magic
 # Toby Deshane <hac@shoelace.digivill.net>
-#    ines:  file(1) magic for Marat's iNES Nintendo Entertainment System
-#           ROM dump format
-
-0 string NES\032 iNES ROM dump,
->4 byte  x     %dx16k PRG
->5 byte  x     \b, %dx8k CHR
->6 byte&0x01  =0x1  \b, [Vert.]
->6 byte&0x01  =0x0  \b, [Horiz.]
->6 byte&0x02  =0x2  \b, [SRAM]
->6 byte&0x04  =0x4  \b, [Trainer]
->6 byte&0x04  =0x8  \b, [4-Scr]
+
+# ines: file(1) magic for Marat's iNES Nintendo Entertainment System ROM dump format
+# Updated by David Korth <gerbilsoft@gerbilsoft.com>
+# References:
+# - http://wiki.nesdev.com/w/index.php/INES
+# - http://wiki.nesdev.com/w/index.php/NES_2.0
+0	string		NES\x1A		iNES ROM image
+>7	byte&0x0C	=0x8		(NES 2.0)
+>4	byte		x		\b: %ux16k PRG
+>5	byte		x		\b, %ux16k CHR
+>6	byte&0x08	=0x8		[4-Scr]
+>6	byte&0x09	=0x0		[H-mirror]
+>6	byte&0x09	=0x1		[V-mirror]
+>6	byte&0x02	=0x2		[SRAM]
+>6	byte&0x04	=0x4		[Trainer]
+>7	byte&0x03	=0x2		[PC10]
+>7	byte&0x03	=0x1		[VS
+>>7	byte&0x0C	=0x8
+# NES 2.0: VS PPU
+>>>13	byte&0x0F	=0x0		\b, RP2C03B
+>>>13	byte&0x0F	=0x1		\b, RP2C03G
+>>>13	byte&0x0F	=0x2		\b, RP2C04-0001
+>>>13	byte&0x0F	=0x3		\b, RP2C04-0002
+>>>13	byte&0x0F	=0x4		\b, RP2C04-0003
+>>>13	byte&0x0F	=0x5		\b, RP2C04-0004
+>>>13	byte&0x0F	=0x6		\b, RP2C03B
+>>>13	byte&0x0F	=0x7		\b, RP2C03C
+>>>13	byte&0x0F	=0x8		\b, RP2C05-01
+>>>13	byte&0x0F	=0x9		\b, RP2C05-02
+>>>13	byte&0x0F	=0xA		\b, RP2C05-03
+>>>13	byte&0x0F	=0xB		\b, RP2C05-04
+>>>13	byte&0x0F	=0xC		\b, RP2C05-05
+# TODO: VS protection hardware?
+>>7	byte		x		\b]
+# NES 2.0-specific flags.
+>7	byte&0x0C	=0x8
+>>12	byte&0x03	=0x0		[NTSC]
+>>12	byte&0x03	=0x1		[PAL]
+>>12	byte&0x02	=0x2		[NTSC+PAL]
+
+#------------------------------------------------------------------------------
+# unif: file(1) magic for UNIF-format Nintendo Entertainment System ROM images
+# Reference: http://wiki.nesdev.com/w/index.php/UNIF
+# From: David Korth <gerbilsoft@gerbilsoft.com>
+# TODO commit on 2016/03/21
+#
+# NOTE: The UNIF format uses chunks instead of a fixed header,
+# so most of the data isn't easily parseable.
+#
+0	string	UNIF
+>4	lelong	<16	UNIF v%d format NES ROM image
+
+#------------------------------------------------------------------------------
+# fds: file(1) magic for Famicom Disk System disk images
+# Reference: http://wiki.nesdev.com/w/index.php/Family_Computer_Disk_System#.FDS_format
+# From: David Korth <gerbilsoft@gerbilsoft.com>
+# TODO: Check "Disk info block" and get info from that in addition to the optional header.
+
+# Disk info block. (block 1)
+0	name	nintendo-fds-disk-info-block
+>1	string	*NINTENDO-HVC*	Famicom Disk System disk image:
+>23	byte	!1		FMC-
+>23	byte	1		FSC-
+>16	string	x		\b%.3s
+>15	byte	x		\b, mfr 0x%02X
+>20	byte	x		(Rev.%02u)
+
+# Headered version.
+0	string	FDS\x1A
+>0x11	string	*NINTENDO-HVC*
+>>0x10	use	nintendo-fds-disk-info-block
+>4	byte	1	(%u side)
+>4	byte	!1	(%u sides)
+
+# Unheadered version.
+1	string	*NINTENDO-HVC*
+>0	use	nintendo-fds-disk-info-block
 
 #------------------------------------------------------------------------------
-# gameboy:  file(1) magic for the Nintendo (Color) Gameboy raw ROM format
+# gameboy: file(1) magic for the Nintendo (Color) Gameboy raw ROM format
+# Reference: http://gbdev.gg8.se/wiki/articles/The_Cartridge_Header
 #
-0x104 belong 0xCEED6666 Gameboy ROM:
->0x134 string >\0 "%.16s"
->0x146 byte 0x03  \b,[SGB]
->0x147 byte 0x00  \b, [ROM ONLY]
->0x147 byte 0x01  \b, [ROM+MBC1]
->0x147 byte 0x02  \b, [ROM+MBC1+RAM]
->0x147 byte 0x03  \b, [ROM+MBC1+RAM+BATT]
->0x147 byte 0x05  \b, [ROM+MBC2]
->0x147 byte 0x06  \b, [ROM+MBC2+BATTERY]
->0x147 byte 0x08  \b, [ROM+RAM]
->0x147 byte 0x09  \b, [ROM+RAM+BATTERY]
->0x147 byte 0x0B  \b, [ROM+MMM01]
->0x147 byte 0x0C  \b, [ROM+MMM01+SRAM]
->0x147 byte 0x0D  \b, [ROM+MMM01+SRAM+BATT]
->0x147 byte 0x0F  \b, [ROM+MBC3+TIMER+BATT]
->0x147 byte 0x10  \b, [ROM+MBC3+TIMER+RAM+BATT]
->0x147 byte 0x11  \b, [ROM+MBC3]
->0x147 byte 0x12  \b, [ROM+MBC3+RAM]
->0x147 byte 0x13  \b, [ROM+MBC3+RAM+BATT]
->0x147 byte 0x19  \b, [ROM+MBC5]
->0x147 byte 0x1A  \b, [ROM+MBC5+RAM]
->0x147 byte 0x1B  \b, [ROM+MBC5+RAM+BATT]
->0x147 byte 0x1C  \b, [ROM+MBC5+RUMBLE]
->0x147 byte 0x1D  \b, [ROM+MBC5+RUMBLE+SRAM]
->0x147 byte 0x1E  \b, [ROM+MBC5+RUMBLE+SRAM+BATT]
->0x147 byte 0x1F  \b, [Pocket Camera]
->0x147 byte 0xFD  \b, [Bandai TAMA5]
->0x147 byte 0xFE  \b, [Hudson HuC-3]
->0x147 byte 0xFF  \b, [Hudson HuC-1]
+0x104		bequad		0xCEED6666CC0D000B	Game Boy ROM image
+>0x143		byte&0x80	0x80
+>>0x134		string		>\0			\b: "%.15s"
+>0x143		byte&0x80	!0x80
+>>0x134		string		>\0			\b: "%.16s"
+>0x14c		byte		x			(Rev.%02u)
 
+# Machine type. (SGB, CGB, SGB+CGB)
+>0x14b		byte		0x33
+>>0x146		byte		0x03
+>>>0x143	byte&0x80	0x80	[SGB+CGB]
+>>>0x143	byte&0x80	!0x80	[SGB]
+>>0x146		byte		!0x03
+>>>0x143	byte&0xC0	0x80	[CGB]
+>>>0x143	byte&0xC0	0xC0	[CGB ONLY]
+
+# Mapper.
+>0x147 byte 0x00  [ROM ONLY]
+>0x147 byte 0x01  [MBC1]
+>0x147 byte 0x02  [MBC1+RAM]
+>0x147 byte 0x03  [MBC1+RAM+BATT]
+>0x147 byte 0x05  [MBC2]
+>0x147 byte 0x06  [MBC2+BATTERY]
+>0x147 byte 0x08  [ROM+RAM]
+>0x147 byte 0x09  [ROM+RAM+BATTERY]
+>0x147 byte 0x0B  [MMM01]
+>0x147 byte 0x0C  [MMM01+SRAM]
+>0x147 byte 0x0D  [MMM01+SRAM+BATT]
+>0x147 byte 0x0F  [MBC3+TIMER+BATT]
+>0x147 byte 0x10  [MBC3+TIMER+RAM+BATT]
+>0x147 byte 0x11  [MBC3]
+>0x147 byte 0x12  [MBC3+RAM]
+>0x147 byte 0x13  [MBC3+RAM+BATT]
+>0x147 byte 0x19  [MBC5]
+>0x147 byte 0x1A  [MBC5+RAM]
+>0x147 byte 0x1B  [MBC5+RAM+BATT]
+>0x147 byte 0x1C  [MBC5+RUMBLE]
+>0x147 byte 0x1D  [MBC5+RUMBLE+SRAM]
+>0x147 byte 0x1E  [MBC5+RUMBLE+SRAM+BATT]
+>0x147 byte 0xFC  [Pocket Camera]
+>0x147 byte 0xFD  [Bandai TAMA5]
+>0x147 byte 0xFE  [Hudson HuC-3]
+>0x147 byte 0xFF  [Hudson HuC-1]
+
+# ROM size.
 >0x148 byte 0     \b, ROM: 256Kbit
 >0x148 byte 1     \b, ROM: 512Kbit
 >0x148 byte 2     \b, ROM: 1Mbit
@@ -55,58 +137,198 @@
 >0x148 byte 4     \b, ROM: 4Mbit
 >0x148 byte 5     \b, ROM: 8Mbit
 >0x148 byte 6     \b, ROM: 16Mbit
+>0x148 byte 7     \b, ROM: 32Mbit
 >0x148 byte 0x52  \b, ROM: 9Mbit
 >0x148 byte 0x53  \b, ROM: 10Mbit
 >0x148 byte 0x54  \b, ROM: 12Mbit
 
+# RAM size.
 >0x149 byte 1     \b, RAM: 16Kbit
 >0x149 byte 2     \b, RAM: 64Kbit
 >0x149 byte 3     \b, RAM: 128Kbit
 >0x149 byte 4     \b, RAM: 1Mbit
-
-#>0x14e long  x     \b, CRC: %x
+>0x149 byte 5     \b, RAM: 512Kbit
 
 #------------------------------------------------------------------------------
-# genesis:  file(1) magic for the Sega MegaDrive/Genesis raw ROM format
+# genesis: file(1) magic for various Sega Mega Drive / Genesis ROM image and disc formats
+# Updated by David Korth <gerbilsoft@gerbilsoft.com>
+# References:
+# - http://www.retrodev.com/segacd.html
+# - http://devster.monkeeh.com/sega/32xguide1.txt
 #
-0x100 string SEGA  Sega MegaDrive/Genesis raw ROM dump
->0x120 string >\0 Name: "%.16s"
->0x110 string >\0 %.16s
->0x1B0 string RA with SRAM
+
+# Common Sega Mega Drive header format.
+# FIXME: Name fields are 48 bytes, but have spaces for padding instead of 00s.
+0		name	sega-mega-drive-header
+# ROM title. (Use domestic if present; if not, use international.)
+>0x120		byte	>0x20
+>>0x120		string	>\0	\b: "%.16s"
+>0x120		byte	<0x21
+>>0x150		string	>\0	\b: "%.16s"
+# Other information.
+>0x180		string	>\0	(%.14s
+>>0x110		string  >\0	\b, %.16s
+>0x180		byte	0
+>>0x110		string  >\0	(%.16s
+>0		byte	x	\b)
+
+# TODO: Check for 32X CD?
+# Sega Mega CD disc images: 2048-byte sectors.
+0	string	SEGADISCSYSTEM\ \ 	Sega Mega CD disc image
+>0	use	sega-mega-drive-header
+>0	byte	x			\b, 2048-byte sectors
+0	string	SEGABOOTDISC\ \ \ \ 	Sega Mega CD disc image
+>0	use	sega-mega-drive-header
+>0	byte	x			\b, 2048-byte sectors
+# Sega Mega CD disc images: 2352-byte sectors.
+0x10	string	SEGADISCSYSTEM\ \ 	Sega Mega CD disc image
+>0x10	use	sega-mega-drive-header
+>0	byte	x			\b, 2352-byte sectors
+0x10	string	SEGABOOTDISC\ \ \ \ 	Sega Mega CD disc image
+>0x10	use	sega-mega-drive-header
+>0	byte	x			\b, 2352-byte sectors
+
+# Sega Mega Drive, 32X, Pico, and Mega CD Boot ROM images.
+0x100		string	SEGA
+>0x3C0		bequad	0x4D41525320434845	Sega 32X ROM image
+>>0		use	sega-mega-drive-header
+>0x3C0		bequad	!0x4D41525320434845
+>>0x105		belong	0x5049434F	Sega Pico ROM image
+>>>0		use	sega-mega-drive-header
+>>0x105		belong	!0x5049434F
+>>>0x180	beshort	0x4252		Sega Mega CD Boot ROM image
+>>>0x180	beshort	!0x4252		Sega Mega Drive / Genesis ROM image
+>>>0		use	sega-mega-drive-header
 
 #------------------------------------------------------------------------------
-# genesis:  file(1) magic for the Super MegaDrive ROM dump format
+# genesis: file(1) magic for the Super Magic Drive (SMD) ROM dump format
 #
-0x280 string EAGN  Super MagicDrive ROM dump
->0 byte x %dx16k blocks
->2 byte 0 \b, last in series or standalone
->2 byte >0 \b, split ROM
->8 byte 0xAA
->9 byte 0xBB
+
+# NOTE: Due to interleaving, we can't display anything
+# other than the copier header information.
+0      name    sega-genesis-smd-header
+>0     byte    x       %dx16k blocks
+>2     byte    0       \b, last in series or standalone
+>2     byte    >0      \b, split ROM
+
+# "Sega Genesis" header.
+0x280	string EAGN
+>8	beshort	0xAABB	Sega Mega Drive / Genesis ROM image (SMD format):
+>>0	use     sega-genesis-smd-header
+
+# "Sega Mega Drive" header.
+0x280	string EAMG
+>8	beshort	0xAABB	Sega Mega Drive / Genesis ROM image (SMD format):
+>>0	use     sega-genesis-smd-header
 
 #------------------------------------------------------------------------------
-# genesis:  file(1) alternate magic for the Super MegaDrive ROM dump format
+# smsgg:  file(1) magic for Sega Master System and Game Gear ROM images
+# Detects all Game Gear and export Sega Master System ROM images,
+# and some Japanese Sega Master System ROM images.
+# From: David Korth <gerbilsoft@gerbilsoft.com>
+# Reference: http://www.smspower.org/Development/ROMHeader
 #
-0x280 string EAMG  Super MagicDrive ROM dump
->0 byte x %dx16k blocks
->2 byte x \b, last in series or standalone
->8 byte 0xAA
->9 byte 0xBB
+
+# General SMS header rule.
+# The SMS boot ROM checks the header at three locations.
+0	name	sega-master-system-rom-header
+# Machine type.
+>0x0F	byte&0xF0	0x30	Sega Master System
+>0x0F	byte&0xF0	0x40	Sega Master System
+>0x0F	byte&0xF0	0x50	Sega Game Gear
+>0x0F	byte&0xF0	0x60	Sega Game Gear
+>0x0F	byte&0xF0	0x70	Sega Game Gear
+>0x0F	byte&0xF0	<0x30	Sega Master System / Game Gear
+>0x0F	byte&0xF0	>0x70	Sega Master System / Game Gear
+>0	byte		x	ROM image:
+# Product code.
+>0x0E	byte&0xF0	0x10	1
+>0x0E	byte&0xF0	0x20	2
+>0x0E	byte&0xF0	0x30	3
+>0x0E	byte&0xF0	0x40	4
+>0x0E	byte&0xF0	0x50	5
+>0x0E	byte&0xF0	0x60	6
+>0x0E	byte&0xF0	0x70	7
+>0x0E	byte&0xF0	0x80	8
+>0x0E	byte&0xF0	0x90	9
+>0x0E	byte&0xF0	0xA0	10
+>0x0E	byte&0xF0	0xB0	11
+>0x0E	byte&0xF0	0xC0	12
+>0x0E	byte&0xF0	0xD0	13
+>0x0E	byte&0xF0	0xE0	14
+>0x0E	byte&0xF0	0xF0	15
+# If the product code is 5 digits, we'll need to backspace here.
+>0x0E	byte&0xF0	!0
+>>0x0C	leshort		x	\b%04x
+>0x0E	byte&0xF0	0
+>>0x0C	leshort		x	%04x
+# Revision.
+>0x0E	byte&0x0F	x	(Rev.%02d)
+# ROM size. (Used for the boot ROM checksum routine.)
+>0x0F	byte&0x0F	0x0A	(8 KB)
+>0x0F	byte&0x0F	0x0B	(16 KB)
+>0x0F	byte&0x0F	0x0C	(32 KB)
+>0x0F	byte&0x0F	0x0D	(48 KB)
+>0x0F	byte&0x0F	0x0E	(64 KB)
+>0x0F	byte&0x0F	0x0F	(128 KB)
+>0x0F	byte&0x0F	0x00	(256 KB)
+>0x0F	byte&0x0F	0x01	(512 KB)
+>0x0F	byte&0x0F	0x02	(1 MB)
+
+# SMS/GG header locations.
+0x7FF0	string	TMR\ SEGA
+>0x7FF0	use	sega-master-system-rom-header
+0x3FF0	string	TMR\ SEGA
+>0x3FF0	use	sega-master-system-rom-header
+0x1FF0	string	TMR\ SEGA
+>0x1FF0	use	sega-master-system-rom-header
 
 #------------------------------------------------------------------------------
-# smsgg:  file(1) magic for Sega Master System and Game Gear ROM dumps
-#
-# Does not detect all images.  Very preliminary guesswork.  Need more data
-# on format.
+# saturn: file(1) magic for the Sega Saturn disc image format.
+# From: David Korth <gerbilsoft@gerbilsoft.com>
 #
-# FIXME: need a little more info...;P
+
+# Common Sega Saturn disc header format.
+# NOTE: Title is 112 bytes, but we're only showing 32 due to space padding.
+# TODO: Release date, device information, region code, others?
+0	name	sega-saturn-disc-header
+>0x60	string	>\0	\b: "%.32s"
+>0x20	string	>\0	(%.10s
+>>0x2A	string	>\0	\b, %.6s)
+>>0x2A	byte	0	\b)
+
+# 2048-byte sector version.
+0	string	SEGA\ SEGASATURN\ 	Sega Saturn disc image
+>0	use	sega-saturn-disc-header
+>0	byte	x			(2048-byte sectors)
+# 2352-byte sector version.
+0x10	string	SEGA\ SEGASATURN\ 	Sega Saturn disc image
+>0x10	use	sega-saturn-disc-header
+>0	byte	x			(2352-byte sectors)
+
+#------------------------------------------------------------------------------
+# dreamcast: file(1) magic for the Sega Dreamcast disc image format.
+# From: David Korth <gerbilsoft@gerbilsoft.com>
+# Reference: http://mc.pp.se/dc/ip0000.bin.html
 #
-#0 byte 0xF3
-#>1 byte 0xED  Sega Master System/Game Gear ROM dump
-#>1 byte 0x31  Sega Master System/Game Gear ROM dump
-#>1 byte 0xDB  Sega Master System/Game Gear ROM dump
-#>1 byte 0xAF  Sega Master System/Game Gear ROM dump
-#>1 byte 0xC3  Sega Master System/Game Gear ROM dump
+
+# Common Sega Dreamcast disc header format.
+# NOTE: Title is 128 bytes, but we're only showing 32 due to space padding.
+# TODO: Release date, device information, region code, others?
+0	name	sega-dreamcast-disc-header
+>0x80	string	>\0	\b: "%.32s"
+>0x40	string	>\0	(%.10s
+>>0x4A	string	>\0	\b, %.6s)
+>>0x4A	byte	0	\b)
+
+# 2048-byte sector version.
+0	string	SEGA\ SEGAKATANA\ 	Sega Dreamcast disc image
+>0	use	sega-dreamcast-disc-header
+>0	byte	x			(2048-byte sectors)
+# 2352-byte sector version.
+0x10	string	SEGA\ SEGAKATANA\ 	Sega Dreamcast disc image
+>0x10	use	sega-dreamcast-disc-header
+>0	byte	x			(2352-byte sectors)
 
 #------------------------------------------------------------------------------
 # dreamcast:  file(1) uncertain magic for the Sega Dreamcast VMU image format
@@ -115,15 +337,77 @@
 0 string LCDi         Dream Animator file
 
 #------------------------------------------------------------------------------
-# v64: file(1) uncertain magic for the V64 format N64 ROM dumps
+# z64: file(1) magic for the Z64 format N64 ROM dumps
+# Reference: http://forum.pj64-emu.com/showthread.php?t=2239
+# From: David Korth <gerbilsoft@gerbilsoft.com>
 #
-0 belong 0x37804012    V64 Nintendo 64 ROM dump
+0	bequad	0x803712400000000F	Nintendo 64 ROM image
+>0x20	string	>\0	\b: "%.20s"
+>0x3B	string	x	(%.4s
+>0x3F	byte	x	\b, Rev.%02u)
 
-# From: "Nelson A. de Oliveira" <naoliv@gmail.com>
-# Nintendo .nds
-192	string	\044\377\256Qi\232	Nintendo DS Game ROM Image
-# Nintendo .gba
-0	string	\056\000\000\352$\377\256Qi	Nintendo Game Boy Advance ROM Image
+#------------------------------------------------------------------------------
+# v64: file(1) magic for the V64 format N64 ROM dumps
+# Same as z64 format, but with 16-bit byteswapping.
+#
+0	bequad	0x3780401200000F00	Nintendo 64 ROM image (V64)
+
+#------------------------------------------------------------------------------
+# n64-swap2: file(1) magic for the swap2 format N64 ROM dumps
+# Same as z64 format, but with swapped 16-bit words.
+#
+0	bequad	0x12408037000F0000	Nintendo 64 ROM image (wordswapped)
+
+#------------------------------------------------------------------------------
+# n64-le32: file(1) magic for the 32-bit byteswapped format N64 ROM dumps
+# Same as z64 format, but with 32-bit byteswapping.
+#
+0	bequad	0x401237800F000000	Nintendo 64 ROM image (32-bit byteswapped)
+
+#------------------------------------------------------------------------------
+# gba: file(1) magic for the Nintendo Game Boy Advance raw ROM format
+# Reference: http://problemkaputt.de/gbatek.htm#gbacartridgeheader
+#
+# Original version from: "Nelson A. de Oliveira" <naoliv@gmail.com>
+# Updated version from: David Korth <gerbilsoft@gerbilsoft.com>
+#
+4	bequad	0x24FFAE51699AA221	Game Boy Advance ROM image
+>0xA0	string	>\0	\b: "%.12s"
+>0xAC	string	x	(%.6s
+>0xBC	byte	x	\b, Rev.%02u)
+
+#------------------------------------------------------------------------------
+# nds: file(1) magic for the Nintendo DS(i) raw ROM format
+# Reference: http://problemkaputt.de/gbatek.htm#dscartridgeheader
+#
+# Original version from: "Nelson A. de Oliveira" <naoliv@gmail.com>
+# Updated version from: David Korth <gerbilsoft@gerbilsoft.com>
+#
+0xC0	bequad	0x24FFAE51699AA221	Nintendo DS ROM image
+>0x00	string	>\0		\b: "%.12s"
+>0x0C	string	x		(%.6s
+>0x1E	byte	x		\b, Rev.%02u)
+>0x12	byte	2		(DSi enhanced)
+>0x12	byte	3		(DSi only)
+
+#------------------------------------------------------------------------------
+# nds_passme: file(1) magic for Nintendo DS ROM images for GBA cartridge boot.
+# This is also used for loading .nds files using the MSET exploit on 3DS.
+# Reference: https://github.com/devkitPro/ndstool/blob/master/source/ndscreate.cpp
+0xC0	bequad	0xC8604FE201708FE2	Nintendo DS Slot-2 ROM image (PassMe)
+
+#------------------------------------------------------------------------------
+# ngp: file(1) magic for the Neo Geo Pocket (Color) raw ROM format.
+# From: David Korth <gerbilsoft@gerbilsoft.com>
+# References:
+# - https://neogpc.googlecode.com/svn-history/r10/trunk/src/core/neogpc.cpp
+# - http://www.devrs.com/ngp/files/ngpctech.txt
+#
+0x0A	string	BY\ SNK\ CORPORATION	Neo Geo Pocket
+>0x23	byte	0x10			Color
+>0	byte	x			ROM image
+>0x24	string	>\0			\b: "%.12s"
+>0x1F	byte	0xFF			(debug mode enabled)
 
 #------------------------------------------------------------------------------
 # msx: file(1) magic for MSX game cartridge dumps
@@ -133,9 +417,25 @@
 #------------------------------------------------------------------------------
 # Sony Playstation executables (Adam Sjoegren <asjo@diku.dk>) :
 0	string	PS-X\ EXE	Sony Playstation executable
+>16	lelong	x		PC=0x%08x,
+>20	lelong	!0		GP=0x%08x,
+>24	lelong	!0		.text=[0x%08x,
+>>28	lelong	x		\b0x%x],
+>32	lelong	!0		.data=[0x%08x,
+>>36	lelong	x		\b0x%x],
+>40	lelong	!0		.bss=[0x%08x,
+>>44	lelong	x		\b0x%x],
+>48	lelong	!0		Stack=0x%08x,
+>48	lelong	=0		No Stack!,
+>52	lelong	!0		StackSize=0x%x,
+#>76	string	>\0		(%s)
 #  Area:
 >113	string	x		(%s)
 
+# CPE executables
+0	string	CPE		CPE executable
+>3	byte	x		(version %d)
+
 #------------------------------------------------------------------------------
 # Microsoft Xbox executables .xbe (Esa Hyytia <ehyytia@cc.hut.fi>)
 0       string          XBEH            XBE, Microsoft Xbox executable
@@ -272,3 +572,181 @@
 # From: Sven Hartge <debian@ds9.argh.org>
 0	string	SCVM	ScummVM savegame
 >12	string	>\0	"%s"
+
+#------------------------------------------------------------------------------
+# Nintendo GameCube / Wii file formats.
+#
+
+# Type: Nintendo GameCube/Wii common disc header data.
+# From: David Korth <gerbilsoft@gerbilsoft.com>
+# Reference: http://wiibrew.org/wiki/Wii_Disc
+0	name	nintendo-gcn-disc-common
+>0x20	string	x	"%.64s"
+>0x00	string	x	(%.6s
+>0x06	byte	>0
+>>0x06	byte	1	\b, Disc 2
+>>0x06	byte	2	\b, Disc 3
+>>0x06	byte	3	\b, Disc 4
+>0x07	byte	x	\b, Rev.%02u)
+
+# Type: Nintendo GameCube disc image
+# From: David Korth <gerbilsoft@gerbilsoft.com>
+# Reference: http://wiibrew.org/wiki/Wii_Disc
+0x1C	belong	0xC2339F3D	Nintendo GameCube disc image:
+>0	use	nintendo-gcn-disc-common
+
+# Type: Nintendo GameCube embedded disc image
+# Commonly found on demo discs.
+# From: David Korth <gerbilsoft@gerbilsoft.com>
+# Reference: http://hitmen.c02.at/files/yagcd/yagcd/index.html#idx14.8
+0		belong	0xAE0F38A2
+>0x0C		belong	0x00100000
+>>(8.L+0x1C)	belong	0xC2339F3D	Nintendo GameCube embedded disc image:
+>>>(8.L)	use	nintendo-gcn-disc-common
+
+# Type: Nintendo Wii disc image
+# From: David Korth <gerbilsoft@gerbilsoft.com>
+# Reference: http://wiibrew.org/wiki/Wii_Disc
+0x18	belong	0x5D1C9EA3	Nintendo Wii disc image:
+>0	use	nintendo-gcn-disc-common
+
+# Type: Nintendo Wii disc image (WBFS format)
+# From: David Korth <gerbilsoft@gerbilsoft.com>
+# Reference: http://wiibrew.org/wiki/Wii_Disc
+0	string	WBFS
+>0x218	belong	0x5D1C9EA3	Nintendo Wii disc image (WBFS format):
+>>0x200	use	nintendo-gcn-disc-common
+
+#------------------------------------------------------------------------------
+# Nintendo 3DS file formats.
+#
+
+# Type: Nintendo 3DS "NCSD" image. (game cards and eMMC)
+# From: David Korth <gerbilsoft@gerbilsoft.com>
+# Reference: https://www.3dbrew.org/wiki/NCSD
+0x100		string		NCSD
+>0x118		lequad		0		Nintendo 3DS Game Card image
+# NCCH header for partition 0. (game data)
+>>0x1150	string		>\0	\b: "%.16s"
+>>0x312		byte		x	(Rev.%02u)
+>>0x118C	byte		2	(New3DS only)
+>>0x18D		byte		0		(inner device)
+>>0x18D		byte		1		(Card1)
+>>0x18D		byte		2		(Card2)
+>>0x18D		byte		3		(extended device)
+>0x118		bequad		0x0102020202000000	Nintendo 3DS eMMC dump (Old3DS)
+>0x118		bequad		0x0102020203000000	Nintendo 3DS eMMC dump (New3DS)
+
+# Nintendo 3DS version code.
+# Reference: https://www.3dbrew.org/wiki/Titles
+# Format: leshort containing three fields:
+# - 6-bit: Major
+# - 6-bit: Minor
+# - 4-bit: Revision
+# NOTE: Only supporting major/minor versions from 0-15 right now.
+# NOTE: Should be prefixed with "v".
+0	name	nintendo-3ds-version-code
+# Raw version.
+>0	leshort	x	\b%u,
+# Major version.
+>0	leshort&0xFC00	0x0000	0
+>0	leshort&0xFC00	0x0400	1
+>0	leshort&0xFC00	0x0800	2
+>0	leshort&0xFC00	0x0C00	3
+>0	leshort&0xFC00	0x1000	4
+>0	leshort&0xFC00	0x1400	5
+>0	leshort&0xFC00	0x1800	6
+>0	leshort&0xFC00	0x1C00	7
+>0	leshort&0xFC00	0x2000	8
+>0	leshort&0xFC00	0x2400	9
+>0	leshort&0xFC00	0x2800	10
+>0	leshort&0xFC00	0x2C00	11
+>0	leshort&0xFC00	0x3000	12
+>0	leshort&0xFC00	0x3400	13
+>0	leshort&0xFC00	0x3800	14
+>0	leshort&0xFC00	0x3C00	15
+# Minor version.
+>0	leshort&0x03F0	0x0000	\b.0
+>0	leshort&0x03F0	0x0010	\b.1
+>0	leshort&0x03F0	0x0020	\b.2
+>0	leshort&0x03F0	0x0030	\b.3
+>0	leshort&0x03F0	0x0040	\b.4
+>0	leshort&0x03F0	0x0050	\b.5
+>0	leshort&0x03F0	0x0060	\b.6
+>0	leshort&0x03F0	0x0070	\b.7
+>0	leshort&0x03F0	0x0080	\b.8
+>0	leshort&0x03F0	0x0090	\b.9
+>0	leshort&0x03F0	0x00A0	\b.10
+>0	leshort&0x03F0	0x00B0	\b.11
+>0	leshort&0x03F0	0x00C0	\b.12
+>0	leshort&0x03F0	0x00D0	\b.13
+>0	leshort&0x03F0	0x00E0	\b.14
+>0	leshort&0x03F0	0x00F0	\b.15
+# Revision.
+>0	leshort&0x000F	x	\b.%u
+
+# Type: Nintendo 3DS "NCCH" container.
+# https://www.3dbrew.org/wiki/NCCH
+0x100		string	NCCH	Nintendo 3DS
+>0x18D		byte&2	0	File Archive (CFA)
+>0x18D		byte&2	2	Executable Image (CXI)
+>0x150		string	>\0	\b: "%.16s"
+>0x18D		byte	0x05
+>>0x10E		leshort	x	(Old3DS System Update v
+>>0x10E		use	nintendo-3ds-version-code
+>>0x10E		leshort	x	\b)
+>0x18D		byte	0x15
+>>0x10E		leshort	x	(New3DS System Update v
+>>0x10E		use	nintendo-3ds-version-code
+>>0x10E		leshort	x	\b)
+>0x18D		byte	!0x05
+>>0x18D		byte	!0x15
+>>>0x112	byte	x	(v
+>>>0x112	use	nintendo-3ds-version-code
+>>>0x112	byte	x	\b)
+>0x18C		byte	2	(New3DS only)
+
+# Type: Nintendo 3DS "SMDH" file. (application description)
+# From: David Korth <gerbilsoft@gerbilsoft.com>
+# Reference: https://3dbrew.org/wiki/SMDH
+0		string		SMDH		Nintendo 3DS SMDH file
+>0x208		leshort		!0
+>>0x208		lestring16	x		\b: "%.128s"
+>>0x388		leshort		!0
+>>>0x388	lestring16	x		by %.128s
+>0x208		leshort		0
+>>0x008		leshort		!0
+>>>0x008	lestring16	x		\b: "%.128s"
+>>>0x188	leshort		!0
+>>>>0x188	lestring16	x		by %.128s
+
+# Type: Nintendo 3DS Homebrew Application.
+# From: David Korth <gerbilsoft@gerbilsoft.com>
+# Reference: https://3dbrew.org/wiki/3DSX_Format
+0	string	3DSX	Nintendo 3DS Homebrew Application (3DSX)
+
+#------------------------------------------------------------------------------
+# a7800: file(1) magic for the Atari 7800 raw ROM format.
+# From: David Korth <gerbilsoft@gerbilsoft.com>
+# Reference: https://sites.google.com/site/atari7800wiki/a78-header
+
+0	byte	>0
+>0	byte	<3
+>>1	string	ATARI7800	Atari 7800 ROM image
+>>>0x11	string	>\0	\b: "%.32s"
+# Display type.
+>>>0x39	byte	0	(NTSC)
+>>>0x39	byte	1	(PAL)
+>>>0x36	byte&1	1	(POKEY)
+
+#------------------------------------------------------------------------------
+# vectrex: file(1) magic for the GCE Vectrex raw ROM format.
+# From: David Korth <gerbilsoft@gerbilsoft.com>
+# Reference: http://www.playvectrex.com/designit/chrissalo/hello1.htm
+#
+# NOTE: Title is terminated with 0x80, not 0.
+# The header is terminated with a 0, so that will
+# terminate the title as well.
+#
+0	string	g\ GCE	Vectrex ROM image
+>0x11	string	>\0	\b: "%.16s"

+ 91 - 0
magic/Magdir/coverage

@@ -0,0 +1,91 @@
+
+#------------------------------------------------------------------------------
+# $File: coverage,v 1.1 2016/06/05 00:26:32 christos Exp $
+# coverage:  file(1) magic for test coverage data
+
+# File formats used to store test coverage data
+# 2016-05-21, Georg Sauthoff <mail@georg.so>
+
+
+# - GCC gcno - written by GCC at compile time when compiling with
+# 	gcc -ftest-coverage
+# - GCC gcda - written by a program that was compiled with
+#	gcc -fprofile-arcs
+# - LLVM raw profiles - generated by a program compiled with
+#	clang -fprofile-instr-generate -fcoverage-mapping ...
+# - LLVM indexed profiles - generated by
+#	llvm-profdata
+# - GCOV reports, i.e. the annotated source code
+# - LCOV trace files, i.e. aggregated GCC profiles
+#
+# GCC coverage tracefiles
+# .gcno file are created during compile time,
+# while data collected during runtime is stored in .gcda files
+# cf. gcov-io.h
+# https://gcc.gnu.org/onlinedocs/gcc-5.3.0/gcc/Gcov-Data-Files.html
+# Examples:
+# Fedora 23/x86-64/gcc-5.3.1: 6f 6e 63 67 52 33 30 35
+# Debian 8 PPC64/gcc-4.9.2  : 67 63 6e 6f 34 30 39 2a
+0	lelong	0x67636e6f	GCC gcno coverage (-ftest-coverage),
+>&3	byte	x	version %c.
+>&1	byte	x	\b%c
+
+# big endian
+0	belong	0x67636e6f	GCC gcno coverage (-ftest-coverage),
+>&0	byte	x	version %c.
+>&2	byte	x	\b%c (big-endian)
+
+# Examples:
+# Fedora 23/x86-64/gcc-5.3.1: 61 64 63 67 52 33 30 35
+# Debian 8 PPC64/gcc-4.9.2  : 67 63 64 61 34 30 39 2a
+0	lelong	0x67636461	GCC gcda coverage (-fprofile-arcs),
+>&3	byte	x	version %c.
+>&1	byte	x	\b%c
+
+# big endian
+0	belong	0x67636461	GCC gcda coverage (-fprofile-arcs),
+>&0	byte	x	version %c.
+>&2	byte	x	\b%c (big-endian)
+
+
+# LCOV tracefiles
+# cf. http://ltp.sourceforge.net/coverage/lcov/geninfo.1.php
+0	string	TN:
+>&0	search/64	\nSF:/	LCOV coverage tracefile
+
+
+# Coverage reports generated by gcov
+# i.e. source code annotated with coverage information
+0	string	\x20\x20\x20\x20\x20\x20\x20\x20-:\x20\x20\x20\ 0:Source:
+>&0	search/128	\x20\x20\x20\x20\x20\x20\x20\x20-:\x20\x20\x20\ 0:Graph:
+>>&0	search/128	\x20\x20\x20\x20\x20\x20\x20\x20-:\x20\x20\x20\ 0:Data:	GCOV coverage report
+
+
+# LLVM coverage files
+
+# raw data after running a program compiled with:
+# `clang -fprofile-instr-generate -fcoverage-mapping ...`
+# default name: default.profraw
+# magic is: \xFF lprofr \x81
+# cf. http://llvm.org/docs/doxygen/html/InstrProfData_8inc_source.html
+0	lequad	0xff6c70726f667281	LLVM raw profile data,
+>&0	byte	x	version %d
+
+# big endian
+0	bequad	0xff6c70726f667281	LLVM raw profile data,
+>&7	byte	x	version %d (big-endian)
+
+
+# LLVM indexed instruction profile (as generated by llvm-profdata)
+# magic is: reverse(\xFF lprofi \x81)
+# cf. http://llvm.org/docs/CoverageMappingFormat.html
+# http://llvm.org/docs/doxygen/html/namespacellvm_1_1IndexedInstrProf.html
+# http://llvm.org/docs/CommandGuide/llvm-cov.html
+# http://llvm.org/docs/CommandGuide/llvm-profdata.html
+0	lequad	0x8169666f72706cff	LLVM indexed profile data,
+>&0	byte	x	version %d
+
+# big endian
+0	bequad	0x8169666f72706cff	LLVM indexed profile data,
+>&7	byte	x	version %d (big-endian)
+

+ 79 - 2
magic/Magdir/database

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: database,v 1.45 2015/09/09 16:25:29 christos Exp $
+# $File: database,v 1.49 2016/06/11 17:01:51 christos Exp $
 # database:  file(1) magic for various databases
 #
 # extracted from header/code files by Graeme Wilford (eep2gw@ee.surrey.ac.uk)
@@ -377,7 +377,10 @@
 >>>>>>>>>>>>0	use		dbase3-memo-print
 # dBASE IV DBT with positive block size
 >>>>>>>20	uleshort	>0		
->>>>>>>>0	use		dbase4-memo-print
+# dBASE IV DBT with valid block length like 512, 1024
+# (a power of 2 between 16 and 16 K, which implies the upper and lower bits are zero)
+>>>>>>>>20	uleshort&0x800f	0		
+>>>>>>>>>0	use		dbase4-memo-print
 
 #		Print the information of dBase III DBT memo file 
 0	name				dbase3-memo-print
@@ -395,6 +398,8 @@
 #		Print the information of dBase IV DBT memo file 
 0	name				dbase4-memo-print
 >0		lelong		x		dBase IV DBT
+!:mime	application/x-dbt
+!:ext dbt
 # 8 character shortened main name of corresponding dBASE IV DBF file
 >8		ubelong		>0x20000000	
 # skip unusual like for angest.dbt
@@ -455,6 +460,52 @@
 4	string	Standard\ ACE\ DB	Microsoft Access Database
 !:mime	application/x-msaccess
 
+# From: Joerg Jenderek
+# URL: http://fileformats.archiveteam.org/wiki/Extensible_Storage_Engine
+# Reference: https://github.com/libyal/libesedb/archive/master.zip
+#	libesedb-master/documentation/
+#	Extensible Storage Engine (ESE) Database File (EDB) format.asciidoc
+# Note: also known as "JET Blue". Used by numerous Windows components such as 
+# Windows Search, Mail, Exchange and Active Directory.
+4	ubelong		0xefcdab89	
+# unknown1
+>132	ubelong		0		Extensible storage engine
+!:mime	application/x-ms-ese
+# file_type 0~database 1~stream
+>>12	ulelong		0		DataBase
+# Security DataBase (sdb)
+!:ext	edb/sdb
+>>12	ulelong		1		STreaMing
+!:ext	stm
+# format_version 620h
+>>8	uleshort	x		\b, version 0x%x
+>>10	uleshort	>0		revision 0x%4.4x
+>>0	ubelong		x	 	\b, checksum 0x%8.8x
+# Page size 4096 8192 32768
+>>236	ulequad		x		\b, page size %lld
+# database_state
+>>52	ulelong		1		\b, JustCreated
+>>52	ulelong		2		\b, DirtyShutdown
+#>>52	ulelong		3		\b, CleanShutdown
+>>52	ulelong		4		\b, BeingConverted
+>>52	ulelong		5		\b, ForceDetach
+# Windows NT major version when the databases indexes were updated.
+>>216	ulelong		x		\b, Windows version %d
+# Windows NT minor version
+>>220	ulelong		x		\b.%d
+
+# From: Joerg Jenderek
+# URL: http://forensicswiki.org/wiki/Windows_Application_Compatibility
+# Note: files contain application compatibility fixes, application compatibility modes and application help messages.
+8	string		sdbf		
+>7	ubyte		0		
+# TAG_TYPE_LIST+TAG_INDEXES
+>>12	uleshort	0x7802		Windows application compatibility Shim DataBase
+# version? 2 3
+#>>>0	ulelong		x		\b, version %d
+!:mime	application/x-ms-sdb
+!:ext	sdb
+
 # TDB database from Samba et al - Martin Pool <mbp@samba.org>
 0	string	TDB\ file		TDB database
 >32	lelong	0x2601196D		version 6, little-endian
@@ -545,3 +596,29 @@
 # Hopper (reverse engineering tool) http://www.hopperapp.com/
 0	string		hopperdb	Hopper database
 
+# URL: https://en.wikipedia.org/wiki/Panorama_(database_engine)
+# Reference: http://www.provue.com/Panorama/
+# From: Joerg Jenderek
+# NOTE: tested only with versions 4 and 6.0 on Windows
+# length of Panorama database name 
+5	ubyte				>0		
+# look after database name for "some" null bits
+>(5.B+7)	ubelong&0xF3ffF000	0		
+# look for first keyword
+>>&1		search/2		DESIGN		Panorama database
+#!:mime	application/x-panorama-database
+!:apple	KASXZEPD
+!:ext	pan
+# database name
+>>>5	pstring				x		\b, "%s"
+
+#
+#
+# askSam Database by Stefan A. Haubenthal <polluks@web.de>
+0	string	askw40\0	askSam DB
+
+#
+#
+# MUIbase Database Tool by Stefan A. Haubenthal <polluks@web.de>
+0	string	MBSTV\040	MUIbase DB
+>6	string	x		version %s

+ 116 - 0
magic/Magdir/der

@@ -0,0 +1,116 @@
+#------------------------------------------------------------------------------
+# $File: der,v 1.1 2016/01/19 15:07:45 christos Exp $
+# der: file(1) magic for DER encoded files
+#
+
+# Certificate information piece
+0	name	certinfo
+>0	der	seq
+>>&0	der	set
+>>>&0	der	seq
+>>>>&0	der	obj_id3=550406
+>>>>&0	der	prt_str=x	\b, countryName=%s
+>>&0	der	set
+>>>&0	der	seq
+>>>>&0	der	obj_id3=550408
+>>>>&0	der	utf8_str=x	\b, stateOrProvinceName=%s
+>>&0	der	set
+>>>&0	der	seq
+>>>>&0	der	obj_id3=55040a
+>>>>&0	der	utf8_str=x	\b, organizationName=%s
+>>&0	der	set
+>>>&0	der	seq
+>>>>&0	der	obj_id3=550403
+>>>>&0	der	utf8_str=x	\b, commonName=%s
+>>&0	der	seq
+
+# Certificate requests
+0	der	seq
+>&0	der	seq
+>>&0	der	int1=00		DER Encoded Certificate request
+>>&0	use	certinfo
+
+# Key Pairs
+0	der	seq
+>&0	der	int1=00	
+>&0	der	int65=x
+>&0	der	int3=010001	DER Encoded Key Pair, 512 bits
+
+0	der	seq
+>&0	der	int1=00	
+>&0	der	int129=x
+>&0	der	int3=010001	DER Encoded Key Pair, 1024 bits
+
+0	der	seq
+>&0	der	int1=00	
+>&0	der	int257=x
+>&0	der	int3=010001	DER Encoded Key Pair, 2048 bits
+
+0	der	seq
+>&0	der	int1=00	
+>&0	der	int513=x
+>&0	der	int3=010001	DER Encoded Key Pair, 4096 bits
+
+0	der	seq
+>&0	der	int1=00	
+>&0	der	int1025=x
+>&0	der	int3=010001	DER Encoded Key Pair, 8192 bits
+
+0	der	seq
+>&0	der	int1=00	
+>&0	der	int2049=x
+>&0	der	int3=010001	DER Encoded Key Pair, 16k bits
+
+0	der	seq
+>&0	der	int1=00	
+>&0	der	int4097=x
+>&0	der	int3=010001	DER Encoded Key Pair, 32k bits
+
+# Certificates
+0	der	seq
+>&0	der	seq
+>>&0	der	int2=0dfa	DER Encoded Certificate, 512 bits
+>>&0	der	int2=0dfb	DER Encoded Certificate, 1024 bits
+>>&0	der	int2=0dfc	DER Encoded Certificate, 2048 bits
+>>&0	der	int2=0dfd	DER Encoded Certificate, 4096 bits
+>>&0	der	int2=0dfe	DER Encoded Certificate, 8192 bits
+>>&0	der	int2=0dff	DER Encoded Certificate, 16k bits
+>>&0	der	int2=0e04	DER Encoded Certificate, 32k bits
+>>&0	der	int2=x		DER Encoded Certificate, ? bits (%s)
+>>&0	der	seq
+>>>&0	der	obj_id9=2a864886f70d010105	\b, sha1WithRSAEncryption
+>>>&0	der	obj_id9=x			\b, ? Encryption (%s)
+>>>&0	der	null
+>>&0	der	seq
+>>>&0	der	set
+>>>>&0	der	seq
+>>>>>&0	der	obj_id3=550406
+>>>>>&0	der	prt_str=x	\b, countryName=%s
+>>>&0	der	set
+>>>>&0	der	seq
+>>>>>&0	der	obj_id3=550408
+>>>>>&0	der	prt_str=x	\b, stateOrProvinceName=%s
+>>>&0	der	set
+>>>>&0	der	seq
+>>>>>&0	der	obj_id3=550407
+>>>>>&0	der	prt_str=x	\b, localityName=%s
+>>>&0	der	set
+>>>>&0	der	seq
+>>>>>&0	der	obj_id3=55040a
+>>>>>&0	der	prt_str=x	\b, organizationName=%s
+>>>&0	der	set
+>>>>&0	der	seq
+>>>>>&0	der	obj_id3=55040b
+>>>>>&0	der	prt_str=x	\b, organizationUnitName=%s
+>>>&0	der	set
+>>>>&0	der	seq
+>>>>>&0	der	obj_id3=550403
+>>>>>&0	der	prt_str=x	\b, commonName=%s
+>>>&0	der	set
+>>>>&0	der	seq
+>>>>>&0	der	obj_id9=2a864886f70d010901
+>>>>>&0	der	ia5_str=x	\b, emailAddress=%s
+>>&0	der	seq
+>>>&0	der	utc_time=x	\b, utcTime=%s
+>>>&0	der	utc_time=x	\b, utcTime=%s
+>>&0	use	certinfo

+ 21 - 23
magic/Magdir/elf

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: elf,v 1.69 2015/06/16 17:23:08 christos Exp $
+# $File: elf,v 1.70 2016/06/02 12:36:30 christos Exp $
 # elf:  file(1) magic for ELF executables
 #
 # We have to check the byte order flag to see what byte order all the
@@ -301,25 +301,23 @@
 >>0	use		elf-le
 >5	byte		2		MSB
 >>0	use		\^elf-le
-# Up to now only 0, 1 and 2 are defined; I've seen a file with 0x83, it seemed
-# like proper ELF, but extracting the string had bad results.
->4      byte            <0x80
->>8	string		>\0		(%s)
->8	string		\0
->>7	byte		0		(SYSV)
->>7	byte		1		(HP-UX)
->>7	byte		2		(NetBSD)
->>7	byte		3		(GNU/Linux)
->>7	byte		4		(GNU/Hurd)
->>7	byte		5		(86Open)
->>7	byte		6		(Solaris)
->>7	byte		7		(Monterey)
->>7	byte		8		(IRIX)
->>7	byte		9		(FreeBSD)
->>7	byte		10		(Tru64)
->>7	byte		11		(Novell Modesto)
->>7	byte		12		(OpenBSD)
->8      string          \2
->>7     byte            13              (OpenVMS)
->>7	byte		97		(ARM)
->>7	byte		255		(embedded)
+>7	byte		0		(SYSV)
+>7	byte		1		(HP-UX)
+>7	byte		2		(NetBSD)
+>7	byte		3		(GNU/Linux)
+>7	byte		4		(GNU/Hurd)
+>7	byte		5		(86Open)
+>7	byte		6		(Solaris)
+>7	byte		7		(Monterey)
+>7	byte		8		(IRIX)
+>7	byte		9		(FreeBSD)
+>7	byte		10		(Tru64)
+>7	byte		11		(Novell Modesto)
+>7	byte		12		(OpenBSD)
+>7	byte		13		(OpenVMS)
+>7	byte		14		(HP NonStop Kernel)
+>7	byte		15		(AROS Research Operating System)
+>7	byte		16		(FenixOS)
+>7	byte		17		(Nuxi CloudABI)
+>7	byte		97		(ARM)
+>7	byte		255		(embedded)

+ 31 - 29
magic/Magdir/filesystems

@@ -1,5 +1,5 @@
 #------------------------------------------------------------------------------
-# $File: filesystems,v 1.111 2015/09/09 16:26:54 christos Exp $
+# $File: filesystems,v 1.113 2016/02/14 14:38:24 christos Exp $
 # filesystems:  file(1) magic for different filesystems
 #
 0	name	partid  
@@ -1738,28 +1738,30 @@
 >0x402	beshort		< 100
 >0x402	beshort		> -1		Minix filesystem, V1, 30 char names (big endian), %d zones
 >0x1e	string		minix		\b, bootable
-0x410	leshort		0x2468
->0x402	beshort		< 100
->>0x402	beshort		> -1		Minix filesystem, V2, 14 char names
->0x1e	string		minix		\b, bootable
-0x410	beshort		0x2468
->0x402	beshort		< 100
->0x402	beshort		> -1		Minix filesystem, V2 (big endian)
->0x1e	string		minix		\b, bootable
-0x410	leshort		0x2478
->0x402	beshort		< 100
->0x402	beshort		> -1		Minix filesystem, V2, 30 char names
->0x1e	string		minix		\b, bootable
-0x410	leshort		0x2478
->0x402	beshort		< 100
->0x402	beshort		> -1		Minix filesystem, V2, 30 char names
->0x1e	string		minix		\b, bootable
-0x410	beshort		0x2478
->0x402	beshort		!0		Minix filesystem, V2, 30 char names (big endian)
->0x1e	string		minix		\b, bootable
-0x418	leshort		0x4d5a
->0x402	beshort		<100
->>0x402	beshort		> -1		Minix filesystem, V3, 60 char names
+# Weak Magic: this is $x
+#0x410	leshort		0x2468
+#>0x402	beshort		< 100
+#>>0x402	beshort		> -1		Minix filesystem, V2, 14 char names
+#>0x1e	string		minix		\b, bootable
+#0x410	beshort		0x2468
+#>0x402	beshort		< 100
+#>0x402	beshort		> -1		Minix filesystem, V2 (big endian)
+#>0x1e	string		minix		\b, bootable
+#0x410	leshort		0x2478
+#>0x402	beshort		< 100
+#>0x402	beshort		> -1		Minix filesystem, V2, 30 char names
+#>0x1e	string		minix		\b, bootable
+#0x410	leshort		0x2478
+#>0x402	beshort		< 100
+#>0x402	beshort		> -1		Minix filesystem, V2, 30 char names
+#>0x1e	string		minix		\b, bootable
+#0x410	beshort		0x2478
+#>0x402	beshort		!0		Minix filesystem, V2, 30 char names (big endian)
+#>0x1e	string		minix		\b, bootable
+# Weak Magic! this is MD
+#0x418	leshort		0x4d5a
+#>0x402	beshort		<100
+#>>0x402	beshort		> -1		Minix filesystem, V3, 60 char names
 
 # SGI disk labels - Nathan Scott <nathans@debian.org>
 0	belong		0x0BE5A941	SGI disk label (volume header)
@@ -2214,12 +2216,12 @@
 >0x10090	lelong	x		sectorsize %d,
 >0x10094	lelong	x		nodesize %d,
 >0x10098	lelong	x		leafsize %d,
->0x10020	belong	x		UUID=%8x-
->0x10024	beshort	x		\b%4x-
->0x10026	beshort	x		\b%4x-
->0x10028	beshort	x		\b%4x-
->0x1002a	beshort	x		\b%4x
->0x1002c	belong	x		\b%8x,
+>0x10020	belong	x		UUID=%08x-
+>0x10024	beshort	x		\b%04x-
+>0x10026	beshort	x		\b%04x-
+>0x10028	beshort	x		\b%04x-
+>0x1002a	beshort	x		\b%04x
+>0x1002c	belong	x		\b%08x,
 >0x10078	lequad	x		%lld/
 >0x10070	lequad	x		\b%lld bytes used,
 >0x10088	lequad	x		%lld devices

+ 16 - 0
magic/Magdir/finger

@@ -0,0 +1,16 @@
+
+#------------------------------------------------------------------------------
+# $File: finger,v 1.2 2015/10/07 02:37:57 christos Exp $
+# fingerprint:  file(1) magic for fingerprint data
+#
+#
+
+# http://cgit.freedesktop.org/libfprint/libfprint/tree/libfprint/data.c
+
+0	string	FP1		libfprint fingerprint data V1
+>3	beshort	x		\b, driver_id %x
+>5	belong	x		\b, devtype %x
+
+0	string	FP2		libfprint fingerprint data V2
+>3	beshort	x		\b, driver_id %x
+>5	belong	x		\b, devtype %x

+ 36 - 0
magic/Magdir/flif

@@ -0,0 +1,36 @@
+
+#------------------------------------------------------------------------------
+#	$File: flif,v 1.1 2015/11/23 22:04:36 christos Exp $
+#	flif:	Magic	data	for	file(1)	command.
+#	FLIF	(Free	Lossless	Image	Format)
+
+0	string	FLIF	FLIF
+>4	string	<H	image data
+>>6	beshort	x	\b, %u
+>>8	beshort	x	\bx%u
+>>5	string	1	\b, 8-bit/color,
+>>5	string	2	\b, 16-bit/color,
+>>4	string	1	\b, grayscale, non-interlaced
+>>4	string	3	\b, RGB, non-interlaced
+>>4	string	4	\b, RGBA, non-interlaced
+>>4	string	A	\b, grayscale
+>>4	string	C	\b, RGB, interlaced
+>>4	string	D	\b, RGBA, interlaced
+>4	string	>H	\b, animation data
+>>5	ubyte	<255	\b, %i frames
+>>>7	beshort	x	\b, %u
+>>>9	beshort	x	\bx%u
+>>>6	string	=1	\b, 8-bit/color
+>>>6	string	=2	\b, 16-bit/color
+>>5	ubyte	0xFF
+>>>6	beshort	x	\b, %i frames,
+>>>9	beshort	x	\b, %u
+>>>11	beshort	x	\bx%u
+>>>8	string	=1	\b, 8-bit/color
+>>>8	string	=2	\b, 16-bit/color
+>>4	string	=Q	\b, grayscale, non-interlaced
+>>4	string	=S	\b, RGB, non-interlaced
+>>4	string	=T	\b, RGBA, non-interlaced
+>>4	string	=a	\b, grayscale
+>>4	string	=c	\b, RGB, interlaced
+>>4	string	=d	\b, RGBA, interlaced

+ 42 - 4
magic/Magdir/fonts

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: fonts,v 1.27 2014/04/30 21:41:02 christos Exp $
+# $File: fonts,v 1.30 2016/03/22 22:27:47 christos Exp $
 # fonts:  file(1) magic for font data
 #
 0	search/1	FONT		ASCII vfont text
@@ -29,6 +29,25 @@
 # X11 Bitmap Distribution Format, from Daniel Quinlan (quinlan@yggdrasil.com)
 0	search/1	STARTFONT\ 		X11 BDF font text
 
+# From: Joerg Jenderek
+# URL: http://grub.gibibit.com/New_font_format
+# Reference: util/grub-mkfont.c
+#		include/grub/fontformat.h 
+# FONT_FORMAT_SECTION_NAMES_FILE
+0			string		FILE		
+# FONT_FORMAT_PFF2_MAGIC
+>8			string		PFF2		
+# length 4 only at the moment
+>>4			ubelong		4		
+# FONT_FORMAT_SECTION_NAMES_FONT_NAME
+>>>12			string		NAME		GRUB2 font
+!:mime			application/x-font-pf2
+!:ext			pf2
+# length of font_name
+>>>>16			ubelong		>0		
+# font_name
+>>>>>20			string		>\0		"%-s"
+
 # X11 fonts, from Daniel Quinlan (quinlan@yggdrasil.com)
 # PCF must come before SGI additions ("MIPSEL MIPS-II COFF" collides)
 0	string		\001fcp			X11 Portable Compiled Font data
@@ -58,8 +77,11 @@
 4098	string		DOSFONT		DOSFONT2 encrypted font data
 
 # downloadable fonts for browser (prints type) anthon@mnt.org
-0	string		PFR1		PFR1 font
+# https://tools.ietf.org/html/rfc3073
+0	string		PFR1		Portable Font Resource font data (new)
 >102	string		>0		\b: %s
+0	string		PFR0		Portable Font Resource font data (old)
+>4	beshort		>0		version %d
 
 # True Type fonts
 0	string	\000\001\000\000\000	TrueType font data
@@ -92,9 +114,25 @@
 !:mime application/vnd.ms-fontobject
 
 # Web Open Font Format (.woff)
+0	name		woff
+>4	belong		0x00010000	\b, TrueType
+>4	belong		0x4F54544F	\b, CFF
+>4	belong		0x74727565	\b, TrueType
+>4	default		x
+>>4	belong		x		\b, flavor %d
+>8	belong		x		\b, length %d
+#>12	beshort		x		\b, numTables %d
+#>14	beshort		x		\b, reserved %d
+#>16	belong		x		\b, totalSfntSize %d
+
 # http://www.w3.org/TR/WOFF/
 0	string		wOFF	Web Open Font Format
->4	belong		x	\b, flavor %d
->8	belong		x	\b, length %d
+>0	use		woff
 >20	beshort		x	\b, version %d
 >22	beshort		x	\b.%d
+# http://www.w3.org/TR/WOFF2/
+0	string		wOF2	Web Open Font Format (Version 2)
+>0	use		woff
+#>20	belong		x	\b, totalCompressedSize %d
+>24	beshort		x	\b, version %d
+>26	beshort		x	\b.%d

+ 4 - 2
magic/Magdir/fortran

@@ -1,7 +1,9 @@
 
 #------------------------------------------------------------------------------
-# $File: fortran,v 1.9 2015/06/17 19:55:27 christos Exp $
+# $File: fortran,v 1.10 2015/11/05 18:47:16 christos Exp $
 # FORTRAN source
-0	regex/100l	\^[Cc][\ \t]	FORTRAN program text
+# Check that the first 100 lines start with C or whitespace first.
+0       regex/100l      !\^[^Cc\ \t].*$
+>0	regex/100l	\^[Cc][\ \t]	FORTRAN program text
 !:mime	text/x-fortran
 !:strength - 5

+ 19 - 13
magic/Magdir/hitachi-sh

@@ -1,22 +1,28 @@
 
 #------------------------------------------------------------------------------
-# $File: hitachi-sh,v 1.6 2013/01/29 19:31:33 christos Exp $
+# $File: hitachi-sh,v 1.7 2015/09/30 20:32:35 christos Exp $
 # hitachi-sh: file(1) magic for Hitachi Super-H
 #
 # Super-H COFF
 #
+# updated by Joerg Jenderek at Oct 2015
+# https://en.wikipedia.org/wiki/COFF
+# https://de.wikipedia.org/wiki/Common_Object_File_Format
+# http://www.delorie.com/djgpp/doc/coff/filhdr.html
 # below test line conflicts with 2nd NTFS filesystem sector 
-0	beshort		0x0500		Hitachi SH big-endian COFF
 # 2nd NTFS filesystem sector often starts with 0x05004e00 for unicode string 5 NTLDR
-#0	ubelong&0xFFFFNMPQ	0x0500NMPQ     Hitachi SH big-endian COFF
->18	beshort&0x0002	=0x0000		object
->18	beshort&0x0002	=0x0002		executable
->18	beshort&0x0008	=0x0008		\b, stripped
->18	beshort&0x0008	=0x0000		\b, not stripped
-#
-0	leshort		0x0550		Hitachi SH little-endian COFF
->18	leshort&0x0002	=0x0000		object
->18	leshort&0x0002	=0x0002		executable
->18	leshort&0x0008	=0x0008		\b, stripped
->18	leshort&0x0008	=0x0000		\b, not stripped
+# and Portable Gaming Notation Compressed format (*.WID http://pgn.freeservers.com/)
+0	beshort		0x0500		
+# test for unused flag bits (0x8000,0x0800,0x0400,0x0200,0x0080) in f_flags
+>18	ubeshort&0x8E80	0		
+# use big endian variant of subroutine to display name+variables+flags
+# for common object formatted files 
+>>0	use				\^display-coff
+
+0	leshort		0x0550		
+# test for unused flag bits in f_flags
+>18	uleshort&0x8E80	0		
+# use little endian variant of subroutine to 
+# display name+variables+flags for common object formatted files 
+>>0	use				display-coff
 

+ 351 - 26
magic/Magdir/images

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: images,v 1.107 2015/07/11 14:40:10 christos Exp $
+# $File: images,v 1.116 2016/03/23 15:29:20 christos Exp $
 # images:  file(1) magic for image formats (see also "iff", and "c-lang" for
 # XPM bitmaps)
 #
@@ -12,26 +12,155 @@
 
 # Targa - matches `povray', `ppmtotga' and `xv' outputs
 # by Philippe De Muyter <phdm@macqel.be>
+# URL: http://justsolve.archiveteam.org/wiki/TGA
+# Reference: http://www.dca.fee.unicamp.br/~martino/disciplinas/ea978/tgaffs.pdf
+# Update: Joerg Jenderek
 # at 2, byte ImgType must be 1, 2, 3, 9, 10 or 11
+#	,32 or 33 (both not observed)
 # at 1, byte CoMapType must be 1 if ImgType is 1 or 9, 0 otherwise
+#	or theoretically 2-128 reserved for use by Truevision or 128-255 may be used for developer applications
 # at 3, leshort Index is 0 for povray, ppmtotga and xv outputs
 # `xv' recognizes only a subset of the following (RGB with pixelsize = 24)
 # `tgatoppm' recognizes a superset (Index may be anything)
-1	belong&0xfff7ffff	0x01010000	Targa image data - Map
-!:strength + 2
->2	byte&8			8		- RLE
->12	leshort			>0		%d x
->14	leshort			>0		%d
-1	belong&0xfff7ffff	0x00020000	Targa image data - RGB
-!:strength + 2
->2	byte&8			8		- RLE
->12	leshort			>0		%d x
->14	leshort			>0		%d
-1	belong&0xfff7ffff	0x00030000	Targa image data - Mono
-!:strength + 2
->2	byte&8			8		- RLE
->12	leshort			>0		%d x
->14	leshort			>0		%d
+#
+# test of Color Map Type 0~no 1~color map
+# and Image Type 1 2 3 9 10 11 32 33
+# and Color Map Entry Size 0 15 16 24 32
+0	ubequad&0x00FeC400000000C0	0	
+# skip more garbage by looking for positive image type
+>2	ubyte			>0		
+# skip some compiled terminfo by looking for image type less equal 33
+>>2	ubyte			<34		
+# skip arches.3200 , Finder.Root , Slp.1 by looking for low pixel sizes 15 16 24 32
+>>>16	ubyte			<33		
+# skip more by looking for pixel size 0Fh 10h 18h 20h
+>>>>16	ubyte&0xC0		0x00	
+# skip 260-16.ico by looking for no color map
+>>>>>1	ubyte			0	
+# implies no first map entry
+>>>>>>3	uleshort		0	
+>>>>>>>0	use		tga-image
+# Color Map
+>>>>>1	ubyte			>0	
+>>>>>>0		use		tga-image
+#	display tga bitmap image information
+0	name				tga-image
+>2	ubyte		<34		Targa image data
+!:mime	image/x-tga
+!:apple	????TPIC
+# normal extension .tga but some Truevision products used others:
+# tpic (Apple),icb (Image Capture Board),vda (Video Display Adapter),vst (NuVista),win (UNSURE about that)
+!:ext	tga/tpic/icb/vda/vst
+# image type 1 2 3 9 10 11 32 33
+>2	ubyte&0xF7	1		- Map
+>2	ubyte&0xF7	2		- RGB
+# alpha channel
+>>17	ubyte&0x0F	>0		\bA
+>2	ubyte&0xF7	3		- Mono
+# type not found, but by http://www.fileformat.info/format/tga/corion.htm
+# Compressed color-mapped data, using Huffman, Delta, and runlength encoding
+>2	ubyte		32		- Color
+# Compressed color-mapped data, using Huffman, Delta, and RLE. 4-pass quadtree- type process
+>2	ubyte		33		- Color
+# Color Map Type 0~no 1~color map
+>1	ubyte		1		(
+# first color map entry, 0 normal
+>>3	uleshort	>0		\b%d-
+# color map length 0 2 1dh 3bh d9h 100h
+>>5	uleshort	x		\b%d)
+# 8~run length encoding bit
+>2	ubyte&0x08	8		- RLE
+# gimp can create big pictures!
+>12	uleshort	>0		%d x
+>12	uleshort	=0		65536 x
+# image height. 0 interpreted as 65536
+>14	uleshort	>0		%d
+>14	uleshort	=0		65536
+# Image Pixel Size 15 16 24 32
+>16	ubyte		x		x %d
+# X origin of image. 0 normal 
+>8	uleshort	>0		+%d
+# Y origin of image. 0 normal; positive for top
+>10	uleshort	>0		+%d
+# Image descriptor: bits 3-0 give the alpha channel depth, bits 5-4 give direction
+>17	ubyte&0x0F	>0		- %d-bit alpha
+# bits 5-4 give direction. normal bottom left
+>17	ubyte		&0x20		- top
+#>17	ubyte		^0x20		- bottom
+>17	ubyte		&0x10		- right
+#>17	ubyte		^0x10		- left
+# some info say other bits 6-7 should be zero
+# but data storage interleave by http://www.fileformat.info/format/tga/corion.htm 
+# 00 - no interleave;01 - even/odd interleave; 10 - four way interleave; 11 - reserved
+#>17	ubyte&0xC0	0x00		- no interleave
+>17	ubyte&0xC0	0x40		- interleave
+>17	ubyte&0xC0	0x80		- four way interleave
+>17	ubyte&0xC0	0xC0		- reserved
+# positive length implies identification field 
+>0	ubyte		>0		
+>>18	string		x		"%s"
+# last 18 bytes of newer tga file footer signature
+>18	search/4261301/s	TRUEVISION-XFILE.\0	
+# extension area offset if not 0
+>>&-8		ulelong			>0			
+# length of the extension area. normal 495 for version 2.0
+>>>(&-4.l)	uleshort		0x01EF		
+# AuthorName[41]
+>>>>&0		string			>\0		- author "%-.40s"
+# Comment[324]=4 * 80 null terminated
+>>>>&41		string			>\0		- comment "%-.80s"
+# date
+>>>>&365	ubequad&0xffffFFFFffff0000	!0		
+# Day
+>>>>>&-6		uleshort		x		%d
+# Month
+>>>>>&-8		uleshort		x		\b-%d
+# Year
+>>>>>&-4		uleshort		x		\b-%d
+# time
+>>>>&371	ubequad&0xffffFFFFffff0000	!0		
+# hour
+>>>>>&-8		uleshort		x		%d
+# minutes
+>>>>>&-6		uleshort		x		\b:%.2d
+# second
+>>>>>&-4		uleshort		x		\b:%.2d
+# JobName[41]
+>>>>&377		string			>\0		- job "%-.40s"
+# JobHour Jobminute Jobsecond
+>>>>&418	ubequad&0xffffFFFFffff0000	!0		
+>>>>>&-8		uleshort		x		%d
+>>>>>&-6		uleshort		x		\b:%.2d
+>>>>>&-4		uleshort		x		\b:%.2d
+# SoftwareId[41]
+>>>>&424		string			>\0		- %-.40s
+# SoftwareVersionNumber
+>>>>&424	ubyte				>0		
+>>>>>&40		uleshort/100		x		%d
+>>>>>&40		uleshort%100		x		\b.%d
+# VersionLetter
+>>>>>&42		ubyte			>0x20		\b%c
+# KeyColor
+>>>>&468		ulelong			>0		- keycolor 0x%8.8x
+# Denominator of Pixel ratio. 0~no pixel aspect
+>>>>&474	uleshort			>0			
+# Numerator
+>>>>>&-4		uleshort		>0		- aspect %d
+>>>>>&-2		uleshort		x		\b/%d
+# Denominator of Gamma ratio. 0~no Gamma value
+>>>>&478	uleshort			>0		
+# Numerator
+>>>>>&-4		uleshort		>0		- gamma %d
+>>>>>&-2		uleshort		x		\b/%d
+# ColorOffset 
+#>>>>&480	ulelong			x		- col offset 0x%8.8x
+# StampOffset
+#>>>>&484	ulelong			x		- stamp offset 0x%8.8x
+# ScanOffset
+#>>>>&488	ulelong			x		- scan offset 0x%8.8x
+# AttributesType
+#>>>>&492	ubyte			x		- Attributes 0x%x
+## EndOfTGA
 
 # PBMPLUS images
 # The next byte following the magic is always whitespace.
@@ -545,8 +674,12 @@
 0	beshort		0x1010		PEX Binary Archive
 
 # DICOM medical imaging data
+# URL:		https://en.wikipedia.org/wiki/DICOM#Data_format
+# Note:		"dcm" is the official file name extension
+# 		XnView also mentions "dc3" and "acr" as file name extensions
 128	string	DICM			DICOM medical imaging data
 !:mime	application/dicom
+!:ext dcm/dicom/dic
 
 # XWD - X Window Dump file.
 #   As described in /usr/X11R6/include/X11/XWDFile.h
@@ -686,6 +819,7 @@
 
 # GEM Image: Version 1, Headerlen 8 (Wolfram Kleff)
 # Format variations from: Bernd Nuernberger <bernd.nuernberger@web.de>
+# Update: Joerg Jenderek
 # See http://fileformats.archiveteam.org/wiki/GEM_Raster
 # For variations, also see:
 #    http://www.seasip.info/Gem/ff_img.html (Ventura) 
@@ -693,23 +827,59 @@
 #    http://www.fileformat.info/format/gemraster/spec/index.htm (XIMG, STTT)
 #    http://sylvana.net/1stguide/1STGUIDE.ENG (TIMG)
 0       beshort     0x0001
->2      beshort     0x0008      GEM Image data
+# header_size
+>2      beshort     0x0008      
+>>0     use gem_info
+>2      beshort     0x0009      
+>>0     use gem_info
+# no example for NOSIG
+>2      beshort     24      
 >>0     use gem_info
->2      beshort     0x0009      GEM Image data (Ventura)
+# no example for HYPERPAINT
+>2      beshort     25      
 >>0     use gem_info
-16      string      XIMG\0      GEM XIMG Image data
+16      string      XIMG\0      
 >0      use gem_info
-16      string      STTT\0\x10  GEM STTT Image data 
+# no example
+16      string      STTT\0\x10  
 >0      use gem_info
-16      string      TIMG\0      GEM TIMG Image data
+# no example or description
+16      string      TIMG\0      
 >0      use gem_info
 
 0   name        gem_info
->12	beshort		x		%d x
->14	beshort		x		%d,
->4	beshort		x		%d planes,
->8	beshort		x		%d x
->10	beshort		x		%d pixelsize
+# version is 2 for some XIMG and 1 for all others
+>0	beshort		<0x0003		GEM
+# http://www.snowstone.org.uk/riscos/mimeman/mimemap.txt
+!:mime	image/x-gem
+# header_size 24 25 27 59 779 words for colored bitmaps
+>>2	beshort		>9		
+>>>16	string		STTT\0\x10	STTT
+>>>16	string		TIMG\0		TIMG
+# HYPERPAINT or NOSIG variant
+>>>16	string		\0\x80		
+>>>>2	beshort		=24		NOSIG
+>>>>2	beshort		!24		HYPERPAINT
+# NOSIG or XIMG variant
+>>>16	default		x		
+>>>>16	string		!XIMG\0		NOSIG
+>>16	string		=XIMG\0		XIMG Image data
+!:ext	img/ximg
+# to avoid Warning: Current entry does not yet have a description for adding a EXTENSION type
+>>16	string		!XIMG\0		Image data
+!:ext	img
+# header_size is 9 for Ventura files and 8 for other GEM Paint files
+>>2	beshort		9		(Ventura)
+#>>2	beshort		8		(Paint)
+>>12	beshort		x		%d x
+>>14	beshort		x		%d,
+# 1 4 8
+>>4	beshort		x		%d planes,
+# in tenths of a millimetre
+>>8	beshort		x		%d x
+>>10	beshort		x		%d pixelsize
+# pattern_size 1-8. 2 for GEM Paint
+>>6	beshort		!2		\b, pattern size %d
 
 # GEM Metafile (Wolfram Kleff)
 0	lelong		0x0018FFFF	GEM Metafile data
@@ -998,7 +1168,22 @@
 !:mime	image/x-polar-monitor-bitmap
 
 # From: Rick Richardson <rickrich@gmail.com>
+# updated by: Joerg Jenderek
+# URL: http://techmods.net/nuvi/
 0	string	GARMIN\ BITMAP\ 01	Garmin Bitmap file
+# extension is also used for 
+# Sony SRF raw image (image/x-sony-srf)
+# SRF map
+# Terragen Surface Map (http://www.planetside.co.uk/terragen)
+# FileLocator Pro search criteria file (http://www.mythicsoft.com/filelocatorpro)
+!:ext srf
+#!:mime	image/x-garmin-srf
+# version 1.00,2.00,2.10,2.40,2.50
+>0x2f	string		>0		\b, version %4.4s
+# width (2880,2881,3240)
+>0x55	uleshort	>0		\b, %dx
+# height (80,90)
+>>0x53	uleshort	x		\b%d
 
 # Type:	Ulead Photo Explorer5 (.pe5)
 # URL:	http://www.jisyo.com/cgibin/view.cgi?EXT=pe5 (Japanese)
@@ -1120,3 +1305,143 @@
 #
 0	string	\x42\x50\x47\xFB	BPG (Better Portable Graphics)
 !:mime  image/bpg
+
+# From: Joerg Jenderek
+# URL: https://en.wikipedia.org/wiki/Apple_Icon_Image_format
+0	string		icns		Mac OS X icon
+!:mime	image/x-icns
+!:apple	????icns
+!:ext icns
+>4	ubelong		>0		
+# file size
+>>4	ubelong		x		\b, %d bytes
+# icon type
+>>8	string		x		\b, "%4.4s" type
+
+# TIM images
+0		lelong		0x00000010	TIM image,
+>4		lelong  	0x8		4-Bit,
+>4		lelong  	0x9		8-Bit,
+>4		lelong  	0x2		15-Bit,
+>4		lelong  	0x3		24-Bit,
+>4		lelong 		&8
+>>(8.l+12)	leshort		x		Pixel at (%d,
+>>(8.l+14)	leshort		x		\b%d)
+>>(8.l+16)	leshort		x		Size=%dx
+>>(8.l+18)	leshort		x		\b%d,
+>>4		lelong 		0x8		16 CLUT Entries at
+>>4		lelong 		0x9		256 CLUT Entries at
+>>12		leshort		x		(%d,
+>>14		leshort		x		\b%d)
+>4		lelong		^8
+>>12		leshort		x		Pixel at (%d,
+>>14		leshort		x		\b%d)
+>>16		leshort		x		Size=%dx
+>>18		leshort		x		\b%d
+
+# MDEC streams
+0		lelong		0x80010160	MDEC video stream,
+>16		leshort		x		%dx
+>18		leshort		x		\b%d
+#>8		lelong		x		%d frames
+#>4		leshort		x		secCount=%d;
+#>6		leshort		x		nSectors=%d;
+#>12		lelong		x		frameSize=%d;
+
+# BS encoded bitstreams
+2		leshort		0x3800		BS image,
+>6		leshort		x		Version %d,
+>4		leshort		x		Quantization %d,
+>0		leshort		x		(Decompresses to %d words)
+
+# Type: farbfeld image.
+# Url: http://tools.suckless.org/farbfeld/
+# From: Ian D. Scott <ian@iandouglasscott.com>
+#
+0		string		farbfeld	farbfeld image data,
+>8		ubelong		x		%dx
+>12		ubelong		x		\b%d
+
+# Type: Sega PVR image.
+# From: David Korth <gerbilsoft@gerbilsoft.com>
+# References:
+# - http://fabiensanglard.net/Mykaruga/tools/segaPVRFormat.txt
+# - https://github.com/yazgoo/pvrx2png
+# - https://github.com/nickworonekin/puyotools
+
+# Sega PVR header.
+0	name	sega-pvr-image-header
+>0x0C	leshort	x	%d x
+>0x0E	leshort	x	%d
+# Image format.
+>0x08	byte	0	\b, ARGB1555
+>0x08	byte	1	\b, RGB565
+>0x08	byte	2	\b, ARGB4444
+>0x08	byte	3	\b, YUV442
+>0x08	byte	4	\b, Bump
+>0x08	byte	5	\b, 4bpp
+>0x08	byte	6	\b, 8bpp
+# Image data type.
+>0x09	byte	0x01	\b, square twiddled
+>0x09	byte	0x02	\b, square twiddled & mipmap
+>0x09	byte	0x03	\b, VQ
+>0x09	byte	0x04	\b, VQ & mipmap
+>0x09	byte	0x05	\b, 8-bit CLUT twiddled
+>0x09	byte	0x06	\b, 4-bit CLUT twiddled
+>0x09	byte	0x07	\b, 8-bit direct twiddled
+>0x09	byte	0x08	\b, 4-bit direct twiddled
+>0x09	byte	0x09	\b, rectangle
+>0x09	byte	0x0B	\b, rectangular stride
+>0x09	byte	0x0D	\b, rectangular twiddled
+>0x09	byte	0x10	\b, small VQ
+>0x09	byte	0x11	\b, small VQ & mipmap
+>0x09	byte	0x12	\b, square twiddled & mipmap
+
+# Sega PVR (Xbox) image header.
+# Contains an embedded DirectDraw surface instead of PVR data.
+0	name	sega-pvr-xbox-dds-header
+>16	lelong	x	%d x
+>12	lelong	x	%d,
+>84	string	x	%.4s
+
+# Sega PVR image.
+0	string	PVRT
+>0x10	string	DDS\040\174\000\000\000 Sega PVR (Xbox) image:
+>>0x20	use	sega-pvr-xbox-dds-header
+>0x10	belong	!0x44445320		Sega PVR image:
+>>0	use	sega-pvr-image-header
+
+# Sega PVR image with GBIX.
+0	string	GBIX
+>0x10	string	PVRT
+>>0x10	string	DDS\040\174\000\000\000 Sega PVR (Xbox) image:
+>>>0x20	use	sega-pvr-xbox-dds-header
+>>0x10	belong	!0x44445320		Sega PVR image:
+>>>0x10	use	sega-pvr-image-header
+>>0x08	lelong	x	\b, global index = %u
+
+# Sega GVR header.
+0	name	sega-gvr-image-header
+>0x0C	beshort	x	%d x
+>0x0E	beshort	x	%d
+# Image data format.
+>0x0B	byte	0	\b, I4
+>0x0B	byte	1	\b, I8
+>0x0B	byte	2	\b, IA4
+>0x0B	byte	3	\b, IA8
+>0x0B	byte	4	\b, RGB565
+>0x0B	byte	5	\b, RGB5A3
+>0x0B	byte	6	\b, ARGB8888
+>0x0B	byte	8	\b, CI4
+>0x0B	byte	9	\b, CI8
+>0x0B	byte	14	\b, DXT1
+
+# Sega GVR image.
+0	string	GVRT	Sega GVR image:
+>0x10	use	sega-gvr-image-header
+
+# Sega GVR image with GBIX.
+0	string	GBIX
+>0x10	string	GVRT	Sega GVR image:
+>>0x10	use	sega-gvr-image-header
+>>0x08	belong	x	\b, global index = %u

+ 16 - 4
magic/Magdir/intel

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: intel,v 1.12 2014/04/30 21:41:02 christos Exp $
+# $File: intel,v 1.14 2015/11/10 00:13:27 christos Exp $
 # intel:  file(1) magic for x86 Unix
 #
 # Various flavors of x86 UNIX executable/object (other than Xenix, which
@@ -30,15 +30,27 @@
 0	leshort		=0522		iAPX 286 executable large model (COFF)
 >12	lelong		>0		not stripped
 #>22	leshort		>0		- version %d
+# updated by Joerg Jenderek at Oct 2015
+# https://de.wikipedia.org/wiki/Common_Object_File_Format
+# http://www.delorie.com/djgpp/doc/coff/filhdr.html
+# ./msdos (version 5.25) labeled the next entry as "MS Windows COFF Intel 80386 object file"
+# ./intel (version 5.25) labeled the next entry as "80386 COFF executable"
 # SGI labeled the next entry as "iAPX 386 executable" --Dan Quinlan
-0	leshort		=0514		80386 COFF executable
->12	lelong		>0		not stripped
->22	leshort		>0		- version %d
+0	leshort		=0514		
+# use subroutine to display name+flags+variables for common object formatted files 
+>0	use				display-coff
+#>12	lelong		>0		not stripped
+# no hint found, that at offset 22 is version
+#>22	leshort		>0		- version %d
 
 # rom: file(1) magic for BIOS ROM Extensions found in intel machines
 #      mapped into memory between 0xC0000 and 0xFFFFF
 # From Gurkan Sengun <gurkan@linuks.mine.nu>, www.linuks.mine.nu
+# updated by Joerg Jenderek
+# https://en.wikipedia.org/wiki/Option_ROM
 0        beshort         0x55AA       BIOS (ia32) ROM Ext.
+!:mime	application/octet-stream
+!:ext	rom/bin
 >5       string          USB          USB
 >7       string          LDR          UNDI image
 >30      string          IBM          IBM comp. Video

+ 9 - 2
magic/Magdir/java

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------
-# $File: java,v 1.16 2013/09/24 20:22:03 christos Exp $
+# $File: java,v 1.18 2015/11/29 22:08:14 christos Exp $
 # Java ByteCode and Mach-O binaries (e.g., Mac OS X) use the
 # same magic number, 0xcafebabe, so they are both handled
 # in the entry called "cafebabe".
@@ -16,5 +16,12 @@
 !:mime	application/x-java-jce-keystore
 
 # Java source
-0	regex	^import.*;$	Java source
+0	regex	\^import.*;$	Java source
 !:mime	text/x-java
+
+# Java HPROF dumps
+# https://java.net/downloads/heap-snapshot/hprof-binary-format.html
+0	string		JAVA\x20PROFILE\x201.0.
+>0x12	short		0
+>>0x11	ushort-0x31	<2      Java HPROF dump,
+>>0x17	beqdate/1000	x       created %s

+ 27 - 4
magic/Magdir/lisp

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: lisp,v 1.23 2009/09/19 16:28:10 christos Exp $
+# $File: lisp,v 1.24 2015/11/30 20:54:26 christos Exp $
 # lisp:  file(1) magic for lisp programs
 #
 # various lisp types, from Daniel Quinlan (quinlan@yggdrasil.com)
@@ -26,16 +26,39 @@
 0	search/4096	(custom-set-variables\ 	Lisp/Scheme program text
 !:mime	text/x-lisp
 
+# URL: https://en.wikipedia.org/wiki/Emacs_Lisp
+# Reference: http://ftp.gnu.org/old-gnu/emacs/elisp-manual-18-1.03.tar.gz
+# Update: Joerg Jenderek 
 # Emacs 18 - this is always correct, but not very magical.
-0	string	\012(			Emacs v18 byte-compiled Lisp data
+0	string	\012(			
+# look for emacs lisp keywords
+# GRR: split regex because it is too long or get error like
+# lisp, 36: Warning: cannot get string from `^(defun|defvar|defconst|defmacro|setq|fset|put|provide|require|'
+>&0	regex	\^(defun|defvar|defconst|defmacro|setq|fset)	Emacs v18 byte-compiled Lisp data
 !:mime	application/x-elc
+# https://searchcode.com/codesearch/view/2173420/
+# not really pure text
+!:apple	EMAxTEXT
+!:ext elc
+# remaining regex
+>&0	regex	\^(put|provide|require|random)	Emacs v18 byte-compiled Lisp data
+!:mime	application/x-elc
+!:apple	EMAxTEXT
+!:ext elc
+# missed cl.elc dbx.elc simple.elc look like normal lisp starting with ;;;
+
 # Emacs 19+ - ver. recognition added by Ian Springer
 # Also applies to XEmacs 19+ .elc files; could tell them apart with regexs
 # - Chris Chittleborough <cchittleborough@yahoo.com.au>
+# Update: Joerg Jenderek 
 0	string	;ELC	
->4	byte	>18			
->4	byte    <32			Emacs/XEmacs v%d byte-compiled Lisp data
+# version\0\0\0
+>4	byte	>18			Emacs/XEmacs v%d byte-compiled Lisp data
+# why less than 32 ? does not make sense to me. GNU Emacs version is 24.5 at April 2015
+#>4	byte    <32			Emacs/XEmacs v%d byte-compiled Lisp data
 !:mime	application/x-elc		
+!:apple	EMAxTEXT
+!:ext elc
 
 # Files produced by CLISP Common Lisp From: Bruno Haible <haible@ilog.fr>
 0	string	(SYSTEM::VERSION\040'	CLISP byte-compiled Lisp program (pre 2004-03-27)

+ 46 - 11
magic/Magdir/mach

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------
-# $File: mach,v 1.20 2015/05/21 18:28:41 christos Exp $
+# $File: mach,v 1.23 2015/10/15 21:51:22 christos Exp $
 # Mach has two magic numbers, 0xcafebabe and 0xfeedface.
 # Unfortunately the first, cafebabe, is shared with
 # Java ByteCode, so they are both handled in the file "cafebabe".
@@ -106,15 +106,19 @@
 >>>4		belong&0x00ffffff	2	subarchitecture=%d
 >>>4		belong&0x00ffffff	3	subarchitecture=%d
 >>>4		belong&0x00ffffff	4	subarchitecture=%d
->>>4		belong&0x00ffffff	5	\b_v4t
->>>4		belong&0x00ffffff	6	\b_v6
->>>4		belong&0x00ffffff	7	\b_v5tej
->>>4		belong&0x00ffffff	8	\b_xscale
->>>4		belong&0x00ffffff	9	\b_v7
->>>4		belong&0x00ffffff	10	\b_v7f
->>>4		belong&0x00ffffff	11	subarchitecture=%d
->>>4		belong&0x00ffffff	12	\b_v7k
->>>4		belong&0x00ffffff	>12	subarchitecture=%d
+>>>4		belong&0x00ffffff	5	\bv4t
+>>>4		belong&0x00ffffff	6	\bv6
+>>>4		belong&0x00ffffff	7	\bv5tej
+>>>4		belong&0x00ffffff	8	\bxscale
+>>>4		belong&0x00ffffff	9	\bv7
+>>>4		belong&0x00ffffff	10	\bv7f
+>>>4		belong&0x00ffffff	11	\bv7s
+>>>4		belong&0x00ffffff	12	\bv7k
+>>>4		belong&0x00ffffff	13	\bv8
+>>>4		belong&0x00ffffff	14	\bv6m
+>>>4		belong&0x00ffffff	15	\bv7m
+>>>4		belong&0x00ffffff	16	\bv7em
+>>>4		belong&0x00ffffff	>16	subarchitecture=%d
 #				13	m88k
 >>0	belong&0x00ffffff	13
 >>>4		belong&0x00ffffff	0	mc88000
@@ -158,12 +162,15 @@
 >>>4		belong&0x00ffffff	2	subarchitecture=%d
 >>>4		belong&0x00ffffff	3
 >>>4		belong&0x00ffffff	4	\b_arch1
+>>>4		belong&0x00ffffff	8	\b_haswell
 >>>4		belong&0x00ffffff	>4	subarchitecture=%d
 >>0	belong&0x00ffffff	8	64-bit architecture=%d
 >>0	belong&0x00ffffff	9	64-bit architecture=%d
 >>0	belong&0x00ffffff	10	64-bit architecture=%d
 >>0	belong&0x00ffffff	11	64-bit architecture=%d
->>0	belong&0x00ffffff	12	64-bit architecture=%d
+>>0	belong&0x00ffffff	12	arm64
+>>>4		belong&0x00ffffff	0
+>>>4		belong&0x00ffffff	1	\bv8
 >>0	belong&0x00ffffff	13	64-bit architecture=%d
 >>0	belong&0x00ffffff	14	64-bit architecture=%d
 >>0	belong&0x00ffffff	15	64-bit architecture=%d
@@ -203,6 +210,34 @@
 >12	belong		11		kext bundle
 >12	belong		>11
 >>12	belong		x		filetype=%d
+>24	belong		>0		\b, flags:<
+>>24	belong		&0x0000001	\bNOUNDEFS
+>>24	belong		&0x0000002	\b|INCRLINK
+>>24	belong		&0x0000004	\b|DYLDLINK
+>>24	belong		&0x0000008	\b|BINDATLOAD
+>>24	belong		&0x0000010	\b|PREBOUND
+>>24	belong		&0x0000020	\b|SPLIT_SEGS
+>>24	belong		&0x0000040	\b|LAZY_INIT
+>>24	belong		&0x0000080	\b|TWOLEVEL
+>>24	belong		&0x0000100	\b|FORCE_FLAT
+>>24	belong		&0x0000200	\b|NOMULTIDEFS
+>>24	belong		&0x0000400	\b|NOFIXPREBINDING
+>>24	belong		&0x0000800	\b|PREBINDABLE
+>>24	belong		&0x0001000	\b|ALLMODSBOUND
+>>24	belong		&0x0002000	\b|SUBSECTIONS_VIA_SYMBOLS
+>>24	belong		&0x0004000	\b|CANONICAL
+>>24	belong		&0x0008000	\b|WEAK_DEFINES
+>>24	belong		&0x0010000	\b|BINDS_TO_WEAK
+>>24	belong		&0x0020000	\b|ALLOW_STACK_EXECUTION
+>>24	belong		&0x0040000	\b|ROOT_SAFE
+>>24	belong		&0x0080000	\b|SETUID_SAFE
+>>24	belong		&0x0100000	\b|NO_REEXPORTED_DYLIBS
+>>24	belong		&0x0200000	\b|PIE
+>>24	belong		&0x0400000	\b|DEAD_STRIPPABLE_DYLIB
+>>24	belong		&0x0800000	\b|HAS_TLV_DESCRIPTORS
+>>24	belong		&0x1000000	\b|NO_HEAP_EXECUTION
+>>24	belong		&0x2000000	\b|APP_EXTENSION_SAFE
+>>24	belong		x		\b>
 
 #
 0	lelong&0xfffffffe	0xfeedface	Mach-O

+ 6 - 3
magic/Magdir/macintosh

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: macintosh,v 1.25 2014/09/03 13:34:16 christos Exp $
+# $File: macintosh,v 1.26 2015/11/25 00:36:02 christos Exp $
 # macintosh description
 #
 # BinHex is the Macintosh ASCII-encoded file format (see also "apple")
@@ -297,11 +297,14 @@
 >0x40e	ubeshort		0x0003		
 # maximal length of volume name is 27
 >>0x424		ubyte			<28	Macintosh HFS data
-#!:mime	application/octet-stream
-# these mime and apple types are not sure
 !:mime	application/x-apple-diskimage
 #!:apple	hfsdINIT
 #!:apple	MACSdisk
+# http://www.macdisk.com/macsigen.php
+#!:apple	ddskdevi
+!:apple	????devi
+# https://en.wikipedia.org/wiki/Apple_Disk_Image
+!:ext hfs/dmg
 >>>0		beshort			0x4C4B	(bootable)
 #>>>0		beshort			0x0000	(not bootable)
 >>>0x40a	beshort			&0x8000	(locked)

+ 21 - 0
magic/Magdir/microfocus

@@ -0,0 +1,21 @@
+
+#------------------------------------------------------------------------------
+# $File: microfocus,v 1.1 2016/02/09 01:22:49 christos Exp $
+# Micro Focus COBOL data files. 
+
+# http://documentation.microfocus.com/help/index.jsp?topic=\
+# %2FGUID-0E0191D8-C39A-44D1-BA4C-D67107BAF784%2FHRFLRHFILE05.html
+# http://www.cobolproducts.com/datafile/data-viewer.html
+# https://github.com/miracle2k/mfcobol-export
+
+0 string \x30\x00\x00\x7C
+>36 string \x00\x3E Micro Focus File with Header (DAT)
+!:mime application/octet-stream
+
+0 string \x30\x7E\x00\x00
+>36 string \x00\x3E Micro Focus File with Header (DAT)
+!:mime application/octet-stream
+
+39 string \x02
+>136 string \x02\x02\x04\x04 Micro Focus Index File (IDX)
+!:mime application/octet-stream

+ 30 - 2
magic/Magdir/misctools

@@ -1,6 +1,6 @@
 
 #-----------------------------------------------------------------------------
-# $File: misctools,v 1.15 2015/04/15 18:29:30 christos Exp $
+# $File: misctools,v 1.16 2016/02/14 15:46:52 christos Exp $
 # misctools:  file(1) magic for miscellaneous UNIX tools.
 #
 0	search/1	%%!!			X-Post-It-Note text
@@ -29,7 +29,35 @@
 0	search/80	.lo\ -\ a\ libtool\ object\ file	libtool object file
 
 # From: Daniel Novotny <dnovotny@redhat.com>
-0	string		MDMP\x93\xA7				MDMP crash report data
+# Update: Joerg Jenderek
+# URL: https://en.wikipedia.org/wiki/Core_dump#User-mode_memory_dumps
+# Reference: https://msdn.microsoft.com/en-us/library/ms680378%28VS.85%29.aspx
+#
+# "Windows Minidump" by TrID
+# ./misctools (version 5.25) labeled the entry as "MDMP crash report data"
+0	string		MDMP					Mini DuMP crash report
+# http://filext.com/file-extension/DMP
+!:mime	application/x-dmp
+!:ext	dmp/mdmp
+# The high-order word is an internal value that is implementation specific.
+# The low-order word is MINIDUMP_VERSION 0xA793
+>4	ulelong&0x0000FFFF	!0xA793				\b, version 0x%4.4x
+# NumberOfStreams 8,9,10,13
+>8	ulelong			x				\b, %d streams
+# StreamDirectoryRva 0x20
+>12	ulelong			!0x20				\b, 0x%8.8x RVA
+# CheckSum 0
+>16	ulelong			!0				\b, CheckSum 0x%8.8x
+# Reserved or TimeDateStamp 
+>20	ledate			x				\b, %s
+# https://msdn.microsoft.com/en-us/library/windows/desktop/ms680519%28v=vs.85%29.aspx
+# Flags MINIDUMP_TYPE enumeration type 0 0x121 0x800
+>24	ulelong			x				\b, 0x%x type
+# >24	ulelong			>0				\b; include
+# >>24	ulelong			&0x00000001			\b data sections,
+# >>24	ulelong			&0x00000020			\b list of unloaded modules,
+# >>24	ulelong			&0x00000100			\b process and thread information,
+# >>24	ulelong			&0x00000800			\b memory information,
 
 # Summary: abook addressbook file
 # Submitted by: Mark Schreiber <mark7@alumni.cmu.edu>

+ 43 - 3
magic/Magdir/modem

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: modem,v 1.6 2015/02/14 17:35:47 christos Exp $
+# $File: modem,v 1.7 2016/01/08 00:56:42 christos Exp $
 # modem:  file(1) magic for modem programs
 #
 # From: Florian La Roche <florian@knorke.saar.de>
@@ -8,8 +8,48 @@
 >29	byte		1			\b, fine resolution
 >29	byte		0			\b, normal resolution
 
-0	short		0x0100		raw G3 data, byte-padded
-0	short		0x1400		raw G3 data
+# Summary: CCITT Group 3 Facsimile in "raw" form (i.e. no header).
+# Modified by: Joerg Jenderek
+# URL: https://de.wikipedia.org/wiki/Fax
+# Reference: http://web.archive.org/web/20020628195336/http://www.netnam.vn/unescocourse/computervision/104.htm
+# GRR: EOL of G3 is too general as it catches also TrueType fonts, Postscript PrinterFontMetric, others
+0	short		0x0100		
+# 16 0-bits near beginning like True Type fonts *.ttf, Postscript PrinterFontMetric *.pfm, FTYPE.HYPERCARD, XFER
+>2	search/9	\0\0		
+# maximal 7 0-bits for pixel sequences or 11 0-bits for EOL in G3
+>2	default		x		
+# skip IRCAM file (VAX big-endian)	./audio
+>>0	belong		!0x0001a364	
+# skip GEM Image data			./images
+>>>2	beshort		!0x0008		
+# look for first keyword of Panorama database *.pan
+>>>>11	search/262	\x06DESIGN	
+# skip Panorama database
+>>>>11	default		x		
+# old Apple DreamWorld DreamGrafix *.3200 with keyword at end of g3 looking files
+>>>>>27118	search/1864	DreamWorld	
+>>>>>27118	default		x		
+# skip MouseTrap/Mt.Defaults with file size 16 found on Golden Orchard Apple II CD Rom
+>>>>>>8		ubequad		!0x2e01010454010203	
+# skip PICTUREH.SML found on Golden Orchard Apple II CD Rom
+>>>>>>>8	ubequad		!0x5dee74ad1aa56394	raw G3 (Group 3) FAX, byte-padded
+# version 5.25 labeled the entry above "raw G3 data, byte-padded"
+!:mime	image/g3fax
+#!:apple	????TIFF
+!:ext	g3
+# unusual image starting with black pixel
+#0	short		0x1300		raw G3 (Group 3) FAX
+0	short		0x1400		
+# 16 0-bits near beginning like PicturePuzzler found on Golden Orchard Apple CD Rom
+>2	search/9	\0\0		
+# maximal 7 0-bits for pixel sequences or 11 0-bits for EOL in G3
+>2	default		x		raw G3 (Group 3) FAX
+# version 5.25 labeled the above entry as "raw G3 data"
+!:mime	image/g3fax
+!:ext	g3
+# unusual image with black pixel near beginning
+#0	short		0x1900		raw G3 (Group 3) FAX
+
 #
 # Magic data for vgetty voice formats
 # (Martin Seine & Marc Eberhard)

+ 132 - 47
magic/Magdir/msdos

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: msdos,v 1.101 2015/08/24 05:08:48 christos Exp $
+# $File: msdos,v 1.106 2016/06/11 00:52:14 christos Exp $
 # msdos:  file(1) magic for MS-DOS files
 #
 
@@ -24,7 +24,11 @@
 100	search/0xffff   say
 >100	regex/c =^[\ \t]{0,10}say\ ['"]			OS/2 REXX batch file text
 
-0	leshort		0x14c	MS Windows COFF Intel 80386 object file
+# updated by Joerg Jenderek at Oct 2015
+# https://de.wikipedia.org/wiki/Common_Object_File_Format
+# http://www.delorie.com/djgpp/doc/coff/filhdr.html
+# ./intel already labeled COFF type 0x14c=0514 as "80386 COFF executable"
+#0	leshort		0x14c	MS Windows COFF Intel 80386 object file
 #>4	ledate		x	stamp %s
 0	leshort		0x166	MS Windows COFF MIPS R4000 object file
 #>4	ledate		x	stamp %s
@@ -405,8 +409,31 @@
 #>>10	string				x			%-.8s
 #>4	uleshort&0x4000			0x4000			\b,control strings-support)
 
-# test too generic ?
-0	byte		0x8c		DOS executable (COM)
+# updated by Joerg Jenderek
+# GRR: line below too general as it catches also 
+# rt.lib DYADISKS.PIC and many more
+# start with assembler instruction MOV
+0	ubyte		0x8c		
+# skip "AppleWorks word processor data" like ARTICLE.1 ./apple
+>4	string			!O====	
+# skip some unknown basic binaries like RocketRnger.SHR
+>>5	string			!MAIN	
+# skip "GPG symmetrically encrypted data" ./gnu
+# skip "PGP symmetric key encrypted data" ./pgp 
+# openpgpdefs.h: fourth byte < 14 indicate cipher algorithm type
+>>>4	ubyte			>13	DOS executable (COM, 0x8C-variant)
+# the remaining files should be DOS *.COM executables
+# dosshell.COM	8cc0 2ea35f07 e85211 e88a11 b80058 cd
+# hmload.COM	8cc8 8ec0 bbc02b 89dc 83c30f c1eb04 b4
+# UNDELETE.COM	8cca 2e8916 6503 b430 cd21 8b 2e0200 8b
+# BOOTFIX.COM	8cca 2e8916 9603 b430 cd21 8b 2e0200 8b
+# RAWRITE3.COM	8cca 2e8916 d602 b430 cd21 8b 2e0200 8b
+# SHARE.COM	8cca 2e8916 d602 b430 cd21 8b 2e0200 8b
+# validchr.COM	8cca 2e8916 9603 b430 cd21 8b 2e028b1e
+# devload.COM	8cca 8916ad01 b430 cd21 8b2e0200 892e
+!:mime	application/x-dosexec
+!:ext com
+
 # updated by Joerg Jenderek at Oct 2008
 0	ulelong		0xffff10eb	DR-DOS executable (COM)
 # byte 0xeb conflicts with "sequent" magic leshort 0xn2eb
@@ -418,23 +445,41 @@
 >>4	string		\ $ARX		DOS executable (COM), ARX self-extracting archive
 >>4	string		\ $LHarc	DOS executable (COM), LHarc self-extracting archive
 >>0x20e string		SFX\ by\ LARC	DOS executable (COM), LARC self-extracting archive
-# updated by Joerg Jenderek at Oct 2008
-#0	byte		0xb8		COM executable
-0	uleshort&0x80ff	0x00b8		
+# updated by Joerg Jenderek at Oct 2008,2015
+# following line is too general
+0	ubyte		0xb8		
+# skip 2 linux kernels like memtest.bin with "\xb8\xc0\x07\x8e" in ./linux
+>0	string		!\xb8\xc0\x07\x8e	
 # modified by Joerg Jenderek
->1	lelong		!0x21cd4cff	COM executable for DOS
+# syslinux COM32 or COM32R executable
+>>1	lelong&0xFFFFFFFe 0x21CD4CFe	COM executable (32-bit COMBOOT
+# http://www.syslinux.org/wiki/index.php/Comboot_API
+# Since version 5.00 c32 modules switched from the COM32 object format to ELF
+!:mime	application/x-c32-comboot-syslinux-exec
+!:ext c32
 # http://syslinux.zytor.com/comboot.php
+# older syslinux version ( <4 )
 # (32-bit COMBOOT) programs *.C32 contain 32-bit code and run in flat-memory 32-bit protected mode
 # start with assembler instructions mov eax,21cd4cffh
-0	uleshort&0xc0ff	0xc0b8		
->1	lelong		0x21cd4cff	COM executable (32-bit COMBOOT)
+>>>1	lelong		0x21CD4CFf	\b)
 # syslinux:doc/comboot.txt
 # A COM32R program must start with the byte sequence B8 FE 4C CD 21 (mov
 # eax,21cd4cfeh) as a magic number.
-0       string/b	\xb8\xfe\x4c\xcd\x21	COM executable (COM32R)
-# start with assembler instructions mov eax,21cd4cfeh
-0	uleshort&0xc0ff	0xc0b8		
->1	lelong		0x21cd4cfe	COM executable (32-bit COMBOOT, relocatable)
+# syslinux version (4.x)
+# "COM executable (COM32R)" or "Syslinux COM32 module" by TrID
+>>>1	lelong		0x21CD4CFe	\b, relocatable)
+# remaining are DOS COM executables starting with assembler instruction MOV
+# like FreeDOS BANNER*.COM FINDDISK.COM GIF2RAW.COM WINCHK.COM
+# MS-DOS SYS.COM RESTART.COM
+# SYSLINUX.COM (version 1.40 - 2.13)
+# GFXBOOT.COM (version 3.75)
+# COPYBS.COM POWEROFF.COM INT18.COM
+>>1	default	x			COM executable for DOS
+!:mime	application/x-dosexec
+#!:mime	application/x-ms-dos-executable
+#!:mime	application/x-msdos-program
+!:ext com
+
 0	string/b	\x81\xfc		
 >4	string	\x77\x02\xcd\x20\xb9	
 >>36	string	UPX!			FREE-DOS executable (COM), UPX compressed
@@ -588,47 +633,86 @@
 0	string/b	\102\101\050\000\000\000\056\000\000\000\000\000\000\000	Icon for MS Windows
 
 # Windows icons
-0   name    ico-dir
-# not entirely accurate, the number of icons is part of the header
->0  byte    1   - 1 icon
->0  ubyte   >1  - %d icons
->2  byte    0   \b, 256x
->2  byte    !0  \b, %dx
->3  byte    0   \b256
->3  byte    !0  \b%d
->4  ubyte   !0  \b, %d colors
-
+# Update: Joerg Jenderek
+# URL: https://en.wikipedia.org/wiki/CUR_(file_format)
+# Note: similar to Windows CURsor. container for BMP (only DIB part) or PNG
 0   belong  0x00000100
 >9  byte    0
->>0 byte    x           MS Windows icon resource
-!:mime	image/x-icon
->>4 use     ico-dir
+>>0 byte    x           
+>>0 use     cur-ico-dir
 >9  ubyte   0xff
->>0 byte    x           MS Windows icon resource
-!:mime	image/x-icon
->>4 use     ico-dir
+>>0 byte    x           
+>>0 use     cur-ico-dir
+#	displays number of icons and information for icon or cursor
+0	name		cur-ico-dir
+# skip some Lotus 1-2-3 worksheets, CYCLE.PIC and keep Windows cursors with
+# 1st data offset = dir header size + n * dir entry size = 6 + n * 10h = ?6h
+>18		ulelong		&0x00000006	
+# skip remaining worksheets, because valid only for DIB image (40) or PNG image (\x89PNG)
+>>(18.l)	ulelong		x		MS Windows
+>>>0		ubelong		0x00000100	icon resource
+#!:mime		image/vnd.microsoft.icon
+!:mime		image/x-icon
+!:ext		ico
+>>>>4 		uleshort	x		- %d icon
+# plural s
+>>>>4 		uleshort	>1		\bs
+# 1st icon
+>>>>0x06	use		ico-entry
+# 2nd icon
+>>>>4 		uleshort	>1		
+>>>>>0x16	use		ico-entry
+>>>0		ubelong		0x00000200	cursor resource
+#!:mime		image/x-cur
+!:mime		image/x-win-bitmap
+!:ext		cur
+>>>>4 		uleshort	x		- %d icon
+>>>>4 		uleshort	>1		\bs
+# 1st cursor
+>>>>0x06	use		cur-entry
+#>>>>0x16	use		cur-entry
+#	display information of one cursor entry
+0	name		cur-entry
+>0	use		cur-ico-entry
+>4	uleshort	x	\b, hotspot @%dx
+>6	uleshort	x	\b%d
+#	display information of one icon entry
+0	name		ico-entry
+>0			use	cur-ico-entry
+# normally 0 1 but also found 14
+>4	uleshort	>1	\b, %d planes
+# normally 0 1 but also found some 3, 4, some 6, 8, 24, many 32, two 256
+>6	uleshort	>1	\b, %d bits/pixel
+#	display shared information of cursor or icon entry
+0		name		cur-ico-entry
+>0		byte		=0		\b, 256x
+>0		byte		!0		\b, %dx
+>1		byte        	=0		\b256
+>1		byte        	!0		\b%d
+# number of colors in palette
+>2		ubyte		!0		\b, %d colors
+# reserved 0 FFh
+#>3		ubyte        	x		\b, reserved %x
+#>8		ulelong		x		\b, image size %d
+# offset of PNG or DIB image
+#>12		ulelong		x		\b, offset 0x%x
+# PNG header (\x89PNG)
+>(12.l)		ubelong		=0x89504e47	
+>>&-4		indirect	x	\b with 
+# DIB image
+>(12.l)		ubelong		!0x89504e47	
+#>>&-4		use     	dib-image
 
 # Windows non-animated cursors
-0   name    cur-dir
-# not entirely accurate, the number of icons is part of the header
->0  byte        1   - 1 icon
->0  ubyte       >1  - %d icons
->2  byte        0   \b, 256x
->2  byte        !0  \b, %dx
->3  byte        0   \b256
->3  byte        !0  \b%d
->6  uleshort    x   \b, hotspot @%dx
->8  uleshort    x   \b%d
-
+# Update: Joerg Jenderek
+# URL: https://en.wikipedia.org/wiki/CUR_(file_format)
+# Note: similar to Windows ICOn. container for BMP ( only DIB part)
+# GRR: line below is too general as it catches also Lotus 1-2-3 files
 0   belong  0x00000200
 >9  byte    0
->>0 byte    x           MS Windows cursor resource
-!:mime image/x-cur
->>4 use     cur-dir
+>>0 use     cur-ico-dir
 >9  ubyte   0xff
->>0 byte    x           MS Windows cursor resource
-!:mime image/x-cur
->>4 use     cur-dir
+>>0 use     cur-ico-dir
 
 # .chr files
 0	string/b	PK\010\010BGI	Borland font 
@@ -869,6 +953,7 @@
 
 # Windows Imaging (WIM) Image
 0	string/b	MSWIM\000\000\000	Windows imaging (WIM) image
+0	string/b	WLPWM\000\000\000	Windows imaging (WIM) image, wimlib pipable format
 
 # The second byte of these signatures is a file version; I don't know what, 
 # if anything, produced files with version numbers 0-2.

+ 27 - 6
magic/Magdir/msvc

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: msvc,v 1.5 2009/09/19 16:28:11 christos Exp $
+# $File: msvc,v 1.6 2016/01/26 00:03:19 christos Exp $
 # msvc:  file(1) magic for msvc
 # "H. Nanosecond" <aldomel@ix.netcom.com>
 # Microsoft visual C
@@ -27,11 +27,32 @@
 #.pch
 0	string	DTJPCH0\000\022\103\006\200	Microsoft Visual C .pch
 
-# .pdb
-# too long 0	string	Microsoft\ C/C++\ program\ database\ 
-0	string	Microsoft\ C/C++\ 	MSVC program database
->18	string	program\ database\ 	
->33	string	>\0	ver %s
+# Summary: Symbol Table / Debug info used by Microsoft compilers
+# URL: https://en.wikipedia.org/wiki/Program_database
+# Reference: https://code.google.com/p/pdbparser/wiki/MSF_Format
+# Update: Joerg Jenderek 
+# Note:	tested only for Windows XP+SP3 x86 , 8.1 x64 arm and 10.1 x86
+#	info applies only partly to older files like msvbvm50.pdb from about year 2001
+0	string	Microsoft\ C/C++\ 	
+# "Microsoft Program DataBase" by TrID
+>24	search/14	\r\n\x1A	MSVC program database
+!:mime	application/x-ms-pdb
+!:ext	pdb
+# "MSF 7.00" "program database 2.00" for msvbvm50.pdb
+>>16	regex	\([0-9.]+\)	ver %s
+#>>>0x38	search/128123456	/LinkInfo	\b with linkinfo
+# "MSF 7.00" variant
+>>0x1e	leshort	0	
+# PageSize 400h 1000h
+>>>0x20	lelong	x	\b, %d
+# Page Count
+>>>0x28	lelong	x	\b*%d bytes
+# "program database 2.00"  variant
+>>0x1e	leshort	!0	
+# PageSize 400h
+>>>0x2c	lelong	x	\b, %d
+# Page Count for msoo-dll.pdb 4379h
+>>>0x32	leshort	x	\b*%d bytes
 
 #.sbr
 0	string	\000\002\000\007\000	MSVC .sbr

+ 134 - 80
magic/Magdir/msx

@@ -1,26 +1,26 @@
 
 #------------------------------------------------------------------------------
 # msx:  file(1) magic for the MSX Home Computer
-# v1.1
+# v1.3
 # Fabio R. Schmidlin <sd-snatcher@users.sourceforge.net>
 
 ############## MSX Music file formats ##############
 
 # Gigamix MGSDRV music file
-0	string		MGS	MSX Gigamix MGSDRV3 music file, 
+0	string/b		MGS	MSX Gigamix MGSDRV3 music file, 
 >6	ubeshort	0x0D0A
 >>3	byte		x	\bv%c
 >>4	byte		x	\b.%c
 >>5	byte		x	\b%c
 >>8	string		>\0	\b, title: %s
 
-1	string		mgs2\ 	MSX Gigamix MGSDRV2 music file
+1	string/b		mgs2\ 	MSX Gigamix MGSDRV2 music file
 >6	uleshort	0x80
 >>0x2E	uleshort	0
 >>>0x30	string		>\0	\b, title: %s
 
 # KSS music file
-0	string		KSCC	KSS music file v1.03
+0	string/b		KSCC	KSS music file v1.03
 >0xE	byte		0
 >>0xF	byte&0x02	0	\b, soundchips: AY-3-8910, SCC(+)
 >>0xF	byte&0x02	2	\b, soundchip(s): SN76489
@@ -28,7 +28,7 @@
 >>0xF	byte&0x01	1	\b, YM2413
 >>0xF	byte&0x08	8	\b, Y8950
 
-0	string		KSSX	KSS music file v1.20
+0	string/b		KSSX	KSS music file v1.20
 >0xE	byte&0xEF	0
 >>0xF	byte&0x40	0x00	\b, 60Hz
 >>0xF	byte&0x40	0x40	\b, 50Hz
@@ -42,11 +42,11 @@
 >>0xF	byte&0x18	0x10	\b, Majyutsushi DAC
 
 # Moonblaster for Moonsound
-0	string		MBMS
+0	string/b		MBMS
 >4	byte		0x10	MSX Moonblaster for MoonSound music
 
 # Music Player K-kaz
-0	string		MPK	MSX Music Player K-kaz song
+0	string/b		MPK	MSX Music Player K-kaz song
 >6	ubeshort	0x0D0A
 >>3	byte		x	v%c
 >>4	byte		x	\b.%c
@@ -70,7 +70,7 @@
 >>>>>0	string		>\32		\b, title: %s
 
 # SCMD music file
-0x8B	string		SCMD
+0x8B	string/b		SCMD
 >0xCE	uleshort	0	MSX SCMD Music file
 #>>-2	uleshort	0x6a71	; The file must end with this value. How to code this here?
 >>0x8F	string		>\0		\b, title: %s
@@ -99,36 +99,8 @@
 >>5	uleshort	0
 >>>3	uleshort	>0x013D		MSX Graph Saurus compressed image
 
-# Maki-chan Graphic format
-0	string		MAKI02\ \ 	Maki-chan image,
->8	byte		x		system ID: %c
->9	byte		x		\b%c
->10	byte		x		\b%c
->11	byte		x		\b%c,
->13	search/0x200	\x1A
-# >>&3	ubyte		0		, video mode: PC-98 400 lines, 16 analog colors
-# >>&3	ubyte		1		, video mode: MSX SC7, 16 analog colors
-# >>&3	ubyte		2		, video mode: VM-98 400 lines, 8 analog colors
-# >>&3	ubyte		3		, video mode: PC-88 analog, 200 lines, 8 analog colors
-# >>&3	ubyte		4		, video mode: 400 lines, 16 digital colors
-# >>&3	ubyte		5		, video mode: 200 lines, 16 digital colors
-# >>&3	ubyte		6		, video mode: old PC-98 digital 400 lines, 8 colors
-# >>&3	ubyte		7		, video mode: PC-88 400 lines, 8 digital colors
->>&8	uleshort+1	x		%dx
->>&10	uleshort+1	x		\b%d,
->>&3	ubyte&0x82	0x80		256 colors
->>&3	ubyte&0x82	0x00		16 colors
->>&3	ubyte&0x82	0x01		8 colors
->>&3	ubyte&0x04	4		digital
->>&3	ubyte&0x04	0		analog
->>&3	ubyte&0x01	1		\b, 2:1 dot aspect ratio
-
-# Japanese PIC file
-0	string		PIC\x1A
->4	lelong		0		Japanese PIC image file
-
 # MSX G9B image file
-0	string		G9B
+0	string/b		G9B
 >1	uleshort	11
 >>3	uleshort	>10
 >>>5	ubyte		>0		MSX G9B image, depth=%d
@@ -146,70 +118,152 @@
 
 ############## Other MSX file formats ##############
 
-# MSX ROMs
-0	string		AB
+# MSX internal ROMs
+0		ubeshort	0xF3C3
+>2		uleshort	<0x4000
+>>8		ubyte		0xC3
+>>>9		uleshort	<0x4000
+>>>>0x0B	ubeshort	0x00C3
+>>>>>0x0D	uleshort	<0x4000
+>>>>>>0x0F	ubeshort	0x00C3
+>>>>>>>0x11	uleshort	<0x4000
+>>>>>>>>0x13	ubeshort	0x00C3
+>>>>>>>>>0x15	uleshort	<0x4000
+>>>>>>>>>>0x50	ubyte		0xC3
+>>>>>>>>>>>0x51	uleshort	<0x4000
+>>>>>>>>>>>>(9.s)	ubyte	0xC3
+>>>>>>>>>>>>>&0	uleshort	>0x4000
+>>>>>>>>>>>>>>&0	ubyte	0xC3		MSX BIOS+BASIC
+>>>>>>>>>>>>>>>0x002D	ubyte+1	<3		\b. version=MSX%d
+>>>>>>>>>>>>>>>0x002D	ubyte	2		\b, version=MSX2+
+>>>>>>>>>>>>>>>0x002D	ubyte	3		\b, version=MSX Turbo-R
+>>>>>>>>>>>>>>>0x002D	ubyte	>3		\b, version=Unknown MSX %d version
+>>>>>>>>>>>>>>>0x0006	ubyte	x		\b, VDP.DR=0x%2x
+>>>>>>>>>>>>>>>0x0007	ubyte	x		\b, VDP.DW=0x%2x
+>>>>>>>>>>>>>>>0x002B	ubyte&0xF	0		\b, charset=Japanese
+>>>>>>>>>>>>>>>0x002B	ubyte&0xF	1		\b, charset=International
+>>>>>>>>>>>>>>>0x002B	ubyte&0xF	2		\b, charset=Korean
+>>>>>>>>>>>>>>>0x002B	ubyte&0xF	>2		\b, charset=Unknown id:%d
+>>>>>>>>>>>>>>>0x002B	ubyte&0x70	0x00		\b, date format=Y-M-D
+>>>>>>>>>>>>>>>0x002B	ubyte&0x70	0x10		\b, date format=M-D-Y
+>>>>>>>>>>>>>>>0x002B	ubyte&0x70	0x20		\b, date format=D-M-Y
+>>>>>>>>>>>>>>>0x002B	ubyte&0x80	0x00		\b, vfreq=60Hz
+>>>>>>>>>>>>>>>0x002B	ubyte&0x80	0x80		\b, vfreq=50Hz
+>>>>>>>>>>>>>>>0x002C	ubyte&0x0F	0		\b, keyboard=Japanese
+>>>>>>>>>>>>>>>0x002C	ubyte&0x0F	1		\b, keyboard=International
+>>>>>>>>>>>>>>>0x002C	ubyte&0x0F	2		\b, keyboard=French
+>>>>>>>>>>>>>>>0x002C	ubyte&0x0F	3		\b, keyboard=UK
+>>>>>>>>>>>>>>>0x002C	ubyte&0x0F	4		\b, keyboard=German
+>>>>>>>>>>>>>>>0x002C	ubyte&0x0F	5		\b, keyboard=Unknown id:%d
+>>>>>>>>>>>>>>>0x002C	ubyte&0x0F	6		\b, keyboard=Spanish
+>>>>>>>>>>>>>>>0x002C	ubyte&0x0F	>6		\b, keyboard=Unknown id:%d
+>>>>>>>>>>>>>>>0x002C	ubyte&0xF0	0x00		\b, basic=Japanese
+>>>>>>>>>>>>>>>0x002C	ubyte&0xF0	0x10		\b, basic=International
+>>>>>>>>>>>>>>>0x002C	ubyte&0xF0	>0x10		\b, basic=Unknown id:%d
+>>>>>>>>>>>>>>>0x002E	ubyte&1		1		\b, built-in MIDI
+
+
+0		string/b		CD
+>2		uleshort	>0x10
+>>2		uleshort	<0x4000
+>>>4		uleshort	<0x4000
+>>>>6		uleshort	<0x4000
+>>>>>8		ubyte		0xC3
+>>>>>>9		uleshort	<0x4000
+>>>>>>>0x10	ubyte		0xC3
+>>>>>>>>0x11	uleshort	<0x4000
+>>>>>>>>>0x14	ubyte		0xC3
+>>>>>>>>>>0x15	uleshort	<0x4000		MSX2/2+/TR SubROM
+
+0		string		\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0
+>0x5F0		ubequad		0x8282828244380000
+>>0x150		ubyte		0x38
+>>>0x170	string		\20\20\20
+>>>>0x1E32	string		())
+>>>>>0x2130	ubequad		0xA5A5594924231807
+>>>>>0x2138	ubequad		0x4A4A3424488830C0	MSX Kanji Font
+
+
+
+# MSX extension ROMs
+0	string/b		AB
 >2	uleshort	0x0010			MSX ROM
 >>2	uleshort	x			\b, init=0x%4x
->>4	uleshort	>0			\b, stat=0x%4x
->>6	uleshort	>0			\b, dev=0x%4x
+>>4	uleshort	>0			\b, stahdl=0x%4x
+>>6	uleshort	>0			\b, devhdl=0x%4x
 >>8	uleshort	>0			\b, bas=0x%4x
 >2	uleshort	0x4010			MSX ROM
 >>2	uleshort	x			\b, init=0x%04x
->>4	uleshort	>0			\b, stat=0x%04x
->>6	uleshort	>0			\b, dev=0x%04x
+>>4	uleshort	>0			\b, stahdl=0x%04x
+>>6	uleshort	>0			\b, devhdl=0x%04x
 >>8	uleshort	>0			\b, bas=0x%04x
 >2	uleshort	0x8010			MSX ROM
 >>2	uleshort	x			\b, init=0x%04x
->>4	uleshort	>0			\b, stat=0x%04x
->>6	uleshort	>0			\b, dev=0x%04x
+>>4	uleshort	>0			\b, stahdl=0x%04x
+>>6	uleshort	>0			\b, devhdl=0x%04x
 >>8	uleshort	>0			\b, bas=0x%04x
+0	string/b		AB\0\0
+>6	uleshort	0
+>>4	uleshort	>0x400F			MSX-BASIC extension ROM
+>>>4	uleshort	>0			\b, stahdl=0x%04x
+>>>6	uleshort	>0			\b, devhdl=0x%04x
+>>>0x1C		string		OPLL			\b, MSX-Music
+>>>>0x18	string		PAC2			\b (external)
+>>>>0x18	string		APRL			\b (internal)
+
+0	string/b		AB\0\0\0\0
+>6	uleshort	>0x400F			MSX device BIOS
+>>6	uleshort	>0			\b, devhdl=0x%04x
+
 
-0	string		AB
+0	string/b		AB
 #>2	string		5JSuperLAYDOCK		MSX Super Laydock ROM
 #>3	string		@HYDLIDE3MSX		MSX Hydlide-3 ROM
 #>3	string		@3\x80IA862		Golvellius MSX1 ROM
->2	uleshort	>10
->>10	string		\0\0\0\0\0\0		MSX ROM
->>>0x10	string		YZ\0\0\0\0		Konami Game Master 2 MSX ROM
->>>0x10	string		CD			\b, Konami RC-
->>>>0x12	ubyte		x			\b%d
->>>>0x13	ubyte/16	x			\b%d
->>>>0x13	ubyte&0xF	x			\b%d
->>>0x10	string		EF			\b, Konami RC-
->>>>0x12	ubyte		x			\b%d
->>>>0x13	ubyte/16	x			\b%d
->>>>0x13	ubyte&0xF	x			\b%d
->>>2	uleshort	x			\b, init=0x%04x
->>>4	uleshort	>0			\b, stat=0x%04x
->>>6	uleshort	>0			\b, dev=0x%04x
->>>8	uleshort	>0			\b, bas=0x%04x
->2	uleshort	0
->>4	uleshort	0
->>>6	uleshort	0
->>>>8	uleshort	>0			MSX BASIC program in ROM, bas=0x%04x
-
-0x4000	string		AB
->0x4002	uleshort	>0x4010
->>0x400A	string		\0\0\0\0\0\0	MSX MegaROM with nonstandard page order
+>2	uleshort	>15
+>>2	uleshort	<0xC000
+>>>8	string		\0\0\0\0\0\0\0\0
+>>>>(2.s&0x3FFF)	uleshort	>0		MSX ROM
+>>>>>0x10	string		YZ\0\0\0\0		Konami Game Master 2 MSX ROM
+>>>>>0x10	string		CD			\b, Konami RC-
+>>>>>>0x12	ubyte		x			\b%d
+>>>>>>0x13	ubyte/16	x			\b%d
+>>>>>>0x13	ubyte&0xF	x			\b%d
+>>>>>0x10	string		EF			\b, Konami RC-
+>>>>>>0x12	ubyte		x			\b%d
+>>>>>>0x13	ubyte/16	x			\b%d
+>>>>>>0x13	ubyte&0xF	x			\b%d
+>>>>>2	uleshort	x			\b, init=0x%04x
+>>>>>4	uleshort	>0			\b, stahdl=0x%04x
+>>>>>6	uleshort	>0			\b, devhdl=0x%04x
+>>>>>8	uleshort	>0			\b, bas=0x%04x
+>>>2	uleshort	0
+>>>>4	uleshort	0
+>>>>>6	uleshort	0
+>>>>>>8	uleshort	>0			MSX BASIC program in ROM, bas=0x%04x
+
+0x4000	string/b		AB
+>0x4002	uleshort	>0x400F
+>>0x400A	string		\0\0\0\0\0\0	MSX ROM with nonstandard page order
 >>0x4002	uleshort	x			\b, init=0x%04x
->>0x4004	uleshort	>0			\b, stat=0x%04x
->>0x4006	uleshort	>0			\b, dev=0x%04x
+>>0x4004	uleshort	>0			\b, stahdl=0x%04x
+>>0x4006	uleshort	>0			\b, devhdl=0x%04x
 >>0x4008	uleshort	>0			\b, bas=0x%04x
 
-0x8000	string		AB
->0x8002	uleshort	>0x4010
->>0x800A	string		\0\0\0\0\0\0	MSX MegaROM with nonstandard page order
+0x8000	string/b		AB
+>0x8002	uleshort	>0x400F
+>>0x800A	string		\0\0\0\0\0\0	MSX ROM with nonstandard page order
 >>0x8002	uleshort	x			\b, init=0x%04x
->>0x8004	uleshort	>0			\b, stat=0x%04x
->>0x8006	uleshort	>0			\b, dev=0x%04x
+>>0x8004	uleshort	>0			\b, stahdl=0x%04x
+>>0x8006	uleshort	>0			\b, devhdl=0x%04x
 >>0x8008	uleshort	>0			\b, bas=0x%04x
 
 
-0x3C000	string		AB
+0x3C000	string/b		AB
 >0x3C008	string		\0\0\0\0\0\0\0\0	MSX MegaROM with nonstandard page order
 >>0x3C002	uleshort	x			\b, init=0x%04x
->>0x3C004	uleshort	>0			\b, stat=0x%04x
->>0x3C006	uleshort	>0			\b, dev=0x%04x
+>>0x3C004	uleshort	>0			\b, stahdl=0x%04x
+>>0x3C006	uleshort	>0			\b, devhdl=0x%04x
 >>0x3C008	uleshort	>0			\b, bas=0x%04x
 
 # MSX BIN file
@@ -224,7 +278,7 @@
 >>1	uleshort	>0x8000			MSX-BASIC program
 
 # MSX .CAS file
-0	string	\x1F\xA6\xDE\xBA\xCC\x13\x7D\x74	MSX cassette archive
+0	string/b	\x1F\xA6\xDE\xBA\xCC\x13\x7D\x74	MSX cassette archive
 
 # Mega-Assembler file
 0	byte		0xFE

+ 18 - 1
magic/Magdir/netbsd

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: netbsd,v 1.22 2014/12/08 20:53:52 christos Exp $
+# $File: netbsd,v 1.23 2015/11/29 01:55:14 christos Exp $
 # netbsd:  file(1) magic for NetBSD objects
 #
 # All new-style magic numbers are in network byte order.
@@ -286,3 +286,20 @@
 >4	leshort	x			\b, (headersize = %d
 >6	leshort	x			\b, segmentsize = %d
 >6	lelong	x			\b, segments = %d)
+
+# little endian only for now.
+0	name		ktrace
+>4	leshort		7
+>>6	leshort		<3		NetBSD ktrace file version %d
+>>>12	string		x		from %s
+>>>56	string		x		\b, emulation %s
+>>>8	lelong		<65536		\b, pid=%d
+
+56	string		netbsd
+>0	use		ktrace
+56	string		linux
+>0	use		ktrace
+56	string		sunos
+>0	use		ktrace
+56	string		hpux
+>0	use		ktrace

+ 2 - 2
magic/Magdir/os2

@@ -1,12 +1,12 @@
 
 #------------------------------------------------------------------------------
-# $File: os2,v 1.8 2015/01/05 00:17:13 christos Exp $
+# $File: os2,v 1.9 2016/05/11 15:51:57 christos Exp $
 # os2:  file(1) magic for OS/2 files
 #
 
 # Provided 1998/08/22 by
 # David Mediavilla <davidme.news@REMOVEIFNOTSPAMusa.net>
-1	search/1	InternetShortcut	MS Windows 95 Internet shortcut text
+1	search/100	InternetShortcut	MS Windows 95 Internet shortcut text
 >17	search/100	URL= 			(URL=<
 >>&0	string		x			\b%s>)
 

+ 24 - 0
magic/Magdir/pc88

@@ -0,0 +1,24 @@
+#------------------------------------------------------------------------------
+# pc88:  file(1) magic for the NEC Home Computer
+# v1.0
+# Fabio R. Schmidlin <sd-snatcher@users.sourceforge.net>
+
+# PC88 2D disk image
+0x20		ulelong&0xFFFFFEFF	0x2A0
+>0x10		string		\0\0\0\0\0\0\0\0\0\0
+>>0x280		string		\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0
+>>>0x1A		ubyte&0xEF	0
+>>>>0x1B	ubyte&0x8F	0
+>>>>>0x1B	ubyte&70	<0x40	
+>>>>>>0x1C	ulelong	>0x21
+>>>>>>>0		regex	[[:print:]]*	NEC PC-88 disk image, name=%s
+>>>>>>>>0x1B	ubyte	0	\b, media=2D
+>>>>>>>>0x1B	ubyte	0x10	\b, media=2DD
+>>>>>>>>0x1B	ubyte	0x20	\b, media=2HD
+>>>>>>>>0x1B	ubyte	0x30	\b, media=1D
+>>>>>>>>0x1B	ubyte	0x40	\b, media=1DD
+>>>>>>>>0x1A	ubyte	0x10	\b, write-protected
+
+
+
+

+ 77 - 0
magic/Magdir/pc98

@@ -0,0 +1,77 @@
+#------------------------------------------------------------------------------
+# pc98:  file(1) magic for the NEC PC-98 Home Computer
+# v1.0
+# Fabio R. Schmidlin <sd-snatcher@users.sourceforge.net>
+
+# Maki-chan v1 Graphic format
+# The image resolution should be X=(44.L - 40.L) and Y=(46.L - 42.L), but I couldn't find a way to do so
+# http://www.jisyo.com/viewer/faq/maki_tech.htm
+0	string/b		MAKI01 	Maki-chan v1.
+>6	ubyte|0x20	x		\b%c image
+>8	ubelong		>0x40404040	\b, system ID: 
+>>8	byte		x		%c
+>>9	byte		x		\b%c
+>>10	byte		x		\b%c
+>>11	byte		x		\b%c
+>44	ubeshort	x		\b, %dx
+>46	ubeshort	x		\b%d
+>38	ubeshort&2	0		\b, 16 paletted RGB colors
+>38	ubeshort&2	2		\b, 8 fixed RGB colors
+>38	ubeshort&1	1		\b, 2:1 dot aspect ratio
+
+# Maki-chan v2 Graphic format
+# http://www.jisyo.com/viewer/faq/mag_tech.htm
+# http://mooncore.eu/bunny/txt/makichan.htm
+# http://metanest.jp/mag/mag.xhtml
+0	string/b		MAKI02\ \ 	Maki-chan v2 image,
+>8	byte		x		system ID: %c
+>9	byte		x		\b%c
+>10	byte		x		\b%c
+>11	byte		x		\b%c,
+>13	search/0x200	\x1A
+#Maki-chan video modes are a bit messy and seem to have been expanded over the years without too much planning:
+#1) When offset1(ubeshort) !=0x0344:
+# 1.1) And  offset3(ubyte).b7=0:
+# - b0=pixel aspect ratio: 1=2:1   (note: this ignores that the machine's 1:1 pixel aspect ratio isn't really 1:1)
+# - b1=number of colors: 0=16 colors, 1=8 colors
+# - b2=Palette or fixed colors flag (called "analog" and "digital" in the doc): 0=Paletted, 1=Fixed colors encoded directly in the pixel data
+# 1.2) And  offset3(ubyte).B7=1:
+# - b0=256 paletted colors
+# - b1=256 fixed colors using the MSX SCR8 palette
+#2) When offset1(ubeshort) =0x0344:
+# - 256x212 image with 19268 YJK colors. The usual resolution and color information fields from the file must be ignored
+>>&1	ubeshort	0x0344		256x212, 19268 fixed YJK colors
+>>&1	ubeshort	!0x0344
+>>>&5	uleshort+1	x		%dx
+>>>&7	uleshort+1	x		\b%d,
+>>>&0	ubyte&0x86	0x00		16 paletted RGB colors
+>>>&0	ubyte&0x86	0x02		8 paletted RGB colors
+>>>&0	ubyte&0x86	0x04		16 fixed RGB colors
+>>>&0	ubyte&0x86	0x06		8 fixed RGB colors
+>>>&0	ubyte&0x81	0x80		256 paletted RGB colors
+>>>&0	ubyte&0x81	0x81		256 fixed MSX-SCR8 colors
+>>>&0	ubyte&0x01	1		\b, 2:1 dot aspect ratio
+
+# XLD4 (Q4) picture
+11	string/b	MAJYO		XLD4(Q4) picture
+
+# Yanagisawa Pi picture
+#0	string		Pi\x1A\0	Yanagisawa Pi picture
+#>3	search/0x200	\x04
+0	string		Pi
+>2	search/0x200	\x1A
+>>&0	ubyte		0
+>>>&3	ubyte		4		Yanagisawa Pi 16 color picture,
+>>>&4	byte		x		system ID: %c
+>>>&5	byte		x		\b%c
+>>>&6	byte		x		\b%c
+>>>&7	byte		x		\b%c,
+>>>&10	ubeshort	x		%dx
+>>>&12	ubeshort	x		\b%d
+>>>&3	ubyte		8		Yanagisawa Pi 256 color picture
+>>>&4	byte		x		system ID: %c
+>>>&5	byte		x		\b%c
+>>>&6	byte		x		\b%c
+>>>&7	byte		x		\b%c,
+>>>&10	ubeshort	x		%dx
+>>>&12	ubeshort	x		\b%d

+ 4 - 4
magic/Magdir/perl

@@ -1,5 +1,5 @@
 #------------------------------------------------------------------------------
-# $File: perl,v 1.24 2015/03/27 17:58:58 christos Exp $
+# $File: perl,v 1.25 2016/06/07 23:28:37 rrt Exp $
 # perl:  file(1) magic for Larry Wall's perl language.
 #
 # The `eval' lines recognizes an outrageously clever hack.
@@ -23,11 +23,11 @@
 !:mime	text/x-perl
 0	search/1024	eval\ '(exit\ $?0)'\ &&\ eval\ 'exec	Perl script text
 !:mime	text/x-perl
-0	search/1024	#!/usr/bin/env\ perl	Perl script text executable
+0	string	#!/usr/bin/env\ perl	Perl script text executable
 !:mime	text/x-perl
-0	search/1024	#!\ /usr/bin/env\ perl	Perl script text executable
+0	string	#!\ /usr/bin/env\ perl	Perl script text executable
 !:mime	text/x-perl
-0	search/1024	#!
+0	string	#!
 >0	regex	\^#!.*/bin/perl([[:space:]].*)*$	Perl script text executable
 !:mime	text/x-perl
 

+ 23 - 0
magic/Magdir/polyml

@@ -0,0 +1,23 @@
+
+#------------------------------------------------------------------------------
+# $File: polyml,v 1.1 2016/02/26 15:52:45 christos Exp $
+# polyml:  file(1) magic for PolyML
+#
+# PolyML
+# MPEG, FLI, DL originally from vax@ccwf.cc.utexas.edu (VaX#n8)
+# FLC, SGI, Apple originally from Daniel Quinlan (quinlan@yggdrasil.com)
+
+# [0]: http://www.polyml.org/
+# [1]: https://github.com/polyml/polyml/blob/master/\
+#	libpolyml/savestate.cpp#L146-L147
+# [2]: https://github.com/polyml/polyml/blob/master/\
+#	libpolyml/savestate.cpp#L1262-L1263
+
+# Type: Poly/ML saved data
+# From: Matthew Fernandez <matthew.fernandez@gmail.com>
+
+0	string	POLYSAVE	Poly/ML saved state
+>8	long	x		version %u
+
+0	string  POLYMODU	Poly/ML saved module
+>8	long	x		version %u

+ 10 - 4
magic/Magdir/psdbms

@@ -1,8 +1,14 @@
 
 #------------------------------------------------------------------------------
-# $File: psdbms,v 1.6 2009/09/19 16:28:11 christos Exp $
+# $File: psdbms,v 1.7 2016/01/08 00:41:02 christos Exp $
 # psdbms:  file(1) magic for psdatabase
 #
-0	belong&0xff00ffff	0x56000000	ps database
->1	string	>\0	version %s
->4	string	>\0	from kernel %s
+# Update: Joerg Jenderek
+# GRR: line below too general as it catches also some Panorama database *.pan ,
+# AppleWorks word processor
+0	belong&0xff00ffff	0x56000000	
+# assume version starts with digit
+>1	regex/s			=^[0-9]		ps database
+>>1	string	>\0	version %s
+# kernel name
+>>4	string	>\0	from kernel %s

+ 5 - 5
magic/Magdir/python

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: python,v 1.27 2015/09/08 13:59:44 christos Exp $
+# $File: python,v 1.28 2015/09/16 22:19:54 christos Exp $
 # python:  file(1) magic for python
 #
 # Outlook puts """ too for urgent messages
@@ -26,16 +26,16 @@
 0	belong		0xee0c0d0a	python 3.4 byte-compiled
 
 0	search/1/w	#!\ /usr/bin/python	Python script text executable
-!:strength + 10
+!:strength + 15
 !:mime text/x-python
 0	search/1/w	#!\ /usr/local/bin/python	Python script text executable
-!:strength + 10
+!:strength + 15
 !:mime text/x-python
 0	search/1	#!/usr/bin/env\ python	Python script text executable
-!:strength + 10
+!:strength + 15
 !:mime text/x-python
 0	search/10	#!\ /usr/bin/env\ python	Python script text executable
-!:strength + 10
+!:strength + 15
 !:mime text/x-python
 
 

+ 22 - 5
magic/Magdir/sendmail

@@ -1,14 +1,31 @@
 
 #------------------------------------------------------------------------------
-# $File: sendmail,v 1.7 2009/09/19 16:28:12 christos Exp $
+# $File: sendmail,v 1.8 2015/11/11 15:27:03 christos Exp $
 # sendmail:  file(1) magic for sendmail config files
 #
 # XXX - byte order?
 #
-0	byte	046	  Sendmail frozen configuration 
->16	string	>\0	  - version %s
-0	short	0x271c	  Sendmail frozen configuration
->16	string	>\0	  - version %s
+# Update: Joerg Jenderek 
+# GRR: this test is too general as it catches also
+# READ.ME.FIRST.AWP Sendmail frozen configuration
+# - version ====|====|====|====|====|====|====|====|====|====|====|====|===
+# Email_23_f217153422.ts Sendmail frozen configuration
+# - version \330jK\354
+0	byte	046	  
+# http://www.sendmail.com/sm/open_source/docs/older_release_notes/
+# frozen configuration file (dbm format?) created from sendmail.cf with -bz
+# by older sendmail. til version 8.6 support for frozen configuration files is removed
+# valid version numbers look like "7.14.4" and should be similar to output of commands
+# "sendmail -d0 -bt < /dev/null |grep -i Version" or "egrep '^DZ' /etc/sendmail.cf" 
+>16	regex/s	=^[0-78][0-9.]{4}	Sendmail frozen configuration
+# normally only /etc/sendmail.fc or /var/adm/sendmail/sendmail.fc
+!:ext fc
+>>16	string	>\0			- version %s
+0	short	0x271c	  
+# look for valid version number
+>16	regex/s	=^[0-78][0-9.]{4}	Sendmail frozen configuration
+!:ext fc
+>>16	string	>\0			- version %s
 
 #------------------------------------------------------------------------------
 # sendmail:  file(1) magic for sendmail m4(1) files

+ 6 - 6
magic/Magdir/sgml

@@ -1,4 +1,4 @@
-#------------------------------------------------------------------------------ # $File: sgml,v 1.32 2015/07/11 15:08:53 christos Exp $
+#------------------------------------------------------------------------------ # $File: sgml,v 1.33 2015/11/29 22:14:49 christos Exp $
 # Type:	SVG Vectorial Graphics
 # From:	Noel Torres <tecnico@ejerciciosresueltos.com>
 0	string		\<?xml\ version="
@@ -88,25 +88,25 @@
 # Extensible markup language (XML), a subset of SGML
 # from Marc Prud'hommeaux (marc@apocalypse.org)
 0	search/1/cwt	\<?xml			XML document text
-!:mime	application/xml
+!:mime	text/xml
 !:strength + 5
 0	string/t		\<?xml\ version\ "	XML
-!:mime	application/xml
+!:mime	text/xml
 !:strength + 5
 0	string/t		\<?xml\ version="	XML
-!:mime	application/xml
+!:mime	text/xml
 !:strength + 5
 >15	string/t	>\0			%.3s document text
 >>23	search/1	\<xsl:stylesheet	(XSL stylesheet)
 >>24	search/1	\<xsl:stylesheet	(XSL stylesheet)
 0	string		\<?xml\ version='	XML
-!:mime	application/xml
+!:mime	text/xml
 !:strength + 5
 >15	string/t	>\0			%.3s document text
 >>23	search/1	\<xsl:stylesheet	(XSL stylesheet)
 >>24	search/1	\<xsl:stylesheet	(XSL stylesheet)
 0	search/1/wt	\<?XML			broken XML document text
-!:mime	application/xml
+!:mime	text/xml
 !:strength - 10
 
 

+ 8 - 15
magic/Magdir/sinclair

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: sinclair,v 1.5 2009/09/19 16:28:12 christos Exp $
+# $File: sinclair,v 1.6 2015/11/14 13:38:35 christos Exp $
 # sinclair:  file(1) sinclair QL
 
 # additions to /etc/magic by Thomas M. Ott (ThMO)
@@ -13,20 +13,13 @@
 >4	string	>\0		label:%.10s
 
 # Sinclair QL OS dump (ThMO)
-# (NOTE: if `file' would be able to use indirect references in a endian format
-#	 differing from the natural host format, this could be written more
-#	 reliably and faster...)
-#
-# we *can't* lookup QL OS code dumps, because `file' is UNABLE to read more
-# than the first 8K of a file... #-(
-#
-#0		belong	=0x30000
-#>49124		belong	<47104
-#>>49128		belong	<47104
-#>>>49132	belong	<47104
-#>>>>49136	belong	<47104	QL OS dump data,
-#>>>>>49148	string	>\0	type %.3s,
-#>>>>>49142	string	>\0	version %.4s
+0		belong	=0x30000
+>49124		belong	<47104
+>>49128		belong	<47104
+>>>49132	belong	<47104
+>>>>49136	belong	<47104	QL OS dump data,
+>>>>>49148	string	>\0	type %.3s,
+>>>>>49142	string	>\0	version %.4s
 
 # Sinclair QL firmware executables (ThMO)
 0	string	NqNqNq`\004	QL firmware executable (BCPL)

+ 26 - 13
magic/Magdir/sql

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: sql,v 1.15 2014/04/30 21:41:02 christos Exp $
+# $File: sql,v 1.18 2015/12/04 20:38:43 christos Exp $
 # sql:  file(1) magic for SQL files
 #
 # From: "Marty Leisner" <mleisner@eng.mc.xerox.com>
@@ -54,21 +54,32 @@
 # at offset 68 that is preferred over "user version" for indicating the
 # associated application.
 #
-0   string  SQLite\ format\ 3
->60 belong  =0x5f4d544e  Monotone source repository - SQLite3 database
->68 belong  =0x0f055112  Fossil checkout - SQLite3 database
->68 belong  =0x0f055113  Fossil global configuration - SQLite3 database
->68 belong  =0x0f055111  Fossil repository - SQLite3 database
->68 belong  =0x42654462  Bentley Systems BeSQLite Database - SQLite3 database
->68 belong  =0x42654c6e  Bentley Systems Localization File - SQLite3 database
->68 belong  =0x47504b47  OGC GeoPackage file - SQLite3 database
->68 default x            SQLite 3.x database
+0   string  SQLite\ format\ 3	SQLite 3.x database
+!:mime	application/x-sqlite3
+# seldom found extension sqlite3 like in SyncData.sqlite3
+# db
+# Avira Antivir uses extension "dbe" like in avevtdb.dbe, avguard_tchk.dbe
+# Unfortunately extension sqlite also used for other databases starting with string
+# "TTCONTAINER" like in tracks.sqlite contentconsumer.sqlite contentproducerrepository.sqlite
+# and with string "ZV-zlib" like in extra.sqlite
+!:ext sqlite/sqlite3/db/dbe
+>60 belong  =0x5f4d544e  (Monotone source repository)
+>68 belong  =0x0f055112  (Fossil checkout)
+>68 belong  =0x0f055113  (Fossil global configuration)
+>68 belong  =0x0f055111  (Fossil repository)
+>68 belong  =0x42654462  (Bentley Systems BeSQLite Database)
+>68 belong  =0x42654c6e  (Bentley Systems Localization File)
+>68 belong  =0x47504b47  (OGC GeoPackage file)
+>68 default x
 >>68 belong  !0          \b, application id %u
 >>60 belong  !0          \b, user version %d
+>96 belong  x            \b, last written using SQLite version %d
+
 
 # SQLite Write-Ahead Log from SQLite version >= 3.7.0
 # http://www.sqlite.org/fileformat.html#walformat
 0	belong&0xfffffffe	0x377f0682	SQLite Write-Ahead Log,
+!:ext sqlite-wal/db-wal
 >4	belong	x	version %d
 
 # SQLite Rollback Journal
@@ -76,8 +87,10 @@
 0	string	\xd9\xd5\x05\xf9\x20\xa1\x63\xd7	SQLite Rollback Journal
 
 # Panasonic channel list database svl.bin or svl.db added by Joerg Jenderek
-# http://www.ullrich.es/job/service-menue/panasonic/panasonic-sendersortierung-sat-am-pc/
-# pceditor_V2003.jar
-0	string		PSDB\0			Panasonic channel list database
+# https://github.com/PredatH0r/ChanSort
+0	string		PSDB\0			Panasonic channel list DataBase
+!:ext db/bin
+#!:mime	application/x-db-svl-panasonic
 >126	string		SQLite\ format\ 3	
+#!:mime	application/x-panasonic-sqlite3
 >>&-15	indirect	x			\b; contains 

+ 16 - 2
magic/Magdir/terminfo

@@ -1,10 +1,24 @@
 
 #------------------------------------------------------------------------------
-# $File: terminfo,v 1.6 2009/09/19 16:28:12 christos Exp $
+# $File: terminfo,v 1.7 2016/03/17 21:02:29 christos Exp $
 # terminfo:  file(1) magic for terminfo
 #
 # XXX - byte order for screen images?
 #
-0	string		\032\001	Compiled terminfo entry
+# URL: https://en.wikipedia.org/wiki/Terminfo
+# Reference: ncurses-5.9/ncurses/tinfo/write_entry.c
+# Update: Joerg Jenderek
+#
+# GRR: line below too general as it catches also 
+# Targa image type 1 with 26 long identification field
+# and HELP.DSK
+0	string		\032\001	
+# 5th character of terminal name list, but not Targa image pixel size (15 16 24 32)
+>16	ubyte		>32		
+# namelist, if more than 1 separated by "|" like "st|stterm| simpleterm 0.4.1"
+>>12	regex		\^[a-zA-Z0-9][a-zA-Z0-9.][^|]*	Compiled terminfo entry "%-s"
+!:mime	application/x-terminfo
+# no extension
+#!:ext	
 0	short		0433		Curses screen image
 0	short		0434		Curses screen image

+ 54 - 0
magic/Magdir/vacuum-cleaner

@@ -0,0 +1,54 @@
+
+#------------------------------------------------------------------------------
+# $File: vacuum-cleaner,v 1.1 2015/11/14 13:38:35 christos Exp $
+# vacuum cleaner magic by Thomas M. Ott (ThMO)
+#
+# navigation map for LG robot vacuum cleaner models VR62xx, VR64xx, VR63xx
+# file: MAPDATAyyyymmddhhmmss_xxxxxx_cc.blk
+# -> yyyymmdd: year, month, day of cleaning
+# -> hhmmss: hour, minute, second of cleaning
+# -> xxxxxx: 6 digits
+# -> cc: cleaning runs counter
+# size: 136044 bytes
+#
+# struct maphdr {
+#     int32_t  map_cnt;	     /*  0: single map */
+#     int32_t  min_ceil;     /*  4: 100 mm == 10 cm == min. ceil */
+#     int32_t  max_ceil;     /*  8: 10000 mm == 100 m == max. ceil */
+#     int32_t  max_climb;    /* 12: 50 mm = 5 cm == max. height to climb */
+#     int32_t  unknown;	     /* 16: 50000 ??? */
+#     int32_t  cell_bytes;   /* 20: # of bytes for cells per block */
+#     int32_t  block_max;    /* 24: 1000 == max. # of blocks */
+#     int32_t  route_max;    /* 28: 1000 == max. # of routes */
+#     int32_t  used_blocks;  /* 32: 5/45/33/... == # of block entries used! */
+#     int32_t  cell_dim;     /* 36: 10 == cell dimension */
+#     int32_t  clock_tick;   /* 40: 100 == clock ticks */
+# #if	0
+#     struct {		     /* 44: 1000 blocks for 10x10 cells */
+#         int32_t  yoffset;
+#         int32_t  xoffset;
+#         int32_t  posxy;
+#         int32_t  timecode;
+#       }      blocks[ 1000];
+#     char     cells[ 1000* 100]; /* 16044: 1000 10x10 cells */
+#     int16_t  routes[ 1000* 10]; /* 116044: 1000 10-routes */
+# #endif
+#   };
+
+0                lelong =1
+>4               lelong =100
+>>8              lelong =10000
+>>>12            lelong =50
+>>>>16           lelong =50000
+>>>>>20          lelong =100
+>>>>>>24         lelong =1000
+>>>>>>>28        lelong =1000
+>>>>>>>>36       lelong =10
+>>>>>>>>>40      lelong =100
+>>>>>>>>>>32     lelong x       LG robot VR6[234]xx %dm^2 navigation
+>>>>>>>>>>136040 lelong =-1     reuse map data
+>>>>>>>>>>136040 lelong =0      map data
+>>>>>>>>>>136040 lelong >0      spurious map data
+>>>>>>>>>>136040 lelong <-1     spurious map data
+
+

+ 240 - 3
magic/Magdir/windows

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: windows,v 1.12 2015/08/29 07:10:35 christos Exp $
+# $File: windows,v 1.14 2015/12/15 01:06:17 christos Exp $
 # windows:  file(1) magic for Microsoft Windows
 #
 # This file is mainly reserved for files where programs
@@ -64,10 +64,148 @@
 
 
 # Summary: Old format help files
-# Extension: .hlp
+# URL: https://en.wikipedia.org/wiki/WinHelp
+# Reference: http://www.oocities.org/mwinterhoff/helpfile.htm
+# Update: Joerg Jenderek 
 # Created by: Dirk Jagdmann <doj@cubic.org>
-0	lelong		0x00035f3f		MS Windows 3.x help file
+#
+# check and then display version and date inside MS Windows HeLP file fragment
+0	name				help-ver-date
+# look for Magic of SYSTEMHEADER
+>0	leshort		0x036C		
+# version Major		1 for right file fragment
+>>4	leshort		1		Windows
+# print non empty string above to avoid error message
+# Warning: Current entry does not yet have a description for adding a MIME type
+!:mime	application/winhelp
+!:ext	hlp
+# version Minor of help file format is hint for windows version
+>>>2	leshort		0x0F		3.x
+>>>2	leshort		0x15		3.0
+>>>2	leshort		0x21		3.1
+>>>2	leshort		0x27		x.y
+>>>2	leshort		0x33		95
+>>>2	default		x		y.z
+>>>>2	leshort		x		0x%x
+# to complete message string like "MS Windows 3.x help file"
+>>>2	leshort		x		help
+# GenDate often older than file creation date
+>>>6	ldate		x		\b, %s
+#
+# Magic for HeLP files
+0	lelong		0x00035f3f		
+# ./windows (version 5.25) labeled the entry as "MS Windows 3.x help file"
+# file header magic 0x293B at DirectoryStart+9
+>(4.l+9)	uleshort	0x293B		MS
+# look for @VERSION	bmf.. like IBMAVW.ANN
+>>0xD4		string	=\x62\x6D\x66\x01\x00	Windows help annotation
+!:mime	application/x-winhelp
+!:ext	ann
+>>0xD4		string	!\x62\x6D\x66\x01\x00	
+# "GID Help index" by TrID
+>>>(4.l+0x65)	string	=|Pete			Windows help Global Index
+!:mime	application/x-winhelp
+!:ext	gid
+# HeLP Bookmark or
+# "Windows HELP File" by TrID
+>>>(4.l+0x65)		string		!|Pete		
+# maybe there exists a cleaner way to detect HeLP fragments
+# brute search for Magic 0x036C with matching Major maximal 7 iterations
+# discapp.hlp
+>>>>16			search/0x49AF/s	\x6c\x03 	
+>>>>>&0			use 		help-ver-date
+>>>>>&4			leshort		!1		
+# putty.hlp
+>>>>>>&0		search/0x69AF/s	\x6c\x03 	
+>>>>>>>&0		use 		help-ver-date
+>>>>>>>&4		leshort		!1		
+>>>>>>>>&0		search/0x49AF/s	\x6c\x03 	
+>>>>>>>>>&0		use 		help-ver-date
+>>>>>>>>>&4		leshort		!1		
+>>>>>>>>>>&0		search/0x49AF/s	\x6c\x03 	
+>>>>>>>>>>>&0		use 		help-ver-date
+>>>>>>>>>>>&4		leshort		!1		
+>>>>>>>>>>>>&0		search/0x49AF/s	\x6c\x03 	
+>>>>>>>>>>>>>&0		use 		help-ver-date
+>>>>>>>>>>>>>&4		leshort		!1		
+>>>>>>>>>>>>>>&0	search/0x49AF/s	\x6c\x03 	
+>>>>>>>>>>>>>>>&0	use 		help-ver-date
+>>>>>>>>>>>>>>>&4	leshort		!1		
+>>>>>>>>>>>>>>>>&0	search/0x49AF/s	\x6c\x03 	
+# GCC.HLP is detected after 7 iterations
+>>>>>>>>>>>>>>>>>&0	use 		help-ver-date
+# this only happens if bigger hlp file is detected after used search iterations
+>>>>>>>>>>>>>>>>>&4	leshort		!1		Windows y.z help
+!:mime	application/winhelp
+!:ext	hlp
+# repeat search again or following default line does not work
+>>>>16			search/0x49AF/s	\x6c\x03 	
+# remaining files should be HeLP Bookmark WinHlp32.BMK (XP 32-bit) or WinHlp32 (Windows 8.1 64-bit)
+>>>>16	default				x	Windows help Bookmark
+!:mime	application/x-winhelp
+!:ext	/bmk
+## FirstFreeBlock normally FFFFFFFFh 10h for *ANN
+##>>8	lelong			x		\b, FirstFreeBlock 0x%8.8x
+# EntireFileSize
+>>12	lelong			x		\b, %d bytes
+## ReservedSpace normally 042Fh AFh for *.ANN
+#>>(4.l)	lelong		x		\b, ReservedSpace 0x%8.8x
+## UsedSpace normally 0426h A6h for *.ANN
+#>>(4.l+4)	lelong		x		\b, UsedSpace 0x%8.8x
+## FileFlags normally 04...
+#>>(4.l+5)	lelong		x		\b, FileFlags 0x%8.8x
+## file header magic 0x293B
+#>>(4.l+9)	uleshort	x		\b, file header magic 0x%4.4x
+## file header Flags		0x0402
+#>>(4.l+11)	uleshort	x		\b, file header Flags 0x%4.4x
+## file header PageSize	0400h 80h for *.ANN
+#>>(4.l+13)	uleshort	x		\b, PageSize 0x%4.4x
+## Structure[16]		z4
+#>>(4.l+15)	string		>\0		\b, Structure_"%-.16s"
+## MustBeZero			0
+#>>(4.l+31)	uleshort	x		\b, MustBeZero 0x%4.4x
+## PageSplits
+#>>(4.l+33)	uleshort	x		\b, PageSplits 0x%4.4x
+## RootPage
+#>>(4.l+35)	uleshort	x		\b, RootPage 0x%4.4x
+## MustBeNegOne			0xffff
+#>>(4.l+37)	uleshort	x		\b, MustBeNegOne 0x%4.4x
+## TotalPages			1
+#>>(4.l+39)	uleshort	x		\b, TotalPages 0x%4.4x
+## NLevels			0x0001
+#>>(4.l+41)	uleshort	x		\b, NLevels 0x%4.4x
+## TotalBtreeEntries
+#>>(4.l+43)	ulelong		x		\b, TotalBtreeEntries 0x%8.8x
+## pages of the B+ tree
+#>>(4.l+47)	ubequad		x		\b, PageStart 0x%16.16llx
 
+# start with colon or semicolon for comment line like Back2Life.cnt
+0		regex		\^(:|;)		
+# look for first keyword Base
+>0		search/45	:Base 		
+>>&0				use 		cnt-name
+# only solution is to search again from the beginning, because relative offsets change when use is called
+>0		search/45	:Base 		
+>0		default		x		
+# look for other keyword Title like in putty.cnt
+>>0		search/45	:Title 		
+>>>&0				use 		cnt-name
+#
+# display mime type and name of Windows help Content source
+0	name				cnt-name
+# skip space at beginning
+>0     string		\ 		
+# name without extension and greater character or name with hlp extension
+>>1	regex/c		\^([^\xd>]*|.*\.hlp)	MS Windows help file Content, based "%s"
+!:mime	text/plain
+!:apple	????TEXT
+!:ext	cnt
+#
+# Windows creates a full text search index from the hlp file if the user clicks the "Find" tab and enables keyword indexing
+0	string		tfMR			MS Windows help Full Text Search index
+!:mime application/x-winhelp-fts
+!:ext	fts
+>16	string		>\0			for "%s"
 
 # Summary: Hyper terminal
 # Extension: .ht
@@ -336,3 +474,102 @@
 >>>>>4	ulelong&0x00000001	!0x00000001	
 >>>>>>(84.l)	string		>\0		InfName "%s"
 
+# Summary: backup file created with utility like NTBACKUP.EXE shipped with Windows NT/2K/XP/2003
+# Extension: .bkf
+# Created by: Joerg Jenderek
+# URL: http://en.wikipedia.org/wiki/NTBackup
+# Reference: http://laytongraphics.com/mtf/MTF_100a.PDF
+# Descriptor BloCK name of Microsoft Tape Format
+0	string			TAPE		
+# Format Logical Address is zero
+>20	ulequad			0		
+# Reserved for MBC is zero
+>>28	uleshort		0		
+# Control Block ID is zero
+>>>36	ulelong			0		
+# BIT4-BIT15, BIT18-BIT31 of block attributes are unused
+>>>>4	ulelong&0xFFfcFFe0	0		Windows NTbackup archive
+#!:mime application/x-ntbackup
+!:ext bkf
+# OS ID
+>>>>>10	ubyte			1		\b NetWare
+>>>>>10	ubyte			13		\b NetWare SMS
+>>>>>10	ubyte			14		\b NT
+>>>>>10	ubyte			24		\b 3
+>>>>>10	ubyte			25		\b OS/2
+>>>>>10	ubyte			26		\b 95
+>>>>>10	ubyte			27		\b Macintosh
+>>>>>10	ubyte			28		\b UNIX
+# OS Version (2)
+#>>>>>11	ubyte			x		OS V=%x
+# MTF_CONTINUATION	Media Sequence Number > 1
+#>>>>>4	ulelong&0x00000001	!0		\b, continued
+# MTF_COMPRESSION
+>>>>>4	ulelong&0x00000004	!0		\b, compressed
+# MTF_EOS_AT_EOM	End Of Medium was hit during end of set processing
+>>>>>4	ulelong&0x00000008	!0		\b, End Of Medium hit
+>>>>>4	ulelong&0x00020000	0		
+# MTF_SET_MAP_EXISTS	A Media Based Catalog Set Map may exist on tape
+>>>>>>4	ulelong&0x00010000	!0		\b, with catalog
+# MTF_FDD_ALLOWED	However File/Directory Detail can only exist if a Set Map is also present
+>>>>>4	ulelong&0x00020000	!0		\b, with file catalog
+# Offset To First Event 238h,240h,28Ch
+#>>>>>8	uleshort		x		\b, event offset %4.4x
+# Displayable Size (20e0230h 20e024ch 20e0224h)
+#>>>>>8	ulequad			x		dis. size %16.16llx
+# Media Family ID (455288C4h 4570BD1Ah 45708F2Fh 4570BBF5h)
+#>>>>>52	ulelong			x		family ID %8.8x
+# TAPE Attributes (3)
+#>>>>>56	ulelong			x		TAPE %8.8x
+# Media Sequence Number
+>>>>>60	uleshort		>1		\b, sequence %u
+# Password Encryption Algorithm (3)
+>>>>>62	uleshort		>0		\b, 0x%x encrypted
+# Soft Filemark Block Size * 512 (2)
+#>>>>>64	uleshort		=2		\b, soft size %u*512
+>>>>>64	uleshort		!2		\b, soft size %u*512
+# Media Based Catalog Type (1,2)
+#>>>>>66	uleshort		x		\b, catalog type %4.4x
+# size of Media Name (66,68,6Eh)
+>>>>>68	uleshort		>0		
+# offset of Media Name (5Eh)
+>>>>>>70	uleshort	>0		
+# 0~, 1~ANSI, 2~UNICODE
+>>>>>>>48	ubyte		1		
+# size terminated ansi coded string normally followed by "MTF Media Label"
+>>>>>>>>(70.s)	string		>\0		\b, name: %s
+>>>>>>>48	ubyte		2		
+# Not null, but size terminated unicoded string
+>>>>>>>>(70.s)	lestring16	x		\b, name: %s
+# size of Media Label (104h)
+>>>>>72	uleshort		>0		
+# offset of Media Label (C4h,C6h,CCh)
+>>>>>74		uleshort	>0		
+>>>>>>48	ubyte		1		
+#Tag|Version|Vendor|Vendor ID|Creation Time Stamp|Cartridge Label|Side|Media ID|Media Domain ID|Vendor Specific fields
+>>>>>>>(74.s)	string		>\0		\b, label: %s
+>>>>>>48	ubyte		2		
+>>>>>>>(74.s)	lestring16	x		\b, label: %s
+# size of password name (0,1Ch)
+#>>>>>76	uleshort		>0		\b, password size %4.4x
+# Software Vendor ID (CBEh)
+>>>>>86	uleshort		x		\b, software (0x%x)
+# size of Software Name (6Eh)
+>>>>>80	uleshort		>0		
+# offset of Software Name (1C8h,1CAh,1D0h)
+>>>>>>82	uleshort	>0		
+# 1~ANSI, 2~UNICODE
+>>>>>>>48	ubyte		1		
+>>>>>>>>(82.s)	string		>\0		\b: %s
+>>>>>>>48	ubyte		2		
+# size terminated unicode coded string normally followed by "SPAD"
+>>>>>>>>(82.s)	lestring16	x		\b: %s
+# Format Logical Block Size (512,1024)
+#>>>>>84	uleshort		=1024		\b, block size %u
+>>>>>84	uleshort		!1024		\b, block size %u
+# Media Date of MTF_DATE_TIME type with 5 bytes
+#>>>>>>88	ubequad			x		DATE %16.16llx
+# MTF Major Version (1)
+#>>>>>>93	ubyte		x		\b, MFT version %x
+#
+

+ 182 - 92
magic/Magdir/wordprocessors

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: wordprocessors,v 1.18 2013/06/03 19:07:29 christos Exp $
+# $File: wordprocessors,v 1.19 2015/10/16 15:11:07 christos Exp $
 # wordprocessors:  file(1) magic for word processors.
 #
 ####### PWP file format used on Smith Corona Personal Word Processors:
@@ -12,97 +12,187 @@
 >25	byte	0x54	\b, legal
 >26	byte	0x46	\b, A4
 
-#WordPerfect type files Version 1.6 - PLEASE DO NOT REMOVE THIS LINE
-0	string	\377WPC\020\000\000\000\022\012\001\001\000\000\000\000	(WP) loadable file
->15	byte	0	Optimized for Intel
->15	byte	1	Optimized for Non-Intel
-1	string	WPC	(Corel/WP)
->8	short	257	WordPerfect macro
->8	short	258	WordPerfect help file
->8	short	259	WordPerfect keyboard file
->8	short	266	WordPerfect document
->8	short	267	WordPerfect dictionary
->8	short	268	WordPerfect thesaurus
->8	short	269	WordPerfect block
->8	short	270	WordPerfect rectangular block
->8	short	271	WordPerfect column block
->8	short	272	WordPerfect printer data
->8	short	275	WordPerfect printer data
->8	short	276	WordPerfect driver resource data
->8	short	279	WordPerfect hyphenation code
->8	short	280	WordPerfect hyphenation data
->8	short	281	WordPerfect macro resource data
->8	short	283	WordPerfect hyphenation lex
->8	short	285	WordPerfect wordlist
->8	short	286	WordPerfect equation resource data
->8	short	289	WordPerfect spell rules
->8	short	290	WordPerfect dictionary rules
->8	short	295	WordPerfect spell rules (Microlytics)
->8	short	299	WordPerfect settings file
->8	short	301	WordPerfect 4.2 document
->8	short	325	WordPerfect dialog file
->8	short	332	WordPerfect button bar
->8	short	513	Shell macro
->8	short	522	Shell definition
->8	short	769	Notebook macro
->8	short	770	Notebook help file
->8	short	771	Notebook keyboard file
->8	short	778	Notebook definition
->8	short	1026	Calculator help file
->8	short 	1538	Calendar help file
->8	short 	1546	Calendar data file
->8	short	1793	Editor macro
->8	short	1794	Editor help file
->8	short	1795	Editor keyboard file
->8	short	1817	Editor macro resource file
->8	short 	2049	Macro editor macro
->8	short 	2050	Macro editor help file
->8	short	2051	Macro editor keyboard file
->8	short	2305	PlanPerfect macro
->8	short	2306	PlanPerfect help file
->8	short	2307	PlanPerfect keyboard file
->8	short	2314	PlanPerfect worksheet
->8	short	2319	PlanPerfect printer definition
->8	short	2322	PlanPerfect graphic definition
->8	short	2323	PlanPerfect data
->8	short	2324	PlanPerfect temporary printer
->8	short	2329	PlanPerfect macro resource data
->8	byte	11	Mail
->8	short	2818	help file
->8	short	2821	distribution list
->8	short	2826	out box
->8	short	2827	in box
->8	short	2836	users archived mailbox
->8	short	2837	archived message database
->8	short	2838	archived attachments
->8	short	3083	Printer temporary file
->8	short	3330	Scheduler help file
->8	short	3338	Scheduler in file
->8	short	3339	Scheduler out file
->8	short	3594	GroupWise settings file
->8	short	3601	GroupWise directory services
->8	short	3627	GroupWise settings file
->8	short	4362	Terminal resource data
->8	short	4363	Terminal resource data
->8	short	4395	Terminal resource data
->8	short	4619	GUI loadable text
->8	short	4620	graphics resource data
->8	short	4621	printer settings file
->8	short	4622	port definition file
->8	short	4623	print queue parameters
->8	short	4624	compressed file
->8	short	5130	Network service msg file
->8	short	5131	Network service msg file
->8	short	5132	Async gateway login msg
->8	short	5134	GroupWise message file
->8	short	7956	GroupWise admin domain database
->8	short	7957	GroupWise admin host database
->8	short	7959	GroupWise admin remote host database
->8	short	7960	GroupWise admin ADS deferment data file
->8	short	8458	IntelliTAG (SGML) compiled DTD
->8	long	18219264	WordPerfect graphic image (1.0)
->8	long	18219520	WordPerfect graphic image (2.0)
-#end of WordPerfect type files Version 1.6 - PLEASE DO NOT REMOVE THIS LINE
+# Corel/WordPerfect
+0	string	\xffWPC
+# WordPerfect
+>8	byte	1
+>>9	byte	1	WordPerfect macro
+>>9	byte	2	WordPerfect help file
+>>9	byte	3	WordPerfect keyboard file
+>>9	byte	10	WordPerfect document
+>>9	byte	11	WordPerfect dictionary
+>>9	byte	12	WordPerfect thesaurus
+>>9	byte	13	WordPerfect block
+>>9	byte	14	WordPerfect rectangular block
+>>9	byte	15	WordPerfect column block
+>>9	byte	16	WordPerfect printer data
+>>9	byte	19	WordPerfect printer data
+>>9	byte	20	WordPerfect driver resource data
+>>9	byte	22	WordPerfect graphic image
+>>9	byte	23	WordPerfect hyphenation code
+>>9	byte	24	WordPerfect hyphenation data
+>>9	byte	25	WordPerfect macro resource data
+>>9	byte	27	WordPerfect hyphenation lex
+>>9	byte	29	WordPerfect wordlist
+>>9	byte	30	WordPerfect equation resource data
+>>9	byte	33	WordPerfect spell rules
+>>9	byte	34	WordPerfect dictionary rules
+>>9	byte	39	WordPerfect spell rules (Microlytics)
+>>9	byte	43	WordPerfect settings file
+>>9	byte	44	WordPerfect 3.5 document
+>>9	byte	45	WordPerfect 4.2 document
+>>9	byte	69	WordPerfect dialog file
+>>9	byte	76	WordPerfect button bar
+>>9	default x
+>>>9	byte	x	Corel WordPerfect: Unknown filetype %d
+# Corel Shell
+>8	byte	2
+>>9	byte	1	Corel shell macro
+>>9	byte	10	Corel shell definition
+>>9	default x
+>>>9	byte	x	Corel Shell: Unknown filetype %d
+# Corel Notebook
+>8	byte	3
+>>9	byte	1	Corel Notebook macro
+>>9	byte	2	Corel Notebook help file
+>>9	byte	3	Corel Notebook keyboard file
+>>9	byte	10	Corel Notebook definition
+>>9	default	x
+>>>9	byte	x	Corel Notebook: Unknown filetype %d
+# Corel Calculator
+>8	byte	4
+>>9	byte	2	Corel Calculator help file
+>>9	default	x
+>>>9	byte	x	Corel Calculator: Unknown filetype %d
+# Corel File Manager
+>8	byte	5
+>>9	default	x
+>>>9	byte	x	Corel File Manager: Unknown filetype %d
+# Corel Calendar
+>8	byte	6
+>>9	byte 	2	Corel Calendar help file
+>>9	byte 	10	Corel Calendar data file
+>>9	default	x
+>>>9	byte	x	Corel Calendar: Unknown filetype %d
+# Corel Program Editor/Ed Editor
+>8	byte	7
+>>9	byte	1	Corel Editor macro
+>>9	byte	2	Corel Editor help file
+>>9	byte	3	Corel Editor keyboard file
+>>9	byte	25	Corel Editor macro resource file
+>>9	default	x
+>>>9	byte	x	Corel Program Editor/Ed Editor: Unknown filetype %d
+# Corel Macro Editor
+>8	byte	8
+>>9	byte 	1	Corel Macro editor macro
+>>9	byte 	2	Corel Macro editor help file
+>>9	byte	3	Corel Macro editor keyboard file
+>>9	default	x
+>>>9	byte	x	Corel Macro Editor: Unknown filetype %d
+# Corel Plan Perfect
+>8	byte	9
+>>9	default	x
+>>>9	byte	x	Corel Plan Perfect: Unknown filetype %d
+# Corel DataPerfect
+>8	byte	10
+# CHECK: Don't these belong into product 9?
+>>9	byte	1	Corel PlanPerfect macro
+>>9	byte	2	Corel PlanPerfect help file
+>>9	byte	3	Corel PlanPerfect keyboard file
+>>9	byte	10	Corel PlanPerfect worksheet
+>>9	byte	15	Corel PlanPerfect printer definition
+>>9	byte	18	Corel PlanPerfect graphic definition
+>>9	byte	19	Corel PlanPerfect data
+>>9	byte	20	Corel PlanPerfect temporary printer
+>>9	byte	25	Corel PlanPerfect macro resource data
+>>9	default	x
+>>>9	byte	x	Corel DataPerfect: Unknown filetype %d
+# Corel Mail
+>8	byte	11
+>>9	byte	2	Corel Mail help file
+>>9	byte	5	Corel Mail distribution list
+>>9	byte	10	Corel Mail out box
+>>9	byte	11	Corel Mail in box
+>>9	byte	20	Corel Mail users archived mailbox
+>>9	byte	21	Corel Mail archived message database
+>>9	byte	22	Corel Mail archived attachments
+>>9	default	x
+>>>9	byte	x	Corel Mail: Unknown filetype %d
+# Corel Printer
+>8	byte	12
+>>9	byte	11	Corel Printer temporary file
+>>9	default	x
+>>>9	byte	x	Corel Printer: Unknown filetype %d
+# Corel Scheduler
+>8	byte	13
+>>9	byte	2	Corel Scheduler help file
+>>9	byte	10	Corel Scheduler in file
+>>9	byte	11	Corel Scheduler out file
+>>9	default	x
+>>>9	byte	x	Corel Scheduler: Unknown filetype %d
+# Corel WordPerfect Office
+>8	byte	14
+>>9	byte	10	Corel GroupWise settings file
+>>9	byte	17	Corel GroupWise directory services
+>>9	byte	43	Corel GroupWise settings file
+>>9	default	x
+>>>9	byte	x	Corel WordPerfect Office: Unknown filetype %d
+# Corel DrawPerfect
+>8	byte	15
+>>9	default	x
+>>>9	byte	x	Corel DrawPerfect: Unknown filetype %d
+# Corel LetterPerfect
+>8	byte	16
+>>9	default	x
+>>>9	byte	x	Corel LetterPerfect: Unknown filetype %d
+# Corel Terminal
+>8	byte	17
+>>9	byte	10	Corel Terminal resource data
+>>9	byte	11	Corel Terminal resource data
+>>9	byte	43	Corel Terminal resource data
+>>9	default	x
+>>>9	byte	x	Corel Terminal: Unknown filetype %d
+# Corel loadable file
+>8	byte	18
+>>9	byte	10	Corel loadable file
+>>9	byte	11	Corel GUI loadable text
+>>9	byte	12	Corel graphics resource data
+>>9	byte	13	Corel printer settings file
+>>9	byte	14	Corel port definition file
+>>9	byte	15	Corel print queue parameters
+>>9	byte	16	Corel compressed file
+>>9	default	x
+>>>9	byte	x	Corel loadable file: Unknown filetype %d
+>>15	byte	0	\b, optimized for Intel
+>>15	byte	1	\b, optimized for Non-Intel
+# Network service
+>8	byte	20
+>>9	byte	10	Corel Network service msg file
+>>9	byte	11	Corel Network service msg file
+>>9	byte	12	Corel Async gateway login msg
+>>9	byte	14	Corel GroupWise message file
+>>9	default	x
+>>>9	byte	x	Corel Network service: Unknown filetype %d
+# GroupWise
+>8	byte	31
+>>9	byte	20	GroupWise admin domain database
+>>9	byte	21	GroupWise admin host database
+>>9	byte	23	GroupWise admin remote host database
+>>9	byte	24	GroupWise admin ADS deferment data file
+>>9	default	x
+>>>9	byte	x	GroupWise: Unknown filetype %d
+# IntelliTAG
+>8	byte	33
+>>9	byte	10	IntelliTAG (SGML) compiled DTD
+>>9	default	x
+>>>9	byte	x	IntelliTAG: Unknown filetype %d
+# everything else
+>8	default x
+>>8	byte	x	Unknown Corel/Wordperfect product %d,
+>>>9	byte	x	file type %d
+>10	byte	0	\b, v5.
+>10	byte	!0	\b, v%d.
+>11	byte	x	\b%d
 
 # Hangul (Korean) Word Processor File
 0	string	HWP\ Document\ File	Hangul (Korean) Word Processor File 3.0

+ 25 - 0
magic/Magdir/x68000

@@ -0,0 +1,25 @@
+#------------------------------------------------------------------------------
+# x68000:  file(1) magic for the Sharp Home Computer
+# v1.0
+# Fabio R. Schmidlin <sd-snatcher@users.sourceforge.net>
+
+# Yanagisawa PIC picture
+0	string		PIC
+>3	search/0x200	\x1A
+>>&0	search/0x200	\x0
+>>>&0	ubyte		0		Yanagisawa PIC image file,
+>>>>&0	ubyte&15	0		model: X68000,
+>>>>&0	ubyte&15	1		model: PC-88VA,
+>>>>&0	ubyte&15	2		model: FM-TOWNS,
+>>>>&0	ubyte&15	3		model: MAC,
+>>>>&0	ubyte&15	15		model: Generic,
+>>>>&3	ubeshort	x		%dx
+>>>>&5	ubeshort	x		\b%d,
+>>>>&1	ubeshort	4		colors: 16
+>>>>&1	ubeshort	8		colors: 256
+>>>>&1	ubeshort	12		colors: 4096
+>>>>&1	ubeshort	15		colors: 32768
+>>>>&1	ubeshort	16		colors: 65536
+>>>>&1	ubeshort	>16		colors: %d-bit
+
+

+ 21 - 2
magic/Magdir/xenix

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: xenix,v 1.9 2009/09/19 16:28:13 christos Exp $
+# $File: xenix,v 1.10 2016/04/19 18:14:19 christos Exp $
 # xenix:  file(1) magic for Microsoft Xenix
 #
 # "Middle model" stuff, and "Xenix 8086 relocatable or 80286 small
@@ -12,7 +12,26 @@
 # XXX - "x.out" collides with PDP-11 archives
 #
 0	string		core		core file (Xenix)
-0	byte		0x80		8086 relocatable (Microsoft)
+# URL: http://www.polarhome.com/service/man/?qf=86rel&tf=2&of=Xenix
+# Reference: http://www.azillionmonkeys.com/qed/Omfg.pdf
+# Update: Joerg Jenderek
+# recordtype~TranslatorHEADerRecord
+0	byte		0x80		
+# GRR: line above is too general as it catches also Extensible storage engine DataBase
+# skip examples like GENA.SND Switch.Snd by looking for record length maximal 1024-3
+>1	uleshort	<1022		
+# skip examples like GAME.PICTURE Strange.Pic by looking for positiv record length
+>>1	uleshort	>0		
+# skip examples like Xtable.Data FRACTAL.GEN SHR.VIEW by looking for positiv string length
+>>>3	ubyte		>0		
+# skip examples like OMBRE.6 with "UUUUUU" by looking for filename like "hello.c"
+>>>>4	regex	[a-zA-Z_/]{1,8}[.]	8086 relocatable (Microsoft)
+#!:mime	application/octet-stream
+!:mime	application/x-object
+!:ext	o/a
+>>>>>3	pstring		x		\b, "%s"
+# checksum
+#>>>>>(3.b+4)	ubyte	x		\b, checksum 0x%2.2x
 0	leshort		0xff65		x.out
 >2	string		__.SYMDEF	 randomized
 >0	byte		x		archive

+ 15 - 2
magic/Makefile.am

@@ -1,5 +1,5 @@
 #
-# $File: Makefile.am,v 1.103 2015/03/17 15:15:12 christos Exp $
+# $File: Makefile.am,v 1.116 2016/06/13 19:09:31 christos Exp $
 #
 MAGIC_FRAGMENT_BASE = Magdir
 MAGIC_DIR = $(top_srcdir)/magic
@@ -29,7 +29,9 @@ $(MAGIC_FRAGMENT_DIR)/asterix \
 $(MAGIC_FRAGMENT_DIR)/att3b \
 $(MAGIC_FRAGMENT_DIR)/audio \
 $(MAGIC_FRAGMENT_DIR)/basis \
+$(MAGIC_FRAGMENT_DIR)/ber \
 $(MAGIC_FRAGMENT_DIR)/bflt \
+$(MAGIC_FRAGMENT_DIR)/bioinformatics \
 $(MAGIC_FRAGMENT_DIR)/blackberry \
 $(MAGIC_FRAGMENT_DIR)/blcr \
 $(MAGIC_FRAGMENT_DIR)/blender \
@@ -50,11 +52,13 @@ $(MAGIC_FRAGMENT_DIR)/citrus \
 $(MAGIC_FRAGMENT_DIR)/clarion \
 $(MAGIC_FRAGMENT_DIR)/claris \
 $(MAGIC_FRAGMENT_DIR)/clipper \
+$(MAGIC_FRAGMENT_DIR)/coff \
 $(MAGIC_FRAGMENT_DIR)/commands \
 $(MAGIC_FRAGMENT_DIR)/communications \
 $(MAGIC_FRAGMENT_DIR)/compress \
 $(MAGIC_FRAGMENT_DIR)/console \
 $(MAGIC_FRAGMENT_DIR)/convex \
+$(MAGIC_FRAGMENT_DIR)/coverage \
 $(MAGIC_FRAGMENT_DIR)/cracklib \
 $(MAGIC_FRAGMENT_DIR)/ctags \
 $(MAGIC_FRAGMENT_DIR)/ctf \
@@ -62,6 +66,7 @@ $(MAGIC_FRAGMENT_DIR)/cubemap \
 $(MAGIC_FRAGMENT_DIR)/cups \
 $(MAGIC_FRAGMENT_DIR)/dact \
 $(MAGIC_FRAGMENT_DIR)/database \
+$(MAGIC_FRAGMENT_DIR)/der \
 $(MAGIC_FRAGMENT_DIR)/diamond \
 $(MAGIC_FRAGMENT_DIR)/diff \
 $(MAGIC_FRAGMENT_DIR)/digital \
@@ -78,7 +83,9 @@ $(MAGIC_FRAGMENT_DIR)/erlang \
 $(MAGIC_FRAGMENT_DIR)/esri \
 $(MAGIC_FRAGMENT_DIR)/fcs \
 $(MAGIC_FRAGMENT_DIR)/filesystems \
+$(MAGIC_FRAGMENT_DIR)/finger \
 $(MAGIC_FRAGMENT_DIR)/flash \
+$(MAGIC_FRAGMENT_DIR)/flif \
 $(MAGIC_FRAGMENT_DIR)/fonts \
 $(MAGIC_FRAGMENT_DIR)/fortran \
 $(MAGIC_FRAGMENT_DIR)/frame \
@@ -145,6 +152,7 @@ $(MAGIC_FRAGMENT_DIR)/mcrypt \
 $(MAGIC_FRAGMENT_DIR)/mercurial \
 $(MAGIC_FRAGMENT_DIR)/metastore \
 $(MAGIC_FRAGMENT_DIR)/meteorological \
+$(MAGIC_FRAGMENT_DIR)/microfocus \
 $(MAGIC_FRAGMENT_DIR)/mime \
 $(MAGIC_FRAGMENT_DIR)/mips \
 $(MAGIC_FRAGMENT_DIR)/mirage \
@@ -185,6 +193,8 @@ $(MAGIC_FRAGMENT_DIR)/parrot \
 $(MAGIC_FRAGMENT_DIR)/pascal \
 $(MAGIC_FRAGMENT_DIR)/pbf \
 $(MAGIC_FRAGMENT_DIR)/pbm \
+$(MAGIC_FRAGMENT_DIR)/pc88 \
+$(MAGIC_FRAGMENT_DIR)/pc98 \
 $(MAGIC_FRAGMENT_DIR)/pdf \
 $(MAGIC_FRAGMENT_DIR)/pdp \
 $(MAGIC_FRAGMENT_DIR)/perl \
@@ -193,6 +203,7 @@ $(MAGIC_FRAGMENT_DIR)/pgp \
 $(MAGIC_FRAGMENT_DIR)/pkgadd \
 $(MAGIC_FRAGMENT_DIR)/plan9 \
 $(MAGIC_FRAGMENT_DIR)/plus5 \
+$(MAGIC_FRAGMENT_DIR)/polyml \
 $(MAGIC_FRAGMENT_DIR)/printer \
 $(MAGIC_FRAGMENT_DIR)/project \
 $(MAGIC_FRAGMENT_DIR)/psdbms \
@@ -247,6 +258,7 @@ $(MAGIC_FRAGMENT_DIR)/unknown \
 $(MAGIC_FRAGMENT_DIR)/uterus \
 $(MAGIC_FRAGMENT_DIR)/uuencode \
 $(MAGIC_FRAGMENT_DIR)/varied.out \
+$(MAGIC_FRAGMENT_DIR)/vacuum-cleaner \
 $(MAGIC_FRAGMENT_DIR)/varied.script \
 $(MAGIC_FRAGMENT_DIR)/vax \
 $(MAGIC_FRAGMENT_DIR)/vicar \
@@ -263,6 +275,7 @@ $(MAGIC_FRAGMENT_DIR)/windows \
 $(MAGIC_FRAGMENT_DIR)/wireless \
 $(MAGIC_FRAGMENT_DIR)/wordprocessors \
 $(MAGIC_FRAGMENT_DIR)/wsdl \
+$(MAGIC_FRAGMENT_DIR)/x68000 \
 $(MAGIC_FRAGMENT_DIR)/xdelta \
 $(MAGIC_FRAGMENT_DIR)/xenix \
 $(MAGIC_FRAGMENT_DIR)/xilinx \
@@ -291,7 +304,7 @@ ${MAGIC}: $(EXTRA_DIST) $(FILE_COMPILE_DEP)
 	@(if expr "${FILE_COMPILE}" : '.*/.*' > /dev/null; then \
 	    echo "Using ${FILE_COMPILE} to generate ${MAGIC}" > /dev/null; \
 	  else \
-	    v=$$(file --version | sed -e s/file-// -e q); \
+	    v=$$(${FILE_COMPILE} --version | sed -e s/file-// -e q); \
 	    if [ "$$v" != "${PACKAGE_VERSION}" ]; then \
 		echo "Cannot use the installed version of file ($$v) to"; \
 		echo "cross-compile file ${PACKAGE_VERSION}"; \

+ 15 - 2
magic/Makefile.in

@@ -273,7 +273,7 @@ top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
 
 #
-# $File: Makefile.am,v 1.103 2015/03/17 15:15:12 christos Exp $
+# $File: Makefile.am,v 1.116 2016/06/13 19:09:31 christos Exp $
 #
 MAGIC_FRAGMENT_BASE = Magdir
 MAGIC_DIR = $(top_srcdir)/magic
@@ -301,7 +301,9 @@ $(MAGIC_FRAGMENT_DIR)/asterix \
 $(MAGIC_FRAGMENT_DIR)/att3b \
 $(MAGIC_FRAGMENT_DIR)/audio \
 $(MAGIC_FRAGMENT_DIR)/basis \
+$(MAGIC_FRAGMENT_DIR)/ber \
 $(MAGIC_FRAGMENT_DIR)/bflt \
+$(MAGIC_FRAGMENT_DIR)/bioinformatics \
 $(MAGIC_FRAGMENT_DIR)/blackberry \
 $(MAGIC_FRAGMENT_DIR)/blcr \
 $(MAGIC_FRAGMENT_DIR)/blender \
@@ -322,11 +324,13 @@ $(MAGIC_FRAGMENT_DIR)/citrus \
 $(MAGIC_FRAGMENT_DIR)/clarion \
 $(MAGIC_FRAGMENT_DIR)/claris \
 $(MAGIC_FRAGMENT_DIR)/clipper \
+$(MAGIC_FRAGMENT_DIR)/coff \
 $(MAGIC_FRAGMENT_DIR)/commands \
 $(MAGIC_FRAGMENT_DIR)/communications \
 $(MAGIC_FRAGMENT_DIR)/compress \
 $(MAGIC_FRAGMENT_DIR)/console \
 $(MAGIC_FRAGMENT_DIR)/convex \
+$(MAGIC_FRAGMENT_DIR)/coverage \
 $(MAGIC_FRAGMENT_DIR)/cracklib \
 $(MAGIC_FRAGMENT_DIR)/ctags \
 $(MAGIC_FRAGMENT_DIR)/ctf \
@@ -334,6 +338,7 @@ $(MAGIC_FRAGMENT_DIR)/cubemap \
 $(MAGIC_FRAGMENT_DIR)/cups \
 $(MAGIC_FRAGMENT_DIR)/dact \
 $(MAGIC_FRAGMENT_DIR)/database \
+$(MAGIC_FRAGMENT_DIR)/der \
 $(MAGIC_FRAGMENT_DIR)/diamond \
 $(MAGIC_FRAGMENT_DIR)/diff \
 $(MAGIC_FRAGMENT_DIR)/digital \
@@ -350,7 +355,9 @@ $(MAGIC_FRAGMENT_DIR)/erlang \
 $(MAGIC_FRAGMENT_DIR)/esri \
 $(MAGIC_FRAGMENT_DIR)/fcs \
 $(MAGIC_FRAGMENT_DIR)/filesystems \
+$(MAGIC_FRAGMENT_DIR)/finger \
 $(MAGIC_FRAGMENT_DIR)/flash \
+$(MAGIC_FRAGMENT_DIR)/flif \
 $(MAGIC_FRAGMENT_DIR)/fonts \
 $(MAGIC_FRAGMENT_DIR)/fortran \
 $(MAGIC_FRAGMENT_DIR)/frame \
@@ -417,6 +424,7 @@ $(MAGIC_FRAGMENT_DIR)/mcrypt \
 $(MAGIC_FRAGMENT_DIR)/mercurial \
 $(MAGIC_FRAGMENT_DIR)/metastore \
 $(MAGIC_FRAGMENT_DIR)/meteorological \
+$(MAGIC_FRAGMENT_DIR)/microfocus \
 $(MAGIC_FRAGMENT_DIR)/mime \
 $(MAGIC_FRAGMENT_DIR)/mips \
 $(MAGIC_FRAGMENT_DIR)/mirage \
@@ -457,6 +465,8 @@ $(MAGIC_FRAGMENT_DIR)/parrot \
 $(MAGIC_FRAGMENT_DIR)/pascal \
 $(MAGIC_FRAGMENT_DIR)/pbf \
 $(MAGIC_FRAGMENT_DIR)/pbm \
+$(MAGIC_FRAGMENT_DIR)/pc88 \
+$(MAGIC_FRAGMENT_DIR)/pc98 \
 $(MAGIC_FRAGMENT_DIR)/pdf \
 $(MAGIC_FRAGMENT_DIR)/pdp \
 $(MAGIC_FRAGMENT_DIR)/perl \
@@ -465,6 +475,7 @@ $(MAGIC_FRAGMENT_DIR)/pgp \
 $(MAGIC_FRAGMENT_DIR)/pkgadd \
 $(MAGIC_FRAGMENT_DIR)/plan9 \
 $(MAGIC_FRAGMENT_DIR)/plus5 \
+$(MAGIC_FRAGMENT_DIR)/polyml \
 $(MAGIC_FRAGMENT_DIR)/printer \
 $(MAGIC_FRAGMENT_DIR)/project \
 $(MAGIC_FRAGMENT_DIR)/psdbms \
@@ -519,6 +530,7 @@ $(MAGIC_FRAGMENT_DIR)/unknown \
 $(MAGIC_FRAGMENT_DIR)/uterus \
 $(MAGIC_FRAGMENT_DIR)/uuencode \
 $(MAGIC_FRAGMENT_DIR)/varied.out \
+$(MAGIC_FRAGMENT_DIR)/vacuum-cleaner \
 $(MAGIC_FRAGMENT_DIR)/varied.script \
 $(MAGIC_FRAGMENT_DIR)/vax \
 $(MAGIC_FRAGMENT_DIR)/vicar \
@@ -535,6 +547,7 @@ $(MAGIC_FRAGMENT_DIR)/windows \
 $(MAGIC_FRAGMENT_DIR)/wireless \
 $(MAGIC_FRAGMENT_DIR)/wordprocessors \
 $(MAGIC_FRAGMENT_DIR)/wsdl \
+$(MAGIC_FRAGMENT_DIR)/x68000 \
 $(MAGIC_FRAGMENT_DIR)/xdelta \
 $(MAGIC_FRAGMENT_DIR)/xenix \
 $(MAGIC_FRAGMENT_DIR)/xilinx \
@@ -776,7 +789,7 @@ ${MAGIC}: $(EXTRA_DIST) $(FILE_COMPILE_DEP)
 	@(if expr "${FILE_COMPILE}" : '.*/.*' > /dev/null; then \
 	    echo "Using ${FILE_COMPILE} to generate ${MAGIC}" > /dev/null; \
 	  else \
-	    v=$$(file --version | sed -e s/file-// -e q); \
+	    v=$$(${FILE_COMPILE} --version | sed -e s/file-// -e q); \
 	    if [ "$$v" != "${PACKAGE_VERSION}" ]; then \
 		echo "Cannot use the installed version of file ($$v) to"; \
 		echo "cross-compile file ${PACKAGE_VERSION}"; \

+ 0 - 13
python/README

@@ -1,13 +0,0 @@
-This directory contains Python bindings to allow you to access the
-libmagic api. At the moment their status is "experimental".
-
-You can install the modules either with:
-
-$ python setup.py build
-$ python setup.py install
-
-or, if you have easy_install:
-
-$ easy_install .
-
-magic-python should work now!

+ 73 - 17
python/magic.py

@@ -1,10 +1,13 @@
-#!/usr/bin/env python
+# coding: utf-8
+
 '''
 Python bindings for libmagic
 '''
 
 import ctypes
 
+from collections import namedtuple
+
 from ctypes import *
 from ctypes.util import find_library
 
@@ -32,7 +35,7 @@ MAGIC_PRESERVE_ATIME = PRESERVE_ATIME = 128
 MAGIC_RAW = RAW = 256
 MAGIC_ERROR = ERROR = 512
 MAGIC_MIME_ENCODING = MIME_ENCODING = 1024
-MAGIC_MIME = MIME = 1040
+MAGIC_MIME = MIME = 1040  # MIME_TYPE + MIME_ENCODING
 MAGIC_APPLE = APPLE = 2048
 
 MAGIC_NO_CHECK_COMPRESS = NO_CHECK_COMPRESS = 4096
@@ -47,6 +50,8 @@ MAGIC_NO_CHECK_ENCODING = NO_CHECK_ENCODING = 2097152
 
 MAGIC_NO_CHECK_BUILTIN = NO_CHECK_BUILTIN = 4173824
 
+FileMagic = namedtuple('FileMagic', ('mime_type', 'encoding', 'name'))
+
 
 class magic_set(Structure):
     pass
@@ -118,14 +123,18 @@ class Magic(object):
         as a filename or None if an error occurred and the MAGIC_ERROR flag
         is set.  A call to errno() will return the numeric error code.
         """
-        try:  # attempt python3 approach first
-            if isinstance(filename, bytes):
-                bi = filename
-            else:
+        if isinstance(filename, bytes):
+            bi = filename
+        else:
+            try:  # keep Python 2 compatibility
                 bi = bytes(filename, 'utf-8')
-            return str(_file(self._magic_t, bi), 'utf-8')
-        except:
-            return _file(self._magic_t, filename.encode('utf-8'))
+            except TypeError:
+                bi = bytes(filename)
+        r = _file(self._magic_t, bi)
+        if isinstance(r, str):
+            return r
+        else:
+            return str(r).encode('utf-8')
 
     def descriptor(self, fd):
         """
@@ -139,20 +148,22 @@ class Magic(object):
         as a buffer or None if an error occurred and the MAGIC_ERROR flag
         is set. A call to errno() will return the numeric error code.
         """
-        try:  # attempt python3 approach first
-            return str(_buffer(self._magic_t, buf, len(buf)), 'utf-8')
-        except:
-            return _buffer(self._magic_t, buf, len(buf))
+        r = _buffer(self._magic_t, buf, len(buf))
+        if isinstance(r, str):
+            return r
+        else:
+            return str(r).encode('utf-8')
 
     def error(self):
         """
         Returns a textual explanation of the last error or None
         if there was no error.
         """
-        try:  # attempt python3 approach first
-            return str(_error(self._magic_t), 'utf-8')
-        except:
-            return _error(self._magic_t)
+        e = _error(self._magic_t)
+        if e is None or isinstance(e, str):
+            return e  # no error, or already a native str (Python 2)
+        else:
+            return e.decode('utf-8')  # Python 3: decode bytes, not repr
 
     def setflags(self, flags):
         """
@@ -219,3 +230,48 @@ def open(flags):
     Flags argument as for setflags.
     """
     return Magic(_open(flags))
+
+
+# Objects used by `detect_from_` functions
+mime_magic = Magic(_open(MAGIC_MIME))
+mime_magic.load()
+none_magic = Magic(_open(MAGIC_NONE))
+none_magic.load()
+
+
+def _create_filemagic(mime_detected, type_detected):
+    mime_type, mime_encoding = mime_detected.split('; ')
+
+    return FileMagic(name=type_detected, mime_type=mime_type,
+                     encoding=mime_encoding.replace('charset=', ''))
+
+
+def detect_from_filename(filename):
+    '''Detect mime type, encoding and file type from a filename
+
+    Returns a `FileMagic` namedtuple.
+    '''
+
+    return _create_filemagic(mime_magic.file(filename),
+                             none_magic.file(filename))
+
+
+def detect_from_fobj(fobj):
+    '''Detect mime type, encoding and file type from file-like object
+
+    Returns a `FileMagic` namedtuple.
+    '''
+
+    file_descriptor = fobj.fileno()
+    return _create_filemagic(mime_magic.descriptor(file_descriptor),
+                             none_magic.descriptor(file_descriptor))
+
+
+def detect_from_content(byte_content):
+    '''Detect mime type, encoding and file type from bytes
+
+    Returns a `FileMagic` namedtuple.
+    '''
+
+    return _create_filemagic(mime_magic.buffer(byte_content),
+                             none_magic.buffer(byte_content))

+ 22 - 10
python/setup.py

@@ -1,10 +1,22 @@
-# Python distutils build script for magic extension
-from distutils.core import setup
-
-setup(name = 'Magic file extensions',
-    version = '0.2',
-    author = 'Reuben Thomas',
-    author_email = 'rrt@sc3d.org',
-    license = 'BSD',
-    description = 'libmagic Python bindings',
-    py_modules = ['magic'])
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+from setuptools import setup
+
+
+setup(name='file-magic',
+      version='0.3.0',
+      author='Reuben Thomas, Álvaro Justen',
+      author_email='rrt@sc3d.org, alvarojusten@gmail.com',
+      url='https://github.com/file/file',
+      license='BSD',
+      description='(official) libmagic Python bindings',
+      py_modules=['magic'],
+      test_suite='tests',
+      classifiers = [
+          'Intended Audience :: Developers',
+          'License :: OSI Approved :: BSD License',
+          'Natural Language :: English',
+          'Topic :: Software Development :: Libraries :: Python Modules',
+      ])

+ 2 - 2
src/Makefile.am

@@ -1,6 +1,6 @@
 MAGIC = $(pkgdatadir)/magic
 lib_LTLIBRARIES = libmagic.la
-include_HEADERS = magic.h
+nodist_include_HEADERS = magic.h
 
 bin_PROGRAMS = file
 
@@ -9,7 +9,7 @@ AM_CFLAGS = $(CFLAG_VISIBILITY) @WARNINGS@
 
 libmagic_la_SOURCES = magic.c apprentice.c softmagic.c ascmagic.c \
 	encoding.c compress.c is_tar.c readelf.c print.c fsmagic.c \
-	funcs.c file.h readelf.h tar.h apptype.c \
+	funcs.c file.h readelf.h tar.h apptype.c der.c der.h \
 	file_opts.h elfclass.h mygetopt.h cdf.c cdf_time.c readcdf.c cdf.h
 libmagic_la_LDFLAGS = -no-undefined -version-info 1:0:0
 if MINGW

+ 22 - 21
src/Makefile.in

@@ -98,8 +98,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \
 	$(top_srcdir)/acinclude.m4 $(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
 	$(ACLOCAL_M4)
-DIST_COMMON = $(srcdir)/Makefile.am $(include_HEADERS) \
-	$(am__DIST_COMMON)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
 mkinstalldirs = $(install_sh) -d
 CONFIG_HEADER = $(top_builddir)/config.h
 CONFIG_CLEAN_FILES =
@@ -138,8 +137,8 @@ am__DEPENDENCIES_1 =
 libmagic_la_DEPENDENCIES = $(LTLIBOBJS) $(am__DEPENDENCIES_1)
 am_libmagic_la_OBJECTS = magic.lo apprentice.lo softmagic.lo \
 	ascmagic.lo encoding.lo compress.lo is_tar.lo readelf.lo \
-	print.lo fsmagic.lo funcs.lo apptype.lo cdf.lo cdf_time.lo \
-	readcdf.lo
+	print.lo fsmagic.lo funcs.lo apptype.lo der.lo cdf.lo \
+	cdf_time.lo readcdf.lo
 libmagic_la_OBJECTS = $(am_libmagic_la_OBJECTS)
 AM_V_lt = $(am__v_lt_@AM_V@)
 am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
@@ -193,7 +192,7 @@ am__can_run_installinfo = \
     n|no|NO) false;; \
     *) (install-info --version) >/dev/null 2>&1;; \
   esac
-HEADERS = $(include_HEADERS)
+HEADERS = $(nodist_include_HEADERS)
 am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
 # Read a list of newline-separated strings from the standard input,
 # and print each of them once, without duplicates.  Input order is
@@ -214,9 +213,9 @@ am__define_uniq_tagged_files = \
 ETAGS = etags
 CTAGS = ctags
 am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp \
-	asctime_r.c asprintf.c ctime_r.c fmtcheck.c getline.c \
-	getopt_long.c gmtime_r.c localtime_r.c pread.c strcasestr.c \
-	strlcat.c strlcpy.c vasprintf.c
+	asctime_r.c asprintf.c ctime_r.c dprintf.c fmtcheck.c \
+	getline.c getopt_long.c gmtime_r.c localtime_r.c pread.c \
+	strcasestr.c strlcat.c strlcpy.c vasprintf.c
 DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
 pkgdatadir = @pkgdatadir@
 ACLOCAL = @ACLOCAL@
@@ -340,12 +339,12 @@ top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
 MAGIC = $(pkgdatadir)/magic
 lib_LTLIBRARIES = libmagic.la
-include_HEADERS = magic.h
+nodist_include_HEADERS = magic.h
 AM_CPPFLAGS = -DMAGIC='"$(MAGIC)"'
 AM_CFLAGS = $(CFLAG_VISIBILITY) @WARNINGS@
 libmagic_la_SOURCES = magic.c apprentice.c softmagic.c ascmagic.c \
 	encoding.c compress.c is_tar.c readelf.c print.c fsmagic.c \
-	funcs.c file.h readelf.h tar.h apptype.c \
+	funcs.c file.h readelf.h tar.h apptype.c der.c der.h \
 	file_opts.h elfclass.h mygetopt.h cdf.c cdf_time.c readcdf.c cdf.h
 
 libmagic_la_LDFLAGS = -no-undefined -version-info 1:0:0
@@ -493,6 +492,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/asctime_r.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/asprintf.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/ctime_r.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/dprintf.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/fmtcheck.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/getline.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/getopt_long.Plo@am__quote@
@@ -509,6 +509,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cdf.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cdf_time.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compress.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/der.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/encoding.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/file.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fsmagic.Plo@am__quote@
@@ -549,9 +550,9 @@ mostlyclean-libtool:
 
 clean-libtool:
 	-rm -rf .libs _libs
-install-includeHEADERS: $(include_HEADERS)
+install-nodist_includeHEADERS: $(nodist_include_HEADERS)
 	@$(NORMAL_INSTALL)
-	@list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \
+	@list='$(nodist_include_HEADERS)'; test -n "$(includedir)" || list=; \
 	if test -n "$$list"; then \
 	  echo " $(MKDIR_P) '$(DESTDIR)$(includedir)'"; \
 	  $(MKDIR_P) "$(DESTDIR)$(includedir)" || exit 1; \
@@ -565,9 +566,9 @@ install-includeHEADERS: $(include_HEADERS)
 	  $(INSTALL_HEADER) $$files "$(DESTDIR)$(includedir)" || exit $$?; \
 	done
 
-uninstall-includeHEADERS:
+uninstall-nodist_includeHEADERS:
 	@$(NORMAL_UNINSTALL)
-	@list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \
+	@list='$(nodist_include_HEADERS)'; test -n "$(includedir)" || list=; \
 	files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
 	dir='$(DESTDIR)$(includedir)'; $(am__uninstall_files_from_dir)
 
@@ -719,7 +720,7 @@ info: info-am
 
 info-am:
 
-install-data-am: install-includeHEADERS
+install-data-am: install-nodist_includeHEADERS
 
 install-dvi: install-dvi-am
 
@@ -765,8 +766,8 @@ ps: ps-am
 
 ps-am:
 
-uninstall-am: uninstall-binPROGRAMS uninstall-includeHEADERS \
-	uninstall-libLTLIBRARIES
+uninstall-am: uninstall-binPROGRAMS uninstall-libLTLIBRARIES \
+	uninstall-nodist_includeHEADERS
 
 .MAKE: all check install install-am install-strip
 
@@ -777,15 +778,15 @@ uninstall-am: uninstall-binPROGRAMS uninstall-includeHEADERS \
 	distclean-tags distdir dvi dvi-am html html-am info info-am \
 	install install-am install-binPROGRAMS install-data \
 	install-data-am install-dvi install-dvi-am install-exec \
-	install-exec-am install-html install-html-am \
-	install-includeHEADERS install-info install-info-am \
-	install-libLTLIBRARIES install-man install-pdf install-pdf-am \
+	install-exec-am install-html install-html-am install-info \
+	install-info-am install-libLTLIBRARIES install-man \
+	install-nodist_includeHEADERS install-pdf install-pdf-am \
 	install-ps install-ps-am install-strip installcheck \
 	installcheck-am installdirs maintainer-clean \
 	maintainer-clean-generic mostlyclean mostlyclean-compile \
 	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
 	tags tags-am uninstall uninstall-am uninstall-binPROGRAMS \
-	uninstall-includeHEADERS uninstall-libLTLIBRARIES
+	uninstall-libLTLIBRARIES uninstall-nodist_includeHEADERS
 
 .PRECIOUS: Makefile
 

+ 82 - 40
src/apprentice.c

@@ -32,7 +32,7 @@
 #include "file.h"
 
 #ifndef	lint
-FILE_RCSID("@(#)$File: apprentice.c,v 1.238 2015/09/12 18:10:42 christos Exp $")
+FILE_RCSID("@(#)$File: apprentice.c,v 1.249 2016/05/17 21:43:07 christos Exp $")
 #endif	/* lint */
 
 #include "magic.h"
@@ -86,9 +86,9 @@ FILE_RCSID("@(#)$File: apprentice.c,v 1.238 2015/09/12 18:10:42 christos Exp $")
 #define ALLOC_CHUNK	(size_t)10
 #define ALLOC_INCR	(size_t)200
 
-#define MAP_TYPE_MMAP	0
+#define MAP_TYPE_USER	0
 #define MAP_TYPE_MALLOC	1
-#define MAP_TYPE_USER	2
+#define MAP_TYPE_MMAP	2
 
 struct magic_entry {
 	struct magic *mp;	
@@ -143,7 +143,7 @@ private int check_buffer(struct magic_set *, struct magic_map *, const char *);
 private void apprentice_unmap(struct magic_map *);
 private int apprentice_compile(struct magic_set *, struct magic_map *,
     const char *);
-private int check_format_type(const char *, int);
+private int check_format_type(const char *, int, const char **);
 private int check_format(struct magic_set *, struct magic *);
 private int get_op(char);
 private int parse_mime(struct magic_set *, struct magic_entry *, const char *);
@@ -268,6 +268,7 @@ static const struct type_tbl_s type_tbl[] = {
 	{ XX("name"),		FILE_NAME,		FILE_FMT_NONE },
 	{ XX("use"),		FILE_USE,		FILE_FMT_NONE },
 	{ XX("clear"),		FILE_CLEAR,		FILE_FMT_NONE },
+	{ XX("der"),		FILE_DER,		FILE_FMT_STR },
 	{ XX_NULL,		FILE_INVALID,		FILE_FMT_NONE },
 };
 
@@ -276,6 +277,7 @@ static const struct type_tbl_s type_tbl[] = {
  * unsigned.
  */
 static const struct type_tbl_s special_tbl[] = {
+	{ XX("der"),		FILE_DER,		FILE_FMT_STR },
 	{ XX("name"),		FILE_NAME,		FILE_FMT_STR },
 	{ XX("use"),		FILE_USE,		FILE_FMT_STR },
 	{ XX_NULL,		FILE_INVALID,		FILE_FMT_NONE },
@@ -532,6 +534,7 @@ file_ms_alloc(int flags)
 	ms->elf_phnum_max = FILE_ELF_PHNUM_MAX;
 	ms->elf_notes_max = FILE_ELF_NOTES_MAX;
 	ms->regex_max = FILE_REGEX_MAX;
+	ms->bytes_max = FILE_BYTES_MAX;
 	return ms;
 free:
 	free(ms);
@@ -546,19 +549,23 @@ apprentice_unmap(struct magic_map *map)
 		return;
 
 	switch (map->type) {
-#ifdef QUICK
-	case MAP_TYPE_MMAP:
-		if (map->p)
-			(void)munmap(map->p, map->len);
+	case MAP_TYPE_USER:
 		break;
-#endif
 	case MAP_TYPE_MALLOC:
-		free(map->p);
-		for (i = 0; i < MAGIC_SETS; i++)
+		for (i = 0; i < MAGIC_SETS; i++) {
+			if ((char *)map->magic[i] >= (char *)map->p &&
+			    (char *)map->magic[i] < (char *)map->p + map->len)
+				continue;
 			free(map->magic[i]);
+		}
+		free(map->p);
 		break;
-	case MAP_TYPE_USER:
+#ifdef QUICK
+	case MAP_TYPE_MMAP:
+		if (map->p && map->p != MAP_FAILED)
+			(void)munmap(map->p, map->len);
 		break;
+#endif
 	default:
 		abort();
 	}
@@ -862,6 +869,10 @@ apprentice_magic_strength(const struct magic *m)
 	case FILE_USE:
 		break;
 
+	case FILE_DER:
+		val += MULT;
+		break;
+
 	default:
 		(void)fprintf(stderr, "Bad type %d\n", m->type);
 		abort();
@@ -1017,6 +1028,7 @@ set_test_type(struct magic *mstart, struct magic *m)
 	case FILE_DOUBLE:
 	case FILE_BEDOUBLE:
 	case FILE_LEDOUBLE:
+	case FILE_DER:
 		mstart->flag |= BINTEST;
 		break;
 	case FILE_STRING:
@@ -1448,6 +1460,7 @@ file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
 		case FILE_NAME:
 		case FILE_USE:
 		case FILE_CLEAR:
+		case FILE_DER:
 			break;
 		default:
 			if (ms->flags & MAGIC_CHECK)
@@ -2103,7 +2116,7 @@ parse(struct magic_set *ms, struct magic_entry *me, const char *line,
 
 	/*
 	 * TODO finish this macro and start using it!
-	 * #define offsetcheck {if (offset > HOWMANY-1) 
+	 * #define offsetcheck {if (offset > ms->bytes_max -1) 
 	 *	magwarn("offset too big"); }
 	 */
 
@@ -2267,7 +2280,7 @@ parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line)
 
 	return parse_extra(ms, me, line,
 	    CAST(off_t, offsetof(struct magic, apple)),
-	    sizeof(m->apple), "APPLE", "!+-./", 0);
+	    sizeof(m->apple), "APPLE", "!+-./?", 0);
 }
 
 /*
@@ -2298,11 +2311,13 @@ parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line)
 }
 
 private int
-check_format_type(const char *ptr, int type)
+check_format_type(const char *ptr, int type, const char **estr)
 {
 	int quad = 0, h;
+	size_t len, cnt;
 	if (*ptr == '\0') {
 		/* Missing format string; bad */
+		*estr = "missing format spec";
 		return -1;
 	}
 
@@ -2339,15 +2354,22 @@ check_format_type(const char *ptr, int type)
 			ptr++;
 		if (*ptr == '.')
 			ptr++;
-		while (isdigit((unsigned char)*ptr)) ptr++;
+#define CHECKLEN() do { \
+	for (len = cnt = 0; isdigit((unsigned char)*ptr); ptr++, cnt++) \
+		len = len * 10 + (*ptr - '0'); \
+	if (cnt > 5 || len > 1024) \
+		goto toolong; \
+} while (/*CONSTCOND*/0)
+
+		CHECKLEN();
 		if (*ptr == '.')
 			ptr++;
-		while (isdigit((unsigned char)*ptr)) ptr++;
+		CHECKLEN();
 		if (quad) {
 			if (*ptr++ != 'l')
-				return -1;
+				goto invalid;
 			if (*ptr++ != 'l')
-				return -1;
+				goto invalid;
 		}
 	
 		switch (*ptr++) {
@@ -2361,9 +2383,11 @@ check_format_type(const char *ptr, int type)
 			case 'o':
 			case 'x':
 			case 'X':
-				return h != 0 ? -1 : 0;
+				if (h == 0)
+					return 0;
+				/*FALLTHROUGH*/
 			default:
-				return -1;
+				goto invalid;
 			}
 		
 		/*
@@ -2372,11 +2396,11 @@ check_format_type(const char *ptr, int type)
 		 */
 		case 'h':
 			if (h-- <= 0)
-				return -1;
+				goto invalid;
 			switch (*ptr++) {
 			case 'h':
 				if (h-- <= 0)
-					return -1;
+					goto invalid;
 				switch (*ptr++) {
 				case 'i':
 				case 'd':
@@ -2386,7 +2410,7 @@ check_format_type(const char *ptr, int type)
 				case 'X':
 					return 0;
 				default:
-					return -1;
+					goto invalid;
 				}
 			case 'i':
 			case 'd':
@@ -2394,13 +2418,17 @@ check_format_type(const char *ptr, int type)
 			case 'o':
 			case 'x':
 			case 'X':
-				return h != 0 ? -1 : 0;
+				if (h == 0)
+					return 0;
+				/*FALLTHROUGH*/
 			default:
-				return -1;
+				goto invalid;
 			}
 #endif
 		case 'c':
-			return h != 2 ? -1 : 0;
+			if (h == 2)
+				return 0;
+			goto invalid;
 		case 'i':
 		case 'd':
 		case 'u':
@@ -2408,12 +2436,14 @@ check_format_type(const char *ptr, int type)
 		case 'x':
 		case 'X':
 #ifdef STRICT_FORMAT
-			return h != 0 ? -1 : 0;
+			if (h == 0)
+				return 0;
+			/*FALLTHROUGH*/
 #else
 			return 0;
 #endif
 		default:
-			return -1;
+			goto invalid;
 		}
 		
 	case FILE_FMT_FLOAT:
@@ -2422,11 +2452,10 @@ check_format_type(const char *ptr, int type)
 			ptr++;
 		if (*ptr == '.')
 			ptr++;
-		while (isdigit((unsigned char)*ptr)) ptr++;
+		CHECKLEN();
 		if (*ptr == '.')
 			ptr++;
-		while (isdigit((unsigned char)*ptr)) ptr++;
-	
+		CHECKLEN();
 		switch (*ptr++) {
 		case 'e':
 		case 'E':
@@ -2437,7 +2466,7 @@ check_format_type(const char *ptr, int type)
 			return 0;
 			
 		default:
-			return -1;
+			goto invalid;
 		}
 		
 
@@ -2456,14 +2485,19 @@ check_format_type(const char *ptr, int type)
 		case 's':
 			return 0;
 		default:
-			return -1;
+			goto invalid;
 		}
 		
 	default:
 		/* internal error */
 		abort();
 	}
-	/*NOTREACHED*/
+invalid:
+	/* Must return: falling into toolong would clobber the message. */
+	*estr = "not valid";
+	return -1;
+toolong:
+	*estr = "too long";
 	return -1;
 }
 	
@@ -2475,6 +2507,7 @@ private int
 check_format(struct magic_set *ms, struct magic *m)
 {
 	char *ptr;
+	const char *estr;
 
 	for (ptr = m->desc; *ptr; ptr++)
 		if (*ptr == '%')
@@ -2498,13 +2531,13 @@ check_format(struct magic_set *ms, struct magic *m)
 	}
 
 	ptr++;
-	if (check_format_type(ptr, m->type) == -1) {
+	if (check_format_type(ptr, m->type, &estr) == -1) {
 		/*
 		 * TODO: this error message is unhelpful if the format
 		 * string is not one character long
 		 */
-		file_magwarn(ms, "Printf format `%c' is not valid for type "
-		    "`%s' in description `%s'", *ptr ? *ptr : '?',
+		file_magwarn(ms, "Printf format is %s for type "
+		    "`%s' in description `%s'", estr,
 		    file_names[m->type], m->desc);
 		return -1;
 	}
@@ -2538,6 +2571,7 @@ getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
 	case FILE_SEARCH:
 	case FILE_NAME:
 	case FILE_USE:
+	case FILE_DER:
 		*p = getstr(ms, m, *p, action == FILE_COMPILE);
 		if (*p == NULL) {
 			if (ms->flags & MAGIC_CHECK)
@@ -2902,6 +2936,7 @@ apprentice_map(struct magic_set *ms, const char *fn)
 		file_oomem(ms, sizeof(*map));
 		goto error;
 	}
+	map->type = MAP_TYPE_USER;	/* unspecified */
 
 	dbname = mkdbname(ms, fn, 0);
 	if (dbname == NULL)
@@ -2922,13 +2957,14 @@ apprentice_map(struct magic_set *ms, const char *fn)
 
 	map->len = (size_t)st.st_size;
 #ifdef QUICK
+	map->type = MAP_TYPE_MMAP;
 	if ((map->p = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE,
 	    MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) {
 		file_error(ms, errno, "cannot map `%s'", dbname);
 		goto error;
 	}
-	map->type = MAP_TYPE_MMAP;
 #else
+	map->type = MAP_TYPE_MALLOC;
 	if ((map->p = CAST(void *, malloc(map->len))) == NULL) {
 		file_oomem(ms, map->len);
 		goto error;
@@ -2937,7 +2973,6 @@ apprentice_map(struct magic_set *ms, const char *fn)
 		file_badread(ms);
 		goto error;
 	}
-	map->type = MAP_TYPE_MALLOC;
 #define RET	1
 #endif
 	(void)close(fd);
@@ -2945,6 +2980,12 @@ apprentice_map(struct magic_set *ms, const char *fn)
 
 	if (check_buffer(ms, map, dbname) != 0)
 		goto error;
+#ifdef QUICK
+	if (mprotect(map->p, (size_t)st.st_size, PROT_READ) == -1) {
+		file_error(ms, errno, "cannot mprotect `%s'", dbname);
+		goto error;
+	}
+#endif
 
 	free(dbname);
 	return map;
@@ -3063,6 +3104,7 @@ apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn)
 		(void)close(fd);
 	rv = 0;
 out:
+	apprentice_unmap(map);
 	free(dbname);
 	return rv;
 }

+ 8 - 10
src/ascmagic.c

@@ -35,7 +35,7 @@
 #include "file.h"
 
 #ifndef	lint
-FILE_RCSID("@(#)$File: ascmagic.c,v 1.92 2015/04/09 20:01:41 christos Exp $")
+FILE_RCSID("@(#)$File: ascmagic.c,v 1.95 2016/05/03 16:10:37 christos Exp $")
 #endif	/* lint */
 
 #include "magic.h"
@@ -79,9 +79,6 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes,
 	const char *code_mime = NULL;
 	const char *type = NULL;
 
-	if (ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION))
-		return 0;
-
 	nbytes = trim_nuls(buf, nbytes);
 
 	/* If file doesn't look like any sort of text, give up. */
@@ -123,9 +120,6 @@ file_ascmagic_with_encoding(struct magic_set *ms, const unsigned char *buf,
 	size_t last_line_end = (size_t)-1;
 	int has_long_lines = 0;
 
-	if (ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION))
-		return 0;
-
 	nbytes = trim_nuls(buf, nbytes);
 
 	/* If we have fewer than 2 bytes, give up. */
@@ -147,10 +141,14 @@ file_ascmagic_with_encoding(struct magic_set *ms, const unsigned char *buf,
 		    == NULL)
 			goto done;
 		if ((rv = file_softmagic(ms, utf8_buf,
-		    (size_t)(utf8_end - utf8_buf), 0, NULL,
+		    (size_t)(utf8_end - utf8_buf), NULL, NULL,
 		    TEXTTEST, text)) == 0)
 			rv = -1;
+		if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)))
+			return rv == -1 ? 0 : 1;
 	}
+	if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)))
+		return 0;
 
 	/* Now try to discover other details about the file. */
 	for (i = 0; i < ulen; i++) {
@@ -183,10 +181,10 @@ file_ascmagic_with_encoding(struct magic_set *ms, const unsigned char *buf,
 	}
 
 	/* Beware, if the data has been truncated, the final CR could have
-	   been followed by a LF.  If we have HOWMANY bytes, it indicates
+	   been followed by a LF.  If we have ms->bytes_max bytes, it indicates
 	   that the data might have been truncated, probably even before
 	   this function was called. */
-	if (seen_cr && nbytes < HOWMANY)
+	if (seen_cr && nbytes < ms->bytes_max)
 		n_cr++;
 
 	if (strcmp(type, "binary") == 0) {

+ 80 - 49
src/cdf.c

@@ -35,7 +35,7 @@
 #include "file.h"
 
 #ifndef lint
-FILE_RCSID("@(#)$File: cdf.c,v 1.76 2015/02/28 00:18:02 christos Exp $")
+FILE_RCSID("@(#)$File: cdf.c,v 1.82 2016/06/01 22:25:25 christos Exp $")
 #endif
 
 #include <assert.h>
@@ -267,13 +267,32 @@ cdf_unpack_dir(cdf_directory_t *d, char *buf)
 }
 
 static int
+cdf_zero_stream(cdf_stream_t *scn)
+{
+	scn->sst_len = 0;
+	scn->sst_dirlen = 0;
+	scn->sst_ss = 0;
+	free(scn->sst_tab);
+	scn->sst_tab = NULL;
+	return -1;
+}
+
+static size_t
+cdf_check_stream(const cdf_stream_t *sst, const cdf_header_t *h)
+{
+	size_t ss = sst->sst_dirlen < h->h_min_size_standard_stream ?
+	    CDF_SHORT_SEC_SIZE(h) : CDF_SEC_SIZE(h);
+	assert(ss == sst->sst_ss);
+	return sst->sst_ss;
+}
+
+static int
 cdf_check_stream_offset(const cdf_stream_t *sst, const cdf_header_t *h,
     const void *p, size_t tail, int line)
 {
 	const char *b = (const char *)sst->sst_tab;
 	const char *e = ((const char *)p) + tail;
-	size_t ss = sst->sst_dirlen < h->h_min_size_standard_stream ?
-	    CDF_SHORT_SEC_SIZE(h) : CDF_SEC_SIZE(h);
+	size_t ss = cdf_check_stream(sst, h);
 	/*LINTED*/(void)&line;
 	if (e >= b && (size_t)(e - b) <= ss * sst->sst_len)
 		return 0;
@@ -290,10 +309,8 @@ cdf_read(const cdf_info_t *info, off_t off, void *buf, size_t len)
 {
 	size_t siz = (size_t)off + len;
 
-	if ((off_t)(off + len) != (off_t)siz) {
-		errno = EINVAL;
-		return -1;
-	}
+	if ((off_t)(off + len) != (off_t)siz)
+		goto out;
 
 	if (info->i_buf != NULL && info->i_len >= siz) {
 		(void)memcpy(buf, &info->i_buf[off], len);
@@ -301,12 +318,15 @@ cdf_read(const cdf_info_t *info, off_t off, void *buf, size_t len)
 	}
 
 	if (info->i_fd == -1)
-		return -1;
+		goto out;
 
 	if (pread(info->i_fd, buf, len, off) != (ssize_t)len)
 		return -1;
 
 	return (ssize_t)len;
+out:
+	errno = EINVAL;
+	return -1;
 }
 
 int
@@ -363,11 +383,14 @@ cdf_read_short_sector(const cdf_stream_t *sst, void *buf, size_t offs,
 		DPRINTF(("Out of bounds read %" SIZE_T_FORMAT "u > %"
 		    SIZE_T_FORMAT "u\n",
 		    pos + len, CDF_SEC_SIZE(h) * sst->sst_len));
-		return -1;
+		goto out;
 	}
 	(void)memcpy(((char *)buf) + offs,
 	    ((const char *)sst->sst_tab) + pos, len);
 	return len;
+out:
+	errno = EFTYPE;
+	return -1;
 }
 
 /*
@@ -421,8 +444,7 @@ cdf_read_sat(const cdf_info_t *info, cdf_header_t *h, cdf_sat_t *sat)
 			goto out;
 		if (j >= CDF_LOOP_LIMIT) {
 			DPRINTF(("Reading master sector loop limit"));
-			errno = EFTYPE;
-			goto out2;
+			goto out3;
 		}
 		if (cdf_read_sector(info, msa, 0, ss, h, mid) != (ssize_t)ss) {
 			DPRINTF(("Reading master sector %d", mid));
@@ -435,8 +457,7 @@ cdf_read_sat(const cdf_info_t *info, cdf_header_t *h, cdf_sat_t *sat)
 			if (i >= sat->sat_len) {
 			    DPRINTF(("Out of bounds reading MSA %" SIZE_T_FORMAT
 				"u >= %" SIZE_T_FORMAT "u", i, sat->sat_len));
-			    errno = EFTYPE;
-			    goto out2;
+			    goto out3;
 			}
 			if (cdf_read_sector(info, sat->sat_tab, ss * i, ss, h,
 			    sec) != (ssize_t)ss) {
@@ -451,6 +472,8 @@ out:
 	sat->sat_len = i;
 	free(msa);
 	return 0;
+out3:
+	errno = EFTYPE;
 out2:
 	free(msa);
 out1:
@@ -476,23 +499,24 @@ cdf_count_chain(const cdf_sat_t *sat, cdf_secid_t sid, size_t size)
 		DPRINTF((" %d", sid));
 		if (j >= CDF_LOOP_LIMIT) {
 			DPRINTF(("Counting chain loop limit"));
-			errno = EFTYPE;
-			return (size_t)-1;
+			goto out;
 		}
 		if (sid >= maxsector) {
 			DPRINTF(("Sector %d >= %d\n", sid, maxsector));
-			errno = EFTYPE;
-			return (size_t)-1;
+			goto out;
 		}
 		sid = CDF_TOLE4((uint32_t)sat->sat_tab[sid]);
 	}
 	if (i == 0) {
 		DPRINTF((" none, sid: %d\n", sid));
-		return (size_t)-1;
+		goto out;
 
 	}
 	DPRINTF(("\n"));
 	return i;
+out:
+	errno = EFTYPE;
+	return (size_t)-1;
 }
 
 int
@@ -501,27 +525,27 @@ cdf_read_long_sector_chain(const cdf_info_t *info, const cdf_header_t *h,
 {
 	size_t ss = CDF_SEC_SIZE(h), i, j;
 	ssize_t nr;
+	scn->sst_tab = NULL;
 	scn->sst_len = cdf_count_chain(sat, sid, ss);
 	scn->sst_dirlen = len;
+	scn->sst_ss = ss;
 
 	if (scn->sst_len == (size_t)-1)
-		return -1;
+		goto out;
 
 	scn->sst_tab = calloc(scn->sst_len, ss);
 	if (scn->sst_tab == NULL)
-		return -1;
+		return cdf_zero_stream(scn);
 
 	for (j = i = 0; sid >= 0; i++, j++) {
 		if (j >= CDF_LOOP_LIMIT) {
 			DPRINTF(("Read long sector chain loop limit"));
-			errno = EFTYPE;
 			goto out;
 		}
 		if (i >= scn->sst_len) {
 			DPRINTF(("Out of bounds reading long sector chain "
 			    "%" SIZE_T_FORMAT "u > %" SIZE_T_FORMAT "u\n", i,
 			    scn->sst_len));
-			errno = EFTYPE;
 			goto out;
 		}
 		if ((nr = cdf_read_sector(info, scn->sst_tab, i * ss, ss, h,
@@ -537,8 +561,8 @@ cdf_read_long_sector_chain(const cdf_info_t *info, const cdf_header_t *h,
 	}
 	return 0;
 out:
-	free(scn->sst_tab);
-	return -1;
+	errno = EFTYPE;
+	return cdf_zero_stream(scn);
 }
 
 int
@@ -547,27 +571,27 @@ cdf_read_short_sector_chain(const cdf_header_t *h,
     cdf_secid_t sid, size_t len, cdf_stream_t *scn)
 {
 	size_t ss = CDF_SHORT_SEC_SIZE(h), i, j;
+	scn->sst_tab = NULL;
 	scn->sst_len = cdf_count_chain(ssat, sid, CDF_SEC_SIZE(h));
 	scn->sst_dirlen = len;
+	scn->sst_ss = ss;
 
-	if (sst->sst_tab == NULL || scn->sst_len == (size_t)-1)
-		return -1;
+	if (scn->sst_len == (size_t)-1)
+		goto out;
 
 	scn->sst_tab = calloc(scn->sst_len, ss);
 	if (scn->sst_tab == NULL)
-		return -1;
+		return cdf_zero_stream(scn);
 
 	for (j = i = 0; sid >= 0; i++, j++) {
 		if (j >= CDF_LOOP_LIMIT) {
 			DPRINTF(("Read short sector chain loop limit"));
-			errno = EFTYPE;
 			goto out;
 		}
 		if (i >= scn->sst_len) {
 			DPRINTF(("Out of bounds reading short sector chain "
 			    "%" SIZE_T_FORMAT "u > %" SIZE_T_FORMAT "u\n",
 			    i, scn->sst_len));
-			errno = EFTYPE;
 			goto out;
 		}
 		if (cdf_read_short_sector(sst, scn->sst_tab, i * ss, ss, h,
@@ -579,8 +603,8 @@ cdf_read_short_sector_chain(const cdf_header_t *h,
 	}
 	return 0;
 out:
-	free(scn->sst_tab);
-	return -1;
+	errno = EFTYPE;
+	return cdf_zero_stream(scn);
 }
 
 int
@@ -625,7 +649,6 @@ cdf_read_dir(const cdf_info_t *info, const cdf_header_t *h,
 	for (j = i = 0; i < ns; i++, j++) {
 		if (j >= CDF_LOOP_LIMIT) {
 			DPRINTF(("Read dir loop limit"));
-			errno = EFTYPE;
 			goto out;
 		}
 		if (cdf_read_sector(info, buf, 0, ss, h, sid) != (ssize_t)ss) {
@@ -646,6 +669,7 @@ cdf_read_dir(const cdf_info_t *info, const cdf_header_t *h,
 out:
 	free(dir->dir_tab);
 	free(buf);
+	errno = EFTYPE;
 	return -1;
 }
 
@@ -658,36 +682,37 @@ cdf_read_ssat(const cdf_info_t *info, const cdf_header_t *h,
 	size_t ss = CDF_SEC_SIZE(h);
 	cdf_secid_t sid = h->h_secid_first_sector_in_short_sat;
 
-	ssat->sat_len = cdf_count_chain(sat, sid, CDF_SEC_SIZE(h));
+	ssat->sat_tab = NULL;
+	ssat->sat_len = cdf_count_chain(sat, sid, ss);
 	if (ssat->sat_len == (size_t)-1)
-		return -1;
+		goto out;
 
 	ssat->sat_tab = CAST(cdf_secid_t *, calloc(ssat->sat_len, ss));
 	if (ssat->sat_tab == NULL)
-		return -1;
+		goto out1;
 
 	for (j = i = 0; sid >= 0; i++, j++) {
 		if (j >= CDF_LOOP_LIMIT) {
 			DPRINTF(("Read short sat sector loop limit"));
-			errno = EFTYPE;
 			goto out;
 		}
 		if (i >= ssat->sat_len) {
 			DPRINTF(("Out of bounds reading short sector chain "
 			    "%" SIZE_T_FORMAT "u > %" SIZE_T_FORMAT "u\n", i,
 			    ssat->sat_len));
-			errno = EFTYPE;
 			goto out;
 		}
 		if (cdf_read_sector(info, ssat->sat_tab, i * ss, ss, h, sid) !=
 		    (ssize_t)ss) {
 			DPRINTF(("Reading short sat sector %d", sid));
-			goto out;
+			goto out1;
 		}
 		sid = CDF_TOLE4((uint32_t)sat->sat_tab[sid]);
 	}
 	return 0;
 out:
+	errno = EFTYPE;
+out1:
 	free(ssat->sat_tab);
 	return -1;
 }
@@ -715,13 +740,13 @@ cdf_read_short_stream(const cdf_info_t *info, const cdf_header_t *h,
 	if (d->d_stream_first_sector < 0)
 		goto out;
 
-	return	cdf_read_long_sector_chain(info, h, sat,
+	return cdf_read_long_sector_chain(info, h, sat,
 	    d->d_stream_first_sector, d->d_size, scn);
 out:
 	scn->sst_tab = NULL;
-	scn->sst_len = 0;
-	scn->sst_dirlen = 0;
-	return 0;
+	(void)cdf_zero_stream(scn);
+	errno = EFTYPE;
+	return -1;
 }
 
 static int
@@ -750,8 +775,10 @@ cdf_read_user_stream(const cdf_info_t *info, const cdf_header_t *h,
 	const cdf_directory_t *d;
 	int i = cdf_find_stream(dir, name, CDF_DIR_TYPE_USER_STREAM);
 
-	if (i <= 0)
+	if (i <= 0) {
+		memset(scn, 0, sizeof(*scn));
 		return -1;
+	}
 
 	d = &dir->dir_tab[i - 1];
 	return cdf_read_sector_chain(info, h, sat, ssat, sst,
@@ -824,7 +851,7 @@ cdf_read_property_info(const cdf_stream_t *sst, const cdf_header_t *h,
 		    malloc(*maxcount * sizeof(*inp)));
 	}
 	if (inp == NULL)
-		goto out;
+		goto out1;
 	*info = inp;
 	inp += *count;
 	*count += sh.sh_properties;
@@ -931,7 +958,7 @@ cdf_read_property_info(const cdf_stream_t *sst, const cdf_header_t *h,
 				inp = CAST(cdf_property_info_t *,
 				    realloc(*info, *maxcount * sizeof(*inp)));
 				if (inp == NULL)
-					goto out;
+					goto out1;
 				*info = inp;
 				inp = *info + nelem;
 			}
@@ -976,6 +1003,8 @@ cdf_read_property_info(const cdf_stream_t *sst, const cdf_header_t *h,
 	}
 	return 0;
 out:
+	errno = EFTYPE;
+out1:
 	free(*info);
 	return -1;
 }
@@ -1022,8 +1051,7 @@ int
 cdf_unpack_catalog(const cdf_header_t *h, const cdf_stream_t *sst,
     cdf_catalog_t **cat)
 {
-	size_t ss = sst->sst_dirlen < h->h_min_size_standard_stream ?
-	    CDF_SHORT_SEC_SIZE(h) : CDF_SEC_SIZE(h);
+	size_t ss = cdf_check_stream(sst, h);
 	const char *b = CAST(const char *, sst->sst_tab);
 	const char *eb = b + ss * sst->sst_len;
 	size_t nr, i, j, k;
@@ -1040,9 +1068,13 @@ cdf_unpack_catalog(const cdf_header_t *h, const cdf_stream_t *sst,
 		if (b > eb)
 		    break;
 	}
+	if (nr == 0)
+		return -1;
 	nr--;
 	*cat = CAST(cdf_catalog_t *,
 	    malloc(sizeof(cdf_catalog_t) + nr * sizeof(*ce)));
+	if (*cat == NULL)
+		return -1;
 	ce = (*cat)->cat_e;
 	memset(ce, 0, nr * sizeof(*ce));
 	b = CAST(const char *, sst->sst_tab);
@@ -1245,8 +1277,7 @@ cdf_dump(const void *v, size_t len)
 void
 cdf_dump_stream(const cdf_header_t *h, const cdf_stream_t *sst)
 {
-	size_t ss = sst->sst_dirlen < h->h_min_size_standard_stream ?
-	    CDF_SHORT_SEC_SIZE(h) : CDF_SEC_SIZE(h);
+	size_t ss = sst->sst_ss;
 	cdf_dump(sst->sst_tab, ss * sst->sst_len);
 }
 

+ 2 - 1
src/cdf.h

@@ -129,6 +129,7 @@ typedef struct {
 	void *sst_tab;
 	size_t sst_len;
 	size_t sst_dirlen;
+	size_t sst_ss;
 } cdf_stream_t;
 
 typedef struct {
@@ -277,7 +278,7 @@ typedef struct {
 
 typedef struct {
 	size_t cat_num;
-	cdf_catalog_entry_t cat_e[0];
+	cdf_catalog_entry_t cat_e[1];
 } cdf_catalog_t;
 
 struct timespec;

+ 407 - 193
src/compress.c

@@ -35,7 +35,7 @@
 #include "file.h"
 
 #ifndef lint
-FILE_RCSID("@(#)$File: compress.c,v 1.80 2015/06/03 18:21:24 christos Exp $")
+FILE_RCSID("@(#)$File: compress.c,v 1.97 2016/05/13 23:02:28 christos Exp $")
 #endif
 
 #include "magic.h"
@@ -45,6 +45,8 @@ FILE_RCSID("@(#)$File: compress.c,v 1.80 2015/06/03 18:21:24 christos Exp $")
 #endif
 #include <string.h>
 #include <errno.h>
+#include <ctype.h>
+#include <stdarg.h>
 #ifdef HAVE_SIGNAL_H
 #include <signal.h>
 # ifndef HAVE_SIG_T
@@ -60,46 +62,121 @@ typedef void (*sig_t)(int);
 #if defined(HAVE_SYS_TIME_H)
 #include <sys/time.h>
 #endif
-#if defined(HAVE_ZLIB_H) && defined(HAVE_LIBZ)
+#if defined(HAVE_ZLIB_H)
 #define BUILTIN_DECOMPRESS
 #include <zlib.h>
 #endif
+#ifdef DEBUG
+int tty = -1;
+#define DPRINTF(...)	do { \
+	if (tty == -1) \
+		tty = open("/dev/tty", O_RDWR); \
+	if (tty == -1) \
+		abort(); \
+	dprintf(tty, __VA_ARGS__); \
+} while (/*CONSTCOND*/0)
+#else
+#define DPRINTF(...)
+#endif
+
+#ifdef ZLIBSUPPORT
+/*
+ * The following python code is not really used because ZLIBSUPPORT is only
+ * defined if we have a built-in zlib, and the built-in zlib handles that.
+ */
+static const char zlibcode[] =
+    "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";
+
+static const char *zlib_args[] = { "python", "-c", zlibcode, NULL };
+/*
+ * Heuristic test for a zlib stream header in the first two bytes of buf.
+ * Returns 1 when the header looks valid, 0 otherwise.
+ *
+ * buf[0] must have 8 in its low nibble and its top bit clear, and the
+ * 16-bit value built from buf[0] and buf[1] must be a multiple of 31.
+ * Both buf[0] and buf[1] are read, so the caller has to supply at least
+ * two bytes.
+ *
+ * NOTE(review): the compr[] entry for this matcher uses maglen 0, so the
+ * caller's `nbytes < maglen' test does not guarantee two readable bytes
+ * -- confirm.
+ * NOTE(review): the shift/or arithmetic does not depend on host byte
+ * order, yet the value is assembled differently depending on the
+ * endianness probe: (buf[0] << 8) | buf[1] when s[0] == 1, and
+ * buf[0] | (buf[1] << 8) otherwise.  RFC 1950 defines the check on
+ * CMF * 256 + FLG, i.e. the first form -- confirm the second branch.
+ */
+static int
+zlibcmp(const unsigned char *buf)
+{
+	unsigned short x = 1;
+	unsigned char *s = (unsigned char *)&x;
+
+	if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
+		return 0;
+	if (s[0] != 1)	/* endianness test */
+		x = buf[0] | (buf[1] << 8);
+	else
+		x = buf[1] | (buf[0] << 8);
+	if (x % 31)
+		return 0;
+	return 1;
+}
+#endif
+
+#define gzip_flags "-cd"
+#define lrzip_flags "-do"
+#define lzip_flags gzip_flags
+
+static const char *gzip_args[] = {
+	"gzip", gzip_flags, NULL
+};
+static const char *uncompress_args[] = {
+	"uncompress", "-c", NULL
+};
+static const char *bzip2_args[] = {
+	"bzip2", "-cd", NULL
+};
+static const char *lzip_args[] = {
+	"lzip", lzip_flags, NULL
+};
+static const char *xz_args[] = {
+	"xz", "-cd", NULL
+};
+static const char *lrzip_args[] = {
+	"lrzip", lrzip_flags, NULL
+};
+static const char *lz4_args[] = {
+	"lz4", "-cd", NULL
+};
 
 private const struct {
-	const char magic[8];
+	const void *magic;
 	size_t maglen;
-	const char *argv[3];
-	int silent;
+	const char **argv;
 } compr[] = {
-	{ "\037\235", 2, { "gzip", "-cdq", NULL }, 1 },		/* compressed */
+	{ "\037\235",	2, gzip_args },		/* compressed */
 	/* Uncompress can get stuck; so use gzip first if we have it
 	 * Idea from Damien Clark, thanks! */
-	{ "\037\235", 2, { "uncompress", "-c", NULL }, 1 },	/* compressed */
-	{ "\037\213", 2, { "gzip", "-cdq", NULL }, 1 },		/* gzipped */
-	{ "\037\236", 2, { "gzip", "-cdq", NULL }, 1 },		/* frozen */
-	{ "\037\240", 2, { "gzip", "-cdq", NULL }, 1 },		/* SCO LZH */
+	{ "\037\235",	2, uncompress_args },	/* compressed */
+	{ "\037\213",	2, gzip_args },		/* gzipped */
+	{ "\037\236",	2, gzip_args },		/* frozen */
+	{ "\037\240",	2, gzip_args },		/* SCO LZH */
 	/* the standard pack utilities do not accept standard input */
-	{ "\037\036", 2, { "gzip", "-cdq", NULL }, 0 },		/* packed */
-	{ "PK\3\4",   4, { "gzip", "-cdq", NULL }, 1 },		/* pkzipped, */
-					    /* ...only first file examined */
-	{ "BZh",      3, { "bzip2", "-cd", NULL }, 1 },		/* bzip2-ed */
-	{ "LZIP",     4, { "lzip", "-cdq", NULL }, 1 },
- 	{ "\3757zXZ\0",6,{ "xz", "-cd", NULL }, 1 },		/* XZ Utils */
- 	{ "LRZI",     4, { "lrzip", "-dqo-", NULL }, 1 },	/* LRZIP */
- 	{ "\004\"M\030", 4, { "lz4", "-cd", NULL }, 1 },	/* LZ4 */
+	{ "\037\036",	2, gzip_args },		/* packed */
+	{ "PK\3\4",	4, gzip_args },		/* pkzipped, */
+	/* ...only first file examined */
+	{ "BZh",	3, bzip2_args },	/* bzip2-ed */
+	{ "LZIP",	4, lzip_args },		/* lzip-ed */
+ 	{ "\3757zXZ\0",	6, xz_args },		/* XZ Utils */
+ 	{ "LRZI",	4, lrzip_args },	/* LRZIP */
+ 	{ "\004\"M\030",4, lz4_args },		/* LZ4 */
+#ifdef ZLIBSUPPORT
+	{ zlibcmp,	0, zlib_args },		/* zlib */
+#endif
 };
 
-#define NODATA ((size_t)~0)
+#define OKDATA 	0
+#define NODATA	1
+#define ERRDATA	2
 
 private ssize_t swrite(int, const void *, size_t);
 #if HAVE_FORK
 private size_t ncompr = sizeof(compr) / sizeof(compr[0]);
-private size_t uncompressbuf(struct magic_set *, int, size_t,
-    const unsigned char *, unsigned char **, size_t);
+private int uncompressbuf(int, size_t, size_t, const unsigned char *,
+    unsigned char **, size_t *);
 #ifdef BUILTIN_DECOMPRESS
-private size_t uncompressgzipped(struct magic_set *, const unsigned char *,
-    unsigned char **, size_t);
+private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
+    size_t *, int);
+private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
+    size_t *);
 #endif
+static int makeerror(unsigned char **, size_t *, const char *, ...)
+    __attribute__((__format__(__printf__, 3, 4)));
+private const char *methodname(size_t);
 
 protected int
 file_zmagic(struct magic_set *ms, int fd, const char *name,
@@ -107,7 +184,9 @@ file_zmagic(struct magic_set *ms, int fd, const char *name,
 {
 	unsigned char *newbuf = NULL;
 	size_t i, nsz;
-	int rv = 0;
+	char *rbuf;
+	file_pushbuf_t *pb;
+	int urv, prv, rv = 0;
 	int mime = ms->flags & MAGIC_MIME;
 #ifdef HAVE_SIGNAL_H
 	sig_t osigpipe;
@@ -120,37 +199,80 @@ file_zmagic(struct magic_set *ms, int fd, const char *name,
 	osigpipe = signal(SIGPIPE, SIG_IGN);
 #endif
 	for (i = 0; i < ncompr; i++) {
+		int zm;
 		if (nbytes < compr[i].maglen)
 			continue;
-		if (memcmp(buf, compr[i].magic, compr[i].maglen) == 0 &&
-		    (nsz = uncompressbuf(ms, fd, i, buf, &newbuf,
-		    nbytes)) != NODATA) {
+#ifdef ZLIBSUPPORT
+		if (compr[i].maglen == 0)
+			zm = (CAST(int (*)(const unsigned char *),
+			    CCAST(void *, compr[i].magic)))(buf);
+		else
+#endif
+			zm = memcmp(buf, compr[i].magic, compr[i].maglen) == 0;
+
+		if (!zm)
+			continue;
+		nsz = nbytes;
+		urv = uncompressbuf(fd, ms->bytes_max, i, buf, &newbuf, &nsz);
+		DPRINTF("uncompressbuf = %d, %s, %zu\n", urv, (char *)newbuf,
+		    nsz);
+		switch (urv) {
+		case OKDATA:
+		case ERRDATA:
+			
 			ms->flags &= ~MAGIC_COMPRESS;
-			rv = -1;
-			if (file_buffer(ms, -1, name, newbuf, nsz) == -1)
+			if (urv == ERRDATA)
+				prv = file_printf(ms, "%s ERROR: %s",
+				    methodname(i), newbuf);
+			else
+				prv = file_buffer(ms, -1, name, newbuf, nsz);
+			if (prv == -1)
 				goto error;
-
-			if ((ms->flags & MAGIC_COMPRESS_TRANSP) == 0 &&
-			    (mime == MAGIC_MIME || mime == 0)) {
-				if (file_printf(ms, mime ?
-				    " compressed-encoding=" : " (") == -1)
-					goto error;
-				if (file_buffer(ms, -1, NULL, buf, nbytes) == -1)
-					goto error;
-				if (!mime && file_printf(ms, ")") == -1)
+			rv = 1;
+			if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
+				goto out;
+			if (mime != MAGIC_MIME && mime != 0)
+				goto out;
+			if ((file_printf(ms,
+			    mime ? " compressed-encoding=" : " (")) == -1)
+				goto error;
+			if ((pb = file_push_buffer(ms)) == NULL)
+				goto error;
+			/*
+			 * XXX: If file_buffer fails here, we overwrite
+			 * the compressed text. FIXME.
+			 */
+			if (file_buffer(ms, -1, NULL, buf, nbytes) == -1)
+				goto error;
+			if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
+				if (file_printf(ms, "%s", rbuf) == -1) {
+					free(rbuf);
 					goto error;
+				}
+				free(rbuf);
 			}
-
-			rv = 1;
+			if (!mime && file_printf(ms, ")") == -1)
+				goto error;
+			/*FALLTHROUGH*/
+		case NODATA:
+			break;
+		default:
+			abort();
+			/*NOTREACHED*/
+		error:
+			rv = -1;
 			break;
 		}
 	}
-error:
+out:
+	DPRINTF("rv = %d\n", rv);
+
 #ifdef HAVE_SIGNAL_H
 	(void)signal(SIGPIPE, osigpipe);
 #endif
 	free(newbuf);
 	ms->flags |= MAGIC_COMPRESS;
+	DPRINTF("Zmagic returns %d\n", rv);
 	return rv;
 }
 #endif
@@ -322,222 +444,314 @@ file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
 #define FNAME		(1 << 3)
 #define FCOMMENT	(1 << 4)
 
-private size_t
-uncompressgzipped(struct magic_set *ms, const unsigned char *old,
-    unsigned char **newch, size_t n)
+
+private int
+uncompressgzipped(const unsigned char *old, unsigned char **newch,
+    size_t bytes_max, size_t *n)
 {
 	unsigned char flg = old[3];
 	size_t data_start = 10;
-	z_stream z;
-	int rc;
 
 	if (flg & FEXTRA) {
-		if (data_start+1 >= n)
-			return 0;
+		if (data_start + 1 >= *n)
+			goto err;
 		data_start += 2 + old[data_start] + old[data_start + 1] * 256;
 	}
 	if (flg & FNAME) {
-		while(data_start < n && old[data_start])
+		while(data_start < *n && old[data_start])
 			data_start++;
 		data_start++;
 	}
-	if(flg & FCOMMENT) {
-		while(data_start < n && old[data_start])
+	if (flg & FCOMMENT) {
+		while(data_start < *n && old[data_start])
 			data_start++;
 		data_start++;
 	}
-	if(flg & FHCRC)
+	if (flg & FHCRC)
 		data_start += 2;
 
-	if (data_start >= n)
-		return 0;
-	if ((*newch = CAST(unsigned char *, malloc(HOWMANY + 1))) == NULL) {
-		return 0;
-	}
-	
-	/* XXX: const castaway, via strchr */
-	z.next_in = (Bytef *)strchr((const char *)old + data_start,
-	    old[data_start]);
-	z.avail_in = CAST(uint32_t, (n - data_start));
+	if (data_start >= *n)
+		goto err;
+
+	*n -= data_start;
+	old += data_start;
+	return uncompresszlib(old, newch, bytes_max, n, 0);
+err:
+	return makeerror(newch, n, "File too short");
+}
+
+private int
+uncompresszlib(const unsigned char *old, unsigned char **newch,
+    size_t bytes_max, size_t *n, int zlib)
+{
+	int rc;
+	z_stream z;
+
+	if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL) 
+		return makeerror(newch, n, "No buffer, %s", strerror(errno));
+
+	z.next_in = CCAST(Bytef *, old);
+	z.avail_in = CAST(uint32_t, *n);
 	z.next_out = *newch;
-	z.avail_out = HOWMANY;
+	z.avail_out = bytes_max;
 	z.zalloc = Z_NULL;
 	z.zfree = Z_NULL;
 	z.opaque = Z_NULL;
 
 	/* LINTED bug in header macro */
-	rc = inflateInit2(&z, -15);
-	if (rc != Z_OK) {
-		file_error(ms, 0, "zlib: %s", z.msg);
-		return 0;
-	}
+	rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
+	if (rc != Z_OK)
+		goto err;
 
 	rc = inflate(&z, Z_SYNC_FLUSH);
-	if (rc != Z_OK && rc != Z_STREAM_END) {
-		file_error(ms, 0, "zlib: %s", z.msg);
-		return 0;
-	}
+	if (rc != Z_OK && rc != Z_STREAM_END)
+		goto err;
 
-	n = (size_t)z.total_out;
-	(void)inflateEnd(&z);
+	*n = (size_t)z.total_out;
+	rc = inflateEnd(&z);
+	if (rc != Z_OK)
+		goto err;
 	
 	/* let's keep the nul-terminate tradition */
-	(*newch)[n] = '\0';
+	(*newch)[*n] = '\0';
 
-	return n;
+	return OKDATA;
+err:
+	strlcpy((char *)*newch, z.msg, bytes_max);
+	*n = strlen((char *)*newch);
+	return ERRDATA;
 }
 #endif
 
-private size_t
-uncompressbuf(struct magic_set *ms, int fd, size_t method,
-    const unsigned char *old, unsigned char **newch, size_t n)
+static int
+makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
+{	/*
+	 * Format an error message, printf-style, into a newly allocated
+	 * string and hand it back through *buf, with its length in *len.
+	 * Returns ERRDATA on success.  If the message itself cannot be
+	 * produced, *buf is set to NULL, *len to 0 and NODATA is returned.
+	 * Any previous value of *buf is overwritten without being freed.
+	 */
+	char *msg;
+	va_list ap;
+	int rv;
+
+	va_start(ap, fmt);
+	rv = vasprintf(&msg, fmt, ap);	/* allocates msg on success */
+	va_end(ap);
+	if (rv < 0) {
+		*buf = NULL;
+		*len = 0;
+		return NODATA;
+	}
+	*buf = (unsigned char *)msg;	/* the caller now owns msg */
+	*len = strlen(msg);
+	return ERRDATA;
+}
+/*
+ * Close descriptor fd[i] unless it is already marked closed (-1), then
+ * mark it as -1 so that further calls for the same slot do nothing.
+ * The result of close() is deliberately ignored.
+ */
+static void
+closefd(int *fd, size_t i)
+{
+	if (fd[i] == -1)
+		return;
+	(void) close(fd[i]);
+	fd[i] = -1;
+}
+/*
+ * Close both descriptors of the pair fd[0], fd[1] through closefd(), so
+ * slots already set to -1 are skipped and both end up as -1.
+ */
+static void
+closep(int *fd)
+{
+	size_t i;
+	for (i = 0; i < 2; i++)
+		closefd(fd, i);
+}
+/*
+ * Make descriptor number i refer to one end of the pair fd: fd[0] when i
+ * is STDIN_FILENO, fd[1] for any other i.  If that end already is
+ * descriptor i nothing is done (and the pair is left open).  Otherwise
+ * the end is duplicated onto i with dup2() and both descriptors of the
+ * pair are closed.
+ *
+ * A dup2() failure terminates the process with exit(1); uncompressbuf()
+ * calls this only in the forked child, just before execvp().
+ */
+static void
+copydesc(int i, int *fd)
+{
+	int j = fd[i == STDIN_FILENO ? 0 : 1];
+	if (j == i)
+		return;
+	if (dup2(j, i) == -1) {
+		DPRINTF("dup(%d, %d) failed (%s)\n", j, i, strerror(errno));
+		exit(1);
+	}
+	closep(fd);
+}
+
+static void
+writechild(int fdp[3][2], const void *old, size_t n)
 {
-	int fdin[2], fdout[2];
 	int status;
+
+	closefd(fdp[STDIN_FILENO], 0);
+	/* 
+	 * fork again, to avoid blocking because both
+	 * pipes filled
+	 */
+	switch (fork()) {
+	case 0: /* child */
+		closefd(fdp[STDOUT_FILENO], 0);
+		if (swrite(fdp[STDIN_FILENO][1], old, n) != (ssize_t)n) {
+			DPRINTF("Write failed (%s)\n", strerror(errno));
+			exit(1);
+		}
+		exit(0);
+		/*NOTREACHED*/
+
+	case -1:
+		DPRINTF("Fork failed (%s)\n", strerror(errno));
+		exit(1);
+		/*NOTREACHED*/
+
+	default:  /* parent */
+		if (wait(&status) == -1) {
+			DPRINTF("Wait failed (%s)\n", strerror(errno));
+			exit(1);
+		}
+		DPRINTF("Grandchild wait return %#x\n", status);
+	}
+	closefd(fdp[STDIN_FILENO], 1);
+}
+/*
+ * Reduce the n bytes of error output in ubuf to a short message, in place,
+ * and return its length.
+ *
+ * ubuf[n] is written, so the buffer needs room for n + 1 bytes.  After
+ * skipping leading whitespace, the text is cut at the first newline and
+ * then at the first ';'.  If a ':' remains, only the text after the last
+ * ':' (with leading whitespace skipped) is kept: it is moved to the start
+ * of ubuf and n becomes its length.  Finally the first byte of ubuf is
+ * upper-cased if it is a lower-case letter.
+ *
+ * NOTE(review): when no ':' is found nothing is moved and the original n
+ * is returned, although the string may have been shortened by the newline
+ * or ';' cut -- the returned length can then exceed strlen(ubuf); confirm
+ * that callers only use the buffer as a NUL-terminated string.
+ */
+static ssize_t
+filter_error(unsigned char *ubuf, ssize_t n)
+{
+	char *p;
+	char *buf;
+
+	ubuf[n] = '\0';
+	buf = (char *)ubuf;
+	while (isspace((unsigned char)*buf))
+		buf++;
+	DPRINTF("Filter error[[[%s]]]\n", buf);
+	if ((p = strchr((char *)buf, '\n')) != NULL)
+		*p = '\0';
+	if ((p = strchr((char *)buf, ';')) != NULL)
+		*p = '\0';
+	if ((p = strrchr((char *)buf, ':')) != NULL) {
+		++p;
+		while (isspace((unsigned char)*p))
+			p++;
+		n = strlen(p);
+		memmove(ubuf, p, n + 1);	/* n + 1 copies the NUL as well */
+	}
+	DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
+	if (islower(*ubuf))
+		*ubuf = toupper(*ubuf);
+	return n;
+}
+/*
+ * Return the name used to label decompression method `method', an index
+ * into compr[].  With BUILTIN_DECOMPRESS, entry 2 (the gzip magic) and
+ * entries with maglen 0 are reported as "zlib"; these are the same
+ * entries uncompressbuf() decodes in-process.  All other entries are
+ * reported by the name of their external program, argv[0].
+ */
+private const char *
+methodname(size_t method)
+{
+#ifdef BUILTIN_DECOMPRESS
+        /* FIXME: This doesn't cope with bzip2 */
+	if (method == 2 || compr[method].maglen == 0)
+	    return "zlib";
+#endif
+	return compr[method].argv[0];
+}
+
+private int
+uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old,
+    unsigned char **newch, size_t* n)
+{
+	int fdp[3][2];
+	int status, rv;
+	size_t i;
 	ssize_t r;
 
 #ifdef BUILTIN_DECOMPRESS
         /* FIXME: This doesn't cope with bzip2 */
 	if (method == 2)
-		return uncompressgzipped(ms, old, newch, n);
+		return uncompressgzipped(old, newch, bytes_max, n);
+	if (compr[method].maglen == 0)
+		return uncompresszlib(old, newch, bytes_max, n, 1);
 #endif
 	(void)fflush(stdout);
 	(void)fflush(stderr);
 
-	if ((fd != -1 && pipe(fdin) == -1) || pipe(fdout) == -1) {
-		file_error(ms, errno, "cannot create pipe");	
-		return NODATA;
+	for (i = 0; i < __arraycount(fdp); i++)
+		fdp[i][0] = fdp[i][1] = -1;
+
+	if ((fd == -1 && pipe(fdp[STDIN_FILENO]) == -1) ||
+	    pipe(fdp[STDOUT_FILENO]) == -1 || pipe(fdp[STDERR_FILENO]) == -1) {
+		closep(fdp[STDIN_FILENO]);
+		closep(fdp[STDOUT_FILENO]);
+		return makeerror(newch, n, "Cannot create pipe, %s",
+		    strerror(errno));
 	}
 	switch (fork()) {
 	case 0:	/* child */
-		(void) close(0);
 		if (fd != -1) {
-		    if (dup(fd) == -1)
-			_exit(1);
-		    (void) lseek(0, (off_t)0, SEEK_SET);
-		} else {
-		    if (dup(fdin[0]) == -1)
-			_exit(1);
-		    (void) close(fdin[0]);
-		    (void) close(fdin[1]);
+			fdp[STDIN_FILENO][0] = fd;
+			(void) lseek(fd, (off_t)0, SEEK_SET);
 		}
-
-		(void) close(1);
-		if (dup(fdout[1]) == -1)
-			_exit(1);
-		(void) close(fdout[0]);
-		(void) close(fdout[1]);
-#ifndef DEBUG
-		if (compr[method].silent)
-			(void)close(2);
-#endif
+		
+		for (i = 0; i < __arraycount(fdp); i++)
+			copydesc(i, fdp[i]);
 
 		(void)execvp(compr[method].argv[0],
 		    (char *const *)(intptr_t)compr[method].argv);
-#ifdef DEBUG
-		(void)fprintf(stderr, "exec `%s' failed (%s)\n",
+		dprintf(STDERR_FILENO, "exec `%s' failed, %s", 
 		    compr[method].argv[0], strerror(errno));
-#endif
 		exit(1);
 		/*NOTREACHED*/
 	case -1:
-		file_error(ms, errno, "could not fork");
-		return NODATA;
+		return makeerror(newch, n, "Cannot fork, %s",
+		    strerror(errno));
 
 	default: /* parent */
-		(void) close(fdout[1]);
-		if (fd == -1) {
-			(void) close(fdin[0]);
-			/* 
-			 * fork again, to avoid blocking because both
-			 * pipes filled
-			 */
-			switch (fork()) {
-			case 0: /* child */
-				(void)close(fdout[0]);
-				if (swrite(fdin[1], old, n) != (ssize_t)n) {
-#ifdef DEBUG
-					(void)fprintf(stderr,
-					    "Write failed (%s)\n",
-					    strerror(errno));
-#endif
-					exit(1);
-				}
-				exit(0);
-				/*NOTREACHED*/
+		for (i = 1; i < __arraycount(fdp); i++)
+			closefd(fdp[i], 1);
 
-			case -1:
-#ifdef DEBUG
-				(void)fprintf(stderr, "Fork failed (%s)\n",
-				    strerror(errno));
-#endif
-				exit(1);
-				/*NOTREACHED*/
+		/* Write the buffer data to the child, if we don't have fd */
+		if (fd == -1)
+			writechild(fdp, old, *n);
 
-			default:  /* parent */
-				if (wait(&status) == -1) {
-#ifdef DEBUG
-					(void)fprintf(stderr,
-					    "Wait failed (%s)\n",
-					    strerror(errno));
-#endif
-					exit(1);
-				}
-				exit(WIFEXITED(status) ?
-				    WEXITSTATUS(status) : 1);
-				/*NOTREACHED*/
-			}
-			(void) close(fdin[1]);
-			fdin[1] = -1;
-		}
-
-		if ((*newch = (unsigned char *) malloc(HOWMANY + 1)) == NULL) {
-#ifdef DEBUG
-			(void)fprintf(stderr, "Malloc failed (%s)\n",
+		*newch = CAST(unsigned char *, malloc(bytes_max + 1));
+		if (*newch == NULL) {
+			rv = makeerror(newch, n, "No buffer, %s",
 			    strerror(errno));
-#endif
-			n = NODATA;
 			goto err;
 		}
-		if ((r = sread(fdout[0], *newch, HOWMANY, 0)) <= 0) {
-#ifdef DEBUG
-			(void)fprintf(stderr, "Read failed (%s)\n",
-			    strerror(errno));
-#endif
-			free(*newch);
-			n = NODATA;
-			*newch = NULL;
-			goto err;
-		} else {
-			n = r;
+		rv = OKDATA;
+		if ((r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0)) > 0)
+			break;
+		DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
+		    r != -1 ? strerror(errno) : "no data");
+
+		rv = ERRDATA;
+		if (r == 0 &&
+		    (r = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0)
+		{
+			r = filter_error(*newch, r);
+			break;
 		}
- 		/* NUL terminate, as every buffer is handled here. */
- 		(*newch)[n] = '\0';
-err:
-		if (fdin[1] != -1)
-			(void) close(fdin[1]);
-		(void) close(fdout[0]);
-		if (wait(&status) == -1) {
-#ifdef DEBUG
-			(void)fprintf(stderr, "Wait failed (%s)\n",
+		free(*newch);
+		if  (r == 0)
+			rv = makeerror(newch, n, "Read failed, %s",
 			    strerror(errno));
-#endif
-			n = NODATA;
-		} else if (!WIFEXITED(status)) {
-#ifdef DEBUG
-			(void)fprintf(stderr, "Child not exited (0x%x)\n",
-			    status);
-#endif
-		} else if (WEXITSTATUS(status) != 0) {
-#ifdef DEBUG
-			(void)fprintf(stderr, "Child exited (0x%d)\n",
-			    WEXITSTATUS(status));
-#endif
-		}
+		else
+			rv = makeerror(newch, n, "No data");
+		goto err;
+	}
 
-		(void) close(fdin[0]);
-	    
-		return n;
+	*n = r;
+	/* NUL terminate, as every buffer is handled here. */
+	(*newch)[*n] = '\0';
+err:
+	closefd(fdp[STDIN_FILENO], 1);
+	closefd(fdp[STDOUT_FILENO], 0);
+	closefd(fdp[STDERR_FILENO], 0);
+	if (wait(&status) == -1) {
+		free(*newch);
+		rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
+		DPRINTF("Child wait return %#x\n", status);
+	} else if (!WIFEXITED(status)) {
+		DPRINTF("Child not exited (0x%x)\n", status);
+	} else if (WEXITSTATUS(status) != 0) {
+		DPRINTF("Child exited (0x%d)\n", WEXITSTATUS(status));
 	}
+
+	closefd(fdp[STDIN_FILENO], 0);
+	DPRINTF("Returning %p n=%zu rv=%d\n", *newch, *n, rv);
+    
+	return rv;
 }
 #endif

+ 384 - 0
src/der.c

@@ -0,0 +1,384 @@
+/*-
+ * Copyright (c) 2016 Christos Zoulas
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ * DER (Distinguished Encoding Rules) Parser
+ *
+ * Sources:
+ * https://en.wikipedia.org/wiki/X.690
+ * http://fm4dd.com/openssl/certexamples.htm
+ * http://blog.engelke.com/2014/10/17/parsing-ber-and-der-encoded-asn-1-objects/
+ */
+#ifndef TEST_DER
+#include "file.h"
+
+#ifndef lint
+FILE_RCSID("@(#)$File: der.c,v 1.7 2016/06/01 22:01:15 christos Exp $")
+#endif
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#ifndef TEST_DER
+#include "magic.h"
+#include "der.h"
+#else
+#include <err.h>
+#endif
+
+/* Error value returned by gettag() and getlength(). */
+#define DER_BAD	((uint32_t)-1)
+
+/* Tag class values, as produced by getclass(). */
+#define DER_CLASS_UNIVERSAL	0
+#define	DER_CLASS_APPLICATION	1
+#define	DER_CLASS_CONTEXT	2
+#define	DER_CLASS_PRIVATE	3
+/* One letter per class; printdata() (TEST_DER) indexes this by class. */
+#if defined(DEBUG_DER) || defined(TEST_DER)
+static const char der_class[] = "UACP";
+#endif
+
+/* Primitive/constructed flag values, as produced by gettype(). */
+#define DER_TYPE_PRIMITIVE	0
+#define DER_TYPE_CONSTRUCTED	1
+/* One letter per type; printdata() (TEST_DER) indexes this by type. */
+#if defined(DEBUG_DER) || defined(TEST_DER)
+static const char der_type[] = "PC";
+#endif
+
+/* Universal tag numbers (low five bits of the identifier octet). */
+#define	DER_TAG_EOC			0x00
+#define	DER_TAG_BOOLEAN			0x01
+#define	DER_TAG_INTEGER			0x02
+#define	DER_TAG_BIT_STRING		0x03
+#define	DER_TAG_OCTET_STRING		0x04
+#define	DER_TAG_NULL			0x05
+#define	DER_TAG_OBJECT_IDENTIFIER	0x06
+#define	DER_TAG_OBJECT_DESCRIPTOR	0x07
+#define	DER_TAG_EXTERNAL		0x08
+#define	DER_TAG_REAL			0x09
+#define	DER_TAG_ENUMERATED		0x0a
+#define	DER_TAG_EMBEDDED_PDV		0x0b
+#define	DER_TAG_UTF8_STRING		0x0c
+#define	DER_TAG_RELATIVE_OID		0x0d
+#define DER_TAG_RESERVED_1		0x0e
+#define DER_TAG_RESERVED_2		0x0f
+#define	DER_TAG_SEQUENCE		0x10
+#define	DER_TAG_SET			0x11
+#define	DER_TAG_NUMERIC_STRING		0x12
+#define	DER_TAG_PRINTABLE_STRING	0x13
+#define	DER_TAG_T61_STRING		0x14
+#define	DER_TAG_VIDEOTEX_STRING		0x15
+#define	DER_TAG_IA5_STRING		0x16
+#define	DER_TAG_UTCTIME			0x17
+#define	DER_TAG_GENERALIZED_TIME	0x18
+#define	DER_TAG_GRAPHIC_STRING		0x19
+#define	DER_TAG_VISIBLE_STRING		0x1a
+#define	DER_TAG_GENERAL_STRING		0x1b
+#define	DER_TAG_UNIVERSAL_STRING	0x1c
+#define	DER_TAG_CHARACTER_STRING	0x1d
+#define	DER_TAG_BMP_STRING		0x1e
+#define	DER_TAG_LONG			0x1f
+
+/*
+ * Short names for the universal tags, indexed by tag number, so the
+ * entry order must follow the DER_TAG_* values above one for one.
+ * der_tag() uses this table for every tag below DER_TAG_LONG.
+ */
+static const char *der__tag[] = {
+	"eoc", "bool", "int", "bit_str", "octet_str",
+	"null", "obj_id", "obj_desc", "ext", "real",
+	"enum", "embed", "utf8_str", "oid", "res1",
+	"res2", "seq", "set", "num_str", "prt_str",
+	"t61_str", "vid_str", "ia5_str", "utc_time",
+	"gen_time", "gr_str", "vis_str", "gen_str",
+	"univ_str", "char_str", "bmp_str", "long"
+};
+
+/* Debug tracing; the argument is a parenthesised printf() argument list. */
+#ifdef DEBUG_DER
+#define DPRINTF(a) printf a
+#else
+#define DPRINTF(a)
+#endif
+
+#ifdef TEST_DER
+/* Return the class field (the top two bits) of identifier octet c. */
+static uint8_t
+getclass(uint8_t c)
+{
+	return (uint8_t)((c & 0xc0) >> 6);
+}
+
+/* Return bit 5 of identifier octet c: 0 (primitive) or 1 (constructed). */
+static uint8_t
+gettype(uint8_t c)
+{
+	return (c & 0x20) != 0;
+}
+#endif
+
+/*
+ * Read the tag number from the identifier octet at c[*p], advancing *p
+ * past every octet consumed; l is the number of bytes available in c.
+ * Returns the tag, or DER_BAD when the input runs out.
+ */
+static uint32_t
+gettag(const uint8_t *c, size_t *p, size_t l)
+{
+	uint32_t tag;
+
+	if (*p >= l)
+		return DER_BAD;
+
+	tag = c[*p] & 0x1f;
+	++*p;
+
+	/* Any value but 0x1f is a complete tag held in the low five bits. */
+	if (tag != 0x1f)
+		return tag;
+
+	/*
+	 * Otherwise fold in the low seven bits of each following octet
+	 * that has its top bit set; the first octet without the top bit
+	 * ends the loop and is left unread.
+	 */
+	for (;;) {
+		if (*p >= l)
+			return DER_BAD;
+		if (c[*p] < 0x80)
+			break;
+		tag = (tag << 7) + (c[*p] & 0x7fu);
+		++*p;
+	}
+	return tag;
+}
+
+/*
+ * Decode the length octets starting at c[*p], advancing *p past them;
+ * l is the number of bytes available in c.
+ *
+ * A first octet with the top bit clear is the length itself.  Otherwise
+ * its low seven bits give the number of following octets that hold the
+ * length, most significant first.
+ *
+ * Returns the length, or DER_BAD when the input runs out or the value
+ * does not fit the 32-bit return type (it used to be silently
+ * truncated).  The length is not checked against the remaining buffer;
+ * that is left to the caller.
+ */
+static uint32_t
+getlength(const uint8_t *c, size_t *p, size_t l)
+{
+	uint8_t digits, i;
+	uint32_t len;
+
+	if (*p >= l)
+		return DER_BAD;
+
+	digits = c[(*p)++];
+
+	/* Short form. */
+	if ((digits & 0x80) == 0)
+		return digits;
+
+	/* Long form: make sure all the length octets are present. */
+	digits &= 0x7f;
+	if (*p + digits >= l)
+		return DER_BAD;
+
+	len = 0;
+	for (i = 0; i < digits; i++) {
+		/* The next shift would push significant bits out. */
+		if ((len >> 24) != 0)
+			return DER_BAD;
+		len = (len << 8) | c[(*p)++];
+	}
+	return len;
+}
+
+/*
+ * Write the printable name of tag into buf (at most len bytes) and
+ * return buf.  Tags below DER_TAG_LONG get their der__tag[] name,
+ * everything else is formatted as a hexadecimal number.
+ */
+static const char *
+der_tag(char *buf, size_t len, uint32_t tag)
+{
+	if (tag >= DER_TAG_LONG) {
+		snprintf(buf, len, "%#x", tag);
+		return buf;
+	}
+
+	strlcpy(buf, der__tag[tag], len);
+	return buf;
+}
+
+#ifndef TEST_DER
+/*
+ * Render the len content bytes at q into buf (blen bytes) so they can be
+ * compared with the text of a magic entry.
+ *
+ * Printable, UTF-8 and IA5 strings and UTC times are copied as text;
+ * every other tag is written as two lowercase hex digits per byte, as
+ * far as fits.  buf is always NUL terminated when blen is non-zero, even
+ * for an empty value (it used to be left untouched in that case).
+ *
+ * Returns the snprintf() result for text, or len * 2 for hex.
+ */
+static int
+der_data(char *buf, size_t blen, uint32_t tag, const void *q, uint32_t len)
+{
+	const uint8_t *d = q;
+
+	switch (tag) {
+	case DER_TAG_PRINTABLE_STRING:
+	case DER_TAG_UTF8_STRING:
+	case DER_TAG_IA5_STRING:
+	case DER_TAG_UTCTIME:
+		/*
+		 * The precision argument must be an int.  No more than blen
+		 * characters can be stored anyway, so clamp to that instead
+		 * of passing a possibly huge unsigned value.
+		 */
+		return snprintf(buf, blen, "%.*s",
+		    (int)(len < blen ? len : blen), (const char *)q);
+	default:
+		break;
+	}
+
+	if (blen != 0)
+		buf[0] = '\0';
+
+	for (uint32_t i = 0; i < len; i++) {
+		size_t z = (size_t)i << 1;
+
+		/*
+		 * Stop once two more digits no longer fit; nothing after
+		 * this point would be written either.
+		 */
+		if (z + 2 >= blen)
+			break;
+		snprintf(buf + z, blen - z, "%.2x", d[i]);
+	}
+	return len * 2;
+}
+
+/*
+ * Step over the tag and length octets of the DER element at the start
+ * of ms->search.s.
+ *
+ * The buffer length is ms->search.s_len, or nbytes when that is zero.
+ * For a continuation entry (m->cont_level != 0) the offset just past
+ * the element's contents is stored in ms->c.li[m->cont_level - 1].off.
+ *
+ * Returns the number of header bytes plus ms->offset and m->offset, or
+ * -1 when the header cannot be decoded or the element would extend
+ * beyond nbytes.
+ */
+int32_t
+der_offs(struct magic_set *ms, struct magic *m, size_t nbytes)
+{
+	const uint8_t *b = CAST(const void *, ms->search.s);
+	size_t offs = 0, len = ms->search.s_len ? ms->search.s_len : nbytes;
+
+	if (gettag(b, &offs, len) == DER_BAD)
+		return -1;
+	DPRINTF(("%s1: %d %zu %u\n", __func__, ms->offset, offs, m->offset));
+
+	uint32_t tlen = getlength(b, &offs, len);
+	if (tlen == DER_BAD)
+		return -1;
+	DPRINTF(("%s2: %d %zu %u\n", __func__, ms->offset, offs, tlen));
+
+	offs += ms->offset + m->offset;
+	DPRINTF(("cont_level = %d\n", m->cont_level));
+#ifdef DEBUG_DER
+	for (size_t i = 0; i < m->cont_level; i++)
+		printf("cont_level[%zu] = %u\n", i, ms->c.li[i].off);
+#endif
+	if (m->cont_level != 0) {
+		/*
+		 * Report failure as -1 like the paths above, not as the
+		 * unsigned DER_BAD converted to the signed return type.
+		 */
+		if (offs + tlen > nbytes)
+			return -1;
+		ms->c.li[m->cont_level - 1].off = offs + tlen;
+		DPRINTF(("cont_level[%u] = %u\n", m->cont_level - 1,
+		    ms->c.li[m->cont_level - 1].off));
+	}
+	return offs;
+}
+
+/*
+ * Compare the DER element at the start of ms->search.s with the
+ * expectation string m->value.s.
+ *
+ * As parsed below, the expectation is a tag name, optionally followed
+ * by a decimal length, optionally followed by '=' and the expected
+ * data; the data "x" accepts any value.
+ *
+ * Returns -1 if the tag or length cannot be decoded, 0 on mismatch and
+ * 1 on match.  When data was compared, the rendered data is copied to
+ * ms->ms_value.s.
+ */
+int
+der_cmp(struct magic_set *ms, struct magic *m)
+{
+	const uint8_t *b = CAST(const void *, ms->search.s);
+	const char *s = m->value.s;
+	size_t offs = 0, len = ms->search.s_len;
+	uint32_t tag, tlen;
+	char buf[128];
+
+	tag = gettag(b, &offs, len);
+	if (tag == DER_BAD)
+		return -1;
+
+	tlen = getlength(b, &offs, len);
+	if (tlen == DER_BAD)
+		return -1;
+
+	/* The expectation must start with the name of the tag found. */
+	der_tag(buf, sizeof(buf), tag);
+	if ((ms->flags & MAGIC_DEBUG) != 0)
+		fprintf(stderr, "%s: tag %p got=%s exp=%s\n", __func__, b,
+		    buf, s);
+	size_t slen = strlen(buf);
+
+	if (strncmp(buf, s, slen) != 0)
+		return 0;
+
+	s += slen;
+
+again:
+	switch (*s) {
+	case '\0':
+		/* Nothing more to check. */
+		return 1;
+	case '=':
+		s++;
+		goto val;
+	default:
+		if (!isdigit((unsigned char)*s))
+			return 0;
+
+		/* Parse a decimal length; it must equal the decoded one. */
+		slen = 0;
+		do
+			slen = slen * 10 + *s - '0';
+		while (isdigit((unsigned char)*++s));
+		if ((ms->flags & MAGIC_DEBUG) != 0)
+			fprintf(stderr, "%s: len %zu %u\n", __func__,
+			    slen, tlen);
+		if (tlen != slen)
+			return 0;
+		goto again;
+	}
+val:
+	/*
+	 * NOTE(review): tlen is not checked against len before der_data()
+	 * reads from b + offs -- confirm the buffer always covers it.
+	 */
+	DPRINTF(("%s: before data %zu %u\n", __func__, offs, tlen));
+	der_data(buf, sizeof(buf), tag, b + offs, tlen);
+	if ((ms->flags & MAGIC_DEBUG) != 0)
+		fprintf(stderr, "%s: data %s %s\n", __func__, buf, s);
+	if (strcmp(buf, s) != 0 && strcmp("x", s) != 0)
+		return 0;
+	strlcpy(ms->ms_value.s, buf, sizeof(ms->ms_value.s));
+	return 1;
+}
+#endif
+
+#ifdef TEST_DER
+/*
+ * Print the len content bytes at q on one line of stdout: as text for
+ * printable and UTF-8 strings, as hex digits for any other tag.
+ */
+static void
+printtag(uint32_t tag, const void *q, uint32_t len)
+{
+	const uint8_t *bytes = q;
+	uint32_t k;
+
+	if (tag == DER_TAG_PRINTABLE_STRING || tag == DER_TAG_UTF8_STRING) {
+		printf("%.*s\n", len, (const char *)q);
+		return;
+	}
+
+	for (k = 0; k < len; k++)
+		printf("%.2x", bytes[k]);
+	printf("\n");
+}
+
+/*
+ * Dump the DER elements found in v between offset x and offset l, one
+ * line per element, recursing into constructed elements with level + 1.
+ * Exits through errx() when an element looks corrupt.
+ */
+static void
+printdata(size_t level, const void *v, size_t x, size_t l)
+{
+	const uint8_t *p = v, *ep = p + l;
+	size_t ox;
+	char buf[128];
+
+	while (p + x < ep) {
+		const uint8_t *q;
+		uint8_t c = getclass(p[x]);
+		uint8_t t = gettype(p[x]);
+		ox = x;
+		/*
+		 * Prints the bytes around the current offset.
+		 * NOTE(review): p[x + 1] may lie past ep when x is the last
+		 * byte -- confirm.
+		 */
+		if (x != 0)
+		printf("%.2x %.2x %.2x\n", p[x - 1], p[x], p[x + 1]);
+		/*
+		 * NOTE(review): ep - p + x equals l + x, not l, so the limit
+		 * handed to gettag()/getlength() exceeds the buffer --
+		 * confirm whether l was intended.  DER_BAD results are not
+		 * checked here either.
+		 */
+		uint32_t tag = gettag(p, &x, ep - p + x);
+		if (p + x >= ep)
+			break;
+		uint32_t len = getlength(p, &x, ep - p + x);
+		
+		printf("%zu %zu-%zu %c,%c,%s,%u:", level, ox, x,
+		    der_class[c], der_type[t],
+		    der_tag(buf, sizeof(buf), tag), len);
+		q = p + x;
+		/*
+		 * NOTE(review): this tests p + len, while the contents start
+		 * at q = p + x -- presumably q + len was meant; verify.
+		 */
+		if (p + len > ep)
+			errx(EXIT_FAILURE, "corrupt der");
+		printtag(tag, q, len);
+		/* The contents of a constructed element are elements too. */
+		if (t != DER_TYPE_PRIMITIVE)
+			printdata(level + 1, p, x, len + x);
+		x += len;
+	}
+}
+
+/*
+ * Test driver (TEST_DER only): map the file named by argv[1] read-only
+ * and dump its DER structure with printdata().
+ */
+int
+main(int argc, char *argv[])
+{
+	int fd;
+	struct stat st;
+	size_t l;
+	void *p;
+
+	/* argv[1] is dereferenced below, so it must be present. */
+	if (argc < 2)
+		errx(EXIT_FAILURE, "usage: %s file",
+		    argc > 0 ? argv[0] : "der");
+
+	if ((fd = open(argv[1], O_RDONLY)) == -1)
+		err(EXIT_FAILURE, "open `%s'", argv[1]);
+	if (fstat(fd, &st) == -1)
+		err(EXIT_FAILURE, "stat `%s'", argv[1]);
+	l = (size_t)st.st_size;
+	/*
+	 * mmap() requires exactly one of MAP_SHARED and MAP_PRIVATE;
+	 * MAP_FILE alone is rejected on systems that enforce this.
+	 */
+	if ((p = mmap(NULL, l, PROT_READ, MAP_FILE | MAP_PRIVATE, fd, 0)) ==
+	    MAP_FAILED)
+		err(EXIT_FAILURE, "mmap `%s'", argv[1]);
+
+	printdata(0, p, 0, l);
+	munmap(p, l);
+	return 0;
+}
+#endif

+ 28 - 0
src/der.h

@@ -0,0 +1,28 @@
+/*-
+ * Copyright (c) 2016 Christos Zoulas
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * DER helpers implemented in der.c.  der_offs() is defined there with
+ * an int32_t return type, so it is declared the same way here.
+ */
+extern int32_t der_offs(struct magic_set *, struct magic *, size_t);
+extern int der_cmp(struct magic_set *, struct magic *);

+ 58 - 0
src/dprintf.c

@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) Ian F. Darwin 1986-1995.
+ * Software written by Ian F. Darwin and others;
+ * maintained 1995-present by Christos Zoulas and others.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice immediately at the beginning of the file, without modification,
+ *    this list of conditions, and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include "file.h"
+
+#ifndef	lint
+FILE_RCSID("@(#)$File: dprintf.c,v 1.1 2015/11/13 15:36:14 christos Exp $")
+#endif	/* lint */
+
+#include <assert.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdarg.h>
+
+/*
+ * Replacement dprintf(): format into a fixed 1024-byte buffer and write
+ * the result to fd with a single write().
+ *
+ * Returns the number of bytes written, or -1 if formatting failed, the
+ * output did not fit the buffer, or write() did not write all of it.
+ */
+int
+dprintf(int fd, const char *fmt, ...)
+{
+	/* A fixed buffer is enough here and avoids vasprintf(). */
+	char text[1024];
+	va_list args;
+	int n;
+
+	va_start(args, fmt);
+	n = vsnprintf(text, sizeof(text), fmt, args);
+	va_end(args);
+
+	/* A negative n becomes a huge value in the cast and fails as well. */
+	if ((size_t)n >= sizeof(text))
+		return -1;
+
+	return write(fd, text, (size_t)n) == n ? n : -1;
+}

+ 39 - 19
src/file.c

@@ -32,7 +32,7 @@
 #include "file.h"
 
 #ifndef	lint
-FILE_RCSID("@(#)$File: file.c,v 1.167 2015/09/11 17:24:09 christos Exp $")
+FILE_RCSID("@(#)$File: file.c,v 1.171 2016/05/17 15:52:45 christos Exp $")
 #endif	/* lint */
 
 #include "magic.h"
@@ -94,9 +94,9 @@ private const struct option long_options[] = {
 #define OPT_EXTENSIONS		3
 #define OPT_MIME_TYPE		4
 #define OPT_MIME_ENCODING	5
-#define OPT(shortname, longname, opt, doc)      \
+#define OPT(shortname, longname, opt, def, doc)      \
     {longname, opt, NULL, shortname},
-#define OPT_LONGONLY(longname, opt, doc, id)        \
+#define OPT_LONGONLY(longname, opt, def, doc, id)        \
     {longname, opt, NULL, id},
 #include "file_opts.h"
 #undef OPT
@@ -132,15 +132,17 @@ private struct {
 	{ "elf_shnum",	MAGIC_PARAM_ELF_SHNUM_MAX, 0 },
 	{ "elf_notes",	MAGIC_PARAM_ELF_NOTES_MAX, 0 },
 	{ "regex",	MAGIC_PARAM_REGEX_MAX, 0 },
+	{ "bytes",	MAGIC_PARAM_BYTES_MAX, 0 },
 };
 
 private char *progname;		/* used throughout 		*/
+private int posixly;
 
 #ifdef __dead
 __dead
 #endif
 private void usage(void);
-private void docprint(const char *);
+private void docprint(const char *, int);
 #ifdef __dead
 __dead
 #endif
@@ -183,7 +185,8 @@ main(int argc, char *argv[])
 		progname = argv[0];
 
 #ifdef S_IFLNK
-	flags |= getenv("POSIXLY_CORRECT") ? MAGIC_SYMLINK : 0;
+	posixly = getenv("POSIXLY_CORRECT") != NULL;
+	flags |=  posixly ? MAGIC_SYMLINK : 0;
 #endif
 	while ((c = getopt_long(argc, argv, OPTSTRING, long_options,
 	    &longindex)) != -1)
@@ -204,7 +207,7 @@ main(int argc, char *argv[])
 			flags |= MAGIC_MIME_ENCODING;
 			break;
 		case '0':
-			nulsep = 1;
+			nulsep++;
 			break;
 		case 'b':
 			bflag++;
@@ -348,9 +351,10 @@ main(int argc, char *argv[])
 		if (c == -1) {
 			(void)fprintf(stderr, "%s: %s\n", progname,
 			    magic_error(magic));
-			return 1;
+			e = 1;
+			goto out;
 		}
-		return 0;
+		goto out;
 	default:
 		if (magic == NULL)
 			if ((magic = load(magicfile, flags)) == NULL)
@@ -380,6 +384,7 @@ main(int argc, char *argv[])
 			e |= process(magic, argv[optind], wid);
 	}
 
+out:
 	if (magic)
 		magic_close(magic);
 	return e;
@@ -492,24 +497,28 @@ unwrap(struct magic_set *ms, const char *fn)
 private int
 process(struct magic_set *ms, const char *inname, int wid)
 {
-	const char *type;
+	const char *type, c = nulsep > 1 ? '\0' : '\n';
 	int std_in = strcmp(inname, "-") == 0;
 
 	if (wid > 0 && !bflag) {
 		(void)printf("%s", std_in ? "/dev/stdin" : inname);
 		if (nulsep)
 			(void)putc('\0', stdout);
-		(void)printf("%s", separator);
-		(void)printf("%*s ",
-		    (int) (nopad ? 0 : (wid - file_mbswidth(inname))), "");
+		if (nulsep < 2) {
+			(void)printf("%s", separator);
+			(void)printf("%*s ",
+			    (int) (nopad ? 0 : (wid - file_mbswidth(inname))),
+			    "");
+		}
 	}
 
 	type = magic_file(ms, std_in ? NULL : inname);
+
 	if (type == NULL) {
-		(void)printf("ERROR: %s\n", magic_error(ms));
+		(void)printf("ERROR: %s%c", magic_error(ms), c);
 		return 1;
 	} else {
-		(void)printf("%s\n", type);
+		(void)printf("%s%c", type, c);
 		return 0;
 	}
 }
@@ -559,7 +568,17 @@ usage(void)
 }
 
 private void
-docprint(const char *opts)
+defprint(int def)
+{
+	if (!def)
+		return;
+	if (((def & 1) && posixly) || ((def & 2) && !posixly))
+		fprintf(stdout, " (default)");
+	fputc('\n', stdout);
+}
+
+private void
+docprint(const char *opts, int def)
 {
 	size_t i;
 	int comma;
@@ -568,6 +587,7 @@ docprint(const char *opts)
 	p = strstr(opts, "%o");
 	if (p == NULL) {
 		fprintf(stdout, "%s", opts);
+		defprint(def);
 		return;
 	}
 
@@ -595,12 +615,12 @@ help(void)
 "Usage: file [OPTION...] [FILE...]\n"
 "Determine type of FILEs.\n"
 "\n", stdout);
-#define OPT(shortname, longname, opt, doc)      \
+#define OPT(shortname, longname, opt, def, doc)      \
 	fprintf(stdout, "  -%c, --" longname, shortname), \
-	docprint(doc);
-#define OPT_LONGONLY(longname, opt, doc, id)        \
+	docprint(doc, def);
+#define OPT_LONGONLY(longname, opt, def, doc, id)        \
 	fprintf(stdout, "      --" longname),	\
-	docprint(doc);
+	docprint(doc, def);
 #include "file_opts.h"
 #undef OPT
 #undef OPT_LONGONLY

+ 17 - 8
src/file.h

@@ -27,7 +27,7 @@
  */
 /*
  * file.h - definitions for file(1) program
- * @(#)$File: file.h,v 1.172 2015/09/11 17:24:09 christos Exp $
+ * @(#)$File: file.h,v 1.178 2016/03/31 17:51:12 christos Exp $
  */
 
 #ifndef __file_h__
@@ -127,8 +127,8 @@
 #define	MAX(a,b)	(((a) > (b)) ? (a) : (b))
 #endif
 
-#ifndef HOWMANY
-# define HOWMANY (1024 * 1024)	/* how much of the file to look at */
+#ifndef FILE_BYTES_MAX
+# define FILE_BYTES_MAX (1024 * 1024)	/* how much of the file to look at */
 #endif
 #define MAXMAGIS 8192		/* max entries in any one magic file
 				   or directory */
@@ -227,7 +227,8 @@ struct magic {
 #define				FILE_NAME	45
 #define				FILE_USE	46
 #define				FILE_CLEAR	47
-#define				FILE_NAMES_SIZE	48 /* size of array to contain all names */
+#define				FILE_DER	48
+#define				FILE_NAMES_SIZE	49 /* size of array to contain all names */
 
 #define IS_STRING(t) \
 	((t) == FILE_STRING || \
@@ -365,9 +366,11 @@ struct mlist {
 #ifdef __cplusplus
 #define CAST(T, b)	static_cast<T>(b)
 #define RCAST(T, b)	reinterpret_cast<T>(b)
+#define CCAST(T, b)	const_cast<T>(b)
 #else
-#define CAST(T, b)	(T)(b)
-#define RCAST(T, b)	(T)(b)
+#define CAST(T, b)	((T)(b))
+#define RCAST(T, b)	((T)(b))
+#define CCAST(T, b)	((T)(uintptr_t)(b))
 #endif
 
 struct level_info {
@@ -416,7 +419,8 @@ struct magic_set {
 	uint16_t elf_phnum_max;
 	uint16_t elf_notes_max;
 	uint16_t regex_max;
-#define	FILE_INDIR_MAX			15
+	size_t bytes_max;		/* number of bytes to read from file */
+#define	FILE_INDIR_MAX			50
 #define	FILE_NAME_MAX			30
 #define	FILE_ELF_SHNUM_MAX		32768
 #define	FILE_ELF_PHNUM_MAX		2048
@@ -461,7 +465,7 @@ protected int file_encoding(struct magic_set *, const unsigned char *, size_t,
     unichar **, size_t *, const char **, const char **, const char **);
 protected int file_is_tar(struct magic_set *, const unsigned char *, size_t);
 protected int file_softmagic(struct magic_set *, const unsigned char *, size_t,
-    uint16_t, uint16_t *, int, int);
+    uint16_t *, uint16_t *, int, int);
 protected int file_apprentice(struct magic_set *, const char *, int);
 protected int buffer_apprentice(struct magic_set *, struct magic **,
     size_t *, size_t);
@@ -506,6 +510,8 @@ typedef struct {
 #define USE_C_LOCALE
 	locale_t old_lc_ctype;
 	locale_t c_lc_ctype;
+#else
+	char *old_lc_ctype;
 #endif
 	int rc;
 	regex_t rx;
@@ -550,6 +556,9 @@ int vasprintf(char **, const char *, va_list);
 #ifndef HAVE_ASPRINTF
 int asprintf(char **, const char *, ...);
 #endif
+#ifndef HAVE_DPRINTF
+int dprintf(int, const char *, ...);
+#endif
 
 #ifndef HAVE_STRLCPY
 size_t strlcpy(char *, const char *, size_t);

+ 28 - 28
src/file_opts.h

@@ -12,47 +12,47 @@
  * switch statement!
  */
 
-OPT_LONGONLY("help", 0, "                 display this help and exit\n", OPT_HELP)
-OPT('v', "version", 0, "              output version information and exit\n")
-OPT('m', "magic-file", 1, " LIST      use LIST as a colon-separated list of magic\n"
+OPT_LONGONLY("help", 0, 0, "                 display this help and exit\n", OPT_HELP)
+OPT('v', "version", 0, 0, "              output version information and exit\n")
+OPT('m', "magic-file", 1, 0, " LIST      use LIST as a colon-separated list of magic\n"
     "                               number files\n")
-OPT('z', "uncompress", 0, "           try to look inside compressed files\n")
-OPT('Z', "uncompress-noreport", 0, "  only print the contents of compressed files\n")
-OPT('b', "brief", 0, "                do not prepend filenames to output lines\n")
-OPT('c', "checking-printout", 0, "    print the parsed form of the magic file, use in\n"
+OPT('z', "uncompress", 0, 0, "           try to look inside compressed files\n")
+OPT('Z', "uncompress-noreport", 0, 0, "  only print the contents of compressed files\n")
+OPT('b', "brief", 0, 0, "                do not prepend filenames to output lines\n")
+OPT('c', "checking-printout", 0, 0, "    print the parsed form of the magic file, use in\n"
     "                               conjunction with -m to debug a new magic file\n"
     "                               before installing it\n")
-OPT('e', "exclude", 1, " TEST         exclude TEST from the list of test to be\n"
+OPT('e', "exclude", 1, 0, " TEST         exclude TEST from the list of test to be\n"
     "                               performed for file. Valid tests are:\n"
     "                               %o\n")
-OPT('f', "files-from", 1, " FILE      read the filenames to be examined from FILE\n")
-OPT('F', "separator", 1, " STRING     use string as separator instead of `:'\n")
-OPT('i', "mime", 0, "                 output MIME type strings (--mime-type and\n"
+OPT('f', "files-from", 1, 0, " FILE      read the filenames to be examined from FILE\n")
+OPT('F', "separator", 1, 0, " STRING     use string as separator instead of `:'\n")
+OPT('i', "mime", 0, 0, "                 output MIME type strings (--mime-type and\n"
     "                               --mime-encoding)\n")
-OPT_LONGONLY("apple", 0, "                output the Apple CREATOR/TYPE\n", OPT_APPLE)
-OPT_LONGONLY("extension", 0, "            output a slash-separated list of extensions\n", OPT_EXTENSIONS)
-OPT_LONGONLY("mime-type", 0, "            output the MIME type\n", OPT_MIME_TYPE)
-OPT_LONGONLY("mime-encoding", 0, "        output the MIME encoding\n", OPT_MIME_ENCODING)
-OPT('k', "keep-going", 0, "           don't stop at the first match\n")
-OPT('l', "list", 0, "                 list magic strength\n")
+OPT_LONGONLY("apple", 0, 0, "                output the Apple CREATOR/TYPE\n", OPT_APPLE)
+OPT_LONGONLY("extension", 0, 0, "            output a slash-separated list of extensions\n", OPT_EXTENSIONS)
+OPT_LONGONLY("mime-type", 0, 0, "            output the MIME type\n", OPT_MIME_TYPE)
+OPT_LONGONLY("mime-encoding", 0, 0, "        output the MIME encoding\n", OPT_MIME_ENCODING)
+OPT('k', "keep-going", 0, 0, "           don't stop at the first match\n")
+OPT('l', "list", 0, 0, "                 list magic strength\n")
 #ifdef S_IFLNK
-OPT('L', "dereference", 0, "          follow symlinks (default)\n")
-OPT('h', "no-dereference", 0, "       don't follow symlinks\n")
+OPT('L', "dereference", 0, 1, "          follow symlinks")
+OPT('h', "no-dereference", 0, 2, "       don't follow symlinks")
 #endif
-OPT('n', "no-buffer", 0, "            do not buffer output\n")
-OPT('N', "no-pad", 0, "               do not pad output\n")
-OPT('0', "print0", 0, "               terminate filenames with ASCII NUL\n")
+OPT('n', "no-buffer", 0, 0, "            do not buffer output\n")
+OPT('N', "no-pad", 0, 0, "               do not pad output\n")
+OPT('0', "print0", 0, 0, "               terminate filenames with ASCII NUL\n")
 #if defined(HAVE_UTIME) || defined(HAVE_UTIMES)
-OPT('p', "preserve-date", 0, "        preserve access times on files\n")
+OPT('p', "preserve-date", 0, 0, "        preserve access times on files\n")
 #endif
-OPT('P', "parameter", 1, "            set file engine parameter limits\n"
+OPT('P', "parameter", 1, 0, "            set file engine parameter limits\n"
     "                               indir        15 recursion limit for indirection\n"
     "                               name         30 use limit for name/use magic\n"
     "                               elf_notes   256 max ELF notes processed\n"
     "                               elf_phnum   128 max ELF prog sections processed\n"
     "                               elf_shnum 32768 max ELF sections processed\n")
-OPT('r', "raw", 0, "                  don't translate unprintable chars to \\ooo\n")
-OPT('s', "special-files", 0, "        treat special (block/char devices) files as\n"
+OPT('r', "raw", 0, 0, "                  don't translate unprintable chars to \\ooo\n")
+OPT('s', "special-files", 0, 0, "        treat special (block/char devices) files as\n"
     "                             ordinary ones\n")
-OPT('C', "compile", 0, "              compile file specified by -m\n")
-OPT('d', "debug", 0, "                print debugging messages\n")
+OPT('C', "compile", 0, 0, "              compile file specified by -m\n")
+OPT('d', "debug", 0, 0, "                print debugging messages\n")

+ 17 - 0
src/fmtcheck.c

@@ -91,6 +91,23 @@ get_next_format_from_precision(const char **pf)
 		f++;
 		longdouble = 1;
 		break;
+#ifdef WIN32
+	case 'I':
+		f++;
+		if (!*f) RETURN(pf,f,FMTCHECK_UNKNOWN);
+		if (*f == '3' && f[1] == '2') {
+			f += 2;
+		} else if (*f == '6' && f[1] == '4') {
+			f += 2;
+			quad = 1;
+		}
+#ifdef _WIN64
+		else {
+			quad = 1;
+		}
+#endif
+		break;
+#endif
 	default:
 		break;
 	}

+ 20 - 6
src/funcs.c

@@ -27,7 +27,7 @@
 #include "file.h"
 
 #ifndef	lint
-FILE_RCSID("@(#)$File: funcs.c,v 1.84 2015/09/10 13:32:19 christos Exp $")
+FILE_RCSID("@(#)$File: funcs.c,v 1.89 2016/03/21 15:56:53 christos Exp $")
 #endif	/* lint */
 
 #include "magic.h"
@@ -178,7 +178,6 @@ file_buffer(struct magic_set *ms, int fd, const char *inname __attribute__ ((__u
     const void *buf, size_t nb)
 {
 	int m = 0, rv = 0, looks_text = 0;
-	int mime = ms->flags & MAGIC_MIME;
 	const unsigned char *ubuf = CAST(const unsigned char *, buf);
 	unichar *u8buf = NULL;
 	size_t ulen;
@@ -252,7 +251,8 @@ file_buffer(struct magic_set *ms, int fd, const char *inname __attribute__ ((__u
 
 	/* try soft magic tests */
 	if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0)
-		m = file_softmagic(ms, ubuf, nb, 0, NULL, BINTEST, looks_text);
+		m = file_softmagic(ms, ubuf, nb, NULL, NULL, BINTEST,
+		    looks_text);
 		if ((ms->flags & MAGIC_DEBUG) != 0)
 			(void)fprintf(stderr, "[try softmagic %d]\n", m);
 		if (m) {
@@ -293,9 +293,19 @@ file_buffer(struct magic_set *ms, int fd, const char *inname __attribute__ ((__u
 simple:
 	/* give up */
 	m = 1;
-	if ((!mime || (mime & MAGIC_MIME_TYPE)) &&
-	    file_printf(ms, "%s", mime ? type : def) == -1) {
-	    rv = -1;
+	if (ms->flags & MAGIC_MIME) {
+		if ((ms->flags & MAGIC_MIME_TYPE) &&
+		    file_printf(ms, "%s", type) == -1)
+			rv = -1;
+	} else if (ms->flags & MAGIC_APPLE) {
+		if (file_printf(ms, "UNKNUNKN") == -1)
+			rv = -1;
+	} else if (ms->flags & MAGIC_EXTENSION) {
+		if (file_printf(ms, "???") == -1)
+			rv = -1;
+	} else {
+		if (file_printf(ms, "%s", def) == -1)
+			rv = -1;
 	}
  done:
 	if ((ms->flags & MAGIC_MIME_ENCODING) != 0) {
@@ -485,6 +495,8 @@ file_regcomp(file_regex_t *rx, const char *pat, int flags)
 	assert(rx->c_lc_ctype != NULL);
 	rx->old_lc_ctype = uselocale(rx->c_lc_ctype);
 	assert(rx->old_lc_ctype != NULL);
+#else
+	rx->old_lc_ctype = setlocale(LC_CTYPE, "C");
 #endif
 	rx->pat = pat;
 
@@ -507,6 +519,8 @@ file_regfree(file_regex_t *rx)
 #ifdef USE_C_LOCALE
 	(void)uselocale(rx->old_lc_ctype);
 	freelocale(rx->c_lc_ctype);
+#else
+	(void)setlocale(LC_CTYPE, rx->old_lc_ctype);
 #endif
 }
 

+ 17 - 9
src/magic.c

@@ -33,7 +33,7 @@
 #include "file.h"
 
 #ifndef	lint
-FILE_RCSID("@(#)$File: magic.c,v 1.95 2015/09/11 17:24:09 christos Exp $")
+FILE_RCSID("@(#)$File: magic.c,v 1.99 2016/05/03 16:09:38 christos Exp $")
 #endif	/* lint */
 
 #include "magic.h"
@@ -417,7 +417,7 @@ file_or_fd(struct magic_set *ms, const char *inname, int fd)
 	 * some overlapping space for matches near EOF
 	 */
 #define SLOP (1 + sizeof(union VALUETYPE))
-	if ((buf = CAST(unsigned char *, malloc(HOWMANY + SLOP))) == NULL)
+	if ((buf = CAST(unsigned char *, malloc(ms->bytes_max + SLOP))) == NULL)
 		return NULL;
 
 	switch (file_fsmagic(ms, inname, &sb)) {
@@ -481,13 +481,13 @@ file_or_fd(struct magic_set *ms, const char *inname, int fd)
 	}
 
 	/*
-	 * try looking at the first HOWMANY bytes
+	 * try looking at the first ms->bytes_max bytes
 	 */
 	if (ispipe) {
 		ssize_t r = 0;
 
 		while ((r = sread(fd, (void *)&buf[nbytes],
-		    (size_t)(HOWMANY - nbytes), 1)) > 0) {
+		    (size_t)(ms->bytes_max - nbytes), 1)) > 0) {
 			nbytes += r;
 			if (r < PIPE_BUF) break;
 		}
@@ -503,10 +503,10 @@ file_or_fd(struct magic_set *ms, const char *inname, int fd)
 	} else {
 		/* Windows refuses to read from a big console buffer. */
 		size_t howmany =
-#if defined(WIN32) && HOWMANY > 8 * 1024
+#if defined(WIN32)
 				_isatty(fd) ? 8 * 1024 :
 #endif
-				HOWMANY;
+				ms->bytes_max;
 		if ((nbytes = read(fd, (char *)buf, howmany)) == -1) {
 			if (inname == NULL && fd != STDIN_FILENO)
 				file_error(ms, errno, "cannot read fd %d", fd);
@@ -523,9 +523,11 @@ file_or_fd(struct magic_set *ms, const char *inname, int fd)
 	rv = 0;
 done:
 	free(buf);
-	if (pos != (off_t)-1)
-		(void)lseek(fd, pos, SEEK_SET);
-	close_and_restore(ms, inname, fd, &sb);
+	if (fd != -1) {
+		if (pos != (off_t)-1)
+			(void)lseek(fd, pos, SEEK_SET);
+		close_and_restore(ms, inname, fd, &sb);
+	}
 out:
 	return rv == 0 ? file_getbuffer(ms) : NULL;
 }
@@ -606,6 +608,9 @@ magic_setparam(struct magic_set *ms, int param, const void *val)
 	case MAGIC_PARAM_REGEX_MAX:
 		ms->elf_notes_max = (uint16_t)*(const size_t *)val;
 		return 0;
+	case MAGIC_PARAM_BYTES_MAX:
+		ms->bytes_max = *(const size_t *)val;
+		return 0;
 	default:
 		errno = EINVAL;
 		return -1;
@@ -634,6 +639,9 @@ magic_getparam(struct magic_set *ms, int param, void *val)
 	case MAGIC_PARAM_REGEX_MAX:
 		*(size_t *)val = ms->regex_max;
 		return 0;
+	case MAGIC_PARAM_BYTES_MAX:
+		*(size_t *)val = ms->bytes_max;
+		return 0;
 	default:
 		errno = EINVAL;
 		return -1;

+ 0 - 125
src/magic.h

@@ -1,125 +0,0 @@
-/*
- * Copyright (c) Christos Zoulas 2003.
- * All Rights Reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice immediately at the beginning of the file, without modification,
- *    this list of conditions, and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-#ifndef _MAGIC_H
-#define _MAGIC_H
-
-#include <sys/types.h>
-
-#define	MAGIC_NONE		0x0000000 /* No flags */
-#define	MAGIC_DEBUG		0x0000001 /* Turn on debugging */
-#define	MAGIC_SYMLINK		0x0000002 /* Follow symlinks */
-#define	MAGIC_COMPRESS		0x0000004 /* Check inside compressed files */
-#define	MAGIC_DEVICES		0x0000008 /* Look at the contents of devices */
-#define	MAGIC_MIME_TYPE		0x0000010 /* Return the MIME type */
-#define	MAGIC_CONTINUE		0x0000020 /* Return all matches */
-#define	MAGIC_CHECK		0x0000040 /* Print warnings to stderr */
-#define	MAGIC_PRESERVE_ATIME	0x0000080 /* Restore access time on exit */
-#define	MAGIC_RAW		0x0000100 /* Don't convert unprintable chars */
-#define	MAGIC_ERROR		0x0000200 /* Handle ENOENT etc as real errors */
-#define	MAGIC_MIME_ENCODING	0x0000400 /* Return the MIME encoding */
-#define MAGIC_MIME		(MAGIC_MIME_TYPE|MAGIC_MIME_ENCODING)
-#define	MAGIC_APPLE		0x0000800 /* Return the Apple creator/type */
-#define	MAGIC_EXTENSION		0x1000000 /* Return a /-separated list of
-					   * extensions */
-#define MAGIC_COMPRESS_TRANSP	0x2000000 /* Check inside compressed files
-					   * but not report compression */
-#define MAGIC_NODESC		(MAGIC_EXTENSION|MAGIC_MIME|MAGIC_APPLE)
-
-#define	MAGIC_NO_CHECK_COMPRESS	0x0001000 /* Don't check for compressed files */
-#define	MAGIC_NO_CHECK_TAR	0x0002000 /* Don't check for tar files */
-#define	MAGIC_NO_CHECK_SOFT	0x0004000 /* Don't check magic entries */
-#define	MAGIC_NO_CHECK_APPTYPE	0x0008000 /* Don't check application type */
-#define	MAGIC_NO_CHECK_ELF	0x0010000 /* Don't check for elf details */
-#define	MAGIC_NO_CHECK_TEXT	0x0020000 /* Don't check for text files */
-#define	MAGIC_NO_CHECK_CDF	0x0040000 /* Don't check for cdf files */
-#define	MAGIC_NO_CHECK_TOKENS	0x0100000 /* Don't check tokens */
-#define MAGIC_NO_CHECK_ENCODING 0x0200000 /* Don't check text encodings */
-
-/* No built-in tests; only consult the magic file */
-#define MAGIC_NO_CHECK_BUILTIN	( \
-	MAGIC_NO_CHECK_COMPRESS	| \
-	MAGIC_NO_CHECK_TAR	| \
-/*	MAGIC_NO_CHECK_SOFT	| */ \
-	MAGIC_NO_CHECK_APPTYPE	| \
-	MAGIC_NO_CHECK_ELF	| \
-	MAGIC_NO_CHECK_TEXT	| \
-	MAGIC_NO_CHECK_CDF	| \
-	MAGIC_NO_CHECK_TOKENS	| \
-	MAGIC_NO_CHECK_ENCODING	| \
-	0			  \
-)
-
-/* Defined for backwards compatibility (renamed) */
-#define	MAGIC_NO_CHECK_ASCII	MAGIC_NO_CHECK_TEXT
-
-/* Defined for backwards compatibility; do nothing */
-#define	MAGIC_NO_CHECK_FORTRAN	0x000000 /* Don't check ascii/fortran */
-#define	MAGIC_NO_CHECK_TROFF	0x000000 /* Don't check ascii/troff */
-
-#define MAGIC_VERSION		524	/* This implementation */
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct magic_set *magic_t;
-magic_t magic_open(int);
-void magic_close(magic_t);
-
-const char *magic_getpath(const char *, int);
-const char *magic_file(magic_t, const char *);
-const char *magic_descriptor(magic_t, int);
-const char *magic_buffer(magic_t, const void *, size_t);
-
-const char *magic_error(magic_t);
-int magic_setflags(magic_t, int);
-
-int magic_version(void);
-int magic_load(magic_t, const char *);
-int magic_load_buffers(magic_t, void **, size_t *, size_t);
-
-int magic_compile(magic_t, const char *);
-int magic_check(magic_t, const char *);
-int magic_list(magic_t, const char *);
-int magic_errno(magic_t);
-
-#define MAGIC_PARAM_INDIR_MAX		0
-#define MAGIC_PARAM_NAME_MAX		1
-#define MAGIC_PARAM_ELF_PHNUM_MAX	2
-#define MAGIC_PARAM_ELF_SHNUM_MAX	3
-#define MAGIC_PARAM_ELF_NOTES_MAX	4
-#define MAGIC_PARAM_REGEX_MAX		5
-
-int magic_setparam(magic_t, int, const void *);
-int magic_getparam(magic_t, int, void *);
-
-#ifdef __cplusplus
-};
-#endif
-
-#endif /* _MAGIC_H */

+ 1 - 0
src/magic.h.in

@@ -114,6 +114,7 @@ int magic_errno(magic_t);
 #define MAGIC_PARAM_ELF_SHNUM_MAX	3
 #define MAGIC_PARAM_ELF_NOTES_MAX	4
 #define MAGIC_PARAM_REGEX_MAX		5
+#define	MAGIC_PARAM_BYTES_MAX		6
 
 int magic_setparam(magic_t, int, const void *);
 int magic_getparam(magic_t, int, void *);

+ 2 - 1
src/print.c

@@ -32,7 +32,7 @@
 #include "file.h"
 
 #ifndef lint
-FILE_RCSID("@(#)$File: print.c,v 1.80 2015/07/16 14:28:57 christos Exp $")
+FILE_RCSID("@(#)$File: print.c,v 1.81 2016/01/19 15:09:03 christos Exp $")
 #endif  /* lint */
 
 #include <string.h>
@@ -198,6 +198,7 @@ file_mdump(struct magic *m)
 			break;
 		case FILE_USE:
 		case FILE_NAME:
+		case FILE_DER:
 			(void) fprintf(stderr, "'%s'", m->value.s);
 			break;
 		default:

+ 11 - 3
src/readcdf.c

@@ -26,7 +26,7 @@
 #include "file.h"
 
 #ifndef lint
-FILE_RCSID("@(#)$File: readcdf.c,v 1.53 2015/04/09 20:01:41 christos Exp $")
+FILE_RCSID("@(#)$File: readcdf.c,v 1.57 2016/05/03 16:08:49 christos Exp $")
 #endif
 
 #include <assert.h>
@@ -60,12 +60,16 @@ static const struct nv {
 	{ "Windows Installer",		"vnd.ms-msi",		},
 	{ NULL,				NULL,			},
 }, name2mime[] = {
+	{ "Book",			"vnd.ms-excel",		},
+	{ "Workbook",			"vnd.ms-excel",		},
 	{ "WordDocument",		"msword",		},
 	{ "PowerPoint",			"vnd.ms-powerpoint",	},
 	{ "DigitalSignature",		"vnd.ms-msi",		},
 	{ NULL,				NULL,			},
 }, name2desc[] = {
-	{ "WordDocument",		"Microsoft Office Word",},
+	{ "Book",			"Microsoft Excel",	},
+	{ "Workbook",			"Microsoft Excel",	},
+	{ "WordDocument",		"Microsoft Word",	},
 	{ "PowerPoint",			"Microsoft PowerPoint",	},
 	{ "DigitalSignature",		"Microsoft Installer",	},
 	{ NULL,				NULL,			},
@@ -119,6 +123,8 @@ cdf_app_to_mime(const char *vbuf, const struct nv *nv)
 	assert(c_lc_ctype != NULL);
 	old_lc_ctype = uselocale(c_lc_ctype);
 	assert(old_lc_ctype != NULL);
+#else
+	char *old_lc_ctype = setlocale(LC_CTYPE, "C");
 #endif
 	for (i = 0; nv[i].pattern != NULL; i++)
 		if (strcasestr(vbuf, nv[i].pattern) != NULL) {
@@ -131,6 +137,8 @@ cdf_app_to_mime(const char *vbuf, const struct nv *nv)
 #ifdef USE_C_LOCALE
 	(void)uselocale(old_lc_ctype);
 	freelocale(c_lc_ctype);
+#else
+	setlocale(LC_CTYPE, old_lc_ctype);
 #endif
 	return rv;
 }
@@ -365,7 +373,7 @@ cdf_file_catalog_info(struct magic_set *ms, const cdf_info_t *info,
 	    dir, "Catalog", scn)) == -1)
 		return i;
 #ifdef CDF_DEBUG
-	cdf_dump_catalog(&h, &scn);
+	cdf_dump_catalog(&h, scn);
 #endif
 	if ((i = cdf_file_catalog(ms, h, scn)) == -1)
 		return -1;

+ 199 - 20
src/readelf.c

@@ -27,7 +27,7 @@
 #include "file.h"
 
 #ifndef lint
-FILE_RCSID("@(#)$File: readelf.c,v 1.122 2015/09/10 13:59:32 christos Exp $")
+FILE_RCSID("@(#)$File: readelf.c,v 1.127 2015/11/18 12:29:29 christos Exp $")
 #endif
 
 #ifdef BUILTIN_ELF
@@ -50,7 +50,7 @@ private int dophn_exec(struct magic_set *, int, int, int, off_t, int, size_t,
 private int doshn(struct magic_set *, int, int, int, off_t, int, size_t,
     off_t, int, int, int *, uint16_t *);
 private size_t donote(struct magic_set *, void *, size_t, size_t, int,
-    int, size_t, int *, uint16_t *);
+    int, size_t, int *, uint16_t *, int, off_t, int, off_t);
 
 #define	ELF_ALIGN(a)	((((a) + align - 1) / align) * align)
 
@@ -177,6 +177,11 @@ getu64(int swap, uint64_t value)
 			    elf_getu32(swap, ph32.p_align) : 4) \
 			 : (off_t) (ph64.p_align ?		\
 			    elf_getu64(swap, ph64.p_align) : 4)))
+#define xph_vaddr	(size_t)((clazz == ELFCLASS32		\
+			 ? (off_t) (ph32.p_vaddr ? 		\
+			    elf_getu32(swap, ph32.p_vaddr) : 4) \
+			 : (off_t) (ph64.p_vaddr ?		\
+			    elf_getu64(swap, ph64.p_vaddr) : 4)))
 #define xph_filesz	(size_t)((clazz == ELFCLASS32		\
 			 ? elf_getu32(swap, ph32.p_filesz)	\
 			 : elf_getu64(swap, ph64.p_filesz)))
@@ -187,8 +192,8 @@ getu64(int swap, uint64_t value)
 			 ? elf_getu32(swap, ph32.p_memsz)	\
 			 : elf_getu64(swap, ph64.p_memsz)))
 #define xnh_sizeof	(clazz == ELFCLASS32			\
-			 ? sizeof nh32				\
-			 : sizeof nh64)
+			 ? sizeof(nh32)				\
+			 : sizeof(nh64))
 #define xnh_type	(clazz == ELFCLASS32			\
 			 ? elf_getu32(swap, nh32.n_type)	\
 			 : elf_getu32(swap, nh64.n_type))
@@ -213,6 +218,18 @@ getu64(int swap, uint64_t value)
 #define xcap_val	(clazz == ELFCLASS32			\
 			 ? elf_getu32(swap, cap32.c_un.c_val)	\
 			 : elf_getu64(swap, cap64.c_un.c_val))
+#define xauxv_addr	(clazz == ELFCLASS32			\
+			 ? (void *)&auxv32			\
+			 : (void *)&auxv64)
+#define xauxv_sizeof	(clazz == ELFCLASS32			\
+			 ? sizeof(auxv32)			\
+			 : sizeof(auxv64))
+#define xauxv_type	(clazz == ELFCLASS32			\
+			 ? elf_getu32(swap, auxv32.a_type)	\
+			 : elf_getu64(swap, auxv64.a_type))
+#define xauxv_val	(clazz == ELFCLASS32			\
+			 ? elf_getu32(swap, auxv32.a_v)		\
+			 : elf_getu64(swap, auxv64.a_v))
 
 #ifdef ELFCORE
 /*
@@ -302,6 +319,7 @@ private const char os_style_names[][8] = {
 #define FLAGS_DID_NETBSD_CMODEL		0x040
 #define FLAGS_DID_NETBSD_UNKNOWN	0x080
 #define FLAGS_IS_CORE			0x100
+#define FLAGS_DID_AUXV			0x200
 
 private int
 dophn_core(struct magic_set *ms, int clazz, int swap, int fd, off_t off,
@@ -312,6 +330,8 @@ dophn_core(struct magic_set *ms, int clazz, int swap, int fd, off_t off,
 	size_t offset, len;
 	unsigned char nbuf[BUFSIZ];
 	ssize_t bufsize;
+	off_t ph_off = off;
+	int ph_num = num;
 
 	if (size != xph_sizeof) {
 		if (file_printf(ms, ", corrupted program header size") == -1)
@@ -351,7 +371,8 @@ dophn_core(struct magic_set *ms, int clazz, int swap, int fd, off_t off,
 			if (offset >= (size_t)bufsize)
 				break;
 			offset = donote(ms, nbuf, offset, (size_t)bufsize,
-			    clazz, swap, 4, flags, notecount);
+			    clazz, swap, 4, flags, notecount, fd, ph_off,
+			    ph_num, fsize);
 			if (offset == 0)
 				break;
 
@@ -813,9 +834,157 @@ do_core_note(struct magic_set *ms, unsigned char *nbuf, uint32_t type,
 	return 0;
 }
 
+/*
+ * Map a virtual address to a file offset by walking the program header
+ * table: "num" headers are read with pread() starting at file offset "off".
+ *
+ * Returns the file offset that corresponds to "virtaddr" inside the first
+ * header whose [xph_vaddr, xph_vaddr + xph_filesz) range contains it,
+ * 0 when no header contains the address, or -1 (after calling
+ * file_badread()) when a header cannot be read in full.
+ */
+private off_t
+get_offset_from_virtaddr(struct magic_set *ms, int swap, int clazz, int fd,
+    off_t off, int num, off_t fsize, uint64_t virtaddr)
+{
+	Elf32_Phdr ph32;
+	Elf64_Phdr ph64;
+
+	while (num != 0) {
+		num--;
+		if (pread(fd, xph_addr, xph_sizeof, off) < (ssize_t)xph_sizeof) {
+			file_badread(ms);
+			return -1;
+		}
+		/* Advance first, so every "continue" below moves on. */
+		off += xph_sizeof;
+
+		/* Header's file offset lies past fsize; perhaps warn here */
+		if (fsize != SIZE_UNKNOWN && xph_offset > fsize)
+			continue;
+
+		/* Address is outside the range this header describes. */
+		if (virtaddr < xph_vaddr)
+			continue;
+		if (virtaddr >= xph_vaddr + xph_filesz)
+			continue;
+
+		return xph_offset + (virtaddr - xph_vaddr);
+	}
+	return 0;
+}
+
+/*
+ * Fetch the string stored at virtual address "virtaddr" of the file open
+ * on "fd" into "buf" (capacity "buflen" bytes).  The address is translated
+ * to a file offset through the program headers ("ph_num" entries starting
+ * at "ph_off").
+ *
+ * Returns the length of the string, or 0 when nothing usable was found:
+ * empty buffer, address not covered by any program header, read failure,
+ * empty string, or a non-printable character before the terminating NUL.
+ * The buffer is always NUL-terminated after a successful read.
+ */
+private size_t
+get_string_on_virtaddr(struct magic_set *ms,
+    int swap, int clazz, int fd, off_t ph_off, int ph_num,
+    off_t fsize, uint64_t virtaddr, char *buf, ssize_t buflen)
+{
+	char *bptr;
+	off_t offset;
+
+	/* A negative length would turn into a huge pread() count. */
+	if (buflen <= 0)
+		return 0;
+
+	offset = get_offset_from_virtaddr(ms, swap, clazz, fd, ph_off, ph_num,
+	    fsize, virtaddr);
+	/*
+	 * -1: a program header could not be read (already reported by
+	 *     get_offset_from_virtaddr());
+	 *  0: no program header contains the address.
+	 * Either way there is nothing to read; do not pread() from a bogus
+	 * offset (a negative one fails, and 0 is the ELF header itself).
+	 */
+	if (offset <= 0)
+		return 0;
+
+	if ((buflen = pread(fd, buf, buflen, offset)) <= 0) {
+		file_badread(ms);
+		return 0;
+	}
+
+	/* Force termination: the last byte read is sacrificed for the NUL. */
+	buf[buflen - 1] = '\0';
+
+	/* We expect only printable characters, so return if buffer contains
+	 * non-printable character before the '\0' or just '\0'. */
+	for (bptr = buf; *bptr && isprint((unsigned char)*bptr); bptr++)
+		continue;
+	if (*bptr != '\0')
+		return 0;
+
+	return bptr - buf;
+}
+
+
+/*
+ * Handle an NT_AUXV note of a core file: walk the auxiliary vector stored
+ * in the note descriptor (nbuf[doff .. doff + descsz)) and print the
+ * entries we know about.  String-valued entries (execfn, platform) carry a
+ * virtual address in a_v, which is resolved through the program headers
+ * ("ph_num" entries at "ph_off"); the uid/gid entries are printed as
+ * numbers.
+ *
+ * Returns 0 when the note is not NT_AUXV, FLAGS_IS_CORE is not set, a
+ * file_printf() call fails, or ELFCORE is not defined.  Returns 1 once the
+ * vector has been walked, or when more than 50 entries are seen (an error
+ * is recorded with file_error() in that case).  Sets FLAGS_DID_AUXV as
+ * soon as the note is accepted.
+ */
+private int
+do_auxv_note(struct magic_set *ms, unsigned char *nbuf, uint32_t type,
+    int swap, uint32_t namesz __attribute__((__unused__)),
+    uint32_t descsz __attribute__((__unused__)),
+    size_t noff __attribute__((__unused__)), size_t doff,
+    int *flags, size_t size __attribute__((__unused__)), int clazz,
+    int fd, off_t ph_off, int ph_num, off_t fsize)
+{
+#ifdef ELFCORE
+	Aux32Info auxv32;
+	Aux64Info auxv64;
+	const size_t elsize = xauxv_sizeof;
+	size_t nval = 0;
+
+	if (type != NT_AUXV || (*flags & FLAGS_IS_CORE) == 0)
+		return 0;
+
+	*flags |= FLAGS_DID_AUXV;
+
+	for (size_t off = 0; off + elsize <= descsz; off += elsize) {
+		const char *tag = NULL;
+		int is_string = 0;
+		char buf[256];
+		ssize_t buflen;
+
+		(void)memcpy(xauxv_addr, &nbuf[doff + off], xauxv_sizeof);
+		/* Limit processing to 50 vector entries to prevent DoS */
+		if (nval++ >= 50) {
+			file_error(ms, 0, "Too many ELF Auxv elements");
+			return 1;
+		}
+
+		switch(xauxv_type) {
+		case AT_LINUX_EXECFN:
+			tag = "execfn";
+			is_string = 1;
+			break;
+		case AT_LINUX_PLATFORM:
+			tag = "platform";
+			is_string = 1;
+			break;
+		case AT_LINUX_UID:
+			tag = "real uid";
+			break;
+		case AT_LINUX_GID:
+			tag = "real gid";
+			break;
+		case AT_LINUX_EUID:
+			tag = "effective uid";
+			break;
+		case AT_LINUX_EGID:
+			tag = "effective gid";
+			break;
+		default:
+			/* Not an entry we report: go to the next one. */
+			continue;
+		}
+
+		if (!is_string) {
+			/* Numeric entry: a_v is the value itself. */
+			if (file_printf(ms, ", %s: %d", tag, (int) xauxv_val)
+			    == -1)
+				return 0;
+			continue;
+		}
+
+		/* String entry: a_v is the virtual address of the text. */
+		buflen = get_string_on_virtaddr(ms, swap, clazz, fd,
+		    ph_off, ph_num, fsize, xauxv_val, buf, sizeof(buf));
+		if (buflen == 0)
+			continue;
+
+		if (file_printf(ms, ", %s: '%s'", tag, buf) == -1)
+			return 0;
+	}
+	return 1;
+#else
+	return 0;
+#endif
+}
+
 private size_t
 donote(struct magic_set *ms, void *vbuf, size_t offset, size_t size,
-    int clazz, int swap, size_t align, int *flags, uint16_t *notecount)
+    int clazz, int swap, size_t align, int *flags, uint16_t *notecount,
+    int fd, off_t ph_off, int ph_num, off_t fsize)
 {
 	Elf32_Nhdr nh32;
 	Elf64_Nhdr nh64;
@@ -839,6 +1008,7 @@ donote(struct magic_set *ms, void *vbuf, size_t offset, size_t size,
 
 	namesz = xnh_namesz;
 	descsz = xnh_descsz;
+
 	if ((namesz == 0) && (descsz == 0)) {
 		/*
 		 * We're out of note headers.
@@ -876,28 +1046,36 @@ donote(struct magic_set *ms, void *vbuf, size_t offset, size_t size,
 		return (offset >= size) ? offset : size;
 	}
 
+
 	if ((*flags & FLAGS_DID_OS_NOTE) == 0) {
 		if (do_os_note(ms, nbuf, xnh_type, swap,
 		    namesz, descsz, noff, doff, flags))
-			return size;
+			return offset;
 	}
 
 	if ((*flags & FLAGS_DID_BUILD_ID) == 0) {
 		if (do_bid_note(ms, nbuf, xnh_type, swap,
 		    namesz, descsz, noff, doff, flags))
-			return size;
+			return offset;
 	}
 		
 	if ((*flags & FLAGS_DID_NETBSD_PAX) == 0) {
 		if (do_pax_note(ms, nbuf, xnh_type, swap,
 		    namesz, descsz, noff, doff, flags))
-			return size;
+			return offset;
 	}
 
 	if ((*flags & FLAGS_DID_CORE) == 0) {
 		if (do_core_note(ms, nbuf, xnh_type, swap,
 		    namesz, descsz, noff, doff, flags, size, clazz))
-			return size;
+			return offset;
+	}
+
+	if ((*flags & FLAGS_DID_AUXV) == 0) {
+		if (do_auxv_note(ms, nbuf, xnh_type, swap,
+			namesz, descsz, noff, doff, flags, size, clazz,
+			fd, ph_off, ph_num, fsize))
+			return offset;
 	}
 
 	if (namesz == 7 && strcmp((char *)&nbuf[noff], "NetBSD") == 0) {
@@ -905,32 +1083,32 @@ donote(struct magic_set *ms, void *vbuf, size_t offset, size_t size,
 			descsz = 100;
 		switch (xnh_type) {
 	    	case NT_NETBSD_VERSION:
-			return size;
+			return offset;
 		case NT_NETBSD_MARCH:
 			if (*flags & FLAGS_DID_NETBSD_MARCH)
-				return size;
+				return offset;
 			*flags |= FLAGS_DID_NETBSD_MARCH;
 			if (file_printf(ms, ", compiled for: %.*s",
 			    (int)descsz, (const char *)&nbuf[doff]) == -1)
-				return size;
+				return offset;
 			break;
 		case NT_NETBSD_CMODEL:
 			if (*flags & FLAGS_DID_NETBSD_CMODEL)
-				return size;
+				return offset;
 			*flags |= FLAGS_DID_NETBSD_CMODEL;
 			if (file_printf(ms, ", compiler model: %.*s",
 			    (int)descsz, (const char *)&nbuf[doff]) == -1)
-				return size;
+				return offset;
 			break;
 		default:
 			if (*flags & FLAGS_DID_NETBSD_UNKNOWN)
-				return size;
+				return offset;
 			*flags |= FLAGS_DID_NETBSD_UNKNOWN;
 			if (file_printf(ms, ", note=%u", xnh_type) == -1)
-				return size;
+				return offset;
 			break;
 		}
-		return size;
+		return offset;
 	}
 
 	return offset;
@@ -1080,7 +1258,8 @@ doshn(struct magic_set *ms, int clazz, int swap, int fd, off_t off, int num,
 				if (noff >= (off_t)xsh_size)
 					break;
 				noff = donote(ms, nbuf, (size_t)noff,
-				    xsh_size, clazz, swap, 4, flags, notecount);
+				    xsh_size, clazz, swap, 4, flags, notecount,
+				    fd, 0, 0, 0);
 				if (noff == 0)
 					break;
 			}
@@ -1329,7 +1508,7 @@ dophn_exec(struct magic_set *ms, int clazz, int swap, int fd, off_t off,
 					break;
 				offset = donote(ms, nbuf, offset,
 				    (size_t)bufsize, clazz, swap, align,
-				    flags, notecount);
+				    flags, notecount, fd, 0, 0, 0);
 				if (offset == 0)
 					break;
 			}

+ 36 - 0
src/readelf.h

@@ -54,6 +54,42 @@ typedef uint8_t		Elf64_Char;
 #define	EI_NIDENT	16
 
 typedef struct {
+	Elf32_Word	a_type;		/* 32-bit entry type (one of AT_*) */
+	Elf32_Word	a_v;		/* 32-bit entry value */
+} Aux32Info;
+
+typedef struct {
+	Elf64_Xword	a_type;		/* 64-bit entry type (one of AT_*) */
+	Elf64_Xword	a_v;		/* 64-bit entry value */
+} Aux64Info;
+
+#define AT_NULL   0     /* end of vector */
+#define AT_IGNORE 1     /* entry should be ignored */
+#define AT_EXECFD 2     /* file descriptor of program */
+#define AT_PHDR   3     /* program headers for program */
+#define AT_PHENT  4     /* size of program header entry */
+#define AT_PHNUM  5     /* number of program headers */
+#define AT_PAGESZ 6     /* system page size */
+#define AT_BASE   7     /* base address of interpreter */
+#define AT_FLAGS  8     /* flags */
+#define AT_ENTRY  9     /* entry point of program */
+#define AT_LINUX_NOTELF 10    /* program is not ELF */
+#define AT_LINUX_UID    11    /* real uid */
+#define AT_LINUX_EUID   12    /* effective uid */
+#define AT_LINUX_GID    13    /* real gid */
+#define AT_LINUX_EGID   14    /* effective gid */
+#define AT_LINUX_PLATFORM 15  /* string identifying CPU for optimizations */
+#define AT_LINUX_HWCAP  16    /* arch dependent hints at CPU capabilities */
+#define AT_LINUX_CLKTCK 17    /* frequency at which times() increments */
+/* AT_* values 18 through 22 are reserved */
+#define AT_LINUX_SECURE 23   /* secure mode boolean */
+#define AT_LINUX_BASE_PLATFORM 24     /* string identifying real platform, may
+                                 * differ from AT_PLATFORM. */
+#define AT_LINUX_RANDOM 25    /* address of 16 random bytes */
+#define AT_LINUX_HWCAP2 26    /* extension of AT_HWCAP */
+#define AT_LINUX_EXECFN 31   /* filename of program */
+
+typedef struct {
     Elf32_Char	e_ident[EI_NIDENT];
     Elf32_Half	e_type;
     Elf32_Half	e_machine;

+ 218 - 111
src/softmagic.c

@@ -32,7 +32,7 @@
 #include "file.h"
 
 #ifndef	lint
-FILE_RCSID("@(#)$File: softmagic.c,v 1.218 2015/09/11 17:24:09 christos Exp $")
+FILE_RCSID("@(#)$File: softmagic.c,v 1.234 2016/06/13 12:02:06 christos Exp $")
 #endif	/* lint */
 
 #include "magic.h"
@@ -41,26 +41,27 @@ FILE_RCSID("@(#)$File: softmagic.c,v 1.218 2015/09/11 17:24:09 christos Exp $")
 #include <ctype.h>
 #include <stdlib.h>
 #include <time.h>
+#include "der.h"
 
 private int match(struct magic_set *, struct magic *, uint32_t,
-    const unsigned char *, size_t, size_t, int, int, int, uint16_t,
+    const unsigned char *, size_t, size_t, int, int, int, uint16_t *,
     uint16_t *, int *, int *, int *);
 private int mget(struct magic_set *, const unsigned char *,
-    struct magic *, size_t, size_t, unsigned int, int, int, int, uint16_t,
+    struct magic *, size_t, size_t, unsigned int, int, int, int, uint16_t *,
     uint16_t *, int *, int *, int *);
 private int magiccheck(struct magic_set *, struct magic *);
 private int32_t mprint(struct magic_set *, struct magic *);
-private int32_t moffset(struct magic_set *, struct magic *);
+private int moffset(struct magic_set *, struct magic *, size_t, int32_t *);
 private void mdebug(uint32_t, const char *, size_t);
 private int mcopy(struct magic_set *, union VALUETYPE *, int, int,
     const unsigned char *, uint32_t, size_t, struct magic *);
 private int mconvert(struct magic_set *, struct magic *, int);
 private int print_sep(struct magic_set *, int);
 private int handle_annotation(struct magic_set *, struct magic *);
-private void cvt_8(union VALUETYPE *, const struct magic *);
-private void cvt_16(union VALUETYPE *, const struct magic *);
-private void cvt_32(union VALUETYPE *, const struct magic *);
-private void cvt_64(union VALUETYPE *, const struct magic *);
+private int cvt_8(union VALUETYPE *, const struct magic *);
+private int cvt_16(union VALUETYPE *, const struct magic *);
+private int cvt_32(union VALUETYPE *, const struct magic *);
+private int cvt_64(union VALUETYPE *, const struct magic *);
 
 #define OFFSET_OOB(n, o, i)	((n) < (o) || (i) > ((n) - (o)))
 #define BE64(p) (((uint64_t)(p)->hq[0]<<56)|((uint64_t)(p)->hq[1]<<48)| \
@@ -87,20 +88,24 @@ private void cvt_64(union VALUETYPE *, const struct magic *);
 /*ARGSUSED1*/		/* nbytes passed for regularity, maybe need later */
 protected int
 file_softmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes,
-    uint16_t indir_level, uint16_t *name_count, int mode, int text)
+    uint16_t *indir_count, uint16_t *name_count, int mode, int text)
 {
 	struct mlist *ml;
 	int rv, printed_something = 0, need_separator = 0;
-	uint16_t nc;
+	uint16_t nc, ic;
 
 	if (name_count == NULL) {
 		nc = 0;
 		name_count = &nc;
 	}
+	if (indir_count == NULL) {
+		ic = 0;
+		indir_count = &ic;
+	}
 
 	for (ml = ms->mlist[0]->next; ml != ms->mlist[0]; ml = ml->next)
 		if ((rv = match(ms, ml->magic, ml->nmagic, buf, nbytes, 0, mode,
-		    text, 0, indir_level, name_count,
+		    text, 0, indir_count, name_count,
 		    &printed_something, &need_separator, NULL)) != 0)
 			return rv;
 
@@ -156,7 +161,7 @@ file_fmtcheck(struct magic_set *ms, const struct magic *m, const char *def,
 private int
 match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
     const unsigned char *s, size_t nbytes, size_t offset, int mode, int text,
-    int flip, uint16_t indir_level, uint16_t *name_count,
+    int flip, uint16_t *indir_count, uint16_t *name_count,
     int *printed_something, int *need_separator, int *returnval)
 {
 	uint32_t magindex = 0;
@@ -181,11 +186,11 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
 		     ((text && (m->str_flags & FLT) == STRING_BINTEST) ||
 		      (!text && (m->str_flags & FLT) == STRING_TEXTTEST))) ||
 		    (m->flag & mode) != mode) {
+flush:
 			/* Skip sub-tests */
-			while (magindex + 1 < nmagic &&
-                               magic[magindex + 1].cont_level != 0 &&
-			       ++magindex)
-				continue;
+			while (magindex < nmagic - 1 &&
+			    magic[magindex + 1].cont_level != 0)
+				magindex++;
 			continue; /* Skip to next top-level test*/
 		}
 
@@ -194,7 +199,7 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
 
 		/* if main entry matches, print it... */
 		switch (mget(ms, s, m, nbytes, offset, cont_level, mode, text,
-		    flip, indir_level, name_count,
+		    flip, indir_count, name_count,
 		    printed_something, need_separator, returnval)) {
 		case -1:
 			return -1;
@@ -222,10 +227,7 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
 			 * main entry didn't match,
 			 * flush its continuations
 			 */
-			while (magindex < nmagic - 1 &&
-			    magic[magindex + 1].cont_level != 0)
-				magindex++;
-			continue;
+			goto flush;
 		}
 
 		if ((e = handle_annotation(ms, m)) != 0) {
@@ -234,6 +236,7 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
 			*returnval = 1;
 			return e;
 		}
+
 		/*
 		 * If we are going to print something, we'll need to print
 		 * a blank before we print something else.
@@ -249,7 +252,13 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
 		if (print && mprint(ms, m) == -1)
 			return -1;
 
-		ms->c.li[cont_level].off = moffset(ms, m);
+		switch (moffset(ms, m, nbytes, &ms->c.li[cont_level].off)) {
+		case -1:
+		case 0:
+			goto flush;
+		default:
+			break;
+		}
 
 		/* and any continuations that match */
 		if (file_check_mem(ms, ++cont_level) == -1)
@@ -283,7 +292,7 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
 			}
 #endif
 			switch (mget(ms, s, m, nbytes, offset, cont_level, mode,
-			    text, flip, indir_level, name_count,
+			    text, flip, indir_count, name_count,
 			    printed_something, need_separator, returnval)) {
 			case -1:
 				return -1;
@@ -318,6 +327,7 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
 						break;
 				} else
 					ms->c.li[cont_level].got_match = 1;
+
 				if ((e = handle_annotation(ms, m)) != 0) {
 					*need_separator = 1;
 					*printed_something = 1;
@@ -354,7 +364,15 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
 				if (print && mprint(ms, m) == -1)
 					return -1;
 
-				ms->c.li[cont_level].off = moffset(ms, m);
+				switch (moffset(ms, m, nbytes,
+				    &ms->c.li[cont_level].off)) {
+				case -1:
+				case 0:
+					flush = 1;
+					break;
+				default:
+					break;
+				}
 
 				if (*m->desc)
 					*need_separator = 1;
@@ -682,7 +700,12 @@ mprint(struct magic_set *ms, struct magic *m)
 	case FILE_NAME:
 		t = ms->offset;
 		break;
-
+	case FILE_DER:
+		if (file_printf(ms, F(ms, m, "%s"), 
+		    file_printable(sbuf, sizeof(sbuf), ms->ms_value.s)) == -1)
+			return -1;
+		t = ms->offset;
+		break;
 	default:
 		file_magerror(ms, "invalid m->type (%d) in mprint()", m->type);
 		return -1;
@@ -690,100 +713,152 @@ mprint(struct magic_set *ms, struct magic *m)
 	return (int32_t)t;
 }
 
-private int32_t
-moffset(struct magic_set *ms, struct magic *m)
+private int
+moffset(struct magic_set *ms, struct magic *m, size_t nbytes, int32_t *op)
 {
+	int32_t o;
+
   	switch (m->type) {
   	case FILE_BYTE:
-		return CAST(int32_t, (ms->offset + sizeof(char)));
+		o = CAST(int32_t, (ms->offset + sizeof(char)));
+		break;
 
   	case FILE_SHORT:
   	case FILE_BESHORT:
   	case FILE_LESHORT:
-		return CAST(int32_t, (ms->offset + sizeof(short)));
+		o = CAST(int32_t, (ms->offset + sizeof(short)));
+		break;
 
   	case FILE_LONG:
   	case FILE_BELONG:
   	case FILE_LELONG:
   	case FILE_MELONG:
-		return CAST(int32_t, (ms->offset + sizeof(int32_t)));
+		o = CAST(int32_t, (ms->offset + sizeof(int32_t)));
+		break;
 
   	case FILE_QUAD:
   	case FILE_BEQUAD:
   	case FILE_LEQUAD:
-		return CAST(int32_t, (ms->offset + sizeof(int64_t)));
+		o = CAST(int32_t, (ms->offset + sizeof(int64_t)));
+		break;
 
   	case FILE_STRING:
   	case FILE_PSTRING:
   	case FILE_BESTRING16:
   	case FILE_LESTRING16:
-		if (m->reln == '=' || m->reln == '!')
-			return ms->offset + m->vallen;
-		else {
+		if (m->reln == '=' || m->reln == '!') {
+			o = ms->offset + m->vallen;
+		} else {
 			union VALUETYPE *p = &ms->ms_value;
-			uint32_t t;
 
 			if (*m->value.s == '\0')
 				p->s[strcspn(p->s, "\r\n")] = '\0';
-			t = CAST(uint32_t, (ms->offset + strlen(p->s)));
+			o = CAST(uint32_t, (ms->offset + strlen(p->s)));
 			if (m->type == FILE_PSTRING)
-				t += (uint32_t)file_pstring_length_size(m);
-			return t;
+				o += (uint32_t)file_pstring_length_size(m);
 		}
+		break;
 
 	case FILE_DATE:
 	case FILE_BEDATE:
 	case FILE_LEDATE:
 	case FILE_MEDATE:
-		return CAST(int32_t, (ms->offset + sizeof(uint32_t)));
+		o = CAST(int32_t, (ms->offset + sizeof(uint32_t)));
+		break;
 
 	case FILE_LDATE:
 	case FILE_BELDATE:
 	case FILE_LELDATE:
 	case FILE_MELDATE:
-		return CAST(int32_t, (ms->offset + sizeof(uint32_t)));
+		o = CAST(int32_t, (ms->offset + sizeof(uint32_t)));
+		break;
 
 	case FILE_QDATE:
 	case FILE_BEQDATE:
 	case FILE_LEQDATE:
-		return CAST(int32_t, (ms->offset + sizeof(uint64_t)));
+		o = CAST(int32_t, (ms->offset + sizeof(uint64_t)));
+		break;
 
 	case FILE_QLDATE:
 	case FILE_BEQLDATE:
 	case FILE_LEQLDATE:
-		return CAST(int32_t, (ms->offset + sizeof(uint64_t)));
+		o = CAST(int32_t, (ms->offset + sizeof(uint64_t)));
+		break;
 
   	case FILE_FLOAT:
   	case FILE_BEFLOAT:
   	case FILE_LEFLOAT:
-		return CAST(int32_t, (ms->offset + sizeof(float)));
+		o = CAST(int32_t, (ms->offset + sizeof(float)));
+		break;
 
   	case FILE_DOUBLE:
   	case FILE_BEDOUBLE:
   	case FILE_LEDOUBLE:
-		return CAST(int32_t, (ms->offset + sizeof(double)));
+		o = CAST(int32_t, (ms->offset + sizeof(double)));
+		break;
 
 	case FILE_REGEX:
 		if ((m->str_flags & REGEX_OFFSET_START) != 0)
-			return CAST(int32_t, ms->search.offset);
+			o = CAST(int32_t, ms->search.offset);
 		else
-			return CAST(int32_t, (ms->search.offset +
-			    ms->search.rm_len));
+			o = CAST(int32_t,
+			    (ms->search.offset + ms->search.rm_len));
+		break;
 
 	case FILE_SEARCH:
 		if ((m->str_flags & REGEX_OFFSET_START) != 0)
-			return CAST(int32_t, ms->search.offset);
+			o = CAST(int32_t, ms->search.offset);
 		else
-			return CAST(int32_t, (ms->search.offset + m->vallen));
+			o = CAST(int32_t, (ms->search.offset + m->vallen));
+		break;
 
 	case FILE_CLEAR:
 	case FILE_DEFAULT:
 	case FILE_INDIRECT:
-		return ms->offset;
+		o = ms->offset;
+		break;
+
+	case FILE_DER:
+		{
+			o = der_offs(ms, m, nbytes);
+			if (o == -1 || (size_t)o > nbytes) {
+				if ((ms->flags & MAGIC_DEBUG) != 0) {
+					(void)fprintf(stderr,
+					    "Bad DER offset %d nbytes=%zu",
+					    o, nbytes);
+				}
+				*op = 0;
+				return 0;
+			}
+			break;
+		}
 
 	default:
-		return 0;
+		o = 0;
+		break;
+	}
+
+	if ((size_t)o > nbytes) {
+#if 0
+		file_error(ms, 0, "Offset out of range %zu > %zu",
+		    (size_t)o, nbytes);
+#endif
+		return -1;
 	}
+	*op = o;
+	return 1;
+}
+
+/*
+ * Pack the low 7 bits of each of the four bytes of "v" into one 28-bit
+ * number: byte 0 supplies bits 0-6, byte 1 bits 7-13, byte 2 bits 14-20
+ * and byte 3 bits 21-27; the top bit of every byte is dropped.
+ * (Presumably the ID3 "synchsafe" integer encoding, going by the name --
+ * confirm against the caller.)  When MAGIC_DEBUG is set the result is
+ * also traced to stderr.
+ */
+private uint32_t
+cvt_id3(struct magic_set *ms, uint32_t v)
+{
+	uint32_t packed = 0;
+	int i;
+
+	for (i = 0; i < 4; i++)
+		packed |= ((v >> (8 * i)) & 0x7f) << (7 * i);
+
+	if (ms->flags & MAGIC_DEBUG)
+		fprintf(stderr, "id3 offs=%u\n", packed);
+	return packed;
+}
 
 private int
@@ -858,37 +933,45 @@ cvt_flip(int type, int flip)
 			p->fld *= cast m->num_mask; \
 			break; \
 		case FILE_OPDIVIDE: \
+			if (cast m->num_mask == 0) \
+				return -1; \
 			p->fld /= cast m->num_mask; \
 			break; \
 		case FILE_OPMODULO: \
+			if (cast m->num_mask == 0) \
+				return -1; \
 			p->fld %= cast m->num_mask; \
 			break; \
 		} \
 	if (m->mask_op & FILE_OPINVERSE) \
 		p->fld = ~p->fld \
 
-private void
+private int
 cvt_8(union VALUETYPE *p, const struct magic *m)
 {
 	DO_CVT(b, (uint8_t));
+	return 0;
 }
 
-private void
+private int
 cvt_16(union VALUETYPE *p, const struct magic *m)
 {
 	DO_CVT(h, (uint16_t));
+	return 0;
 }
 
-private void
+private int
 cvt_32(union VALUETYPE *p, const struct magic *m)
 {
 	DO_CVT(l, (uint32_t));
+	return 0;
 }
 
-private void
+private int
 cvt_64(union VALUETYPE *p, const struct magic *m)
 {
 	DO_CVT(q, (uint64_t));
+	return 0;
 }
 
 #define DO_CVT2(fld, cast) \
@@ -904,20 +987,24 @@ cvt_64(union VALUETYPE *p, const struct magic *m)
 			p->fld *= cast m->num_mask; \
 			break; \
 		case FILE_OPDIVIDE: \
+			if (cast m->num_mask == 0) \
+				return -1; \
 			p->fld /= cast m->num_mask; \
 			break; \
 		} \
 
-private void
+private int
 cvt_float(union VALUETYPE *p, const struct magic *m)
 {
 	DO_CVT2(f, (float));
+	return 0;
 }
 
-private void
+private int
 cvt_double(union VALUETYPE *p, const struct magic *m)
 {
 	DO_CVT2(d, (double));
+	return 0;
 }
 
 /*
@@ -933,21 +1020,25 @@ mconvert(struct magic_set *ms, struct magic *m, int flip)
 
 	switch (type = cvt_flip(m->type, flip)) {
 	case FILE_BYTE:
-		cvt_8(p, m);
+		if (cvt_8(p, m) == -1)
+			goto out;
 		return 1;
 	case FILE_SHORT:
-		cvt_16(p, m);
+		if (cvt_16(p, m) == -1)
+			goto out;
 		return 1;
 	case FILE_LONG:
 	case FILE_DATE:
 	case FILE_LDATE:
-		cvt_32(p, m);
+		if (cvt_32(p, m) == -1)
+			goto out;
 		return 1;
 	case FILE_QUAD:
 	case FILE_QDATE:
 	case FILE_QLDATE:
 	case FILE_QWDATE:
-		cvt_64(p, m);
+		if (cvt_64(p, m) == -1)
+			goto out;
 		return 1;
 	case FILE_STRING:
 	case FILE_BESTRING16:
@@ -979,65 +1070,78 @@ mconvert(struct magic_set *ms, struct magic *m, int flip)
 	}
 	case FILE_BESHORT:
 		p->h = (short)BE16(p);
-		cvt_16(p, m);
+		if (cvt_16(p, m) == -1)
+			goto out;
 		return 1;
 	case FILE_BELONG:
 	case FILE_BEDATE:
 	case FILE_BELDATE:
 		p->l = (int32_t)BE32(p);
-		cvt_32(p, m);
+		if (cvt_32(p, m) == -1)
+			goto out;
 		return 1;
 	case FILE_BEQUAD:
 	case FILE_BEQDATE:
 	case FILE_BEQLDATE:
 	case FILE_BEQWDATE:
 		p->q = (uint64_t)BE64(p);
-		cvt_64(p, m);
+		if (cvt_64(p, m) == -1)
+			goto out;
 		return 1;
 	case FILE_LESHORT:
 		p->h = (short)LE16(p);
-		cvt_16(p, m);
+		if (cvt_16(p, m) == -1)
+			goto out;
 		return 1;
 	case FILE_LELONG:
 	case FILE_LEDATE:
 	case FILE_LELDATE:
 		p->l = (int32_t)LE32(p);
-		cvt_32(p, m);
+		if (cvt_32(p, m) == -1)
+			goto out;
 		return 1;
 	case FILE_LEQUAD:
 	case FILE_LEQDATE:
 	case FILE_LEQLDATE:
 	case FILE_LEQWDATE:
 		p->q = (uint64_t)LE64(p);
-		cvt_64(p, m);
+		if (cvt_64(p, m) == -1)
+			goto out;
 		return 1;
 	case FILE_MELONG:
 	case FILE_MEDATE:
 	case FILE_MELDATE:
 		p->l = (int32_t)ME32(p);
-		cvt_32(p, m);
+		if (cvt_32(p, m) == -1)
+			goto out;
 		return 1;
 	case FILE_FLOAT:
-		cvt_float(p, m);
+		if (cvt_float(p, m) == -1)
+			goto out;
 		return 1;
 	case FILE_BEFLOAT:
 		p->l = BE32(p);
-		cvt_float(p, m);
+		if (cvt_float(p, m) == -1)
+			goto out;
 		return 1;
 	case FILE_LEFLOAT:
 		p->l = LE32(p);
-		cvt_float(p, m);
+		if (cvt_float(p, m) == -1)
+			goto out;
 		return 1;
 	case FILE_DOUBLE:
-		cvt_double(p, m);
+		if (cvt_double(p, m) == -1)
+			goto out;
 		return 1;
 	case FILE_BEDOUBLE:
 		p->q = BE64(p); 
-		cvt_double(p, m);
+		if (cvt_double(p, m) == -1)
+			goto out;
 		return 1;
 	case FILE_LEDOUBLE:
 		p->q = LE64(p);
-		cvt_double(p, m);
+		if (cvt_double(p, m) == -1)
+			goto out;
 		return 1;
 	case FILE_REGEX:
 	case FILE_SEARCH:
@@ -1045,11 +1149,15 @@ mconvert(struct magic_set *ms, struct magic *m, int flip)
 	case FILE_CLEAR:
 	case FILE_NAME:
 	case FILE_USE:
+	case FILE_DER:
 		return 1;
 	default:
 		file_magerror(ms, "invalid type %d in mconvert()", m->type);
 		return 0;
 	}
+out:
+	file_magerror(ms, "zerodivide in mconvert()");
+	return 0;
 }
 
 
@@ -1072,7 +1180,10 @@ mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir,
 	 */
 	if (indir == 0) {
 		switch (type) {
+		case FILE_DER:
 		case FILE_SEARCH:
+			if (offset > nbytes)
+				offset = nbytes;
 			ms->search.s = RCAST(const char *, s) + offset;
 			ms->search.s_len = nbytes - offset;
 			ms->search.offset = offset;
@@ -1186,7 +1297,7 @@ mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir,
 private int
 mget(struct magic_set *ms, const unsigned char *s, struct magic *m,
     size_t nbytes, size_t o, unsigned int cont_level, int mode, int text,
-    int flip, uint16_t indir_level, uint16_t *name_count,
+    int flip, uint16_t *indir_count, uint16_t *name_count,
     int *printed_something, int *need_separator, int *returnval)
 {
 	uint32_t offset = ms->offset;
@@ -1197,9 +1308,9 @@ mget(struct magic_set *ms, const unsigned char *s, struct magic *m,
 	union VALUETYPE *p = &ms->ms_value;
 	struct mlist ml;
 
-	if (indir_level >= ms->indir_max) {
-		file_error(ms, 0, "indirect recursion nesting (%hu) exceeded",
-		    indir_level);
+	if (*indir_count >= ms->indir_max) {
+		file_error(ms, 0, "indirect count (%hu) exceeded",
+		    *indir_count);
 		return -1;
 	}
 
@@ -1218,7 +1329,7 @@ mget(struct magic_set *ms, const unsigned char *s, struct magic *m,
 		    SIZE_T_FORMAT "u, " "nbytes=%" SIZE_T_FORMAT
 		    "u, il=%hu, nc=%hu)\n",
 		    m->type, m->flag, offset, o, nbytes,
-		    indir_level, *name_count);
+		    *indir_count, *name_count);
 		mdebug(offset, (char *)(void *)p, sizeof(union VALUETYPE));
 #ifndef COMPILE_ONLY
 		file_mdump(m);
@@ -1230,6 +1341,8 @@ mget(struct magic_set *ms, const unsigned char *s, struct magic *m,
 		if (m->in_op & FILE_OPINDIRECT) {
 			const union VALUETYPE *q = CAST(const union VALUETYPE *,
 			    ((const void *)(s + offset + off)));
+			if (OFFSET_OOB(nbytes, offset + off, sizeof(*q)))
+				return 0;
 			switch (cvt_flip(m->in_type, flip)) {
 			case FILE_BYTE:
 				off = q->b;
@@ -1410,6 +1523,8 @@ mget(struct magic_set *ms, const unsigned char *s, struct magic *m,
 			if (OFFSET_OOB(nbytes, offset, 4))
 				return 0;
 			lhs = BE32(p);
+			if (in_type == FILE_BEID3)
+				lhs = cvt_id3(ms, lhs);
 			if (off) {
 				switch (m->in_op & FILE_OPS_MASK) {
 				case FILE_OPAND:
@@ -1447,6 +1562,8 @@ mget(struct magic_set *ms, const unsigned char *s, struct magic *m,
 			if (OFFSET_OOB(nbytes, offset, 4))
 				return 0;
 			lhs = LE32(p);
+			if (in_type == FILE_LEID3)
+				lhs = cvt_id3(ms, lhs);
 			if (off) {
 				switch (m->in_op & FILE_OPS_MASK) {
 				case FILE_OPAND:
@@ -1554,20 +1671,6 @@ mget(struct magic_set *ms, const unsigned char *s, struct magic *m,
 			break;
 		}
 
-		switch (in_type) {
-		case FILE_LEID3:
-		case FILE_BEID3:
-			offset = ((((offset >>  0) & 0x7f) <<  0) |
-				  (((offset >>  8) & 0x7f) <<  7) |
-				  (((offset >> 16) & 0x7f) << 14) |
-				  (((offset >> 24) & 0x7f) << 21));
-			if ((ms->flags & MAGIC_DEBUG) != 0)
-				fprintf(stderr, "id3 offs=%u\n", offset);
-			break;
-		default:
-			break;
-		}
-
 		if (m->flag & INDIROFFADD) {
 			offset += ms->c.li[cont_level-1].off;
 			if (offset == 0) {
@@ -1656,8 +1759,9 @@ mget(struct magic_set *ms, const unsigned char *s, struct magic *m,
 		if ((pb = file_push_buffer(ms)) == NULL)
 			return -1;
 
+		(*indir_count)++;
 		rv = file_softmagic(ms, s + offset, nbytes - offset,
-		    indir_level + 1, name_count, BINTEST, text);
+		    indir_count, name_count, BINTEST, text);
 
 		if ((ms->flags & MAGIC_DEBUG) != 0)
 			fprintf(stderr, "indirect @offs=%u[%d]\n", offset, rv);
@@ -1697,7 +1801,7 @@ mget(struct magic_set *ms, const unsigned char *s, struct magic *m,
 		if (m->flag & NOSPACE)
 			*need_separator = 0;
 		rv = match(ms, ml.magic, ml.nmagic, s, nbytes, offset + o,
-		    mode, text, flip, indir_level, name_count,
+		    mode, text, flip, indir_count, name_count,
 		    printed_something, need_separator, returnval);
 		if (rv != 1)
 		    *need_separator = oneed_separator;
@@ -1709,6 +1813,7 @@ mget(struct magic_set *ms, const unsigned char *s, struct magic *m,
 		if (file_printf(ms, "%s", m->desc) == -1)
 			return -1;
 		return 1;
+	case FILE_DER:
 	case FILE_DEFAULT:	/* nothing to check */
 	case FILE_CLEAR:
 	default:
@@ -1969,14 +2074,13 @@ magiccheck(struct magic_set *ms, struct magic *m)
 			file_regerror(&rx, rc, ms);
 			v = (uint64_t)-1;
 		} else {
-			regmatch_t pmatch[1];
+			regmatch_t pmatch;
 			size_t slen = ms->search.s_len;
-#ifndef REG_STARTEND
-#define	REG_STARTEND	0
 			char *copy;
 			if (slen != 0) {
 			    copy = malloc(slen);
 			    if (copy == NULL)  {
+				file_regfree(&rx);
 				file_error(ms, errno,
 				    "can't allocate %" SIZE_T_FORMAT "u bytes",
 				    slen);
@@ -1989,22 +2093,15 @@ magiccheck(struct magic_set *ms, struct magic *m)
 			    search = ms->search.s;
 			    copy = NULL;
 			}
-#else
-			search = ms->search.s;
-			pmatch[0].rm_so = 0;
-			pmatch[0].rm_eo = slen;
-#endif
 			rc = file_regexec(&rx, (const char *)search,
-			    1, pmatch, REG_STARTEND);
-#if REG_STARTEND == 0
+			    1, &pmatch, 0);
 			free(copy);
-#endif
 			switch (rc) {
 			case 0:
-				ms->search.s += (int)pmatch[0].rm_so;
-				ms->search.offset += (size_t)pmatch[0].rm_so;
+				ms->search.s += (int)pmatch.rm_so;
+				ms->search.offset += (size_t)pmatch.rm_so;
 				ms->search.rm_len =
-				    (size_t)(pmatch[0].rm_eo - pmatch[0].rm_so);
+				    (size_t)(pmatch.rm_eo - pmatch.rm_so);
 				v = 0;
 				break;
 
@@ -2027,6 +2124,16 @@ magiccheck(struct magic_set *ms, struct magic *m)
 	case FILE_USE:
 	case FILE_NAME:
 		return 1;
+	case FILE_DER:
+		matched = der_cmp(ms, m);
+		if (matched == -1) {
+			if ((ms->flags & MAGIC_DEBUG) != 0) {
+				(void) fprintf(stderr,
+				    "EOF comparing DER entries");
+			}
+			return 0;
+		}
+		return matched;
 	default:
 		file_magerror(ms, "invalid type %d in magiccheck()", m->type);
 		return -1;
@@ -2126,12 +2233,12 @@ magiccheck(struct magic_set *ms, struct magic *m)
 private int
 handle_annotation(struct magic_set *ms, struct magic *m)
 {
-	if (ms->flags & MAGIC_APPLE) {
+	if ((ms->flags & MAGIC_APPLE) && m->apple[0]) {
 		if (file_printf(ms, "%.8s", m->apple) == -1)
 			return -1;
 		return 1;
 	}
-	if (ms->flags & MAGIC_EXTENSION) {
+	if ((ms->flags & MAGIC_EXTENSION) && m->ext[0]) {
 		if (file_printf(ms, "%s", m->ext) == -1)
 			return -1;
 		return 1;

+ 1 - 1
tests/Makefile.am

@@ -13,4 +13,4 @@ issue311docx.testfile
 T = $(top_srcdir)/tests
 check-local:
 	MAGIC=$(top_builddir)/magic/magic ./test
-	for i in $T/*.testfile; do echo Running test: $$i; MAGIC=$(top_builddir)/magic/magic ./test $$i $${i%%.testfile}.result; done
+	set -e; for i in $T/*.testfile; do echo Running test: $$i; TZ=UTC MAGIC=$(top_builddir)/magic/magic ./test $$i $${i%%.testfile}.result; done

+ 1 - 1
tests/Makefile.in

@@ -608,7 +608,7 @@ uninstall-am:
 
 check-local:
 	MAGIC=$(top_builddir)/magic/magic ./test
-	for i in $T/*.testfile; do echo Running test: $$i; MAGIC=$(top_builddir)/magic/magic ./test $$i $${i%%.testfile}.result; done
+	set -e; for i in $T/*.testfile; do echo Running test: $$i; TZ=UTC MAGIC=$(top_builddir)/magic/magic ./test $$i $${i%%.testfile}.result; done
 
 # Tell versions [3.59,3.63) of GNU make to not export all variables.
 # Otherwise a system limit (for SysV at least) may be exceeded.