Browse Source

Import upstream version 5.18

Christos Zoulas 5 years ago
parent
commit
81b19b71bd

+ 34 - 0
ChangeLog

@@ -1,3 +1,37 @@
+2014-03-26  11:25  Christos Zoulas <christos@zoulas.com>
+
+	* release 5.18
+
+2014-03-15  17:45  Christos Zoulas <christos@zoulas.com>
+
+        * add fmtcheck(3) for those who don't have it
+
+2014-03-14  15:12  Christos Zoulas <christos@zoulas.com>
+
+	* prevent mime entries from being attached to magic
+	  entries with no descriptions
+
+	* adjust magic strength for regex type
+
+	* remove superfluous ascmagic with encoding test
+
+2014-03-06  12:01  Christos Zoulas <christos@zoulas.com>
+
+	* fix regression where echo -ne "\012\013\014" | file -i -
+	  printed "binary" instead of "application/octet-stream"
+
+	* add size_t overflow check for magic file size
+
+2014-02-27  16:01  Christos Zoulas <christos@zoulas.com>
+
+	* experimental support for matching with CDF CLSID
+
+2014-02-18  13:04  Kimmo Suominen (kimmo@suominen.com)
+
+	* Cache old LC_CTYPE locale before setting it to "C", so
+	  we can use it to restore LC_CTYPE instead of asking
+	  setlocale() to scan the environment variables.
+
 2014-02-12  18:21  Christos Zoulas <christos@zoulas.com>
 
 	* Count recursion levels through indirect magic

+ 40 - 36
README

@@ -1,11 +1,14 @@
-** README for file(1) Command **
-@(#) $File: README,v 1.47 2013/06/04 23:15:02 ian Exp $
+## README for file(1) Command ##
 
-Mailing List: file@mx.gw.com
-Mailing List archives: http://mx.gw.com/pipermail/file/
-Bug tracker: http://bugs.gw.com/
+    @(#) $File: README,v 1.48 2014/03/07 13:55:30 christos Exp $
+
+Mailing List: file@mx.gw.com  
+Mailing List archives: http://mx.gw.com/pipermail/file/  
+Bug tracker: http://bugs.gw.com/  
 E-mail: christos@astron.com
 
+[![Build Status](https://travis-ci.org/file/file.png?branch=master)](https://travis-ci.org/file/file)
+
 Phone: Do not even think of telephoning me about this program. Send cash first!
 
 This is Release 5.x of Ian Darwin's (copyright but distributable)
@@ -20,7 +23,7 @@ You can download the latest version of the original sources for file from:
 
 A public read-only git repository of the same sources is available at:
 
-	https://github.com/glensc/file
+	https://github.com/file/file
 
 The major changes for 5.x are CDF file parsing, indirect magic, and
 overhaul in mime and ascii encoding handling.
@@ -61,40 +64,41 @@ magic numbers assigned to all sorts of data files that
 are in reasonable circulation. Send your magic numbers,
 in magic(5) format please, to the maintainer, Christos Zoulas.
 
-COPYING - read this first.
-README - read this second (you are currently reading this file).
+COPYING - read this first.  
+README - read this second (you are currently reading this file).  
 INSTALL - read on how to install
 
-src/apprentice.c - parses /etc/magic to learn magic
-src/apptype.c - used for OS/2 specific application type magic
-src/asprintf.c - replacement for OS's that don't have it.
-src/ascmagic.c - third & last set of tests, based on hardwired assumptions.
-src/asctime_r.c - for systems that don't have it.
-src/asprintf.c - for systems that don't have it.
-src/cdf.c - parser for Microsoft Compound Document Files
-src/cdf_time.c - time converter for CDF.
-src/compress.c - handles decompressing files to look inside.
-src/ctime_r.c - for systems that don't have it.
-src/encoding.c - handles unicode encodings
-src/file.c - the main program
-src/file.h - header file
-src/fsmagic.c - first set of tests the program runs, based on filesystem info
-src/funcs.c - utilility functions
-src/getopt_long.c - for systems that don't have it.
-src/getline.c - for systems that don't have it.
-src/is_tar.c, tar.h - knows about tarchives (courtesy John Gilmore).
-src/names.h - header file for ascmagic.c
-src/magic.c - the libmagic api
-src/print.c - print results, errors, warnings.
-src/readcdf.c - CDF wrapper.
-src/readelf.[ch] - Stand-alone elf parsing code.
-src/softmagic.c - 2nd set of tests, based on /etc/magic
-src/strlcat.c - for systems that don't have it.
-src/strlcpy.c - for systems that don't have it.
-src/vasprintf.c - for systems that don't have it.
-doc/file.man - man page for the command
+src/apprentice.c - parses /etc/magic to learn magic  
+src/apptype.c - used for OS/2 specific application type magic  
+src/asprintf.c - replacement for OS's that don't have it.  
+src/ascmagic.c - third & last set of tests, based on hardwired assumptions.  
+src/asctime_r.c - for systems that don't have it.  
+src/asprintf.c - for systems that don't have it.  
+src/cdf.c - parser for Microsoft Compound Document Files  
+src/cdf_time.c - time converter for CDF.  
+src/compress.c - handles decompressing files to look inside.  
+src/ctime_r.c - for systems that don't have it.  
+src/encoding.c - handles unicode encodings  
+src/file.c - the main program  
+src/file.h - header file  
+src/fsmagic.c - first set of tests the program runs, based on filesystem info  
+src/funcs.c - utility functions  
+src/getopt_long.c - for systems that don't have it.  
+src/getline.c - for systems that don't have it.  
+src/is_tar.c, tar.h - knows about tarchives (courtesy John Gilmore).  
+src/names.h - header file for ascmagic.c  
+src/magic.c - the libmagic api  
+src/print.c - print results, errors, warnings.  
+src/readcdf.c - CDF wrapper.  
+src/readelf.[ch] - Stand-alone elf parsing code.  
+src/softmagic.c - 2nd set of tests, based on /etc/magic  
+src/strlcat.c - for systems that don't have it.  
+src/strlcpy.c - for systems that don't have it.  
+src/vasprintf.c - for systems that don't have it.  
+doc/file.man - man page for the command  
 doc/magic.man - man page for the magic file, courtesy Guy Harris.
 	Install as magic.4 on USG and magic.5 on V7 or Berkeley; cf Makefile.
+
 Magdir - directory of /etc/magic pieces
 ------------------------------------------------------------------------------
 

+ 24 - 11
configure

@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for file 5.17.
+# Generated by GNU Autoconf 2.69 for file 5.18.
 #
 # Report bugs to <christos@astron.com>.
 #
@@ -590,8 +590,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='file'
 PACKAGE_TARNAME='file'
-PACKAGE_VERSION='5.17'
-PACKAGE_STRING='file 5.17'
+PACKAGE_VERSION='5.18'
+PACKAGE_STRING='file 5.18'
 PACKAGE_BUGREPORT='christos@astron.com'
 PACKAGE_URL=''
 
@@ -1327,7 +1327,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures file 5.17 to adapt to many kinds of systems.
+\`configure' configures file 5.18 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1397,7 +1397,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of file 5.17:";;
+     short | recursive ) echo "Configuration of file 5.18:";;
    esac
   cat <<\_ACEOF
 
@@ -1507,7 +1507,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-file configure 5.17
+file configure 5.18
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2163,7 +2163,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by file $as_me 5.17, which was
+It was created by file $as_me 5.18, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -3029,7 +3029,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='file'
- VERSION='5.17'
+ VERSION='5.18'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -14118,7 +14118,7 @@ fi
 fi
 
 
-for ac_func in strerror strndup strtoul mkstemp mkostemp utimes utime wcwidth strtof fmtcheck
+for ac_func in strerror strndup strtoul mkstemp mkostemp utimes utime wcwidth strtof
 do :
   as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
 ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
@@ -14261,6 +14261,19 @@ esac
 
 fi
 
+ac_fn_c_check_func "$LINENO" "fmtcheck" "ac_cv_func_fmtcheck"
+if test "x$ac_cv_func_fmtcheck" = xyes; then :
+  $as_echo "#define HAVE_FMTCHECK 1" >>confdefs.h
+
+else
+  case " $LIBOBJS " in
+  *" fmtcheck.$ac_objext "* ) ;;
+  *) LIBOBJS="$LIBOBJS fmtcheck.$ac_objext"
+ ;;
+esac
+
+fi
+
 
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for gzopen in -lz" >&5
@@ -14912,7 +14925,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by file $as_me 5.17, which was
+This file was extended by file $as_me 5.18, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -14978,7 +14991,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-file config.status 5.17
+file config.status 5.18
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 

+ 3 - 3
configure.ac

@@ -1,5 +1,5 @@
 dnl Process this file with autoconf to produce a configure script.
-AC_INIT([file],[5.17],[christos@astron.com])
+AC_INIT([file],[5.18],[christos@astron.com])
 AM_INIT_AUTOMAKE([subdir-objects foreign])
 m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
 
@@ -136,10 +136,10 @@ else
 fi])
 
 dnl Checks for functions
-AC_CHECK_FUNCS(strerror strndup strtoul mkstemp mkostemp utimes utime wcwidth strtof fmtcheck)
+AC_CHECK_FUNCS(strerror strndup strtoul mkstemp mkostemp utimes utime wcwidth strtof)
 
 dnl Provide implementation of some required functions if necessary
-AC_REPLACE_FUNCS(getopt_long asprintf vasprintf strlcpy strlcat getline ctime_r asctime_r pread strcasestr)
+AC_REPLACE_FUNCS(getopt_long asprintf vasprintf strlcpy strlcat getline ctime_r asctime_r pread strcasestr fmtcheck)
 
 dnl Checks for libraries
 AC_CHECK_LIB(z,gzopen)

+ 5 - 1
doc/file.man

@@ -1,4 +1,4 @@
-.\" $File: file.man,v 1.105 2014/01/31 01:57:00 christos Exp $
+.\" $File: file.man,v 1.106 2014/03/07 23:11:51 christos Exp $
 .Dd January 30, 2014
 .Dt FILE __CSECTION__
 .Os
@@ -164,6 +164,10 @@ in any of the character sets listed above is simply said to be
 .Dq data .
 .Sh OPTIONS
 .Bl -tag -width indent
+.It Fl Fl apple
+Causes the file command to output the file type and creator code as
+used by older MacOS versions. The code consists of eight letters,
+the first describing the file type, the latter the creator.
 .It Fl b , Fl Fl brief
 Do not prepend filenames to output lines (brief mode).
 .It Fl C , Fl Fl compile

+ 2 - 1
doc/libmagic.man

@@ -1,4 +1,4 @@
-.\" $File: libmagic.man,v 1.27 2013/01/06 20:56:52 christos Exp $
+.\" $File: libmagic.man,v 1.28 2014/03/02 14:47:16 christos Exp $
 .\"
 .\" Copyright (c) Christos Zoulas 2003.
 .\" All Rights Reserved.
@@ -32,6 +32,7 @@
 .Nm magic_open ,
 .Nm magic_close ,
 .Nm magic_error ,
+.Nm magic_errno ,
 .Nm magic_descriptor ,
 .Nm magic_buffer ,
 .Nm magic_setflags ,

+ 8 - 5
doc/magic.man

@@ -1,4 +1,4 @@
-.\" $File: magic.man,v 1.79 2013/04/22 15:30:10 christos Exp $
+.\" $File: magic.man,v 1.81 2014/03/08 17:28:08 christos Exp $
 .Dd April 22, 2013
 .Dt MAGIC __FSECTION__
 .Os
@@ -249,15 +249,15 @@ not beginning and end of file.
 .It Dv search
 A literal string search starting at the given offset.
 The same modifier flags can be used as for string patterns.
-The modifier flags (if any) must be followed by
-.Dv /number
-the range, that is, the number of positions at which the match will be
+The search expression must contain the range in the form
+.Dv /number ,
+that is, the number of positions at which the match will be
 attempted, starting from the start offset.
 This is suitable for
 searching larger binary expressions with variable offsets, using
 .Dv \e
 escapes for special characters.
-The offset works as for regex.
+The order of modifier and number is not relevant.
 .It Dv default
 This is intended to be used with the test
 .Em x
@@ -426,6 +426,9 @@ then print the string), with
 .Em \*[Gt]\e0
 (because all non-empty strings are greater than the empty string).
 .Pp
+Dates are treated as numerical values in the respective internal
+representation.
+.Pp
 The special test
 .Em x
 always evaluates to true.

+ 22 - 20
magic/Magdir/animation

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: animation,v 1.49 2013/08/15 13:15:11 christos Exp $
+# $File: animation,v 1.51 2014/03/14 18:47:29 christos Exp $
 # animation:  file(1) magic for animation/movie formats
 #
 # animation formats
@@ -451,6 +451,7 @@
 
 # MP2, M2A
 0       beshort&0xFFFE  0xFFF4         MPEG ADTS, layer II, v2
+!:mime	audio/mpeg
 # rate 
 >2      byte&0xF0       0x10           \b,   8 kbps
 >2      byte&0xF0       0x20           \b,  16 kbps 
@@ -788,25 +789,26 @@
 
 #						MPEG file
 # MPEG sequences
-# FIXME: This section is from the old magic.mime file and needs integrating with the rest
-0       belong             0x000001BA
->4      byte               &0x40
-!:mime	video/mp2p
->4      byte               ^0x40
-!:mime	video/mpeg
-0       belong             0x000001BB
-!:mime	video/mpeg
-0       belong             0x000001B0
-!:mime	video/mp4v-es
-0       belong             0x000001B5
-!:mime	video/mp4v-es
-0       belong             0x000001B3
-!:mime	video/mpv
-0       belong&0xFF5FFF10  0x47400010
-!:mime	video/mp2t
-0       belong             0x00000001
->4      byte&0x1F	   0x07
-!:mime	video/h264
+# FIXME: This section is from the old magic.mime file and needs
+# integrating with the rest
+#0       belong             0x000001BA
+#>4      byte               &0x40
+#!:mime	video/mp2p
+#>4      byte               ^0x40
+#!:mime	video/mpeg
+#0       belong             0x000001BB
+#!:mime	video/mpeg
+#0       belong             0x000001B0
+#!:mime	video/mp4v-es
+#0       belong             0x000001B5
+#!:mime	video/mp4v-es
+#0       belong             0x000001B3
+#!:mime	video/mpv
+#0       belong&0xFF5FFF10  0x47400010
+#!:mime	video/mp2t
+#0       belong             0x00000001
+#>4      byte&0x1F	   0x07
+#!:mime	video/h264
 
 # Type: Bink Video
 # Extension: .bik

+ 3 - 2
magic/Magdir/archive

@@ -1,5 +1,5 @@
 #------------------------------------------------------------------------------
-# $File: archive,v 1.81 2014/01/08 22:27:31 christos Exp $
+# $File: archive,v 1.82 2014/03/14 18:47:29 christos Exp $
 # archive:  file(1) magic for archive formats (see also "msdos" for self-
 #           extracting compressed archives)
 #
@@ -92,9 +92,10 @@
 # "debian".
 #
 0	string		=!<arch>\ndebian
-!:mime	application/x-debian-package
 >8	string		debian-split	part of multipart Debian package
+!:mime	application/x-debian-package
 >8	string		debian-binary	Debian binary package
+!:mime	application/x-debian-package
 >8	string		!debian
 >68	string		>\0		(format %s)
 # These next two lines do not work, because a bzip2 Debian archive

+ 6 - 1
magic/Magdir/cad

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: cad,v 1.12 2013/07/04 15:24:37 christos Exp $
+# $File: cad,v 1.13 2014/03/23 18:05:38 christos Exp $
 # autocad:  file(1) magic for cad files
 #
 
@@ -149,3 +149,8 @@
 
 # 3DS (3d Studio files) Conflicts with diff output 0x3d '='
 #16	beshort		0x3d3d		image/x-3ds
+
+# MegaCAD 2D/3D drawing (.prt)
+# http://megacad.de/
+# From: Markus Heidelberg <markus.heidelberg@web.de>
+0	string	MegaCad23\0	MegaCAD 2D/3D drawing

+ 2 - 2
magic/Magdir/cafebabe

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: cafebabe,v 1.14 2013/02/27 16:59:59 christos Exp $
+# $File: cafebabe,v 1.15 2014/03/14 18:47:29 christos Exp $
 # Cafe Babes unite!
 #
 # Since Java bytecode and Mach-O universal binaries have the same magic number,
@@ -16,8 +16,8 @@
 #
 ### JAVA START ###
 0	belong		0xcafebabe
-!:mime	application/x-java-applet
 >4	belong		>30		compiled Java class data,
+!:mime	application/x-java-applet
 >>6	beshort		x	        version %d.
 >>4	beshort		x       	\b%d
 # Which is which?

+ 16 - 1
magic/Magdir/commands

@@ -1,16 +1,23 @@
 
 #------------------------------------------------------------------------------
-# $File: commands,v 1.46 2013/03/25 14:06:55 christos Exp $
+# $File: commands,v 1.48 2014/03/04 12:20:42 kim Exp $
 # commands:  file(1) magic for various shells and interpreters
 #
 #0	string/w	:			shell archive or script for antique kernel text
 0	string/wt	#!\ /bin/sh		POSIX shell script text executable
 !:mime	text/x-shellscript
+0	string/wb	#!\ /bin/sh		POSIX shell script executable (binary data)
+!:mime	text/x-shellscript
+
 0	string/wt	#!\ /bin/csh		C shell script text executable
 !:mime	text/x-shellscript
+
 # korn shell magic, sent by George Wu, gwu@clyde.att.com
 0	string/wt	#!\ /bin/ksh		Korn shell script text executable
 !:mime	text/x-shellscript
+0	string/wb	#!\ /bin/ksh		Korn shell script executable (binary data)
+!:mime	text/x-shellscript
+
 0	string/wt 	#!\ /bin/tcsh		Tenex C shell script text executable
 !:mime	text/x-shellscript
 0	string/wt	#!\ /usr/bin/tcsh	Tenex C shell script text executable
@@ -57,12 +64,20 @@
 # bash shell magic, from Peter Tobias (tobias@server.et-inf.fho-emden.de)
 0	string/wt	#!\ /bin/bash	Bourne-Again shell script text executable
 !:mime	text/x-shellscript
+0	string/wb	#!\ /bin/bash	Bourne-Again shell script executable (binary data)
+!:mime	text/x-shellscript
 0	string/wt	#!\ /usr/bin/bash	Bourne-Again shell script text executable
 !:mime	text/x-shellscript
+0	string/wb	#!\ /usr/bin/bash	Bourne-Again shell script executable (binary data)
+!:mime	text/x-shellscript
 0	string/wt	#!\ /usr/local/bash	Bourne-Again shell script text executable
 !:mime	text/x-shellscript
+0	string/wb	#!\ /usr/local/bash	Bourne-Again shell script executable (binary data)
+!:mime	text/x-shellscript
 0	string/wt	#!\ /usr/local/bin/bash	Bourne-Again shell script text executable
 !:mime	text/x-shellscript
+0	string/wb	#!\ /usr/local/bin/bash	Bourne-Again shell script executable (binary data)
+!:mime	text/x-shellscript
 
 # PHP scripts
 # Ulf Harnhammar <ulfh@update.uu.se>

+ 2 - 2
magic/Magdir/compress

@@ -1,5 +1,5 @@
 #------------------------------------------------------------------------------
-# $File: compress,v 1.53 2014/01/05 15:55:21 christos Exp $
+# $File: compress,v 1.54 2014/03/14 18:47:29 christos Exp $
 # compress:  file(1) magic for pure-compression formats (no archives)
 #
 # compress, gzip, pack, compact, huf, squeeze, crunch, freeze, yabba, etc.
@@ -191,8 +191,8 @@
 
 # Type: LZMA
 0	lelong&0xffffff	=0x5d
-!:mime	application/x-lzma
 >12	leshort		0xff			LZMA compressed data,
+!:mime	application/x-lzma
 >>5	lequad		=0xffffffffffffffff	streamed
 >>5	lequad		!0xffffffffffffffff	non-streamed, size %lld
 >12	leshort		0			LZMA compressed data,

+ 24 - 3
magic/Magdir/database

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: database,v 1.36 2013/12/09 20:31:53 christos Exp $
+# $File: database,v 1.37 2014/03/14 18:47:29 christos Exp $
 # database:  file(1) magic for various databases
 #
 # extracted from header/code files by Graeme Wilford (eep2gw@ee.surrey.ac.uk)
@@ -170,7 +170,6 @@
 >>>>>>>>>>8	uleshort		>31		
 # skip PIC15736.PCX by test for language driver name or field name
 >>>>>>>>>>>32	ubyte			>0		
-!:mime	application/x-dbf
 #!:mime	application/x-dbf; charset=unknown-8bit ??
 #!:mime	application/x-dbase
 >>>>>>>>>>>>0	use			xbase-type
@@ -237,47 +236,69 @@
 >>0	ubyte		0x02		FoxBase
 # FoxBase+/dBaseIII+, no memo
 >>0	ubyte		0x03		FoxBase+/dBase III
+!:mime	application/x-dbf
 # dBASE IV no memo file
 >>0	ubyte		0x04		dBase IV
+!:mime	application/x-dbf
 # dBASE V no memo file
 >>0	ubyte		0x05		dBase V
+!:mime	application/x-dbf
 >>0	ubyte		0x30		Visual FoxPro
+!:mime	application/x-dbf
 >>0	ubyte		0x31		Visual FoxPro, autoincrement
+!:mime	application/x-dbf
 # Visual FoxPro, with field type Varchar or Varbinary
 >>0	ubyte		0x32		Visual FoxPro, with field type Varchar
+!:mime	application/x-dbf
 # dBASE IV SQL, no memo;dbv memo var size (Flagship)
 >>0	ubyte		0x43		dBase IV, with SQL table
+!:mime	application/x-dbf
 # http://msdn.microsoft.com/en-US/library/st4a0s68(v=vs.80).aspx
 #>>0	ubyte		0x62		dBase IV, with SQL table
+#!:mime	application/x-dbf
 # dBASE IV, with memo!!
 >>0	ubyte		0x7b		dBase IV, with memo
+!:mime	application/x-dbf
 # http://msdn.microsoft.com/en-US/library/st4a0s68(v=vs.80).aspx
 #>>0	ubyte		0x82		dBase IV, with SQL system
+#!:mime	application/x-dbf
 # FoxBase+/dBaseIII+ with memo .DBT!
 >>0	ubyte		0x83		FoxBase+/dBase III, with memo .DBT
+!:mime	application/x-dbf
 # VISUAL OBJECTS (first 1.0 versions) for the Dbase III files (NTX clipper driver); memo file
 >>0	ubyte		0x87		VISUAL OBJECTS, with memo file
+!:mime	application/x-dbf
 # http://msdn.microsoft.com/en-US/library/st4a0s68(v=vs.80).aspx
 #>>0	ubyte		0x8A		FoxBase+/dBase III, with memo .DBT
+#!:mime	application/x-dbf
 # dBASE IV with memo!
 >>0	ubyte		0x8B		dBase IV, with memo .DBT
+!:mime	application/x-dbf
 # dBase IV with SQL Table,no memo?
 >>0	ubyte		0x8E		dBase IV, with SQL table
+!:mime	application/x-dbf
 # .dbv and .dbt memo (Flagship)?
 >>0	ubyte		0xB3		Flagship
 # http://msdn.microsoft.com/en-US/library/st4a0s68(v=vs.80).aspx
 #>>0	ubyte		0xCA		dBase IV with memo .DBT
+#!:mime	application/x-dbf
 # dBASE IV with SQL table, with memo .DBT
 >>0	ubyte		0xCB		dBase IV with SQL table, with memo .DBT
+!:mime	application/x-dbf
 # HiPer-Six format;Clipper SIX, with SMT memo file		
 >>0	ubyte		0xE5		Clipper SIX with memo
+!:mime	application/x-dbf
 # http://msdn.microsoft.com/en-US/library/st4a0s68(v=vs.80).aspx
 #>>0	ubyte		0xF4		dBase IV, with SQL table, with memo
+#!:mime	application/x-dbf
 >>0	ubyte		0xF5		FoxPro with memo
+!:mime	application/x-dbf
 # http://msdn.microsoft.com/en-US/library/st4a0s68(v=vs.80).aspx
 #>>0	ubyte		0xFA		FoxPro 2.x, with memo
-# unkown version (should not happen)
+#!:mime	application/x-dbf
+# unknown version (should not happen)
 >>0	default		x		xBase
+!:mime	application/x-dbf
 >>>0	ubyte		x		(0x%x)
 # flags in version byte
 # DBT flag (with dBASE III memo .DBT)!!

+ 94 - 7
magic/Magdir/elf

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: elf,v 1.64 2013/08/04 09:31:47 christos Exp $
+# $File: elf,v 1.66 2014/03/06 16:37:39 christos Exp $
 # elf:  file(1) magic for ELF executables
 #
 # We have to check the byte order flag to see what byte order all the
@@ -32,15 +32,15 @@
 >16	leshort		&0xff00		processor-specific,
 >18	clear		x
 >18	leshort		0		no machine,
->18	leshort		1		AT&T WE32100
->18	leshort		2		SPARC
+>18	leshort		1		AT&T WE32100,
+>18	leshort		2		SPARC,
 >18	leshort		3		Intel 80386,
->18	leshort		4		Motorola
+>18	leshort		4		Motorola m68k,
 >>4	byte		1
 >>>36	lelong		&0x01000000	68000,
 >>>36	lelong		&0x00810000	CPU32,
 >>>36	lelong		0		68020,
->18	leshort		5		Motorola 88000,
+>18	leshort		5		Motorola m88k,
 >18	leshort		6		Intel 80486,
 >18	leshort		7		Intel 80860,
 # The official e_machine number for MIPS is now #8, regardless of endianness.
@@ -96,6 +96,7 @@
 >>>36	lelong&0xffff00	0x000200	Sun UltraSPARC1 Extensions Required,
 >>>36	lelong&0xffff00	0x000400	HaL R1 Extensions Required,
 >>>36	lelong&0xffff00	0x000800	Sun UltraSPARC3 Extensions Required,
+>18	leshort		19		Intel 80960,
 >18	leshort		20		PowerPC or cisco 4500,
 >18	leshort		21		64-bit PowerPC or cisco 7500,
 >18	leshort		22		IBM S/390,
@@ -113,7 +114,6 @@
 >>>36	lelong		&0x00800000	BE8
 >>>36	lelong		&0x00400000	LE8
 >18	leshort		41		Alpha,
->18	leshort		0xa390		IBM S/390 (obsolete),
 >18	leshort		42		Renesas SH,
 >18	leshort		43		SPARC V9,
 >>4	byte		2
@@ -143,6 +143,8 @@
 >18	leshort		61		Tinyj emb.,
 >18	leshort		62		x86-64,
 >18	leshort		63		Sony DSP,
+>18	leshort		64		DEC PDP-10,
+>18	leshort		65		DEC PDP-11,
 >18	leshort		66		FX66,
 >18	leshort		67		ST9+ 8/16 bit,
 >18	leshort		68		ST7 8 bit,
@@ -172,18 +174,103 @@
 >18	leshort		92		OpenRISC,
 >18	leshort		93		ARC Cores Tangent-A5,
 >18	leshort		94		Tensilica Xtensa,
+>18	leshort		95		Alphamosaic VideoCore,
+>18	leshort		96		Thomson Multimedia,
 >18	leshort		97		NatSemi 32k,
+>18	leshort		98		Tenor Network TPC,
+>18	leshort		99		Trebia SNP 1000,
+>18	leshort		100		STMicroelectronics ST200,
+>18	leshort		101		Ubicom IP2022,
+>18	leshort		102		MAX Processor,
+>18	leshort		103		NatSemi CompactRISC,
+>18	leshort		104		Fujitsu F2MC16,
+>18	leshort		105		TI msp430,
 >18	leshort		106		Analog Devices Blackfin,
+>18	leshort		107		S1C33 Family of Seiko Epson,
+>18	leshort		108		Sharp embedded,
+>18	leshort		109		Arca RISC,
+>18	leshort		110		PKU-Unity Ltd.,
+>18	leshort		111		eXcess: 16/32/64-bit,
+>18	leshort		112		Icera Deep Execution Processor,
 >18	leshort		113		Altera Nios II,
->18	beshort		138		LatticeMico32,
+>18	leshort		114		NatSemi CRX,
+>18	leshort		115		Motorola XGATE,
+>18	leshort		116		Infineon C16x/XC16x,
+>18	leshort		117		Renesas M16C series,
+>18	leshort		118		Microchip dsPIC30F,
+>18	leshort		119		Freescale RISC core,
+>18	leshort		120		Renesas M32C series,
+>18	leshort		131		Altium TSK3000 core,
+>18	leshort		132		Freescale RS08,
+>18	leshort		134		Cyan Technology eCOG2,
+>18	leshort		135		Sunplus S+core7 RISC,
+>18	leshort		136		New Japan Radio (NJR) 24-bit DSP,
+>18	leshort		137		Broadcom VideoCore III,
+>18	leshort		138		LatticeMico32,
+>18	leshort		139		Seiko Epson C17 family,
+>18	leshort		140		TI TMS320C6000 DSP family,
+>18	leshort		141		TI TMS320C2000 DSP family,
+>18	leshort		142		TI TMS320C55x DSP family,
+>18	leshort		160		STMicroelectronics 64bit VLIW DSP,
+>18	leshort		161		Cypress M8C,
+>18	leshort		162		Renesas R32C series,
+>18	leshort		163		NXP TriMedia family,
+>18	leshort		164		QUALCOMM DSP6,
+>18	leshort		165		Intel 8051 and variants,
+>18	leshort		166		STMicroelectronics STxP7x family,
+>18	leshort		167		Andes embedded RISC,
+>18	leshort		168		Cyan eCOG1X family,
+>18	leshort		169		Dallas MAXQ30,
+>18	leshort		170		New Japan Radio (NJR) 16-bit DSP,
+>18	leshort		171		M2000 Reconfigurable RISC,
+>18	leshort		172		Cray NV2 vector architecture,
+>18	leshort		173		Renesas RX family,
 >18	leshort		174		META,
+>18	leshort		175		MCST Elbrus,
+>18	leshort		176		Cyan Technology eCOG16 family,
+>18	leshort		177		NatSemi CompactRISC,
+>18	leshort		178		Freescale Extended Time Processing Unit,
+>18	leshort		179		Infineon SLE9X,
+>18	leshort		180		Intel L1OM,
+>18	leshort		181		Intel K1OM,
 >18	leshort		183		ARM aarch64,
+>18	leshort		185		Atmel 32-bit family,
+>18	leshort		186		STMicroelectronics STM8 8-bit,
 >18	leshort		187		Tilera TILE64,
 >18	leshort		188		Tilera TILEPro,
+>18	leshort		189		Xilinx MicroBlaze 32-bit RISC,
+>18	leshort		190		NVIDIA CUDA architecture,
 >18	leshort		191		Tilera TILE-Gx,
+>18	leshort		197		Renesas RL78 family,
+>18	leshort		199		Renesas 78K0R,
+>18	leshort		0x1057		AVR (unofficial),
+>18	leshort		0x1059		MSP430 (unofficial),
+>18	leshort		0x1223		Adapteva Epiphany (unofficial),
+>18	leshort		0x2530		Morpho MT (unofficial),
+>18	leshort		0x3330		FR30 (unofficial),
 >18	leshort		0x3426		OpenRISC (obsolete),
+>18	leshort		0x4688		Infineon C166 (unofficial),
+>18	leshort		0x5441		Cygnus FRV (unofficial),
+>18	leshort		0x5aa5		DLX (unofficial),
+>18	leshort		0x7650		Cygnus D10V (unofficial),
+>18	leshort		0x7676		Cygnus D30V (unofficial),
+>18	leshort		0x8217		Ubicom IP2xxx (unofficial),
 >18	leshort		0x8472		OpenRISC (obsolete),
+>18	leshort		0x9025		Cygnus PowerPC (unofficial),
 >18	leshort		0x9026		Alpha (unofficial),
+>18	leshort		0x9041		Cygnus M32R (unofficial),
+>18	leshort		0x9080		Cygnus V850 (unofficial),
+>18	leshort		0xa390		IBM S/390 (obsolete),
+>18	leshort		0xabc7		Old Xtensa (unofficial),
+>18	leshort		0xad45		xstormy16 (unofficial),
+>18	leshort		0xbaab		Old MicroBlaze (unofficial),
+>18	leshort		0xbeef		Cygnus MN10300 (unofficial),
+>18	leshort		0xdead		Cygnus MN10200 (unofficial),
+>18	leshort		0xf00d		Toshiba MeP (unofficial),
+>18	leshort		0xfeb0		Renesas M32C (unofficial),
+>18	leshort		0xfeba		Vitesse IQ2000 (unofficial),
+>18	leshort		0xfebb		NIOS (unofficial),
+>18	leshort		0xfeed		Moxie (unofficial),
 >18	default		x
 >>18	leshort		x		*unknown arch 0x%x*
 >20	lelong		0		invalid version

+ 55 - 40
magic/Magdir/filesystems

@@ -1,5 +1,5 @@
 #------------------------------------------------------------------------------
-# $File: filesystems,v 1.85 2013/11/19 23:54:05 christos Exp $
+# $File: filesystems,v 1.87 2014/03/01 03:04:06 christos Exp $
 # filesystems:  file(1) magic for different filesystems
 #
 0	name	partid  
@@ -1870,9 +1870,10 @@
 
 # all FAT12 (strength=70) floppies with sectorsize 512 added by Joerg Jenderek at Jun 2013
 # http://en.wikipedia.org/wiki/File_Allocation_Table#Exceptions
-512		ubelong&0xE0ffff00	0xE0ffff00		
+# Too Weak.
+#512		ubelong&0xE0ffff00	0xE0ffff00		
 # without valid Media descriptor in place of BPB, cases with are done at other places
->21		ubyte			<0xE5			floppy with old FAT filesystem
+#>21		ubyte			<0xE5			floppy with old FAT filesystem
 # but valid Media descriptor at begin of FAT
 #>>512		ubyte			=0xed			720k
 #>>512		ubyte			=0xf0			1440k
@@ -1880,64 +1881,64 @@
 #>>512		ubyte			=0xf9			1220k
 #>>512		ubyte			=0xfa			320k
 #>>512		ubyte			=0xfb			640k
->>512		ubyte			=0xfc			180k
+#>>512		ubyte			=0xfc			180k
 # looks like an old DOS directory entry
->>>0xA0E	ubequad			0			
->>>>0xA00	ubequad			!0			
-!:mime application/x-ima
->>512		ubyte			=0xfd			
+#>>>0xA0E	ubequad			0			
+#>>>>0xA00	ubequad			!0			
+#!:mime application/x-ima
+#>>512		ubyte			=0xfd			
 # look for 2nd FAT at different location to distinguish between 360k and 500k
->>>0x600	ubelong&0xE0ffff00	0xE0ffff00		360k
+#>>>0x600	ubelong&0xE0ffff00	0xE0ffff00		360k
 #>>>0x500	ubelong&0xE0ffff00	0xE0ffff00		500k
->>>0xA0E	ubequad			0			
-!:mime application/x-ima
->>512		ubyte			=0xfe			
->>>0x400	ubelong&0xE0ffff00	0xE0ffff00		160k
->>>>0x60E	ubequad			0			
->>>>>0x600	ubequad			!0			
-!:mime application/x-ima
+#>>>0xA0E	ubequad			0			
+#!:mime application/x-ima
+#>>512		ubyte			=0xfe			
+#>>>0x400	ubelong&0xE0ffff00	0xE0ffff00		160k
+#>>>>0x60E	ubequad			0			
+#>>>>>0x600	ubequad			!0			
+#!:mime application/x-ima
 #>>>0xC00	ubelong&0xE0ffff00	0xE0ffff00		1200k
->>512		ubyte			=0xff			320k
->>>0x60E	ubequad			0			
->>>>0x600	ubequad			!0			
-!:mime application/x-ima
->>512		ubyte			x			\b, Media descriptor 0x%x
+#>>512		ubyte			=0xff			320k
+#>>>0x60E	ubequad			0			
+#>>>>0x600	ubequad			!0			
+#!:mime application/x-ima
+#>>512		ubyte			x			\b, Media descriptor 0x%x
 # without x86 jump instruction
->>0		ulelong&0x804000E9	!0x000000E9		
+#>>0		ulelong&0x804000E9	!0x000000E9		
 # assembler instructions: CLI;MOV SP,1E7;MOV AX;07c0;MOV 	
->>>0	ubequad				0xfabce701b8c0078e	\b, MS-DOS 1.12 bootloader
+#>>>0	ubequad				0xfabce701b8c0078e	\b, MS-DOS 1.12 bootloader
 # IOSYS.COM+MSDOS.COM
->>>>0xc4	use			2xDOS-filename
->>0		ulelong&0x804000E9	=0x000000E9	
+#>>>>0xc4	use			2xDOS-filename
+#>>0		ulelong&0x804000E9	=0x000000E9	
 # only x86 short jump instruction found
->>>0		ubyte			=0xEB
->>>>1		ubyte			x			\b, code offset 0x%x+2
+#>>>0		ubyte			=0xEB
+#>>>>1		ubyte			x			\b, code offset 0x%x+2
 # http://thestarman.pcministry.com/DOS/ibm100/Boot.htm
 # assembler instructions: CLI;MOV AX,CS;MOV DS,AX;MOV DX,0		
->>>>(1.b+2)	ubequad			0xfa8cc88ed8ba0000	\b, PC-DOS 1.0 bootloader 
+#>>>>(1.b+2)	ubequad			0xfa8cc88ed8ba0000	\b, PC-DOS 1.0 bootloader 
 # ibmbio.com+ibmdos.com
->>>>>0x176	use			DOS-filename
->>>>>0x181	ubyte			x			\b+
->>>>>0x182	use			DOS-filename
+#>>>>>0x176	use			DOS-filename
+#>>>>>0x181	ubyte			x			\b+
+#>>>>>0x182	use			DOS-filename
 # http://thestarman.pcministry.com/DOS/ibm110/Boot.htm
 # assembler instructions: CLI;MOV AX,CS;MOV DS,AX;XOR DX,DX;MOV		
->>>>(1.b+2)	ubequad			0xfa8cc88ed833d28e	\b, PC-DOS 1.1 bootloader 
+#>>>>(1.b+2)	ubequad			0xfa8cc88ed833d28e	\b, PC-DOS 1.1 bootloader 
 # ibmbio.com+ibmdos.com
->>>>>0x18b	use			DOS-filename
->>>>>0x196	ubyte			x			\b+
->>>>>0x197	use			DOS-filename
+#>>>>>0x18b	use			DOS-filename
+#>>>>>0x196	ubyte			x			\b+
+#>>>>>0x197	use			DOS-filename
 # http://en.wikipedia.org/wiki/Zenith_Data_Systems
 # assembler instructions: MOV BX,07c0;MOV SS,BX;MOV SP,01c6		
->>>>(1.b+2)	ubequad			0xbbc0078ed3bcc601	\b, Zenith Data Systems MS-DOS 1.25 bootloader
+#>>>>(1.b+2)	ubequad			0xbbc0078ed3bcc601	\b, Zenith Data Systems MS-DOS 1.25 bootloader
 # IO.SYS+MSDOS.SYS
->>>>>0x20	use			2xDOS-filename
+#>>>>>0x20	use			2xDOS-filename
 # http://en.wikipedia.org/wiki/Corona_Data_Systems
 # assembler instructions: MOV AX,CS;MOV DS,AX;CLI;MOV SS,AX;		
->>>>(1.b+2)	ubequad			0x8cc88ed8fa8ed0bc	\b, MS-DOS 1.25 bootloader
+#>>>>(1.b+2)	ubequad			0x8cc88ed8fa8ed0bc	\b, MS-DOS 1.25 bootloader
 # IO.SYS+MSDOS.SYS
->>>>>0x69	use			2xDOS-filename
+#>>>>>0x69	use			2xDOS-filename
 # assembler instructions: CLI;PUSH CS;POP SS;MOV SP,7c00;		
->>>>(1.b+2)	ubequad			0xfa0e17bc007cb860	\b, MS-DOS 2.11 bootloader
+#>>>>(1.b+2)	ubequad			0xfa0e17bc007cb860	\b, MS-DOS 2.11 bootloader
 # defect IO.SYS+MSDOS.SYS ?
 #>>>>>0x162	use			2xDOS-filename
 
@@ -2324,3 +2325,17 @@
 # Gregoire Passault
 # http://www.forensicswiki.org/wiki/Encase_image_file_format
 0	string	EVF\x09\x0d\x0a\xff\x00	EWF/Expert Witness/EnCase image file format
+
+# UBIfs
+# Linux kernel sources: fs/ubifs/ubifs-media.h
+0       belong  0x31181006
+>0x16   short   0               UBIfs image
+>0x08   lequad  x               \b, sequence number %llu
+>0x10   leshort x               \b, length %u
+>0x04   lelong  x               \b, CRC 0x%08x
+
+0       belong  0x55424923
+>0x04   short   <2
+>0x05   string  \0\0\0
+>0x1c   string  \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0
+>0x04   short   x       UBI image, version %u

+ 6 - 1
magic/Magdir/flash

@@ -1,11 +1,13 @@
 
 #------------------------------------------------------------------------------
-# $File: flash,v 1.9 2009/11/08 01:30:01 christos Exp $
+# $File: flash,v 1.10 2014/03/06 16:07:24 christos Exp $
 # flash:	file(1) magic for Macromedia Flash file format
 #
 # See
 #
 #	http://www.macromedia.com/software/flash/open/
+#	http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/\
+#	en/devnet/swf/pdf/swf-file-format-spec.pdf page 27
 #
 0	string		FWS		Macromedia Flash data,
 >3	byte		x		version %d
@@ -13,6 +15,9 @@
 0	string		CWS		Macromedia Flash data (compressed),
 !:mime	application/x-shockwave-flash
 >3	byte		x		version %d
+0	string		ZWS		Macromedia Flash data (lzma compressed),
+!:mime	application/x-shockwave-flash
+>3	byte		x		version %d
 # From: Cal Peake <cp@absolutedigital.net>
 0	string		FLV		Macromedia Flash Video
 !:mime	video/x-flv

+ 2 - 1
magic/Magdir/linux

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: linux,v 1.53 2014/01/08 22:26:12 christos Exp $
+# $File: linux,v 1.54 2014/03/02 00:01:29 christos Exp $
 # linux:  file(1) magic for Linux files
 #
 # Values for Linux/i386 binaries, from Daniel Quinlan <quinlan@yggdrasil.com>
@@ -40,6 +40,7 @@
 >28	long		!0		not stripped
 # core dump file, from Bill Reynolds <bill@goshawk.lanl.gov>
 216	lelong		0421		Linux/i386 core file
+!:strength / 2
 >220	string		>\0		of '%s'
 >200	lelong		>0		(signal %d)
 #

+ 6 - 1
magic/Magdir/misctools

@@ -1,6 +1,6 @@
 
 #-----------------------------------------------------------------------------
-# $File: misctools,v 1.13 2013/01/16 13:53:10 christos Exp $
+# $File: misctools,v 1.14 2014/03/06 16:08:58 christos Exp $
 # misctools:  file(1) magic for miscellaneous UNIX tools.
 #
 0	search/1	%%!!			X-Post-It-Note text
@@ -21,3 +21,8 @@
 
 # From: Daniel Novotny <dnovotny@redhat.com>
 0	string		MDMP\x93\xA7				MDMP crash report data
+
+# Summary: abook addressbook file
+# Submitted by: Mark Schreiber <mark7@alumni.cmu.edu>
+0	string	#\x20abook\x20addressbook\x20file abook address book
+!:mime application/x-abook-addressbook

+ 52 - 13
magic/Magdir/msdos

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: msdos,v 1.86 2014/01/08 22:10:18 christos Exp $
+# $File: msdos,v 1.92 2014/03/14 18:47:29 christos Exp $
 # msdos:  file(1) magic for MS-DOS files
 #
 
@@ -42,9 +42,9 @@
 # Many of the compressed formats were extracted from IDARC 1.23 source code.
 #
 0	string/b	MZ
-!:mime	application/x-dosexec
 # All non-DOS EXE extensions have the relocation table more than 0x40 bytes into the file.
 >0x18	leshort <0x40 MS-DOS executable
+!:mime	application/x-dosexec
 # These traditional tests usually work but not always.  When test quality support is
 # implemented these can be turned on.
 #>>0x18	leshort	0x1c	(Borland compiler)
@@ -209,8 +209,8 @@
 # calculations (next embedded executable would be at &(&2*512+&0-2)
 # I suspect there are only LE executables in these multi-exe files
 >>>>&(2.s-514)	string	BW
->>>>>0x240	search/0x100	DOS/4G ,\b LE for MS-DOS, DOS4GW DOS extender (embedded)
->>>>>0x240	search/0x100	!DOS/4G ,\b BW collection for MS-DOS
+>>>>>0x240	search/0x100	DOS/4G	\b, LE for MS-DOS, DOS4GW DOS extender (embedded)
+>>>>>0x240	search/0x100	!DOS/4G	\b, BW collection for MS-DOS
 
 # This sequence skips to the first COFF segment, usually .text
 >(4.s*512)	leshort		0x014c \b, COFF
@@ -578,16 +578,49 @@
 #ico files
 0	string/b	\102\101\050\000\000\000\056\000\000\000\000\000\000\000	Icon for MS Windows
 
-# Windows icons (Ian Springer <ips@fpk.hp.com>)
-0	string/b	\000\000\001\000	MS Windows icon resource
+# Windows icons
+0   name    ico-dir
+# not entirely accurate, the number of icons is part of the header
+>0  byte    1   - 1 icon
+>0  ubyte   >1  - %d icons
+>2  byte    0   \b, 256x
+>2  byte    !0  \b, %dx
+>3  byte    0   \b256
+>3  byte    !0  \b%d
+>4  ubyte   !0  \b, %d colors
+
+0   belong  0x00000100
+
+>9  byte    0
+>>0 byte    x           MS Windows icon resource
 !:mime	image/x-icon
->4	byte	1			- 1 icon
->4	byte	>1			- %d icons
->>6	byte	>0			\b, %dx
->>>7	byte	>0			\b%d
->>8	byte	0			\b, 256-colors
->>8	byte	>0			\b, %d-colors
-
+>>4 use     ico-dir
+>9  ubyte   0xff
+>>0 byte    x           MS Windows icon resource
+!:mime	image/x-icon
+>>4 use     ico-dir
+
+# Windows non-animated cursors
+0   name    cur-dir
+# not entirely accurate, the number of icons is part of the header
+>0  byte        1   - 1 icon
+>0  ubyte       >1  - %d icons
+>2  byte        0   \b, 256x
+>2  byte        !0  \b, %dx
+>3  byte        0   \b256
+>3  byte        !0  \b%d
+>6  uleshort    x   \b, hotspot @%dx
+>8  uleshort    x   \b%d
+
+0   belong  0x00000200
+>9  byte    0
+>>0 byte    x           MS Windows cursor resource
+!:mime image/x-cur
+>>4 use     cur-dir
+>9  ubyte   0xff
+>>0 byte    x           MS Windows cursor resource
+!:mime image/x-cur
+>>4 use     cur-dir
 
 # .chr files
 0	string/b	PK\010\010BGI	Borland font 
@@ -645,6 +678,12 @@
 0	lelong		0x08086b70	TurboC BGI file
 0	lelong		0x08084b50	TurboC Font file
 
+# Debian#712046: The magic below identifies "Delphi compiled form data". 
+# An additional source of information is available at:
+# http://www.woodmann.com/fravia/dafix_t1.htm
+0	string		TPF0
+>4	pstring		>\0		Delphi compiled form '%s'
+
 # tests for DBase files moved, updated and merged to database
 
 0	string		PMCC		Windows 3.x .GRP file

+ 12 - 0
magic/Magdir/neko

@@ -0,0 +1,12 @@
+
+#------------------------------------------------------------
+# $File: neko,v 1.1 2009/11/10 20:36:10 christos Exp $
+
+# From: Mikhail Gusarov <dottedmag@dottedmag.net>
+# NekoVM (http://nekovm.org/) bytecode
+0	string		NEKO	NekoVM bytecode
+>4	lelong		x	(%d global symbols,
+>8	lelong		x	%d global fields,
+>12	lelong		x	%d bytecode ops)
+!:mime	application/x-nekovm-bytecode
+

+ 52 - 0
magic/Magdir/pgf

@@ -0,0 +1,52 @@
+
+#------------------------------------------------------------------------------
+# $File: pgf,v 1.1 2013/04/22 15:19:49 christos Exp $
+# pgf: file(1) magic for Progressive Graphics File (PGF)
+#
+# <http://www.libpgf.org/uploads/media/PGF_Details_01.pdf>
+# 2013 by Philipp Hahn <pmhahn debian org>
+0 string PGF Progressive Graphics image data,
+!:mime image/x-pgf
+>3	string	2	version %s,
+>3	string	4	version %s,
+>3	string	5	version %s,
+>3	string	6	version %s,
+#	PGFPreHeader
+#>>4	lelong	x	header size %d,
+#	PGFHeader
+>>8	lelong	x	%d x
+>>12	lelong	x	%d,
+>>16	byte	x	%d levels,
+>>17	byte	x	compression level %d,
+>>18	byte	x	%d bpp,
+>>19	byte	x	%d channels,
+>>20	clear	x
+>>20	byte	0	bitmap,
+>>20	byte	1	gray scale,
+>>20	byte	2	indexed color,
+>>20	byte	3	RGB color,
+>>20	byte	4	CYMK color,
+>>20	byte	5	HSL color,
+>>20	byte	6	HSB color,
+>>20	byte	7	multi-channel,
+>>20	byte	8	duo tone,
+>>20	byte	9	LAB color,
+>>20	byte	10	gray scale 16,
+>>20	byte	11	RGB color 48,
+>>20	byte	12	LAB color 48,
+>>20	byte	13	CYMK color 64,
+>>20	byte	14	deep multi-channel,
+>>20	byte	15	duo tone 16,
+>>20	byte	17	RGBA color,
+>>20	byte	18	gray scale 32,
+>>20	byte	19	RGB color 12,
+>>20	byte	20	RGB color 16,
+>>20	byte	255	unknown format,
+>>20	default	x	format 
+>>>20	byte	x	\b %d,
+>>21	byte	x	%d bpc
+#	PGFPostHeader
+#	Level-Sizes
+#>>(4.l+4)	lelong x level 0 size: %d
+#>>(4.l+8)	lelong x level 1 size: %d
+#>>(4.l+12)	lelong x level 2 size: %d

+ 7 - 7
magic/Magdir/python

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: python,v 1.23 2013/12/11 14:14:20 christos Exp $
+# $File: python,v 1.24 2014/02/15 01:30:52 christos Exp $
 # python:  file(1) magic for python
 #
 # Outlook puts """ too for urgent messages
@@ -44,13 +44,13 @@
 !:mime text/x-python
 
 # comments
-0	search/4096	'''
->&0	regex	.*'''$	Python script text executable
-!:mime text/x-python
+#0	search/4096	'''
+#>&0	regex	.*'''$	Python script text executable
+#!:mime text/x-python
 
-0	search/4096	"""
->&0	regex	.*"""$	Python script text executable
-!:mime text/x-python
+#0	search/4096	"""
+#>&0	regex	.*"""$	Python script text executable
+#!:mime text/x-python
 
 # try:
 # except: or finally:

+ 44 - 18
magic/Magdir/riff

@@ -1,11 +1,51 @@
 
 #------------------------------------------------------------------------------
-# $File: riff,v 1.24 2013/11/19 23:58:17 christos Exp $
+# $File: riff,v 1.26 2014/03/06 18:55:09 christos Exp $
 # riff:  file(1) magic for RIFF format
 # See
 #
 #	http://www.seanet.com/users/matts/riffmci/riffmci.htm
 #
+
+# audio format tag. Assume limits: max 1024 bit, 128 channels, 1 MHz
+0   name    riff-wave
+>0	leshort		1		\b, Microsoft PCM
+>>14	leshort		>0
+>>>14	leshort		<1024	\b, %d bit
+>0	leshort		2		\b, Microsoft ADPCM
+>0	leshort		6		\b, ITU G.711 A-law
+>0	leshort		7		\b, ITU G.711 mu-law
+>0	leshort		8		\b, Microsoft DTS
+>0	leshort		17		\b, IMA ADPCM
+>0	leshort		20		\b, ITU G.723 ADPCM (Yamaha)
+>0	leshort		49		\b, GSM 6.10
+>0	leshort		64		\b, ITU G.721 ADPCM
+>0	leshort		80		\b, MPEG
+>0	leshort		85		\b, MPEG Layer 3
+>0	leshort		0x2001		\b, DTS
+>2	leshort		=1		\b, mono
+>2	leshort		=2		\b, stereo
+>2	leshort		>2
+>>2	leshort		<128	\b, %d channels
+>4	lelong		>0
+>>4	lelong		<1000000	%d Hz
+
+# try to find "fmt "
+0   name    riff-walk
+>0  string  fmt\x20
+>>4 lelong  <0x80
+>>>8 use    riff-wave
+>0  string  LIST
+>>&(4.l+4)  use riff-walk
+>0  string  DISP
+>>&(4.l+4)  use riff-walk
+>0  string  bext
+>>&(4.l+4)  use riff-walk
+>0  string  Fake
+>>&(4.l+4)  use riff-walk
+>0  string  fact
+>>&(4.l+4)  use riff-walk
+
 # AVI section extended by Patrik Radman <patrik+file-magic@iki.fi>
 #
 0	string		RIFF		RIFF (little-endian) data
@@ -35,28 +75,14 @@
 # Microsoft WAVE format (*.wav)
 >8	string		WAVE		\b, WAVE audio
 !:mime	audio/x-wav
->>20	leshort		1		\b, Microsoft PCM
->>>34	leshort		>0		\b, %d bit
->>20	leshort		2		\b, Microsoft ADPCM
->>20	leshort		6		\b, ITU G.711 A-law
->>20	leshort		7		\b, ITU G.711 mu-law
->>20	leshort		8		\b, Microsoft DTS
->>20	leshort		17		\b, IMA ADPCM
->>20	leshort		20		\b, ITU G.723 ADPCM (Yamaha)
->>20	leshort		49		\b, GSM 6.10
->>20	leshort		64		\b, ITU G.721 ADPCM
->>20	leshort		80		\b, MPEG
->>20	leshort		85		\b, MPEG Layer 3
->>20	leshort		0x2001		\b, DTS
->>22	leshort		=1		\b, mono
->>22	leshort		=2		\b, stereo
->>22	leshort		>2		\b, %d channels
->>24	lelong		>0		%d Hz
+>>12    string  >\0
+>>>12   use     riff-walk
 # Corel Draw Picture
 >8	string		CDRA		\b, Corel Draw Picture
 !:mime	image/x-coreldraw
 >8	string		CDR6		\b, Corel Draw Picture, version 6
 !:mime	image/x-coreldraw
+>8	string		NUNDROOT	\b, Steinberg CuBase
 # AVI == Audio Video Interleave
 >8	string		AVI\040		\b, AVI
 !:mime	video/x-msvideo

+ 2 - 2
magic/Magdir/sgi

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: sgi,v 1.19 2013/01/12 03:09:51 christos Exp $
+# $File: sgi,v 1.20 2014/03/10 00:53:38 christos Exp $
 # sgi:  file(1) magic for Silicon Graphics operating systems and applications
 #
 # Executable images are handled either in aout (for old-style a.out
@@ -128,7 +128,7 @@
 >11	byte	x				dataformat %d
 
 # Alias Maya files
-0	string/t	//Maya ASCII	Alias Maya Ascii File,
+0	string/t	//Maya\040ASCII	Alias Maya Ascii File,
 >13	string	>\0	version %s
 8	string	MAYAFOR4	Alias Maya Binary File,
 >32	string	>\0	version %s scene

+ 32 - 1
magic/Magdir/tex

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: tex,v 1.19 2013/09/17 17:39:16 christos Exp $
+# $File: tex,v 1.20 2014/03/16 02:53:03 christos Exp $
 # tex:  file(1) magic for TeX files
 #
 # XXX - needs byte-endian stuff (big-endian and little-endian DVI?)
@@ -106,3 +106,34 @@
 0	search/1	@c\ @mapfile{	TeX font aliases text file
 
 0	string		#LyX		LyX document text
+
+# ConTeXt documents
+#	http://wiki.contextgarden.net/
+0	search/4096	\\setupcolors[		ConTeXt document text
+!:strength + 15
+0	search/4096	\\definecolor[		ConTeXt document text
+!:strength + 15
+0	search/4096	\\setupinteraction[	ConTeXt document text
+!:strength + 15
+0	search/4096	\\useURL[		ConTeXt document text
+!:strength + 15
+0	search/4096	\\setuppapersize[	ConTeXt document text
+!:strength + 15
+0	search/4096	\\setuplayout[		ConTeXt document text
+!:strength + 15
+0	search/4096	\\setupfooter[		ConTeXt document text
+!:strength + 15
+0	search/4096	\\setupfootertexts[	ConTeXt document text
+!:strength + 15
+0	search/4096	\\setuppagenumbering[	ConTeXt document text
+!:strength + 15
+0	search/4096	\\setupbodyfont[	ConTeXt document text
+!:strength + 15
+0	search/4096	\\setuphead[		ConTeXt document text
+!:strength + 15
+0	search/4096	\\setupitemize[		ConTeXt document text
+!:strength + 15
+0	search/4096	\\setupwhitespace[	ConTeXt document text
+!:strength + 15
+0	search/4096	\\setupindenting[	ConTeXt document text
+!:strength + 15

+ 29 - 1
magic/Magdir/varied.script

@@ -1,28 +1,56 @@
 #------------------------------------------------------------------------------
-# $File: varied.script,v 1.9 2011/12/16 16:32:48 rrt Exp $
+# $File: varied.script,v 1.10 2014/03/01 22:32:39 christos Exp $
 # varied.script:  file(1) magic for various interpreter scripts
 
 0	string/t		#!\ /			a
 >3	string		>\0			%s script text executable
 !:strength / 2
+
+0	string/b		#!\ /			a
+>3	string		>\0			%s script executable (binary data)
+!:strength / 2
+
 0	string/t		#!\t/			a
 >3	string		>\0			%s script text executable
 !:strength / 2
+
+0	string/b		#!\t/			a
+>3	string		>\0			%s script executable (binary data)
+!:strength / 2
+
 0	string/t		#!/			a
 >2	string		>\0			%s script text executable
 !:strength / 2
+
+0	string/b		#!/			a
+>2	string		>\0			%s script executable (binary data)
+!:strength / 2
+
 0	string/t		#!\ 			script text executable
 >3	string		>\0			for %s
 !:strength / 3
 
+0	string/b		#!\ 			script executable
+>3	string		>\0			for %s (binary data)
+!:strength / 3
+
 # using env
 0	string/t	#!/usr/bin/env		a
 >15	string/t	>\0			%s script text executable
 !:strength / 10
+
+0	string/b	#!/usr/bin/env		a
+>15	string/b	>\0			%s script executable (binary data)
+!:strength / 10
+
 0	string/t	#!\ /usr/bin/env	a
 >16	string/t	>\0			%s script text executable
 !:strength / 10
 
+0	string/b	#!\ /usr/bin/env	a
+>16	string/b	>\0			%s script executable (binary data)
+!:strength / 10
+
 # From: arno <arenevier@fdn.fr>
 # mozilla xpconnect typelib
 # see http://www.mozilla.org/scriptable/typelib_file.html

+ 11 - 1
magic/Magdir/virtual

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: virtual,v 1.2 2011/11/22 13:30:05 christos Exp $
+# $File: virtual,v 1.3 2014/03/03 14:19:46 christos Exp $
 # From: James Nobis <quel@quelrod.net>
 # Microsoft hard disk images for:
 # Virtual Server
@@ -15,3 +15,13 @@
 0x40	ulelong		0xbeda107f	VirtualBox Disk Image
 >0x44	uleshort	>0		\b, major %u
 >0x46	uleshort	>0		\b, minor %u
+
+# libvirt
+# From: Philipp Hahn <hahn@univention.de>
+0	string	LibvirtQemudSave	Libvirt QEMU Suspend Image
+>0x10	lelong	x	\b, version %u
+>0x14	lelong	x	\b, XML length %u
+>0x18	lelong	1	\b, running
+>0x1c	lelong	1	\b, compressed
+
+0	string	LibvirtQemudPart	Libvirt QEMU partial Suspend Image

+ 3 - 1
magic/Makefile.am

@@ -1,5 +1,5 @@
 #
-# $File: Makefile.am,v 1.95 2014/01/31 01:51:32 christos Exp $
+# $File: Makefile.am,v 1.96 2014/03/07 17:25:17 christos Exp $
 #
 MAGIC_FRAGMENT_BASE = Magdir
 MAGIC_DIR = $(top_srcdir)/magic
@@ -159,6 +159,7 @@ $(MAGIC_FRAGMENT_DIR)/mup \
 $(MAGIC_FRAGMENT_DIR)/music \
 $(MAGIC_FRAGMENT_DIR)/natinst \
 $(MAGIC_FRAGMENT_DIR)/ncr \
+$(MAGIC_FRAGMENT_DIR)/neko \
 $(MAGIC_FRAGMENT_DIR)/netbsd \
 $(MAGIC_FRAGMENT_DIR)/netscape \
 $(MAGIC_FRAGMENT_DIR)/netware \
@@ -182,6 +183,7 @@ $(MAGIC_FRAGMENT_DIR)/pbm \
 $(MAGIC_FRAGMENT_DIR)/pdf \
 $(MAGIC_FRAGMENT_DIR)/pdp \
 $(MAGIC_FRAGMENT_DIR)/perl \
+$(MAGIC_FRAGMENT_DIR)/pgf \
 $(MAGIC_FRAGMENT_DIR)/pgp \
 $(MAGIC_FRAGMENT_DIR)/pkgadd \
 $(MAGIC_FRAGMENT_DIR)/plan9 \

+ 3 - 1
magic/Makefile.in

@@ -262,7 +262,7 @@ top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
 
 #
-# $File: Makefile.am,v 1.95 2014/01/31 01:51:32 christos Exp $
+# $File: Makefile.am,v 1.96 2014/03/07 17:25:17 christos Exp $
 #
 MAGIC_FRAGMENT_BASE = Magdir
 MAGIC_DIR = $(top_srcdir)/magic
@@ -420,6 +420,7 @@ $(MAGIC_FRAGMENT_DIR)/mup \
 $(MAGIC_FRAGMENT_DIR)/music \
 $(MAGIC_FRAGMENT_DIR)/natinst \
 $(MAGIC_FRAGMENT_DIR)/ncr \
+$(MAGIC_FRAGMENT_DIR)/neko \
 $(MAGIC_FRAGMENT_DIR)/netbsd \
 $(MAGIC_FRAGMENT_DIR)/netscape \
 $(MAGIC_FRAGMENT_DIR)/netware \
@@ -443,6 +444,7 @@ $(MAGIC_FRAGMENT_DIR)/pbm \
 $(MAGIC_FRAGMENT_DIR)/pdf \
 $(MAGIC_FRAGMENT_DIR)/pdp \
 $(MAGIC_FRAGMENT_DIR)/perl \
+$(MAGIC_FRAGMENT_DIR)/pgf \
 $(MAGIC_FRAGMENT_DIR)/pgp \
 $(MAGIC_FRAGMENT_DIR)/pkgadd \
 $(MAGIC_FRAGMENT_DIR)/plan9 \

+ 1 - 1
python/magic.py

@@ -119,7 +119,7 @@ class Magic(object):
             bi = bytes(filename, 'utf-8')
             return str(_file(self._magic_t, bi), 'utf-8')
         except:
-            return _file(self._magic_t, filename)
+            return _file(self._magic_t, filename.encode('utf-8'))
 
     def descriptor(self, fd):
         """

+ 4 - 3
src/Makefile.in

@@ -81,9 +81,9 @@ build_triplet = @build@
 host_triplet = @host@
 bin_PROGRAMS = file$(EXEEXT)
 subdir = src
-DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am strlcpy.c \
-	getline.c strcasestr.c strlcat.c vasprintf.c asctime_r.c \
-	asprintf.c pread.c getopt_long.c ctime_r.c \
+DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am ctime_r.c \
+	vasprintf.c pread.c strlcpy.c fmtcheck.c asctime_r.c \
+	strcasestr.c strlcat.c getopt_long.c asprintf.c getline.c \
 	$(top_srcdir)/depcomp $(include_HEADERS)
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \
@@ -482,6 +482,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/asctime_r.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/asprintf.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/ctime_r.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/fmtcheck.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/getline.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/getopt_long.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/pread.Plo@am__quote@

+ 112 - 45
src/apprentice.c

@@ -32,7 +32,7 @@
 #include "file.h"
 
 #ifndef	lint
-FILE_RCSID("@(#)$File: apprentice.c,v 1.196 2013/11/19 21:01:12 christos Exp $")
+FILE_RCSID("@(#)$File: apprentice.c,v 1.202 2014/03/14 18:48:11 christos Exp $")
 #endif	/* lint */
 
 #include "magic.h"
@@ -40,6 +40,9 @@ FILE_RCSID("@(#)$File: apprentice.c,v 1.196 2013/11/19 21:01:12 christos Exp $")
 #ifdef HAVE_UNISTD_H
 #include <unistd.h>
 #endif
+#ifdef HAVE_STDDEF_H
+#include <stddef.h>
+#endif
 #include <string.h>
 #include <assert.h>
 #include <ctype.h>
@@ -48,6 +51,15 @@ FILE_RCSID("@(#)$File: apprentice.c,v 1.196 2013/11/19 21:01:12 christos Exp $")
 #include <sys/mman.h>
 #endif
 #include <dirent.h>
+#if defined(HAVE_LIMITS_H)
+#include <limits.h>
+#endif
+
+#ifndef SSIZE_MAX
+#define MAXMAGIC_SIZE        ((ssize_t)0x7fffffff)
+#else
+#define MAXMAGIC_SIZE        SSIZE_MAX
+#endif
 
 #define	EATAB {while (isascii((unsigned char) *l) && \
 		      isspace((unsigned char) *l))  ++l;}
@@ -553,7 +565,8 @@ file_apprentice(struct magic_set *ms, const char *fn, int action)
 	int file_err, errs = -1;
 	size_t i;
 
-	file_reset(ms);
+	if (ms->mlist[0] != NULL)
+		file_reset(ms);
 
 	if ((fn = magic_getpath(fn, action)) == NULL)
 		return -1;
@@ -629,13 +642,62 @@ file_apprentice(struct magic_set *ms, const char *fn, int action)
 }
 
 /*
+ * Compute the real length of a magic expression, for the purposes
+ * of determining how "strong" a magic expression is (approximating
+ * how specific its matches are):
+ *	- magic characters count 0 unless escaped.
+ *	- [] expressions count 1
+ *	- {} expressions count 0
+ *	- regular characters or escaped magic characters count 1
+ *	- 0 length expressions count as one
+ */
+private size_t
+nonmagic(const char *str)
+{
+	const char *p;
+	size_t rv = 0;
+
+	for (p = str; *p; p++)
+		switch (*p) {
+		case '\\':	/* Escaped anything counts 1 */
+			if (!*++p)
+				p--;
+			rv++;
+			continue;
+		case '?':	/* Magic characters count 0 */
+		case '*':
+		case '.':
+		case '+':
+		case '^':
+		case '$':
+			continue;
+		case '[':	/* Bracketed expressions count 1, via the ']' */
+			while (*p && *p != ']')
+				p++;
+			p--;
+			continue;
+		case '{':	/* Braced expressions count 0 */
+			while (*p && *p != '}')
+				p++;
+			if (!*p)
+				p--;
+			continue;
+		default:	/* Anything else counts 1 */
+			rv++;
+			continue;
+		}
+
+	return rv == 0 ? 1 : rv;	/* Return at least 1 */
+}
+
+/*
  * Get weight of this magic entry, for sorting purposes.
  */
 private size_t
 apprentice_magic_strength(const struct magic *m)
 {
 #define MULT 10
-	size_t val = 2 * MULT;	/* baseline strength */
+	size_t v, val = 2 * MULT;	/* baseline strength */
 
 	switch (m->type) {
 	case FILE_DEFAULT:	/* make sure this sorts last */
@@ -671,10 +733,14 @@ apprentice_magic_strength(const struct magic *m)
 		break;
 
 	case FILE_SEARCH:
-	case FILE_REGEX:
 		val += m->vallen * MAX(MULT / m->vallen, 1);
 		break;
 
+	case FILE_REGEX:
+		v = nonmagic(m->value.s);
+		val += v * MAX(MULT / v, 1);
+		break;
+
 	case FILE_DATE:
 	case FILE_LEDATE:
 	case FILE_BEDATE:
@@ -1993,33 +2059,43 @@ out:
 	return -1;
 }
 
-/*
- * Parse an Apple CREATOR/TYPE annotation from magic file and put it into
- * magic[index - 1]
- */
 private int
-parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line)
+parse_extra(struct magic_set *ms, struct magic_entry *me, const char *line,
+    off_t off, size_t len, const char *name, int nt)
 {
 	size_t i;
 	const char *l = line;
 	struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
+	char *buf = (char *)m + off;
 
-	if (m->apple[0] != '\0') {
-		file_magwarn(ms, "Current entry already has a APPLE type "
-		    "`%.8s', new type `%s'", m->mimetype, l);
+	if (buf[0] != '\0') {
+		len = nt ? strlen(buf) : len;
+		file_magwarn(ms, "Current entry already has a %s type "
+		    "`%.*s', new type `%s'", name, (int)len, buf, l);
 		return -1;
 	}	
 
+	if (*m->desc == '\0') {
+		file_magwarn(ms, "Current entry does not yet have a "
+		    "description for adding a %s type", name);
+		return -1;
+	}
+
 	EATAB;
 	for (i = 0; *l && ((isascii((unsigned char)*l) &&
 	    isalnum((unsigned char)*l)) || strchr("-+/.", *l)) &&
-	    i < sizeof(m->apple); m->apple[i++] = *l++)
+	    i < len; buf[i++] = *l++)
 		continue;
-	if (i == sizeof(m->apple) && *l) {
-		/* We don't need to NUL terminate here, printing handles it */
+
+	if (i == len && *l) {
+		if (nt)
+			buf[len - 1] = '\0';
 		if (ms->flags & MAGIC_CHECK)
-			file_magwarn(ms, "APPLE type `%s' truncated %"
-			    SIZE_T_FORMAT "u", line, i);
+			file_magwarn(ms, "%s type `%s' truncated %"
+			    SIZE_T_FORMAT "u", name, line, i);
+	} else {
+		if (nt)
+			buf[i] = '\0';
 	}
 
 	if (i > 0)
@@ -2029,39 +2105,29 @@ parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line)
 }
 
 /*
+ * Parse an Apple CREATOR/TYPE annotation from magic file and put it into
+ * magic[index - 1]
+ */
+private int
+parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line)
+{
+	struct magic *m = &me->mp[0];
+
+	return parse_extra(ms, me, line, offsetof(struct magic, apple),
+	    sizeof(m->apple), "APPLE", 0);
+}
+
+/*
  * parse a MIME annotation line from magic file, put into magic[index - 1]
  * if valid
  */
 private int
 parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line)
 {
-	size_t i;
-	const char *l = line;
-	struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
-
-	if (m->mimetype[0] != '\0') {
-		file_magwarn(ms, "Current entry already has a MIME type `%s',"
-		    " new type `%s'", m->mimetype, l);
-		return -1;
-	}	
-
-	EATAB;
-	for (i = 0; *l && ((isascii((unsigned char)*l) &&
-	    isalnum((unsigned char)*l)) || strchr("-+/.", *l)) &&
-	    i < sizeof(m->mimetype); m->mimetype[i++] = *l++)
-		continue;
-	if (i == sizeof(m->mimetype)) {
-		m->mimetype[sizeof(m->mimetype) - 1] = '\0';
-		if (ms->flags & MAGIC_CHECK)
-			file_magwarn(ms, "MIME type `%s' truncated %"
-			    SIZE_T_FORMAT "u", m->mimetype, i);
-	} else
-		m->mimetype[i] = '\0';
+	struct magic *m = &me->mp[0];
 
-	if (i > 0)
-		return 0;
-	else
-		return -1;
+	return parse_extra(ms, me, line, offsetof(struct magic, mimetype),
+	    sizeof(m->mimetype), "MIME", 1);
 }
 
 private int
@@ -2601,8 +2667,9 @@ apprentice_map(struct magic_set *ms, const char *fn)
 		file_error(ms, errno, "cannot stat `%s'", dbname);
 		goto error;
 	}
-	if (st.st_size < 8) {
-		file_error(ms, 0, "file `%s' is too small", dbname);
+	if (st.st_size < 8 || st.st_size > MAXMAGIC_SIZE) {
+		file_error(ms, 0, "file `%s' is too %s", dbname,
+		    st.st_size < 8 ? "small" : "large");
 		goto error;
 	}
 

+ 11 - 5
src/cdf.c

@@ -35,7 +35,7 @@
 #include "file.h"
 
 #ifndef lint
-FILE_RCSID("@(#)$File: cdf.c,v 1.53 2013/02/26 16:20:42 christos Exp $")
+FILE_RCSID("@(#)$File: cdf.c,v 1.55 2014/02/27 23:26:17 christos Exp $")
 #endif
 
 #include <assert.h>
@@ -675,11 +675,13 @@ out:
 
 int
 cdf_read_short_stream(const cdf_info_t *info, const cdf_header_t *h,
-    const cdf_sat_t *sat, const cdf_dir_t *dir, cdf_stream_t *scn)
+    const cdf_sat_t *sat, const cdf_dir_t *dir, cdf_stream_t *scn,
+    const cdf_directory_t **root)
 {
 	size_t i;
 	const cdf_directory_t *d;
 
+	*root = NULL;
 	for (i = 0; i < dir->dir_len; i++)
 		if (dir->dir_tab[i].d_type == CDF_DIR_TYPE_ROOT_STORAGE)
 			break;
@@ -688,6 +690,7 @@ cdf_read_short_stream(const cdf_info_t *info, const cdf_header_t *h,
 	if (i == dir->dir_len)
 		goto out;
 	d = &dir->dir_tab[i];
+	*root = d;
 
 	/* If it is not there, just fake it; some docs don't have it */
 	if (d->d_stream_first_sector < 0)
@@ -1138,6 +1141,7 @@ cdf_dump_dir(const cdf_info_t *info, const cdf_header_t *h,
 	    "user stream", "lockbytes", "property", "root storage" };
 
 	for (i = 0; i < dir->dir_len; i++) {
+		char buf[26];
 		d = &dir->dir_tab[i];
 		for (j = 0; j < sizeof(name); j++)
 			name[j] = (char)CDF_TOLE2(d->d_name[j]);
@@ -1153,9 +1157,10 @@ cdf_dump_dir(const cdf_info_t *info, const cdf_header_t *h,
 		(void)fprintf(stderr, "Right child: %d\n", d->d_right_child);
 		(void)fprintf(stderr, "Flags: 0x%x\n", d->d_flags);
 		cdf_timestamp_to_timespec(&ts, d->d_created);
-		(void)fprintf(stderr, "Created %s", cdf_ctime(&ts.tv_sec));
+		(void)fprintf(stderr, "Created %s", cdf_ctime(&ts.tv_sec, buf));
 		cdf_timestamp_to_timespec(&ts, d->d_modified);
-		(void)fprintf(stderr, "Modified %s", cdf_ctime(&ts.tv_sec));
+		(void)fprintf(stderr, "Modified %s",
+		    cdf_ctime(&ts.tv_sec, buf));
 		(void)fprintf(stderr, "Stream %d\n", d->d_stream_first_sector);
 		(void)fprintf(stderr, "Size %d\n", d->d_size);
 		switch (d->d_type) {
@@ -1233,9 +1238,10 @@ cdf_dump_property_info(const cdf_property_info_t *info, size_t count)
 				cdf_print_elapsed_time(buf, sizeof(buf), tp);
 				(void)fprintf(stderr, "timestamp %s\n", buf);
 			} else {
+				char buf[26];
 				cdf_timestamp_to_timespec(&ts, tp);
 				(void)fprintf(stderr, "timestamp %s",
-				    cdf_ctime(&ts.tv_sec));
+				    cdf_ctime(&ts.tv_sec, buf));
 			}
 			break;
 		case CDF_CLIPBOARD:

+ 2 - 1
src/cdf.h

@@ -294,7 +294,8 @@ int cdf_read_dir(const cdf_info_t *, const cdf_header_t *, const cdf_sat_t *,
 int cdf_read_ssat(const cdf_info_t *, const cdf_header_t *, const cdf_sat_t *,
     cdf_sat_t *);
 int cdf_read_short_stream(const cdf_info_t *, const cdf_header_t *,
-    const cdf_sat_t *, const cdf_dir_t *, cdf_stream_t *);
+    const cdf_sat_t *, const cdf_dir_t *, cdf_stream_t *,
+    const cdf_directory_t **);
 int cdf_read_property_info(const cdf_stream_t *, const cdf_header_t *, uint32_t,
     cdf_property_info_t **, size_t *, size_t *);
 int cdf_read_summary_info(const cdf_info_t *, const cdf_header_t *,

+ 2 - 2
src/cdf_time.c

@@ -27,7 +27,7 @@
 #include "file.h"
 
 #ifndef lint
-FILE_RCSID("@(#)$File: cdf_time.c,v 1.12 2012/05/15 17:14:36 christos Exp $")
+FILE_RCSID("@(#)$File: cdf_time.c,v 1.13 2014/02/25 20:52:02 christos Exp $")
 #endif
 
 #include <time.h>
@@ -176,7 +176,7 @@ cdf_ctime(const time_t *sec, char *buf)
 }
 
 
-#ifdef TEST
+#ifdef TEST_TIME
 int
 main(int argc, char *argv[])
 {

+ 5 - 1
src/file.h

@@ -27,7 +27,7 @@
  */
 /*
  * file.h - definitions for file(1) program
- * @(#)$File: file.h,v 1.148 2014/02/12 23:20:53 christos Exp $
+ * @(#)$File: file.h,v 1.149 2014/03/15 21:47:40 christos Exp $
  */
 
 #ifndef __file_h__
@@ -514,6 +514,10 @@ char   *ctime_r(const time_t *, char *);
 #ifndef HAVE_ASCTIME_R
 char   *asctime_r(const struct tm *, char *);
 #endif
+#ifndef HAVE_FMTCHECK
+const char *fmtcheck(const char *, const char *) 
+     __attribute__((__format_arg__(2)));
+#endif
 
 #if defined(HAVE_MMAP) && defined(HAVE_SYS_MMAN_H) && !defined(QUICK)
 #define QUICK

+ 234 - 0
src/fmtcheck.c

@@ -0,0 +1,234 @@
+/*	$NetBSD: fmtcheck.c,v 1.8 2008/04/28 20:22:59 martin Exp $	*/
+
+/*-
+ * Copyright (c) 2000 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code was contributed to The NetBSD Foundation by Allen Briggs.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "file.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+/*
+ * Classes a printf-style conversion is sorted into while two format
+ * strings are compared.  The same type also carries the scanner state
+ * from one get_next_format() call to the next.
+ */
+typedef enum __e_fmtcheck_types {
+	FMTCHECK_START,		/* state handed in on the first call */
+	FMTCHECK_SHORT,
+	FMTCHECK_INT,
+	FMTCHECK_LONG,
+	FMTCHECK_QUAD,
+	FMTCHECK_SHORTPOINTER,
+	FMTCHECK_INTPOINTER,
+	FMTCHECK_LONGPOINTER,
+	FMTCHECK_QUADPOINTER,
+	FMTCHECK_DOUBLE,
+	FMTCHECK_LONGDOUBLE,
+	FMTCHECK_STRING,
+	FMTCHECK_WIDTH,		/* a '*' field width was found */
+	FMTCHECK_PRECISION,	/* a '*' precision was found */
+	FMTCHECK_DONE,		/* no further conversions in the string */
+	FMTCHECK_UNKNOWN	/* truncated or unsupported conversion */
+} EFT;
+
+/*
+ * Store the scan position f back through pf, then return r from the
+ * enclosing function.  The store happens before r is evaluated, so r may
+ * itself be a call that reads and advances *pf.
+ */
+#define RETURN(pf,f,r) do { \
+			*(pf) = (f); \
+			return r; \
+		       } /*NOTREACHED*/ /*CONSTCOND*/ while (0)
+
+/*
+ * Classify the conversion that starts at *pf.  *pf must point just past
+ * any flags, field width and precision, i.e. at an optional length
+ * modifier (h, l, ll, q, L) followed by the conversion character.
+ *
+ * On return *pf points at the conversion character, or at the
+ * terminating NUL when the format string ends early.
+ *
+ * Returns the FMTCHECK_* class of the argument the conversion consumes,
+ * or FMTCHECK_UNKNOWN for a truncated specification, an unsupported
+ * conversion character, or a modifier that is not accepted for it.
+ */
+static EFT
+get_next_format_from_precision(const char **pf)
+{
+	int		sh, lg, quad, longdouble;
+	const char	*f;
+
+	sh = lg = quad = longdouble = 0;
+
+	f = *pf;
+	/* Consume an optional length modifier and remember which one. */
+	switch (*f) {
+	case 'h':
+		f++;
+		sh = 1;
+		break;
+	case 'l':
+		f++;
+		if (!*f) RETURN(pf,f,FMTCHECK_UNKNOWN);
+		if (*f == 'l') {
+			/* "ll" is treated like 'q' */
+			f++;
+			quad = 1;
+		} else {
+			lg = 1;
+		}
+		break;
+	case 'q':
+		f++;
+		quad = 1;
+		break;
+	case 'L':
+		f++;
+		longdouble = 1;
+		break;
+	default:
+		break;
+	}
+	/* A modifier with nothing after it is a truncated specification. */
+	if (!*f) RETURN(pf,f,FMTCHECK_UNKNOWN);
+	if (strchr("diouxX", *f)) {
+		if (longdouble)
+			RETURN(pf,f,FMTCHECK_UNKNOWN);
+		if (lg)
+			RETURN(pf,f,FMTCHECK_LONG);
+		if (quad)
+			RETURN(pf,f,FMTCHECK_QUAD);
+		/* 'h' is accepted here and classed as int */
+		RETURN(pf,f,FMTCHECK_INT);
+	}
+	if (*f == 'n') {
+		if (longdouble)
+			RETURN(pf,f,FMTCHECK_UNKNOWN);
+		if (sh)
+			RETURN(pf,f,FMTCHECK_SHORTPOINTER);
+		if (lg)
+			RETURN(pf,f,FMTCHECK_LONGPOINTER);
+		if (quad)
+			RETURN(pf,f,FMTCHECK_QUADPOINTER);
+		RETURN(pf,f,FMTCHECK_INTPOINTER);
+	}
+	/* D, O and U are classed as long and accept no modifier. */
+	if (strchr("DOU", *f)) {
+		if (sh + lg + quad + longdouble)
+			RETURN(pf,f,FMTCHECK_UNKNOWN);
+		RETURN(pf,f,FMTCHECK_LONG);
+	}
+	/*
+	 * Every floating-point conversion consumes a double (a long double
+	 * with 'L').  The upper-case and hexadecimal forms are listed too,
+	 * so that e.g. "%G" is not rejected as unknown.
+	 */
+	if (strchr("aAeEfFgG", *f)) {
+		if (longdouble)
+			RETURN(pf,f,FMTCHECK_LONGDOUBLE);
+		if (sh + lg + quad)
+			RETURN(pf,f,FMTCHECK_UNKNOWN);
+		RETURN(pf,f,FMTCHECK_DOUBLE);
+	}
+	if (*f == 'c') {
+		if (sh + lg + quad + longdouble)
+			RETURN(pf,f,FMTCHECK_UNKNOWN);
+		RETURN(pf,f,FMTCHECK_INT);
+	}
+	if (*f == 's') {
+		if (sh + lg + quad + longdouble)
+			RETURN(pf,f,FMTCHECK_UNKNOWN);
+		RETURN(pf,f,FMTCHECK_STRING);
+	}
+	/* %p is classed together with long. */
+	if (*f == 'p') {
+		if (sh + lg + quad + longdouble)
+			RETURN(pf,f,FMTCHECK_UNKNOWN);
+		RETURN(pf,f,FMTCHECK_LONG);
+	}
+	RETURN(pf,f,FMTCHECK_UNKNOWN);
+	/*NOTREACHED*/
+}
+
+/*
+ * Continue classifying a conversion from the point just after its field
+ * width.  An optional ".precision" is consumed here: a '*' precision is
+ * reported as FMTCHECK_PRECISION with *pf left on the '*'; a numeric
+ * (possibly empty) precision is skipped.  Everything after that is
+ * delegated to get_next_format_from_precision().
+ */
+static EFT
+get_next_format_from_width(const char **pf)
+{
+	const char	*cur = *pf;
+
+	if (*cur == '.') {
+		cur++;
+		if (*cur == '*') {
+			*pf = cur;
+			return FMTCHECK_PRECISION;
+		}
+		/* skip the digits of an explicit precision, if any */
+		for (; isdigit((unsigned char)*cur); cur++)
+			continue;
+		if (*cur == '\0') {
+			/* format string ended inside the specification */
+			*pf = cur;
+			return FMTCHECK_UNKNOWN;
+		}
+	}
+
+	*pf = cur;
+	return get_next_format_from_precision(pf);
+}
+
+/*
+ * Find the next argument-consuming element of the format string at *pf
+ * and return its class.  eft is the value the previous call returned for
+ * the same string (FMTCHECK_START on the first call).
+ *
+ * If the previous call stopped on a '*' width or precision, *pf still
+ * points at that '*'; it is stepped over and parsing of the same
+ * conversion resumes.  Otherwise the string is searched for the next '%'
+ * that is not part of a "%%" pair.
+ *
+ * Returns FMTCHECK_DONE (with *pf set to NULL) when no conversion is
+ * left and FMTCHECK_UNKNOWN when the string ends inside a conversion.
+ */
+static EFT
+get_next_format(const char **pf, EFT eft)
+{
+	const char	*cur;
+
+	switch (eft) {
+	case FMTCHECK_WIDTH:
+		++*pf;
+		return get_next_format_from_width(pf);
+	case FMTCHECK_PRECISION:
+		++*pf;
+		return get_next_format_from_precision(pf);
+	default:
+		break;
+	}
+
+	/* Locate the next '%' that introduces a conversion. */
+	for (cur = *pf;; cur++) {
+		cur = strchr(cur, '%');
+		if (cur == NULL) {
+			*pf = NULL;
+			return FMTCHECK_DONE;
+		}
+		cur++;
+		if (*cur == '\0') {
+			*pf = cur;
+			return FMTCHECK_UNKNOWN;
+		}
+		if (*cur != '%')
+			break;
+		/* "%%": the loop increment steps over the second '%' */
+	}
+
+	/* Eat any of the flags */
+	while (*cur != '\0' && strchr("#0- +", *cur) != NULL)
+		cur++;
+
+	if (*cur == '*') {
+		*pf = cur;
+		return FMTCHECK_WIDTH;
+	}
+
+	/* eat any width */
+	for (; isdigit((unsigned char)*cur); cur++)
+		continue;
+	if (*cur == '\0') {
+		*pf = cur;
+		return FMTCHECK_UNKNOWN;
+	}
+
+	*pf = cur;
+	return get_next_format_from_width(pf);
+}
+
+/*
+ * Compare the conversions of format string f1 against those of the
+ * reference format f2, one by one.
+ *
+ * Returns f1 when every conversion in f1 has the same argument class as
+ * the conversion at the same position in f2.  Returns f2 when f1 is
+ * NULL, when f1 contains a conversion that cannot be classified, or when
+ * the classes differ (including f2 running out of conversions first).
+ */
+const char *
+fmtcheck(const char *f1, const char *f2)
+{
+	const char	*cand, *dflt;
+	EFT		candtype, dflttype;
+
+	if (f1 == NULL)
+		return f2;
+
+	cand = f1;
+	dflt = f2;
+	candtype = dflttype = FMTCHECK_START;
+	for (;;) {
+		candtype = get_next_format(&cand, candtype);
+		if (candtype == FMTCHECK_DONE)
+			return f1;
+		if (candtype == FMTCHECK_UNKNOWN)
+			return f2;
+		dflttype = get_next_format(&dflt, dflttype);
+		if (dflttype != candtype)
+			return f2;
+	}
+}

+ 11 - 18
src/funcs.c

@@ -27,10 +27,11 @@
 #include "file.h"
 
 #ifndef	lint
-FILE_RCSID("@(#)$File: funcs.c,v 1.67 2014/02/12 23:20:53 christos Exp $")
+FILE_RCSID("@(#)$File: funcs.c,v 1.70 2014/03/14 19:02:37 christos Exp $")
 #endif	/* lint */
 
 #include "magic.h"
+#include <assert.h>
 #include <stdarg.h>
 #include <stdlib.h>
 #include <string.h>
@@ -174,8 +175,7 @@ file_buffer(struct magic_set *ms, int fd, const char *inname __attribute__ ((unu
 	const char *code_mime = "binary";
 	const char *type = "application/octet-stream";
 	const char *def = "data";
-
-
+	const char *ftype = NULL;
 
 	if (nb == 0) {
 		def = "empty";
@@ -188,7 +188,7 @@ file_buffer(struct magic_set *ms, int fd, const char *inname __attribute__ ((unu
 
 	if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) {
 		looks_text = file_encoding(ms, ubuf, nb, &u8buf, &ulen,
-		    &code, &code_mime, &type);
+		    &code, &code_mime, &ftype);
 	}
 
 #ifdef __EMX__
@@ -263,19 +263,6 @@ file_buffer(struct magic_set *ms, int fd, const char *inname __attribute__ ((unu
 				(void)fprintf(stderr, "ascmagic %d\n", m);
 			goto done;
 		}
-
-		/* try to discover text encoding */
-		if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) {
-			if (looks_text == 0)
-				if ((m = file_ascmagic_with_encoding( ms, ubuf,
-				    nb, u8buf, ulen, code, type, looks_text))
-				    != 0) {
-					if ((ms->flags & MAGIC_DEBUG) != 0)
-						(void)fprintf(stderr,
-						    "ascmagic/enc %d\n", m);
-					goto done;
-				}
-		}
 	}
 
 simple:
@@ -442,7 +429,12 @@ file_replace(struct magic_set *ms, const char *pat, const char *rep)
 {
 	regex_t rx;
 	int rc, rv = -1;
+	char *old_lc_ctype;
 
+	old_lc_ctype = setlocale(LC_CTYPE, NULL);
+	assert(old_lc_ctype != NULL);
+	old_lc_ctype = strdup(old_lc_ctype);
+	assert(old_lc_ctype != NULL);
 	(void)setlocale(LC_CTYPE, "C");
 	rc = regcomp(&rx, pat, REG_EXTENDED);
 	if (rc) {
@@ -463,6 +455,7 @@ file_replace(struct magic_set *ms, const char *pat, const char *rep)
 		rv = nm;
 	}
 out:
-	(void)setlocale(LC_CTYPE, "");
+	(void)setlocale(LC_CTYPE, old_lc_ctype);
+	free(old_lc_ctype);
 	return rv;
 }

+ 1 - 1
src/magic.h

@@ -75,7 +75,7 @@
 #define	MAGIC_NO_CHECK_FORTRAN	0x000000 /* Don't check ascii/fortran */
 #define	MAGIC_NO_CHECK_TROFF	0x000000 /* Don't check ascii/troff */
 
-#define MAGIC_VERSION		516	/* This implementation */
+#define MAGIC_VERSION		517	/* This implementation */
 
 
 #ifdef __cplusplus

+ 83 - 15
src/readcdf.c

@@ -26,9 +26,10 @@
 #include "file.h"
 
 #ifndef lint
-FILE_RCSID("@(#)$File: readcdf.c,v 1.37 2014/01/06 13:41:18 rrt Exp $")
+FILE_RCSID("@(#)$File: readcdf.c,v 1.40 2014/03/06 15:23:33 christos Exp $")
 #endif
 
+#include <assert.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <string.h>
@@ -69,25 +70,57 @@ static const struct nv {
 	{ NULL,				NULL,			},
 };
 
+/*
+ * Tables mapping a CLSID (held as two 64-bit words) to a mime string
+ * (clsid2mime) or to a description (clsid2desc).  Both are searched by
+ * cdf_clsid_to_mime(), which walks entries until it reaches one whose
+ * mime member is NULL, so every table must end with such a sentinel
+ * entry; without it a lookup that finds no match reads past the end of
+ * the array.
+ */
+static const struct cv {
+	uint64_t clsid[2];
+	const char *mime;
+} clsid2mime[] = {
+	{
+		{ 0x00000000000c1084LLU, 0x46000000000000c0LLU },
+		"x-msi",
+	},
+	{	/* end of table */
+		{ 0, 0 },
+		NULL,
+	},
+}, clsid2desc[] = {
+	{
+		{ 0x00000000000c1084LLU, 0x46000000000000c0LLU },
+		"MSI Installer",
+	},
+	{	/* end of table */
+		{ 0, 0 },
+		NULL,
+	},
+};
+
+/*
+ * Look clsid up in the table cv and return the string stored with the
+ * first entry whose two 64-bit words both match, or NULL when no entry
+ * matches.  The walk stops at the first entry whose mime member is NULL,
+ * so the table has to be terminated by such an entry.
+ */
+private const char *
+cdf_clsid_to_mime(const uint64_t clsid[2], const struct cv *cv)
+{
+	const struct cv *ent;
+
+	for (ent = cv; ent->mime != NULL; ent++) {
+		if (ent->clsid[0] != clsid[0] || ent->clsid[1] != clsid[1])
+			continue;
+		return ent->mime;
+	}
+	return NULL;
+}
+
 private const char *
 cdf_app_to_mime(const char *vbuf, const struct nv *nv)
 {
 	size_t i;
 	const char *rv = NULL;
+	char *old_lc_ctype;
 
+	old_lc_ctype = setlocale(LC_CTYPE, NULL);
+	assert(old_lc_ctype != NULL);
+	old_lc_ctype = strdup(old_lc_ctype);
+	assert(old_lc_ctype != NULL);
 	(void)setlocale(LC_CTYPE, "C");
 	for (i = 0; nv[i].pattern != NULL; i++)
 		if (strcasestr(vbuf, nv[i].pattern) != NULL) {
 			rv = nv[i].mime;
 			break;
 		}
-	(void)setlocale(LC_CTYPE, "");
+	(void)setlocale(LC_CTYPE, old_lc_ctype);
+	free(old_lc_ctype);
 	return rv;
 }
 
 private int
 cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info,
-    size_t count)
+    size_t count, const uint64_t clsid[2])
 {
         size_t i;
         cdf_timestamp_t tp;
@@ -97,6 +130,9 @@ cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info,
         const char *s;
         int len;
 
+        if (!NOTMIME(ms))
+		str = cdf_clsid_to_mime(clsid, clsid2mime);
+
         for (i = 0; i < count; i++) {
                 cdf_print_property_name(buf, sizeof(buf), info[i].pi_id);
                 switch (info[i].pi_type) {
@@ -153,7 +189,7 @@ cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info,
                                                     buf, vbuf) == -1)
                                                         return -1;
                                         }
-                                } else if (info[i].pi_id ==
+                                } else if (str == NULL && info[i].pi_id ==
 				    CDF_PROPERTY_NAME_OF_APPLICATION) {
 					str = cdf_app_to_mime(vbuf, app2mime);
 				}
@@ -200,7 +236,7 @@ cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info,
 
 private int
 cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h,
-    const cdf_stream_t *sst)
+    const cdf_stream_t *sst, const uint64_t clsid[2])
 {
         cdf_summary_info_header_t si;
         cdf_property_info_t *info;
@@ -211,6 +247,8 @@ cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h,
                 return -1;
 
         if (NOTMIME(ms)) {
+		const char *str;
+
                 if (file_printf(ms, "Composite Document File V2 Document")
 		    == -1)
                         return -1;
@@ -238,14 +276,32 @@ cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h,
                                 return -2;
                         break;
                 }
+		str = cdf_clsid_to_mime(clsid, clsid2desc);
+		if (str)
+                        if (file_printf(ms, ", %s", str) == -1)
+				return -2;
         }
 
-        m = cdf_file_property_info(ms, info, count);
+        m = cdf_file_property_info(ms, info, count, clsid);
         free(info);
 
         return m == -1 ? -2 : m;
 }
 
+#ifdef notdef
+/*
+ * Format the two 64-bit words of uuid into buf (at most len bytes, via
+ * snprintf) as five dash-separated hexadecimal groups with minimum
+ * widths 8-4-4-4-12, and return buf.  The first three groups come from
+ * uuid[0], the last two from uuid[1].  Currently compiled out: the
+ * function sits inside an "#ifdef notdef" section.
+ *
+ * NOTE(review): the mask applied to the last group looks wider than the
+ * 12 hex digits (48 bits) the format asks for - confirm before enabling.
+ */
+private char *
+format_clsid(char *buf, size_t len, const uint64_t uuid[2]) {
+	snprintf(buf, len, "%.8" PRIx64 "-%.4" PRIx64 "-%.4" PRIx64 "-%.4" 
+	    PRIx64 "-%.12" PRIx64,
+	    (uuid[0] >> 32) & (uint64_t)0x000000000ffffffffLLU,
+	    (uuid[0] >> 16) & (uint64_t)0x0000000000000ffffLLU,
+	    (uuid[0] >>  0) & (uint64_t)0x0000000000000ffffLLU, 
+	    (uuid[1] >> 48) & (uint64_t)0x0000000000000ffffLLU,
+	    (uuid[1] >>  0) & (uint64_t)0x0000fffffffffffffLLU);
+	return buf;
+}
+#endif
+
 protected int
 file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf,
     size_t nbytes)
@@ -291,13 +347,26 @@ file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf,
                 goto out2;
         }
 
-        if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst)) == -1) {
+        const cdf_directory_t *root_storage;
+        if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst,
+	    &root_storage)) == -1) {
                 expn = "Cannot read short stream";
                 goto out3;
         }
 #ifdef CDF_DEBUG
         cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir);
 #endif
+#ifdef notdef
+	if (root_storage) {
+		if (NOTMIME(ms)) {
+			char clsbuf[128];
+			if (file_printf(ms, "CLSID %s, ",
+			    format_clsid(clsbuf, sizeof(clsbuf),
+			    root_storage->d_storage_uuid)) == -1)
+				return -1;
+		}
+	}
+#endif
 
         if ((i = cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir,
             &scn)) == -1) {
@@ -312,23 +381,22 @@ file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf,
 #ifdef CDF_DEBUG
         cdf_dump_summary_info(&h, &scn);
 #endif
-        if ((i = cdf_file_summary_info(ms, &h, &scn)) < 0)
+        if ((i = cdf_file_summary_info(ms, &h, &scn,
+	    root_storage->d_storage_uuid)) < 0)
                 expn = "Can't expand summary_info";
+
 	if (i == 0) {
 		const char *str = NULL;
 		cdf_directory_t *d;
 		char name[__arraycount(d->d_name)];
 		size_t j, k;
-		for (j = 0; j < dir.dir_len; j++) {
+
+		for (j = 0; str == NULL && j < dir.dir_len; j++) {
 			d = &dir.dir_tab[j];
 			for (k = 0; k < sizeof(name); k++)
 				name[k] = (char)cdf_tole2(d->d_name[k]);
-			if (NOTMIME(ms))
-				str = cdf_app_to_mime(name, name2desc);
-			else
-				str = cdf_app_to_mime(name, name2mime);
-			if (str != NULL)
-				break;
+			str = cdf_app_to_mime(name,
+			    NOTMIME(ms) ? name2desc : name2mime);
 		}
 		if (NOTMIME(ms)) {
 			if (str != NULL) {

+ 1 - 2
src/readelf.c

@@ -27,7 +27,7 @@
 #include "file.h"
 
 #ifndef lint
-FILE_RCSID("@(#)$File: readelf.c,v 1.99 2013/11/05 15:44:01 christos Exp $")
+FILE_RCSID("@(#)$File: readelf.c,v 1.102 2014/03/11 21:00:13 christos Exp $")
 #endif
 
 #ifdef BUILTIN_ELF
@@ -737,7 +737,6 @@ core:
 
 	default:
 		if (xnh_type == NT_PRPSINFO && *flags & FLAGS_IS_CORE) {
-/*###709 [cc] warning: declaration of 'i' shadows previous non-variable%%%*/
 			size_t i, j;
 			unsigned char c;
 			/*

+ 29 - 16
src/softmagic.c

@@ -32,16 +32,12 @@
 #include "file.h"
 
 #ifndef	lint
-FILE_RCSID("@(#)$File: softmagic.c,v 1.174 2014/02/12 23:20:53 christos Exp $")
+FILE_RCSID("@(#)$File: softmagic.c,v 1.180 2014/03/15 21:47:40 christos Exp $")
 #endif	/* lint */
 
 #include "magic.h"
-#ifdef HAVE_FMTCHECK
-#include <stdio.h>
 #define F(a, b) fmtcheck((a), (b))
-#else
-#define F(a, b) (a)
-#endif
+#include <assert.h>
 #include <string.h>
 #include <ctype.h>
 #include <stdlib.h>
@@ -71,7 +67,7 @@ private void cvt_16(union VALUETYPE *, const struct magic *);
 private void cvt_32(union VALUETYPE *, const struct magic *);
 private void cvt_64(union VALUETYPE *, const struct magic *);
 
-#define OFFSET_OOB(n, o, i)	((n) < (o) || (i) >= ((n) - (o)))
+#define OFFSET_OOB(n, o, i)	((n) < (o) || (i) > ((n) - (o)))
 /*
  * softmagic - lookup one file in parsed, in-memory copy of database
  * Passed the name and FILE * of one file to be typed.
@@ -352,10 +348,15 @@ check_fmt(struct magic_set *ms, struct magic *m)
 {
 	regex_t rx;
 	int rc, rv = -1;
+	char *old_lc_ctype;
 
 	if (strchr(m->desc, '%') == NULL)
 		return 0;
 
+	old_lc_ctype = setlocale(LC_CTYPE, NULL);
+	assert(old_lc_ctype != NULL);
+	old_lc_ctype = strdup(old_lc_ctype);
+	assert(old_lc_ctype != NULL);
 	(void)setlocale(LC_CTYPE, "C");
 	rc = regcomp(&rx, "%[-0-9\\.]*s", REG_EXTENDED|REG_NOSUB);
 	if (rc) {
@@ -367,7 +368,8 @@ check_fmt(struct magic_set *ms, struct magic *m)
 		regfree(&rx);
 		rv = !rc;
 	}
-	(void)setlocale(LC_CTYPE, "");
+	(void)setlocale(LC_CTYPE, old_lc_ctype);
+	free(old_lc_ctype);
 	return rv;
 }
 
@@ -531,8 +533,7 @@ mprint(struct magic_set *ms, struct magic *m)
 	case FILE_LEDATE:
 	case FILE_MEDATE:
 		if (file_printf(ms, F(m->desc, "%s"),
-		    file_fmttime(p->l, FILE_T_LOCAL,
-		    tbuf)) == -1)
+		    file_fmttime(p->l, FILE_T_LOCAL, tbuf)) == -1)
 			return -1;
 		t = ms->offset + sizeof(uint32_t);
 		break;
@@ -1733,14 +1734,14 @@ mget(struct magic_set *ms, const unsigned char *s, struct magic *m,
 		break;
 
 	case FILE_REGEX:
-		if (OFFSET_OOB(nbytes, offset, 0))
+		if (nbytes < offset)
 			return 0;
 		break;
 
 	case FILE_INDIRECT:
 		if (offset == 0)
 			return 0;
-		if (OFFSET_OOB(nbytes, offset, 0))
+		if (nbytes < offset)
 			return 0;
 		sbuf = ms->o.buf;
 		soffset = ms->offset;
@@ -1755,16 +1756,20 @@ mget(struct magic_set *ms, const unsigned char *s, struct magic *m,
 		ms->offset = soffset;
 		if (rv == 1) {
 			if ((ms->flags & (MAGIC_MIME|MAGIC_APPLE)) == 0 &&
-			    file_printf(ms, F(m->desc, "%u"), offset) == -1)
+			    file_printf(ms, F(m->desc, "%u"), offset) == -1) {
+				free(rbuf);
 				return -1;
-			if (file_printf(ms, "%s", rbuf) == -1)
+			}
+			if (file_printf(ms, "%s", rbuf) == -1) {
+				free(rbuf);
 				return -1;
-			free(rbuf);
+			}
 		}
+		free(rbuf);
 		return rv;
 
 	case FILE_USE:
-		if (OFFSET_OOB(nbytes, offset, 0))
+		if (nbytes < offset)
 			return 0;
 		sbuf = m->value.s;
 		if (*sbuf == '^') {
@@ -1884,6 +1889,7 @@ magiccheck(struct magic_set *ms, struct magic *m)
 	double dl, dv;
 	int matched;
 	union VALUETYPE *p = &ms->ms_value;
+	char *old_lc_ctype;
 
 	switch (m->type) {
 	case FILE_BYTE:
@@ -2042,6 +2048,11 @@ magiccheck(struct magic_set *ms, struct magic *m)
 		if (ms->search.s == NULL)
 			return 0;
 
+		old_lc_ctype = setlocale(LC_CTYPE, NULL);
+		assert(old_lc_ctype != NULL);
+		old_lc_ctype = strdup(old_lc_ctype);
+		assert(old_lc_ctype != NULL);
+		(void)setlocale(LC_CTYPE, "C");
 		l = 0;
 		rc = regcomp(&rx, m->value.s,
 		    REG_EXTENDED|REG_NEWLINE|
@@ -2090,6 +2101,8 @@ magiccheck(struct magic_set *ms, struct magic *m)
 			}
 			regfree(&rx);
 		}
+		(void)setlocale(LC_CTYPE, old_lc_ctype);
+		free(old_lc_ctype);
 		if (v == (uint64_t)-1)
 			return -1;
 		break;

+ 1 - 1
tests/Makefile.am

@@ -13,4 +13,4 @@ issue311docx.testfile
 T = $(top_srcdir)/tests
 check-local:
 	MAGIC=$(top_builddir)/magic/magic ./test
-	for i in $T/*.testfile; do MAGIC=$(top_builddir)/magic/magic ./test $T/$$i $T/$${i%%.testfile}.result; done
+	for i in $T/*.testfile; do echo Running test: $$i; MAGIC=$(top_builddir)/magic/magic ./test $$i $${i%%.testfile}.result; done

+ 1 - 1
tests/Makefile.in

@@ -597,7 +597,7 @@ uninstall-am:
 
 check-local:
 	MAGIC=$(top_builddir)/magic/magic ./test
-	for i in $T/*.testfile; do MAGIC=$(top_builddir)/magic/magic ./test $T/$$i $T/$${i%%.testfile}.result; done
+	for i in $T/*.testfile; do echo Running test: $$i; MAGIC=$(top_builddir)/magic/magic ./test $$i $${i%%.testfile}.result; done
 
 # Tell versions [3.59,3.63) of GNU make to not export all variables.
 # Otherwise a system limit (for SysV at least) may be exceeded.