Browse Source

Import upstream version 5.10

Christos Zoulas 7 years ago
parent
commit
b3183901e5

+ 48 - 0
ChangeLog

@@ -1,3 +1,51 @@
+2011-12-15  12:17  Chris Metcalf <cmetcalf@tilera.com>
+
+	* Support Tilera architectures (tile64, tilepro, tilegx).
+
+2011-12-16  16:33  Reuben Thomas <rrt@sc3d.org>
+
+	* Add magic for /usr/bin/env Perl scripts
+	* Weaken generic script magic to avoid clashing with
+	language-specific magic.
+
+2011-12-08  13:37  Reuben Thomas <rrt@sc3d.org>
+
+	* Simplify if (p) free(p) to free(p).
+
+2011-12-08  13:07  Reuben Thomas <rrt@sc3d.org>
+
+	* Remove hardwired token finding (names.h), turning it into soft
+	magic. Patterns are either anchored regexs or search/8192. English
+	language detection and PL/1 detection have been removed as they
+	were too fragile. -e tokens is still accepted for backwards
+	compatibility.
+	* Move 3ds patterns (which are commented out anyway) into autodesk
+	(they were, oddly, in c-lang).
+
+2011-12-06  00:16  Reuben Thomas <rrt@sc3d.org>
+
+	* Tweak strength of generic hash-bang detectors to be less than
+	specific ones.
+	* Make an inconsistent description of Python scripts consistent.
+
+2011-12-05  23:58  Reuben Thomas <rrt@sc3d.org>
+
+	* Fix minor error in file(1).
+
+2011-11-05  00:00  Reuben Thomas <rrt@sc3d.org>
+
+	* Fix issue #150 (I hope).
+
+2011-09-22  12:57  Christos Zoulas <christos@zoulas.com>
+
+	* Python3 binding fixes from Kelly Anderson
+
+2011-09-20  11:32  Christos Zoulas <christos@zoulas.com>
+
+	* If a string type magic entry is marked as text or binary
+	  only match text files against text entries and binary
+	  files against binary entries.
+
 2011-09-01  12:12  Christos Zoulas <christos@zoulas.com>
 
 	* Don't wait for any subprocess, just the one we forked.

+ 10 - 10
configure

@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.68 for file 5.09.
+# Generated by GNU Autoconf 2.68 for file 5.10.
 #
 # Report bugs to <christos@astron.com>.
 #
@@ -709,8 +709,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='file'
 PACKAGE_TARNAME='file'
-PACKAGE_VERSION='5.09'
-PACKAGE_STRING='file 5.09'
+PACKAGE_VERSION='5.10'
+PACKAGE_STRING='file 5.10'
 PACKAGE_BUGREPORT='christos@astron.com'
 PACKAGE_URL=''
 
@@ -1439,7 +1439,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures file 5.09 to adapt to many kinds of systems.
+\`configure' configures file 5.10 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1509,7 +1509,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of file 5.09:";;
+     short | recursive ) echo "Configuration of file 5.10:";;
    esac
   cat <<\_ACEOF
 
@@ -1615,7 +1615,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-file configure 5.09
+file configure 5.10
 generated by GNU Autoconf 2.68
 
 Copyright (C) 2010 Free Software Foundation, Inc.
@@ -2319,7 +2319,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by file $as_me 5.09, which was
+It was created by file $as_me 5.10, which was
 generated by GNU Autoconf 2.68.  Invocation command line was
 
   $ $0 $@
@@ -3134,7 +3134,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='file'
- VERSION='5.09'
+ VERSION='5.10'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -13555,7 +13555,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by file $as_me 5.09, which was
+This file was extended by file $as_me 5.10, which was
 generated by GNU Autoconf 2.68.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -13621,7 +13621,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-file config.status 5.09
+file config.status 5.10
 configured by $0, generated by GNU Autoconf 2.68,
   with options \\"\$ac_cs_config\\"
 

+ 1 - 1
configure.ac

@@ -1,5 +1,5 @@
 dnl Process this file with autoconf to produce a configure script.
-AC_INIT(file, 5.09, christos@astron.com)
+AC_INIT(file, 5.10, christos@astron.com)
 AM_INIT_AUTOMAKE()
 m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
 

+ 16 - 3
doc/file.man

@@ -1,5 +1,5 @@
-.\" $File: file.man,v 1.96 2011/07/12 11:23:38 rrt Exp $
-.Dd April 20, 2011
+.\" $File: file.man,v 1.98 2011/12/08 12:12:46 rrt Exp $
+.Dd October 17, 2011
 .Dt FILE __CSECTION__
 .Os
 .Sh NAME
@@ -192,7 +192,7 @@ option).
 .It encoding
 Different text encodings for soft magic tests.
 .It tokens
-Looks for known tokens inside text files.
+Ignored for backwards compatibility.
 .It cdf
 Prints details of Compound Document Files.
 .It compress
@@ -220,6 +220,19 @@ or at least one filename argument must be present;
 to test the standard input, use
 .Sq -
 as a filename argument.
+Please note that 
+.Ar namefile 
+is unwrapped and the enclosed filenames are processed when this option is
+encountered and before any further options processing is done.
+This allows one to process multiple lists of files with different command line
+arguments on the same
+.Nm
+invocation.
+Thus if you want to set the delimiter, you need to do it before you specify
+the list of files, like:
+.Dq Fl F Ar @ Fl f Ar namefile ,
+instead of:
+.Dq Fl f Ar namefile Fl F Ar @ .
 .It Fl h , Fl Fl no-dereference
 option causes symlinks not to be followed
 (on systems that support symbolic links).

+ 22 - 7
doc/libmagic.man

@@ -1,4 +1,4 @@
-.\" $File: libmagic.man,v 1.24 2011/05/13 22:11:44 christos Exp $
+.\" $File: libmagic.man,v 1.26 2011/12/19 17:49:31 christos Exp $
 .\"
 .\" Copyright (c) Christos Zoulas 2003.
 .\" All Rights Reserved.
@@ -25,7 +25,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd January 14, 2011
+.Dd December 19, 2011
 .Dt LIBMAGIC 3
 .Os
 .Sh NAME
@@ -52,9 +52,9 @@
 .Ft int
 .Fn magic_errno "magic_t cookie"
 .Ft const char *
-.Fn magic_descriptor "magic_t cookie, "int fd"
+.Fn magic_descriptor "magic_t cookie" "int fd"
 .Ft const char *
-.Fn magic_file "magic_t cookie, const char *filename"
+.Fn magic_file "magic_t cookie" "const char *filename"
 .Ft const char *
 .Fn magic_buffer "magic_t cookie" "const void *buffer" "size_t length"
 .Ft int
@@ -64,6 +64,8 @@
 .Ft int
 .Fn magic_compile "magic_t cookie" "const char *filename"
 .Ft int
+.Fn magic_list "magic_t cookie" "const char *filename"
+.Ft int
 .Fn magic_load "magic_t cookie" "const char *filename"
 .Sh DESCRIPTION
 These functions
@@ -126,7 +128,7 @@ Don't get extra information on MS Composite Document Files.
 Don't look inside compressed files.
 .It Dv MAGIC_NO_CHECK_ELF
 Don't print ELF details.
-.It Dv NO_CHECK_ENCODING
+.It Dv MAGIC_NO_CHECK_ENCODING
 Don't check text encodings.
 .It Dv MAGIC_NO_CHECK_SOFT
 Don't consult magic files.
@@ -219,6 +221,17 @@ of each file argument with
 appended to it.
 .Pp
 The
+.Fn magic_list
+function dumps all magic entries in a human readable format,
+dumping first the entries that are matched against binary files and then the
+ones that match text files.
+It takes an optional
+.Fa filename
+argument which is a colon separated list of database files, or
+.Dv NULL
+for the default database.
+.Pp
+The
 .Fn magic_load
 function must be used to load the colon
 separated list of database files passed in as
@@ -243,15 +256,17 @@ It will set errno to
 .Er EINVAL
 if an unsupported value for flags was given.
 The
+.Fn magic_list ,
 .Fn magic_load ,
 .Fn magic_compile ,
 and
 .Fn magic_check
 functions return 0 on success and \-1 on failure.
 The
-.Fn magic_file ,
+.Fn magic_buffer ,
+.Fn magic_getpath ,
 and
-.Fn magic_buffer
+.Fn magic_file
 functions return a string on success and
 .Dv NULL
 on failure.

+ 6 - 6
doc/magic.man

@@ -1,4 +1,4 @@
-.\" $File: magic.man,v 1.69 2011/05/13 22:11:44 christos Exp $
+.\" $File: magic.man,v 1.71 2011/12/07 11:58:24 rrt Exp $
 .Dd April 20, 2011
 .Dt MAGIC __FSECTION__
 .Os
@@ -63,16 +63,16 @@ consecutive blanks, the target needs at least
 consecutive blanks to match.
 The
 .Dq w
-flag treats every blank in the target as an optional blank.
+flag treats every blank in the magic as an optional blank.
 The
 .Dq c
-flag, specifies case insensitive matching: lower case
+flag specifies case insensitive matching: lower case
 characters in the magic match both lower and upper case characters in the
 target, whereas upper case characters in the magic only match upper case
 characters in the target.
 The
 .Dq C
-flag, specifies case insensitive matching: upper case
+flag specifies case insensitive matching: upper case
 characters in the magic match both lower and upper case characters in the
 target, whereas lower case characters in the magic only match lower case
 characters in the target.
@@ -82,9 +82,9 @@ and
 .Dq C .
 The
 .Dq t
-flag, forces the test to be done for text files, while the
+flag forces the test to be done for text files, while the
 .Dq b
-flag, forces the test to be done for binary files.
+flag forces the test to be done for binary files.
 .It Dv pstring
 A Pascal-style string where the first byte/short/int is interpreted as an
 unsigned length.

+ 7 - 2
magic/Magdir/archive

@@ -1,5 +1,5 @@
 #------------------------------------------------------------------------------
-# $File: archive,v 1.68 2011/09/07 15:47:51 christos Exp $
+# $File: archive,v 1.70 2011/10/26 15:44:47 christos Exp $
 # archive:  file(1) magic for archive formats (see also "msdos" for self-
 #           extracting compressed archives)
 #
@@ -12,6 +12,11 @@
 257	string		ustar\040\040\0	GNU tar archive
 !:mime	application/x-tar # encoding: gnu
 
+# Incremental snapshot gnu-tar format from:
+# http://www.gnu.org/software/tar/manual/html_node/Snapshot-Files.html
+0	string		GNU\ tar-	GNU tar incremental snapshot data
+>&0	regex		[0-9]\.[0-9]+-[0-9]+	version %s
+
 # cpio archives
 #
 # Yes, the top two "cpio archive" formats *are* supposed to just be "short".
@@ -183,7 +188,7 @@
 # MAR
 2	string	=-ah MAR archive data
 # ACB
-0	belong&0x00f800ff	0x00800000 ACB archive data
+#0	belong&0x00f800ff	0x00800000 ACB archive data
 # CPZ
 # TODO, this is what idarc says: 0	string	\0\0\0 CPZ archive data
 # JRC

+ 14 - 0
magic/Magdir/assembler

@@ -0,0 +1,14 @@
+#------------------------------------------------------------------------------
+# $File: assembler,v 1.1 2011/12/08 12:12:46 rrt Exp $
+# assembler:  file(1) magic for assembler source
+#
+0	regex	\^\.asciiz\?	assembler source text
+!:mime	text/x-asm
+0	regex	\^\.byte		assembler source text
+!:mime	text/x-asm
+0	regex	\^\.even		assembler source text
+!:mime	text/x-asm
+0	regex	\^\.globl		assembler source text
+!:mime	text/x-asm
+0	regex	\^\.text		assembler source text
+!:mime	text/x-asm

+ 41 - 13
magic/Magdir/c-lang

@@ -1,21 +1,49 @@
-
 #------------------------------------------------------------------------------
-# $File: c-lang,v 1.14 2009/09/19 16:28:08 christos Exp $
-# c-lang:  file(1) magic for C programs (or REXX)
+# $File: c-lang,v 1.16 2011/12/09 08:02:16 rrt Exp $
+# c-lang:  file(1) magic for C and related languages programs
 #
 
-# XPM icons (Greg Roelofs, newt@uchicago.edu)
-# if you uncomment "/*" for C/REXX below, also uncomment this entry
-#0	string		/*\ XPM\ */	X pixmap image data
-#!:mime	image/x-xpmi
+# BCPL
+0	search/8192	"libhdr"	BCPL source text
+!:mime	text/x-bcpl
+0	search/8192	"LIBHDR"	BCPL source text
+!:mime	text/x-bcpl
 
-# 3DS (3d Studio files) Conflicts with diff output 0x3d '='
-#16	beshort		0x3d3d		image/x-3ds
+# C
+0	regex	\^#include	C source text
+!:mime	text/x-c
+0	regex	\^char		C source text
+!:mime	text/x-c
+0	regex	\^double		C source text
+!:mime	text/x-c
+0	regex	\^extern		C source text
+!:mime	text/x-c
+0	regex	\^float		C source text
+!:mime	text/x-c
+0	regex	\^struct		C source text
+!:mime	text/x-c
+0	regex	\^union		C source text
+!:mime	text/x-c
+0	search/8192	main(		C source text
+!:mime	text/x-c
 
-# this first will upset you if you're a PL/1 shop...
-# in which case rm it; ascmagic will catch real C programs
-#0	search/1	/*		C or REXX program text
-#0	search/1	//		C++ program text
+# C++
+# The strength of these rules is increased so they beat the C rules above
+0	regex	\^template	C++ source text
+!:strength + 10
+!:mime	text/x-c++
+0	regex	\^virtual		C++ source text
+!:strength + 10
+!:mime	text/x-c++
+0	regex	\^class		C++ source text
+!:strength + 10
+!:mime	text/x-c++
+0	regex	\^public:		C++ source text
+!:strength + 10
+!:mime	text/x-c++
+0	regex	\^private:		C++ source text
+!:strength + 10
+!:mime	text/x-c++
 
 # From: Mikhail Teterin <mi@aldan.algebra.com> 
 0	string		cscope		cscope reference data

+ 4 - 1
magic/Magdir/cad

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: cad,v 1.10 2010/12/25 14:33:43 christos Exp $
+# $File: cad,v 1.11 2011/12/08 12:12:46 rrt Exp $
 # autocad:  file(1) magic for cad files
 #
 
@@ -113,3 +113,6 @@
 0	string	AC1012		AutoDesk AutoCAD R13
 0	string	AC1014		AutoDesk AutoCAD R14 
 0	string	AC1015		AutoDesk AutoCAD R2000
+
+# 3DS (3d Studio files) Conflicts with diff output 0x3d '='
+#16	beshort		0x3d3d		image/x-3ds

+ 1 - 7
magic/Magdir/commands

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: commands,v 1.41 2011/05/02 12:36:41 christos Exp $
+# $File: commands,v 1.42 2011/12/05 23:14:02 rrt Exp $
 # commands:  file(1) magic for various shells and interpreters
 #
 #0	string/w	:			shell archive or script for antique kernel text
@@ -64,12 +64,6 @@
 0	string/wt	#!\ /usr/local/bin/bash	Bourne-Again shell script text executable
 !:mime	text/x-shellscript
 
-# using env
-0	string/t	#!/usr/bin/env		a
->15	string/t	>\0			%s script text executable
-0	string/t	#!\ /usr/bin/env	a
->16	string/t	>\0			%s script text executable
-
 # PHP scripts
 # Ulf Harnhammar <ulfh@update.uu.se>
 0	search/1/c	=<?php			PHP script text

+ 5 - 4
magic/Magdir/compress

@@ -1,5 +1,5 @@
 #------------------------------------------------------------------------------
-# $File: compress,v 1.47 2011/03/08 00:39:46 christos Exp $
+# $File: compress,v 1.49 2011/12/07 22:04:27 christos Exp $
 # compress:  file(1) magic for pure-compression formats (no archives)
 #
 # compress, gzip, pack, compact, huf, squeeze, crunch, freeze, yabba, etc.
@@ -190,9 +190,10 @@
 !:mime	application/x-7z-compressed
 
 # Type: LZMA
-0	lelong		0x8000005d		LZMA compressed data,
->5	lequad		=0xffffffffffffffff	streamed
->5	lequad		!0xffffffffffffffff	non-streamed, size %lld
+0	lelong&0xffffff	=0x5d
+>12	leshort		=0xff			LZMA compressed data,
+>>5	lequad		=0xffffffffffffffff	streamed
+>>5	lequad		!0xffffffffffffffff	non-streamed, size %lld
 !:mime	application/x-lzma
 
 # http://tukaani.org/xz/xz-file-format.txt

+ 82 - 0
magic/Magdir/cups

@@ -0,0 +1,82 @@
+
+#------------------------------------------------------------------------------
+# $File: cups,v 1.1 2011/11/10 18:59:54 christos Exp $
+# Cups: file(1) magic for the cups raster file format
+# From: Laurent Martelli <martellilaurent@gmail.com>
+# http://www.cups.org/documentation.php/spec-raster.html
+#
+
+# Cups Raster image format, Big Endian
+0	string		RaS		
+!:mime	application/vnd.cups-raster
+>3	string		t		Cups Raster version 1, Big Endian
+>3	string		2		Cups Raster version 2, Big Endian
+>3	string		3		Cups Raster version 3, Big Endian
+>280	belong		x		\b, %d
+>284	belong		x		\bx%d dpi
+>376	belong		x		\b, %dx
+>380	belong		x		\b%d pixels
+>388	belong		x		%d bits/color
+>392	belong		x		%d bits/pixel
+>400	belong		0		ColorOrder=Chunky
+>400	belong		1		ColorOrder=Banded
+>400	belong		2		ColorOrder=Planar
+>404	belong		0		ColorSpace=gray
+>404	belong		1		ColorSpace=RGB
+>404	belong		2		ColorSpace=RGBA
+>404	belong		3		ColorSpace=black
+>404	belong		4		ColorSpace=CMY
+>404	belong		5		ColorSpace=YMC
+>404	belong		6		ColorSpace=CMYK
+>404	belong		7		ColorSpace=YMCK
+>404	belong		8		ColorSpace=KCMY
+>404	belong		9		ColorSpace=KCMYcm
+>404	belong		10		ColorSpace=GMCK
+>404	belong		11		ColorSpace=GMCS
+>404	belong		12		ColorSpace=WHITE
+>404	belong		13		ColorSpace=GOLD
+>404	belong		14		ColorSpace=SILVER
+>404	belong		15		ColorSpace=CIE XYZ
+>404	belong		16		ColorSpace=CIE Lab
+>404	belong		17		ColorSpace=RGBW
+>404	belong		18		ColorSpace=sGray
+>404	belong		19		ColorSpace=sRGB
+>404	belong		20		ColorSpace=AdobeRGB
+
+
+# Cups Raster image format, Little Endian
+1	string		SaR		
+>0	string		t		Cups Raster version 1, Little Endian
+>0	string		2		Cups Raster version 2, Little Endian
+>0	string		3		Cups Raster version 3, Little Endian
+!:mime	application/vnd.cups-raster
+>280	lelong		x		\b, %d
+>284	lelong		x		\bx%d dpi
+>376	lelong		x		\b, %dx
+>380	lelong		x		\b%d pixels
+>388	lelong		x		%d bits/color
+>392	lelong		x		%d bits/pixel
+>400	lelong		0		ColorOrder=Chunky
+>400	lelong		1		ColorOrder=Banded
+>400	lelong		2		ColorOrder=Planar
+>404	lelong		0		ColorSpace=gray
+>404	lelong		1		ColorSpace=RGB
+>404	lelong		2		ColorSpace=RGBA
+>404	lelong		3		ColorSpace=black
+>404	lelong		4		ColorSpace=CMY
+>404	lelong		5		ColorSpace=YMC
+>404	lelong		6		ColorSpace=CMYK
+>404	lelong		7		ColorSpace=YMCK
+>404	lelong		8		ColorSpace=KCMY
+>404	lelong		9		ColorSpace=KCMYcm
+>404	lelong		10		ColorSpace=GMCK
+>404	lelong		11		ColorSpace=GMCS
+>404	lelong		12		ColorSpace=WHITE
+>404	lelong		13		ColorSpace=GOLD
+>404	lelong		14		ColorSpace=SILVER
+>404	lelong		15		ColorSpace=CIE XYZ
+>404	lelong		16		ColorSpace=CIE Lab
+>404	lelong		17		ColorSpace=RGBW
+>404	lelong		18		ColorSpace=sGray
+>404	lelong		19		ColorSpace=sRGB
+>404	lelong		20		ColorSpace=AdobeRGB

+ 7 - 1
magic/Magdir/elf

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: elf,v 1.53 2009/09/19 16:28:09 christos Exp $
+# $File: elf,v 1.54 2011/12/17 17:16:29 christos Exp $
 # elf:  file(1) magic for ELF executables
 #
 # We have to check the byte order flag to see what byte order all the
@@ -149,6 +149,9 @@
 >>18	leshort		106		Analog Devices Blackfin,
 >>18	leshort		113		Altera Nios II,
 >>18	leshort		0xae		META,
+>>18	leshort		187		Tilera TILE64,
+>>18	leshort		188		Tilera TILEPro,
+>>18	leshort		191		Tilera TILE-Gx,
 >>18	leshort		0x3426		OpenRISC (obsolete),
 >>18	leshort		0x8472		OpenRISC (obsolete),
 >>18	leshort		0x9026		Alpha (unofficial),
@@ -259,6 +262,9 @@
 >>18	leshort		0x8472		OpenRISC (obsolete),
 >>18	beshort		94		Tensilica Xtensa,
 >>18	beshort		97		NatSemi 32k,
+>>18	beshort		187		Tilera TILE64,
+>>18	beshort		188		Tilera TILEPro,
+>>18	beshort		191		Tilera TILE-Gx,
 >>18	beshort		0x18ad		AVR32 (unofficial),
 >>18	beshort		0x9026		Alpha (unofficial),
 >>18	beshort		0xa390		IBM S/390 (obsolete),

+ 5 - 1
magic/Magdir/gnu

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: gnu,v 1.11 2009/09/19 16:28:09 christos Exp $
+# $File: gnu,v 1.12 2011/12/08 12:12:46 rrt Exp $
 # gnu:  file(1) magic for various GNU tools
 #
 # GNU nlsutils message catalog file format
@@ -42,3 +42,7 @@
 # Files produced by GNU gettext
 0	long	0xDE120495		GNU-format message catalog data
 0	long	0x950412DE		GNU-format message catalog data
+
+# gettext message catalogue
+0	regex	\^msgid\ 		GNU gettext message catalogue text
+!:mime text/x-po

+ 13 - 0
magic/Magdir/guile

@@ -0,0 +1,13 @@
+
+#------------------------------------------------------------------------------
+# $File: guile,v 1.1 2011/12/16 17:44:33 christos Exp $
+# Guile file magic from <dalepsmith@gmail.com>
+# http://www.gnu.org/s/guile/
+# http://git.savannah.gnu.org/gitweb/?p=guile.git;f=libguile/_scm.h;hb=HEAD#l250
+
+0	string	GOOF----	Guile Object
+>8	string	LE		\b, little endian
+>8	string	BE		\b, big endian
+>11	string	4		\b, 32bit
+>11	string	8		\b, 64bit
+>13	regex	.\..		\b, bytecode v%s

+ 7 - 2
magic/Magdir/images

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: images,v 1.70 2010/11/25 15:00:12 christos Exp $
+# $File: images,v 1.72 2011/12/08 12:12:46 rrt Exp $
 # images:  file(1) magic for image formats (see also "iff", and "c-lang" for
 # XPM bitmaps)
 #
@@ -90,6 +90,11 @@
 0	string		II\x2a\x00	TIFF image data, little-endian
 !:mime	image/tiff
 
+0	string		MM\x00\x2b	Big TIFF image data, big-endian
+!:mime	image/tiff
+0	string		II\x2b\x00	Big TIFF image data, little-endian
+!:mime	image/tiff
+
 # PNG [Portable Network Graphics, or "PNG's Not GIF"] images
 # (Greg Roelofs, newt@uchicago.edu)
 # (Albert Cahalan, acahalan@cs.uml.edu)
@@ -228,8 +233,8 @@
 #0	string		BA		PC bitmap array data
 
 # XPM icons (Greg Roelofs, newt@uchicago.edu)
-# note possible collision with C/REXX entry in c-lang; currently commented out
 0	search/1	/*\ XPM\ */	X pixmap image text
+!:mime	image/x-xpmi
 
 # Utah Raster Toolkit RLE images (janl@ifi.uio.no)
 0	leshort		0xcc52		RLE image data,

+ 4 - 1
magic/Magdir/java

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------
-# $File: java,v 1.12 2009/09/19 16:28:10 christos Exp $
+# $File: java,v 1.13 2011/12/08 12:12:46 rrt Exp $
 # Java ByteCode and Mach-O binaries (e.g., Mac OS X) use the
 # same magic number, 0xcafebabe, so they are both handled
 # in the entry called "cafebabe".
@@ -24,3 +24,6 @@
 >0	regex	dey\n[0-9][0-9][0-9]\0	Dalvik dex file (optimized for host)
 >4	string	>000			version %s
 
+# Java source
+0	regex	^import.*;$	Java source
+!:mime	text/x-java

+ 6 - 0
magic/Magdir/m4

@@ -0,0 +1,6 @@
+#------------------------------------------------------------------------------
+# $File: m4,v 1.1 2011/12/08 12:12:46 rrt Exp $
+# m4:  file(1) magic for M4 scripts
+#
+0	regex	\^dnl\ 		M4 macro processor script text
+!:mime	text/x-m4

+ 4 - 4
magic/Magdir/mail.news

@@ -1,11 +1,9 @@
-
 #------------------------------------------------------------------------------
-# $File: mail.news,v 1.19 2011/01/25 13:55:57 christos Exp $
+# $File: mail.news,v 1.20 2011/12/08 12:12:46 rrt Exp $
 # mail.news:  file(1) magic for mail and news
 #
 # Unfortunately, saved netnews also has From line added in some news software.
 #0	string		From 		mail text
-# There are tests to ascmagic.c to cope with mail and news.
 0	string/t		Relay-Version: 	old news text
 !:mime	message/rfc822
 0	string/t		#!\ rnews	batched news text
@@ -16,7 +14,9 @@
 !:mime	message/rfc822
 0	string/t		Pipe\ to 	mail piping text
 !:mime	message/rfc822
-0	string/t		Return-Path:	smtp mail text
+0	string/t		Delivered-To:	SMTP mail text
+!:mime	message/rfc822
+0	string/t		Return-Path:	SMTP mail text
 !:mime	message/rfc822
 0	string/t		Path:		news text
 !:mime	message/news

+ 15 - 0
magic/Magdir/make

@@ -0,0 +1,15 @@
+#------------------------------------------------------------------------------
+# $File: make,v 1.1 2011/12/08 12:12:46 rrt Exp $
+# make:  file(1) magic for makefiles
+#
+0	regex	\^CFLAGS	makefile script text
+!:mime	text/x-makefile
+0	regex	\^LDFLAGS	makefile script text
+!:mime	text/x-makefile
+0	regex	\^all:	makefile script text
+!:mime	text/x-makefile
+0	regex	\^.PRECIOUS	makefile script text
+!:mime	text/x-makefile
+
+0	regex	\^SUBDIRS	automake makefile script text
+!:mime	text/x-makefile

+ 54 - 54
magic/Magdir/msdos

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: msdos,v 1.75 2011/08/08 08:56:17 christos Exp $
+# $File: msdos,v 1.77 2011/12/07 22:05:05 christos Exp $
 # msdos:  file(1) magic for MS-DOS files
 #
 
@@ -41,7 +41,7 @@
 #
 # Many of the compressed formats were extracted from IDARC 1.23 source code.
 #
-0	string	MZ
+0	string/b	MZ
 !:mime	application/x-dosexec
 # All non-DOS EXE extensions have the relocation table more than 0x40 bytes into the file.
 >0x18	leshort <0x40 MS-DOS executable
@@ -294,7 +294,7 @@
 # added by Joerg Jenderek of http://www.freedos.org/software/?prog=kc
 # and http://www.freedos.org/software/?prog=kpdos
 # for FreeDOS files like KEYBOARD.SYS, KEYBRD2.SYS, KEYBRD3.SYS, *.KBD
-0	string		KCF		FreeDOS KEYBoard Layout collection
+0	string/b	KCF		FreeDOS KEYBoard Layout collection
 # only version=0x100 found
 >3	uleshort	x		\b, version 0x%x
 # length of string containing author,info and special characters
@@ -305,7 +305,7 @@
 #>>>&0	string		x		\b%-s
 >>>&0	string		x		\b%-.15s
 # for FreeDOS *.KL files 
-0	string		KLF		FreeDOS KEYBoard Layout file
+0	string/b	KLF		FreeDOS KEYBoard Layout file
 # only version=0x100 or 0x101 found
 >3	uleshort	x		\b, version 0x%x
 # stringlength
@@ -402,12 +402,12 @@
 # byte 0xeb conflicts with "sequent" magic leshort 0xn2eb
 0	ubeshort&0xeb8d	>0xeb00		
 # DR-DOS STACKER.COM SCREATE.SYS missed
->0	byte		0xeb		DOS executable (COM)
->>0x1FE leshort		0xAA55		\b, boot code
->>85	string		UPX		\b, UPX compressed
->>4	string		\ $ARX		\b, ARX self-extracting archive
->>4	string		\ $LHarc	\b, LHarc self-extracting archive
->>0x20e string		SFX\ by\ LARC	\b, LARC self-extracting archive
+>0	byte		0xeb
+>>0x1FE leshort		0xAA55		DOS executable (COM), boot code
+>>85	string		UPX		DOS executable (COM), UPX compressed
+>>4	string		\ $ARX		DOS executable (COM), ARX self-extracting archive
+>>4	string		\ $LHarc	DOS executable (COM), LHarc self-extracting archive
+>>0x20e string		SFX\ by\ LARC	DOS executable (COM), LARC self-extracting archive
 # updated by Joerg Jenderek at Oct 2008
 #0	byte		0xb8		COM executable
 0	uleshort&0x80ff	0x00b8		
@@ -421,11 +421,11 @@
 # syslinux:doc/comboot.txt
 # A COM32R program must start with the byte sequence B8 FE 4C CD 21 (mov
 # eax,21cd4cfeh) as a magic number.
-0       string	\xb8\xfe\x4c\xcd\x21	COM executable (COM32R)
+0       string/b	\xb8\xfe\x4c\xcd\x21	COM executable (COM32R)
 # start with assembler instructions mov eax,21cd4cfeh
 0	uleshort&0xc0ff	0xc0b8		
 >1	lelong		0x21cd4cfe	COM executable (32-bit COMBOOT, relocatable)
-0	string	\x81\xfc		
+0	string/b	\x81\xfc		
 >4	string	\x77\x02\xcd\x20\xb9	
 >>36	string	UPX!			FREE-DOS executable (COM), UPX compressed
 252	string Must\ have\ DOS\ version DR-DOS executable (COM)
@@ -466,16 +466,16 @@
 # FIXME: missing diet .com compression
 
 # miscellaneous formats
-0	string		LZ		MS-DOS executable (built-in)
+0	string/b	LZ		MS-DOS executable (built-in)
 #0	byte		0xf0		MS-DOS program library data
 #
 
 # AAF files:
 # <stuartc@rd.bbc.co.uk> Stuart Cunningham
-0	string	\320\317\021\340\241\261\032\341AAFB\015\000OM\006\016\053\064\001\001\001\377			AAF legacy file using MS Structured Storage
+0	string/b	\320\317\021\340\241\261\032\341AAFB\015\000OM\006\016\053\064\001\001\001\377			AAF legacy file using MS Structured Storage
 >30	byte	9		(512B sectors)
 >30	byte	12		(4kB sectors)
-0	string	\320\317\021\340\241\261\032\341\001\002\001\015\000\002\000\000\006\016\053\064\003\002\001\001			AAF file using MS Structured Storage
+0	string/b	\320\317\021\340\241\261\032\341\001\002\001\015\000\002\000\000\006\016\053\064\003\002\001\001			AAF file using MS Structured Storage
 >30	byte	9		(512B sectors)
 >30	byte	12		(4kB sectors)
 
@@ -491,14 +491,14 @@
 0	belong	0x31be0000			Microsoft Word Document
 !:mime	application/msword
 #
-0	string	PO^Q`				Microsoft Word 6.0 Document
+0	string/b	PO^Q`				Microsoft Word 6.0 Document
 !:mime	application/msword
 #
-0	string	\376\067\0\043			Microsoft Office Document
+0	string/b	\376\067\0\043			Microsoft Office Document
 !:mime	application/msword
-0	string	\333\245-\0\0\0			Microsoft Office Document
+0	string/b	\333\245-\0\0\0			Microsoft Office Document
 !:mime	application/msword
-512	string		\354\245\301		Microsoft Word Document
+512	string/b		\354\245\301		Microsoft Word Document
 !:mime	application/msword
 #
 2080	string	Microsoft\ Excel\ 5.0\ Worksheet	%s
@@ -513,7 +513,7 @@
 # Italian MS-Excel
 2121	string	Biff5		Microsoft Excel 5.0 Worksheet
 !:mime	application/vnd.ms-excel
-0	string	\x09\x04\x06\x00\x00\x00\x10\x00	Microsoft Excel Worksheet
+0	string/b	\x09\x04\x06\x00\x00\x00\x10\x00	Microsoft Excel Worksheet
 !:mime	application/vnd.ms-excel
 #
 0	belong	0x00001a00	Lotus 1-2-3
@@ -527,9 +527,9 @@
 !:mime	application/x-123
 >4	belong	0x06040600	wk1 document data
 >4	belong	0x06800200	fmt document data
-0	string		WordPro\0	Lotus WordPro
+0	string/b		WordPro\0	Lotus WordPro
 !:mime	application/vnd.lotus-wordpro
-0	string		WordPro\r\373	Lotus WordPro
+0	string/b		WordPro\r\373	Lotus WordPro
 !:mime	application/vnd.lotus-wordpro
 
 
@@ -542,17 +542,17 @@
 
 # Winamp .avs
 #0	string	Nullsoft\ AVS\ Preset\ \060\056\061\032 A plug in for Winamp ms-windows Freeware media player
-0	string	Nullsoft\ AVS\ Preset\ 	Winamp plug in
+0	string/b	Nullsoft\ AVS\ Preset\ 	Winamp plug in
 
 # Windows Metafont .WMF
-0	string	\327\315\306\232	ms-windows metafont .wmf
-0	string	\002\000\011\000	ms-windows metafont .wmf
-0	string	\001\000\011\000	ms-windows metafont .wmf
+0	string/b	\327\315\306\232	ms-windows metafont .wmf
+0	string/b	\002\000\011\000	ms-windows metafont .wmf
+0	string/b	\001\000\011\000	ms-windows metafont .wmf
 
 #tz3 files whatever that is (MS Works files)
-0	string	\003\001\001\004\070\001\000\000	tz3 ms-works file
-0	string	\003\002\001\004\070\001\000\000	tz3 ms-works file
-0	string	\003\003\001\004\070\001\000\000	tz3 ms-works file
+0	string/b	\003\001\001\004\070\001\000\000	tz3 ms-works file
+0	string/b	\003\002\001\004\070\001\000\000	tz3 ms-works file
+0	string/b	\003\003\001\004\070\001\000\000	tz3 ms-works file
 
 # PGP sig files .sig
 #0 string \211\000\077\003\005\000\063\237\127 065 to  \027\266\151\064\005\045\101\233\021\002 PGP sig
@@ -564,14 +564,14 @@
 0 string \211\000\225\003\005\000\062\122\207\304\100\345\042 PGP sig
 
 # windows zips files .dmf
-0	string	MDIF\032\000\010\000\000\000\372\046\100\175\001\000\001\036\001\000 MS Windows special zipped file
+0	string/b	MDIF\032\000\010\000\000\000\372\046\100\175\001\000\001\036\001\000 MS Windows special zipped file
 
 
 #ico files
-0	string	\102\101\050\000\000\000\056\000\000\000\000\000\000\000	Icon for MS Windows
+0	string/b	\102\101\050\000\000\000\056\000\000\000\000\000\000\000	Icon for MS Windows
 
 # Windows icons (Ian Springer <ips@fpk.hp.com>)
-0	string	\000\000\001\000	MS Windows icon resource
+0	string/b	\000\000\001\000	MS Windows icon resource
 !:mime	image/x-icon
 >4	byte	1			- 1 icon
 >4	byte	>1			- %d icons
@@ -582,13 +582,13 @@
 
 
 # .chr files
-0	string	PK\010\010BGI	Borland font 
+0	string/b	PK\010\010BGI	Borland font 
 >4	string	>\0	%s
 # then there is a copyright notice
 
 
 # .bgi files
-0	string	pk\010\010BGI	Borland device 
+0	string/b	pk\010\010BGI	Borland device 
 >4	string	>\0	%s
 # then there is a copyright notice
 
@@ -703,28 +703,28 @@
 !:mime	application/vnd.ms-tnef
 
 # HtmlHelp files (.chm)
-0	string	ITSF\003\000\000\000\x60\000\000\000\001\000\000\000	MS Windows HtmlHelp Data
+0	string/b	ITSF\003\000\000\000\x60\000\000\000\001\000\000\000	MS Windows HtmlHelp Data
 
 # GFA-BASIC (Wolfram Kleff)
-2	string		GFA-BASIC3	GFA-BASIC 3 data
+2	string/b	GFA-BASIC3	GFA-BASIC 3 data
 
 #------------------------------------------------------------------------------
 # From Stuart Caie <kyzer@4u.net> (developer of cabextract)
 # Microsoft Cabinet files
-0	string		MSCF\0\0\0\0	Microsoft Cabinet archive data
+0	string/b	MSCF\0\0\0\0	Microsoft Cabinet archive data
 !:mime application/vnd.ms-cab-compressed
 >8	lelong		x		\b, %u bytes
 >28	leshort		1		\b, 1 file
 >28	leshort		>1		\b, %u files
 
 # InstallShield Cabinet files
-0	string		ISc(		InstallShield Cabinet archive data
+0	string/b	ISc(		InstallShield Cabinet archive data
 >5	byte&0xf0	=0x60		version 6,
 >5	byte&0xf0	!0x60		version 4/5,
 >(12.l+40)	lelong	x		%u files
 
 # Windows CE package files
-0	string		MSCE\0\0\0\0	Microsoft WinCE install header
+0	string/b	MSCE\0\0\0\0	Microsoft WinCE install header
 >20	lelong		0		\b, architecture-independent
 >20	lelong		103		\b, Hitachi SH3
 >20	lelong		104		\b, Hitachi SH4
@@ -748,7 +748,7 @@
 >>44	ulelong x		version 0x%x
 
 # From: Alex Beregszaszi <alex@fsn.hu>
-0	string	COWD		VMWare3
+0	string/b	COWD		VMWare3
 >4	byte	3		disk image
 >>32	lelong	x		(%d/
 >>36	lelong	x		\b%d/
@@ -756,8 +756,8 @@
 >4	byte	2		undoable disk image
 >>32	string	>\0		(%s)
 
-0	string	VMDK		 VMware4 disk image
-0	string	KDMV		 VMware4 disk image
+0	string/b	VMDK		 VMware4 disk image
+0	string/b	KDMV		 VMware4 disk image
 
 #--------------------------------------------------------------------
 # Qemu Emulator Images
@@ -765,11 +765,11 @@
 # Updated by Adam Buchbinder (adam.buchbinder@gmail.com)
 # Made by reading sources, reading documentation, and doing trial and error
 # on existing QCOW files
-0	string	QFI\xFB	QEMU QCOW Image
+0	string/b	QFI\xFB	QEMU QCOW Image
 
 # Uncomment the following line to display Magic (only used for debugging
 # this magic number)
-#>0	string	x	, Magic: %s
+#>0	string/b	x	, Magic: %s
 
 # There are currently 2 Versions: "1" and "2".
 # http://www.gnome.org/~markmc/qcow-image-format-version-1.html
@@ -813,9 +813,9 @@
 
 >4	default x	(unknown version)
 
-0	string	QEVM		QEMU suspend to disk image
+0	string/b	QEVM		QEMU suspend to disk image
 
-0	string	Bochs\ Virtual\ HD\ Image	Bochs disk image,
+0	string/b	Bochs\ Virtual\ HD\ Image	Bochs disk image,
 >32	string	x				type %s,
 >48	string	x				subtype %s
 
@@ -823,8 +823,8 @@
 
 # from http://filext.com by Derek M Jones <derek@knosof.co.uk>
 # False positive with PPT (also currently this string is too long)
-#0	string	\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3E\x00\x03\x00\xFE\xFF\x09\x00\x06	Microsoft Installer
-0	string	\320\317\021\340\241\261\032\341	Microsoft Office Document
+#0	string/b	\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3E\x00\x03\x00\xFE\xFF\x09\x00\x06	Microsoft Installer
+0	string/b	\320\317\021\340\241\261\032\341	Microsoft Office Document
 #>48	byte	0x1B					Excel Document
 #!:mime application/vnd.ms-excel
 >546	string	bjbj			Microsoft Word Document
@@ -832,7 +832,7 @@
 >546	string	jbjb			Microsoft Word Document
 !:mime	application/msword
 
-0	string	\224\246\056		Microsoft Word Document
+0	string/b	\224\246\056		Microsoft Word Document
 !:mime	application/msword
 
 512	string	R\0o\0o\0t\0\ \0E\0n\0t\0r\0y	Microsoft Word Document
@@ -841,7 +841,7 @@
 # From: "Nelson A. de Oliveira" <naoliv@gmail.com>
 # Magic type for Dell's BIOS .hdr files
 # Dell's .hdr
-0	string $RBU
+0	string/b $RBU
 >23	string Dell			%s system BIOS
 >5	byte   2
 >>48	byte   x			version %d.
@@ -853,7 +853,7 @@
 # Type: Microsoft DirectDraw Surface
 # URL:	http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/DDSFileReference/ddsfileformat.asp
 # From: Morten Hustveit <morten@debian.org>
-0	string	DDS\040\174\000\000\000 Microsoft DirectDraw Surface (DDS),
+0	string/b	DDS\040\174\000\000\000 Microsoft DirectDraw Surface (DDS),
 >16	lelong	>0			%hd x
 >12	lelong	>0			%hd,
 >84	string	x			%.4s
@@ -864,13 +864,13 @@
 0	short	0x5045			Microsoft Document Imaging Format
 
 # MS eBook format (.lit)
-0	string	ITOLITLS		Microsoft Reader eBook Data
+0	string/b	ITOLITLS		Microsoft Reader eBook Data
 >8	lelong	x			\b, version %u
 !:mime					application/x-ms-reader
 
 # Windows CE Binary Image Data Format
 # From: Dr. Jesus <j@hug.gs>
-0	string	B000FF\n	Windows Embedded CE binary image
+0	string/b	B000FF\n	Windows Embedded CE binary image
 
 # Windows Imaging (WIM) Image
-0	string	MSWIM\000\000\000	Windows imaging (WIM) image
+0	string/b	MSWIM\000\000\000	Windows imaging (WIM) image

+ 17 - 0
magic/Magdir/music

@@ -0,0 +1,17 @@
+#------------------------------------------------------------------------------
+# $File: music,v 1.1 2011/11/25 03:28:17 christos Exp $
+# music:  file (1) magic for music formats
+
+# BWW format used by Bagpipe Music Writer Gold by Robert MacNeil Musicworks
+# and Bagpipe Writer by Doug Wickstrom
+#
+0	string		Bagpipe		Bagpipe
+>8	string		Reader		Reader
+>>15	string		>\0		(version %.3s)
+>8	string		Music\ Writer	Music Writer
+>>20	string		:
+>>>21	string		>\0		(version %.3s)
+>>21	string		Gold		Gold
+>>>25	string		:
+>>>>26	string		>\0		(version %.3s)
+

+ 40 - 1
magic/Magdir/netbsd

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: netbsd,v 1.18 2009/09/19 16:28:11 christos Exp $
+# $File: netbsd,v 1.19 2011/10/31 17:23:34 christos Exp $
 # netbsd:  file(1) magic for NetBSD objects
 #
 # All new-style magic numbers are in network byte order.
@@ -245,3 +245,42 @@
 0	belong&0377777777	043600507	a.out NetBSD/arm core
 >12	string			>\0		from '%s'
 >32	lelong			!0		(signal %d)
+
+# Kernel core dump format
+0	belong&0x0000ffff 0x00008fca	NetBSD kernel core file
+>0	belong&0x03ff0000 0x00000000	\b, Unknown
+>0	belong&0x03ff0000 0x00010000	\b, sun 68010/68020
+>0	belong&0x03ff0000 0x00020000	\b, sun 68020
+>0	belong&0x03ff0000 0x00640000	\b, 386 PC
+>0	belong&0x03ff0000 0x00860000	\b, i386 BSD
+>0	belong&0x03ff0000 0x00870000	\b, m68k BSD (8K pages)
+>0	belong&0x03ff0000 0x00880000	\b, m68k BSD (4K pages)
+>0	belong&0x03ff0000 0x00890000	\b, ns32532 BSD
+>0	belong&0x03ff0000 0x008a0000	\b, sparc/32 BSD
+>0	belong&0x03ff0000 0x008b0000	\b, pmax BSD
+>0	belong&0x03ff0000 0x008c0000	\b, vax BSD (1K pages)
+>0	belong&0x03ff0000 0x008d0000	\b, alpha BSD
+>0	belong&0x03ff0000 0x008e0000	\b, mips BSD (Big Endian)
+>0	belong&0x03ff0000 0x008f0000	\b, arm6 BSD
+>0	belong&0x03ff0000 0x00900000	\b, m68k BSD (2K pages)
+>0	belong&0x03ff0000 0x00910000	\b, sh3 BSD
+>0	belong&0x03ff0000 0x00920000	\b, ppc BSD (Big Endian)
+>0	belong&0x03ff0000 0x00930000	\b, vax BSD (4K pages)
+>0	belong&0x03ff0000 0x00940000	\b, mips1 BSD
+>0	belong&0x03ff0000 0x00950000	\b, mips2 BSD
+>0	belong&0x03ff0000 0x00960000	\b, parisc BSD
+>0	belong&0x03ff0000 0x00970000	\b, sh5/64 BSD
+>0	belong&0x03ff0000 0x00980000	\b, sparc/64 BSD
+>0	belong&0x03ff0000 0x00990000	\b, amd64 BSD
+>0	belong&0x03ff0000 0x009a0000	\b, hp200 (68010) BSD
+>0	belong&0x03ff0000 0x009b0000	\b, hp300 (68020+68881) BSD
+>0	belong&0x03ff0000 0x00c80000	\b, hp200
+>0	belong&0x03ff0000 0x020b0000	\b, hp300 (68020+68881) HP-UX
+>0	belong&0x03ff0000 0x020c0000	\b, hp300 (68020+68881) HP-UX
+>0	belong&0xfc000000 0x04000000	\b, CPU
+>0	belong&0xfc000000 0x08000000	\b, DATA
+>0	belong&0xfc000000 0x10000000	\b, STACK
+# Header fields after the magic word (see NetBSD <sys/kcore.h>): sizes @4/@6, count @8
+>4	leshort	x			\b, (headersize = %d
+>6	leshort	x			\b, segmentsize = %d
+>8	lelong	x			\b, segments = %d)

+ 59 - 52
magic/Magdir/palm

@@ -1,65 +1,72 @@
 
 #------------------------------------------------------------------------------
-# $File: palm,v 1.7 2009/09/19 16:28:11 christos Exp $
-# palm:  file(1) magic for PalmOS {.prc,.pdb}: applications, docfiles, and hacks
+# $File: palm,v 1.8 2011/12/15 16:21:43 christos Exp $
+# palm:	 file(1) magic for PalmOS {.prc,.pdb}: applications, docfiles, and hacks
 #
 # Brian Lalor <blalor@hcirisc.cs.binghamton.edu>
 
+# These are weak, byte 59 is not guaranteed to be 0 and there are
+# 8 character identifiers at byte 60, one I found for appl is BIGb.
+# What are the possibilities and where is this documented?
+
 # appl
-60      belong                  0x6170706c      PalmOS application
->0      string                  >\0             "%s"
+59	byte			0
+>60	string			appl		PalmOS application
+>>0	string			>\0		"%s"
 # TEXt
-60      belong                  0x54455874      AportisDoc file
->0      string                  >\0             "%s"
+59	byte			0
+>60	string			TEXt		AportisDoc file
+>>0	string			>\0		"%s"
 # HACK
-60      belong                  0x4841434b      HackMaster hack
->0      string                  >\0             "%s"
+59	byte			0
+>60	string			HACK		HackMaster hack
+>>0	string			>\0		"%s"
 
 # Variety of PalmOS document types
 # Michael-John Turner <mj@debian.org>
 # Thanks to Hasan Umit Ezerce <humit@tr-net.net.tr> for his DocType
-60	string	                BVokBDIC	BDicty PalmOS document
->0	string                  >\0             "%s"
-60	string	                DB99DBOS	DB PalmOS document
->0	string                  >\0             "%s"
-60	string	                vIMGView	FireViewer/ImageViewer PalmOS document
->0	string                  >\0             "%s"
-60	string	                PmDBPmDB	HanDBase PalmOS document
->0	string                  >\0             "%s"
-60	string	                InfoINDB	InfoView PalmOS document
->0	string                  >\0             "%s"
-60	string	                ToGoToGo	iSilo PalmOS document
->0	string                  >\0             "%s"
-60	string	                JfDbJBas	JFile PalmOS document
->0	string                  >\0             "%s"
-60	string	                JfDbJFil	JFile Pro PalmOS document
->0	string                  >\0             "%s"
-60	string	                DATALSdb	List PalmOS document
->0	string                  >\0             "%s"
-60	string	                Mdb1Mdb1	MobileDB PalmOS document
->0	string                  >\0             "%s"
-60	string	                PNRdPPrs	PeanutPress PalmOS document
->0	string                  >\0             "%s"
-60	string	                DataPlkr	Plucker PalmOS document
->0	string                  >\0             "%s"
-60	string	                DataSprd	QuickSheet PalmOS document
->0	string                  >\0             "%s"
-60	string	                SM01SMem	SuperMemo PalmOS document
->0	string                  >\0             "%s"
-60	string	                TEXtTlDc	TealDoc PalmOS document
->0	string                  >\0             "%s"
-60	string	                InfoTlIf	TealInfo PalmOS document
->0	string                  >\0             "%s"
-60	string	                DataTlMl	TealMeal PalmOS document
->0	string                  >\0             "%s"
-60	string	                DataTlPt	TealPaint PalmOS document
->0	string                  >\0             "%s"
-60	string	                dataTDBP	ThinkDB PalmOS document
->0	string                  >\0             "%s"
-60	string	                TdatTide	Tides PalmOS document
->0	string                  >\0             "%s"
-60	string	                ToRaTRPW	TomeRaider PalmOS document
->0	string                  >\0             "%s"
+60	string			BVokBDIC	BDicty PalmOS document
+>0	string			>\0		"%s"
+60	string			DB99DBOS	DB PalmOS document
+>0	string			>\0		"%s"
+60	string			vIMGView	FireViewer/ImageViewer PalmOS document
+>0	string			>\0		"%s"
+60	string			PmDBPmDB	HanDBase PalmOS document
+>0	string			>\0		"%s"
+60	string			InfoINDB	InfoView PalmOS document
+>0	string			>\0		"%s"
+60	string			ToGoToGo	iSilo PalmOS document
+>0	string			>\0		"%s"
+60	string			JfDbJBas	JFile PalmOS document
+>0	string			>\0		"%s"
+60	string			JfDbJFil	JFile Pro PalmOS document
+>0	string			>\0		"%s"
+60	string			DATALSdb	List PalmOS document
+>0	string			>\0		"%s"
+60	string			Mdb1Mdb1	MobileDB PalmOS document
+>0	string			>\0		"%s"
+60	string			PNRdPPrs	PeanutPress PalmOS document
+>0	string			>\0		"%s"
+60	string			DataPlkr	Plucker PalmOS document
+>0	string			>\0		"%s"
+60	string			DataSprd	QuickSheet PalmOS document
+>0	string			>\0		"%s"
+60	string			SM01SMem	SuperMemo PalmOS document
+>0	string			>\0		"%s"
+60	string			TEXtTlDc	TealDoc PalmOS document
+>0	string			>\0		"%s"
+60	string			InfoTlIf	TealInfo PalmOS document
+>0	string			>\0		"%s"
+60	string			DataTlMl	TealMeal PalmOS document
+>0	string			>\0		"%s"
+60	string			DataTlPt	TealPaint PalmOS document
+>0	string			>\0		"%s"
+60	string			dataTDBP	ThinkDB PalmOS document
+>0	string			>\0		"%s"
+60	string			TdatTide	Tides PalmOS document
+>0	string			>\0		"%s"
+60	string			ToRaTRPW	TomeRaider PalmOS document
+>0	string			>\0		"%s"
 
 # A GutenPalm zTXT etext for use on Palm Pilots (http://gutenpalm.sf.net)
 # For version 1.xx zTXTs, outputs version and numbers of bookmarks and
@@ -88,5 +95,5 @@
 >0		string		>\0		"%s"
 
 # Mobipocket (www.mobipocket.com), donated by Carl Witty
-60	string	                BOOKMOBI	Mobipocket E-book
->0	string                  >\0             "%s"
+60	string			BOOKMOBI	Mobipocket E-book
+>0	string			>\0		"%s"

+ 10 - 0
magic/Magdir/pascal

@@ -0,0 +1,10 @@
+#------------------------------------------------------------------------------
+# $File: pascal,v 1.1 2011/12/08 12:12:46 rrt Exp $
+# pascal:  file(1) magic for Pascal source
+#
+0	search/8192	(input,		Pascal source text
+!:mime	text/x-pascal
+0	regex		\^program	Pascal source text
+!:mime	text/x-pascal
+0	regex           	\^record		Pascal source text
+!:mime	text/x-pascal

+ 5 - 3
magic/Magdir/perl

@@ -1,6 +1,5 @@
-
 #------------------------------------------------------------------------------
-# $File: perl,v 1.16 2009/09/19 16:28:11 christos Exp $
+# $File: perl,v 1.17 2011/12/16 16:24:40 rrt Exp $
 # perl:  file(1) magic for Larry Wall's perl language.
 #
 # The `eval' lines recognizes an outrageously clever hack.
@@ -20,7 +19,10 @@
 !:mime	text/x-perl
 0	search/1	eval\ '(exit\ $?0)'\ &&\ eval\ 'exec	Perl script text
 !:mime	text/x-perl
-
+0	search/1	#!/usr/bin/env\ perl	Perl script text executable
+!:mime	text/x-perl
+0	search/1	#!\ /usr/bin/env\ perl	Perl script text executable
+!:mime	text/x-perl
 
 # by Dmitry V. Levin and Alexey Tourbin
 # check the first line

+ 3 - 2
magic/Magdir/python

@@ -1,11 +1,12 @@
 
 #------------------------------------------------------------------------------
-# $File: python,v 1.18 2011/02/26 02:33:38 christos Exp $
+# $File: python,v 1.20 2011/12/13 13:53:14 christos Exp $
 # python:  file(1) magic for python
 #
+# Outlook puts """ too for urgent messages
 # From: David Necas <yeti@physics.muni.cz>
 # often the module starts with a multiline string
-0	string/t	"""	a python script text executable
+0	string/t	"""	Python script text executable
 # MAGIC as specified in Python/import.c (1.5 to 2.7a0 and 3.1a0, assuming
 # that Py_UnicodeFlag is off for Python 2)
 # 20121  ( YEAR - 1995 ) + MONTH  + DAY (little endian followed by "\r\n"

+ 4 - 4
magic/Magdir/sgml

@@ -1,5 +1,5 @@
 #------------------------------------------------------------------------------
-# $File: sgml,v 1.26 2011/04/16 15:20:04 christos Exp $
+# $File: sgml,v 1.27 2011/12/07 12:01:24 rrt Exp $
 # Type:	SVG Vectorial Graphics
 # From:	Noel Torres <tecnico@ejerciciosresueltos.com>
 0	string		\<?xml\ version="
@@ -18,15 +18,15 @@
 # xhtml
 0	string/t		\<?xml\ version="
 >15	string		>\0
->>19	search/4096/cWbt	\<!doctype\ html	xHTML document text
+>>19	search/4096/cWbt	\<!doctype\ html	XHTML document text
 !:mime	text/html
 0	string/t		\<?xml\ version='
 >15	string		>\0
->>19	search/4096/cWbt	\<!doctype\ html	xHTML document text
+>>19	search/4096/cWbt	\<!doctype\ html	XHTML document text
 !:mime	text/html
 0	string/t		\<?xml\ version="
 >15	string		>\0
->>19	search/4096/cWbt	\<html	broken xHTML document text
+>>19	search/4096/cWbt	\<html	broken XHTML document text
 !:mime	text/html
 
 #------------------------------------------------------------------------------

+ 13 - 2
magic/Magdir/varied.script

@@ -1,16 +1,27 @@
-
 #------------------------------------------------------------------------------
-# $File: varied.script,v 1.7 2010/11/25 15:00:12 christos Exp $
+# $File: varied.script,v 1.9 2011/12/16 16:32:48 rrt Exp $
 # varied.script:  file(1) magic for various interpreter scripts
 
 0	string/t		#!\ /			a
 >3	string		>\0			%s script text executable
+!:strength / 2
 0	string/t		#!\t/			a
 >3	string		>\0			%s script text executable
+!:strength / 2
 0	string/t		#!/			a
 >2	string		>\0			%s script text executable
+!:strength / 2
 0	string/t		#!\ 			script text executable
 >3	string		>\0			for %s
+!:strength / 3
+
+# using env
+0	string/t	#!/usr/bin/env		a
+>15	string/t	>\0			%s script text executable
+!:strength / 10
+0	string/t	#!\ /usr/bin/env	a
+>16	string/t	>\0			%s script text executable
+!:strength / 10
 
 # From: arno <arenevier@fdn.fr>
 # mozilla xpconnect typelib

+ 6 - 6
magic/Magdir/virtual

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: virtual,v 1.1 2009/12/25 16:04:30 christos Exp $
+# $File: virtual,v 1.2 2011/11/22 13:30:05 christos Exp $
 # From: James Nobis <quel@quelrod.net>
 # Microsoft hard disk images for:
 # Virtual Server
@@ -10,8 +10,8 @@
 0	string	conectix	Microsoft Disk Image, Virtual Server or Virtual PC
 
 # Sun xVM VirtualBox Disk Image
-# string  <<< Sun xVM VirtualBox Disk Image >>>
-# .vdi
-0	string	\<\<\<\ Sun\ xVM\ VirtualBox\ Disk	Sun xVM VirtualBox Disk Image
-
-
+# From: Richard W.M. Jones <rich@annexia.org>
+# VirtualBox Disk Image
+0x40	ulelong		0xbeda107f	VirtualBox Disk Image
+>0x44	uleshort	>0		\b, major %u
+>0x46	uleshort	>0		\b, minor %u

+ 8 - 1
magic/Makefile.am

@@ -1,5 +1,5 @@
 #
-# $File: Makefile.am,v 1.73 2011/09/08 21:58:42 christos Exp $
+# $File: Makefile.am,v 1.77 2011/12/16 17:44:33 christos Exp $
 #
 MAGIC_FRAGMENT_BASE = Magdir
 MAGIC_DIR = $(top_srcdir)/magic
@@ -22,6 +22,7 @@ $(MAGIC_FRAGMENT_DIR)/apl \
 $(MAGIC_FRAGMENT_DIR)/apple \
 $(MAGIC_FRAGMENT_DIR)/applix \
 $(MAGIC_FRAGMENT_DIR)/archive \
+$(MAGIC_FRAGMENT_DIR)/assembler \
 $(MAGIC_FRAGMENT_DIR)/asterix \
 $(MAGIC_FRAGMENT_DIR)/att3b \
 $(MAGIC_FRAGMENT_DIR)/audio \
@@ -52,6 +53,7 @@ $(MAGIC_FRAGMENT_DIR)/console \
 $(MAGIC_FRAGMENT_DIR)/convex \
 $(MAGIC_FRAGMENT_DIR)/cracklib \
 $(MAGIC_FRAGMENT_DIR)/ctags \
+$(MAGIC_FRAGMENT_DIR)/cups \
 $(MAGIC_FRAGMENT_DIR)/dact \
 $(MAGIC_FRAGMENT_DIR)/database \
 $(MAGIC_FRAGMENT_DIR)/diamond \
@@ -88,6 +90,7 @@ $(MAGIC_FRAGMENT_DIR)/gnumeric \
 $(MAGIC_FRAGMENT_DIR)/grace \
 $(MAGIC_FRAGMENT_DIR)/graphviz \
 $(MAGIC_FRAGMENT_DIR)/gringotts \
+$(MAGIC_FRAGMENT_DIR)/guile \
 $(MAGIC_FRAGMENT_DIR)/hitachi-sh \
 $(MAGIC_FRAGMENT_DIR)/hp \
 $(MAGIC_FRAGMENT_DIR)/human68k \
@@ -114,10 +117,12 @@ $(MAGIC_FRAGMENT_DIR)/lisp \
 $(MAGIC_FRAGMENT_DIR)/llvm \
 $(MAGIC_FRAGMENT_DIR)/lua \
 $(MAGIC_FRAGMENT_DIR)/luks \
+$(MAGIC_FRAGMENT_DIR)/m4 \
 $(MAGIC_FRAGMENT_DIR)/mach \
 $(MAGIC_FRAGMENT_DIR)/macintosh \
 $(MAGIC_FRAGMENT_DIR)/magic \
 $(MAGIC_FRAGMENT_DIR)/mail.news \
+$(MAGIC_FRAGMENT_DIR)/make \
 $(MAGIC_FRAGMENT_DIR)/maple \
 $(MAGIC_FRAGMENT_DIR)/marc21 \
 $(MAGIC_FRAGMENT_DIR)/mathcad \
@@ -140,6 +145,7 @@ $(MAGIC_FRAGMENT_DIR)/msdos \
 $(MAGIC_FRAGMENT_DIR)/msooxml \
 $(MAGIC_FRAGMENT_DIR)/msvc \
 $(MAGIC_FRAGMENT_DIR)/mup \
+$(MAGIC_FRAGMENT_DIR)/music \
 $(MAGIC_FRAGMENT_DIR)/natinst \
 $(MAGIC_FRAGMENT_DIR)/ncr \
 $(MAGIC_FRAGMENT_DIR)/netbsd \
@@ -159,6 +165,7 @@ $(MAGIC_FRAGMENT_DIR)/osf1 \
 $(MAGIC_FRAGMENT_DIR)/palm \
 $(MAGIC_FRAGMENT_DIR)/parix \
 $(MAGIC_FRAGMENT_DIR)/parrot \
+$(MAGIC_FRAGMENT_DIR)/pascal \
 $(MAGIC_FRAGMENT_DIR)/pbm \
 $(MAGIC_FRAGMENT_DIR)/pdf \
 $(MAGIC_FRAGMENT_DIR)/pdp \

+ 8 - 1
magic/Makefile.in

@@ -196,7 +196,7 @@ top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
 
 #
-# $File: Makefile.am,v 1.73 2011/09/08 21:58:42 christos Exp $
+# $File: Makefile.am,v 1.77 2011/12/16 17:44:33 christos Exp $
 #
 MAGIC_FRAGMENT_BASE = Magdir
 MAGIC_DIR = $(top_srcdir)/magic
@@ -217,6 +217,7 @@ $(MAGIC_FRAGMENT_DIR)/apl \
 $(MAGIC_FRAGMENT_DIR)/apple \
 $(MAGIC_FRAGMENT_DIR)/applix \
 $(MAGIC_FRAGMENT_DIR)/archive \
+$(MAGIC_FRAGMENT_DIR)/assembler \
 $(MAGIC_FRAGMENT_DIR)/asterix \
 $(MAGIC_FRAGMENT_DIR)/att3b \
 $(MAGIC_FRAGMENT_DIR)/audio \
@@ -247,6 +248,7 @@ $(MAGIC_FRAGMENT_DIR)/console \
 $(MAGIC_FRAGMENT_DIR)/convex \
 $(MAGIC_FRAGMENT_DIR)/cracklib \
 $(MAGIC_FRAGMENT_DIR)/ctags \
+$(MAGIC_FRAGMENT_DIR)/cups \
 $(MAGIC_FRAGMENT_DIR)/dact \
 $(MAGIC_FRAGMENT_DIR)/database \
 $(MAGIC_FRAGMENT_DIR)/diamond \
@@ -283,6 +285,7 @@ $(MAGIC_FRAGMENT_DIR)/gnumeric \
 $(MAGIC_FRAGMENT_DIR)/grace \
 $(MAGIC_FRAGMENT_DIR)/graphviz \
 $(MAGIC_FRAGMENT_DIR)/gringotts \
+$(MAGIC_FRAGMENT_DIR)/guile \
 $(MAGIC_FRAGMENT_DIR)/hitachi-sh \
 $(MAGIC_FRAGMENT_DIR)/hp \
 $(MAGIC_FRAGMENT_DIR)/human68k \
@@ -309,10 +312,12 @@ $(MAGIC_FRAGMENT_DIR)/lisp \
 $(MAGIC_FRAGMENT_DIR)/llvm \
 $(MAGIC_FRAGMENT_DIR)/lua \
 $(MAGIC_FRAGMENT_DIR)/luks \
+$(MAGIC_FRAGMENT_DIR)/m4 \
 $(MAGIC_FRAGMENT_DIR)/mach \
 $(MAGIC_FRAGMENT_DIR)/macintosh \
 $(MAGIC_FRAGMENT_DIR)/magic \
 $(MAGIC_FRAGMENT_DIR)/mail.news \
+$(MAGIC_FRAGMENT_DIR)/make \
 $(MAGIC_FRAGMENT_DIR)/maple \
 $(MAGIC_FRAGMENT_DIR)/marc21 \
 $(MAGIC_FRAGMENT_DIR)/mathcad \
@@ -335,6 +340,7 @@ $(MAGIC_FRAGMENT_DIR)/msdos \
 $(MAGIC_FRAGMENT_DIR)/msooxml \
 $(MAGIC_FRAGMENT_DIR)/msvc \
 $(MAGIC_FRAGMENT_DIR)/mup \
+$(MAGIC_FRAGMENT_DIR)/music \
 $(MAGIC_FRAGMENT_DIR)/natinst \
 $(MAGIC_FRAGMENT_DIR)/ncr \
 $(MAGIC_FRAGMENT_DIR)/netbsd \
@@ -354,6 +360,7 @@ $(MAGIC_FRAGMENT_DIR)/osf1 \
 $(MAGIC_FRAGMENT_DIR)/palm \
 $(MAGIC_FRAGMENT_DIR)/parix \
 $(MAGIC_FRAGMENT_DIR)/parrot \
+$(MAGIC_FRAGMENT_DIR)/pascal \
 $(MAGIC_FRAGMENT_DIR)/pbm \
 $(MAGIC_FRAGMENT_DIR)/pdf \
 $(MAGIC_FRAGMENT_DIR)/pdp \

+ 16 - 6
python/magic.py

@@ -109,13 +109,17 @@ class Magic(object):
         """
         _close(self._magic_t)
 
-    def file(self, file):
+    def file(self, filename):
         """
         Returns a textual description of the contents of the argument passed
         as a filename or None if an error occurred and the MAGIC_ERROR flag
         is set.  A call to errno() will return the numeric error code.
         """
-        return _file(self._magic_t, file)
+        try: # attempt python3 approach first
+            res = _file(self._magic_t, bytes(filename, 'utf-8'))
+            return None if res is None else str(res, 'utf-8')
+        except TypeError: # python2 (or bytes filename): bytes() rejects the encoding
+            return _file(self._magic_t, filename)
 
     def descriptor(self, fd):
         """
@@ -129,14 +133,20 @@ class Magic(object):
         as a buffer or None if an error occurred and the MAGIC_ERROR flag
         is set. A call to errno() will return the numeric error code.
         """
-        return _buffer(self._magic_t, buf, len(buf))
+        try: # attempt python3 approach first
+            return str(_buffer(self._magic_t, buf, len(buf)), 'utf-8')
+        except (TypeError, UnicodeError): # python2 str, None, or undecodable bytes
+            return _buffer(self._magic_t, buf, len(buf))
 
     def error(self):
         """
         Returns a textual explanation of the last error or None
         if there was no error.
         """
-        return _error(self._magic_t)
+        try: # attempt python3 approach first
+            return str(_error(self._magic_t), 'utf-8')
+        except (TypeError, UnicodeError): # python2 str, None, or undecodable bytes
+            return _error(self._magic_t)
   
     def setflags(self, flags):
         """
@@ -149,7 +159,7 @@ class Magic(object):
         """
         return _setflags(self._magic_t, flags)
 
-    def load(self, file=None):
+    def load(self, filename=None):
         """
         Must be called to load entries in the colon separated list of database files
         passed as argument or the default database file if no argument before
@@ -157,7 +167,7 @@ class Magic(object):
         
         Returns 0 on success and -1 on failure.
         """
-        return _load(self._magic_t, file)
+        return _load(self._magic_t, filename)
 
     def compile(self, dbs):
         """

+ 1 - 1
src/Makefile.am

@@ -9,7 +9,7 @@ AM_CFLAGS = @WARNINGS@
 
 libmagic_la_SOURCES = magic.c apprentice.c softmagic.c ascmagic.c \
 	encoding.c compress.c is_tar.c readelf.c print.c fsmagic.c \
-	funcs.c file.h names.h readelf.h tar.h apptype.c \
+	funcs.c file.h readelf.h tar.h apptype.c \
 	file_opts.h elfclass.h mygetopt.h cdf.c cdf_time.c readcdf.c cdf.h
 libmagic_la_LDFLAGS = -no-undefined -version-info 1:0:0
 if MINGW

+ 1 - 1
src/Makefile.in

@@ -247,7 +247,7 @@ AM_CPPFLAGS = -DMAGIC='"$(MAGIC)"'
 AM_CFLAGS = @WARNINGS@
 libmagic_la_SOURCES = magic.c apprentice.c softmagic.c ascmagic.c \
 	encoding.c compress.c is_tar.c readelf.c print.c fsmagic.c \
-	funcs.c file.h names.h readelf.h tar.h apptype.c \
+	funcs.c file.h readelf.h tar.h apptype.c \
 	file_opts.h elfclass.h mygetopt.h cdf.c cdf_time.c readcdf.c cdf.h
 
 libmagic_la_LDFLAGS = -no-undefined -version-info 1:0:0

+ 7 - 5
src/apprentice.c

@@ -32,7 +32,7 @@
 #include "file.h"
 
 #ifndef	lint
-FILE_RCSID("@(#)$File: apprentice.c,v 1.171 2011/09/16 21:04:59 christos Exp $")
+FILE_RCSID("@(#)$File: apprentice.c,v 1.173 2011/12/08 12:38:24 rrt Exp $")
 #endif	/* lint */
 
 #include "magic.h"
@@ -736,8 +736,7 @@ load_1(struct magic_set *ms, int action, const char *fn, int *errs,
 			break;
 		}
 	}
-	if (line)
-		free(line);
+	free(line);
 	(void)fclose(f);
 }
 
@@ -790,6 +789,7 @@ apprentice_load(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
 				file_oomem(ms,
 				    strlen(fn) + strlen(d->d_name) + 2);
 				errs++;
+				closedir(dir);
 				goto out;
 			}
 			if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) {
@@ -804,6 +804,7 @@ apprentice_load(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
 				    realloc(filearr, mlen))) == NULL) {
 					file_oomem(ms, mlen);
 					free(mfn);
+					closedir(dir);
 					errs++;
 					goto out;
 				}
@@ -2300,7 +2301,7 @@ private int
 apprentice_compile(struct magic_set *ms, struct magic **magicp,
     uint32_t *nmagicp, const char *fn)
 {
-	int fd;
+	int fd = -1;
 	char *dbname;
 	int rv = -1;
 
@@ -2331,7 +2332,8 @@ apprentice_compile(struct magic_set *ms, struct magic **magicp,
 		goto out;
 	}
 
-	(void)close(fd);
+	if (fd != -1)
+		(void)close(fd);
 	rv = 0;
 out:
 	free(dbname);

+ 12 - 82
src/ascmagic.c

@@ -35,7 +35,7 @@
 #include "file.h"
 
 #ifndef	lint
-FILE_RCSID("@(#)$File: ascmagic.c,v 1.81 2011/03/15 22:16:29 christos Exp $")
+FILE_RCSID("@(#)$File: ascmagic.c,v 1.84 2011/12/08 12:38:24 rrt Exp $")
 #endif	/* lint */
 
 #include "magic.h"
@@ -46,13 +46,11 @@ FILE_RCSID("@(#)$File: ascmagic.c,v 1.81 2011/03/15 22:16:29 christos Exp $")
 #ifdef HAVE_UNISTD_H
 #include <unistd.h>
 #endif
-#include "names.h"
 
 #define MAXLINELEN 300	/* longest sane line length */
 #define ISSPC(x) ((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' \
 		  || (x) == 0x85 || (x) == '\f')
 
-private int ascmatch(const unsigned char *, const unichar *, size_t);
 private unsigned char *encode_utf8(unsigned char *, size_t, unichar *, size_t);
 private size_t trim_nuls(const unsigned char *, size_t);
 
@@ -70,7 +68,8 @@ trim_nuls(const unsigned char *buf, size_t nbytes)
 }
 
 protected int
-file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
+file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes,
+	int text)
 {
 	unichar *ubuf = NULL;
 	size_t ulen;
@@ -87,17 +86,13 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
 
 	/* If file doesn't look like any sort of text, give up. */
 	if (file_encoding(ms, buf, nbytes, &ubuf, &ulen, &code, &code_mime,
-	    &type) == 0) {
+	    &type) == 0)
 		rv = 0;
-		goto done;
-	}
-
-	rv = file_ascmagic_with_encoding(ms, buf, nbytes, ubuf, ulen, code,
-	    type);
+        else
+		rv = file_ascmagic_with_encoding(ms, buf, nbytes, ubuf, ulen, code,
+						 type, text);
 
- done:
-	if (ubuf)
-		free(ubuf);
+	free(ubuf);
 
 	return rv;
 }
@@ -105,11 +100,10 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
 protected int
 file_ascmagic_with_encoding(struct magic_set *ms, const unsigned char *buf,
     size_t nbytes, unichar *ubuf, size_t ulen, const char *code,
-    const char *type)
+    const char *type, int text)
 {
 	unsigned char *utf8_buf = NULL, *utf8_end;
 	size_t mlen, i;
-	const struct names *p;
 	int rv = -1;
 	int mime = ms->flags & MAGIC_MIME;
 
@@ -124,7 +118,7 @@ file_ascmagic_with_encoding(struct magic_set *ms, const unsigned char *buf,
 	int n_lf = 0;
 	int n_cr = 0;
 	int n_nel = 0;
-	int score, curtype, executable = 0;
+	int executable = 0;
 
 	size_t last_line_end = (size_t)-1;
 	int has_long_lines = 0;
@@ -153,57 +147,10 @@ file_ascmagic_with_encoding(struct magic_set *ms, const unsigned char *buf,
 		    == NULL)
 			goto done;
 		if ((rv = file_softmagic(ms, utf8_buf,
-		    (size_t)(utf8_end - utf8_buf), TEXTTEST)) != 0)
-			goto subtype_identified;
-		else
+		    (size_t)(utf8_end - utf8_buf), TEXTTEST, text)) == 0)
 			rv = -1;
 	}
 
-	/* look for tokens from names.h - this is expensive! */
-	if ((ms->flags & MAGIC_NO_CHECK_TOKENS) != 0)
-		goto subtype_identified;
-
-	i = 0;
-	score = 0;
-	curtype = -1;
-	while (i < ulen) {
-		size_t end;
-
-		/* skip past any leading space */
-		while (i < ulen && ISSPC(ubuf[i]))
-			i++;
-		if (i >= ulen)
-			break;
-
-		/* find the next whitespace */
-		for (end = i + 1; end < nbytes; end++)
-			if (ISSPC(ubuf[end]))
-				break;
-
-		/* compare the word thus isolated against the token list */
-		for (p = names; p < names + NNAMES; p++) {
-			if (ascmatch((const unsigned char *)p->name, ubuf + i,
-			    end - i)) {
-				if (curtype == -1)
-					curtype = p->type;
-				else if (curtype != p->type) {
-					score = p->score;
-					curtype = p->type;
-				} else
-					score += p->score;
-				if (score > 1) {
-					subtype = types[p->type].human;
-					subtype_mime = types[p->type].mime;
-					goto subtype_identified;
-				}
-			}
-		}
-
-		i = end;
-	}
-
-subtype_identified:
-
 	/* Now try to discover other details about the file. */
 	for (i = 0; i < ulen; i++) {
 		if (ubuf[i] == '\n') {
@@ -349,28 +296,11 @@ subtype_identified:
 	}
 	rv = 1;
 done:
-	if (utf8_buf)
-		free(utf8_buf);
+	free(utf8_buf);
 
 	return rv;
 }
 
-private int
-ascmatch(const unsigned char *s, const unichar *us, size_t ulen)
-{
-	size_t i;
-
-	for (i = 0; i < ulen; i++) {
-		if (s[i] != us[i])
-			return 0;
-	}
-
-	if (s[i])
-		return 0;
-	else
-		return 1;
-}
-
 /*
  * Encode Unicode string as UTF-8, returning pointer to character
  * after end of string, or NULL if an invalid character is found.

+ 2 - 2
src/cdf_time.c

@@ -27,7 +27,7 @@
 #include "file.h"
 
 #ifndef lint
-FILE_RCSID("@(#)$File: cdf_time.c,v 1.10 2011/02/10 17:03:16 christos Exp $")
+FILE_RCSID("@(#)$File: cdf_time.c,v 1.11 2011/12/13 13:48:41 christos Exp $")
 #endif
 
 #include <time.h>
@@ -121,7 +121,7 @@ cdf_timestamp_to_timespec(struct timespec *ts, cdf_timestamp_t t)
 	tm.tm_year = (int)(CDF_BASE_YEAR + (t / 365));
 
 	rdays = cdf_getdays(tm.tm_year);
-	t -= rdays;
+	t -= rdays - 1;
 	tm.tm_mday = cdf_getday(tm.tm_year, (int)t);
 	tm.tm_mon = cdf_getmonth(tm.tm_year, (int)t);
 	tm.tm_wday = 0;

+ 2 - 3
src/compress.c

@@ -35,7 +35,7 @@
 #include "file.h"
 
 #ifndef lint
-FILE_RCSID("@(#)$File: compress.c,v 1.67 2011/09/01 12:12:37 christos Exp $")
+FILE_RCSID("@(#)$File: compress.c,v 1.68 2011/12/08 12:38:24 rrt Exp $")
 #endif
 
 #include "magic.h"
@@ -134,8 +134,7 @@ file_zmagic(struct magic_set *ms, int fd, const char *name,
 		}
 	}
 error:
-	if (newbuf)
-		free(newbuf);
+	free(newbuf);
 	ms->flags |= MAGIC_COMPRESS;
 	return rv;
 }

+ 2 - 3
src/encoding.c

@@ -35,7 +35,7 @@
 #include "file.h"
 
 #ifndef	lint
-FILE_RCSID("@(#)$File: encoding.c,v 1.5 2010/07/21 16:47:17 christos Exp $")
+FILE_RCSID("@(#)$File: encoding.c,v 1.6 2011/12/08 12:38:24 rrt Exp $")
 #endif	/* lint */
 
 #include "magic.h"
@@ -133,8 +133,7 @@ file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, uni
 	}
 
  done:
-	if (nbuf)
-		free(nbuf);
+	free(nbuf);
 
 	return rv;
 }

+ 2 - 2
src/file.c

@@ -32,7 +32,7 @@
 #include "file.h"
 
 #ifndef	lint
-FILE_RCSID("@(#)$File: file.c,v 1.144 2011/05/10 17:08:14 christos Exp $")
+FILE_RCSID("@(#)$File: file.c,v 1.145 2011/12/08 12:12:46 rrt Exp $")
 #endif	/* lint */
 
 #include "magic.h"
@@ -116,7 +116,7 @@ private const struct {
 	{ "soft",	MAGIC_NO_CHECK_SOFT },
 	{ "tar",	MAGIC_NO_CHECK_TAR },
 	{ "text",	MAGIC_NO_CHECK_TEXT },	/* synonym for ascii */
-	{ "tokens",	MAGIC_NO_CHECK_TOKENS },
+	{ "tokens",	MAGIC_NO_CHECK_TOKENS }, /* OBSOLETE: ignored for backwards compatibility */
 };
 
 private char *progname;		/* used throughout 		*/

+ 5 - 4
src/file.h

@@ -27,7 +27,7 @@
  */
 /*
  * file.h - definitions for file(1) program
- * @(#)$File: file.h,v 1.134 2011/09/16 21:23:59 christos Exp $
+ * @(#)$File: file.h,v 1.135 2011/09/20 15:30:14 christos Exp $
  */
 
 #ifndef __file_h__
@@ -405,15 +405,16 @@ protected int file_trycdf(struct magic_set *, int, const unsigned char *,
 protected int file_zmagic(struct magic_set *, int, const char *,
     const unsigned char *, size_t);
 #endif
-protected int file_ascmagic(struct magic_set *, const unsigned char *, size_t);
+protected int file_ascmagic(struct magic_set *, const unsigned char *, size_t,
+    int);
 protected int file_ascmagic_with_encoding(struct magic_set *,
     const unsigned char *, size_t, unichar *, size_t, const char *,
-    const char *);
+    const char *, int);
 protected int file_encoding(struct magic_set *, const unsigned char *, size_t,
     unichar **, size_t *, const char **, const char **, const char **);
 protected int file_is_tar(struct magic_set *, const unsigned char *, size_t);
 protected int file_softmagic(struct magic_set *, const unsigned char *, size_t,
-    int);
+    int, int);
 protected struct mlist *file_apprentice(struct magic_set *, const char *, int);
 protected uint64_t file_signextend(struct magic_set *, struct magic *,
     uint64_t);

+ 8 - 7
src/funcs.c

@@ -27,7 +27,7 @@
 #include "file.h"
 
 #ifndef	lint
-FILE_RCSID("@(#)$File: funcs.c,v 1.57 2011/05/11 01:02:41 christos Exp $")
+FILE_RCSID("@(#)$File: funcs.c,v 1.60 2011/12/08 12:38:24 rrt Exp $")
 #endif	/* lint */
 
 #include "magic.h"
@@ -228,7 +228,8 @@ file_buffer(struct magic_set *ms, int fd, const char *inname __attribute__ ((unu
 
 	/* try soft magic tests */
 	if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0)
-		if ((m = file_softmagic(ms, ubuf, nb, BINTEST)) != 0) {
+		if ((m = file_softmagic(ms, ubuf, nb, BINTEST,
+		    looks_text)) != 0) {
 			if ((ms->flags & MAGIC_DEBUG) != 0)
 				(void)fprintf(stderr, "softmagic %d\n", m);
 #ifdef BUILTIN_ELF
@@ -252,10 +253,10 @@ file_buffer(struct magic_set *ms, int fd, const char *inname __attribute__ ((unu
 			goto done;
 		}
 
-	/* try text properties (and possibly text tokens) */
+	/* try text properties */
 	if ((ms->flags & MAGIC_NO_CHECK_TEXT) == 0) {
 
-		if ((m = file_ascmagic(ms, ubuf, nb)) != 0) {
+		if ((m = file_ascmagic(ms, ubuf, nb, looks_text)) != 0) {
 			if ((ms->flags & MAGIC_DEBUG) != 0)
 				(void)fprintf(stderr, "ascmagic %d\n", m);
 			goto done;
@@ -265,7 +266,8 @@ file_buffer(struct magic_set *ms, int fd, const char *inname __attribute__ ((unu
 		if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) {
 			if (looks_text == 0)
 				if ((m = file_ascmagic_with_encoding( ms, ubuf,
-				    nb, u8buf, ulen, code, type)) != 0) {
+				    nb, u8buf, ulen, code, type, looks_text))
+				    != 0) {
 					if ((ms->flags & MAGIC_DEBUG) != 0)
 						(void)fprintf(stderr,
 						    "ascmagic/enc %d\n", m);
@@ -288,8 +290,7 @@ file_buffer(struct magic_set *ms, int fd, const char *inname __attribute__ ((unu
 		if (file_printf(ms, "%s", code_mime) == -1)
 			rv = -1;
 	}
-	if (u8buf)
-		free(u8buf);
+	free(u8buf);
 	if (rv)
 		return rv;
 

+ 11 - 1
src/magic.h

@@ -55,7 +55,17 @@
 #define MAGIC_NO_CHECK_ENCODING 0x200000 /* Don't check text encodings */
 
 /* No built-in tests; only consult the magic file */
-#define MAGIC_NO_CHECK_BUILTIN	0x3fb000
+#define MAGIC_NO_CHECK_BUILTIN	( \
+	MAGIC_NO_CHECK_COMPRESS	| \
+	MAGIC_NO_CHECK_TAR	| \
+/*	MAGIC_NO_CHECK_SOFT	| */ \
+	MAGIC_NO_CHECK_APPTYPE	| \
+	MAGIC_NO_CHECK_ELF	| \
+	MAGIC_NO_CHECK_TEXT	| \
+	MAGIC_NO_CHECK_CDF	| \
+	MAGIC_NO_CHECK_TOKENS	| \
+	MAGIC_NO_CHECK_ENCODING \
+)
 
 /* Defined for backwards compatibility (renamed) */
 #define	MAGIC_NO_CHECK_ASCII	MAGIC_NO_CHECK_TEXT

+ 0 - 176
src/names.h

@@ -1,176 +0,0 @@
-/*
- * Copyright (c) Ian F. Darwin 1986-1995.
- * Software written by Ian F. Darwin and others;
- * maintained 1995-present by Christos Zoulas and others.
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice immediately at the beginning of the file, without modification,
- *    this list of conditions, and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *  
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-/*
- * Names.h - names and types used by ascmagic in file(1).
- * These tokens are here because they can appear anywhere in
- * the first HOWMANY bytes, while tokens in MAGIC must
- * appear at fixed offsets into the file. Don't make HOWMANY
- * too high unless you have a very fast CPU.
- *
- * $File: names.h,v 1.33 2010/10/08 21:58:44 christos Exp $
- */
-
-/*
-	modified by Chris Lowth - 9 April 2000
-	to add mime type strings to the types table.
-*/
-
-/* these types are used to index the table 'types': keep em in sync! */
-#define	L_C	0		/* first and foremost on UNIX */
-#define	L_CC	1		/* Bjarne's postincrement */
-#define	L_MAKE	2		/* Makefiles */
-#define	L_PLI	3		/* PL/1 */
-#define	L_MACH	4		/* some kinda assembler */
-#define	L_ENG	5		/* English */
-#define	L_PAS	6		/* Pascal */
-#define	L_MAIL	7		/* Electronic mail */
-#define	L_NEWS	8		/* Usenet Netnews */
-#define	L_JAVA	9		/* Java code */
-#define	L_HTML	10		/* HTML */
-#define	L_BCPL	11		/* BCPL */
-#define	L_M4	12		/* M4 */
-#define	L_PO	13		/* PO */
-
-static const struct {
-	char human[48];
-	char mime[16];
-} types[] = {
-	{ "C program",					"text/x-c", },
-	{ "C++ program",				"text/x-c++" },
-	{ "make commands",				"text/x-makefile" },
-	{ "PL/1 program",				"text/x-pl1" },
-	{ "assembler program",				"text/x-asm" },
-	{ "English",					"text/plain" },
-	{ "Pascal program",				"text/x-pascal" },
-	{ "mail",					"text/x-mail" },
-	{ "news",					"text/x-news" },
-	{ "Java program",				"text/x-java" },
-	{ "HTML document",				"text/html", },
-	{ "BCPL program",				"text/x-bcpl" },
-	{ "M4 macro language pre-processor",		"text/x-m4" },
-	{ "PO (gettext message catalogue)",             "text/x-po" },
-	{ "cannot happen error on names.h/types",	"error/x-error" }
-};
-
-/*
- * XXX - how should we distinguish Java from C++?
- * The trick used in a Debian snapshot, of having "extends" or "implements"
- * as tags for Java, doesn't work very well, given that those keywords
- * are often preceded by "class", which flags it as C++.
- *
- * Perhaps we need to be able to say
- *
- *	If "class" then
- *
- *		if "extends" or "implements" then
- *			Java
- *		else
- *			C++
- *	endif
- *
- * Or should we use other keywords, such as "package" or "import"?
- * Unfortunately, Ada95 uses "package", and Modula-3 uses "import",
- * although I infer from the language spec at
- *
- *	http://www.research.digital.com/SRC/m3defn/html/m3.html
- *
- * that Modula-3 uses "IMPORT" rather than "import", i.e. it must be
- * in all caps.
- *
- * So, for now, we go with "import".  We must put it before the C++
- * stuff, so that we don't misidentify Java as C++.  Not using "package"
- * means we won't identify stuff that defines a package but imports
- * nothing; hopefully, very little Java code imports nothing (one of the
- * reasons for doing OO programming is to import as much as possible
- * and write only what you need to, right?).
- *
- * Unfortunately, "import" may cause us to misidentify English text
- * as Java, as it comes after "the" and "The".  Perhaps we need a fancier
- * heuristic to identify Java?
- */
-static const struct names {
-	char name[14];
-	unsigned char type;
-	unsigned char score;
-
-} names[] = {
-	/* These must be sorted by eye for optimal hit rate */
-	/* Add to this list only after substantial meditation */
-	{"msgid",	L_PO, 1 },
-	{"dnl",		L_M4, 2 },
-	{"import",	L_JAVA, 2 },
-	{"\"libhdr\"",	L_BCPL, 2 },
-	{"\"LIBHDR\"",	L_BCPL, 2 },
-	{"//",		L_CC, 2 },
-	{"template",	L_CC, 1 },
-	{"virtual",	L_CC, 1 },
-	{"class",	L_CC, 2 },
-	{"public:",	L_CC, 2 },
-	{"private:",	L_CC, 2 },
-	{"/*",		L_C, 2 },	/* must precede "The", "the", etc. */
-	{"#include",	L_C, 2 },
-	{"char",	L_C, 2 },
-	{"The",		L_ENG, 2 },
-	{"the",		L_ENG, 2 },
-	{"double",	L_C, 1 },
-	{"extern",	L_C, 2 },
-	{"float",	L_C, 1 },
-	{"struct",	L_C, 1 },
-	{"union",	L_C, 1 },
-	{"main(",	L_C, 2 },
-	{"CFLAGS",	L_MAKE, 2 },
-	{"LDFLAGS",	L_MAKE, 2 },
-	{"all:",	L_MAKE, 2 },
-	{".PRECIOUS",	L_MAKE, 2 },
-	{".ascii",	L_MACH, 2 },
-	{".asciiz",	L_MACH, 2 },
-	{".byte",	L_MACH, 2 },
-	{".even",	L_MACH, 2 },
-	{".globl",	L_MACH, 2 },
-	{".text",	L_MACH, 2 },
-	{"clr",		L_MACH, 2 },
-	{"(input,",	L_PAS, 2 },
-	{"program",	L_PAS, 1 },
-	{"record",	L_PAS, 1 },
-	{"dcl",		L_PLI, 2 },
-	{"Received:",	L_MAIL, 2 },
-	{">From",	L_MAIL, 2 },
-	{"Return-Path:",L_MAIL, 2 },
-	{"Cc:",		L_MAIL, 2 },
-	{"Newsgroups:",	L_NEWS, 2 },
-	{"Path:",	L_NEWS, 2 },
-	{"Organization:",L_NEWS, 2 },
-	{"href=",	L_HTML, 2 },
-	{"HREF=",	L_HTML, 2 },
-	{"<body",	L_HTML, 2 },
-	{"<BODY",	L_HTML, 2 },
-	{"<html",	L_HTML, 2 },
-	{"<HTML",	L_HTML, 2 },
-	{"<!--",	L_HTML, 2 },
-};
-#define NNAMES (sizeof(names)/sizeof(struct names))

+ 21 - 3
src/print.c

@@ -32,7 +32,7 @@
 #include "file.h"
 
 #ifndef lint
-FILE_RCSID("@(#)$File: print.c,v 1.70 2011/08/14 09:03:12 christos Exp $")
+FILE_RCSID("@(#)$File: print.c,v 1.71 2011/09/20 15:28:09 christos Exp $")
 #endif  /* lint */
 
 #include <string.h>
@@ -51,8 +51,8 @@ file_mdump(struct magic *m)
 {
 	private const char optyp[] = { FILE_OPS };
 
-	(void) fprintf(stderr, "%.*s %u", (m->cont_level & 7) + 1, ">>>>>>>>",
-		       m->offset);
+	(void) fprintf(stderr, "%u: %.*s %u", m->lineno,
+	    (m->cont_level & 7) + 1, ">>>>>>>>", m->offset);
 
 	if (m->flag & INDIR) {
 		(void) fprintf(stderr, "(%s,",
@@ -87,6 +87,24 @@ file_mdump(struct magic *m)
 				(void) fputc(CHAR_IGNORE_UPPERCASE, stderr);
 			if (m->str_flags & REGEX_OFFSET_START) 
 				(void) fputc(CHAR_REGEX_OFFSET_START, stderr);
+			if (m->str_flags & STRING_TEXTTEST)
+				(void) fputc(CHAR_TEXTTEST, stderr);
+			if (m->str_flags & STRING_BINTEST)
+				(void) fputc(CHAR_BINTEST, stderr);
+			if (m->str_flags & PSTRING_1_BE)
+				(void) fputc(CHAR_PSTRING_1_BE, stderr);
+			if (m->str_flags & PSTRING_2_BE)
+				(void) fputc(CHAR_PSTRING_2_BE, stderr);
+			if (m->str_flags & PSTRING_2_LE)
+				(void) fputc(CHAR_PSTRING_2_LE, stderr);
+			if (m->str_flags & PSTRING_4_BE)
+				(void) fputc(CHAR_PSTRING_4_BE, stderr);
+			if (m->str_flags & PSTRING_4_LE)
+				(void) fputc(CHAR_PSTRING_4_LE, stderr);
+			if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF)
+				(void) fputc(
+				    CHAR_PSTRING_LENGTH_INCLUDES_ITSELF,
+				    stderr);
 		}
 		if (m->str_range)
 			(void) fprintf(stderr, "/%u", m->str_range);

+ 5 - 2
src/readcdf.c

@@ -26,7 +26,7 @@
 #include "file.h"
 
 #ifndef lint
-FILE_RCSID("@(#)$File: readcdf.c,v 1.26 2011/08/26 13:38:28 christos Exp $")
+FILE_RCSID("@(#)$File: readcdf.c,v 1.27 2011/09/28 13:30:10 christos Exp $")
 #endif
 
 #include <stdlib.h>
@@ -144,6 +144,8 @@ cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info,
         if (!NOTMIME(ms)) {
 		if (str == NULL)
 			return 0;
+                if (file_printf(ms, "application/%s", str) == -1)
+                        return -1;
         }
         return 1;
 }
@@ -292,7 +294,8 @@ out1:
         free(sat.sat_tab);
 out0:
         if (i != 1) {
-                if (file_printf(ms, "Composite Document File V2 Document") == -1)
+                if (file_printf(ms, "Composite Document File V2 Document")
+		    == -1)
                         return -1;
                 if (*expn)
                         if (file_printf(ms, ", %s%s", corrupt, expn) == -1)

+ 16 - 11
src/softmagic.c

@@ -32,7 +32,7 @@
 #include "file.h"
 
 #ifndef	lint
-FILE_RCSID("@(#)$File: softmagic.c,v 1.145 2011/05/13 22:15:40 christos Exp $")
+FILE_RCSID("@(#)$File: softmagic.c,v 1.147 2011/11/05 15:44:22 rrt Exp $")
 #endif	/* lint */
 
 #include "magic.h"
@@ -43,9 +43,9 @@ FILE_RCSID("@(#)$File: softmagic.c,v 1.145 2011/05/13 22:15:40 christos Exp $")
 
 
 private int match(struct magic_set *, struct magic *, uint32_t,
-    const unsigned char *, size_t, int);
+    const unsigned char *, size_t, int, int);
 private int mget(struct magic_set *, const unsigned char *,
-    struct magic *, size_t, unsigned int);
+    struct magic *, size_t, unsigned int, int);
 private int magiccheck(struct magic_set *, struct magic *);
 private int32_t mprint(struct magic_set *, struct magic *);
 private int32_t moffset(struct magic_set *, struct magic *);
@@ -66,12 +66,14 @@ private void cvt_64(union VALUETYPE *, const struct magic *);
  */
 /*ARGSUSED1*/		/* nbytes passed for regularity, maybe need later */
 protected int
-file_softmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes, int mode)
+file_softmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes,
+    int mode, int text)
 {
 	struct mlist *ml;
 	int rv;
 	for (ml = ms->mlist->next; ml != ms->mlist; ml = ml->next)
-		if ((rv = match(ms, ml->magic, ml->nmagic, buf, nbytes, mode)) != 0)
+		if ((rv = match(ms, ml->magic, ml->nmagic, buf, nbytes, mode,
+		    text)) != 0)
 			return rv;
 
 	return 0;
@@ -106,7 +108,7 @@ file_softmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes, in
  */
 private int
 match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
-    const unsigned char *s, size_t nbytes, int mode)
+    const unsigned char *s, size_t nbytes, int mode, int text)
 {
 	uint32_t magindex = 0;
 	unsigned int cont_level = 0;
@@ -123,7 +125,10 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
 		int flush = 0;
 		struct magic *m = &magic[magindex];
 
-		if ((m->flag & mode) != mode) {
+		if ((IS_STRING(m->type) &&
+		     ((text && (m->str_flags & (STRING_BINTEST | STRING_TEXTTEST)) == STRING_BINTEST) ||
+		      (!text && (m->str_flags & (STRING_TEXTTEST | STRING_BINTEST)) == STRING_TEXTTEST))) ||
+		    (m->flag & mode) != mode) {
 			/* Skip sub-tests */
 			while (magic[magindex + 1].cont_level != 0 &&
 			       ++magindex < nmagic)
@@ -135,7 +140,7 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
 		ms->line = m->lineno;
 
 		/* if main entry matches, print it... */
-		switch (mget(ms, s, m, nbytes, cont_level)) {
+		switch (mget(ms, s, m, nbytes, cont_level, text)) {
 		case -1:
 			return -1;
 		case 0:
@@ -218,7 +223,7 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
 					continue;
 			}
 #endif
-			switch (mget(ms, s, m, nbytes, cont_level)) {
+			switch (mget(ms, s, m, nbytes, cont_level, text)) {
 			case -1:
 				return -1;
 			case 0:
@@ -1013,7 +1018,7 @@ mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir,
 
 private int
 mget(struct magic_set *ms, const unsigned char *s,
-    struct magic *m, size_t nbytes, unsigned int cont_level)
+    struct magic *m, size_t nbytes, unsigned int cont_level, int text)
 {
 	uint32_t offset = ms->offset;
 	uint32_t count = m->str_range;
@@ -1578,7 +1583,7 @@ mget(struct magic_set *ms, const unsigned char *s,
 		if (nbytes < offset)
 			return 0;
 		return file_softmagic(ms, s + offset, nbytes - offset,
-		    BINTEST);
+		    BINTEST, text);
 
 	case FILE_DEFAULT:	/* nothing to check */
 	default:

+ 2 - 3
src/vasprintf.c

@@ -108,7 +108,7 @@ you use strange formats.
 #include "file.h"
 
 #ifndef	lint
-FILE_RCSID("@(#)$File: vasprintf.c,v 1.7 2009/02/03 20:27:52 christos Exp $")
+FILE_RCSID("@(#)$File: vasprintf.c,v 1.8 2011/12/08 12:38:24 rrt Exp $")
 #endif	/* lint */
 
 #include <assert.h>
@@ -608,8 +608,7 @@ static int core(xprintf_struct *s)
   return s->pseudo_len;
 
  free_EOF:
-  if (s->buffer_base != NULL)
-    free(s->buffer_base);
+  free(s->buffer_base);
   return EOF;
 }