Browse Source

Import upstream version 3.37

Christos Zoulas 23 years ago
parent
commit
8687aedc87
93 changed files with 6012 additions and 1145 deletions
  1. 1 1
      Header
  2. 3 3
      MAINT
  3. 12 0
      Magdir/adi
  4. 7 0
      Magdir/allegro
  5. 9 0
      Magdir/alpha
  6. 1 1
      Magdir/amigaos
  7. 36 0
      Magdir/animation
  8. 111 0
      Magdir/apple
  9. 22 0
      Magdir/archive
  10. 26 2
      Magdir/audio
  11. 17 0
      Magdir/blender
  12. 6 0
      Magdir/citrus
  13. 46 0
      Magdir/claris
  14. 40 77
      Magdir/commands
  15. 2 1
      Magdir/compress
  16. 122 0
      Magdir/console
  17. 5 0
      Magdir/ctags
  18. 56 77
      Magdir/database
  19. 12 0
      Magdir/dyadic
  20. 9 0
      Magdir/editors
  21. 50 6
      Magdir/elf
  22. 10 0
      Magdir/epoc
  23. 49 9
      Magdir/filesystems
  24. 3 0
      Magdir/fonts
  25. 15 2
      Magdir/freebsd
  26. 27 0
      Magdir/fsav
  27. 3 0
      Magdir/gnu
  28. 18 0
      Magdir/hitachi-sh
  29. 140 0
      Magdir/hp
  30. 97 7
      Magdir/images
  31. 8 0
      Magdir/ispell
  32. 3 3
      Magdir/jpeg
  33. 2 2
      Magdir/lif
  34. 0 3
      Magdir/linux
  35. 6 2
      Magdir/lisp
  36. 288 32
      Magdir/macintosh
  37. 4 0
      Magdir/magic
  38. 7 0
      Magdir/mail.news
  39. 57 0
      Magdir/maple
  40. 59 0
      Magdir/mathematica
  41. 21 18
      Magdir/sgi
  42. 18 0
      Magdir/motorola
  43. 127 4
      Magdir/msdos
  44. 44 0
      Magdir/msvc
  45. 23 0
      Magdir/natinst
  46. 20 4
      Magdir/netbsd
  47. 21 0
      Magdir/netscape
  48. 0 2
      Magdir/os9
  49. 12 0
      Magdir/parix
  50. 2 0
      Magdir/printer
  51. 9 0
      Magdir/project
  52. 5 0
      Magdir/python
  53. 11 0
      Magdir/riff
  54. 10 18
      Magdir/sgml
  55. 22 0
      Magdir/sharc
  56. 5 0
      Magdir/sketch
  57. 24 0
      Magdir/smalltalk
  58. 124 4
      Magdir/sniffer
  59. 0 1
      Magdir/softquad
  60. 142 0
      Magdir/sysex
  61. 2 1
      Magdir/troff
  62. 7 0
      Magdir/tuxedo
  63. 3 0
      Magdir/varied.out
  64. 12 0
      Magdir/vmware
  65. 65 0
      Magdir/vorbis
  66. 10 0
      Magdir/xdelta
  67. 44 29
      Makefile.am
  68. 60 20
      Makefile.in
  69. 3 2
      Makefile.std
  70. 9 2
      README
  71. 8 2
      acconfig.h
  72. 45 0
      acinclude.m4
  73. 64 15
      aclocal.m4
  74. 591 82
      apprentice.c
  75. 606 43
      ascmagic.c
  76. 100 40
      compress.c
  77. 31 5
      config.h.in
  78. 377 92
      configure
  79. 22 2
      configure.in
  80. 149 102
      file.c
  81. 62 22
      file.h
  82. 121 88
      file.man
  83. 16 12
      fsmagic.c
  84. 0 86
      internat.c
  85. 12 12
      is_tar.c
  86. 18 4
      magic.man
  87. 543 0
      magic.mime
  88. 39 0
      mkinstalldirs
  89. 29 17
      names.h
  90. 64 2
      patchlevel.h
  91. 104 46
      print.c
  92. 125 69
      readelf.c
  93. 642 71
      softmagic.c

+ 1 - 1
Header

@@ -1,4 +1,4 @@
-#! file
+# Magic
 # Magic data for file(1) command.
 # Machine-generated from src/cmd/file/magdir/*; edit there only!
 # Format is described in magic(files), where:

+ 3 - 3
MAINT

@@ -1,4 +1,4 @@
-$Id: MAINT,v 1.3 1997/11/05 16:03:18 christos Exp $
+$Id: MAINT,v 1.4 2000/04/11 02:43:51 christos Exp $
 
 Maintenance notes:
 
@@ -29,5 +29,5 @@ your changed version.
 
 Thank you for your assistance and cooperation.
 
-Mark Moraes		Christos Zoulas
-moraes@deshaw.com	christos@astron.com
+Christos Zoulas
+christos@astron.com

+ 12 - 0
Magdir/adi

@@ -0,0 +1,12 @@
+
+#------------------------------------------------------------------------------
+# adi: file(1) magic for ADi's objects
+# From Gregory McGarry <g.mcgarry@ieee.org>
+#
+0	leshort		0x521c		COFF DSP21k
+>18	lelong		&02		executable,
+>18	lelong		^02
+>>18	lelong		&01		static object,
+>>18	lelong		^01		relocatable object,
+>18	lelong		&010		stripped
+>18	lelong		^010		not stripped

+ 7 - 0
Magdir/allegro

@@ -0,0 +1,7 @@
+#------------------------------------------------------------------------------
+# allegro:  file(1) magic for Allegro datafiles
+# Toby Deshane <hac@shoelace.digivill.net>
+#
+0 belong 0x736C6821   Allegro datafile (packed)
+0 belong 0x736C682E   Allegro datafile (not packed/autodetect)
+0 belong 0x736C682B   Allegro datafile (appended exe data)

+ 9 - 0
Magdir/alpha

@@ -17,5 +17,14 @@
 
 # Basic recognition of Digital UNIX core dumps - Mike Bremford <mike@opac.bl.uk>
 #
+# The actual magic number is just "Core", followed by a 2-byte version
+# number; however, treating any file that begins with "Core" as a Digital
+# UNIX core dump file may produce too many false hits, so we include one
+# byte of the version number as well; DU 5.0 appears only to be up to
+# version 2.
+#
 0	string		Core\001	Alpha COFF format core dump (Digital UNIX)
 >24	string		>\0		\b, from '%s'
+0	string		Core\002	Alpha COFF format core dump (Digital UNIX)
+>24	string		>\0		\b, from '%s'
+

+ 1 - 1
Magdir/amigaos

@@ -4,7 +4,7 @@
 #
 # From ignatios@cs.uni-bonn.de (Ignatios Souvatzis)
 # Some formats are still missing: AmigaOS special IFF's, e.g.: FORM....CTLG
-# (the others should be seperate, anyway)
+# (the others should be separate, anyway)
 #
 0	belong		0x000003f3	AmigaOS loadseg()ble executable/binary
 0	belong		0x000003e7	AmigaOS object/library data

+ 36 - 0
Magdir/animation

@@ -11,8 +11,16 @@
 #>4	beshort&0xfff0	x			(%d x
 #>5	beshort&0x0fff  x			%d)
 0	belong		0x000001ba		MPEG system stream data
+
 # MPEG Audio (*.mpx)
 # from dreesen@math.fu-berlin.de
+
+# XXX
+# This conflicts with the FF FE signature for UTF-16-encoded Unicode
+# text, which will be identified as an MP3 file.  I don't have any MP3s
+# so I don't know how to (or even if it's possible to) change this to
+# tell the two apart.    enf@pobox.com
+
 0       beshort         &0xfff0         MP
 # MPEG 1.0
 >1      byte&0x08       =0x08           \b
@@ -132,3 +140,31 @@
 0	string		MOVI		Silicon Graphics movie file
 4	string		moov		Apple QuickTime movie file (moov)
 4	string		mdat		Apple QuickTime movie file (mdat)
+
+# iso 13818 transport stream
+#
+# from Oskar Schirmer <schirmer@scara.com> Feb 3, 2001 (ISO 13818.1)
+# (the following is a little bit restrictive and works fine for a stream
+#  that starts with PAT properly. it won't work for stream data, that is
+#  cut from an input device data right in the middle, but this shouldn't
+#  disturb)
+# syncbyte      8 bit	0x47
+# error_ind     1 bit	-
+# payload_start 1 bit	1
+# priority      1 bit	-
+# PID          13 bit	0x0000
+# scrambling    2 bit	-
+# adaptfld_ctrl 2 bit	1 or 3
+# conti_count   4 bit	0
+0	belong&0xFF5FFF1F	0x47400010	MPEG transport stream data
+>188	byte			!0x47		CORRUPTED
+
+# DIF digital video file format <mpruett@sgi.com>
+0	belong&0xffffff00	0x1f070000      DIF
+>4	byte			&0x01		(DVCPRO) movie file
+>4	byte			^0x01		(DV) movie file
+>3	byte			&0x80		(PAL)
+>3	byte			^0x80		(NTSC)
+
+# Microsoft Advanced Streaming Format (ASF) <mpruett@sgi.com>
+0	belong			0x3026b275	Microsoft ASF

+ 111 - 0
Magdir/apple

@@ -4,8 +4,119 @@
 #
 0	string		FiLeStArTfIlEsTaRt	binscii (apple ][) text
 0	string		\x0aGL			Binary II (apple ][) data
+0	string		\x76\xff		Squeezed (apple ][) data
 0	string		NuFile			NuFile archive (apple ][) data
 0	string		N\xf5F\xe9l\xe5		NuFile archive (apple ][) data
 0	belong		0x00051600		AppleSingle encoded Macintosh file
 0	belong		0x00051607		AppleDouble encoded Macintosh file
 
+# magic for Newton PDA package formats
+# from Ruda Moura <ruda@helllabs.org>
+0	string	package0	Newton package, NOS 1.x,
+>12	belong	&0x80000000	AutoRemove,
+>12	belong	&0x40000000	CopyProtect,
+>12	belong	&0x10000000	NoCompression,
+>12	belong	&0x04000000	Relocation,
+>12	belong	&0x02000000	UseFasterCompression,
+>16	belong	x		version %d
+
+0	string	package1	Newton package, NOS 2.x,
+>12	belong	&0x80000000	AutoRemove,
+>12	belong	&0x40000000	CopyProtect,
+>12	belong	&0x10000000	NoCompression,
+>12	belong	&0x04000000	Relocation,
+>12	belong	&0x02000000	UseFasterCompression,
+>16	belong	x		version %d
+
+# The following entries for the Apple II are for files that have
+# been transferred as raw binary data from an Apple, without having
+# been encapsulated by any of the above archivers.
+#
+# In general, Apple II formats are hard to identify because Apple DOS
+# and especially Apple ProDOS have strong typing in the file system and
+# therefore programmers never felt much need to include type information
+# in the files themselves.
+#
+# Eric Fischer <enf@pobox.com>
+
+# AppleWorks word processor:
+#
+# This matches the standard tab stops for an AppleWorks file, but if
+# a file has a tab stop set in the first four columns this will fail.
+#
+# The "O" is really the magic number, but that's so common that it's
+# necessary to check the tab stops that follow it to avoid false positives.
+
+4       string          O====   AppleWorks word processor data
+>85     byte&0x01       >0      \b, zoomed
+>90     byte&0x01       >0      \b, paginated
+>92     byte&0x01       >0      \b, with mail merge
+#>91    byte            x       \b, left margin %d
+
+# AppleWorks database:
+#
+# This isn't really a magic number, but it's the closest thing to one
+# that I could find.  The 1 and 2 really mean "order in which you defined
+# categories" and "left to right, top to bottom," respectively; the D and R
+# mean that the cursor should move either down or right when you press Return.
+
+#30	string		\x01D	AppleWorks database data
+#30	string		\x02D	AppleWorks database data
+#30	string		\x01R	AppleWorks database data
+#30	string		\x02R	AppleWorks database data
+
+# AppleWorks spreadsheet:
+#
+# Likewise, this isn't really meant as a magic number.  The R or C means
+# row- or column-order recalculation; the A or M means automatic or manual
+# recalculation.
+
+#131	string		RA	AppleWorks spreadsheet data
+#131	string		RM	AppleWorks spreadsheet data
+#131	string		CA	AppleWorks spreadsheet data
+#131	string		CM	AppleWorks spreadsheet data
+
+# Applesoft BASIC:
+#
+# This is incredibly sloppy, but will be true if the program was
+# written at its usual memory location of 2048 and its first line
+# number is less than 256.  Yuck.
+
+0       belong&0xff00ff 0x80000 Applesoft BASIC program data
+#>2     leshort         x       \b, first line number %d
+
+# ORCA/EZ assembler:
+# 
+# This will not identify ORCA/M source files, since those have
+# some sort of date code instead of the two zero bytes at 6 and 7
+# XXX Conflicts with ELF
+#4       belong&0xff00ffff       0x01000000      ORCA/EZ assembler source data
+#>5      byte                    x               \b, build number %d
+
+# Broderbund Fantavision
+#
+# I don't know what these values really mean, but they seem to recur.
+# Will they cause too many conflicts?
+
+# Probably :-)
+#2	belong&0xFF00FF		0x040008	Fantavision movie data
+
+# Some attempts at images.
+#
+# These are actually just bit-for-bit dumps of the frame buffer, so
+# there's really no reasonable way to distinguish them except for their
+# address (if preserved) -- 8192 or 16384 -- and their length -- 8192
+# or, occasionally, 8184.
+#
+# Nevertheless this will manage to catch a lot of images that happen
+# to have a solid-colored line at the bottom of the screen.
+
+8144	string	\x7F\x7F\x7F\x7F\x7F\x7F\x7F\x7F	Apple II image with white background
+8144	string	\x55\x2A\x55\x2A\x55\x2A\x55\x2A	Apple II image with purple background
+8144	string	\x2A\x55\x2A\x55\x2A\x55\x2A\x55	Apple II image with green background
+8144	string	\xD5\xAA\xD5\xAA\xD5\xAA\xD5\xAA	Apple II image with blue background
+8144	string	\xAA\xD5\xAA\xD5\xAA\xD5\xAA\xD5	Apple II image with orange background
+
+# Beagle Bros. Apple Mechanic fonts
+
+0	belong&0xFF00FFFF	0x6400D000	Apple Mechanic font

+ 22 - 0
Magdir/archive

@@ -182,6 +182,8 @@
 2	string		-lh3-		LHa 2.x? archive data [lh3]
 2	string		-lh4-		LHa (2.x) archive data [lh4]
 2	string		-lh5-		LHa (2.x) archive data [lh5]
+2	string		-lh6-		LHa (2.x) archive data [lh6]
+2	string		-lh7-		LHa (2.x) archive data [lh7]
 >20	byte		x		- header level %d
 
 # RAR archiver (Greg Roelofs, newt@uchicago.edu)
@@ -228,3 +230,23 @@
 2       string          -pm2-           PMarc archive data [pm2]
 2       string          -pms-           PMarc SFX archive (CP/M, DOS)
 5       string          -pc1-           PopCom compressed executable (CP/M)
+
+# From rafael@icp.inpg.fr (Rafael Laboissiere) 
+# The Project Revision Control System (see
+# http://www.XCF.Berkeley.EDU/~jmacd/prcs.html) generates a packaged project
+# file which is recognized by the following entry:
+0	leshort		0xeb81	PRCS packaged project
+
+# Microsoft cabinets 
+# by David Necas (Yeti) <yeti@physics.muni.cz>
+0	string	MSCF\0\0\0\0	Microsoft cabinet file data,
+>25	byte	x		v%d
+>24	byte	x		\b.%d
+
+# GTKtalog catalogs 
+# by David Necas (Yeti) <yeti@physics.muni.cz>
+4	string	gtktalog\ 	GTKtalog catalog data,
+>13	string	3		version 3
+>>14	beshort	0x677a		(gzipped)
+>>14	beshort	!0x677a		(not gzipped)
+>13	string	>3		version %s

+ 26 - 2
Magdir/audio

@@ -102,8 +102,9 @@
 1080	string	8CHN		8-channel Fasttracker module sound data
 1080	string	CD81		8-channel Oktalyzer module sound data
 1080	string	OKTA		8-channel Oktalyzer module sound data
-1082	string	CH
->1080	string	>/0		%.2s-channel Fasttracker "oktalyzer" module sound data
+# Not good enough.
+#1082	string	CH
+#>1080	string	>/0		%.2s-channel Fasttracker "oktalyzer" module sound data
 1080	string	16CN		16-channel Taketracker module sound data
 1080	string	32CN		32-channel Taketracker module sound data
 
@@ -118,3 +119,26 @@
 >14	beshort		=1			single song,
 >14	beshort		>1			%d songs,
 >16	beshort		>0			default song: %d
+
+# IRCAM <mpruett@sgi.com>
+# VAX and MIPS files are little-endian; Sun and NeXT are big-endian
+0	belong		0x64a30100		IRCAM file (VAX)
+0	belong		0x64a30200		IRCAM file (Sun)
+0	belong		0x64a30300		IRCAM file (MIPS little-endian)
+0	belong		0x64a30400		IRCAM file (NeXT)
+
+# NIST SPHERE <mpruett@sgi.com>
+0	string		NIST_1A\n\ \ \ 1024\n	NIST SPHERE file
+
+# Sample Vision <mpruett@sgi.com>
+0	string		SOUND\ SAMPLE\ DATA\ 	Sample Vision file
+
+# Audio Visual Research <mpruett@sgi.com>
+0	string		2BIT			Audio Visual Research file
+
+# From Felix von Leitner <leitner@fefe.de>
+0	string		OggS	Ogg-Vorbis compressed sound file
+
+# SGI SoundTrack <mpruett@sgi.com>
+0	string		_SGI_SoundTrack		SGI SoundTrack project file
+0	string		ID3	mp3 file with ID3 2.0 tag

+ 17 - 0
Magdir/blender

@@ -0,0 +1,17 @@
+#------------------------------------------------------------------------------
+# blender: file(1) magic for Blender 3D data files
+#
+# Coded by Guillermo S. Romero <gsromero@alumnos.euitt.upm.es> using the
+# data from Ton Roosendaal <ton@blender.nl>. Ton or his company do not
+# support the rule, so mail GSR if problems with it. Rule version: 1.1.
+# You can get latest version with comments and details about the format
+# at http://acd.asoc.euitt.upm.es/~gsromero/3d/blender/magic.blender
+
+0	string	=BLENDER	Blender3D,
+>7	string	=_		saved as 32-bits
+>7      string	=-		saved as 64-bits
+>8	string	=v		little endian
+>8	string	=V		big endian
+>9	byte	x		with version %c.
+>10	byte	x		\b%c
+>11	byte	x		\b%c

+ 6 - 0
Magdir/citrus

@@ -0,0 +1,6 @@
+#------------------------------------------------------------------------------
+# citrus locale declaration
+#
+
+0	string		RuneCT		Citrus locale declaration for LC_CTYPE
+

+ 46 - 0
Magdir/claris

@@ -0,0 +1,46 @@
+
+#------------------------------------------------------------------------------
+# claris:  file(1) magic for claris
+# "H. Nanosecond" <aldomel@ix.netcom.com>
+# Claris Works, a word processor, etc.
+# Version 3.0
+
+# .pct claris works clip art files
+#0000000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000
+#*
+#0001000 #010 250 377 377 377 377 000 213 000 230 000 021 002 377 014 000
+#null to byte 1000 octal
+514	string	\377\377\377\377\000	Claris clip art?
+>0	string	\0\0\0\0\0\0\0\0\0\0\0\0\0	yes.
+514	string	\377\377\377\377\001	Claris clip art?
+>0	string	\0\0\0\0\0\0\0\0\0\0\0\0\0	yes.
+
+# Claris works files
+# .cwk
+0	string	\002\000\210\003\102\117\102\117\000\001\206 Claris works document
+# .plt
+0	string	\020\341\000\000\010\010	Claris Works pallete files .plt
+
+# .msp: a dictionary file. I am not sure about this; I have only one .msp file
+0	string	\002\271\262\000\040\002\000\164	Claris works dictionary
+
+# .usp are user dictionary bits
+# I am not sure about a magic header:
+#0000000 001 123 160 146 070 125 104 040 136 123 015 012 160 157 144 151
+#        soh   S   p   f   8   U   D  sp   ^   S  cr  nl   p   o   d   i
+#0000020 141 164 162 151 163 164 040 136 123 015 012 144 151 166 040 043
+#          a   t   r   i   s   t  sp   ^   S  cr  nl   d   i   v  sp   #
+
+# .mth Thesaurus
+# starts with \0 but no magic header
+
+# .chy Hyphenation file
+# I am not sure: 000 210 034 000 000
+
+# other claris files
+#./windows/claris/useng.ndx: data
+#./windows/claris/xtndtran.l32: data
+#./windows/claris/xtndtran.lst: data
+#./windows/claris/clworks.lbl: data
+#./windows/claris/clworks.prf: data
+#./windows/claris/userd.spl: data

+ 40 - 77
Magdir/commands

@@ -2,98 +2,61 @@
 #------------------------------------------------------------------------------
 # commands:  file(1) magic for various shells and interpreters
 #
-0	string		:\ shell archive or commands for antique kernel text
-0	string		#!/bin/sh		Bourne shell script text
-0	string		#!\ /bin/sh		Bourne shell script text
-0	string		#!\	/bin/sh		Bourne shell script text
-0	string		#!/bin/csh		C shell script text
-0	string		#!\ /bin/csh		C shell script text
-0	string		#!\	/bin/csh	C shell script text
+0	string		:			shell archive or script for antique kernel text
+0	string/b	#!\ /bin/sh		Bourne shell script text executable
+0	string/b	#!\ /bin/csh		C shell script text executable
 # korn shell magic, sent by George Wu, gwu@clyde.att.com
-0	string		#!/bin/ksh		Korn shell script text
-0	string		#!\ /bin/ksh		Korn shell script text
-0	string		#!\	/bin/ksh	Korn shell script text
-0	string	 	#!/bin/tcsh		Tenex C shell script text
-0	string	 	#!\ /bin/tcsh		Tenex C shell script text
-0	string	 	#!\	/bin/tcsh	Tenex C shell script text
-0	string		#!/usr/local/tcsh	Tenex C shell script text
-0	string	 	#!\ /usr/local/tcsh	Tenex C shell script text
-0	string		#!/usr/local/bin/tcsh	Tenex C shell script text
-0	string		#!\ /usr/local/bin/tcsh	Tenex C shell script text
-0	string		#!\	/usr/local/bin/tcsh	Tenex C shell script text
+0	string/b	#!\ /bin/ksh		Korn shell script text executable
+0	string/b 	#!\ /bin/tcsh		Tenex C shell script text executable
+0	string/b 	#!\ /usr/local/tcsh	Tenex C shell script text executable
+0	string/b	#!\ /usr/local/bin/tcsh	Tenex C shell script text executable
+
 #
 # zsh/ash/ae/nawk/gawk magic from cameron@cs.unsw.oz.au (Cameron Simpson)
-0	string		#!/usr/local/bin/zsh	Paul Falstad's zsh
-0	string		#!\ /usr/local/bin/zsh	Paul Falstad's zsh
-0	string		#!\	/usr/local/bin/zsh	Paul Falstad's zsh
-0	string		#!/usr/local/bin/ash	Neil Brown's ash
-0	string		#!\ /usr/local/bin/ash	Neil Brown's ash
-0	string		#!\	/usr/local/bin/ash	Neil Brown's ash
-0	string		#!/usr/local/bin/ae	Neil Brown's ae
-0	string		#!\ /usr/local/bin/ae	Neil Brown's ae
-0	string		#!\	/usr/local/bin/ae	Neil Brown's ae
-0	string		#!/bin/nawk		new awk script text
-0	string		#!\ /bin/nawk		new awk script text
-0	string		#!\	/bin/nawk		new awk script text
-0	string		#!/usr/bin/nawk		new awk script text
-0	string		#!\ /usr/bin/nawk	new awk script text
-0	string		#!\	/usr/bin/nawk	new awk script text
-0	string		#!/usr/local/bin/nawk	new awk script text
-0	string		#!\ /usr/local/bin/nawk	new awk script text
-0	string		#!\	/usr/local/bin/nawk	new awk script text
-0	string		#!/bin/gawk		GNU awk script text
-0	string		#!\ /bin/gawk		GNU awk script text
-0	string		#!\	/bin/gawk		GNU awk script text
-0	string		#!/usr/bin/gawk		GNU awk script text
-0	string		#!\ /usr/bin/gawk	GNU awk script text
-0	string		#!\	/usr/bin/gawk	GNU awk script text
-0	string		#!/usr/local/bin/gawk	GNU awk script text
-0	string		#!\ /usr/local/bin/gawk	GNU awk script text
-0	string		#!\	/usr/local/bin/gawk	GNU awk script text
+0	string/b	#!\ /usr/local/bin/zsh	Paul Falstad's zsh script text executable
+0	string/b	#!\ /usr/local/bin/ash	Neil Brown's ash script text executable
+0	string/b	#!\ /usr/local/bin/ae	Neil Brown's ae script text executable
+0	string/b	#!\ /bin/nawk		new awk script text executable
+0	string/b	#!\ /usr/bin/nawk	new awk script text executable
+0	string/b	#!\ /usr/local/bin/nawk	new awk script text executable
+0	string/b	#!\ /bin/gawk		GNU awk script text executable
+0	string/b	#!\ /usr/bin/gawk	GNU awk script text executable
+0	string/b	#!\ /usr/local/bin/gawk	GNU awk script text executable
 #
-0	string		#!/bin/awk		awk commands text
-0	string		#!\ /bin/awk		awk commands text
-0	string		#!\	/bin/awk		awk commands text
-0	string		#!/usr/bin/awk		awk commands text
-0	string		#!\ /usr/bin/awk	awk commands text
-0	string		#!\	/usr/bin/awk	awk commands text
-0	string		BEGIN			awk commands text
+0	string/b	#!\ /bin/awk		awk script text executable
+0	string/b	#!\ /usr/bin/awk	awk script text executable
+0	string		BEGIN			awk script text
 
 # For Larry Wall's perl language.  The ``eval'' line recognizes an
 # outrageously clever hack for USG systems.
 #				Keith Waclena <keith@cerberus.uchicago.edu>
-0	string		#!/bin/perl			perl commands text
-0	string		#!\ /bin/perl			perl commands text
-0	string		#!\	/bin/perl		perl commands text
-0	string		eval\ "exec\ /bin/perl		perl commands text
-0	string		#!/usr/bin/perl			perl commands text
-0	string		#!\ /usr/bin/perl		perl commands text
-0	string		#!\	/usr/bin/perl		perl commands text
-0	string		eval\ "exec\ /usr/bin/perl	perl commands text
-0	string		#!/usr/local/bin/perl		perl commands text
-0	string		#!\ /usr/local/bin/perl		perl commands text
-0	string		#!\	/usr/local/bin/perl	perl commands text
-0	string		eval\ "exec\ /usr/local/bin/perl	perl commands text
+0	string/b	#!\ /bin/perl			perl script text executable
+0	string		eval\ "exec\ /bin/perl		perl script text
+0	string/b	#!\ /usr/bin/perl		perl script text executable
+0	string		eval\ "exec\ /usr/bin/perl	perl script text
+0	string/b	#!\ /usr/local/bin/perl		perl script text
+0	string		eval\ "exec\ /usr/local/bin/perl	perl script text executable
 
 # AT&T Bell Labs' Plan 9 shell
-0	string		#!/bin/rc	Plan 9 rc shell script text
-0	string		#!\ /bin/rc	Plan 9 rc shell script text
-0	string		#!\	/bin/rc	Plan 9 rc shell script text
+0	string/b	#!\ /bin/rc	Plan 9 rc shell script text executable
 
 # bash shell magic, from Peter Tobias (tobias@server.et-inf.fho-emden.de)
-0	string		#!/bin/bash	Bourne-Again shell script text
-0	string		#!\ /bin/bash	Bourne-Again shell script text
-0	string		#!\	/bin/bash	Bourne-Again shell script text
-0	string		#!/usr/local/bin/bash	Bourne-Again shell script text
-0	string		#!\ /usr/local/bin/bash	Bourne-Again shell script text
-0	string		#!\	/usr/local/bin/bash	Bourne-Again shell script text
+0	string/b	#!\ /bin/bash	Bourne-Again shell script text executable
+0	string/b	#!\ /usr/local/bin/bash	Bourne-Again shell script text executable
+
+# using env
+0	string		#!/usr/bin/env		a
+>15	string		>\0			%s script text executable
+0	string		#!\ /usr/bin/env	a
+>16	string		>\0			%s script text executable
+
 
 # generic shell magic
 0	string		#!\ /			a
->3	string		>\0			%s script text
+>3	string		>\0			%s script text executable
 0	string		#!\	/		a
->3	string		>\0			%s script text
+>3	string		>\0			%s script text executable
 0	string		#!/			a
->2	string		>\0			%s script text
-0	string		#!\ 			commands text
+>2	string		>\0			%s script text executable
+0	string		#!\ 			script text executable
 >3	string		>\0			for %s

+ 2 - 1
Magdir/compress

@@ -21,6 +21,7 @@
 >3	byte		&0x02		continuation,
 >3	byte		&0x04		extra field,
 >3	byte		&0x08		original filename,
+>>10    string          x               `%s',
 >3	byte		&0x10		comment,
 >3	byte		&0x20		encrypted,
 >4	ledate		x		last modified: %s,
@@ -93,7 +94,7 @@
 # bzip	a block-sorting file compressor
 #	by Julian Seward <sewardj@cs.man.ac.uk> and others
 #
-0	string		BZ		bzip compressed	data
+0	string		BZ		bzip compressed data
 >2	byte		x		\b, version: %c
 >3	string		=1		\b, compression block size 100k
 >3	string		=2		\b, compression block size 200k

+ 122 - 0
Magdir/console

@@ -0,0 +1,122 @@
+#------------------------------------------------------------------------------
+# Console game magic
+# Toby Deshane <hac@shoelace.digivill.net>
+#    ines:  file(1) magic for Marat's iNES Nintendo Entertainment System
+#           ROM dump format
+
+0 string NES\032 iNES ROM dump,
+>4 byte  x     %dx16k PRG
+>5 byte  x     \b, %dx8k CHR
+>6 byte&0x01  =0x1  \b, [Vert.]
+>6 byte&0x01  =0x0  \b, [Horiz.]
+>6 byte&0x02  =0x2  \b, [SRAM]
+>6 byte&0x04  =0x4  \b, [Trainer]
+>6 byte&0x04  =0x8  \b, [4-Scr]
+
+#------------------------------------------------------------------------------
+# gameboy:  file(1) magic for the Nintendo (Color) Gameboy raw ROM format
+#
+0x104 belong 0xCEED6666 Gameboy ROM:
+>0x134 string >\0 "%.16s"
+>0x146 byte 0x03  \b,[SGB]
+>0x147 byte 0x00  \b, [ROM ONLY]
+>0x147 byte 0x01  \b, [ROM+MBC1]
+>0x147 byte 0x02  \b, [ROM+MBC1+RAM]
+>0x147 byte 0x03  \b, [ROM+MBC1+RAM+BATT]
+>0x147 byte 0x05  \b, [ROM+MBC2]
+>0x147 byte 0x06  \b, [ROM+MBC2+BATTERY]
+>0x147 byte 0x08  \b, [ROM+RAM]
+>0x147 byte 0x09  \b, [ROM+RAM+BATTERY]
+>0x147 byte 0x0B  \b, [ROM+MMM01]
+>0x147 byte 0x0C  \b, [ROM+MMM01+SRAM]
+>0x147 byte 0x0D  \b, [ROM+MMM01+SRAM+BATT]
+>0x147 byte 0x0F  \b, [ROM+MBC3+TIMER+BATT]
+>0x147 byte 0x10  \b, [ROM+MBC3+TIMER+RAM+BATT]
+>0x147 byte 0x11  \b, [ROM+MBC3]
+>0x147 byte 0x12  \b, [ROM+MBC3+RAM]
+>0x147 byte 0x13  \b, [ROM+MBC3+RAM+BATT]
+>0x147 byte 0x19  \b, [ROM+MBC5]
+>0x147 byte 0x1A  \b, [ROM+MBC5+RAM]
+>0x147 byte 0x1B  \b, [ROM+MBC5+RAM+BATT]
+>0x147 byte 0x1C  \b, [ROM+MBC5+RUMBLE]
+>0x147 byte 0x1D  \b, [ROM+MBC5+RUMBLE+SRAM]
+>0x147 byte 0x1E  \b, [ROM+MBC5+RUMBLE+SRAM+BATT]
+>0x147 byte 0x1F  \b, [Pocket Camera]
+>0x147 byte 0xFD  \b, [Bandai TAMA5]
+>0x147 byte 0xFE  \b, [Hudson HuC-3]
+>0x147 byte 0xFF  \b, [Hudson HuC-1]
+
+>0x148 byte 0     \b, ROM: 256Kbit
+>0x148 byte 1     \b, ROM: 512Kbit
+>0x148 byte 2     \b, ROM: 1Mbit
+>0x148 byte 3     \b, ROM: 2Mbit
+>0x148 byte 4     \b, ROM: 4Mbit
+>0x148 byte 5     \b, ROM: 8Mbit
+>0x148 byte 6     \b, ROM: 16Mbit
+>0x148 byte 0x52  \b, ROM: 9Mbit
+>0x148 byte 0x53  \b, ROM: 10Mbit
+>0x148 byte 0x54  \b, ROM: 12Mbit
+
+>0x149 byte 1     \b, RAM: 16Kbit
+>0x149 byte 2     \b, RAM: 64Kbit
+>0x149 byte 3     \b, RAM: 128Kbit
+>0x149 byte 4     \b, RAM: 1Mbit
+
+#>0x14e long  x     \b, CRC: %x
+
+#------------------------------------------------------------------------------
+# genesis:  file(1) magic for the Sega MegaDrive/Genesis raw ROM format
+#
+0x100 string SEGA  Sega MegaDrive/Genesis raw ROM dump
+>0x120 string >\0 Name: "%.16s"
+>0x110 string >\0 %.16s
+>0x1B0 string RA with SRAM
+
+#------------------------------------------------------------------------------
+# genesis:  file(1) magic for the Super MagicDrive ROM dump format
+#
+0x280 string EAGN  Super MagicDrive ROM dump
+>0 byte x %dx16k blocks
+>2 byte 0 \b, last in series or standalone
+>2 byte >0 \b, split ROM
+>8 byte 0xAA
+>9 byte 0xBB
+
+#------------------------------------------------------------------------------
+# genesis:  file(1) alternate magic for the Super MagicDrive ROM dump format
+#
+0x280 string EAMG  Super MagicDrive ROM dump
+>0 byte x %dx16k blocks
+>2 byte x \b, last in series or standalone
+>8 byte 0xAA
+>9 byte 0xBB
+
+#------------------------------------------------------------------------------
+# smsgg:  file(1) magic for Sega Master System and Game Gear ROM dumps
+#
+# Does not detect all images.  Very preliminary guesswork.  Need more data
+# on format.
+#
+# FIXME: need a little more info...;P
+#
+#0 byte 0xF3
+#>1 byte 0xED  Sega Master System/Game Gear ROM dump
+#>1 byte 0x31  Sega Master System/Game Gear ROM dump
+#>1 byte 0xDB  Sega Master System/Game Gear ROM dump
+#>1 byte 0xAF  Sega Master System/Game Gear ROM dump
+#>1 byte 0xC3  Sega Master System/Game Gear ROM dump
+
+#------------------------------------------------------------------------------
+# dreamcast:  file(1) uncertain magic for the Sega Dreamcast VMU image format
+#
+0 belong 0x21068028   Sega Dreamcast VMU game image
+0 string LCDi         Dream Animator file
+
+#------------------------------------------------------------------------------
+# v64: file(1) uncertain magic for the V64 format N64 ROM dumps
+#
+0 belong 0x37804012    V64 Nintendo 64 ROM dump
+
+#------------------------------------------------------------------------------
+# msx: file(1) magic for MSX game cartridge dumps
+0 beshort 0x4142 MSX game cartridge dump 

+ 5 - 0
Magdir/ctags

@@ -0,0 +1,5 @@
+
+# ----------------------------------------------------------------------------
+# ctags:  file (1) magic for Exuberant Ctags files
+# From: Alexander Mai <mai@migdal.ikp.physik.tu-darmstadt.de>
+0       string  !_TAG   Exuberant Ctags tag file

+ 56 - 77
Magdir/database

@@ -12,84 +12,63 @@
 0	lelong	0x13579ace	GNU dbm 1.x or ndbm database, little endian
 0	string	GDBM		GNU dbm 2.x database
 #
-0	belong	0x061561	Berkeley DB
->4	belong	>2		1.86
->4	belong	<3		1.85
->8	belong	4321		Hash/Big Endian
->8	belong	1234		Hash/Little Endian
->4	belong	>0		(Version %d,
->12	belong	x		Bucket Size %d,
->16	belong	x		Bucket Shift %d,
->20	belong	x		Directory Size %d,
->24	belong	x		Segment Size %d,
->28	belong	x		Segment Shift %d,
->32	belong	x		Overflow Point %d,
->36	belong	x		Last Freed %d,
->40	belong	x		Max Bucket %d,
->44	belong	x		High Mask 0x%x,
->48	belong	x		Low Mask 0x%x,
->52	belong	x		Fill Factor %d,
->56	belong	x		Number of Keys %d)
+# Berkeley DB
 #
+# Ian Darwin's file /etc/magic files: big/little-endian version.
 #
-0	belong	0x053162	Berkeley DB 1.85/1.86 Btree/Big Endian
->4	belong	>0		(Version %d,
->8	belong	x		Page Size %d,
->12	belong	x		Free Page %d,
->16	belong	x		Number of Records %d,
->20	belong	x		Flags 0x%x)
-0	lelong	0x053162	Berkeley DB 1.85/1.86 Btree/Little Endian
->4	lelong	>0		(Version %d,
->8	lelong	x		Page Size %d,
->12	lelong	x		Free Page %d,
->16	lelong	x		Number of Records %d,
->20	lelong	x		Flags 0x%x)
-#
-#
-12	belong	0x061561	Berkeley DB 2.X Hash/Big Endian
->16	belong	>0		(Version %d,
->0	belong  x		Logical sequence number: file - %d,
->4	belong  x		offset - %d,
->20	belong	x		Bucket Size %d,
->24	belong	x		Overflow Point %d,
->28	belong	x		Last Freed %d,
->32	belong	x		Max Bucket %d,
->36	belong	x		High Mask 0x%x,
->40	belong	x		Low Mask 0x%x,
->44	belong	x		Fill Factor %d,
->48	belong	x		Number of Keys %d)
-#
-# DB2.X formats from Khimenko Victor <khim@sch57.msk.ru>
-#
-12	lelong	0x061561	Berkeley DB 2.X Hash/Little Endian
->16	lelong	>0		(Version %d,
->0	lelong  x		Logical sequence number: file - %d,
->4	lelong  x		offset - %d,
->20	lelong	x		Bucket Size %d,
->24	lelong	x		Overflow Point %d,
->28	lelong	x		Last Freed %d,
->32	lelong	x		Max Bucket %d,
->36	lelong	x		High Mask 0x%x,
->40	lelong	x		Low Mask 0x%x,
->44	lelong	x		Fill Factor %d,
->48	lelong	x		Number of Keys %d)
-#
-#
-12	belong	0x053162	Berkeley DB 2.X Btree/Big Endian
->16	belong	>0		(Version %d, 
->0	belong  x		Logical sequence number: file - %d,
->4	belong  x		offset - %d,
->20	belong	x		Page Size %d,
->24	belong	x		Maxkey %d,
->28	belong	x		Minkey %d,
->32	belong	x		Free Page %d)
+# Hash 1.85/1.86 databases store metadata in network byte order.
+# Btree 1.85/1.86 databases store the metadata in host byte order.
+# Hash and Btree 2.X and later databases store the metadata in host byte order.
+
+0	long	0x00061561	Berkeley DB
+>8	belong	4321
+>>4	belong	>2		1.86
+>>4	belong	<3		1.85
+>>4	belong	>0		(Hash, version %d, native byte-order)
+>8	belong	1234
+>>4	belong	>2		1.86
+>>4	belong	<3		1.85
+>>4	belong	>0		(Hash, version %d, little-endian)
+
+0	belong	0x00061561	Berkeley DB
+>8	belong	4321
+>>4	belong	>2		1.86
+>>4	belong	<3		1.85
+>>4	belong	>0		(Hash, version %d, big-endian)
+>8	belong	1234
+>>4	belong	>2		1.86
+>>4	belong	<3		1.85
+>>4	belong	>0		(Hash, version %d, native byte-order)
+
+0	long	0x00053162	Berkeley DB 1.85/1.86
+>4	long	>0		(Btree, version %d, native byte-order)
+0	belong	0x00053162	Berkeley DB 1.85/1.86
+>4	belong	>0		(Btree, version %d, big-endian)
+0	lelong	0x00053162	Berkeley DB 1.85/1.86
+>4	lelong	>0		(Btree, version %d, little-endian)
+
+12	long	0x00061561	Berkeley DB
+>16	long	>0		(Hash, version %d, native byte-order)
+12	belong	0x00061561	Berkeley DB
+>16	belong	>0		(Hash, version %d, big-endian)
+12	lelong	0x00061561	Berkeley DB
+>16	lelong	>0		(Hash, version %d, little-endian)
+
+12	long	0x00053162	Berkeley DB
+>16	long	>0		(Btree, version %d, native byte-order)
+12	belong	0x00053162	Berkeley DB
+>16	belong	>0		(Btree, version %d, big-endian)
+12	lelong	0x00053162	Berkeley DB
+>16	lelong	>0		(Btree, version %d, little-endian)
+
+12	long	0x00042253	Berkeley DB
+>16	long	>0		(Queue, version %d, native byte-order)
+12	belong	0x00042253	Berkeley DB
+>16	belong	>0		(Queue, version %d, big-endian)
+12	lelong	0x00042253	Berkeley DB
+>16	lelong	>0		(Queue, version %d, little-endian)
 #
 #
-12	lelong	0x053162	Berkeley DB 2.X Btree/Little Endian
->16	lelong	>0		(Version %d,
->0	lelong  x		Logical sequence number: file - %d,
->4	lelong  x		offset - %d,
->20	lelong	x		Page Size %d,
->24	lelong	x		Maxkey %d,
->28	lelong	x		Minkey %d,
->32	lelong	x		Free Page %d)
+# Round Robin Database Tool by Tobias Oetiker <oetiker@ee.ethz.ch>
+0	string	RRD		RRDTool DB
+>4	string	x		version %s

+ 12 - 0
Magdir/dyadic

@@ -0,0 +1,12 @@
+
+#------------------------------------------------------------------------------
+# Dyadic: file(1) magic for Dyalog APL.
+#
+0 	byte	0xaa
+>1	byte	<4		Dyalog APL
+>>1	byte	0x00		incomplete workspace
+>>1	byte	0x01		component file
+>>1	byte	0x02		external variable
+>>1	byte	0x03		workspace
+>>2	byte	x		version %d
+>>3	byte	x		.%d

+ 9 - 0
Magdir/editors

@@ -0,0 +1,9 @@
+
+#------------------------------------------------------------------------------
+# T602 editor documents 
+# by David Necas <yeti@physics.muni.cz>
+0	string	@CT\ 	T602 document data,
+>4	string	0	Kamenicky
+>4	string	1	CP 852
+>4	string	2	KOI8-CS
+>4	string	>2	unknown encoding

+ 50 - 6
Magdir/elf

@@ -12,12 +12,31 @@
 0	string		\177ELF		ELF
 >4	byte		0		invalid class
 >4	byte		1		32-bit
-# only for MIPS R3000_BE
+# only for MIPS
 >>18	beshort		8
+>>18	beshort		10
 >>>36   belong          &0x20           N32
 >4	byte		2		64-bit
 >5	byte		0		invalid byte order
 >5	byte		1		LSB
+# only for MIPS R3000_BE
+>>18    leshort		8
+# only for 32-bit
+>>>4	byte		1
+>>>>36  lelong&0xf0000000       0x00000000      mips-1
+>>>>36  lelong&0xf0000000       0x10000000      mips-2
+>>>>36  lelong&0xf0000000       0x20000000      mips-3
+>>>>36  lelong&0xf0000000       0x30000000      mips-4
+>>>>36  lelong&0xf0000000       0x40000000      mips-5
+>>>>36  lelong&0xf0000000       0x50000000      mips-6
+# only for 64-bit
+>>>4	byte		2
+>>>>48  lelong&0xf0000000       0x00000000      mips-1
+>>>>48  lelong&0xf0000000       0x10000000      mips-2
+>>>>48  lelong&0xf0000000       0x20000000      mips-3
+>>>>48  lelong&0xf0000000       0x30000000      mips-4
+>>>>48  lelong&0xf0000000       0x40000000      mips-5
+>>>>48  lelong&0xf0000000       0x50000000      mips-6
 >>16	leshort		0		no file type,
 >>16	leshort		1		relocatable,
 >>16	leshort		2		executable,
@@ -36,11 +55,14 @@
 >>18	leshort		5		Motorola 88000 - invalid byte order,
 >>18	leshort		6		Intel 80486,
 >>18	leshort		7		Intel 80860,
->>18	leshort		8		MIPS R3000_BE - invalid byte order,
+# "officially" big endian, but binutils bfd only emits magic #8 for MIPS.
+>>18	leshort		8		MIPS R3000_LE [bfd bug],
 >>18	leshort		9		Amdahl - invalid byte order,
 >>18	leshort		10		MIPS R3000_LE,
 >>18	leshort		11		RS6000 - invalid byte order,
 >>18	leshort		15		PA-RISC - invalid byte order,
+>>>50	leshort		0x0214		2.0
+>>>48	leshort		&0x0008		(LP64),
 >>18	leshort		16		nCUBE,
 >>18	leshort		17		Fujitsu VPP500,
 >>18	leshort		18		SPARC32PLUS,
@@ -49,7 +71,7 @@
 >>18	leshort		37		Fujitsu FR20,
 >>18	leshort		38		TRW RH-32,
 >>18	leshort		39		Motorola RCE,
->>18	leshort		40		Advanced RISC Machines ARM,
+>>18	leshort		40		ARM,
 >>18	leshort		41		Alpha,
 >>18	leshort		42		Hitachi SH,
 >>18	leshort		43		SPARC V9 - invalid byte order,
@@ -63,11 +85,12 @@
 >>18	leshort		51		Stanford MIPS-X,
 >>18	leshort		52		Motorola Coldfire,
 >>18	leshort		53		Motorola M68HC12,
+>>18	leshort		62		AMD x86-64,
+>>18	leshort		75		Digital VAX,
 >>18	leshort		0x9026		Alpha (unofficial),
 >>20	lelong		0		invalid version
 >>20	lelong		1		version 1
 >>36	lelong		1		MathCoPro/FPU/MAU Required
->8	string		>\0		(%s)
 >5	byte		2		MSB
 # only for MIPS R3000_BE
 >>18    beshort		8
@@ -107,7 +130,9 @@
 >>18	beshort		9		Amdahl,
 >>18	beshort		10		MIPS R3000_LE - invalid byte order,
 >>18	beshort		11		RS6000,
->>18	beshort		15		PA-RISC,
+>>18	beshort		15		PA-RISC
+>>>50	beshort		0x0214		2.0
+>>>48	beshort		&0x0008		(LP64)
 >>18	beshort		16		nCUBE,
 >>18	beshort		17		Fujitsu VPP500,
 >>18	beshort		18		SPARC32PLUS,
@@ -123,7 +148,7 @@
 >>18	beshort		37		Fujitsu FR20,
 >>18	beshort		38		TRW RH-32,
 >>18	beshort		39		Motorola RCE,
->>18	beshort		40		Advanced RISC Machines ARM,
+>>18	beshort		40		ARM,
 >>18	beshort		41		Alpha,
 >>18	beshort		42		Hitachi SH,
 >>18	beshort		43		SPARC V9,
@@ -137,7 +162,26 @@
 >>18	beshort		51		Stanford MIPS-X,
 >>18	beshort		52		Motorola Coldfire,
 >>18	beshort		53		Motorola M68HC12,
+>>18	beshort		73		Cray NV1,
+>>18	beshort		75		Digital VAX,
 >>18	beshort		0x9026		Alpha (unofficial),
 >>20	belong		0		invalid version
 >>20	belong		1		version 1
 >>36	belong		1		MathCoPro/FPU/MAU Required
+>8	string		>\0		(%s)
+>8	string		\0
+>>7	byte		0		(SYSV)
+>>7	byte		1		(HP-UX)
+>>7	byte		2		(NetBSD)
+>>7	byte		3		(GNU/Linux)
+>>7	byte		4		(GNU/Hurd)
+>>7	byte		5		(86Open)
+>>7	byte		6		(Solaris)
+>>7	byte		7		(Monterey)
+>>7	byte		8		(IRIX)
+>>7	byte		9		(FreeBSD)
+>>7	byte		10		(Tru64)
+>>7	byte		11		(Novell Modesto)
+>>7	byte		12		(OpenBSD)
+>>7	byte		97		(ARM)
+>>7	byte		255		(embedded)

+ 10 - 0
Magdir/epoc

@@ -0,0 +1,10 @@
+
+#------------------------------------------------------------------------------
+# Epoc 32 : file(1) magic for Epoc Documents [psion/osaris]
+# Stefan Praszalowicz (hpicollo@worldnet.fr)
+#0	lelong		0x10000037	Epoc32
+>4	lelong		0x1000006D
+>>8	lelong		0x1000007F	Word
+>>8	lelong		0x10000088	Sheet
+>>8	lelong		0x1000007D	Sketch
+>>8	lelong		0x10000085	TextEd

+ 49 - 9
Magdir/filesystems

@@ -2,7 +2,6 @@
 #------------------------------------------------------------------------------
 # filesystems:  file(1) magic for different filesystems
 #
-0x438	leshort	0xEF53			Linux/i386 ext2 filesystem
 0	string	\366\366\366\366	PC formatted floppy with no filesystem
 # Sun disk labels
 # From /usr/include/sun/dklabel.h:
@@ -62,17 +61,58 @@
 
 0x18b	string	OS/2	OS/2 Boot Manager
 
-9564	lelong		0x00011954	Unix Fast File system,
+9564	lelong		0x00011954	Unix Fast File system (little-endian),
 >8404	string		x		last mounted on %s,
->9504	ledate		x		last checkd at %s,
->8224	ledate		x		last writen at %s,
+#>9504	ledate		x		last checked at %s,
+>8224	ledate		x		last written at %s,
+>8401	byte		x		clean flag %d,
 >8228	lelong		x		number of blocks %d,
 >8232	lelong		x		number of data blocks %d,
 >8236	lelong		x		number of cylinder groups %d,
->8240	lelong		x		number of basic blocks %d,
->8244	lelong		x		number of fragment blocks %d,
->8248	lelong		x		minimum percentage of free blocks %d,
->8252	lelong		x		rotational delay %dms,
->8256	lelong		x		disk rotational speed %drps,
+>8240	lelong		x		block size %d,
+>8244	lelong		x		fragment size %d,
+>8252	lelong		x		minimum percentage of free blocks %d,
+>8256	lelong		x		rotational delay %dms,
+>8260	lelong		x		disk rotational speed %drps,
 >8320	lelong		0		TIME optimization
 >8320	lelong		1		SPACE optimization
+
+9564	belong		0x00011954	Unix Fast File system (big-endian),
+>8404	string		x		last mounted on %s,
+#>9504	bedate		x		last checked at %s,
+>8224	bedate		x		last written at %s,
+>8401	byte		x		clean flag %d,
+>8228	belong		x		number of blocks %d,
+>8232	belong		x		number of data blocks %d,
+>8236	belong		x		number of cylinder groups %d,
+>8240	belong		x		block size %d,
+>8244	belong		x		fragment size %d,
+>8252	belong		x		minimum percentage of free blocks %d,
+>8256	belong		x		rotational delay %dms,
+>8260	belong		x		disk rotational speed %drps,
+>8320	belong		0		TIME optimization
+>8320	belong		1		SPACE optimization
+
+# ext2/ext3 filesystems - Andreas Dilger <adilger@turbolabs.com>
+0x438	leshort		0xEF53		Linux
+>0x44c	lelong		x		rev %d
+>0x43e	leshort		x		\b.%d
+>0x45c	lelong		^0x0000004	ext2 filesystem data
+>>0x43a	leshort		^0x0000001	(mounted or unclean)
+>0x45c	lelong		&0x0000004	ext3 filesystem data
+>>0x460	lelong		&0x0000004	(needs journal recovery)
+>0x43a	leshort		&0x0000002	(errors)
+>0x460	lelong		&0x0000001	(compressed)
+#>0x460	lelong		&0x0000002	(filetype)
+#>0x464	lelong		&0x0000001	(sparse_super)
+>0x464	lelong		&0x0000002	(large files)
+
+# SGI disk labels - Nathan Scott <nathans@debian.org>
+0	belong		0x0BE5A941	SGI disk label (volume header)
+
+# SGI XFS filesystem - Nathan Scott <nathans@debian.org>
+0	belong		0x58465342	SGI XFS filesystem data
+>0x4	belong		x		(blksz=%d,
+>0x68	beshort		x		inosz=%d,
+>0x64	beshort		^0x2004		v1 dirs)
+>0x64	beshort		&0x2004		v2 dirs)

+ 3 - 0
Magdir/fonts

@@ -46,3 +46,6 @@
 7	belong		0x00564944	DOS code page font data (from Linux?)
 4098	string		DOSFONT		DOSFONT2 encrypted font data
 
+# downloadable fonts for browser (prints type) anthon@mnt.org
+0	string		PFR1		PFR1 font
+>102	string		>0		\b: %s

+ 15 - 2
Magdir/freebsd

@@ -126,5 +126,18 @@
 
 # /var/run/ld.so.hints
 # What are you laughing about?
-0	lelong			011421044151	ld.so hints file
->4	lelong			>0		(version %d)
+0	lelong			011421044151	ld.so hints file (Little Endian
+>4	lelong			>0		\b, version %d)
+>4	belong			<=0		\b)
+0	belong			011421044151	ld.so hints file (Big Endian
+>4	belong			>0		\b, version %d)
+>4	belong			<=0		\b)
+
+#
+# Files generated by FreeBSD scrshot(1)/vidcontrol(1) utilities
+#
+0	string	SCRSHOT_	scrshot(1) screenshot,
+>8	byte	x		version %d,
+>9	byte	2		%d bytes in header,
+>>10	byte	x		%d chars wide by
+>>11	byte	x		%d chars high

+ 27 - 0
Magdir/fsav

@@ -0,0 +1,27 @@
+
+#------------------------------------------------------------------------------
+# fsav:  file(1) magic for datafellows fsav virus definition files
+# Anthon van der Neut (anthon@mnt.org)
+0	beshort		0x1575		fsav (linux) macro virus
+>8	leshort		>0		(%d-
+>11	byte		>0		\b%02d-
+>10	byte		>0		\b%02d)
+
+# comment this out for now because it recognizes every file where
+# the eighth character is \n
+#8	byte		0x0a
+#>12	byte		0x07
+#>11	leshort		>0		fsav (linux) virus (%d-
+#>10	byte		0		\b01-
+#>10	byte		1		\b02-
+#>10	byte		2		\b03-
+#>10	byte		3		\b04-
+#>10	byte		4		\b05-
+#>10	byte		5		\b06-
+#>10	byte		6		\b07-
+#>10	byte		7		\b08-
+#>10	byte		8		\b09-
+#>10	byte		9		\b10-
+#>10	byte		10		\b11-
+#>10	byte		11		\b12-
+#>9	byte		>0		\b%02d)

+ 3 - 0
Magdir/gnu

@@ -7,3 +7,6 @@
 0	string		\225\4\22\336	GNU message catalog (big endian),
 >4	belong		x		revision %d,
 >8	belong		x		%d messages
+# message catalogs, from Mitchum DSouza <m.dsouza@mrc-apu.cam.ac.uk>
+0	string		*nazgul*	Nazgul style compiled message catalog
+>8	lelong		>0		\b, version %ld

+ 18 - 0
Magdir/hitachi-sh

@@ -0,0 +1,18 @@
+
+#------------------------------------------------------------------------------
+# hitachi-sh: file(1) magic for Hitachi Super-H
+#
+# Super-H COFF
+#
+0	beshort		0x0500		Hitachi SH big-endian COFF
+>18	beshort&0x0002	=0x0000		object
+>18	beshort&0x0002	=0x0002		executable
+>18	beshort&0x0008	=0x0000		\b, stripped
+>18	beshort&0x0008	=0x0008		\b, not stripped
+#
+0	leshort		0x0550		Hitachi SH little-endian COFF
+>18	leshort&0x0002	=0x0000		object
+>18	leshort&0x0002	=0x0002		executable
+>18	leshort&0x0008	=0x0000		\b, stripped
+>18	leshort&0x0008	=0x0008		\b, not stripped
+

+ 140 - 0
Magdir/hp

@@ -249,3 +249,143 @@
 >2	beshort		0407		impure binary
 >2	beshort		0410		read-only binary
 >2	beshort		0413		demand paged binary
+#
+# From David Gero <dgero@nortelnetworks.com>
+# HP-UX 10.20 core file format from /usr/include/sys/core.h
+# Unfortunately, HP-UX uses corehead blocks without specifying the order
+# There are four we care about:
+#     CORE_KERNEL, which starts with the string "HP-UX"
+#     CORE_EXEC, which contains the name of the command
+#     CORE_PROC, which contains the signal number that caused the core dump
+#     CORE_FORMAT, which contains the version of the core file format (== 1)
+# The only observed order in real core files is KERNEL, EXEC, FORMAT, PROC
+# but we include all 6 variations of the order of the first 3, and
+# assume that PROC will always be last
+# Order 1: KERNEL, EXEC, FORMAT, PROC
+0x10		string	HP-UX
+>0		belong	2
+>>0xC		belong	0x3C
+>>>0x4C		belong	0x100
+>>>>0x58	belong	0x44
+>>>>>0xA0	belong	1
+>>>>>>0xAC	belong	4
+>>>>>>>0xB0	belong	1
+>>>>>>>>0xB4	belong	4		core file
+>>>>>>>>>0x90	string	>\0		from '%s'
+>>>>>>>>>0xC4	belong	3		- received SIGQUIT
+>>>>>>>>>0xC4	belong	4		- received SIGILL
+>>>>>>>>>0xC4	belong	5		- received SIGTRAP
+>>>>>>>>>0xC4	belong	6		- received SIGABRT
+>>>>>>>>>0xC4	belong	7		- received SIGEMT
+>>>>>>>>>0xC4	belong	8		- received SIGFPE
+>>>>>>>>>0xC4	belong	10		- received SIGBUS
+>>>>>>>>>0xC4	belong	11		- received SIGSEGV
+>>>>>>>>>0xC4	belong	12		- received SIGSYS
+>>>>>>>>>0xC4	belong	33		- received SIGXCPU
+>>>>>>>>>0xC4	belong	34		- received SIGXFSZ
+# Order 2: KERNEL, FORMAT, EXEC, PROC
+>>>0x4C		belong	1
+>>>>0x58	belong	4
+>>>>>0x5C	belong	1
+>>>>>>0x60	belong	0x100
+>>>>>>>0x6C	belong	0x44
+>>>>>>>>0xB4	belong	4		core file
+>>>>>>>>>0xA4	string	>\0		from '%s'
+>>>>>>>>>0xC4	belong	3		- received SIGQUIT
+>>>>>>>>>0xC4	belong	4		- received SIGILL
+>>>>>>>>>0xC4	belong	5		- received SIGTRAP
+>>>>>>>>>0xC4	belong	6		- received SIGABRT
+>>>>>>>>>0xC4	belong	7		- received SIGEMT
+>>>>>>>>>0xC4	belong	8		- received SIGFPE
+>>>>>>>>>0xC4	belong	10		- received SIGBUS
+>>>>>>>>>0xC4	belong	11		- received SIGSEGV
+>>>>>>>>>0xC4	belong	12		- received SIGSYS
+>>>>>>>>>0xC4	belong	33		- received SIGXCPU
+>>>>>>>>>0xC4	belong	34		- received SIGXFSZ
+# Order 3: FORMAT, KERNEL, EXEC, PROC
+0x24		string	HP-UX
+>0		belong	1
+>>0xC		belong	4
+>>>0x10		belong	1
+>>>>0x14	belong	2
+>>>>>0x20	belong	0x3C
+>>>>>>0x60	belong	0x100
+>>>>>>>0x6C	belong	0x44
+>>>>>>>>0xB4	belong	4		core file
+>>>>>>>>>0xA4	string	>\0		from '%s'
+>>>>>>>>>0xC4	belong	3		- received SIGQUIT
+>>>>>>>>>0xC4	belong	4		- received SIGILL
+>>>>>>>>>0xC4	belong	5		- received SIGTRAP
+>>>>>>>>>0xC4	belong	6		- received SIGABRT
+>>>>>>>>>0xC4	belong	7		- received SIGEMT
+>>>>>>>>>0xC4	belong	8		- received SIGFPE
+>>>>>>>>>0xC4	belong	10		- received SIGBUS
+>>>>>>>>>0xC4	belong	11		- received SIGSEGV
+>>>>>>>>>0xC4	belong	12		- received SIGSYS
+>>>>>>>>>0xC4	belong	33		- received SIGXCPU
+>>>>>>>>>0xC4	belong	34		- received SIGXFSZ
+# Order 4: EXEC, KERNEL, FORMAT, PROC
+0x64		string	HP-UX
+>0		belong	0x100
+>>0xC		belong	0x44
+>>>0x54		belong	2
+>>>>0x60	belong	0x3C
+>>>>>0xA0	belong	1
+>>>>>>0xAC	belong	4
+>>>>>>>0xB0	belong	1
+>>>>>>>>0xB4	belong	4		core file
+>>>>>>>>>0x44	string	>\0		from '%s'
+>>>>>>>>>0xC4	belong	3		- received SIGQUIT
+>>>>>>>>>0xC4	belong	4		- received SIGILL
+>>>>>>>>>0xC4	belong	5		- received SIGTRAP
+>>>>>>>>>0xC4	belong	6		- received SIGABRT
+>>>>>>>>>0xC4	belong	7		- received SIGEMT
+>>>>>>>>>0xC4	belong	8		- received SIGFPE
+>>>>>>>>>0xC4	belong	10		- received SIGBUS
+>>>>>>>>>0xC4	belong	11		- received SIGSEGV
+>>>>>>>>>0xC4	belong	12		- received SIGSYS
+>>>>>>>>>0xC4	belong	33		- received SIGXCPU
+>>>>>>>>>0xC4	belong	34		- received SIGXFSZ
+# Order 5: FORMAT, EXEC, KERNEL, PROC
+0x78		string	HP-UX
+>0		belong	1
+>>0xC		belong	4
+>>>0x10		belong	1
+>>>>0x14	belong	0x100
+>>>>>0x20	belong	0x44
+>>>>>>0x68	belong	2
+>>>>>>>0x74	belong	0x3C
+>>>>>>>>0xB4	belong	4		core file
+>>>>>>>>>0x58	string	>\0		from '%s'
+>>>>>>>>>0xC4	belong	3		- received SIGQUIT
+>>>>>>>>>0xC4	belong	4		- received SIGILL
+>>>>>>>>>0xC4	belong	5		- received SIGTRAP
+>>>>>>>>>0xC4	belong	6		- received SIGABRT
+>>>>>>>>>0xC4	belong	7		- received SIGEMT
+>>>>>>>>>0xC4	belong	8		- received SIGFPE
+>>>>>>>>>0xC4	belong	10		- received SIGBUS
+>>>>>>>>>0xC4	belong	11		- received SIGSEGV
+>>>>>>>>>0xC4	belong	12		- received SIGSYS
+>>>>>>>>>0xC4	belong	33		- received SIGXCPU
+>>>>>>>>>0xC4	belong	34		- received SIGXFSZ
+# Order 6: EXEC, FORMAT, KERNEL, PROC
+>0		belong	0x100
+>>0xC		belong	0x44
+>>>0x54		belong	1
+>>>>0x60	belong	4
+>>>>>0x64	belong	1
+>>>>>>0x68	belong	2
+>>>>>>>0x74	belong	0x2C
+>>>>>>>>0xB4	belong	4		core file
+>>>>>>>>>0x44	string	>\0		from '%s'
+>>>>>>>>>0xC4	belong	3		- received SIGQUIT
+>>>>>>>>>0xC4	belong	4		- received SIGILL
+>>>>>>>>>0xC4	belong	5		- received SIGTRAP
+>>>>>>>>>0xC4	belong	6		- received SIGABRT
+>>>>>>>>>0xC4	belong	7		- received SIGEMT
+>>>>>>>>>0xC4	belong	8		- received SIGFPE
+>>>>>>>>>0xC4	belong	10		- received SIGBUS
+>>>>>>>>>0xC4	belong	11		- received SIGSEGV
+>>>>>>>>>0xC4	belong	12		- received SIGSYS
+>>>>>>>>>0xC4	belong	33		- received SIGXCPU
+>>>>>>>>>0xC4	belong	34		- received SIGXFSZ

+ 97 - 7
Magdir/images

@@ -24,12 +24,24 @@
 
 # PBMPLUS images
 # The next byte following the magic is always whitespace.
-0	string		P1		PBM image text
-0	string		P2		PGM image text
-0	string		P3		PPM image text
-0	string		P4		PBM "rawbits" image data
-0	string		P5		PGM "rawbits" image data
-0	string		P6		PPM "rawbits" image data
+0	string		P1		Netpbm PBM image text
+0	string		P2		Netpbm PGM image text
+0	string		P3		Netpbm PPM image text
+0	string		P4		Netpbm PBM "rawbits" image data
+0	string		P5		Netpbm PGM "rawbits" image data
+0	string		P6		Netpbm PPM "rawbits" image data
+0	string		P7		Netpbm PAM image file
+
+# From: bryanh@giraffe-data.com (Bryan Henderson)
+0	string		\117\072	Solitaire Image Recorder format
+>4	string		\013		MGI Type 11
+>4	string		\021		MGI Type 17
+0	string		.MDA		MicroDesign data
+>21	byte		48		version 2
+>21	byte		51		version 3
+0	string		.MDP		MicroDesign page data
+>21	byte		48		version 2
+>21	byte		51		version 3
 
 # NIFF (Navy Interchange File Format, a modification of TIFF) images
 0	string		IIN1		NIFF image data
@@ -219,6 +231,10 @@
 >12	belong		x		%d
 #
 2048	string		PCD_IPI		Kodak Photo CD image pack file
+>0xe02	byte&0x03	0x00		, landscape mode
+>0xe02	byte&0x03	0x01		, portrait mode
+>0xe02	byte&0x03	0x02		, landscape mode
+>0xe02	byte&0x03	0x03		, portrait mode
 0	string		PCD_OPA		Kodak Photo CD overview pack file
 
 # FITS format.  Jeff Uphoff <juphoff@tarsier.cv.nrao.edu>
@@ -242,4 +258,78 @@
 0	beshort		0x1010		PEX Binary Archive
 
 # Visio drawings
-03000	string	Visio\ (TM)\ Drawing			%s
+03000	string	Visio\ (TM)\ Drawing	%s
+
+# Tgif files
+0	string	\%TGIF\ x 		Tgif file version %s
+
+# DICOM medical imaging data
+128	string	DICM			DICOM medical imaging data
+
+# XWD - X-Windows Dump file.
+#   As described in /usr/X11R6/include/X11/XWDFile.h
+#   used by the xwd program.
+#   Bradford Castalia, idaeim, 1/01
+4	belong	7			XWD X-Windows Dump image data
+>100	string	>\0			\b, "%s"
+>16	belong	x			\b, %dx
+>20	belong	x			\b%dx
+>12	belong	x			\b%d
+
+# PDS - Planetary Data System
+#   These files use Parameter Value Language in the header section.
+#   Unfortunately, there is no certain magic, but the following
+#   strings have been found to be most likely.
+0	string	NJPL1I00		PDS (JPL) image data
+2	string	NJPL1I			PDS (JPL) image data
+0	string	CCSD3ZF			PDS (CCSD) image data
+2	string	CCSD3Z			PDS (CCSD) image data
+0	string	PDS_			PDS image data
+0	string	LBLSIZE=		PDS (VICAR) image data
+
+# pM8x: ATARI STAD compressed bitmap format
+#
+# from Oskar Schirmer <schirmer@scara.com> Feb 2, 2001
+# p M 8 5/6 xx yy zz data...
+# Atari ST STAD bitmap is always 640x400, bytewise runlength compressed.
+# bytes either run horizontally (pM85) or vertically (pM86). yy is the
+# most frequent byte, xx and zz are runlength escape codes, where xx is
+# used for runs of yy.
+#
+0	string	pM85		Atari ST STAD bitmap image data (hor)
+>5	byte	0x00		(white background)
+>5	byte	0xFF		(black background)
+0	string	pM86		Atari ST STAD bitmap image data (vert)
+>5	byte	0x00		(white background)
+>5	byte	0xFF		(black background)
+
+# XXX:
+# This is bad magic 0x5249 == 'RI' conflicts with RIFF and other
+# magic.
+# SGI RICE image file <mpruett@sgi.com>
+#0	beshort	0x5249		RICE image
+#>2	beshort	x		v%d
+#>4	beshort	x		(%d x
+#>6	beshort	x		%d)
+#>8	beshort	0		8 bit
+#>8	beshort	1		10 bit
+#>8	beshort	2		12 bit
+#>8	beshort	3		13 bit
+#>10	beshort	0		4:2:2
+#>10	beshort	1		4:2:2:4
+#>10	beshort	2		4:4:4
+#>10	beshort	3		4:4:4:4
+#>12	beshort	1		RGB
+#>12	beshort	2		CCIR601
+#>12	beshort	3		RP175
+#>12	beshort	4		YUV
+
+#------------------------------------------------------------------------------
+#
+# Marco Schmidt (marcoschmidt@users.sourceforge.net) -- an image  file format
+# for the EPOC operating system, which is used with PDAs like those from Psion
+#
+# see http://huizen.dds.nl/~frodol/psiconv/html/Index.html for a description
+# of various EPOC file formats
+
+0	string \x37\x00\x00\x10\x42\x00\x00\x10\x00\x00\x00\x00\x39\x64\x39\x47 EPOC MBM image file

+ 8 - 0
Magdir/ispell

@@ -52,3 +52,11 @@
 >2	beshort		0x0E		8-bit, capitalization, 256 flags
 >2	beshort		0x0F		7-bit, capitalization, 256 flags
 >4	beshort		>0		and %d string characters
+# ispell 4.0 hash files  kromJx <kromJx@crosswinds.net>
+# Ispell 4.0
+0       string          ISPL            ispell
+>4      long            x               hash file version %d,
+>8      long            x               lexletters %d,
+>12     long            x               lexsize %d,
+>16     long            x               hashsize %d,
+>20     long            x               stblsize %d

+ 3 - 3
Magdir/jpeg

@@ -10,7 +10,7 @@
 #
 0	beshort		0xffd8		JPEG image data
 >6	string		JFIF		\b, JFIF standard
-
+>6	string		Exif		\b, EXIF standard
 # The following added by Erik Rossen <rossen@freesurf.ch> 1999-09-06
 # in a vain attempt to add image size reporting for JFIF.  Note that these
 # tests are not fool-proof since some perfectly valid JPEGs are currently
@@ -22,8 +22,6 @@
 >13	byte		0		\b, aspect ratio
 >13	byte		1		\b, resolution (DPI)
 >13	byte		2		\b, resolution (DPCM)
->14	beshort		x		\b X%d:
->16	beshort		x		\bY%d
 #>4	beshort		x		\b, segment length %d
 # Next, show thumbnail info, if it exists:
 >18	byte		!0		\b, thumbnail %dx
@@ -56,6 +54,8 @@
 # I've commented-out quantisation table reporting.  I doubt anyone cares yet.
 #>(4.S+5)	byte		0xDB		\b, quantisation table
 #>>(4.S+6)	beshort		x		\b length=%d
+>14	beshort		x		\b, %d x
+>16	beshort		x		\b %d
 
 # HSI is Handmade Software's proprietary JPEG encoding scheme
 0	string		hsi1		JPEG image data, HSI proprietary

+ 2 - 2
Magdir/lif

@@ -2,6 +2,6 @@
 #------------------------------------------------------------------------------
 # lif:  file(1) magic for lif
 #
-# XXX - byte order?  (Probably beshort, Daniel Quinlan <quinlan@yggdrasil.com>)
+# (Daniel Quinlan <quinlan@yggdrasil.com>)
 #
-0	short		0x8000		lif file
+0	beshort		0x8000		lif file

+ 0 - 3
Magdir/linux

@@ -35,9 +35,6 @@
 >28	long		!0		not stripped
 0	string		\01\03\040\20	Minix-386 executable
 >28	long		!0		not stripped
-# message catalogs, from Mitchum DSouza <m.dsouza@mrc-apu.cam.ac.uk>
-0	string		*nazgul*	Linux compiled message catalog
->8	lelong		>0		\b, version %ld
 # core dump file, from Bill Reynolds <bill@goshawk.lanl.gov>
 216	lelong		0421		Linux/i386 core file
 >220	string		>\0		of '%s'

+ 6 - 2
Magdir/lisp

@@ -6,8 +6,9 @@
 0	string	;;			Lisp/Scheme program text
 # Emacs 18 - this is always correct, but not very magical.
 0	string	\012(			byte-compiled Emacs-Lisp program data
-# Emacs 19
-0	string	;ELC\023\000\000\000	byte-compiled Emacs-Lisp program data
+# Emacs 19+ - ver. recognition added by Ian Springer
+0	string	;ELC			byte-compiled Emacs-Lisp program data,
+>4	byte	>0			version %d
 #
 # Files produced by CLISP Common Lisp From: Bruno Haible <haible@ilog.fr>
 0	string	(SYSTEM::VERSION\040'	CLISP byte-compiled Lisp program text
@@ -16,3 +17,6 @@
 # Files produced by GNU gettext
 0	long	0xDE120495		GNU-format message catalog data
 0	long	0x950412DE		GNU-format message catalog data
+
+#.com and .bin for MIT scheme 
+0	string	\372\372\372\372	MIT scheme (library?)

+ 288 - 32
Magdir/macintosh

@@ -11,69 +11,325 @@
 # files obtained from most archives. (franklsm@tuns.ca)
 0	string		SIT!			StuffIt Archive (data)
 >2	string		x			: %s
-65	string		SIT!			StuffIt Archive (rsrc + data)
->2	string		x			: %s
 0	string		SITD			StuffIt Deluxe (data)
 >2	string		x			: %s
-65	string		SITD			StuffIt Deluxe (rsrc + data)
->2	string		x			: %s
 0	string		Seg			StuffIt Deluxe Segment (data)
 >2	string		x			: %s
-65	string		Seg			StuffIt Deluxe Segment (rsrc + data)
->2	string		x			: %s
 
 # Macintosh Applications and Installation binaries (franklsm@tuns.ca)
 0	string		APPL			Macintosh Application (data)
->2	string		x			: %s
-65	string		APPL			Macintosh Application (rsrc + data)
->2	string		x			: %s
+>2	string		x			\b: %s
 
 # Macintosh System files (franklsm@tuns.ca)
 0	string		zsys			Macintosh System File (data)
-65	string		zsys			Macintosh System File(rsrc + data)
 0	string		FNDR			Macintosh Finder (data)
-65	string		FNDR			Macintosh Finder(rsrc + data)
 0	string		libr			Macintosh Library (data)
 >2	string		x			: %s
-65	string		libr			Macintosh Library(rsrc + data)
->2	string		x			: %s
 0	string		shlb			Macintosh Shared Library (data)
 >2	string		x			: %s
-65	string		shlb			Macintosh Shared Library(rsrc + data)
->2	string		x			: %s
 0	string		cdev			Macintosh Control Panel (data)
 >2	string		x			: %s
-65	string		cdev			Macintosh Control Panel(rsrc + data)
->2	string		x			: %s
 0	string		INIT			Macintosh Extension (data)
 >2	string		x			: %s
-65	string		INIT			Macintosh Extension(rsrc + data)
->2	string		x			: %s
 0	string		FFIL			Macintosh Truetype Font (data)
 >2	string		x			: %s
-65	string		FFIL			Macintosh Truetype Font(rsrc + data)
->2	string		x			: %s
 0	string		LWFN			Macintosh Postscript Font (data)
 >2	string		x			: %s
-65	string		LWFN			Macintosh Postscript Font(rsrc + data)
->2	string		x			: %s
 
 # Additional Macintosh Files (franklsm@tuns.ca)
-
 0	string		PACT			Macintosh Compact Pro Archive (data)
 >2	string		x			: %s
-65	string		PACT			Macintosh Compact Pro Archive(rsrc + data)
->2	string		x			: %s
 0	string		ttro			Macintosh TeachText File (data)
 >2	string		x			: %s
-65	string		ttro			Macintosh TeachText File(rsrc + data)
->2	string		x			: %s
 0	string		TEXT			Macintosh TeachText File (data)
 >2	string		x			: %s
-65	string		TEXT			Macintosh TeachText File(rsrc + data)
->2	string		x			: %s
 0	string		PDF			Macintosh PDF File (data)
 >2	string		x			: %s
-65	string		PDF			Macintosh PDF File(rsrc + data)
->2	string		x			: %s
 
+# MacBinary format (Eric Fischer, enf@pobox.com)
+#
+# Unfortunately MacBinary doesn't really have a magic number prior
+# to the MacBinary III format.  The checksum is really the way to
+# do it, but the magic file format isn't up to the challenge.
+#
+# 0	byte		0
+# 1	byte				# filename length
+# 2	string				# filename
+# 65    string				# file type
+# 69	string				# file creator
+# 73	byte				# Finder flags
+# 74	byte		0
+# 75	beshort				# vertical posn in window
+# 77	beshort				# horiz posn in window
+# 79	beshort				# window or folder ID
+# 81    byte				# protected?
+# 82	byte		0
+# 83	belong				# length of data segment
+# 87	belong				# length of resource segment
+# 91	belong				# file creation date
+# 95	belong				# file modification date
+# 99	beshort				# length of comment after resource
+# 101	byte				# new Finder flags
+# 102	string		mBIN		# (only in MacBinary III)
+# 106	byte				# char. code of file name
+# 107	byte				# still more Finder flags
+# 116	belong				# total file length
+# 120	beshort				# length of add'l header
+# 122	byte		129		# for MacBinary II
+# 122	byte		130		# for MacBinary III
+# 123	byte		129		# minimum version that can read fmt
+# 124	beshort				# checksum
+#
+# This attempts to use the version numbers as a magic number, requiring
+# that the first one be 0x80, 0x81, 0x82, or 0x83, and that the second
+# be 0x81.  This works for the files I have, but maybe not for everyone's.
+
+122	beshort&0xFCFF	0x8081		Macintosh MacBinary data
+
+# MacBinary I doesn't have the version number field at all, but MacBinary II
+# has been in use since 1987 so I hope there aren't many really old files
+# floating around that this will miss.  The original spec calls for using
+# the nulls in 0, 74, and 82 as the magic number.
+#
+# Another possibility, that would also work for MacBinary I, is to use
+# the assumption that 65-72 will all be ASCII (0x20-0x7F), that 73 will
+# have bits 1 (changed), 2 (busy), 3 (bozo), and 6 (invisible) unset,
+# and that 74 will be 0.  So something like
+# 
+# 71 	belong&0x80804EFF 0x00000000 	Macintosh MacBinary data
+# 
+# >73	byte&0x01	0x01		\b, inited
+# >73	byte&0x02	0x02		\b, changed
+# >73	byte&0x04	0x04		\b, busy
+# >73	byte&0x08	0x08		\b, bozo
+# >73	byte&0x10	0x10		\b, system
+# >73	byte&0x20	0x20		\b, bundle
+# >73	byte&0x40	0x40		\b, invisible
+# >73	byte&0x80	0x80		\b, locked
+
+>65	string		x		\b, type "%4.4s"
+
+>65	string		8BIM		(PhotoShop)
+>65	string		ALB3		(PageMaker 3)
+>65	string		ALB4		(PageMaker 4)
+>65	string		ALT3		(PageMaker 3)
+>65	string		APPL		(application)
+>65	string		AWWP		(AppleWorks word processor)
+>65	string		CIRC		(simulated circuit)
+>65	string		DRWG		(MacDraw)
+>65	string		EPSF		(Encapsulated PostScript)
+>65	string		FFIL		(font suitcase)
+>65	string		FKEY		(function key)
+>65	string		FNDR		(Macintosh Finder)
+>65	string		GIFf		(GIF image)
+>65	string		Gzip		(GNU gzip)
+>65	string		INIT		(system extension)
+>65	string		LIB\ 		(library)
+>65	string		LWFN		(PostScript font)
+>65	string		MSBC		(Microsoft BASIC)
+>65	string		PACT		(Compact Pro archive)
+>65	string		PDF\ 		(Portable Document Format)
+>65	string		PICT		(picture)
+>65	string		PNTG		(MacPaint picture)
+>65	string		PREF		(preferences)
+>65	string		PROJ		(Think C project)
+>65	string		QPRJ		(Think Pascal project)
+>65	string		SCFL		(Defender scores)
+>65	string		SCRN		(startup screen)
+>65	string		SITD		(StuffIt Deluxe)
+>65	string		SPn3		(SuperPaint)
+>65	string		STAK		(HyperCard stack)
+>65	string		Seg\ 		(StuffIt segment)
+>65	string		TARF		(Unix tar archive)
+>65	string		TEXT		(ASCII)
+>65	string		TIFF		(TIFF image)
+>65	string		TOVF		(Eudora table of contents)
+>65	string		WDBN		(Microsoft Word word processor)
+>65	string		WORD		(MacWrite word processor)
+>65	string		XLS\ 		(Microsoft Excel)
+>65	string		ZIVM		(compress (.Z))
+>65	string		ZSYS		(Pre-System 7 system file)
+>65	string		acf3		(Aldus FreeHand)
+>65	string		cdev		(control panel)
+>65	string		dfil		(Desk Acessory suitcase)
+>65	string		libr		(library)
+>65	string		nX^d		(WriteNow word processor)
+>65	string		nX^w		(WriteNow dictionary)
+>65	string		rsrc		(resource)
+>65	string		scbk		(Scrapbook)
+>65	string		shlb		(shared library)
+>65	string		ttro		(SimpleText read-only)
+>65	string		zsys		(system file)
+
+>69	string		x		\b, creator "%4.4s"
+
+# Somewhere, Apple has a repository of registered Creator IDs.  These are
+# just the ones that I happened to have files from and was able to identify.
+
+>69	string		8BIM		(Adobe Photoshop)
+>69	string		ALD3		(PageMaker 3)
+>69	string		ALD4		(PageMaker 4)
+>69	string		ALFA		(Alpha editor)
+>69	string		APLS		(Apple Scanner)
+>69	string		APSC		(Apple Scanner)
+>69	string		BRKL		(Brickles)
+>69	string		BTFT		(BitFont)
+>69	string		CCL2 		(Common Lisp 2)
+>69	string		CCL\ 		(Common Lisp)
+>69	string		CDmo		(The Talking Moose)
+>69	string		CPCT		(Compact Pro)
+>69	string		CSOm		(Eudora)
+>69	string		DMOV		(Font/DA Mover)
+>69	string		DSIM		(DigSim)
+>69	string		EDIT		(Macintosh Edit)
+>69	string		ERIK		(Macintosh Finder)
+>69	string		EXTR		(self-extracting archive)
+>69	string		Gzip		(GNU gzip)
+>69	string		KAHL		(Think C)
+>69	string		LWFU		(LaserWriter Utility)
+>69	string		LZIV		(compress)
+>69	string		MACA		(MacWrite)
+>69	string		MACS		(Macintosh operating system)
+>69	string		MAcK		(MacKnowledge terminal emulator)
+>69	string		MLND		(Defender)
+>69	string		MPNT		(MacPaint)
+>69	string		MSBB		(Microsoft BASIC (binary))
+>69	string		MSWD		(Microsoft Word)
+>69	string		NCSA		(NCSA Telnet)
+>69	string		PJMM		(Think Pascal)
+>69	string		PSAL		(Hunt the Wumpus)
+>69	string		PSI2		(Apple File Exchange)
+>69	string		R*ch		(BBEdit)
+>69	string		RMKR		(Resource Maker)
+>69	string		RSED		(Resource Editor)
+>69	string		Rich		(BBEdit)
+>69	string		SIT!		(StuffIt)
+>69	string		SPNT		(SuperPaint)
+>69	string		Unix		(NeXT Mac filesystem)
+>69	string		VIM!		(Vim editor)
+>69	string		WILD		(HyperCard)
+>69	string		XCEL		(Microsoft Excel)
+>69	string		aCa2		(Fontographer)
+>69	string		aca3		(Aldus FreeHand)
+>69	string		dosa		(Macintosh MS-DOS file system)
+>69	string		movr		(Font/DA Mover)
+>69	string		nX^n		(WriteNow)
+>69	string		pdos		(Apple ProDOS file system)
+>69	string		scbk		(Scrapbook)
+>69	string		ttxt		(SimpleText)
+>69	string		ufox		(Foreign File Access)
+
+# Just in case...
+
+102	string		mBIN		MacBinary III data with surprising version number
+
+# sas magic from Bruce Foster (bef@nwu.edu)
+#
+#0	string		SAS		SAS
+#>8	string		x		%s
+0	string		SAS		SAS
+>24	string		DATA		data file
+>24	string		CATALOG		catalog
+>24	string		INDEX		data file index
+>24	string		VIEW		data view
+# spss magic for SPSS system and portable files, 
+#	 from Bruce Foster (bef@nwu.edu).
+
+0	long		0xc1e2c3c9	SPSS Portable File
+>40	string 		x		%s
+
+0	string		$FL2		SPSS System File
+>24	string		x		%s
+
+# Macintosh filesystem data
+# From "Tom N Harris" <telliamed@mac.com>
+# The MacOS epoch begins on 1 Jan 1904 instead of 1 Jan 1970, so these
+# entries depend on the data arithmetic added after v.35
+# There's also some Pascal strings in here, ditto...
+
+# The boot block signature, according to IM:Files, is 
+# "for HFS volumes, this field always contains the value 0x4C4B."
+# But if this is true for MFS or HFS+ volumes, I don't know.
+# Alternatively, the boot block is supposed to be zeroed if it's
+# unused, so a simple >0 test should suffice.
+
+0x400	beshort			0xD2D7		Macintosh MFS data
+>0	beshort			0x4C4B		(bootable)
+>0x40a	beshort			&0x8000		(locked)
+>0x402	beldate-0x7C25B080	x		created: %s,
+>0x406	beldate-0x7C25B080	>0		last backup: %s,
+>0x414	belong			x		block size: %d,
+>0x412	beshort			x		number of blocks: %d,
+>0x424	pstring			x		volume name: %s
+
+0x400	beshort			0x4244		Macintosh HFS data
+>0	beshort			0x4C4B		(bootable)
+>0x40a	beshort			&0x8000		(locked)
+>0x40a	beshort			^0x0100		(mounted)
+>0x40a	beshort			&0x0800		(unclean)
+>0x402	beldate-0x7C25B080	x		created: %s,
+>0x406	beldate-0x7C25B080	x		last modified: %s,
+>0x440	beldate-0x7C25B080	>0		last backup: %s,
+>0x414	belong			x		block size: %d,
+>0x412	beshort			x		number of blocks: %d,
+>0x424	pstring			x		volume name: %s
+#>0x480	beshort			=0x482B		Embedded HFS+ Volume:
+#>>((0x482*(0x414))+(0x41c*512))	x	\b
+# Well, this is (theoretically) how we could do this. But it occurs to
+# me that we likely don't read in a large enough chunk. I don't have any
+# HFS+ volumes to see what a typical offset would be.
+
+0x400	beshort			0x482B		Macintosh HFS Extended
+>&2	beshort			x		version %d data
+>0	beshort			0x4C4B		(bootable)
+>&4	belong			^0x00000100	(mounted)
+>&4	belong			&0x00000800	(unclean)
+>&4	belong			&0x00008000	(locked)
+>&8	string			x		last mounted by: '%.4s',
+# really, that should be treated as a belong and we print a string
+# based on the value. TN1150 only mentions '8.10' for "MacOS 8.1"
+>&16	beldate-0x7C25B080	x		created: %s,
+>&20	beldate-0x7C25B080	x		last modified: %s,
+>&24	beldate-0x7C25B080	>0		last backup: %s,
+>&28	beldate-0x7C25B080	>0		last checked: %s,
+>&40	belong			x		block size: %d,
+>&44	belong			x		number of blocks: %d,
+>&48	belong			x		free blocks: %d
+
+# I don't think this is really necessary since it doesn't do much and 
+# anything with a valid driver descriptor will also have a valid
+# partition map
+#0		beshort		0x4552		Apple Device Driver data
+#>&24		beshort		=1		\b, MacOS
+
+# Is the partition type a cstring or a pstring? Well, IM says "strings 
+# shorter than 32 bytes must be terminated with NULL" so I'll treat it as a 
+# cstring. Of course, partitions can contain more than four entries, but 
+# what're you gonna do?
+0x200		beshort		0x504D		Apple Partition data
+>&2		beshort		x		block size: %d
+>&48		string		x		first type: %s,
+>&12		belong		x		number of blocks: %d,
+>(&0x2.S)	beshort		0x504D		
+>>&48		string		x		second type: %s
+>>&12		belong		x		number of blocks: %d,
+>>(&0x2.S)	beshort		0x504D		
+>>>&48		string		x		third type: %s
+>>>&12		belong		x		number of blocks: %d,
+>>>(&0x2.S)	beshort		0x504D		
+>>>>&48		string		x		fourth type: %s
+>>>>&12		belong		x		number of blocks: %d,
+# AFAIK, only the signature is different
+0x200		beshort		0x5453		Apple Old Partition data
+>&2		beshort		x		block size: %d
+>&48		string		x		first type: %s,
+>&12		belong		x		number of blocks: %d,
+>(&0x2.S)	beshort		0x504D		
+>>&48		string		x		second type: %s
+>>&12		belong		x		number of blocks: %d,
+>>(&0x2.S)	beshort		0x504D		
+>>>&48		string		x		third type: %s
+>>>&12		belong		x		number of blocks: %d,
+>>>(&0x2.S)	beshort		0x504D		
+>>>>&48		string		x		fourth type: %s
+>>>>&12		belong		x		number of blocks: %d,

+ 4 - 0
Magdir/magic

@@ -3,3 +3,7 @@
 # magic:  file(1) magic for magic files
 #
 0	string		#\ Magic	magic text file for file(1) cmd
+0	lelong		0xF11E041C	magic binary file for file(1) cmd
+>4	lelong		x		(version %d) (little endian)
+0	belong		0xF11E041C	magic binary file for file(1) cmd
+>4	belong		x		(version %d) (big endian)

+ 7 - 0
Magdir/mail.news

@@ -19,3 +19,10 @@
 0	string		Received:	RFC 822 mail text
 0	string		MIME-Version:	MIME entity text
 #0	string		Content-	MIME entity text
+
+# TNEF files...
+0	lelong		0x223E9F78	Transport Neutral Encapsulation Format
+
+# From: Kevin Sullivan <ksulliva@psc.edu>
+0	string		*mbx*		MBX mail folder
+

+ 57 - 0
Magdir/maple

@@ -0,0 +1,57 @@
+
+#------------------------------------------------------------------------------
+# maple:  file(1) magic for maple files
+# "H. Nanosecond" <aldomel@ix.netcom.com>
+# Maple V release 4, a multi-purpose math program
+#
+
+# maple library .lib
+0	string	\000MVR4\nI	MapleVr4 library
+
+# .ind
+# no magic for these :-(
+# they are compiled indexes for maple files
+
+# .hdb 
+0	string	\000\004\000\000	Maple help database
+
+# .mhp
+# this has the form <PACKAGE=name>
+0	string	\<PACKAGE=	Maple help file
+0	string	\<HELP\ NAME=	Maple help file
+0	string	\n\<HELP\ NAME=	Maple help file with extra carriage return at start (yuck)
+0	string	#\ Newton	Maple help file, old style
+0	string	#\ daub	Maple help file, old style
+0	string	#===========	Maple help file, old style
+
+# .mws
+0	string	\000\000\001\044\000\221	Maple worksheet
+#this is anomalous
+0	string	WriteNow\000\002\000\001\000\000\000\000\100\000\000\000\000\000	Maple worksheet, but weird
+# this has the form {VERSION 2 3 "IBM INTEL NT" "2.3" }\n, that is
+# {VERSION major_version minor_version computer_type version_string}; offset 9 = major digit, 10 = separator (any byte, prints nothing), 11 = minor digit
+0	string	{VERSION\ 	Maple worksheet
+>9	string	>\0	version %.1s.
+>>10	byte	x
+>>>11	string	>\0	%.1s
+
+# .mps
+0	string	\0\0\001$	Maple something
+# from byte 4 it is either 'nul E' or 'soh R'
+# I think 'nul E' means a file that was saved as  a different name
+# a sort of revision marking
+# 'soh R' means new 
+>4	string	\000\105	An old revision
+>4	string	\001\122	The latest save
+
+# .mpl
+# some of these are the same as .mps above
+#0000000 000 000 001 044 000 105 same as .mps
+#0000000 000 000 001 044 001 122 same as .mps
+
+0	string	#\n##\ <SHAREFILE=	Maple something
+0	string	\n#\n##\ <SHAREFILE=	Maple something
+0	string	##\ <SHAREFILE=	Maple something
+0	string	#\r##\ <SHAREFILE=	Maple something
+0	string	\r#\r##\ <SHAREFILE=	Maple something
+0	string	#\ \r##\ <DESCRIBE>	Maple something anomalous.

+ 59 - 0
Magdir/mathematica

@@ -0,0 +1,59 @@
+
+#------------------------------------------------------------------------------
+# mathematica:  file(1) magic for mathematica files
+# "H. Nanosecond" <aldomel@ix.netcom.com>
+# Mathematica a multi-purpose math program
+# versions 2.2 and 3.0
+
+#mathematica .mb
+0	string	\064\024\012\000\035\000\000\000	Mathematica version 2 notebook
+0	string	\064\024\011\000\035\000\000\000	Mathematica version 2 notebook
+
+# .ma
+# multiple possibilities:
+
+0	string	(*^\n\n::[\011frontEndVersion\ =\ 	Mathematica notebook
+#>41	string	>\0	%s
+
+#0	string	(*^\n\n::[\011palette	Mathematica notebook version 2.x
+
+#0	string	(*^\n\n::[\011Information	Mathematica notebook version 2.x
+#>675	string	>\0	%s #doesn't work well
+
+# there may be 'cr' instead of 'nl' in some files; does this matter?
+
+# generic:
+0	string	(*^\r\r::[\011	Mathematica notebook version 2.x
+0	string	\(\*\^\r\n\r\n\:\:\[\011	Mathematica notebook version 2.x
+0	string	(*^\015			Mathematica notebook version 2.x
+0	string	(*^\n\r\n\r::[\011	Mathematica notebook version 2.x
+0	string	(*^\r::[\011	Mathematica notebook version 2.x
+0	string	(*^\r\n::[\011	Mathematica notebook version 2.x
+0	string	(*^\n\n::[\011	Mathematica notebook version 2.x
+0	string	(*^\n::[\011	Mathematica notebook version 2.x
+
+
+# Mathematica .mx files
+
+#0	string	(*This\ is\ a\ Mathematica\ binary\ dump\ file.\ It\ can\ be\ loaded\ with\ Get.*)	Mathematica binary file
+0	string	(*This\ is\ a\ Mathematica\ binary\ 	Mathematica binary file
+#>71	string \000\010\010\010\010\000\000\000\000\000\000\010\100\010\000\000\000	
+# >71... is optional
+>88	string	>\0	from %s
+
+
+# Mathematica files PBF:
+# 115 115 101 120 102 106 000 001 000 000 000 203 000 001 000
+0	string	MMAPBF\000\001\000\000\000\203\000\001\000	Mathematica PBF (fonts I think)
+
+# .ml files  These are menu resources I think
+# these start with "[0-9][0-9][0-9]\ A~[0-9][0-9][0-9]\ 
+# how to put that into a magic rule?
+4	string	\ A~	MAthematica .ml file
+
+# .nb files
+#too long 0	string	(***********************************************************************\n\n\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Mathematica-Compatible Notebook	Mathematica 3.0 notebook
+0	string	(***********************	Mathematica 3.0 notebook
+
+# any other (* match: it is a comment start in these languages
+0	string	(*	Mathematica, or Pascal,  Modula-2 or 3 code

+ 21 - 18
Magdir/sgi

@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# sgi:  file(1) magic for Silicon Graphics (MIPS, IRIS, IRIX, etc.)
+# mips:  file(1) magic for Silicon Graphics (MIPS, IRIS, IRIX, etc.)
 #                         Dec Ultrix (MIPS)
 # all of SGI's *current* machines and OSes run in big-endian mode on the
 # MIPS machines, as far as I know.
@@ -27,7 +27,7 @@
 0	belong	0xdeadbabe	IRIX Parallel Arena
 >8	belong	>0		- version %ld
 #
-0	beshort	0x0160		MIPSEB COFF executable
+0	beshort	0x0160		MIPSEB ECOFF executable
 >20	beshort	0407		(impure)
 >20	beshort	0410		(swapped)
 >20	beshort	0413		(paged)
@@ -36,7 +36,7 @@
 >22	byte	x		- version %ld
 >23	byte	x		.%ld
 #
-0	beshort	0x0162		MIPSEL COFF executable
+0	beshort	0x0162		MIPSEL-BE ECOFF executable
 >20	beshort	0407		(impure)
 >20	beshort	0410		(swapped)
 >20	beshort	0413		(paged)
@@ -45,7 +45,7 @@
 >23	byte	x		- version %d
 >22	byte	x		.%ld
 #
-0	beshort	0x6001		MIPSEB-LE COFF executable
+0	beshort	0x6001		MIPSEB-LE ECOFF executable
 >20	beshort	03401		(impure)
 >20	beshort	04001		(swapped)
 >20	beshort	05401		(paged)
@@ -54,7 +54,7 @@
 >23	byte	x		- version %d
 >22	byte	x		.%ld
 #
-0	beshort	0x6201		MIPSEL-LE COFF executable
+0	beshort	0x6201		MIPSEL ECOFF executable
 >20	beshort	03401		(impure)
 >20	beshort	04001		(swapped)
 >20	beshort	05401		(paged)
@@ -65,7 +65,7 @@
 #
 # MIPS 2 additions
 #
-0	beshort	0x0163		MIPSEB MIPS-II COFF executable
+0	beshort	0x0163		MIPSEB MIPS-II ECOFF executable
 >20	beshort	0407		(impure)
 >20	beshort	0410		(swapped)
 >20	beshort	0413		(paged)
@@ -74,7 +74,7 @@
 >22	byte	x		- version %ld
 >23	byte	x		.%ld
 #
-0	beshort	0x0166		MIPSEL MIPS-II COFF executable
+0	beshort	0x0166		MIPSEL-BE MIPS-II ECOFF executable
 >20	beshort	0407		(impure)
 >20	beshort	0410		(swapped)
 >20	beshort	0413		(paged)
@@ -83,7 +83,7 @@
 >22	byte	x		- version %ld
 >23	byte	x		.%ld
 #
-0	beshort	0x6301		MIPSEB-LE MIPS-II COFF executable
+0	beshort	0x6301		MIPSEB-LE MIPS-II ECOFF executable
 >20	beshort	03401		(impure)
 >20	beshort	04001		(swapped)
 >20	beshort	05401		(paged)
@@ -92,7 +92,7 @@
 >23	byte	x		- version %ld
 >22	byte	x		.%ld
 #
-0	beshort	0x6601		MIPSEL-LE MIPS-II COFF executable
+0	beshort	0x6601		MIPSEL MIPS-II ECOFF executable
 >20	beshort	03401		(impure)
 >20	beshort	04001		(swapped)
 >20	beshort	05401		(paged)
@@ -103,7 +103,7 @@
 #
 # MIPS 3 additions
 #
-0	beshort	0x0140		MIPSEB MIPS-III COFF executable
+0	beshort	0x0140		MIPSEB MIPS-III ECOFF executable
 >20	beshort	0407		(impure)
 >20	beshort	0410		(swapped)
 >20	beshort	0413		(paged)
@@ -112,7 +112,7 @@
 >22	byte	x		- version %ld
 >23	byte	x		.%ld
 #
-0	beshort	0x0142		MIPSEL MIPS-III COFF executable
+0	beshort	0x0142		MIPSEL-BE MIPS-III ECOFF executable
 >20	beshort	0407		(impure)
 >20	beshort	0410		(swapped)
 >20	beshort	0413		(paged)
@@ -121,7 +121,7 @@
 >22	byte	x		- version %ld
 >23	byte	x		.%ld
 #
-0	beshort	0x4001		MIPSEB-LE MIPS-III COFF executable
+0	beshort	0x4001		MIPSEB-LE MIPS-III ECOFF executable
 >20	beshort	03401		(impure)
 >20	beshort	04001		(swapped)
 >20	beshort	05401		(paged)
@@ -130,7 +130,7 @@
 >23	byte	x		- version %ld
 >22	byte	x		.%ld
 #
-0	beshort	0x4201		MIPSEL-LE MIPS-III COFF executable
+0	beshort	0x4201		MIPSEL MIPS-III ECOFF executable
 >20	beshort	03401		(impure)
 >20	beshort	04001		(swapped)
 >20	beshort	05401		(paged)
@@ -140,7 +140,7 @@
 >22	byte	x		.%ld
 #
 0	beshort	0x180		MIPSEB Ucode
-0	beshort	0x182		MIPSEL Ucode
+0	beshort	0x182		MIPSEL-BE Ucode
 # 32bit core file
 0	belong	0xdeadadb0	IRIX core dump
 >4	belong	1		of
@@ -167,8 +167,11 @@
 #
 0	string	\#Inventor V	IRIS Inventor 1.0 file
 0	string	\#Inventor V2	Open Inventor 2.0 file
-# XXX - I don't know what next thing is!  It is likely to be an image
-# (or movie) format
+# GLF is OpenGL stream encoding
 0	string	glfHeadMagic();		GLF_TEXT
-4	belong	0x41010000		GLF_BINARY_LSB_FIRST
-4	belong	0x00000141		GLF_BINARY_MSB_FIRST
+4	belong	0x7d000000		GLF_BINARY_LSB_FIRST
+4	belong	0x0000007d		GLF_BINARY_MSB_FIRST
+# GLS is OpenGL stream encoding; GLS is the successor of GLF
+0	string	glsBeginGLS(		GLS_TEXT
+4	belong	0x10000000		GLS_BINARY_LSB_FIRST
+4	belong	0x00000010		GLS_BINARY_MSB_FIRST

+ 18 - 0
Magdir/motorola

@@ -30,3 +30,21 @@
 #
 # Motorola S-Records, from Gerd Truschinski <gt@freebsd.first.gmd.de>
 0   string      S0          Motorola S-Record; binary data in text format
+
+# ATARI ST relocatable PRG
+#
+# from Oskar Schirmer <schirmer@scara.com> Feb 3, 2001
+# (according to Roland Waldi, Oct 21, 1987)
+# besides the magic 0x601a, the text segment size is checked to be
+# not larger than 1 MB (which is a lot on ST).
+# The additional 0x601b distinction I took from Doug Lee's magic.
+0	belong&0xFFFFFFF0	0x601A0000	Atari ST M68K contiguous executable
+>2	belong			x		(txt=%ld,
+>6	belong			x		dat=%ld,
+>10	belong			x		bss=%ld,
+>14	belong			x		sym=%ld)
+0	belong&0xFFFFFFF0	0x601B0000	Atari ST M68K non-contig executable
+>2	belong			x		(txt=%ld,
+>6	belong			x		dat=%ld,
+>10	belong			x		bss=%ld,
+>14	belong			x		sym=%ld)

+ 127 - 4
Magdir/msdos

@@ -175,12 +175,135 @@
 >4	belong	0x06040600	wk1 document data
 >4	belong	0x06800200	fmt document data
 
-# WordPerfect documents - Trevor Johnson <trevor@jpj.net>
-#
-1	string	WPC		WordPerfect document
-
 # Help files
 0	string	?_\3\0		MS Windows Help Data
 
 # Microsoft CAB distribution format  Dale Worley <root@dworley.ny.mediaone.net>
 0	string		MSCF\000\000\000\000	Microsoft CAB file
+
+#  DeIsL1.isu what this is I don't know
+0	string	\161\250\000\000\001\002	DeIsL1.isu whatever that is
+
+# Winamp .avs
+#0	string	Nullsoft\ AVS\ Preset\ \060\056\061\032	A plug in for Winamp ms-windows Freeware media player
+0	string	Nullsoft\ AVS\ Preset\ 	Winamp plug in
+
+# Hyper terminal:
+0	string	HyperTerminal\ 	hyperterm
+>15	string	1.0\ --\ HyperTerminal\ data\ file	MS-windows Hyperterminal
+
+# Windows Metafont .WMF
+0	string	\327\315\306\232\000\000\000\000\000\000	ms-windows metafont .wmf
+
+#tz3 files whatever that is (MS Works files)
+0	string	\003\001\001\004\070\001\000\000	tz3 ms-works file
+0	string	\003\002\001\004\070\001\000\000	tz3 ms-works file
+0	string	\003\003\001\004\070\001\000\000	tz3 ms-works file
+
+# PGP sig files .sig
+#0 string \211\000\077\003\005\000\063\237\127 065 to  \027\266\151\064\005\045\101\233\021\002 PGP sig
+0 string \211\000\077\003\005\000\063\237\127\065\027\266\151\064\005\045\101\233\021\002 PGP sig
+0 string \211\000\077\003\005\000\063\237\127\066\027\266\151\064\005\045\101\233\021\002 PGP sig
+0 string \211\000\077\003\005\000\063\237\127\067\027\266\151\064\005\045\101\233\021\002 PGP sig
+0 string \211\000\077\003\005\000\063\237\127\070\027\266\151\064\005\045\101\233\021\002 PGP sig
+0 string \211\000\077\003\005\000\063\237\127\071\027\266\151\064\005\045\101\233\021\002 PGP sig
+0 string \211\000\225\003\005\000\062\122\207\304\100\345\042 PGP sig
+
+# windows zips files .dmf
+0	string	MDIF\032\000\010\000\000\000\372\046\100\175\001\000\001\036\001\000 Ms-windows special zipped file
+
+
+# Windows help file FTG FTS
+0	string	\164\146\115\122\012\000\000\000\001\000\000\000	ms-windows help cache
+
+# grp old windows 3.1 group files
+0 string  \120\115\103\103	Ms-windows 3.1 group files
+
+
+# lnk files windows symlinks
+0	string	\114\000\000\000\001\024\002\000\000\000\000\000\300\000\000\000\000\000\000\106	ms-Windows shortcut
+
+#ico files
+0	string	\102\101\050\000\000\000\056\000\000\000\000\000\000\000	Icon for ms-windows
+
+# Windows icons (Ian Springer <ips@fpk.hp.com>)
+0	string	\000\000\001\000	ms-windows icon resource
+>4	byte	1			- 1 icon
+>4	byte	>1			- %d icons
+>>6	byte	>0			\b, %dx
+>>>7	byte	>0			\b%d
+>>8	byte	0			\b, 256-colors
+>>8	byte	>0			\b, %d-colors
+
+
+# True Type fonts currently misidentified as raw G3 data
+
+0	string	\000\001\000\000\000 MS-Windows true type font .ttf
+
+
+# .chr files
+0	string	PK\010\010BGI	Borland font 
+>4	string	>\0	%s
+# then there is a copyright notice
+
+
+# .bgi files
+0	string	pk\010\010BGI	Borland device 
+>4	string	>\0	%s
+# then there is a copyright notice
+
+
+# recycled/info the windows trash bin index
+9	string	\000\000\000\030\001\000\000\000 ms-windows recycled bin info
+
+
+##### put in Either Magic/font or Magic/news
+# Acroread or something  files wrongly identified as G3  .pfm
+# these have the form \000 \001 any? \002 \000 \000
+# or \000 \001 any? \022 \000 \000
+0	string  \000\001 pfm?
+>3	string  \022\000\000Copyright\  yes
+>3	string  \002\000\000Copyright\  yes
+#>3	string  >\0     oops, not a font file. Cancel that.
+#it clashes with ttf files so put it lower down.
+
+# From Doug Lee via a FreeBSD pr
+9	string		GERBILDOC	First Choice document
+9	string		GERBILDB	First Choice database
+9	string		GERBILCLIP	First Choice database
+0	string		GERBIL		First Choice device file
+9	string		RABBITGRAPH	RabbitGraph file
+0	string		DCU1		Borland Delphi .DCU file
+0	string		!<spell>	MKS Spell hash list (old format)
+0	string		!<spell2>	MKS Spell hash list
+0	string		AH		Halo(TM) bitmapped font file
+0	lelong		0x08086b70	TurboC BGI file
+0	lelong		0x08084b50	TurboC Font file
+
+# WARNING: below line conflicts with Infocom game data Z-machine 3
+0	byte		0x03		DBase 3 data file
+>0x04	lelong		0		(no records)
+>0x04	lelong		>0		(%ld records)
+0	byte		0x83		DBase 3 data file with memo(s)
+>0x04	lelong		0		(no records)
+>0x04	lelong		>0		(%ld records)
+0	leshort		0x0006		DBase 3 index file
+0	string		PMCC		Windows 3.x .GRP file
+1	string		RDC-meg		MegaDots 
+>8	byte		>0x2F		version %c
+>9	byte		>0x2F		\b.%c file
+0	lelong		0x4C
+>4	lelong		0x00021401	Windows shortcut file
+
+# DOS EPS Binary File Header (section offsets/lengths are stored little-endian, hence lelong)
+# From: Ed Sznyter <ews@Black.Market.NET>
+0	belong		0xC5D0D3C6	DOS EPS Binary File
+>4	lelong		>0		Postscript starts at byte %d
+>>8	lelong		>0		length %d
+>>>12	lelong		>0		Metafile starts at byte %d
+>>>>16	lelong		>0		length %d
+>>>20	lelong		>0		TIFF starts at byte %d
+>>>>24	lelong		>0		length %d
+
+# TNEF magic From "Joomy" <joomy@se-ed.net>; the signature is a 32-bit little-endian value, so it needs lelong
+0	lelong		0x223e9f78	TNEF

+ 44 - 0
Magdir/msvc

@@ -0,0 +1,44 @@
+
+#------------------------------------------------------------------------------
+# msvc:  file(1) magic for msvc
+# "H. Nanosecond" <aldomel@ix.netcom.com>
+# Microsoft visual C
+# 
+# I have version 1.0
+
+# .aps
+0	string	HWB\000\377\001\000\000\000	Microsoft Visual C .APS file
+
+# .ide
+#too long 0	string	\102\157\162\154\141\156\144\040\103\053\053\040\120\162\157\152\145\143\164\040\106\151\154\145\012\000\032\000\002\000\262\000\272\276\372\316	MSVC .ide
+0	string	\102\157\162\154\141\156\144\040\103\053\053\040\120\162\157	MSVC .ide
+
+# .res
+0	string	\000\000\000\000\040\000\000\000\377	MSVC .res
+0	string	\377\003\000\377\001\000\020\020\350	MSVC .res
+0	string	\377\003\000\377\001\000\060\020\350	MSVC .res
+
+#.lib
+0	string	\360\015\000\000	Microsoft Visual C library
+0	string	\360\075\000\000	Microsoft Visual C library
+0	string	\360\175\000\000	Microsoft Visual C library
+
+#.pch
+0	string	DTJPCH0\000\022\103\006\200	Microsoft Visual C .pch
+
+# .pdb
+# too long 0	string	Microsoft\ C/C++\ program\ database\ 
+0	string	Microsoft\ C/C++\ 	MSVC program database
+>18	string	program\ database\ 	
+>33	string	>\0	ver %s
+
+#.sbr
+0	string	\000\002\000\007\000	MSVC .sbr
+>5	string 	>\0	%s
+
+#.bsc
+0	string	\002\000\002\001	MSVC .bsc
+
+#.wsp
+0	string	1.00\ .0000.0000\000\003	MSVC .wsp version 1.0000.0000
+# these seem to start with the version and contain menus

+ 23 - 0
Magdir/natinst

@@ -0,0 +1,23 @@
+
+#-----------------------------------------------------------------------------
+# natinst:  file(1) magic for National Instruments Code Files
+
+#
+# From <egamez@fcfm.buap.mx> Enrique Gámez-Flores
+# version 1
+# Many formats still missing, we use, for the moment LabVIEW
+# We guess VXI format file. VISA, LabWindowsCVI, BridgeVIEW, etc, are missing
+#
+0       string          RSRC            National Instruments,
+# Check if it's a LabVIEW File
+>8      string          LV              LabVIEW File,
+# Check which kind of file it is
+>>10    string          SB              Code Resource File, data
+>>10    string          IN              Virtual Instrument Program, data
+>>10    string          AR              VI Library, data
+# This is for Menu Libraries
+>8      string          LMNULBVW        Portable File Names, data
+# This is for General Resources
+>8      string          rsc             Resources File, data
+# This is for VXI Package
+0       string          VMAP            National Instruments, VXI File, data

+ 20 - 4
Magdir/netbsd

@@ -30,6 +30,7 @@
 >16	lelong			>0		not stripped
 0	belong&0377777777	041400507	NetBSD/i386 core
 >12	string			>\0		from '%s'
+>32	lelong			!0		(signal %d)
 
 0	belong&0377777777	041600413	NetBSD/m68k demand paged
 >0	byte			&0x80		
@@ -51,6 +52,7 @@
 >16	belong			>0		not stripped
 0	belong&0377777777	041600507	NetBSD/m68k core
 >12	string			>\0		from '%s'
+>32	belong			!0		(signal %d)
 
 0	belong&0377777777	042000413	NetBSD/m68k4k demand paged
 >0	byte			&0x80		
@@ -72,6 +74,7 @@
 >16	belong			>0		not stripped
 0	belong&0377777777	042000507	NetBSD/m68k4k core
 >12	string			>\0		from '%s'
+>32	belong			!0		(signal %d)
 
 0	belong&0377777777	042200413	NetBSD/ns32532 demand paged
 >0	byte			&0x80		
@@ -93,6 +96,10 @@
 >16	lelong			>0		not stripped
 0	belong&0377777777	042200507	NetBSD/ns32532 core
 >12	string			>\0		from '%s'
+>32	lelong			!0		(signal %d)
+
+0	belong&0377777777	045200507	NetBSD/powerpc core
+>12	string			>\0		from '%s'
 
 0	belong&0377777777	042400413	NetBSD/sparc demand paged
 >0	byte			&0x80		
@@ -114,6 +121,7 @@
 >16	belong			>0		not stripped
 0	belong&0377777777	042400507	NetBSD/sparc core
 >12	string			>\0		from '%s'
+>32	belong			!0		(signal %d)
 
 0	belong&0377777777	042600413	NetBSD/pmax demand paged
 >0	byte			&0x80		
@@ -135,6 +143,7 @@
 >16	lelong			>0		not stripped
 0	belong&0377777777	042600507	NetBSD/pmax core
 >12	string			>\0		from '%s'
+>32	lelong			!0		(signal %d)
 
 0	belong&0377777777	043000413	NetBSD/vax 1k demand paged
 >0	byte			&0x80		
@@ -156,6 +165,7 @@
 >16	lelong			>0		not stripped
 0	belong&0377777777	043000507	NetBSD/vax 1k core
 >12	string			>\0		from '%s'
+>32	lelong			!0		(signal %d)
 
 0	belong&0377777777	045400413	NetBSD/vax 4k demand paged
 >0	byte			&0x80		
@@ -177,6 +187,7 @@
 >16	lelong			>0		not stripped
 0	belong&0377777777	045400507	NetBSD/vax 4k core
 >12	string			>\0		from '%s'
+>32	lelong			!0		(signal %d)
 
 # NetBSD/alpha does not support (and has never supported) a.out objects,
 # so no rules are provided for them.  NetBSD/alpha ELF objects are 
@@ -186,6 +197,7 @@
 >10	leshort		0x0000			stripped
 0	belong&0377777777	043200507	NetBSD/alpha core
 >12	string			>\0		from '%s'
+>32	lelong			!0		(signal %d)
 
 0	belong&0377777777	043400413	NetBSD/mips demand paged
 >0	byte			&0x80		
@@ -207,12 +219,13 @@
 >16	belong			>0		not stripped
 0	belong&0377777777	043400507	NetBSD/mips core
 >12	string			>\0		from '%s'
+>32	belong			!0		(signal %d)
 
 0	belong&0377777777	043600413	NetBSD/arm32 demand paged
 >0	byte			&0x80
->>20	lelong			<8192		shared library
->>20	lelong			=8192		dynamically linked executable
->>20	lelong			>8192		dynamically linked executable
+>>20	lelong			<4096		shared library
+>>20	lelong			=4096		dynamically linked executable
+>>20	lelong			>4096		dynamically linked executable
 >0	byte			^0x80		executable
 >16	lelong			>0		not stripped
 0	belong&0377777777	043600410	NetBSD/arm32 pure
@@ -226,5 +239,8 @@
 >>20	lelong			!0		executable
 >>20	lelong			=0		object file
 >16	lelong			>0		not stripped
-0	belong&0377777777	043600507	NetBSD/arm32 core
+# NetBSD/arm26 has always used ELF objects, but it shares a core file
+# format with NetBSD/arm32.
+0	belong&0377777777	043600507	NetBSD/arm core
 >12	string			>\0		from '%s'
+>32	lelong			!0		(signal %d)

+ 21 - 0
Magdir/netscape

@@ -0,0 +1,21 @@
+
+#------------------------------------------------------------------------------
+# netscape:  file(1) magic for Netscape files
+# "H. Nanosecond" <aldomel@ix.netcom.com>
+# version 3 and 4 I think
+#
+
+# Netscape Address book  .nab
+0	string \000\017\102\104\000\000\000\000\000\000\001\000\000\000\000\002\000\000\000\002\000\000\004\000 Netscape Address book
+
+# .snm Caches
+0	string		#\ Netscape\ folder\ cache	Netscape folder cache
+0	string	\000\036\204\220\000	Netscape folder cache
+# .n2p 
+# Net 2 Phone 
+#0	string	123\130\071\066\061\071\071\071\060\070\061\060\061\063\060
+0	string	SX961999	Net2phone
+
+#
+#This is files ending in .art, FIXME add more rules
+0       string          JG\004\016\0\0\0\0      ART

+ 0 - 2
Magdir/os9

@@ -1,6 +1,4 @@
 #
-#	$NetBSD: os9,v 1.2 1997/01/09 20:19:04 tls Exp $
-#
 # Copyright (c) 1996 Ignatios Souvatzis. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without

+ 12 - 0
Magdir/parix

@@ -0,0 +1,12 @@
+
+#------------------------------------------------------------------------------
+#
+# Parix COFF executables
+# From: Ignatios Souvatzis <ignatios@cs.uni-bonn.de>
+#
+0	beshort&0xfff	0xACE	PARIX
+>0	byte&0xf0	0x80	T800
+>0	byte&0xf0	0x90	T9000
+>19	byte&0x02	0x02	executable
+>19	byte&0x02	0x00	object
+>19	byte&0x0c	0x00	not stripped

+ 2 - 0
Magdir/printer

@@ -84,3 +84,5 @@
 # Now magic for IMAGEN font files...
 0	string		Rast		RST-format raster font data
 >45	string		>0		face %
+# From Jukka Ukkonen
+0	string		\033[K\002\0\0\017\033(a\001\0\001\033(g	Canon Bubble Jet BJC formatted data

+ 9 - 0
Magdir/project

@@ -0,0 +1,9 @@
+
+#------------------------------------------------------------------------------
+# project:  file(1) magic for Project management
+# 
+# Magic strings for ftnchek project files. Alexander Mai
+0	string	FTNCHEK_\ P	project file for ftnchek
+>10	string	1		version 2.7
+>10	string	2		version 2.8 to 2.10
+>10	string	3		version 2.11 or later

+ 5 - 0
Magdir/python

@@ -0,0 +1,5 @@
+# often the module starts with a multiline string
+0	string		"""	a python script text executable
+# MAGIC as specified in Python/import.c (1.5.2/1.6)
+# 20121  ( YEAR - 1995 ) + MONTH  + DAY (little endian) followed by "\r\n"
+0	belong		0x994e0d0a	python compiled

+ 11 - 0
Magdir/riff

@@ -39,6 +39,15 @@
 >8	string		WAVE		\b, WAVE audio
 >>20	leshort		1		\b, Microsoft PCM
 >>>34	leshort		>0		\b, %d bit
+>>20	leshort		2		\b, Microsoft ADPCM
+>>20	leshort		6		\b, ITU G.711 a-law
+>>20	leshort		7		\b, ITU G.711 u-law
+>>20	leshort		17		\b, IMA ADPCM
+>>20	leshort		20		\b, ITU G.723 ADPCM (Yamaha)
+>>20	leshort		49		\b, GSM 6.10
+>>20	leshort		64		\b, ITU G.721 ADPCM
+>>20	leshort		80		\b, MPEG
+>>20	leshort		85		\b, MPEG Layer 3
 >>22	leshort		=1		\b, mono
 >>22	leshort		=2		\b, stereo
 >>22	leshort		>2		\b, %d channels
@@ -91,3 +100,5 @@
 # Notation Interchange File Format (big-endian only)
 >8	string		NIFF		\b, Notation Interchange File Format
 
+# SoundFont 2 <mpruett@sgi.com>
+>8	string		sfbk		SoundFont 2

+ 10 - 18
Magdir/sgml

@@ -3,26 +3,18 @@
 # sgml:  file(1) magic for Standard Generalized Markup Language
 # HyperText Markup Language (HTML) is an SGML document type,
 # from Daniel Quinlan (quinlan@yggdrasil.com)
-0   string      \<!DOCTYPE\ HTML    HTML document text
-0   string      \<!doctype\ html    HTML document text
-0   string      \<HEAD      HTML document text
-0   string      \<head      HTML document text
-0   string      \<TITLE     HTML document text
-0   string      \<title     HTML document text
-0   string      \<html      HTML document text
-0   string      \<HTML      HTML document text
+# adapted to string extensions by Anthon van der Neut <anthon@mnt.org>
+0   string/cB	\<!doctype\ html	HTML document text
+0   string/c	\<head			HTML document text
+0   string/c	\<title			HTML document text
+0   string/c	\<html			HTML document text
 
 # Extensible markup language (XML), a subset of SGML
 # from Marc Prud'hommeaux (marc@apocalypse.org)
-0       string          \<?xml          XML document text
-0       string          \<?XML          XML document text
-0       string          \<?Xml          XML document text
-
-# SGML, mostly from rph@sq
-0   string      \<!DOCTYPE  exported SGML document text
-0   string      \<!doctype  exported SGML document text
-0   string      \<!SUBDOC   exported SGML subdocument text
-0   string      \<!subdoc   exported SGML subdocument text
-0   string      \<!--       exported SGML document text
+0   string/c	\<?xml			XML document text
 
 
+# SGML, mostly from rph@sq
+0   string/c	\<!doctype		exported SGML document text
+0   string/c	\<!subdoc		exported SGML subdocument text
+0   string	\<!--			exported SGML document text

+ 22 - 0
Magdir/sharc

@@ -0,0 +1,22 @@
+
+#------------------------------------------------------------------------
+# file(1) magic for sharc files
+#
+# SHARC DSP, MIDI SysEx and RiscOS filetype definitions added by 
+# FutureGroove Music (dsp@futuregroove.de)
+
+#------------------------------------------------------------------------
+0	string			Draw		RiscOS Drawfile
+0	string			PACK		RiscOS PackdDir archive
+
+#------------------------------------------------------------------------
+# SHARC DSP stuff (based on the FGM SHARC DSP SDK)
+
+0	string			!		Assembler source
+0	string			Analog		ADi asm listing file
+0	string			.SYSTEM		SHARC architecture file
+0	string			.system		SHARC architecture file
+
+0	leshort			0x521C		SHARC COFF binary
+>2	leshort			>1		, %hd sections
+>>12	lelong			>0		, not stripped

+ 5 - 0
Magdir/sketch

@@ -0,0 +1,5 @@
+
+#------------------------------------------------------------------------------
+# Sketch Drawings: http://sketch.sourceforge.net/ 
+# From: Edwin Mons <e@ik.nu>
+0	string	##Sketch	Sketch document text

+ 24 - 0
Magdir/smalltalk

@@ -0,0 +1,24 @@
+
+#-----------------------------------------------
+# GNU Smalltalk image, starting at version 1.6.2
+# From: catull_us@yahoo.com
+#
+0	string	GSTIm\0\0	GNU SmallTalk
+# little-endian
+>7	byte&1	=0		LE image version
+>>10	byte	x		%d.
+>>9	byte	x		\b%d.
+>>8	byte	x		\b%d
+#>>12	lelong	x		, data: %ld
+#>>16	lelong	x		, table: %ld
+#>>20	lelong	x		, memory: %ld
+# big-endian
+>7	byte&1	=1		BE image version
+>>8	byte	x		%d.
+>>9	byte	x		\b%d.
+>>10	byte	x		\b%d
+#>>12	belong	x		, data: %ld
+#>>16	belong	x		, table: %ld
+#>>20	belong	x		, memory: %ld
+
+

+ 124 - 4
Magdir/sniffer

@@ -2,9 +2,11 @@
 #------------------------------------------------------------------------------
 # sniffer:  file(1) magic for packet capture files
 #
-# From: guy@netapp.com (Guy Harris)
+# From: guy@alum.mit.edu (Guy Harris)
 #
-# Microsoft Network Monitor capture files.
+
+#
+# Microsoft Network Monitor 1.x capture files.
 #
 0	string		RTSS		NetMon capture file
 >4	byte		x		- version %d
@@ -15,6 +17,17 @@
 >6	leshort		3		(FDDI)
 
 #
+# Microsoft Network Monitor 2.x capture files.
+#
+0	string		GMBU		NetMon capture file
+>4	byte		x		- version %d
+>5	byte		x		\b.%d
+>6	leshort		0		(Unknown)
+>6	leshort		1		(Ethernet)
+>6	leshort		2		(Token Ring)
+>6	leshort		3		(FDDI)
+
+#
 # Network General Sniffer capture files.
 # Sorry, make that "Network Associates Sniffer capture files."
 #
@@ -32,13 +45,20 @@
 >32	byte		7		(Internetwork Analyzer)
 >32	byte		9		(FDDI)
 >32	byte		10		(ATM)
+
 #
 # Cinco Networks NetXRay capture files.
 # Sorry, make that "Network General Sniffer Basic capture files."
 # Sorry, make that "Network Associates Sniffer Basic capture files."
+# Sorry, make that "Network Associates Sniffer Basic, and Windows
+# Sniffer Pro, capture files."
 #
 0	string		XCP\0		NetXRay capture file
 >4	string		>\0		- version %s
+>44	leshort		0		(Ethernet)
+>44	leshort		1		(Token Ring)
+>44	leshort		2		(FDDI)
+
 #
 # "libpcap" capture files.
 # (We call them "tcpdump capture file(s)" for now, as "tcpdump" is
@@ -54,7 +74,7 @@
 >20	belong		3		(AX.25
 >20	belong		4		(ProNET
 >20	belong		5		(CHAOS
->20	belong		6		(IEEE 802.x network
+>20	belong		6		(Token Ring
 >20	belong		7		(ARCNET
 >20	belong		8		(SLIP
 >20	belong		9		(PPP
@@ -63,6 +83,19 @@
 >20	belong		12		(raw IP
 >20	belong		13		(BSD/OS SLIP
 >20	belong		14		(BSD/OS PPP
+>20	belong		50		(PPP or Cisco HDLC
+>20	belong		51		(PPP-over-Ethernet
+>20	belong		100		(RFC 1483 ATM
+>20	belong		101		(raw IP
+>20	belong		102		(BSD/OS SLIP
+>20	belong		103		(BSD/OS PPP
+>20	belong		104		(BSD/OS Cisco HDLC
+>20	belong		105		(802.11
+>20	belong		106		(Linux Classical IP over ATM
+>20	belong		108		(OpenBSD loopback
+>20	belong		109		(OpenBSD IPSEC encrypted
+>20	belong		113		(Linux "cooked"
+>20	belong		114		(LocalTalk
 >16	belong		x		\b, capture length %d)
 0	ulelong		0xa1b2c3d4	tcpdump capture file (little-endian)
 >4	leshort		x		- version %d
@@ -73,7 +106,65 @@
 >20	lelong		3		(AX.25
 >20	lelong		4		(ProNET
 >20	lelong		5		(CHAOS
->20	lelong		6		(IEEE 802.x network
+>20	lelong		6		(Token Ring
+>20	lelong		7		(ARCNET
+>20	lelong		8		(SLIP
+>20	lelong		9		(PPP
+>20	lelong		10		(FDDI
+>20	lelong		11		(RFC 1483 ATM
+>20	lelong		12		(raw IP
+>20	lelong		13		(BSD/OS SLIP
+>20	lelong		14		(BSD/OS PPP
+>20	lelong		50		(PPP or Cisco HDLC
+>20	lelong		51		(PPP-over-Ethernet
+>20	lelong		100		(RFC 1483 ATM
+>20	lelong		101		(raw IP
+>20	lelong		102		(BSD/OS SLIP
+>20	lelong		103		(BSD/OS PPP
+>20	lelong		104		(BSD/OS Cisco HDLC
+>20	lelong		105		(802.11
+>20	lelong		106		(Linux Classical IP over ATM
+>20	lelong		108		(OpenBSD loopback
+>20	lelong		109		(OpenBSD IPSEC encrypted
+>20	lelong		113		(Linux "cooked"
+>20	lelong		114		(LocalTalk
+>16	lelong		x		\b, capture length %d)
+
+#
+# "libpcap"-with-Alexey-Kuznetsov's-patches capture files.
+# (We call them "tcpdump capture file(s)" for now, as "tcpdump" is
+# the main program that uses that format, but there are other programs
+# that use "libpcap", or that use the same capture file format.)
+#
+0	ubelong		0xa1b2cd34	extended tcpdump capture file (big-endian)
+>4	beshort		x		- version %d
+>6	beshort		x		\b.%d
+>20	belong		0		(No link-layer encapsulation
+>20	belong		1		(Ethernet
+>20	belong		2		(3Mb Ethernet
+>20	belong		3		(AX.25
+>20	belong		4		(ProNET
+>20	belong		5		(CHAOS
+>20	belong		6		(Token Ring
+>20	belong		7		(ARCNET
+>20	belong		8		(SLIP
+>20	belong		9		(PPP
+>20	belong		10		(FDDI
+>20	belong		11		(RFC 1483 ATM
+>20	belong		12		(raw IP
+>20	belong		13		(BSD/OS SLIP
+>20	belong		14		(BSD/OS PPP
+>16	belong		x		\b, capture length %d)
+0	ulelong		0xa1b2cd34	extended tcpdump capture file (little-endian)
+>4	leshort		x		- version %d
+>6	leshort		x		\b.%d
+>20	lelong		0		(No link-layer encapsulation
+>20	lelong		1		(Ethernet
+>20	lelong		2		(3Mb Ethernet
+>20	lelong		3		(AX.25
+>20	lelong		4		(ProNET
+>20	lelong		5		(CHAOS
+>20	lelong		6		(Token Ring
 >20	lelong		7		(ARCNET
 >20	lelong		8		(SLIP
 >20	lelong		9		(PPP
@@ -83,3 +174,32 @@
 >20	lelong		13		(BSD/OS SLIP
 >20	lelong		14		(BSD/OS PPP
 >16	lelong		x		\b, capture length %d)
+
+#
+# AIX "iptrace" capture files.
+#
+0	string		iptrace\ 2.0	"iptrace" capture file
+
+#
+# Novell LANalyzer capture files.
+#
+0	leshort		0x1001		LANalyzer capture file
+0	leshort		0x1007		LANalyzer capture file
+
+#
+# HP-UX "nettl" capture files.
+#
+0	string		\x54\x52\x00\x64\x00	"nettl" capture file
+
+#
+# RADCOM WAN/LAN Analyzer capture files.
+#
+0	string		\x42\xd2\x00\x34\x12\x66\x22\x88	RADCOM WAN/LAN Analyzer capture file
+
+#
+# NetStumbler log files.  Not really packets, per se, but about as
+# close as you can get.  These are log files from NetStumbler, a
+# Windows program that scans for 802.11b networks.
+#
+0	string		NetS		NetStumbler log file
+>8	lelong		x		\b, %d stations found

+ 0 - 1
Magdir/softquad

@@ -2,7 +2,6 @@
 #------------------------------------------------------------------------------
 # softquad:  file(1) magic for SoftQuad Publishing Software
 #
-# $Id: softquad,v 1.9 1995/01/21 21:09:00 christos Exp $
 # Author/Editor and RulesBuilder
 #
 # XXX - byte order?

+ 142 - 0
Magdir/sysex

@@ -0,0 +1,142 @@
+#------------------------------------------------------------------------
+# sysex: file(1) magic for MIDI sysex files
+#
+# 
+0	byte			0xF0		SysEx File -
+
+>1	byte			0x2f		ELKA
+>>3	byte			0x09		EK-44
+
+>1	byte			0x3e		Waldorf
+>>3	byte			0x7f		Microwave I
+
+>1	byte			0x40		Kawai
+>>3	byte			0x22		K4
+
+>1	byte			0x41		Roland
+>>3	byte			0x14		D-50
+>>3	byte			0x2b		U-220
+>>3	byte			0x02		TR-707
+
+>1	byte			0x42		Korg
+>>3	byte			0x19		M1
+
+>1	byte			0x43		Yamaha
+>1	byte			0x44		Casio
+>1	byte			0x46		Kamiya
+>1	byte			0x47		Akai
+>1	byte			0x48		Victor
+>1	byte			0x4b		Fujitsu
+>1	byte			0x4c		Sony
+>1	byte			0x4e		Teac
+>1	byte			0x50		Matsushita
+>1	byte			0x51		Fostex
+>1	byte			0x52		Zoom
+>1	byte			0x54		Matsushita
+>1	byte			0x57		Acoustic tech. lab.
+
+>1	belong&0xffffff00	0x00007400	Ta Horng
+>1	belong&0xffffff00	0x00007500	e-Tek
+>1	belong&0xffffff00	0x00007600	E-Voice
+>1	belong&0xffffff00	0x00007700	Midisoft
+>1	belong&0xffffff00	0x00007800	Q-Sound
+>1	belong&0xffffff00	0x00007900	Westrex
+>1	belong&0xffffff00	0x00007a00	Nvidia*
+>1	belong&0xffffff00	0x00007b00	ESS
+>1	belong&0xffffff00	0x00007c00	Mediatrix
+>1	belong&0xffffff00	0x00007d00	Brooktree
+>1	belong&0xffffff00	0x00007e00	Otari
+>1	belong&0xffffff00	0x00007f00	Key Electronics
+>1	belong&0xffffff00	0x00010000	Shure
+>1	belong&0xffffff00	0x00010100	AuraSound
+>1	belong&0xffffff00	0x00010200	Crystal
+>1	belong&0xffffff00	0x00010300	Rockwell
+>1	belong&0xffffff00	0x00010400	Silicon Graphics
+>1	belong&0xffffff00	0x00010500	Midiman
+>1	belong&0xffffff00	0x00010600	PreSonus
+>1	belong&0xffffff00	0x00010800	Topaz
+>1	belong&0xffffff00	0x00010900	Cast Lightning
+>1	belong&0xffffff00	0x00010a00	Microsoft
+>1	belong&0xffffff00	0x00010b00	Sonic Foundry
+>1	belong&0xffffff00	0x00010c00	Line 6
+>1	belong&0xffffff00	0x00010d00	Beatnik Inc.
+>1	belong&0xffffff00	0x00010e00	Van Koerving
+>1	belong&0xffffff00	0x00010f00	Altech Systems
+>1	belong&0xffffff00	0x00011000	S & S Research
+>1	belong&0xffffff00	0x00011100	VLSI Technology
+>1	belong&0xffffff00	0x00011200	Chromatic
+>1	belong&0xffffff00	0x00011300	Sapphire
+>1	belong&0xffffff00	0x00011400	IDRC
+>1	belong&0xffffff00	0x00011500	Justonic Tuning
+>1	belong&0xffffff00	0x00011600	TorComp
+>1	belong&0xffffff00	0x00011700	Newtek Inc.
+>1	belong&0xffffff00	0x00011800	Sound Sculpture
+>1	belong&0xffffff00	0x00011900	Walker Technical
+>1	belong&0xffffff00	0x00011a00	Digital Harmony
+>1	belong&0xffffff00	0x00011b00	InVision
+>1	belong&0xffffff00	0x00011c00	T-Square
+>1	belong&0xffffff00	0x00011d00	Nemesys
+>1	belong&0xffffff00	0x00011e00	DBX
+>1	belong&0xffffff00	0x00011f00	Syndyne
+>1	belong&0xffffff00	0x00012000	Bitheadz	
+>1	belong&0xffffff00	0x00012100	Cakewalk
+>1	belong&0xffffff00	0x00012200	Staccato
+>1	belong&0xffffff00	0x00012300	National Semicon.
+>1	belong&0xffffff00	0x00012400	Boom Theory
+>1	belong&0xffffff00	0x00012500	Virtual DSP Corp
+>1	belong&0xffffff00	0x00012600	Antares
+>1	belong&0xffffff00	0x00012700	Angel Software
+>1	belong&0xffffff00	0x00012800	St Louis Music
+>1	belong&0xffffff00	0x00012900	Lyrrus dba G-VOX
+>1	belong&0xffffff00	0x00012a00	Ashley Audio
+>1	belong&0xffffff00	0x00012b00	Vari-Lite
+>1	belong&0xffffff00	0x00012c00	Summit Audio
+>1	belong&0xffffff00	0x00012d00	Aureal Semicon.
+>1	belong&0xffffff00	0x00012e00	SeaSound
+>1	belong&0xffffff00	0x00012f00	U.S. Robotics
+>1	belong&0xffffff00	0x00013000	Aurisis
+>1	belong&0xffffff00	0x00013100	Nearfield Multimedia
+>1	belong&0xffffff00	0x00013200	FM7 Inc.
+>1	belong&0xffffff00	0x00013300	Swivel Systems
+>1	belong&0xffffff00	0x00013400	Hyperactive
+>1	belong&0xffffff00	0x00013500	MidiLite
+>1	belong&0xffffff00	0x00013600	Radical
+>1	belong&0xffffff00	0x00013700	Roger Linn
+>1	belong&0xffffff00	0x00013800	Helicon
+>1	belong&0xffffff00	0x00013900	Event
+>1	belong&0xffffff00	0x00013a00	Sonic Network
+>1	belong&0xffffff00	0x00013b00	Realtime Music
+>1	belong&0xffffff00	0x00013c00	Apogee Digital
+
+>1	belong&0xffffff00	0x00202b00	Medeli Electronics
+>1	belong&0xffffff00	0x00202c00	Charlie Lab
+>1	belong&0xffffff00	0x00202d00	Blue Chip Music
+>1	belong&0xffffff00	0x00202e00	BEE OH Corp
+>1	belong&0xffffff00	0x00202f00	LG Semicon America
+>1	belong&0xffffff00	0x00203000	TESI
+>1	belong&0xffffff00	0x00203100	EMAGIC
+>1	belong&0xffffff00	0x00203200	Behringer
+>1	belong&0xffffff00	0x00203300	Access Music
+>1	belong&0xffffff00	0x00203400	Synoptic
+>1	belong&0xffffff00	0x00203500	Hanmesoft Corp
+>1	belong&0xffffff00	0x00203600	Terratec
+>1	belong&0xffffff00	0x00203700	Proel SpA
+>1	belong&0xffffff00	0x00203800	IBK MIDI
+>1	belong&0xffffff00	0x00203900	IRCAM
+>1	belong&0xffffff00	0x00203a00	Propellerhead Software
+>1	belong&0xffffff00	0x00203b00	Red Sound Systems
+>1	belong&0xffffff00	0x00203c00	Electron ESI AB
+>1	belong&0xffffff00	0x00203d00	Sintefex Audio
+>1	belong&0xffffff00	0x00203e00	Music and More
+>1	belong&0xffffff00	0x00203f00	Amsaro
+>1	belong&0xffffff00	0x00204000	CDS Advanced Technology
+>1	belong&0xffffff00	0x00204100	Touched by Sound
+>1	belong&0xffffff00	0x00204200	DSP Arts
+>1	belong&0xffffff00	0x00204300	Phil Rees Music
+>1	belong&0xffffff00	0x00204400	Stamer Musikanlagen GmbH
+>1	belong&0xffffff00	0x00204500	Soundart
+>1	belong&0xffffff00	0x00204600	C-Mexx Software
+>1	belong&0xffffff00	0x00204700	Klavis Tech.
+>1	belong&0xffffff00	0x00204800	Noteheads AB
+
+0	string			T707		Roland TR-707 Data

+ 2 - 1
Magdir/troff

@@ -9,9 +9,10 @@
 0	string		'\\"		troff or preprocessor input text
 0	string		'.\\"		troff or preprocessor input text
 0	string		\\"		troff or preprocessor input text
+0	string		'''		troff or preprocessor input text
 
 # ditroff intermediate output text
-0	string		x\ T		ditroff text
+0	string		x\ T		ditroff output text
 >4	string		cat		for the C/A/T phototypesetter
 >4	string		ps		for PostScript
 >4	string		dvi		for DVI

+ 7 - 0
Magdir/tuxedo

@@ -0,0 +1,7 @@
+#
+#------------------------------------------------------------------------------
+# tuxedo:	file(1) magic for BEA TUXEDO data files
+#
+# from Ian Springer <ispringer@hotmail.com>
+#
+0	string		\0\0\1\236\0\0\0\0\0\0\0\0\0\0\0\0	BEA TUXEDO DES mask data

+ 3 - 0
Magdir/varied.out

@@ -23,3 +23,6 @@
 # ava assembler/linker Uros Platise <uros.platise@ijs.si>
 0       string          avaobj  AVR assembler object code
 >7      string          >\0     version '%s'
+# gnu gmon magic From: Eugen Dedu <dedu@ese-metz.fr>
+0	string		gmon		GNU prof performance data
+>4	long		x		- version %ld

+ 12 - 0
Magdir/vmware

@@ -0,0 +1,12 @@
+
+# -----------------------------------------------------------
+# VMware specific files (deduced from version 1.1 and log file entries)
+# Anthon van der Neut (anthon@mnt.org)
+0	belong	0x4d52564e	VMware nvram 
+0	belong	0x434f5744	
+>8	byte	3	 	VMware virtual disk 
+>>32	lelong	x		(%d/
+>>36	lelong	x		\b%d/
+>>40	lelong	x		\b%d)
+>8	byte	2	 	VMware undoable disk
+>>32	string  >\0		(%s)

+ 65 - 0
Magdir/vorbis

@@ -0,0 +1,65 @@
+
+#------------------------------------------------------------------------------
+# vorbis:  file(1) magic for Ogg/Vorbis files
+#
+# From Felix von Leitner <leitner@fefe.de>
+# Extended by Beni Cherniavsky <cben@crosswinds.net>
+#
+# Most (everything but the number of channels and bitrate) is commented
+# out with `##' as it's not interesting to the average user.  The things
+# advanced users are most likely to want to uncomment are the number of
+# comments and the encoder version.
+#
+# --- Ogg Framing ---
+0		string		OggS		Ogg data
+>4		byte		!0		UNKNOWN REVISION %u
+##>4		byte		0		revision 0
+>4		byte		0
+##>>14		lelong		x		(Serial %lX)
+# --- First vorbis packet - general header ---
+>>28		string		\x01vorbis	\b, Vorbis audio,
+>>>35		lelong		!0		UNKNOWN VERSION %lu,
+##>>>35		lelong		0		version 0,
+>>>35		lelong		0
+>>>>39		ubyte		1		mono,
+>>>>39		ubyte		2		stereo,
+>>>>39		ubyte		>2		%u channels,
+>>>>40		lelong		x		%lu Hz
+# Minimal, nominal and maximal bitrates specified when encoding
+>>>>48		string		<\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff	\b,
+# The above tests if at least one of these is specified:
+>>>>>44		lelong		!-1
+>>>>>>44	lelong		x		>%lu
+>>>>>48		lelong		!-1
+>>>>>>48	lelong		x		~%lu
+>>>>>52		lelong		!-1
+>>>>>>52	lelong		x		<%lu
+>>>>>48		string		<\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff	kbps
+# -- Second vorbis header packet - the comments
+>>>102		string		\x03vorbis
+# A kludge to read the vendor string.  It's a counted string, not a
+# zero-terminated one, so file(1) can't read it in a generic way.
+# libVorbis is the only one existing currently, so I detect it
+# specifically.  The interesting value is the cvs date (8 digits decimal).
+##>>>>113		string/c	Xiphophorus\ libVorbis\ I	\b, created by: Xiphophorus libVorbis I
+##>>>>>137	string		>00000000	%.8s
+# Map to beta version numbers:
+##>>>>>>137	string		<20000508	(<beta1 - prepublic)
+# The string has not changed from beta1 to 2 - they are indistinguishable.
+##>>>>>>137	string		20000508	(beta1/2)
+##>>>>>>137	string		>20000508
+##>>>>>>>137	string		<20001031	(beta2-3)
+##>>>>>>137	string		20001031	(beta3)
+##>>>>>>137	string		>20001031
+##>>>>>>>137	string		<20010225	(beta3-4)
+##>>>>>>137	string		20010225	(beta4)
+##>>>>>>137	string		>20010225
+##>>>>>>>137	string		<20010615	(beta4-RC1)
+##>>>>>>137	string		20010615	(RC1)
+##>>>>>>137	string		>20010615	(>RC1)
+# Then come the comments, again length-counted (and number-counted).
+# Some looping constructs and registers would allow reading them but now
+# it's impossible.  However we can print the number of comments present
+# (skipping by the vendor string length):
+##>>>>(109.l+113)	lelong		0		\b, no comments
+##>>>>(109.l+113)	lelong		>0		\b, %lu comments

+ 10 - 0
Magdir/xdelta

@@ -0,0 +1,10 @@
+
+#------------------------------------------------------------------------------
+# file(1) magic(5) data for xdelta  Josh MacDonald <jmacd@CS.Berkeley.EDU>
+#
+0	string	%XDELTA%	XDelta binary patch file 0.14
+0	string	%XDZ000%	XDelta binary patch file 0.18
+0	string	%XDZ001%	XDelta binary patch file 0.20
+0	string	%XDZ002%	XDelta binary patch file 1.0
+0	string	%XDZ003%	XDelta binary patch file 1.0.4
+0	string	%XDZ004%	XDelta binary patch file 1.1

+ 44 - 29
Makefile.am

@@ -3,21 +3,27 @@ AUTOMAKE_OPTIONS = foreign no-dependencies
 
 bin_PROGRAMS = file
 
-data_DATA = magic
+data_DATA = magic magic.mime magic.mgc
 
 MAGIC	 = @datadir@/magic
 CPPFLAGS = -DMAGIC='"$(MAGIC)"'
 
-man_MANS = file.1 magic.4
+if FSECT5
+man_MAGIC = magic.5
+else
+man_MAGIC = magic.4
+endif
+fsect = @fsect@
+man_MANS = file.1 $(man_MAGIC)
 
 file_SOURCES = file.c apprentice.c fsmagic.c softmagic.c ascmagic.c \
-	compress.c is_tar.c readelf.c internat.c print.c \
+	compress.c is_tar.c readelf.c print.c \
 	file.h names.h patchlevel.h readelf.h tar.h
 
 EXTRA_DIST = LEGAL.NOTICE MAINT PORTING Makefile.std magic2mime \
 	Localstuff Header $(magic_FRAGMENTS) file.man magic.man
 
-CLEANFILES = $(man_MANS) magic
+CLEANFILES = $(man_MANS) magic magic.mgc
 
 magic: Header Localstuff $(magic_FRAGMENTS)
 	cat $(srcdir)/Header $(srcdir)/Localstuff > $@
@@ -30,39 +36,48 @@ magic: Header Localstuff $(magic_FRAGMENTS)
           cat $$f; \
 	done >> $@
 
+magic.mgc: magic file
+	./file -C -m magic
+
 file.1:	Makefile file.man
 	@rm -f $@
 	sed -e s@__CSECTION__@1@g \
-	    -e s@__FSECTION__@4@g \
+	    -e s@__FSECTION__@${fsect}@g \
 	    -e s@__VERSION__@${VERSION}@g \
 	    -e s@__MAGIC__@${MAGIC}@g $(srcdir)/file.man > $@
 
-magic.4: Makefile magic.man
+magic.${fsect}: Makefile magic.man
 	@rm -f $@
 	sed -e s@__CSECTION__@1@g \
-	    -e s@__FSECTION__@4@g \
+	    -e s@__FSECTION__@${fsect}@g \
 	    -e s@__VERSION__@${VERSION}@g \
 	    -e s@__MAGIC__@${MAGIC}@g $(srcdir)/magic.man > $@
 
-magic_FRAGMENTS = Magdir/adventure Magdir/alliant Magdir/alpha Magdir/amanda \
-Magdir/amigaos Magdir/animation Magdir/apl Magdir/apple Magdir/applix \
-Magdir/archive Magdir/asterix Magdir/att3b Magdir/audio Magdir/blit \
-Magdir/bsdi Magdir/c-lang Magdir/chi Magdir/cisco Magdir/clipper \
-Magdir/commands Magdir/compress Magdir/convex Magdir/database \
-Magdir/diamond Magdir/diff Magdir/digital Magdir/dump Magdir/elf \
-Magdir/encore Magdir/filesystems Magdir/flash Magdir/fonts Magdir/frame \
-Magdir/freebsd Magdir/gimp Magdir/gnu Magdir/hp Magdir/ibm370 \
-Magdir/ibm6000 Magdir/iff Magdir/images Magdir/intel Magdir/interleaf \
-Magdir/island Magdir/ispell Magdir/java Magdir/karma Magdir/lecter \
-Magdir/lex Magdir/lif Magdir/linux Magdir/lisp Magdir/mach Magdir/macintosh \
-Magdir/magic Magdir/mail.news Magdir/mime Magdir/mirage Magdir/mkid \
-Magdir/mmdf Magdir/modem Magdir/motorola Magdir/msdos Magdir/ncr \
-Magdir/netbsd Magdir/news Magdir/octave Magdir/olf Magdir/os2 Magdir/os9 \
-Magdir/osf1 Magdir/pbm Magdir/pdf Magdir/pdp Magdir/pgp Magdir/pkgadd \
-Magdir/plus5 Magdir/printer Magdir/psdbms Magdir/pyramid Magdir/riff \
-Magdir/rpm Magdir/rtf Magdir/sc Magdir/sccs Magdir/sendmail Magdir/sequent \
-Magdir/sgi Magdir/sgml Magdir/sniffer Magdir/softquad Magdir/sun \
-Magdir/teapot Magdir/terminfo Magdir/tex Magdir/ti-8x Magdir/timezone \
-Magdir/troff Magdir/typeset Magdir/unknown Magdir/uuencode \
-Magdir/varied.out Magdir/vax Magdir/vicar Magdir/visx Magdir/vms \
-Magdir/wordperfect Magdir/xenix Magdir/zilog Magdir/zyxel
+magic_FRAGMENTS = Magdir/adi Magdir/adventure Magdir/allegro Magdir/alliant \
+    Magdir/alpha Magdir/amanda Magdir/amigaos Magdir/animation \
+    Magdir/apl Magdir/apple Magdir/applix Magdir/archive Magdir/asterix \
+    Magdir/att3b Magdir/audio Magdir/blender Magdir/blit Magdir/bsdi \
+    Magdir/c-lang Magdir/chi Magdir/cisco Magdir/claris Magdir/clipper \
+    Magdir/commands Magdir/compress Magdir/console Magdir/convex \
+    Magdir/database Magdir/diamond Magdir/diff Magdir/digital \
+    Magdir/dump Magdir/elf Magdir/encore Magdir/epoc Magdir/filesystems \
+    Magdir/flash Magdir/fonts Magdir/frame Magdir/freebsd Magdir/fsav \
+    Magdir/gimp Magdir/gnu Magdir/grace Magdir/hp Magdir/ibm370 \
+    Magdir/ibm6000 Magdir/iff Magdir/images Magdir/intel \
+    Magdir/interleaf Magdir/island Magdir/ispell Magdir/java \
+    Magdir/jpeg Magdir/karma Magdir/lecter Magdir/lex Magdir/lif \
+    Magdir/linux Magdir/lisp Magdir/mach Magdir/macintosh Magdir/magic \
+    Magdir/mail.news Magdir/maple Magdir/mathematica Magdir/mcrypt \
+    Magdir/mime Magdir/mips Magdir/mirage Magdir/mkid Magdir/mmdf Magdir/modem \
+    Magdir/motorola Magdir/msdos Magdir/msvc Magdir/ncr Magdir/netbsd \
+    Magdir/netscape Magdir/news Magdir/octave Magdir/olf Magdir/os2 \
+    Magdir/os9 Magdir/osf1 Magdir/palm Magdir/pbm Magdir/pdf Magdir/pdp \
+    Magdir/pgp Magdir/pkgadd Magdir/plus5 Magdir/printer Magdir/project \
+    Magdir/psdbms Magdir/pyramid Magdir/python Magdir/riff Magdir/rpm \
+    Magdir/rtf Magdir/sc Magdir/sccs Magdir/sendmail Magdir/sequent \
+    Magdir/sgml Magdir/sketch Magdir/sniffer Magdir/softquad Magdir/spectrum \
+    Magdir/sun Magdir/teapot Magdir/terminfo Magdir/tex Magdir/ti-8x \
+    Magdir/timezone Magdir/troff Magdir/typeset Magdir/unknown \
+    Magdir/uuencode Magdir/varied.out Magdir/vax Magdir/vicar Magdir/visx \
+    Magdir/vms Magdir/vmware Magdir/wordperfect Magdir/xdelta Magdir/xenix \
+    Magdir/zilog Magdir/zyxel

File diff suppressed because it is too large
+ 60 - 20
Makefile.in


+ 3 - 2
Makefile.std

@@ -1,6 +1,6 @@
 # Makefile for file(1) cmd. 
 # Copyright (c) Ian F. Darwin 86/09/01 - see LEGAL.NOTICE.
-# @(#)$Id: Makefile.std,v 1.6 1999/02/14 17:21:38 christos Exp $
+# @(#)$Id: Makefile.std,v 1.12 2001/09/03 14:44:21 christos Exp $
 #
 # This software is not subject to any license of the American Telephone
 # and Telegraph Company or of the Regents of the University of California.
@@ -21,7 +21,8 @@
 #    ever read sources, credits must appear in the documentation.
 #
 # 4. This notice may not be removed or altered.
-VERSION	= 3.27
+#
+VERSION	= 3.37
 SHELL	= /bin/sh
 #MAGIC	= /etc/magic
 MAGIC	= /usr/local/etc/magic

+ 9 - 2
README

@@ -1,5 +1,5 @@
 ** README for file(1) Command **
-@(#) $Id: README,v 1.22 1997/11/05 16:03:18 christos Exp $
+@(#) $Id: README,v 1.25 2001/07/23 21:30:22 christos Exp $
 
 This is Release 3.x of Ian Darwin's (copyright but distributable)
 file(1) command. This version is the standard "file" command for Linux,
@@ -80,7 +80,7 @@ file, apply this patch [which is going to be in the next version of gzip]:
       }
       bytes_in += (ulg)insize;
 
-E-mail: christos@astron.com, moraes@deshaw.com
+E-mail: christos@astron.com
 
 Phone: Do not even think of telephoning me about this program. Send cash first!
 
@@ -89,3 +89,10 @@ Cres, # 810, Toronto, Ontario CANADA M8X 2W4.  Phone: 416-239-4801 or
 800-387-2777. Email: mail@sq.com.  Call for information on SGML editing
 and browsing, Unix text processing, and customised products on Unix,
 DOS and Mac.
+
+From: Kees Zeelenberg
+
+An MS-Windows (Win32) port of File-3.36 is available from
+http://gnuwin32.sourceforge.net/
+File is an implementation of the Unix File(1) command.
+It knows the 'magic numbers' of several thousand file types.

+ 8 - 2
acconfig.h

@@ -1,8 +1,8 @@
 /* Autoheader needs me */
-#define PACKAGE "file"
+#undef PACKAGE
 
 /* Autoheader needs me */
-#define VERSION "3.27"
+#undef VERSION
 
 /* Define if builtin ELF support is enabled.  */
 #undef BUILTIN_ELF
@@ -13,6 +13,12 @@
 /* Define if the `long long' type works.  */
 #undef HAVE_LONG_LONG
 
+/* Define if we have "tm_isdst" in "struct tm".  */
+#undef HAVE_TM_ISDST
+
+/* Define if we have a global "int" variable "daylight".  */
+#undef HAVE_DAYLIGHT
+
 /* Define to `unsigned char' if standard headers don't define.  */
 #undef uint8_t
 

+ 45 - 0
acinclude.m4

@@ -21,6 +21,51 @@ dnl From: eggert@twinsun.com (Paul Eggert)
 dnl Subject: autoconf 2.13 AC_CHECK_TYPE doesn't allow shell vars
 dnl Newsgroups: gnu.utils.bug
 
+dnl from autoconf 2.13 acspecific.m4, with changes to check for daylight
+
+dnl AC_STRUCT_TIMEZONE_DAYLIGHT
+dnl Checks for tm_zone and tm_isdst in struct tm and for the global
+dnl variables tzname and daylight; defines HAVE_TM_ZONE, HAVE_TZNAME,
+dnl HAVE_TM_ISDST and HAVE_DAYLIGHT for whichever are found.
+dnl The daylight probe reads the variable directly: it is an int, so it
+dnl must not be passed to atoi() the way the tzname string is.
+AC_DEFUN(AC_STRUCT_TIMEZONE_DAYLIGHT,
+[AC_REQUIRE([AC_STRUCT_TM])dnl
+AC_CACHE_CHECK([for tm_zone in struct tm], ac_cv_struct_tm_zone,
+[AC_TRY_COMPILE([#include <sys/types.h>
+#include <$ac_cv_struct_tm>], [struct tm tm; tm.tm_zone;],
+  ac_cv_struct_tm_zone=yes, ac_cv_struct_tm_zone=no)])
+if test "$ac_cv_struct_tm_zone" = yes; then
+  AC_DEFINE(HAVE_TM_ZONE)
+fi
+AC_CACHE_CHECK(for tzname, ac_cv_var_tzname,
+[AC_TRY_LINK(
+changequote(<<, >>)dnl
+<<#include <time.h>
+#ifndef tzname /* For SGI.  */
+extern char *tzname[]; /* RS6000 and others reject char **tzname.  */
+#endif>>,
+changequote([, ])dnl
+[atoi(*tzname);], ac_cv_var_tzname=yes, ac_cv_var_tzname=no)])
+  if test $ac_cv_var_tzname = yes; then
+    AC_DEFINE(HAVE_TZNAME)
+  fi
+
+AC_CACHE_CHECK([for tm_isdst in struct tm], ac_cv_struct_tm_isdst,
+[AC_TRY_COMPILE([#include <sys/types.h>
+#include <$ac_cv_struct_tm>], [struct tm tm; tm.tm_isdst;],
+  ac_cv_struct_tm_isdst=yes, ac_cv_struct_tm_isdst=no)])
+if test "$ac_cv_struct_tm_isdst" = yes; then
+  AC_DEFINE(HAVE_TM_ISDST)
+fi
+AC_CACHE_CHECK(for daylight, ac_cv_var_daylight,
+[AC_TRY_LINK(
+changequote(<<, >>)dnl
+<<#include <time.h>
+#ifndef daylight /* In case IRIX #defines this, too  */
+extern int daylight;
+#endif>>,
+changequote([, ])dnl
+[return daylight;], ac_cv_var_daylight=yes, ac_cv_var_daylight=no)])
+  if test $ac_cv_var_daylight = yes; then
+    AC_DEFINE(HAVE_DAYLIGHT)
+  fi
+])
+
 dnl AC_CHECK_TYPE2(TYPE, DEFAULT)
 AC_DEFUN(AC_CHECK_TYPE2,
 [AC_REQUIRE([AC_HEADER_STDC])dnl

+ 64 - 15
aclocal.m4

@@ -1,7 +1,7 @@
-dnl aclocal.m4 generated automatically by aclocal 1.3
+dnl aclocal.m4 generated automatically by aclocal 1.4
 
-dnl Copyright (C) 1994, 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
-dnl This Makefile.in is free software; the Free Software Foundation
+dnl Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc.
+dnl This file is free software; the Free Software Foundation
 dnl gives unlimited permission to copy and/or distribute it,
 dnl with or without modifications, as long as this notice is preserved.
 
@@ -33,6 +33,51 @@ dnl From: eggert@twinsun.com (Paul Eggert)
 dnl Subject: autoconf 2.13 AC_CHECK_TYPE doesn't allow shell vars
 dnl Newsgroups: gnu.utils.bug
 
+dnl from autoconf 2.13 acspecific.m4, with changes to check for daylight
+
+dnl AC_STRUCT_TIMEZONE_DAYLIGHT
+dnl Checks for tm_zone and tm_isdst in struct tm and for the global
+dnl variables tzname and daylight; defines HAVE_TM_ZONE, HAVE_TZNAME,
+dnl HAVE_TM_ISDST and HAVE_DAYLIGHT for whichever are found.
+dnl The daylight probe reads the variable directly: it is an int, so it
+dnl must not be passed to atoi() the way the tzname string is.
+AC_DEFUN(AC_STRUCT_TIMEZONE_DAYLIGHT,
+[AC_REQUIRE([AC_STRUCT_TM])dnl
+AC_CACHE_CHECK([for tm_zone in struct tm], ac_cv_struct_tm_zone,
+[AC_TRY_COMPILE([#include <sys/types.h>
+#include <$ac_cv_struct_tm>], [struct tm tm; tm.tm_zone;],
+  ac_cv_struct_tm_zone=yes, ac_cv_struct_tm_zone=no)])
+if test "$ac_cv_struct_tm_zone" = yes; then
+  AC_DEFINE(HAVE_TM_ZONE)
+fi
+AC_CACHE_CHECK(for tzname, ac_cv_var_tzname,
+[AC_TRY_LINK(
+changequote(<<, >>)dnl
+<<#include <time.h>
+#ifndef tzname /* For SGI.  */
+extern char *tzname[]; /* RS6000 and others reject char **tzname.  */
+#endif>>,
+changequote([, ])dnl
+[atoi(*tzname);], ac_cv_var_tzname=yes, ac_cv_var_tzname=no)])
+  if test $ac_cv_var_tzname = yes; then
+    AC_DEFINE(HAVE_TZNAME)
+  fi
+
+AC_CACHE_CHECK([for tm_isdst in struct tm], ac_cv_struct_tm_isdst,
+[AC_TRY_COMPILE([#include <sys/types.h>
+#include <$ac_cv_struct_tm>], [struct tm tm; tm.tm_isdst;],
+  ac_cv_struct_tm_isdst=yes, ac_cv_struct_tm_isdst=no)])
+if test "$ac_cv_struct_tm_isdst" = yes; then
+  AC_DEFINE(HAVE_TM_ISDST)
+fi
+AC_CACHE_CHECK(for daylight, ac_cv_var_daylight,
+[AC_TRY_LINK(
+changequote(<<, >>)dnl
+<<#include <time.h>
+#ifndef daylight /* In case IRIX #defines this, too  */
+extern int daylight;
+#endif>>,
+changequote([, ])dnl
+[return daylight;], ac_cv_var_daylight=yes, ac_cv_var_daylight=no)])
+  if test $ac_cv_var_daylight = yes; then
+    AC_DEFINE(HAVE_DAYLIGHT)
+  fi
+])
+
 dnl AC_CHECK_TYPE2(TYPE, DEFAULT)
 AC_DEFUN(AC_CHECK_TYPE2,
 [AC_REQUIRE([AC_HEADER_STDC])dnl
@@ -100,7 +145,7 @@ dnl Usage:
 dnl AM_INIT_AUTOMAKE(package,version, [no-define])
 
 AC_DEFUN(AM_INIT_AUTOMAKE,
-[AC_REQUIRE([AM_PROG_INSTALL])
+[AC_REQUIRE([AC_PROG_INSTALL])
 PACKAGE=[$1]
 AC_SUBST(PACKAGE)
 VERSION=[$2]
@@ -110,8 +155,8 @@ if test "`cd $srcdir && pwd`" != "`pwd`" && test -f $srcdir/config.status; then
   AC_MSG_ERROR([source directory already configured; run "make distclean" there first])
 fi
 ifelse([$3],,
-AC_DEFINE_UNQUOTED(PACKAGE, "$PACKAGE")
-AC_DEFINE_UNQUOTED(VERSION, "$VERSION"))
+AC_DEFINE_UNQUOTED(PACKAGE, "$PACKAGE", [Name of package])
+AC_DEFINE_UNQUOTED(VERSION, "$VERSION", [Version number of package]))
 AC_REQUIRE([AM_SANITY_CHECK])
 AC_REQUIRE([AC_ARG_PROGRAM])
 dnl FIXME This is truly gross.
@@ -123,15 +168,6 @@ AM_MISSING_PROG(AUTOHEADER, autoheader, $missing_dir)
 AM_MISSING_PROG(MAKEINFO, makeinfo, $missing_dir)
 AC_REQUIRE([AC_PROG_MAKE_SET])])
 
-
-# serial 1
-
-AC_DEFUN(AM_PROG_INSTALL,
-[AC_REQUIRE([AC_PROG_INSTALL])
-test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL_PROGRAM}'
-AC_SUBST(INSTALL_SCRIPT)dnl
-])
-
 #
 # Check to make sure that the build environment is sane.
 #
@@ -214,3 +250,16 @@ for am_file in <<$1>>; do
 done<<>>dnl>>)
 changequote([,]))])
 
+# Define a conditional.
+# Usage: AM_CONDITIONAL(NAME, SHELL-CONDITION)
+# Runs SHELL-CONDITION at configure time and substitutes NAME_TRUE and
+# NAME_FALSE: when the condition holds, NAME_TRUE is empty and NAME_FALSE
+# is '#'; otherwise the values are the other way round.
+
+AC_DEFUN(AM_CONDITIONAL,
+[AC_SUBST($1_TRUE)
+AC_SUBST($1_FALSE)
+if $2; then
+  $1_TRUE=
+  $1_FALSE='#'
+else
+  $1_TRUE='#'
+  $1_FALSE=
+fi])
+

+ 591 - 82
apprentice.c

@@ -25,47 +25,175 @@
  * 4. This notice may not be removed or altered.
  */
 
+#include "file.h"
 #include <stdio.h>
 #include <stdlib.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
 #include <string.h>
 #include <ctype.h>
 #include <errno.h>
-#include "file.h"
+#ifdef QUICK
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#endif
 
 #ifndef	lint
-FILE_RCSID("@(#)$Id: apprentice.c,v 1.29 1999/10/31 22:23:03 christos Exp $")
+FILE_RCSID("@(#)$Id: apprentice.c,v 1.44 2001/08/01 14:03:19 christos Exp $")
 #endif	/* lint */
 
 #define	EATAB {while (isascii((unsigned char) *l) && \
 		      isspace((unsigned char) *l))  ++l;}
 #define LOWCASE(l) (isupper((unsigned char) (l)) ? \
 			tolower((unsigned char) (l)) : (l))
+/*
+ * Work around a bug in headers on Digital Unix.
+ * At least confirmed for: OSF1 V4.0 878
+ */
+#if defined(__osf__) && defined(__DECC)
+#ifdef MAP_FAILED
+#undef MAP_FAILED
+#endif
+#endif
+
+#ifndef MAP_FAILED
+#define MAP_FAILED (void *) -1
+#endif
+
+#ifndef MAP_FILE
+#define MAP_FILE 0
+#endif
+
+#ifdef __EMX__
+  char PATHSEP=';';
+#else
+  char PATHSEP=':';
+#endif
 
 
 static int getvalue	__P((struct magic *, char **));
 static int hextoint	__P((int));
 static char *getstr	__P((char *, char *, int, int *));
-static int parse	__P((char *, int *, int));
+static int parse	__P((struct magic **, uint32 *, char *, int));
 static void eatsize	__P((char **));
+static int apprentice_1	__P((const char *, int));
+static int apprentice_file	__P((struct magic **, uint32 *,
+    const char *, int));
+static void byteswap	__P((struct magic *, uint32));
+static void bs1		__P((struct magic *));
+static uint16 swap2	__P((uint16));
+static uint32 swap4	__P((uint32));
+static char *mkdbname	__P((const char *));
+static int apprentice_map	__P((struct magic **, uint32 *,
+    const char *, int));
+static int apprentice_compile	__P((struct magic **, uint32 *,
+    const char *, int));
 
 static int maxmagic = 0;
 
-static int apprentice_1	__P((const char *, int));
+struct mlist mlist;
+
+#ifdef COMPILE_ONLY
+const char *magicfile;
+char *progname;
+int lineno;
+
+int main __P((int, char *[]));
+
+/*
+ * Stand-alone entry point for the magic compiler.
+ * Takes exactly one argument, the magic file, and hands it to
+ * apprentice() with the COMPILE action; the process exit status is
+ * whatever apprentice() returns.  Prints a usage message and exits
+ * with status 1 when the argument count is wrong.
+ */
+int
+main(argc, argv)
+	int argc;
+	char *argv[];
+{
+	/* Use the last path component of argv[0] in messages. */
+	if ((progname = strrchr(argv[0], '/')) != NULL)
+		progname++;
+	else
+		progname = argv[0];
+
+	if (argc != 2) {
+		(void)fprintf(stderr, "usage: %s file\n", progname);
+		exit(1);
+	}
+	magicfile = argv[1];
+
+	exit(apprentice(magicfile, COMPILE));
+}
+#endif /* COMPILE_ONLY */
+
+
+/*
+ * Handle one file.
+ *
+ * For the COMPILE action the magic source `fn' is parsed with
+ * apprentice_file() and, if that succeeds, passed on to
+ * apprentice_compile().  For any other action (unless built with
+ * COMPILE_ONLY) apprentice_map() is called first, falling back to
+ * apprentice_file() when it returns non-zero, and the resulting
+ * entries are appended to the global `mlist'.
+ *
+ * Returns 0 when the entries were loaded, otherwise the non-zero
+ * result of the step that failed (-1 on allocation failure in CHECK
+ * mode; other modes exit).
+ */
+static int
+apprentice_1(fn, action)
+	const char *fn;
+	int action;
+{
+	struct magic *magic = NULL;
+	uint32 nmagic = 0;
+	struct mlist *ml;
+	int rv = -1;
+
+	if (action == COMPILE) {
+		rv = apprentice_file(&magic, &nmagic, fn, action);
+		if (rv == 0)
+			return apprentice_compile(&magic, &nmagic, fn, action);
+		else
+			return rv;
+	}
+#ifndef COMPILE_ONLY
+	if ((rv = apprentice_map(&magic, &nmagic, fn, action)) != 0)
+		(void)fprintf(stderr, "%s: Using regular magic file `%s'\n",
+		    progname, fn);
+
+	if (rv != 0)
+		rv = apprentice_file(&magic, &nmagic, fn, action);
+
+	if (rv != 0)
+		return rv;
+
+	/* Nothing was loaded: leave the list alone and allocate nothing. */
+	if (magic == NULL || nmagic == 0)
+		return rv;
+
+	if ((ml = malloc(sizeof(*ml))) == NULL) {
+		(void) fprintf(stderr, "%s: Out of memory (%s).\n", progname,
+		    strerror(errno));
+		/* Never fall through: ml is dereferenced below. */
+		if (action == CHECK)
+			return -1;
+		else
+			exit(1);
+	}
+
+	ml->magic = magic;
+	ml->nmagic = nmagic;
+
+	/* Link the new node in just before the list head, i.e. at the tail. */
+	mlist.prev->next = ml;
+	ml->prev = mlist.prev;
+	ml->next = &mlist;
+	mlist.prev = ml;
+#endif /* COMPILE_ONLY */
+
+	/* Also reached in COMPILE_ONLY builds asked for another action. */
+	return rv;
+}
+
 
 int
-apprentice(fn, check)
-const char *fn;			/* list of magic files */
-int check;			/* non-zero? checking-only run. */
+apprentice(fn, action)
+	const char *fn;			/* list of magic files */
+	int action;
 {
 	char *p, *mfn;
 	int file_err, errs = -1;
 
-        maxmagic = MAXMAGIS;
-	magic = (struct magic *) calloc(sizeof(struct magic), maxmagic);
+	mlist.next = mlist.prev = &mlist;
 	mfn = malloc(strlen(fn)+1);
-	if (magic == NULL || mfn == NULL) {
-		(void) fprintf(stderr, "%s: Out of memory.\n", progname);
-		if (check)
+	if (mfn == NULL) {
+		(void) fprintf(stderr, "%s: Out of memory (%s).\n", progname,
+		    strerror(errno));
+		if (action == CHECK)
 			return -1;
 		else
 			exit(1);
@@ -73,28 +201,33 @@ int check;			/* non-zero? checking-only run. */
 	fn = strcpy(mfn, fn);
   
 	while (fn) {
-		p = strchr(fn, ':');
+		p = strchr(fn, PATHSEP);
 		if (p)
 			*p++ = '\0';
-		file_err = apprentice_1(fn, check);
+		file_err = apprentice_1(fn, action);
 		if (file_err > errs)
 			errs = file_err;
 		fn = p;
 	}
 	if (errs == -1)
 		(void) fprintf(stderr, "%s: couldn't find any magic files!\n",
-			       progname);
-	if (!check && errs)
+		    progname);
+	if (action == CHECK && errs)
 		exit(1);
 
 	free(mfn);
 	return errs;
 }
 
+/*
+ * Parse a plain-text magic file into an array of magic entries.
+ */
 static int
-apprentice_1(fn, check)
-const char *fn;			/* name of magic file */
-int check;			/* non-zero? checking-only run. */
+apprentice_file(magicp, nmagicp, fn, action)
+	struct magic **magicp;
+	uint32 *nmagicp;
+	const char *fn;			/* name of magic file */
+	int action;
 {
 	static const char hdr[] =
 		"cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
@@ -103,16 +236,25 @@ int check;			/* non-zero? checking-only run. */
 	int errs = 0;
 
 	f = fopen(fn, "r");
-	if (f==NULL) {
+	if (f == NULL) {
 		if (errno != ENOENT)
 			(void) fprintf(stderr,
-			"%s: can't read magic file %s (%s)\n", 
-			progname, fn, strerror(errno));
+			    "%s: can't read magic file %s (%s)\n", 
+			    progname, fn, strerror(errno));
 		return -1;
 	}
 
+        maxmagic = MAXMAGIS;
+	*magicp = (struct magic *) calloc(sizeof(struct magic), maxmagic);
+	if (*magicp == NULL) {
+		(void) fprintf(stderr, "%s: Out of memory (%s).\n", progname,
+		    strerror(errno));
+		if (action == CHECK)
+			return -1;
+	}
+
 	/* parse it */
-	if (check)	/* print silly verbose header for USG compat. */
+	if (action == CHECK)	/* print silly verbose header for USG compat. */
 		(void) printf("%s\n", hdr);
 
 	for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) {
@@ -121,11 +263,16 @@ int check;			/* non-zero? checking-only run. */
 		if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
 			continue;
 		line[strlen(line)-1] = '\0'; /* delete newline */
-		if (parse(line, &nmagic, check) != 0)
+		if (parse(magicp, nmagicp, line, action) != 0)
 			errs = 1;
 	}
 
 	(void) fclose(f);
+	if (errs) {
+		free(*magicp);
+		*magicp = NULL;
+		*nmagicp = 0;
+	}
 	return errs;
 }
 
@@ -134,8 +281,8 @@ int check;			/* non-zero? checking-only run. */
  */
 uint32
 signextend(m, v)
-struct magic *m;
-uint32 v;
+	struct magic *m;
+	uint32 v;
 {
 	if (!(m->flag & UNSIGNED))
 		switch(m->type) {
@@ -155,12 +302,16 @@ uint32 v;
 		case DATE:
 		case BEDATE:
 		case LEDATE:
+		case LDATE:
+		case BELDATE:
+		case LELDATE:
 		case LONG:
 		case BELONG:
 		case LELONG:
 			v = (int32) v;
 			break;
 		case STRING:
+		case PSTRING:
 			break;
 		default:
 			magwarn("can't happen: m->type=%d\n",
@@ -174,29 +325,35 @@ uint32 v;
  * parse one line from magic file, put into magic[index++] if valid
  */
 static int
-parse(l, ndx, check)
-char *l;
-int *ndx, check;
+parse(magicp, nmagicp, l, action)
+	struct magic **magicp;
+	uint32 *nmagicp;
+	char *l;
+	int action;
 {
-	int i = 0, nd = *ndx;
+	int i = 0;
 	struct magic *m;
-	char *t, *s;
+	char *t;
 
 #define ALLOC_INCR	200
-	if (nd+1 >= maxmagic){
-	    maxmagic += ALLOC_INCR;
-	    if ((magic = (struct magic *) realloc(magic, 
-						  sizeof(struct magic) * 
-						  maxmagic)) == NULL) {
-		(void) fprintf(stderr, "%s: Out of memory.\n", progname);
-		if (check)
-			return -1;
-		else
-			exit(1);
-	    }
-	    memset(&magic[*ndx], 0, sizeof(struct magic) * ALLOC_INCR);
+	if (*nmagicp + 1 >= maxmagic){
+		maxmagic += ALLOC_INCR;
+		if ((m = (struct magic *) realloc(*magicp,
+		    sizeof(struct magic) * maxmagic)) == NULL) {
+			(void) fprintf(stderr, "%s: Out of memory (%s).\n",
+			    progname, strerror(errno));
+			if (*magicp)
+				free(*magicp);
+			if (action == CHECK)
+				return -1;
+			else
+				exit(1);
+		}
+		*magicp = m;
+		memset(&(*magicp)[*nmagicp], 0, sizeof(struct magic)
+		    * ALLOC_INCR);
 	}
-	m = &magic[*ndx];
+	m = &(*magicp)[*nmagicp];
 	m->flag = 0;
 	m->cont_level = 0;
 
@@ -211,7 +368,7 @@ int *ndx, check;
 	}
 	if (m->cont_level != 0 && *l == '&') {
                 ++l;            /* step over */
-                m->flag |= ADD;
+                m->flag |= OFFADD;
         }
 
 	/* get offset, then skip over it */
@@ -221,8 +378,8 @@ int *ndx, check;
         l = t;
 
 	if (m->flag & INDIR) {
-		m->in.type = LONG;
-		m->in.offset = 0;
+		m->in_type = LONG;
+		m->in_offset = 0;
 		/*
 		 * read [.lbs][+-]nnnnn)
 		 */
@@ -230,24 +387,24 @@ int *ndx, check;
 			l++;
 			switch (*l) {
 			case 'l':
-				m->in.type = LELONG;
+				m->in_type = LELONG;
 				break;
 			case 'L':
-				m->in.type = BELONG;
+				m->in_type = BELONG;
 				break;
 			case 'h':
 			case 's':
-				m->in.type = LESHORT;
+				m->in_type = LESHORT;
 				break;
 			case 'H':
 			case 'S':
-				m->in.type = BESHORT;
+				m->in_type = BESHORT;
 				break;
 			case 'c':
 			case 'b':
 			case 'C':
 			case 'B':
-				m->in.type = BYTE;
+				m->in_type = BYTE;
 				break;
 			default:
 				magwarn("indirect offset type %c invalid", *l);
@@ -255,12 +412,46 @@ int *ndx, check;
 			}
 			l++;
 		}
-		s = l;
-		if (*l == '+' || *l == '-') l++;
-		if (isdigit((unsigned char)*l)) {
-			m->in.offset = strtoul(l, &t, 0);
-			if (*s == '-') m->in.offset = - m->in.offset;
+		if (*l == '~') {
+			m->in_op = OPINVERSE;
+			l++;
+		}
+		switch (*l) {
+		case '&':
+			m->in_op |= OPAND;
+			l++;
+			break;
+		case '|':
+			m->in_op |= OPOR;
+			l++;
+			break;
+		case '^':
+			m->in_op |= OPXOR;
+			l++;
+			break;
+		case '+':
+			m->in_op |= OPADD;
+			l++;
+			break;
+		case '-':
+			m->in_op |= OPMINUS;
+			l++;
+			break;
+		case '*':
+			m->in_op |= OPMULTIPLY;
+			l++;
+			break;
+		case '/':
+			m->in_op |= OPDIVIDE;
+			l++;
+			break;
+		case '%':
+			m->in_op |= OPMODULO;
+			l++;
+			break;
 		}
+		if (isdigit((unsigned char)*l)) 
+			m->in_offset = strtoul(l, &t, 0);
 		else
 			t = l;
 		if (*t++ != ')') 
@@ -284,6 +475,10 @@ int *ndx, check;
 #define NLESHORT	7
 #define NLELONG		6
 #define NLEDATE		6
+#define NPSTRING	7
+#define NLDATE		5
+#define NBELDATE	7
+#define NLELDATE	7
 
 	if (*l == 'u') {
 		++l;
@@ -327,17 +522,103 @@ int *ndx, check;
 	} else if (strncmp(l, "ledate", NLEDATE)==0) {
 		m->type = LEDATE;
 		l += NLEDATE;
+	} else if (strncmp(l, "pstring", NPSTRING)==0) {
+		m->type = PSTRING;
+		l += NPSTRING;
+	} else if (strncmp(l, "ldate", NLDATE)==0) {
+		m->type = LDATE;
+		l += NLDATE;
+	} else if (strncmp(l, "beldate", NBELDATE)==0) {
+		m->type = BELDATE;
+		l += NBELDATE;
+	} else if (strncmp(l, "leldate", NLELDATE)==0) {
+		m->type = LELDATE;
+		l += NLELDATE;
 	} else {
 		magwarn("type %s invalid", l);
 		return -1;
 	}
 	/* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
-	if (*l == '&') {
+	/* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
+	if (*l == '~') {
+		if (STRING != m->type && PSTRING != m->type)
+			m->mask_op = OPINVERSE;
+		++l;
+	}
+	switch (*l) {
+	case '&':
+		m->mask_op |= OPAND;
 		++l;
 		m->mask = signextend(m, strtoul(l, &l, 0));
 		eatsize(&l);
-	} else
-		m->mask = ~0L;
+		break;
+	case '|':
+		m->mask_op |= OPOR;
+		++l;
+		m->mask = signextend(m, strtoul(l, &l, 0));
+		eatsize(&l);
+		break;
+	case '^':
+		m->mask_op |= OPXOR;
+		++l;
+		m->mask = signextend(m, strtoul(l, &l, 0));
+		eatsize(&l);
+		break;
+	case '+':
+		m->mask_op |= OPADD;
+		++l;
+		m->mask = signextend(m, strtoul(l, &l, 0));
+		eatsize(&l);
+		break;
+	case '-':
+		m->mask_op |= OPMINUS;
+		++l;
+		m->mask = signextend(m, strtoul(l, &l, 0));
+		eatsize(&l);
+		break;
+	case '*':
+		m->mask_op |= OPMULTIPLY;
+		++l;
+		m->mask = signextend(m, strtoul(l, &l, 0));
+		eatsize(&l);
+		break;
+	case '%':
+		m->mask_op |= OPMODULO;
+		++l;
+		m->mask = signextend(m, strtoul(l, &l, 0));
+		eatsize(&l);
+		break;
+	case '/':
+		if (STRING != m->type && PSTRING != m->type) {
+			m->mask_op |= OPDIVIDE;
+			++l;
+			m->mask = signextend(m, strtoul(l, &l, 0));
+			eatsize(&l);
+		} else {
+			m->mask = 0L;
+			while (!isspace(*++l)) {
+				switch (*l) {
+				case CHAR_IGNORE_LOWERCASE:
+					m->mask |= STRING_IGNORE_LOWERCASE;
+					break;
+				case CHAR_COMPACT_BLANK:
+					m->mask |= STRING_COMPACT_BLANK;
+					break;
+				case CHAR_COMPACT_OPTIONAL_BLANK:
+					m->mask |=
+					    STRING_COMPACT_OPTIONAL_BLANK;
+					break;
+				default:
+					magwarn("string extension %c invalid",
+					    *l);
+					return -1;
+				}
+			}
+		}
+		break;
+	}
+	/* We used to set mask to all 1's here; instead, let's just not do anything
+	   if mask = 0 (unless you have a better idea). */
 	EATAB;
   
 	switch (*l) {
@@ -355,7 +636,7 @@ int *ndx, check;
 		}
 		break;
 	case '!':
-		if (m->type != STRING) {
+		if (m->type != STRING && m->type != PSTRING) {
 			m->reln = *l;
 			++l;
 			break;
@@ -398,10 +679,10 @@ GetDesc:
 	while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
 		/* NULLBODY */;
 
-	if (check) {
+	if (action == CHECK) {
 		mdump(m);
 	}
-	++(*ndx);		/* make room for next */
+	++(*nmagicp);		/* make room for next */
 	return 0;
 }
 
@@ -412,12 +693,12 @@ GetDesc:
  */
 static int
 getvalue(m, p)
-struct magic *m;
-char **p;
+	struct magic *m;
+	char **p;
 {
 	int slen;
 
-	if (m->type == STRING) {
+	if (m->type == STRING || m->type == PSTRING) {
 		*p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
 		m->vallen = slen;
 	} else
@@ -436,14 +717,14 @@ char **p;
  */
 static char *
 getstr(s, p, plen, slen)
-register char	*s;
-register char	*p;
-int	plen, *slen;
+	char	*s;
+	char	*p;
+	int	plen, *slen;
 {
 	char	*origs = s, *origp = p;
 	char	*pmax = p + plen - 1;
-	register int	c;
-	register int	val;
+	int	c;
+	int	val;
 
 	while ((c = *s++) != '\0') {
 		if (isspace((unsigned char) c))
@@ -539,13 +820,17 @@ out:
 /* Single hex char to int; -1 if not a hex char. */
 static int
 hextoint(c)
-int c;
+	int c;
 {
-	if (!isascii((unsigned char) c))	return -1;
-	if (isdigit((unsigned char) c))		return c - '0';
-	if ((c>='a')&&(c<='f'))	return c + 10 - 'a';
-	if ((c>='A')&&(c<='F'))	return c + 10 - 'A';
-				return -1;
+	if (!isascii((unsigned char) c))
+		return -1;
+	if (isdigit((unsigned char) c))
+		return c - '0';
+	if ((c >= 'a')&&(c <= 'f'))
+		return c + 10 - 'a';
+	if (( c>= 'A')&&(c <= 'F'))
+		return c + 10 - 'A';
+	return -1;
 }
 
 
@@ -554,11 +839,11 @@ int c;
  */
 void
 showstr(fp, s, len)
-FILE *fp;
-const char *s;
-int len;
+	FILE *fp;
+	const char *s;
+	int len;
 {
-	register char	c;
+	char	c;
 
 	for (;;) {
 		c = *s++;
@@ -613,7 +898,7 @@ int len;
  */
 static void
 eatsize(p)
-char **p;
+	char **p;
 {
 	char *l = *p;
 
@@ -634,3 +919,227 @@ char **p;
 
 	*p = l;
 }
+
+/*
+ * Load a compiled magic file, either mmap'ed (QUICK) or read into memory.
+ */
+static int
+apprentice_map(magicp, nmagicp, fn, action)
+	struct magic **magicp;
+	uint32 *nmagicp;
+	const char *fn;
+	int action;
+{
+	int fd;
+	struct stat st;
+	uint32 *ptr;
+	uint32 version;
+	int needsbyteswap;
+	char *dbname = mkdbname(fn);
+
+	if (dbname == NULL)
+		return -1;
+
+	if ((fd = open(dbname, O_RDONLY)) == -1)
+		return -1;
+
+	if (fstat(fd, &st) == -1) {
+		(void)fprintf(stderr, "%s: Cannot stat `%s' (%s)\n",
+		    progname, dbname, strerror(errno));
+		goto error;
+	}
+
+#ifdef QUICK
+	if ((*magicp = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE,
+	    MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) {
+		(void)fprintf(stderr, "%s: Cannot map `%s' (%s)\n",
+		    progname, dbname, strerror(errno));
+		goto error;
+	}
+#else
+	if ((*magicp = malloc((size_t)st.st_size)) == NULL) {
+		(void) fprintf(stderr, "%s: Out of memory (%s).\n", progname,
+		     strerror(errno));
+		goto error;
+	}
+	if (read(fd, *magicp, (size_t)st.st_size) != (size_t)st.st_size) {
+		(void) fprintf(stderr, "%s: Read failed (%s).\n", progname,
+		    strerror(errno));
+		goto error;
+	}
+#endif
+	(void)close(fd);
+	fd = -1;
+	ptr = (uint32 *) *magicp;
+	if (*ptr != MAGICNO) {
+		if (swap4(*ptr) != MAGICNO) {
+			(void)fprintf(stderr, "%s: Bad magic in `%s'\n",
+			    progname, dbname);
+			goto error;
+		}
+		needsbyteswap = 1;
+	} else
+		needsbyteswap = 0;
+	if (needsbyteswap)
+		version = swap4(ptr[1]);
+	else
+		version = ptr[1];
+	if (version != VERSIONNO) {
+		(void)fprintf(stderr, 
+		    "%s: version mismatch (%d != %d) in `%s'\n",
+		    progname, version, VERSIONNO, dbname);
+		goto error;
+	}
+	*nmagicp = (st.st_size / sizeof(struct magic)) - 1;
+	(*magicp)++;
+	if (needsbyteswap)
+		byteswap(*magicp, *nmagicp);
+	return 0;
+
+error:
+	if (fd != -1)
+		(void)close(fd);
+	if (*magicp) {
+#ifdef QUICK
+		(void)munmap(*magicp, (size_t)st.st_size);
+#else
+		free(*magicp);
+#endif
+	} else {
+		*magicp = NULL;
+		*nmagicp = 0;
+	}
+	return -1;
+}
+
+/*
+ * Write the parsed magic entries out as a compiled database file.
+ */
+static int
+apprentice_compile(magicp, nmagicp, fn, action)
+	struct magic **magicp;
+	uint32 *nmagicp;
+	const char *fn;
+	int action;
+{
+	int fd;
+	char *dbname = mkdbname(fn);
+	static const uint32 ar[] = {
+	    MAGICNO, VERSIONNO
+	};
+
+	if (dbname == NULL) 
+		return -1;
+
+	if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC, 0644)) == -1) {
+		(void)fprintf(stderr, "%s: Cannot open `%s' (%s)\n",
+		    progname, dbname, strerror(errno));
+		return -1;
+	}
+
+	if (write(fd, ar, sizeof(ar)) != sizeof(ar)) {
+		(void)fprintf(stderr, "%s: error writing `%s' (%s)\n",
+		    progname, dbname, strerror(errno));
+		return -1;
+	}
+
+	if (lseek(fd, sizeof(struct magic), SEEK_SET) != sizeof(struct magic)) {
+		(void)fprintf(stderr, "%s: error seeking `%s' (%s)\n",
+		    progname, dbname, strerror(errno));
+		return -1;
+	}
+
+	if (write(fd, *magicp,  sizeof(struct magic) * *nmagicp) 
+	    != sizeof(struct magic) * *nmagicp) {
+		(void)fprintf(stderr, "%s: error writing `%s' (%s)\n",
+		    progname, dbname, strerror(errno));
+		return -1;
+	}
+
+	(void)close(fd);
+	return 0;
+}
+
+/*
+ * Build the compiled database name by appending ".mgc" to the magic file name.
+ */
+char *
+mkdbname(fn)
+	const char *fn;
+{
+	static const char ext[] = ".mgc";
+	static char *buf = NULL;
+	size_t len = strlen(fn) + sizeof(ext) + 1;
+	if (buf == NULL)
+		buf = malloc(len);
+	else
+		buf = realloc(buf, len);
+	if (buf == NULL) {
+		(void) fprintf(stderr, "%s: Out of memory (%s).\n", progname,
+		    strerror(errno));
+		return NULL;
+	}
+	(void)strcpy(buf, fn);
+	(void)strcat(buf, ext);
+	return buf;
+}
+
+/*
+ * Byteswap an mmap'ed file if needed
+ */
+static void
+byteswap(magic, nmagic)
+	struct magic *magic;
+	uint32 nmagic;
+{
+	uint32 i;
+	for (i = 0; i < nmagic; i++)
+		bs1(&magic[i]);
+}
+
+/*
+ * swap a short
+ */
+static uint16
+swap2(sv) 
+	uint16 sv;
+{
+	uint16 rv;
+	uint8 *s = (uint8 *) &sv; 
+	uint8 *d = (uint8 *) &rv; 
+	d[0] = s[1];
+	d[1] = s[0];
+	return rv;
+}
+
+/*
+ * swap an int
+ */
+static uint32
+swap4(sv) 
+	uint32 sv;
+{
+	uint32 rv;
+	uint8 *s = (uint8 *) &sv; 
+	uint8 *d = (uint8 *) &rv; 
+	d[0] = s[3];
+	d[1] = s[2];
+	d[2] = s[1];
+	d[3] = s[0];
+	return rv;
+}
+
+/*
+ * byteswap a single magic entry
+ */
+static
+void bs1(m)
+	struct magic *m;
+{
+	m->cont_level = swap2(m->cont_level);
+	m->offset = swap4(m->offset);
+	m->in_offset = swap4(m->in_offset);
+	if (m->type != STRING)
+		m->value.l = swap4(m->value.l);
+	m->mask = swap4(m->mask);
+}

+ 606 - 43
ascmagic.c

@@ -5,6 +5,14 @@
  * Copyright (c) Ian F. Darwin, 1987.
  * Written by Ian F. Darwin.
  *
+ * Extensively modified by Eric Fischer <enf@pobox.com> in July, 2000,
+ * to handle character codes other than ASCII on a unified basis.
+ *
+ * Joerg Wunsch <joerg@freebsd.org> wrote the original support for 8-bit
+ * international characters, now subsumed into this file.
+ */
+
+/*
  * This software is not subject to any license of the American Telephone
  * and Telegraph Company or of the Regents of the University of California.
  *
@@ -38,22 +46,50 @@
 #include "names.h"
 
 #ifndef	lint
-FILE_RCSID("@(#)$Id: ascmagic.c,v 1.24 1999/02/14 17:16:00 christos Exp $")
+FILE_RCSID("@(#)$Id: ascmagic.c,v 1.30 2001/07/26 13:15:49 christos Exp $")
 #endif	/* lint */
 
-			/* an optimisation over plain strcmp() */
-#define	STREQ(a, b)	(*(a) == *(b) && strcmp((a), (b)) == 0)
+typedef unsigned long unichar;
+
+#define MAXLINELEN 300	/* longest sane line length */
+#define ISSPC(x) ((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' \
+		  || (x) == 0x85 || (x) == '\f')
+
+static int looks_ascii __P((const unsigned char *, int, unichar *, int *));
+static int looks_utf8 __P((const unsigned char *, int, unichar *, int *));
+static int looks_unicode __P((const unsigned char *, int, unichar *, int *));
+static int looks_latin1 __P((const unsigned char *, int, unichar *, int *));
+static int looks_extended __P((const unsigned char *, int, unichar *, int *));
+static void from_ebcdic __P((const unsigned char *, int, unsigned char *));
+static int ascmatch __P((const unsigned char *, const unichar *, int));
 
 int
 ascmagic(buf, nbytes)
-unsigned char *buf;
-int nbytes;	/* size actually read */
+	unsigned char *buf;
+	int nbytes;	/* size actually read */
 {
-	int i, has_escapes = 0;
-	unsigned char *s;
-	char nbuf[HOWMANY+1];	/* one extra for terminating '\0' */
-	char *token;
-	register struct names *p;
+	int i;
+	char nbuf[HOWMANY+1];		/* one extra for terminating '\0' */
+	unichar ubuf[HOWMANY+1];	/* one extra for terminating '\0' */
+	int ulen;
+	struct names *p;
+
+	char *code = NULL;
+	char *code_mime = NULL;
+	char *type = NULL;
+	char *subtype = NULL;
+	char *subtype_mime = NULL;
+
+	int has_escapes = 0;
+	int has_backspace = 0;
+
+	int n_crlf = 0;
+	int n_lf = 0;
+	int n_cr = 0;
+	int n_nel = 0;
+
+	int last_line_end = -1;
+	int has_long_lines = 0;
 
 	/*
 	 * Do the tar test first, because if the first file in the tar
@@ -61,66 +97,593 @@ int nbytes;	/* size actually read */
 	 */
 	switch (is_tar(buf, nbytes)) {
 	case 1:
-		ckfputs("tar archive", stdout);
+		ckfputs(iflag ? "application/x-tar" : "tar archive", stdout);
 		return 1;
 	case 2:
-		ckfputs("POSIX tar archive", stdout);
+		ckfputs(iflag ? "application/x-tar, POSIX"
+				: "POSIX tar archive", stdout);
 		return 1;
 	}
 
 	/*
+	 * Undo the NUL-termination kindly provided by process()
+	 * but leave at least one byte to look at
+	 */
+
+	while (nbytes > 1 && buf[nbytes - 1] == '\0')
+		nbytes--;
+
+	/*
+	 * Then try to determine whether it's any character code we can
+	 * identify.  Each of these tests, if it succeeds, will leave
+	 * the text converted into one-unichar-per-character Unicode in
+	 * ubuf, and the number of characters converted in ulen.
+	 */
+	if (looks_ascii(buf, nbytes, ubuf, &ulen)) {
+		code = "ASCII";
+		code_mime = "us-ascii";
+		type = "text";
+	} else if (looks_utf8(buf, nbytes, ubuf, &ulen)) {
+		code = "UTF-8 Unicode";
+		code_mime = "utf-8";
+		type = "text";
+	} else if ((i = looks_unicode(buf, nbytes, ubuf, &ulen))) {
+		if (i == 1)
+			code = "Little-endian UTF-16 Unicode";
+		else
+			code = "Big-endian UTF-16 Unicode";
+
+		type = "character data";
+		code_mime = "utf-16";    /* is this defined? */
+	} else if (looks_latin1(buf, nbytes, ubuf, &ulen)) {
+		code = "ISO-8859";
+		type = "text";
+		code_mime = "iso-8859-1"; 
+	} else if (looks_extended(buf, nbytes, ubuf, &ulen)) {
+		code = "Non-ISO extended-ASCII";
+		type = "text";
+		code_mime = "unknown";
+	} else {
+		from_ebcdic(buf, nbytes, nbuf);
+
+		if (looks_ascii(nbuf, nbytes, ubuf, &ulen)) {
+			code = "EBCDIC";
+			type = "character data";
+			code_mime = "ebcdic";
+		} else if (looks_latin1(nbuf, nbytes, ubuf, &ulen)) {
+			code = "International EBCDIC";
+			type = "character data";
+			code_mime = "ebcdic";
+		} else {
+			return 0;  /* doesn't look like text at all */
+		}
+	}
+
+	/*
 	 * for troff, look for . + letter + letter or .\";
 	 * this must be done to disambiguate tar archives' ./file
 	 * and other trash from real troff input.
+	 *
+	 * I believe Plan 9 troff allows non-ASCII characters in the names
+	 * of macros, so this test might possibly fail on such a file.
 	 */
-	if (*buf == '.') {
-		unsigned char *tp = buf + 1;
+	if (*ubuf == '.') {
+		unichar *tp = ubuf + 1;
 
-		while (isascii(*tp) && isspace(*tp))
+		while (ISSPC(*tp))
 			++tp;	/* skip leading whitespace */
-		if ((isascii(*tp) && (isalnum(*tp) || *tp=='\\') &&
-		    isascii(tp[1]) && (isalnum(tp[1]) || tp[1] == '"'))) {
-			ckfputs("troff or preprocessor input text", stdout);
-			return 1;
+		if ((tp[0] == '\\' && tp[1] == '\"') ||
+		    (isascii(tp[0]) && isalnum(tp[0]) &&
+		     isascii(tp[1]) && isalnum(tp[1]) &&
+		     ISSPC(tp[2]))) {
+			subtype_mime = "text/troff";
+			subtype = "troff or preprocessor input";
+			goto subtype_identified;
 		}
 	}
-	if ((*buf == 'c' || *buf == 'C') && 
-	    isascii(buf[1]) && isspace(buf[1])) {
-		ckfputs("fortran program text", stdout);
+
+	if ((*buf == 'c' || *buf == 'C') && ISSPC(buf[1])) {
+		subtype_mime = "text/fortran";
+		subtype = "fortran program";
+		goto subtype_identified;
+	}
+
+	/* look for tokens from names.h - this is expensive! */
+
+	i = 0;
+	while (i < ulen) {
+		int end;
+
+		/*
+		 * skip past any leading space
+		 */
+		while (i < ulen && ISSPC(ubuf[i]))
+			i++;
+		if (i >= ulen)
+			break;
+
+		/*
+		 * find the next whitespace
+		 */
+		for (end = i + 1; end < nbytes; end++)
+			if (ISSPC(ubuf[end]))
+				break;
+
+		/*
+		 * compare the word thus isolated against the token list
+		 */
+		for (p = names; p < names + NNAMES; p++) {
+			if (ascmatch(p->name, ubuf + i, end - i)) {
+				subtype = types[p->type].human;
+				subtype_mime = types[p->type].mime;
+				goto subtype_identified;
+			}
+		}
+
+		i = end;
+	}
+
+subtype_identified:
+
+	/*
+	 * Now try to discover other details about the file.
+	 */
+	for (i = 0; i < ulen; i++) {
+		if (i > last_line_end + MAXLINELEN)
+			has_long_lines = 1;
+
+		if (ubuf[i] == '\033')
+			has_escapes = 1;
+		if (ubuf[i] == '\b')
+			has_backspace = 1;
+
+		if (ubuf[i] == '\r' && (i + 1 <  ulen && ubuf[i + 1] == '\n')) {
+			n_crlf++;
+			last_line_end = i;
+		}
+		if (ubuf[i] == '\r' && (i + 1 >= ulen || ubuf[i + 1] != '\n')) {
+			n_cr++;
+			last_line_end = i;
+		}
+		if (ubuf[i] == '\n' && (i - 1 <  0    || ubuf[i - 1] != '\r')) {
+			n_lf++;
+			last_line_end = i;
+		}
+		if (ubuf[i] == 0x85) { /* X3.64/ECMA-43 "next line" character */
+			n_nel++;
+			last_line_end = i;
+		}
+	}
+
+	if (iflag) {
+		if (subtype_mime)
+			ckfputs(subtype_mime, stdout);
+		else
+			ckfputs("text/plain", stdout);
+
+		if (code_mime) {
+			ckfputs("; charset=", stdout);
+			ckfputs(code_mime, stdout);
+		}
+	} else {
+		ckfputs(code, stdout);
+
+		if (subtype) {
+			ckfputs(" ", stdout);
+			ckfputs(subtype, stdout);
+		}
+
+		ckfputs(" ", stdout);
+		ckfputs(type, stdout);
+
+		if (has_long_lines)
+			ckfputs(", with very long lines", stdout);
+
+		/*
+		 * Only report line terminators if we find one other than LF,
+		 * or if we find none at all.
+		 */
+		if ((n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) ||
+		    (n_crlf != 0 || n_cr != 0 || n_nel != 0)) {
+			ckfputs(", with", stdout);
+
+			if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0)
+				ckfputs(" no", stdout);
+			else {
+				if (n_crlf) {
+					ckfputs(" CRLF", stdout);
+					if (n_cr || n_lf || n_nel)
+						ckfputs(",", stdout);
+				}
+				if (n_cr) {
+					ckfputs(" CR", stdout);
+					if (n_lf || n_nel)
+						ckfputs(",", stdout);
+				}
+				if (n_lf) {
+					ckfputs(" LF", stdout);
+					if (n_nel)
+						ckfputs(",", stdout);
+				}
+				if (n_nel)
+					ckfputs(" NEL", stdout);
+			}
+
+			ckfputs(" line terminators", stdout);
+		}
+
+		if (has_escapes)
+			ckfputs(", with escape sequences", stdout);
+		if (has_backspace)
+			ckfputs(", with overstriking", stdout);
+	}
+
+	return 1;
+}
+
+static int
+ascmatch(s, us, ulen)
+	const unsigned char *s;
+	const unichar *us;
+	int ulen;
+{
+	size_t i;
+
+	for (i = 0; i < ulen; i++) {
+		if (s[i] != us[i])
+			return 0;
+	}
+
+	if (s[i])
+		return 0;
+	else
 		return 1;
+}
+
+/*
+ * This table reflects a particular philosophy about what constitutes
+ * "text," and there is room for disagreement about it.
+ *
+ * Version 3.31 of the file command considered a file to be ASCII if
+ * each of its characters was approved by either the isascii() or
+ * isalpha() function.  On most systems, this would mean that any
+ * file consisting only of characters in the range 0x00 ... 0x7F
+ * would be called ASCII text, but many systems might reasonably
+ * consider some characters outside this range to be alphabetic,
+ * so the file command would call such characters ASCII.  It might
+ * have been more accurate to call this "considered textual on the
+ * local system" than "ASCII."
+ *
+ * It considered a file to be "International language text" if each
+ * of its characters was either an ASCII printing character (according
+ * to the real ASCII standard, not the above test), a character in
+ * the range 0x80 ... 0xFF, or one of the following control characters:
+ * backspace, tab, line feed, vertical tab, form feed, carriage return,
+ * escape.  No attempt was made to determine the language in which files
+ * of this type were written.
+ *
+ *
+ * The table below considers a file to be ASCII if all of its characters
+ * are either ASCII printing characters (again, according to the X3.4
+ * standard, not isascii()) or any of the following controls: bell,
+ * backspace, tab, line feed, form feed, carriage return, esc, nextline.
+ *
+ * I include bell because some programs (particularly shell scripts)
+ * use it literally, even though it is rare in normal text.  I exclude
+ * vertical tab because it never seems to be used in real text.  I also
+ * include, with hesitation, the X3.64/ECMA-43 control nextline (0x85),
+ * because that's what the dd EBCDIC->ASCII table maps the EBCDIC newline
+ * character to.  It might be more appropriate to include it in the 8859
+ * set instead of the ASCII set, but it's got to be included in *something*
+ * we recognize or EBCDIC files aren't going to be considered textual.
+ * Some old Unix source files use SO/SI (^N/^O) to shift between Greek
+ * and Latin characters, so these should possibly be allowed.  But they
+ * make a real mess on VT100-style displays if they're not paired properly,
+ * so we are probably better off not calling them text.
+ *
+ * A file is considered to be ISO-8859 text if its characters are all
+ * either ASCII, according to the above definition, or printing characters
+ * from the ISO-8859 8-bit extension, characters 0xA0 ... 0xFF.
+ *
+ * Finally, a file is considered to be international text from some other
+ * character code if its characters are all either ISO-8859 (according to
+ * the above definition) or characters in the range 0x80 ... 0x9F, which
+ * ISO-8859 considers to be control characters but the IBM PC and Macintosh
+ * consider to be printing characters.
+ */
+
+#define F 0   /* character never appears in text */
+#define T 1   /* character appears in plain ASCII text */
+#define I 2   /* character appears in ISO-8859 text */
+#define X 3   /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
+
+static char text_chars[256] = {
+	/*                  BEL BS HT LF    FF CR    */
+	F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F,  /* 0x0X */
+        /*                              ESC          */
+	F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F,  /* 0x1X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x2X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x3X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x4X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x5X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x6X */
+	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F,  /* 0x7X */
+	/*            NEL                            */
+	X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X,  /* 0x8X */
+	X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,  /* 0x9X */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xaX */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xbX */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xcX */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xdX */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xeX */
+	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I   /* 0xfX */
+};
+
+static int
+looks_ascii(buf, nbytes, ubuf, ulen)
+	const unsigned char *buf;
+	int nbytes;
+	unichar *ubuf;
+	int *ulen;
+{
+	int i;
+
+	*ulen = 0;
+
+	for (i = 0; i < nbytes; i++) {
+		int t = text_chars[buf[i]];
+
+		if (t != T)
+			return 0;
+
+		ubuf[(*ulen)++] = buf[i];
 	}
 
+	return 1;
+}
+
+static int
+looks_latin1(buf, nbytes, ubuf, ulen)
+	const unsigned char *buf;
+	int nbytes;
+	unichar *ubuf;
+	int *ulen;
+{
+	int i;
+
+	*ulen = 0;
 
-	/* Make sure we are dealing with ascii text before looking for tokens */
 	for (i = 0; i < nbytes; i++) {
-		if (!isascii(buf[i]))
-			return 0;	/* not all ASCII */
+		int t = text_chars[buf[i]];
+
+		if (t != T && t != I)
+			return 0;
+
+		ubuf[(*ulen)++] = buf[i];
 	}
 
-	/* look for tokens from names.h - this is expensive! */
-	/* make a copy of the buffer here because strtok() will destroy it */
-	s = (unsigned char*) memcpy(nbuf, buf, nbytes);
-	s[nbytes] = '\0';
-	has_escapes = (memchr(s, '\033', nbytes) != NULL);
-	while ((token = strtok((char *) s, " \t\n\r\f")) != NULL) {
-		s = NULL;	/* make strtok() keep on tokin' */
-		for (p = names; p < names + NNAMES; p++) {
-			if (STREQ(p->name, token)) {
-				ckfputs(types[p->type], stdout);
-				if (has_escapes)
-					ckfputs(" (with escape sequences)", 
-						stdout);
-				return 1;
+	return 1;
+}
+
+static int
+looks_extended(buf, nbytes, ubuf, ulen)
+	const unsigned char *buf;
+	int nbytes;
+	unichar *ubuf;
+	int *ulen;
+{
+	int i;
+
+	*ulen = 0;
+
+	for (i = 0; i < nbytes; i++) {
+		int t = text_chars[buf[i]];
+
+		if (t != T && t != I && t != X)
+			return 0;
+
+		ubuf[(*ulen)++] = buf[i];
+	}
+
+	return 1;
+}
+
+int
+looks_utf8(buf, nbytes, ubuf, ulen)
+	const unsigned char *buf;
+	int nbytes;
+	unichar *ubuf;
+	int *ulen;
+{
+	int i, n;
+	unichar c;
+	int gotone = 0;
+
+	*ulen = 0;
+
+	for (i = 0; i < nbytes; i++) {
+		if ((buf[i] & 0x80) == 0) {	   /* 0xxxxxxx is plain ASCII */
+			/*
+			 * Even if the whole file is valid UTF-8 sequences,
+			 * still reject it if it uses weird control characters.
+			 */
+
+			if (text_chars[buf[i]] != T)
+				return 0;
+
+			ubuf[(*ulen)++] = buf[i];
+		} else if ((buf[i] & 0x40) == 0) { /* 10xxxxxx never 1st byte */
+			return 0;
+		} else {			   /* 11xxxxxx begins UTF-8 */
+			int following;
+
+			if ((buf[i] & 0x20) == 0) {		/* 110xxxxx */
+				c = buf[i] & 0x1f;
+				following = 1;
+			} else if ((buf[i] & 0x10) == 0) {	/* 1110xxxx */
+				c = buf[i] & 0x0f;
+				following = 2;
+			} else if ((buf[i] & 0x08) == 0) {	/* 11110xxx */
+				c = buf[i] & 0x07;
+				following = 3;
+			} else if ((buf[i] & 0x04) == 0) {	/* 111110xx */
+				c = buf[i] & 0x03;
+				following = 4;
+			} else if ((buf[i] & 0x02) == 0) {	/* 1111110x */
+				c = buf[i] & 0x01;
+				following = 5;
+			} else
+				return 0;
+
+			for (n = 0; n < following; n++) {
+				i++;
+				if (i >= nbytes)
+					goto done;
+
+				if ((buf[i] & 0x80) == 0 || (buf[i] & 0x40))
+					return 0;
+
+				c = (c << 6) + (buf[i] & 0x3f);
 			}
+
+			ubuf[(*ulen)++] = c;
+			gotone = 1;
 		}
 	}
+done:
+	return gotone;   /* don't claim it's UTF-8 if it's all 7-bit */
+}
+
+static int
+looks_unicode(buf, nbytes, ubuf, ulen)
+	const unsigned char *buf;
+	int nbytes;
+	unichar *ubuf;
+	int *ulen;
+{
+	int bigend;
+	int i;
 
-	/* all else fails, but it is ASCII... */
-	ckfputs("ASCII text", stdout);
-	if (has_escapes) {
-		ckfputs(" (with escape sequences)", stdout);
+	if (nbytes < 2)
+		return 0;
+
+	if (buf[0] == 0xff && buf[1] == 0xfe)
+		bigend = 0;
+	else if (buf[0] == 0xfe && buf[1] == 0xff)
+		bigend = 1;
+	else
+		return 0;
+
+	*ulen = 0;
+
+	for (i = 2; i + 1 < nbytes; i += 2) {
+		/* XXX fix to properly handle chars > 65535 (UTF-16 surrogate pairs) */
+
+		if (bigend)
+			ubuf[(*ulen)++] = buf[i + 1] + 256 * buf[i];
+		else
+			ubuf[(*ulen)++] = buf[i] + 256 * buf[i + 1];
+
+		if (ubuf[*ulen - 1] == 0xfffe)
+			return 0;
+		if (ubuf[*ulen - 1] < 128 && text_chars[ubuf[*ulen - 1]] != T)
+			return 0;
 	}
+
 	return 1;
 }
 
+#undef F
+#undef T
+#undef I
+#undef X
+
+/*
+ * This table maps each EBCDIC character to an (8-bit extended) ASCII
+ * character, as specified in the rationale for the dd(1) command in
+ * draft 11.2 (September, 1991) of the POSIX P1003.2 standard.
+ *
+ * Unfortunately it does not seem to correspond exactly to any of the
+ * five variants of EBCDIC documented in IBM's _Enterprise Systems
+ * Architecture/390: Principles of Operation_, SA22-7201-06, Seventh
+ * Edition, July, 1999, pp. I-1 - I-4.
+ *
+ * Fortunately, though, all versions of EBCDIC, including this one, agree
+ * on most of the printing characters that also appear in (7-bit) ASCII.
+ * Of these, only '|', '!', '~', '^', '[', and ']' are in question at all.
+ *
+ * Fortunately too, there is general agreement that codes 0x00 through
+ * 0x3F represent control characters, 0x41 a nonbreaking space, and the
+ * remainder printing characters.
+ *
+ * This is sufficient to allow us to identify EBCDIC text and to distinguish
+ * between old-style and internationalized examples of text.
+ */
+
+unsigned char ebcdic_to_ascii[] = {
+  0,   1,   2,   3, 156,   9, 134, 127, 151, 141, 142,  11,  12,  13,  14,  15,
+ 16,  17,  18,  19, 157, 133,   8, 135,  24,  25, 146, 143,  28,  29,  30,  31,
+128, 129, 130, 131, 132,  10,  23,  27, 136, 137, 138, 139, 140,   5,   6,   7,
+144, 145,  22, 147, 148, 149, 150,   4, 152, 153, 154, 155,  20,  21, 158,  26,
+' ', 160, 161, 162, 163, 164, 165, 166, 167, 168, 213, '.', '<', '(', '+', '|',
+'&', 169, 170, 171, 172, 173, 174, 175, 176, 177, '!', '$', '*', ')', ';', '~',
+'-', '/', 178, 179, 180, 181, 182, 183, 184, 185, 203, ',', '%', '_', '>', '?',
+186, 187, 188, 189, 190, 191, 192, 193, 194, '`', ':', '#', '@', '\'','=', '"',
+195, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 196, 197, 198, 199, 200, 201,
+202, 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', '^', 204, 205, 206, 207, 208,
+209, 229, 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 210, 211, 212, '[', 214, 215,
+216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, ']', 230, 231,
+'{', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 232, 233, 234, 235, 236, 237,
+'}', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 238, 239, 240, 241, 242, 243,
+'\\',159, 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 244, 245, 246, 247, 248, 249,
+'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 250, 251, 252, 253, 254, 255
+};
 
+/*
+ * The following EBCDIC-to-ASCII table may relate more closely to reality,
+ * or at least to modern reality.  It comes from
+ *
+ *   http://ftp.s390.ibm.com/products/oe/bpxqp9.html
+ *
+ * and maps the characters of EBCDIC code page 1047 (the code used for
+ * Unix-derived software on IBM's 390 systems) to the corresponding
+ * characters from ISO 8859-1.
+ *
+ * If this table is used instead of the above one, some of the special
+ * cases for the NEL character can be taken out of the code.
+ */
+
+unsigned char ebcdic_1047_to_8859[] = {
+0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F,
+0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F,
+0x80,0x81,0x82,0x83,0x84,0x85,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07,
+0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A,
+0x20,0xA0,0xE2,0xE4,0xE0,0xE1,0xE3,0xE5,0xE7,0xF1,0xA2,0x2E,0x3C,0x28,0x2B,0x7C,
+0x26,0xE9,0xEA,0xEB,0xE8,0xED,0xEE,0xEF,0xEC,0xDF,0x21,0x24,0x2A,0x29,0x3B,0x5E,
+0x2D,0x2F,0xC2,0xC4,0xC0,0xC1,0xC3,0xC5,0xC7,0xD1,0xA6,0x2C,0x25,0x5F,0x3E,0x3F,
+0xF8,0xC9,0xCA,0xCB,0xC8,0xCD,0xCE,0xCF,0xCC,0x60,0x3A,0x23,0x40,0x27,0x3D,0x22,
+0xD8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xAB,0xBB,0xF0,0xFD,0xFE,0xB1,
+0xB0,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xAA,0xBA,0xE6,0xB8,0xC6,0xA4,
+0xB5,0x7E,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xA1,0xBF,0xD0,0x5B,0xDE,0xAE,
+0xAC,0xA3,0xA5,0xB7,0xA9,0xA7,0xB6,0xBC,0xBD,0xBE,0xDD,0xA8,0xAF,0x5D,0xB4,0xD7,
+0x7B,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xAD,0xF4,0xF6,0xF2,0xF3,0xF5,
+0x7D,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xB9,0xFB,0xFC,0xF9,0xFA,0xFF,
+0x5C,0xF7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xB2,0xD4,0xD6,0xD2,0xD3,0xD5,
+0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xB3,0xDB,0xDC,0xD9,0xDA,0x9F
+};
+
+/*
+ * Copy buf[0 ... nbytes-1] into out[], translating EBCDIC to ASCII.
+ */
+static void
+from_ebcdic(buf, nbytes, out)
+	const unsigned char *buf;
+	int nbytes;
+	unsigned char *out;
+{
+	int i;
+
+	for (i = 0; i < nbytes; i++) {
+		out[i] = ebcdic_to_ascii[buf[i]];
+	}
+}

+ 100 - 40
compress.c

@@ -6,9 +6,6 @@
  *					    using method, return sizeof new
  */
 #include "file.h"
-#ifdef __CYGWIN__
-#include <errno.h>
-#endif
 #include <stdio.h>
 #include <stdlib.h>
 #ifdef HAVE_UNISTD_H
@@ -19,33 +16,39 @@
 #include <sys/wait.h>
 #endif
 #ifndef lint
-FILE_RCSID("@(#)$Id: compress.c,v 1.14 1999/10/31 22:23:03 christos Exp $")
+FILE_RCSID("@(#)$Id: compress.c,v 1.20 2001/07/22 21:04:15 christos Exp $")
 #endif
 
 
 static struct {
-   const char *magic;
-   int   maglen;
-   const char *const argv[3];
-   int	 silent;
+	const char *magic;
+	int   maglen;
+	const char *const argv[3];
+	int	 silent;
 } compr[] = {
-    { "\037\235", 2, { "uncompress", "-c", NULL }, 0 },	/* compressed */
-    { "\037\213", 2, { "gzip", "-cdq", NULL }, 1 },	/* gzipped */
-    { "\037\236", 2, { "gzip", "-cdq", NULL }, 1 },	/* frozen */
-    { "\037\240", 2, { "gzip", "-cdq", NULL }, 1 },	/* SCO LZH */
-    /* the standard pack utilities do not accept standard input */
-    { "\037\036", 2, { "gzip", "-cdq", NULL }, 0 },	/* packed */
+	{ "\037\235", 2, { "gzip", "-cdq", NULL }, 1 },		/* compressed */
+	/* Uncompress can get stuck; so use gzip first if we have it
+	 * Idea from Damien Clark, thanks! */
+	{ "\037\235", 2, { "uncompress", "-c", NULL }, 1 },	/* compressed */
+	{ "\037\213", 2, { "gzip", "-cdq", NULL }, 1 },		/* gzipped */
+	{ "\037\236", 2, { "gzip", "-cdq", NULL }, 1 },		/* frozen */
+	{ "\037\240", 2, { "gzip", "-cdq", NULL }, 1 },		/* SCO LZH */
+	/* the standard pack utilities do not accept standard input */
+	{ "\037\036", 2, { "gzip", "-cdq", NULL }, 0 },		/* packed */
+	{ "BZh",      3, { "bzip2", "-cd", NULL }, 1 },		/* bzip2-ed */
 };
 
 static int ncompr = sizeof(compr) / sizeof(compr[0]);
 
 
 static int uncompress __P((int, const unsigned char *, unsigned char **, int));
+static int swrite __P((int, const void *, size_t));
+static int sread __P((int, void *, size_t));
 
 int
 zmagic(buf, nbytes)
-unsigned char *buf;
-int nbytes;
+	unsigned char *buf;
+	int nbytes;
 {
 	unsigned char *newbuf;
 	int newsize;
@@ -54,30 +57,84 @@ int nbytes;
 	for (i = 0; i < ncompr; i++) {
 		if (nbytes < compr[i].maglen)
 			continue;
-		if (memcmp(buf, compr[i].magic,  compr[i].maglen) == 0)
-			break;
+		if (memcmp(buf, compr[i].magic, compr[i].maglen) == 0 &&
+		    (newsize = uncompress(i, buf, &newbuf, nbytes)) != 0) {
+			tryit(newbuf, newsize, 1);
+			free(newbuf);
+			printf(" (");
+			tryit(buf, nbytes, 0);
+			printf(")");
+			return 1;
+		}
 	}
 
 	if (i == ncompr)
 		return 0;
 
-	if ((newsize = uncompress(i, buf, &newbuf, nbytes)) != 0) {
-		tryit(newbuf, newsize, 1);
-		free(newbuf);
-		printf(" (");
-		tryit(buf, nbytes, 0);
-		printf(")");
-	}
 	return 1;
 }
 
+/*
+ * `safe' write for sockets and pipes.
+ */
+static int
+swrite(fd, buf, n)
+	int fd;
+	const void *buf;
+	size_t n;
+{
+	int rv;
+	size_t rn = n;
+
+	do
+		switch (rv = write(fd, buf, n)) {
+		case -1:
+			if (errno == EINTR)
+				continue;
+			return -1;
+		default:
+			n -= rv;
+			buf = ((char *)buf) + rv;
+			break;
+		}
+	while (n > 0);
+	return rn;
+}
+
+
+/*
+ * `safe' read for sockets and pipes.
+ */
+static int
+sread(fd, buf, n)
+	int fd;
+	void *buf;
+	size_t n;
+{
+	int rv;
+	size_t rn = n;
+
+	do
+		switch (rv = read(fd, buf, n)) {
+		case -1:
+			if (errno == EINTR)
+				continue;
+			return -1;
+		default:
+			n -= rv;
+			buf = ((char *)buf) + rv;
+			break;
+		}
+	while (n > 0);
+	return rn;
+}
 
 static int
 uncompress(method, old, newch, n)
-int method;
-const unsigned char *old;
-unsigned char **newch;
-int n;
+	int method;
+	const unsigned char *old;
+	unsigned char **newch;
+	int n;
 {
 	int fdin[2], fdout[2];
 
@@ -97,12 +154,11 @@ int n;
 		(void) close(fdout[0]);
 		(void) close(fdout[1]);
 		if (compr[method].silent)
-		    (void) close(2);
+			(void) close(2);
 
 		execvp(compr[method].argv[0],
 		       (char *const *)compr[method].argv);
-		error("could not execute `%s' (%s).\n", 
-		      compr[method].argv[0], strerror(errno));
+		exit(1);
 		/*NOTREACHED*/
 	case -1:
 		error("could not fork (%s).\n", strerror(errno));
@@ -111,20 +167,24 @@ int n;
 	default: /* parent */
 		(void) close(fdin[0]);
 		(void) close(fdout[1]);
-		if (write(fdin[1], old, n) != n) {
-			error("write failed (%s).\n", strerror(errno));
-			/*NOTREACHED*/
+		if (swrite(fdin[1], old, n) != n) {
+			n = 0;
+			goto err;
 		}
 		(void) close(fdin[1]);
+		fdin[1] = -1;
 		if ((*newch = (unsigned char *) malloc(n)) == NULL) {
-			error("out of memory.\n");
-			/*NOTREACHED*/
+			n = 0;
+			goto err;
 		}
-		if ((n = read(fdout[0], *newch, n)) <= 0) {
+		if ((n = sread(fdout[0], *newch, n)) <= 0) {
 			free(*newch);
-			error("read failed (%s).\n", strerror(errno));
-			/*NOTREACHED*/
+			n = 0;
+			goto err;
 		}
+err:
+		if (fdin[1] != -1)
+			(void) close(fdin[1]);
 		(void) close(fdout[0]);
 		(void) wait(NULL);
 		return n;

+ 31 - 5
config.h.in

@@ -9,6 +9,13 @@
 /* Define if you have <sys/wait.h> that is POSIX.1 compatible.  */
 #undef HAVE_SYS_WAIT_H
 
+/* Define if your struct tm has tm_zone.  */
+#undef HAVE_TM_ZONE
+
+/* Define if you don't have tm_zone but do have the external array
+   tzname.  */
+#undef HAVE_TZNAME
+
 /* Define if major, minor, and makedev are declared in <mkdev.h>.  */
 #undef MAJOR_IN_MKDEV
 
@@ -24,11 +31,8 @@
 /* Define if you have the ANSI C header files.  */
 #undef STDC_HEADERS
 
-/* Autoheader needs me */
-#define PACKAGE "file"
-
-/* Autoheader needs me */
-#define VERSION "3.27"
+/* Define if your <sys/time.h> declares struct tm.  */
+#undef TM_IN_SYS_TIME
 
 /* Define if builtin ELF support is enabled.  */
 #undef BUILTIN_ELF
@@ -39,6 +43,12 @@
 /* Define if the `long long' type works.  */
 #undef HAVE_LONG_LONG
 
+/* Define if we have "tm_isdst" in "struct tm".  */
+#undef HAVE_TM_ISDST
+
+/* Define if we have a global "int" variable "daylight".  */
+#undef HAVE_DAYLIGHT
+
 /* Define to `unsigned char' if standard headers don't define.  */
 #undef uint8_t
 
@@ -64,11 +74,27 @@
 /* The number of bytes in a uint64_t.  */
 #define SIZEOF_UINT64_T 0
 
+/* Define if you have the mmap function.  */
+#undef HAVE_MMAP
+
 /* Define if you have the strerror function.  */
 #undef HAVE_STRERROR
 
 /* Define if you have the strtoul function.  */
 #undef HAVE_STRTOUL
 
+/* Define if you have the <locale.h> header file.  */
+#undef HAVE_LOCALE_H
+
+/* Define if you have the <sys/mman.h> header file.  */
+#undef HAVE_SYS_MMAN_H
+
 /* Define if you have the <unistd.h> header file.  */
 #undef HAVE_UNISTD_H
+
+/* Name of package */
+#undef PACKAGE
+
+/* Version number of package */
+#undef VERSION
+

File diff suppressed because it is too large
+ 377 - 92
configure


+ 22 - 2
configure.in

@@ -1,6 +1,6 @@
 dnl Process this file with autoconf to produce a configure script.
 AC_INIT(file.c)
-AM_INIT_AUTOMAKE(file, 3.27)
+AM_INIT_AUTOMAKE(file, 3.37)
 AM_CONFIG_HEADER(config.h)
 
 AC_MSG_CHECKING(for builtin ELF support)
@@ -31,6 +31,23 @@ fi], [
   AC_DEFINE(ELFCORE)
 ])
 
+AC_MSG_CHECKING(for file formats in man section 5)
+AC_ARG_ENABLE(fsect-man5,
+[  --enable-fsect-man5      enable file formats in man section 5],
+[if test "${enableval}" = yes; then
+  AC_MSG_RESULT(yes)
+  fsect=5
+else
+  AC_MSG_RESULT(no)
+  fsect=4
+fi], [
+  # disable by default
+  AC_MSG_RESULT(no)
+  fsect=4
+])
+AC_SUBST(fsect)
+AM_CONDITIONAL(FSECT5, test x$fsect = x5)
+
 dnl Checks for programs.
 AC_PROG_CC
 AC_PROG_INSTALL
@@ -41,12 +58,15 @@ AC_HEADER_STDC
 AC_HEADER_MAJOR
 AC_HEADER_SYS_WAIT
 AC_CHECK_HEADERS(unistd.h)
+AC_CHECK_HEADERS(locale.h)
+AC_CHECK_HEADERS(sys/mman.h)
 
 dnl Checks for typedefs, structures, and compiler characteristics.
 AC_C_CONST
 AC_TYPE_OFF_T
 AC_TYPE_SIZE_T
 AC_STRUCT_ST_RDEV
+AC_STRUCT_TIMEZONE_DAYLIGHT
 
 dnl FIXME: only found in standard headers!
 AC_CHECK_TYPE(uint8_t, unsigned char)
@@ -68,6 +88,6 @@ AC_CHECK_SIZEOF_STDC_HEADERS(uint32_t, 0)
 AC_CHECK_SIZEOF_STDC_HEADERS(uint64_t, 0)
 
 dnl Checks for functions
-AC_CHECK_FUNCS(strerror strtoul)
+AC_CHECK_FUNCS(mmap strerror strtoul)
 
 AC_OUTPUT(Makefile)

+ 149 - 102
file.c

@@ -24,12 +24,9 @@
  *
  * 4. This notice may not be removed or altered.
  */
-#include "file.h"
-#ifdef __CYGWIN__
-#include <errno.h>
-#endif
 #include <stdio.h>
 #include <stdlib.h>
+#include <unistd.h>
 #include <string.h>
 #include <sys/types.h>
 #include <sys/param.h>	/* for MAXPATHLEN */
@@ -49,20 +46,24 @@
 #ifdef HAVE_UNISTD_H
 #include <unistd.h>	/* for read() */
 #endif
+#ifdef HAVE_LOCALE_H
+#include <locale.h>
+#endif
 
 #include <netinet/in.h>		/* for byte swapping */
 
+#include "file.h"
 #include "patchlevel.h"
 
 #ifndef	lint
-FILE_RCSID("@(#)$Id: file.c,v 1.47 1999/10/31 22:23:03 christos Exp $")
+FILE_RCSID("@(#)$Id: file.c,v 1.59 2001/07/23 00:02:32 christos Exp $")
 #endif	/* lint */
 
 
 #ifdef S_IFLNK
-# define USAGE  "Usage: %s [-bcnvzL] [-f namefile] [-m magicfiles] file...\n"
+# define USAGE  "Usage: %s [-bciknsvzL] [-f namefile] [-m magicfiles] file...\n"
 #else
-# define USAGE  "Usage: %s [-bcnvz] [-f namefile] [-m magicfiles] file...\n"
+# define USAGE  "Usage: %s [-bciknsvz] [-f namefile] [-m magicfiles] file...\n"
 #endif
 
 #ifndef MAGIC
@@ -79,19 +80,24 @@ int 			/* Global command-line options 		*/
 	bflag = 0,	/* brief output format	 		*/
 	zflag = 0,	/* follow (uncompress) compressed files */
 	sflag = 0,	/* read block special files		*/
-	nobuffer = 0;   /* Do not buffer stdout */
+	iflag = 0,
+	nobuffer = 0,   /* Do not buffer stdout */
+	kflag = 0;	/* Keep going after the first match	*/
+
 int			/* Misc globals				*/
 	nmagic = 0;	/* number of valid magic[]s 		*/
 
 struct  magic *magic;	/* array of magic entries		*/
 
-const char *magicfile;	/* where magic be found 		*/
+const char *magicfile = 0;	/* where the magic is		*/
+const char *default_magicfile = MAGIC;
 
 char *progname;		/* used throughout 			*/
 int lineno;		/* line number in the magic file	*/
 
 
 static void	unwrap		__P((char *fn));
+static void	usage		__P((void));
 #if 0
 static int	byteconv4	__P((int, int, int));
 static short	byteconv2	__P((int, int, int));
@@ -108,60 +114,92 @@ main(argc, argv)
 	char *argv[];
 {
 	int c;
-	int check = 0, didsomefiles = 0, errflg = 0, ret = 0, app = 0;
+	int action = 0, didsomefiles = 0, errflg = 0, ret = 0, app = 0;
+	char *mime, *home, *usermagic;
+	struct stat sb;
+
+#ifdef LC_CTYPE
+	setlocale(LC_CTYPE, ""); /* makes islower etc work for other langs */
+#endif
 
 	if ((progname = strrchr(argv[0], '/')) != NULL)
 		progname++;
 	else
 		progname = argv[0];
 
-	if (!(magicfile = getenv("MAGIC")))
-		magicfile = MAGIC;
+	magicfile = default_magicfile;
+	if ((usermagic = getenv("MAGIC")) != NULL)
+		magicfile = usermagic;
+	else
+		if ((home = getenv("HOME")) != NULL) {
+			if ((usermagic = malloc(strlen(home) + 8)) != NULL) {
+				(void)strcpy(usermagic, home);
+				(void)strcat(usermagic, "/.magic");
+				if (stat(usermagic, &sb)<0) 
+					free(usermagic);
+				else
+					magicfile = usermagic;
+			}
+		}
 
-	while ((c = getopt(argc, argv, "bcdnf:m:svzL")) != EOF)
+	while ((c = getopt(argc, argv, "bcdf:ikm:nsvzCL")) != EOF)
 		switch (c) {
-		case 'v':
-			(void) fprintf(stdout, "%s-%d.%d\n", progname,
-				       FILE_VERSION_MAJOR, patchlevel);
-			(void) fprintf(stdout, "magic file from %s\n",
-				       magicfile);
-			return 1;
 		case 'b':
 			++bflag;
 			break;
 		case 'c':
-			++check;
+			action = CHECK;
 			break;
-		case 'n':
-			++nobuffer;
+		case 'C':
+			action = COMPILE;
 			break;
 		case 'd':
 			++debug;
 			break;
 		case 'f':
 			if (!app) {
-				ret = apprentice(magicfile, check);
-				if (check)
+				ret = apprentice(magicfile, action);
+				if (action)
 					exit(ret);
 				app = 1;
 			}
 			unwrap(optarg);
 			++didsomefiles;
 			break;
-#ifdef S_IFLNK
-		case 'L':
-			++lflag;
+		case 'i':
+			iflag++;
+			if ((mime = malloc(strlen(magicfile) + 6)) != NULL) {
+				(void)strcpy(mime, magicfile);
+				(void)strcat(mime, ".mime");
+				magicfile = mime;
+			}
+			break;
+		case 'k':
+			kflag = 1;
 			break;
-#endif
 		case 'm':
 			magicfile = optarg;
 			break;
-		case 'z':
-			zflag++;
+		case 'n':
+			++nobuffer;
 			break;
 		case 's':
 			sflag++;
 			break;
+		case 'v':
+			(void) fprintf(stdout, "%s-%d.%d\n", progname,
+				       FILE_VERSION_MAJOR, patchlevel);
+			(void) fprintf(stdout, "magic file from %s\n",
+				       magicfile);
+			return 1;
+		case 'z':
+			zflag++;
+			break;
+#ifdef S_IFLNK
+		case 'L':
+			++lflag;
+			break;
+#endif
 		case '?':
 		default:
 			errflg++;
@@ -169,21 +207,19 @@ main(argc, argv)
 		}
 
 	if (errflg) {
-		(void) fprintf(stderr, USAGE, progname);
-		exit(2);
+		usage();
 	}
 
 	if (!app) {
-		ret = apprentice(magicfile, check);
-		if (check)
+		ret = apprentice(magicfile, action);
+		if (action)
 			exit(ret);
 		app = 1;
 	}
 
 	if (optind == argc) {
 		if (!didsomefiles) {
-			(void)fprintf(stderr, USAGE, progname);
-			exit(2);
+			usage();
 		}
 	}
 	else {
@@ -206,7 +242,7 @@ main(argc, argv)
  */
 static void
 unwrap(fn)
-char *fn;
+	char *fn;
 {
 	char buf[MAXPATHLEN];
 	FILE *f;
@@ -251,29 +287,28 @@ char *fn;
  */
 static int
 byteconv4(from, same, big_endian)
-    int from;
-    int same;
-    int big_endian;
+	int from;
+	int same;
+	int big_endian;
 {
-  if (same)
-    return from;
-  else if (big_endian)		/* lsb -> msb conversion on msb */
-  {
-    union {
-      int i;
-      char c[4];
-    } retval, tmpval;
-
-    tmpval.i = from;
-    retval.c[0] = tmpval.c[3];
-    retval.c[1] = tmpval.c[2];
-    retval.c[2] = tmpval.c[1];
-    retval.c[3] = tmpval.c[0];
-
-    return retval.i;
-  }
-  else
-    return ntohl(from);		/* msb -> lsb conversion on lsb */
+	if (same)
+		return from;
+	else if (big_endian) {		/* lsb -> msb conversion on msb */
+		union {
+			int i;
+			char c[4];
+		} retval, tmpval;
+
+		tmpval.i = from;
+		retval.c[0] = tmpval.c[3];
+		retval.c[1] = tmpval.c[2];
+		retval.c[2] = tmpval.c[1];
+		retval.c[3] = tmpval.c[0];
+
+		return retval.i;
+	}
+	else
+		return ntohl(from);	/* msb -> lsb conversion on lsb */
 }
 
 /*
@@ -286,23 +321,22 @@ byteconv2(from, same, big_endian)
 	int same;
 	int big_endian;
 {
-  if (same)
-    return from;
-  else if (big_endian)		/* lsb -> msb conversion on msb */
-  {
-    union {
-      short s;
-      char c[2];
-    } retval, tmpval;
-
-    tmpval.s = (short) from;
-    retval.c[0] = tmpval.c[1];
-    retval.c[1] = tmpval.c[0];
-
-    return retval.s;
-  }
-  else
-    return ntohs(from);		/* msb -> lsb conversion on lsb */
+	if (same)
+		return from;
+	else if (big_endian) {		/* lsb -> msb conversion on msb */
+		union {
+			short s;
+			char c[2];
+		} retval, tmpval;
+
+		tmpval.s = (short) from;
+		retval.c[0] = tmpval.c[1];
+		retval.c[1] = tmpval.c[0];
+
+		return retval.s;
+	}
+	else
+		return ntohs(from);	/* msb -> lsb conversion on lsb */
 }
 #endif
 
@@ -311,8 +345,8 @@ byteconv2(from, same, big_endian)
  */
 void
 process(inname, wid)
-const char	*inname;
-int wid;
+	const char	*inname;
+	int wid;
 {
 	int	fd = 0;
 	static  const char stdname[] = "standard input";
@@ -335,22 +369,22 @@ int wid;
 			   (int) (wid - strlen(inname)), "");
 
 	if (inname != stdname) {
-	    /*
-	     * first try judging the file based on its filesystem status
-	     */
-	    if (fsmagic(inname, &sb) != 0) {
-		    putchar('\n');
-		    return;
-	    }
-
-	    if ((fd = open(inname, O_RDONLY)) < 0) {
-		    /* We can't open it, but we were able to stat it. */
-		    if (sb.st_mode & 0002) ckfputs("writeable, ", stdout);
-		    if (sb.st_mode & 0111) ckfputs("executable, ", stdout);
-		    ckfprintf(stdout, "can't read `%s' (%s).\n",
-			inname, strerror(errno));
-		    return;
-	    }
+		/*
+		 * first try judging the file based on its filesystem status
+		 */
+		if (fsmagic(inname, &sb) != 0) {
+			putchar('\n');
+			return;
+		}
+
+		if ((fd = open(inname, O_RDONLY)) < 0) {
+			/* We can't open it, but we were able to stat it. */
+			if (sb.st_mode & 0002) ckfputs("writeable, ", stdout);
+			if (sb.st_mode & 0111) ckfputs("executable, ", stdout);
+			ckfprintf(stdout, "can't read `%s' (%s).\n",
+			    inname, strerror(errno));
+			return;
+		}
 	}
 
 
@@ -363,15 +397,24 @@ int wid;
 	}
 
 	if (nbytes == 0)
-		ckfputs("empty", stdout);
+		ckfputs(iflag ? "application/x-empty" : "empty", stdout);
 	else {
 		buf[nbytes++] = '\0';	/* null-terminate it */
 		match = tryit(buf, nbytes, zflag);
 	}
 
 #ifdef BUILTIN_ELF
-	if (match == 's' && nbytes > 5)
+	if (match == 's' && nbytes > 5) {
+		/*
+		 * We matched something in the file, so this *might*
+		 * be an ELF file, and the file is at least 5 bytes long,
+		 * so if it's an ELF file it has at least one byte
+		 * past the ELF magic number - try extracting information
+		 * from the ELF headers that can't easily be extracted
+		 * with rules in the magic file.
+		 */
 		tryelf(fd, buf, nbytes);
+	}
 #endif
 
 	if (inname != stdname) {
@@ -404,8 +447,8 @@ int wid;
 
 int
 tryit(buf, nb, zflag)
-unsigned char *buf;
-int nb, zflag;
+	unsigned char *buf;
+	int nb, zflag;
 {
 	/* try compression stuff */
 	if (zflag && zmagic(buf, nb))
@@ -419,11 +462,15 @@ int nb, zflag;
 	if (ascmagic(buf, nb))
 		return 'a';
 
-	/* see if it's international language text */
-	if (internatmagic(buf, nb))
-		return 'i';
-
 	/* abandon hope, all ye who remain here */
 	ckfputs("data", stdout);
 		return '\0';
 }
+
+static void
+usage()
+{
+	(void)fprintf(stderr, USAGE, progname);
+	(void)fprintf(stderr, "Usage: %s -C [-m magic]\n", progname);
+	exit(1);
+}

+ 62 - 22
file.h

@@ -1,6 +1,6 @@
 /*
  * file.h - definitions for file(1) program
- * @(#)$Id: file.h,v 1.29 1999/02/14 17:16:06 christos Exp $
+ * @(#)$Id: file.h,v 1.37 2001/07/22 21:04:15 christos Exp $
  *
  * Copyright (c) Ian F. Darwin, 1987.
  * Written by Ian F. Darwin.
@@ -35,6 +35,10 @@
 
 typedef int int32;
 typedef unsigned int uint32;
+typedef short int16;
+typedef unsigned short uint16;
+typedef char int8;
+typedef unsigned char uint8;
 
 #ifndef HOWMANY
 # define HOWMANY 16384		/* how much of the file to look at */
@@ -43,20 +47,23 @@ typedef unsigned int uint32;
 #define MAXDESC	50		/* max leng of text description */
 #define MAXstring 32		/* max leng of "string" types */
 
+#define MAGICNO		0xF11E041C
+#define VERSIONNO	1
+
+#define CHECK	1
+#define COMPILE	2
+
 struct magic {
-	short flag;		
+	uint16 cont_level;/* level of ">" */
+	uint8 nospflag;	/* suppress space character */
+	uint8 flag;
 #define INDIR	1		/* if '>(...)' appears,  */
 #define	UNSIGNED 2		/* comparison is unsigned */
-#define ADD	4		/* if '>&' appears,  */
-	short cont_level;	/* level of ">" */
-	struct {
-		unsigned char type;	/* byte short long */
-		int32 offset;	/* offset from indirection */
-	} in;
-	int32 offset;		/* offset to magic number */
-	unsigned char reln;	/* relation (0=eq, '>'=gt, etc) */
-	unsigned char type;	/* int, short, long or string. */
-	char vallen;		/* length of string value, if any */
+#define OFFADD	4		/* if '>&' appears,  */
+	uint8 reln;		/* relation (0=eq, '>'=gt, etc) */
+	uint8 vallen;		/* length of string value, if any */
+	uint8 type;		/* int, short, long or string. */
+	uint8 in_type;		/* type of indirection */
 #define 			BYTE	1
 #define				SHORT	2
 #define				LONG	4
@@ -68,20 +75,53 @@ struct magic {
 #define				LESHORT	10
 #define				LELONG	11
 #define				LEDATE	12
+#define				PSTRING	13
+#define				LDATE	14
+#define				BELDATE	15
+#define				LELDATE	16
+	uint8 in_op;		/* operator for indirection */
+	uint8 mask_op;		/* operator for mask */
+#define				OPAND	1
+#define				OPOR	2
+#define				OPXOR	3
+#define				OPADD	4
+#define				OPMINUS	5
+#define				OPMULTIPLY	6
+#define				OPDIVIDE	7
+#define				OPMODULO	8
+#define				OPINVERSE	0x80
+	int32 offset;		/* offset to magic number */
+	int32 in_offset;	/* offset from indirection */
 	union VALUETYPE {
 		unsigned char b;
 		unsigned short h;
 		uint32 l;
 		char s[MAXstring];
 		unsigned char hs[2];	/* 2 bytes of a fixed-endian "short" */
-		unsigned char hl[4];	/* 2 bytes of a fixed-endian "long" */
+		unsigned char hl[4];	/* 4 bytes of a fixed-endian "long" */
 	} value;		/* either number or string */
 	uint32 mask;	/* mask before comparison with value */
-	char nospflag;		/* supress space character */
 	char desc[MAXDESC];	/* description */
 };
 
+#define BIT(A)   (1 << (A))
+#define STRING_IGNORE_LOWERCASE		BIT(0)
+#define STRING_COMPACT_BLANK		BIT(1)
+#define STRING_COMPACT_OPTIONAL_BLANK	BIT(2)
+#define CHAR_IGNORE_LOWERCASE		'c'
+#define CHAR_COMPACT_BLANK		'B'
+#define CHAR_COMPACT_OPTIONAL_BLANK	'b'
+
+
+/* list of magic entries */
+struct mlist {
+	struct magic *magic;		/* array of magic entries */
+	uint32 nmagic;			/* number of entries in array */
+	struct mlist *next, *prev;
+};
+
 #include <stdio.h>	/* Include that here, to make sure __P gets defined */
+#include <errno.h>
 
 #ifndef __P
 # if defined(__STDC__) || defined(__cplusplus)
@@ -98,6 +138,7 @@ extern void  error		__P((const char *, ...));
 extern void  ckfputs		__P((const char *, FILE *));
 struct stat;
 extern int   fsmagic		__P((const char *, struct stat *));
+extern char *fmttime		__P((long, int));
 extern int   is_compress	__P((const unsigned char *, int *));
 extern int   is_tar		__P((unsigned char *, int));
 extern void  magwarn		__P((const char *, ...));
@@ -109,24 +150,19 @@ extern int   tryit		__P((unsigned char *, int, int));
 extern int   zmagic		__P((unsigned char *, int));
 extern void  ckfprintf		__P((FILE *, const char *, ...));
 extern uint32 signextend	__P((struct magic *, unsigned int32));
-extern int internatmagic	__P((unsigned char *, int));
 extern void tryelf		__P((int, unsigned char *, int));
 
-
-extern int errno;		/* Some unixes don't define this..	*/
-
 extern char *progname;		/* the program name 			*/
 extern const char *magicfile;	/* name of the magic file		*/
 extern int lineno;		/* current line number in magic file	*/
 
-extern struct magic *magic;	/* array of magic entries		*/
-extern int nmagic;		/* number of valid magic[]s 		*/
-
+extern struct mlist mlist;	/* list of arrays of magic entries	*/
 
 extern int debug;		/* enable debugging?			*/
 extern int zflag;		/* process compressed files?		*/
 extern int lflag;		/* follow symbolic links?		*/
 extern int sflag;		/* read/analyze block special files?	*/
+extern int iflag;		/* Output types as mime-types		*/
 
 extern int optind;		/* From getopt(3)			*/
 extern char *optarg;
@@ -142,13 +178,17 @@ extern char *sys_errlist[];
 #define strtoul(a, b, c)	strtol(a, b, c)
 #endif
 
+#if defined(HAVE_MMAP) && defined(HAVE_SYS_MMAN_H) && !defined(QUICK)
+#define QUICK
+#endif
+
 #ifdef __STDC__
 #define FILE_RCSID(id) \
 static const char *rcsid(const char *p) { \
 	return rcsid(p = id); \
 }
 #else
-#define FILE_RCSID(id) static char *rcsid[] = id;
+#define FILE_RCSID(id) static char rcsid[] = id;
 #endif
 
 #endif /* __file_h__ */

+ 121 - 88
file.man

@@ -1,24 +1,34 @@
 .TH FILE __CSECTION__ "Copyright but distributable"
-.\" $Id: file.man,v 1.33 1999/02/14 17:16:07 christos Exp $
+.\" $Id: file.man,v 1.39 2001/04/27 22:48:33 christos Exp $
 .SH NAME
 file
 \- determine file type
 .SH SYNOPSIS
 .B file
 [
-.B \-bcnsvzL
+.B \-bciknsvzL
 ]
 [
 .B \-f
-namefile ]
+.I namefile
+]
 [
 .B \-m 
-magicfiles ]
-file ...
+.I magicfiles
+]
+.I file
+\&...
+.br
+.B file
+.B \-C
+[
+.B \-m 
+magicfile ]
 .SH DESCRIPTION
 This manual page documents version __VERSION__ of the
 .B file
 command.
+.PP
 .B File
 tests each argument in an attempt to classify it.
 There are three sets of tests, performed in this order:
@@ -30,7 +40,7 @@ test that succeeds causes the file type to be printed.
 The type printed will usually contain one of the words
 .B text
 (the file contains only
-.SM ASCII
+printing characters and a few common control
 characters and is probably safe to read on an
 .SM ASCII
 terminal),
@@ -48,8 +58,14 @@ or the program itself,
 .B "preserve these keywords" .
 People depend on knowing that all the readable files in a directory
 have the word ``text'' printed.
-Don't do as Berkeley did \- change ``shell commands text''
+Don't do as Berkeley did and change ``shell commands text''
 to ``shell script''.
+Note that the file
+.I __MAGIC__
+is built mechanically from a large number of small files in
+the subdirectory
+.I Magdir
+in the source distribution of this program.
 .PP
 The filesystem tests are based on examining the return from a
 .BR stat (2)
@@ -61,7 +77,7 @@ Any known file types appropriate to the system you are running on
 implement them)
 are intuited if they are defined in
 the system header file
-.IR sys/stat.h  .
+.IR <sys/stat.h>  .
 .PP
 The magic number tests are used to check for files with data in
 particular fixed formats.
@@ -78,14 +94,40 @@ that the file is a binary executable, and which of several types thereof.
 The concept of `magic number' has been applied by extension to data files.
 Any file with some invariant identifier at a small fixed
 offset into the file can usually be described in this way.
-The information in these files is read from the magic file
-.I __MAGIC__.
+The information identifying these files is read from the compiled
+magic file
+.I __MAGIC__.mgc ,
+or 
+.I __MAGIC__
+if the compiled file does not exist.
+.PP
+If a file does not match any of the entries in the magic file,
+it is examined to see if it seems to be a text file.
+ASCII, ISO-8859-x, non-ISO 8-bit extended-ASCII character sets
+(such as those used on Macintosh and IBM PC systems),
+UTF-8-encoded Unicode, UTF-16-encoded Unicode, and EBCDIC
+character sets can be distinguished by the different
+ranges and sequences of bytes that constitute printable text
+in each set.
+If a file passes any of these tests, its character set is reported.
+ASCII, ISO-8859-x, UTF-8, and extended-ASCII files are identified
+as ``text'' because they will be mostly readable on nearly any terminal;
+UTF-16 and EBCDIC are only ``character data'' because, while
+they contain text, it is text that will require translation
+before it can be read.
+In addition,
+.B file
+will attempt to determine other characteristics of text-type files.
+If the lines of a file are terminated by CR, CRLF, or NEL, instead
+of the Unix-standard LF, this will be reported.
+Files that contain embedded escape sequences or overstriking
+will also be identified.
 .PP
-If an argument appears to be an
-.SM ASCII 
-file,
+Once
 .B file
-attempts to guess its language.
+has determined the character set used in a text-type file,
+it will
+attempt to determine in what language the file is written.
 The language tests look for particular strings (cf
 .IR names.h )
 that can appear anywhere in the first few blocks of a file.
@@ -101,8 +143,10 @@ two groups, so they are performed last.
 The language test routines also test for some miscellany
 (such as 
 .BR tar (1)
-archives) and determine whether an unknown file should be
-labelled as `ascii text' or `data'. 
+archives).
+.PP
+Any file that cannot be identified as having been written
+in any of the character sets listed above is simply said to be ``data''.
 .SH OPTIONS
 .TP 8
 .B \-b
@@ -114,7 +158,11 @@ This is usually used in conjunction with
 .B \-m
 to debug a new magic file before installing it.
 .TP 8
-.B \-f namefile
+.B \-C
+Write a magic.mgc output file that contains a pre-parsed version of
+the magic file.
+.TP 8
+.BI \-f " namefile"
 Read the names of the files to be examined from 
 .I namefile
 (one per line) 
@@ -122,15 +170,29 @@ before the argument list.
 Either 
 .I namefile
 or at least one filename argument must be present;
-to test the standard input, use ``-'' as a filename argument.
+to test the standard input, use ``\-'' as a filename argument.
+.TP 8
+.B \-i
+Causes the file command to output mime type strings rather than the more
+traditional human readable ones. Thus it may say
+``text/plain; charset=us-ascii''
+rather
+than ``ASCII text''. In order for this option to work, file changes the way
+it handles files recognised by the command itself (such as many of the
+text file types, directories etc), and makes use of an alternative
+``magic'' file.
+(See ``FILES'' section, below).
 .TP 8
-.B \-m list
+.B \-k
+Don't stop at the first match, keep going.
+.TP 8
+.BI \-m " list"
 Specify an alternate list of files containing magic numbers.
 This can be a single file, or a colon-separated list of files.
 .TP 8
 .B \-n
-Force stdout to be flushed after check a file. This is only useful if
-checking a list of files. It is intended to be used by programs want
+Force stdout to be flushed after checking each file. This is only useful if
+checking a list of files. It is intended to be used by programs that want
 filetype output from a pipe.
 .TP 8
 .B \-v
@@ -165,8 +227,16 @@ to disregard the file size as reported by
 .BR stat (2)
 since on some systems it reports a zero size for raw disk partitions.
 .SH FILES
+.I __MAGIC__.mgc
+\- default compiled list of magic numbers
+.PP
 .I __MAGIC__
 \- default list of magic numbers
+.PP
+.I __MAGIC__.mime
+\- default list of magic numbers, used to output mime types when the -i option
+is specified.
+
 .SH ENVIRONMENT
 The environment variable
 .B MAGIC
@@ -252,90 +322,52 @@ $ file -s /dev/hda{,1,2,3,4,5,6,7,8,9,10}
 /dev/hda8:  Linux/i386 swap file
 /dev/hda9:  empty
 /dev/hda10: empty
+
+$ file -i file.c file /dev/hda
+file.c:      text/x-c
+file:        application/x-executable, dynamically linked (uses shared libs), not stripped
+/dev/hda:    application/x-not-regular-file
+
 .fi
 .SH HISTORY
 There has been a 
 .B file
 command in every \s-1UNIX\s0 since at least Research Version 6
-(man page dated January, 1975).
+(man page dated January 16, 1975).
 The System V version introduced one significant major change:
 the external list of magic number types.
 This slowed the program down slightly but made it a lot more flexible.
 .PP
 This program, based on the System V version,
-was written by Ian Darwin without looking at anybody else's source code.
+was written by Ian Darwin <ian@darwinsys.com>
+without looking at anybody else's source code.
 .PP
 John Gilmore revised the code extensively, making it better than
 the first version.
 Geoff Collyer found several inadequacies
 and provided some magic file entries.
-The program has undergone continued evolution since.
-.SH AUTHOR
-Written by Ian F. Darwin, UUCP address {utzoo | ihnp4}!darwin!ian,
-Internet address ian@sq.com,
-postal address: P.O. Box 603, Station F, Toronto, Ontario, CANADA M4Y 2L8.
+Contributions to the `&' operator by Rob McMahon, cudcv@warwick.ac.uk, 1989.
 .PP
-Altered by Rob McMahon, cudcv@warwick.ac.uk, 1989, to extend the `&' operator
-from simple `x&y != 0' to `x&y op z'.
+Guy Harris, guy@netapp.com, made many changes from 1993 to the present.
 .PP
-Altered by Guy Harris, guy@netapp.com, 1993, to:
-.RS
+Primary development and maintenance from 1990 to the present by
+Christos Zoulas (christos@astron.com).
 .PP
-put the ``old-style'' `&'
-operator back the way it was, because 1) Rob McMahon's change broke the
-previous style of usage, 2) the SunOS ``new-style'' `&' operator,
-which this version of
-.B file
-supports, also handles `x&y op z', and 3) Rob's change wasn't documented
-in any case;
-.PP
-put in multiple levels of `>';
+Altered by Chris Lowth, chris@lowth.com, 2000:
+Handle the ``\-i'' option to output mime type strings, using an alternative
+magic file and internal logic.
 .PP
-put in ``beshort'', ``leshort'', etc. keywords to look at numbers in the
-file in a specific byte order, rather than in the native byte order of
-the process running
-.BR file .
-.RE
+Altered by Eric Fischer (enf@pobox.com), July, 2000,
+to identify character codes and attempt to identify the languages
+of non-ASCII files.
 .PP
-Changes by Ian Darwin and various authors including
-Christos Zoulas (christos@astron.com), 1990-1999.
+The list of contributors to the "Magdir" directory (source for the
+/etc/magic
+file) is too long to include here. You know who you are; thank you.
 .SH LEGAL NOTICE
-Copyright (c) Ian F. Darwin, Toronto, Canada,
-1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993.
-.PP
-This software is not subject to and may not be made subject to any
-license of the American Telephone and Telegraph Company, Sun
-Microsystems Inc., Digital Equipment Inc., Lotus Development Inc., the
-Regents of the University of California, The X Consortium or MIT, or
-The Free Software Foundation.
-.PP
-This software is not subject to any export provision of the United States
-Department of Commerce, and may be exported to any country or planet.
-.PP
-Permission is granted to anyone to use this software for any purpose on
-any computer system, and to alter it and redistribute it freely, subject
-to the following restrictions:
-.PP 
-1. The author is not responsible for the consequences of use of this
-software, no matter how awful, even if they arise from flaws in it.
-.PP
-2. The origin of this software must not be misrepresented, either by
-explicit claim or by omission.  Since few users ever read sources,
-credits must appear in the documentation.
-.PP
-3. Altered versions must be plainly marked as such, and must not be
-misrepresented as being the original software.  Since few users
-ever read sources, credits must appear in the documentation.
-.PP
-4. This notice may not be removed or altered.
-.PP
-A few support files (\fIgetopt\fP, \fIstrtok\fP)
-distributed with this package
-are by Henry Spencer and are subject to the same terms as above.
-.PP
-A few simple support files (\fIstrtol\fP, \fIstrchr\fP)
-distributed with this package
-are in the public domain; they are so marked.
+Copyright (c) Ian F. Darwin, Toronto, Canada, 1986-1999.
+Covered by the standard Berkeley Software Distribution copyright; see the file
+LEGAL.NOTICE in the source distribution.
 .PP
 The files
 .I tar.h
@@ -343,7 +375,7 @@ and
 .I is_tar.c
 were written by John Gilmore from his public-domain
 .B tar
-program, and are not covered by the above restrictions.
+program, and are not covered by the above license.
 .SH BUGS
 There must be a better way to automate the construction of the Magic
 file from all the glop in Magdir. What is it?
@@ -358,11 +390,11 @@ with the flexibility of the System V version.
 .B File
 uses several algorithms that favor speed over accuracy,
 thus it can be misled about the contents of
-.SM ASCII
+text
 files.
 .PP
 The support for
-.SM ASCII
+text
 files (primarily for programming languages)
 is simplistic, inefficient and requires recompilation to update.
 .PP
@@ -401,10 +433,11 @@ The program should provide a way to give an estimate
 of ``how good'' a guess is.
 We end up removing guesses (e.g. ``From '' as first 5 chars of file) because
 they are not as good as other guesses (e.g. ``Newsgroups:'' versus
-"Return-Path:").  Still, if the others don't pan out, it should be
+``Return-Path:'').  Still, if the others don't pan out, it should be
 possible to use the first guess.  
 .PP
 This program is slower than some vendors' file commands.
+The new support for multiple character codes makes it even slower.
 .PP
 This manual page, and particularly this section, is too long.
 .SH AVAILABILITY

+ 16 - 12
fsmagic.c

@@ -26,9 +26,6 @@
  */
 
 #include "file.h"
-#ifdef __CYGWIN__
-#include <errno.h>
-#endif
 #include <stdio.h>
 #include <string.h>
 #include <sys/types.h>
@@ -57,13 +54,13 @@
 #undef HAVE_MAJOR
 
 #ifndef	lint
-FILE_RCSID("@(#)$Id: fsmagic.c,v 1.30 1999/10/31 22:23:03 christos Exp $")
+FILE_RCSID("@(#)$Id: fsmagic.c,v 1.33 2000/08/05 17:36:48 christos Exp $")
 #endif	/* lint */
 
 int
 fsmagic(fn, sb)
-const char *fn;
-struct stat *sb;
+	const char *fn;
+	struct stat *sb;
 {
 	int ret = 0;
 
@@ -86,15 +83,23 @@ struct stat *sb;
 		return 1;
 	}
 
+	if (iflag) {
+		if ((sb->st_mode & S_IFMT) != S_IFREG) {
+			ckfputs("application/x-not-regular-file", stdout);
+			return 1;
+		}
+	}
+	else {
 #ifdef S_ISUID
-	if (sb->st_mode & S_ISUID) ckfputs("setuid ", stdout);
+		if (sb->st_mode & S_ISUID) ckfputs("setuid ", stdout);
 #endif
 #ifdef S_ISGID
-	if (sb->st_mode & S_ISGID) ckfputs("setgid ", stdout);
+		if (sb->st_mode & S_ISGID) ckfputs("setgid ", stdout);
 #endif
 #ifdef S_ISVTX
-	if (sb->st_mode & S_ISVTX) ckfputs("sticky ", stdout);
+		if (sb->st_mode & S_ISVTX) ckfputs("sticky ", stdout);
 #endif
+	}
 	
 	switch (sb->st_mode & S_IFMT) {
 	case S_IFDIR:
@@ -163,7 +168,7 @@ struct stat *sb;
 	case S_IFLNK:
 		{
 			char buf[BUFSIZ+4];
-			register int nch;
+			int nch;
 			struct stat tstatbuf;
 
 			if ((nch = readlink(fn, buf, BUFSIZ-1)) <= 0) {
@@ -239,9 +244,8 @@ struct stat *sb;
 	 * when we read the file.)
 	 */
 	if (!sflag && sb->st_size == 0) {
-		ckfputs("empty", stdout);
+		ckfputs(iflag ? "application/x-empty" : "empty", stdout);
 		return 1;
 	}
 	return 0;
 }
-

+ 0 - 86
internat.c

@@ -1,86 +0,0 @@
-/*
- * ----------------------------------------------------------------------------
- * "THE BEER-WARE LICENSE" (Revision 42):
- * <joerg@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
- * can do whatever you want with this stuff. If we meet some day, and you think
- * this stuff is worth it, you can buy me a beer in return.   Joerg Wunsch
- * ----------------------------------------------------------------------------
- */
-
-#include "file.h"
-
-#include <string.h>
-#include <memory.h>
-
-#ifndef lint
-FILE_RCSID("@(#)$Id: internat.c,v 1.4 1998/06/27 13:23:39 christos Exp $")
-#endif
-
-#define F 0
-#define T 1
-
-/*
- * List of characters that look "reasonable" in international
- * language texts.  That's almost all characters :), except a
- * few in the control range of ASCII (all the known international
- * charactersets share the bottom half with ASCII).
- */
-static char maybe_internat[256] = {
-	F, F, F, F, F, F, F, F, T, T, T, T, T, T, F, F,  /* 0x0X */
-	F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F,  /* 0x1X */
-	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x2X */
-	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x3X */
-	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x4X */
-	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x5X */
-	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x6X */
-	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F,  /* 0x7X */
-	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x8X */
-	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x9X */
-	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0xaX */
-	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0xbX */
-	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0xcX */
-	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0xdX */
-	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0xeX */
-	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T   /* 0xfX */
-};
-
-/* Maximal length of a line we consider "reasonable". */
-#define MAXLINELEN 300
-
-int
-internatmagic(buf, nbytes)
-	unsigned char *buf;
-	int nbytes;
-{
-	int i;
-	unsigned char *cp;
-
-	nbytes--;
-
-	/* First, look whether there are "unreasonable" characters. */
-	for (i = 0, cp = buf; i < nbytes; i++, cp++)
-		if (!maybe_internat[*cp])
-			return 0;
-
-	/*
-	 * Now, look whether the file consists of lines of
-	 * "reasonable" length.
-	 */
-
-	for (i = 0; i < nbytes;) {
-		cp = (unsigned char *) memchr(buf, '\n', nbytes - i);
-		if (cp == NULL) {
-			/* Don't fail if we hit the end of buffer. */
-			if (i + MAXLINELEN >= nbytes)
-				break;
-			else
-				return 0;
-		}
-		if (cp - buf > MAXLINELEN)
-			return 0;
-		i += (cp - buf + 1);
-		buf = cp + 1;
-	}
-	ckfputs("International language text", stdout);
-	return 1;
-}

+ 12 - 12
is_tar.c

@@ -5,7 +5,7 @@
  * Public Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu).
  *
  * @(#)list.c 1.18 9/23/86 Public Domain - gnu
- * $Id: is_tar.c,v 1.12 1999/02/14 17:16:08 christos Exp $
+ * $Id: is_tar.c,v 1.13 2000/08/05 17:36:48 christos Exp $
  *
  * Comments changed and some code/comments reformatted
  * for file command by Ian Darwin.
@@ -18,7 +18,7 @@
 #include "file.h"
 
 #ifndef lint
-FILE_RCSID("@(#)$Id: is_tar.c,v 1.12 1999/02/14 17:16:08 christos Exp $")
+FILE_RCSID("@(#)$Id: is_tar.c,v 1.13 2000/08/05 17:36:48 christos Exp $")
 #endif
 
 #define	isodigit(c)	( ((c) >= '0') && ((c) <= '7') )
@@ -33,13 +33,13 @@ static int from_oct __P((int, char *));	/* Decode octal number */
  */
 int
 is_tar(buf, nbytes)
-unsigned char *buf;
-int nbytes;
+	unsigned char *buf;
+	int nbytes;
 {
-	register union record *header = (union record *)buf;
-	register int	i;
-	register int	sum, recsum;
-	register char	*p;
+	union record *header = (union record *)buf;
+	int	i;
+	int	sum, recsum;
+	char	*p;
 
 	if (nbytes < sizeof(union record))
 		return 0;
@@ -78,12 +78,12 @@ int nbytes;
  */
 static int
 from_oct(digs, where)
-	register int	digs;
-	register char	*where;
+	int	digs;
+	char	*where;
 {
-	register int	value;
+	int	value;
 
-	while (isspace((unsigned char)*where)) {		/* Skip spaces */
+	while (isspace((unsigned char)*where)) {	/* Skip spaces */
 		where++;
 		if (--digs <= 0)
 			return -1;		/* All blank field */

+ 18 - 4
magic.man

@@ -36,9 +36,19 @@ A two-byte value (on most systems) in this machine's native byte order.
 .IP long
 A four-byte value (on most systems) in this machine's native byte order.
 .IP string
-A string of bytes.
+A string of bytes. The string type specification can be optionally followed
+by /[Bbc]*. The ``B'' flag compacts whitespace in the target, which must
+contain at least one whitespace character. If the magic has "n" consecutive
+blanks, the target needs at least "n" consecutive blanks to match. The ``b''
+flag treats every blank in the target as an optional blank. Finally the ``c''
+flag specifies case insensitive matching: lowercase characters in the magic
+match both lower and upper case characters in the target, whereas upper case
+characters in the magic only match uppercase characters in the target.
 .IP date
-A four-byte value interpreted as a unix date.
+A four-byte value interpreted as a UNIX date.
+.IP ldate
+A four-byte value interpreted as a UNIX-style date, but interpreted as
+local time rather than UTC.
 .IP beshort
 A two-byte value (on most systems) in big-endian byte order.
 .IP belong
@@ -52,7 +62,11 @@ A two-byte value (on most systems) in little-endian byte order.
 A four-byte value (on most systems) in little-endian byte order.
 .IP ledate
 A four-byte value (on most systems) in little-endian byte order,
-interpreted as a unix date.
+interpreted as a UNIX date.
+.IP leldate
+A four-byte value (on most systems) in little-endian byte order,
+interpreted as a UNIX-style date, but interpreted as local time rather
+than UTC.
 .RE
 .PP
 The numeric types may optionally be followed by
@@ -205,4 +219,4 @@ indirect offsets.
 .\" the changes I posted to the S5R2 version.
 .\"
 .\" Modified for Ian Darwin's version of the file command.
-.\" @(#)$Id: magic.man,v 1.15 1998/08/13 17:32:24 christos Exp $
+.\" @(#)$Id: magic.man,v 1.17 2001/08/07 15:38:42 christos Exp $

+ 543 - 0
magic.mime

@@ -0,0 +1,543 @@
+# Magic data for KMimeMagic (originally for file(1) command)
+#
+# The format is 4-5 columns:
+#    Column #1: byte number to begin checking from, ">" indicates continuation
+#    Column #2: type of data to match
+#    Column #3: contents of data to match
+#    Column #4: MIME type of result
+#    Column #5: MIME encoding of result (optional)
+
+#------------------------------------------------------------------------------
+# Localstuff:  file(1) magic for locally observed files
+# Add any locally observed files here.
+
+#------------------------------------------------------------------------------
+# end local stuff
+#------------------------------------------------------------------------------
+
+#------------------------------------------------------------------------------
+# Java
+
+0	short		0xcafe
+>2	short		0xbabe		application/java
+
+#------------------------------------------------------------------------------
+# audio:  file(1) magic for sound formats
+#
+# from Jan Nicolai Langfeldt <janl@ifi.uio.no>,
+#
+
+# Sun/NeXT audio data
+0	string		.snd
+>12	belong		1		audio/basic
+>12	belong		2		audio/basic
+>12	belong		3		audio/basic
+>12	belong		4		audio/basic
+>12	belong		5		audio/basic
+>12	belong		6		audio/basic
+>12	belong		7		audio/basic
+
+>12	belong		23		audio/x-adpcm
+
+# DEC systems (e.g. DECstation 5000) use a variant of the Sun/NeXT format
+# that uses little-endian encoding and has a different magic number
+# (0x0064732E in little-endian encoding).
+0	lelong		0x0064732E	
+>12	lelong		1		audio/x-dec-basic
+>12	lelong		2		audio/x-dec-basic
+>12	lelong		3		audio/x-dec-basic
+>12	lelong		4		audio/x-dec-basic
+>12	lelong		5		audio/x-dec-basic
+>12	lelong		6		audio/x-dec-basic
+>12	lelong		7		audio/x-dec-basic
+#                                       compressed (G.721 ADPCM)
+>12	lelong		23		audio/x-dec-adpcm
+
+# Bytes 0-3 of AIFF, AIFF-C, & 8SVX audio files are "FORM"
+#					AIFF audio data
+8	string		AIFF		audio/x-aiff	
+#					AIFF-C audio data
+8	string		AIFC		audio/x-aiff	
+#					IFF/8SVX audio data
+8	string		8SVX		audio/x-aiff	
+
+# Creative Labs AUDIO stuff
+#					Standard MIDI data
+0	string	MThd			audio/unknown	
+#>9 	byte	>0			(format %d)
+#>11	byte	>1			using %d channels
+#					Creative Music (CMF) data
+0	string	CTMF			audio/unknown	
+#					SoundBlaster instrument data
+0	string	SBI			audio/unknown	
+#					Creative Labs voice data
+0	string	Creative\ Voice\ File	audio/unknown	
+## is this next line right?  it came this way...
+#>19	byte	0x1A
+#>23	byte	>0			- version %d
+#>22	byte	>0			\b.%d
+
+# [GRR 950115:  is this also Creative Labs?  Guessing that first line
+#  should be string instead of unknown-endian long...]
+#0	long		0x4e54524b	MultiTrack sound data
+#0	string		NTRK		MultiTrack sound data
+#>4	long		x		- version %ld
+
+# Microsoft WAVE format (*.wav)
+# [GRR 950115:  probably all of the shorts and longs should be leshort/lelong]
+#					Microsoft RIFF
+0	string		RIFF		audio/unknown	
+#					- WAVE format
+>8	string		WAVE		audio/x-wav	
+>8	string		AVI		video/x-msvideo
+#
+0	belong		0x2e7261fd	application/x-realaudio
+
+# MPEG Layer 3 sound files
+# Modified the 11/20/97 at 15:59:04 by Christophe Prud'homme <christophe.prudhomme@asci.fr>
+0       belong          0xfffb          audio/x-mp3
+#MP3 with ID3 tag
+0	string		ID3		audio/x-mp3
+# Ogg/Vorbis
+0	string		OggS		audio/x-ogg
+
+#------------------------------------------------------------------------------
+# c-lang:  file(1) magic for C programs or various scripts
+#
+
+# XPM icons (Greg Roelofs, newt@uchicago.edu)
+# ideally should go into "images", but entries below would tag XPM as C source
+0	string		/*\ XPM		image/x-xpm	7bit
+
+# 3DS (3d Studio files)
+16	beshort		0x3d3d		image/x-3ds
+
+# this first will upset you if you're a PL/1 shop... (are there any left?)
+# in which case rm it; ascmagic will catch real C programs
+#					C or REXX program text
+#0	string		/*		text/x-c
+#					C++ program text
+#0	string		//		text/x-c++
+
+#------------------------------------------------------------------------------
+# commands:  file(1) magic for various shells and interpreters
+#
+#0       string          :\ shell archive or commands for antique kernel text
+0       string          #!/bin/sh               application/x-shellscript
+0       string          #!\ /bin/sh             application/x-shellscript
+0       string          #!/bin/csh              application/x-shellscript
+0       string          #!\ /bin/csh            application/x-shellscript
+# korn shell magic, sent by George Wu, gwu@clyde.att.com
+0       string          #!/bin/ksh              application/x-shellscript
+0       string          #!\ /bin/ksh            application/x-shellscript
+0       string          #!/bin/tcsh             application/x-shellscript
+0       string          #!\ /bin/tcsh           application/x-shellscript
+0       string          #!/usr/local/tcsh       application/x-shellscript
+0       string          #!\ /usr/local/tcsh     application/x-shellscript
+0       string          #!/usr/local/bin/tcsh   application/x-shellscript
+0       string          #!\ /usr/local/bin/tcsh application/x-shellscript
+# bash shell magic, from Peter Tobias (tobias@server.et-inf.fho-emden.de)
+0       string          #!/bin/bash     		application/x-shellscript
+0       string          #!\ /bin/bash           application/x-shellscript
+0       string          #!/usr/local/bin/bash   application/x-shellscript
+0       string          #!\ /usr/local/bin/bash application/x-shellscript
+
+#
+# zsh/ash/ae/nawk/gawk magic from cameron@cs.unsw.oz.au (Cameron Simpson)
+0       string          #!/usr/local/bin/zsh    application/x-shellscript
+0       string          #!\ /usr/local/bin/zsh  application/x-shellscript
+0       string          #!/usr/local/bin/ash    application/x-shellscript
+0       string          #!\ /usr/local/bin/ash  application/x-shellscript
+#0       string          #!/usr/local/bin/ae     Neil Brown's ae
+#0       string          #!\ /usr/local/bin/ae   Neil Brown's ae
+0       string          #!/bin/nawk             application/x-nawk
+0       string          #!\ /bin/nawk           application/x-nawk
+0       string          #!/usr/bin/nawk         application/x-nawk
+0       string          #!\ /usr/bin/nawk       application/x-nawk
+0       string          #!/usr/local/bin/nawk   application/x-nawk
+0       string          #!\ /usr/local/bin/nawk application/x-nawk
+0       string          #!/bin/gawk             application/x-gawk
+0       string          #!\ /bin/gawk           application/x-gawk
+0       string          #!/usr/bin/gawk         application/x-gawk
+0       string          #!\ /usr/bin/gawk       application/x-gawk
+0       string          #!/usr/local/bin/gawk   application/x-gawk
+0       string          #!\ /usr/local/bin/gawk application/x-gawk
+#
+0       string          #!/bin/awk              application/x-awk
+0       string          #!\ /bin/awk            application/x-awk
+0       string          #!/usr/bin/awk          application/x-awk
+0       string          #!\ /usr/bin/awk        application/x-awk
+0       string          BEGIN                   application/x-awk
+
+# For Larry Wall's perl language.  The ``eval'' line recognizes an
+# outrageously clever hack for USG systems.
+#                               Keith Waclena <keith@cerberus.uchicago.edu>
+0       string          #!/bin/perl                     application/x-perl
+0       string          #!\ /bin/perl                   application/x-perl
+0       string          eval\ "exec\ /bin/perl          application/x-perl
+0       string          #!/usr/bin/perl                 application/x-perl
+0       string          #!\ /usr/bin/perl               application/x-perl
+0       string          eval\ "exec\ /usr/bin/perl      application/x-perl
+0       string          #!/usr/local/bin/perl           application/x-perl
+0       string          #!\ /usr/local/bin/perl         application/x-perl
+0       string          eval\ "exec\ /usr/local/bin/perl application/x-perl
+
+#------------------------------------------------------------------------------
+# compress:  file(1) magic for pure-compression formats (no archives)
+#
+# compress, gzip, pack, compact, huf, squeeze, crunch, freeze, yabba, whap, etc.
+#
+# Formats for various forms of compressed data
+# Formats for "compress" proper have been moved into "compress.c",
+# because it tries to uncompress it to figure out what's inside.
+
+# standard unix compress
+0	string		\037\235	application/x-compress
+
+# gzip (GNU zip, not to be confused with [Info-ZIP/PKWARE] zip archiver)
+0       string          \037\213        application/x-gzip
+
+0		string			PK\003\004		application/x-zip
+
+# According to gzip.h, this is the correct byte order for packed data.
+0	string		\037\036	application/octet-stream
+#
+# This magic number is byte-order-independent.
+#
+0	short		017437		application/octet-stream
+
+# XXX - why *two* entries for "compacted data", one of which is
+# byte-order independent, and one of which is byte-order dependent?
+#
+# compacted data
+0	short		0x1fff		application/octet-stream
+0	string		\377\037	application/octet-stream
+# huf output
+0	short		0145405		application/octet-stream
+
+# Squeeze and Crunch...
+# These numbers were gleaned from the Unix versions of the programs to
+# handle these formats.  Note that I can only uncrunch, not crunch, and
+# I didn't have a crunched file handy, so the crunch number is untested.
+#				Keith Waclena <keith@cerberus.uchicago.edu>
+#0	leshort		0x76FF		squeezed data (CP/M, DOS)
+#0	leshort		0x76FE		crunched data (CP/M, DOS)
+
+# Freeze
+#0	string		\037\237	Frozen file 2.1
+#0	string		\037\236	Frozen file 1.0 (or gzip 0.5)
+
+# lzh?
+#0	string		\037\240	LZH compressed data
+
+257	string		ustar\0		application/x-tar	posix
+257	string		ustar\040\040\0		application/x-tar	gnu
+
+0	short		070707		application/x-cpio
+0	short		0143561		application/x-cpio	swapped
+
+0	string		=<ar>		application/x-archive
+0	string		!<arch>		application/x-archive
+>8	string		debian		application/x-debian-package
+
+#------------------------------------------------------------------------------
+#
+# RPM: file(1) magic for Red Hat Packages   Erik Troan (ewt@redhat.com)
+#
+0       beshort         0xedab
+>2      beshort         0xeedb          application/x-rpm
+
+0	lelong&0x8080ffff	0x0000081a	application/x-arc	lzw
+0	lelong&0x8080ffff	0x0000091a	application/x-arc	squashed
+0	lelong&0x8080ffff	0x0000021a	application/x-arc	uncompressed
+0	lelong&0x8080ffff	0x0000031a	application/x-arc	packed
+0	lelong&0x8080ffff	0x0000041a	application/x-arc	squeezed
+0	lelong&0x8080ffff	0x0000061a	application/x-arc	crunched
+
+0	leshort	0xea60	application/octet-stream	x-arj
+
+# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
+2	string	-lh0-	application/x-lharc	lh0
+2	string	-lh1-	application/x-lharc	lh1
+2	string	-lz4-	application/x-lharc	lz4
+2	string	-lz5-	application/x-lharc	lz5
+#	[never seen any but the last; -lh4- reported in comp.compression:]
+2	string	-lzs-	application/x-lha	lzs
+2	string	-lh\ -	application/x-lha	lh
+2	string	-lhd-	application/x-lha	lhd
+2	string	-lh2-	application/x-lha	lh2
+2	string	-lh3-	application/x-lha	lh3
+2	string	-lh4-	application/x-lha	lh4
+2	string	-lh5-	application/x-lha	lh5
+2	string	-lh6-	application/x-lha	lh6
+2	string	-lh7-	application/x-lha	lh7
+# Shell archives
+10	string	#\ This\ is\ a\ shell\ archive	application/octet-stream	x-shell
+
+#------------------------------------------------------------------------------
+# frame:  file(1) magic for FrameMaker files
+#
+# This stuff came on a FrameMaker demo tape, most of which is
+# copyright, but this file is "published" as witness the following:
+#
+0	string		\<MakerFile	application/x-frame
+0	string		\<MIFFile	application/x-frame
+0	string		\<MakerDictionary	application/x-frame
+0	string		\<MakerScreenFon	application/x-frame
+0	string		\<MML		application/x-frame
+0	string		\<Book		application/x-frame
+0	string		\<Maker		application/x-frame
+
+#------------------------------------------------------------------------------
+# html:  file(1) magic for HTML (HyperText Markup Language) docs
+#
+# from Daniel Quinlan <quinlan@yggdrasil.com>
+#
+0	string		\<HEAD	text/html
+0	string		\<head	text/html
+0	string		\<TITLE	text/html
+0	string		\<title	text/html
+0       string          \<html	text/html
+0       string          \<HTML	text/html
+0	string		\<!--	text/html
+0	string		\<h1	text/html
+0	string		\<H1	text/html
+0	string		\<!doctype\ HTML	text/html
+0	string		\<!DOCTYPE\ HTML	text/html
+0	string		\<!doctype\ html	text/html
+
+#------------------------------------------------------------------------------
+# images:  file(1) magic for image formats (see also "c-lang" for XPM bitmaps)
+#
+# originally from jef@helios.ee.lbl.gov (Jef Poskanzer),
+# additions by janl@ifi.uio.no as well as others. Jan also suggested
+# merging several one- and two-line files into here.
+#
+# XXX - byte order for GIF and TIFF fields?
+# [GRR:  TIFF allows both byte orders; GIF is probably little-endian]
+#
+
+# [GRR:  what the hell is this doing in here?]
+#0	string		xbtoa		btoa'd file
+
+# PBMPLUS
+#					PBM file
+0	string		P1		image/x-portable-bitmap	7bit
+#					PGM file
+0	string		P2		image/x-portable-greymap	7bit
+#					PPM file
+0	string		P3		image/x-portable-pixmap	7bit
+#					PBM "rawbits" file
+0	string		P4		image/x-portable-bitmap
+#					PGM "rawbits" file
+0	string		P5		image/x-portable-greymap
+#					PPM "rawbits" file
+0	string		P6		image/x-portable-pixmap
+
+# NIFF (Navy Interchange File Format, a modification of TIFF)
+# [GRR:  this *must* go before TIFF]
+0	string		IIN1		image/x-niff
+
+# TIFF and friends
+#					TIFF file, big-endian
+0	string		MM		image/tiff
+#					TIFF file, little-endian
+0	string		II		image/tiff
+
+# possible GIF replacements; none yet released!
+# (Greg Roelofs, newt@uchicago.edu)
+#
+# GRR 950115:  this was mine ("Zip GIF"):
+#					ZIF image (GIF+deflate alpha)
+0	string		GIF94z		image/unknown
+#
+# GRR 950115:  this is Jeremy Wohl's Free Graphics Format (better):
+#					FGF image (GIF+deflate beta)
+0	string		FGF95a		image/unknown
+#
+# GRR 950115:  this is Thomas Boutell's Portable Bitmap Format proposal
+# (best; not yet implemented):
+#					PBF image (deflate compression)
+0	string		PBF		image/unknown
+
+# GIF
+0	string		GIF		image/gif
+
+# JPEG images
+0	beshort		0xffd8		image/jpeg
+
+# PC bitmaps (OS/2, Windoze BMP files)  (Greg Roelofs, newt@uchicago.edu)
+0	string		BM		image/bmp
+#>14	byte		12		(OS/2 1.x format)
+#>14	byte		64		(OS/2 2.x format)
+#>14	byte		40		(Windows 3.x format)
+#0	string		IC		icon
+#0	string		PI		pointer
+#0	string		CI		color icon
+#0	string		CP		color pointer
+#0	string		BA		bitmap array
+
+
+#------------------------------------------------------------------------------
+# lisp:  file(1) magic for lisp programs
+#
+# various lisp types, from Daniel Quinlan (quinlan@yggdrasil.com)
+0	string	;;			text/plain	8bit
+# Emacs 18 - this is always correct, but not very magical.
+0	string	\012(			application/x-elc
+# Emacs 19
+0	string	;ELC\023\000\000\000	application/x-elc
+
+#------------------------------------------------------------------------------
+# mail.news:  file(1) magic for mail and news
+#
+# There are tests in ascmagic.c to cope with mail and news.
+0	string		Relay-Version: 	message/rfc822	7bit
+0	string		#!\ rnews	message/rfc822	7bit
+0	string		N#!\ rnews	message/rfc822	7bit
+0	string		Forward\ to 	message/rfc822	7bit
+0	string		Pipe\ to 	message/rfc822	7bit
+0	string		Return-Path:	message/rfc822	7bit
+0	string		Path:		message/news	8bit
+0	string		Xref:		message/news	8bit
+0	string		From:		message/rfc822	7bit
+0	string		Article 	message/news	8bit
+#------------------------------------------------------------------------------
+# msword: file(1) magic for MS Word files
+#
+# Contributor claims:
+# Reverse-engineered MS Word magic numbers
+#
+
+0	string		\376\067\0\043			application/msword
+0	string		\320\317\021\340\241\261	application/msword
+0	string		\333\245-\0\0\0			application/msword
+
+
+
+#------------------------------------------------------------------------------
+# printer:  file(1) magic for printer-formatted files
+#
+
+# PostScript
+0	string		%!		application/postscript
+0	string		\004%!		application/postscript
+
+# Acrobat
+# (due to clamen@cs.cmu.edu)
+0	string		%PDF-		application/pdf
+
+#------------------------------------------------------------------------------
+# sc:  file(1) magic for "sc" spreadsheet
+#
+38	string		Spreadsheet	application/x-sc
+
+#------------------------------------------------------------------------------
+# tex:  file(1) magic for TeX files
+#
+# XXX - needs byte-endian stuff (big-endian and little-endian DVI?)
+#
+# From <conklin@talisman.kaleida.com>
+
+# Although we may know the offset of certain text fields in TeX DVI
+# and font files, we can't use them reliably because they are not
+# zero terminated. [but we do anyway, christos]
+0	string		\367\002	application/x-dvi
+#0	string		\367\203	TeX generic font data
+#0	string		\367\131	TeX packed font data
+#0	string		\367\312	TeX virtual font data
+#0	string		This\ is\ TeX,	TeX transcript text	
+#0	string		This\ is\ METAFONT,	METAFONT transcript text
+
+# There is no way to detect TeX Font Metric (*.tfm) files without
+# breaking them apart and reading the data.  The following patterns
+# match most *.tfm files generated by METAFONT or afm2tfm.
+2	string		\000\021	application/x-tex-tfm
+2	string		\000\022	application/x-tex-tfm
+#>34	string		>\0		(%s)
+
+# Texinfo and GNU Info, from Daniel Quinlan (quinlan@yggdrasil.com)
+#0	string		\\input\ texinfo	Texinfo source text
+#0	string		This\ is\ Info\ file	GNU Info text
+
+# correct TeX magic for Linux (and maybe more)
+# from Peter Tobias (tobias@server.et-inf.fho-emden.de)
+#
+0	leshort		0x02f7		application/x-dvi
+
+# RTF - Rich Text Format
+0	string		{\\rtf		text/rtf
+
+#------------------------------------------------------------------------------
+# animation:  file(1) magic for animation/movie formats
+#
+# animation formats, originally from vax@ccwf.cc.utexas.edu (VaX#n8)
+#						MPEG file
+0	belong		0x000001b3			video/mpeg
+0	belong		0x000001ba			video/mpeg
+# FLI animation format
+0	leshort		0xAF11				video/fli
+# FLC animation format
+0	leshort		0xAF12				video/flc
+# AVI
+>8	string		AVI\ 				video/avi
+#
+# SGI and Apple formats
+#
+0	string		MOVI				video/sgi
+4	string		moov				video/quicktime	moov
+4	string		mdat				video/quicktime	mdat
+# The contributor claims:
+#   I couldn't find a real magic number for these, however, this
+#   -appears- to work.  Note that it might catch other files, too,
+#   so BE CAREFUL!
+#
+# Note that title and author appear in the two 20-byte chunks
+# at decimal offsets 2 and 22, respectively, but they are XOR'ed with
+# 255 (hex FF)! DL format SUCKS BIG ROCKS.
+#
+#						DL file version 1 , medium format (160x100, 4 images/screen)
+0	byte		1			video/unknown
+0	byte		2			video/unknown
+#
+# Databases
+#
+# GDBM magic numbers
+#  Will be maintained as part of the GDBM distribution in the future.
+#  <downsj@teeny.org>
+0       belong  0x13579ace      application/x-gdbm
+0       lelong  0x13579ace      application/x-gdbm
+0       string  GDBM            application/x-gdbm
+#
+0       belong  0x061561        application/x-dbm
+#
+# Executables
+#
+0       string          \177ELF 
+>4      byte            0
+>4      byte            1
+>4      byte            2
+>5      byte            0
+>5      byte            1
+>>16    leshort         0
+>>16    leshort         1               application/x-object
+>>16    leshort         2               application/x-executable
+>>16    leshort         3               application/x-sharedlib
+>>16    leshort         4               application/x-coredump
+#
+# DOS
+0		string			MZ				application/x-dosexec
+#
+# KDE
+0		string	[KDE\ Desktop\ Entry]	application/x-kdelnk
+0		string	\#\ KDE\ Config\ File	application/x-kdelnk
+# xmcd database file for kscd
+0		string	\#\ xmcd                text/xmcd
+
+#------------------------------------------------------------------------------
+# pkgadd:  file(1) magic for SysV R4 PKG Datastreams
+#
+0       string          #\ PaCkAgE\ DaTaStReAm  application/x-svr4-package
+
+#PNG Image Format
+0	string		\x89PNG			image/png

+ 39 - 0
mkinstalldirs

@@ -0,0 +1,39 @@
+#! /bin/sh
+# mkinstalldirs --- make directory hierarchy
+# Author: Noah Friedman <friedman@prep.ai.mit.edu>
+# Created: 1993-05-16
+# Public domain
+
+# $Id: mkinstalldirs,v 1.2 2000/11/13 00:30:49 christos Exp $
+
+# Usage: mkinstalldirs dir ...
+# For every argument, create the directory and any missing leading
+# directories, echoing each mkdir that is run.  Exits 0, or with the
+# status recorded from a failed mkdir whose directory is still missing.
+
+errstatus=0
+
+# A "for" without an "in" list iterates over the positional parameters.
+for file
+do
+   # Split the path into its components and load them into "$@".
+   # sed marks a leading "/" as "#", drops the ":" prefix, turns every
+   # remaining "/" into a space and finally restores the leading "/".
+   # The ":" prefix is presumably there so echo never sees a word that
+   # starts with "-"; "fnord" is a dummy first word (shifted away below)
+   # so that "set" does not treat the first component as an option.
+   set fnord `echo ":$file" | sed -ne 's/^:\//#/;s/^://;s/\// /g;s/^#/\//;p'`
+   shift
+
+   # pathcomp grows by one component per iteration of the inner loop.
+   pathcomp=
+   for d
+   do
+     pathcomp="$pathcomp$d"
+     # Prefix "./" to a path starting with "-" so that test and mkdir
+     # do not take it for an option.
+     case "$pathcomp" in
+       -* ) pathcomp=./$pathcomp ;;
+     esac
+
+     if test ! -d "$pathcomp"; then
+        echo "mkdir $pathcomp"
+
+        mkdir "$pathcomp" || lasterr=$?
+
+        # A failed mkdir only counts as an error if the directory is
+        # still missing (for instance, something else may have created
+        # it in the meantime).
+        if test ! -d "$pathcomp"; then
+         errstatus=$lasterr
+        fi
+     fi
+
+     pathcomp="$pathcomp/"
+   done
+done
+
+exit $errstatus
+

+ 29 - 17
names.h

@@ -10,9 +10,14 @@
  *
  * See LEGAL.NOTICE
  *
- * $Id: names.h,v 1.16 1999/01/13 15:44:08 christos Exp $
+ * $Id: names.h,v 1.18 2000/08/05 17:36:49 christos Exp $
  */
 
+/*
+	modified by Chris Lowth - 9 April 2000
+	to add mime type strings to the types table.
+*/
+
 /* these types are used to index the table 'types': keep em in sync! */
 #define	L_C	0		/* first and foremost on UNIX */
 #define	L_CC	1		/* Bjarne's postincrement */
@@ -27,23 +32,29 @@
 #define	L_JAVA	10		/* Java code */
 #define	L_HTML	11		/* HTML */
 #define	L_BCPL	12		/* BCPL */
+#define	L_M4	13		/* M4 */
 
-static const char *types[] = {
-	"C program text",
-	"C++ program text",
-	"FORTRAN program text",
-	"make commands text" ,
-	"PL/1 program text",
-	"assembler program text",
-	"English text",
-	"Pascal program text",
-	"mail text",
-	"news text",
-	"Java program text",
-	"HTML document text",
-	"BCPL program text",
-	"can't happen error on names.h/types",
-	0};
+/*
+ * Table indexed by the L_* constants: "human" is the human-readable
+ * description of the language, "mime" the corresponding MIME type string.
+ * Both members point at string literals, hence const char *.
+ * The table is terminated by a { 0, 0 } entry.
+ */
+static const struct {
+	const char *human;
+	const char *mime;
+} types[] = {
+	{ "C program",					"text/x-c" },
+	{ "C++ program",				"text/x-c++" },
+	{ "FORTRAN program",				"text/x-fortran" },
+	{ "make commands",				"text/x-makefile" },
+	{ "PL/1 program",				"text/x-pl1" },
+	{ "assembler program",				"text/x-asm" },
+	{ "English",					"text/plain, English" },
+	{ "Pascal program",				"text/x-pascal" },
+	{ "mail",					"text/x-mail" },
+	{ "news",					"text/x-news" },
+	{ "Java program",				"text/x-java" },
+	{ "HTML document",				"text/html" },
+	{ "BCPL program",				"text/x-bcpl" },
+	{ "M4 macro language pre-processor",		"text/x-m4" },
+	{ "can't happen error on names.h/types",	"error/x-error" },
+	{ 0, 0 }
+};
 
 /*
  * XXX - how should we distinguish Java from C++?
@@ -87,6 +98,7 @@ static struct names {
 } names[] = {
 	/* These must be sorted by eye for optimal hit rate */
 	/* Add to this list only after substantial meditation */
+	{"dnl",		L_M4},
 	{"import",	L_JAVA},
 	{"\"libhdr\"",	L_BCPL},
 	{"\"LIBHDR\"",	L_BCPL},

+ 64 - 2
patchlevel.h

@@ -1,11 +1,73 @@
 #define	FILE_VERSION_MAJOR	3
-#define	patchlevel		28
+#define	patchlevel		37
 
 /*
  * Patchlevel file for Ian Darwin's MAGIC command.
- * $Id: patchlevel.h,v 1.28 1999/10/31 22:11:48 christos Exp $
+ * $Id: patchlevel.h,v 1.37 2001/09/03 14:44:22 christos Exp $
  *
  * $Log: patchlevel.h,v $
+ * Revision 1.37  2001/09/03 14:44:22  christos
+ * daylight/tm_isdst detection
+ * magic fixes
+ * don't eat the whole file if it has only nulls
+ *
+ * Revision 1.36  2001/07/22 21:04:15  christos
+ * - magic fixes
+ * - add new operators, pascal strings, UTC date printing, $HOME/.magic
+ *   [from "Tom N Harris" <telliamed@mac.com>]
+ *
+ * Revision 1.35  2001/04/24 14:40:25  christos
+ * - rename magic file sgi to mips and fix it
+ * - add support for building magic.mgc
+ * - portability fixes for mmap()
+ * - try gzip before uncompress, because uncompress sometimes hangs
+ * - be more conservative about pipe reads and writes
+ * - many magic fixes
+ *
+ * Revision 1.34  2001/03/12 05:05:57  christos
+ * - new compiled magic format
+ * - lots of magic additions
+ *
+ * Revision 1.33  2000/11/13 00:30:50  christos
+ * - wordperfect magic fix: freebsd pr 9388
+ * - more msdos fixes from freebsd pr's 20131 and 20812
+ * - sas and spss magic [Bruce Foster]
+ * - mkinstalldirs [John Fremlin]
+ * - sgi opengl fixes [Michael Pruett]
+ * - netbsd magic fixes [Ignatios Souvatzis]
+ * - audio additions [Michael Pruett]
+ * - fix problem with non ansi RCSID [Andreas Ley]
+ * - oggs magic [Felix von Leitner]
+ * - gmon magic [Eugen Dedu]
+ * - TNEF magic [Joomy]
+ * - netpbm magic and misc other image stuff [Bryan Henderson]
+ *
+ * Revision 1.32  2000/08/05 18:24:18  christos
+ * Correct endianness detection in elf (Charles Hannum)
+ * FreeBSD elf core support (Guy Harris)
+ * Use gzip in systems that don't have uncompress (Anthon van der Neut)
+ * Internationalization/EBCDIC support (Eric Fisher)
+ * Many many magic changes
+ *
+ * Revision 1.31  2000/05/14 17:58:36  christos
+ * - new magic for claris files
+ * - new magic for mathematica and maple files
+ * - new magic for msvc files
+ * - new -k flag to keep going matching all possible entries
+ * - add the word executable on #! magic files, and fix the usage of
+ *   the word script
+ * - lots of other magic fixes
+ * - fix typo test -> text
+ *
+ * Revision 1.30  2000/04/11 02:41:17  christos
+ * - add support for mime output (-i)
+ * - make sure we free memory in case realloc fails
+ * - magic fixes
+ *
+ * Revision 1.29  1999/11/28 20:02:29  christos
+ * new string/[Bcb] magic from anthon, and adjustments to the magic files to
+ * use it.
+ *
  * Revision 1.28  1999/10/31 22:11:48  christos
  * - add "char" type for compatibility with HP/UX
  * - recognize HP/UX syntax &=n etc.

+ 104 - 46
print.c

@@ -41,82 +41,103 @@
 #include <time.h>
 
 #ifndef lint
-FILE_RCSID("@(#)$Id: print.c,v 1.29 1999/10/31 22:23:04 christos Exp $")
+FILE_RCSID("@(#)$Id: print.c,v 1.34 2001/08/07 16:01:26 christos Exp $")
 #endif  /* lint */
 
 #define SZOF(a)	(sizeof(a) / sizeof(a[0]))
 
 void
 mdump(m)
-struct magic *m;
+	struct magic *m;
 {
 	static const char *typ[] = { "invalid", "byte", "short", "invalid",
 				     "long", "string", "date", "beshort",
 				     "belong", "bedate", "leshort", "lelong",
-				     "ledate" };
+				     "ledate", "pstring", "ldate", "beldate",
+				     "leldate" };
+	static const char optyp[] = { '@', '&', '|', '^', '+', '-', 
+				      '*', '/', '%' };
 	(void) fputc('[', stderr);
 	(void) fprintf(stderr, ">>>>>>>> %d" + 8 - (m->cont_level & 7),
 		       m->offset);
 
-	if (m->flag & INDIR)
-		(void) fprintf(stderr, "(%s,%d),",
-			       /* Note: in.type is unsigned */
-			       (m->in.type < SZOF(typ)) ? 
-					typ[m->in.type] : "*bad*",
-			       m->in.offset);
-
+	if (m->flag & INDIR) {
+		(void) fprintf(stderr, "(%s,",
+			       /* Note: type is unsigned */
+			       (m->in_type < SZOF(typ)) ? 
+					typ[m->in_type] : "*bad*");
+		if (m->in_op & OPINVERSE)
+			(void) fputc('~', stderr);
+		(void) fprintf(stderr, "%c%d),",
+			       ((m->in_op&0x7F) < SZOF(optyp)) ? 
+					optyp[m->in_op&0x7F] : '?',
+				m->in_offset);
+	}
 	(void) fprintf(stderr, " %s%s", (m->flag & UNSIGNED) ? "u" : "",
 		       /* Note: type is unsigned */
 		       (m->type < SZOF(typ)) ? typ[m->type] : "*bad*");
-	if (m->mask != ~((uint32)0))
-		(void) fprintf(stderr, " & %.8x", m->mask);
+	if (m->mask_op & OPINVERSE)
+		(void) fputc('~', stderr);
+	if (m->mask) {
+		((m->mask_op&0x7F) < SZOF(optyp)) ? 
+			(void) fputc(optyp[m->mask_op&0x7F], stderr) :
+			(void) fputc('?', stderr);
+		if(STRING != m->type || PSTRING != m->type)
+			(void) fprintf(stderr, "%.8x", m->mask);
+		else {
+			if (m->mask & STRING_IGNORE_LOWERCASE) 
+				(void) fputc(CHAR_IGNORE_LOWERCASE, stderr);
+			if (m->mask & STRING_COMPACT_BLANK) 
+				(void) fputc(CHAR_COMPACT_BLANK, stderr);
+			if (m->mask & STRING_COMPACT_OPTIONAL_BLANK) 
+				(void) fputc(CHAR_COMPACT_OPTIONAL_BLANK,
+				stderr);
+		}
+	}
 
 	(void) fprintf(stderr, ",%c", m->reln);
 
 	if (m->reln != 'x') {
-	    switch (m->type) {
-	    case BYTE:
-	    case SHORT:
-	    case LONG:
-	    case LESHORT:
-	    case LELONG:
-	    case BESHORT:
-	    case BELONG:
-		    (void) fprintf(stderr, "%d", m->value.l);
-		    break;
-	    case STRING:
-		    showstr(stderr, m->value.s, -1);
-		    break;
-	    case DATE:
-	    case LEDATE:
-	    case BEDATE:
-		    {
-			    time_t t = m->value.l;
-			    char *rt, *pp = ctime(&t);
-
-			    if ((rt = strchr(pp, '\n')) != NULL)
-				    *rt = '\0';
-			    (void) fprintf(stderr, "%s,", pp);
-			    if (rt)
-				    *rt = '\n';
-		    }
-		    break;
-	    default:
-		    (void) fputs("*bad*", stderr);
-		    break;
-	    }
+		switch (m->type) {
+		case BYTE:
+		case SHORT:
+		case LONG:
+		case LESHORT:
+		case LELONG:
+		case BESHORT:
+		case BELONG:
+			(void) fprintf(stderr, "%d", m->value.l);
+			break;
+		case STRING:
+		case PSTRING:
+			showstr(stderr, m->value.s, -1);
+			break;
+		case DATE:
+		case LEDATE:
+		case BEDATE:
+			(void)fprintf(stderr, "%s,", fmttime(m->value.l, 1));
+			break;
+		case LDATE:
+		case LELDATE:
+		case BELDATE:
+			(void)fprintf(stderr, "%s,", fmttime(m->value.l, 0));
+			break;
+		default:
+			(void) fputs("*bad*", stderr);
+			break;
+		}
 	}
 	(void) fprintf(stderr, ",\"%s\"]\n", m->desc);
 }
 
 /*
- * ckfputs - futs, but with error checking
+ * ckfputs - fputs, but with error checking
  * ckfprintf - fprintf, but with error checking
  */
 void
 ckfputs(str, fil) 	
-    const char *str;
-    FILE *fil;
+	const char *str;
+	FILE *fil;
 {
 	if (fputs(str,fil) == EOF)
 		error("write failed.\n");
@@ -204,3 +225,40 @@ magwarn(va_alist)
 	va_end(va);
 	fputc('\n', stderr);
 }
+
+
+/*
+ * fmttime - format the time value "v" as text, without a trailing newline.
+ *
+ * If "local" is non-zero the value is formatted with ctime(); otherwise
+ * it is formatted with gmtime()/asctime(), after adding one hour when
+ * the "daylight" flag is set.
+ *
+ * Returns a pointer to static storage (the ctime()/asctime() buffer, or
+ * a fixed "*Invalid time*" string if the value cannot be converted), so
+ * the result is overwritten by the next call and must not be freed.
+ */
+char *
+fmttime(v, local)
+	long v;
+	int local;
+{
+	static char invalid[] = "*Invalid time*";
+	char *pp, *rt;
+	time_t t = (time_t)v;
+	struct tm *tm;
+
+	if (local) {
+		pp = ctime(&t);
+	} else {
+#ifndef HAVE_DAYLIGHT
+		static int daylight = 0;
+#ifdef HAVE_TM_ISDST
+		static time_t now = (time_t)0;
+
+		/* Work out once whether DST is currently in effect. */
+		if (now == (time_t)0) {
+			struct tm *tm1;
+			(void)time(&now);
+			tm1 = localtime(&now);
+			/*
+			 * localtime() may fail, and a negative tm_isdst
+			 * means "unknown"; neither counts as DST.
+			 */
+			if (tm1 != NULL)
+				daylight = tm1->tm_isdst > 0;
+		}
+#endif /* HAVE_TM_ISDST */
+#endif /* HAVE_DAYLIGHT */
+		if (daylight)
+			t += 3600;
+		tm = gmtime(&t);
+		if (tm == NULL)
+			return invalid;
+		pp = asctime(tm);
+	}
+
+	/* ctime()/asctime() return NULL for unrepresentable times. */
+	if (pp == NULL)
+		return invalid;
+	if ((rt = strchr(pp, '\n')) != NULL)
+		*rt = '\0';
+	return pp;
+}

+ 125 - 69
readelf.c

@@ -14,7 +14,7 @@
 #include "readelf.h"
 
 #ifndef lint
-FILE_RCSID("@(#)$Id: readelf.c,v 1.11 1999/10/31 22:23:04 christos Exp $")
+FILE_RCSID("@(#)$Id: readelf.c,v 1.17 2000/08/05 19:00:12 christos Exp $")
 #endif
 
 #ifdef	ELFCORE
@@ -195,8 +195,10 @@ dophn_exec(class, swap, fd, off, num, size)
 
 #ifdef ELFCORE
+/*
+ * Candidate offsets, within the contents of an NT_PRPSINFO note, at which
+ * the program name may be found; each entry is the layout of one OS.
+ * Presumably this table is the one used for 32-bit (ELFCLASS32) cores,
+ * with prpsoffsets64 as its 64-bit counterpart -- TODO confirm.
+ */
 size_t	prpsoffsets32[] = {
+	8,		/* FreeBSD */
+	28,		/* Linux 2.0.36 */
+	32,		/* Linux (I forget which kernel version) */
 	84,		/* SunOS 5.x */
-	32,		/* Linux */
 };
 size_t	prpsoffsets64[] = {
@@ -210,14 +212,23 @@ size_t	prpsoffsets64[] = {
 
 /*
  * Look through the program headers of an executable image, searching
- * for a PT_NOTE section of type NT_PRPSINFO, with a name "CORE"; if one
- * is found, try looking in various places in its contents for a 16-character
- * string containing only printable characters - if found, that string
- * should be the name of the program that dropped core.
- * Note: right after that 16-character string is, at least in SunOS 5.x
- * (and possibly other SVR4-flavored systems) and Linux, a longer string
- * (80 characters, in 5.x, probably other SVR4-flavored systems, and Linux)
- * containing the start of the command line for that program.
+ * for a PT_NOTE section of type NT_PRPSINFO, with a name "CORE" or
+ * "FreeBSD"; if one is found, try looking in various places in its
+ * contents for a 16-character string containing only printable
+ * characters - if found, that string should be the name of the program
+ * that dropped core.  Note: right after that 16-character string is,
+ * at least in SunOS 5.x (and possibly other SVR4-flavored systems) and
+ * Linux, a longer string (80 characters, in 5.x, probably other
+ * SVR4-flavored systems, and Linux) containing the start of the
+ * command line for that program.
+ *
+ * The signal number probably appears in a section of type NT_PRSTATUS,
+ * but that's also rather OS-dependent, in ways that are harder to
+ * dissect with heuristics, so I'm not bothering with the signal number.
+ * (I suppose the signal number could be of interest in situations where
+ * you don't have the binary of the program that dropped core; if you
+ * *do* have that binary, the debugger will probably tell you what
+ * signal it was.)
  */
 static void
 dophn_core(class, swap, fd, off, num, size)
@@ -232,12 +243,16 @@ dophn_core(class, swap, fd, off, num, size)
 	Elf32_Nhdr *nh32;
 	Elf64_Phdr ph64;
 	Elf64_Nhdr *nh64;
-	size_t offset, noffset, reloffset;
+	size_t offset, nameoffset, noffset, reloffset;
 	unsigned char c;
 	int i, j;
 	char nbuf[BUFSIZ];
 	int bufsize;
+	int is_freebsd;
 
+	/*
+	 * Loop through all the program headers.
+	 */
 	for ( ; num; num--) {
 		if (lseek(fd, off, SEEK_SET) == -1)
 			error("lseek failed (%s).\n", strerror(errno));
@@ -246,11 +261,16 @@ dophn_core(class, swap, fd, off, num, size)
 		off += size;
 		if (ph_type != PT_NOTE)
 			continue;
+
+		/*
+		 * This is a PT_NOTE section; loop through all the notes
+		 * in the section.
+		 */
 		if (lseek(fd, (off_t) ph_offset, SEEK_SET) == -1)
 			error("lseek failed (%s).\n", strerror(errno));
 		bufsize = read(fd, nbuf, BUFSIZ);
 		if (bufsize == -1)
-			error("read failed (%s).\n", strerror(errno));
+			error(": " "read failed (%s).\n", strerror(errno));
 		offset = 0;
 		for (;;) {
 			if (offset >= bufsize)
@@ -262,19 +282,8 @@ dophn_core(class, swap, fd, off, num, size)
 			offset += nh_size;
 
 			/*
-			 * If this note isn't an NT_PRPSINFO note, it's
-			 * not what we're looking for.
-			 */
-			if (nh_type != NT_PRPSINFO) {
-				offset += nh_namesz;
-				offset = ((offset + 3)/4)*4;
-				offset += nh_descsz;
-				offset = ((offset + 3)/4)*4;
-				continue;
-			}
-
-			/*
-			 * Make sure this note has the name "CORE".
+			 * Check whether this note has the name "CORE" or
+			 * "FreeBSD".
 			 */
 			if (offset + nh_namesz >= bufsize) {
 				/*
@@ -282,58 +291,105 @@ dophn_core(class, swap, fd, off, num, size)
 				 */
 				break;
 			}
-			if (nh_namesz != 5
-			    || strcmp(&nbuf[offset], "CORE") != 0)
-				continue;
+
+			nameoffset = offset;
 			offset += nh_namesz;
 			offset = ((offset + 3)/4)*4;
 
 			/*
-			 * Extract the program name.  We assume it to be
-			 * 16 characters (that's what it is in SunOS 5.x
-			 * and Linux).
-			 *
-			 * Unfortunately, it's at a different offset in
-			 * SunOS 5.x and Linux, so try multiple offsets.
-			 * If the characters aren't all printable, reject
-			 * it.
+			 * Sigh.  The 2.0.36 kernel in Debian 2.1, at
+			 * least, doesn't correctly implement name
+			 * sections, in core dumps, as specified by
+			 * the "Program Linking" section of "UNIX(R) System
+			 * V Release 4 Programmer's Guide: ANSI C and
+			 * Programming Support Tools", because my copy
+			 * clearly says "The first 'namesz' bytes in 'name'
+			 * contain a *null-terminated* [emphasis mine]
+			 * character representation of the entry's owner
+			 * or originator", but the 2.0.36 kernel code
+			 * doesn't include the terminating null in the
+			 * name....
 			 */
-			for (i = 0; i < NOFFSETS; i++) {
-				reloffset = prpsoffsets(i);
-				noffset = offset + reloffset;
-				for (j = 0; j < 16;
-				    j++, noffset++, reloffset++) {
-					/*
-					 * Make sure we're not past the end
-					 * of the buffer; if we are, just
-					 * give up.
-					 */
-					if (noffset >= bufsize)
-						return;
+			if ((nh_namesz == 4 &&
+			      strncmp(&nbuf[nameoffset], "CORE", 4) == 0) ||
+			    (nh_namesz == 5 &&
+			      strcmp(&nbuf[nameoffset], "CORE") == 0))
+				is_freebsd = 0;
+			else if ((nh_namesz == 8 &&
+			      strcmp(&nbuf[nameoffset], "FreeBSD") == 0))
+				is_freebsd = 1;
+			else
+				continue;
+			if (nh_type == NT_PRPSINFO) {
+				/*
+				 * Extract the program name.  We assume
+				 * it to be 16 characters (that's what it
+				 * is in SunOS 5.x and Linux).
+				 *
+				 * Unfortunately, it's at a different offset
+				 * in various OSes, so try multiple offsets.
+				 * If the characters aren't all printable,
+				 * reject it.
+				 */
+				for (i = 0; i < NOFFSETS; i++) {
+					reloffset = prpsoffsets(i);
+					noffset = offset + reloffset;
+					for (j = 0; j < 16;
+					    j++, noffset++, reloffset++) {
+						/*
+						 * Make sure we're not past
+						 * the end of the buffer; if
+						 * we are, just give up.
+						 */
+						if (noffset >= bufsize)
+							goto tryanother;
+
+						/*
+						 * Make sure we're not past
+						 * the end of the contents;
+						 * if we are, this obviously
+						 * isn't the right offset.
+						 */
+						if (reloffset >= nh_descsz)
+							goto tryanother;
+
+						c = nbuf[noffset];
+						if (c == '\0') {
+							/*
+							 * A '\0' at the
+							 * beginning is
+							 * obviously wrong.
+							 * Any other '\0'
+							 * means we're done.
+							 */
+							if (j == 0)
+								goto tryanother;
+							else
+								break;
+						} else {
+							/*
+							 * A nonprintable
+							 * character is also
+							 * wrong.
+							 */
+#define isquote(c) (strchr("'\"`", (c)) != NULL)
+							if (!isprint(c) ||
+							     isquote(c))
+								goto tryanother;
+						}
+					}
 
 					/*
-					 * Make sure we're not past the
-					 * end of the contents; if we
-					 * are, this obviously isn't
-					 * the right offset.
+					 * Well, that worked.
 					 */
-					if (reloffset >= nh_descsz)
-						goto tryanother;
+					printf(", from '%.16s'",
+					    &nbuf[offset + prpsoffsets(i)]);
+					break;
 
-					c = nbuf[noffset];
-					if (c != '\0' && !isprint(c))
-						goto tryanother;
+				tryanother:
+					;
 				}
-
-				/*
-				 * Well, that worked.
-				 */
-				printf(", from '%.16s'",
-				    &nbuf[offset + prpsoffsets(i)]);
-				return;
-
-			tryanother:
-				;
+				break;
 			}
 			offset += nh_descsz;
 			offset = ((offset + 3)/4)*4;
@@ -377,7 +433,7 @@ tryelf(fd, buf, nbytes)
 
 		u.l = 1;
 		(void) memcpy(&elfhdr, buf, sizeof elfhdr);
-		swap = (u.c[sizeof(long) - 1] + 1) != elfhdr.e_ident[5];
+		swap = (u.c[sizeof(int32) - 1] + 1) != elfhdr.e_ident[5];
 
 		if (getu16(swap, elfhdr.e_type) == ET_CORE) 
 #ifdef ELFCORE
@@ -414,7 +470,7 @@ tryelf(fd, buf, nbytes)
 
 		u.l = 1;
 		(void) memcpy(&elfhdr, buf, sizeof elfhdr);
-		swap = (u.c[sizeof(long) - 1] + 1) != elfhdr.e_ident[5];
+		swap = (u.c[sizeof(int32) - 1] + 1) != elfhdr.e_ident[5];
 
 		if (getu16(swap, elfhdr.e_type) == ET_CORE) 
 #ifdef ELFCORE

+ 642 - 71
softmagic.c

@@ -1,5 +1,5 @@
 /*
- * softmagic - interpret variable magic from /etc/magic
+ * softmagic - interpret variable magic from MAGIC
  *
  * Copyright (c) Ian F. Darwin, 1987.
  * Written by Ian F. Darwin.
@@ -27,6 +27,7 @@
 
 #include <stdio.h>
 #include <string.h>
+#include <ctype.h>
 #include <stdlib.h>
 #include <time.h>
 #include <sys/types.h>
@@ -34,10 +35,10 @@
 #include "file.h"
 
 #ifndef	lint
-FILE_RCSID("@(#)$Id: softmagic.c,v 1.39 1999/02/14 17:16:12 christos Exp $")
+FILE_RCSID("@(#)$Id: softmagic.c,v 1.46 2001/07/23 00:02:32 christos Exp $")
 #endif	/* lint */
 
-static int match	__P((unsigned char *, int));
+static int match	__P((struct magic *, uint32, unsigned char *, int));
 static int mget		__P((union VALUETYPE *,
 			     unsigned char *, struct magic *, int));
 static int mcheck	__P((union VALUETYPE *, struct magic *));
@@ -45,6 +46,8 @@ static int32 mprint	__P((union VALUETYPE *, struct magic *));
 static void mdebug	__P((int32, char *, int));
 static int mconvert	__P((union VALUETYPE *, struct magic *));
 
+extern int kflag;
+
 /*
  * softmagic - lookup one file in database 
  * (already read from /etc/magic by apprentice.c).
@@ -53,11 +56,14 @@ static int mconvert	__P((union VALUETYPE *, struct magic *));
 /*ARGSUSED1*/		/* nbytes passed for regularity, maybe need later */
 int
 softmagic(buf, nbytes)
-unsigned char *buf;
-int nbytes;
+	unsigned char *buf;
+	int nbytes;
 {
-	if (match(buf, nbytes))
-		return 1;
+	struct mlist *ml;
+
+	/*
+	 * mlist is the head of a circular list: start at the entry after
+	 * the head and stop on coming back round to it.  Each entry holds
+	 * its own magic array and count; return 1 as soon as match()
+	 * succeeds on one of them.
+	 */
+	for (ml = mlist.next; ml != &mlist; ml = ml->next)
+		if (match(ml->magic, ml->nmagic, buf, nbytes))
+			return 1;
 
 	return 0;
 }
@@ -90,9 +96,11 @@ int nbytes;
  *	so that higher-level continuations are processed.
  */
 static int
-match(s, nbytes)
-unsigned char	*s;
-int nbytes;
+match(magic, nmagic, s, nbytes)
+	struct magic *magic;
+	uint32 nmagic;
+	unsigned char	*s;
+	int nbytes;
 {
 	int magindex = 0;
 	int cont_level = 0;
@@ -101,6 +109,8 @@ int nbytes;
 	static int32 *tmpoff = NULL;
 	static size_t tmplen = 0;
 	int32 oldoff = 0;
+	int returnval = 0; /* if a match is found it is set to 1*/
+	int firstline = 1; /* a flag to print X\n  X\n- X */
 
 	if (tmpoff == NULL)
 		if ((tmpoff = (int32 *) malloc(tmplen = 20)) == NULL)
@@ -120,6 +130,11 @@ int nbytes;
 			    continue;
 		}
 
+		if (! firstline) { /* we found another match */
+			/* put a newline and '-' to do some simple formatting*/
+			printf("\n- ");
+		}
+
 		tmpoff[cont_level] = mprint(&p, &magic[magindex]);
 		/*
 		 * If we printed something, we'll need to print
@@ -142,9 +157,10 @@ int nbytes;
 					 */
 					cont_level = magic[magindex].cont_level;
 				}
-				if (magic[magindex].flag & ADD) {
+				if (magic[magindex].flag & OFFADD) {
 					oldoff=magic[magindex].offset;
-					magic[magindex].offset += tmpoff[cont_level-1];
+					magic[magindex].offset +=
+					    tmpoff[cont_level-1];
 				}
 				if (mget(&p, s, &magic[magindex], nbytes) &&
 				    mcheck(&p, &magic[magindex])) {
@@ -163,7 +179,8 @@ int nbytes;
 						(void) putchar(' ');
 						need_separator = 0;
 					}
-					tmpoff[cont_level] = mprint(&p, &magic[magindex]);
+					tmpoff[cont_level] =
+					    mprint(&p, &magic[magindex]);
 					if (magic[magindex].desc[0])
 						need_separator = 1;
 
@@ -178,31 +195,32 @@ int nbytes;
 						    tmplen += 20)) == NULL)
 							error("out of memory\n");
 				}
-				if (magic[magindex].flag & ADD) {
+				if (magic[magindex].flag & OFFADD) {
 					 magic[magindex].offset = oldoff;
 				}
 			}
 		}
-		return 1;		/* all through */
+		firstline = 0;
+		returnval = 1;
+		if (!kflag) {
+			return 1; /* don't keep searching */
+		}			
 	}
-	return 0;			/* no match at all */
+	return returnval;  /* This is hit if -k is set or there is no match */
 }
 
 static int32
 mprint(p, m)
-union VALUETYPE *p;
-struct magic *m;
+	union VALUETYPE *p;
+	struct magic *m;
 {
-	char *pp, *rt;
 	uint32 v;
-	time_t curtime;
 	int32 t=0 ;
 
 
   	switch (m->type) {
   	case BYTE:
-		v = p->b;
-		v = signextend(m, v) & m->mask;
+		v = signextend(m, p->b);
 		(void) printf(m->desc, (unsigned char) v);
 		t = m->offset + sizeof(char);
 		break;
@@ -210,8 +228,7 @@ struct magic *m;
   	case SHORT:
   	case BESHORT:
   	case LESHORT:
-		v = p->h;
-		v = signextend(m, v) & m->mask;
+		v = signextend(m, p->h);
 		(void) printf(m->desc, (unsigned short) v);
 		t = m->offset + sizeof(short);
 		break;
@@ -219,13 +236,13 @@ struct magic *m;
   	case LONG:
   	case BELONG:
   	case LELONG:
-		v = p->l;
-		v = signextend(m, v) & m->mask;
+		v = signextend(m, p->l);
 		(void) printf(m->desc, (uint32) v);
 		t = m->offset + sizeof(int32);
   		break;
 
   	case STRING:
+  	case PSTRING:
 		if (m->reln == '=') {
 			(void) printf(m->desc, m->value.s);
 			t = m->offset + strlen(m->value.s);
@@ -244,11 +261,14 @@ struct magic *m;
 	case DATE:
 	case BEDATE:
 	case LEDATE:
-		curtime = p->l;
-		pp = ctime(&curtime);
-		if ((rt = strchr(pp, '\n')) != NULL)
-			*rt = '\0';
-		(void) printf(m->desc, pp);
+		(void) printf(m->desc, fmttime(p->l, 1));
+		t = m->offset + sizeof(time_t);
+		break;
+
+	case LDATE:
+	case BELDATE:
+	case LELDATE:
+		(void) printf(m->desc, fmttime(p->l, 0));
 		t = m->offset + sizeof(time_t);
 		break;
 
@@ -261,43 +281,268 @@ struct magic *m;
 
 /*
  * Convert the byte order of the data we are looking at
+ * While we're here, let's apply the mask operation
+ * (unless you have a better idea)
  */
 static int
 mconvert(p, m)
-union VALUETYPE *p;
-struct magic *m;
+	union VALUETYPE *p;
+	struct magic *m;
 {
 	switch (m->type) {
 	case BYTE:
+		if (m->mask)
+			switch (m->mask_op&0x7F) {
+			case OPAND:
+				p->b &= m->mask;
+				break;
+			case OPOR:
+				p->b |= m->mask;
+				break;
+			case OPXOR:
+				p->b ^= m->mask;
+				break;
+			case OPADD:
+				p->b += m->mask;
+				break;
+			case OPMINUS:
+				p->b -= m->mask;
+				break;
+			case OPMULTIPLY:
+				p->b *= m->mask;
+				break;
+			case OPDIVIDE:
+				p->b /= m->mask;
+				break;
+			case OPMODULO:
+				p->b %= m->mask;
+				break;
+			}
+		if (m->mask_op & OPINVERSE)
+			p->b = ~p->b;
+		return 1;
 	case SHORT:
+		if (m->mask)
+			switch (m->mask_op&0x7F) {
+			case OPAND:
+				p->h &= m->mask;
+				break;
+			case OPOR:
+				p->h |= m->mask;
+				break;
+			case OPXOR:
+				p->h ^= m->mask;
+				break;
+			case OPADD:
+				p->h += m->mask;
+				break;
+			case OPMINUS:
+				p->h -= m->mask;
+				break;
+			case OPMULTIPLY:
+				p->h *= m->mask;
+				break;
+			case OPDIVIDE:
+				p->h /= m->mask;
+				break;
+			case OPMODULO:
+				p->h %= m->mask;
+				break;
+			}
+		if (m->mask_op & OPINVERSE)
+			p->h = ~p->h;
+		return 1;
 	case LONG:
 	case DATE:
+	case LDATE:
+		if (m->mask)
+			switch (m->mask_op&0x7F) {
+			case OPAND:
+				p->l &= m->mask;
+				break;
+			case OPOR:
+				p->l |= m->mask;
+				break;
+			case OPXOR:
+				p->l ^= m->mask;
+				break;
+			case OPADD:
+				p->l += m->mask;
+				break;
+			case OPMINUS:
+				p->l -= m->mask;
+				break;
+			case OPMULTIPLY:
+				p->l *= m->mask;
+				break;
+			case OPDIVIDE:
+				p->l /= m->mask;
+				break;
+			case OPMODULO:
+				p->l %= m->mask;
+				break;
+			}
+		if (m->mask_op & OPINVERSE)
+			p->l = ~p->l;
 		return 1;
 	case STRING:
 		{
-			char *ptr;
+			int n;
 
-			/* Null terminate and eat the return */
+			/* Null terminate and eat *trailing* return */
 			p->s[sizeof(p->s) - 1] = '\0';
-			if ((ptr = strchr(p->s, '\n')) != NULL)
-				*ptr = '\0';
+			n = strlen(p->s) - 1;
+			if (p->s[n] == '\n')
+				p->s[n] = '\0';
+			return 1;
+		}
+	case PSTRING:
+		{
+			char *ptr1 = p->s, *ptr2 = ptr1 + 1;
+			int n = *p->s;
+			if (n >= sizeof(p->s))
+				n = sizeof(p->s) - 1;
+			while (n--)
+				*ptr1++ = *ptr2++;
+			*ptr1 = '\0';
+			n = strlen(p->s) - 1;
+			if (p->s[n] == '\n')
+				p->s[n] = '\0';
 			return 1;
 		}
 	case BESHORT:
 		p->h = (short)((p->hs[0]<<8)|(p->hs[1]));
+		if (m->mask)
+			switch (m->mask_op&0x7F) {
+			case OPAND:
+				p->h &= m->mask;
+				break;
+			case OPOR:
+				p->h |= m->mask;
+				break;
+			case OPXOR:
+				p->h ^= m->mask;
+				break;
+			case OPADD:
+				p->h += m->mask;
+				break;
+			case OPMINUS:
+				p->h -= m->mask;
+				break;
+			case OPMULTIPLY:
+				p->h *= m->mask;
+				break;
+			case OPDIVIDE:
+				p->h /= m->mask;
+				break;
+			case OPMODULO:
+				p->h %= m->mask;
+				break;
+			}
+		if (m->mask_op & OPINVERSE)
+			p->h = ~p->h;
 		return 1;
 	case BELONG:
 	case BEDATE:
+	case BELDATE:
 		p->l = (int32)
 		    ((p->hl[0]<<24)|(p->hl[1]<<16)|(p->hl[2]<<8)|(p->hl[3]));
+		if (m->mask)
+			switch (m->mask_op&0x7F) {
+			case OPAND:
+				p->l &= m->mask;
+				break;
+			case OPOR:
+				p->l |= m->mask;
+				break;
+			case OPXOR:
+				p->l ^= m->mask;
+				break;
+			case OPADD:
+				p->l += m->mask;
+				break;
+			case OPMINUS:
+				p->l -= m->mask;
+				break;
+			case OPMULTIPLY:
+				p->l *= m->mask;
+				break;
+			case OPDIVIDE:
+				p->l /= m->mask;
+				break;
+			case OPMODULO:
+				p->l %= m->mask;
+				break;
+			}
+		if (m->mask_op & OPINVERSE)
+			p->l = ~p->l;
 		return 1;
 	case LESHORT:
 		p->h = (short)((p->hs[1]<<8)|(p->hs[0]));
+		if (m->mask)
+			switch (m->mask_op&0x7F) {
+			case OPAND:
+				p->h &= m->mask;
+				break;
+			case OPOR:
+				p->h |= m->mask;
+				break;
+			case OPXOR:
+				p->h ^= m->mask;
+				break;
+			case OPADD:
+				p->h += m->mask;
+				break;
+			case OPMINUS:
+				p->h -= m->mask;
+				break;
+			case OPMULTIPLY:
+				p->h *= m->mask;
+				break;
+			case OPDIVIDE:
+				p->h /= m->mask;
+				break;
+			case OPMODULO:
+				p->h %= m->mask;
+				break;
+			}
+		if (m->mask_op & OPINVERSE)
+			p->h = ~p->h;
 		return 1;
 	case LELONG:
 	case LEDATE:
+	case LELDATE:
 		p->l = (int32)
 		    ((p->hl[3]<<24)|(p->hl[2]<<16)|(p->hl[1]<<8)|(p->hl[0]));
+		if (m->mask)
+			switch (m->mask_op&0x7F) {
+			case OPAND:
+				p->l &= m->mask;
+				break;
+			case OPOR:
+				p->l |= m->mask;
+				break;
+			case OPXOR:
+				p->l ^= m->mask;
+				break;
+			case OPADD:
+				p->l += m->mask;
+				break;
+			case OPMINUS:
+				p->l -= m->mask;
+				break;
+			case OPMULTIPLY:
+				p->l *= m->mask;
+				break;
+			case OPDIVIDE:
+				p->l /= m->mask;
+				break;
+			case OPMODULO:
+				p->l %= m->mask;
+				break;
+			}
+		if (m->mask_op & OPINVERSE)
+			p->l = ~p->l;
 		return 1;
 	default:
 		error("invalid type %d in mconvert().\n", m->type);
@@ -308,9 +553,9 @@ struct magic *m;
 
 static void
 mdebug(offset, str, len)
-int32 offset;
-char *str;
-int len;
+	int32 offset;
+	char *str;
+	int len;
 {
 	(void) fprintf(stderr, "mget @%d: ", offset);
 	showstr(stderr, (char *) str, len);
@@ -320,10 +565,10 @@ int len;
 
 static int
 mget(p, s, m, nbytes)
-union VALUETYPE* p;
-unsigned char	*s;
-struct magic *m;
-int nbytes;
+	union VALUETYPE* p;
+	unsigned char	*s;
+	struct magic *m;
+	int nbytes;
 {
 	int32 offset = m->offset;
 
@@ -348,33 +593,326 @@ int nbytes;
 
 	if (m->flag & INDIR) {
 
-		switch (m->in.type) {
+		switch (m->in_type) {
 		case BYTE:
-			offset = p->b + m->in.offset;
+			if (m->in_offset)
+				switch (m->in_op&0x7F) {
+				case OPAND:
+					offset = p->b & m->in_offset;
+					break;
+				case OPOR:
+					offset = p->b | m->in_offset;
+					break;
+				case OPXOR:
+					offset = p->b ^ m->in_offset;
+					break;
+				case OPADD:
+					offset = p->b + m->in_offset;
+					break;
+				case OPMINUS:
+					offset = p->b - m->in_offset;
+					break;
+				case OPMULTIPLY:
+					offset = p->b * m->in_offset;
+					break;
+				case OPDIVIDE:
+					offset = p->b / m->in_offset;
+					break;
+				case OPMODULO:
+					offset = p->b % m->in_offset;
+					break;
+				}
+			if (m->in_op & OPINVERSE)
+				offset = ~offset;
 			break;
 		case BESHORT:
-		        offset = (short)((p->hs[0]<<8)|(p->hs[1]))+
-			          m->in.offset;
+			if (m->in_offset)
+				switch (m->in_op&0x7F) {
+				case OPAND:
+					offset = (short)((p->hs[0]<<8)|
+							 (p->hs[1])) &
+						 m->in_offset;
+					break;
+				case OPOR:
+					offset = (short)((p->hs[0]<<8)|
+							 (p->hs[1])) |
+						 m->in_offset;
+					break;
+				case OPXOR:
+					offset = (short)((p->hs[0]<<8)|
+							 (p->hs[1])) ^
+						 m->in_offset;
+					break;
+				case OPADD:
+					offset = (short)((p->hs[0]<<8)|
+							 (p->hs[1])) +
+						 m->in_offset;
+					break;
+				case OPMINUS:
+					offset = (short)((p->hs[0]<<8)|
+							 (p->hs[1])) -
+						 m->in_offset;
+					break;
+				case OPMULTIPLY:
+					offset = (short)((p->hs[0]<<8)|
+							 (p->hs[1])) *
+						 m->in_offset;
+					break;
+				case OPDIVIDE:
+					offset = (short)((p->hs[0]<<8)|
+							 (p->hs[1])) /
+						 m->in_offset;
+					break;
+				case OPMODULO:
+					offset = (short)((p->hs[0]<<8)|
+							 (p->hs[1])) %
+						 m->in_offset;
+					break;
+				}
+			if (m->in_op & OPINVERSE)
+				offset = ~offset;
 			break;
 		case LESHORT:
-		        offset = (short)((p->hs[1]<<8)|(p->hs[0]))+
-			         m->in.offset;
+			if (m->in_offset)
+				switch (m->in_op&0x7F) {
+				case OPAND:
+					offset = (short)((p->hs[1]<<8)|
+							 (p->hs[0])) &
+						 m->in_offset;
+					break;
+				case OPOR:
+					offset = (short)((p->hs[1]<<8)|
+							 (p->hs[0])) |
+						 m->in_offset;
+					break;
+				case OPXOR:
+					offset = (short)((p->hs[1]<<8)|
+							 (p->hs[0])) ^
+						 m->in_offset;
+					break;
+				case OPADD:
+					offset = (short)((p->hs[1]<<8)|
+							 (p->hs[0])) +
+						 m->in_offset;
+					break;
+				case OPMINUS:
+					offset = (short)((p->hs[1]<<8)|
+							 (p->hs[0])) -
+						 m->in_offset;
+					break;
+				case OPMULTIPLY:
+					offset = (short)((p->hs[1]<<8)|
+							 (p->hs[0])) *
+						 m->in_offset;
+					break;
+				case OPDIVIDE:
+					offset = (short)((p->hs[1]<<8)|
+							 (p->hs[0])) /
+						 m->in_offset;
+					break;
+				case OPMODULO:
+					offset = (short)((p->hs[1]<<8)|
+							 (p->hs[0])) %
+						 m->in_offset;
+					break;
+				}
+			if (m->in_op & OPINVERSE)
+				offset = ~offset;
 			break;
 		case SHORT:
-			offset = p->h + m->in.offset;
+			if (m->in_offset)
+				switch (m->in_op&0x7F) {
+				case OPAND:
+					offset = p->h & m->in_offset;
+					break;
+				case OPOR:
+					offset = p->h | m->in_offset;
+					break;
+				case OPXOR:
+					offset = p->h ^ m->in_offset;
+					break;
+				case OPADD:
+					offset = p->h + m->in_offset;
+					break;
+				case OPMINUS:
+					offset = p->h - m->in_offset;
+					break;
+				case OPMULTIPLY:
+					offset = p->h * m->in_offset;
+					break;
+				case OPDIVIDE:
+					offset = p->h / m->in_offset;
+					break;
+				case OPMODULO:
+					offset = p->h % m->in_offset;
+					break;
+				}
+			if (m->in_op & OPINVERSE)
+				offset = ~offset;
 			break;
 		case BELONG:
-		        offset = (int32)((p->hl[0]<<24)|(p->hl[1]<<16)|
-					 (p->hl[2]<<8)|(p->hl[3]))+
-			         m->in.offset;
+			if (m->in_offset)
+				switch (m->in_op&0x7F) {
+				case OPAND:
+					offset = (int32)((p->hl[0]<<24)|
+							 (p->hl[1]<<16)|
+							 (p->hl[2]<<8)|
+							 (p->hl[3])) &
+						 m->in_offset;
+					break;
+				case OPOR:
+					offset = (int32)((p->hl[0]<<24)|
+							 (p->hl[1]<<16)|
+							 (p->hl[2]<<8)|
+							 (p->hl[3])) |
+						 m->in_offset;
+					break;
+				case OPXOR:
+					offset = (int32)((p->hl[0]<<24)|
+							 (p->hl[1]<<16)|
+							 (p->hl[2]<<8)|
+							 (p->hl[3])) ^
+						 m->in_offset;
+					break;
+				case OPADD:
+					offset = (int32)((p->hl[0]<<24)|
+							 (p->hl[1]<<16)|
+							 (p->hl[2]<<8)|
+							 (p->hl[3])) +
+						 m->in_offset;
+					break;
+				case OPMINUS:
+					offset = (int32)((p->hl[0]<<24)|
+							 (p->hl[1]<<16)|
+							 (p->hl[2]<<8)|
+							 (p->hl[3])) -
+						 m->in_offset;
+					break;
+				case OPMULTIPLY:
+					offset = (int32)((p->hl[0]<<24)|
+							 (p->hl[1]<<16)|
+							 (p->hl[2]<<8)|
+							 (p->hl[3])) *
+						 m->in_offset;
+					break;
+				case OPDIVIDE:
+					offset = (int32)((p->hl[0]<<24)|
+							 (p->hl[1]<<16)|
+							 (p->hl[2]<<8)|
+							 (p->hl[3])) /
+						 m->in_offset;
+					break;
+				case OPMODULO:
+					offset = (int32)((p->hl[0]<<24)|
+							 (p->hl[1]<<16)|
+							 (p->hl[2]<<8)|
+							 (p->hl[3])) %
+						 m->in_offset;
+					break;
+				}
+			if (m->in_op & OPINVERSE)
+				offset = ~offset;
 			break;
 		case LELONG:
-		        offset = (int32)((p->hl[3]<<24)|(p->hl[2]<<16)|
-					 (p->hl[1]<<8)|(p->hl[0]))+
-			         m->in.offset;
+			if (m->in_offset)
+				switch (m->in_op&0x7F) {
+				case OPAND:
+					offset = (int32)((p->hl[3]<<24)|
+							 (p->hl[2]<<16)|
+							 (p->hl[1]<<8)|
+							 (p->hl[0])) &
+						 m->in_offset;
+					break;
+				case OPOR:
+					offset = (int32)((p->hl[3]<<24)|
+							 (p->hl[2]<<16)|
+							 (p->hl[1]<<8)|
+							 (p->hl[0])) |
+						 m->in_offset;
+					break;
+				case OPXOR:
+					offset = (int32)((p->hl[3]<<24)|
+							 (p->hl[2]<<16)|
+							 (p->hl[1]<<8)|
+							 (p->hl[0])) ^
+						 m->in_offset;
+					break;
+				case OPADD:
+					offset = (int32)((p->hl[3]<<24)|
+							 (p->hl[2]<<16)|
+							 (p->hl[1]<<8)|
+							 (p->hl[0])) +
+						 m->in_offset;
+					break;
+				case OPMINUS:
+					offset = (int32)((p->hl[3]<<24)|
+							 (p->hl[2]<<16)|
+							 (p->hl[1]<<8)|
+							 (p->hl[0])) -
+						 m->in_offset;
+					break;
+				case OPMULTIPLY:
+					offset = (int32)((p->hl[3]<<24)|
+							 (p->hl[2]<<16)|
+							 (p->hl[1]<<8)|
+							 (p->hl[0])) *
+						 m->in_offset;
+					break;
+				case OPDIVIDE:
+					offset = (int32)((p->hl[3]<<24)|
+							 (p->hl[2]<<16)|
+							 (p->hl[1]<<8)|
+							 (p->hl[0])) /
+						 m->in_offset;
+					break;
+				case OPMODULO:
+					offset = (int32)((p->hl[3]<<24)|
+							 (p->hl[2]<<16)|
+							 (p->hl[1]<<8)|
+							 (p->hl[0])) %
+						 m->in_offset;
+					break;
+				}
+			if (m->in_op & OPINVERSE)
+				offset = ~offset;
 			break;
 		case LONG:
-			offset = p->l + m->in.offset;
+			if (m->in_offset)
+				switch (m->in_op&0x7F) {
+				case OPAND:
+					offset = p->l & m->in_offset;
+					break;
+				case OPOR:
+					offset = p->l | m->in_offset;
+					break;
+				case OPXOR:
+					offset = p->l ^ m->in_offset;
+					break;
+				case OPADD:
+					offset = p->l + m->in_offset;
+					break;
+				case OPMINUS:
+					offset = p->l - m->in_offset;
+					break;
+				case OPMULTIPLY:
+					offset = p->l * m->in_offset;
+					break;
+				case OPDIVIDE:
+					offset = p->l / m->in_offset;
+					break;
+				case OPMODULO:
+					offset = p->l % m->in_offset;
+					break;
+			/*	case TOOMANYSWITCHBLOCKS:
+			 *		ugh = p->eye % m->strain;
+			 *		rub;
+			 *	case BEER:
+			 *		off = p->tab & m->in_gest;
+			 *		sleep;
+			 */
+				}
+			if (m->in_op & OPINVERSE)
+				offset = ~offset;
 			break;
 		}
 
@@ -395,11 +933,11 @@ int nbytes;
 
 static int
 mcheck(p, m)
-union VALUETYPE* p;
-struct magic *m;
+	union VALUETYPE* p;
+	struct magic *m;
 {
-	register uint32 l = m->value.l;
-	register uint32 v;
+	uint32 l = m->value.l;
+	uint32 v;
 	int matched;
 
 	if ( (m->value.s[0] == 'x') && (m->value.s[1] == '\0') ) {
@@ -425,33 +963,66 @@ struct magic *m;
 	case DATE:
 	case BEDATE:
 	case LEDATE:
+	case LDATE:
+	case BELDATE:
+	case LELDATE:
 		v = p->l;
 		break;
 
 	case STRING:
-		l = 0;
-		/* What we want here is:
+	case PSTRING:
+		{
+		/*
+		 * What we want here is:
 		 * v = strncmp(m->value.s, p->s, m->vallen);
 		 * but ignoring any nulls.  bcmp doesn't give -/+/0
 		 * and isn't universally available anyway.
 		 */
+		unsigned char *a = (unsigned char*)m->value.s;
+		unsigned char *b = (unsigned char*)p->s;
+		int len = m->vallen;
+		l = 0;
 		v = 0;
-		{
-			register unsigned char *a = (unsigned char*)m->value.s;
-			register unsigned char *b = (unsigned char*)p->s;
-			register int len = m->vallen;
-
+		if (0L == m->mask) { /* normal string: do it fast */
 			while (--len >= 0)
 				if ((v = *b++ - *a++) != '\0')
-					break;
+					break; 
+		} else { /* combine the others */
+			while (--len >= 0) {
+				if ((m->mask & STRING_IGNORE_LOWERCASE) &&
+				    islower(*a)) {
+					if ((v = tolower(*b++) - *a++) != '\0')
+						break;
+				} else if ((m->mask & STRING_COMPACT_BLANK) && 
+				    isspace(*a)) { 
+					a++;
+					if (isspace(*b++)) {
+						while (isspace(*b))
+							b++;
+					} else {
+						v = 1;
+						break;
+					}
+				} else if (isspace(*a) &&
+				    (m->mask & STRING_COMPACT_OPTIONAL_BLANK)) {
+					a++;
+					while (isspace(*b))
+						b++;
+				} else {
+					if ((v = *b++ - *a++) != '\0')
+						break;
+				}
+			}
 		}
 		break;
+	}
 	default:
 		error("invalid type %d in mcheck().\n", m->type);
 		return 0;/*NOTREACHED*/
 	}
 
-	v = signextend(m, v) & m->mask;
+	if(m->type != STRING && m->type != PSTRING)
+		v = signextend(m, v);
 
 	switch (m->reln) {
 	case 'x':