#------------------------------------------------------------------------------ # $File: archive,v 1.151 2021/08/16 10:19:56 christos Exp $ # archive: file(1) magic for archive formats (see also "msdos" for self- # extracting compressed archives) # # cpio, ar, arc, arj, hpack, lha/lharc, rar, squish, uc2, zip, zoo, etc. # pre-POSIX "tar" archives are also handled in the C code ../../src/is_tar.c. # POSIX tar archives # URL: https://en.wikipedia.org/wiki/Tar_(computing) # Reference: https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5&manpath=FreeBSD+8-current # header mainly padded with nul bytes 500 quad 0 !:strength /2 # filename or extended attribute printable strings in range space null til umlaut ue >0 ubeshort >0x1F00 >>0 ubeshort <0xFCFD # last 4 header bytes often null but tar\0 in gtarfail2.tar gtarfail.tar-bad # at https://sourceforge.net/projects/s-tar/files/testscripts/ >>>508 ubelong&0x8B9E8DFF 0 # nul, space or ascii digit 0-7 at start of mode >>>>100 ubyte&0xC8 =0 >>>>>101 ubyte&0xC8 =0 # nul, space at end of check sum >>>>>>155 ubyte&0xDF =0 # space or ascii digit 0 at start of check sum >>>>>>>148 ubyte&0xEF =0x20 >>>>>>>>0 use tar-file # minimal check and then display tar archive information which can also be # embedded inside others like Android Backup, Clam AntiVirus database 0 name tar-file >257 string !ustar # header padded with nuls >>257 ulong =0 # GNU tar version 1.29 with non pax format option without refusing # creates misleading V7 header for Long path, Multi-volume, Volume type >>>156 ubyte 0x4c GNU tar archive !:mime application/x-gtar !:ext tar/gtar >>>156 ubyte 0x4d GNU tar archive !:mime application/x-gtar !:ext tar/gtar >>>156 ubyte 0x56 GNU tar archive !:mime application/x-gtar !:ext tar/gtar >>>156 default x tar archive (V7) !:mime application/x-tar !:ext tar # other stuff in padding # some implementations add new fields to the blank area at the end of the header record # created for example by DOS TAR 3.20g 1994 Tim V.Shapore with -j option >>257 ulong !0 tar archive (old) !:mime application/x-tar !:ext tar # magic in newer, GNU, posix variants >257 string =ustar # 2 last char of magic and UStar version because string expression does not work # 2 space characters followed by a null for GNU variant >>261 ubelong =0x72202000 POSIX tar archive (GNU) !:mime application/x-gtar !:ext tar/gtar # UStar version with ASCII "00" >>261 ubelong 0x72003030 POSIX # gLOBAL and ExTENSION type only found in POSIX.1-2001 format >>>156 ubyte 0x67 \b.1-2001 >>>156 ubyte 0x78 \b.1-2001 >>>156 ubyte x tar archive !:mime application/x-ustar !:ext tar/ustar # version with 2 binary nuls embedded in Android Backup like com.android.settings.ab >>261 ubelong 0x72000000 tar archive (ustar) !:mime application/x-ustar !:ext tar/ustar # not seen ustar variant with garbish version >>261 default x tar archive (unknown ustar) !:mime application/x-ustar !:ext tar/ustar # type flag of 1st tar archive member #>156 ubyte x \b, %c-type >156 ubyte x >>156 ubyte 0 \b, file >>156 ubyte 0x30 \b, file >>156 ubyte 0x31 \b, hard link >>156 ubyte 0x32 \b, symlink >>156 ubyte 0x33 \b, char device >>156 ubyte 0x34 \b, block device >>156 ubyte 0x35 \b, directory >>156 ubyte 0x36 \b, fifo >>156 ubyte 0x37 \b, reserved >>156 ubyte 0x4c \b, long path >>156 ubyte 0x4d \b, multi volume >>156 ubyte 0x56 \b, volume >>156 ubyte 0x67 \b, global >>156 ubyte 0x78 \b, extension >>156 default x \b, type >>>156 ubyte x '%c' # name[100] >0 string >\0 %-.60s # mode mainly stored as an octal number in ASCII null or space terminated >100 string >\0 \b, mode %-.7s # user id mainly as octal numbers in ASCII null or space terminated >108 string >\0 \b, uid %-.7s # group id mainly as octal numbers in ASCII null or space terminated >116 string >\0 \b, gid %-.7s # size mainly as octal number in ASCII >124 ubyte <0x38 >>124 string >\0 \b, size %-.12s # coding indicated by setting the high-order bit of the leftmost byte >124 ubyte >0xEF \b, size 0x >>124 ubyte !0xff \b%2.2x >>125 ubyte !0xff \b%2.2x >>126 ubyte !0xff \b%2.2x >>127 ubyte !0xff \b%2.2x >>128 ubyte !0xff \b%2.2x >>129 ubyte !0xff \b%2.2x >>130 ubyte !0xff \b%2.2x >>131 ubyte !0xff \b%2.2x >>132 ubyte !0xff \b%2.2x >>133 ubyte !0xff \b%2.2x >>134 ubyte !0xff \b%2.2x >>135 ubyte !0xff \b%2.2x # seconds since 0:0:0 1 jan 1970 UTC as octal number mainly in ASCII null or space terminated >136 string >\0 \b, seconds %-.11s # header checksum stored as an octal number in ASCII null or space terminated #>148 string x \b, cksum %.7s # linkname[100] >157 string >\0 \b, linkname %-.40s # additional fields for ustar >257 string =ustar # owner user name null terminated >>265 string >\0 \b, user %-.32s # group name null terminated >>297 string >\0 \b, group %-.32s # device major minor if not zero >>329 ubequad&0xCFCFCFCFcFcFcFdf !0 >>>329 string x \b, devmaj %-.7s >>337 ubequad&0xCFCFCFCFcFcFcFdf !0 >>>337 string x \b, devmin %-.7s # prefix[155] >>345 string >\0 \b, prefix %-.155s # old non ustar/POSIX tar >257 string !ustar >>508 string =tar\0 # padding[255] in old star >>>257 string >\0 \b, padding: %-.40s >>508 default x # padding[255] in old tar sometimes comment field >>>257 string >\0 \b, comment: %-.40s # Incremental snapshot gnu-tar format from: # https://www.gnu.org/software/tar/manual/html_node/Snapshot-Files.html 0 string GNU\ tar- GNU tar incremental snapshot data >&0 regex [0-9]\.[0-9]+-[0-9]+ version %s # cpio archives # # Yes, the top two "cpio archive" formats *are* supposed to just be "short". # The idea is to indicate archives produced on machines with the same # byte order as the machine running "file" with "cpio archive", and # to indicate archives produced on machines with the opposite byte order # from the machine running "file" with "byte-swapped cpio archive". # # The SVR4 "cpio(4)" hints that there are additional formats, but they # are defined as "short"s; I think all the new formats are # character-header formats and thus are strings, not numbers. 0 short 070707 cpio archive !:mime application/x-cpio 0 short 0143561 byte-swapped cpio archive !:mime application/x-cpio # encoding: swapped 0 string 070707 ASCII cpio archive (pre-SVR4 or odc) !:mime application/x-cpio 0 string 070701 ASCII cpio archive (SVR4 with no CRC) !:mime application/x-cpio 0 string 070702 ASCII cpio archive (SVR4 with CRC) !:mime application/x-cpio # # Various archive formats used by various versions of the "ar" # command. # # # Original UNIX archive formats. # They were written with binary values in host byte order, and # the magic number was a host "int", which might have been 16 bits # or 32 bits. We don't say "PDP-11" or "VAX", as there might have # been ports to little-endian 16-bit-int or 32-bit-int platforms # (x86?) using some of those formats; if none existed, feel free # to use "PDP-11" for little-endian 16-bit and "VAX" for little-endian # 32-bit. There might have been big-endian ports of that sort as # well. # 0 leshort 0177555 very old 16-bit-int little-endian archive 0 beshort 0177555 very old 16-bit-int big-endian archive 0 lelong 0177555 very old 32-bit-int little-endian archive 0 belong 0177555 very old 32-bit-int big-endian archive 0 leshort 0177545 old 16-bit-int little-endian archive >2 string __.SYMDEF random library 0 beshort 0177545 old 16-bit-int big-endian archive >2 string __.SYMDEF random library 0 lelong 0177545 old 32-bit-int little-endian archive >4 string __.SYMDEF random library 0 belong 0177545 old 32-bit-int big-endian archive >4 string __.SYMDEF random library # # From "pdp" (but why a 4-byte quantity?) # 0 lelong 0x39bed PDP-11 old archive 0 lelong 0x39bee PDP-11 4.0 archive # # XXX - what flavor of APL used this, and was it a variant of # some ar archive format? It's similar to, but not the same # as, the APL workspace magic numbers in pdp. # 0 long 0100554 apl workspace # # System V Release 1 portable(?) archive format. # 0 string = System V Release 1 ar archive !:mime application/x-archive # # Debian package; it's in the portable archive format, and needs to go # before the entry for regular portable archives, as it's recognized as # a portable archive whose first member has a name beginning with # "debian". # # Update: Joerg Jenderek # URL: https://en.wikipedia.org/wiki/Deb_(file_format) 0 string =!\ndebian # https://manpages.debian.org/testing/dpkg/dpkg-split.1.en.html >14 string -split part of multipart Debian package !:mime application/vnd.debian.binary-package # udeb is used for stripped down deb file !:ext deb/udeb >14 string -binary Debian binary package !:mime application/vnd.debian.binary-package # For ipk packager see also https://en.wikipedia.org/wiki/Opkg !:ext deb/udeb/ipk # This should not happen >14 default x Unknown Debian package # NL terminated version; for most Debian cases this is 2.0 or 2.1 for split >68 string >\0 (format %s) #>68 string !2.0\n #>>68 string x (format %.3s) >68 string =2.0\n # 2nd archive name=control archive name like control.tar.gz or control.tar.xz >>72 string >\0 \b, with %.14s # look for 3rd archive name=data archive name like data.tar.{gz,xz,bz2,lzma} >>0 search/0x93e4f data.tar. \b, data compression # the above line only works if FILE_BYTES_MAX in ../../src/file.h is raised # for example like libreoffice-dev-doc_1%3a5.2.7-1+rpi1+deb9u3_all.deb >>>&0 string x %.2s # skip space (0x20 BSD) and slash (0x2f System V) character marking end of name >>>&2 ubyte !0x20 >>>>&-1 ubyte !0x2f # display 3rd character of file name extension like 2 of bz2 or m of lzma >>>>>&-1 ubyte x \b%c >>>>>>&0 ubyte !0x20 >>>>>>>&-1 ubyte !0x2f # display 4th character of file name extension like a of lzma >>>>>>>>&-1 ubyte x \b%c # split debian package case >68 string =2.1\n # dpkg-1.18.25/dpkg-split/info.c # NL terminated ASCII package name like ckermit >>&0 string x \b, %s # NL terminated package version like 302-5.3 >>>&1 string x %s # NL terminated MD5 checksum >>>>&1 string x \b, MD5 %s # NL terminated original package length >>>>>&1 string x \b, unsplitted size %s # NL terminated part length >>>>>>&1 string x \b, part length %s # NL terminated package part like n/m >>>>>>>&1 string x \b, part %s # NL terminated package architecture like armhf since dpkg 1.16.1 or later >>>>>>>>&1 string x \b, %s # # MIPS archive; they're in the portable archive format, and need to go # before the entry for regular portable archives, as it's recognized as # a portable archive whose first member has a name beginning with # "__________E". # 0 string =!\n__________E MIPS archive !:mime application/x-archive >20 string U with MIPS Ucode members >21 string L with MIPSEL members >21 string B with MIPSEB members >19 string L and an EL hash table >19 string B and an EB hash table >22 string X -- out of date # # BSD/SVR2-and-later portable archive formats. # # Update: Joerg Jenderek # URL: http://fileformats.archiveteam.org/wiki/AR # Reference: https://www.unix.com/man-page/opensolaris/3HEAD/ar.h/ # Note: Mach-O universal binary in ./cafebabe is dependent # TODO: unify current ar archive, MIPS archive, Debian package # distinguish BSD, SVR; 32, 64 bit; HP from other 32-bit SVR; # *.ar packages from *.a libraries. handle empty archive 0 string =!\n current ar archive # print first and possibly second ar_name[16] for debugging purpose #>8 string x \b, 1st "%.16s" #>68 string x \b, 2nd "%.16s" !:mime application/x-archive # a in most case for libraries; lib for Microsoft libraries; ar else cases !:ext a/lib/ar >8 string __.SYMDEF random library # first member with long marked name __.SYMDEF SORTED implies BSD library >68 string __.SYMDEF\ SORTED random library # Reference: https://parisc.wiki.kernel.org/images-parisc/b/b2/Rad_11_0_32.pdf # "archive file" entry moved from ./hp # LST header system_id 0210h~PA-RISC 1.1,... identifies the target architecture # LST header a_magic 0619h~relocatable library >68 belong 0x020b0619 - PA-RISC1.0 relocatable library >68 belong 0x02100619 - PA-RISC1.1 relocatable library >68 belong 0x02110619 - PA-RISC1.2 relocatable library >68 belong 0x02140619 - PA-RISC2.0 relocatable library #EOF for common ar archives # # "Thin" archive, as can be produced by GNU ar. # 0 string =!\n thin archive with >68 belong 0 no symbol entries >68 belong 1 %d symbol entry >68 belong >1 %d symbol entries 0 search/1 -h- Software Tools format archive text # ARC archiver, from Daniel Quinlan (quinlan@yggdrasil.com) # # The first byte is the magic (0x1a), byte 2 is the compression type for # the first file (0x01 through 0x09), and bytes 3 to 15 are the MS-DOS # filename of the first file (null terminated). Since some types collide # we only test some types on basis of frequency: 0x08 (83%), 0x09 (5%), # 0x02 (5%), 0x03 (3%), 0x04 (2%), 0x06 (2%). 0x01 collides with terminfo. 0 lelong&0x8080ffff 0x0000081a ARC archive data, dynamic LZW !:mime application/x-arc 0 lelong&0x8080ffff 0x0000091a ARC archive data, squashed !:mime application/x-arc 0 lelong&0x8080ffff 0x0000021a ARC archive data, uncompressed !:mime application/x-arc 0 lelong&0x8080ffff 0x0000031a ARC archive data, packed !:mime application/x-arc 0 lelong&0x8080ffff 0x0000041a ARC archive data, squeezed !:mime application/x-arc 0 lelong&0x8080ffff 0x0000061a ARC archive data, crunched !:mime application/x-arc # [JW] stuff taken from idarc, obviously ARC successors: 0 lelong&0x8080ffff 0x00000a1a PAK archive data !:mime application/x-arc 0 lelong&0x8080ffff 0x0000141a ARC+ archive data !:mime application/x-arc 0 lelong&0x8080ffff 0x0000481a HYP archive data !:mime application/x-arc # Acorn archive formats (Disaster prone simpleton, m91dps@ecs.ox.ac.uk) # I can't create either SPARK or ArcFS archives so I have not tested this stuff # [GRR: the original entries collide with ARC, above; replaced with combined # version (not tested)] #0 byte 0x1a RISC OS archive (spark format) 0 string \032archive RISC OS archive (ArcFS format) 0 string Archive\000 RISC OS archive (ArcFS format) # All these were taken from idarc, many could not be verified. Unfortunately, # there were many low-quality sigs, i.e. easy to trigger false positives. # Please notify me of any real-world fishy/ambiguous signatures and I'll try # to get my hands on the actual archiver and see if I find something better. [JW] # probably many can be enhanced by finding some 0-byte or control char near the start # idarc calls this Crush/Uncompressed... *shrug* 0 string CRUSH Crush archive data # Squeeze It (.sqz) 0 string HLSQZ Squeeze It archive data # SQWEZ 0 string SQWEZ SQWEZ archive data # HPack (.hpk) 0 string HPAK HPack archive data # HAP 0 string \x91\x33HF HAP archive data # MD/MDCD 0 string MDmd MDCD archive data # LIM 0 string LIM\x1a LIM archive data # SAR 3 string LH5 SAR archive data # BSArc/BS2 0 string \212\3SB\020\0 BSArc/BS2 archive data # Bethesda Softworks Archive (Oblivion) 0 string BSA\0 BSArc archive data >4 lelong x version %d # MAR 2 string =-ah MAR archive data # ACB #0 belong&0x00f800ff 0x00800000 ACB archive data # CPZ # TODO, this is what idarc says: 0 string \0\0\0 CPZ archive data # JRC 0 string JRchive JRC archive data # Quantum 0 string DS\0 Quantum archive data # ReSOF 0 string PK\3\6 ReSOF archive data # QuArk 0 string 7\4 QuArk archive data # YAC 14 string YC YAC archive data # X1 0 string X1 X1 archive data 0 string XhDr X1 archive data # CDC Codec (.dqt) 0 belong&0xffffe000 0x76ff2000 CDC Codec archive data # AMGC 0 string \xad6" AMGC archive data # NuLIB 0 string N\xc3\xb5F\xc3\xa9lx\xc3\xa5 NuLIB archive data # PakLeo 0 string LEOLZW PAKLeo archive data # ChArc 0 string SChF ChArc archive data # PSA 0 string PSA PSA archive data # CrossePAC 0 string DSIGDCC CrossePAC archive data # Freeze 0 string \x1f\x9f\x4a\x10\x0a Freeze archive data # KBoom 0 string \xc2\xa8MP\xc2\xa8 KBoom archive data # NSQ, must go after CDC Codec 0 string \x76\xff NSQ archive data # DPA 0 string Dirk\ Paehl DPA archive data # BA # TODO: idarc says "bytes 0-2 == bytes 3-5" # TTComp # URL: http://fileformats.archiveteam.org/wiki/TTComp_archive # Update: Joerg Jenderek # GRR: line below is too general as it matches also Panorama database "TCDB 2003-10 demo.pan", others 0 string \0\6 # look for first keyword of Panorama database *.pan >12 search/261 DESIGN # skip keyword with low entropy >12 default x # skip DOS 2.0 backup id file, sequence 6 with many nils like BACKUPID_xx6.@@@ handled by ./msdos >>8 quad !0 >>>0 use ttcomp # variant ASCII, 4K dictionary (strength=48=50-2). With strength=49 wrong order! WHY? 0 string \1\6 # TODO: # skip VAX-order 68k Blit mpx/mux executable (strength=50) handled by ./blit !:strength -2 >0 use ttcomp 0 string \0\5 # skip some DOS 2.0 backup id file, sequence 5 with many nils like BACKUPID_075.@@@ handled by ./msdos >8 quad !0 >>0 use ttcomp 0 string \1\5 # TODO: # variant ASCII, 2K dictionary (strength=48=50-2). With strength=49 wrong order! WHY? # skip ctab data (strength=50) handled by ./ibm6000 # skip locale data table (strength=50) handled by ./digital !:strength -2 >0 use ttcomp 0 string \0\4 # skip many Maple help database *.hdb with version tag handled by ./maple >1028 string !version # skip veclib maple.hdb by looking for Mable keyword >>4 search/1091 Maple\040 #>4 search/34090 Maple\040 >>4 default x # skip DOS 2.0-3.2 backed up sequence 4 with many nils like LOTUS5.RAR handled by ./msdos # skip xBASE Compound Index file *.CDX with many nils >>>0x54 quad !0 >>>>0 use ttcomp 0 string \1\4 # TODO: # skip Commodore PET BASIC 4.0 program *.prg # variant ASCII, 1K dictionary (strength=48=50-2). With strength=49 wrong order! WHY? # skip shared library (strength=50) handled by ./ibm6000 !:strength -2 >0 use ttcomp # display information of TTComp archive 0 name ttcomp # (version 5.25) labeled the entry as "TTComp archive data" >0 ubyte x TTComp archive data !:mime application/x-compress-ttcomp # PBACKSCR.PI1 !:ext $xe/$ts/pi1/__d # compression type: 0~binary compression 1~ASCII compression >0 ubyte 0 \b, binary >0 ubyte 1 \b, ASCII # size of the dictionary: 4~1024 bytes 5~2048 bytes 6~4096 bytes >1 ubyte 4 \b, 1K >1 ubyte 5 \b, 2K >1 ubyte 6 \b, 4K >1 ubyte x dictionary # https://mark0.net/forum/index.php?topic=848 # last 3 bytes probably have only 8 possible bit sequences # xxxxxxxx 0000000x 11111111 ____FFh # xxxxxxxx 10000000 01111111 __807Fh # 0xxxxxxx 11000000 00111111 __C03Fh # 00xxxxxx 11100000 00011111 __E01Fh # 000xxxxx 11110000 00001111 __F00Fh # 0000xxxx 11111000 00000111 __F807h # 00000xxx 11111100 00000011 __FC03h # 000000xx 11111110 00000001 __FE01h # but for quickgif.__d 0A7DD4h #>-3 ubyte x \b, last 3 bytes 0x%2.2x #>-2 ubeshort x \b%4.4x # From: Joerg Jenderek # URL: https://wiki.68kmla.org/DiskCopy_4.2_format_specification # reference: http://nulib.com/library/FTN.e00005.htm 0x52 ubeshort 0x0100 # test for disk image size equal or above 400k >0x40 ubelong >409599 # test also for disk image size equal or below 1440k to skip # windows7en.mbr UNICODE.DAT >>0x40 ubelong <1474561 # To skip Flags$StringJoiner.class with size 00106A61h test also for only 4 disk image sizes # 00064000 for 400k GCR disks # 000c8000 for 800k GCR disks # 000b4000 for 720k MFM disks # 00168000 for 1440k MFM disks >>>0x40 ubelong&0xffE03fFF 0 >>>>0 use dc42-floppy # display information of Apple DiskCopy 4.2 floppy image 0 name dc42-floppy # image pascal name padded with NULs like Microsoft Mail >00 pstring/B x Apple DiskCopy 4.2 image %s #!:mime application/octet-stream !:mime application/x-dc42-floppy-image !:apple dCpydImg !:ext image/dc42 # data size in bytes like 409600 >0x40 ubelong x \b, %u bytes # for debugging purpose size in hexadecimal #>0x40 ubelong x (%#8.8x) # tag size in bytes >0x44 ubelong >0 \b, %#x tag size # data checksum #>0x48 ubelong x \b, %#x checksum # tag checksum #>0x4c ubelong x \b, %#x tag checksum # disk encoding >0x50 ubyte 0 \b, GCR CLV ssdd (400k) >0x50 ubyte 1 \b, GCR CLV dsdd (800k) >0x50 ubyte 2 \b, MFM CAV dsdd (720k) >0x50 ubyte 3 \b, MFM CAV dshd (1440k) >0x50 ubyte >3 \b, %#x encoding # format byte >0x51 ubyte x \b, %#x format #>0x54 ubequad x \b, data %#16.16llx # ESP, could this conflict with Easy Software Products' (e.g.ESP ghostscript) documentation? 0 string ESP ESP archive data # ZPack 0 string \1ZPK\1 ZPack archive data # Sky 0 string \xbc\x40 Sky archive data # UFA 0 string UFA UFA archive data # Dry 0 string =-H2O DRY archive data # FoxSQZ 0 string FOXSQZ FoxSQZ archive data # AR7 0 string ,AR7 AR7 archive data # PPMZ 0 string PPMZ PPMZ archive data # MS Compress # Update: Joerg Jenderek # URL: http://fileformats.archiveteam.org/wiki/MS-DOS_installation_compression # Reference: https://hwiegman.home.xs4all.nl/fileformats/compress/szdd_kwaj_format.html # Note: use correct version of extracting tool like EXPAND, UNPACK, DECOMP or 7Z 4 string \x88\xf0\x27 # KWAJ variant >0 string KWAJ MS Compress archive data, KWAJ variant !:mime application/x-ms-compress-kwaj # extension not working in version 5.32 # magic/Magdir/archive, 284: Warning: EXTENSION type ` ??_' has bad char '?' # file: line 284: Bad magic entry ' ??_' !:ext ??_ # compression method (0-4) >>8 uleshort x \b, %u method # offset of compressed data >>10 uleshort x \b, %#x offset #>>(10.s) uleshort x #>>>&-6 string x \b, TEST extension %-.3s # header flags to mark header extensions >>12 uleshort >0 \b, %#x flags # 4 bytes: decompressed length of file >>12 uleshort &0x01 >>>14 ulelong x \b, original size: %u bytes # 2 bytes: unknown purpose # 2 bytes: length of unknown data + mentioned bytes # 1-9 bytes: null-terminated file name # 1-4 bytes: null-terminated file extension >>12 uleshort &0x08 >>>12 uleshort ^0x01 >>>>12 uleshort ^0x02 >>>>>12 uleshort ^0x04 >>>>>>12 uleshort ^0x10 >>>>>>>14 string x \b, %-.8s >>>>>>12 uleshort &0x10 >>>>>>>14 string x \b, %-.8s >>>>>>>>&1 string x \b.%-.3s >>>>>12 uleshort &0x04 >>>>>>12 uleshort ^0x10 >>>>>>>(14.s) uleshort x >>>>>>>>&14 string x \b, %-.8s >>>>>>12 uleshort &0x10 >>>>>>>(14.s) uleshort x >>>>>>>>&14 string x \b, %-.8s >>>>>>>>>&1 string x \b.%-.3s >>>>12 uleshort &0x02 >>>>>12 uleshort ^0x04 >>>>>>12 uleshort ^0x10 >>>>>>>16 string x \b, %-.8s >>>>>>12 uleshort &0x10 >>>>>>>16 string x \b, %-.8s >>>>>>>>&1 string x \b.%-.3s >>>>>12 uleshort &0x04 >>>>>>12 uleshort ^0x10 >>>>>>>(16.s) uleshort x >>>>>>>>&16 string x \b, %-.8s >>>>>>12 uleshort &0x10 >>>>>>>(16.s) uleshort x >>>>>>>&16 string x %-.8s >>>>>>>>&1 string x \b.%-.3s >>>12 uleshort &0x01 >>>>12 uleshort ^0x02 >>>>>12 uleshort ^0x04 >>>>>>12 uleshort ^0x10 >>>>>>>18 string x \b, %-.8s >>>>>>12 uleshort &0x10 >>>>>>>18 string x \b, %-.8s >>>>>>>>&1 string x \b.%-.3s >>>>>12 uleshort &0x04 >>>>>>12 uleshort ^0x10 >>>>>>>(18.s) uleshort x >>>>>>>>&18 string x \b, %-.8s >>>>>>12 uleshort &0x10 >>>>>>>(18.s) uleshort x >>>>>>>>&18 string x \b, %-.8s >>>>>>>>>&1 string x \b.%-.3s >>>>12 uleshort &0x02 >>>>>12 uleshort ^0x04 >>>>>>12 uleshort ^0x10 >>>>>>>20 string x \b, %-.8s >>>>>>12 uleshort &0x10 >>>>>>>20 string x \b, %-.8s >>>>>>>>&1 string x \b.%-.3s >>>>>12 uleshort &0x04 >>>>>>12 uleshort ^0x10 >>>>>>>(20.s) uleshort x >>>>>>>>&20 string x \b, %-.8s >>>>>>12 uleshort &0x10 >>>>>>>(20.s) uleshort x >>>>>>>>&20 string x \b, %-.8s >>>>>>>>>&1 string x \b.%-.3s # 2 bytes: length of data + mentioned bytes # # SZDD variant Haruhiko Okumura's LZSS or 7z type MsLZ >0 string SZDD MS Compress archive data, SZDD variant !:mime application/x-ms-compress-szdd !:ext ??_ # The character missing from the end of the filename (0=unknown) >>9 string >\0 \b, %-.1s is last character of original name # https://www.betaarchive.com/forum/viewtopic.php?t=26161 # Compression mode: "A" (0x41) found but sometimes "B" in Windows 3.1 builds 026 and 034e >>8 string !A \b, %-.1s method >>10 ulelong >0 \b, original size: %u bytes # QBasic SZDD variant 3 string \x88\xf0\x27 >0 string SZ\x20 MS Compress archive data, QBasic variant !:mime application/x-ms-compress-sz !:ext ??$ >>8 ulelong >0 \b, original size: %u bytes # Summary: FTCOMP compressed archive # From: Joerg Jenderek # URL: http://fileformats.archiveteam.org/wiki/FTCOMP # Reference: http://mark0.net/download/triddefs_xml.7z/defs/a/ark-ftcomp.trid.xml # Note: called by TrID "FTCOMP compressed archive" # extracted by `unpack seahelp.hl_` 24 string/b FTCOMP FTCOMP compressed archive #!:mime application/octet-stream !:mime application/x-compress-ftcomp !:ext ??_/??@/dll/drv/pk2/ # probably A596FDFF magic at the beginning >0 ubelong !0xA596FDFF \b, at beginning %#x # probably original file name with directory like: \OS2\unpack.exe \SYSTEM\8514.DRV MAHJONGG.EXE >41 string x "%s" # MP3 (archiver, not lossy audio compression) 0 string MP3\x1a MP3-Archiver archive data # ZET 0 string OZ\xc3\x9d ZET archive data # TSComp 0 string \x65\x5d\x13\x8c\x08\x01\x03\x00 TSComp archive data # ARQ 0 string gW\4\1 ARQ archive data # Squash 3 string OctSqu Squash archive data # Terse 0 string \5\1\1\0 Terse archive data # PUCrunch 0 string \x01\x08\x0b\x08\xef\x00\x9e\x32\x30\x36\x31 PUCrunch archive data # UHarc 0 string UHA UHarc archive data # ABComp 0 string \2AB ABComp archive data 0 string \3AB2 ABComp archive data # CMP 0 string CO\0 CMP archive data # Splint 0 string \x93\xb9\x06 Splint archive data # InstallShield 0 string \x13\x5d\x65\x8c InstallShield Z archive Data # Gather 1 string GTH Gather archive data # BOA 0 string BOA BOA archive data # RAX 0 string ULEB\xa RAX archive data # Xtreme 0 string ULEB\0 Xtreme archive data # Pack Magic 0 string @\xc3\xa2\1\0 Pack Magic archive data # BTS 0 belong&0xfeffffff 0x1a034465 BTS archive data # ELI 5750 0 string Ora\ ELI 5750 archive data # QFC 0 string \x1aFC\x1a QFC archive data 0 string \x1aQF\x1a QFC archive data # PRO-PACK 0 string RNC PRO-PACK archive data # 777 0 string 777 777 archive data # LZS221 0 string sTaC LZS221 archive data # HPA 0 string HPA HPA archive data # Arhangel 0 string LG Arhangel archive data # EXP1, uses bzip2 0 string 0123456789012345BZh EXP1 archive data # IMP 0 string IMP\xa IMP archive data # NRV 0 string \x00\x9E\x6E\x72\x76\xFF NRV archive data # Squish 0 string \x73\xb2\x90\xf4 Squish archive data # Par 0 string PHILIPP Par archive data 0 string PAR Par archive data # HIT 0 string UB HIT archive data # SBX 0 belong&0xfffff000 0x53423000 SBX archive data # NaShrink 0 string NSK NaShrink archive data # SAPCAR 0 string #\ CAR\ archive\ header SAPCAR archive data 0 string CAR\ 2.00RG SAPCAR archive data # Disintegrator 0 string DST Disintegrator archive data # ASD 0 string ASD ASD archive data # InstallShield CAB 0 string ISc( InstallShield CAB # TOP4 0 string T4\x1a TOP4 archive data # BatComp left out: sig looks like COM executable # so TODO: get real 4dos batcomp file and find sig # BlakHole 0 string BH\5\7 BlakHole archive data # BIX 0 string BIX0 BIX archive data # ChiefLZA 0 string ChfLZ ChiefLZA archive data # Blink 0 string Blink Blink archive data # Logitech Compress 0 string \xda\xfa Logitech Compress archive data # ARS-Sfx (FIXME: really a SFX? then goto COM/EXE) 1 string (C)\ STEPANYUK ARS-Sfx archive data # AKT/AKT32 0 string AKT32 AKT32 archive data 0 string AKT AKT archive data # NPack 0 string MSTSM NPack archive data # PFT 0 string \0\x50\0\x14 PFT archive data # SemOne 0 string SEM SemOne archive data # PPMD 0 string \x8f\xaf\xac\x84 PPMD archive data # FIZ 0 string FIZ FIZ archive data # MSXiE 0 belong&0xfffff0f0 0x4d530000 MSXiE archive data # DeepFreezer 0 belong&0xfffffff0 0x797a3030 DeepFreezer archive data # DC 0 string =2 string \x2\x4 Xpack DiskImage archive data #!:ext xdi # XPack Data # *.xpa updated by Joerg Jenderek Sep 2015 # ftp://ftp.elf.stuba.sk/pub/pc/pack/ 0 string xpa XPA !:ext xpa # XPA32 # ftp://ftp.elf.stuba.sk/pub/pc/pack/xpa32.zip # created by XPA32.EXE version 1.0.2 for Windows >0 string xpa\0\1 \b32 archive data # created by XPACK.COM version 1.67m or 1.67r with short 0x1800 >3 ubeshort !0x0001 \bck archive data # XPack Single Data # changed by Joerg Jenderek Sep 2015 back to like in version 5.12 # letter 'I'+ acute accent is equivalent to \xcd 0 string \xcd\ jm Xpack single archive data #!:mime application/x-xpa-compressed !:ext xpa # TODO: missing due to unknown magic/magic at end of file: #DWC #ARG #ZAR #PC/3270 #InstallIt #RKive #RK #XPack Diskimage # These were inspired by idarc, but actually verified # Dzip archiver (.dz) # Update: Joerg Jenderek # URL: http://speeddemosarchive.com/dzip/ # reference: http://speeddemosarchive.com/dzip/dz29src.zip/main.c # GRR: line below is too general as it matches also ASCII texts like Doszip commander help dz.txt 0 string DZ # latest version is 2.9 dated 7 may 2003 >2 byte <4 Dzip archive data !:mime application/x-dzip !:ext dz >>2 byte x \b, version %i >>3 byte x \b.%i >>4 ulelong x \b, offset %#x >>8 ulelong x \b, %u files # ZZip archiver (.zz) 0 string ZZ\ \0\0 ZZip archive data 0 string ZZ0 ZZip archive data # PAQ archiver (.paq) 0 string \xaa\x40\x5f\x77\x1f\xe5\x82\x0d PAQ archive data 0 string PAQ PAQ archive data >3 byte&0xf0 0x30 >>3 byte x (v%c) # JAR archiver (.j), this is the successor to ARJ, not Java's JAR (which is essentially ZIP) 0xe string \x1aJar\x1b JAR (ARJ Software, Inc.) archive data 0 string JARCS JAR (ARJ Software, Inc.) archive data # ARJ archiver (jason@jarthur.Claremont.EDU) 0 leshort 0xea60 ARJ archive data !:mime application/x-arj >5 byte x \b, v%d, >8 byte &0x04 multi-volume, >8 byte &0x10 slash-switched, >8 byte &0x20 backup, >34 string x original name: %s, >7 byte 0 os: MS-DOS >7 byte 1 os: PRIMOS >7 byte 2 os: Unix >7 byte 3 os: Amiga >7 byte 4 os: Macintosh >7 byte 5 os: OS/2 >7 byte 6 os: Apple ][ GS >7 byte 7 os: Atari ST >7 byte 8 os: NeXT >7 byte 9 os: VAX/VMS >3 byte >0 %d] # [JW] idarc says this is also possible 2 leshort 0xea60 ARJ archive data # HA archiver (Greg Roelofs, newt@uchicago.edu) # This is a really bad format. A file containing HAWAII will match this... #0 string HA HA archive data, #>2 leshort =1 1 file, #>2 leshort >1 %hu files, #>4 byte&0x0f =0 first is type CPY #>4 byte&0x0f =1 first is type ASC #>4 byte&0x0f =2 first is type HSC #>4 byte&0x0f =0x0e first is type DIR #>4 byte&0x0f =0x0f first is type SPECIAL # suggestion: at least identify small archives (<1024 files) 0 belong&0xffff00fc 0x48410000 HA archive data >2 leshort =1 1 file, >2 leshort >1 %u files, >4 byte&0x0f =0 first is type CPY >4 byte&0x0f =1 first is type ASC >4 byte&0x0f =2 first is type HSC >4 byte&0x0f =0x0e first is type DIR >4 byte&0x0f =0x0f first is type SPECIAL # HPACK archiver (Peter Gutmann, pgut1@cs.aukuni.ac.nz) 0 string HPAK HPACK archive data # JAM Archive volume format, by Dmitry.Kohmanyuk@UA.net 0 string \351,\001JAM\ JAM archive, >7 string >\0 version %.4s >0x26 byte =0x27 - >>0x2b string >\0 label %.11s, >>0x27 lelong x serial %08x, >>0x36 string >\0 fstype %.8s # LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu) # Update: Joerg Jenderek # URL: https://en.wikipedia.org/wiki/LHA_(file_format) # Reference: https://web.archive.org/web/20021005080911/http://www.osirusoft.com/joejared/lzhformat.html # # check and display information of lharc (LHa,PMarc) file 0 name lharc-file # check 1st character of method id like -lz4- -lh5- or -pm2- >2 string - # check 5th character of method id >>6 string - # check header level 0 1 2 3 >>>20 ubyte <4 # check 2nd, 3th and 4th character of method id >>>>3 regex \^(lh[0-9a-ex]|lz[s2-8]|pm[012]|pc1) \b !:mime application/x-lzh-compressed # creator type "LHA " !:apple ????LHA # display archive type name like "LHa/LZS archive data" or "LArc archive" >>>>>2 string -lz \b !:ext lzs # already known -lzs- -lz4- -lz5- with old names >>>>>>2 string -lzs LHa/LZS archive data >>>>>>3 regex \^lz[45] LHarc 1.x archive data # missing -lz?- with wikipedia names >>>>>>3 regex \^lz[2378] LArc archive # display archive type name like "LHa (2.x) archive data" >>>>>2 string -lh \b # already known -lh0- -lh1- -lh2- -lh3- -lh4- -lh5- -lh6- -lh7- -lhd- variants with old names >>>>>>3 regex \^lh[01] LHarc 1.x/ARX archive data # LHice archiver use ".ICE" as name extension instead usual one ".lzh" # FOOBAR archiver use ".foo" as name extension instead usual one # "Florain Orjanov's and Olga Bachetska's ARchiver" not found at the moment >>>>>>>2 string -lh1 \b !:ext lha/lzh/ice >>>>>>3 regex \^lh[23d] LHa 2.x? archive data >>>>>>3 regex \^lh[7] LHa (2.x)/LHark archive data >>>>>>3 regex \^lh[456] LHa (2.x) archive data >>>>>>>2 string -lh5 \b # https://en.wikipedia.org/wiki/BIOS # Some mainboard BIOS like Award use LHa compression. So archives with unusual extension are found like # bios.rom , kd7_v14.bin, 1010.004, ... !:ext lha/lzh/rom/bin # missing -lh?- variants (Joe Jared) >>>>>>3 regex \^lh[89a-ce] LHa (Joe Jared) archive # UNLHA32 2.67a >>>>>>2 string -lhx LHa (UNLHA32) archive # lha archives with standard file name extensions ".lha" ".lzh" >>>>>>3 regex !\^(lh1|lh5) \b !:ext lha/lzh # this should not happen if all -lh variants are described >>>>>>2 default x LHa (unknown) archive #!:ext lha # PMarc >>>>>3 regex \^pm[012] PMarc archive data !:ext pma # append method id without leading and trailing minus character >>>>>3 string x [%3.3s] >>>>>>0 use lharc-header # # check and display information of lharc header 0 name lharc-header # header size 0x4 , 0x1b-0x61 >0 ubyte x # compressed data size != compressed file size #>7 ulelong x \b, data size %d # attribute: 0x2~?? 0x10~symlink|target 0x20~normal #>19 ubyte x \b, 19_%#x # level identifier 0 1 2 3 #>20 ubyte x \b, level %d # time stamp #>15 ubelong x DATE %#8.8x # OS ID for level 1 >20 ubyte 1 # 0x20 types find for *.rom files >>(21.b+24) ubyte <0x21 \b, %#x OS # ascii type like M for MSDOS >>(21.b+24) ubyte >0x20 \b, '%c' OS # OS ID for level 2 >20 ubyte 2 #>>23 ubyte x \b, OS ID %#x >>23 ubyte <0x21 \b, %#x OS >>23 ubyte >0x20 \b, '%c' OS # filename only for level 0 and 1 >20 ubyte <2 # length of filename >>21 ubyte >0 \b, with # filename >>>21 pstring x "%s" # #2 string -lh0- LHarc 1.x/ARX archive data [lh0] #!:mime application/x-lharc 2 string -lh0- >0 use lharc-file #2 string -lh1- LHarc 1.x/ARX archive data [lh1] #!:mime application/x-lharc 2 string -lh1- >0 use lharc-file # NEW -lz2- ... -lz8- 2 string -lz2- >0 use lharc-file 2 string -lz3- >0 use lharc-file 2 string -lz4- >0 use lharc-file 2 string -lz5- >0 use lharc-file 2 string -lz7- >0 use lharc-file 2 string -lz8- >0 use lharc-file # [never seen any but the last; -lh4- reported in comp.compression:] #2 string -lzs- LHa/LZS archive data [lzs] 2 string -lzs- >0 use lharc-file # According to wikipedia and others such a version does not exist #2 string -lh\40- LHa 2.x? archive data [lh ] #2 string -lhd- LHa 2.x? archive data [lhd] 2 string -lhd- >0 use lharc-file #2 string -lh2- LHa 2.x? archive data [lh2] 2 string -lh2- >0 use lharc-file #2 string -lh3- LHa 2.x? archive data [lh3] 2 string -lh3- >0 use lharc-file #2 string -lh4- LHa (2.x) archive data [lh4] 2 string -lh4- >0 use lharc-file #2 string -lh5- LHa (2.x) archive data [lh5] 2 string -lh5- >0 use lharc-file #2 string -lh6- LHa (2.x) archive data [lh6] 2 string -lh6- >0 use lharc-file #2 string -lh7- LHa (2.x)/LHark archive data [lh7] 2 string -lh7- # !:mime application/x-lha # >20 byte x - header level %d >0 use lharc-file # NEW -lh8- ... -lhe- , -lhx- 2 string -lh8- >0 use lharc-file 2 string -lh9- >0 use lharc-file 2 string -lha- >0 use lharc-file 2 string -lhb- >0 use lharc-file 2 string -lhc- >0 use lharc-file 2 string -lhe- >0 use lharc-file 2 string -lhx- >0 use lharc-file # taken from idarc [JW] 2 string -lZ PUT archive data # already done by LHarc magics # this should never happen if all sub types of LZS archive are identified #2 string -lz LZS archive data 2 string -sw1- Swag archive data 0 name rar-file-header >24 byte 15 \b, v1.5 >24 byte 20 \b, v2.0 >24 byte 29 \b, v4 >15 byte 0 \b, os: MS-DOS >15 byte 1 \b, os: OS/2 >15 byte 2 \b, os: Win32 >15 byte 3 \b, os: Unix >15 byte 4 \b, os: Mac OS >15 byte 5 \b, os: BeOS 0 name rar-archive-header >3 leshort&0x1ff >0 \b, flags: >>3 leshort &0x01 ArchiveVolume >>3 leshort &0x02 Commented >>3 leshort &0x04 Locked >>3 leshort &0x10 NewVolumeNaming >>3 leshort &0x08 Solid >>3 leshort &0x20 Authenticated >>3 leshort &0x40 RecoveryRecordPresent >>3 leshort &0x80 EncryptedBlockHeader >>3 leshort &0x100 FirstVolume # RAR (Roshal Archive) archive 0 string Rar!\x1a\7\0 RAR archive data !:mime application/x-rar !:ext rar/cbr # file header >(0xc.l+9) byte 0x74 >>(0xc.l+7) use rar-file-header # subblock seems to share information with file header >(0xc.l+9) byte 0x7a >>(0xc.l+7) use rar-file-header >9 byte 0x73 >>7 use rar-archive-header 0 string Rar!\x1a\7\1\0 RAR archive data, v5 !:mime application/x-rar !:ext rar # Very old RAR archive # https://jasonblanks.com/wp-includes/images/papers/KnowyourarchiveRAR.pdf 0 string RE\x7e\x5e RAR archive data (26 string \x8\0\0\0mimetypeapplication/ # KOffice / OpenOffice & StarOffice / OpenDocument formats # From: Abel Cheung # KOffice (1.2 or above) formats # (mimetype contains "application/vnd.kde.") >>50 string vnd.kde. KOffice (>=1.2) >>>58 string karbon Karbon document >>>58 string kchart KChart document >>>58 string kformula KFormula document >>>58 string kivio Kivio document >>>58 string kontour Kontour document >>>58 string kpresenter KPresenter document >>>58 string kspread KSpread document >>>58 string kword KWord document # OpenOffice formats (for OpenOffice 1.x / StarOffice 6/7) # (mimetype contains "application/vnd.sun.xml.") # URL: https://en.wikipedia.org/wiki/OpenOffice.org_XML # reference: http://fileformats.archiveteam.org/wiki/OpenOffice.org_XML >>50 string vnd.sun.xml. OpenOffice.org 1.x >>>62 string writer Writer >>>>68 byte !0x2e document !:mime application/vnd.sun.xml.writer !:ext sxw >>>>68 string .template template !:mime application/vnd.sun.xml.writer.template !:ext stw >>>>68 string .web Web template !:mime application/vnd.sun.xml.writer.web !:ext stw >>>>68 string .global global document !:mime application/vnd.sun.xml.writer.global !:ext sxg >>>62 string calc Calc >>>>66 byte !0x2e spreadsheet !:mime application/vnd.sun.xml.calc !:ext sxc >>>>66 string .template template !:mime application/vnd.sun.xml.calc.template !:ext stc >>>62 string draw Draw >>>>66 byte !0x2e document !:mime application/vnd.sun.xml.draw !:ext sxd >>>>66 string .template template !:mime application/vnd.sun.xml.draw.template !:ext std >>>62 string impress Impress >>>>69 byte !0x2e presentation !:mime application/vnd.sun.xml.impress !:ext sxi >>>>69 string .template template !:mime application/vnd.sun.xml.impress.template !:ext sti >>>62 string math Math document !:mime application/vnd.sun.xml.math !:ext sxm >>>62 string base Database file !:mime application/vnd.sun.xml.base !:ext sdb # OpenDocument formats (for OpenOffice 2.x / StarOffice >= 8) # URL: http://fileformats.archiveteam.org/wiki/OpenDocument # https://lists.oasis-open.org/archives/office/200505/msg00006.html # (mimetype contains "application/vnd.oasis.opendocument.") >>50 string vnd.oasis.opendocument. OpenDocument >>>73 string text >>>>77 byte !0x2d Text !:mime application/vnd.oasis.opendocument.text !:ext odt >>>>77 string -template Text Template !:mime application/vnd.oasis.opendocument.text-template !:ext ott >>>>77 string -web HTML Document Template !:mime application/vnd.oasis.opendocument.text-web !:ext oth >>>>77 string -master Master Document !:mime application/vnd.oasis.opendocument.text-master !:ext odm >>>73 string graphics >>>>81 byte !0x2d Drawing !:mime application/vnd.oasis.opendocument.graphics !:ext odg >>>>81 string -template Drawing Template !:mime application/vnd.oasis.opendocument.graphics-template !:ext otg >>>73 string presentation >>>>85 byte !0x2d Presentation !:mime application/vnd.oasis.opendocument.presentation !:ext odp >>>>85 string -template Presentation Template !:mime application/vnd.oasis.opendocument.presentation-template !:ext otp >>>73 string spreadsheet >>>>84 byte !0x2d Spreadsheet !:mime application/vnd.oasis.opendocument.spreadsheet !:ext ods >>>>84 string -template Spreadsheet Template !:mime application/vnd.oasis.opendocument.spreadsheet-template !:ext ots >>>73 string chart >>>>78 byte !0x2d Chart !:mime application/vnd.oasis.opendocument.chart !:ext odc >>>>78 string -template Chart Template !:mime application/vnd.oasis.opendocument.chart-template !:ext otc >>>73 string formula >>>>80 byte !0x2d Formula !:mime application/vnd.oasis.opendocument.formula !:ext odf >>>>80 string -template Formula Template !:mime application/vnd.oasis.opendocument.formula-template !:ext otf # https://www.loc.gov/preservation/digital/formats/fdd/fdd000441.shtml >>>73 string database Database !:mime application/vnd.oasis.opendocument.database !:ext odb # Valid for LibreOffice Base 6.0.1.1 at least >>>73 string base Database # https://bugs.documentfoundation.org/show_bug.cgi?id=45854 !:mime application/vnd.oasis.opendocument.database #!:mime application/vnd.oasis.opendocument.base !:ext odb >>>73 string image >>>>78 byte !0x2d Image !:mime application/vnd.oasis.opendocument.image !:ext odi >>>>78 string -template Image Template !:mime application/vnd.oasis.opendocument.image-template !:ext oti # EPUB (OEBPS) books using OCF (OEBPS Container Format) # https://www.idpf.org/ocf/ocf1.0/download/ocf10.htm, section 4. # From: Ralf Brown >>50 string epub+zip EPUB document !:mime application/epub+zip # From: Joerg Jenderek # URL: http://en.wikipedia.org/wiki/CorelDRAW # NOTE: version; til 2 WL-based; from 3 til 13 by ./riff; from 14 zip based >>50 string x-vnd.corel. Corel >>>62 string draw.document+zip Draw drawing, version 14-16 !:mime application/x-vnd.corel.draw.document+zip !:ext cdr >>>62 string draw.template+zip Draw template, version 14-16 !:mime application/x-vnd.corel.draw.template+zip !:ext cdrt >>>62 string zcf.draw.document+zip Draw drawing, version 17-22 !:mime application/x-vnd.corel.zcf.draw.document+zip !:ext cdr >>>62 string zcf.draw.template+zip Draw template, version 17-22 !:mime application/x-vnd.corel.zcf.draw.template+zip !:ext cdt/cdrt # URL: http://product.corel.com/help/CorelDRAW/540240626/Main/EN/Doc/CorelDRAW-Other-file-formats.html >>>62 string zcf.pattern+zip Draw pattern, version 22 !:mime application/x-vnd.corel.zcf.pattern+zip !:ext pat # URL: https://en.wikipedia.org/wiki/Corel_Designer # Reference: http://fileformats.archiveteam.org/wiki/Corel_Designer # Note: called by TrID "Corel DESIGN graphics" >>>62 string designer.document+zip DESIGNER graphics, version 14-16 !:mime application/x-vnd.corel.designer.document+zip !:ext des >>>62 string zcf.designer.document+zip DESIGNER graphics, version 17-21 !:mime application/x-vnd.corel.zcf.designer.document+zip !:ext des # URL: http://product.corel.com/help/CorelDRAW/540223850/Main/EN/Documentation/ # CorelDRAW-Corel-Symbol-Library-CSL.html >>>62 string symbol.library+zip Symbol Library, version 6-16.3 !:mime application/x-vnd.corel.symbol.library+zip !:ext csl >>>62 string zcf.symbol.library+zip Symbol Library, version 17-22 !:mime application/x-vnd.corel.zcf.symbol.library+zip !:ext csl # Catch other ZIP-with-mimetype formats # In a ZIP file, the bytes immediately after a member's contents are # always "PK". The 2 regex rules here print the "mimetype" member's # contents up to the first 'P'. Luckily, most MIME types don't contain # any capital 'P's. This is a kludge. # (mimetype contains "application/") >>50 default x Zip data >>>38 regex [!-OQ-~]+ (MIME type "%s"?) !:mime application/zip # (mimetype contents other than "application/*") >26 string \x8\0\0\0mimetype >>38 string !application/ >>>38 regex [!-OQ-~]+ Zip data (MIME type "%s"?) !:mime application/zip # Java Jar files >(26.s+30) leshort 0xcafe Java archive data (JAR) !:mime application/java-archive # iOS App >(26.s+30) leshort !0xcafe >>26 string !\x8\0\0\0mimetype >>>30 string Payload/ >>>>38 search/64 .app/ iOS App !:mime application/x-ios-app # Dup, see above. #>30 search/100/b application/epub+zip EPUB document #!:mime application/epub+zip # Generic zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu) # Next line excludes specialized formats: >(26.s+30) leshort !0xcafe >>30 search/100/b !application/epub+zip >>>26 string !\x8\0\0\0mimetype Zip archive data !:mime application/zip >>>>4 beshort x \b, at least >>>>4 use zipversion >>>>4 beshort x to extract >>>>8 beshort x \b, compression method= >>>>8 use zipcompression >>>>0x161 string WINZIP \b, WinZIP self-extracting # StarView Metafile # From Pierre Ducroquet 0 string VCLMTF StarView MetaFile >6 beshort x \b, version %d >8 belong x \b, size %d # Zoo archiver 20 lelong 0xfdc4a7dc Zoo archive data !:mime application/x-zoo >4 byte >48 \b, v%c. >>6 byte >47 \b%c >>>7 byte >47 \b%c >32 byte >0 \b, modify: v%d >>33 byte x \b.%d+ >42 lelong 0xfdc4a7dc \b, >>70 byte >0 extract: v%d >>>71 byte x \b.%d+ # Shell archives 10 string #\ This\ is\ a\ shell\ archive shell archive text !:mime application/octet-stream # # LBR. NB: May conflict with the questionable # "binary Computer Graphics Metafile" format. # 0 string \0\ \ \ \ \ \ \ \ \ \ \ \0\0 LBR archive data # # PMA (CP/M derivative of LHA) # Update: Joerg Jenderek # URL: https://en.wikipedia.org/wiki/LHA_(file_format) # #2 string -pm0- PMarc archive data [pm0] 2 string -pm0- >0 use lharc-file #2 string -pm1- PMarc archive data [pm1] 2 string -pm1- >0 use lharc-file #2 string -pm2- PMarc archive data [pm2] 2 string -pm2- >0 use lharc-file 2 string -pms- PMarc SFX archive (CP/M, DOS) #!:mime application/x-foobar-exec !:ext com 5 string -pc1- PopCom compressed executable (CP/M) #!:mime application/x- #!:ext com # From Rafael Laboissiere # The Project Revision Control System (see # http://prcs.sourceforge.net) generates a packaged project # file which is recognized by the following entry: 0 leshort 0xeb81 PRCS packaged project # Microsoft cabinets # by David Necas (Yeti) #0 string MSCF\0\0\0\0 Microsoft cabinet file data, #>25 byte x v%d #>24 byte x \b.%d # MPi: All CABs have version 1.3, so this is pointless. # Better magic in debian-additions. # GTKtalog catalogs # by David Necas (Yeti) 4 string gtktalog\ GTKtalog catalog data, >13 string 3 version 3 >>14 beshort 0x677a (gzipped) >>14 beshort !0x677a (not gzipped) >13 string >3 version %s ############################################################################ # Parity archive reconstruction file, the 'par' file format now used on Usenet. 0 string PAR\0 PARity archive data >48 leshort =0 - Index file >48 leshort >0 - file number %d # Felix von Leitner 0 string d8:announce BitTorrent file !:mime application/x-bittorrent # Durval Menezes, 0 string d13:announce-list BitTorrent file !:mime application/x-bittorrent 0 string d7:comment BitTorrent file !:mime application/x-bittorrent 0 string d4:info BitTorrent file !:mime application/x-bittorrent # Atari MSA archive - Teemu Hukkanen 0 beshort 0x0e0f Atari MSA archive data >2 beshort x \b, %d sectors per track >4 beshort 0 \b, 1 sided >4 beshort 1 \b, 2 sided >6 beshort x \b, starting track: %d >8 beshort x \b, ending track: %d # Alternate ZIP string (amc@arwen.cs.berkeley.edu) 0 string PK00PK\003\004 Zip archive data !:mime application/zip !:ext zip/cbz # ACE archive (from http://www.wotsit.org/download.asp?f=ace) # by Stefan `Sec` Zehl 7 string **ACE** ACE archive data >15 byte >0 version %d >16 byte =0x00 \b, from MS-DOS >16 byte =0x01 \b, from OS/2 >16 byte =0x02 \b, from Win/32 >16 byte =0x03 \b, from Unix >16 byte =0x04 \b, from MacOS >16 byte =0x05 \b, from WinNT >16 byte =0x06 \b, from Primos >16 byte =0x07 \b, from AppleGS >16 byte =0x08 \b, from Atari >16 byte =0x09 \b, from Vax/VMS >16 byte =0x0A \b, from Amiga >16 byte =0x0B \b, from Next >14 byte x \b, version %d to extract >5 leshort &0x0080 \b, multiple volumes, >>17 byte x \b (part %d), >5 leshort &0x0002 \b, contains comment >5 leshort &0x0200 \b, sfx >5 leshort &0x0400 \b, small dictionary >5 leshort &0x0800 \b, multi-volume >5 leshort &0x1000 \b, contains AV-String >>30 string \x16*UNREGISTERED\x20VERSION* (unregistered) >5 leshort &0x2000 \b, with recovery record >5 leshort &0x4000 \b, locked >5 leshort &0x8000 \b, solid # Date in MS-DOS format (whatever that is) #>18 lelong x Created on # sfArk : compression program for Soundfonts (sf2) by Dirk Jagdmann # 0x1A string sfArk sfArk compressed Soundfont >0x15 string 2 >>0x1 string >\0 Version %s >>0x2A string >\0 : %s # DR-DOS 7.03 Packed File *.??_ # Reference: http://www.antonis.de/dos/dos-tuts/mpdostip/html/nwdostip.htm # Note: unpacked by PNUNPACK.EXE 0 string Packed\ File\ # by looking for Control-Z skip ASCII text starting with Packed File >0x18 ubyte 0x1a Personal NetWare Packed File !:mime application/x-novell-compress !:ext ??_ >>12 string x \b, was "%.12s" # 1 or 2 #>>0x19 ubyte x \b, at 0x19 %u >>0x1b ulelong x with %u bytes # EET archive # From: Tilman Sauerbeck 0 belong 0x1ee7ff00 EET archive !:mime application/x-eet # rzip archives 0 string RZIP rzip compressed data >4 byte x - version %d >5 byte x \b.%d >6 belong x (%d bytes) # From: Joerg Jenderek # URL: https://help.foxitsoftware.com/kb/install-fzip-file.php # reference: http://mark0.net/download/triddefs_xml.7z/ # defs/f/fzip.trid.xml # Note: unknown compression; No "PK" zip magic; normally in directory like # "%APPDATA%\Foxit Software\Addon\Foxit Reader\Install" 0 ubequad 0x2506781901010000 Foxit add-on/update !:mime application/x-fzip !:ext fzip # From: "Robert Dale" 0 belong 123 dar archive, >4 belong x label "%.8x >>8 belong x %.8x >>>12 beshort x %.4x" >14 byte 0x54 end slice >14 beshort 0x4e4e multi-part >14 beshort 0x4e53 multi-part, with -S # Symbian installation files # https://www.thouky.co.uk/software/psifs/sis.html # http://developer.symbian.com/main/downloads/papers/SymbianOSv91/softwareinstallsis.pdf 8 lelong 0x10000419 Symbian installation file !:mime application/vnd.symbian.install >4 lelong 0x1000006D (EPOC release 3/4/5) >4 lelong 0x10003A12 (EPOC release 6) 0 lelong 0x10201A7A Symbian installation file (Symbian OS 9.x) !:mime x-epoc/x-sisx-app # From "Nelson A. de Oliveira" 0 string MPQ\032 MoPaQ (MPQ) archive # From: "Nelson A. de Oliveira" # .kgb 0 string KGB_arch KGB Archiver file >10 string x with compression level %.1s # xar (eXtensible ARchiver) archive # URL: https://en.wikipedia.org/wiki/Xar_(archiver) # xar archive format: https://code.google.com/p/xar/ # From: "David Remahl" # Update: Joerg Jenderek # TODO: lzma compression; X509Data for pkg and xip # Note: verified by `xar --dump-header -f FullBundleUpdate.xar` or # 7z t -txar Xcode_10.2_beta_4.xip` 0 string xar! xar archive !:mime application/x-xar # pkg for Mac OSX installer package like FullBundleUpdate.pkg # xip for signed Apple software like Xcode_10.2_beta_4.xip !:ext xar/pkg/xip # always 28 in older archives >4 ubeshort >28 \b, header size %u # currently there exit only version 1 since about 2014 >6 ubeshort >1 version %u, >8 ubequad x compressed TOC: %llu, #>16 ubequad x uncompressed TOC: %llu, # cksum_alg 0-2 in older and also 3-4 in newer >24 belong 0 no checksum >24 belong 1 SHA-1 checksum >24 belong 2 MD5 checksum >24 belong 3 SHA-256 checksum >24 belong 4 SHA-512 checksum >24 belong >4 unknown %#x checksum #>24 belong >4 checksum # For no compression jump 0 bytes >24 belong 0 >>0 ubyte x # jump more bytes forward by header size >>>&(4.S) ubyte x # jump more bytes forward by compressed table of contents size #>>>>&(8.Q) ubequad x \b, heap data %#llx >>>>&(8.Q) ubyte x # look for data by ./compress after message with 1 space at end >>>>>&-3 indirect x \b, contains # For SHA-1 jump 20 minus 2 bytes >24 belong 1 >>18 ubyte x # jump more bytes forward by header size >>>&(4.S) ubyte x # jump more bytes forward by compressed table of contents size >>>>&(8.Q) ubyte x # data compressed by gzip, bzip, lzma or none >>>>>&-1 indirect x \b, contains # For SHA-256 jump 32 minus 2 bytes >24 belong 3 >>30 ubyte x # jump more bytes forward by header size >>>&(4.S) ubyte x # jump more bytes forward by compressed table of contents size >>>>&(8.Q) ubyte x >>>>>&-1 indirect x \b, contains # For SHA-512 jump 64 minus 2 bytes >24 belong 4 >>62 ubyte x # jump more bytes forward by header size >>>&(4.S) ubyte x # jump more bytes forward by compressed table of contents size >>>>&(8.Q) ubyte x >>>>>&-1 indirect x \b, contains # Type: Parity Archive # From: Daniel van Eeden 0 string PAR2 Parity Archive Volume Set # Bacula volume format. (Volumes always start with a block header.) # URL: https://bacula.org/3.0.x-manuals/en/developers/developers/Block_Header.html # From: Adam Buchbinder 12 string BB02 Bacula volume >20 bedate x \b, started %s # ePub is XHTML + XML inside a ZIP archive. The first member of the # archive must be an uncompressed file called 'mimetype' with contents # 'application/epub+zip' # From: "Michael Gorny" # ZPAQ: http://mattmahoney.net/dc/zpaq.html 0 string zPQ ZPAQ stream >3 byte x \b, level %d # From: Barry Carter # https://encode.ru/threads/456-zpaq-updates/page32 0 string 7kSt ZPAQ file # BBeB ebook, unencrypted (LRF format) # URL: https://www.sven.de/librie/Librie/LrfFormat # From: Adam Buchbinder 0 string L\0R\0F\0\0\0 BBeB ebook data, unencrypted >8 beshort x \b, version %d >36 byte 1 \b, front-to-back >36 byte 16 \b, back-to-front >42 beshort x \b, (%dx, >44 beshort x %d) # Symantec GHOST image by Joerg Jenderek at May 2014 # https://us.norton.com/ghost/ # https://www.garykessler.net/library/file_sigs.html 0 ubelong&0xFFFFf7f0 0xFEEF0100 Norton GHost image # *.GHO >2 ubyte&0x08 0x00 \b, first file # *.GHS or *.[0-9] with cns program option >2 ubyte&0x08 0x08 \b, split file # part of split index interesting for *.ghs >>4 ubyte x id=%#x # compression tag minus one equals numeric compression command line switch z[1-9] >3 ubyte 0 \b, no compression >3 ubyte 2 \b, fast compression (Z1) >3 ubyte 3 \b, medium compression (Z2) >3 ubyte >3 >>3 ubyte <11 \b, compression (Z%d-1) >2 ubyte&0x08 0x00 # ~ 30 byte password field only for *.gho >>12 ubequad !0 \b, password protected >>44 ubyte !1 # 1~Image All, sector-by-sector only for *.gho >>>10 ubyte 1 \b, sector copy # 1~Image Boot track only for *.gho >>>43 ubyte 1 \b, boot track # 1~Image Disc only for *.gho implies Image Boot track and sector copy >>44 ubyte 1 \b, disc sector copy # optional image description only *.gho >>0xff string >\0 "%-.254s" # look for DOS sector end sequence >0xE08 search/7776 \x55\xAA >>&-512 indirect x \b; contains # Google Chrome extensions # https://developer.chrome.com/extensions/crx # https://developer.chrome.com/extensions/hosting 0 string Cr24 Google Chrome extension !:mime application/x-chrome-extension >4 ulong x \b, version %u # SeqBox - Sequenced container # ext: sbx, seqbox # Marco Pontello marcopon@gmail.com # reference: https://github.com/MarcoPon/SeqBox 0 string SBx SeqBox, >3 byte x version %d # LyNX archive 56 string USE\040LYNX\040TO\040DISSOLVE\040THIS\040FILE LyNX archive # From: Joerg Jenderek # URL: https://www.acronis.com/ # Reference: https://en.wikipedia.org/wiki/TIB_(file_format) # Note: only tested with True Image 2013 Build 5962 and 2019 Build 14110 0 ubequad 0xce24b9a220000000 Acronis True Image backup !:mime application/x-acronis-tib !:ext tib # 01000000 #>20 ubelong x \b, at 20 %#x # 20000000 #>28 ubelong x \b, at 28 %#x # strings like "Generic- SD/MMC 1.00" "Unknown Disk" "Msft Virtual Disk 1.0" # ??? # strings like "\Device\0000011e" "\Device\0000015a" #>0 search/0x6852300/cs \\Device\\ #>>&-1 pstring x \b, %s # "\Device\HarddiskVolume30" "\Device\HarddiskVolume39" #>>>&1 search/180/cs \\Device\\ #>>>>&-1 pstring x \b, %s #>>>>>&0 search/29/cs \0\0\xc8\0 # disk label #>>>>>>&10 lestring16 x \b, disk label %11.11s #>>>>>>&9 plestring16 x \b, disk label "%11.11s" #>>>>>>&10 ubequad x %16.16llx # Gentoo XPAK binary package # by Michal Gorny # https://gitweb.gentoo.org/proj/portage.git/tree/man/xpak.5 -4 string STOP >-16 string XPAKSTOP Gentoo binary package (XPAK) # From: Joerg Jenderek # URL: https://kodi.wiki/view/TexturePacker # Reference: https://mirrors.kodi.tv/releases/source/17.3-Krypton.tar.gz # /xbmc-Krypton/xbmc/guilib/XBTF.h # /xbmc-Krypton/xbmc/guilib/XBTF.cpp 0 string XBTF # skip ASCII text by looking for terminating \0 of path >264 ubyte 0 XBMC texture package !:mime application/x-xbmc-xbt !:ext xbt # XBTF_VERSION 2 >>4 string !2 \b, version %-.1s # nofFiles /xbmc-Krypton/xbmc/guilib/XBTFReader.cpp >>5 ulelong x \b, %u file # plural s >>5 ulelong >1 \bs # path[CXBTFFile[MaximumPathLength=256] >>9 string x \b, 1st %s # ALZIP archive # by Hyungjun Park , Hajin Jang # http://kippler.com/win/unalz/ # https://salsa.debian.org/l10n-korean-team/unalz 0 string ALZ\001 ALZ archive data !:ext alz # https://cf-aldn.altools.co.kr/setup/EGG_Specification.zip 0 string EGGA EGG archive data, !:ext egg >5 byte x version %u >4 byte x \b.%u >>0x0E ulelong =0x08E28222 >>0x0E ulelong =0x24F5A262 \b, split >>0x0E ulelong =0x24E5A060 \b, solid >>0x0E default x \b, unknown # PAQ9A archive # URL: http://mattmahoney.net/dc/#paq9a # Note: Line 1186 of paq9a.cpp gives the magic bytes 0 string pQ9\001 PAQ9A archive