#------------------------------------------------------------------------------ # $File: archive,v 1.193 2023/07/27 17:55:58 christos Exp $ # archive: file(1) magic for archive formats (see also "msdos" for self- # extracting compressed archives) # # cpio, ar, arc, arj, hpack, lha/lharc, rar, squish, uc2, zip, zoo, etc. # pre-POSIX "tar" archives are also handled in the C code ../../src/is_tar.c. # POSIX tar archives # URL: https://en.wikipedia.org/wiki/Tar_(computing) # Reference: https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5&manpath=FreeBSD+8-current # header mainly padded with nul bytes 500 quad 0 !:strength /2 # filename or extended attribute printable strings in range space null til umlaut ue >0 ubeshort >0x1F00 >>0 ubeshort <0xFCFD # last 4 header bytes often null but tar\0 in gtarfail2.tar gtarfail.tar-bad # at https://sourceforge.net/projects/s-tar/files/testscripts/ >>>508 ubelong&0x8B9E8DFF 0 # nul, space or ascii digit 0-7 at start of mode >>>>100 ubyte&0xC8 =0 >>>>>101 ubyte&0xC8 =0 # nul, space at end of check sum >>>>>>155 ubyte&0xDF =0 # space or ascii digit 0 at start of check sum >>>>>>>148 ubyte&0xEF =0x20 # FOR DEBUGGING: #>>>>>>>>0 regex \^[0-9]{2,4}[.](png|jpg|jpeg|tif|tiff|gif|bmp) NAME "%s" # check for 1st image main name with digits used for sorting # and for name extension case insensitive like: PNG JPG JPEG TIF TIFF GIF BMP >>>>>>>>0 regex \^[0-9]{2,4}[.](png|jpg|jpeg|tif|tiff|gif|bmp) >>>>>>>>>0 use tar-cbt # check for 1st member name with ovf suffix >>>>>>>>0 regex \^.{1,96}[.](ovf) >>>>>>>>>0 use tar-ova # if 1st member name without digits and without used image suffix and without *.ovf then it is a TAR archive >>>>>>>>0 default x >>>>>>>>>0 use tar-file # minimal check and then display tar archive information which can also be # embedded inside others like Android Backup, Clam AntiVirus database 0 name tar-file >257 string !ustar # header padded with nuls >>257 ulong =0 # GNU tar version 1.29 with non pax format option 
without refusing # creates misleading V7 header for Long path, Multi-volume, Volume type >>>156 ubyte 0x4c GNU tar archive !:mime application/x-gtar !:ext tar/gtar >>>156 ubyte 0x4d GNU tar archive !:mime application/x-gtar !:ext tar/gtar >>>156 ubyte 0x56 GNU tar archive !:mime application/x-gtar !:ext tar/gtar >>>156 default x tar archive (V7) !:mime application/x-tar !:ext tar # other stuff in padding # some implementations add new fields to the blank area at the end of the header record # created for example by DOS TAR 3.20g 1994 Tim V.Shapore with -j option >>257 ulong !0 tar archive (old) !:mime application/x-tar !:ext tar # magic in newer, GNU, posix variants >257 string =ustar # 2 last char of magic and UStar version because string expression does not work # 2 space characters followed by a null for GNU variant >>261 ubelong =0x72202000 POSIX tar archive (GNU) !:mime application/x-gtar !:ext tar/gtar # UStar version with ASCII "00" >>261 ubelong 0x72003030 POSIX # gLOBAL and ExTENSION type only found in POSIX.1-2001 format >>>156 ubyte 0x67 \b.1-2001 >>>156 ubyte 0x78 \b.1-2001 >>>156 ubyte x tar archive !:mime application/x-ustar !:ext tar/ustar # version with 2 binary nuls embedded in Android Backup like com.android.settings.ab >>261 ubelong 0x72000000 tar archive (ustar) !:mime application/x-ustar !:ext tar/ustar # not seen ustar variant with garbish version >>261 default x tar archive (unknown ustar) !:mime application/x-ustar !:ext tar/ustar # type flag of 1st tar archive member #>156 ubyte x \b, %c-type >156 ubyte x >>156 ubyte 0 \b, file >>156 ubyte 0x30 \b, file >>156 ubyte 0x31 \b, hard link >>156 ubyte 0x32 \b, symlink >>156 ubyte 0x33 \b, char device >>156 ubyte 0x34 \b, block device >>156 ubyte 0x35 \b, directory >>156 ubyte 0x36 \b, fifo >>156 ubyte 0x37 \b, reserved >>156 ubyte 0x4c \b, long path >>156 ubyte 0x4d \b, multi volume >>156 ubyte 0x56 \b, volume >>156 ubyte 0x67 \b, global >>156 ubyte 0x78 \b, extension >>156 default x \b, type 
>>>156 ubyte x '%c' # name[100] >0 string >\0 %-.60s # mode mainly stored as an octal number in ASCII null or space terminated >100 string >\0 \b, mode %-.7s # user id mainly as octal numbers in ASCII null or space terminated >108 string >\0 \b, uid %-.7s # group id mainly as octal numbers in ASCII null or space terminated >116 string >\0 \b, gid %-.7s # size mainly as octal number in ASCII >124 ubyte <0x38 >>124 string >\0 \b, size %-.12s # coding indicated by setting the high-order bit of the leftmost byte >124 ubyte >0xEF \b, size 0x >>124 ubyte !0xff \b%2.2x >>125 ubyte !0xff \b%2.2x >>126 ubyte !0xff \b%2.2x >>127 ubyte !0xff \b%2.2x >>128 ubyte !0xff \b%2.2x >>129 ubyte !0xff \b%2.2x >>130 ubyte !0xff \b%2.2x >>131 ubyte !0xff \b%2.2x >>132 ubyte !0xff \b%2.2x >>133 ubyte !0xff \b%2.2x >>134 ubyte !0xff \b%2.2x >>135 ubyte !0xff \b%2.2x # seconds since 0:0:0 1 jan 1970 UTC as octal number mainly in ASCII null or space terminated >136 string >\0 \b, seconds %-.11s # header checksum stored as an octal number in ASCII null or space terminated #>148 string x \b, cksum %.7s # linkname[100] >157 string >\0 \b, linkname %-.40s # additional fields for ustar >257 string =ustar # owner user name null terminated >>265 string >\0 \b, user %-.32s # group name null terminated >>297 string >\0 \b, group %-.32s # device major minor if not zero >>329 ubequad&0xCFCFCFCFcFcFcFdf !0 >>>329 string x \b, devmaj %-.7s >>337 ubequad&0xCFCFCFCFcFcFcFdf !0 >>>337 string x \b, devmin %-.7s # prefix[155] >>345 string >\0 \b, prefix %-.155s # old non ustar/POSIX tar >257 string !ustar >>508 string =tar\0 # padding[255] in old star >>>257 string >\0 \b, padding: %-.40s >>508 default x # padding[255] in old tar sometimes comment field >>>257 string >\0 \b, comment: %-.40s # Summary: Comic Book Archive *.CBT with TAR format # URL: https://en.wikipedia.org/wiki/Comic_book_archive # http://fileformats.archiveteam.org/wiki/Comic_Book_Archive # Note: there exist also RAR, ZIP, ACE and 7Z packed 
variants 0 name tar-cbt >0 string x Comic Book archive, tar archive #!:mime application/x-tar !:mime application/vnd.comicbook #!:mime application/vnd.comicbook+tar !:ext cbt # name[100] probably like: 19.jpg 0001.png 0002.png # or maybe like ComicInfo.xml >0 string >\0 \b, 1st image %-.60s # Summary: Open Virtualization Format *.OVF with disk images and more packed as TAR archive *.OVA # From: Joerg Jenderek # URL: https://en.wikipedia.org/wiki/Open_Virtualization_Format # http://fileformats.archiveteam.org/wiki/OVF_(Open_Virtualization_Format) # Reference: http://mark0.net/download/triddefs_xml.7z/defs/o/ova.trid.xml # Note: called "Open Virtualization Format package" by TrID # assuming *.ovf comes first 0 name tar-ova >0 string x Open Virtualization Format Archive #!:mime application/x-ustar # http://extension.nirsoft.net/ova !:mime application/x-virtualbox-ova !:ext ova # assuming name[100] like: DOS-0.9.ovf FreeDOS_1.ovf Win98SE_DE.ovf >0 string >\0 \b, with %-.60s # Incremental snapshot gnu-tar format from: # https://www.gnu.org/software/tar/manual/html_node/Snapshot-Files.html 0 string GNU\ tar- GNU tar incremental snapshot data >&0 regex [0-9]\\.[0-9]+-[0-9]+ version %s # cpio archives # # Yes, the top two "cpio archive" formats *are* supposed to just be "short". # The idea is to indicate archives produced on machines with the same # byte order as the machine running "file" with "cpio archive", and # to indicate archives produced on machines with the opposite byte order # from the machine running "file" with "byte-swapped cpio archive". # # The SVR4 "cpio(4)" hints that there are additional formats, but they # are defined as "short"s; I think all the new formats are # character-header formats and thus are strings, not numbers. 
# URL: http://fileformats.archiveteam.org/wiki/Cpio # https://en.wikipedia.org/wiki/Cpio # Reference: https://people.freebsd.org/~kientzle/libarchive/man/cpio.5.txt # Update: Joerg Jenderek # # Reference: http://mark0.net/download/triddefs_xml.7z/defs/a/ark-cpio-bin.trid.xml # Note: called "CPIO archive (binary)" by TrID, "cpio/Binary LE" by 7-Zip and "CPIO" by DROID via PUID fmt/635 0 short 070707 # skip DROID fmt-635-signature-id-960.cpio by looking for pathname of 1st entry >26 string >\0 cpio archive !:mime application/x-cpio # https://download.opensuse.org/distribution/leap/15.4/iso/openSUSE-Leap-15.4-NET-x86_64-Media.iso # boot/x86_64/loader/bootlogo # message.cpi !:ext /cpio/cpi >>0 use cpio-bin # Reference: http://mark0.net/download/triddefs_xml.7z/defs/a/ark-cpio-bin-sw.trid.xml # Note: called "CPIO archive (byte swapped binary)" by TrID and "Cpio/Binary BE" by 7-Zip 0 short 0143561 byte-swapped cpio archive !:mime application/x-cpio # encoding: swapped # https://telparia.com/fileFormatSamples/archive/cpio/skeleton2.cpio !:ext cpio >0 use cpio-bin-be # Reference: http://mark0.net/download/triddefs_xml.7z/defs/a/ark-cpio.trid.xml # Note: called "CPIO archive (portable)" by TrID, "cpio/Portable ASCII" by 7-Zip and "cpio/odc" by GNU cpio 0 string 070707 ASCII cpio archive (pre-SVR4 or odc) !:mime application/x-cpio # https://telparia.com/fileFormatSamples/archive/cpio/ pthreads-1.60B5.osr5src.cpio cinema.cpi VOL.000.008 VOL.000.012 !:ext cpio/cpi/008/012 # Note: called "CPIO archive (portable)" by TrID, "cpio/New ASCII" by 7-Zip and "cpio/newc" by GNU cpio 0 string 070701 ASCII cpio archive (SVR4 with no CRC) !:mime application/x-cpio # https://telparia.com/fileFormatSamples/archive/cpio/MainActor-2.06.3.cpio !:ext cpio # Note: called "CPIO archive (portable)" by TrID, "cpio/New CRC" by 7-Zip and "cpio/crc" by GNU cpio 0 string 070702 ASCII cpio archive (SVR4 with CRC) !:mime application/x-cpio # http://ftp.gnu.org/gnu/tar/tar-1.27.cpio.gz # 
https://telparia.com/fileFormatSamples/archive/cpio/pcmcia !:ext /cpio # display information of old binary cpio archive # Note: verfied by 7-Zip `7z l -tcpio -slt *.cpio` and # `cpio -ivt --numeric-uid-gid --file=clam.bin-le.cpio` 0 name cpio-bin # c_dev; device number; WHAT IS THAT? >2 uleshort x \b; device %u # c_ino; truncated inode number; use `ls --inode` >4 uleshort x \b, inode %u # c_mode; mode specifies permissions and file type like: ?622~?rw-r--r-- by `ls -l` >6 uleshort x \b, mode %o # c_uid; numeric user id; use `ls --numeric-uid-gid` >8 uleshort x \b, uid %u # c_gid; numeric group id >10 uleshort x \b, gid %u # c_nlink; links to this file; directories at least 2 >12 uleshort >1 \b, %u links # c_rdev; device number for block and character entries; zero for all other entries by writers # like 0x0440 for /dev/ttyS0 >14 uleshort >0 \b, device %#4.4x # c_mtime[2]; modification time in seconds since 1 January 1970; most-significant 16 bits first >16 medate x \b, modified %s # c_filesize[2]; size of pathname; most-significant 16 bits first like: 544 >22 melong x \b, %u bytes # c_namesize; bytes in the pathname that follows the header like: 9 #>20 uleshort x \b, namesize %u # pathname of entry like: "clam.exe" >26 string x "%s" # display information of old binary byte swapped cpio archive # Note: verfied by 7-Zip `7z l -tcpio -slt *.cpio` and # `LANGUAGE=C cpio -ivt --numeric-uid-gid --file=clam.bin-be.cpio` 0 name cpio-bin-be >2 ubeshort x \b; device %u >4 ubeshort x \b, inode %u >6 ubeshort x \b, mode %o >8 ubeshort x \b, uid %u >10 ubeshort x \b, gid %u >12 ubeshort >1 \b, %u links >14 ubeshort >0 \b, device %#4.4x >16 bedate x \b, modified %s >22 ubelong x \b, %u bytes #>20 ubeshort x \b, namesize %u >26 string x "%s" # # Various archive formats used by various versions of the "ar" # command. # # # Original UNIX archive formats. 
# They were written with binary values in host byte order, and
# the magic number was a host "int", which might have been 16 bits
# or 32 bits. We don't say "PDP-11" or "VAX", as there might have
# been ports to little-endian 16-bit-int or 32-bit-int platforms
# (x86?) using some of those formats; if none existed, feel free
# to use "PDP-11" for little-endian 16-bit and "VAX" for little-endian
# 32-bit. There might have been big-endian ports of that sort as
# well.
#
0	leshort	0177555	very old 16-bit-int little-endian archive
0	beshort	0177555	very old 16-bit-int big-endian archive
0	lelong	0177555	very old 32-bit-int little-endian archive
0	belong	0177555	very old 32-bit-int big-endian archive

# the symbol table member name directly follows the 2 or 4 byte magic
0	leshort	0177545	old 16-bit-int little-endian archive
>2	string	__.SYMDEF	random library
0	beshort	0177545	old 16-bit-int big-endian archive
>2	string	__.SYMDEF	random library
0	lelong	0177545	old 32-bit-int little-endian archive
>4	string	__.SYMDEF	random library
0	belong	0177545	old 32-bit-int big-endian archive
>4	string	__.SYMDEF	random library

#
# From "pdp" (but why a 4-byte quantity?)
#
0	lelong	0x39bed	PDP-11 old archive
0	lelong	0x39bee	PDP-11 4.0 archive

#
# XXX - what flavor of APL used this, and was it a variant of
# some ar archive format? It's similar to, but not the same
# as, the APL workspace magic numbers in pdp.
#
0	long	0100554	apl workspace

#
# System V Release 1 portable(?) archive format.
#
# The test string is the literal text "<ar>"; a bare "=" would compare
# against an empty string and match every file.
0	string	=<ar>	System V Release 1 ar archive
!:mime	application/x-archive

#
# Debian package; it's in the portable archive format, and needs to go
# before the entry for regular portable archives, as it's recognized as
# a portable archive whose first member has a name beginning with
# "debian".
#
# Update: Joerg Jenderek
# URL: https://en.wikipedia.org/wiki/Deb_(file_format)
# The portable archive header is the 8 byte string "!<arch>\n"; the first
# member name therefore starts at offset 8 and "debian" ends at offset 14.
0	string	=!<arch>\ndebian
# https://manpages.debian.org/testing/dpkg/dpkg-split.1.en.html
>14	string	-split	part of multipart Debian package
!:mime	application/vnd.debian.binary-package
# udeb is used for stripped down deb file
!:ext	deb/udeb
>14	string	-binary	Debian binary package
!:mime	application/vnd.debian.binary-package
# For ipk packager see also https://en.wikipedia.org/wiki/Opkg
!:ext	deb/udeb/ipk
# This should not happen
>14	default	x	Unknown Debian package
# NL terminated version; for most Debian cases this is 2.0 or 2.1 for split
>68	string	>\0	(format %s)
#>68 string !2.0\n
#>>68 string x (format %.3s)
>68	string	=2.0\n
# 2nd archive name=control archive name like control.tar.gz or control.tar.xz
# or control.tar.zst
>>72	string	>\0	\b, with %.15s
# look for 3rd archive name=data archive name like data.tar.{gz,xz,bz2,lzma}
>>0	search/0x93e4f	data.tar.	\b, data compression
# the above line only works if FILE_BYTES_MAX in ../../src/file.h is raised
# for example like libreoffice-dev-doc_1%3a5.2.7-1+rpi1+deb9u3_all.deb
>>>&0	string	x	%.2s
# skip space (0x20 BSD) and slash (0x2f System V) character marking end of name
>>>&2	ubyte	!0x20
>>>>&-1	ubyte	!0x2f
# display 3rd character of file name extension like 2 of bz2 or m of lzma
>>>>>&-1	ubyte	x	\b%c
>>>>>>&0	ubyte	!0x20
>>>>>>>&-1	ubyte	!0x2f
# display 4th character of file name extension like a of lzma
>>>>>>>>&-1	ubyte	x	\b%c
# split debian package case
>68	string	=2.1\n
# dpkg-1.18.25/dpkg-split/info.c
# NL terminated ASCII package name like ckermit
>>&0	string	x	\b, %s
# NL terminated package version like 302-5.3
>>>&1	string	x	%s
# NL terminated MD5 checksum
>>>>&1	string	x	\b, MD5 %s
# NL terminated original package length
>>>>>&1	string	x	\b, unsplitted size %s
# NL terminated part length
>>>>>>&1	string	x	\b, part length %s
# NL terminated package part like n/m
>>>>>>>&1	string	x	\b, part %s
# NL terminated package architecture like armhf since dpkg 1.16.1 or later
>>>>>>>>&1	string	x	\b, %s

#
# MIPS archive; they're in the portable archive format, and need to go
# before the entry for regular portable archives, as it's recognized as
# a portable archive whose first member has a name beginning with
# "__________E".
#
# 8 byte "!<arch>\n" header plus the 11 byte name prefix puts the
# variant letters tested below at offsets 19 to 22.
0	string	=!<arch>\n__________E	MIPS archive
!:mime	application/x-archive
>20	string	U	with MIPS Ucode members
>21	string	L	with MIPSEL members
>21	string	B	with MIPSEB members
>19	string	L	and an EL hash table
>19	string	B	and an EB hash table
>22	string	X	-- out of date

#
# BSD/SVR2-and-later portable archive formats.
#
# Update: Joerg Jenderek
# URL: http://fileformats.archiveteam.org/wiki/AR
# Reference: https://www.unix.com/man-page/opensolaris/3HEAD/ar.h/
# Note: Mach-O universal binary in ./cafebabe is dependent
# TODO: unify current ar archive, MIPS archive, Debian package
# distinguish BSD, SVR; 32, 64 bit; HP from other 32-bit SVR;
# *.ar packages from *.a libraries. handle empty archive
0	string	=!<arch>\n	current ar archive
# print first and possibly second ar_name[16] for debugging purpose
#>8 string x \b, 1st "%.16s"
#>68 string x \b, 2nd "%.16s"
!:mime	application/x-archive
# a in most case for libraries; lib for Microsoft libraries; ar else cases
!:ext	a/lib/ar
>8	string	__.SYMDEF	random library
# first member with long marked name __.SYMDEF SORTED implies BSD library
>68	string	__.SYMDEF\ SORTED	random library
# Reference: https://parisc.wiki.kernel.org/images-parisc/b/b2/Rad_11_0_32.pdf
# "archive file" entry moved from ./hp
# LST header system_id 0210h~PA-RISC 1.1,... identifies the target architecture
# LST header a_magic 0619h~relocatable library
>68	belong	0x020b0619	- PA-RISC1.0 relocatable library
>68	belong	0x02100619	- PA-RISC1.1 relocatable library
>68	belong	0x02110619	- PA-RISC1.2 relocatable library
>68	belong	0x02140619	- PA-RISC2.0 relocatable library
#EOF for common ar archives

#
# "Thin" archive, as can be produced by GNU ar.
#
# A thin archive is marked by the 8 byte header "!<thin>\n"; without the
# "<thin>" token the test cannot tell it apart from a regular ar archive.
0	string	=!<thin>\n	thin archive with
>68	belong	0	no symbol entries
>68	belong	1	%d symbol entry
>68	belong	>1	%d symbol entries

0	search/1	-h-	Software Tools format archive text

# ARC archiver, from Daniel Quinlan (quinlan@yggdrasil.com)
#
# The first byte is the magic (0x1a), byte 2 is the compression type for
# the first file (0x01 through 0x09), and bytes 3 to 15 are the MS-DOS
# filename of the first file (null terminated). Since some types collide
# we only test some types on basis of frequency: 0x08 (83%), 0x09 (5%),
# 0x02 (5%), 0x03 (3%), 0x04 (2%), 0x06 (2%). 0x01 collides with terminfo.
# The mask 0x8080ffff compares the two leading bytes exactly and requires
# the high bit of the next two bytes (start of the file name) to be clear.
0	lelong&0x8080ffff	0x0000081a	ARC archive data, dynamic LZW
!:mime	application/x-arc
0	lelong&0x8080ffff	0x0000091a	ARC archive data, squashed
!:mime	application/x-arc
0	lelong&0x8080ffff	0x0000021a	ARC archive data, uncompressed
!:mime	application/x-arc
0	lelong&0x8080ffff	0x0000031a	ARC archive data, packed
!:mime	application/x-arc
0	lelong&0x8080ffff	0x0000041a	ARC archive data, squeezed
!:mime	application/x-arc
0	lelong&0x8080ffff	0x0000061a	ARC archive data, crunched
!:mime	application/x-arc
# [JW] stuff taken from idarc, obviously ARC successors:
0	lelong&0x8080ffff	0x00000a1a	PAK archive data
!:mime	application/x-arc
0	lelong&0x8080ffff	0x0000141a	ARC+ archive data
!:mime	application/x-arc
0	lelong&0x8080ffff	0x0000481a	HYP archive data
!:mime	application/x-arc

# Acorn archive formats (Disaster prone simpleton, m91dps@ecs.ox.ac.uk)
# I can't create either SPARK or ArcFS archives so I have not tested this stuff
# [GRR: the original entries collide with ARC, above; replaced with combined
# version (not tested)]
#0 byte 0x1a RISC OS archive (spark format)
0	string	\032archive	RISC OS archive (ArcFS format)
0	string	Archive\000	RISC OS archive (ArcFS format)

# All these were taken from idarc, many could not be verified. Unfortunately,
# there were many low-quality sigs, i.e. easy to trigger false positives.
# Please notify me of any real-world fishy/ambiguous signatures and I'll try # to get my hands on the actual archiver and see if I find something better. [JW] # probably many can be enhanced by finding some 0-byte or control char near the start # idarc calls this Crush/Uncompressed... *shrug* 0 string CRUSH Crush archive data # Squeeze It (.sqz) 0 string HLSQZ Squeeze It archive data # SQWEZ 0 string SQWEZ SQWEZ archive data # HPack (.hpk) 0 string HPAK HPack archive data # HAP 0 string \x91\x33HF HAP archive data # MD/MDCD 0 string MDmd MDCD archive data # LIM 0 string LIM\x1a LIM archive data # SAR 3 string LH5 SAR archive data # BSArc/BS2 0 string \212\3SB\020\0 BSArc/BS2 archive data # Bethesda Softworks Archive (Oblivion) 0 string BSA\0 BSArc archive data >4 lelong x version %d # MAR 2 string =-ah MAR archive data # ACB #0 belong&0x00f800ff 0x00800000 ACB archive data # CPZ # TODO, this is what idarc says: 0 string \0\0\0 CPZ archive data # JRC 0 string JRchive JRC archive data # Quantum 0 string DS\0 Quantum archive data # ReSOF 0 string PK\3\6 ReSOF archive data # QuArk 0 string 7\4 QuArk archive data # YAC 14 string YC YAC archive data # X1 0 string X1 X1 archive data 0 string XhDr X1 archive data # CDC Codec (.dqt) 0 belong&0xffffe000 0x76ff2000 CDC Codec archive data # AMGC 0 string \xad6" AMGC archive data # NuLIB 0 string N\xc3\xb5F\xc3\xa9lx\xc3\xa5 NuLIB archive data # PakLeo 0 string LEOLZW PAKLeo archive data # ChArc 0 string SChF ChArc archive data # PSA 0 string PSA PSA archive data # CrossePAC 0 string DSIGDCC CrossePAC archive data # Freeze 0 string \x1f\x9f\x4a\x10\x0a Freeze archive data # KBoom 0 string \xc2\xa8MP\xc2\xa8 KBoom archive data # NSQ, must go after CDC Codec 0 string \x76\xff NSQ archive data # DPA 0 string Dirk\ Paehl DPA archive data # BA # TODO: idarc says "bytes 0-2 == bytes 3-5" # TTComp # URL: http://fileformats.archiveteam.org/wiki/TTComp_archive # Update: Joerg Jenderek # GRR: line below is too general as it matches also 
Panorama database "TCDB 2003-10 demo.pan", others 0 string \0\6 # look for first keyword of Panorama database *.pan >12 search/261 DESIGN # skip keyword with low entropy >12 default x # skip DOS 2.0 backup id file, sequence 6 with many nils like BACKUPID_xx6.@@@ handled by ./msdos >>8 quad !0 >>>0 use ttcomp # variant ASCII, 4K dictionary (strength=48=50-2). With strength=49 wrong order! WHY? 0 string \1\6 # TODO: # skip VAX-order 68k Blit mpx/mux executable (strength=50) handled by ./blit !:strength -2 >0 use ttcomp 0 string \0\5 # skip some DOS 2.0 backup id file, sequence 5 with many nils like BACKUPID_075.@@@ handled by ./msdos >8 quad !0 >>0 use ttcomp 0 string \1\5 # TODO: # variant ASCII, 2K dictionary (strength=48=50-2). With strength=49 wrong order! WHY? # skip ctab data (strength=50) handled by ./ibm6000 # skip locale data table (strength=50) handled by ./digital !:strength -2 >0 use ttcomp 0 string \0\4 # skip many Maple help database *.hdb with version tag handled by ./maple >1028 string !version # skip veclib maple.hdb by looking for Mable keyword >>4 search/1091 Maple\040 #>4 search/34090 Maple\040 >>4 default x # skip DOS 2.0-3.2 backed up sequence 4 with many nils like LOTUS5.RAR handled by ./msdos # skip xBASE Compound Index file *.CDX with many nils >>>0x54 quad !0 >>>>0 use ttcomp 0 string \1\4 # TODO: # skip shared library (strength=50) handled by ./ibm6000 !:strength -2 # skip Commodore PET BASIC programs (Mastermind.prg) with last 3 nil bytes (\0~end of line followed by 0000h line offset) #>-4 ubelong x LAST_BYTES=%8.8x >-4 ubelong&0x00FFffFF !0 >>0 use ttcomp # display information of TTComp archive 0 name ttcomp # (version 5.25) labeled the entry as "TTComp archive data" >0 ubyte x TTComp archive data !:mime application/x-compress-ttcomp # PBACKSCR.PI1 !:ext $xe/$ts/pi1/__d # compression type: 0~binary compression 1~ASCII compression >0 ubyte 0 \b, binary >0 ubyte 1 \b, ASCII # size of the dictionary: 4~1024 bytes 5~2048 bytes 6~4096 bytes >1 
ubyte 4 \b, 1K >1 ubyte 5 \b, 2K >1 ubyte 6 \b, 4K >1 ubyte x dictionary # https://mark0.net/forum/index.php?topic=848 # last 3 bytes probably have only 8 possible bit sequences # xxxxxxxx 0000000x 11111111 ____FFh # xxxxxxxx 10000000 01111111 __807Fh # 0xxxxxxx 11000000 00111111 __C03Fh # 00xxxxxx 11100000 00011111 __E01Fh # 000xxxxx 11110000 00001111 __F00Fh # 0000xxxx 11111000 00000111 __F807h # 00000xxx 11111100 00000011 __FC03h # 000000xx 11111110 00000001 __FE01h # but for quickgif.__d 0A7DD4h #>-3 ubyte x \b, last 3 bytes 0x%2.2x #>-2 ubeshort x \b%4.4x # From: Joerg Jenderek # URL: https://en.wikipedia.org/wiki/Disk_Copy # reference: http://nulib.com/library/FTN.e00005.htm 0x52 ubeshort 0x0100 # test for disk image size equal or above 400k >0x40 ubelong >409599 # test also for disk image size equal or below 1440k to skip # windows7en.mbr UNICODE.DAT #>>0x40 ubelong <1474561 # test now for "low" disk image size equal or below 64 MiB to skip # windows7en.mbr (B441BBAAh) UNICODE.DAT (0400AF05h) >>0x40 ubelong <0x04000001 # To skip Flags$StringJoiner.class with size 00106A61h test also for valid disk image sizes # 00064000 for 400k GCR disks dc42-400k-gcr.trid.xml # 000c8000 for 800k GCR disks dc42-800k-gcr.trid.xml # 000b4000 for 720k MFM disks dc42-720k-mfm.trid.xml # 00168000 for 1440k MFM disks dc42-1440k-mfm.trid.xml # https://lisaem.sunder.net/LisaProjectDocs.txt # 00500000 05M available # 00A00000 10M available # 01800000 24M possible # 02000000 32M uncertain # 04000000 64M uncertain >>>0x40 ubelong&0xf8003fFF 0 # skip samples with invalid disk name length like: # 181 (biosmd80.rom) 202 (Flags$StringJoiner.class) 90 (UNICODE.DAT) >>>>0x0 ubyte <64 >>>>>0 use dc42-floppy # display information of Apple DiskCopy 4.2 floppy image 0 name dc42-floppy # disk name length; maximal 63 #>0 ubyte x DISK NAME LENGTH %u # ASCII image pascal (maximal 63 bytes) name padded with NULs like: # "Microsoft Mail" "Disquette 2" "IIe Installer Disk" # "-lisaem.sunder.net hd-" 
(dc42-lisaem.trid.xml) "-not a Macintosh disk" (dc42-nonmac.trid.xml) >00 pstring/B x Apple DiskCopy 4.2 image %s #!:mime application/octet-stream !:mime application/x-dc42-floppy-image !:apple dCpydImg # probably also img like: "Utilitaires 2.img" "Installation 7.img" !:ext image/dc42/img # data size in bytes like: 409600 737280 819200 1474560 >0x40 ubelong x \b, %u bytes # for debugging purpose size in hexadecimal #>0x40 ubelong x (%#8.8x) # tag size in bytes like: 0 (often) 2580h (PUID fmt/625) 4B00h (Microsoft Mail.image) >0x44 ubelong >0 \b, %#x tag size # data checksum #>0x48 ubelong x \b, %#x checksum # tag checksum #>0x4c ubelong x \b, %#x tag checksum # disk encoding like: 0 1 2 3 (PUID: fmt/625) >0x50 ubyte 0 \b, GCR CLV ssdd (400k) >0x50 ubyte 1 \b, GCR CLV dsdd (800k) >0x50 ubyte 2 \b, MFM CAV dsdd (720k) >0x50 ubyte 3 \b, MFM CAV dshd (1440k) >0x50 ubyte >3 \b, %#x encoding # format byte like: 12h (Lisa 400K) 24h (400K Macintosh) 96h (800K Apple II disk) # 2 (Mac 400k "Disquette Installation 13.image") # 22h (double-sided MFM or Mac 800k "Disco 12.image" "IIe Installer Disk.image") >0x51 ubyte x \b, %#x format #>0x54 ubequad x \b, data %#16.16llx # ESP, could this conflict with Easy Software Products' (e.g.ESP ghostscript) documentation? 
0 string ESP ESP archive data # ZPack 0 string \1ZPK\1 ZPack archive data # Sky 0 string \xbc\x40 Sky archive data # UFA 0 string UFA UFA archive data # Dry 0 string =-H2O DRY archive data # FoxSQZ 0 string FOXSQZ FoxSQZ archive data # AR7 0 string ,AR7 AR7 archive data # PPMZ 0 string PPMZ PPMZ archive data # MS Compress # Update: Joerg Jenderek # URL: http://fileformats.archiveteam.org/wiki/MS-DOS_installation_compression # Reference: https://hwiegman.home.xs4all.nl/fileformats/compress/szdd_kwaj_format.html # Note: use correct version of extracting tool like EXPAND, UNPACK, DECOMP or 7Z 4 string \x88\xf0\x27 # KWAJ variant >0 string KWAJ MS Compress archive data, KWAJ variant !:mime application/x-ms-compress-kwaj # extension not working in version 5.32 # magic/Magdir/archive, 284: Warning: EXTENSION type ` ??_' has bad char '?' # file: line 284: Bad magic entry ' ??_' !:ext ??_ # compression method (0-4) >>8 uleshort x \b, %u method # offset of compressed data >>10 uleshort x \b, %#x offset #>>(10.s) uleshort x #>>>&-6 string x \b, TEST extension %-.3s # header flags to mark header extensions >>12 uleshort >0 \b, %#x flags # 4 bytes: decompressed length of file >>12 uleshort &0x01 >>>14 ulelong x \b, original size: %u bytes # 2 bytes: unknown purpose # 2 bytes: length of unknown data + mentioned bytes # 1-9 bytes: null-terminated file name # 1-4 bytes: null-terminated file extension >>12 uleshort &0x08 >>>12 uleshort ^0x01 >>>>12 uleshort ^0x02 >>>>>12 uleshort ^0x04 >>>>>>12 uleshort ^0x10 >>>>>>>14 string x \b, %-.8s >>>>>>12 uleshort &0x10 >>>>>>>14 string x \b, %-.8s >>>>>>>>&1 string x \b.%-.3s >>>>>12 uleshort &0x04 >>>>>>12 uleshort ^0x10 >>>>>>>(14.s) uleshort x >>>>>>>>&14 string x \b, %-.8s >>>>>>12 uleshort &0x10 >>>>>>>(14.s) uleshort x >>>>>>>>&14 string x \b, %-.8s >>>>>>>>>&1 string x \b.%-.3s >>>>12 uleshort &0x02 >>>>>12 uleshort ^0x04 >>>>>>12 uleshort ^0x10 >>>>>>>16 string x \b, %-.8s >>>>>>12 uleshort &0x10 >>>>>>>16 string x \b, %-.8s 
>>>>>>>>&1 string x \b.%-.3s >>>>>12 uleshort &0x04 >>>>>>12 uleshort ^0x10 >>>>>>>(16.s) uleshort x >>>>>>>>&16 string x \b, %-.8s >>>>>>12 uleshort &0x10 >>>>>>>(16.s) uleshort x >>>>>>>&16 string x %-.8s >>>>>>>>&1 string x \b.%-.3s >>>12 uleshort &0x01 >>>>12 uleshort ^0x02 >>>>>12 uleshort ^0x04 >>>>>>12 uleshort ^0x10 >>>>>>>18 string x \b, %-.8s >>>>>>12 uleshort &0x10 >>>>>>>18 string x \b, %-.8s >>>>>>>>&1 string x \b.%-.3s >>>>>12 uleshort &0x04 >>>>>>12 uleshort ^0x10 >>>>>>>(18.s) uleshort x >>>>>>>>&18 string x \b, %-.8s >>>>>>12 uleshort &0x10 >>>>>>>(18.s) uleshort x >>>>>>>>&18 string x \b, %-.8s >>>>>>>>>&1 string x \b.%-.3s >>>>12 uleshort &0x02 >>>>>12 uleshort ^0x04 >>>>>>12 uleshort ^0x10 >>>>>>>20 string x \b, %-.8s >>>>>>12 uleshort &0x10 >>>>>>>20 string x \b, %-.8s >>>>>>>>&1 string x \b.%-.3s >>>>>12 uleshort &0x04 >>>>>>12 uleshort ^0x10 >>>>>>>(20.s) uleshort x >>>>>>>>&20 string x \b, %-.8s >>>>>>12 uleshort &0x10 >>>>>>>(20.s) uleshort x >>>>>>>>&20 string x \b, %-.8s >>>>>>>>>&1 string x \b.%-.3s # 2 bytes: length of data + mentioned bytes # # SZDD variant Haruhiko Okumura's LZSS or 7z type MsLZ # URL: http://fileformats.archiveteam.org/wiki/MS-DOS_installation_compression # Reference: http://www.cabextract.org.uk/libmspack/doc/szdd_kwaj_format.html # http://mark0.net/download/triddefs_xml.7z/defs/s/szdd.trid.xml # Note: called "Microsoft SZDD compressed (Haruhiko Okumura's LZSS)" by TrID # verfied by 7-Zip `7z l -tMsLZ -slt *.??_` as MsLZ # `deark -l -m lzss_oku -d2 setup-1-41.bin` as "LZSS.C by Haruhiko Okumura" >0 string SZDD MS Compress archive data, SZDD variant # 2nd part of signature #>>4 ubelong 0x88F02733 \b, SIGNATURE OK !:mime application/x-ms-compress-szdd !:ext ??_ # The character missing from the end of the filename (0=unknown) >>9 string >\0 \b, %-.1s is last character of original name # https://www.betaarchive.com/forum/viewtopic.php?t=26161 # Compression mode: "A" (0x41) found but sometimes "B" in Windows 3.1 builds 
026 and 034e >>8 string !A \b, %-.1s method >>10 ulelong >0 \b, original size: %u bytes # Summary: InstallShield archive with SZDD compressed # URL: https://community.flexera.com/t5/InstallShield-Knowledge-Base/InstallShield-Redistributable-Files/ta-p/5647 # From: Joerg Jenderek 1 search/48/bs SZDD\x88\xF0\x27\x33 InstallShield archive #!:mime application/octet-stream !:mime application/x-installshield-compress-szdd !:ext ibt # name of compressed archive member like: setup.dl_ _setup7int.dl_ _setup2k.dl_ _igdi.dl_ cabinet.dl_ >0 string x %s # name of uncompressed archive member like: setup.dll _Setup.dll IGdi.dll CABINET.DLL >>&1 string x (%s) # probably version like: 9.0.0.333 9.1.0.429 11.50.0.42618 >>>&1 string x \b, version %s # SZDD member length like: 168048 169333 181842 >>>>&1 string x \b, %s bytes # MS Compress archive data #>&0 string SZDD \b, SIGNATURE FOUND >&0 indirect x # QBasic SZDD variant 3 string \x88\xf0\x27 >0 string SZ\x20 MS Compress archive data, QBasic variant !:mime application/x-ms-compress-sz !:ext ??$ >>8 ulelong >0 \b, original size: %u bytes # Summary: lzss compressed/EDI Pack # From: Joerg Jenderek # URL: http://fileformats.archiveteam.org/wiki/EDI_Install_packed_file # Note: called "EDI Install LZS compressed data" by TrID and verified by # command like `deark -l -m edi_pack -d2 BOOK01A.IC$` as "EDI Pack LZSS1" 0 string EDILZSS >7 string 1 # look for point character before orginal file name extension >>8 search/9/b . 
# check suffix of possible orginal file anme #>>>&0 ubelong x SUFFIX=%8.8x # samples without valid character after point in original file name field like: FENNEL.LZS PLANTAIN.LZS >>>&0 ubyte <0x20 >>>>0 use edi-lzs # samples with valid character after point in original file name field >>>&0 ubyte >0x1F # check 2nd charcter of suffix #>>>>&0 ubyte x 2ND_SUFFIX=%x # sample with one valid character after point followed by \0 in original file name field like: SPELMATE.H$ >>>>&0 ubyte =0 >>>>>0 use edi-pack >>>>&0 ubyte >0x1F # check 3rd charcter of suffix #>>>>>&0 ubyte x 3RD_SUFFIX=%x # no sample with 2 valid characters after point followed by \0 in original file name field >>>>>&0 ubyte =0 >>>>>>0 use edi-pack # samples with valid 3rd character after point in original file name field >>>>>&0 ubyte >0x1F # sample with 3 valid character after point followed by \0 in original file name field like: BOOK01A.IC$ CTL3D.DL$ >>>>>>&0 ubyte =0 >>>>>>>0 use edi-pack # sample with 3 valid character after point followed by no \0 in original file name field like: HERBTEXT.LZS >>>>>>&0 ubyte !0 >>>>>>>0 use edi-lzs # no sample with invalid 3rd character after point in original file name field >>>>>&0 default x >>>>>>0 use edi-lzs # sample with invalid 2nd character after point in original file name field like: LACERATE.LZS SPLINTER.LZS >>>>&0 default x >>>>>0 use edi-lzs # sample without point character in original file name field like GUNSHOT.LZS >>8 default x >>>0 use edi-lzs # Reference: http://mark0.net/download/triddefs_xml.7z/defs/e/edi-lzss2.trid.xml # Note: called "EDI Install Pro LZSS2 compressed data" by TrID and verified by # command like `deark -l -m edi_pack -d2 4WAY.WA$` as "EDI Pack LZSS2" >7 string 2 EDI LZSS2 packed #!:mime application/octet-stream !:mime application/x-edi-pack-lzss # the name of a compressed file often ends in character '$' or '_' !:ext ??$/??_ # original filename, NUL-terminated, padded to 13 bytes like: mci.vbx 4way.wav skymap.exe cmdialog.vbx 
>>8             string      x           "%-0.13s"
# size of the original file, read as 4-byte little endian integer
>>21            ulelong     x           \b, %u bytes
# first 8 bytes of compressed data like: ff5249464606ec00 ff4d5aa601010000
>>>25           ubequad     x           \b, data %#16.16llx...

# Display routine for EDI Pack LZSS1 samples with a stored original file name
# Note: verified by command like `deark -l -d2 SPELMATE.H$` as "EDI Pack LZSS1"
0               name        edi-pack
# original filename, NUL-terminated, padded to 13 bytes like:
# ctl3d.dll spelmate.h filemenu.rc owl.def index-it.exe
# but not like \377Aloe.lzs\273 (HERBTEXT.LZS)
>8              string      x           EDI LZSS packed "%-.13s"
#!:mime application/octet-stream
!:mime          application/x-edi-pack-lzss
# the name of a compressed file often ends in character '$' or '_'
!:ext           ??$/?$
# first 8 bytes of compressed data like:
# f7000001eff02020 ff4d5aa900020000 ff2f2a207370656c
>21             ubequad     x           \b, data %#16.16llx...

# Display routine for EDI LZSSLib samples, which carry no original file name
# URL: http://fileformats.archiveteam.org/wiki/EDI_LZSSLib
# Note: verified partly by command like `deark -l -m edi_pack -d2 GUNSHOT.LZS` as "EDI LZSSLib"
0               name        edi-lzs
# Note: verified by command like `deark -l -d2 GUNSHOT.LZS` as "EDI LZSSLib"
# bytes at the file name position do not look like a name:
# \277BM\226.\0 \277BM.n\001 \277BM\226.\0 \277BM.g\001 \377Aloe.lzs\273
>8              string      x           EDI LZSSLib packed
#!:mime application/octet-stream
!:mime          application/x-edi-pack-lzss
# The name of a compressed file ends with LZS suffix
!:ext           lzs
# first 8 bytes of compressed data like:
# bf424df6e10100f3 ff416c6f652e6c7a ff416c6f652e6c7a
>8              ubequad     x           \b, data %#16.16llx...
# Summary: CAZIP compressed file
# From: Joerg Jenderek
# URL: http://fileformats.archiveteam.org/wiki/CAZIP
# Reference: http://mark0.net/download/triddefs_xml.7z/defs/c/caz.trid.xml
# Note: Format is distinct from CAZIPXP compressed
0       string              \x0D\x0A\x1ACAZIP   CAZIP compressed file
#!:mime application/octet-stream
!:mime  application/x-compress-cazip
# like: BLINKER.WR_ CLIPDEFS._ CAOSETUP.EX_ CLIPPER.EX_ FILEIO.C_
!:ext   ??_/?_/_

# Summary: FTCOMP compressed archive
# From: Joerg Jenderek
# URL: http://fileformats.archiveteam.org/wiki/FTCOMP
# Reference: http://mark0.net/download/triddefs_xml.7z/defs/a/ark-ftcomp.trid.xml
# Note: called by TrID "FTCOMP compressed archive"
# extracted by `unpack seahelp.hl_`
24      string/b            FTCOMP              FTCOMP compressed archive
#!:mime application/octet-stream
!:mime  application/x-compress-ftcomp
!:ext   ??_/??@/dll/drv/pk2/
# probably A596FDFF magic at the beginning; only shown when it differs
>0      ubelong             !0xA596FDFF         \b, at beginning %#x
# probably original file name with directory like: \OS2\unpack.exe \SYSTEM\8514.DRV MAHJONGG.EXE
>41     string              x                   "%s"

# The entries below are plain signature tests: one fixed byte sequence at a
# fixed offset, one description.

# MP3 (archiver, not lossy audio compression)
0       string              MP3\x1a             MP3-Archiver archive data
# ZET
0       string              OZ\xc3\x9d          ZET archive data
# TSComp
0       string              \x65\x5d\x13\x8c\x08\x01\x03\x00    TSComp archive data
# ARQ
0       string              gW\4\1              ARQ archive data
# Squash
3       string              OctSqu              Squash archive data
# Terse
0       string              \5\1\1\0            Terse archive data
# UHarc
0       string              UHA                 UHarc archive data
# ABComp
0       string              \2AB                ABComp archive data
0       string              \3AB2               ABComp archive data
# CMP
0       string              CO\0                CMP archive data
# Splint
0       string              \x93\xb9\x06        Splint archive data
# InstallShield
0       string              \x13\x5d\x65\x8c    InstallShield Z archive Data
# Gather
1       string              GTH                 Gather archive data
# BOA
0       string              BOA                 BOA archive data
# RAX
0       string              ULEB\xa             RAX archive data
# Xtreme
0       string              ULEB\0              Xtreme archive data
# Pack Magic
0       string              @\xc3\xa2\1\0       Pack Magic archive data
# BTS
0       belong&0xfeffffff   0x1a034465          BTS archive data
# ELI 5750
# The signature is "Ora" followed by one blank. The blank is written as \x20
# instead of an escaped space, so it can not merge with the white space that
# separates the pattern from the description.
0       string              Ora\x20             ELI 5750 archive data
# QFC
0 string \x1aFC\x1a QFC archive data 0 string \x1aQF\x1a QFC archive data # PRO-PACK https://www.segaretro.org/Rob_Northen_compression 0 string RNC >3 byte 1 PRO-PACK archive data (compression 1) >3 byte 2 PRO-PACK archive data (compression 2) # 777 0 string 777 777 archive data # LZS221 0 string sTaC LZS221 archive data # HPA 0 string HPA HPA archive data # Arhangel 0 string LG Arhangel archive data # EXP1, uses bzip2 0 string 0123456789012345BZh EXP1 archive data # IMP 0 string IMP\xa IMP archive data # NRV 0 string \x00\x9E\x6E\x72\x76\xFF NRV archive data # Squish 0 string \x73\xb2\x90\xf4 Squish archive data # Par 0 string PHILIPP Par archive data 0 string PAR Par archive data # HIT 0 string UB HIT archive data # SBX 0 belong&0xfffff000 0x53423000 SBX archive data # NaShrink 0 string NSK NaShrink archive data # SAPCAR 0 string #\ CAR\ archive\ header SAPCAR archive data 0 string CAR\ 2.00 SAPCAR archive data 0 string CAR\ 2.01 SAPCAR archive data #!:mime application/octet-stream !:mime application/vnd.sar !:ext sar # Disintegrator 0 string DST Disintegrator archive data # ASD 0 string ASD ASD archive data # InstallShield CAB # Update: Joerg Jenderek at Nov 2021 # URL: https://en.wikipedia.org/wiki/InstallShield # Reference: https://github.com/twogood/unshield/blob/master/lib/cabfile.h # Note: Not compatible with Microsoft CAB files # http://mark0.net/download/triddefs_xml.7z/defs/a/ark-cab-ishield.trid.xml # CAB_SIGNATURE 0x28635349 0 string ISc( InstallShield #!:mime application/octet-stream !:mime application/x-installshield # http://mark0.net/download/triddefs_xml.7z/defs/a/ark-cab-ishield-hdr.trid.xml >16 ulelong !0 setup header # like: _SYS1.HDR _USER1.HDR data1.hdr !:ext hdr >16 ulelong =0 CAB # like: _SYS1.CAB _USER1.CAB DATA1.CAB data2.cab !:ext cab # https://github.com/twogood/unshield/blob/master/lib/helper.c # version like: 0x1005201 0x100600c 0x1007000 0x1009500 # 0x2000578 0x20005dc 0x2000640 0x40007d0 0x4000834 >4 ulelong x \b, version %#x # 
volume_info like: 0 >8 ulelong !0 \b, volume_info %#x # cab_descriptor_offset like: 0x200 >12 ulelong !0x200 \b, offset %#x #>0x200 ubequad x \b, at 0x200 %#16.16llx # cab_descriptor_size like: 0 (*.cab) BD5 C8B DA5 E2A E36 116C 251D 4DA9 56F0 5CC2 6E4B 777D 779E 1F7C2 >16 ulelong !0 \b, descriptor size %#x # TOP4 0 string T4\x1a TOP4 archive data # BatComp left out: sig looks like COM executable # so TODO: get real 4dos batcomp file and find sig # BlakHole 0 string BH\5\7 BlakHole archive data # BIX 0 string BIX0 BIX archive data # ChiefLZA 0 string ChfLZ ChiefLZA archive data # Blink 0 string Blink Blink archive data # Logitech Compress 0 string \xda\xfa Logitech Compress archive data # ARS-Sfx (FIXME: really a SFX? then goto COM/EXE) 1 string (C)\ STEPANYUK ARS-Sfx archive data # AKT/AKT32 0 string AKT32 AKT32 archive data 0 string AKT AKT archive data # NPack 0 string MSTSM NPack archive data # PFT 0 string \0\x50\0\x14 PFT archive data # SemOne 0 string SEM SemOne archive data # PPMD 0 string \x8f\xaf\xac\x84 PPMD archive data # FIZ 0 string FIZ FIZ archive data # MSXiE 0 belong&0xfffff0f0 0x4d530000 MSXiE archive data # DeepFreezer 0 belong&0xfffffff0 0x797a3030 DeepFreezer archive data # DC 0 string =8 pstring/h x "%s" # according to TrID the next 3 bytes are nil >5 ubyte !0 \b, at 5 %#x >6 ubyte !0 \b, at 6 %#x >7 ubyte !0 \b, at 7 %#x # the fourth byte with value 0 is probably a flag for "non solid" mode #>3 ubyte =0x00 \b, unsolid mode 0 string Ai\2\1 Ai32 archive data #!:mime application/octet-stream !:mime application/x-compress-ai !:ext ai # original file name >8 pstring/h x "%s" # the fourth byte with value 0x01 is probably a flag for "solid" mode; this is not the default >3 ubyte =0x01 \b, solid mode # SBC 0 string SBC SBC archive data # Ybs 0 string YBS Ybs archive data # DitPack 0 string \x9e\0\0 DitPack archive data # DMS 0 string DMS! 
DMS archive data # EPC 0 string \x8f\xaf\xac\x8c EPC archive data # VSARC 0 string VS\x1a VSARC archive data # PDZ 0 string PDZ PDZ archive data # ReDuq 0 string rdqx ReDuq archive data # GCA 0 string GCAX GCA archive data # PPMN 0 string pN PPMN archive data # WinImage 3 string WINIMAGE WinImage archive data # Compressia 0 string CMP0CMP Compressia archive data # UHBC 0 string UHB UHBC archive data # WinHKI 0 string \x61\x5C\x04\x05 WinHKI archive data # WWPack data file 0 string WWP WWPack archive data # BSN (BSA, PTS-DOS) 0 string \xffBSG BSN archive data 1 string \xffBSG BSN archive data 3 string \xffBSG BSN archive data 1 string \0\xae\2 BSN archive data 1 string \0\xae\3 BSN archive data 1 string \0\xae\7 BSN archive data # AIN 0 string \x33\x18 AIN archive data 0 string \x33\x17 AIN archive data # XPA32 test moved and merged with XPA by Joerg Jenderek at Sep 2015 # SZip (TODO: doesn't catch all versions) 0 string SZ\x0a\4 SZip archive data # XPack DiskImage # *.XDI updated by Joerg Jenderek Sep 2015 # ftp://ftp.sac.sk/pub/sac/pack/0index.txt # GRR: this test is still too general as it catches also text files starting with jm 0 string jm # only found examples with this additional characteristic 2 bytes >2 string \x2\x4 Xpack DiskImage archive data #!:ext xdi # XPack Data # *.xpa updated by Joerg Jenderek Sep 2015 # ftp://ftp.elf.stuba.sk/pub/pc/pack/ 0 string xpa XPA !:ext xpa # XPA32 # ftp://ftp.elf.stuba.sk/pub/pc/pack/xpa32.zip # created by XPA32.EXE version 1.0.2 for Windows >0 string xpa\0\1 \b32 archive data # created by XPACK.COM version 1.67m or 1.67r with short 0x1800 >3 ubeshort !0x0001 \bck archive data # XPack Single Data # changed by Joerg Jenderek Sep 2015 back to like in version 5.12 # letter 'I'+ acute accent is equivalent to \xcd 0 string \xcd\ jm Xpack single archive data #!:mime application/x-xpa-compressed !:ext xpa # TODO: missing due to unknown magic/magic at end of file: #DWC #ARG #ZAR #PC/3270 #InstallIt #RKive #RK #XPack Diskimage # 
These were inspired by idarc, but actually verified # Dzip archiver (.dz) # Update: Joerg Jenderek # URL: http://speeddemosarchive.com/dzip/ # reference: http://speeddemosarchive.com/dzip/dz29src.zip/main.c # GRR: line below is too general as it matches also ASCII texts like Doszip commander help dz.txt 0 string DZ # latest version is 2.9 dated 7 may 2003 >2 byte <4 Dzip archive data !:mime application/x-dzip !:ext dz >>2 byte x \b, version %i >>3 byte x \b.%i >>4 ulelong x \b, offset %#x >>8 ulelong x \b, %u files # ZZip archiver (.zz) 0 string ZZ\ \0\0 ZZip archive data 0 string ZZ0 ZZip archive data # PAQ archiver (.paq) 0 string \xaa\x40\x5f\x77\x1f\xe5\x82\x0d PAQ archive data 0 string PAQ PAQ archive data >3 byte&0xf0 0x30 >>3 byte x (v%c) # JAR archiver (.j), this is the successor to ARJ, not Java's JAR (which is essentially ZIP) # Update: Joerg Jenderek # URL: http://fileformats.archiveteam.org/wiki/JAR_(ARJ_Software) # reference: http://mark0.net/download/triddefs_xml.7z/defs/a/ark-jar.trid.xml # https://www.sac.sk/download/pack/jar102x.exe/TECHNOTE.DOC # Note: called "JAR compressed archive" by TrID 0xe string \x1aJar\x1b JAR (ARJ Software, Inc.) archive data #!:mime application/octet-stream !:mime application/x-compress-j >0 ulelong x \b, CRC32 %#x # standard suffix is ".j"; for multi volumes following order j01 j02 ... j99 100 ... 990 !:ext j/j01/j02 # URL: http://fileformats.archiveteam.org/wiki/JARCS # reference: http://mark0.net/download/triddefs_xml.7z/defs/a/ark-jarcs.trid.xml # Note: called "JARCS compressed archive" by TrID 0 string JARCS JAR (ARJ Software, Inc.) 
archive data #!:mime application/octet-stream !:mime application/x-compress-jar !:ext jar # ARJ archiver (jason@jarthur.Claremont.EDU) # URL: http://fileformats.archiveteam.org/wiki/ARJ # reference: http://mark0.net/download/triddefs_xml.7z/defs/a/ark-arj.trid.xml # https://github.com/FarGroup/FarManager/ # blob/master/plugins/multiarc/arc.doc/arj.txt # Note: called "ARJ compressed archive" by TrID and # "ARJ File Format" by DROID via PUID fmt/610 # verified by `7z l -tarj PHRACK1.ARJ` and # `arj.exe l TEST-hk9.ARJ` 0 leshort 0xea60 # skip DROID fmt-610-signature-id-946.arj by check for valid file type of main header >0xA ubyte 2 >>0 use arj-archive 0 name arj-archive >0 leshort x ARJ archive !:mime application/x-arj # look for terminating 0-character of filename >0x26 search/1024 \0 # file name extension is normally .arj but not for parts of multi volume #>>&-5 string x extension %.4s >>&-5 string/c .arj data !:ext arj >>&-5 default x # for multi volume first name is archive.arj then following parts archive.a01 archive.a02 ... >>>8 byte &0x04 data !:ext a01/a02 # for SFX first name is archive.exe then following parts archive.e01 archive.e02 ... 
# Continuation of the ARJ main header display (sub routine arj-archive).
# Every flag message starts with "\b, " and carries no trailing comma, so
# that consecutive parts are joined by exactly one comma.
# file name does not end in .arj and the volume flag is not set
>>>8    byte        ^0x04       data, SFX multi-volume
!:ext   e01/e02
# basic header size like: 0x002b 0x002c 0x04e0 0x04e3 0x04e7
#>2 uleshort x basic header size %#4.4x
# next fragment content like: 0x0a200a003a8fc713 0x524a000010bb3471 0x524a0000c73c70f9
#>(2.s) ubequad x NEXT FRAGMENT CONTENT %#16.16llx
# first_hdr_size; seems to be same as basic header size
#>2 uleshort x 1st header size %#x
# archiver version number like: 3 4 6 11 102
>5      byte        x           \b, v%d
# minimum archiver version to extract like: 1
>6      ubyte       !1          \b, minimum %u to extract
# FOR DEBUGGING
#>8 byte x \b, FLAGS %#x
# GARBLED_FLAG1; garble with password; g switch
>8      byte        &0x01       \b, password-protected
# encryption version: 0~old 1~old 2~new 3~reserved 4~40 bit key GOST
>>0x20  ubyte       x           (v%u)
#>8 byte &0x02 \b, secured
# ANSIPAGE_FLAG; indicates ANSI codepage used by ARJ32; hy switch
>8      byte        &0x02       \b, ANSI codepage
# VOLUME_FLAG indicates presence of succeeding volume; but apparently not for SFX
>8      byte        &0x04       \b, multi-volume
#>8 byte &0x08 \b, file-offset
# ARJPROT_FLAG; build with data protection record; hk switch
>8      byte        &0x08       \b, recoverable
# arj protection factor; maximal 10; switch hky -> factor=y+1
>>0x22  byte        x           (factor %u)
>8      byte        &0x10       \b, slash-switched
# BACKUP_FLAG; obsolete
>8      byte        &0x20       \b, backup
# SECURED_FLAG; message formerly ended with a comma, which doubled the
# separator in front of the next part (", secured,, dual-name")
>8      byte        &0x40       \b, secured
# ALTNAME_FLAG; indicates dual-name archive
>8      byte        &0x80       \b, dual-name
# security version; 0~old 2~current; only other values are shown
>9      ubyte       !0
>>9     ubyte       !2          \b, security version %u
# file type; 2 in main header; 0~binary 1~7-bitText 2~comment 3~directory 4~VolumeLabel 5=ChapterLabel
>0xA    ubyte       !2          \b, file type %u
# date+time when original archive was created in MS-DOS format via ./msdos
>0xC    ulelong     x           \b, created
>0xC    use         dos-date
# or date and time by new internal function
#>0xE lemsdosdate x %s
#>0xC lemsdostime x %s
# FOR DEBUGGING
#>0x12 uleshort x RAW DATE %#4.4x
#>0x10 uleshort x RAW TIME %#4.4x
# date+time when archive was last modified; sometimes nil or
# maybe wrong like in HP4DRVR.ARJ
#>0x10 ulelong >0 \b, modified
#>>0x10 use dos-date
# or date and time by new internal function
#>>0x12 lemsdosdate x %s
#>>0x10 lemsdostime x %s
# archive size (currently used only for secured archives); MAYBE?
#>0x14 ulelong !0 \b, file size %u
# security envelope file position; MAYBE?
#>0x18 ulelong !0 \b, at %#x security envelope
# filespec position in filename; WHAT IS THAT?
#>0x1C uleshort >0 \b, filespec position %#x
# length in bytes of security envelope data like: 2CAh 301h 364h 471h
# shown only when it is not nil
>0x1E   uleshort    !0          \b, security envelope length %#x
# last chapter like: 0 1; shown only when it is not nil
>0x21   ubyte       !0          \b, last chapter %u
# filename (null-terminated string); sometimes at 0x26 when 4 bytes for extra data
>34     byte        x           \b, original name:
# with extra data: byte at 34 is below 0x0B and the name is read at 38
>34     byte        <0x0B
>>38    string      x           %s
# without extra data: byte at 34 is above 0x0A and the name is read at 34
>34     byte        >0x0A
>>34    string      x           %s
# host OS: 0~MSDOS ... 11~WIN32
>7      byte        0           \b, os: MS-DOS
>7      byte        1           \b, os: PRIMOS
>7      byte        2           \b, os: Unix
>7      byte        3           \b, os: Amiga
>7      byte        4           \b, os: Macintosh
>7      byte        5           \b, os: OS/2
>7      byte        6           \b, os: Apple ][ GS
>7      byte        7           \b, os: Atari ST
>7      byte        8           \b, os: NeXT
>7      byte        9           \b, os: VAX/VMS
>7      byte        10          \b, os: WIN95
>7      byte        11          \b, os: WIN32

# [JW] idarc says this is also possible
# same ARJ signature, but found at offset 2
2       leshort     0xea60      ARJ archive data
#2 leshort 0xea60
#>2 use arj-archive

# HA archiver (Greg Roelofs, newt@uchicago.edu)
# This is a really bad format. A file containing HAWAII will match this...
#0 string HA HA archive data, #>2 leshort =1 1 file, #>2 leshort >1 %hu files, #>4 byte&0x0f =0 first is type CPY #>4 byte&0x0f =1 first is type ASC #>4 byte&0x0f =2 first is type HSC #>4 byte&0x0f =0x0e first is type DIR #>4 byte&0x0f =0x0f first is type SPECIAL # suggestion: at least identify small archives (<1024 files) 0 belong&0xffff00fc 0x48410000 HA archive data >2 leshort =1 1 file, >2 leshort >1 %u files, >4 byte&0x0f =0 first is type CPY >4 byte&0x0f =1 first is type ASC >4 byte&0x0f =2 first is type HSC >4 byte&0x0f =0x0e first is type DIR >4 byte&0x0f =0x0f first is type SPECIAL # HPACK archiver (Peter Gutmann, pgut1@cs.aukuni.ac.nz) 0 string HPAK HPACK archive data # JAM Archive volume format, by Dmitry.Kohmanyuk@UA.net 0 string \351,\001JAM\ JAM archive, >7 string >\0 version %.4s >0x26 byte =0x27 - >>0x2b string >\0 label %.11s, >>0x27 lelong x serial %08x, >>0x36 string >\0 fstype %.8s # LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu) # Update: Joerg Jenderek # URL: https://en.wikipedia.org/wiki/LHA_(file_format) # Reference: https://web.archive.org/web/20021005080911/http://www.osirusoft.com/joejared/lzhformat.html # # check and display information of lharc (LHa,PMarc) file 0 name lharc-file # check 1st character of method id like -lz4- -lh5- or -pm2- >2 string - # check 5th character of method id >>6 string - # check header level 0 1 2 3 >>>20 ubyte <4 # check 2nd, 3th and 4th character of method id >>>>3 regex \^(lh[0-9a-ex]|lz[s2-8]|pm[012]|pc1) \b !:mime application/x-lzh-compressed # creator type "LHA " !:apple ????LHA # display archive type name like "LHa/LZS archive data" or "LArc archive" >>>>>2 string -lz \b !:ext lzs # already known -lzs- -lz4- -lz5- with old names >>>>>>2 string -lzs LHa/LZS archive data >>>>>>3 regex \^lz[45] LHarc 1.x archive data # missing -lz?- with wikipedia names >>>>>>3 regex \^lz[2378] LArc archive # display archive type name like "LHa (2.x) archive data" >>>>>2 string -lh \b # already known -lh0- -lh1- 
-lh2- -lh3- -lh4- -lh5- -lh6- -lh7- -lhd- variants with old names >>>>>>3 regex \^lh[01] LHarc 1.x/ARX archive data # LHice archiver use ".ICE" as name extension instead usual one ".lzh" # FOOBAR archiver use ".foo" as name extension instead usual one # "Florian Orjanov's and Olga Bachetska's ARchiver" not found at the moment >>>>>>>2 string -lh1 \b !:ext lha/lzh/ice >>>>>>3 regex \^lh[23d] LHa 2.x? archive data >>>>>>3 regex \^lh[7] LHa (2.x)/LHark archive data >>>>>>3 regex \^lh[456] LHa (2.x) archive data >>>>>>>2 string -lh5 \b # https://en.wikipedia.org/wiki/BIOS # Some mainboard BIOS like Award use LHa compression. So archives with unusual extension are found like # bios.rom , kd7_v14.bin, 1010.004, ... !:ext lha/lzh/rom/bin # missing -lh?- variants (Joe Jared) >>>>>>3 regex \^lh[89a-ce] LHa (Joe Jared) archive # UNLHA32 2.67a >>>>>>2 string -lhx LHa (UNLHA32) archive # lha archives with standard file name extensions ".lha" ".lzh" >>>>>>3 regex !\^(lh1|lh5) \b !:ext lha/lzh # this should not happen if all -lh variants are described >>>>>>2 default x LHa (unknown) archive #!:ext lha # PMarc >>>>>3 regex \^pm[012] PMarc archive data !:ext pma # append method id without leading and trailing minus character >>>>>3 string x [%3.3s] >>>>>>0 use lharc-header # # check and display information of lharc header 0 name lharc-header # header size 0x4 , 0x1b-0x61 >0 ubyte x # compressed data size != compressed file size #>7 ulelong x \b, data size %d # attribute: 0x2~?? 
0x10~symlink|target 0x20~normal #>19 ubyte x \b, 19_%#x # level identifier 0 1 2 3 #>20 ubyte x \b, level %d # time stamp #>15 ubelong x DATE %#8.8x # OS ID for level 1 >20 ubyte 1 # 0x20 types find for *.rom files >>(21.b+24) ubyte <0x21 \b, %#x OS # ascii type like M for MSDOS >>(21.b+24) ubyte >0x20 \b, '%c' OS # OS ID for level 2 >20 ubyte 2 #>>23 ubyte x \b, OS ID %#x >>23 ubyte <0x21 \b, %#x OS >>23 ubyte >0x20 \b, '%c' OS # filename only for level 0 and 1 >20 ubyte <2 # length of filename >>21 ubyte >0 \b, with # filename >>>21 pstring x "%s" # #2 string -lh0- LHarc 1.x/ARX archive data [lh0] #!:mime application/x-lharc 2 string -lh0- >0 use lharc-file #2 string -lh1- LHarc 1.x/ARX archive data [lh1] #!:mime application/x-lharc 2 string -lh1- >0 use lharc-file # NEW -lz2- ... -lz8- 2 string -lz2- >0 use lharc-file 2 string -lz3- >0 use lharc-file 2 string -lz4- >0 use lharc-file 2 string -lz5- >0 use lharc-file 2 string -lz7- >0 use lharc-file 2 string -lz8- >0 use lharc-file # [never seen any but the last; -lh4- reported in comp.compression:] #2 string -lzs- LHa/LZS archive data [lzs] 2 string -lzs- >0 use lharc-file # According to wikipedia and others such a version does not exist #2 string -lh\40- LHa 2.x? archive data [lh ] #2 string -lhd- LHa 2.x? archive data [lhd] 2 string -lhd- >0 use lharc-file #2 string -lh2- LHa 2.x? archive data [lh2] 2 string -lh2- >0 use lharc-file #2 string -lh3- LHa 2.x? archive data [lh3] 2 string -lh3- >0 use lharc-file #2 string -lh4- LHa (2.x) archive data [lh4] 2 string -lh4- >0 use lharc-file #2 string -lh5- LHa (2.x) archive data [lh5] 2 string -lh5- >0 use lharc-file #2 string -lh6- LHa (2.x) archive data [lh6] 2 string -lh6- >0 use lharc-file #2 string -lh7- LHa (2.x)/LHark archive data [lh7] 2 string -lh7- # !:mime application/x-lha # >20 byte x - header level %d >0 use lharc-file # NEW -lh8- ... 
-lhe- , -lhx- 2 string -lh8- >0 use lharc-file 2 string -lh9- >0 use lharc-file 2 string -lha- >0 use lharc-file 2 string -lhb- >0 use lharc-file 2 string -lhc- >0 use lharc-file 2 string -lhe- >0 use lharc-file 2 string -lhx- >0 use lharc-file # taken from idarc [JW] 2 string -lZ PUT archive data # already done by LHarc magics # this should never happen if all sub types of LZS archive are identified #2 string -lz LZS archive data 2 string -sw1- Swag archive data 0 name rar-file-header >24 byte 15 \b, v1.5 >24 byte 20 \b, v2.0 >24 byte 29 \b, v4 >15 byte 0 \b, os: MS-DOS >15 byte 1 \b, os: OS/2 >15 byte 2 \b, os: Win32 >15 byte 3 \b, os: Unix >15 byte 4 \b, os: Mac OS >15 byte 5 \b, os: BeOS 0 name rar-archive-header >3 leshort&0x1ff >0 \b, flags: >>3 leshort &0x01 ArchiveVolume >>3 leshort &0x02 Commented >>3 leshort &0x04 Locked >>3 leshort &0x10 NewVolumeNaming >>3 leshort &0x08 Solid >>3 leshort &0x20 Authenticated >>3 leshort &0x40 RecoveryRecordPresent >>3 leshort &0x80 EncryptedBlockHeader >>3 leshort &0x100 FirstVolume # RAR (Roshal Archive) archive 0 string Rar!\x1a\7\0 RAR archive data !:mime application/x-rar !:ext rar/cbr # file header >(0xc.l+9) byte 0x74 >>(0xc.l+7) use rar-file-header # subblock seems to share information with file header >(0xc.l+9) byte 0x7a >>(0xc.l+7) use rar-file-header >9 byte 0x73 >>7 use rar-archive-header 0 string Rar!\x1a\7\1\0 RAR archive data, v5 !:mime application/x-rar !:ext rar # Very old RAR archive # https://jasonblanks.com/wp-includes/images/papers/KnowyourarchiveRAR.pdf 0 string RE\x7e\x5e RAR archive data (26 uleshort 19 >>30 string AndroidManifest.xml Android package (APK), with AndroidManifest.xml !:mime application/vnd.android.package-archive !:ext apk >>>-22 string PK\005\006 >>>>(-6.l-16) string APK\x20Sig\x20Block\x2042 \b, with APK Signing Block # Starts with META-INF/com/android/build/gradle/app-metadata.properties >26 uleshort 57 >>30 string META-INF/com/android/build/gradle/ >>>&0 string 
app-metadata.properties Android package (APK), with gradle app-metadata.properties !:mime application/vnd.android.package-archive !:ext apk >>>>-22 string PK\005\006 >>>>>(-6.l-16) string APK\x20Sig\x20Block\x2042 \b, with APK Signing Block # Starts with classes.dex (file name length = 11) >26 uleshort 11 >>30 string classes.dex Android package (APK), with classes.dex !:mime application/vnd.android.package-archive !:ext apk >>>-22 string PK\005\006 >>>>(-6.l-16) string APK\x20Sig\x20Block\x2042 \b, with APK Signing Block # Starts with META-INF/MANIFEST.MF (file name length = 20) # NB: checks for resources.arsc, classes.dex, etc. as well to avoid matching JAR files >26 uleshort 20 >>30 string META-INF/MANIFEST.MF # Contains resources.arsc (near the end, in the central directory) >>>-512 search resources.arsc Android package (APK), with MANIFEST.MF and resources.arsc !:mime application/vnd.android.package-archive !:ext apk >>>>-22 string PK\005\006 >>>>>(-6.l-16) string APK\x20Sig\x20Block\x2042 \b, with APK Signing Block >>>-512 default x # Contains classes.dex (near the end, in the central directory) >>>>-512 search classes.dex Android package (APK), with MANIFEST.MF and classes.dex !:mime application/vnd.android.package-archive !:ext apk >>>>>-22 string PK\005\006 >>>>>>(-6.l-16) string APK\x20Sig\x20Block\x2042 \b, with APK Signing Block >>>>-512 default x # Contains lib/armeabi (near the end, in the central directory) >>>>>-512 search lib/armeabi Android package (APK), with MANIFEST.MF and armeabi lib !:mime application/vnd.android.package-archive !:ext apk >>>>>>-22 string PK\005\006 >>>>>>>(-6.l-16) string APK\x20Sig\x20Block\x2042 \b, with APK Signing Block >>>>>-512 default x # Contains drawables (near the end, in the central directory) >>>>>>-512 search res/drawable Android package (APK), with MANIFEST.MF and drawables !:mime application/vnd.android.package-archive !:ext apk >>>>>>>-22 string PK\005\006 >>>>>>>>(-6.l-16) string APK\x20Sig\x20Block\x2042 \b, 
with APK Signing Block # It may or may not be an APK file, but it's definitely a Java JAR file >>>>>>-512 default x Java archive data (JAR) !:mime application/java-archive !:ext jar # Starts with zipflinger virtual entry (28 + 104 = 132 bytes) # See https://github.com/obfusk/apksigcopier/blob/666f5b7/apksigcopier/__init__.py#L230 >4 string \x00\x00\x00\x00\x00\x00 >>&0 string \x21\x08\x21\x02 >>>&0 string \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 >>>>&0 string \x00\x00 Android package (APK), with zipflinger virtual entry !:mime application/vnd.android.package-archive !:ext apk >>>>>-22 string PK\005\006 >>>>>>(-6.l-16) string APK\x20Sig\x20Block\x2042 \b, with APK Signing Block # APK Signing Block >0 default x >>-22 string PK\005\006 >>>(-6.l-16) string APK\x20Sig\x20Block\x2042 Android package (APK), with APK Signing Block !:mime application/vnd.android.package-archive !:ext apk # Zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu) 0 string PK\005\006 Zip archive data (empty) !:mime application/zip !:ext zip/cbz !:strength +1 0 string PK\003\004 !:strength +1 # Specialised zip formats which start with a member named 'mimetype' # (stored uncompressed, with no 'extra field') containing the file's MIME type. # Check for have 8-byte name, 0-byte extra field, name "mimetype", and # contents starting with "application/": >26 string \x8\0\0\0mimetypeapplication/ # KOffice / OpenOffice & StarOffice / OpenDocument formats # From: Abel Cheung # KOffice (1.2 or above) formats # (mimetype contains "application/vnd.kde.") >>50 string vnd.kde. 
KOffice (>=1.2) >>>58 string karbon Karbon document >>>58 string kchart KChart document >>>58 string kformula KFormula document >>>58 string kivio Kivio document >>>58 string kontour Kontour document >>>58 string kpresenter KPresenter document >>>58 string kspread KSpread document >>>58 string kword KWord document # OpenOffice formats (for OpenOffice 1.x / StarOffice 6/7) # (mimetype contains "application/vnd.sun.xml.") # URL: https://en.wikipedia.org/wiki/OpenOffice.org_XML # reference: http://fileformats.archiveteam.org/wiki/OpenOffice.org_XML >>50 string vnd.sun.xml. OpenOffice.org 1.x >>>62 string writer Writer >>>>68 byte !0x2e document !:mime application/vnd.sun.xml.writer !:ext sxw >>>>68 string .template template !:mime application/vnd.sun.xml.writer.template !:ext stw >>>>68 string .web Web template !:mime application/vnd.sun.xml.writer.web !:ext stw >>>>68 string .global global document !:mime application/vnd.sun.xml.writer.global !:ext sxg >>>62 string calc Calc >>>>66 byte !0x2e spreadsheet !:mime application/vnd.sun.xml.calc !:ext sxc >>>>66 string .template template !:mime application/vnd.sun.xml.calc.template !:ext stc >>>62 string draw Draw >>>>66 byte !0x2e document !:mime application/vnd.sun.xml.draw !:ext sxd >>>>66 string .template template !:mime application/vnd.sun.xml.draw.template !:ext std >>>62 string impress Impress >>>>69 byte !0x2e presentation !:mime application/vnd.sun.xml.impress !:ext sxi >>>>69 string .template template !:mime application/vnd.sun.xml.impress.template !:ext sti >>>62 string math Math document !:mime application/vnd.sun.xml.math !:ext sxm >>>62 string base Database file !:mime application/vnd.sun.xml.base !:ext sdb # URL: https://wiki.openoffice.org/wiki/Documentation/DevGuide/Extensions/File_Format # From: Joerg Jenderek # Note: only few OXT samples are detected here by mimetype member # is used by OpenOffice and LibreOffice and probably also NeoOffice # verified by `unzip -Zv *.oxt` or `7z l -slt *.oxt` >>50 string 
vnd.openofficeorg. OpenOffice >>>68 string extension \b/LibreOffice Extension # http://extension.nirsoft.net/oxt !:mime application/vnd.openofficeorg.extension # like: Gallery-Puzzle.2.1.0.1.oxt !:ext oxt # OpenDocument formats (for OpenOffice 2.x / StarOffice >= 8) # URL: http://fileformats.archiveteam.org/wiki/OpenDocument # https://lists.oasis-open.org/archives/office/200505/msg00006.html # (mimetype contains "application/vnd.oasis.opendocument.") >>50 string vnd.oasis.opendocument. OpenDocument >>>73 string text >>>>77 byte !0x2d Text !:mime application/vnd.oasis.opendocument.text !:ext odt >>>>77 string -template Text Template !:mime application/vnd.oasis.opendocument.text-template !:ext ott >>>>77 string -web HTML Document Template !:mime application/vnd.oasis.opendocument.text-web !:ext oth >>>>77 string -master >>>>>84 byte !0x2d Master Document !:mime application/vnd.oasis.opendocument.text-master !:ext odm >>>>>84 string -template Master Template !:mime application/vnd.oasis.opendocument.text-master-template !:ext otm >>>73 string graphics >>>>81 byte !0x2d Drawing !:mime application/vnd.oasis.opendocument.graphics !:ext odg >>>>81 string -template Drawing Template !:mime application/vnd.oasis.opendocument.graphics-template !:ext otg >>>73 string presentation >>>>85 byte !0x2d Presentation !:mime application/vnd.oasis.opendocument.presentation !:ext odp >>>>85 string -template Presentation Template !:mime application/vnd.oasis.opendocument.presentation-template !:ext otp >>>73 string spreadsheet >>>>84 byte !0x2d Spreadsheet !:mime application/vnd.oasis.opendocument.spreadsheet !:ext ods >>>>84 string -template Spreadsheet Template !:mime application/vnd.oasis.opendocument.spreadsheet-template !:ext ots >>>73 string chart >>>>78 byte !0x2d Chart !:mime application/vnd.oasis.opendocument.chart !:ext odc >>>>78 string -template Chart Template !:mime application/vnd.oasis.opendocument.chart-template !:ext otc >>>73 string formula >>>>80 byte !0x2d Formula 
!:mime application/vnd.oasis.opendocument.formula !:ext odf >>>>80 string -template Formula Template !:mime application/vnd.oasis.opendocument.formula-template !:ext otf # https://www.loc.gov/preservation/digital/formats/fdd/fdd000441.shtml >>>73 string database Database !:mime application/vnd.oasis.opendocument.database !:ext odb # Valid for LibreOffice Base 6.0.1.1 at least >>>73 string base Database # https://bugs.documentfoundation.org/show_bug.cgi?id=45854 !:mime application/vnd.oasis.opendocument.base !:ext odb >>>73 string image >>>>78 byte !0x2d Image !:mime application/vnd.oasis.opendocument.image !:ext odi >>>>78 string -template Image Template !:mime application/vnd.oasis.opendocument.image-template !:ext oti # EPUB (OEBPS) books using OCF (OEBPS Container Format) # https://www.idpf.org/ocf/ocf1.0/download/ocf10.htm, section 4. # From: Ralf Brown >>50 string epub+zip EPUB document !:mime application/epub+zip # From: Hajin Jang # hwpx (OWPML) document format follows OCF specification. # Hangul Word Processor 2010+ supports HWPX format. # URL: https://www.hancom.com/etc/hwpDownload.do # https://standard.go.kr/KSCI/standardIntro/getStandardSearchView.do?menuId=503&topMenuId=502&ksNo=KSX6101 # https://e-ks.kr/streamdocs/view/sd;streamdocsId=72059197557727331 >>50 string hwp+zip Hancom HWP (Hangul Word Processor) file, HWPX !:mime application/x-hwp+zip !:ext hwpx # From: Joerg Jenderek # URL: http://en.wikipedia.org/wiki/CorelDRAW # NOTE: version; til 2 WL-based; from 3 til 13 by ./riff; from 14 zip based >>50 string x-vnd.corel. 
Corel >>>62 string draw.document+zip Draw drawing, version 14-16 !:mime application/x-vnd.corel.draw.document+zip !:ext cdr >>>62 string draw.template+zip Draw template, version 14-16 !:mime application/x-vnd.corel.draw.template+zip !:ext cdrt >>>62 string zcf.draw.document+zip Draw drawing, version 17-22 !:mime application/x-vnd.corel.zcf.draw.document+zip !:ext cdr >>>62 string zcf.draw.template+zip Draw template, version 17-22 !:mime application/x-vnd.corel.zcf.draw.template+zip !:ext cdt/cdrt # URL: http://product.corel.com/help/CorelDRAW/540240626/Main/EN/Doc/CorelDRAW-Other-file-formats.html >>>62 string zcf.pattern+zip Draw pattern, version 22 !:mime application/x-vnd.corel.zcf.pattern+zip !:ext pat # URL: https://en.wikipedia.org/wiki/Corel_Designer # Reference: http://fileformats.archiveteam.org/wiki/Corel_Designer # Note: called by TrID "Corel DESIGN graphics" >>>62 string designer.document+zip DESIGNER graphics, version 14-16 !:mime application/x-vnd.corel.designer.document+zip !:ext des >>>62 string zcf.designer.document+zip DESIGNER graphics, version 17-21 !:mime application/x-vnd.corel.zcf.designer.document+zip !:ext des # URL: http://product.corel.com/help/CorelDRAW/540223850/Main/EN/Documentation/ # CorelDRAW-Corel-Symbol-Library-CSL.html >>>62 string symbol.library+zip Symbol Library, version 6-16.3 !:mime application/x-vnd.corel.symbol.library+zip !:ext csl >>>62 string zcf.symbol.library+zip Symbol Library, version 17-22 !:mime application/x-vnd.corel.zcf.symbol.library+zip !:ext csl # Catch other ZIP-with-mimetype formats # In a ZIP file, the bytes immediately after a member's contents are # always "PK". The 2 regex rules here print the "mimetype" member's # contents up to the first 'P'. Luckily, most MIME types don't contain # any capital 'P's. This is a kludge. # (mimetype contains "application/") >>50 default x Zip data >>>38 regex [!-OQ-~]+ (MIME type "%s"?) 
!:mime application/zip # (mimetype contents other than "application/*") >26 string \x8\0\0\0mimetype >>38 string !application/ >>>38 regex [!-OQ-~]+ Zip data (MIME type "%s"?) !:mime application/zip # Java Jar files (see also APK files above) >(26.s+30) leshort 0xcafe Java archive data (JAR) !:mime application/java-archive !:ext jar # iOS App >(26.s+30) leshort !0xcafe >>26 string !\x8\0\0\0mimetype >>>30 string Payload/ >>>>38 search/64 .app/ iOS App !:mime application/x-ios-app # Dup, see above. #>30 search/100/b application/epub+zip EPUB document #!:mime application/epub+zip # Generic zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu) # Next line excludes specialized formats: >(26.s+30) leshort !0xcafe >>30 search/100/b !application/epub+zip >>>26 string !\x8\0\0\0mimetype Zip archive data !:mime application/zip >>>>4 beshort x \b, at least >>>>4 use zipversion >>>>4 beshort x to extract >>>>8 beshort x \b, compression method= >>>>8 use zipcompression >>>>0x161 string WINZIP \b, WinZIP self-extracting # StarView Metafile # From Pierre Ducroquet 0 string VCLMTF StarView MetaFile >6 beshort x \b, version %d >8 belong x \b, size %d # Zoo archiver # Update: Joerg Jenderek # URL: https://en.wikipedia.org/wiki/Zoo_(file_format) # http://fileformats.archiveteam.org/wiki/Zoo # Reference: http://mark0.net/download/triddefs_xml.7z/defs/a/ark-zoo-strict.trid.xml # http://distcache.freebsd.org/ports-distfiles/zoo-2.10pl1.tar.gz/zoo.h # Note: called "ZOO compressed archive (strict)" by TrID and "ZOO Compressed Archive" by DROID via PUID x-fmt/269 # verified by command like `deark -m zoo -l -d2 WHRCGA.ZOO` 20 lelong 0xfdc4a7dc # skip DROID x-fmt-269-signature-id-621.zoo by looking for valid major version to manipulate archive >32 byte >0 Zoo archive data !:mime application/x-zoo # bak is the extension of a backed-up zoo archive !:ext zoo/bak # version in text form like: 1.50 2.00 2.10 >>4 byte >48 \b, v%c.
>>>6 byte >47 \b%c >>>>7 byte >47 \b%c # ZOO files typically start with "ZOO ?.?? Archive.", followed by the bytes 0x1a 0x0 0x0; not used by Zoo and they may be anything >>8 string !\040Archive.\032 \b, at 8 >>>8 string x text "%0.10s" # major_ver.minor_ver; minimum version needed to manipulate archive like: 1.0 2.0 >>32 byte >0 \b, modify: v%d >>>33 byte x \b.%d+ # major_ver.minor_ver; minimum version needed to extract after modify like in old versions >>(24.l+28) ubyte x \b, extract: v%u >>(24.l+29) ubyte x \b.%u+ # with zoo 2.00 additional fields have been added in the archive header >>32 byte >1 # type; type of archive header like: 1 2 >>>34 ubyte !1 \b, header type %u # acmt_pos; position of archive comment like: 6258 30599 61369 149501 >>>35 lelong >0 \b, at %d # acmt_len; length of archive comment like: 258 >>>>39 uleshort x %u bytes comment #>>>>(35.l) ubequad x COMMENT=%16.16llx # 1st character of comment may be CarriageReturn (0x0d) >>>>(35.l) ubyte <040 # 2nd character of comment may be LineFeed (0x0a) >>>>>(35.l+1) ubyte <040 # comment string after CRLF like "Anonymous ftp site garbo.uwasa.fi 128.214.87.1 moderated by" >>>>>>(35.l+2) string x %s # next character of remaining comment may be CarriageReturn (0x0d) >>>>>>>&0 ubyte <040 >>>>>>>>&0 ubyte <040 # 2nd comment part like: Timo Salmi ts@chyde.uwasa.fi PC directories and uploads\015\012Harri Valkama hv@chyde.uwasa.fi PC, Mac, Unix files, and upload >>>>>>>>>&0 string >037 %s # vdata; archive-level versioning byte like: 1 3 >>>41 ubyte !1 \b, vdata %#x # zoo_start; pointer to 1st entry header >>24 lelong x \b; at %u # zoo_minus; zoo_start -1 for consistency checking #>>28 lelong x \b, zoo_minus %#x # zoo_tag; tag for check #>>(24.l+0) ulelong !0xfdc4a7dc \b, zoo_tag=%8.8x # type; type of directory entry like: 1 2 >>(24.l+4) ubyte !2 type=%u # packing_method; 0~no packing 1~normal LZW 2~lzh >>(24.l+5) ubyte x method= >>>(24.l+5) ubyte 0 \bnot-compressed >>>(24.l+5) ubyte 1 \blzd >>>(24.l+5)
ubyte 2 \blzh # next; position of next directory entry >>(24.l+6) ulelong x \b, next entry at %u # offset; position of file data for this entry #>>(24.l+10) ulelong x \b, data at %u # file_crc; CRC-16 of file data >>(24.l+18) uleshort x \b, CRC %#4.4x # comment; zero if none or points to entry comment like ADD9h (WHRCGA.ZOO) >>(24.l+32) lelong >0 \b, at %#x # cmt_size; if not 0 for none then length of entry comment like: 46 >>>(24.l+36) uleshort >0 %u bytes comment # entry comment itself like: "CGA .GL file showing menu input from keyboard" >>>>(&-6.l) string x "%s" # org_size; original size of file >>(24.l+20) ulelong x \b, size %u # size_now; compressed size of file >>(24.l+24) ulelong x (%u compressed) # major_ver.minor_ver; minimum version needed to extract already done # deleted; will be 1 if deleted, 0 if not >>(24.l+30) ubyte =1 \b, deleted # struc; file structure if any; WHAT IS THAT? >>(24.l+31) ubyte !0 \b, structured # fname[13]; short/DOS file name like 12345678.012 >>(24.l+38) string x \b, %0.13s # for directory entry type 2 with variable part >>(24.l+4) ubyte =2 # var_dir_len; length of variable part of dir entry >>>(24.l+51) uleshort >0 #>>>(24.l+51) uleshort >0 \b, variable part length %u # namlen; length of long filename #>>>>(24.l+56) ubyte x \b, namlen %u # dirlen; length of directory name #>>>>(24.l+57) ubyte x \b, dirlen %u # if file length positive then show long file name >>>>(24.l+56) ubyte >0 # lfname[256]; long file name \0-terminated >>>>>(24.l+58) string x "%s" # if directory length positive then jump before file name field and then jump this additional length plus 2 (\0-terminator + dirlen field) to following directory name >>>>(24.l+57) ubyte >0 >>>>>(24.l+55) ubyte x # dirname[256]; directory name \0-terminated >>>>>>&(&0.b+2) string x in "%s" # dir_crc; CRC of directory entry #>>>(24.l+54) uleshort x \b, entry CRC %#4.4x # tz; timezone where file was archived; 7Fh~unknown 4~1.00hoursWestOfUTC 12 16 20~5.00hoursWestOfUTC
-107~26.75hoursEastOfUTC -4~1.00hoursEastOfUTC >>>(24.l+53) byte !0x7f \b, time zone %d/4 # date; last mod file date in DOS format >>>(24.l+14) lemsdosdate x \b, modified %s # time; last mod file time in DOS format >>>(24.l+16) lemsdostime x %s # Shell archives 10 string #\ This\ is\ a\ shell\ archive shell archive text !:mime application/octet-stream # # LBR. NB: May conflict with the questionable # "binary Computer Graphics Metafile" format. # 0 string \0\ \ \ \ \ \ \ \ \ \ \ \0\0 LBR archive data # # PMA (CP/M derivative of LHA) # Update: Joerg Jenderek # URL: https://en.wikipedia.org/wiki/LHA_(file_format) # #2 string -pm0- PMarc archive data [pm0] 2 string -pm0- >0 use lharc-file #2 string -pm1- PMarc archive data [pm1] 2 string -pm1- >0 use lharc-file #2 string -pm2- PMarc archive data [pm2] 2 string -pm2- >0 use lharc-file 2 string -pms- PMarc SFX archive (CP/M, DOS) #!:mime application/x-foobar-exec !:ext com 5 string -pc1- PopCom compressed executable (CP/M) #!:mime application/x- #!:ext com # From Rafael Laboissiere # The Project Revision Control System (see # http://prcs.sourceforge.net) generates a packaged project # file which is recognized by the following entry: 0 leshort 0xeb81 PRCS packaged project # Microsoft cabinets # by David Necas (Yeti) #0 string MSCF\0\0\0\0 Microsoft cabinet file data, #>25 byte x v%d #>24 byte x \b.%d # MPi: All CABs have version 1.3, so this is pointless. # Better magic in debian-additions. # GTKtalog catalogs # by David Necas (Yeti) 4 string gtktalog\ GTKtalog catalog data, >13 string 3 version 3 >>14 beshort 0x677a (gzipped) >>14 beshort !0x677a (not gzipped) >13 string >3 version %s ############################################################################ # Parity archive reconstruction file, the 'par' file format now used on Usenet. 
0 string PAR\0 PARity archive data >48 leshort =0 - Index file >48 leshort >0 - file number %d # Felix von Leitner 0 string d8:announce BitTorrent file !:mime application/x-bittorrent !:ext torrent # Durval Menezes, 0 string d13:announce-list BitTorrent file !:mime application/x-bittorrent !:ext torrent 0 string d7:comment BitTorrent file !:mime application/x-bittorrent !:ext torrent 0 string d4:info BitTorrent file !:mime application/x-bittorrent !:ext torrent # Atari MSA archive - Teemu Hukkanen # URL: http://fileformats.archiveteam.org/wiki/MSA_(Magic_Shadow_Archiver) # Reference: http://info-coach.fr/atari/documents/_mydoc/FD_Image_File_Format.pdf # http://mark0.net/download/triddefs_xml.7z/defs/m/msa.trid.xml # Update: Joerg Jenderek # Note: called by TrID "Atari MSA Disk Image" and verified by # command like `deark -l -m msa -d2 PDATS578.msa` as " Atari ST floppy disk image" # GRR: line below is too general as it matches setup.skin 0 beshort 0x0e0f # skip foo setup.skin with unrealistically high number 52255 of sides by checking for a valid "low" value >4 ubeshort <2 Atari MSA archive data #!:mime application/octet-stream !:mime application/x-atari-msa !:ext msa # sectors per track like: 9 10 >>2 beshort x \b, %d sectors per track # sides (0 or 1; add 1 to this to get correct number of sides) >>4 beshort 0 \b, 1 sided >>4 beshort 1 \b, 2 sided # starting track like: 0 >>6 beshort x \b, starting track: %d # ending track like: 39 79 80 81 >>8 beshort x \b, ending track: %d # tracks content #>>10 ubequad x \b, track content %#16.16llx # Alternate ZIP string (amc@arwen.cs.berkeley.edu) 0 string PK00PK\003\004 Zip archive data !:mime application/zip !:ext zip/cbz # Recognize ZIP archives with prepended data by end-of-central-directory record # https://en.wikipedia.org/wiki/ZIP_(file_format)#End_of_central_directory_record_(EOCD) # by Michal Gorny -2 uleshort 0 >&-22 string PK\005\006 # without #! >>0 string !#!
Zip archive, with extra data prepended !:mime application/zip !:ext zip/cbz # with #! >>0 string/w #!\ a >>>&-1 string/T x %s script executable (Zip archive) # ACE archive (from http://www.wotsit.org/download.asp?f=ace) # by Stefan `Sec` Zehl 7 string **ACE** ACE archive data !:mime application/x-ace-compressed !:ext ace >15 byte >0 version %d >16 byte =0x00 \b, from MS-DOS >16 byte =0x01 \b, from OS/2 >16 byte =0x02 \b, from Win/32 >16 byte =0x03 \b, from Unix >16 byte =0x04 \b, from MacOS >16 byte =0x05 \b, from WinNT >16 byte =0x06 \b, from Primos >16 byte =0x07 \b, from AppleGS >16 byte =0x08 \b, from Atari >16 byte =0x09 \b, from Vax/VMS >16 byte =0x0A \b, from Amiga >16 byte =0x0B \b, from Next >14 byte x \b, version %d to extract >5 leshort &0x0080 \b, multiple volumes, >>17 byte x \b (part %d), >5 leshort &0x0002 \b, contains comment >5 leshort &0x0200 \b, sfx >5 leshort &0x0400 \b, small dictionary >5 leshort &0x0800 \b, multi-volume >5 leshort &0x1000 \b, contains AV-String >>30 string \x16*UNREGISTERED\x20VERSION* (unregistered) >5 leshort &0x2000 \b, with recovery record >5 leshort &0x4000 \b, locked >5 leshort &0x8000 \b, solid # Date in MS-DOS format (whatever that is) #>18 lelong x Created on # sfArk : compression program for Soundfonts (sf2) by Dirk Jagdmann # 0x1A string sfArk sfArk compressed Soundfont >0x15 string 2 >>0x1 string >\0 Version %s >>0x2A string >\0 : %s # DR-DOS 7.03 Packed File *.??_ # Reference: http://www.antonis.de/dos/dos-tuts/mpdostip/html/nwdostip.htm # Note: unpacked by PNUNPACK.EXE 0 string Packed\ File\ # by looking for Control-Z skip ASCII text starting with Packed File >0x18 ubyte 0x1a Personal NetWare Packed File !:mime application/x-novell-compress !:ext ??_ >>12 string x \b, was "%.12s" # 1 or 2 #>>0x19 ubyte x \b, at 0x19 %u >>0x1b ulelong x with %u bytes # EET archive # From: Tilman Sauerbeck 0 belong 0x1ee7ff00 EET archive !:mime application/x-eet # rzip archives 0 string RZIP rzip compressed data >4 byte x - 
version %d >5 byte x \b.%d >6 belong x (%d bytes) # From: Joerg Jenderek # URL: https://help.foxitsoftware.com/kb/install-fzip-file.php # reference: http://mark0.net/download/triddefs_xml.7z/ # defs/f/fzip.trid.xml # Note: unknown compression; No "PK" zip magic; normally in directory like # "%APPDATA%\Foxit Software\Addon\Foxit Reader\Install" 0 ubequad 0x2506781901010000 Foxit add-on/update !:mime application/x-fzip !:ext fzip # From: "Robert Dale" 0 belong 123 dar archive, >4 belong x label "%.8x >>8 belong x %.8x >>>12 beshort x %.4x" >14 byte 0x54 end slice >14 beshort 0x4e4e multi-part >14 beshort 0x4e53 multi-part, with -S # Symbian installation files # https://www.thouky.co.uk/software/psifs/sis.html # http://developer.symbian.com/main/downloads/papers/SymbianOSv91/softwareinstallsis.pdf 8 lelong 0x10000419 Symbian installation file !:mime application/vnd.symbian.install >4 lelong 0x1000006D (EPOC release 3/4/5) >4 lelong 0x10003A12 (EPOC release 6) 0 lelong 0x10201A7A Symbian installation file (Symbian OS 9.x) !:mime x-epoc/x-sisx-app # From "Nelson A. de Oliveira" 0 string MPQ\032 MoPaQ (MPQ) archive # From: "Nelson A. de Oliveira" # .kgb 0 string KGB_arch KGB Archiver file >10 string x with compression level %.1s # xar (eXtensible ARchiver) archive # URL: https://en.wikipedia.org/wiki/Xar_(archiver) # xar archive format: https://code.google.com/p/xar/ # From: "David Remahl" # Update: Joerg Jenderek # TODO: lzma compression; X509Data for pkg and xip # Note: verified by `xar --dump-header -f FullBundleUpdate.xar` or # `7z t -txar Xcode_10.2_beta_4.xip` 0 string xar!
xar archive !:mime application/x-xar # pkg for Mac OSX installer package like FullBundleUpdate.pkg # xip for signed Apple software like Xcode_10.2_beta_4.xip !:ext xar/pkg/xip # always 28 in older archives >4 ubeshort >28 \b, header size %u # currently only version 1 exists (since about 2014) >6 ubeshort >1 version %u, >8 ubequad x compressed TOC: %llu, #>16 ubequad x uncompressed TOC: %llu, # cksum_alg 0-2 in older and also 3-4 in newer >24 belong 0 no checksum >24 belong 1 SHA-1 checksum >24 belong 2 MD5 checksum >24 belong 3 SHA-256 checksum >24 belong 4 SHA-512 checksum >24 belong >4 unknown %#x checksum #>24 belong >4 checksum # For no checksum jump 0 bytes >24 belong 0 >>0 ubyte x # jump more bytes forward by header size >>>&(4.S) ubyte x # jump more bytes forward by compressed table of contents size #>>>>&(8.Q) ubequad x \b, heap data %#llx >>>>&(8.Q) ubyte x # look for data by ./compress after message with 1 space at end >>>>>&-3 indirect x \b, contains # For SHA-1 jump 20 minus 2 bytes >24 belong 1 >>18 ubyte x # jump more bytes forward by header size >>>&(4.S) ubyte x # jump more bytes forward by compressed table of contents size >>>>&(8.Q) ubyte x # data compressed by gzip, bzip, lzma or none >>>>>&-1 indirect x \b, contains # For SHA-256 jump 32 minus 2 bytes >24 belong 3 >>30 ubyte x # jump more bytes forward by header size >>>&(4.S) ubyte x # jump more bytes forward by compressed table of contents size >>>>&(8.Q) ubyte x >>>>>&-1 indirect x \b, contains # For SHA-512 jump 64 minus 2 bytes >24 belong 4 >>62 ubyte x # jump more bytes forward by header size >>>&(4.S) ubyte x # jump more bytes forward by compressed table of contents size >>>>&(8.Q) ubyte x >>>>>&-1 indirect x \b, contains # Type: Parity Archive # From: Daniel van Eeden 0 string PAR2 Parity Archive Volume Set # Bacula volume format. (Volumes always start with a block header.)
# URL: https://bacula.org/3.0.x-manuals/en/developers/developers/Block_Header.html # From: Adam Buchbinder 12 string BB02 Bacula volume >20 bedate x \b, started %s # ePub is XHTML + XML inside a ZIP archive. The first member of the # archive must be an uncompressed file called 'mimetype' with contents # 'application/epub+zip' # From: "Michael Gorny" # ZPAQ: http://mattmahoney.net/dc/zpaq.html 0 string zPQ ZPAQ stream >3 byte x \b, level %d # From: Barry Carter # https://encode.ru/threads/456-zpaq-updates/page32 0 string 7kSt ZPAQ file # BBeB ebook, unencrypted (LRF format) # URL: https://www.sven.de/librie/Librie/LrfFormat # From: Adam Buchbinder 0 string L\0R\0F\0\0\0 BBeB ebook data, unencrypted >8 beshort x \b, version %d >36 byte 1 \b, front-to-back >36 byte 16 \b, back-to-front >42 beshort x \b, (%dx, >44 beshort x %d) # Symantec GHOST image by Joerg Jenderek at May 2014 # https://us.norton.com/ghost/ # https://www.garykessler.net/library/file_sigs.html 0 ubelong&0xFFFFf7f0 0xFEEF0100 Norton GHost image # *.GHO >2 ubyte&0x08 0x00 \b, first file # *.GHS or *.[0-9] with cns program option >2 ubyte&0x08 0x08 \b, split file # part of split index interesting for *.ghs >>4 ubyte x id=%#x # compression tag minus one equals numeric compression command line switch z[1-9] >3 ubyte 0 \b, no compression >3 ubyte 2 \b, fast compression (Z1) >3 ubyte 3 \b, medium compression (Z2) >3 ubyte >3 >>3 ubyte <11 \b, compression (Z%d-1) >2 ubyte&0x08 0x00 # ~ 30 byte password field only for *.gho >>12 ubequad !0 \b, password protected >>44 ubyte !1 # 1~Image All, sector-by-sector only for *.gho >>>10 ubyte 1 \b, sector copy # 1~Image Boot track only for *.gho >>>43 ubyte 1 \b, boot track # 1~Image Disc only for *.gho implies Image Boot track and sector copy >>44 ubyte 1 \b, disc sector copy # optional image description only *.gho >>0xff string >\0 "%-.254s" # look for DOS sector end sequence >0xE08 search/7776 \x55\xAA >>&-512 indirect x \b; contains # Google Chrome extensions # 
https://developer.chrome.com/extensions/crx # https://developer.chrome.com/extensions/hosting 0 string Cr24 Google Chrome extension !:mime application/x-chrome-extension >4 ulong x \b, version %u # SeqBox - Sequenced container # ext: sbx, seqbox # Marco Pontello marcopon@gmail.com # reference: https://github.com/MarcoPon/SeqBox 0 string SBx SeqBox, >3 byte x version %d # LyNX archive # Update: Joerg Jenderek # URL: http://fileformats.archiveteam.org/wiki/Lynx_archive # Reference: http://ist.uwaterloo.ca/~schepers/formats/LNX.TXT # http://mark0.net/download/triddefs_xml.7z/defs/a/ark-lnx.trid.xml # Note: called "Lynx archive" by TrID and "Commodore C64 BASIC program" with "POKE 53280" by ./c64 # TODO: merge and unify with Commodore C64 BASIC program 56 string USE\040LYNX\040TO\040DISSOLVE\040THIS\040FILE LyNX archive # display "Lynx archive" (strength=330) before Commodore C64 BASIC program (strength=50) handled by ./c64 #!:strength +0 #!:mime application/octet-stream !:mime application/x-commodore-lnx !:ext lnx # afterwards look for BASIC tokenized GOTO (89h) 10, line terminator \0, end of program tag \0\0 and CarriageReturn >86 search/10 \x8910\0\0\0\r \b, # for DEBUGGING #>>&0 string x STRING="%s" # number in ASCII of directory blocks with spaces on both sides like: 1 2 3 5 >>&0 regex [0-9]{1,5} %s directory blocks # signature like: "*LYNX XII BY WILL CORLEY" " LYNX IX BY WILL CORLEY" "*LYNX BY CBMCONVERT 2.0*" >>>&2 regex [^\r]{1,24} \b, signature "%s" # number of files in ASCII surrounded by spaces and delimited by CR like: 2 3 6 13 69 144 (maximum?)
>>>>&1 regex [0-9]{1,3} \b, %s files # From: Joerg Jenderek # URL: https://www.acronis.com/ # Reference: https://en.wikipedia.org/wiki/TIB_(file_format) # Note: only tested with True Image 2013 Build 5962 and 2019 Build 14110 0 ubequad 0xce24b9a220000000 Acronis True Image backup !:mime application/x-acronis-tib !:ext tib # 01000000 #>20 ubelong x \b, at 20 %#x # 20000000 #>28 ubelong x \b, at 28 %#x # strings like "Generic- SD/MMC 1.00" "Unknown Disk" "Msft Virtual Disk 1.0" # ??? # strings like "\Device\0000011e" "\Device\0000015a" #>0 search/0x6852300/cs \\Device\\ #>>&-1 pstring x \b, %s # "\Device\HarddiskVolume30" "\Device\HarddiskVolume39" #>>>&1 search/180/cs \\Device\\ #>>>>&-1 pstring x \b, %s #>>>>>&0 search/29/cs \0\0\xc8\0 # disk label #>>>>>>&10 lestring16 x \b, disk label %11.11s #>>>>>>&9 plestring16 x \b, disk label "%11.11s" #>>>>>>&10 ubequad x %16.16llx # Gentoo XPAK binary package # by Michal Gorny # https://gitweb.gentoo.org/proj/portage.git/tree/man/xpak.5 -4 string STOP >-16 string XPAKSTOP Gentoo binary package (XPAK) !:mime application/vnd.gentoo.xpak # From: Joerg Jenderek # URL: https://kodi.wiki/view/TexturePacker # Reference: https://mirrors.kodi.tv/releases/source/17.3-Krypton.tar.gz # /xbmc-Krypton/xbmc/guilib/XBTF.h # /xbmc-Krypton/xbmc/guilib/XBTF.cpp 0 string XBTF # skip ASCII text by looking for terminating \0 of path >264 ubyte 0 XBMC texture package !:mime application/x-xbmc-xbt !:ext xbt # XBTF_VERSION 2 >>4 string !2 \b, version %-.1s # nofFiles /xbmc-Krypton/xbmc/guilib/XBTFReader.cpp >>5 ulelong x \b, %u file # plural s >>5 ulelong >1 \bs # path[CXBTFFile[MaximumPathLength=256] >>9 string x \b, 1st %s # ALZIP archive # by Hyungjun Park , Hajin Jang # http://kippler.com/win/unalz/ # https://salsa.debian.org/l10n-korean-team/unalz 0 string ALZ\001 ALZ archive data !:ext alz # https://cf-aldn.altools.co.kr/setup/EGG_Specification.zip 0 string EGGA EGG archive data, !:ext egg >5 byte x version %u >4 byte x \b.%u >>0x0E ulelong 
=0x08E28222 >>0x0E ulelong =0x24F5A262 \b, split >>0x0E ulelong =0x24E5A060 \b, solid >>0x0E default x \b, unknown # PAQ9A archive # URL: http://mattmahoney.net/dc/#paq9a # Note: Line 1186 of paq9a.cpp gives the magic bytes 0 string pQ9\001 PAQ9A archive # From wof (wof@stachelkaktus.net) 0 string Unison\ archive\ format Unison archive format # https://ankiweb.net 30 string collection.anki2 Anki APKG file #!:ext .apkg # Synology archive (DiskStation Manager 7.0+) # From: Alexandre Iooss # Note: These archives are signed and encrypted. 0 ulelong&0xFFFFFF00 0xEFBEAD00 # MessagePack header (fixarray of 5 elements starting with a bin of 32 bytes) >8 ulelong&0x00FFFFFF 0x20C495 Synology archive !:ext spk # Extract some properties from MessagePack third item >>43 search/0x10000 package= >>>&0 string x \b, package %s >>43 search/0x10000 arch= >>>&0 string x %s >>43 search/0x10000 version= >>>&0 string x %s >>43 search/0x10000 create_time= >>>&0 string x \b, created on %s # MonoGame/XNA processed assets archive # From: Alexandre Iooss # URL: https://github.com/MonoGame/MonoGame/blob/v3.8.1/MonoGame.Framework/Content/ContentManager.cs 0 string XNB # XNB must be version 4 or 5 >4 byte <6 >>4 byte >3 # Size must be positive >>>6 lelong >0 MonoGame/XNA processed assets !:ext xnb >>>>3 string =w \b, for Windows >>>>3 string =x \b, for Xbox360 >>>>3 string =i \b, for iOS >>>>3 string =a \b, for Android >>>>3 string =d \b, for DesktopGL >>>>3 string =X \b, for MacOSX >>>>3 string =W \b, for WindowsStoreApp >>>>3 string =n \b, for NativeClient >>>>3 string =M \b, for WindowsPhone8 >>>>3 string =r \b, for RaspberryPi >>>>3 string =P \b, for PlayStation4 >>>>3 string =5 \b, for PlayStation5 >>>>3 string =O \b, for XboxOne >>>>3 string =S \b, for Nintendo Switch >>>>3 string =G \b, for Google Stadia >>>>3 string =b \b, for WebAssembly and Bridge.NET >>>>3 string =m \b, for WindowsPhone7.0 (XNA) >>>>3 string =p \b, for PlayStationMobile >>>>3 string =v \b, for PSVita >>>>3 string =g 
\b, for Windows (OpenGL) >>>>3 string =l \b, for Linux >>>>4 byte x \b, version %d >>>>5 byte &0x80 \b, LZX compressed >>>>>10 lelong x \b, decompressed size: %d bytes >>>>5 byte &0x40 \b, LZ4 compressed >>>>>10 lelong x \b, decompressed size: %d bytes # Electron ASAR archive # From: Alexandre Iooss # URL: https://github.com/electron/asar 0 ulelong 4 # Match JSON header start and end >16 string {"files":{" >>(12.l+12) string }}}} Electron ASAR archive !:ext asar >>>12 ulelong x \b, header length: %d bytes