compress 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461
  1. #------------------------------------------------------------------------------
  2. # $File: compress,v 1.91 2023/06/16 19:37:47 christos Exp $
  3. # compress: file(1) magic for pure-compression formats (no archives)
  4. #
  5. # compress, gzip, pack, compact, huf, squeeze, crunch, freeze, yabba, etc.
  6. #
  7. # Formats for various forms of compressed data
  8. # Formats for "compress" proper have been moved into "compress.c",
  9. # because it tries to uncompress it to figure out what's inside.
  10. # standard unix compress: two magic bytes 0x1f 0x9d at offset 0
  # Byte 2 carries the flags: bit 0x80 is reported as "block compressed" and
  # the low five bits (mask 0x1f) are printed as the "%d bits" value.
  # The 0x80 test uses ubyte on purpose: a plain "byte" is compared as a signed
  # value, so the masked result 0x80 is negative and ">0" would never match.
  11. 0 string \037\235 compress'd data
  12. !:mime application/x-compress
  13. !:apple LZIVZIVU
  14. !:ext Z
  15. >2 ubyte&0x80 >0 block compressed
  16. >2 byte&0x1f x %d bits
  17. # gzip (GNU zip, not to be confused with Info-ZIP or PKWARE zip archiver)
  18. # URL: https://en.wikipedia.org/wiki/Gzip
  19. # Reference: https://tools.ietf.org/html/rfc1952
  20. # Update: Joerg Jenderek, Apr 2019, Dec 2022
  21. # Edited by Chris Chittleborough <cchittleborough@yahoo.com.au>, March 2002
  22. # * Original filename is only at offset 10 if "extra field" absent
  23. # * Produce shorter output - notably, only report compression methods
  24. # other than 8 ("deflate", the only method defined in RFC 1952).
  25. # Note: find defs -iname '*.trid.xml' -exec grep -q '<Bytes>1F8B08' {} \; -ls
  26. # TODO:
  27. # FBR Blueberry FlashBack screen Record https://www.flashbackrecorder.com/
  28. # KPR KOffice/Calligra KPresenter application/x-kpresenter
  29. # KPT KOffice/Calligra KPresenter template? application/x-kpresenter
  30. # SAV Diggles Saved Game File http://www.innonics.com
  31. # SAV FarCry (demo) saved game http://www.farcry-thegame.com
  32. # DAT ZOAGZIP game data format http://en.wikipedia.org/wiki/SD_Gundam_Capsule_Fighter
  # gzip entry point: matches the two magic bytes 0x1f 0x8b at offset 0, then
  # branches on the FNAME/FCOMMENT bits (mask 0x18) of the flags byte at offset 3.
  33. 0 string \037\213
  34. # to display gzip compressed (strength=100=2*50) before other (strength=50)?
  35. #!:strength * 2
  36. # Neither the FNAME nor the FCOMMENT bit is set, so there is no file name/comment; only binary data is expected
  37. >3 byte&0x18 =0
  38. # For binary gzipped no ASCII text should occur
  39. # mcd-monu-cad.trid.xml
  40. >>10 string MCD Monu-Cad Drawing, Component or Font
  41. #>>36 string Created\ with\ MONU-CAD
  42. #!:mime application/octet-stream
  43. # http://fileformats.archiveteam.org/wiki/Monu-CAD
  44. # http://www.monucad.com/downloads/FullDemo-2005.EXE
  45. # /HANDS96.MCC Component
  46. # /DEMO_DD01.MCD Drawing
  47. # /MCALF020.FNT Font
  48. !:ext mcc/mcd/fnt
  49. # http://www.generalcadd.com
  50. >>10 string GXD General CADD, Drawing or Component
  51. #!:mime application/octet-stream
  52. # /gxc/BUILDINGEDGE.gxc Component
  53. # /gxd/HOCKETT-STPAUL-WRHSE.gxd Drawing
  54. # /gxd/POWERLAND-MILL-ADD-11.gxd Drawing v9.1.06
  55. !:ext gxc/gxd
  56. #>>>13 ubyte 0 \b, version 0
  57. >>>13 string 09 \b, version 9
  58. # other gzipped binary like gzipped tar, VirtualBox extension package,...
  # ("default" is taken only when neither the MCD nor the GXD test at offset 10 matched)
  59. >>10 default x gzip compressed data
  60. !:mime application/gzip
  61. >>>0 use gzip-info
  62. # size of the original (uncompressed) input data modulo 2^32
  # (negative offset: the little-endian long is read from the last 4 bytes of the file)
  63. # TODO: check for GXD MCD cad the reported size
  64. >>>-4 ulelong x \b, original size modulo 2^32 %u
  65. # gzipped TAR or VirtualBox extension package
  66. #!:mime application/x-compressed-tar
  67. #!:mime application/x-virtualbox-vbox-extpack
  68. # https://www.w3.org/TR/SVG/mimereg.html
  69. #!:mime image/svg+xml-compressed
  70. # zlib.3.gz
  71. # microcode-20180312.tgz
  72. # tpz same as tgz
  73. # lua-md5_1.2-1_i386_i486.ipk https://en.wikipedia.org/wiki/Opkg
  74. # Oracle_VM_VirtualBox_Extension_Pack-5.0.12-104815.vbox-extpack
  75. # trees.blend http://fileformats.archiveteam.org/wiki/BLEND
  76. # 2020-07-19-Note-16-24.xoj https://xournal.sourceforge.net/manual.html
  77. # MYgnucash-gz.gnucash https://wiki.gnucash.org/wiki/GnuCash_XML_format
  78. # text-rotate.dia https://en.wikipedia.org/wiki/Dia_(software)
  79. # MYrdata.RData https://en.wikipedia.org/wiki/R_(programming_language)
  80. !:ext gz/tgz/tpz/ipk/vbox-extpack/svgz/blend/dia/gnucash/rdata/xoj
  81. # FNAME/FCOMMENT bit implies file name/comment as iso-8859-1 text
  82. >3 byte&0x18 >0 gzip compressed data
  83. !:mime application/gzip
  84. # gzipped tar, gzipped Abiword document
  85. #!:mime application/x-compressed-tar
  86. #!:mime application/x-abiword-compressed
  87. #!:mime image/svg+xml-compressed
  88. # kleopatra_splashscreen.svgz gzipped .svg
  89. # RSI-Mega-Demo_Disk1.adz gzipped .adf http://fileformats.archiveteam.org/wiki/ADF_(Amiga)
  90. # PostbankTest.kmy gzipped XML https://docs.kde.org/stable5/en/kmymoney/kmymoney/details.formats.compressed.html
  91. # Logo.xcfgz gzipped .xcf http://fileformats.archiveteam.org/wiki/XCF
  92. !:ext gz/tgz/tpz/zabw/svgz/adz/kmy/xcfgz
  93. >>0 use gzip-info
  94. # size of the original (uncompressed) input data modulo 2^32
  95. >>-4 ulelong x \b, original size modulo 2^32 %u
  96. # display information of gzip compressed files
  # Named subroutine, invoked through "use gzip-info". It reports the header
  # fields it tests: method (offset 2), flag bits (offset 3), modification time
  # (offset 4), compression hint (offset 8) and originating system (offset 9).
  97. 0 name gzip-info
  98. #>2 byte x THIS iS GZIP
  # method 8 prints nothing; anything below or above 8 is flagged
  99. >2 byte <8 \b, reserved method
  100. >2 byte >8 \b, unknown method
  101. >3 byte &0x01 \b, ASCII
  102. >3 byte &0x02 \b, has CRC
  103. >3 byte &0x04 \b, extra field
  # original name: printed only when the 0x08 bit is set and the extra-field
  # bit 0x04 is clear, i.e. when the name is located at offset 10
  104. >3 byte&0xC =0x08
  105. >>10 string x \b, was "%s"
  106. >3 byte &0x10 \b, has comment
  107. >3 byte &0x20 \b, encrypted
  # a zero timestamp is not printed
  108. >4 ledate >0 \b, last modified: %s
  109. >8 byte 2 \b, max compression
  110. >8 byte 4 \b, max speed
  111. >9 byte =0x00 \b, from FAT filesystem (MS-DOS, OS/2, NT)
  112. >9 byte =0x01 \b, from Amiga
  113. >9 byte =0x02 \b, from VMS
  114. >9 byte =0x03 \b, from Unix
  115. >9 byte =0x04 \b, from VM/CMS
  116. >9 byte =0x05 \b, from Atari
  117. >9 byte =0x06 \b, from HPFS filesystem (OS/2, NT)
  118. >9 byte =0x07 \b, from MacOS
  119. >9 byte =0x08 \b, from Z-System
  120. >9 byte =0x09 \b, from CP/M
  121. >9 byte =0x0A \b, from TOPS/20
  122. >9 byte =0x0B \b, from NTFS filesystem (NT)
  123. >9 byte =0x0C \b, from QDOS
  124. >9 byte =0x0D \b, from Acorn RISCOS
  125. # size of the original (uncompressed) input data modulo 2^32
  # (left disabled here; the recorded error below is presumably what this test
  # produced inside the subroutine -- the callers print the size themselves)
  126. #>-4 ulelong x \b, original size modulo 2^32 %u
  127. #ERROR: line 114: non zero offset 1048572 at level 1
  128. # packed data, Huffman (minimum redundancy) codes on a byte-by-byte basis
  # magic bytes 0x1f 0x1e; the big-endian long at offset 2 is printed as the
  # original character count, with singular wording when it equals 1
  129. 0 string \037\036 packed data
  130. !:mime application/octet-stream
  131. !:ext z
  132. >2 belong >1 \b, %d characters originally
  133. >2 belong =1 \b, %d character originally
  134. #
  135. # This magic number is byte-order-independent (both bytes are 0x1f).
  136. 0 short 0x1f1f old packed data
  137. !:mime application/octet-stream
  138. # XXX - why *two* entries for "compacted data", one of which is
  139. # byte-order independent, and one of which is byte-order dependent?
  140. #
  141. 0 short 0x1fff compacted data
  142. !:mime application/octet-stream
  143. # This string is valid for SunOS (BE) and a matching "short" is listed
  144. # in the Ultrix (LE) magic file.
  145. 0 string \377\037 compacted data
  146. !:mime application/octet-stream
  # host-byte-order short; 0145405 is an octal constant (0xcb05)
  147. 0 short 0145405 huf output
  148. !:mime application/octet-stream
  149. # bzip2
  # "BZh" signature; the byte at offset 3 is printed as a character in front of
  # "00k" when its value is above 47, i.e. from ASCII '0' upwards
  150. 0 string BZh bzip2 compressed data
  151. !:mime application/x-bzip2
  152. !:ext bz2
  153. >3 byte >47 \b, block size = %c00k
  154. # bzip a block-sorting file compressor
  155. # by Julian Seward <sewardj@cs.man.ac.uk> and others
  # same layout as the bzip2 entry, but with the signature "BZ0"
  156. 0 string BZ0 bzip compressed data
  157. !:mime application/x-bzip
  158. >3 byte >47 \b, block size = %c00k
  159. # lzip
  # "LZIP" signature followed by a version byte at offset 4
  160. 0 string LZIP lzip compressed data
  161. !:mime application/x-lzip
  162. !:ext lz
  163. >4 byte x \b, version: %d
  164. # squeeze and crunch
  165. # Michael Haardt <michael@cantor.informatik.rwth-aachen.de>
  # Three big-endian 16-bit signatures 0x76FF/0x76FE/0x76FD; the original file
  # name is read as a string at offset 4 for squeezed data and at offset 2 for
  # the crunched and LZH variants.
  166. 0 beshort 0x76FF squeezed data,
  167. >4 string x original name %s
  168. 0 beshort 0x76FE crunched data,
  169. >2 string x original name %s
  170. 0 beshort 0x76FD LZH compressed data,
  171. >2 string x original name %s
  172. # Freeze
  # the second magic byte distinguishes the two freeze versions (0x9f / 0x9e)
  173. 0 string \037\237 frozen file 2.1
  174. 0 string \037\236 frozen file 1.0 (or gzip 0.5)
  175. # SCO compress -H (LZH)
  176. 0 string \037\240 SCO compress -H (LZH) data
  177. # European GSM 06.10 is a provisional standard for full-rate speech
  178. # transcoding, prI-ETS 300 036, which uses RPE/LTP (residual pulse
  179. # excitation/long term prediction) coding at 13 kbit/s.
  180. #
  181. # There's only a magic nibble (4 bits); that nibble repeats every 33
  182. # bytes. This isn't suited for use, but maybe we can use it someday.
  183. #
  184. # This will cause very short GSM files to be declared as data and
  185. # mismatches to be declared as data too!
  186. #0 byte&0xF0 0xd0 data
  187. #>33 byte&0xF0 0xd0
  188. #>66 byte&0xF0 0xd0
  189. #>99 byte&0xF0 0xd0
  190. #>132 byte&0xF0 0xd0 GSM 06.10 compressed audio
  191. # lzop from <markus.oberhumer@jk.uni-linz.ac.at>
  # Nine-byte signature, then a big-endian version word at offset 9. The two
  # branches below split on that version (below 0x0940 / above 0x0939) because
  # the method and os bytes are read from different offsets in each case.
  192. 0 string \x89\x4c\x5a\x4f\x00\x0d\x0a\x1a\x0a lzop compressed data
  193. !:ext lzo
  # version below 0x0940: method byte at offset 13, os byte at offset 14
  194. >9 beshort <0x0940
  195. >>9 byte&0xf0 =0x00 - version 0.
  # low 12 bits of the version word, printed as three hex digits
  196. >>9 beshort&0x0fff x \b%03x,
  197. >>13 byte 1 LZO1X-1,
  198. >>13 byte 2 LZO1X-1(15),
  199. >>13 byte 3 LZO1X-999,
  200. ## >>22 bedate >0 last modified: %s,
  201. >>14 byte =0x00 os: MS-DOS
  202. >>14 byte =0x01 os: Amiga
  203. >>14 byte =0x02 os: VMS
  204. >>14 byte =0x03 os: Unix
  205. >>14 byte =0x05 os: Atari
  206. >>14 byte =0x06 os: OS/2
  207. >>14 byte =0x07 os: MacOS
  208. >>14 byte =0x0A os: Tops/20
  209. >>14 byte =0x0B os: WinNT
  210. >>14 byte =0x0E os: Win32
  # version 0x0940 and above: method byte at offset 15, os byte at offset 17
  211. >9 beshort >0x0939
  212. >>9 byte&0xf0 =0x00 - version 0.
  213. >>9 byte&0xf0 =0x10 - version 1.
  214. >>9 byte&0xf0 =0x20 - version 2.
  215. >>9 beshort&0x0fff x \b%03x,
  216. >>15 byte 1 LZO1X-1,
  217. >>15 byte 2 LZO1X-1(15),
  218. >>15 byte 3 LZO1X-999,
  219. ## >>25 bedate >0 last modified: %s,
  220. >>17 byte =0x00 os: MS-DOS
  221. >>17 byte =0x01 os: Amiga
  222. >>17 byte =0x02 os: VMS
  223. >>17 byte =0x03 os: Unix
  224. >>17 byte =0x05 os: Atari
  225. >>17 byte =0x06 os: OS/2
  226. >>17 byte =0x07 os: MacOS
  227. >>17 byte =0x0A os: Tops/20
  228. >>17 byte =0x0B os: WinNT
  229. >>17 byte =0x0E os: Win32
  230. # 4.3BSD-Quasijarus Strong Compression
  231. # https://minnie.tuhs.org/Quasijarus/compress.html
  # magic bytes 0x1f 0xa1
  232. 0 string \037\241 Quasijarus strong compressed data
  233. # From: Cory Dikkers <cdikkers@swbell.net>
  # Amiga packers identified by four-character signatures; for "PP20" the
  # big-endian long at offset 4 selects the compression label that is printed.
  234. 0 string XPKF Amiga xpkf.library compressed data
  235. 0 string PP11 Power Packer 1.1 compressed data
  236. 0 string PP20 Power Packer 2.0 compressed data,
  237. >4 belong 0x09090909 fast compression
  238. >4 belong 0x090A0A0A mediocre compression
  239. >4 belong 0x090A0B0B good compression
  240. >4 belong 0x090A0C0C very good compression
  241. >4 belong 0x090A0C0D best compression
  242. # 7-zip archiver, from Thomas Klausner (wiz@danbala.tuwien.ac.at)
  243. # https://www.7-zip.org or DOC/7zFormat.txt
  244. #
  # Six-byte signature "7z" 0xbc 0xaf 0x27 0x1c; the bytes at offsets 6 and 7
  # are printed as "version <major>.<minor>".
  245. 0 string 7z\274\257\047\034 7-zip archive data,
  246. >6 byte x version %d
  247. >7 byte x \b.%d
  248. !:mime application/x-7z-compressed
  249. !:ext 7z/cb7
  # Named subroutine (invoked through "use lzma"): prints the LZMA description
  # and classifies the little-endian 64-bit field at offset 5 -- all ones is
  # reported as "streamed", any other value is printed as the size.
  250. 0 name lzma LZMA compressed data,
  251. !:mime application/x-lzma
  252. !:ext lzma
  253. >5 lequad =0xffffffffffffffff streamed
  254. >5 lequad !0xffffffffffffffff non-streamed, size %lld
  255. # Type: LZMA
  # Detection rule: the first three bytes must be 0x5d 0x00 0x00 (low 24 bits of
  # the little-endian long equal 0x5d), and the little-endian short at offset 12
  # must be either 0xff or 0 before the lzma subroutine is invoked.
  256. 0 lelong&0xffffff =0x5d
  257. >12 leshort 0xff
  258. >>0 use lzma
  259. >12 leshort 0
  260. >>0 use lzma
  261. # http://tukaani.org/xz/xz-file-format.txt
  # Six-byte signature 0xfd "7zXZ" 0x00; the low nibble of the byte at offset 7
  # selects the checksum name appended to the description.
  262. 0 ustring \xFD7zXZ\x00 XZ compressed data, checksum
  263. !:strength * 2
  264. !:mime application/x-xz
  265. !:ext xz
  266. >7 byte&0xf 0x0 NONE
  267. >7 byte&0xf 0x1 CRC32
  268. >7 byte&0xf 0x4 CRC64
  269. >7 byte&0xf 0xa SHA-256
  270. # https://github.com/ckolivas/lrzip/blob/master/doc/magic.header.txt
  # "LRZI" signature; bytes 4 and 5 are printed as "version <major>.<minor>",
  # and a value of 1 in the byte at offset 22 is reported as encrypted.
  271. 0 string LRZI LRZIP compressed data
  272. !:mime application/x-lrzip
  273. >4 byte x - version %d
  274. >5 byte x \b.%d
  275. >22 byte 1 \b, encrypted
  276. # https://fastcompression.blogspot.fi/2013/04/lz4-streaming-format-final.html
  # Three little-endian 32-bit signatures, one per generation of the LZ4
  # stream format; only the first entry declares a file extension.
  277. 0 lelong 0x184d2204 LZ4 compressed data (v1.4+)
  278. !:mime application/x-lz4
  279. !:ext lz4
  280. # Added by osm0sis@xda-developers.com
  281. 0 lelong 0x184c2103 LZ4 compressed data (v1.0-v1.3)
  282. !:mime application/x-lz4
  283. 0 lelong 0x184c2102 LZ4 compressed data (v0.1-v0.9)
  284. !:mime application/x-lz4
  285. # Zstandard/LZ4 skippable frames
  286. # https://github.com/facebook/zstd/blob/dev/zstd_compression_format.md
  # The mask drops the low nibble, so any signature 0x184D2A50..0x184D2A5F
  # matches. The indirect test then restarts magic evaluation at the offset
  # computed as (little-endian long stored at offset 4) + 8.
  287. 0 lelong&0xFFFFFFF0 0x184D2A50
  288. >(4.l+8) indirect x
  289. # Zstandard Dictionary ID subroutine
  # Offsets are relative to the position passed by the "use" line. Byte 0 is
  # tested twice: bit 0x20 picks the branch, and the low two bits (mask 0x03)
  # select how the ID is read -- 0 prints "None", 1/2/3 print a byte, a
  # little-endian short or a little-endian long. The ID is read at offset 1
  # when bit 0x20 is set and at offset 2 when it is clear.
  290. 0 name zstd-dictionary-id
  291. # Single Segment = True
  292. >0 byte &0x20 \b, Dictionary ID:
  293. >>0 byte&0x03 0 None
  294. >>0 byte&0x03 1
  295. >>>1 byte x %u
  296. >>0 byte&0x03 2
  297. >>>1 leshort x %u
  298. >>0 byte&0x03 3
  299. >>>1 lelong x %u
  300. # Single Segment = False
  301. >0 byte ^0x20 \b, Dictionary ID:
  302. >>0 byte&0x03 0 None
  303. >>0 byte&0x03 1
  304. >>>2 byte x %u
  305. >>0 byte&0x03 2
  306. >>>2 leshort x %u
  307. >>0 byte&0x03 3
  308. >>>2 lelong x %u
  309. # Zstandard compressed data
  310. # https://github.com/facebook/zstd/blob/dev/zstd_compression_format.md
  # One entry per little-endian signature 0xFD2FB522..0xFD2FB528; the last
  # hex digit tracks the format version printed in the description. Only the
  # v0.7 and v0.8+ entries decode the dictionary ID, starting at offset 4.
  311. 0 lelong 0xFD2FB522 Zstandard compressed data (v0.2)
  312. !:mime application/zstd
  313. !:ext zst
  314. 0 lelong 0xFD2FB523 Zstandard compressed data (v0.3)
  315. !:mime application/zstd
  316. !:ext zst
  317. 0 lelong 0xFD2FB524 Zstandard compressed data (v0.4)
  318. !:mime application/zstd
  319. !:ext zst
  320. 0 lelong 0xFD2FB525 Zstandard compressed data (v0.5)
  321. !:mime application/zstd
  322. !:ext zst
  323. 0 lelong 0xFD2FB526 Zstandard compressed data (v0.6)
  324. !:mime application/zstd
  325. !:ext zst
  326. 0 lelong 0xFD2FB527 Zstandard compressed data (v0.7)
  327. !:mime application/zstd
  328. !:ext zst
  329. >4 use zstd-dictionary-id
  330. 0 lelong 0xFD2FB528 Zstandard compressed data (v0.8+)
  331. !:mime application/zstd
  332. !:ext zst
  333. >4 use zstd-dictionary-id
  334. # https://github.com/facebook/zstd/blob/dev/zstd_compression_format.md
  # dictionary file: little-endian signature 0xEC30A437, followed at offset 4
  # by the little-endian long printed as the ID
  335. 0 lelong 0xEC30A437 Zstandard dictionary
  336. !:mime application/x-std-dictionary
  337. >4 lelong x (ID %u)
  338. # AFX compressed files (Wolfram Kleff)
  # note: the "-afx-" tag is looked up at offset 2, not at the start of the file
  339. 2 string -afx- AFX compressed file data
  340. # Supplementary magic data for the file(1) command to support
  341. # rzip(1). The format is described in magic(5).
  342. #
  343. # Copyright (C) 2003 by Andrew Tridgell. You may do whatever you want with
  344. # this file.
  345. #
  # "RZIP" signature; bytes 4 and 5 are printed as "version <major>.<minor>"
  # and the big-endian long at offset 6 as a byte count
  346. 0 string RZIP rzip compressed data
  347. >4 byte x - version %d
  348. >5 byte x \b.%d
  349. >6 belong x (%d bytes)
  # FreeArc: "ArC" followed by the byte 0x01
  350. 0 string ArC\x01 FreeArc archive <http://freearc.org>
  351. # Type: DACT compressed files
  # Signature is a host-byte-order long. The three version bytes at offsets
  # 4-6 are printed only when greater than -1; the sizes at offsets 7 and 15
  # are printed only above their thresholds (0 and 30).
  352. 0 long 0x444354C3 DACT compressed data
  353. >4 byte >-1 (version %i.
  354. >5 byte >-1 %i.
  355. >6 byte >-1 %i)
  356. >7 long >0 , original size: %i bytes
  357. >15 long >30 , block size: %i bytes
  358. # Valve Pack (VPK) files
  # little-endian signature, then two little-endian longs printed as the
  # version (offset 4) and the entry count (offset 8)
  359. 0 lelong 0x55aa1234 Valve Pak file
  360. >0x4 lelong x \b, version %u
  361. >0x8 lelong x \b, %u entries
  362. # Snappy framing format
  363. # https://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
  # ten-byte signature: 0xff 0x06 0x00 0x00 followed by "sNaPpY"
  364. 0 string \377\006\0\0sNaPpY snappy framed data
  365. !:mime application/x-snappy-framed
  366. # qpress, https://www.quicklz.com/
  367. 0 string qpress10 qpress compressed data
  368. !:mime application/x-qpress
  369. # Zlib https://www.ietf.org/rfc/rfc6713.txt
  # No fixed signature, so three conditions are chained: the first two bytes
  # read as a big-endian short must be divisible by 31, the low nibble of
  # byte 0 must equal 8, and the top bit of byte 0 must be clear.
  370. 0 string/b x
  371. >0 beshort%31 =0
  372. >>0 byte&0xf =8
  373. >>>0 byte&0x80 =0 zlib compressed data
  374. !:mime application/zlib
  375. # BWC compression
  # "BWC" must be followed by a zero byte at offset 3 before anything is printed
  376. 0 string BWC
  377. >3 byte 0 BWC compressed data
  378. # UCL compression
  # eight-byte signature compared as one big-endian 64-bit value
  379. 0 bequad 0x00e955434cff011a UCL compressed data
  380. # Softlib archive
  # "SLIB" signature; little-endian shorts at offsets 4 and 6 are printed as
  # the version and the number of files
  381. 0 string SLIB Softlib archive
  382. >4 leshort x \b, version %d
  383. >6 leshort x (contains %d files)
  384. # URL: https://github.com/lzfse/lzfse/blob/master/src/lzfse_internal.h#L276
  385. # From: Eric Hall <eric.hall@darkart.com>
  # four block signatures sharing the "bvx" prefix; the fourth character
  # selects the description
  386. 0 string bvx- lzfse encoded, no compression
  387. 0 string bvx1 lzfse compressed, uncompressed tables
  388. 0 string bvx2 lzfse compressed, compressed tables
  389. 0 string bvxn lzfse encoded, lzvn compressed
  390. # pcxLib.exe compression program
  391. # http://www.shikadi.net/moddingwiki/PCX_Library
  # The "pcxLib" signature at offset 0 is confirmed by the copyright notice at
  # offset 0x0A. Blanks inside a magic string have to be escaped; they are
  # written as \040, the octal code of the ASCII space. (\020 would be octal
  # for 0x10, a control character, and would not match a notice that is
  # written with ordinary spaces.)
  392. 0 string/b pcxLib
  393. >0x0A string/b Copyright\040(c)\040Genus\040Microprogramming,\040Inc. pcxLib compressed
  394. # https://support-docs.illumina.com/SW/ORA_Format_Specification/Content/SW/ORA/ORAFormatSpecification.htm
  # Little-endian short 0x7c49 at offset 0 plus little-endian long 0x80 at
  # offset 2; the following fields are consecutive 4-byte little-endian values
  # from offset 6 to 50, ending with a 2-byte version at offset 54.
  395. 0 uleshort 0x7c49
  396. >2 lelong 0x80 ORA FASTQ compressed file
  397. >>6 ulelong x \b, DNA size %u
  398. >>10 ulelong x \b, read names size %u
  399. >>14 ulelong x \b, quality buffer 1 size %u
  400. >>18 ulelong x \b, quality buffer 2 size %u
  401. >>22 ulelong x \b, sequence buffer size %u
  402. >>26 ulelong x \b, N-position buffer size %u
  403. >>30 ulelong x \b, crypto buffer size %u
  404. >>34 ulelong x \b, misc buffer 1 size %u
  405. >>38 ulelong x \b, misc buffer 2 size %u
  406. >>42 ulelong x \b, flags %#x
  407. >>46 lelong x \b, read size %d
  408. >>50 lelong x \b, number of reads %d
  409. >>54 leshort x \b, version %d
  410. # https://github.com/kspalaiologos/bzip3/blob/master/doc/file_format.md
  # "BZ3v1" signature followed by a little-endian long printed as the block size
  411. 0 string/b BZ3v1 bzip3 compressed data
  412. >5 ulelong x \b, blocksize %u
  413. # https://support-docs.illumina.com/SW/ORA_Format_Specification/Content/\
  414. # SW/ORA/ORAFormatSpecification.htm
  415. # From Guillaume Rizk
  # Host-byte-order short 0x7C49 at offset 0. All continuation tests use
  # negative offsets, i.e. positions counted back from the end of the file:
  # the same short is tested again at -261, and the remaining lines at
  # -125, -109 and -219 are evaluated independently of that test.
  416. 0 short =0x7C49 DRAGEN ORA file,
  417. >-261 short =0x7C49 with metadata:
  418. >-125 u8 x NB reads: %llu,
  419. >-109 u8 x NB bases: %llu.
  420. >-219 u4&0x02 2 File contains interleaved paired reads