Browse Source

Cherry-pick several commits to improve the detection of APK files. Closes: #849782

Special thanks to FC Stegerman.
Christoph Biedl 1 year ago
parent
commit
c6c086b133

+ 37 - 0
debian/patches/1672082456.FILE5_44-5-g813b3304.pr-410-pandrew-add-bitcoin-and-leveldb-support.patch

@@ -0,0 +1,37 @@
+Subject: PR/410: pandrew: Add bitcoin and leveldb support
+Origin: FILE5_44-5-g813b3304 <https://github.com/file/file/commit/FILE5_44-5-g813b3304>
+Upstream-Author: Christos Zoulas <christos@zoulas.com>
+Date: Mon Dec 26 19:20:56 2022 +0000
+
+--- a/magic/Magdir/crypto
++++ b/magic/Magdir/crypto
+@@ -3,3 +3,29 @@
+ # $File: crypto,v 1.2 2021/03/27 20:15:53 christos Exp $
+ # crypto:  file(1) magic for crypto formats
+ #
++# Bitcoin block files
++0		lelong			0xD9B4BEF9	Bitcoin
++>(4.l+40)	lelong			0xD9B4BEF9	reverse block
++>>4		lelong			x		\b, size %u
++# normal block below
++>0		default			x		block
++>>4		lelong			x		\b, size %u
++>>8		lelong&0xE0000000	0x20000000
++>>>8		lelong			x		\b, BIP9 0x%x
++>>8		lelong&0xE0000000	!0x20000000
++>>>8		lelong			x		\b, version 0x%x
++>>76		ledate			x		\b, %s UTC
++# VarInt counter
++>>88		ubyte			<0xfd		\b, txcount %u
++>>88		ubyte			0xfd
++>>>89		leshort			x		\b, txcount %u
++>>88		ubyte			0xfe
++>>>89		lelong			x		\b, txcount %u
++>>88		ubyte			0xff
++>>>89		lequad			x		\b, txcount %llu
++!:ext	dat
++# option to find more blocks in the file
++#>>(4.l+8)	indirect	x			;
++
++# LevelDB
++-8		lequad		0xdb4775248b80fb57	LevelDB table data

+ 31 - 0
debian/patches/1673481589.FILE5_44-14-gbb955ca3.jar-manifest-signature-file-pkcs-7-signed-data-fc-stegerman.patch

@@ -0,0 +1,31 @@
+Subject: JAR Manifest & Signature File, PKCS#7 Signed Data (FC Stegerman)
+Origin: FILE5_44-14-gbb955ca3 <https://github.com/file/file/commit/FILE5_44-14-gbb955ca3>
+Upstream-Author: Christos Zoulas <christos@zoulas.com>
+Date: Wed Jan 11 23:59:49 2023 +0000
+
+--- a/magic/Magdir/der
++++ b/magic/Magdir/der
+@@ -137,3 +137,10 @@
+ >>>>&0	der	seq
+ >>>>>&0	der     obj_id3=550403
+ >>>>>&0	der     utf8_str=x      \b, Subject=%s
++
++# PKCS#7 Signed Data (e.g. JAR Signature Block File)
++# OID 1.2.840.113549.1.7.2 (2a864886f70d010702)
++# Reference: https://www.rfc-editor.org/rfc/rfc2315
++0	der	seq
++>&0	der	obj_id9=2a864886f70d010702	DER Encoded PKCS#7 Signed Data
++!:ext	RSA/DSA/EC
+--- a/magic/Magdir/java
++++ b/magic/Magdir/java
+@@ -43,3 +43,10 @@
+ >6	leshort	>0x00	\b, version %d
+ >4	leshort	x	\b.%d
+ !:mime	application/x-java-image
++
++# JAR Manifest & Signature File
++# Reference: https://docs.oracle.com/javase/8/docs/technotes/guides/jar/jar.html
++0	string/t	Manifest-Version:\x201.0	JAR Manifest
++!:ext	MF
++0	string/t	Signature-Version:\x201.0	JAR Signature File
++!:ext	SF

+ 103 - 0
debian/patches/1673481736.FILE5_44-15-ga2756aa5.improve-javascript-detection-fc-stegerman.patch

@@ -0,0 +1,103 @@
+Subject: Improve JavaScript detection (FC Stegerman)
+Origin: FILE5_44-15-ga2756aa5 <https://github.com/file/file/commit/FILE5_44-15-ga2756aa5>
+Upstream-Author: Christos Zoulas <christos@zoulas.com>
+Date: Thu Jan 12 00:02:16 2023 +0000
+
+--- a/magic/Magdir/javascript
++++ b/magic/Magdir/javascript
+@@ -3,18 +3,68 @@
+ # $File: javascript,v 1.4 2022/09/02 08:08:17 christos Exp $
+ # javascript:  magic for javascript and node.js scripts.
+ #
+-0	string/w	#!/bin/node		Node.js script text executable
++0	string/tw	#!/bin/node		Node.js script executable
+ !:mime application/javascript
+-0	string/w	#!/usr/bin/node		Node.js script text executable
++0	string/tw	#!/usr/bin/node		Node.js script executable
+ !:mime application/javascript
+-0	string/w	#!/bin/nodejs		Node.js script text executable
++0	string/tw	#!/bin/nodejs		Node.js script executable
+ !:mime application/javascript
+-0	string/w	#!/usr/bin/nodejs	Node.js script text executable
+-!:mime application/javascript
+-0	string		#!/usr/bin/env\ node	Node.js script text executable
+-!:mime application/javascript
+-0	string		#!/usr/bin/env\ nodejs	Node.js script text executable
++0	string/tw	#!/usr/bin/nodejs	Node.js script executable
+ !:mime application/javascript
++0	string/t		#!/usr/bin/env\ node	Node.js script executable
++!:mime application/javascript
++0	string/t		#!/usr/bin/env\ nodejs	Node.js script executable
++!:mime application/javascript
++
++# JavaScript
++# The strength is increased to beat the C++ & HTML rules
++0	search	"use\x20strict"	JavaScript source
++!:strength +30
++!:mime	application/javascript
++!:ext 	js
++0	search	'use\x20strict'	JavaScript source
++!:strength +30
++!:mime	application/javascript
++!:ext 	js
++0	regex	module(\\.|\\[["'])exports.*=	JavaScript source
++!:strength +30
++!:mime	application/javascript
++!:ext 	js
++0	regex	\^(const|var|let).*=.*require\\(	JavaScript source
++!:strength +30
++!:mime	application/javascript
++!:ext 	js
++0	regex	\^export\x20(function|class|default|const|var|let|async)\x20	JavaScript source
++!:strength +30
++!:mime	application/javascript
++!:ext 	js
++0	regex	\\((async\x20)?function[(\x20]	JavaScript source
++!:strength +30
++!:mime	application/javascript
++!:ext 	js
++0	regex	\^(import|export).*\x20from\x20	JavaScript source
++!:strength +30
++!:mime	application/javascript
++!:ext 	js
++0	regex	\^(import|export)\x20["']\\./	JavaScript source
++!:strength +30
++!:mime	application/javascript
++!:ext 	js
++0	regex	\^require\\(["']	JavaScript source
++!:strength +30
++!:mime	application/javascript
++!:ext 	js
++0	regex	typeof.*[!=]==	JavaScript source
++!:strength +30
++!:mime	application/javascript
++!:ext 	js
++
++# React Native minified JavaScript
++0	search/128	__BUNDLE_START_TIME__=	React Native minified JavaScript
++!:strength +30
++!:mime	application/javascript
++!:ext	bundle/jsbundle
++
+ # Hermes by Facebook https://hermesengine.dev/
+ # https://github.com/facebook/hermes/blob/master/include/hermes/\
+ # BCGen/HBC/BytecodeFileFormat.h#L24
+--- a/magic/Magdir/sgml
++++ b/magic/Magdir/sgml
+@@ -50,6 +50,16 @@
+ !:mime	text/html
+ !:strength + 5
+ 
+# avoid misdetection of HTML documents as JavaScript
++0	string/cWt	\<!doctype\ html	HTML document text
++!:mime	text/html
++0	string/ct	\<html>	HTML document text
++!:mime	text/html
++0	string/ct	\<!--
++>&0	search/4096/cWt	\<!doctype\ html	HTML document text
++>&0	search/4096/ct	\<html>	HTML document text
++!:mime	text/html
++
+ # SVG document
+ # https://www.w3.org/TR/SVG/single-page.html
+ 0	search/4096/cWbt	\<!doctype\ svg	SVG XML document

+ 73 - 0
debian/patches/1673482012.FILE5_44-16-gb29519e7.detect-android-apk-files-fc-stegerman.patch

@@ -0,0 +1,73 @@
+Subject: Detect Android APK files (FC Stegerman)
+Origin: FILE5_44-16-gb29519e7 <https://github.com/file/file/commit/FILE5_44-16-gb29519e7>
+Upstream-Author: Christos Zoulas <christos@zoulas.com>
+Date: Thu Jan 12 00:06:52 2023 +0000
+
+--- a/magic/Magdir/archive
++++ b/magic/Magdir/archive
+@@ -1505,6 +1505,65 @@
+ !:mime	application/zip
+ !:ext zip/cbz
+ 
++# Android APK file (Zip archive)
++0	string		PK\003\004
++!:strength +1
++# Starts with AndroidManifest.xml (file name length = 19)
++>26	uleshort	19
++>>30	string	AndroidManifest.xml	Android package (APK), with AndroidManifest.xml
++>>>-22	string	PK\005\006
++>>>>(-6.l-16)	string	APK\x20Sig\x20Block\x2042	\b, with APK Signing Block
++!:mime	application/vnd.android.package-archive
++!:ext	apk
+# Starts with META-INF/com/android/build/gradle/app-metadata.properties (file name length = 57)
++>26	uleshort	57
++>>30	string	META-INF/com/android/build/gradle/
++>>>&0	string	app-metadata.properties	Android package (APK), with gradle app-metadata.properties
++>>>>-22	string	PK\005\006
++>>>>>(-6.l-16)	string	APK\x20Sig\x20Block\x2042	\b, with APK Signing Block
++!:mime	application/vnd.android.package-archive
++!:ext	apk
++# Starts with classes.dex (file name length = 11)
++>26	uleshort	11
++>>30	string	classes.dex	Android package (APK), with classes.dex
++>>>-22	string	PK\005\006
++>>>>(-6.l-16)	string	APK\x20Sig\x20Block\x2042	\b, with APK Signing Block
++!:mime	application/vnd.android.package-archive
++!:ext	apk
++# Starts with META-INF/MANIFEST.MF (file name length = 20)
++# NB: checks for resources.arsc or drawables as well to avoid matching JAR files
++>26	uleshort	20
++>>30	string	META-INF/MANIFEST.MF
++# Contains resources.arsc (near the end, in the central directory)
++>>>-512	search	resources.arsc	Android package (APK), with MANIFEST.MF and resources.arsc
++>>>>-22	string	PK\005\006
++>>>>>(-6.l-16)	string	APK\x20Sig\x20Block\x2042	\b, with APK Signing Block
++!:mime	application/vnd.android.package-archive
++!:ext	apk
++>>>-512 default x
++# Contains drawables (near the end, in the central directory)
++>>>>-512	search	res/drawable	Android package (APK), with MANIFEST.MF and drawables
++>>>>>-22	string	PK\005\006
++>>>>>>(-6.l-16)	string	APK\x20Sig\x20Block\x2042	\b, with APK Signing Block
++!:mime	application/vnd.android.package-archive
++!:ext	apk
++# Starts with zipflinger virtual entry (28 + 104 = 132 bytes)
++# See https://github.com/obfusk/apksigcopier/blob/666f5b7/apksigcopier/__init__.py#L230
++>4	string	\x00\x00\x00\x00\x00\x00
++>>&0	string	\x21\x08\x21\x02
++>>>&0	string	\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00
++>>>>&0	string	\x00\x00	Android package (APK), with zipflinger virtual entry
++>>>>>-22	string	PK\005\006
++>>>>>>(-6.l-16)	string	APK\x20Sig\x20Block\x2042	\b, with APK Signing Block
++!:mime	application/vnd.android.package-archive
++!:ext	apk
++# APK Signing Block
++>0	default	x
++>>-22	string	PK\005\006
++>>>(-6.l-16)	string	APK\x20Sig\x20Block\x2042	Android package (APK), with APK Signing Block
++!:mime	application/vnd.android.package-archive
++!:ext	apk
++
+ # Zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
+ 0	string		PK\005\006	Zip archive data (empty)
+ !:mime application/zip

+ 32 - 0
debian/patches/1673482234.FILE5_44-18-g9fa5e784.detect-android-art-baseline-profiles-found-in-apk-files.patch

@@ -0,0 +1,32 @@
+Subject: Detect Android ART (baseline) profiles found in APK files. (FC Stegerman)
+Origin: FILE5_44-18-g9fa5e784 <https://github.com/file/file/commit/FILE5_44-18-g9fa5e784>
+Upstream-Author: Christos Zoulas <christos@zoulas.com>
+Date: Thu Jan 12 00:10:34 2023 +0000
+
+--- a/magic/Magdir/android
++++ b/magic/Magdir/android
+@@ -212,3 +212,24 @@
+ 0	string/t	.class\x20
+ >&0	regex/512	\^\\.super\x20L.*;$	disassembled Android DEX Java class (smali/baksmali)
+ !:ext	smali
++
++# Android ART (baseline) profile + metadata: baseline.prof, baseline.profm
++# Reference: https://android.googlesource.com/platform/frameworks/support/\
++#            +/refs/heads/androidx-main/profileinstaller/profileinstaller/\
++#            src/main/java/androidx/profileinstaller/ProfileTranscoder.java
++# Reference: https://android.googlesource.com/platform/frameworks/support/\
++#            +/refs/heads/androidx-main/profileinstaller/profileinstaller/\
++#            src/main/java/androidx/profileinstaller/ProfileVersion.java
++0	string	pro\x00
++>0	regex	pro\x000[0-9][0-9]\x00	Android ART profile
++!:ext	prof
++>>4	string	001\x00	\b, version 001 N
++>>4	string	005\x00	\b, version 005 O
++>>4	string	009\x00	\b, version 009 O MR1
++>>4	string	010\x00	\b, version 010 P
++>>4	string	015\x00	\b, version 015 S
++0	string	prm\x00
++>0	regex	prm\x000[0-9][0-9]\x00	Android ART profile metadata
++!:ext	profm
++>>4	string	001\x00	\b, version 001 N
++>>4	string	002\x00	\b, version 002

+ 30 - 0
debian/patches/1673482275.FILE5_44-19-g4341ed61.detect-android-package-resource-table-arsc-fc-stegerman.patch

@@ -0,0 +1,30 @@
+Subject: Detect Android package resource table (ARSC) (FC Stegerman)
+Origin: FILE5_44-19-g4341ed61 <https://github.com/file/file/commit/FILE5_44-19-g4341ed61>
+Upstream-Author: Christos Zoulas <christos@zoulas.com>
+Date: Thu Jan 12 00:11:15 2023 +0000
+
+--- a/magic/Magdir/android
++++ b/magic/Magdir/android
+@@ -233,3 +233,22 @@
+ !:ext	profm
+ >>4	string	001\x00	\b, version 001 N
+ >>4	string	002\x00	\b, version 002
++
++# Android package resource table (ARSC): resources.arsc
++# Reference: https://android.googlesource.com/platform/tools/base/\
++#            +/refs/heads/mirror-goog-studio-main/apkparser/binary-resources/\
++#            src/main/java/com/google/devrel/gmscore/tools/apk/arsc
++# 00: resource table type = 0x0002 (2) + header size = 12 (2)
++# 04: chunk size (4, skipped)
++# 08: #packages (4)
++0	ulelong	0x000c0002	Android package resource table (ARSC)
++!:ext	arsc
++>8	ulelong	!1	\b, %d packages
++# 12: string pool type = 0x0001 (2) + header size = 28 (2)
++# 16: chunk size (4, skipped)
++# 20: #strings (4), #styles (4), flags (4)
++>12	ulelong	0x001c0001
++>>20	ulelong	!0	\b, %d string(s)
++>>24	ulelong	!0	\b, %d style(s)
++>>28	ulelong	&1	\b, sorted
++>>28	ulelong	&256	\b, utf8

+ 47 - 0
debian/patches/1673725413.FILE5_44-22-g6d565d82.improve-apk-detection-fc-stegerman.patch

@@ -0,0 +1,47 @@
+Subject: Improve APK detection (FC Stegerman)
+Origin: FILE5_44-22-g6d565d82 <https://github.com/file/file/commit/FILE5_44-22-g6d565d82>
+Upstream-Author: Christos Zoulas <christos@zoulas.com>
+Date: Sat Jan 14 19:43:33 2023 +0000
+
+--- a/magic/Magdir/archive
++++ b/magic/Magdir/archive
+@@ -1559,7 +1559,7 @@
+ !:mime	application/vnd.android.package-archive
+ !:ext	apk
+ # Starts with META-INF/MANIFEST.MF (file name length = 20)
+-# NB: checks for resources.arsc or drawables as well to avoid matching JAR files
++# NB: checks for resources.arsc, classes.dex, etc. as well to avoid matching JAR files
+ >26	uleshort	20
+ >>30	string	META-INF/MANIFEST.MF
+ # Contains resources.arsc (near the end, in the central directory)
+@@ -1568,13 +1568,27 @@
+ >>>>>(-6.l-16)	string	APK\x20Sig\x20Block\x2042	\b, with APK Signing Block
+ !:mime	application/vnd.android.package-archive
+ !:ext	apk
+->>>-512 default x
+-# Contains drawables (near the end, in the central directory)
+->>>>-512	search	res/drawable	Android package (APK), with MANIFEST.MF and drawables
++>>>-512	default x
++# Contains classes.dex (near the end, in the central directory)
++>>>>-512	search	classes.dex	Android package (APK), with MANIFEST.MF and classes.dex
+ >>>>>-22	string	PK\005\006
+ >>>>>>(-6.l-16)	string	APK\x20Sig\x20Block\x2042	\b, with APK Signing Block
+ !:mime	application/vnd.android.package-archive
+ !:ext	apk
++>>>>-512	default x
++# Contains lib/armeabi (near the end, in the central directory)
++>>>>>-512	search	lib/armeabi	Android package (APK), with MANIFEST.MF and armeabi lib
++>>>>>>-22	string	PK\005\006
++>>>>>>>(-6.l-16)	string	APK\x20Sig\x20Block\x2042	\b, with APK Signing Block
++!:mime	application/vnd.android.package-archive
++!:ext	apk
++>>>>>-512	default x
++# Contains drawables (near the end, in the central directory)
++>>>>>>-512	search	res/drawable	Android package (APK), with MANIFEST.MF and drawables
++>>>>>>>-22	string	PK\005\006
++>>>>>>>>(-6.l-16)	string	APK\x20Sig\x20Block\x2042	\b, with APK Signing Block
++!:mime	application/vnd.android.package-archive
++!:ext	apk
+ # Starts with zipflinger virtual entry (28 + 104 = 132 bytes)
+ # See https://github.com/obfusk/apksigcopier/blob/666f5b7/apksigcopier/__init__.py#L230
+ >4	string	\x00\x00\x00\x00\x00\x00

+ 7 - 0
debian/patches/series

@@ -1,8 +1,15 @@
 # reverted upstream commits
 
 # cherry-picked commits. Keep in upstream's chronological order
+1672082456.FILE5_44-5-g813b3304.pr-410-pandrew-add-bitcoin-and-leveldb-support.patch
 1672518248.FILE5_44-9-gdc71304b.pyzip-improvements-fc-stegerman.patch
+1673481589.FILE5_44-14-gbb955ca3.jar-manifest-signature-file-pkcs-7-signed-data-fc-stegerman.patch
+1673481736.FILE5_44-15-ga2756aa5.improve-javascript-detection-fc-stegerman.patch
+1673482012.FILE5_44-16-gb29519e7.detect-android-apk-files-fc-stegerman.patch
 1673482186.FILE5_44-17-g4c8ee2e7.detect-smali-files-fc-stegerman.patch
+1673482234.FILE5_44-18-g9fa5e784.detect-android-art-baseline-profiles-found-in-apk-files.patch
+1673482275.FILE5_44-19-g4341ed61.detect-android-package-resource-table-arsc-fc-stegerman.patch
+1673725413.FILE5_44-22-g6d565d82.improve-apk-detection-fc-stegerman.patch
 
 # patches that should go upstream
 upstream.disable.att3b.patch