| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446 |
- Subject: Simplify tests into something more delarative
- Origin: upstream, commit 0.4.27-37-g42980e5 <https://github.com/ahupp/python-magic/commit/0.4.27-37-g42980e5>
- Author: Adam Hupp <adam@hupp.org>
- Date: Sun May 26 18:06:37 2024 -0700
- --- a/test/python_magic_test.py
- +++ b/test/python_magic_test.py
- @@ -1,3 +1,5 @@
- +from dataclasses import dataclass
- +from enum import Enum
- import os
- import os.path
- import shutil
- @@ -17,11 +19,140 @@
-
- import magic
-
- +@dataclass
- +class TestFile:
- + file_name: str
- + mime_results: list[str]
- + text_results: list[str]
- + no_check_elf_results: list[str] | None
- + buf_equals_file: bool = True
-
- # magic_descriptor is broken (?) in centos 7, so don't run those tests
- SKIP_FROM_DESCRIPTOR = bool(os.environ.get("SKIP_FROM_DESCRIPTOR"))
-
-
- +COMMON_PLAIN = [
- + {},
- + {"check_soft": True},
- + {"check_soft": False},
- + {"check_json": True},
- + {"check_json": False},
- +]
- +
- +NO_SOFT = {"check_soft": False}
- +
- +COMMON_MIME = [{"mime": True, **k} for k in COMMON_PLAIN]
- +
- +CASES = {
- + "magic._pyc_": [
- + (COMMON_MIME, [
- + "application/octet-stream",
- + "text/x-bytecode.python",
- + "application/x-bytecode.python",
- + ]),
- + (COMMON_PLAIN, ["python 2.4 byte-compiled"]),
- + (NO_SOFT, ["data"]),
- + ],
- + "test.pdf": [
- + (COMMON_MIME, ["application/pdf"]),
- + (COMMON_PLAIN, [
- + "PDF document, version 1.2",
- + "PDF document, version 1.2, 2 pages",
- + "PDF document, version 1.2, 2 page(s)",
- + ]),
- + (NO_SOFT, ["ASCII text"]),
- + ],
- + "test.gz": [
- + (COMMON_MIME, ["application/gzip", "application/x-gzip"]),
- + (COMMON_PLAIN, [
- + 'gzip compressed data, was "test", from Unix, last modified: Sun Jun 29 01:32:52 2008',
- + 'gzip compressed data, was "test", last modified: Sun Jun 29 01:32:52 2008, from Unix',
- + 'gzip compressed data, was "test", last modified: Sun Jun 29 01:32:52 2008, from Unix, original size 15',
- + 'gzip compressed data, was "test", last modified: Sun Jun 29 01:32:52 2008, from Unix, original size modulo 2^32 15',
- + 'gzip compressed data, was "test", last modified: Sun Jun 29 01:32:52 2008, from Unix, truncated',
- + ]),
- + ({"extension": True}, [
- + # some versions return '' for the extensions of a gz file,
- + # including w/ the command line. Who knows...
- + "gz/tgz/tpz/zabw/svgz/adz/kmy/xcfgz",
- + "gz/tgz/tpz/zabw/svgz",
- + "",
- + "???",
- + ]),
- + (NO_SOFT, ["data"]),
- + ],
- + "test.snappy.parquet": [
- + (COMMON_MIME, ["application/octet-stream"]),
- + (COMMON_PLAIN, ["Apache Parquet", "Par archive data"]),
- + (NO_SOFT, ["data"]),
- + ],
- + "test.json": [
- + # TODO: soft, no_json
- + (COMMON_MIME, ["application/json"]),
- + (COMMON_PLAIN, ["JSON text data"]),
- + ({"mime": True, "check_json": False}, [
- + "data",
- + ]),
- + (NO_SOFT, ["JSON text data"])
- + ],
- + "elf-NetBSD-x86_64-echo": [
- + # TODO: soft, no elf
- + (COMMON_PLAIN, [
- + "ELF 64-bit LSB shared object, x86-64, version 1 (SYSV)",
- + "ELF 64-bit LSB pie executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /libexec/ld.elf_so, for NetBSD 8.0, not stripped",
- + ]),
- + (COMMON_MIME, [
- + "application/x-pie-executable",
- + "application/x-sharedlib",
- + ]),
- + ({"check_elf": False}, [
- + "ELF 64-bit LSB shared object, x86-64, version 1 (SYSV)",
- + ]),
- + # TODO: sometimes
- + # "ELF 64-bit LSB pie executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /libexec/ld.elf_so, for NetBSD 8.0, not stripped",
- +
- + (NO_SOFT, ["data"]),
- + ],
- + "test.txt": [
- + (COMMON_MIME, ["text/plain"]),
- + (COMMON_PLAIN, ["ASCII text"]),
- + ({"mime_encoding": True}, [
- + "us-ascii",
- + ]),
- + (NO_SOFT, ["ASCII text"]),
- + ],
- + "text-iso8859-1.txt": [
- + ({"mime_encoding": True}, [
- + "iso-8859-1",
- + ]),
- + ],
- + b"\xce\xbb": [
- + (COMMON_MIME, ["text/plain"]),
- + ],
- + "b\xce\xbb".decode("utf-8"): [
- + (COMMON_MIME, ["text/plain"]),
- + ],
- + "name_use.jpg": [
- + ({"extension": True}, [
- + "jpeg/jpg/jpe/jfif"
- + ]),
- + ],
- + "keep-going.jpg": [
- + (COMMON_MIME, [
- + "image/jpeg"
- + ]),
- + ({"mime": True, "keep_going": True}, [
- + "image/jpeg\\012- application/octet-stream",
- + ])
- + ],
- + "test.py": [
- + (COMMON_MIME, [
- + "text/x-python",
- + "text/x-script.python",
- + ])
- + ]
- +}
- +
- class MagicTest(unittest.TestCase):
- TESTDATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "testdata"))
-
- @@ -34,26 +165,6 @@
- def test_fs_encoding(self):
- self.assertEqual("utf-8", sys.getfilesystemencoding().lower())
-
- - def assert_values(self, m, expected_values, buf_equals_file=True):
- - for filename, expected_value in expected_values.items():
- - try:
- - filename = os.path.join(self.TESTDATA_DIR, filename)
- - except TypeError:
- - filename = os.path.join(self.TESTDATA_DIR.encode("utf-8"), filename)
- -
- - if type(expected_value) is not tuple:
- - expected_value = (expected_value,)
- -
- - with open(filename, "rb") as f:
- - buf_value = m.from_buffer(f.read())
- -
- - file_value = m.from_file(filename)
- -
- - if buf_equals_file:
- - self.assertEqual(buf_value, file_value)
- -
- - for value in (buf_value, file_value):
- - self.assertIn(value, expected_value)
-
- def test_from_file_str_and_bytes(self):
- filename = os.path.join(self.TESTDATA_DIR, "test.pdf")
- @@ -63,203 +174,34 @@
- "application/pdf", magic.from_file(filename.encode("utf-8"), mime=True)
- )
-
- - def test_from_descriptor_str_and_bytes(self):
- - if SKIP_FROM_DESCRIPTOR:
- - self.skipTest("magic_descriptor is broken in this version of libmagic")
-
- - filename = os.path.join(self.TESTDATA_DIR, "test.pdf")
- - with open(filename) as f:
- - self.assertEqual(
- - "application/pdf", magic.from_descriptor(f.fileno(), mime=True)
- - )
- - self.assertEqual(
- - "application/pdf", magic.from_descriptor(f.fileno(), mime=True)
- - )
- -
- - def test_from_buffer_str_and_bytes(self):
- - if SKIP_FROM_DESCRIPTOR:
- - self.skipTest("magic_descriptor is broken in this version of libmagic")
- - m = magic.Magic(mime=True)
- -
- - self.assertTrue(
- - m.from_buffer('#!/usr/bin/env python\nprint("foo")')
- - in ("text/x-python", "text/x-script.python")
- - )
- - self.assertTrue(
- - m.from_buffer(b'#!/usr/bin/env python\nprint("foo")')
- - in ("text/x-python", "text/x-script.python")
- - )
- -
- - def test_mime_types(self):
- + def test_all_cases(self):
- + # TODO:
- + # * MAGIC_EXTENSION not supported
- + # * keep_going not supported
- + # * buffer checks
- dest = os.path.join(MagicTest.TESTDATA_DIR, b"\xce\xbb".decode("utf-8"))
- shutil.copyfile(os.path.join(MagicTest.TESTDATA_DIR, "lambda"), dest)
- + os.environ["TZ"] = "UTC"
- try:
- - m = magic.Magic(mime=True)
- - self.assert_values(
- - m,
- - {
- - "elf-NetBSD-x86_64-echo": (
- - "application/x-pie-executable",
- - "application/x-sharedlib",
- - ),
- - "magic._pyc_": (
- - "application/octet-stream",
- - "text/x-bytecode.python",
- - "application/x-bytecode.python",
- - ),
- - "test.pdf": "application/pdf",
- - "test.gz": ("application/gzip", "application/x-gzip"),
- - "test.snappy.parquet": "application/octet-stream",
- - "text.txt": "text/plain",
- - b"\xce\xbb".decode("utf-8"): "text/plain",
- - b"\xce\xbb": "text/plain",
- - "test.json": "application/json",
- - },
- - buf_equals_file=False,
- - )
- - finally:
- - os.unlink(dest)
- -
- - # TODO: Fix this failing test on Ubuntu
- - @pytest.mark.skipif(sys.platform == "linux", reason="'JSON data' not found")
- - def test_descriptions(self):
- - m = magic.Magic()
- - os.environ["TZ"] = "UTC" # To get last modified date of test.gz in UTC
- - try:
- - self.assert_values(
- - m,
- - {
- - "elf-NetBSD-x86_64-echo": (
- - "ELF 64-bit LSB shared object, x86-64, version 1 (SYSV)",
- - "ELF 64-bit LSB pie executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /libexec/ld.elf_so, for NetBSD 8.0, not stripped",
- - ),
- - "magic._pyc_": "python 2.4 byte-compiled",
- - "test.pdf": (
- - "PDF document, version 1.2",
- - "PDF document, version 1.2, 2 pages",
- - "PDF document, version 1.2, 2 page(s)",
- - ),
- - "test.gz": (
- - 'gzip compressed data, was "test", from Unix, last '
- - "modified: Sun Jun 29 01:32:52 2008",
- - 'gzip compressed data, was "test", last modified'
- - ": Sun Jun 29 01:32:52 2008, from Unix",
- - 'gzip compressed data, was "test", last modified'
- - ": Sun Jun 29 01:32:52 2008, from Unix, original size 15",
- - 'gzip compressed data, was "test", '
- - "last modified: Sun Jun 29 01:32:52 2008, "
- - "from Unix, original size modulo 2^32 15",
- - 'gzip compressed data, was "test", last modified'
- - ": Sun Jun 29 01:32:52 2008, from Unix, truncated",
- - ),
- - "text.txt": "ASCII text",
- - "test.snappy.parquet": ("Apache Parquet", "Par archive data"),
- - "test.json": "JSON text data",
- - },
- - buf_equals_file=False,
- - )
- + for file_name, cases in CASES:
- + filename = os.path.join(self.TESTDATA_DIR, file_name)
- + for flags, outputs in cases:
- + m = magic.Magic(**flags)
- + with open(filename) as f:
- + self.assertIn(m.from_descriptor(f.fileno()), outputs)
- +
- + self.assertIn(m.from_file(filename), outputs)
- +
- + fname_bytes = filename.encode("utf-8")
- + self.assertIn(m.from_file(fname_bytes), outputs)
- +
- + with open(file_name, "rb") as f:
- + buf_result = m.from_buffer(f.read(1024))
- + self.assertIn(buf_result, outputs)
- finally:
- del os.environ["TZ"]
- -
- - # TODO: Fix this failing test on Ubuntu
- - @pytest.mark.skipif(sys.platform == "linux", reason="'JSON data' not found")
- - def test_descriptions_no_soft(self):
- - m = magic.Magic(check_soft=False)
- - self.assert_values(
- - m,
- - {
- - "elf-NetBSD-x86_64-echo": (
- - "data",
- - "ELF 64-bit LSB pie executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /libexec/ld.elf_so, for NetBSD 8.0, not stripped",
- - ),
- - "magic._pyc_": "data",
- - "test.pdf": "ASCII text",
- - "test.gz": "data",
- - "text.txt": "ASCII text",
- - "test.snappy.parquet": "data",
- - "test.json": "JSON text data",
- - },
- - buf_equals_file=False,
- - )
- -
- - def test_descriptions_no_elf(self):
- - m = magic.Magic(check_elf=False)
- - self.assert_values(
- - m,
- - {
- - "elf-NetBSD-x86_64-echo": "ELF 64-bit LSB shared object, x86-64, version 1 (SYSV)",
- - },
- - buf_equals_file=True,
- - )
- -
- - def test_descriptions_no_json(self):
- - m = magic.Magic(check_elf=False)
- - self.assert_values(
- - m,
- - {
- - "test.json": "data",
- - },
- - buf_equals_file=True,
- - )
- -
- - def test_descriptions_no_json_unchanged(self):
- - # verify non-json results are unchanged
- - m = magic.Magic(check_json=False)
- - os.environ["TZ"] = "UTC" # To get last modified date of test.gz in UTC
- - try:
- - self.assert_values(
- - m,
- - {
- - "elf-NetBSD-x86_64-echo": (
- - "ELF 64-bit LSB shared object, x86-64, version 1 (SYSV)",
- - "ELF 64-bit LSB pie executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /libexec/ld.elf_so, for NetBSD 8.0, not stripped",
- - ),
- - "magic._pyc_": "python 2.4 byte-compiled",
- - "test.pdf": (
- - "PDF document, version 1.2",
- - "PDF document, version 1.2, 2 pages",
- - "PDF document, version 1.2, 2 page(s)",
- - ),
- - "test.gz": (
- - 'gzip compressed data, was "test", from Unix, last '
- - "modified: Sun Jun 29 01:32:52 2008",
- - 'gzip compressed data, was "test", last modified'
- - ": Sun Jun 29 01:32:52 2008, from Unix",
- - 'gzip compressed data, was "test", last modified'
- - ": Sun Jun 29 01:32:52 2008, from Unix, original size 15",
- - 'gzip compressed data, was "test", '
- - "last modified: Sun Jun 29 01:32:52 2008, "
- - "from Unix, original size modulo 2^32 15",
- - 'gzip compressed data, was "test", last modified'
- - ": Sun Jun 29 01:32:52 2008, from Unix, truncated",
- - ),
- - "text.txt": "ASCII text",
- - "test.snappy.parquet": ("Apache Parquet", "Par archive data"),
- - },
- - buf_equals_file=False,
- - )
- - finally:
- - del os.environ["TZ"]
- -
- - def test_extension(self):
- - try:
- - m = magic.Magic(extension=True)
- - self.assert_values(
- - m,
- - {
- - # some versions return '' for the extensions of a gz file,
- - # including w/ the command line. Who knows...
- - "test.gz": (
- - "gz/tgz/tpz/zabw/svgz/adz/kmy/xcfgz",
- - "gz/tgz/tpz/zabw/svgz",
- - "",
- - "???",
- - ),
- - "name_use.jpg": "jpeg/jpg/jpe/jfif",
- - },
- - )
- - except NotImplementedError:
- - self.skipTest("MAGIC_EXTENSION not supported in this version")
- + os.unlink(dest)
-
- def test_unicode_result_nonraw(self):
- m = magic.Magic(raw=False)
- @@ -280,15 +222,6 @@
- else:
- raise unittest.SkipTest("Magic file doesn't return expected type.")
-
- - def test_mime_encodings(self):
- - m = magic.Magic(mime_encoding=True)
- - self.assert_values(
- - m,
- - {
- - "text-iso8859-1.txt": "iso-8859-1",
- - "text.txt": "us-ascii",
- - },
- - )
-
- def test_errors(self):
- m = magic.Magic()
- @@ -300,22 +233,6 @@
- finally:
- del os.environ["MAGIC"]
-
- - def test_keep_going(self):
- - filename = os.path.join(self.TESTDATA_DIR, "keep-going.jpg")
- -
- - m = magic.Magic(mime=True)
- - self.assertEqual(m.from_file(filename), "image/jpeg")
- -
- - try:
- - # this will throw if you have an "old" version of the library
- - # I'm otherwise not sure how to query if keep_going is supported
- - magic.version()
- - m = magic.Magic(mime=True, keep_going=True)
- - self.assertEqual(
- - m.from_file(filename), "image/jpeg\\012- application/octet-stream"
- - )
- - except NotImplementedError:
- - pass
-
- def test_rethrow(self):
- old = magic.magic_buffer
|