| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519 |
- Subject: Add MAGIC_SYMLINK support, and tests for same
- Origin: upstream, commit 0.4.27-19-g2a01b18 <https://github.com/ahupp/python-magic/commit/0.4.27-19-g2a01b18>
- Author: Adam Hupp <adam@hupp.org>
- Date: Fri Aug 25 11:02:53 2023 -0700
- --- a/CHANGELOG
- +++ b/CHANGELOG
- @@ -1,41 +1,51 @@
- +Changes to 0.4.29:
- +
- +- support MAGIC_SYMLINK (via follow_symlink flag on Magic constructor)
- +- correctly throw FileNotFoundError depending on flag
- +
- Changes to 0.4.28:
- - - support "magic-1.dll" on Windows, which is produced by vcpkg
- - - add python 3.10 to tox config
- - - update test for upstream gzip extensions
- +
- +- support "magic-1.dll" on Windows, which is produced by vcpkg
- +- add python 3.10 to tox config
- +- update test for upstream gzip extensions
-
- Changes to 0.4.27:
- - - remove spurious pyproject.toml that breaks source builds
- +
- +- remove spurious pyproject.toml that breaks source builds
-
- Changes to 0.4.26:
- - - Use tox for all multi-version testing
- - - Fix use of pytest, use it via tox
- +
- +- Use tox for all multi-version testing
- +- Fix use of pytest, use it via tox
-
- Changes to 0.4.25:
- - - Support os.PathLike values in Magic.from_file and magic.from_file
- - - Handle some versions of libmagic that return mime string without charset
- - - Fix tests for file 5.41
- - - Include typing stub in package
- +
- +- Support os.PathLike values in Magic.from_file and magic.from_file
- +- Handle some versions of libmagic that return mime string without charset
- +- Fix tests for file 5.41
- +- Include typing stub in package
-
- Changes to 0.4.24:
- - - Fix regression in library loading on some Alpine docker images.
- +
- +- Fix regression in library loading on some Alpine docker images.
-
- Changes to 0.4.23
-
- - - Include a `py.typed` sentinal to enable type checking
- - - Improve fix for attribute error during destruction
- - - Cleanup library loading logic
- - - Add new homebrew library dir for OSX
- +- Include a `py.typed` sentinel to enable type checking
- +- Improve fix for attribute error during destruction
- +- Cleanup library loading logic
- +- Add new homebrew library dir for OSX
-
- Changes to 0.4.21, 0.4.22
-
- - - Unify dll loader between the standard and compat library, fixing load
- - failures on some previously supported platforms.
- +- Unify dll loader between the standard and compat library, fixing load
- + failures on some previously supported platforms.
-
- Changes to 0.4.20
-
- - merge in a compatibility layer for the upstream libmagic python binding.
- Since both this package and that one are called 'magic', this compat layer
- - removes a very common source of runtime errors. Use of that libmagic API will
- + removes a very common source of runtime errors. Use of that libmagic API will
- produce a deprecation warning.
-
- - support python 3.9 in tests and pypi metadata
- @@ -44,9 +54,9 @@
- rather than a filename.
-
- - sometimes the returned description includes snippets of the file, e.g a title
- - for MS Word docs. Since this is in an unknown encoding, we would throw a
- - unicode decode error trying to decode. Now, it decodes with
- - 'backslashreplace' to handle this more gracefully. The undecodable characters
- + for MS Word docs. Since this is in an unknown encoding, we would throw a
- + unicode decode error trying to decode. Now, it decodes with
- + 'backslashreplace' to handle this more gracefully. The undecodable characters
- are replaced with hex escapes.
-
- - add support for MAGIC_EXTENSION, to return possible file extensions.
- @@ -55,18 +65,18 @@
-
- Changes in 0.4.18
-
- -- Make bindings for magic_[set|get]param optional, and throw NotImplementedError
- -if they are used but not supported. Only call setparam() in the constructor if
- -it's supported. This prevents breakage on CentOS7 which uses an old version of
- -libmagic.
- +- Make bindings for magic\_[set|get]param optional, and throw NotImplementedError
- + if they are used but not supported. Only call setparam() in the constructor if
- + it's supported. This prevents breakage on CentOS7 which uses an old version of
- + libmagic.
-
- - Add tests for CentOS 7 & 8
-
- Changes in 0.4.16 and 0.4.17
-
- - add MAGIC_MIME_TYPE constant, use that in preference to MAGIC_MIME internally.
- -This sets up for a breaking change in a future major version bump where
- -MAGIC_MIME will change to mathch magic.h.
- + This sets up for a breaking change in a future major version bump where
- + MAGIC_MIME will change to match magic.h.
- - add magic.version() function to return library version
- - add setparam/getparam to control internal behavior
- - increase internal limits with setparam to prevent spurious error on some jpeg files
- @@ -76,12 +86,12 @@
- - include tests in source distribution
-
- - many test improvements:
- --- tox runner support
- --- remove deprecated test_suite field from setup.py
- --- docker tests that cover all LTS ubuntu versions
- --- add test for snapp file identification
- + -- tox runner support
- + -- remove deprecated test_suite field from setup.py
- + -- docker tests that cover all LTS ubuntu versions
- + -- add test for snappy file identification
-
- - doc improvements
- --- document dependency install process for debian
- --- various typos
- --- document test running process
- + -- document dependency install process for debian
- + -- various typos
- + -- document test running process
- --- a/magic/__init__.py
- +++ b/magic/__init__.py
- @@ -39,7 +39,8 @@
- """
-
- def __init__(self, mime=False, magic_file=None, mime_encoding=False,
- - keep_going=False, uncompress=False, raw=False, extension=False):
- + keep_going=False, uncompress=False, raw=False, extension=False,
- + follow_symlinks=False):
- """
- Create a new libmagic wrapper.
-
- @@ -65,6 +66,9 @@
- if extension:
- self.flags |= MAGIC_EXTENSION
-
- + if follow_symlinks:
- + self.flags |= MAGIC_SYMLINK
- +
- self.cookie = magic_open(self.flags)
- self.lock = threading.Lock()
-
- --- a/test/README
- +++ b/test/README
- @@ -1,6 +1,4 @@
- There are a few ways to run the python-magic tests
-
- - 1. `pytest` will run the test suite against your default version of python
- - 2. `./test/run_all_versions.py` will run the tests against all installed versions of python.
- - 3. `./test/run_all_docker_test.sh` will run against a variety of different Linux distributions, using docker.
- -
- +1. `tox` will run the tests against all installed versions of python
- +2. `./test/run_all_docker_test.sh` will run against a variety of different Linux distributions, using docker.
- --- a/test/python_magic_test.py
- +++ b/test/python_magic_test.py
- @@ -1,9 +1,10 @@
- +import tempfile
- import os
-
- # for output which reports a local time
- -os.environ['TZ'] = 'GMT'
- +os.environ["TZ"] = "GMT"
-
- -if os.environ.get('LC_ALL', '') != 'en_US.UTF-8':
- +if os.environ.get("LC_ALL", "") != "en_US.UTF-8":
- # this ensure we're in a utf-8 default filesystem encoding which is
- # necessary for some tests
- raise Exception("must run `export LC_ALL=en_US.UTF-8` before running test suite")
- @@ -16,10 +17,11 @@
- import sys
-
- # magic_descriptor is broken (?) in centos 7, so don't run those tests
- -SKIP_FROM_DESCRIPTOR = bool(os.environ.get('SKIP_FROM_DESCRIPTOR'))
- +SKIP_FROM_DESCRIPTOR = bool(os.environ.get("SKIP_FROM_DESCRIPTOR"))
- +
-
- class MagicTest(unittest.TestCase):
- - TESTDATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), 'testdata'))
- + TESTDATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "testdata"))
-
- def test_version(self):
- try:
- @@ -28,20 +30,19 @@
- pass
-
- def test_fs_encoding(self):
- - self.assertEqual('utf-8', sys.getfilesystemencoding().lower())
- + self.assertEqual("utf-8", sys.getfilesystemencoding().lower())
-
- def assert_values(self, m, expected_values, buf_equals_file=True):
- for filename, expected_value in expected_values.items():
- try:
- filename = os.path.join(self.TESTDATA_DIR, filename)
- except TypeError:
- - filename = os.path.join(
- - self.TESTDATA_DIR.encode('utf-8'), filename)
- + filename = os.path.join(self.TESTDATA_DIR.encode("utf-8"), filename)
-
- if type(expected_value) is not tuple:
- expected_value = (expected_value,)
-
- - with open(filename, 'rb') as f:
- + with open(filename, "rb") as f:
- buf_value = m.from_buffer(f.read())
-
- file_value = m.from_file(filename)
- @@ -55,10 +56,10 @@
- def test_from_file_str_and_bytes(self):
- filename = os.path.join(self.TESTDATA_DIR, "test.pdf")
-
- - self.assertEqual('application/pdf',
- - magic.from_file(filename, mime=True))
- - self.assertEqual('application/pdf',
- - magic.from_file(filename.encode('utf-8'), mime=True))
- + self.assertEqual("application/pdf", magic.from_file(filename, mime=True))
- + self.assertEqual(
- + "application/pdf", magic.from_file(filename.encode("utf-8"), mime=True)
- + )
-
- def test_from_descriptor_str_and_bytes(self):
- if SKIP_FROM_DESCRIPTOR:
- @@ -66,10 +67,12 @@
-
- filename = os.path.join(self.TESTDATA_DIR, "test.pdf")
- with open(filename) as f:
- - self.assertEqual('application/pdf',
- - magic.from_descriptor(f.fileno(), mime=True))
- - self.assertEqual('application/pdf',
- - magic.from_descriptor(f.fileno(), mime=True))
- + self.assertEqual(
- + "application/pdf", magic.from_descriptor(f.fileno(), mime=True)
- + )
- + self.assertEqual(
- + "application/pdf", magic.from_descriptor(f.fileno(), mime=True)
- + )
-
- def test_from_buffer_str_and_bytes(self):
- if SKIP_FROM_DESCRIPTOR:
- @@ -78,125 +81,151 @@
-
- self.assertTrue(
- m.from_buffer('#!/usr/bin/env python\nprint("foo")')
- - in ("text/x-python", "text/x-script.python"))
- + in ("text/x-python", "text/x-script.python")
- + )
- self.assertTrue(
- m.from_buffer(b'#!/usr/bin/env python\nprint("foo")')
- - in ("text/x-python", "text/x-script.python"))
- + in ("text/x-python", "text/x-script.python")
- + )
-
- def test_mime_types(self):
- - dest = os.path.join(MagicTest.TESTDATA_DIR,
- - b'\xce\xbb'.decode('utf-8'))
- - shutil.copyfile(os.path.join(MagicTest.TESTDATA_DIR, 'lambda'), dest)
- + dest = os.path.join(MagicTest.TESTDATA_DIR, b"\xce\xbb".decode("utf-8"))
- + shutil.copyfile(os.path.join(MagicTest.TESTDATA_DIR, "lambda"), dest)
- try:
- m = magic.Magic(mime=True)
- - self.assert_values(m, {
- - 'magic._pyc_': ('application/octet-stream', 'text/x-bytecode.python', 'application/x-bytecode.python'),
- - 'test.pdf': 'application/pdf',
- - 'test.gz': ('application/gzip', 'application/x-gzip'),
- - 'test.snappy.parquet': 'application/octet-stream',
- - 'text.txt': 'text/plain',
- - b'\xce\xbb'.decode('utf-8'): 'text/plain',
- - b'\xce\xbb': 'text/plain',
- - })
- + self.assert_values(
- + m,
- + {
- + "magic._pyc_": (
- + "application/octet-stream",
- + "text/x-bytecode.python",
- + "application/x-bytecode.python",
- + ),
- + "test.pdf": "application/pdf",
- + "test.gz": ("application/gzip", "application/x-gzip"),
- + "test.snappy.parquet": "application/octet-stream",
- + "text.txt": "text/plain",
- + b"\xce\xbb".decode("utf-8"): "text/plain",
- + b"\xce\xbb": "text/plain",
- + },
- + )
- finally:
- os.unlink(dest)
-
- def test_descriptions(self):
- m = magic.Magic()
- - os.environ['TZ'] = 'UTC' # To get last modified date of test.gz in UTC
- + os.environ["TZ"] = "UTC" # To get last modified date of test.gz in UTC
- try:
- - self.assert_values(m, {
- - 'magic._pyc_': 'python 2.4 byte-compiled',
- - 'test.pdf': ('PDF document, version 1.2',
- - 'PDF document, version 1.2, 2 pages',
- - 'PDF document, version 1.2, 2 page(s)'),
- - 'test.gz':
- - ('gzip compressed data, was "test", from Unix, last '
- - 'modified: Sun Jun 29 01:32:52 2008',
- - 'gzip compressed data, was "test", last modified'
- - ': Sun Jun 29 01:32:52 2008, from Unix',
- - 'gzip compressed data, was "test", last modified'
- - ': Sun Jun 29 01:32:52 2008, from Unix, original size 15',
- - 'gzip compressed data, was "test", '
- - 'last modified: Sun Jun 29 01:32:52 2008, '
- - 'from Unix, original size modulo 2^32 15',
- - 'gzip compressed data, was "test", last modified'
- - ': Sun Jun 29 01:32:52 2008, from Unix, truncated'
- - ),
- - 'text.txt': 'ASCII text',
- - 'test.snappy.parquet': ('Apache Parquet', 'Par archive data'),
- - }, buf_equals_file=False)
- + self.assert_values(
- + m,
- + {
- + "magic._pyc_": "python 2.4 byte-compiled",
- + "test.pdf": (
- + "PDF document, version 1.2",
- + "PDF document, version 1.2, 2 pages",
- + "PDF document, version 1.2, 2 page(s)",
- + ),
- + "test.gz": (
- + 'gzip compressed data, was "test", from Unix, last '
- + "modified: Sun Jun 29 01:32:52 2008",
- + 'gzip compressed data, was "test", last modified'
- + ": Sun Jun 29 01:32:52 2008, from Unix",
- + 'gzip compressed data, was "test", last modified'
- + ": Sun Jun 29 01:32:52 2008, from Unix, original size 15",
- + 'gzip compressed data, was "test", '
- + "last modified: Sun Jun 29 01:32:52 2008, "
- + "from Unix, original size modulo 2^32 15",
- + 'gzip compressed data, was "test", last modified'
- + ": Sun Jun 29 01:32:52 2008, from Unix, truncated",
- + ),
- + "text.txt": "ASCII text",
- + "test.snappy.parquet": ("Apache Parquet", "Par archive data"),
- + },
- + buf_equals_file=False,
- + )
- finally:
- - del os.environ['TZ']
- + del os.environ["TZ"]
-
- def test_extension(self):
- try:
- m = magic.Magic(extension=True)
- - self.assert_values(m, {
- - # some versions return '' for the extensions of a gz file,
- - # including w/ the command line. Who knows...
- - 'test.gz': ('gz/tgz/tpz/zabw/svgz/adz/kmy/xcfgz', 'gz/tgz/tpz/zabw/svgz', '', '???'),
- - 'name_use.jpg': 'jpeg/jpg/jpe/jfif',
- - })
- + self.assert_values(
- + m,
- + {
- + # some versions return '' for the extensions of a gz file,
- + # including w/ the command line. Who knows...
- + "test.gz": (
- + "gz/tgz/tpz/zabw/svgz/adz/kmy/xcfgz",
- + "gz/tgz/tpz/zabw/svgz",
- + "",
- + "???",
- + ),
- + "name_use.jpg": "jpeg/jpg/jpe/jfif",
- + },
- + )
- except NotImplementedError:
- - self.skipTest('MAGIC_EXTENSION not supported in this version')
- + self.skipTest("MAGIC_EXTENSION not supported in this version")
-
- def test_unicode_result_nonraw(self):
- m = magic.Magic(raw=False)
- - src = os.path.join(MagicTest.TESTDATA_DIR, 'pgpunicode')
- + src = os.path.join(MagicTest.TESTDATA_DIR, "pgpunicode")
- result = m.from_file(src)
- # NOTE: This check is added as otherwise some magic files don't identify the test case as a PGP key.
- - if 'PGP' in result:
- + if "PGP" in result:
- assert r"PGP\011Secret Sub-key -" == result
- else:
- raise unittest.SkipTest("Magic file doesn't return expected type.")
-
- def test_unicode_result_raw(self):
- m = magic.Magic(raw=True)
- - src = os.path.join(MagicTest.TESTDATA_DIR, 'pgpunicode')
- + src = os.path.join(MagicTest.TESTDATA_DIR, "pgpunicode")
- result = m.from_file(src)
- - if 'PGP' in result:
- - assert b'PGP\tSecret Sub-key -' == result.encode('utf-8')
- + if "PGP" in result:
- + assert b"PGP\tSecret Sub-key -" == result.encode("utf-8")
- else:
- raise unittest.SkipTest("Magic file doesn't return expected type.")
-
- def test_mime_encodings(self):
- m = magic.Magic(mime_encoding=True)
- - self.assert_values(m, {
- - 'text-iso8859-1.txt': 'iso-8859-1',
- - 'text.txt': 'us-ascii',
- - })
- + self.assert_values(
- + m,
- + {
- + "text-iso8859-1.txt": "iso-8859-1",
- + "text.txt": "us-ascii",
- + },
- + )
-
- def test_errors(self):
- m = magic.Magic()
- - self.assertRaises(IOError, m.from_file, 'nonexistent')
- - self.assertRaises(magic.MagicException, magic.Magic,
- - magic_file='nonexistent')
- - os.environ['MAGIC'] = 'nonexistent'
- + self.assertRaises(IOError, m.from_file, "nonexistent")
- + self.assertRaises(magic.MagicException, magic.Magic, magic_file="nonexistent")
- + os.environ["MAGIC"] = "nonexistent"
- try:
- self.assertRaises(magic.MagicException, magic.Magic)
- finally:
- - del os.environ['MAGIC']
- + del os.environ["MAGIC"]
-
- def test_keep_going(self):
- - filename = os.path.join(self.TESTDATA_DIR, 'keep-going.jpg')
- + filename = os.path.join(self.TESTDATA_DIR, "keep-going.jpg")
-
- m = magic.Magic(mime=True)
- - self.assertEqual(m.from_file(filename), 'image/jpeg')
- + self.assertEqual(m.from_file(filename), "image/jpeg")
-
- try:
- # this will throw if you have an "old" version of the library
- # I'm otherwise not sure how to query if keep_going is supported
- magic.version()
- m = magic.Magic(mime=True, keep_going=True)
- - self.assertEqual(m.from_file(filename),
- - 'image/jpeg\\012- application/octet-stream')
- + self.assertEqual(
- + m.from_file(filename), "image/jpeg\\012- application/octet-stream"
- + )
- except NotImplementedError:
- pass
-
- def test_rethrow(self):
- old = magic.magic_buffer
- try:
- +
- def t(x, y):
- raise magic.MagicException("passthrough")
-
- @@ -217,16 +246,47 @@
-
- def test_name_count(self):
- m = magic.Magic()
- - with open(os.path.join(self.TESTDATA_DIR, 'name_use.jpg'), 'rb') as f:
- + with open(os.path.join(self.TESTDATA_DIR, "name_use.jpg"), "rb") as f:
- m.from_buffer(f.read())
-
- def test_pathlike(self):
- if sys.version_info < (3, 6):
- return
- from pathlib import Path
- - path = Path(self.TESTDATA_DIR, "test.pdf")
- +
- + path = Path(self.TESTDATA_DIR, "test.pdf")
- m = magic.Magic(mime=True)
- - self.assertEqual('application/pdf', m.from_file(path))
- + self.assertEqual("application/pdf", m.from_file(path))
- +
- + def test_symlink(self):
- + # TODO: 3.0
- + if not hasattr(tempfile, "TemporaryDirectory"):
- + return
- +
- + with tempfile.TemporaryDirectory() as tmp:
- + tmp_link = os.path.join(tmp, "test_link")
- + tmp_broken = os.path.join(tmp, "nonexistent")
- +
- + os.symlink(
- + os.path.join(self.TESTDATA_DIR, "test.pdf"),
- + tmp_link,
- + )
- +
- + os.symlink("/nonexistent", tmp_broken)
- +
- + m = magic.Magic()
- + m_follow = magic.Magic(follow_symlinks=True)
- + self.assertTrue(m.from_file(tmp_link).startswith("symbolic link to "))
- + self.assertTrue(m_follow.from_file(tmp_link).startswith("PDF document"))
- +
- + self.assertTrue(
- + m.from_file(tmp_broken).startswith(
- + "broken symbolic link to /nonexistent"
- + )
- + )
- +
- + self.assertRaises(IOError, m_follow.from_file, tmp_broken)
- +
-
- -if __name__ == '__main__':
- +if __name__ == "__main__":
- unittest.main()
|