Subject: Add MAGIC_SYMLINK support, and tests for same Origin: upstream, commit 0.4.27-19-g2a01b18 Author: Adam Hupp Date: Fri Aug 25 11:02:53 2023 -0700 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,41 +1,51 @@ +Changes to 0.4.29: + +- support MAGIC_SYMLINK (via follow_symlink flag on Magic constructor) +- correctly throw FileNotFoundException depending on flag + Changes to 0.4.28: - - support "magic-1.dll" on Windows, which is produced by vcpkg - - add python 3.10 to tox config - - update test for upstream gzip extensions + +- support "magic-1.dll" on Windows, which is produced by vcpkg +- add python 3.10 to tox config +- update test for upstream gzip extensions Changes to 0.4.27: - - remove spurious pyproject.toml that breaks source builds + +- remove spurious pyproject.toml that breaks source builds Changes to 0.4.26: - - Use tox for all multi-version testing - - Fix use of pytest, use it via tox + +- Use tox for all multi-version testing +- Fix use of pytest, use it via tox Changes to 0.4.25: - - Support os.PathLike values in Magic.from_file and magic.from_file - - Handle some versions of libmagic that return mime string without charset - - Fix tests for file 5.41 - - Include typing stub in package + +- Support os.PathLike values in Magic.from_file and magic.from_file +- Handle some versions of libmagic that return mime string without charset +- Fix tests for file 5.41 +- Include typing stub in package Changes to 0.4.24: - - Fix regression in library loading on some Alpine docker images. + +- Fix regression in library loading on some Alpine docker images. Changes to 0.4.23 - - Include a `py.typed` sentinal to enable type checking - - Improve fix for attribute error during destruction - - Cleanup library loading logic - - Add new homebrew library dir for OSX +- Include a `py.typed` sentinal to enable type checking +- Improve fix for attribute error during destruction +- Cleanup library loading logic +- Add new homebrew library dir for OSX Changes to 0.4.21, 0.4.22 - - Unify dll loader between the standard and compat library, fixing load - failures on some previously supported platforms. +- Unify dll loader between the standard and compat library, fixing load + failures on some previously supported platforms. Changes to 0.4.20 - merge in a compatibility layer for the upstream libmagic python binding. Since both this package and that one are called 'magic', this compat layer - removes a very common source of runtime errors. Use of that libmagic API will + removes a very common source of runtime errors. Use of that libmagic API will produce a deprecation warning. - support python 3.9 in tests and pypi metadata @@ -44,9 +54,9 @@ rather than a filename. - sometimes the returned description includes snippets of the file, e.g a title - for MS Word docs. Since this is in an unknown encoding, we would throw a - unicode decode error trying to decode. Now, it decodes with - 'backslashreplace' to handle this more gracefully. The undecodable characters + for MS Word docs. Since this is in an unknown encoding, we would throw a + unicode decode error trying to decode. Now, it decodes with + 'backslashreplace' to handle this more gracefully. The undecodable characters are replaced with hex escapes. - add support for MAGIC_EXTENSION, to return possible file extensions. @@ -55,18 +65,18 @@ Changes in 0.4.18 -- Make bindings for magic_[set|get]param optional, and throw NotImplementedError -if they are used but not supported. Only call setparam() in the constructor if -it's supported. This prevents breakage on CentOS7 which uses an old version of -libmagic. +- Make bindings for magic\_[set|get]param optional, and throw NotImplementedError + if they are used but not supported. Only call setparam() in the constructor if + it's supported. This prevents breakage on CentOS7 which uses an old version of + libmagic. - Add tests for CentOS 7 & 8 Changes in 0.4.16 and 0.4.17 - add MAGIC_MIME_TYPE constant, use that in preference to MAGIC_MIME internally. -This sets up for a breaking change in a future major version bump where -MAGIC_MIME will change to mathch magic.h. + This sets up for a breaking change in a future major version bump where + MAGIC_MIME will change to mathch magic.h. - add magic.version() function to return library version - add setparam/getparam to control internal behavior - increase internal limits with setparam to prevent spurious error on some jpeg files @@ -76,12 +86,12 @@ - include tests in source distribution - many test improvements: --- tox runner support --- remove deprecated test_suite field from setup.py --- docker tests that cover all LTS ubuntu versions --- add test for snapp file identification + -- tox runner support + -- remove deprecated test_suite field from setup.py + -- docker tests that cover all LTS ubuntu versions + -- add test for snapp file identification - doc improvements --- document dependency install process for debian --- various typos --- document test running process + -- document dependency install process for debian + -- various typos + -- document test running process --- a/magic/__init__.py +++ b/magic/__init__.py @@ -39,7 +39,8 @@ """ def __init__(self, mime=False, magic_file=None, mime_encoding=False, - keep_going=False, uncompress=False, raw=False, extension=False): + keep_going=False, uncompress=False, raw=False, extension=False, + follow_symlinks=False): """ Create a new libmagic wrapper. @@ -65,6 +66,9 @@ if extension: self.flags |= MAGIC_EXTENSION + if follow_symlinks: + self.flags |= MAGIC_SYMLINK + self.cookie = magic_open(self.flags) self.lock = threading.Lock() --- a/test/README +++ b/test/README @@ -1,6 +1,4 @@ There are a few ways to run the python-magic tests - 1. `pytest` will run the test suite against your default version of python - 2. `./test/run_all_versions.py` will run the tests against all installed versions of python. - 3. `./test/run_all_docker_test.sh` will run against a variety of different Linux distributions, using docker. - +1. `tox` will run the tests against all installed versions of python +2. `./test/run_all_docker_test.sh` will run against a variety of different Linux distributions, using docker. --- a/test/python_magic_test.py +++ b/test/python_magic_test.py @@ -1,9 +1,10 @@ +import tempfile import os # for output which reports a local time -os.environ['TZ'] = 'GMT' +os.environ["TZ"] = "GMT" -if os.environ.get('LC_ALL', '') != 'en_US.UTF-8': +if os.environ.get("LC_ALL", "") != "en_US.UTF-8": # this ensure we're in a utf-8 default filesystem encoding which is # necessary for some tests raise Exception("must run `export LC_ALL=en_US.UTF-8` before running test suite") @@ -16,10 +17,11 @@ import sys # magic_descriptor is broken (?) in centos 7, so don't run those tests -SKIP_FROM_DESCRIPTOR = bool(os.environ.get('SKIP_FROM_DESCRIPTOR')) +SKIP_FROM_DESCRIPTOR = bool(os.environ.get("SKIP_FROM_DESCRIPTOR")) + class MagicTest(unittest.TestCase): - TESTDATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), 'testdata')) + TESTDATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "testdata")) def test_version(self): try: @@ -28,20 +30,19 @@ pass def test_fs_encoding(self): - self.assertEqual('utf-8', sys.getfilesystemencoding().lower()) + self.assertEqual("utf-8", sys.getfilesystemencoding().lower()) def assert_values(self, m, expected_values, buf_equals_file=True): for filename, expected_value in expected_values.items(): try: filename = os.path.join(self.TESTDATA_DIR, filename) except TypeError: - filename = os.path.join( - self.TESTDATA_DIR.encode('utf-8'), filename) + filename = os.path.join(self.TESTDATA_DIR.encode("utf-8"), filename) if type(expected_value) is not tuple: expected_value = (expected_value,) - with open(filename, 'rb') as f: + with open(filename, "rb") as f: buf_value = m.from_buffer(f.read()) file_value = m.from_file(filename) @@ -55,10 +56,10 @@ def test_from_file_str_and_bytes(self): filename = os.path.join(self.TESTDATA_DIR, "test.pdf") - self.assertEqual('application/pdf', - magic.from_file(filename, mime=True)) - self.assertEqual('application/pdf', - magic.from_file(filename.encode('utf-8'), mime=True)) + self.assertEqual("application/pdf", magic.from_file(filename, mime=True)) + self.assertEqual( + "application/pdf", magic.from_file(filename.encode("utf-8"), mime=True) + ) def test_from_descriptor_str_and_bytes(self): if SKIP_FROM_DESCRIPTOR: @@ -66,10 +67,12 @@ filename = os.path.join(self.TESTDATA_DIR, "test.pdf") with open(filename) as f: - self.assertEqual('application/pdf', - magic.from_descriptor(f.fileno(), mime=True)) - self.assertEqual('application/pdf', - magic.from_descriptor(f.fileno(), mime=True)) + self.assertEqual( + "application/pdf", magic.from_descriptor(f.fileno(), mime=True) + ) + self.assertEqual( + "application/pdf", magic.from_descriptor(f.fileno(), mime=True) + ) def test_from_buffer_str_and_bytes(self): if SKIP_FROM_DESCRIPTOR: @@ -78,125 +81,151 @@ self.assertTrue( m.from_buffer('#!/usr/bin/env python\nprint("foo")') - in ("text/x-python", "text/x-script.python")) + in ("text/x-python", "text/x-script.python") + ) self.assertTrue( m.from_buffer(b'#!/usr/bin/env python\nprint("foo")') - in ("text/x-python", "text/x-script.python")) + in ("text/x-python", "text/x-script.python") + ) def test_mime_types(self): - dest = os.path.join(MagicTest.TESTDATA_DIR, - b'\xce\xbb'.decode('utf-8')) - shutil.copyfile(os.path.join(MagicTest.TESTDATA_DIR, 'lambda'), dest) + dest = os.path.join(MagicTest.TESTDATA_DIR, b"\xce\xbb".decode("utf-8")) + shutil.copyfile(os.path.join(MagicTest.TESTDATA_DIR, "lambda"), dest) try: m = magic.Magic(mime=True) - self.assert_values(m, { - 'magic._pyc_': ('application/octet-stream', 'text/x-bytecode.python', 'application/x-bytecode.python'), - 'test.pdf': 'application/pdf', - 'test.gz': ('application/gzip', 'application/x-gzip'), - 'test.snappy.parquet': 'application/octet-stream', - 'text.txt': 'text/plain', - b'\xce\xbb'.decode('utf-8'): 'text/plain', - b'\xce\xbb': 'text/plain', - }) + self.assert_values( + m, + { + "magic._pyc_": ( + "application/octet-stream", + "text/x-bytecode.python", + "application/x-bytecode.python", + ), + "test.pdf": "application/pdf", + "test.gz": ("application/gzip", "application/x-gzip"), + "test.snappy.parquet": "application/octet-stream", + "text.txt": "text/plain", + b"\xce\xbb".decode("utf-8"): "text/plain", + b"\xce\xbb": "text/plain", + }, + ) finally: os.unlink(dest) def test_descriptions(self): m = magic.Magic() - os.environ['TZ'] = 'UTC' # To get last modified date of test.gz in UTC + os.environ["TZ"] = "UTC" # To get last modified date of test.gz in UTC try: - self.assert_values(m, { - 'magic._pyc_': 'python 2.4 byte-compiled', - 'test.pdf': ('PDF document, version 1.2', - 'PDF document, version 1.2, 2 pages', - 'PDF document, version 1.2, 2 page(s)'), - 'test.gz': - ('gzip compressed data, was "test", from Unix, last ' - 'modified: Sun Jun 29 01:32:52 2008', - 'gzip compressed data, was "test", last modified' - ': Sun Jun 29 01:32:52 2008, from Unix', - 'gzip compressed data, was "test", last modified' - ': Sun Jun 29 01:32:52 2008, from Unix, original size 15', - 'gzip compressed data, was "test", ' - 'last modified: Sun Jun 29 01:32:52 2008, ' - 'from Unix, original size modulo 2^32 15', - 'gzip compressed data, was "test", last modified' - ': Sun Jun 29 01:32:52 2008, from Unix, truncated' - ), - 'text.txt': 'ASCII text', - 'test.snappy.parquet': ('Apache Parquet', 'Par archive data'), - }, buf_equals_file=False) + self.assert_values( + m, + { + "magic._pyc_": "python 2.4 byte-compiled", + "test.pdf": ( + "PDF document, version 1.2", + "PDF document, version 1.2, 2 pages", + "PDF document, version 1.2, 2 page(s)", + ), + "test.gz": ( + 'gzip compressed data, was "test", from Unix, last ' + "modified: Sun Jun 29 01:32:52 2008", + 'gzip compressed data, was "test", last modified' + ": Sun Jun 29 01:32:52 2008, from Unix", + 'gzip compressed data, was "test", last modified' + ": Sun Jun 29 01:32:52 2008, from Unix, original size 15", + 'gzip compressed data, was "test", ' + "last modified: Sun Jun 29 01:32:52 2008, " + "from Unix, original size modulo 2^32 15", + 'gzip compressed data, was "test", last modified' + ": Sun Jun 29 01:32:52 2008, from Unix, truncated", + ), + "text.txt": "ASCII text", + "test.snappy.parquet": ("Apache Parquet", "Par archive data"), + }, + buf_equals_file=False, + ) finally: - del os.environ['TZ'] + del os.environ["TZ"] def test_extension(self): try: m = magic.Magic(extension=True) - self.assert_values(m, { - # some versions return '' for the extensions of a gz file, - # including w/ the command line. Who knows... - 'test.gz': ('gz/tgz/tpz/zabw/svgz/adz/kmy/xcfgz', 'gz/tgz/tpz/zabw/svgz', '', '???'), - 'name_use.jpg': 'jpeg/jpg/jpe/jfif', - }) + self.assert_values( + m, + { + # some versions return '' for the extensions of a gz file, + # including w/ the command line. Who knows... + "test.gz": ( + "gz/tgz/tpz/zabw/svgz/adz/kmy/xcfgz", + "gz/tgz/tpz/zabw/svgz", + "", + "???", + ), + "name_use.jpg": "jpeg/jpg/jpe/jfif", + }, + ) except NotImplementedError: - self.skipTest('MAGIC_EXTENSION not supported in this version') + self.skipTest("MAGIC_EXTENSION not supported in this version") def test_unicode_result_nonraw(self): m = magic.Magic(raw=False) - src = os.path.join(MagicTest.TESTDATA_DIR, 'pgpunicode') + src = os.path.join(MagicTest.TESTDATA_DIR, "pgpunicode") result = m.from_file(src) # NOTE: This check is added as otherwise some magic files don't identify the test case as a PGP key. - if 'PGP' in result: + if "PGP" in result: assert r"PGP\011Secret Sub-key -" == result else: raise unittest.SkipTest("Magic file doesn't return expected type.") def test_unicode_result_raw(self): m = magic.Magic(raw=True) - src = os.path.join(MagicTest.TESTDATA_DIR, 'pgpunicode') + src = os.path.join(MagicTest.TESTDATA_DIR, "pgpunicode") result = m.from_file(src) - if 'PGP' in result: - assert b'PGP\tSecret Sub-key -' == result.encode('utf-8') + if "PGP" in result: + assert b"PGP\tSecret Sub-key -" == result.encode("utf-8") else: raise unittest.SkipTest("Magic file doesn't return expected type.") def test_mime_encodings(self): m = magic.Magic(mime_encoding=True) - self.assert_values(m, { - 'text-iso8859-1.txt': 'iso-8859-1', - 'text.txt': 'us-ascii', - }) + self.assert_values( + m, + { + "text-iso8859-1.txt": "iso-8859-1", + "text.txt": "us-ascii", + }, + ) def test_errors(self): m = magic.Magic() - self.assertRaises(IOError, m.from_file, 'nonexistent') - self.assertRaises(magic.MagicException, magic.Magic, - magic_file='nonexistent') - os.environ['MAGIC'] = 'nonexistent' + self.assertRaises(IOError, m.from_file, "nonexistent") + self.assertRaises(magic.MagicException, magic.Magic, magic_file="nonexistent") + os.environ["MAGIC"] = "nonexistent" try: self.assertRaises(magic.MagicException, magic.Magic) finally: - del os.environ['MAGIC'] + del os.environ["MAGIC"] def test_keep_going(self): - filename = os.path.join(self.TESTDATA_DIR, 'keep-going.jpg') + filename = os.path.join(self.TESTDATA_DIR, "keep-going.jpg") m = magic.Magic(mime=True) - self.assertEqual(m.from_file(filename), 'image/jpeg') + self.assertEqual(m.from_file(filename), "image/jpeg") try: # this will throw if you have an "old" version of the library # I'm otherwise not sure how to query if keep_going is supported magic.version() m = magic.Magic(mime=True, keep_going=True) - self.assertEqual(m.from_file(filename), - 'image/jpeg\\012- application/octet-stream') + self.assertEqual( + m.from_file(filename), "image/jpeg\\012- application/octet-stream" + ) except NotImplementedError: pass def test_rethrow(self): old = magic.magic_buffer try: + def t(x, y): raise magic.MagicException("passthrough") @@ -217,16 +246,47 @@ def test_name_count(self): m = magic.Magic() - with open(os.path.join(self.TESTDATA_DIR, 'name_use.jpg'), 'rb') as f: + with open(os.path.join(self.TESTDATA_DIR, "name_use.jpg"), "rb") as f: m.from_buffer(f.read()) def test_pathlike(self): if sys.version_info < (3, 6): return from pathlib import Path - path = Path(self.TESTDATA_DIR, "test.pdf") + + path = Path(self.TESTDATA_DIR, "test.pdf") m = magic.Magic(mime=True) - self.assertEqual('application/pdf', m.from_file(path)) + self.assertEqual("application/pdf", m.from_file(path)) + + def test_symlink(self): + # TODO: 3.0 + if not hasattr(tempfile, "TemporaryDirectory"): + return + + with tempfile.TemporaryDirectory() as tmp: + tmp_link = os.path.join(tmp, "test_link") + tmp_broken = os.path.join(tmp, "nonexistent") + + os.symlink( + os.path.join(self.TESTDATA_DIR, "test.pdf"), + tmp_link, + ) + + os.symlink("/nonexistent", tmp_broken) + + m = magic.Magic() + m_follow = magic.Magic(follow_symlinks=True) + self.assertTrue(m.from_file(tmp_link).startswith("symbolic link to ")) + self.assertTrue(m_follow.from_file(tmp_link).startswith("PDF document")) + + self.assertTrue( + m.from_file(tmp_broken).startswith( + "broken symbolic link to /nonexistent" + ) + ) + + self.assertRaises(IOError, m_follow.from_file, tmp_broken) + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main()