# test.py
  1. import os
  2. # for output which reports a local time
  3. os.environ['TZ'] = 'GMT'
  4. if os.environ.get('LC_ALL', '') != 'en_US.UTF-8':
  5. # this ensure we're in a utf-8 default filesystem encoding which is
  6. # necessary for some tests
  7. raise Exception("must run `export LC_ALL=en_US.UTF-8` before running test suite")
  8. import shutil
  9. import os.path
  10. import unittest
  11. import magic
  12. import sys
  13. # magic_descriptor is broken (?) in centos 7, so don't run those tests
  14. SKIP_FROM_DESCRIPTOR = bool(os.environ.get('SKIP_FROM_DESCRIPTOR'))
  15. class MagicTest(unittest.TestCase):
  16. TESTDATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'testdata')
  17. def test_version(self):
  18. try:
  19. self.assertTrue(magic.version() > 0)
  20. except NotImplementedError:
  21. pass
  22. def test_fs_encoding(self):
  23. self.assertEqual('utf-8', sys.getfilesystemencoding().lower())
  24. def assert_values(self, m, expected_values, buf_equals_file=True):
  25. for filename, expected_value in expected_values.items():
  26. try:
  27. filename = os.path.join(self.TESTDATA_DIR, filename)
  28. except TypeError:
  29. filename = os.path.join(
  30. self.TESTDATA_DIR.encode('utf-8'), filename)
  31. if type(expected_value) is not tuple:
  32. expected_value = (expected_value,)
  33. with open(filename, 'rb') as f:
  34. buf_value = m.from_buffer(f.read())
  35. file_value = m.from_file(filename)
  36. if buf_equals_file:
  37. self.assertEqual(buf_value, file_value)
  38. for value in (buf_value, file_value):
  39. self.assertIn(value, expected_value)
  40. def test_from_file_str_and_bytes(self):
  41. filename = os.path.join(self.TESTDATA_DIR, "test.pdf")
  42. self.assertEqual('application/pdf',
  43. magic.from_file(filename, mime=True))
  44. self.assertEqual('application/pdf',
  45. magic.from_file(filename.encode('utf-8'), mime=True))
  46. def test_from_descriptor_str_and_bytes(self):
  47. if SKIP_FROM_DESCRIPTOR:
  48. self.skipTest("magic_descriptor is broken in this version of libmagic")
  49. filename = os.path.join(self.TESTDATA_DIR, "test.pdf")
  50. with open(filename) as f:
  51. self.assertEqual('application/pdf',
  52. magic.from_descriptor(f.fileno(), mime=True))
  53. self.assertEqual('application/pdf',
  54. magic.from_descriptor(f.fileno(), mime=True))
  55. def test_from_buffer_str_and_bytes(self):
  56. if SKIP_FROM_DESCRIPTOR:
  57. self.skipTest("magic_descriptor is broken in this version of libmagic")
  58. m = magic.Magic(mime=True)
  59. self.assertTrue(
  60. m.from_buffer('#!/usr/bin/env python\nprint("foo")')
  61. in ("text/x-python", "text/x-script.python"))
  62. self.assertTrue(
  63. m.from_buffer(b'#!/usr/bin/env python\nprint("foo")')
  64. in ("text/x-python", "text/x-script.python"))
  65. def test_mime_types(self):
  66. dest = os.path.join(MagicTest.TESTDATA_DIR,
  67. b'\xce\xbb'.decode('utf-8'))
  68. shutil.copyfile(os.path.join(MagicTest.TESTDATA_DIR, 'lambda'), dest)
  69. try:
  70. m = magic.Magic(mime=True)
  71. self.assert_values(m, {
  72. 'magic._pyc_': ('application/octet-stream', 'text/x-bytecode.python'),
  73. 'test.pdf': 'application/pdf',
  74. 'test.gz': ('application/gzip', 'application/x-gzip'),
  75. 'test.snappy.parquet': 'application/octet-stream',
  76. 'text.txt': 'text/plain',
  77. b'\xce\xbb'.decode('utf-8'): 'text/plain',
  78. b'\xce\xbb': 'text/plain',
  79. })
  80. finally:
  81. os.unlink(dest)
  82. def test_descriptions(self):
  83. m = magic.Magic()
  84. os.environ['TZ'] = 'UTC' # To get last modified date of test.gz in UTC
  85. try:
  86. self.assert_values(m, {
  87. 'magic._pyc_': 'python 2.4 byte-compiled',
  88. 'test.pdf': 'PDF document, version 1.2',
  89. 'test.gz':
  90. ('gzip compressed data, was "test", from Unix, last '
  91. 'modified: Sun Jun 29 01:32:52 2008',
  92. 'gzip compressed data, was "test", last modified'
  93. ': Sun Jun 29 01:32:52 2008, from Unix',
  94. 'gzip compressed data, was "test", last modified'
  95. ': Sun Jun 29 01:32:52 2008, from Unix, original size 15',
  96. 'gzip compressed data, was "test", '
  97. 'last modified: Sun Jun 29 01:32:52 2008, '
  98. 'from Unix, original size modulo 2^32 15',
  99. 'gzip compressed data, was "test", last modified'
  100. ': Sun Jun 29 01:32:52 2008, from Unix, truncated'
  101. ),
  102. 'text.txt': 'ASCII text',
  103. 'test.snappy.parquet': ('Apache Parquet', 'Par archive data'),
  104. }, buf_equals_file=False)
  105. finally:
  106. del os.environ['TZ']
  107. def test_extension(self):
  108. try:
  109. m = magic.Magic(extension=True)
  110. self.assert_values(m, {
  111. # some versions return '' for the extensions of a gz file,
  112. # including w/ the command line. Who knows...
  113. 'test.gz': ('gz/tgz/tpz/zabw/svgz', '', '???'),
  114. 'name_use.jpg': 'jpeg/jpg/jpe/jfif',
  115. })
  116. except NotImplementedError:
  117. self.skipTest('MAGIC_EXTENSION not supported in this version')
  118. def test_unicode_result_nonraw(self):
  119. m = magic.Magic(raw=False)
  120. src = os.path.join(MagicTest.TESTDATA_DIR, 'pgpunicode')
  121. result = m.from_file(src)
  122. # NOTE: This check is added as otherwise some magic files don't identify the test case as a PGP key.
  123. if 'PGP' in result:
  124. assert r"PGP\011Secret Sub-key -" == result
  125. else:
  126. raise unittest.SkipTest("Magic file doesn't return expected type.")
  127. def test_unicode_result_raw(self):
  128. m = magic.Magic(raw=True)
  129. src = os.path.join(MagicTest.TESTDATA_DIR, 'pgpunicode')
  130. result = m.from_file(src)
  131. if 'PGP' in result:
  132. assert b'PGP\tSecret Sub-key -' == result.encode('utf-8')
  133. else:
  134. raise unittest.SkipTest("Magic file doesn't return expected type.")
  135. def test_mime_encodings(self):
  136. m = magic.Magic(mime_encoding=True)
  137. self.assert_values(m, {
  138. 'text-iso8859-1.txt': 'iso-8859-1',
  139. 'text.txt': 'us-ascii',
  140. })
  141. def test_errors(self):
  142. m = magic.Magic()
  143. self.assertRaises(IOError, m.from_file, 'nonexistent')
  144. self.assertRaises(magic.MagicException, magic.Magic,
  145. magic_file='nonexistent')
  146. os.environ['MAGIC'] = 'nonexistent'
  147. try:
  148. self.assertRaises(magic.MagicException, magic.Magic)
  149. finally:
  150. del os.environ['MAGIC']
  151. def test_keep_going(self):
  152. filename = os.path.join(self.TESTDATA_DIR, 'keep-going.jpg')
  153. m = magic.Magic(mime=True)
  154. self.assertEqual(m.from_file(filename), 'image/jpeg')
  155. try:
  156. # this will throw if you have an "old" version of the library
  157. # I'm otherwise not sure how to query if keep_going is supported
  158. magic.version()
  159. m = magic.Magic(mime=True, keep_going=True)
  160. self.assertEqual(m.from_file(filename),
  161. 'image/jpeg\\012- application/octet-stream')
  162. except NotImplementedError:
  163. pass
  164. def test_rethrow(self):
  165. old = magic.magic_buffer
  166. try:
  167. def t(x, y):
  168. raise magic.MagicException("passthrough")
  169. magic.magic_buffer = t
  170. with self.assertRaises(magic.MagicException):
  171. magic.from_buffer("hello", True)
  172. finally:
  173. magic.magic_buffer = old
  174. def test_getparam(self):
  175. m = magic.Magic(mime=True)
  176. try:
  177. m.setparam(magic.MAGIC_PARAM_INDIR_MAX, 1)
  178. self.assertEqual(m.getparam(magic.MAGIC_PARAM_INDIR_MAX), 1)
  179. except NotImplementedError:
  180. pass
  181. def test_name_count(self):
  182. m = magic.Magic()
  183. with open(os.path.join(self.TESTDATA_DIR, 'name_use.jpg'), 'rb') as f:
  184. m.from_buffer(f.read())
  185. if __name__ == '__main__':
  186. unittest.main()