libmagic-compat.patch 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096
  1. Subject: libmagic compatibility
  2. Origin: libmagic-compat branch, commit 9aba180
  3. Upstream-Author: Adam Hupp <adam@hupp.org>
  4. Date: Mon Dec 4 11:55:27 2017 -0800
  5. --- a/LICENSE
  6. +++ b/LICENSE
  7. @@ -19,3 +19,40 @@
  8. LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  9. OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  10. SOFTWARE.
  11. +
  12. +
  13. +====
  14. +
  15. +Portions of this package (magic/compat.py and test/libmagic_test.py)
  16. +are distributed under the following copyright notice:
  17. +
  18. +
  19. +$File: LEGAL.NOTICE,v 1.15 2006/05/03 18:48:33 christos Exp $
  20. +Copyright (c) Ian F. Darwin 1986, 1987, 1989, 1990, 1991, 1992, 1994, 1995.
  21. +Software written by Ian F. Darwin and others;
  22. +maintained 1994- Christos Zoulas.
  23. +
  24. +This software is not subject to any export provision of the United States
  25. +Department of Commerce, and may be exported to any country or planet.
  26. +
  27. +Redistribution and use in source and binary forms, with or without
  28. +modification, are permitted provided that the following conditions
  29. +are met:
  30. +1. Redistributions of source code must retain the above copyright
  31. + notice immediately at the beginning of the file, without modification,
  32. + this list of conditions, and the following disclaimer.
  33. +2. Redistributions in binary form must reproduce the above copyright
  34. + notice, this list of conditions and the following disclaimer in the
  35. + documentation and/or other materials provided with the distribution.
  36. +
  37. +THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  38. +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  39. +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  40. +ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
  41. +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  42. +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  43. +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  44. +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  45. +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  46. +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  47. +SUCH DAMAGE.
  48. --- a/README.md
  49. +++ b/README.md
  50. @@ -45,9 +45,18 @@
  51. Minor version bumps should be backwards compatible. Major bumps are not.
  52. -## Name Conflict
  53. +## Compatibility
  54. -There are, sadly, two libraries which use the module name `magic`. Both have been around for quite a while.If you are using this module and get an error using a method like `open`, your code is expecting the other one. Hopefully one day these will be reconciled.
  55. +There are, sadly, 3 libraries using the package name `magic`. The others are:
  56. +
  57. +1. libmagic itself distributes a `magic` Python module with a somewhat
  58. +different API. python-magic includes a copy of this module to avoid
  59. +unnecessary breakage when both versions are installed. Maybe someday
  60. +they will converge.
  61. +
  62. +2. python-libmagic also uses the same module name, and has a similar
  63. +but not identical API. If you run into errors about "magic.h" not
  64. +being present, you should uninstall python-libmagic.
  65. ## Installation
  66. @@ -116,5 +125,3 @@
  67. python-magic is distributed under the MIT license. See the included
  68. LICENSE file for details.
  69. -
  70. -
  71. --- a/magic.py
  72. +++ /dev/null
  73. @@ -1,301 +0,0 @@
  74. -"""
  75. -magic is a wrapper around the libmagic file identification library.
  76. -
  77. -See README for more information.
  78. -
  79. -Usage:
  80. -
  81. ->>> import magic
  82. ->>> magic.from_file("testdata/test.pdf")
  83. -'PDF document, version 1.2'
  84. ->>> magic.from_file("testdata/test.pdf", mime=True)
  85. -'application/pdf'
  86. ->>> magic.from_buffer(open("testdata/test.pdf").read(1024))
  87. -'PDF document, version 1.2'
  88. ->>>
  89. -
  90. -
  91. -"""
  92. -
  93. -import sys
  94. -import glob
  95. -import os.path
  96. -import ctypes
  97. -import ctypes.util
  98. -import threading
  99. -
  100. -from ctypes import c_char_p, c_int, c_size_t, c_void_p
  101. -
  102. -
  103. -class MagicException(Exception):
  104. - def __init__(self, message):
  105. - super(MagicException, self).__init__(message)
  106. - self.message = message
  107. -
  108. -
  109. -class Magic:
  110. - """
  111. - Magic is a wrapper around the libmagic C library.
  112. -
  113. - """
  114. -
  115. - def __init__(self, mime=False, magic_file=None, mime_encoding=False,
  116. - keep_going=False, uncompress=False):
  117. - """
  118. - Create a new libmagic wrapper.
  119. -
  120. - mime - if True, mimetypes are returned instead of textual descriptions
  121. - mime_encoding - if True, codec is returned
  122. - magic_file - use a mime database other than the system default
  123. - keep_going - don't stop at the first match, keep going
  124. - uncompress - Try to look inside compressed files.
  125. - """
  126. - self.flags = MAGIC_NONE
  127. - if mime:
  128. - self.flags |= MAGIC_MIME
  129. - if mime_encoding:
  130. - self.flags |= MAGIC_MIME_ENCODING
  131. - if keep_going:
  132. - self.flags |= MAGIC_CONTINUE
  133. -
  134. - if uncompress:
  135. - self.flags |= MAGIC_COMPRESS
  136. -
  137. - self.cookie = magic_open(self.flags)
  138. - self.lock = threading.Lock()
  139. -
  140. - magic_load(self.cookie, magic_file)
  141. -
  142. - def from_buffer(self, buf):
  143. - """
  144. - Identify the contents of `buf`
  145. - """
  146. - with self.lock:
  147. - try:
  148. - # if we're on python3, convert buf to bytes
  149. - # otherwise this string is passed as wchar*
  150. - # which is not what libmagic expects
  151. - if type(buf) == str and str != bytes:
  152. - buf = buf.encode('utf-8', errors='replace')
  153. - return maybe_decode(magic_buffer(self.cookie, buf))
  154. - except MagicException as e:
  155. - return self._handle509Bug(e)
  156. -
  157. - def from_file(self, filename):
  158. - # raise FileNotFoundException or IOError if the file does not exist
  159. - with open(filename):
  160. - pass
  161. - with self.lock:
  162. - try:
  163. - return maybe_decode(magic_file(self.cookie, filename))
  164. - except MagicException as e:
  165. - return self._handle509Bug(e)
  166. -
  167. - def _handle509Bug(self, e):
  168. - # libmagic 5.09 has a bug where it might fail to identify the
  169. - # mimetype of a file and returns null from magic_file (and
  170. - # likely _buffer), but also does not return an error message.
  171. - if e.message is None and (self.flags & MAGIC_MIME):
  172. - return "application/octet-stream"
  173. - else:
  174. - raise e
  175. -
  176. - def __del__(self):
  177. - # no _thread_check here because there can be no other
  178. - # references to this object at this point.
  179. -
  180. - # during shutdown magic_close may have been cleared already so
  181. - # make sure it exists before using it.
  182. -
  183. - # the self.cookie check should be unnecessary and was an
  184. - # incorrect fix for a threading problem, however I'm leaving
  185. - # it in because it's harmless and I'm slightly afraid to
  186. - # remove it.
  187. - if self.cookie and magic_close:
  188. - magic_close(self.cookie)
  189. - self.cookie = None
  190. -
  191. -_instances = {}
  192. -
  193. -def _get_magic_type(mime):
  194. - i = _instances.get(mime)
  195. - if i is None:
  196. - i = _instances[mime] = Magic(mime=mime)
  197. - return i
  198. -
  199. -def from_file(filename, mime=False):
  200. - """"
  201. - Accepts a filename and returns the detected filetype. Return
  202. - value is the mimetype if mime=True, otherwise a human readable
  203. - name.
  204. -
  205. - >>> magic.from_file("testdata/test.pdf", mime=True)
  206. - 'application/pdf'
  207. - """
  208. - m = _get_magic_type(mime)
  209. - return m.from_file(filename)
  210. -
  211. -def from_buffer(buffer, mime=False):
  212. - """
  213. - Accepts a binary string and returns the detected filetype. Return
  214. - value is the mimetype if mime=True, otherwise a human readable
  215. - name.
  216. -
  217. - >>> magic.from_buffer(open("testdata/test.pdf").read(1024))
  218. - 'PDF document, version 1.2'
  219. - """
  220. - m = _get_magic_type(mime)
  221. - return m.from_buffer(buffer)
  222. -
  223. -
  224. -
  225. -
  226. -libmagic = None
  227. -# Let's try to find magic or magic1
  228. -dll = ctypes.util.find_library('magic') or ctypes.util.find_library('magic1') or ctypes.util.find_library('cygmagic-1')
  229. -
  230. -# This is necessary because find_library returns None if it doesn't find the library
  231. -if dll:
  232. - libmagic = ctypes.CDLL(dll)
  233. -
  234. -if not libmagic or not libmagic._name:
  235. - windows_dlls = ['magic1.dll','cygmagic-1.dll']
  236. - platform_to_lib = {'darwin': ['/opt/local/lib/libmagic.dylib',
  237. - '/usr/local/lib/libmagic.dylib'] +
  238. - # Assumes there will only be one version installed
  239. - glob.glob('/usr/local/Cellar/libmagic/*/lib/libmagic.dylib'),
  240. - 'win32': windows_dlls,
  241. - 'cygwin': windows_dlls,
  242. - 'linux': ['libmagic.so.1'], # fallback for some Linuxes (e.g. Alpine) where library search does not work
  243. - }
  244. - platform = 'linux' if sys.platform.startswith('linux') else sys.platform
  245. - for dll in platform_to_lib.get(platform, []):
  246. - try:
  247. - libmagic = ctypes.CDLL(dll)
  248. - break
  249. - except OSError:
  250. - pass
  251. -
  252. -if not libmagic or not libmagic._name:
  253. - # It is better to raise an ImportError since we are importing magic module
  254. - raise ImportError('failed to find libmagic. Check your installation')
  255. -
  256. -magic_t = ctypes.c_void_p
  257. -
  258. -def errorcheck_null(result, func, args):
  259. - if result is None:
  260. - err = magic_error(args[0])
  261. - raise MagicException(err)
  262. - else:
  263. - return result
  264. -
  265. -def errorcheck_negative_one(result, func, args):
  266. - if result is -1:
  267. - err = magic_error(args[0])
  268. - raise MagicException(err)
  269. - else:
  270. - return result
  271. -
  272. -
  273. -# return str on python3. Don't want to unconditionally
  274. -# decode because that results in unicode on python2
  275. -def maybe_decode(s):
  276. - if str == bytes:
  277. - return s
  278. - else:
  279. - return s.decode('utf-8')
  280. -
  281. -def coerce_filename(filename):
  282. - if filename is None:
  283. - return None
  284. -
  285. - # ctypes will implicitly convert unicode strings to bytes with
  286. - # .encode('ascii'). If you use the filesystem encoding
  287. - # then you'll get inconsistent behavior (crashes) depending on the user's
  288. - # LANG environment variable
  289. - is_unicode = (sys.version_info[0] <= 2 and
  290. - isinstance(filename, unicode)) or \
  291. - (sys.version_info[0] >= 3 and
  292. - isinstance(filename, str))
  293. - if is_unicode:
  294. - return filename.encode('utf-8', 'surrogateescape')
  295. - else:
  296. - return filename
  297. -
  298. -magic_open = libmagic.magic_open
  299. -magic_open.restype = magic_t
  300. -magic_open.argtypes = [c_int]
  301. -
  302. -magic_close = libmagic.magic_close
  303. -magic_close.restype = None
  304. -magic_close.argtypes = [magic_t]
  305. -
  306. -magic_error = libmagic.magic_error
  307. -magic_error.restype = c_char_p
  308. -magic_error.argtypes = [magic_t]
  309. -
  310. -magic_errno = libmagic.magic_errno
  311. -magic_errno.restype = c_int
  312. -magic_errno.argtypes = [magic_t]
  313. -
  314. -_magic_file = libmagic.magic_file
  315. -_magic_file.restype = c_char_p
  316. -_magic_file.argtypes = [magic_t, c_char_p]
  317. -_magic_file.errcheck = errorcheck_null
  318. -
  319. -def magic_file(cookie, filename):
  320. - return _magic_file(cookie, coerce_filename(filename))
  321. -
  322. -_magic_buffer = libmagic.magic_buffer
  323. -_magic_buffer.restype = c_char_p
  324. -_magic_buffer.argtypes = [magic_t, c_void_p, c_size_t]
  325. -_magic_buffer.errcheck = errorcheck_null
  326. -
  327. -def magic_buffer(cookie, buf):
  328. - return _magic_buffer(cookie, buf, len(buf))
  329. -
  330. -
  331. -_magic_load = libmagic.magic_load
  332. -_magic_load.restype = c_int
  333. -_magic_load.argtypes = [magic_t, c_char_p]
  334. -_magic_load.errcheck = errorcheck_negative_one
  335. -
  336. -def magic_load(cookie, filename):
  337. - return _magic_load(cookie, coerce_filename(filename))
  338. -
  339. -magic_setflags = libmagic.magic_setflags
  340. -magic_setflags.restype = c_int
  341. -magic_setflags.argtypes = [magic_t, c_int]
  342. -
  343. -magic_check = libmagic.magic_check
  344. -magic_check.restype = c_int
  345. -magic_check.argtypes = [magic_t, c_char_p]
  346. -
  347. -magic_compile = libmagic.magic_compile
  348. -magic_compile.restype = c_int
  349. -magic_compile.argtypes = [magic_t, c_char_p]
  350. -
  351. -
  352. -
  353. -MAGIC_NONE = 0x000000 # No flags
  354. -MAGIC_DEBUG = 0x000001 # Turn on debugging
  355. -MAGIC_SYMLINK = 0x000002 # Follow symlinks
  356. -MAGIC_COMPRESS = 0x000004 # Check inside compressed files
  357. -MAGIC_DEVICES = 0x000008 # Look at the contents of devices
  358. -MAGIC_MIME = 0x000010 # Return a mime string
  359. -MAGIC_MIME_ENCODING = 0x000400 # Return the MIME encoding
  360. -MAGIC_CONTINUE = 0x000020 # Return all matches
  361. -MAGIC_CHECK = 0x000040 # Print warnings to stderr
  362. -MAGIC_PRESERVE_ATIME = 0x000080 # Restore access time on exit
  363. -MAGIC_RAW = 0x000100 # Don't translate unprintable chars
  364. -MAGIC_ERROR = 0x000200 # Handle ENOENT etc as real errors
  365. -
  366. -MAGIC_NO_CHECK_COMPRESS = 0x001000 # Don't check for compressed files
  367. -MAGIC_NO_CHECK_TAR = 0x002000 # Don't check for tar files
  368. -MAGIC_NO_CHECK_SOFT = 0x004000 # Don't check magic entries
  369. -MAGIC_NO_CHECK_APPTYPE = 0x008000 # Don't check application type
  370. -MAGIC_NO_CHECK_ELF = 0x010000 # Don't check for elf details
  371. -MAGIC_NO_CHECK_ASCII = 0x020000 # Don't check for ascii files
  372. -MAGIC_NO_CHECK_TROFF = 0x040000 # Don't check ascii/troff
  373. -MAGIC_NO_CHECK_FORTRAN = 0x080000 # Don't check ascii/fortran
  374. -MAGIC_NO_CHECK_TOKENS = 0x100000 # Don't check ascii/tokens
  375. --- /dev/null
  376. +++ b/magic/__init__.py
  377. @@ -0,0 +1,357 @@
  378. +"""
  379. +magic is a wrapper around the libmagic file identification library.
  380. +
  381. +See README for more information.
  382. +
  383. +Usage:
  384. +
  385. +>>> import magic
  386. +>>> magic.from_file("testdata/test.pdf")
  387. +'PDF document, version 1.2'
  388. +>>> magic.from_file("testdata/test.pdf", mime=True)
  389. +'application/pdf'
  390. +>>> magic.from_buffer(open("testdata/test.pdf").read(1024))
  391. +'PDF document, version 1.2'
  392. +>>>
  393. +
  394. +
  395. +"""
  396. +
  397. +import sys
  398. +import glob
  399. +import os.path
  400. +import ctypes
  401. +import ctypes.util
  402. +import threading
  403. +import logging
  404. +
  405. +from ctypes import c_char_p, c_int, c_size_t, c_void_p
  406. +
  407. +
  408. +class MagicException(Exception):
  409. + def __init__(self, message):
  410. + super(MagicException, self).__init__(message)
  411. + self.message = message
  412. +
  413. +
  414. +class Magic:
  415. + """
  416. + Magic is a wrapper around the libmagic C library.
  417. +
  418. + """
  419. +
  420. + def __init__(self, mime=False, magic_file=None, mime_encoding=False,
  421. + keep_going=False, uncompress=False):
  422. + """
  423. + Create a new libmagic wrapper.
  424. +
  425. + mime - if True, mimetypes are returned instead of textual descriptions
  426. + mime_encoding - if True, codec is returned
  427. + magic_file - use a mime database other than the system default
  428. + keep_going - don't stop at the first match, keep going
  429. + uncompress - Try to look inside compressed files.
  430. + """
  431. + self.flags = MAGIC_NONE
  432. + if mime:
  433. + self.flags |= MAGIC_MIME
  434. + if mime_encoding:
  435. + self.flags |= MAGIC_MIME_ENCODING
  436. + if keep_going:
  437. + self.flags |= MAGIC_CONTINUE
  438. +
  439. + if uncompress:
  440. + self.flags |= MAGIC_COMPRESS
  441. +
  442. + self.cookie = magic_open(self.flags)
  443. + self.lock = threading.Lock()
  444. +
  445. + magic_load(self.cookie, magic_file)
  446. +
  447. + def from_buffer(self, buf):
  448. + """
  449. + Identify the contents of `buf`
  450. + """
  451. + with self.lock:
  452. + try:
  453. + # if we're on python3, convert buf to bytes
  454. + # otherwise this string is passed as wchar*
  455. + # which is not what libmagic expects
  456. + if type(buf) == str and str != bytes:
  457. + buf = buf.encode('utf-8', errors='replace')
  458. + return maybe_decode(magic_buffer(self.cookie, buf))
  459. + except MagicException as e:
  460. + return self._handle509Bug(e)
  461. +
  462. + def from_open_file(self, open_file):
  463. + with self.lock:
  464. + try:
  465. + return maybe_decode(magic_descriptor(self.cookie, open_file.fileno()))
  466. + except MagicException as e:
  467. + return self._handle509Bug(e)
  468. +
  469. + def from_file(self, filename):
  470. + # raise FileNotFoundException or IOError if the file does not exist
  471. + # use __builtins__ because the compat stuff at the bottom shadows the builtin open
  472. + with __builtins__['open'](filename):
  473. + pass
  474. +
  475. + with self.lock:
  476. + try:
  477. + return maybe_decode(magic_file(self.cookie, filename))
  478. + except MagicException as e:
  479. + return self._handle509Bug(e)
  480. +
  481. + def _handle509Bug(self, e):
  482. + # libmagic 5.09 has a bug where it might fail to identify the
  483. + # mimetype of a file and returns null from magic_file (and
  484. + # likely _buffer), but also does not return an error message.
  485. + if e.message is None and (self.flags & MAGIC_MIME):
  486. + return "application/octet-stream"
  487. + else:
  488. + raise e
  489. +
  490. + def __del__(self):
  491. + # no _thread_check here because there can be no other
  492. + # references to this object at this point.
  493. +
  494. + # during shutdown magic_close may have been cleared already so
  495. + # make sure it exists before using it.
  496. +
  497. + # the self.cookie check should be unnecessary and was an
  498. + # incorrect fix for a threading problem, however I'm leaving
  499. + # it in because it's harmless and I'm slightly afraid to
  500. + # remove it.
  501. + if self.cookie and magic_close:
  502. + magic_close(self.cookie)
  503. + self.cookie = None
  504. +
  505. +_instances = {}
  506. +
  507. +def _get_magic_type(mime):
  508. + i = _instances.get(mime)
  509. + if i is None:
  510. + i = _instances[mime] = Magic(mime=mime)
  511. + return i
  512. +
  513. +def from_file(filename, mime=False):
  514. + """"
  515. + Accepts a filename and returns the detected filetype. Return
  516. + value is the mimetype if mime=True, otherwise a human readable
  517. + name.
  518. +
  519. + >>> magic.from_file("testdata/test.pdf", mime=True)
  520. + 'application/pdf'
  521. + """
  522. + m = _get_magic_type(mime)
  523. + return m.from_file(filename)
  524. +
  525. +def from_buffer(buffer, mime=False):
  526. + """
  527. + Accepts a binary string and returns the detected filetype. Return
  528. + value is the mimetype if mime=True, otherwise a human readable
  529. + name.
  530. +
  531. + >>> magic.from_buffer(open("testdata/test.pdf").read(1024))
  532. + 'PDF document, version 1.2'
  533. + """
  534. + m = _get_magic_type(mime)
  535. + return m.from_buffer(buffer)
  536. +
  537. +
  538. +
  539. +
  540. +libmagic = None
  541. +# Let's try to find magic or magic1
  542. +dll = ctypes.util.find_library('magic') or ctypes.util.find_library('magic1') or ctypes.util.find_library('cygmagic-1')
  543. +
  544. +# This is necessary because find_library returns None if it doesn't find the library
  545. +if dll:
  546. + libmagic = ctypes.CDLL(dll)
  547. +
  548. +if not libmagic or not libmagic._name:
  549. + windows_dlls = ['magic1.dll','cygmagic-1.dll']
  550. + platform_to_lib = {'darwin': ['/opt/local/lib/libmagic.dylib',
  551. + '/usr/local/lib/libmagic.dylib'] +
  552. + # Assumes there will only be one version installed
  553. + glob.glob('/usr/local/Cellar/libmagic/*/lib/libmagic.dylib'),
  554. + 'win32': windows_dlls,
  555. + 'cygwin': windows_dlls,
  556. + 'linux': ['libmagic.so.1'], # fallback for some Linuxes (e.g. Alpine) where library search does not work
  557. + }
  558. + platform = 'linux' if sys.platform.startswith('linux') else sys.platform
  559. + for dll in platform_to_lib.get(platform, []):
  560. + try:
  561. + libmagic = ctypes.CDLL(dll)
  562. + break
  563. + except OSError:
  564. + pass
  565. +
  566. +if not libmagic or not libmagic._name:
  567. + # It is better to raise an ImportError since we are importing magic module
  568. + raise ImportError('failed to find libmagic. Check your installation')
  569. +
  570. +magic_t = ctypes.c_void_p
  571. +
  572. +def errorcheck_null(result, func, args):
  573. + if result is None:
  574. + err = magic_error(args[0])
  575. + raise MagicException(err)
  576. + else:
  577. + return result
  578. +
  579. +def errorcheck_negative_one(result, func, args):
  580. + if result is -1:
  581. + err = magic_error(args[0])
  582. + raise MagicException(err)
  583. + else:
  584. + return result
  585. +
  586. +
  587. +# return str on python3. Don't want to unconditionally
  588. +# decode because that results in unicode on python2
  589. +def maybe_decode(s):
  590. + if str == bytes:
  591. + return s
  592. + else:
  593. + return s.decode('utf-8')
  594. +
  595. +def coerce_filename(filename):
  596. + if filename is None:
  597. + return None
  598. +
  599. + # ctypes will implicitly convert unicode strings to bytes with
  600. + # .encode('ascii'). If you use the filesystem encoding
  601. + # then you'll get inconsistent behavior (crashes) depending on the user's
  602. + # LANG environment variable
  603. + is_unicode = (sys.version_info[0] <= 2 and
  604. + isinstance(filename, unicode)) or \
  605. + (sys.version_info[0] >= 3 and
  606. + isinstance(filename, str))
  607. + if is_unicode:
  608. + return filename.encode('utf-8', 'surrogateescape')
  609. + else:
  610. + return filename
  611. +
  612. +magic_open = libmagic.magic_open
  613. +magic_open.restype = magic_t
  614. +magic_open.argtypes = [c_int]
  615. +
  616. +magic_close = libmagic.magic_close
  617. +magic_close.restype = None
  618. +magic_close.argtypes = [magic_t]
  619. +
  620. +magic_error = libmagic.magic_error
  621. +magic_error.restype = c_char_p
  622. +magic_error.argtypes = [magic_t]
  623. +
  624. +magic_errno = libmagic.magic_errno
  625. +magic_errno.restype = c_int
  626. +magic_errno.argtypes = [magic_t]
  627. +
  628. +_magic_file = libmagic.magic_file
  629. +_magic_file.restype = c_char_p
  630. +_magic_file.argtypes = [magic_t, c_char_p]
  631. +_magic_file.errcheck = errorcheck_null
  632. +
  633. +def magic_file(cookie, filename):
  634. + return _magic_file(cookie, coerce_filename(filename))
  635. +
  636. +_magic_buffer = libmagic.magic_buffer
  637. +_magic_buffer.restype = c_char_p
  638. +_magic_buffer.argtypes = [magic_t, c_void_p, c_size_t]
  639. +_magic_buffer.errcheck = errorcheck_null
  640. +
  641. +def magic_buffer(cookie, buf):
  642. + return _magic_buffer(cookie, buf, len(buf))
  643. +
  644. +magic_descriptor = libmagic.magic_descriptor
  645. +magic_descriptor.restype = c_char_p
  646. +magic_descriptor.argtypes = [magic_t, c_int]
  647. +magic_descriptor.errcheck = errorcheck_null
  648. +
  649. +_magic_load = libmagic.magic_load
  650. +_magic_load.restype = c_int
  651. +_magic_load.argtypes = [magic_t, c_char_p]
  652. +_magic_load.errcheck = errorcheck_negative_one
  653. +
  654. +def magic_load(cookie, filename):
  655. + return _magic_load(cookie, coerce_filename(filename))
  656. +
  657. +magic_setflags = libmagic.magic_setflags
  658. +magic_setflags.restype = c_int
  659. +magic_setflags.argtypes = [magic_t, c_int]
  660. +
  661. +magic_check = libmagic.magic_check
  662. +magic_check.restype = c_int
  663. +magic_check.argtypes = [magic_t, c_char_p]
  664. +
  665. +magic_compile = libmagic.magic_compile
  666. +magic_compile.restype = c_int
  667. +magic_compile.argtypes = [magic_t, c_char_p]
  668. +
  669. +
  670. +
  671. +MAGIC_NONE = 0x000000 # No flags
  672. +MAGIC_DEBUG = 0x000001 # Turn on debugging
  673. +MAGIC_SYMLINK = 0x000002 # Follow symlinks
  674. +MAGIC_COMPRESS = 0x000004 # Check inside compressed files
  675. +MAGIC_DEVICES = 0x000008 # Look at the contents of devices
  676. +MAGIC_MIME = 0x000010 # Return a mime string
  677. +MAGIC_MIME_ENCODING = 0x000400 # Return the MIME encoding
  678. +MAGIC_CONTINUE = 0x000020 # Return all matches
  679. +MAGIC_CHECK = 0x000040 # Print warnings to stderr
  680. +MAGIC_PRESERVE_ATIME = 0x000080 # Restore access time on exit
  681. +MAGIC_RAW = 0x000100 # Don't translate unprintable chars
  682. +MAGIC_ERROR = 0x000200 # Handle ENOENT etc as real errors
  683. +
  684. +MAGIC_NO_CHECK_COMPRESS = 0x001000 # Don't check for compressed files
  685. +MAGIC_NO_CHECK_TAR = 0x002000 # Don't check for tar files
  686. +MAGIC_NO_CHECK_SOFT = 0x004000 # Don't check magic entries
  687. +MAGIC_NO_CHECK_APPTYPE = 0x008000 # Don't check application type
  688. +MAGIC_NO_CHECK_ELF = 0x010000 # Don't check for elf details
  689. +MAGIC_NO_CHECK_ASCII = 0x020000 # Don't check for ascii files
  690. +MAGIC_NO_CHECK_TROFF = 0x040000 # Don't check ascii/troff
  691. +MAGIC_NO_CHECK_FORTRAN = 0x080000 # Don't check ascii/fortran
  692. +MAGIC_NO_CHECK_TOKENS = 0x100000 # Don't check ascii/tokens
  693. +
  694. +# This package name conflicts with the one provided by upstream
  695. +# libmagic. This is a common source of confusion for users. To
  696. +# resolve, we ship a copy of that module and expose its functions
  697. +# wrapped in deprecation warnings.
  698. +def add_compat(to_module):
  699. +
  700. + import warnings, re
  701. + from magic import compat
  702. +
  703. + def deprecation_wrapper(compat, fn, alternate):
  704. + def _(*args, **kwargs):
  705. + warnings.warn(
  706. + "Using compatability mode with libmagic's python binding",
  707. + DeprecationWarning)
  708. +
  709. + return compat[fn](*args, **kwargs)
  710. + return _
  711. +
  712. + fn = [('detect_from_filename', 'magic.from_file'),
  713. + ('detect_from_content', 'magic.from_buffer'),
  714. + ('detect_from_fobj', 'magic.Magic.from_open_file'),
  715. + ('open', 'magic.Magic')]
  716. + for (fname, alternate) in fn:
  717. + to_module[fname] = deprecation_wrapper(compat.__dict__, fname, alternate)
  718. +
  719. + # copy constants over, ensuring there's no conflicts
  720. + is_const_re = re.compile("^[A-Z_]+$")
  721. + allowed_inconsistent = set(['MAGIC_MIME'])
  722. + for name, value in compat.__dict__.items():
  723. + if is_const_re.match(name):
  724. + if name in to_module:
  725. + if name in allowed_inconsistent:
  726. + continue
  727. + if to_module[name] != value:
  728. + raise Exception("inconsistent value for " + name)
  729. + else:
  730. + continue
  731. + else:
  732. + to_module[name] = value
  733. +
  734. +add_compat(globals())
  735. --- /dev/null
  736. +++ b/magic/compat.py
  737. @@ -0,0 +1,285 @@
  738. +# coding: utf-8
  739. +
  740. +'''
  741. +Python bindings for libmagic
  742. +'''
  743. +
  744. +import ctypes
  745. +
  746. +from collections import namedtuple
  747. +
  748. +from ctypes import *
  749. +from ctypes.util import find_library
  750. +
  751. +
  752. +def _init():
  753. + """
  754. + Loads the shared library through ctypes and returns a library
  755. + L{ctypes.CDLL} instance
  756. + """
  757. + return ctypes.cdll.LoadLibrary(find_library('magic'))
  758. +
  759. +_libraries = {}
  760. +_libraries['magic'] = _init()
  761. +
  762. +# Flag constants for open and setflags
  763. +MAGIC_NONE = NONE = 0
  764. +MAGIC_DEBUG = DEBUG = 1
  765. +MAGIC_SYMLINK = SYMLINK = 2
  766. +MAGIC_COMPRESS = COMPRESS = 4
  767. +MAGIC_DEVICES = DEVICES = 8
  768. +MAGIC_MIME_TYPE = MIME_TYPE = 16
  769. +MAGIC_CONTINUE = CONTINUE = 32
  770. +MAGIC_CHECK = CHECK = 64
  771. +MAGIC_PRESERVE_ATIME = PRESERVE_ATIME = 128
  772. +MAGIC_RAW = RAW = 256
  773. +MAGIC_ERROR = ERROR = 512
  774. +MAGIC_MIME_ENCODING = MIME_ENCODING = 1024
  775. +MAGIC_MIME = MIME = 1040 # MIME_TYPE + MIME_ENCODING
  776. +MAGIC_APPLE = APPLE = 2048
  777. +
  778. +MAGIC_NO_CHECK_COMPRESS = NO_CHECK_COMPRESS = 4096
  779. +MAGIC_NO_CHECK_TAR = NO_CHECK_TAR = 8192
  780. +MAGIC_NO_CHECK_SOFT = NO_CHECK_SOFT = 16384
  781. +MAGIC_NO_CHECK_APPTYPE = NO_CHECK_APPTYPE = 32768
  782. +MAGIC_NO_CHECK_ELF = NO_CHECK_ELF = 65536
  783. +MAGIC_NO_CHECK_TEXT = NO_CHECK_TEXT = 131072
  784. +MAGIC_NO_CHECK_CDF = NO_CHECK_CDF = 262144
  785. +MAGIC_NO_CHECK_TOKENS = NO_CHECK_TOKENS = 1048576
  786. +MAGIC_NO_CHECK_ENCODING = NO_CHECK_ENCODING = 2097152
  787. +
  788. +MAGIC_NO_CHECK_BUILTIN = NO_CHECK_BUILTIN = 4173824
  789. +
  790. +FileMagic = namedtuple('FileMagic', ('mime_type', 'encoding', 'name'))
  791. +
  792. +
  793. +class magic_set(Structure):
  794. + pass
  795. +magic_set._fields_ = []
  796. +magic_t = POINTER(magic_set)
  797. +
  798. +_open = _libraries['magic'].magic_open
  799. +_open.restype = magic_t
  800. +_open.argtypes = [c_int]
  801. +
  802. +_close = _libraries['magic'].magic_close
  803. +_close.restype = None
  804. +_close.argtypes = [magic_t]
  805. +
  806. +_file = _libraries['magic'].magic_file
  807. +_file.restype = c_char_p
  808. +_file.argtypes = [magic_t, c_char_p]
  809. +
  810. +_descriptor = _libraries['magic'].magic_descriptor
  811. +_descriptor.restype = c_char_p
  812. +_descriptor.argtypes = [magic_t, c_int]
  813. +
  814. +_buffer = _libraries['magic'].magic_buffer
  815. +_buffer.restype = c_char_p
  816. +_buffer.argtypes = [magic_t, c_void_p, c_size_t]
  817. +
  818. +_error = _libraries['magic'].magic_error
  819. +_error.restype = c_char_p
  820. +_error.argtypes = [magic_t]
  821. +
  822. +_setflags = _libraries['magic'].magic_setflags
  823. +_setflags.restype = c_int
  824. +_setflags.argtypes = [magic_t, c_int]
  825. +
  826. +_load = _libraries['magic'].magic_load
  827. +_load.restype = c_int
  828. +_load.argtypes = [magic_t, c_char_p]
  829. +
  830. +_compile = _libraries['magic'].magic_compile
  831. +_compile.restype = c_int
  832. +_compile.argtypes = [magic_t, c_char_p]
  833. +
  834. +_check = _libraries['magic'].magic_check
  835. +_check.restype = c_int
  836. +_check.argtypes = [magic_t, c_char_p]
  837. +
  838. +_list = _libraries['magic'].magic_list
  839. +_list.restype = c_int
  840. +_list.argtypes = [magic_t, c_char_p]
  841. +
  842. +_errno = _libraries['magic'].magic_errno
  843. +_errno.restype = c_int
  844. +_errno.argtypes = [magic_t]
  845. +
  846. +
  847. +class Magic(object):
  848. + def __init__(self, ms):
  849. + self._magic_t = ms
  850. +
  851. + def close(self):
  852. + """
  853. + Closes the magic database and deallocates any resources used.
  854. + """
  855. + _close(self._magic_t)
  856. +
  857. + @staticmethod
  858. + def __tostr(s):
  859. + if s is None:
  860. + return None
  861. + if isinstance(s, str):
  862. + return s
  863. + try: # keep Python 2 compatibility
  864. + return str(s, 'utf-8')
  865. + except TypeError:
  866. + return str(s)
  867. +
  868. + @staticmethod
  869. + def __tobytes(b):
  870. + if b is None:
  871. + return None
  872. + if isinstance(b, bytes):
  873. + return b
  874. + try: # keep Python 2 compatibility
  875. + return bytes(b, 'utf-8')
  876. + except TypeError:
  877. + return bytes(b)
  878. +
  879. + def file(self, filename):
  880. + """
  881. + Returns a textual description of the contents of the argument passed
  882. + as a filename or None if an error occurred and the MAGIC_ERROR flag
  883. + is set. A call to errno() will return the numeric error code.
  884. + """
  885. + return Magic.__tostr(_file(self._magic_t, Magic.__tobytes(filename)))
  886. +
  887. + def descriptor(self, fd):
  888. + """
  889. + Returns a textual description of the contents of the argument passed
  890. + as a file descriptor or None if an error occurred and the MAGIC_ERROR
  891. + flag is set. A call to errno() will return the numeric error code.
  892. + """
  893. + return Magic.__tostr(_descriptor(self._magic_t, fd))
  894. +
  895. + def buffer(self, buf):
  896. + """
  897. + Returns a textual description of the contents of the argument passed
  898. + as a buffer or None if an error occurred and the MAGIC_ERROR flag
  899. + is set. A call to errno() will return the numeric error code.
  900. + """
  901. + return Magic.__tostr(_buffer(self._magic_t, buf, len(buf)))
  902. +
  903. + def error(self):
  904. + """
  905. + Returns a textual explanation of the last error or None
  906. + if there was no error.
  907. + """
  908. + return Magic.__tostr(_error(self._magic_t))
  909. +
  910. + def setflags(self, flags):
  911. + """
  912. + Set flags on the magic object which determine how magic checking
  913. + behaves; a bitwise OR of the flags described in libmagic(3), but
  914. + without the MAGIC_ prefix.
  915. +
  916. + Returns -1 on systems that don't support utime(2) or utimes(2)
  917. + when PRESERVE_ATIME is set.
  918. + """
  919. + return _setflags(self._magic_t, flags)
  920. +
  921. + def load(self, filename=None):
  922. + """
  923. + Must be called to load entries in the colon separated list of database
  924. + files passed as argument or the default database file if no argument
  925. + before any magic queries can be performed.
  926. +
  927. + Returns 0 on success and -1 on failure.
  928. + """
  929. + return _load(self._magic_t, Magic.__tobytes(filename))
  930. +
  931. + def compile(self, dbs):
  932. + """
  933. + Compile entries in the colon separated list of database files
  934. + passed as argument or the default database file if no argument.
  935. + The compiled files created are named from the basename(1) of each file
  936. + argument with ".mgc" appended to it.
  937. +
  938. + Returns 0 on success and -1 on failure.
  939. + """
  940. + return _compile(self._magic_t, Magic.__tobytes(dbs))
  941. +
  942. + def check(self, dbs):
  943. + """
  944. + Check the validity of entries in the colon separated list of
  945. + database files passed as argument or the default database file
  946. + if no argument.
  947. +
  948. + Returns 0 on success and -1 on failure.
  949. + """
  950. + return _check(self._magic_t, Magic.__tobytes(dbs))
  951. +
  952. + def list(self, dbs):
  953. + """
  954. + Check the validity of entries in the colon separated list of
  955. + database files passed as argument or the default database file
  956. + if no argument.
  957. +
  958. + Returns 0 on success and -1 on failure.
  959. + """
  960. + return _list(self._magic_t, Magic.__tobytes(dbs))
  961. +
  962. + def errno(self):
  963. + """
  964. + Returns a numeric error code. If return value is 0, an internal
  965. + magic error occurred. If return value is non-zero, the value is
  966. + an OS error code. Use the errno module or os.strerror() can be used
  967. + to provide detailed error information.
  968. + """
  969. + return _errno(self._magic_t)
  970. +
  971. +
  972. +def open(flags):
  973. + """
  974. + Returns a magic object on success and None on failure.
  975. + Flags argument as for setflags.
  976. + """
  977. + return Magic(_open(flags))
  978. +
  979. +
  980. +# Objects used by `detect_from_` functions
  981. +mime_magic = Magic(_open(MAGIC_MIME))
  982. +mime_magic.load()
  983. +none_magic = Magic(_open(MAGIC_NONE))
  984. +none_magic.load()
  985. +
  986. +
  987. +def _create_filemagic(mime_detected, type_detected):
  988. + mime_type, mime_encoding = mime_detected.split('; ')
  989. +
  990. + return FileMagic(name=type_detected, mime_type=mime_type,
  991. + encoding=mime_encoding.replace('charset=', ''))
  992. +
  993. +
  994. +def detect_from_filename(filename):
  995. + '''Detect mime type, encoding and file type from a filename
  996. +
  997. + Returns a `FileMagic` namedtuple.
  998. + '''
  999. +
  1000. + return _create_filemagic(mime_magic.file(filename),
  1001. + none_magic.file(filename))
  1002. +
  1003. +
  1004. +def detect_from_fobj(fobj):
  1005. + '''Detect mime type, encoding and file type from file-like object
  1006. +
  1007. + Returns a `FileMagic` namedtuple.
  1008. + '''
  1009. +
  1010. + file_descriptor = fobj.fileno()
  1011. + return _create_filemagic(mime_magic.descriptor(file_descriptor),
  1012. + none_magic.descriptor(file_descriptor))
  1013. +
  1014. +
  1015. +def detect_from_content(byte_content):
  1016. + '''Detect mime type, encoding and file type from bytes
  1017. +
  1018. + Returns a `FileMagic` namedtuple.
  1019. + '''
  1020. +
  1021. + return _create_filemagic(mime_magic.buffer(byte_content),
  1022. + none_magic.buffer(byte_content))
  1023. --- /dev/null
  1024. +++ b/test/libmagic_test.py
  1025. @@ -0,0 +1,35 @@
  1026. +# coding: utf-8
  1027. +
  1028. +import unittest
  1029. +
  1030. +import magic
  1031. +
  1032. +
  1033. +class MagicTestCase(unittest.TestCase):
  1034. +
  1035. + filename = 'test/testdata/test.pdf'
  1036. + expected_mime_type = 'application/pdf'
  1037. + expected_encoding = 'us-ascii'
  1038. + expected_name = 'PDF document, version 1.2'
  1039. +
  1040. + def assert_result(self, result):
  1041. + self.assertEqual(result.mime_type, self.expected_mime_type)
  1042. + self.assertEqual(result.encoding, self.expected_encoding)
  1043. + self.assertEqual(result.name, self.expected_name)
  1044. +
  1045. + def test_detect_from_filename(self):
  1046. + result = magic.detect_from_filename(self.filename)
  1047. + self.assert_result(result)
  1048. +
  1049. + def test_detect_from_fobj(self):
  1050. + with open(self.filename) as fobj:
  1051. + result = magic.detect_from_fobj(fobj)
  1052. + self.assert_result(result)
  1053. +
  1054. + def test_detect_from_content(self):
  1055. + with open(self.filename) as fobj:
  1056. + result = magic.detect_from_content(fobj.read(4096))
  1057. + self.assert_result(result)
  1058. +
  1059. +if __name__ == '__main__':
  1060. + unittest.main()
  1061. --- a/test/run.sh
  1062. +++ b/test/run.sh
  1063. @@ -8,7 +8,10 @@
  1064. echo "python2.6"
  1065. python2.6 ${THISDIR}/test.py
  1066. +python2.6 ${THISDIR}/libmagic_test.py
  1067. echo "python2.7"
  1068. python2.7 ${THISDIR}/test.py
  1069. -echo "python3.0"
  1070. +python2.7 ${THISDIR}/libmagic_test.py
  1071. +echo "python3"
  1072. python3 ${THISDIR}/test.py
  1073. +python3 ${THISDIR}/libmagic_test.py
  1074. --- a/test/test.py
  1075. +++ b/test/test.py
  1076. @@ -38,6 +38,12 @@
  1077. b = b'#!/usr/bin/env python\nprint("foo")'
  1078. self.assertEqual("text/x-python", m.from_buffer(b))
  1079. +
  1080. + def test_open_file(self):
  1081. + m = magic.Magic(mime=True)
  1082. + with open(os.path.join(self.TESTDATA_DIR, "test.pdf")) as f:
  1083. + self.assertEqual("application/pdf", m.from_open_file(f))
  1084. +
  1085. def test_mime_types(self):
  1086. dest = os.path.join(MagicTest.TESTDATA_DIR, b'\xce\xbb'.decode('utf-8'))
  1087. shutil.copyfile(os.path.join(MagicTest.TESTDATA_DIR, 'lambda'), dest)