12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138 |
- Subject: libmagic compatibility
- Origin: libmagic-compat branch, commit 315cb4c
- Upstream-Author: Adam Hupp <adam@hupp.org>
- Date: Mon Dec 4 11:55:27 2017 -0800
- Last-Update: 2018-01-15
- --- a/LICENSE
- +++ b/LICENSE
- @@ -19,3 +19,40 @@
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
- +
- +
- +====
- +
- +Portions of this package (magic/compat.py and test/libmagic_test.py)
- +are distributed under the following copyright notice:
- +
- +
- +$File: LEGAL.NOTICE,v 1.15 2006/05/03 18:48:33 christos Exp $
- +Copyright (c) Ian F. Darwin 1986, 1987, 1989, 1990, 1991, 1992, 1994, 1995.
- +Software written by Ian F. Darwin and others;
- +maintained 1994- Christos Zoulas.
- +
- +This software is not subject to any export provision of the United States
- +Department of Commerce, and may be exported to any country or planet.
- +
- +Redistribution and use in source and binary forms, with or without
- +modification, are permitted provided that the following conditions
- +are met:
- +1. Redistributions of source code must retain the above copyright
- + notice immediately at the beginning of the file, without modification,
- + this list of conditions, and the following disclaimer.
- +2. Redistributions in binary form must reproduce the above copyright
- + notice, this list of conditions and the following disclaimer in the
- + documentation and/or other materials provided with the distribution.
- +
- +THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- +ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- +SUCH DAMAGE.
- --- a/README.md
- +++ b/README.md
- @@ -45,9 +45,18 @@
-
- Minor version bumps should be backwards compatible. Major bumps are not.
-
- -## Name Conflict
- +## Compatibility
-
- -There are, sadly, two libraries which use the module name `magic`. Both have been around for quite a while.If you are using this module and get an error using a method like `open`, your code is expecting the other one. Hopefully one day these will be reconciled.
- +There are, sadly, 3 libraries using the package name `magic`. The others are:
- +
- +1. libmagic itself distributes a `magic` python module with a somewhat
- +different API. python-magic includes a copy of this module to avoid
- +unnecessary breakage when both versions are installed. Maybe someday
- +they will converge.
- +
- +2. python-libmagic also uses the same module name, and has a similar
- +but not identical API. If you run into errors about "magic.h" not
- +being present, you should uninstall python-libmagic.
-
- ## Installation
-
- @@ -64,7 +73,7 @@
- You'll need DLLs for libmagic. @julian-r has uploaded a versoin of this project that includes binaries to pypi:
- https://pypi.python.org/pypi/python-magic-bin/0.4.14
-
- -Other sources of the libraries in the past have been [File for Windows](http://gnuwin32.sourceforge.net/packages/file.htm) . You will need to copy the file `magic` out of `[binary-zip]\share\misc`, and pass it's location to `Magic(magic_file=...)`.
- +Other sources of the libraries in the past have been [File for Windows](http://gnuwin32.sourceforge.net/packages/file.htm) . You will need to copy the file `magic` out of `[binary-zip]\share\misc`, and pass its location to `Magic(magic_file=...)`.
-
- If you are using a 64-bit build of python, you'll need 64-bit libmagic binaries which can be found here: https://github.com/pidydx/libmagicwin64. Newer version can be found here: https://github.com/nscaife/file-windows.
-
- @@ -86,7 +95,7 @@
- Attempting to run the 32-bit libmagic DLL in a 64-bit build of
- python will fail with this error. Here are 64-bit builds of libmagic for windows: https://github.com/pidydx/libmagicwin64
-
- -- 'WindowsError: exception: access violation writing 0x00000000 ' This may indicate you are mixing
- +- 'WindowsError: exception: access violation writing 0x00000000 ' This may indicate you are mixing
- Windows Python and Cygwin Python. Make sure your libmagic and python builds are consistent.
-
- ## Author
- @@ -116,5 +125,3 @@
-
- python-magic is distributed under the MIT license. See the included
- LICENSE file for details.
- -
- -
- --- a/magic.py
- +++ /dev/null
- @@ -1,301 +0,0 @@
- -"""
- -magic is a wrapper around the libmagic file identification library.
- -
- -See README for more information.
- -
- -Usage:
- -
- ->>> import magic
- ->>> magic.from_file("testdata/test.pdf")
- -'PDF document, version 1.2'
- ->>> magic.from_file("testdata/test.pdf", mime=True)
- -'application/pdf'
- ->>> magic.from_buffer(open("testdata/test.pdf").read(1024))
- -'PDF document, version 1.2'
- ->>>
- -
- -
- -"""
- -
- -import sys
- -import glob
- -import os.path
- -import ctypes
- -import ctypes.util
- -import threading
- -
- -from ctypes import c_char_p, c_int, c_size_t, c_void_p
- -
- -
- -class MagicException(Exception):
- - def __init__(self, message):
- - super(MagicException, self).__init__(message)
- - self.message = message
- -
- -
- -class Magic:
- - """
- - Magic is a wrapper around the libmagic C library.
- -
- - """
- -
- - def __init__(self, mime=False, magic_file=None, mime_encoding=False,
- - keep_going=False, uncompress=False):
- - """
- - Create a new libmagic wrapper.
- -
- - mime - if True, mimetypes are returned instead of textual descriptions
- - mime_encoding - if True, codec is returned
- - magic_file - use a mime database other than the system default
- - keep_going - don't stop at the first match, keep going
- - uncompress - Try to look inside compressed files.
- - """
- - self.flags = MAGIC_NONE
- - if mime:
- - self.flags |= MAGIC_MIME
- - if mime_encoding:
- - self.flags |= MAGIC_MIME_ENCODING
- - if keep_going:
- - self.flags |= MAGIC_CONTINUE
- -
- - if uncompress:
- - self.flags |= MAGIC_COMPRESS
- -
- - self.cookie = magic_open(self.flags)
- - self.lock = threading.Lock()
- -
- - magic_load(self.cookie, magic_file)
- -
- - def from_buffer(self, buf):
- - """
- - Identify the contents of `buf`
- - """
- - with self.lock:
- - try:
- - # if we're on python3, convert buf to bytes
- - # otherwise this string is passed as wchar*
- - # which is not what libmagic expects
- - if type(buf) == str and str != bytes:
- - buf = buf.encode('utf-8', errors='replace')
- - return maybe_decode(magic_buffer(self.cookie, buf))
- - except MagicException as e:
- - return self._handle509Bug(e)
- -
- - def from_file(self, filename):
- - # raise FileNotFoundException or IOError if the file does not exist
- - with open(filename):
- - pass
- - with self.lock:
- - try:
- - return maybe_decode(magic_file(self.cookie, filename))
- - except MagicException as e:
- - return self._handle509Bug(e)
- -
- - def _handle509Bug(self, e):
- - # libmagic 5.09 has a bug where it might fail to identify the
- - # mimetype of a file and returns null from magic_file (and
- - # likely _buffer), but also does not return an error message.
- - if e.message is None and (self.flags & MAGIC_MIME):
- - return "application/octet-stream"
- - else:
- - raise e
- -
- - def __del__(self):
- - # no _thread_check here because there can be no other
- - # references to this object at this point.
- -
- - # during shutdown magic_close may have been cleared already so
- - # make sure it exists before using it.
- -
- - # the self.cookie check should be unnecessary and was an
- - # incorrect fix for a threading problem, however I'm leaving
- - # it in because it's harmless and I'm slightly afraid to
- - # remove it.
- - if self.cookie and magic_close:
- - magic_close(self.cookie)
- - self.cookie = None
- -
- -_instances = {}
- -
- -def _get_magic_type(mime):
- - i = _instances.get(mime)
- - if i is None:
- - i = _instances[mime] = Magic(mime=mime)
- - return i
- -
- -def from_file(filename, mime=False):
- - """"
- - Accepts a filename and returns the detected filetype. Return
- - value is the mimetype if mime=True, otherwise a human readable
- - name.
- -
- - >>> magic.from_file("testdata/test.pdf", mime=True)
- - 'application/pdf'
- - """
- - m = _get_magic_type(mime)
- - return m.from_file(filename)
- -
- -def from_buffer(buffer, mime=False):
- - """
- - Accepts a binary string and returns the detected filetype. Return
- - value is the mimetype if mime=True, otherwise a human readable
- - name.
- -
- - >>> magic.from_buffer(open("testdata/test.pdf").read(1024))
- - 'PDF document, version 1.2'
- - """
- - m = _get_magic_type(mime)
- - return m.from_buffer(buffer)
- -
- -
- -
- -
- -libmagic = None
- -# Let's try to find magic or magic1
- -dll = ctypes.util.find_library('magic') or ctypes.util.find_library('magic1') or ctypes.util.find_library('cygmagic-1')
- -
- -# This is necessary because find_library returns None if it doesn't find the library
- -if dll:
- - libmagic = ctypes.CDLL(dll)
- -
- -if not libmagic or not libmagic._name:
- - windows_dlls = ['magic1.dll','cygmagic-1.dll']
- - platform_to_lib = {'darwin': ['/opt/local/lib/libmagic.dylib',
- - '/usr/local/lib/libmagic.dylib'] +
- - # Assumes there will only be one version installed
- - glob.glob('/usr/local/Cellar/libmagic/*/lib/libmagic.dylib'),
- - 'win32': windows_dlls,
- - 'cygwin': windows_dlls,
- - 'linux': ['libmagic.so.1'], # fallback for some Linuxes (e.g. Alpine) where library search does not work
- - }
- - platform = 'linux' if sys.platform.startswith('linux') else sys.platform
- - for dll in platform_to_lib.get(platform, []):
- - try:
- - libmagic = ctypes.CDLL(dll)
- - break
- - except OSError:
- - pass
- -
- -if not libmagic or not libmagic._name:
- - # It is better to raise an ImportError since we are importing magic module
- - raise ImportError('failed to find libmagic. Check your installation')
- -
- -magic_t = ctypes.c_void_p
- -
- -def errorcheck_null(result, func, args):
- - if result is None:
- - err = magic_error(args[0])
- - raise MagicException(err)
- - else:
- - return result
- -
- -def errorcheck_negative_one(result, func, args):
- - if result is -1:
- - err = magic_error(args[0])
- - raise MagicException(err)
- - else:
- - return result
- -
- -
- -# return str on python3. Don't want to unconditionally
- -# decode because that results in unicode on python2
- -def maybe_decode(s):
- - if str == bytes:
- - return s
- - else:
- - return s.decode('utf-8')
- -
- -def coerce_filename(filename):
- - if filename is None:
- - return None
- -
- - # ctypes will implicitly convert unicode strings to bytes with
- - # .encode('ascii'). If you use the filesystem encoding
- - # then you'll get inconsistent behavior (crashes) depending on the user's
- - # LANG environment variable
- - is_unicode = (sys.version_info[0] <= 2 and
- - isinstance(filename, unicode)) or \
- - (sys.version_info[0] >= 3 and
- - isinstance(filename, str))
- - if is_unicode:
- - return filename.encode('utf-8', 'surrogateescape')
- - else:
- - return filename
- -
- -magic_open = libmagic.magic_open
- -magic_open.restype = magic_t
- -magic_open.argtypes = [c_int]
- -
- -magic_close = libmagic.magic_close
- -magic_close.restype = None
- -magic_close.argtypes = [magic_t]
- -
- -magic_error = libmagic.magic_error
- -magic_error.restype = c_char_p
- -magic_error.argtypes = [magic_t]
- -
- -magic_errno = libmagic.magic_errno
- -magic_errno.restype = c_int
- -magic_errno.argtypes = [magic_t]
- -
- -_magic_file = libmagic.magic_file
- -_magic_file.restype = c_char_p
- -_magic_file.argtypes = [magic_t, c_char_p]
- -_magic_file.errcheck = errorcheck_null
- -
- -def magic_file(cookie, filename):
- - return _magic_file(cookie, coerce_filename(filename))
- -
- -_magic_buffer = libmagic.magic_buffer
- -_magic_buffer.restype = c_char_p
- -_magic_buffer.argtypes = [magic_t, c_void_p, c_size_t]
- -_magic_buffer.errcheck = errorcheck_null
- -
- -def magic_buffer(cookie, buf):
- - return _magic_buffer(cookie, buf, len(buf))
- -
- -
- -_magic_load = libmagic.magic_load
- -_magic_load.restype = c_int
- -_magic_load.argtypes = [magic_t, c_char_p]
- -_magic_load.errcheck = errorcheck_negative_one
- -
- -def magic_load(cookie, filename):
- - return _magic_load(cookie, coerce_filename(filename))
- -
- -magic_setflags = libmagic.magic_setflags
- -magic_setflags.restype = c_int
- -magic_setflags.argtypes = [magic_t, c_int]
- -
- -magic_check = libmagic.magic_check
- -magic_check.restype = c_int
- -magic_check.argtypes = [magic_t, c_char_p]
- -
- -magic_compile = libmagic.magic_compile
- -magic_compile.restype = c_int
- -magic_compile.argtypes = [magic_t, c_char_p]
- -
- -
- -
- -MAGIC_NONE = 0x000000 # No flags
- -MAGIC_DEBUG = 0x000001 # Turn on debugging
- -MAGIC_SYMLINK = 0x000002 # Follow symlinks
- -MAGIC_COMPRESS = 0x000004 # Check inside compressed files
- -MAGIC_DEVICES = 0x000008 # Look at the contents of devices
- -MAGIC_MIME = 0x000010 # Return a mime string
- -MAGIC_MIME_ENCODING = 0x000400 # Return the MIME encoding
- -MAGIC_CONTINUE = 0x000020 # Return all matches
- -MAGIC_CHECK = 0x000040 # Print warnings to stderr
- -MAGIC_PRESERVE_ATIME = 0x000080 # Restore access time on exit
- -MAGIC_RAW = 0x000100 # Don't translate unprintable chars
- -MAGIC_ERROR = 0x000200 # Handle ENOENT etc as real errors
- -
- -MAGIC_NO_CHECK_COMPRESS = 0x001000 # Don't check for compressed files
- -MAGIC_NO_CHECK_TAR = 0x002000 # Don't check for tar files
- -MAGIC_NO_CHECK_SOFT = 0x004000 # Don't check magic entries
- -MAGIC_NO_CHECK_APPTYPE = 0x008000 # Don't check application type
- -MAGIC_NO_CHECK_ELF = 0x010000 # Don't check for elf details
- -MAGIC_NO_CHECK_ASCII = 0x020000 # Don't check for ascii files
- -MAGIC_NO_CHECK_TROFF = 0x040000 # Don't check ascii/troff
- -MAGIC_NO_CHECK_FORTRAN = 0x080000 # Don't check ascii/fortran
- -MAGIC_NO_CHECK_TOKENS = 0x100000 # Don't check ascii/tokens
- --- /dev/null
- +++ b/magic/__init__.py
- @@ -0,0 +1,361 @@
- +"""
- +magic is a wrapper around the libmagic file identification library.
- +
- +See README for more information.
- +
- +Usage:
- +
- +>>> import magic
- +>>> magic.from_file("testdata/test.pdf")
- +'PDF document, version 1.2'
- +>>> magic.from_file("testdata/test.pdf", mime=True)
- +'application/pdf'
- +>>> magic.from_buffer(open("testdata/test.pdf").read(1024))
- +'PDF document, version 1.2'
- +>>>
- +
- +
- +"""
- +
- +import sys
- +import glob
- +import os.path
- +import ctypes
- +import ctypes.util
- +import threading
- +import logging
- +
- +from ctypes import c_char_p, c_int, c_size_t, c_void_p
- +
- +# avoid shadowing the real open with the version from compat.py
- +_real_open = open
- +
- +class MagicException(Exception):
- + def __init__(self, message):
- + super(MagicException, self).__init__(message)
- + self.message = message
- +
- +
- +class Magic:
- + """
- + Magic is a wrapper around the libmagic C library.
- +
- + """
- +
- + def __init__(self, mime=False, magic_file=None, mime_encoding=False,
- + keep_going=False, uncompress=False):
- + """
- + Create a new libmagic wrapper.
- +
- + mime - if True, mimetypes are returned instead of textual descriptions
- + mime_encoding - if True, codec is returned
- + magic_file - use a mime database other than the system default
- + keep_going - don't stop at the first match, keep going
- + uncompress - Try to look inside compressed files.
- + """
- + self.flags = MAGIC_NONE
- + if mime:
- + self.flags |= MAGIC_MIME
- + if mime_encoding:
- + self.flags |= MAGIC_MIME_ENCODING
- + if keep_going:
- + self.flags |= MAGIC_CONTINUE
- +
- + if uncompress:
- + self.flags |= MAGIC_COMPRESS
- +
- + self.cookie = magic_open(self.flags)
- + self.lock = threading.Lock()
- +
- + magic_load(self.cookie, magic_file)
- +
- + def from_buffer(self, buf):
- + """
- + Identify the contents of `buf`
- + """
- + with self.lock:
- + try:
- + # if we're on python3, convert buf to bytes
- + # otherwise this string is passed as wchar*
- + # which is not what libmagic expects
- + if type(buf) == str and str != bytes:
- + buf = buf.encode('utf-8', errors='replace')
- + return maybe_decode(magic_buffer(self.cookie, buf))
- + except MagicException as e:
- + return self._handle509Bug(e)
- +
- + def from_open_file(self, open_file):
- + with self.lock:
- + try:
- + return maybe_decode(magic_descriptor(self.cookie, open_file.fileno()))
- + except MagicException as e:
- + return self._handle509Bug(e)
- +
- + def from_file(self, filename):
- + # raise FileNotFoundException or IOError if the file does not exist
- + with _real_open(filename):
- + pass
- +
- + with self.lock:
- + try:
- + return maybe_decode(magic_file(self.cookie, filename))
- + except MagicException as e:
- + return self._handle509Bug(e)
- +
- + def _handle509Bug(self, e):
- + # libmagic 5.09 has a bug where it might fail to identify the
- + # mimetype of a file and returns null from magic_file (and
- + # likely _buffer), but also does not return an error message.
- + if e.message is None and (self.flags & MAGIC_MIME):
- + return "application/octet-stream"
- + else:
- + raise e
- +
- + def __del__(self):
- + # no _thread_check here because there can be no other
- + # references to this object at this point.
- +
- + # during shutdown magic_close may have been cleared already so
- + # make sure it exists before using it.
- +
- + # the self.cookie check should be unnecessary and was an
- + # incorrect fix for a threading problem, however I'm leaving
- + # it in because it's harmless and I'm slightly afraid to
- + # remove it.
- + if self.cookie and magic_close:
- + magic_close(self.cookie)
- + self.cookie = None
- +
- +_instances = {}
- +
- +def _get_magic_type(mime):
- + i = _instances.get(mime)
- + if i is None:
- + i = _instances[mime] = Magic(mime=mime)
- + return i
- +
- +def from_file(filename, mime=False):
- + """"
- + Accepts a filename and returns the detected filetype. Return
- + value is the mimetype if mime=True, otherwise a human readable
- + name.
- +
- + >>> magic.from_file("testdata/test.pdf", mime=True)
- + 'application/pdf'
- + """
- + m = _get_magic_type(mime)
- + return m.from_file(filename)
- +
- +def from_buffer(buffer, mime=False):
- + """
- + Accepts a binary string and returns the detected filetype. Return
- + value is the mimetype if mime=True, otherwise a human readable
- + name.
- +
- + >>> magic.from_buffer(open("testdata/test.pdf").read(1024))
- + 'PDF document, version 1.2'
- + """
- + m = _get_magic_type(mime)
- + return m.from_buffer(buffer)
- +
- +
- +
- +
- +libmagic = None
- +# Let's try to find magic or magic1
- +dll = ctypes.util.find_library('magic') or ctypes.util.find_library('magic1') or ctypes.util.find_library('cygmagic-1')
- +
- +# This is necessary because find_library returns None if it doesn't find the library
- +if dll:
- + libmagic = ctypes.CDLL(dll)
- +
- +if not libmagic or not libmagic._name:
- + windows_dlls = ['magic1.dll','cygmagic-1.dll']
- + platform_to_lib = {'darwin': ['/opt/local/lib/libmagic.dylib',
- + '/usr/local/lib/libmagic.dylib'] +
- + # Assumes there will only be one version installed
- + glob.glob('/usr/local/Cellar/libmagic/*/lib/libmagic.dylib'),
- + 'win32': windows_dlls,
- + 'cygwin': windows_dlls,
- + 'linux': ['libmagic.so.1'], # fallback for some Linuxes (e.g. Alpine) where library search does not work
- + }
- + platform = 'linux' if sys.platform.startswith('linux') else sys.platform
- + for dll in platform_to_lib.get(platform, []):
- + try:
- + libmagic = ctypes.CDLL(dll)
- + break
- + except OSError:
- + pass
- +
- +if not libmagic or not libmagic._name:
- + # It is better to raise an ImportError since we are importing magic module
- + raise ImportError('failed to find libmagic. Check your installation')
- +
- +magic_t = ctypes.c_void_p
- +
- +def errorcheck_null(result, func, args):
- + if result is None:
- + err = magic_error(args[0])
- + raise MagicException(err)
- + else:
- + return result
- +
- +def errorcheck_negative_one(result, func, args):
- + if result is -1:
- + err = magic_error(args[0])
- + raise MagicException(err)
- + else:
- + return result
- +
- +
- +# return str on python3. Don't want to unconditionally
- +# decode because that results in unicode on python2
- +def maybe_decode(s):
- + if str == bytes:
- + return s
- + else:
- + return s.decode('utf-8')
- +
- +def coerce_filename(filename):
- + if filename is None:
- + return None
- +
- + # ctypes will implicitly convert unicode strings to bytes with
- + # .encode('ascii'). If you use the filesystem encoding
- + # then you'll get inconsistent behavior (crashes) depending on the user's
- + # LANG environment variable
- + is_unicode = (sys.version_info[0] <= 2 and
- + isinstance(filename, unicode)) or \
- + (sys.version_info[0] >= 3 and
- + isinstance(filename, str))
- + if is_unicode:
- + return filename.encode('utf-8', 'surrogateescape')
- + else:
- + return filename
- +
- +magic_open = libmagic.magic_open
- +magic_open.restype = magic_t
- +magic_open.argtypes = [c_int]
- +
- +magic_close = libmagic.magic_close
- +magic_close.restype = None
- +magic_close.argtypes = [magic_t]
- +
- +magic_error = libmagic.magic_error
- +magic_error.restype = c_char_p
- +magic_error.argtypes = [magic_t]
- +
- +magic_errno = libmagic.magic_errno
- +magic_errno.restype = c_int
- +magic_errno.argtypes = [magic_t]
- +
- +_magic_file = libmagic.magic_file
- +_magic_file.restype = c_char_p
- +_magic_file.argtypes = [magic_t, c_char_p]
- +_magic_file.errcheck = errorcheck_null
- +
- +def magic_file(cookie, filename):
- + return _magic_file(cookie, coerce_filename(filename))
- +
- +_magic_buffer = libmagic.magic_buffer
- +_magic_buffer.restype = c_char_p
- +_magic_buffer.argtypes = [magic_t, c_void_p, c_size_t]
- +_magic_buffer.errcheck = errorcheck_null
- +
- +def magic_buffer(cookie, buf):
- + return _magic_buffer(cookie, buf, len(buf))
- +
- +magic_descriptor = libmagic.magic_descriptor
- +magic_descriptor.restype = c_char_p
- +magic_descriptor.argtypes = [magic_t, c_int]
- +magic_descriptor.errcheck = errorcheck_null
- +
- +_magic_load = libmagic.magic_load
- +_magic_load.restype = c_int
- +_magic_load.argtypes = [magic_t, c_char_p]
- +_magic_load.errcheck = errorcheck_negative_one
- +
- +def magic_load(cookie, filename):
- + return _magic_load(cookie, coerce_filename(filename))
- +
- +magic_setflags = libmagic.magic_setflags
- +magic_setflags.restype = c_int
- +magic_setflags.argtypes = [magic_t, c_int]
- +
- +magic_check = libmagic.magic_check
- +magic_check.restype = c_int
- +magic_check.argtypes = [magic_t, c_char_p]
- +
- +magic_compile = libmagic.magic_compile
- +magic_compile.restype = c_int
- +magic_compile.argtypes = [magic_t, c_char_p]
- +
- +
- +
- +MAGIC_NONE = 0x000000 # No flags
- +MAGIC_DEBUG = 0x000001 # Turn on debugging
- +MAGIC_SYMLINK = 0x000002 # Follow symlinks
- +MAGIC_COMPRESS = 0x000004 # Check inside compressed files
- +MAGIC_DEVICES = 0x000008 # Look at the contents of devices
- +MAGIC_MIME = 0x000010 # Return a mime string
- +MAGIC_MIME_ENCODING = 0x000400 # Return the MIME encoding
- +MAGIC_CONTINUE = 0x000020 # Return all matches
- +MAGIC_CHECK = 0x000040 # Print warnings to stderr
- +MAGIC_PRESERVE_ATIME = 0x000080 # Restore access time on exit
- +MAGIC_RAW = 0x000100 # Don't translate unprintable chars
- +MAGIC_ERROR = 0x000200 # Handle ENOENT etc as real errors
- +
- +MAGIC_NO_CHECK_COMPRESS = 0x001000 # Don't check for compressed files
- +MAGIC_NO_CHECK_TAR = 0x002000 # Don't check for tar files
- +MAGIC_NO_CHECK_SOFT = 0x004000 # Don't check magic entries
- +MAGIC_NO_CHECK_APPTYPE = 0x008000 # Don't check application type
- +MAGIC_NO_CHECK_ELF = 0x010000 # Don't check for elf details
- +MAGIC_NO_CHECK_ASCII = 0x020000 # Don't check for ascii files
- +MAGIC_NO_CHECK_TROFF = 0x040000 # Don't check ascii/troff
- +MAGIC_NO_CHECK_FORTRAN = 0x080000 # Don't check ascii/fortran
- +MAGIC_NO_CHECK_TOKENS = 0x100000 # Don't check ascii/tokens
- +
- +# This package name conflicts with the one provided by upstream
- +# libmagic. This is a common source of confusion for users. To
- +# resolve, we ship a copy of that module, and expose its functions
- +# wrapped in deprecation warnings.
- +def add_compat(to_module):
- +
- + import warnings, re
- + from magic import compat
- +
- + def deprecation_wrapper(compat, fn, alternate):
- + def _(*args, **kwargs):
- + warnings.warn(
- + "Using compatability mode with libmagic's python binding",
- + DeprecationWarning)
- +
- + return compat[fn](*args, **kwargs)
- + return _
- +
- + fn = [('detect_from_filename', 'magic.from_file'),
- + ('detect_from_content', 'magic.from_buffer'),
- + ('detect_from_fobj', 'magic.Magic.from_open_file'),
- + ('open', 'magic.Magic')]
- + for (fname, alternate) in fn:
- + # for now, disable the deprecation warning until there's clarity on
- + # what the merged module should look like
- + to_module[fname] = compat.__dict__.get(fname)
- + #to_module[fname] = deprecation_wrapper(compat.__dict__, fname, alternate)
- +
- + # copy constants over, ensuring there's no conflicts
- + is_const_re = re.compile("^[A-Z_]+$")
- + allowed_inconsistent = set(['MAGIC_MIME'])
- + for name, value in compat.__dict__.items():
- + if is_const_re.match(name):
- + if name in to_module:
- + if name in allowed_inconsistent:
- + continue
- + if to_module[name] != value:
- + raise Exception("inconsistent value for " + name)
- + else:
- + continue
- + else:
- + to_module[name] = value
- +
- +add_compat(globals())
- --- /dev/null
- +++ b/magic/compat.py
- @@ -0,0 +1,285 @@
- +# coding: utf-8
- +
- +'''
- +Python bindings for libmagic
- +'''
- +
- +import ctypes
- +
- +from collections import namedtuple
- +
- +from ctypes import *
- +from ctypes.util import find_library
- +
- +
- +def _init():
- + """
- + Loads the shared library through ctypes and returns a library
- + L{ctypes.CDLL} instance
- + """
- + return ctypes.cdll.LoadLibrary(find_library('magic'))
- +
- +_libraries = {}
- +_libraries['magic'] = _init()
- +
- +# Flag constants for open and setflags
- +MAGIC_NONE = NONE = 0
- +MAGIC_DEBUG = DEBUG = 1
- +MAGIC_SYMLINK = SYMLINK = 2
- +MAGIC_COMPRESS = COMPRESS = 4
- +MAGIC_DEVICES = DEVICES = 8
- +MAGIC_MIME_TYPE = MIME_TYPE = 16
- +MAGIC_CONTINUE = CONTINUE = 32
- +MAGIC_CHECK = CHECK = 64
- +MAGIC_PRESERVE_ATIME = PRESERVE_ATIME = 128
- +MAGIC_RAW = RAW = 256
- +MAGIC_ERROR = ERROR = 512
- +MAGIC_MIME_ENCODING = MIME_ENCODING = 1024
- +MAGIC_MIME = MIME = 1040 # MIME_TYPE + MIME_ENCODING
- +MAGIC_APPLE = APPLE = 2048
- +
- +MAGIC_NO_CHECK_COMPRESS = NO_CHECK_COMPRESS = 4096
- +MAGIC_NO_CHECK_TAR = NO_CHECK_TAR = 8192
- +MAGIC_NO_CHECK_SOFT = NO_CHECK_SOFT = 16384
- +MAGIC_NO_CHECK_APPTYPE = NO_CHECK_APPTYPE = 32768
- +MAGIC_NO_CHECK_ELF = NO_CHECK_ELF = 65536
- +MAGIC_NO_CHECK_TEXT = NO_CHECK_TEXT = 131072
- +MAGIC_NO_CHECK_CDF = NO_CHECK_CDF = 262144
- +MAGIC_NO_CHECK_TOKENS = NO_CHECK_TOKENS = 1048576
- +MAGIC_NO_CHECK_ENCODING = NO_CHECK_ENCODING = 2097152
- +
- +MAGIC_NO_CHECK_BUILTIN = NO_CHECK_BUILTIN = 4173824
- +
- +FileMagic = namedtuple('FileMagic', ('mime_type', 'encoding', 'name'))
- +
- +
- +class magic_set(Structure):
- + pass
- +magic_set._fields_ = []
- +magic_t = POINTER(magic_set)
- +
- +_open = _libraries['magic'].magic_open
- +_open.restype = magic_t
- +_open.argtypes = [c_int]
- +
- +_close = _libraries['magic'].magic_close
- +_close.restype = None
- +_close.argtypes = [magic_t]
- +
- +_file = _libraries['magic'].magic_file
- +_file.restype = c_char_p
- +_file.argtypes = [magic_t, c_char_p]
- +
- +_descriptor = _libraries['magic'].magic_descriptor
- +_descriptor.restype = c_char_p
- +_descriptor.argtypes = [magic_t, c_int]
- +
- +_buffer = _libraries['magic'].magic_buffer
- +_buffer.restype = c_char_p
- +_buffer.argtypes = [magic_t, c_void_p, c_size_t]
- +
- +_error = _libraries['magic'].magic_error
- +_error.restype = c_char_p
- +_error.argtypes = [magic_t]
- +
- +_setflags = _libraries['magic'].magic_setflags
- +_setflags.restype = c_int
- +_setflags.argtypes = [magic_t, c_int]
- +
- +_load = _libraries['magic'].magic_load
- +_load.restype = c_int
- +_load.argtypes = [magic_t, c_char_p]
- +
- +_compile = _libraries['magic'].magic_compile
- +_compile.restype = c_int
- +_compile.argtypes = [magic_t, c_char_p]
- +
- +_check = _libraries['magic'].magic_check
- +_check.restype = c_int
- +_check.argtypes = [magic_t, c_char_p]
- +
- +_list = _libraries['magic'].magic_list
- +_list.restype = c_int
- +_list.argtypes = [magic_t, c_char_p]
- +
- +_errno = _libraries['magic'].magic_errno
- +_errno.restype = c_int
- +_errno.argtypes = [magic_t]
- +
- +
- +class Magic(object):
- + def __init__(self, ms):
- + self._magic_t = ms
- +
- + def close(self):
- + """
- + Closes the magic database and deallocates any resources used.
- + """
- + _close(self._magic_t)
- +
- + @staticmethod
- + def __tostr(s):
- + if s is None:
- + return None
- + if isinstance(s, str):
- + return s
- + try: # keep Python 2 compatibility
- + return str(s, 'utf-8')
- + except TypeError:
- + return str(s)
- +
- + @staticmethod
- + def __tobytes(b):
- + if b is None:
- + return None
- + if isinstance(b, bytes):
- + return b
- + try: # keep Python 2 compatibility
- + return bytes(b, 'utf-8')
- + except TypeError:
- + return bytes(b)
- +
- + def file(self, filename):
- + """
- + Returns a textual description of the contents of the argument passed
- + as a filename or None if an error occurred and the MAGIC_ERROR flag
- + is set. A call to errno() will return the numeric error code.
- + """
- + return Magic.__tostr(_file(self._magic_t, Magic.__tobytes(filename)))
- +
- + def descriptor(self, fd):
- + """
- + Returns a textual description of the contents of the argument passed
- + as a file descriptor or None if an error occurred and the MAGIC_ERROR
- + flag is set. A call to errno() will return the numeric error code.
- + """
- + return Magic.__tostr(_descriptor(self._magic_t, fd))
- +
- + def buffer(self, buf):
- + """
- + Returns a textual description of the contents of the argument passed
- + as a buffer or None if an error occurred and the MAGIC_ERROR flag
- + is set. A call to errno() will return the numeric error code.
- + """
- + return Magic.__tostr(_buffer(self._magic_t, buf, len(buf)))
- +
- + def error(self):
- + """
- + Returns a textual explanation of the last error or None
- + if there was no error.
- + """
- + return Magic.__tostr(_error(self._magic_t))
- +
- + def setflags(self, flags):
- + """
- + Set flags on the magic object which determine how magic checking
- + behaves; a bitwise OR of the flags described in libmagic(3), but
- + without the MAGIC_ prefix.
- +
- + Returns -1 on systems that don't support utime(2) or utimes(2)
- + when PRESERVE_ATIME is set.
- + """
- + return _setflags(self._magic_t, flags)
- +
- + def load(self, filename=None):
- + """
- + Loads the entries in the colon separated list of database files passed
- + as argument, or the default database file if no argument is given.
- + Must be called before any magic queries can be performed.
- +
- + Returns 0 on success and -1 on failure.
- + """
- + return _load(self._magic_t, Magic.__tobytes(filename))
- +
- + def compile(self, dbs):
- + """
- + Compile entries in the colon separated list of database files
- + passed as argument or the default database file if no argument.
- + The compiled files created are named from the basename(1) of each file
- + argument with ".mgc" appended to it.
- +
- + Returns 0 on success and -1 on failure.
- + """
- + return _compile(self._magic_t, Magic.__tobytes(dbs))
- +
- + def check(self, dbs):
- + """
- + Check the validity of entries in the colon separated list of
- + database files passed as argument or the default database file
- + if no argument.
- +
- + Returns 0 on success and -1 on failure.
- + """
- + return _check(self._magic_t, Magic.__tobytes(dbs))
- +
- + def list(self, dbs):
- + """
- + Dump all magic entries in the colon separated list of database
- + files passed as argument, or in the default database file if no
- + argument, in a human readable format.
- +
- + Returns 0 on success and -1 on failure.
- + """
- + return _list(self._magic_t, Magic.__tobytes(dbs))
- +
- + def errno(self):
- + """
- + Returns a numeric error code. If return value is 0, an internal
- + magic error occurred. If return value is non-zero, the value is
- + an OS error code. The errno module or os.strerror() can be used
- + to provide detailed error information.
- + """
- + return _errno(self._magic_t)
- +
- +
- +def open(flags):
- + """
- + Returns a Magic object wrapping the result of magic_open(); this is
- + never None, even on failure. Flags argument as for setflags.
- + """
- + return Magic(_open(flags))
- +
- +
- +# Objects used by `detect_from_` functions
- +mime_magic = Magic(_open(MAGIC_MIME))
- +mime_magic.load()
- +none_magic = Magic(_open(MAGIC_NONE))
- +none_magic.load()
- +
- +
- +def _create_filemagic(mime_detected, type_detected):
- + mime_type, mime_encoding = mime_detected.split('; ')
- +
- + return FileMagic(name=type_detected, mime_type=mime_type,
- + encoding=mime_encoding.replace('charset=', ''))
- +
- +
- +def detect_from_filename(filename):
- + '''Detect mime type, encoding and file type from a filename
- +
- + Returns a `FileMagic` namedtuple.
- + '''
- +
- + return _create_filemagic(mime_magic.file(filename),
- + none_magic.file(filename))
- +
- +
- +def detect_from_fobj(fobj):
- + '''Detect mime type, encoding and file type from file-like object
- +
- + Returns a `FileMagic` namedtuple.
- + '''
- +
- + file_descriptor = fobj.fileno()
- + return _create_filemagic(mime_magic.descriptor(file_descriptor),
- + none_magic.descriptor(file_descriptor))
- +
- +
- +def detect_from_content(byte_content):
- + '''Detect mime type, encoding and file type from bytes
- +
- + Returns a `FileMagic` namedtuple.
- + '''
- +
- + return _create_filemagic(mime_magic.buffer(byte_content),
- + none_magic.buffer(byte_content))
- --- a/setup.py
- +++ b/setup.py
- @@ -8,8 +8,8 @@
- author='Adam Hupp',
- author_email='adam@hupp.org',
- url="http://github.com/ahupp/python-magic",
- - version='0.4.15',
- - py_modules=['magic'],
- + version='0.4.16',
- + packages=['magic'],
- long_description="""This module uses ctypes to access the libmagic file type
- identification library. It makes use of the local magic database and
- supports both textual and MIME-type output.
- --- /dev/null
- +++ b/test/libmagic_test.py
- @@ -0,0 +1,39 @@
- +# coding: utf-8
- +
- +import unittest
- +
- +import magic
- +
- +
- +class MagicTestCase(unittest.TestCase):
- +
- + filename = 'test/testdata/test.pdf'
- + expected_mime_type = 'application/pdf'
- + expected_encoding = 'us-ascii'
- + expected_name = 'PDF document, version 1.2'
- +
- + def assert_result(self, result):
- + self.assertEqual(result.mime_type, self.expected_mime_type)
- + self.assertEqual(result.encoding, self.expected_encoding)
- + self.assertEqual(result.name, self.expected_name)
- +
- + def test_detect_from_filename(self):
- + result = magic.detect_from_filename(self.filename)
- + self.assert_result(result)
- +
- + def test_detect_from_fobj(self):
- + with open(self.filename) as fobj:
- + result = magic.detect_from_fobj(fobj)
- + self.assert_result(result)
- +
- + def test_detect_from_content(self):
- + # differ from upstream by opening file in binary mode,
- + # this avoids hitting a bug in python3+libfile bindings
- + # see https://github.com/ahupp/python-magic/issues/152
- + # for a similar issue
- + with open(self.filename, 'rb') as fobj:
- + result = magic.detect_from_content(fobj.read(4096))
- + self.assert_result(result)
- +
- +if __name__ == '__main__':
- + unittest.main()
- --- a/test/run.sh
- +++ b/test/run.sh
- @@ -8,7 +8,10 @@
-
- echo "python2.6"
- python2.6 ${THISDIR}/test.py
- +python2.6 ${THISDIR}/libmagic_test.py
- echo "python2.7"
- python2.7 ${THISDIR}/test.py
- -echo "python3.0"
- +python2.7 ${THISDIR}/libmagic_test.py
- +echo "python3"
- python3 ${THISDIR}/test.py
- +python3 ${THISDIR}/libmagic_test.py
- --- a/test/test.py
- +++ b/test/test.py
- @@ -37,7 +37,13 @@
- self.assertEqual("text/x-python", m.from_buffer(s))
- b = b'#!/usr/bin/env python\nprint("foo")'
- self.assertEqual("text/x-python", m.from_buffer(b))
- -
- +
- +
- + def test_open_file(self):
- + m = magic.Magic(mime=True)
- + with open(os.path.join(self.TESTDATA_DIR, "test.pdf")) as f:
- + self.assertEqual("application/pdf", m.from_open_file(f))
- +
- def test_mime_types(self):
- dest = os.path.join(MagicTest.TESTDATA_DIR, b'\xce\xbb'.decode('utf-8'))
- shutil.copyfile(os.path.join(MagicTest.TESTDATA_DIR, 'lambda'), dest)
|