Subject: libmagic compatibility Origin: libmagic-compat branch, commit 315cb4c Upstream-Author: Adam Hupp Date: Mon Dec 4 11:55:27 2017 -0800 Last-Updated: 2018-01-15 --- a/LICENSE +++ b/LICENSE @@ -19,3 +19,40 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +==== + +Portions of this package (magic/compat.py and test/libmagic_test.py) +are distributed under the following copyright notice: + + +$File: LEGAL.NOTICE,v 1.15 2006/05/03 18:48:33 christos Exp $ +Copyright (c) Ian F. Darwin 1986, 1987, 1989, 1990, 1991, 1992, 1994, 1995. +Software written by Ian F. Darwin and others; +maintained 1994- Christos Zoulas. + +This software is not subject to any export provision of the United States +Department of Commerce, and may be exported to any country or planet. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice immediately at the beginning of the file, without modification, + this list of conditions, and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. --- a/README.md +++ b/README.md @@ -45,9 +45,18 @@ Minor version bumps should be backwards compatible. Major bumps are not. -## Name Conflict +## Compatibility -There are, sadly, two libraries which use the module name `magic`. Both have been around for quite a while.If you are using this module and get an error using a method like `open`, your code is expecting the other one. Hopefully one day these will be reconciled. +There are, sadly, 3 libraries using the package name `magic`. The others are: + +1. libmagic itself distributes a `magic` python module with a somewhat +different API. python-magic includes a copy of this module to avoid +unnecessary breakage when both versions are installed. Maybe someday +they will converge. + +2. python-libmagic also uses the same module name, and has a similar +but not identical API. If you run into errors about "magic.h" not +being present, you should uninstall python-libmagic. ## Installation @@ -64,7 +73,7 @@ You'll need DLLs for libmagic. @julian-r has uploaded a versoin of this project that includes binaries to pypi: https://pypi.python.org/pypi/python-magic-bin/0.4.14 -Other sources of the libraries in the past have been [File for Windows](http://gnuwin32.sourceforge.net/packages/file.htm) . You will need to copy the file `magic` out of `[binary-zip]\share\misc`, and pass it's location to `Magic(magic_file=...)`. 
+Other sources of the libraries in the past have been [File for Windows](http://gnuwin32.sourceforge.net/packages/file.htm) . You will need to copy the file `magic` out of `[binary-zip]\share\misc`, and pass its location to `Magic(magic_file=...)`. If you are using a 64-bit build of python, you'll need 64-bit libmagic binaries which can be found here: https://github.com/pidydx/libmagicwin64. Newer version can be found here: https://github.com/nscaife/file-windows. @@ -86,7 +95,7 @@ Attempting to run the 32-bit libmagic DLL in a 64-bit build of python will fail with this error. Here are 64-bit builds of libmagic for windows: https://github.com/pidydx/libmagicwin64 -- 'WindowsError: exception: access violation writing 0x00000000 ' This may indicate you are mixing +- 'WindowsError: exception: access violation writing 0x00000000 ' This may indicate you are mixing Windows Python and Cygwin Python. Make sure your libmagic and python builds are consistent. ## Author @@ -116,5 +125,3 @@ python-magic is distributed under the MIT license. See the included LICENSE file for details. - - --- a/magic.py +++ /dev/null @@ -1,301 +0,0 @@ -""" -magic is a wrapper around the libmagic file identification library. - -See README for more information. - -Usage: - ->>> import magic ->>> magic.from_file("testdata/test.pdf") -'PDF document, version 1.2' ->>> magic.from_file("testdata/test.pdf", mime=True) -'application/pdf' ->>> magic.from_buffer(open("testdata/test.pdf").read(1024)) -'PDF document, version 1.2' ->>> - - -""" - -import sys -import glob -import os.path -import ctypes -import ctypes.util -import threading - -from ctypes import c_char_p, c_int, c_size_t, c_void_p - - -class MagicException(Exception): - def __init__(self, message): - super(MagicException, self).__init__(message) - self.message = message - - -class Magic: - """ - Magic is a wrapper around the libmagic C library. 
- - """ - - def __init__(self, mime=False, magic_file=None, mime_encoding=False, - keep_going=False, uncompress=False): - """ - Create a new libmagic wrapper. - - mime - if True, mimetypes are returned instead of textual descriptions - mime_encoding - if True, codec is returned - magic_file - use a mime database other than the system default - keep_going - don't stop at the first match, keep going - uncompress - Try to look inside compressed files. - """ - self.flags = MAGIC_NONE - if mime: - self.flags |= MAGIC_MIME - if mime_encoding: - self.flags |= MAGIC_MIME_ENCODING - if keep_going: - self.flags |= MAGIC_CONTINUE - - if uncompress: - self.flags |= MAGIC_COMPRESS - - self.cookie = magic_open(self.flags) - self.lock = threading.Lock() - - magic_load(self.cookie, magic_file) - - def from_buffer(self, buf): - """ - Identify the contents of `buf` - """ - with self.lock: - try: - # if we're on python3, convert buf to bytes - # otherwise this string is passed as wchar* - # which is not what libmagic expects - if type(buf) == str and str != bytes: - buf = buf.encode('utf-8', errors='replace') - return maybe_decode(magic_buffer(self.cookie, buf)) - except MagicException as e: - return self._handle509Bug(e) - - def from_file(self, filename): - # raise FileNotFoundException or IOError if the file does not exist - with open(filename): - pass - with self.lock: - try: - return maybe_decode(magic_file(self.cookie, filename)) - except MagicException as e: - return self._handle509Bug(e) - - def _handle509Bug(self, e): - # libmagic 5.09 has a bug where it might fail to identify the - # mimetype of a file and returns null from magic_file (and - # likely _buffer), but also does not return an error message. - if e.message is None and (self.flags & MAGIC_MIME): - return "application/octet-stream" - else: - raise e - - def __del__(self): - # no _thread_check here because there can be no other - # references to this object at this point. 
- - # during shutdown magic_close may have been cleared already so - # make sure it exists before using it. - - # the self.cookie check should be unnecessary and was an - # incorrect fix for a threading problem, however I'm leaving - # it in because it's harmless and I'm slightly afraid to - # remove it. - if self.cookie and magic_close: - magic_close(self.cookie) - self.cookie = None - -_instances = {} - -def _get_magic_type(mime): - i = _instances.get(mime) - if i is None: - i = _instances[mime] = Magic(mime=mime) - return i - -def from_file(filename, mime=False): - """" - Accepts a filename and returns the detected filetype. Return - value is the mimetype if mime=True, otherwise a human readable - name. - - >>> magic.from_file("testdata/test.pdf", mime=True) - 'application/pdf' - """ - m = _get_magic_type(mime) - return m.from_file(filename) - -def from_buffer(buffer, mime=False): - """ - Accepts a binary string and returns the detected filetype. Return - value is the mimetype if mime=True, otherwise a human readable - name. - - >>> magic.from_buffer(open("testdata/test.pdf").read(1024)) - 'PDF document, version 1.2' - """ - m = _get_magic_type(mime) - return m.from_buffer(buffer) - - - - -libmagic = None -# Let's try to find magic or magic1 -dll = ctypes.util.find_library('magic') or ctypes.util.find_library('magic1') or ctypes.util.find_library('cygmagic-1') - -# This is necessary because find_library returns None if it doesn't find the library -if dll: - libmagic = ctypes.CDLL(dll) - -if not libmagic or not libmagic._name: - windows_dlls = ['magic1.dll','cygmagic-1.dll'] - platform_to_lib = {'darwin': ['/opt/local/lib/libmagic.dylib', - '/usr/local/lib/libmagic.dylib'] + - # Assumes there will only be one version installed - glob.glob('/usr/local/Cellar/libmagic/*/lib/libmagic.dylib'), - 'win32': windows_dlls, - 'cygwin': windows_dlls, - 'linux': ['libmagic.so.1'], # fallback for some Linuxes (e.g. 
Alpine) where library search does not work - } - platform = 'linux' if sys.platform.startswith('linux') else sys.platform - for dll in platform_to_lib.get(platform, []): - try: - libmagic = ctypes.CDLL(dll) - break - except OSError: - pass - -if not libmagic or not libmagic._name: - # It is better to raise an ImportError since we are importing magic module - raise ImportError('failed to find libmagic. Check your installation') - -magic_t = ctypes.c_void_p - -def errorcheck_null(result, func, args): - if result is None: - err = magic_error(args[0]) - raise MagicException(err) - else: - return result - -def errorcheck_negative_one(result, func, args): - if result is -1: - err = magic_error(args[0]) - raise MagicException(err) - else: - return result - - -# return str on python3. Don't want to unconditionally -# decode because that results in unicode on python2 -def maybe_decode(s): - if str == bytes: - return s - else: - return s.decode('utf-8') - -def coerce_filename(filename): - if filename is None: - return None - - # ctypes will implicitly convert unicode strings to bytes with - # .encode('ascii'). 
If you use the filesystem encoding - # then you'll get inconsistent behavior (crashes) depending on the user's - # LANG environment variable - is_unicode = (sys.version_info[0] <= 2 and - isinstance(filename, unicode)) or \ - (sys.version_info[0] >= 3 and - isinstance(filename, str)) - if is_unicode: - return filename.encode('utf-8', 'surrogateescape') - else: - return filename - -magic_open = libmagic.magic_open -magic_open.restype = magic_t -magic_open.argtypes = [c_int] - -magic_close = libmagic.magic_close -magic_close.restype = None -magic_close.argtypes = [magic_t] - -magic_error = libmagic.magic_error -magic_error.restype = c_char_p -magic_error.argtypes = [magic_t] - -magic_errno = libmagic.magic_errno -magic_errno.restype = c_int -magic_errno.argtypes = [magic_t] - -_magic_file = libmagic.magic_file -_magic_file.restype = c_char_p -_magic_file.argtypes = [magic_t, c_char_p] -_magic_file.errcheck = errorcheck_null - -def magic_file(cookie, filename): - return _magic_file(cookie, coerce_filename(filename)) - -_magic_buffer = libmagic.magic_buffer -_magic_buffer.restype = c_char_p -_magic_buffer.argtypes = [magic_t, c_void_p, c_size_t] -_magic_buffer.errcheck = errorcheck_null - -def magic_buffer(cookie, buf): - return _magic_buffer(cookie, buf, len(buf)) - - -_magic_load = libmagic.magic_load -_magic_load.restype = c_int -_magic_load.argtypes = [magic_t, c_char_p] -_magic_load.errcheck = errorcheck_negative_one - -def magic_load(cookie, filename): - return _magic_load(cookie, coerce_filename(filename)) - -magic_setflags = libmagic.magic_setflags -magic_setflags.restype = c_int -magic_setflags.argtypes = [magic_t, c_int] - -magic_check = libmagic.magic_check -magic_check.restype = c_int -magic_check.argtypes = [magic_t, c_char_p] - -magic_compile = libmagic.magic_compile -magic_compile.restype = c_int -magic_compile.argtypes = [magic_t, c_char_p] - - - -MAGIC_NONE = 0x000000 # No flags -MAGIC_DEBUG = 0x000001 # Turn on debugging -MAGIC_SYMLINK = 0x000002 # 
Follow symlinks -MAGIC_COMPRESS = 0x000004 # Check inside compressed files -MAGIC_DEVICES = 0x000008 # Look at the contents of devices -MAGIC_MIME = 0x000010 # Return a mime string -MAGIC_MIME_ENCODING = 0x000400 # Return the MIME encoding -MAGIC_CONTINUE = 0x000020 # Return all matches -MAGIC_CHECK = 0x000040 # Print warnings to stderr -MAGIC_PRESERVE_ATIME = 0x000080 # Restore access time on exit -MAGIC_RAW = 0x000100 # Don't translate unprintable chars -MAGIC_ERROR = 0x000200 # Handle ENOENT etc as real errors - -MAGIC_NO_CHECK_COMPRESS = 0x001000 # Don't check for compressed files -MAGIC_NO_CHECK_TAR = 0x002000 # Don't check for tar files -MAGIC_NO_CHECK_SOFT = 0x004000 # Don't check magic entries -MAGIC_NO_CHECK_APPTYPE = 0x008000 # Don't check application type -MAGIC_NO_CHECK_ELF = 0x010000 # Don't check for elf details -MAGIC_NO_CHECK_ASCII = 0x020000 # Don't check for ascii files -MAGIC_NO_CHECK_TROFF = 0x040000 # Don't check ascii/troff -MAGIC_NO_CHECK_FORTRAN = 0x080000 # Don't check ascii/fortran -MAGIC_NO_CHECK_TOKENS = 0x100000 # Don't check ascii/tokens --- /dev/null +++ b/magic/__init__.py @@ -0,0 +1,361 @@ +""" +magic is a wrapper around the libmagic file identification library. + +See README for more information. + +Usage: + +>>> import magic +>>> magic.from_file("testdata/test.pdf") +'PDF document, version 1.2' +>>> magic.from_file("testdata/test.pdf", mime=True) +'application/pdf' +>>> magic.from_buffer(open("testdata/test.pdf").read(1024)) +'PDF document, version 1.2' +>>> + + +""" + +import sys +import glob +import os.path +import ctypes +import ctypes.util +import threading +import logging + +from ctypes import c_char_p, c_int, c_size_t, c_void_p + +# avoid shadowing the real open with the version from compat.py +_real_open = open + +class MagicException(Exception): + def __init__(self, message): + super(MagicException, self).__init__(message) + self.message = message + + +class Magic: + """ + Magic is a wrapper around the libmagic C library. 
+ + """ + + def __init__(self, mime=False, magic_file=None, mime_encoding=False, + keep_going=False, uncompress=False): + """ + Create a new libmagic wrapper. + + mime - if True, mimetypes are returned instead of textual descriptions + mime_encoding - if True, codec is returned + magic_file - use a mime database other than the system default + keep_going - don't stop at the first match, keep going + uncompress - Try to look inside compressed files. + """ + self.flags = MAGIC_NONE + if mime: + self.flags |= MAGIC_MIME + if mime_encoding: + self.flags |= MAGIC_MIME_ENCODING + if keep_going: + self.flags |= MAGIC_CONTINUE + + if uncompress: + self.flags |= MAGIC_COMPRESS + + self.cookie = magic_open(self.flags) + self.lock = threading.Lock() + + magic_load(self.cookie, magic_file) + + def from_buffer(self, buf): + """ + Identify the contents of `buf` + """ + with self.lock: + try: + # if we're on python3, convert buf to bytes + # otherwise this string is passed as wchar* + # which is not what libmagic expects + if type(buf) == str and str != bytes: + buf = buf.encode('utf-8', errors='replace') + return maybe_decode(magic_buffer(self.cookie, buf)) + except MagicException as e: + return self._handle509Bug(e) + + def from_open_file(self, open_file): + with self.lock: + try: + return maybe_decode(magic_descriptor(self.cookie, open_file.fileno())) + except MagicException as e: + return self._handle509Bug(e) + + def from_file(self, filename): + # raise FileNotFoundException or IOError if the file does not exist + with _real_open(filename): + pass + + with self.lock: + try: + return maybe_decode(magic_file(self.cookie, filename)) + except MagicException as e: + return self._handle509Bug(e) + + def _handle509Bug(self, e): + # libmagic 5.09 has a bug where it might fail to identify the + # mimetype of a file and returns null from magic_file (and + # likely _buffer), but also does not return an error message. 
+ if e.message is None and (self.flags & MAGIC_MIME): + return "application/octet-stream" + else: + raise e + + def __del__(self): + # no _thread_check here because there can be no other + # references to this object at this point. + + # during shutdown magic_close may have been cleared already so + # make sure it exists before using it. + + # the self.cookie check should be unnecessary and was an + # incorrect fix for a threading problem, however I'm leaving + # it in because it's harmless and I'm slightly afraid to + # remove it. + if self.cookie and magic_close: + magic_close(self.cookie) + self.cookie = None + +_instances = {} + +def _get_magic_type(mime): + i = _instances.get(mime) + if i is None: + i = _instances[mime] = Magic(mime=mime) + return i + +def from_file(filename, mime=False): + """" + Accepts a filename and returns the detected filetype. Return + value is the mimetype if mime=True, otherwise a human readable + name. + + >>> magic.from_file("testdata/test.pdf", mime=True) + 'application/pdf' + """ + m = _get_magic_type(mime) + return m.from_file(filename) + +def from_buffer(buffer, mime=False): + """ + Accepts a binary string and returns the detected filetype. Return + value is the mimetype if mime=True, otherwise a human readable + name. 
+ + >>> magic.from_buffer(open("testdata/test.pdf").read(1024)) + 'PDF document, version 1.2' + """ + m = _get_magic_type(mime) + return m.from_buffer(buffer) + + + + +libmagic = None +# Let's try to find magic or magic1 +dll = ctypes.util.find_library('magic') or ctypes.util.find_library('magic1') or ctypes.util.find_library('cygmagic-1') + +# This is necessary because find_library returns None if it doesn't find the library +if dll: + libmagic = ctypes.CDLL(dll) + +if not libmagic or not libmagic._name: + windows_dlls = ['magic1.dll','cygmagic-1.dll'] + platform_to_lib = {'darwin': ['/opt/local/lib/libmagic.dylib', + '/usr/local/lib/libmagic.dylib'] + + # Assumes there will only be one version installed + glob.glob('/usr/local/Cellar/libmagic/*/lib/libmagic.dylib'), + 'win32': windows_dlls, + 'cygwin': windows_dlls, + 'linux': ['libmagic.so.1'], # fallback for some Linuxes (e.g. Alpine) where library search does not work + } + platform = 'linux' if sys.platform.startswith('linux') else sys.platform + for dll in platform_to_lib.get(platform, []): + try: + libmagic = ctypes.CDLL(dll) + break + except OSError: + pass + +if not libmagic or not libmagic._name: + # It is better to raise an ImportError since we are importing magic module + raise ImportError('failed to find libmagic. Check your installation') + +magic_t = ctypes.c_void_p + +def errorcheck_null(result, func, args): + if result is None: + err = magic_error(args[0]) + raise MagicException(err) + else: + return result + +def errorcheck_negative_one(result, func, args): + if result is -1: + err = magic_error(args[0]) + raise MagicException(err) + else: + return result + + +# return str on python3. 
Don't want to unconditionally +# decode because that results in unicode on python2 +def maybe_decode(s): + if str == bytes: + return s + else: + return s.decode('utf-8') + +def coerce_filename(filename): + if filename is None: + return None + + # ctypes will implicitly convert unicode strings to bytes with + # .encode('ascii'). If you use the filesystem encoding + # then you'll get inconsistent behavior (crashes) depending on the user's + # LANG environment variable + is_unicode = (sys.version_info[0] <= 2 and + isinstance(filename, unicode)) or \ + (sys.version_info[0] >= 3 and + isinstance(filename, str)) + if is_unicode: + return filename.encode('utf-8', 'surrogateescape') + else: + return filename + +magic_open = libmagic.magic_open +magic_open.restype = magic_t +magic_open.argtypes = [c_int] + +magic_close = libmagic.magic_close +magic_close.restype = None +magic_close.argtypes = [magic_t] + +magic_error = libmagic.magic_error +magic_error.restype = c_char_p +magic_error.argtypes = [magic_t] + +magic_errno = libmagic.magic_errno +magic_errno.restype = c_int +magic_errno.argtypes = [magic_t] + +_magic_file = libmagic.magic_file +_magic_file.restype = c_char_p +_magic_file.argtypes = [magic_t, c_char_p] +_magic_file.errcheck = errorcheck_null + +def magic_file(cookie, filename): + return _magic_file(cookie, coerce_filename(filename)) + +_magic_buffer = libmagic.magic_buffer +_magic_buffer.restype = c_char_p +_magic_buffer.argtypes = [magic_t, c_void_p, c_size_t] +_magic_buffer.errcheck = errorcheck_null + +def magic_buffer(cookie, buf): + return _magic_buffer(cookie, buf, len(buf)) + +magic_descriptor = libmagic.magic_descriptor +magic_descriptor.restype = c_char_p +magic_descriptor.argtypes = [magic_t, c_int] +magic_descriptor.errcheck = errorcheck_null + +_magic_load = libmagic.magic_load +_magic_load.restype = c_int +_magic_load.argtypes = [magic_t, c_char_p] +_magic_load.errcheck = errorcheck_negative_one + +def magic_load(cookie, filename): + return 
_magic_load(cookie, coerce_filename(filename)) + +magic_setflags = libmagic.magic_setflags +magic_setflags.restype = c_int +magic_setflags.argtypes = [magic_t, c_int] + +magic_check = libmagic.magic_check +magic_check.restype = c_int +magic_check.argtypes = [magic_t, c_char_p] + +magic_compile = libmagic.magic_compile +magic_compile.restype = c_int +magic_compile.argtypes = [magic_t, c_char_p] + + + +MAGIC_NONE = 0x000000 # No flags +MAGIC_DEBUG = 0x000001 # Turn on debugging +MAGIC_SYMLINK = 0x000002 # Follow symlinks +MAGIC_COMPRESS = 0x000004 # Check inside compressed files +MAGIC_DEVICES = 0x000008 # Look at the contents of devices +MAGIC_MIME = 0x000010 # Return a mime string +MAGIC_MIME_ENCODING = 0x000400 # Return the MIME encoding +MAGIC_CONTINUE = 0x000020 # Return all matches +MAGIC_CHECK = 0x000040 # Print warnings to stderr +MAGIC_PRESERVE_ATIME = 0x000080 # Restore access time on exit +MAGIC_RAW = 0x000100 # Don't translate unprintable chars +MAGIC_ERROR = 0x000200 # Handle ENOENT etc as real errors + +MAGIC_NO_CHECK_COMPRESS = 0x001000 # Don't check for compressed files +MAGIC_NO_CHECK_TAR = 0x002000 # Don't check for tar files +MAGIC_NO_CHECK_SOFT = 0x004000 # Don't check magic entries +MAGIC_NO_CHECK_APPTYPE = 0x008000 # Don't check application type +MAGIC_NO_CHECK_ELF = 0x010000 # Don't check for elf details +MAGIC_NO_CHECK_ASCII = 0x020000 # Don't check for ascii files +MAGIC_NO_CHECK_TROFF = 0x040000 # Don't check ascii/troff +MAGIC_NO_CHECK_FORTRAN = 0x080000 # Don't check ascii/fortran +MAGIC_NO_CHECK_TOKENS = 0x100000 # Don't check ascii/tokens + +# This package name conflicts with the one provided by upstream +# libmagic. This is a common source of confusion for users. To +# resolve, We ship a copy of that module, and expose it's functions +# wrapped in deprecation warnings. 
+def add_compat(to_module): + + import warnings, re + from magic import compat + + def deprecation_wrapper(compat, fn, alternate): + def _(*args, **kwargs): + warnings.warn( + "Using compatability mode with libmagic's python binding", + DeprecationWarning) + + return compat[fn](*args, **kwargs) + return _ + + fn = [('detect_from_filename', 'magic.from_file'), + ('detect_from_content', 'magic.from_buffer'), + ('detect_from_fobj', 'magic.Magic.from_open_file'), + ('open', 'magic.Magic')] + for (fname, alternate) in fn: + # for now, disable the deprecation warning until theres clarity on + # what the merged module should look like + to_module[fname] = compat.__dict__.get(fname) + #to_module[fname] = deprecation_wrapper(compat.__dict__, fname, alternate) + + # copy constants over, ensuring there's no conflicts + is_const_re = re.compile("^[A-Z_]+$") + allowed_inconsistent = set(['MAGIC_MIME']) + for name, value in compat.__dict__.items(): + if is_const_re.match(name): + if name in to_module: + if name in allowed_inconsistent: + continue + if to_module[name] != value: + raise Exception("inconsistent value for " + name) + else: + continue + else: + to_module[name] = value + +add_compat(globals()) --- /dev/null +++ b/magic/compat.py @@ -0,0 +1,285 @@ +# coding: utf-8 + +''' +Python bindings for libmagic +''' + +import ctypes + +from collections import namedtuple + +from ctypes import * +from ctypes.util import find_library + + +def _init(): + """ + Loads the shared library through ctypes and returns a library + L{ctypes.CDLL} instance + """ + return ctypes.cdll.LoadLibrary(find_library('magic')) + +_libraries = {} +_libraries['magic'] = _init() + +# Flag constants for open and setflags +MAGIC_NONE = NONE = 0 +MAGIC_DEBUG = DEBUG = 1 +MAGIC_SYMLINK = SYMLINK = 2 +MAGIC_COMPRESS = COMPRESS = 4 +MAGIC_DEVICES = DEVICES = 8 +MAGIC_MIME_TYPE = MIME_TYPE = 16 +MAGIC_CONTINUE = CONTINUE = 32 +MAGIC_CHECK = CHECK = 64 +MAGIC_PRESERVE_ATIME = PRESERVE_ATIME = 128 +MAGIC_RAW = RAW 
= 256 +MAGIC_ERROR = ERROR = 512 +MAGIC_MIME_ENCODING = MIME_ENCODING = 1024 +MAGIC_MIME = MIME = 1040 # MIME_TYPE + MIME_ENCODING +MAGIC_APPLE = APPLE = 2048 + +MAGIC_NO_CHECK_COMPRESS = NO_CHECK_COMPRESS = 4096 +MAGIC_NO_CHECK_TAR = NO_CHECK_TAR = 8192 +MAGIC_NO_CHECK_SOFT = NO_CHECK_SOFT = 16384 +MAGIC_NO_CHECK_APPTYPE = NO_CHECK_APPTYPE = 32768 +MAGIC_NO_CHECK_ELF = NO_CHECK_ELF = 65536 +MAGIC_NO_CHECK_TEXT = NO_CHECK_TEXT = 131072 +MAGIC_NO_CHECK_CDF = NO_CHECK_CDF = 262144 +MAGIC_NO_CHECK_TOKENS = NO_CHECK_TOKENS = 1048576 +MAGIC_NO_CHECK_ENCODING = NO_CHECK_ENCODING = 2097152 + +MAGIC_NO_CHECK_BUILTIN = NO_CHECK_BUILTIN = 4173824 + +FileMagic = namedtuple('FileMagic', ('mime_type', 'encoding', 'name')) + + +class magic_set(Structure): + pass +magic_set._fields_ = [] +magic_t = POINTER(magic_set) + +_open = _libraries['magic'].magic_open +_open.restype = magic_t +_open.argtypes = [c_int] + +_close = _libraries['magic'].magic_close +_close.restype = None +_close.argtypes = [magic_t] + +_file = _libraries['magic'].magic_file +_file.restype = c_char_p +_file.argtypes = [magic_t, c_char_p] + +_descriptor = _libraries['magic'].magic_descriptor +_descriptor.restype = c_char_p +_descriptor.argtypes = [magic_t, c_int] + +_buffer = _libraries['magic'].magic_buffer +_buffer.restype = c_char_p +_buffer.argtypes = [magic_t, c_void_p, c_size_t] + +_error = _libraries['magic'].magic_error +_error.restype = c_char_p +_error.argtypes = [magic_t] + +_setflags = _libraries['magic'].magic_setflags +_setflags.restype = c_int +_setflags.argtypes = [magic_t, c_int] + +_load = _libraries['magic'].magic_load +_load.restype = c_int +_load.argtypes = [magic_t, c_char_p] + +_compile = _libraries['magic'].magic_compile +_compile.restype = c_int +_compile.argtypes = [magic_t, c_char_p] + +_check = _libraries['magic'].magic_check +_check.restype = c_int +_check.argtypes = [magic_t, c_char_p] + +_list = _libraries['magic'].magic_list +_list.restype = c_int +_list.argtypes = [magic_t, 
c_char_p] + +_errno = _libraries['magic'].magic_errno +_errno.restype = c_int +_errno.argtypes = [magic_t] + + +class Magic(object): + def __init__(self, ms): + self._magic_t = ms + + def close(self): + """ + Closes the magic database and deallocates any resources used. + """ + _close(self._magic_t) + + @staticmethod + def __tostr(s): + if s is None: + return None + if isinstance(s, str): + return s + try: # keep Python 2 compatibility + return str(s, 'utf-8') + except TypeError: + return str(s) + + @staticmethod + def __tobytes(b): + if b is None: + return None + if isinstance(b, bytes): + return b + try: # keep Python 2 compatibility + return bytes(b, 'utf-8') + except TypeError: + return bytes(b) + + def file(self, filename): + """ + Returns a textual description of the contents of the argument passed + as a filename or None if an error occurred and the MAGIC_ERROR flag + is set. A call to errno() will return the numeric error code. + """ + return Magic.__tostr(_file(self._magic_t, Magic.__tobytes(filename))) + + def descriptor(self, fd): + """ + Returns a textual description of the contents of the argument passed + as a file descriptor or None if an error occurred and the MAGIC_ERROR + flag is set. A call to errno() will return the numeric error code. + """ + return Magic.__tostr(_descriptor(self._magic_t, fd)) + + def buffer(self, buf): + """ + Returns a textual description of the contents of the argument passed + as a buffer or None if an error occurred and the MAGIC_ERROR flag + is set. A call to errno() will return the numeric error code. + """ + return Magic.__tostr(_buffer(self._magic_t, buf, len(buf))) + + def error(self): + """ + Returns a textual explanation of the last error or None + if there was no error. 
+ """ + return Magic.__tostr(_error(self._magic_t)) + + def setflags(self, flags): + """ + Set flags on the magic object which determine how magic checking + behaves; a bitwise OR of the flags described in libmagic(3), but + without the MAGIC_ prefix. + + Returns -1 on systems that don't support utime(2) or utimes(2) + when PRESERVE_ATIME is set. + """ + return _setflags(self._magic_t, flags) + + def load(self, filename=None): + """ + Must be called to load entries in the colon separated list of database + files passed as argument or the default database file if no argument + before any magic queries can be performed. + + Returns 0 on success and -1 on failure. + """ + return _load(self._magic_t, Magic.__tobytes(filename)) + + def compile(self, dbs): + """ + Compile entries in the colon separated list of database files + passed as argument or the default database file if no argument. + The compiled files created are named from the basename(1) of each file + argument with ".mgc" appended to it. + + Returns 0 on success and -1 on failure. + """ + return _compile(self._magic_t, Magic.__tobytes(dbs)) + + def check(self, dbs): + """ + Check the validity of entries in the colon separated list of + database files passed as argument or the default database file + if no argument. + + Returns 0 on success and -1 on failure. + """ + return _check(self._magic_t, Magic.__tobytes(dbs)) + + def list(self, dbs): + """ + Check the validity of entries in the colon separated list of + database files passed as argument or the default database file + if no argument. + + Returns 0 on success and -1 on failure. + """ + return _list(self._magic_t, Magic.__tobytes(dbs)) + + def errno(self): + """ + Returns a numeric error code. If return value is 0, an internal + magic error occurred. If return value is non-zero, the value is + an OS error code. Use the errno module or os.strerror() can be used + to provide detailed error information. 
+ """ + return _errno(self._magic_t) + + +def open(flags): + """ + Returns a magic object on success and None on failure. + Flags argument as for setflags. + """ + return Magic(_open(flags)) + + +# Objects used by `detect_from_` functions +mime_magic = Magic(_open(MAGIC_MIME)) +mime_magic.load() +none_magic = Magic(_open(MAGIC_NONE)) +none_magic.load() + + +def _create_filemagic(mime_detected, type_detected): + mime_type, mime_encoding = mime_detected.split('; ') + + return FileMagic(name=type_detected, mime_type=mime_type, + encoding=mime_encoding.replace('charset=', '')) + + +def detect_from_filename(filename): + '''Detect mime type, encoding and file type from a filename + + Returns a `FileMagic` namedtuple. + ''' + + return _create_filemagic(mime_magic.file(filename), + none_magic.file(filename)) + + +def detect_from_fobj(fobj): + '''Detect mime type, encoding and file type from file-like object + + Returns a `FileMagic` namedtuple. + ''' + + file_descriptor = fobj.fileno() + return _create_filemagic(mime_magic.descriptor(file_descriptor), + none_magic.descriptor(file_descriptor)) + + +def detect_from_content(byte_content): + '''Detect mime type, encoding and file type from bytes + + Returns a `FileMagic` namedtuple. + ''' + + return _create_filemagic(mime_magic.buffer(byte_content), + none_magic.buffer(byte_content)) --- a/setup.py +++ b/setup.py @@ -8,8 +8,8 @@ author='Adam Hupp', author_email='adam@hupp.org', url="http://github.com/ahupp/python-magic", - version='0.4.15', - py_modules=['magic'], + version='0.4.16', + packages=['magic'], long_description="""This module uses ctypes to access the libmagic file type identification library. It makes use of the local magic database and supports both textual and MIME-type output. 
--- /dev/null +++ b/test/libmagic_test.py @@ -0,0 +1,39 @@ +# coding: utf-8 + +import unittest + +import magic + + +class MagicTestCase(unittest.TestCase): + + filename = 'test/testdata/test.pdf' + expected_mime_type = 'application/pdf' + expected_encoding = 'us-ascii' + expected_name = 'PDF document, version 1.2' + + def assert_result(self, result): + self.assertEqual(result.mime_type, self.expected_mime_type) + self.assertEqual(result.encoding, self.expected_encoding) + self.assertEqual(result.name, self.expected_name) + + def test_detect_from_filename(self): + result = magic.detect_from_filename(self.filename) + self.assert_result(result) + + def test_detect_from_fobj(self): + with open(self.filename) as fobj: + result = magic.detect_from_fobj(fobj) + self.assert_result(result) + + def test_detect_from_content(self): + # differ from upstream by opening file in binary mode, + # this avoids hitting a bug in python3+libfile bindings + # see https://github.com/ahupp/python-magic/issues/152 + # for a similar issue + with open(self.filename, 'rb') as fobj: + result = magic.detect_from_content(fobj.read(4096)) + self.assert_result(result) + +if __name__ == '__main__': + unittest.main() --- a/test/run.sh +++ b/test/run.sh @@ -8,7 +8,10 @@ echo "python2.6" python2.6 ${THISDIR}/test.py +python2.6 ${THISDIR}/libmagic_test.py echo "python2.7" python2.7 ${THISDIR}/test.py -echo "python3.0" +python2.7 ${THISDIR}/libmagic_test.py +echo "python3" python3 ${THISDIR}/test.py +python3 ${THISDIR}/libmagic_test.py --- a/test/test.py +++ b/test/test.py @@ -37,7 +37,13 @@ self.assertEqual("text/x-python", m.from_buffer(s)) b = b'#!/usr/bin/env python\nprint("foo")' self.assertEqual("text/x-python", m.from_buffer(b)) - + + + def test_open_file(self): + m = magic.Magic(mime=True) + with open(os.path.join(self.TESTDATA_DIR, "test.pdf")) as f: + self.assertEqual("application/pdf", m.from_open_file(f)) + def test_mime_types(self): dest = os.path.join(MagicTest.TESTDATA_DIR, 
b'\xce\xbb'.decode('utf-8')) shutil.copyfile(os.path.join(MagicTest.TESTDATA_DIR, 'lambda'), dest)