# coding: utf-8
'''
Python bindings for libmagic
'''
- import ctypes
- import threading
- from collections import namedtuple
- from ctypes import *
- from ctypes.util import find_library
def _init():
    """
    Loads the shared library through ctypes and returns a library
    L{ctypes.CDLL} instance

    Raises ImportError when no libmagic shared library can be located.
    """
    path = find_library('magic')
    if path is None:
        # find_library() signals "not found" by returning None.  Passing
        # that on to LoadLibrary() would not produce a clear error about
        # the missing library, so fail here with an explicit message.
        raise ImportError('failed to find the libmagic shared library')
    return ctypes.cdll.LoadLibrary(path)
# Loaded shared libraries, keyed by short name.
_libraries = {'magic': _init()}
# Flag constants for open and setflags.
# Every flag is available under two names: with and without the MAGIC_ prefix.
MAGIC_NONE = NONE = 0x000000
MAGIC_DEBUG = DEBUG = 0x000001
MAGIC_SYMLINK = SYMLINK = 0x000002
MAGIC_COMPRESS = COMPRESS = 0x000004
MAGIC_DEVICES = DEVICES = 0x000008
MAGIC_MIME_TYPE = MIME_TYPE = 0x000010
MAGIC_CONTINUE = CONTINUE = 0x000020
MAGIC_CHECK = CHECK = 0x000040
MAGIC_PRESERVE_ATIME = PRESERVE_ATIME = 0x000080
MAGIC_RAW = RAW = 0x000100
MAGIC_ERROR = ERROR = 0x000200
MAGIC_MIME_ENCODING = MIME_ENCODING = 0x000400
MAGIC_MIME = MIME = MAGIC_MIME_TYPE | MAGIC_MIME_ENCODING  # 1040
MAGIC_APPLE = APPLE = 0x000800

MAGIC_NO_CHECK_COMPRESS = NO_CHECK_COMPRESS = 0x001000
MAGIC_NO_CHECK_TAR = NO_CHECK_TAR = 0x002000
MAGIC_NO_CHECK_SOFT = NO_CHECK_SOFT = 0x004000
MAGIC_NO_CHECK_APPTYPE = NO_CHECK_APPTYPE = 0x008000
MAGIC_NO_CHECK_ELF = NO_CHECK_ELF = 0x010000
MAGIC_NO_CHECK_TEXT = NO_CHECK_TEXT = 0x020000
MAGIC_NO_CHECK_CDF = NO_CHECK_CDF = 0x040000
MAGIC_NO_CHECK_TOKENS = NO_CHECK_TOKENS = 0x100000
MAGIC_NO_CHECK_ENCODING = NO_CHECK_ENCODING = 0x200000
MAGIC_NO_CHECK_BUILTIN = NO_CHECK_BUILTIN = 0x3FB000  # 4173824

# Parameter identifiers for getparam and setparam.
MAGIC_PARAM_INDIR_MAX = PARAM_INDIR_MAX = 0
MAGIC_PARAM_NAME_MAX = PARAM_NAME_MAX = 1
MAGIC_PARAM_ELF_PHNUM_MAX = PARAM_ELF_PHNUM_MAX = 2
MAGIC_PARAM_ELF_SHNUM_MAX = PARAM_ELF_SHNUM_MAX = 3
MAGIC_PARAM_ELF_NOTES_MAX = PARAM_ELF_NOTES_MAX = 4
MAGIC_PARAM_REGEX_MAX = PARAM_REGEX_MAX = 5
MAGIC_PARAM_BYTES_MAX = PARAM_BYTES_MAX = 6
# Result record returned by the `detect_from_` functions.
FileMagic = namedtuple('FileMagic', ['mime_type', 'encoding', 'name'])
class magic_set(Structure):
    """Opaque ctypes structure; declared with no fields on the Python side."""
    _fields_ = []


# Pointer-to-magic_set type used for the handle argument of every binding.
magic_t = POINTER(magic_set)
# ctypes prototypes for the libmagic entry points used by this module.
# For each foreign function the argument types are declared first, then the
# return type.

# magic_open(flags) -> handle
_open = _libraries['magic'].magic_open
_open.argtypes = [c_int]
_open.restype = magic_t

# magic_close(handle)
_close = _libraries['magic'].magic_close
_close.argtypes = [magic_t]
_close.restype = None

# magic_file(handle, filename) -> C string
_file = _libraries['magic'].magic_file
_file.argtypes = [magic_t, c_char_p]
_file.restype = c_char_p

# magic_descriptor(handle, fd) -> C string
_descriptor = _libraries['magic'].magic_descriptor
_descriptor.argtypes = [magic_t, c_int]
_descriptor.restype = c_char_p

# magic_buffer(handle, data, length) -> C string
_buffer = _libraries['magic'].magic_buffer
_buffer.argtypes = [magic_t, c_void_p, c_size_t]
_buffer.restype = c_char_p

# magic_error(handle) -> C string
_error = _libraries['magic'].magic_error
_error.argtypes = [magic_t]
_error.restype = c_char_p

# magic_setflags(handle, flags) -> int
_setflags = _libraries['magic'].magic_setflags
_setflags.argtypes = [magic_t, c_int]
_setflags.restype = c_int

# magic_load(handle, filename) -> int
_load = _libraries['magic'].magic_load
_load.argtypes = [magic_t, c_char_p]
_load.restype = c_int

# magic_compile(handle, filename) -> int
_compile = _libraries['magic'].magic_compile
_compile.argtypes = [magic_t, c_char_p]
_compile.restype = c_int

# magic_check(handle, filename) -> int
_check = _libraries['magic'].magic_check
_check.argtypes = [magic_t, c_char_p]
_check.restype = c_int

# magic_list(handle, filename) -> int
_list = _libraries['magic'].magic_list
_list.argtypes = [magic_t, c_char_p]
_list.restype = c_int

# magic_errno(handle) -> int
_errno = _libraries['magic'].magic_errno
_errno.argtypes = [magic_t]
_errno.restype = c_int

# magic_getparam(handle, param, value pointer) -> int
_getparam = _libraries['magic'].magic_getparam
_getparam.argtypes = [magic_t, c_int, c_void_p]
_getparam.restype = c_int

# magic_setparam(handle, param, value pointer) -> int
_setparam = _libraries['magic'].magic_setparam
_setparam.argtypes = [magic_t, c_int, c_void_p]
_setparam.restype = c_int
class Magic(object):
    """
    Object wrapper around a libmagic handle.

    Every method forwards to the matching libmagic function, converting
    text arguments to bytes on the way in and C strings to str on the way
    out.
    """

    def __init__(self, ms):
        # ms is the handle passed as first argument to every libmagic call.
        self._magic_t = ms

    def close(self):
        """
        Closes the magic database and deallocates any resources used.

        Calling close() on an already closed object does nothing.
        """
        if self._magic_t is not None:
            _close(self._magic_t)
            # Forget the handle so it can never be handed to magic_close()
            # a second time.
            self._magic_t = None

    @staticmethod
    def __tostr(s):
        """Converts a value returned by libmagic to str; None stays None."""
        if s is None:
            return None
        if isinstance(s, str):
            return s
        try:  # keep Python 2 compatibility
            return str(s, 'utf-8')
        except TypeError:
            return str(s)

    @staticmethod
    def __tobytes(b):
        """Converts an argument for libmagic to bytes; None stays None."""
        if b is None:
            return None
        if isinstance(b, bytes):
            return b
        try:  # keep Python 2 compatibility
            return bytes(b, 'utf-8')
        except TypeError:
            return bytes(b)

    def file(self, filename):
        """
        Returns a textual description of the contents of the argument passed
        as a filename or None if an error occurred and the MAGIC_ERROR flag
        is set. A call to errno() will return the numeric error code.
        """
        return Magic.__tostr(_file(self._magic_t, Magic.__tobytes(filename)))

    def descriptor(self, fd):
        """
        Returns a textual description of the contents of the argument passed
        as a file descriptor or None if an error occurred and the MAGIC_ERROR
        flag is set. A call to errno() will return the numeric error code.
        """
        return Magic.__tostr(_descriptor(self._magic_t, fd))

    def buffer(self, buf):
        """
        Returns a textual description of the contents of the argument passed
        as a buffer or None if an error occurred and the MAGIC_ERROR flag
        is set. A call to errno() will return the numeric error code.

        A text string is encoded as UTF-8 before it is examined.
        """
        if isinstance(buf, str) and not isinstance(buf, bytes):
            # ctypes would pass a text string through c_void_p as wide
            # characters while len() counts characters, so libmagic would
            # examine the wrong bytes.  Encode first so that the data and
            # its length agree.
            buf = buf.encode('utf-8')
        return Magic.__tostr(_buffer(self._magic_t, buf, len(buf)))

    def error(self):
        """
        Returns a textual explanation of the last error or None
        if there was no error.
        """
        return Magic.__tostr(_error(self._magic_t))

    def setflags(self, flags):
        """
        Set flags on the magic object which determine how magic checking
        behaves; a bitwise OR of the flags described in libmagic(3), but
        without the MAGIC_ prefix.
        Returns -1 on systems that don't support utime(2) or utimes(2)
        when PRESERVE_ATIME is set.
        """
        return _setflags(self._magic_t, flags)

    def load(self, filename=None):
        """
        Must be called to load entries in the colon separated list of database
        files passed as argument or the default database file if no argument
        before any magic queries can be performed.
        Returns 0 on success and -1 on failure.
        """
        return _load(self._magic_t, Magic.__tobytes(filename))

    def compile(self, dbs):
        """
        Compile entries in the colon separated list of database files
        passed as argument or the default database file if no argument.
        The compiled files created are named from the basename(1) of each file
        argument with ".mgc" appended to it.
        Returns 0 on success and -1 on failure.
        """
        return _compile(self._magic_t, Magic.__tobytes(dbs))

    def check(self, dbs):
        """
        Check the validity of entries in the colon separated list of
        database files passed as argument or the default database file
        if no argument.
        Returns 0 on success and -1 on failure.
        """
        return _check(self._magic_t, Magic.__tobytes(dbs))

    def list(self, dbs):
        """
        Dump all magic entries in the colon separated list of database
        files passed as argument or the default database file if no
        argument, in a human readable format.
        Returns 0 on success and -1 on failure.
        """
        return _list(self._magic_t, Magic.__tobytes(dbs))

    def errno(self):
        """
        Returns a numeric error code. If return value is 0, an internal
        magic error occurred. If return value is non-zero, the value is
        an OS error code. The errno module or os.strerror() can be used
        to provide detailed error information.
        """
        return _errno(self._magic_t)

    def getparam(self, param):
        """
        Returns the param value if successful and -1 if the parameter
        was unknown.
        """
        # libmagic(3) documents these parameters as size_t values accessed
        # through the pointer, so the receiving buffer must be size_t wide;
        # a c_int is too small where size_t is 8 bytes.
        value = c_size_t()
        if _getparam(self._magic_t, param, byref(value)) == -1:
            return -1
        return value.value

    def setparam(self, param, value):
        """
        Returns 0 if successful and -1 if the parameter was unknown.
        """
        # Same width requirement as in getparam(): libmagic reads a size_t.
        v = c_size_t(value)
        return _setparam(self._magic_t, param, byref(v))
def open(flags):
    """
    Returns a magic object on success and None on failure.
    Flags argument as for setflags.
    """
    handle = _open(flags)
    # _open is declared with a ctypes POINTER restype, so a failed call
    # yields a NULL pointer instance, which is falsy but is never the
    # None object.  Test truthiness instead of identity with None.
    if not handle:
        return None
    return Magic(handle)
# Objects used by `detect_from_` functions
class error(Exception):
    """Exception raised when a magic object cannot be opened or loaded."""
class MagicDetect(object):
    """
    Holds the two loaded magic objects used by the `detect_from_` functions:
    `mime_magic` (opened with MAGIC_MIME) and `none_magic` (opened with
    MAGIC_NONE).

    Raises `error` from the constructor when either object cannot be
    opened or its database cannot be loaded; anything opened before the
    failure is closed again.
    """

    def __init__(self):
        # Define both attributes before anything can fail, so that
        # __del__ never meets a missing attribute on a half-built object.
        self.mime_magic = None
        self.none_magic = None
        try:
            self.mime_magic = self._open_loaded(MAGIC_MIME)
            self.none_magic = self._open_loaded(MAGIC_NONE)
        except error:
            self._release()
            raise

    @staticmethod
    def _open_loaded(flags):
        """Opens a magic object with flags and loads the default database."""
        magic = open(flags)
        if magic is None:
            raise error('unable to open a magic object')
        if magic.load() == -1:
            magic.close()
            raise error('unable to load the magic database')
        return magic

    def _release(self):
        """Closes whichever magic objects are currently held."""
        for attr in ('mime_magic', 'none_magic'):
            magic = getattr(self, attr, None)
            if magic is not None:
                magic.close()
                setattr(self, attr, None)

    def __del__(self):
        self._release()
# Per-thread storage for the MagicDetect instance used by _detect_make().
threadlocal = threading.local()


def _detect_make():
    """
    Returns the MagicDetect instance stored on `threadlocal`, creating and
    storing one first when none is present yet.
    """
    detector = getattr(threadlocal, "magic_instance", None)
    if detector is not None:
        return detector
    detector = MagicDetect()
    threadlocal.magic_instance = detector
    return detector
def _create_filemagic(mime_detected, type_detected):
    """
    Builds a `FileMagic` namedtuple from two descriptions of the same input.

    mime_detected is split on '; ' into the mime type and the encoding,
    and a 'charset=' marker is stripped from the encoding.
    type_detected is stored unchanged as the `name` field.

    Raises ValueError when mime_detected is None or does not split into
    exactly two parts.
    """
    if mime_detected is None:
        # The Magic query methods return None when no description is
        # available.  Report that as the same ValueError used for any
        # other unusable description, not as an AttributeError on None.
        raise ValueError('no MIME description available (got None)')
    try:
        mime_type, mime_encoding = mime_detected.split('; ')
    except ValueError:
        raise ValueError(mime_detected)
    return FileMagic(name=type_detected, mime_type=mime_type,
                     encoding=mime_encoding.replace('charset=', ''))
def detect_from_filename(filename):
    '''Detect mime type, encoding and file type from a filename

    Returns a `FileMagic` namedtuple.
    '''
    detector = _detect_make()
    # Query the MIME-flavoured object first, then the plain one.
    mime_description = detector.mime_magic.file(filename)
    type_description = detector.none_magic.file(filename)
    return _create_filemagic(mime_description, type_description)
def detect_from_fobj(fobj):
    '''Detect mime type, encoding and file type from file-like object

    The object must provide fileno(); detection runs on that descriptor.

    Returns a `FileMagic` namedtuple.
    '''
    fd = fobj.fileno()
    detector = _detect_make()
    # Query the MIME-flavoured object first, then the plain one.
    mime_description = detector.mime_magic.descriptor(fd)
    type_description = detector.none_magic.descriptor(fd)
    return _create_filemagic(mime_description, type_description)
def detect_from_content(byte_content):
    '''Detect mime type, encoding and file type from bytes

    Returns a `FileMagic` namedtuple.
    '''
    detector = _detect_make()
    # Query the MIME-flavoured object first, then the plain one.
    mime_description = detector.mime_magic.buffer(byte_content)
    type_description = detector.none_magic.buffer(byte_content)
    return _create_filemagic(mime_description, type_description)
|