"""
magic is a wrapper around the libmagic file identification library.

See README for more information.

Usage:

>>> import magic
>>> magic.from_file("testdata/test.pdf")
'PDF document, version 1.2'
>>> magic.from_file("testdata/test.pdf", mime=True)
'application/pdf'
>>> magic.from_buffer(open("testdata/test.pdf", "rb").read(1024))
'PDF document, version 1.2'
>>>
"""

import sys
import glob
import ctypes
import ctypes.util
import threading
import logging

from ctypes import c_char_p, c_int, c_size_t, c_void_p, byref, POINTER

# avoid shadowing the real open with the version from compat.py
_real_open = open


class MagicException(Exception):
    """Error reported by libmagic.

    `message` holds the text returned by magic_error(); it may be None when
    libmagic signalled a failure without providing an error string.
    """

    def __init__(self, message):
        super(MagicException, self).__init__(message)
        self.message = message


class Magic:
    """
    Magic is a wrapper around the libmagic C library.
    """

    def __init__(self, mime=False, magic_file=None, mime_encoding=False,
                 keep_going=False, uncompress=False, raw=False, extension=False):
        """
        Create a new libmagic wrapper.

        mime - if True, mimetypes are returned instead of textual descriptions
        mime_encoding - if True, codec is returned
        magic_file - use a mime database other than the system default
        keep_going - don't stop at the first match, keep going
        uncompress - Try to look inside compressed files.
        raw - Do not try to decode "non-printable" chars.
        extension - Print a slash-separated list of valid extensions for the file type found.

        Raises MagicException if the magic database cannot be loaded and
        NotImplementedError if `extension` is requested but the installed
        libmagic is too old to support it.
        """
        self.flags = MAGIC_NONE
        if mime:
            self.flags |= MAGIC_MIME_TYPE
        if mime_encoding:
            self.flags |= MAGIC_MIME_ENCODING
        if keep_going:
            self.flags |= MAGIC_CONTINUE
        if uncompress:
            self.flags |= MAGIC_COMPRESS
        if raw:
            self.flags |= MAGIC_RAW
        if extension:
            self.flags |= MAGIC_EXTENSION

        self.cookie = magic_open(self.flags)
        # serializes every call that uses self.cookie
        self.lock = threading.Lock()

        magic_load(self.cookie, magic_file)

        # MAGIC_EXTENSION was added in 523 or 524, so bail if
        # it doesn't appear to be available
        if extension and (not _has_version or version() < 524):
            raise NotImplementedError('MAGIC_EXTENSION is not supported in this version of libmagic')

        # For https://github.com/ahupp/python-magic/issues/190
        # libmagic has fixed internal limits that some files exceed, causing
        # an error.  We can avoid this (at least for the sample file given)
        # by bumping the limit up.  It's not clear if this is a general solution
        # or whether other internal limits should be increased, but given
        # the lack of other reports I'll assume this is rare.
        if _has_param:
            try:
                self.setparam(MAGIC_PARAM_NAME_MAX, 64)
            except MagicException:
                # some versions of libmagic fail this call,
                # so rather than fail hard just use default behavior
                pass

    def from_buffer(self, buf):
        """
        Identify the contents of `buf`
        """
        with self.lock:
            try:
                # if we're on python3, convert buf to bytes
                # otherwise this string is passed as wchar*
                # which is not what libmagic expects
                # NEXTBREAK: only take bytes
                if isinstance(buf, str) and str != bytes:
                    buf = buf.encode('utf-8', errors='replace')
                return maybe_decode(magic_buffer(self.cookie, buf))
            except MagicException as e:
                return self._handle509Bug(e)

    def from_file(self, filename):
        """
        Identify the contents of the file `filename`
        """
        # raise FileNotFoundError or IOError if the file does not exist
        with _real_open(filename):
            pass

        with self.lock:
            try:
                return maybe_decode(magic_file(self.cookie, filename))
            except MagicException as e:
                return self._handle509Bug(e)

    def from_descriptor(self, fd):
        """
        Identify the contents of the file referred to by descriptor `fd`
        """
        with self.lock:
            try:
                return maybe_decode(magic_descriptor(self.cookie, fd))
            except MagicException as e:
                return self._handle509Bug(e)

    def _handle509Bug(self, e):
        """
        Map the libmagic 5.09 "null result without error" failure onto a
        generic mimetype; re-raise every other error.
        """
        # libmagic 5.09 has a bug where it might fail to identify the
        # mimetype of a file and returns null from magic_file (and
        # likely _buffer), but also does not return an error message.
        if e.message is None and (self.flags & MAGIC_MIME_TYPE):
            return "application/octet-stream"
        raise e

    def setparam(self, param, val):
        """
        Set the libmagic parameter `param` (a MAGIC_PARAM_* constant) to `val`
        """
        return magic_setparam(self.cookie, param, val)

    def getparam(self, param):
        """
        Return the current value of the libmagic parameter `param`
        """
        return magic_getparam(self.cookie, param)

    def __del__(self):
        # no _thread_check here because there can be no other
        # references to this object at this point.

        # during shutdown magic_close may have been cleared already so
        # make sure it exists before using it.

        # the self.cookie check should be unnecessary and was an
        # incorrect fix for a threading problem, however I'm leaving
        # it in because it's harmless and I'm slightly afraid to
        # remove it.
        if hasattr(self, 'cookie') and self.cookie and magic_close:
            magic_close(self.cookie)
            self.cookie = None


# module-level cache of Magic instances used by the convenience functions
# below, keyed by the `mime` argument
_instances = {}


def _get_magic_type(mime):
    """Return the cached Magic instance for `mime`, creating it on first use."""
    i = _instances.get(mime)
    if i is None:
        i = _instances[mime] = Magic(mime=mime)
    return i


def from_file(filename, mime=False):
    """
    Accepts a filename and returns the detected filetype.  Return
    value is the mimetype if mime=True, otherwise a human readable
    name.

    >>> magic.from_file("testdata/test.pdf", mime=True)
    'application/pdf'
    """
    m = _get_magic_type(mime)
    return m.from_file(filename)


def from_buffer(buffer, mime=False):
    """
    Accepts a binary string and returns the detected filetype.  Return
    value is the mimetype if mime=True, otherwise a human readable
    name.

    >>> magic.from_buffer(open("testdata/test.pdf", "rb").read(1024))
    'PDF document, version 1.2'
    """
    m = _get_magic_type(mime)
    return m.from_buffer(buffer)


def from_descriptor(fd, mime=False):
    """
    Accepts a file descriptor and returns the detected filetype.  Return
    value is the mimetype if mime=True, otherwise a human readable
    name.

    >>> f = open("testdata/test.pdf")
    >>> magic.from_descriptor(f.fileno())
    'PDF document, version 1.2'
    """
    m = _get_magic_type(mime)
    return m.from_descriptor(fd)


from . import loader
libmagic = loader.load_lib()

magic_t = ctypes.c_void_p


def errorcheck_null(result, func, args):
    """ctypes errcheck hook: raise MagicException when the call returned NULL."""
    if result is None:
        # args[0] is the magic cookie the failing call was made with
        err = magic_error(args[0])
        raise MagicException(err)
    else:
        return result


def errorcheck_negative_one(result, func, args):
    """ctypes errcheck hook: raise MagicException when the call returned -1."""
    if result == -1:
        # args[0] is the magic cookie the failing call was made with
        err = magic_error(args[0])
        raise MagicException(err)
    else:
        return result


# return str on python3.  Don't want to unconditionally
# decode because that results in unicode on python2
def maybe_decode(s):
    """Decode the bytes `s` to str on python3; return `s` unchanged on python2."""
    # NEXTBREAK: remove
    if str == bytes:
        return s
    else:
        # backslashreplace here because sometimes libmagic will return metadata in the charset
        # of the file, which is unknown to us (e.g the title of a Word doc)
        return s.decode('utf-8', 'backslashreplace')


try:
    from os import PathLike

    def unpath(filename):
        """Convert an os.PathLike object to its path; pass anything else through."""
        if isinstance(filename, PathLike):
            return filename.__fspath__()
        else:
            return filename
except ImportError:
    def unpath(filename):
        """No os.PathLike on this interpreter: return `filename` unchanged."""
        return filename


def coerce_filename(filename):
    """
    Convert `filename` into a value that can be passed to libmagic as char*.

    None is passed through, path objects are unwrapped via unpath() and
    unicode strings are encoded as utf-8; any other value is returned as is.
    """
    if filename is None:
        return None

    filename = unpath(filename)

    # ctypes will implicitly convert unicode strings to bytes with
    # .encode('ascii').  If you use the filesystem encoding
    # then you'll get inconsistent behavior (crashes) depending on the user's
    # LANG environment variable
    # NEXTBREAK: remove
    is_unicode = (
        (sys.version_info[0] <= 2 and
         isinstance(filename, unicode)) or  # noqa: F821 - python2 only
        (sys.version_info[0] >= 3 and
         isinstance(filename, str))
    )
    if is_unicode:
        return filename.encode('utf-8', 'surrogateescape')
    else:
        return filename


magic_open = libmagic.magic_open
magic_open.restype = magic_t
magic_open.argtypes = [c_int]

magic_close = libmagic.magic_close
magic_close.restype = None
magic_close.argtypes = [magic_t]

magic_error = libmagic.magic_error
magic_error.restype = c_char_p
magic_error.argtypes = [magic_t]

magic_errno = libmagic.magic_errno
magic_errno.restype = c_int
magic_errno.argtypes = [magic_t]

_magic_file = libmagic.magic_file
_magic_file.restype = c_char_p
_magic_file.argtypes = [magic_t, c_char_p]
_magic_file.errcheck = errorcheck_null


def magic_file(cookie, filename):
    """Call libmagic's magic_file() on `filename`, coercing it to bytes first."""
    return _magic_file(cookie, coerce_filename(filename))


_magic_buffer = libmagic.magic_buffer
_magic_buffer.restype = c_char_p
_magic_buffer.argtypes = [magic_t, c_void_p, c_size_t]
_magic_buffer.errcheck = errorcheck_null


def magic_buffer(cookie, buf):
    """Call libmagic's magic_buffer() on `buf`, passing len(buf) as its size."""
    return _magic_buffer(cookie, buf, len(buf))


_magic_descriptor = libmagic.magic_descriptor
_magic_descriptor.restype = c_char_p
_magic_descriptor.argtypes = [magic_t, c_int]
_magic_descriptor.errcheck = errorcheck_null


def magic_descriptor(cookie, fd):
    """Call libmagic's magic_descriptor() on the file descriptor `fd`."""
    return _magic_descriptor(cookie, fd)


_magic_load = libmagic.magic_load
_magic_load.restype = c_int
_magic_load.argtypes = [magic_t, c_char_p]
_magic_load.errcheck = errorcheck_negative_one


def magic_load(cookie, filename):
    """Call libmagic's magic_load(); a `filename` of None is passed as NULL."""
    return _magic_load(cookie, coerce_filename(filename))


magic_setflags = libmagic.magic_setflags
magic_setflags.restype = c_int
magic_setflags.argtypes = [magic_t, c_int]

magic_check = libmagic.magic_check
magic_check.restype = c_int
magic_check.argtypes = [magic_t, c_char_p]

magic_compile = libmagic.magic_compile
magic_compile.restype = c_int
magic_compile.argtypes = [magic_t, c_char_p]

# magic_setparam/magic_getparam are not exported by every libmagic build
_has_param = False
if hasattr(libmagic, 'magic_setparam') and hasattr(libmagic, 'magic_getparam'):
    _has_param = True
    _magic_setparam = libmagic.magic_setparam
    _magic_setparam.restype = c_int
    _magic_setparam.argtypes = [magic_t, c_int, POINTER(c_size_t)]
    _magic_setparam.errcheck = errorcheck_negative_one

    _magic_getparam = libmagic.magic_getparam
    _magic_getparam.restype = c_int
    _magic_getparam.argtypes = [magic_t, c_int, POINTER(c_size_t)]
    _magic_getparam.errcheck = errorcheck_negative_one


def magic_setparam(cookie, param, val):
    """Set libmagic parameter `param` to `val`.

    Raises NotImplementedError when the loaded libmagic lacks magic_setparam.
    """
    if not _has_param:
        raise NotImplementedError("magic_setparam not implemented")
    v = c_size_t(val)
    return _magic_setparam(cookie, param, byref(v))


def magic_getparam(cookie, param):
    """Return the value of libmagic parameter `param`.

    Raises NotImplementedError when the loaded libmagic lacks magic_getparam.
    """
    if not _has_param:
        raise NotImplementedError("magic_getparam not implemented")
    val = c_size_t()
    _magic_getparam(cookie, param, byref(val))
    return val.value


# magic_version is not exported by every libmagic build
_has_version = False
if hasattr(libmagic, "magic_version"):
    _has_version = True
    magic_version = libmagic.magic_version
    magic_version.restype = c_int
    magic_version.argtypes = []


def version():
    """Return the integer reported by libmagic's magic_version().

    Raises NotImplementedError when the loaded libmagic lacks magic_version.
    """
    if not _has_version:
        raise NotImplementedError("magic_version not implemented")
    return magic_version()


MAGIC_NONE = 0x000000  # No flags
MAGIC_DEBUG = 0x000001  # Turn on debugging
MAGIC_SYMLINK = 0x000002  # Follow symlinks
MAGIC_COMPRESS = 0x000004  # Check inside compressed files
MAGIC_DEVICES = 0x000008  # Look at the contents of devices
MAGIC_MIME_TYPE = 0x000010  # Return a mime string
MAGIC_MIME_ENCODING = 0x000400  # Return the MIME encoding
# TODO:  should be
# MAGIC_MIME = MAGIC_MIME_TYPE | MAGIC_MIME_ENCODING
MAGIC_MIME = 0x000010  # Return a mime string
MAGIC_EXTENSION = 0x1000000  # Return a /-separated list of extensions

MAGIC_CONTINUE = 0x000020  # Return all matches
MAGIC_CHECK = 0x000040  # Print warnings to stderr
MAGIC_PRESERVE_ATIME = 0x000080  # Restore access time on exit
MAGIC_RAW = 0x000100  # Don't translate unprintable chars
MAGIC_ERROR = 0x000200  # Handle ENOENT etc as real errors

MAGIC_NO_CHECK_COMPRESS = 0x001000  # Don't check for compressed files
MAGIC_NO_CHECK_TAR = 0x002000  # Don't check for tar files
MAGIC_NO_CHECK_SOFT = 0x004000  # Don't check magic entries
MAGIC_NO_CHECK_APPTYPE = 0x008000  # Don't check application type
MAGIC_NO_CHECK_ELF = 0x010000  # Don't check for elf details
MAGIC_NO_CHECK_ASCII = 0x020000  # Don't check for ascii files
MAGIC_NO_CHECK_TROFF = 0x040000  # Don't check ascii/troff
MAGIC_NO_CHECK_FORTRAN = 0x080000  # Don't check ascii/fortran
MAGIC_NO_CHECK_TOKENS = 0x100000  # Don't check ascii/tokens

MAGIC_PARAM_INDIR_MAX = 0  # Recursion limit for indirect magic
MAGIC_PARAM_NAME_MAX = 1  # Use count limit for name/use magic
MAGIC_PARAM_ELF_PHNUM_MAX = 2  # Max ELF program sections processed
MAGIC_PARAM_ELF_SHNUM_MAX = 3  # Max ELF sections processed
MAGIC_PARAM_ELF_NOTES_MAX = 4  # Max ELF notes processed
MAGIC_PARAM_REGEX_MAX = 5  # Length limit for regex searches
MAGIC_PARAM_BYTES_MAX = 6  # Max number of bytes to read from file


# This package name conflicts with the one provided by upstream
# libmagic.  This is a common source of confusion for users.  To
# resolve, we ship a copy of that module, and expose its functions
# wrapped in deprecation warnings.
def _add_compat(to_module):
    """
    Copy the compat module's public functions and constants into the
    namespace dict `to_module`.

    Functions are wrapped so that calling them emits a
    PendingDeprecationWarning.  Constants already present in `to_module`
    must have the same value as in compat (MAGIC_MIME excepted), otherwise
    an Exception is raised.
    """
    import warnings
    import re
    from magic import compat

    def deprecation_wrapper(fn):
        def _(*args, **kwargs):
            warnings.warn(
                "Using compatibility mode with libmagic's python binding. "
                "See https://github.com/ahupp/python-magic/blob/master/COMPAT.md for details.",
                PendingDeprecationWarning)

            return fn(*args, **kwargs)

        return _

    fn = ['detect_from_filename',
          'detect_from_content',
          'detect_from_fobj',
          'open']
    for fname in fn:
        to_module[fname] = deprecation_wrapper(compat.__dict__[fname])

    # copy constants over, ensuring there's no conflicts
    is_const_re = re.compile("^[A-Z_]+$")
    allowed_inconsistent = set(['MAGIC_MIME'])
    for name, value in compat.__dict__.items():
        if not is_const_re.match(name):
            continue
        if name not in to_module:
            to_module[name] = value
            continue
        if name in allowed_inconsistent:
            continue
        if to_module[name] != value:
            raise Exception("inconsistent value for " + name)


_add_compat(globals())