magic.py 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. # coding: utf-8
  2. '''
  3. Python bindings for libmagic
  4. '''
  5. import ctypes
  6. from collections import namedtuple
  7. from ctypes import *
  8. from ctypes.util import find_library
  def _init():
      """
      Loads the shared library through ctypes and returns a library
      L{ctypes.CDLL} instance

      Raises ImportError when no libmagic shared library can be located.
      """
      # find_library() returns None when nothing matches.  Passing None on to
      # LoadLibrary() does not produce a useful error (on Linux it opens the
      # running process instead), so the problem would only show up later as
      # a missing `magic_open` symbol.  Fail early with a clear message.
      path = find_library('magic')
      if path is None:
          raise ImportError('failed to find the libmagic shared library')
      return ctypes.cdll.LoadLibrary(path)


  # Loaded libraries keyed by short name; filled in at import time.
  _libraries = {}
  _libraries['magic'] = _init()
  # Flag constants for open and setflags.
  # Every flag is exported twice: with the MAGIC_ prefix and without it.
  # Values are written as bit masks so the individual bits are visible.
  MAGIC_NONE = NONE = 0x000000
  MAGIC_DEBUG = DEBUG = 0x000001
  MAGIC_SYMLINK = SYMLINK = 0x000002
  MAGIC_COMPRESS = COMPRESS = 0x000004
  MAGIC_DEVICES = DEVICES = 0x000008
  MAGIC_MIME_TYPE = MIME_TYPE = 0x000010
  MAGIC_CONTINUE = CONTINUE = 0x000020
  MAGIC_CHECK = CHECK = 0x000040
  MAGIC_PRESERVE_ATIME = PRESERVE_ATIME = 0x000080
  MAGIC_RAW = RAW = 0x000100
  MAGIC_ERROR = ERROR = 0x000200
  MAGIC_MIME_ENCODING = MIME_ENCODING = 0x000400
  MAGIC_MIME = MIME = 0x000410  # MIME_TYPE + MIME_ENCODING
  MAGIC_APPLE = APPLE = 0x000800

  # Switches that disable individual detection passes.
  MAGIC_NO_CHECK_COMPRESS = NO_CHECK_COMPRESS = 0x001000
  MAGIC_NO_CHECK_TAR = NO_CHECK_TAR = 0x002000
  MAGIC_NO_CHECK_SOFT = NO_CHECK_SOFT = 0x004000
  MAGIC_NO_CHECK_APPTYPE = NO_CHECK_APPTYPE = 0x008000
  MAGIC_NO_CHECK_ELF = NO_CHECK_ELF = 0x010000
  MAGIC_NO_CHECK_TEXT = NO_CHECK_TEXT = 0x020000
  MAGIC_NO_CHECK_CDF = NO_CHECK_CDF = 0x040000
  MAGIC_NO_CHECK_TOKENS = NO_CHECK_TOKENS = 0x100000
  MAGIC_NO_CHECK_ENCODING = NO_CHECK_ENCODING = 0x200000
  # Combined mask: every NO_CHECK_* bit above except NO_CHECK_SOFT, plus
  # bit 0x080000, which has no named constant in this module.
  MAGIC_NO_CHECK_BUILTIN = NO_CHECK_BUILTIN = 0x3FB000
  # Immutable result record: MIME type, character encoding and the
  # human-readable type description, in that field order.
  FileMagic = namedtuple('FileMagic', ['mime_type', 'encoding', 'name'])
  class magic_set(Structure):
      """Opaque handle type: declared with no fields, only used via pointer."""


  # No members are declared; Python code only passes pointers around.
  magic_set._fields_ = []

  # Pointer type used as the handle argument/return type of the bindings.
  magic_t = POINTER(magic_set)
  # ctypes prototypes for the libmagic entry points used by this module.
  # Each binding declares its argument types and its return type so ctypes
  # converts values correctly at the call boundary.

  # --- handle lifetime -----------------------------------------------------

  # magic_t magic_open(int flags)
  _open = _libraries['magic'].magic_open
  _open.argtypes = [c_int]
  _open.restype = magic_t

  # void magic_close(magic_t cookie)
  _close = _libraries['magic'].magic_close
  _close.argtypes = [magic_t]
  _close.restype = None

  # --- queries (return a C string, or NULL -> None) ------------------------

  # const char *magic_file(magic_t cookie, const char *filename)
  _file = _libraries['magic'].magic_file
  _file.argtypes = [magic_t, c_char_p]
  _file.restype = c_char_p

  # const char *magic_descriptor(magic_t cookie, int fd)
  _descriptor = _libraries['magic'].magic_descriptor
  _descriptor.argtypes = [magic_t, c_int]
  _descriptor.restype = c_char_p

  # const char *magic_buffer(magic_t cookie, const void *buffer, size_t length)
  _buffer = _libraries['magic'].magic_buffer
  _buffer.argtypes = [magic_t, c_void_p, c_size_t]
  _buffer.restype = c_char_p

  # const char *magic_error(magic_t cookie)
  _error = _libraries['magic'].magic_error
  _error.argtypes = [magic_t]
  _error.restype = c_char_p

  # --- configuration and database handling (return an int status) ----------

  # int magic_setflags(magic_t cookie, int flags)
  _setflags = _libraries['magic'].magic_setflags
  _setflags.argtypes = [magic_t, c_int]
  _setflags.restype = c_int

  # int magic_load(magic_t cookie, const char *filename)
  _load = _libraries['magic'].magic_load
  _load.argtypes = [magic_t, c_char_p]
  _load.restype = c_int

  # int magic_compile(magic_t cookie, const char *filename)
  _compile = _libraries['magic'].magic_compile
  _compile.argtypes = [magic_t, c_char_p]
  _compile.restype = c_int

  # int magic_check(magic_t cookie, const char *filename)
  _check = _libraries['magic'].magic_check
  _check.argtypes = [magic_t, c_char_p]
  _check.restype = c_int

  # int magic_list(magic_t cookie, const char *filename)
  _list = _libraries['magic'].magic_list
  _list.argtypes = [magic_t, c_char_p]
  _list.restype = c_int

  # int magic_errno(magic_t cookie)
  _errno = _libraries['magic'].magic_errno
  _errno.argtypes = [magic_t]
  _errno.restype = c_int
  class Magic(object):
      """
      Object wrapper around a libmagic handle.

      Methods forward to the matching libmagic call, converting text
      arguments to bytes on the way in and C strings to text on the way out.
      """

      def __init__(self, ms):
          # ms is the handle returned by the low-level magic_open binding.
          self._magic_t = ms

      def close(self):
          """
          Closes the magic database and deallocates any resources used.
          """
          _close(self._magic_t)

      @staticmethod
      def __tostr(s):
          """
          Converts a value returned by libmagic to text; None is passed
          through unchanged.
          """
          if s is None:
              return None
          if isinstance(s, str):
              return s
          try:  # keep Python 2 compatibility
              # Descriptions are not guaranteed to be valid UTF-8; escape
              # undecodable bytes instead of raising UnicodeDecodeError.
              # Python 2's str() rejects the extra arguments with TypeError
              # and falls through to the plain conversion below.
              return str(s, 'utf-8', 'backslashreplace')
          except TypeError:
              return str(s)

      @staticmethod
      def __tobytes(b):
          """
          Converts a text argument to UTF-8 bytes for libmagic; None and
          bytes are passed through unchanged.
          """
          if b is None:
              return None
          if isinstance(b, bytes):
              return b
          try:  # keep Python 2 compatibility
              return bytes(b, 'utf-8')
          except TypeError:
              return bytes(b)

      def file(self, filename):
          """
          Returns a textual description of the contents of the argument passed
          as a filename or None if an error occurred and the MAGIC_ERROR flag
          is set. A call to errno() will return the numeric error code.
          """
          return Magic.__tostr(_file(self._magic_t, Magic.__tobytes(filename)))

      def descriptor(self, fd):
          """
          Returns a textual description of the contents of the argument passed
          as a file descriptor or None if an error occurred and the MAGIC_ERROR
          flag is set. A call to errno() will return the numeric error code.
          """
          return Magic.__tostr(_descriptor(self._magic_t, fd))

      def buffer(self, buf):
          """
          Returns a textual description of the contents of the argument passed
          as a buffer or None if an error occurred and the MAGIC_ERROR flag
          is set. A call to errno() will return the numeric error code.

          Text passed as buf is encoded to UTF-8 before inspection.
          """
          # The buffer parameter is declared as a void pointer.  ctypes would
          # hand a Python 3 str over as wide characters while len() counts
          # characters, so encode text first and measure the bytes.
          if isinstance(buf, str):
              buf = Magic.__tobytes(buf)
          return Magic.__tostr(_buffer(self._magic_t, buf, len(buf)))

      def error(self):
          """
          Returns a textual explanation of the last error or None
          if there was no error.
          """
          return Magic.__tostr(_error(self._magic_t))

      def setflags(self, flags):
          """
          Set flags on the magic object which determine how magic checking
          behaves; a bitwise OR of the flags described in libmagic(3), but
          without the MAGIC_ prefix.
          Returns -1 on systems that don't support utime(2) or utimes(2)
          when PRESERVE_ATIME is set.
          """
          return _setflags(self._magic_t, flags)

      def load(self, filename=None):
          """
          Must be called to load entries in the colon separated list of database
          files passed as argument or the default database file if no argument
          before any magic queries can be performed.
          Returns 0 on success and -1 on failure.
          """
          return _load(self._magic_t, Magic.__tobytes(filename))

      def compile(self, dbs=None):
          """
          Compile entries in the colon separated list of database files
          passed as argument or the default database file if no argument.
          The compiled files created are named from the basename(1) of each file
          argument with ".mgc" appended to it.
          Returns 0 on success and -1 on failure.
          """
          return _compile(self._magic_t, Magic.__tobytes(dbs))

      def check(self, dbs=None):
          """
          Check the validity of entries in the colon separated list of
          database files passed as argument or the default database file
          if no argument.
          Returns 0 on success and -1 on failure.
          """
          return _check(self._magic_t, Magic.__tobytes(dbs))

      def list(self, dbs=None):
          """
          Dump all magic entries in the colon separated list of database
          files passed as argument, or in the default database file if no
          argument, in a human readable format.
          Returns 0 on success and -1 on failure.
          """
          return _list(self._magic_t, Magic.__tobytes(dbs))

      def errno(self):
          """
          Returns a numeric error code. If return value is 0, an internal
          magic error occurred. If return value is non-zero, the value is
          an OS error code. The errno module or os.strerror() can be used
          to provide detailed error information.
          """
          return _errno(self._magic_t)
  def open(flags):
      """
      Returns a magic object on success and None on failure.
      Flags argument as for setflags.
      """
      ms = _open(flags)
      # The binding is declared to return a pointer type, so a failed call
      # yields a NULL pointer object (which is falsy) rather than None.
      # Wrapping it would hand callers an unusable Magic instance instead of
      # the None this function documents.
      if not ms:
          return None
      return Magic(ms)
  # Objects used by `detect_from_` functions.
  # Both are created and loaded with the default database at import time:
  # one reports MIME type and encoding, the other the plain description.
  mime_magic = Magic(_open(MAGIC_MIME))
  mime_magic.load(filename=None)

  none_magic = Magic(_open(MAGIC_NONE))
  none_magic.load(filename=None)
  def _create_filemagic(mime_detected, type_detected):
      """
      Builds a `FileMagic` from the two descriptions of one input.

      mime_detected is expected to look like "<mime type>; charset=<encoding>";
      type_detected is stored unchanged as the `name` field.

      Raises ValueError when mime_detected is None or does not consist of
      exactly two parts separated by '; '.
      """
      # The query methods may return None when detection failed.  Report that
      # with the same exception type used for any other unusable description
      # rather than failing with an AttributeError on `.split`.
      if mime_detected is None:
          raise ValueError('libmagic returned no MIME description')
      try:
          mime_type, mime_encoding = mime_detected.split('; ')
      except ValueError:
          # Re-raise with the offending description as the message.
          raise ValueError(mime_detected)

      return FileMagic(name=type_detected, mime_type=mime_type,
                       encoding=mime_encoding.replace('charset=', ''))
  def detect_from_filename(filename):
      '''Detect mime type, encoding and file type from a filename

      The file is queried twice: once through the MIME handle and once
      through the plain-description handle.

      Returns a `FileMagic` namedtuple.
      '''
      mime_description = mime_magic.file(filename)
      type_description = none_magic.file(filename)
      return _create_filemagic(mime_description, type_description)
  def detect_from_fobj(fobj):
      '''Detect mime type, encoding and file type from file-like object

      The object must provide `fileno()`; the descriptor it returns is
      queried through both module-level handles.

      Returns a `FileMagic` namedtuple.
      '''
      fd = fobj.fileno()
      mime_description = mime_magic.descriptor(fd)
      type_description = none_magic.descriptor(fd)
      return _create_filemagic(mime_description, type_description)
  def detect_from_content(byte_content):
      '''Detect mime type, encoding and file type from bytes

      The content is inspected in memory through both module-level handles.

      Returns a `FileMagic` namedtuple.
      '''
      mime_description = mime_magic.buffer(byte_content)
      type_description = none_magic.buffer(byte_content)
      return _create_filemagic(mime_description, type_description)
  224. none_magic.buffer(byte_content))