magic.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363
  1. # coding: utf-8
  2. '''
  3. Python bindings for libmagic
  4. '''
  5. import ctypes
  6. import threading
  7. from collections import namedtuple
  8. from ctypes import *
  9. from ctypes.util import find_library
  10. def _init():
  11. """
  12. Loads the shared library through ctypes and returns a library
  13. L{ctypes.CDLL} instance
  14. """
  15. return ctypes.cdll.LoadLibrary(find_library('magic'))
  16. _libraries = {}
  17. _libraries['magic'] = _init()
  18. # Flag constants for open and setflags
  19. MAGIC_NONE = NONE = 0
  20. MAGIC_DEBUG = DEBUG = 1
  21. MAGIC_SYMLINK = SYMLINK = 2
  22. MAGIC_COMPRESS = COMPRESS = 4
  23. MAGIC_DEVICES = DEVICES = 8
  24. MAGIC_MIME_TYPE = MIME_TYPE = 16
  25. MAGIC_CONTINUE = CONTINUE = 32
  26. MAGIC_CHECK = CHECK = 64
  27. MAGIC_PRESERVE_ATIME = PRESERVE_ATIME = 128
  28. MAGIC_RAW = RAW = 256
  29. MAGIC_ERROR = ERROR = 512
  30. MAGIC_MIME_ENCODING = MIME_ENCODING = 1024
  31. MAGIC_MIME = MIME = 1040 # MIME_TYPE + MIME_ENCODING
  32. MAGIC_APPLE = APPLE = 2048
  33. MAGIC_NO_CHECK_COMPRESS = NO_CHECK_COMPRESS = 4096
  34. MAGIC_NO_CHECK_TAR = NO_CHECK_TAR = 8192
  35. MAGIC_NO_CHECK_SOFT = NO_CHECK_SOFT = 16384
  36. MAGIC_NO_CHECK_APPTYPE = NO_CHECK_APPTYPE = 32768
  37. MAGIC_NO_CHECK_ELF = NO_CHECK_ELF = 65536
  38. MAGIC_NO_CHECK_TEXT = NO_CHECK_TEXT = 131072
  39. MAGIC_NO_CHECK_CDF = NO_CHECK_CDF = 262144
  40. MAGIC_NO_CHECK_TOKENS = NO_CHECK_TOKENS = 1048576
  41. MAGIC_NO_CHECK_ENCODING = NO_CHECK_ENCODING = 2097152
  42. MAGIC_NO_CHECK_BUILTIN = NO_CHECK_BUILTIN = 4173824
  43. MAGIC_PARAM_INDIR_MAX = PARAM_INDIR_MAX = 0
  44. MAGIC_PARAM_NAME_MAX = PARAM_NAME_MAX = 1
  45. MAGIC_PARAM_ELF_PHNUM_MAX = PARAM_ELF_PHNUM_MAX = 2
  46. MAGIC_PARAM_ELF_SHNUM_MAX = PARAM_ELF_SHNUM_MAX = 3
  47. MAGIC_PARAM_ELF_NOTES_MAX = PARAM_ELF_NOTES_MAX = 4
  48. MAGIC_PARAM_REGEX_MAX = PARAM_REGEX_MAX = 5
  49. MAGIC_PARAM_BYTES_MAX = PARAM_BYTES_MAX = 6
  50. FileMagic = namedtuple('FileMagic', ('mime_type', 'encoding', 'name'))
  51. class magic_set(Structure):
  52. pass
  53. magic_set._fields_ = []
  54. magic_t = POINTER(magic_set)
  55. _open = _libraries['magic'].magic_open
  56. _open.restype = magic_t
  57. _open.argtypes = [c_int]
  58. _close = _libraries['magic'].magic_close
  59. _close.restype = None
  60. _close.argtypes = [magic_t]
  61. _file = _libraries['magic'].magic_file
  62. _file.restype = c_char_p
  63. _file.argtypes = [magic_t, c_char_p]
  64. _descriptor = _libraries['magic'].magic_descriptor
  65. _descriptor.restype = c_char_p
  66. _descriptor.argtypes = [magic_t, c_int]
  67. _buffer = _libraries['magic'].magic_buffer
  68. _buffer.restype = c_char_p
  69. _buffer.argtypes = [magic_t, c_void_p, c_size_t]
  70. _error = _libraries['magic'].magic_error
  71. _error.restype = c_char_p
  72. _error.argtypes = [magic_t]
  73. _setflags = _libraries['magic'].magic_setflags
  74. _setflags.restype = c_int
  75. _setflags.argtypes = [magic_t, c_int]
  76. _load = _libraries['magic'].magic_load
  77. _load.restype = c_int
  78. _load.argtypes = [magic_t, c_char_p]
  79. _compile = _libraries['magic'].magic_compile
  80. _compile.restype = c_int
  81. _compile.argtypes = [magic_t, c_char_p]
  82. _check = _libraries['magic'].magic_check
  83. _check.restype = c_int
  84. _check.argtypes = [magic_t, c_char_p]
  85. _list = _libraries['magic'].magic_list
  86. _list.restype = c_int
  87. _list.argtypes = [magic_t, c_char_p]
  88. _errno = _libraries['magic'].magic_errno
  89. _errno.restype = c_int
  90. _errno.argtypes = [magic_t]
  91. _getparam = _libraries['magic'].magic_getparam
  92. _getparam.restype = c_int
  93. _getparam.argtypes = [magic_t, c_int, c_void_p]
  94. _setparam = _libraries['magic'].magic_setparam
  95. _setparam.restype = c_int
  96. _setparam.argtypes = [magic_t, c_int, c_void_p]
  97. class Magic(object):
  98. def __init__(self, ms):
  99. self._magic_t = ms
  100. self._close = _close
  101. def close(self):
  102. """
  103. Closes the magic database and deallocates any resources used.
  104. """
  105. if _close:
  106. _close(self._magic_t)
  107. @staticmethod
  108. def __tostr(s):
  109. if s is None:
  110. return None
  111. if isinstance(s, str):
  112. return s
  113. try: # keep Python 2 compatibility
  114. return str(s, 'utf-8')
  115. except TypeError:
  116. return str(s)
  117. @staticmethod
  118. def __tobytes(b):
  119. if b is None:
  120. return None
  121. if isinstance(b, bytes):
  122. return b
  123. try: # keep Python 2 compatibility
  124. return bytes(b, 'utf-8')
  125. except TypeError:
  126. return bytes(b)
  127. def file(self, filename):
  128. """
  129. Returns a textual description of the contents of the argument passed
  130. as a filename or None if an error occurred and the MAGIC_ERROR flag
  131. is set. A call to errno() will return the numeric error code.
  132. """
  133. return Magic.__tostr(_file(self._magic_t, Magic.__tobytes(filename)))
  134. def descriptor(self, fd):
  135. """
  136. Returns a textual description of the contents of the argument passed
  137. as a file descriptor or None if an error occurred and the MAGIC_ERROR
  138. flag is set. A call to errno() will return the numeric error code.
  139. """
  140. return Magic.__tostr(_descriptor(self._magic_t, fd))
  141. def buffer(self, buf):
  142. """
  143. Returns a textual description of the contents of the argument passed
  144. as a buffer or None if an error occurred and the MAGIC_ERROR flag
  145. is set. A call to errno() will return the numeric error code.
  146. """
  147. return Magic.__tostr(_buffer(self._magic_t, buf, len(buf)))
  148. def error(self):
  149. """
  150. Returns a textual explanation of the last error or None
  151. if there was no error.
  152. """
  153. return Magic.__tostr(_error(self._magic_t))
  154. def setflags(self, flags):
  155. """
  156. Set flags on the magic object which determine how magic checking
  157. behaves; a bitwise OR of the flags described in libmagic(3), but
  158. without the MAGIC_ prefix.
  159. Returns -1 on systems that don't support utime(2) or utimes(2)
  160. when PRESERVE_ATIME is set.
  161. """
  162. return _setflags(self._magic_t, flags)
  163. def load(self, filename=None):
  164. """
  165. Must be called to load entries in the colon separated list of database
  166. files passed as argument or the default database file if no argument
  167. before any magic queries can be performed.
  168. Returns 0 on success and -1 on failure.
  169. """
  170. return _load(self._magic_t, Magic.__tobytes(filename))
  171. def compile(self, dbs):
  172. """
  173. Compile entries in the colon separated list of database files
  174. passed as argument or the default database file if no argument.
  175. The compiled files created are named from the basename(1) of each file
  176. argument with ".mgc" appended to it.
  177. Returns 0 on success and -1 on failure.
  178. """
  179. return _compile(self._magic_t, Magic.__tobytes(dbs))
  180. def check(self, dbs):
  181. """
  182. Check the validity of entries in the colon separated list of
  183. database files passed as argument or the default database file
  184. if no argument.
  185. Returns 0 on success and -1 on failure.
  186. """
  187. return _check(self._magic_t, Magic.__tobytes(dbs))
  188. def list(self, dbs):
  189. """
  190. Check the validity of entries in the colon separated list of
  191. database files passed as argument or the default database file
  192. if no argument.
  193. Returns 0 on success and -1 on failure.
  194. """
  195. return _list(self._magic_t, Magic.__tobytes(dbs))
  196. def errno(self):
  197. """
  198. Returns a numeric error code. If return value is 0, an internal
  199. magic error occurred. If return value is non-zero, the value is
  200. an OS error code. Use the errno module or os.strerror() can be used
  201. to provide detailed error information.
  202. """
  203. return _errno(self._magic_t)
  204. def getparam(self, param):
  205. """
  206. Returns the param value if successful and -1 if the parameter
  207. was unknown.
  208. """
  209. v = c_int()
  210. i = _getparam(self._magic_t, param, byref(v))
  211. if i == -1:
  212. return -1
  213. return v.value
  214. def setparam(self, param, value):
  215. """
  216. Returns 0 if successful and -1 if the parameter was unknown.
  217. """
  218. v = c_int(value)
  219. return _setparam(self._magic_t, param, byref(v))
  220. def open(flags):
  221. """
  222. Returns a magic object on success and None on failure.
  223. Flags argument as for setflags.
  224. """
  225. magic_t = _open(flags)
  226. if magic_t is None:
  227. return None
  228. return Magic(magic_t)
  229. # Objects used by `detect_from_` functions
  230. class error(Exception):
  231. pass
  232. class MagicDetect(object):
  233. def __init__(self):
  234. self.mime_magic = open(MAGIC_MIME)
  235. if self.mime_magic is None:
  236. raise error
  237. if self.mime_magic.load() == -1:
  238. self.mime_magic.close()
  239. self.mime_magic = None
  240. raise error
  241. self.none_magic = open(MAGIC_NONE)
  242. if self.none_magic is None:
  243. self.mime_magic.close()
  244. self.mime_magic = None
  245. raise error
  246. if self.none_magic.load() == -1:
  247. self.none_magic.close()
  248. self.none_magic = None
  249. self.mime_magic.close()
  250. self.mime_magic = None
  251. raise error
  252. def __del__(self):
  253. if self.mime_magic is not None:
  254. self.mime_magic.close()
  255. if self.none_magic is not None:
  256. self.none_magic.close()
  257. threadlocal = threading.local()
  258. def _detect_make():
  259. v = getattr(threadlocal, "magic_instance", None)
  260. if v is None:
  261. v = MagicDetect()
  262. setattr(threadlocal, "magic_instance", v)
  263. return v
  264. def _create_filemagic(mime_detected, type_detected):
  265. try:
  266. mime_type, mime_encoding = mime_detected.split('; ')
  267. except ValueError:
  268. raise ValueError(mime_detected)
  269. return FileMagic(name=type_detected, mime_type=mime_type,
  270. encoding=mime_encoding.replace('charset=', ''))
  271. def detect_from_filename(filename):
  272. '''Detect mime type, encoding and file type from a filename
  273. Returns a `FileMagic` namedtuple.
  274. '''
  275. x = _detect_make()
  276. return _create_filemagic(x.mime_magic.file(filename),
  277. x.none_magic.file(filename))
  278. def detect_from_fobj(fobj):
  279. '''Detect mime type, encoding and file type from file-like object
  280. Returns a `FileMagic` namedtuple.
  281. '''
  282. file_descriptor = fobj.fileno()
  283. x = _detect_make()
  284. return _create_filemagic(x.mime_magic.descriptor(file_descriptor),
  285. x.none_magic.descriptor(file_descriptor))
  286. def detect_from_content(byte_content):
  287. '''Detect mime type, encoding and file type from bytes
  288. Returns a `FileMagic` namedtuple.
  289. '''
  290. x = _detect_make()
  291. return _create_filemagic(x.mime_magic.buffer(byte_content),
  292. x.none_magic.buffer(byte_content))