magic.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361
  1. # coding: utf-8
  2. '''
  3. Python bindings for libmagic
  4. '''
  5. import ctypes
  6. import threading
  7. from collections import namedtuple
  8. from ctypes import *
  9. from ctypes.util import find_library
  def _init():
      """
      Loads the shared library through ctypes and returns a library
      L{ctypes.CDLL} instance

      Raises ImportError if no libmagic shared library can be located.
      """
      path = find_library('magic')
      if path is None:
          # find_library() returns None when nothing is found.  Passing that
          # on to LoadLibrary() does not fail on POSIX (it yields a handle to
          # the running process), so the problem would only show up later as
          # an obscure AttributeError on the first magic_* symbol lookup.
          raise ImportError('failed to find the libmagic shared library')
      return ctypes.cdll.LoadLibrary(path)
  # Loaded shared libraries, keyed by short name; filled once at import time.
  _libraries = {'magic': _init()}
  # Flag constants for open and setflags.
  # Each one is exported twice: with and without the MAGIC_ prefix.
  MAGIC_NONE = NONE = 0x000000
  MAGIC_DEBUG = DEBUG = 0x000001
  MAGIC_SYMLINK = SYMLINK = 0x000002
  MAGIC_COMPRESS = COMPRESS = 0x000004
  MAGIC_DEVICES = DEVICES = 0x000008
  MAGIC_MIME_TYPE = MIME_TYPE = 0x000010
  MAGIC_CONTINUE = CONTINUE = 0x000020
  MAGIC_CHECK = CHECK = 0x000040
  MAGIC_PRESERVE_ATIME = PRESERVE_ATIME = 0x000080
  MAGIC_RAW = RAW = 0x000100
  MAGIC_ERROR = ERROR = 0x000200
  MAGIC_MIME_ENCODING = MIME_ENCODING = 0x000400
  # MIME_TYPE + MIME_ENCODING (1040)
  MAGIC_MIME = MIME = MAGIC_MIME_TYPE | MAGIC_MIME_ENCODING
  MAGIC_APPLE = APPLE = 0x000800

  MAGIC_NO_CHECK_COMPRESS = NO_CHECK_COMPRESS = 0x001000
  MAGIC_NO_CHECK_TAR = NO_CHECK_TAR = 0x002000
  MAGIC_NO_CHECK_SOFT = NO_CHECK_SOFT = 0x004000
  MAGIC_NO_CHECK_APPTYPE = NO_CHECK_APPTYPE = 0x008000
  MAGIC_NO_CHECK_ELF = NO_CHECK_ELF = 0x010000
  MAGIC_NO_CHECK_TEXT = NO_CHECK_TEXT = 0x020000
  MAGIC_NO_CHECK_CDF = NO_CHECK_CDF = 0x040000
  MAGIC_NO_CHECK_TOKENS = NO_CHECK_TOKENS = 0x100000
  MAGIC_NO_CHECK_ENCODING = NO_CHECK_ENCODING = 0x200000
  MAGIC_NO_CHECK_BUILTIN = NO_CHECK_BUILTIN = 0x3fb000

  # Parameter identifiers, numbered consecutively from 0.
  MAGIC_PARAM_INDIR_MAX = PARAM_INDIR_MAX = 0
  MAGIC_PARAM_NAME_MAX = PARAM_NAME_MAX = 1
  MAGIC_PARAM_ELF_PHNUM_MAX = PARAM_ELF_PHNUM_MAX = 2
  MAGIC_PARAM_ELF_SHNUM_MAX = PARAM_ELF_SHNUM_MAX = 3
  MAGIC_PARAM_ELF_NOTES_MAX = PARAM_ELF_NOTES_MAX = 4
  MAGIC_PARAM_REGEX_MAX = PARAM_REGEX_MAX = 5
  MAGIC_PARAM_BYTES_MAX = PARAM_BYTES_MAX = 6
  # Result record returned by the `detect_from_` functions.
  FileMagic = namedtuple('FileMagic', ['mime_type', 'encoding', 'name'])
  class magic_set(Structure):
      """Opaque ctypes structure for a libmagic handle; declares no fields."""
      _fields_ = []


  # Pointer-to-magic_set type used as the handle in the bindings below.
  magic_t = POINTER(magic_set)
  def _bind(symbol, restype, argtypes):
      """Look up `symbol` in the loaded libmagic and attach its prototype."""
      function = getattr(_libraries['magic'], symbol)
      function.restype = restype
      function.argtypes = argtypes
      return function


  # Handle lifetime.
  _open = _bind('magic_open', magic_t, [c_int])
  _close = _bind('magic_close', None, [magic_t])

  # Queries; each returns a C string (or NULL).
  _file = _bind('magic_file', c_char_p, [magic_t, c_char_p])
  _descriptor = _bind('magic_descriptor', c_char_p, [magic_t, c_int])
  _buffer = _bind('magic_buffer', c_char_p, [magic_t, c_void_p, c_size_t])
  _error = _bind('magic_error', c_char_p, [magic_t])

  # Configuration and database handling; each returns an int status.
  _setflags = _bind('magic_setflags', c_int, [magic_t, c_int])
  _load = _bind('magic_load', c_int, [magic_t, c_char_p])
  _compile = _bind('magic_compile', c_int, [magic_t, c_char_p])
  _check = _bind('magic_check', c_int, [magic_t, c_char_p])
  _list = _bind('magic_list', c_int, [magic_t, c_char_p])
  _errno = _bind('magic_errno', c_int, [magic_t])

  # Tunable parameters; the value travels through a pointer argument.
  _getparam = _bind('magic_getparam', c_int, [magic_t, c_int, c_void_p])
  _setparam = _bind('magic_setparam', c_int, [magic_t, c_int, c_void_p])
  class Magic(object):
      """
      Object wrapper around a libmagic handle.  Each method forwards to the
      matching magic_* binding, converting text arguments to bytes on the way
      in and C strings to str on the way out.
      """

      def __init__(self, ms):
          self._magic_t = ms

      def close(self):
          """
          Closes the magic database and deallocates any resources used.
          Calling close() on an already closed object does nothing.
          """
          if not self._magic_t:
              # None or a NULL pointer: there is nothing left to release.
              return
          _close(self._magic_t)
          # Forget the freed handle so a second close() cannot double free it.
          self._magic_t = None

      @staticmethod
      def __tostr(s):
          if s is None:
              return None
          if isinstance(s, str):
              return s
          try:  # keep Python 2 compatibility
              return str(s, 'utf-8')
          except TypeError:
              return str(s)

      @staticmethod
      def __tobytes(b):
          if b is None:
              return None
          if isinstance(b, bytes):
              return b
          try:  # keep Python 2 compatibility
              return bytes(b, 'utf-8')
          except TypeError:
              return bytes(b)

      def file(self, filename):
          """
          Returns a textual description of the contents of the argument passed
          as a filename or None if an error occurred and the MAGIC_ERROR flag
          is set. A call to errno() will return the numeric error code.
          """
          return Magic.__tostr(_file(self._magic_t, Magic.__tobytes(filename)))

      def descriptor(self, fd):
          """
          Returns a textual description of the contents of the argument passed
          as a file descriptor or None if an error occurred and the MAGIC_ERROR
          flag is set. A call to errno() will return the numeric error code.
          """
          return Magic.__tostr(_descriptor(self._magic_t, fd))

      def buffer(self, buf):
          """
          Returns a textual description of the contents of the argument passed
          as a buffer or None if an error occurred and the MAGIC_ERROR flag
          is set. A call to errno() will return the numeric error code.
          """
          return Magic.__tostr(_buffer(self._magic_t, buf, len(buf)))

      def error(self):
          """
          Returns a textual explanation of the last error or None
          if there was no error.
          """
          return Magic.__tostr(_error(self._magic_t))

      def setflags(self, flags):
          """
          Set flags on the magic object which determine how magic checking
          behaves; a bitwise OR of the flags described in libmagic(3), but
          without the MAGIC_ prefix.
          Returns -1 on systems that don't support utime(2) or utimes(2)
          when PRESERVE_ATIME is set.
          """
          return _setflags(self._magic_t, flags)

      def load(self, filename=None):
          """
          Must be called to load entries in the colon separated list of database
          files passed as argument or the default database file if no argument
          before any magic queries can be performed.
          Returns 0 on success and -1 on failure.
          """
          return _load(self._magic_t, Magic.__tobytes(filename))

      def compile(self, dbs=None):
          """
          Compile entries in the colon separated list of database files
          passed as argument or the default database file if no argument.
          The compiled files created are named from the basename(1) of each file
          argument with ".mgc" appended to it.
          Returns 0 on success and -1 on failure.
          """
          return _compile(self._magic_t, Magic.__tobytes(dbs))

      def check(self, dbs=None):
          """
          Check the validity of entries in the colon separated list of
          database files passed as argument or the default database file
          if no argument.
          Returns 0 on success and -1 on failure.
          """
          return _check(self._magic_t, Magic.__tobytes(dbs))

      def list(self, dbs=None):
          """
          Dump, in a human readable format, all magic entries in the colon
          separated list of database files passed as argument or the default
          database file if no argument.
          Returns 0 on success and -1 on failure.
          """
          return _list(self._magic_t, Magic.__tobytes(dbs))

      def errno(self):
          """
          Returns a numeric error code. If return value is 0, an internal
          magic error occurred. If return value is non-zero, the value is
          an OS error code. The errno module or os.strerror() can be used
          to provide detailed error information.
          """
          return _errno(self._magic_t)

      def getparam(self, param):
          """
          Returns the param value if successful and -1 if the parameter
          was unknown.
          """
          # libmagic reads and writes parameter values through a size_t
          # pointer, so the buffer must be that wide; a c_int is too small
          # wherever size_t is wider than int.
          v = c_size_t()
          if _getparam(self._magic_t, param, byref(v)) == -1:
              return -1
          return v.value

      def setparam(self, param, value):
          """
          Returns 0 if successful and -1 if the parameter was unknown.
          """
          # Same width requirement as in getparam(): the C side reads a size_t.
          v = c_size_t(value)
          return _setparam(self._magic_t, param, byref(v))
  def open(flags):
      """
      Returns a magic object on success and None on failure.
      Flags argument as for setflags.
      """
      handle = _open(flags)
      # _open's restype is a ctypes POINTER type, so a failed call comes back
      # as a NULL pointer instance, never as None.  NULL pointers are falsy,
      # which is the only reliable way to detect the failure here.
      if not handle:
          return None
      return Magic(handle)
  227. # Objects used by `detect_from_` functions
  class error(Exception):
      """Raised by MagicDetect when a magic object cannot be opened or loaded."""
  class MagicDetect(object):
      """
      Pair of loaded magic objects used by the `detect_from_` functions:
      `mime_magic` is opened with MAGIC_MIME and `none_magic` with MAGIC_NONE.

      The constructor raises `error` if either object cannot be opened or its
      database cannot be loaded; anything opened so far is closed first.
      """

      def __init__(self):
          # Define both attributes before anything can fail, so that __del__
          # never runs against a half-initialised instance.
          self.mime_magic = None
          self.none_magic = None
          try:
              self.mime_magic = self._open_loaded(MAGIC_MIME)
              self.none_magic = self._open_loaded(MAGIC_NONE)
          except Exception:
              # Clean up whatever was opened, then let the failure propagate.
              self._release()
              raise

      @staticmethod
      def _open_loaded(flags):
          """Open a magic object with `flags` and load the default database."""
          cookie = open(flags)
          if cookie is None:
              raise error
          if cookie.load() == -1:
              cookie.close()
              raise error
          return cookie

      def _release(self):
          """Close whichever magic objects are still held and forget them."""
          for attr in ('mime_magic', 'none_magic'):
              cookie = getattr(self, attr, None)
              if cookie is not None:
                  # Clear the attribute first so nothing is closed twice.
                  setattr(self, attr, None)
                  cookie.close()

      def __del__(self):
          self._release()
  # Per-thread storage: each thread gets its own MagicDetect instance.
  threadlocal = threading.local()


  def _detect_make():
      """Return this thread's MagicDetect, creating it on first use."""
      try:
          detector = threadlocal.magic_instance
      except AttributeError:
          detector = None
      if detector is None:
          detector = MagicDetect()
          threadlocal.magic_instance = detector
      return detector
  def _create_filemagic(mime_detected, type_detected):
      """
      Build a `FileMagic` from a "type; charset=encoding" MIME string and a
      textual description.  Raises ValueError carrying the MIME string when
      it does not split into exactly two '; ' separated parts.
      """
      parts = mime_detected.split('; ')
      if len(parts) != 2:
          raise ValueError(mime_detected)
      mime_type, charset = parts
      encoding = charset.replace('charset=', '')
      return FileMagic(mime_type=mime_type, encoding=encoding,
                       name=type_detected)
  def detect_from_filename(filename):
      '''Detect mime type, encoding and file type from a filename

      Queries this thread's MIME and plain magic objects for `filename`.
      Returns a `FileMagic` namedtuple.
      '''
      detector = _detect_make()
      mime_description = detector.mime_magic.file(filename)
      type_description = detector.none_magic.file(filename)
      return _create_filemagic(mime_description, type_description)
  def detect_from_fobj(fobj):
      '''Detect mime type, encoding and file type from file-like object

      The object must provide fileno(); detection runs on that descriptor.
      Returns a `FileMagic` namedtuple.
      '''
      fd = fobj.fileno()
      detector = _detect_make()
      mime_description = detector.mime_magic.descriptor(fd)
      type_description = detector.none_magic.descriptor(fd)
      return _create_filemagic(mime_description, type_description)
  def detect_from_content(byte_content):
      '''Detect mime type, encoding and file type from bytes

      Queries this thread's MIME and plain magic objects for `byte_content`.
      Returns a `FileMagic` namedtuple.
      '''
      detector = _detect_make()
      mime_description = detector.mime_magic.buffer(byte_content)
      type_description = detector.none_magic.buffer(byte_content)
      return _create_filemagic(mime_description, type_description)