msooxml 3.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
  1. #------------------------------------------------------------------------------
  2. # $File: msooxml,v 1.24 2025/05/29 15:03:04 christos Exp $
  3. # msooxml: file(1) magic for Microsoft Office XML
  4. # From: Ralf Brown <ralf.brown@gmail.com>
  5. # .docx, .pptx, and .xlsx are XML plus other files inside a ZIP
  6. # archive. The first member file is normally "[Content_Types].xml",
  7. # but some LibreOffice-generated files put this later. Perhaps skip
  8. # the "[Content_Types].xml" test?
  9. # Since MSOOXML doesn't have anything like the uncompressed "mimetype"
  10. # file of ePub or OpenDocument, we'll have to scan for a filename
  11. # which can distinguish between the three types
  # Named subroutine "msooxml".  The callers in this file invoke it via
  # "use msooxml" at the offset where a ZIP member's file name starts.
  # Each test below compares the text at that offset with a known top-level
  # directory or file name (the .nuspec test searches a 100-byte range
  # instead) and, on a match, prints the description and sets the MIME type
  # and, where one is listed, the file extension.
  12. 0 name msooxml
  13. >0 string word/ Microsoft Word 2007+
  14. !:mime application/vnd.openxmlformats-officedocument.wordprocessingml.document
  15. !:ext docx
  16. >0 string ppt/ Microsoft PowerPoint 2007+
  17. !:mime application/vnd.openxmlformats-officedocument.presentationml.presentation
  18. !:ext pptx
  19. >0 string xl/ Microsoft Excel 2007+
  20. !:mime application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
  21. !:ext xlsx
  # NOTE(review): no "!:ext" is given for the Visio, Silverlight and NuGet
  # entries below -- confirm whether that is intentional.
  22. >0 string visio/ Microsoft Visio 2013+
  23. !:mime application/vnd.ms-visio.drawing.main+xml
  24. >0 string AppManifest.xaml Microsoft Silverlight Application
  25. !:mime application/x-silverlight-app
  26. >0 search/100 .nuspec NuGet package
  27. !:mime application/vnd.nuget.package
  28. # start by checking for ZIP local file header signature
  # Offset 0x1E (30) is where the first member's file name is expected, i.e.
  # just past the fixed-size part of the local file header.  Likewise, "&26"
  # applied after a matched 4-byte "PK\003\004" signature points at the file
  # name of that later header.  "!:strength +10" raises this entry's strength.
  29. 0 string PK\003\004
  30. !:strength +10
  31. # make sure the first file is correct
  # First try the first member's name directly with the msooxml subroutine;
  # the bare "default x" is the branch taken when nothing at this level has
  # matched so far.
  32. >0x1E use msooxml
  33. >0x1E default x
  # Continue only when one of these names/patterns is found by the regex
  # scan that starts at the first member's file name.
  34. >>0x1E regex \\[Content_Types\\]\\.xml|_rels/\\.rels|docProps|customXml|.*\\.md|.*\\.png
  35. # skip to the second local file header
  36. # since some documents include a 520-byte extra field following the file
  37. # header, we need to scan for the next header
  38. >>>&26 search/10000 PK\003\004
  # test the second member's file name, falling through if it is not known
  39. >>>>&26 use msooxml
  40. >>>>&26 default x
  41. # now skip to the *third* local file header; again, we need to scan due to a
  42. # 520-byte extra field following the file header
  43. >>>>>&26 search/10000 PK\003\004
  44. # and check the subdirectory name to determine which type of OOXML
  45. # file we have. Correct the mimetype with the registered ones:
  46. # https://technet.microsoft.com/en-us/library/cc179224.aspx
  47. >>>>>>&26 use msooxml
  48. >>>>>>&26 default x
  49. # OpenOffice/Libreoffice orders ZIP entry differently, so check the 4th file
  50. >>>>>>>&26 search/10000 PK\003\004
  51. >>>>>>>>&26 use msooxml
  52. # Some OOXML generators add an extra customXml directory. Check another file.
  53. >>>>>>>>&26 default x
  # fifth local file header
  54. >>>>>>>>>&26 search/10000 PK\003\004
  55. >>>>>>>>>>&26 use msooxml
  56. >>>>>>>>>>&26 default x
  # sixth local file header: last member name that is examined
  57. >>>>>>>>>>>&26 search/10000 PK\003\004
  58. >>>>>>>>>>>>&26 use msooxml
  # Generic "Microsoft OOXML" fallbacks, listed from the innermost nesting
  # level outwards.
  # NOTE(review): several of these levels already carry a bare "default x"
  # above; confirm against magic(5) "default" semantics whether the second
  # default at such a level can still fire.
  59. >>>>>>>>>>>>&26 default x Microsoft OOXML
  60. >>>>>>>>>>>&26 default x Microsoft OOXML
  61. >>>>>>>>>>&26 default x Microsoft OOXML
  62. >>>>>>>>>&26 default x Microsoft OOXML
  63. >>>>>>>>&26 default x Microsoft OOXML
  64. >>>>>>>&26 default x Microsoft OOXML
  65. >>>>>>&26 default x Microsoft OOXML
  # Alternative branch: the regex scan from the first member's file name
  # finds "[trash]".  Two header searches are chained without testing the
  # name in between, so the first name handed to msooxml here belongs to
  # the second header found.
  66. >>0x1E regex \\[trash\\]
  67. >>>&26 search/10000 PK\003\004
  68. >>>>&26 search/10000 PK\003\004
  69. >>>>>&26 use msooxml
  70. >>>>>&26 default x
  # one more header is tried before giving up with the generic description
  71. >>>>>>&26 search/10000 PK\003\004
  72. >>>>>>>&26 use msooxml
  73. >>>>>>>&26 default x Microsoft OOXML
  74. >>>>>>&26 default x Microsoft OOXML
  75. >>>>>&26 default x Microsoft OOXML