Example #1
0
 /**
  * Complete a MIME analyzer parameter array with default values.
  *
  * Defaults are merged with the array union operator, so any key already
  * present in $params takes precedence over the value computed here.
  *
  * @param array $params Caller-supplied parameters
  * @param Config $mainConfig Configuration providing 'MimeTypeFile',
  *   'MimeInfoFile', 'XMLMimeTypes' and 'MimeDetectorCommand'
  * @return array $params with the missing entries filled in
  */
 public static function applyDefaultParameters(array $params, Config $mainConfig)
 {
     $logger = LoggerFactory::getInstance('Mime');

     // Content-based guessing: try DjVu first, then hand over to the hook.
     $guessFromContent = function ($mimeAnalyzer, &$head, &$tail, $file, &$mime) use($logger) {
         // Also test DjVu
         $deja = new DjVuImage($file);
         if (!$deja->isValid()) {
             // Some strings by reference for performance - assuming well-behaved hooks
             Hooks::run('MimeMagicGuessFromContent', [$mimeAnalyzer, &$head, &$tail, $file, &$mime]);
             return;
         }
         $logger->info(__METHOD__ . ": detected {$file} as image/vnd.djvu\n");
         $mime = 'image/vnd.djvu';
     };

     // Media handling extensions can improve the MIME detected
     $improveFromExtension = function ($mimeAnalyzer, $ext, &$mime) {
         Hooks::run('MimeMagicImproveFromExtension', [$mimeAnalyzer, $ext, &$mime]);
     };

     // Allow media handling extensions adding MIME-types and MIME-info
     $initialize = function ($mimeAnalyzer) {
         Hooks::run('MimeMagicInit', [$mimeAnalyzer]);
     };

     $defaults = [
         'typeFile' => $mainConfig->get('MimeTypeFile'),
         'infoFile' => $mainConfig->get('MimeInfoFile'),
         'xmlTypes' => $mainConfig->get('XMLMimeTypes'),
         'guessCallback' => $guessFromContent,
         'extCallback' => $improveFromExtension,
         'initCallback' => $initialize,
         'logger' => $logger,
     ];
     $params += $defaults;

     // Map the two legacy relative paths to the copies under this directory.
     $bundledFiles = [
         'infoFile' => ['includes/mime.info', __DIR__ . "/libs/mime/mime.info"],
         'typeFile' => ['includes/mime.types', __DIR__ . "/libs/mime/mime.types"],
     ];
     foreach ($bundledFiles as $key => $paths) {
         if ($params[$key] === $paths[0]) {
             $params[$key] = $paths[1];
         }
     }

     $detectorCmd = $mainConfig->get('MimeDetectorCommand');
     if (!$detectorCmd) {
         return $params;
     }

     // An external detector command is configured: expose it as a callback.
     $params['detectCallback'] = function ($file) use($detectorCmd) {
         return wfShellExec("{$detectorCmd} " . wfEscapeShellArg($file));
     };
     return $params;
 }
Example #2
0
 /**
  * Guess the mime type from the file contents.
  *
  * The first 1024 bytes (head) and up to the last 65558 bytes (tail) of the
  * file are read and checked, in this order, against: a table of hard-coded
  * magic numbers, EBML (Matroska/WebM), WebP, PHP open tags, well-formed
  * XML, shebang scripts, the ZIP end-of-central-directory signature,
  * getimagesize() and finally DjVu.
  *
  * All binary signatures are written with explicit hex escapes so that they
  * cannot be damaged when the source file is re-encoded.
  *
  * @param string $file Path of the file to inspect
  * @param mixed $ext Only forwarded to detectZipType()
  * @return bool|string The detected mime type; 'unknown/unknown' if the file
  *   cannot be opened or is an unrecognised EBML file; false if no check matched
  */
 private function doGuessMimeType($file, $ext)
 {
     // TODO: remove $ext param
     // Read a chunk of the file. Binary mode is required: text mode may
     // translate line endings on some platforms and corrupt the signatures.
     wfSuppressWarnings();
     $f = fopen($file, 'rb');
     wfRestoreWarnings();
     if (!$f) {
         return 'unknown/unknown';
     }
     $head = (string) fread($f, 1024);
     // 65558 = maximum size of a zip EOCDR. Clamp to the file size: seeking
     // back further than the start of the file fails and would leave the
     // read position wherever the head read ended.
     $stat = fstat($f);
     $tailLength = min(65558, $stat ? $stat['size'] : 0);
     $tail = '';
     if ($tailLength > 0 && fseek($f, -$tailLength, SEEK_END) === 0) {
         $tail = (string) fread($f, $tailLength);
     }
     fclose($f);
     wfDebug(__METHOD__ . ": analyzing head and tail of {$file} for magic numbers.\n");
     // Hardcode a few magic number checks...
     $headers = array(
         // Multimedia...
         'MThd' => 'audio/midi',
         'OggS' => 'application/ogg',
         // Image formats...
         // Note that WMF may have a bare header, no magic number.
         "\x01\x00\x09\x00" => 'application/x-msmetafile',
         "\xd7\xcd\xc6\x9a" => 'application/x-msmetafile',
         '%PDF' => 'application/pdf',
         'gimp xcf' => 'image/x-xcf',
         // Executables are reported as generic binary data
         'MZ' => 'application/octet-stream', // DOS/Windows executable
         "\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary
         "\x7fELF" => 'application/octet-stream', // ELF binary
     );
     foreach ($headers as $magic => $candidate) {
         if (strncmp($head, $magic, strlen($magic)) == 0) {
             wfDebug(__METHOD__ . ": magic header in {$file} recognized as {$candidate}\n");
             return $candidate;
         }
     }
     /* Look for WebM and Matroska files */
     if (strncmp($head, pack("C4", 0x1a, 0x45, 0xdf, 0xa3), 4) == 0) {
         // 0x4282 is the EBML DocType element ID
         $doctype = strpos($head, "\x42\x82");
         if ($doctype !== false) {
             // Next byte is datasize, then data (sizes larger than 1 byte are very stupid muxers)
             $data = substr($head, $doctype + 3, 8);
             if (strncmp($data, "matroska", 8) == 0) {
                 wfDebug(__METHOD__ . ": recognized file as video/x-matroska\n");
                 return "video/x-matroska";
             } elseif (strncmp($data, "webm", 4) == 0) {
                 wfDebug(__METHOD__ . ": recognized file as video/webm\n");
                 return "video/webm";
             }
         }
         wfDebug(__METHOD__ . ": unknown EBML file\n");
         return "unknown/unknown";
     }
     /* Look for WebP */
     if (strncmp($head, "RIFF", 4) == 0 && strncmp(substr($head, 8, 8), "WEBPVP8 ", 8) == 0) {
         wfDebug(__METHOD__ . ": recognized file as image/webp\n");
         return "image/webp";
     }
     /**
      * Look for PHP.  Check for this before HTML/XML...  Warning: this is a
      * heuristic, and won't match a file with a lot of non-PHP before.  It
      * will also match text files which could be PHP. :)
      *
      * @todo FIXME: For this reason, the check is probably useless -- an attacker
      * could almost certainly just pad the file with a lot of nonsense to
      * circumvent the check in any case where it would be a security
      * problem.  On the other hand, it causes harmful false positives (bug
      * 16583).  The heuristic has been cut down to exclude three-character
      * strings like "<? ", but should it be axed completely?
      *
      * Apart from the plain '<?php', the patterns below are the UTF-16LE
      * encodings of the open tags (each character followed by a NUL byte).
      */
     if (strpos($head, '<?php') !== false
         || strpos($head, "<\x00?\x00p\x00h\x00p") !== false
         || strpos($head, "<\x00?\x00 ") !== false
         || strpos($head, "<\x00?\x00\n") !== false
         || strpos($head, "<\x00?\x00\t") !== false
         || strpos($head, "<\x00?\x00=") !== false
     ) {
         wfDebug(__METHOD__ . ": recognized {$file} as application/x-php\n");
         return 'application/x-php';
     }
     /**
      * look for XML formats (XHTML and SVG)
      */
     $xml = new XmlTypeCheck($file);
     if ($xml->wellFormed) {
         global $wgXMLMimeTypes;
         if (isset($wgXMLMimeTypes[$xml->getRootElement()])) {
             return $wgXMLMimeTypes[$xml->getRootElement()];
         } else {
             return 'application/xml';
         }
     }
     /**
      * look for shell scripts
      */
     $script_type = null;
     # detect by shebang, optionally preceded by a byte order mark
     if (substr($head, 0, 2) === "#!") {
         $script_type = "ASCII";
     } elseif (substr($head, 0, 5) === "\xef\xbb\xbf#!") {
         $script_type = "UTF-8";
     } elseif (substr($head, 0, 6) === "\xfe\xff\x00#\x00!") {
         $script_type = "UTF-16BE";
     } elseif (substr($head, 0, 6) === "\xff\xfe#\x00!\x00") {
         $script_type = "UTF-16LE";
     }
     if ($script_type) {
         if ($script_type !== "UTF-8" && $script_type !== "ASCII") {
             // Quick and dirty fold down to ASCII!
             $pack = array('UTF-16BE' => 'n*', 'UTF-16LE' => 'v*');
             // Skip the two-byte BOM, then decode 16-bit code units
             $chars = unpack($pack[$script_type], substr($head, 2));
             $head = '';
             foreach ($chars as $codepoint) {
                 if ($codepoint < 128) {
                     $head .= chr($codepoint);
                 } else {
                     $head .= '?';
                 }
             }
         }
         $match = array();
         // Use the interpreter's base name, e.g. "#!/bin/sh" gives application/x-sh
         if (preg_match('%/?([^\\s]+/)(\\w+)%', $head, $match)) {
             $mime = "application/x-{$match[2]}";
             wfDebug(__METHOD__ . ": shell script recognized as {$mime}\n");
             return $mime;
         }
     }
     // Check for ZIP variants (before getimagesize): look for the
     // end-of-central-directory record signature in the tail
     if (strpos($tail, "PK\x05\x06") !== false) {
         wfDebug(__METHOD__ . ": ZIP header present in {$file}\n");
         return $this->detectZipType($head, $tail, $ext);
     }
     wfSuppressWarnings();
     $gis = getimagesize($file);
     wfRestoreWarnings();
     if ($gis && isset($gis['mime'])) {
         $mime = $gis['mime'];
         wfDebug(__METHOD__ . ": getimagesize detected {$file} as {$mime}\n");
         return $mime;
     }
     // Also test DjVu
     $deja = new DjVuImage($file);
     if ($deja->isValid()) {
         wfDebug(__METHOD__ . ": detected {$file} as image/vnd.djvu\n");
         return 'image/vnd.djvu';
     }
     return false;
 }
Example #3
0
 /**
  * Load metadata from the file itself
  *
  * Resolves the file path (falling back to the shared upload directory when
  * that is configured and the file is missing locally), then fills in the
  * mime type, media type, size, width, height, bits and serialized EXIF
  * metadata properties of this object.
  */
 function loadFromFile()
 {
     global $wgUseSharedUploads, $wgSharedUploadDirectory, $wgContLang, $wgShowEXIF;
     wfProfileIn(__METHOD__);
     $this->imagePath = $this->getFullPath();
     $this->fileExists = file_exists($this->imagePath);
     $this->fromSharedDirectory = false;
     $dimensions = array();
     if (!$this->fileExists) {
         wfDebug(__METHOD__ . ': ' . $this->imagePath . " not found locally!\n");
         # If a shared upload directory is used, look for the file there.
         if ($wgUseSharedUploads && $wgSharedUploadDirectory) {
             # In case we're on a wgCapitalLinks=false wiki, we
             # capitalize the first letter of the filename before
             # looking it up in the shared repository.
             $shared = Image::newFromName($wgContLang->ucfirst($this->name));
             $this->fileExists = $shared && file_exists($shared->getFullPath(true));
             if ($this->fileExists) {
                 $this->name = $shared->name;
                 $this->imagePath = $this->getFullPath(true);
                 $this->fromSharedDirectory = true;
             }
         }
     }
     if (!$this->fileExists) {
         $this->mime = null;
         $this->type = MEDIATYPE_UNKNOWN;
         wfDebug(__METHOD__ . ': ' . $this->imagePath . " NOT FOUND!\n");
     } else {
         $mimeMagic =& wfGetMimeMagic();
         $this->mime = $mimeMagic->guessMimeType($this->imagePath, true);
         $this->type = $mimeMagic->getMediaType($this->imagePath, $this->mime);
         # Get size in bytes
         $this->size = filesize($this->imagePath);
         $mimeMagic =& wfGetMimeMagic();
         # Height and width
         wfSuppressWarnings();
         if ($this->mime == 'image/svg') {
             $dimensions = wfGetSVGsize($this->imagePath);
         } elseif ($this->mime == 'image/vnd.djvu') {
             $djvu = new DjVuImage($this->imagePath);
             $dimensions = $djvu->getImageSize();
         } elseif ($mimeMagic->isPHPImageType($this->mime)) {
             $dimensions = getimagesize($this->imagePath);
         } else {
             # Don't try to get the width and height of sound and video files, that's bad for performance
             $dimensions = false;
         }
         wfRestoreWarnings();
         wfDebug(__METHOD__ . ': ' . $this->imagePath . " loaded, " . $this->size . " bytes, " . $this->mime . ".\n");
     }
     # Index 0 is the width and index 1 the height; both default to 0.
     $this->width = $dimensions ? $dimensions[0] : 0;
     $this->height = $dimensions ? $dimensions[1] : 0;
     #NOTE: index 2 contains a code for the image type. This is no longer used.
     #NOTE: we have to set this flag early to prevent load() from being called
     # by some of the functions below. This may lead to recursion or other bad things!
     # As there's only one thread of execution, this should be safe anyway.
     $this->dataLoaded = true;
     $this->metadata = serialize($this->retrieveExifData($this->imagePath));
     $this->bits = isset($dimensions['bits']) ? $dimensions['bits'] : 0;
     wfProfileOut(__METHOD__);
 }
Example #4
0
 /**
  * Guess the mime type from the file contents.
  *
  * The first 1024 bytes (head) and up to the last 65558 bytes (tail) of the
  * file are read and checked, in this order, against: a table of hard-coded
  * magic numbers, PHP open tags, well-formed XML, shebang scripts, the ZIP
  * end-of-central-directory signature, getimagesize() and finally DjVu.
  *
  * All binary signatures are written with explicit hex escapes so that they
  * cannot be damaged when the source file is re-encoded.
  *
  * @param string $file Path of the file to inspect
  * @param mixed $ext Not used by this implementation
  * @return bool|string The detected mime type; "unknown/unknown" if the file
  *   cannot be opened; false if no check matched
  */
 function doGuessMimeType($file, $ext = true)
 {
     // Read a chunk of the file. Binary mode is required: text mode may
     // translate line endings on some platforms and corrupt the signatures.
     wfSuppressWarnings();
     $f = fopen($file, "rb");
     wfRestoreWarnings();
     if (!$f) {
         return "unknown/unknown";
     }
     $head = (string) fread($f, 1024);
     // 65558 = maximum size of a zip EOCDR. Clamp to the file size: seeking
     // back further than the start of the file fails and would leave the
     // read position wherever the head read ended.
     $stat = fstat($f);
     $tailLength = min(65558, $stat ? $stat['size'] : 0);
     $tail = '';
     if ($tailLength > 0 && fseek($f, -$tailLength, SEEK_END) === 0) {
         $tail = (string) fread($f, $tailLength);
     }
     fclose($f);
     // Hardcode a few magic number checks...
     $headers = array(
         // Multimedia...
         'MThd' => 'audio/midi',
         'OggS' => 'application/ogg',
         // Image formats...
         // Note that WMF may have a bare header, no magic number.
         "\x01\x00\x09\x00" => 'application/x-msmetafile',
         "\xd7\xcd\xc6\x9a" => 'application/x-msmetafile',
         '%PDF' => 'application/pdf',
         'gimp xcf' => 'image/x-xcf',
         // Executables are reported as generic binary data
         'MZ' => 'application/octet-stream', // DOS/Windows executable
         "\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary
         "\x7fELF" => 'application/octet-stream', // ELF binary
     );
     foreach ($headers as $magic => $candidate) {
         if (strncmp($head, $magic, strlen($magic)) == 0) {
             wfDebug(__METHOD__ . ": magic header in {$file} recognized as {$candidate}\n");
             return $candidate;
         }
     }
     /*
      * look for PHP
      * Check for this before HTML/XML...
      * Warning: this is a heuristic, and won't match a file with a lot of non-PHP before.
      * It will also match text files which could be PHP. :)
      *
      * The first five patterns are the plain ASCII open tags; the last five
      * are the same tags encoded as UTF-16LE (each character followed by NUL).
      */
     if (strpos($head, '<?php') !== false
         || strpos($head, '<? ') !== false
         || strpos($head, "<?\n") !== false
         || strpos($head, "<?\t") !== false
         || strpos($head, "<?=") !== false
         || strpos($head, "<\x00?\x00p\x00h\x00p") !== false
         || strpos($head, "<\x00?\x00 ") !== false
         || strpos($head, "<\x00?\x00\n") !== false
         || strpos($head, "<\x00?\x00\t") !== false
         || strpos($head, "<\x00?\x00=") !== false
     ) {
         wfDebug(__METHOD__ . ": recognized {$file} as application/x-php\n");
         return "application/x-php";
     }
     /*
      * look for XML formats (XHTML and SVG)
      */
     $xml = new XmlTypeCheck($file);
     if ($xml->wellFormed) {
         global $wgXMLMimeTypes;
         if (isset($wgXMLMimeTypes[$xml->getRootElement()])) {
             return $wgXMLMimeTypes[$xml->getRootElement()];
         } else {
             return 'application/xml';
         }
     }
     /*
      * look for shell scripts
      */
     $script_type = NULL;
     # detect by shebang, optionally preceded by a byte order mark
     if (substr($head, 0, 2) === "#!") {
         $script_type = "ASCII";
     } elseif (substr($head, 0, 5) === "\xef\xbb\xbf#!") {
         $script_type = "UTF-8";
     } elseif (substr($head, 0, 6) === "\xfe\xff\x00#\x00!") {
         $script_type = "UTF-16BE";
     } elseif (substr($head, 0, 6) === "\xff\xfe#\x00!\x00") {
         $script_type = "UTF-16LE";
     }
     if ($script_type) {
         if ($script_type !== "UTF-8" && $script_type !== "ASCII") {
             // Quick and dirty fold down to ASCII!
             $pack = array('UTF-16BE' => 'n*', 'UTF-16LE' => 'v*');
             // Skip the two-byte BOM, then decode 16-bit code units
             $chars = unpack($pack[$script_type], substr($head, 2));
             $head = '';
             foreach ($chars as $codepoint) {
                 if ($codepoint < 128) {
                     $head .= chr($codepoint);
                 } else {
                     $head .= '?';
                 }
             }
         }
         $match = array();
         // Use the interpreter's base name, e.g. "#!/bin/sh" gives application/x-sh
         if (preg_match('%/?([^\\s]+/)(\\w+)%', $head, $match)) {
             $mime = "application/x-{$match[2]}";
             wfDebug(__METHOD__ . ": shell script recognized as {$mime}\n");
             return $mime;
         }
     }
     // Check for ZIP (before getimagesize): look for the
     // end-of-central-directory record signature in the tail
     if (strpos($tail, "PK\x05\x06") !== false) {
         wfDebug(__METHOD__ . ": ZIP header present at end of {$file}\n");
         return $this->detectZipType($head);
     }
     wfSuppressWarnings();
     $gis = getimagesize($file);
     wfRestoreWarnings();
     if ($gis && isset($gis['mime'])) {
         $mime = $gis['mime'];
         wfDebug(__METHOD__ . ": getimagesize detected {$file} as {$mime}\n");
         return $mime;
     }
     // Also test DjVu
     $deja = new DjVuImage($file);
     if ($deja->isValid()) {
         wfDebug(__METHOD__ . ": detected {$file} as image/vnd.djvu\n");
         return 'image/vnd.djvu';
     }
     return false;
 }
Example #5
0
 /**
  * Populate $this->multiPageXML from the XML stored in $this->metadata.
  *
  * Files uploaded before DjVu support was activated have '0' (or nothing)
  * in their metadata field. For those, the metadata is first re-read from
  * the file through DjVuImage, the metadata cache is purged and the row in
  * the 'image' table is updated.
  *
  * An exception thrown by the SimpleXMLElement constructor for unparsable
  * metadata still propagates to the caller, but warning suppression is
  * restored before it does.
  */
 function initializeMultiPageXML()
 {
     #
     # Check for files uploaded prior to DJVU support activation
     # They have a '0' in their metadata field.
     #
     if ($this->metadata == '0' || $this->metadata == '') {
         $deja = new DjVuImage($this->imagePath);
         $this->metadata = $deja->retrieveMetaData();
         $this->purgeMetadataCache();
         # Update metadata in the database
         $dbw =& wfGetDB(DB_MASTER);
         $dbw->update('image', array('img_metadata' => $this->metadata), array('img_name' => $this->name), __METHOD__);
     }
     wfSuppressWarnings();
     try {
         $this->multiPageXML = new SimpleXMLElement($this->metadata);
     } catch (Exception $e) {
         # Keep the suppress/restore pair balanced before propagating,
         # otherwise warnings would stay suppressed after this call.
         wfRestoreWarnings();
         throw $e;
     }
     wfRestoreWarnings();
 }
Example #6
0
 /** Internal mime type detection, please use guessMimeType() for application code instead.
  *
  * The steps are tried in this order, the first usable result is returned:
  *  - If $wgMimeDetectorCommand is set, that external program is run on the file.
  *    Otherwise the fileinfo extension is used if available, and failing that
  *    mime_content_type() if available.
  *  - The detector result is normalized (parameters such as the charset are
  *    stripped, the value is trimmed and lower-cased) and returned unless it
  *    contains "unknown".
  *  - getimagesize() is used to recognise the file as an image; if it reports
  *    nothing usable, the file is tested for DjVu.
  *  - If $useExt is true, the mime type is guessed from the file extension,
  *    using guessTypesForExtension.
  *  - If no mime type can be determined, this function returns "unknown/unknown".
  *
  * @param string $file The file to check
  * @param bool $useExt switch for allowing to use the file extension to guess the mime type. true by default.
  *
  * @return string the mime type of $file
  * @access private
  */
 function detectMimeType($file, $useExt = true)
 {
     $fname = 'MimeMagic::detectMimeType';
     global $wgMimeDetectorCommand;
     $m = null;
     if ($wgMimeDetectorCommand) {
         $fn = wfEscapeShellArg($file);
         $m = shell_exec("{$wgMimeDetectorCommand} {$fn}");
     } elseif (function_exists("finfo_open") && function_exists("finfo_file")) {
         # This requires the fileinfo extension by PECL,
         # see http://pecl.php.net/package/fileinfo
         # This must be compiled into PHP
         #
         # finfo is the official replacement for the deprecated
         # mime_content_type function, see below.
         #
         # If you may need to load the fileinfo extension at runtime, set
         # $wgLoadFileinfoExtension in LocalSettings.php
         $finfo = finfo_open(FILEINFO_MIME);
         /* return mime type ala mimetype extension */
         if ($finfo) {
             $m = finfo_file($finfo, $file);
             finfo_close($finfo);
         } else {
             wfDebug("{$fname}: finfo_open failed on " . FILEINFO_MIME . "!\n");
         }
     } elseif (function_exists("mime_content_type")) {
         # NOTE: this function is available since PHP 4.3.0, but only if
         # PHP was compiled with --with-mime-magic or, before 4.3.2, with --enable-mime-magic.
         #
         # On Windows, you must set mime_magic.magicfile in php.ini to point to the mime.magic file bundled with PHP;
         # sometimes, this may even be needed under linux/unix.
         #
         # Also note that this has been DEPRECATED in favor of the fileinfo extension by PECL, see above.
         # see http://www.php.net/manual/en/ref.mime-magic.php for details.
         $m = mime_content_type($file);
     } else {
         wfDebug("{$fname}: no magic mime detector found!\n");
     }
     if ($m) {
         # normalize: strip charset etc., surrounding whitespace and case
         $m = strtolower(trim(preg_replace('![;, ].*$!', '', $m)));
         if (strpos($m, 'unknown') === false) {
             wfDebug("{$fname}: magic mime type of {$file}: {$m}\n");
             return $m;
         }
         $m = null;
     }
     #if still not known, use getimagesize to find out the type of image
     #TODO: skip things that do not have a well-known image extension? Would that be safe?
     wfSuppressWarnings();
     $gis = getimagesize($file);
     wfRestoreWarnings();
     $notAnImage = false;
     if ($gis && is_array($gis) && $gis[2]) {
         # Index 2 of the getimagesize result is an IMAGETYPE_* code
         $imageTypeMimes = array(
             IMAGETYPE_GIF => "image/gif",
             IMAGETYPE_JPEG => "image/jpeg",
             IMAGETYPE_PNG => "image/png",
             IMAGETYPE_SWF => "application/x-shockwave-flash",
             IMAGETYPE_PSD => "application/photoshop",
             IMAGETYPE_BMP => "image/bmp",
             IMAGETYPE_TIFF_II => "image/tiff",
             IMAGETYPE_TIFF_MM => "image/tiff",
             IMAGETYPE_JPC => "image",
             IMAGETYPE_JP2 => "image/jpeg2000",
             IMAGETYPE_JPX => "image/jpeg2000",
             IMAGETYPE_JB2 => "image",
             IMAGETYPE_SWC => "application/x-shockwave-flash",
             IMAGETYPE_IFF => "image/vnd.xiff",
             IMAGETYPE_WBMP => "image/vnd.wap.wbmp",
             IMAGETYPE_XBM => "image/x-xbitmap",
         );
         if (isset($imageTypeMimes[$gis[2]])) {
             $m = $imageTypeMimes[$gis[2]];
         }
         if ($m) {
             wfDebug("{$fname}: image mime type of {$file}: {$m}\n");
             return $m;
         }
         $notAnImage = true;
     } else {
         // Also test DjVu
         $djvu = new DjVuImage($file);
         if ($djvu->isValid()) {
             wfDebug("{$fname}: detected {$file} as image/vnd.djvu\n");
             return 'image/vnd.djvu';
         }
     }
     #if desired, look at extension as a fallback.
     if ($useExt) {
         # Everything after the last dot in the path, lower-cased
         $dotPos = strrpos($file, '.');
         $extension = strtolower($dotPos ? substr($file, $dotPos + 1) : '');
         $m = $this->guessTypesForExtension($extension);
         #TODO: if $notAnImage is set, do not trust the file extension if
         # the result is one of the image types that should have been recognized
         # by getimagesize
         if ($m) {
             wfDebug("{$fname}: extension mime type of {$file}: {$m}\n");
             return $m;
         }
     }
     #unknown type
     wfDebug("{$fname}: failed to guess mime type for {$file}!\n");
     return "unknown/unknown";
 }