/**
 * Fill in default MIME-detection parameters from the main configuration.
 *
 * Keys already present in $params are kept; only missing keys receive a
 * default. The legacy relative locations 'includes/mime.info' and
 * 'includes/mime.types' are remapped to the copies under libs/mime/ next to
 * this file. A 'detectCallback' is installed (overwriting any existing one)
 * when the MimeDetectorCommand setting is non-empty.
 *
 * @param array $params Caller-supplied parameters
 * @param Config $mainConfig Source of MimeTypeFile, MimeInfoFile, XMLMimeTypes
 *   and MimeDetectorCommand
 * @return array The parameters with defaults applied
 */
public static function applyDefaultParameters(array $params, Config $mainConfig) {
	$logger = LoggerFactory::getInstance('Mime');

	$guessFromContent = function ($mimeAnalyzer, &$head, &$tail, $file, &$mime) use($logger) {
		// Also test DjVu
		$deja = new DjVuImage($file);
		if (!$deja->isValid()) {
			// Some strings by reference for performance - assuming well-behaved hooks
			Hooks::run('MimeMagicGuessFromContent', [$mimeAnalyzer, &$head, &$tail, $file, &$mime]);
			return;
		}
		$logger->info(__METHOD__ . ": detected {$file} as image/vnd.djvu\n");
		$mime = 'image/vnd.djvu';
	};

	$improveFromExtension = function ($mimeAnalyzer, $ext, &$mime) {
		// Media handling extensions can improve the MIME detected
		Hooks::run('MimeMagicImproveFromExtension', [$mimeAnalyzer, $ext, &$mime]);
	};

	$initialize = function ($mimeAnalyzer) {
		// Allow media handling extensions adding MIME-types and MIME-info
		Hooks::run('MimeMagicInit', [$mimeAnalyzer]);
	};

	$defaults = [
		'typeFile' => $mainConfig->get('MimeTypeFile'),
		'infoFile' => $mainConfig->get('MimeInfoFile'),
		'xmlTypes' => $mainConfig->get('XMLMimeTypes'),
		'guessCallback' => $guessFromContent,
		'extCallback' => $improveFromExtension,
		'initCallback' => $initialize,
		'logger' => $logger,
	];
	// Array union: existing keys in $params take precedence over the defaults.
	$params = $params + $defaults;

	// Legacy relative path => bundled file, per parameter key.
	$relocations = [
		'infoFile' => ['includes/mime.info', __DIR__ . "/libs/mime/mime.info"],
		'typeFile' => ['includes/mime.types', __DIR__ . "/libs/mime/mime.types"],
	];
	foreach ($relocations as $key => $paths) {
		if ($params[$key] === $paths[0]) {
			$params[$key] = $paths[1];
		}
	}

	$detectorCmd = $mainConfig->get('MimeDetectorCommand');
	if (!$detectorCmd) {
		return $params;
	}

	$params['detectCallback'] = function ($file) use($detectorCmd) {
		$escapedFile = wfEscapeShellArg($file);
		return wfShellExec("{$detectorCmd} " . $escapedFile);
	};

	return $params;
}
/**
 * Guess the mime type from the file contents.
 *
 * Reads the first 1024 bytes ("head") and up to the last 65558 bytes ("tail")
 * of the file and runs these checks in order, returning on the first match:
 * hard-coded magic numbers, EBML (Matroska/WebM), WebP, the PHP heuristic,
 * well-formed XML, shebang scripts, ZIP end-of-central-directory record,
 * getimagesize() and finally DjVu.
 *
 * All binary signatures are written with explicit \x escapes so that they do
 * not depend on the encoding in which this source file is stored.
 *
 * @param string $file Path of the file to inspect
 * @param mixed $ext Only passed through to detectZipType()
 * @return bool|string The detected mime type; 'unknown/unknown' if the file
 *   cannot be opened or is an unrecognised EBML file; false if no check matched
 */
private function doGuessMimeType($file, $ext) {
	// TODO: remove $ext param

	// Open in binary mode: text mode may translate line endings on some
	// platforms, which would corrupt the byte signatures compared below.
	wfSuppressWarnings();
	$f = fopen($file, 'rb');
	wfRestoreWarnings();
	if (!$f) {
		return 'unknown/unknown';
	}

	$head = fread($f, 1024);
	if ($head === false) {
		$head = '';
	}

	// 65558 = maximum size of a zip EOCDR. Clamp to the file size: seeking
	// further back than the start of the file fails and would leave the
	// read position wherever the head read ended.
	$stat = fstat($f);
	$tailLength = $stat ? min(65558, $stat['size']) : 0;
	$tail = '';
	if ($tailLength > 0 && fseek($f, -$tailLength, SEEK_END) === 0) {
		$tail = fread($f, $tailLength);
		if ($tail === false) {
			$tail = '';
		}
	}
	fclose($f);

	wfDebug(__METHOD__ . ": analyzing head and tail of {$file} for magic numbers.\n");

	// Hardcode a few magic number checks...
	$headers = array(
		// Multimedia...
		'MThd' => 'audio/midi',
		'OggS' => 'application/ogg',

		// Image formats...
		// Note that WMF may have a bare header, no magic number.
		"\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives?
		"\xd7\xcd\xc6\x9a" => 'application/x-msmetafile',
		'%PDF' => 'application/pdf',
		'gimp xcf' => 'image/x-xcf',

		// Some forbidden fruit...
		'MZ' => 'application/octet-stream', // DOS/Windows executable
		"\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary
		"\x7fELF" => 'application/octet-stream', // ELF binary
	);
	foreach ($headers as $magic => $candidate) {
		if (strncmp($head, $magic, strlen($magic)) == 0) {
			wfDebug(__METHOD__ . ": magic header in {$file} recognized as {$candidate}\n");
			return $candidate;
		}
	}

	/* Look for WebM and Matroska files */
	if (strncmp($head, "\x1a\x45\xdf\xa3", 4) == 0) {
		// 0x42 0x82 is the EBML DocType element ID
		$doctype = strpos($head, "\x42\x82");
		if ($doctype !== false) {
			// Next byte is datasize, then data (sizes larger than 1 byte are very stupid muxers)
			$data = substr($head, $doctype + 3, 8);
			if (strncmp($data, "matroska", 8) == 0) {
				wfDebug(__METHOD__ . ": recognized file as video/x-matroska\n");
				return "video/x-matroska";
			} elseif (strncmp($data, "webm", 4) == 0) {
				wfDebug(__METHOD__ . ": recognized file as video/webm\n");
				return "video/webm";
			}
		}
		wfDebug(__METHOD__ . ": unknown EBML file\n");
		return "unknown/unknown";
	}

	/* Look for WebP */
	if (strncmp($head, "RIFF", 4) == 0 && strncmp(substr($head, 8, 8), "WEBPVP8 ", 8) == 0) {
		wfDebug(__METHOD__ . ": recognized file as image/webp\n");
		return "image/webp";
	}

	/**
	 * Look for PHP. Check for this before HTML/XML... Warning: this is a
	 * heuristic, and won't match a file with a lot of non-PHP before. It
	 * will also match text files which could be PHP. :)
	 *
	 * @todo FIXME: For this reason, the check is probably useless -- an attacker
	 * could almost certainly just pad the file with a lot of nonsense to
	 * circumvent the check in any case where it would be a security
	 * problem. On the other hand, it causes harmful false positives (bug
	 * 16583). The heuristic has been cut down to exclude three-character
	 * strings like "<? ", but should it be axed completely?
	 *
	 * Apart from the plain '<?php', the patterns below are the UTF-16LE
	 * encodings of "<?php", "<? ", "<?\n", "<?\t" and "<?=".
	 */
	if (strpos($head, '<?php') !== false
		|| strpos($head, "<\x00?\x00p\x00h\x00p") !== false
		|| strpos($head, "<\x00?\x00 ") !== false
		|| strpos($head, "<\x00?\x00\n") !== false
		|| strpos($head, "<\x00?\x00\t") !== false
		|| strpos($head, "<\x00?\x00=") !== false
	) {
		wfDebug(__METHOD__ . ": recognized {$file} as application/x-php\n");
		return 'application/x-php';
	}

	/**
	 * look for XML formats (XHTML and SVG)
	 */
	$xml = new XmlTypeCheck($file);
	if ($xml->wellFormed) {
		global $wgXMLMimeTypes;
		if (isset($wgXMLMimeTypes[$xml->getRootElement()])) {
			return $wgXMLMimeTypes[$xml->getRootElement()];
		} else {
			return 'application/xml';
		}
	}

	/**
	 * look for shell scripts
	 */
	$script_type = null;

	# detect by shebang, optionally preceded by a byte order mark
	if (substr($head, 0, 2) === "#!") {
		$script_type = "ASCII";
	} elseif (substr($head, 0, 5) === "\xef\xbb\xbf#!") {
		$script_type = "UTF-8";
	} elseif (substr($head, 0, 6) === "\xfe\xff\x00#\x00!") {
		$script_type = "UTF-16BE";
	} elseif (substr($head, 0, 6) === "\xff\xfe#\x00!\x00") {
		$script_type = "UTF-16LE";
	}

	if ($script_type) {
		if ($script_type !== "UTF-8" && $script_type !== "ASCII") {
			// Quick and dirty fold down to ASCII!
			$pack = array('UTF-16BE' => 'n*', 'UTF-16LE' => 'v*');
			// Skip the two BOM bytes, then decode 16-bit code units
			$chars = unpack($pack[$script_type], substr($head, 2));
			$head = '';
			foreach ($chars as $codepoint) {
				if ($codepoint < 128) {
					$head .= chr($codepoint);
				} else {
					$head .= '?';
				}
			}
		}

		$match = array();

		// The last path component of the interpreter names the script type
		if (preg_match('%/?([^\\s]+/)(\\w+)%', $head, $match)) {
			$mime = "application/x-{$match[2]}";
			wfDebug(__METHOD__ . ": shell script recognized as {$mime}\n");
			return $mime;
		}
	}

	// Check for ZIP variants (before getimagesize): look for the
	// end-of-central-directory record signature in the tail
	if (strpos($tail, "PK\x05\x06") !== false) {
		wfDebug(__METHOD__ . ": ZIP header present in {$file}\n");
		return $this->detectZipType($head, $tail, $ext);
	}

	wfSuppressWarnings();
	$gis = getimagesize($file);
	wfRestoreWarnings();

	if ($gis && isset($gis['mime'])) {
		$mime = $gis['mime'];
		wfDebug(__METHOD__ . ": getimagesize detected {$file} as {$mime}\n");
		return $mime;
	}

	// Also test DjVu
	$deja = new DjVuImage($file);
	if ($deja->isValid()) {
		wfDebug(__METHOD__ . ": detected {$file} as image/vnd.djvu\n");
		return 'image/vnd.djvu';
	}

	return false;
}
/**
 * Load metadata from the file itself.
 *
 * Resolves the file path (falling back to the shared upload directory when
 * the file is missing locally and shared uploads are configured), then fills
 * in mime, type, size, width, height, metadata and bits from the file.
 * When the file cannot be found, mime is NULL, type is MEDIATYPE_UNKNOWN and
 * the dimensions are 0.
 */
function loadFromFile() {
	global $wgUseSharedUploads, $wgSharedUploadDirectory, $wgContLang, $wgShowEXIF;
	wfProfileIn(__METHOD__);

	$this->imagePath = $this->getFullPath();
	$this->fileExists = file_exists($this->imagePath);
	$this->fromSharedDirectory = false;
	$dimensions = array();

	if (!$this->fileExists) {
		wfDebug(__METHOD__ . ': ' . $this->imagePath . " not found locally!\n");

		# If the file is not found, and a shared upload directory is used, look for it there.
		if ($wgUseSharedUploads && $wgSharedUploadDirectory) {
			# In case we're on a wgCapitalLinks=false wiki, we
			# capitalize the first letter of the filename before
			# looking it up in the shared repository.
			$shared = Image::newFromName($wgContLang->ucfirst($this->name));
			$this->fileExists = $shared && file_exists($shared->getFullPath(true));
			if ($this->fileExists) {
				$this->name = $shared->name;
				$this->imagePath = $this->getFullPath(true);
				$this->fromSharedDirectory = true;
			}
		}
	}

	if (!$this->fileExists) {
		$this->mime = NULL;
		$this->type = MEDIATYPE_UNKNOWN;
		wfDebug(__METHOD__ . ': ' . $this->imagePath . " NOT FOUND!\n");
	} else {
		$magic =& wfGetMimeMagic();
		$this->mime = $magic->guessMimeType($this->imagePath, true);
		$this->type = $magic->getMediaType($this->imagePath, $this->mime);

		# Get size in bytes
		$this->size = filesize($this->imagePath);

		$magic =& wfGetMimeMagic();

		# Height and width
		wfSuppressWarnings();
		switch ($this->mime) {
			case 'image/svg':
				$dimensions = wfGetSVGsize($this->imagePath);
				break;
			case 'image/vnd.djvu':
				$deja = new DjVuImage($this->imagePath);
				$dimensions = $deja->getImageSize();
				break;
			default:
				# Don't try to get the width and height of sound and video files, that's bad for performance
				$dimensions = $magic->isPHPImageType($this->mime)
					? getimagesize($this->imagePath)
					: false;
		}
		wfRestoreWarnings();

		wfDebug(__METHOD__ . ': ' . $this->imagePath . " loaded, " . $this->size . " bytes, " . $this->mime . ".\n");
	}

	$this->width = $dimensions ? $dimensions[0] : 0;
	$this->height = $dimensions ? $dimensions[1] : 0;

	#NOTE: index 2 of the size array contains a code for the image type. This is no longer used.

	#NOTE: we have to set this flag early to avoid load() being called
	# by some of the functions below. This may lead to recursion or other bad things!
	# As there's only one thread of execution, this should be safe anyway.
	$this->dataLoaded = true;

	$this->metadata = serialize($this->retrieveExifData($this->imagePath));

	$this->bits = isset($dimensions['bits']) ? $dimensions['bits'] : 0;

	wfProfileOut(__METHOD__);
}
/**
 * Guess the mime type from the file contents.
 *
 * Reads the first 1024 bytes ("head") and up to the last 65558 bytes ("tail")
 * of the file and runs these checks in order, returning on the first match:
 * hard-coded magic numbers, the PHP heuristic, well-formed XML, shebang
 * scripts, ZIP end-of-central-directory record, getimagesize() and DjVu.
 *
 * All binary signatures are written with explicit \x escapes so that they do
 * not depend on the encoding in which this source file is stored.
 *
 * @param string $file Path of the file to inspect
 * @param mixed $ext Not used by this method
 * @return bool|string The detected mime type; "unknown/unknown" if the file
 *   cannot be opened; false if no check matched
 */
function doGuessMimeType($file, $ext = true) {
	// Open in binary mode: text mode may translate line endings on some
	// platforms, which would corrupt the byte signatures compared below.
	wfSuppressWarnings();
	$f = fopen($file, "rb");
	wfRestoreWarnings();
	if (!$f) {
		return "unknown/unknown";
	}

	$head = fread($f, 1024);
	if ($head === false) {
		$head = '';
	}

	// 65558 = maximum size of a zip EOCDR. Clamp to the file size: seeking
	// further back than the start of the file fails and would leave the
	// read position wherever the head read ended.
	$stat = fstat($f);
	$tailLength = $stat ? min(65558, $stat['size']) : 0;
	$tail = '';
	if ($tailLength > 0 && fseek($f, -$tailLength, SEEK_END) === 0) {
		$tail = fread($f, $tailLength);
		if ($tail === false) {
			$tail = '';
		}
	}
	fclose($f);

	// Hardcode a few magic number checks...
	$headers = array(
		// Multimedia...
		'MThd' => 'audio/midi',
		'OggS' => 'application/ogg',

		// Image formats...
		// Note that WMF may have a bare header, no magic number.
		"\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives?
		"\xd7\xcd\xc6\x9a" => 'application/x-msmetafile',
		'%PDF' => 'application/pdf',
		'gimp xcf' => 'image/x-xcf',

		// Some forbidden fruit...
		'MZ' => 'application/octet-stream', // DOS/Windows executable
		"\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary
		"\x7fELF" => 'application/octet-stream', // ELF binary
	);
	foreach ($headers as $magic => $candidate) {
		if (strncmp($head, $magic, strlen($magic)) == 0) {
			wfDebug(__METHOD__ . ": magic header in {$file} recognized as {$candidate}\n");
			return $candidate;
		}
	}

	/*
	 * look for PHP
	 * Check for this before HTML/XML...
	 * Warning: this is a heuristic, and won't match a file with a lot of non-PHP before.
	 * It will also match text files which could be PHP. :)
	 *
	 * The first five patterns are plain ASCII; the last five are the same
	 * open tags encoded as UTF-16LE.
	 */
	if (strpos($head, '<?php') !== false
		|| strpos($head, '<? ') !== false
		|| strpos($head, "<?\n") !== false
		|| strpos($head, "<?\t") !== false
		|| strpos($head, "<?=") !== false
		|| strpos($head, "<\x00?\x00p\x00h\x00p") !== false
		|| strpos($head, "<\x00?\x00 ") !== false
		|| strpos($head, "<\x00?\x00\n") !== false
		|| strpos($head, "<\x00?\x00\t") !== false
		|| strpos($head, "<\x00?\x00=") !== false
	) {
		wfDebug(__METHOD__ . ": recognized {$file} as application/x-php\n");
		return "application/x-php";
	}

	/*
	 * look for XML formats (XHTML and SVG)
	 */
	$xml = new XmlTypeCheck($file);
	if ($xml->wellFormed) {
		global $wgXMLMimeTypes;
		if (isset($wgXMLMimeTypes[$xml->getRootElement()])) {
			return $wgXMLMimeTypes[$xml->getRootElement()];
		} else {
			return 'application/xml';
		}
	}

	/*
	 * look for shell scripts
	 */
	$script_type = NULL;

	# detect by shebang, optionally preceded by a byte order mark
	if (substr($head, 0, 2) === "#!") {
		$script_type = "ASCII";
	} elseif (substr($head, 0, 5) === "\xef\xbb\xbf#!") {
		$script_type = "UTF-8";
	} elseif (substr($head, 0, 6) === "\xfe\xff\x00#\x00!") {
		$script_type = "UTF-16BE";
	} elseif (substr($head, 0, 6) === "\xff\xfe#\x00!\x00") {
		$script_type = "UTF-16LE";
	}

	if ($script_type) {
		if ($script_type !== "UTF-8" && $script_type !== "ASCII") {
			// Quick and dirty fold down to ASCII!
			$pack = array('UTF-16BE' => 'n*', 'UTF-16LE' => 'v*');
			// Skip the two BOM bytes, then decode 16-bit code units
			$chars = unpack($pack[$script_type], substr($head, 2));
			$head = '';
			foreach ($chars as $codepoint) {
				if ($codepoint < 128) {
					$head .= chr($codepoint);
				} else {
					$head .= '?';
				}
			}
		}

		$match = array();

		// The last path component of the interpreter names the script type
		if (preg_match('%/?([^\\s]+/)(\\w+)%', $head, $match)) {
			$mime = "application/x-{$match[2]}";
			wfDebug(__METHOD__ . ": shell script recognized as {$mime}\n");
			return $mime;
		}
	}

	// Check for ZIP (before getimagesize): look for the
	// end-of-central-directory record signature in the tail
	if (strpos($tail, "PK\x05\x06") !== false) {
		wfDebug(__METHOD__ . ": ZIP header present at end of {$file}\n");
		return $this->detectZipType($head);
	}

	wfSuppressWarnings();
	$gis = getimagesize($file);
	wfRestoreWarnings();

	if ($gis && isset($gis['mime'])) {
		$mime = $gis['mime'];
		wfDebug(__METHOD__ . ": getimagesize detected {$file} as {$mime}\n");
		return $mime;
	}

	// Also test DjVu
	$deja = new DjVuImage($file);
	if ($deja->isValid()) {
		wfDebug(__METHOD__ . ": detected {$file} as image/vnd.djvu\n");
		return 'image/vnd.djvu';
	}

	return false;
}
/**
 * Parse the stored metadata into $this->multiPageXML.
 *
 * If the metadata field is '0' or empty, it is first regenerated from the
 * file via DjVuImage::retrieveMetaData(), the metadata cache is purged and
 * the 'image' table row for this file is updated.
 *
 * An exception raised by the SimpleXMLElement constructor (thrown when the
 * metadata cannot be parsed as XML) is propagated to the caller.
 */
function initializeMultiPageXML() {
	#
	# Check for files uploaded prior to DJVU support activation
	# They have a '0' in their metadata field.
	#
	if ($this->metadata == '0' || $this->metadata == '') {
		$deja = new DjVuImage($this->imagePath);
		$this->metadata = $deja->retrieveMetaData();
		$this->purgeMetadataCache();

		# Update metadata in the database
		$dbw =& wfGetDB(DB_MASTER);
		$dbw->update(
			'image',
			array('img_metadata' => $this->metadata),
			array('img_name' => $this->name),
			__METHOD__
		);
	}

	wfSuppressWarnings();
	try {
		$this->multiPageXML = new SimpleXMLElement($this->metadata);
	} catch (Exception $e) {
		// Keep the suppress/restore calls paired even when parsing fails,
		// then let the caller see the original exception.
		wfRestoreWarnings();
		throw $e;
	}
	wfRestoreWarnings();
}
/**
 * Internal mime type detection, please use guessMimeType() for application code instead.
 *
 * Detection steps, in order:
 * - the external program named by $wgMimeDetectorCommand, if that is set;
 * - otherwise the fileinfo extension, or failing that mime_content_type(),
 *   whichever is available;
 * - getimagesize(), mapping the reported image type code to a mime type;
 * - a DjVu validity check, when getimagesize() did not report an image type;
 * - if $useExt is true, the file extension, using guessTypesForExtension().
 * If no mime type can be determined, this function returns "unknown/unknown".
 *
 * @param string $file The file to check
 * @param bool $useExt switch for allowing to use the file extension to guess the mime type. true by default.
 *
 * @return string the mime type of $file
 * @access private
 */
function detectMimeType($file, $useExt = true) {
	$fname = 'MimeMagic::detectMimeType';
	global $wgMimeDetectorCommand;

	$m = NULL;
	if ($wgMimeDetectorCommand) {
		$fn = wfEscapeShellArg($file);
		$m = `{$wgMimeDetectorCommand} {$fn}`;
	} elseif (function_exists("finfo_open") && function_exists("finfo_file")) {
		# This requires the fileinfo extension by PECL,
		# see http://pecl.php.net/package/fileinfo
		# This must be compiled into PHP
		#
		# finfo is the official replacement for the deprecated
		# mime_content_type function, see below.
		#
		# If you may need to load the fileinfo extension at runtime, set
		# $wgLoadFileinfoExtension in LocalSettings.php
		$mime_magic_resource = finfo_open(FILEINFO_MIME); /* return mime type ala mimetype extension */
		if ($mime_magic_resource) {
			$m = finfo_file($mime_magic_resource, $file);
			finfo_close($mime_magic_resource);
		} else {
			wfDebug("{$fname}: finfo_open failed on " . FILEINFO_MIME . "!\n");
		}
	} elseif (function_exists("mime_content_type")) {
		# NOTE: this function is available since PHP 4.3.0, but only if
		# PHP was compiled with --with-mime-magic or, before 4.3.2, with --enable-mime-magic.
		#
		# On Windows, you must set mime_magic.magicfile in php.ini to point to the mime.magic file bundled with PHP;
		# sometimes, this may even be needed under linux/unix.
		#
		# Also note that this has been DEPRECATED in favor of the fileinfo extension by PECL, see above.
		# see http://www.php.net/manual/en/ref.mime-magic.php for details.
		$m = mime_content_type($file);
	} else {
		wfDebug("{$fname}: no magic mime detector found!\n");
	}

	if ($m) {
		# normalize: strip charset etc., trim, lower-case
		$m = strtolower(trim(preg_replace('![;, ].*$!', '', $m)));

		if (strpos($m, 'unknown') === false) {
			wfDebug("{$fname}: magic mime type of {$file}: {$m}\n");
			return $m;
		}
		$m = NULL;
	}

	# if still not known, use getimagesize to find out the type of image
	# TODO: skip things that do not have a well-known image extension? Would that be safe?
	wfSuppressWarnings();
	$gis = getimagesize($file);
	wfRestoreWarnings();

	# image type code reported by getimagesize => mime type
	$typesByImageCode = array(
		IMAGETYPE_GIF => "image/gif",
		IMAGETYPE_JPEG => "image/jpeg",
		IMAGETYPE_PNG => "image/png",
		IMAGETYPE_SWF => "application/x-shockwave-flash",
		IMAGETYPE_PSD => "application/photoshop",
		IMAGETYPE_BMP => "image/bmp",
		IMAGETYPE_TIFF_II => "image/tiff",
		IMAGETYPE_TIFF_MM => "image/tiff",
		IMAGETYPE_JPC => "image",
		IMAGETYPE_JP2 => "image/jpeg2000",
		IMAGETYPE_JPX => "image/jpeg2000",
		IMAGETYPE_JB2 => "image",
		IMAGETYPE_SWC => "application/x-shockwave-flash",
		IMAGETYPE_IFF => "image/vnd.xiff",
		IMAGETYPE_WBMP => "image/vnd.wap.wbmp",
		IMAGETYPE_XBM => "image/x-xbitmap",
	);

	$notAnImage = false;
	if ($gis && is_array($gis) && $gis[2]) {
		if (isset($typesByImageCode[$gis[2]])) {
			$m = $typesByImageCode[$gis[2]];
		}

		if ($m) {
			wfDebug("{$fname}: image mime type of {$file}: {$m}\n");
			return $m;
		}
		$notAnImage = true;
	} else {
		// Also test DjVu
		$deja = new DjVuImage($file);
		if ($deja->isValid()) {
			wfDebug("{$fname}: detected {$file} as image/vnd.djvu\n");
			return 'image/vnd.djvu';
		}
	}

	# if desired, look at extension as a fallback.
	if ($useExt) {
		$i = strrpos($file, '.');
		$e = strtolower($i ? substr($file, $i + 1) : '');

		$m = $this->guessTypesForExtension($e);

		# TODO: if $notAnImage is set, do not trust the file extension if
		# the result is one of the image types that should have been recognized
		# by getimagesize

		if ($m) {
			wfDebug("{$fname}: extension mime type of {$file}: {$m}\n");
			return $m;
		}
	}

	# unknown type
	wfDebug("{$fname}: failed to guess mime type for {$file}!\n");
	return "unknown/unknown";
}