/** determine the mimetype of a file
 *
 * This routine tries to discover the mimetype of a file.
 * As a quick first check (step 0) we ask getimagesize() whether the file is an image.
 * If that doesn't work, we try to determine the mimetype via the fileinfo extension.
 * If that doesn't work, we try the deprecated mime_content_type() function.
 * If that doesn't work, we try to shell out to file(1).
 * If that doesn't work, we resort to "guessing" the mimetype based
 * on the extension of the file or else we return the generic
 * 'application/octet-stream'.
 *
 * Note that in step 3 we shell out and try to execute the file(1) command.
 * The results are checked against a pattern to assert that we are
 * really dealing with a mime type. The pattern is described in RFC2616
 * (see sections 3.7 and 2.2):
 *
 *<pre>
 *       media-type     = type "/" subtype *( ";" parameter )
 *       type           = token
 *       subtype        = token
 *       token          = 1*<any CHAR except CTLs or separators>
 *       separators     = "(" | ")" | "<" | ">" | "@"
 *                      | "," | ";" | ":" | "\" | <">
 *                      | "/" | "[" | "]" | "?" | "="
 *                      | "{" | "}" | SP | HT
 *       CHAR           = <any US-ASCII character (octets 0 - 127)>
 *       CTL            = <any US-ASCII control character
 *                        (octets 0 - 31) and DEL (127)>
 *       SP             = <US-ASCII SP, space (32)>
 *       HT             = <US-ASCII HT, horizontal-tab (9)>
 *       <">            = <US-ASCII double-quote mark (34)>
 *</pre>
 *
 * This description means we should look for two tokens containing
 * letters a-z or A-Z, digits 0-9 and these special characters:
 * ! # $ % & ' * + - . ^ _ ` | or ~. That's it.
 *
 * Note that file(1) may return a mime type with additional parameters.
 * e.g. 'text/plain; charset=US-ASCII'. This fits the pattern, because
 * it starts with a token, a slash and another token.
 *
 * The optional parameter $name is used to determine the mimetype based
 * on the extension (as a last resort), even when the current name of the
 * file is meaningless, e.g. when uploading a file, the name of the file
 * (from $_FILES['file0']['tmp_name']) is something like '/tmp/php4r5dwfw',
 * even though $_FILES['file0']['name'] might read 'S6301234.JPG'.
 * If $name is not specified (i.e. is empty), we construct it from $path.
 *
 * Every step only returns when it actually yielded a result; a step that
 * fails (returns FALSE) falls through to the next one, so the function
 * always ends up returning a string.
 *
 * @param string $path fully qualified path to the file to test
 * @param string $name name of the file, possibly different from $path
 * @return string mimetype of the file $path
 * @todo there is room for improvement here:
 *       the code in step 1 and step 2 is largely untested
 */
function get_mimetype($path, $name = '') {
    // 0 -- quick check for file of type 'image/*' (suppress annoying warning message from getimagesize())
    $imagesize = @getimagesize($path);
    if (is_array($imagesize) && isset($imagesize['mime'])) {
        $mimetype = $imagesize['mime'];
        // logger(sprintf('%d: %s(): path=%s name=%s mime=%s',0,__FUNCTION__,$path,$name,$mimetype),WLOG_DEBUG);
        return $mimetype;
    }

    // 1 -- try the finfo-route if it is available
    // Note: defined() needs the NAME of the constant (a string), not its value
    if (function_exists('finfo_open') &&
        function_exists('finfo_file') &&
        function_exists('finfo_close') &&
        defined('FILEINFO_MIME')) {
        $finfo = finfo_open(FILEINFO_MIME);
        if ($finfo !== FALSE) {
            $mimetype = finfo_file($finfo, $path);
            // Only a genuine resource (older PHP versions) needs an explicit close;
            // newer PHP versions hand out an object which is released automatically.
            if (is_resource($finfo)) {
                finfo_close($finfo);
            }
            if ($mimetype !== FALSE) {
                // logger(sprintf('%d: %s(): path=%s name=%s mime=%s',1,__FUNCTION__,$path,$name,$mimetype),WLOG_DEBUG);
                return $mimetype;
            }
        }
    }

    // 2 -- now try the deprecated mime_content_type method
    if (function_exists('mime_content_type')) {
        $mimetype = mime_content_type($path);
        // on failure we must NOT return FALSE but fall through to steps 3 and 4
        if ($mimetype !== FALSE) {
            // logger(sprintf('%d: %s(): path=%s name=%s mime=%s',2,__FUNCTION__,$path,$name,$mimetype),WLOG_DEBUG);
            return $mimetype;
        }
    }

    // 3 -- now try to shell out and use the file command
    // (skipped altogether if exec() is not available, e.g. because it was disabled)
    if (function_exists('exec')) {
        $command = sprintf('file -b -i %s', escapeshellarg($path)); // -b = brief output, -i = output mime type strings
        $dummy = array();
        $retval = 0;
        $mimetype = exec($command, $dummy, $retval);
        if ($retval == 0 && is_string($mimetype)) {
            // now assert that the result looks like a mimetype and not an error message
            if (get_mediatype($mimetype) !== FALSE) {
                // logger(sprintf('%d: %s(): path=%s name=%s mime=%s',3,__FUNCTION__,$path,$name,$mimetype),WLOG_DEBUG);
                return $mimetype;
            }
        }
    }

    // 4 -- take a wild guess; boldly assume that the file extension carries any meaning whatsoever
    $ext = strtolower(pathinfo(empty($name) ? $path : $name, PATHINFO_EXTENSION));
    $mimetypes = get_mimetypes_array();
    $mimetype = isset($mimetypes[$ext]) ? $mimetypes[$ext] : 'application/octet-stream';
    // logger(sprintf('%d: %s(): path=%s name=%s mime=%s',4,__FUNCTION__,$path,$name,$mimetype),WLOG_DEBUG);
    return $mimetype;
}
/** try to make sure that the extension of file $name makes sense or matches the actual filetype
 *
 * this checks or changes the $name of the file in line with the
 * mimetype of the actual file (as established by get_mimetype()).
 *
 * The reason to do this is to make it harder to 'smuggle in' files
 * with deceptive filenames/extensions. Quite often the extension is
 * used to determine the type of the file, even by browsers that should
 * know better. By uploading a malicious .PDF using an innocuous extension
 * like .TXT, a browser may be tricked into rendering that .PDF inline.
 * By changing the extension from .TXT to .PDF we can mitigate that risk,
 * at least a little bit. (People somehow trust an extension even though
 * they should know better and file(1) says so...)
 *
 * Strategy is as follows. If the mimetype based on the $name matches the
 * actual mimetype, we can simply allow the name provided.
 *
 * If there is a difference, we try to find an extension that maps to the
 * same mimetype as that of the actual file. IOW: we put more trust in the
 * mimetype of the actual file than we do in the mimetype suggested by the
 * extension.
 *
 * Note that the parameter $type is currently not used in the decision;
 * it is only kept in the signature for the benefit of existing callers.
 *
 * @param string $path full path to the actual file (from $_FILES[$i]['tmp_name'])
 * @param string $name the requested name of the file to examine (from $_FILES[$i]['name'])
 * @param string $type the suggested filetype of the file (from $_FILES[$i]['type']) (currently unused)
 * @return string the sanitised name and extension based on the file type
 */
function sanitise_filetype($path, $name, $type) {
    $generic_type = 'application/octet-stream';

    // 0 -- initialise: isolate the $filename and $extension
    if (strpos($name, '.') === FALSE) { // not a single dot -> filename without extension
        $filename = $name;
        $extension = '';
    } else {
        $components = explode('.', $name);
        $extension = array_pop($components);
        $filename = implode('.', $components);
        unset($components);
    }

    // 1 -- does actual file mimetype agree with the file extension?
    $type_path = get_mediatype(get_mimetype($path, $name));
    $ext = utf8_strtolower($extension);
    $mimetypes = get_mimetypes_array();
    $type_name = isset($mimetypes[$ext]) ? get_mediatype($mimetypes[$ext]) : $generic_type;
    if (strcmp($type_path, $type_name) == 0) {
        return $name;
    }

    // 2 -- No, we change the extension based on the actual mimetype of the file
    // 2A - lookup the first extension matching type, or use '' (which implies application/octet-stream)
    // The strict search prevents a failed lookup ($type_path === FALSE) from
    // loosely matching an unrelated table entry such as '' or '0'.
    $new_extension = array_search($type_path, $mimetypes, TRUE);
    if ($new_extension === FALSE || is_null($new_extension)) {
        $new_extension = '';
        // the table may or may not have an entry for the empty extension: do not rely on it
        $fallback_type = isset($mimetypes[$new_extension]) ? $mimetypes[$new_extension] : $generic_type;
        logger(sprintf('%s.%s(): mimetype \'%s\' not recognised; using \'%s\' instead',
            __CLASS__, __FUNCTION__, $type_path, $fallback_type));
    }
    // array keys that look like integers are handed back as integers; normalise
    // to string so the comparison below does not mistake 0 for 'no extension'
    $new_extension = (string) $new_extension;

    // 2B - avoid tricks with double extensions (eg. upload of "malware.exe.txt")
    if ($new_extension === '') {
        if ($type_name == $generic_type) {
            // preserve original extension and case because the original
            // extension will yield 'application/octet-stream' when served via file.php,
            // i.e. there is no need to lose the extension if it yields the same mimetype anyway
            $new_name = $name;
        } elseif (strpos($filename, '.') === FALSE) {
            // filename has no dot =>
            // no part of existing filename can be mistaken for an extension =>
            // don't add anything at all
            $new_name = $filename;
        } else {
            // bare $filename already contains an extension =>
            // add '.bin' to force 'application/octet-stream'
            $new_name = $filename . '.bin';
        }
    } else {
        $new_name = $filename . '.' . $new_extension;
    }
    logger(sprintf('%s.%s(): namechange %s -> %s (%s)', __CLASS__, __FUNCTION__, $name, $new_name, $type_path), WLOG_DEBUG);
    return $new_name;
}