예제 #1
0
/** determine the mimetype of a file
 *
 * This routine tries to discover the mimetype of a file by working
 * through the following steps; the first step that yields a usable
 * answer wins.
 *
 *  0. Quick check for images via getimagesize().
 *  1. The fileinfo extension (finfo_open() / finfo_file()).
 *  2. The deprecated mime_content_type() function.
 *  3. Shell out to file(1).
 *  4. "Guess" the mimetype based on the extension of the file, or else
 *     return the generic 'application/octet-stream'.
 *
 * A step that is unavailable or that fails (returns FALSE or an empty
 * string) never ends the search; we simply fall through to the next step.
 *
 * Note that in step 3 we shell out and try to execute the file(1) command.
 * The results are checked against a pattern to assert that we are
 * really dealing with a mime type. The pattern is described in RFC2616
 * (see sections 3.7 and 2.2):
 *
 *<pre>
 *      media-type     = type "/" subtype *( ";" parameter )
 *      type           = token
 *      subtype        = token
 *      token          = 1*&lt;any CHAR except CTLs or separators&gt;
 *      separators     = "(" | ")" | "&lt;" | "&gt;" | "@"
 *                     | "," | ";" | ":" | "\" | &lt;"&gt;
 *                     | "/" | "[" | "]" | "?" | "="
 *                     | "{" | "}" | SP | HT
 *      CHAR           = &lt;any US-ASCII character (octets 0 - 127)&gt;
 *      CTL            = &lt;any US-ASCII control character
 *                       (octets 0 - 31) and DEL (127)&gt;
 *      SP             = &lt;US-ASCII SP, space (32)&gt;
 *      HT             = &lt;US-ASCII HT, horizontal-tab (9)&gt;
 *      &lt;"&gt;            = &lt;US-ASCII double-quote mark (34)&gt;
 *</pre>
 *
 * This description means we should look for two tokens containing
 * letters a-z or A-Z, digits 0-9 and these special characters:
 * ! # $ % & ' * + - . ^ _ ` | or ~. That's it.
 *
 * Note that fileinfo and file(1) may return a mime type with additional
 * parameters, e.g. 'text/plain; charset=US-ASCII'. This fits the pattern,
 * because it starts with a token, a slash and another token.
 *
 * The optional parameter $name is used to determine the mimetype based
 * on the extension (as a last resort), even when the current name of the
 * file is meaningless, e.g. when uploading a file, the name of the file
 * (from $_FILES['file0']['tmp_name']) is something like '/tmp/php4r5dwfw',
 * even though $_FILES['file0']['name'] might read 'S6301234.JPG'.
 * If $name is not specified (i.e. is empty), we construct it from $path.
 *
 * @param string $path fully qualified path to the file to test
 * @param string $name name of the file, possibly different from $path
 * @return string mimetype of the file $path (possibly with parameters)
 * @todo there is room for improvement here:
 *       the code in step 1 and step 2 is largely untested
 */
function get_mimetype($path, $name = '')
{
    // 0 -- quick check for image files (suppress annoying warning message from getimagesize())
    $imagesize = @getimagesize($path);
    if (is_array($imagesize) && isset($imagesize['mime'])) {
        return $imagesize['mime'];
    }

    // 1 -- try the finfo-route if it is available
    // (defined() wants the NAME of the constant as a string, not its value)
    if (function_exists('finfo_open') && function_exists('finfo_file') && defined('FILEINFO_MIME')) {
        $finfo = finfo_open(FILEINFO_MIME);
        if ($finfo !== FALSE) {
            $mimetype = finfo_file($finfo, $path);
            // Older PHP versions hand out a resource which we close explicitly;
            // newer versions return an object that is released automatically.
            if (is_resource($finfo) && function_exists('finfo_close')) {
                finfo_close($finfo);
            }
            unset($finfo);
            if (is_string($mimetype) && $mimetype !== '') {
                return $mimetype;
            }
        }
    }

    // 2 -- now try the deprecated mime_content_type method;
    // on failure (FALSE) we carry on with the next step rather than returning FALSE
    if (function_exists('mime_content_type')) {
        $mimetype = mime_content_type($path);
        if (is_string($mimetype) && $mimetype !== '') {
            return $mimetype;
        }
    }

    // 3 -- now try to shell out and use the file command (if exec() has not been disabled)
    if (function_exists('exec')) {
        // -b = brief output, -i = output mime type strings
        $command = sprintf('file -b -i %s', escapeshellarg($path));
        $dummy = array();
        $retval = 0;
        $mimetype = exec($command, $dummy, $retval);
        // assert that the result looks like a mimetype and not an error message
        if ($retval === 0 && is_string($mimetype) && get_mediatype($mimetype) !== FALSE) {
            return $mimetype;
        }
    }

    // 4 -- take a wild guess; boldly assume that the file extension carries any meaning whatsoever
    $ext = strtolower(pathinfo(empty($name) ? $path : $name, PATHINFO_EXTENSION));
    $mimetypes = get_mimetypes_array();
    return isset($mimetypes[$ext]) ? $mimetypes[$ext] : 'application/octet-stream';
}
 /** try to make sure that the extension of file $name makes sense or matches the actual filetype
  *
  * this checks or changes the $name of the file in line with the
  * mimetype of the actual file (as established by get_mimetype()).
  *
  * The reason to do this is to make it harder to 'smuggle in' files
  * with deceptive filenames/extensions. Quite often the extension is
  * used to determine the type of the file, even by browsers that should
  * know better. By uploading a malicious .PDF using an innocuous extension
  * like .TXT, a browser may be tricked into rendering that .PDF inline.
  * By changing the extension from .TXT to .PDF we can mitigate that risk,
  * at least a little bit. (People somehow trust an extension even though
  * they should know better and file(1) says so...)
  *
  * Strategy is as follows. If the mimetype based on the $name matches the
  * actual mimetype, we can simply allow the name provided.
  *
  * If there is a difference, we try to find an extension that maps to the
  * same mimetype as that of the actual file. IOW: we put more trust in the
  * mimetype of the actual file than we do in the mimetype suggested by the
  * extension.
  *
  * If no extension is known for the actual mimetype, the file is treated as
  * 'application/octet-stream': the original name is kept when its extension
  * is unknown too, otherwise the extension is dropped (or replaced with '.bin'
  * when the remaining name still contains a dot).
  *
  * Note that the name is split at the LAST dot, so for 'archive.tar.gz' the
  * extension is 'gz' and the bare filename is 'archive.tar'.
  *
  * @param string $path full path to the actual file (from $_FILES[$i]['tmp_name'])
  * @param string $name the requested name of the file to examine (from $_FILES[$i]['name'])
  * @param string $type the suggested filetype of the file (from $_FILES[$i]['type']);
  *                     currently not used, the browser-supplied type is not trusted
  * @return string the sanitised name and extension based on the file type
  */
 function sanitise_filetype($path, $name, $type)
 {
     // 0 -- initialise: isolate the $filename and $extension (split at the last dot)
     if (strpos($name, '.') === FALSE) {
         // not a single dot -> filename without extension
         $filename = $name;
         $extension = '';
     } else {
         $components = explode('.', $name);
         $extension = array_pop($components);
         $filename = implode('.', $components);
         unset($components);
     }

     // 1 -- does actual file mimetype agree with the file extension?
     $type_path = get_mediatype(get_mimetype($path, $name));
     $ext = utf8_strtolower($extension);
     $mimetypes = get_mimetypes_array();
     $type_name = isset($mimetypes[$ext]) ? get_mediatype($mimetypes[$ext]) : 'application/octet-stream';
     if (strcmp($type_path, $type_name) == 0) {
         return $name;
     }

     // 2 -- No, we change the extension based on the actual mimetype of the file
     // 2A - lookup the first extension matching type, or use '' (which implies application/octet-stream)
     $found = array_search($type_path, $mimetypes, TRUE);
     if ($found === FALSE || is_null($found)) {
         $new_extension = '';
         // do not assume the table has an entry for the empty extension
         $fallback = isset($mimetypes['']) ? $mimetypes[''] : 'application/octet-stream';
         logger(sprintf('%s.%s(): mimetype \'%s\' not recognised; using \'%s\' instead', __CLASS__, __FUNCTION__, $type_path, $fallback));
     } else {
         // array keys that look like integers come back as integers; always work with a string
         $new_extension = (string) $found;
     }

     // 2B - avoid tricks with double extensions (eg. upload of "malware.exe.txt")
     if ($new_extension === '') {
         if ($type_name == 'application/octet-stream') {
             // preserve original extension and case because the original
             // extension will yield 'application/octet-stream' when served via file.php,
             // i.e. there is no need to lose the extension if it yields the same mimetype anyway
             $new_name = $name;
         } elseif (strpos($filename, '.') === FALSE) {
             // filename has no dot =>
             // no part of existing filename can be mistaken for an extension =>
             // don't add anything at all
             $new_name = $filename;
         } else {
             // bare $filename already contains an extension =>
             // add '.bin' to force 'application/octet-stream'
             $new_name = $filename . '.bin';
         }
     } else {
         $new_name = $filename . '.' . $new_extension;
     }
     logger(sprintf('%s.%s(): namechange %s -> %s (%s)', __CLASS__, __FUNCTION__, $name, $new_name, $type_path), WLOG_DEBUG);
     return $new_name;
 }