/**
 * Returns an abstract of the file content.
 *
 * The preview is empty when previews are disabled
 * (ATTACHMENT_ENABLE_CONTENT_PREVIEW), when the attachment is flagged as
 * binary, or when its recorded size is 0. Otherwise the first 2003 bytes of
 * the stored attachment file are read and cut down to 500 characters; '...'
 * is appended when the preview does not show the whole file.
 *
 * Any exception raised while reading or converting the file is swallowed and
 * an empty string is returned.
 *
 * @param array $data attachment row; reads the keys 'isBinary',
 *                    'attachmentSize' and 'attachmentID'
 * @return string
 */
protected static function getContentPreview($data) {
    if (!ATTACHMENT_ENABLE_CONTENT_PREVIEW || $data['isBinary'] || $data['attachmentSize'] == 0) {
        return '';
    }

    // Built up in locals and only published on success, so a failure half-way
    // through never leaks the raw, unprocessed read buffer to the caller.
    $content = '';
    try {
        $file = new File(WCF_DIR . 'attachments/attachment-' . $data['attachmentID'], 'rb');
        $raw = $file->read(2003);
        $file->close();

        // Remember how many bytes came from disk before any re-encoding
        // changes the byte length of the buffer.
        $bytesRead = strlen($raw);

        if (CHARSET == 'UTF-8') {
            // Content that is neither ASCII nor UTF-8 is treated as ISO-8859-1.
            if (!StringUtil::isASCII($raw) && !StringUtil::isUTF8($raw)) {
                $raw = StringUtil::convertEncoding('ISO-8859-1', CHARSET, $raw);
            }

            $preview = StringUtil::substring($raw, 0, 500);

            // The preview is incomplete if the 500-character cut dropped
            // something, or if the file holds more bytes than were read.
            // Comparing the preview's byte length with the file size (as was
            // done before) misses truncation when the conversion to UTF-8
            // made the text longer than the file itself.
            if (strlen($preview) < strlen($raw) || $bytesRead < $file->filesize()) {
                $preview .= '...';
            }
            $content = $preview;
        }
        else if (!StringUtil::isUTF8($raw)) {
            // Non-UTF-8 installation: UTF-8 content cannot be shown and yields
            // an empty preview; everything else is cut at 500 characters.
            $preview = StringUtil::substring($raw, 0, 500);
            if ($file->filesize() > 500) {
                $preview .= '...';
            }
            $content = $preview;
        }
    }
    catch (Exception $e) {
        // ignore errors
    }

    return $content;
}
/**
 * Returns an abstract of the file content.
 *
 * The result is computed once and cached in $this->contentPreview. It is
 * empty when previews are disabled (ATTACHMENT_ENABLE_CONTENT_PREVIEW), when
 * the attachment is flagged as binary, or when its recorded size is 0.
 * Otherwise the first 2003 bytes of the stored attachment file are read and
 * cut down to 500 characters; '...' is appended when the preview does not
 * show the whole file.
 *
 * Any exception raised while reading or converting the file is swallowed and
 * an empty preview is cached.
 *
 * @return string
 */
public function getContentPreview() {
    if ($this->contentPreview !== null) {
        return $this->contentPreview;
    }

    $this->contentPreview = '';
    if (!ATTACHMENT_ENABLE_CONTENT_PREVIEW || $this->isBinary || $this->attachmentSize == 0) {
        return $this->contentPreview;
    }

    try {
        $file = new File(WCF_DIR . 'attachments/attachment-' . $this->attachmentID, 'rb');
        $raw = $file->read(2003);
        $file->close();

        // Remember how many bytes came from disk before any re-encoding
        // changes the byte length of the buffer.
        $bytesRead = strlen($raw);
        $preview = '';

        if (CHARSET == 'UTF-8') {
            // Content that is neither ASCII nor UTF-8 is treated as ISO-8859-1.
            if (!StringUtil::isASCII($raw) && !StringUtil::isUTF8($raw)) {
                $raw = StringUtil::convertEncoding('ISO-8859-1', CHARSET, $raw);
            }

            $preview = StringUtil::substring($raw, 0, 500);

            // The preview is incomplete if the 500-character cut dropped
            // something, or if the file holds more bytes than were read.
            // Comparing the preview's byte length with the file size (as was
            // done before) misses truncation when the conversion to UTF-8
            // made the text longer than the file itself.
            if (strlen($preview) < strlen($raw) || $bytesRead < $file->filesize()) {
                $preview .= '...';
            }
        }
        else if (!StringUtil::isUTF8($raw)) {
            // Non-UTF-8 installation: UTF-8 content cannot be shown and yields
            // an empty preview; everything else is cut at 500 characters.
            $preview = StringUtil::substring($raw, 0, 500);
            if ($file->filesize() > 500) {
                $preview .= '...';
            }
        }

        // Only a fully processed preview is cached; on failure the cache keeps
        // the empty string instead of the raw read buffer.
        $this->contentPreview = $preview;
    }
    catch (Exception $e) {
        // ignore errors
    }

    return $this->contentPreview;
}
/**
 * Parses search keywords.
 *
 * Cleans up the given search string and appends the resulting, HTML-encoded
 * keywords to self::$keywords. A string that is wrapped in double quotes is
 * stored as one phrase; any other string is split into single words.
 *
 * @param string $keywordString
 */
protected static function parseKeywords($keywordString) {
    // In a UTF-8 installation, input that is neither ASCII nor UTF-8 is
    // treated as ISO-8859-1 and transcoded.
    $needsTranscoding = CHARSET == 'UTF-8'
        && !StringUtil::isASCII($keywordString)
        && !StringUtil::isUTF8($keywordString);
    if ($needsTranscoding) {
        $keywordString = StringUtil::convertEncoding('ISO-8859-1', 'UTF-8', $keywordString);
    }

    // Drop asterisks that do not directly follow a word character.
    $cleaned = preg_replace('/(?<!\\w)\\*/', '', $keywordString);
    // Drop the search operators + - > < ( ) ~
    $cleaned = preg_replace('/[\\+\\-><()~]+/', '', $cleaned);

    $isPhrase = StringUtil::substring($cleaned, 0, 1) == '"'
        && StringUtil::substring($cleaned, -1) == '"';

    if ($isPhrase) {
        // Phrase search: strip the surrounding quotes and keep the rest as a
        // single keyword, unless nothing is left.
        $phrase = StringUtil::trim(StringUtil::substring($cleaned, 1, -1));
        if (!empty($phrase)) {
            self::$keywords = array_merge(self::$keywords, array(StringUtil::encodeHTML($phrase)));
        }
        return;
    }

    // Word search: dots and commas count as delimiters, then split on spaces.
    $words = explode(' ', preg_replace('/[.,]/', ' ', $cleaned));
    $words = ArrayUtil::encodeHTML(ArrayUtil::trim($words));
    if (count($words) > 0) {
        self::$keywords = array_merge(self::$keywords, $words);
    }
}
/**
 * Validates the node for correctness.
 *
 * The following options are supported:
 *   - Node::REPAIR - If something is broken, an automatic repair may be
 *     attempted.
 *
 * An array is returned with warnings.
 *
 * Every item in the array has the following properties:
 *   * level   - (number between 1 and 3 with severity information)
 *   * message - (human readable message)
 *   * node    - (reference to the offending node)
 *
 * Checks performed here: the raw value must pass StringUtil::isUTF8() (level
 * 3, or level 1 when it was repaired), and the property name may only consist
 * of A-Z, 0-9 and '-' (level 1). The warnings of all parameters are appended.
 *
 * @param int $options
 * @return array
 */
public function validate($options = 0) {
    $warnings = array();

    // Checking if our value is UTF-8
    $oldValue = $this->getRawMimeDirValue();
    if (!StringUtil::isUTF8($oldValue)) {
        $level = 3;
        if ($options & self::REPAIR) {
            // The converted value is stored unconditionally; the result of the
            // conversion is not re-validated.
            $this->setRawMimeDirValue(StringUtil::convertToUTF8($oldValue));
            $level = 1;
        }

        // Report control characters specifically. The class covers all C0
        // control characters except TAB, LF and CR, plus DEL.
        if (preg_match('%([\\x00-\\x08\\x0B-\\x0C\\x0E-\\x1F\\x7F])%', $oldValue, $matches)) {
            $message = 'Property contained a control character (0x' . bin2hex($matches[1]) . ')';
        } else {
            $message = 'Property is not valid UTF-8! ' . $oldValue;
        }

        $warnings[] = array('level' => $level, 'message' => $message, 'node' => $this);
    }

    // Checking if the propertyname does not contain any invalid bytes.
    if (!preg_match('/^([A-Z0-9-]+)$/', $this->name)) {
        $warnings[] = array(
            'level' => 1,
            'message' => 'The propertyname: ' . $this->name . ' contains invalid characters. Only A-Z, 0-9 and - are allowed',
            'node' => $this,
        );

        if ($options & self::REPAIR) {
            // Uppercasing and converting underscores to dashes.
            $this->name = strtoupper(str_replace('_', '-', $this->name));

            // Removing every other invalid character. This is deliberately a
            // byte-wise match: with the /u modifier preg_replace() returns
            // NULL for a name that is not valid UTF-8, which would wipe the
            // name. For valid UTF-8 the result is the same, because every byte
            // of a multi-byte character is outside the allowed set.
            $this->name = preg_replace('/[^A-Z0-9-]/', '', $this->name);
        }
    }

    // Validating inner parameters
    foreach ($this->parameters as $param) {
        $warnings = array_merge($warnings, $param->validate($options));
    }

    return $warnings;
}
/**
 * Validates the node for correctness.
 *
 * The following options are supported:
 *   - Node::REPAIR - If something is broken, an automatic repair may be
 *     attempted.
 *
 * An array is returned with warnings.
 *
 * Every item in the array has the following properties:
 *   * level   - (number between 1 and 3 with severity information)
 *   * message - (human readable message)
 *   * node    - (reference to the offending node)
 *
 * Checks performed here: the value must pass StringUtil::isUTF8() and the
 * property name may only consist of A-Z, 0-9 and '-' (both level 1). The
 * warnings of all parameters are appended.
 *
 * @param int $options
 * @return array
 */
public function validate($options = 0) {
    $warnings = array();
    $repair = $options & self::REPAIR;

    // Checking if our value is UTF-8
    if (!StringUtil::isUTF8($this->value)) {
        $warnings[] = array('level' => 1, 'message' => 'Property is not valid UTF-8!', 'node' => $this);
        if ($repair) {
            $this->value = StringUtil::convertToUTF8($this->value);
        }
    }

    // Checking if the propertyname does not contain any invalid bytes.
    if (!preg_match('/^([A-Z0-9-]+)$/', $this->name)) {
        $warnings[] = array(
            'level' => 1,
            'message' => 'The propertyname: ' . $this->name . ' contains invalid characters. Only A-Z, 0-9 and - are allowed',
            'node' => $this,
        );

        if ($repair) {
            // Uppercasing and converting underscores to dashes.
            $this->name = strtoupper(str_replace('_', '-', $this->name));

            // Removing every other invalid character. This is deliberately a
            // byte-wise match: with the /u modifier preg_replace() returns
            // NULL for a name that is not valid UTF-8, which would wipe the
            // name. For valid UTF-8 the result is the same, because every byte
            // of a multi-byte character is outside the allowed set.
            $this->name = preg_replace('/[^A-Z0-9-]/', '', $this->name);
        }
    }

    // Validating inner parameters
    foreach ($this->parameters as $param) {
        $warnings = array_merge($warnings, $param->validate($options));
    }

    return $warnings;
}
/**
 * A string consisting of a single NUL byte must not be accepted by
 * StringUtil::isUTF8().
 */
function testUTF8ControlChar() {
    // chr() takes an integer code point. The string '0x00' is not a numeric
    // string on PHP 7+, so passing it only yields NUL through lenient
    // coercion (with a notice/warning) and fails outright under strict types.
    $isValid = StringUtil::isUTF8(chr(0x00));

    $this->assertEquals(false, $isValid);
}
/**
 * Validates the node for correctness.
 *
 * The following options are supported:
 *   - Node::REPAIR - If something is broken, an automatic repair may be
 *     attempted.
 *
 * An array is returned with warnings.
 *
 * Every item in the array has the following properties:
 *   * level   - (number between 1 and 3 with severity information)
 *   * message - (human readable message)
 *   * node    - (reference to the offending node)
 *
 * Checks performed here:
 *   - the raw value must pass StringUtil::isUTF8() (level 3, or level 1 when
 *     it was repaired);
 *   - the property name may only consist of A-Z, 0-9 and '-' (level 1);
 *   - an ENCODING parameter must be allowed for the document type of the
 *     root node (level 1).
 * The warnings of all parameters are appended.
 *
 * @param int $options
 * @return array
 */
function validate($options = 0) {
    $warnings = array();

    // Checking if our value is UTF-8
    $oldValue = $this->getRawMimeDirValue();
    if (!StringUtil::isUTF8($oldValue)) {
        $level = 3;
        if ($options & self::REPAIR) {
            // The converted value is stored unconditionally; the result of the
            // conversion is not re-validated.
            $this->setRawMimeDirValue(StringUtil::convertToUTF8($oldValue));
            $level = 1;
        }

        // Report control characters specifically (C0 controls except TAB, LF
        // and CR, plus DEL).
        if (preg_match('%([\\x00-\\x08\\x0B-\\x0C\\x0E-\\x1F\\x7F])%', $oldValue, $matches)) {
            $message = 'Property contained a control character (0x' . bin2hex($matches[1]) . ')';
        } else {
            $message = 'Property is not valid UTF-8! ' . $oldValue;
        }

        $warnings[] = array('level' => $level, 'message' => $message, 'node' => $this);
    }

    // Checking if the propertyname does not contain any invalid bytes.
    if (!preg_match('/^([A-Z0-9-]+)$/', $this->name)) {
        $warnings[] = array(
            'level' => 1,
            'message' => 'The propertyname: ' . $this->name . ' contains invalid characters. Only A-Z, 0-9 and - are allowed',
            'node' => $this,
        );

        if ($options & self::REPAIR) {
            // Uppercasing and converting underscores to dashes.
            $this->name = strtoupper(str_replace('_', '-', $this->name));

            // Removing every other invalid character. This is deliberately a
            // byte-wise match: with the /u modifier preg_replace() returns
            // NULL for a name that is not valid UTF-8, which would wipe the
            // name. For valid UTF-8 the result is the same, because every byte
            // of a multi-byte character is outside the allowed set.
            $this->name = preg_replace('/[^A-Z0-9-]/', '', $this->name);
        }
    }

    if ($encoding = $this->offsetGet('ENCODING')) {
        if ($this->root->getDocumentType() === Document::VCARD40) {
            $warnings[] = array(
                'level' => 1,
                'message' => 'ENCODING parameter is not valid in vCard 4.',
                'node' => $this,
            );
        } else {
            $encoding = strtoupper((string) $encoding);

            // Document types without an entry here are not checked.
            $allowedEncoding = array();
            switch ($this->root->getDocumentType()) {
                case Document::ICALENDAR20:
                    $allowedEncoding = array('8BIT', 'BASE64');
                    break;
                case Document::VCARD21:
                    $allowedEncoding = array('QUOTED-PRINTABLE', 'BASE64', '8BIT');
                    break;
                case Document::VCARD30:
                    $allowedEncoding = array('B');
                    break;
            }

            // Both sides are strings, so the strict comparison only rules out
            // accidental type juggling.
            if ($allowedEncoding && !in_array($encoding, $allowedEncoding, true)) {
                $warnings[] = array(
                    'level' => 1,
                    'message' => 'ENCODING=' . $encoding . ' is not valid for this document type.',
                    'node' => $this,
                );
            }
        }
    }

    // Validating inner parameters
    foreach ($this->parameters as $param) {
        $warnings = array_merge($warnings, $param->validate($options));
    }

    return $warnings;
}