/**
 * Sanity-check a raw header, repairing 8-bit data where possible.
 *
 * Data that already validates as UTF-8 is returned untouched. Otherwise
 * the data is re-interpreted using a short list of likely source
 * charsets, and the first conversion that yields valid UTF-8 wins. If
 * none does, the (copied) input is returned as-is.
 *
 * @param string $data  The header data.
 *
 * @return string  The cleaned header data.
 */
protected function _sanityCheck($data)
{
    if (Horde_String::validUtf8($data)) {
        return $data;
    }

    /* Appears to be a PHP error with the internal String structure
     * which prevents accurate manipulation of the string. Copying
     * the data to a new variable fixes things. */
    $data = substr($data, 0);

    /* Assumption: a broken charset in headers is generally either
     * UTF-8 or ISO-8859-1/Windows-1252, so Windows-1252 is tried first
     * and the configured default charset second. This may be a
     * Western-centric approach, but it's better than nothing. */
    $candidates = array('windows-1252', self::$defaultCharset);

    foreach ($candidates as $candidate) {
        $converted = Horde_String::convertCharset($data, $candidate, 'UTF-8');
        if (Horde_String::validUtf8($converted)) {
            return $converted;
        }
    }

    return $data;
}
/**
 * Takes all necessary actions for the given import step, parameters and
 * form values and returns the next necessary step.
 *
 * @param integer $action  The current step. One of the IMPORT_* constants.
 * @param array $param     An associative array containing needed
 *                         parameters for the current step. Keys for this
 *                         driver:
 *   - check_charset: If set (tested with isset()), verify that the
 *                    uploaded data matches the charset given in the
 *                    form. Throws charset exception on error.
 *   - import_mapping: (array) Passed through unchanged to importFile()
 *                     during the IMPORT_CSV step. Defaults to an empty
 *                     array.
 *
 * @return mixed  Either the next step as an integer constant or imported
 *                data set after the final step.
 * @throws Horde_Data_Exception
 * @throws Horde_Data_Exception_Charset
 */
public function nextStep($action, array $param = array())
{
    switch ($action) {
    case Horde_Data::IMPORT_FILE:
        parent::nextStep($action, $param);

        /* Read the uploaded file into memory so that it can be kept in
         * storage and read again in the next step, after the user gave
         * some format details. */
        $file_name = $_FILES['import_file']['tmp_name'];
        if (($file_data = file_get_contents($file_name)) === false) {
            throw new Horde_Data_Exception(Horde_Data_Translation::t("The uploaded file could not be saved."));
        }

        /* Do charset checking now, if requested. */
        if (isset($param['check_charset'])) {
            $charset = isset($this->_vars->charset)
                ? Horde_String::lower($this->_vars->charset)
                : 'utf-8';

            switch ($charset) {
            case 'utf-8':
                $error = !Horde_String::validUtf8($file_data);
                break;

            default:
                /* Round-trip the data through UTF-8; if the bytes do not
                 * survive unchanged the charset is taken to be wrong.
                 * Strict comparison avoids PHP comparing numeric-looking
                 * strings by value. */
                $error = $file_data !== Horde_String::convertCharset(
                    Horde_String::convertCharset($file_data, $charset, 'UTF-8'),
                    'UTF-8',
                    $charset
                );
                break;
            }

            if ($error) {
                $e = new Horde_Data_Exception_Charset(Horde_Data_Translation::t("Incorrect charset given for the data."));
                $e->badCharset = $charset;
                throw $e;
            }
        }

        $this->storage->set('charset', $this->_vars->charset);
        $this->storage->set('file_data', $file_data);

        /* Read up to the first three lines of the file to show them to
         * the user. */
        $first_lines = '';
        $fp = @fopen($file_name, 'r');
        if ($fp) {
            for ($line_no = 1, $line = fgets($fp);
                 $line_no <= 3 && $line;
                 $line_no++, $line = fgets($fp)) {
                $line = Horde_String::convertCharset($line, $this->_vars->charset, 'UTF-8');
                $first_lines .= Horde_String::truncate($line);
                if (Horde_String::length($line) > 100) {
                    $first_lines .= "\n";
                }
            }
        }
        $this->storage->set('first_lines', $first_lines);

        if ($fp) {
            /* Import the first line to guess the number of fields. */
            if ($first_lines) {
                rewind($fp);
                $line = self::getCsv($fp);
                if ($line) {
                    $this->storage->set('fields', count($line));
                }
            }

            /* Release the handle; it is not needed beyond this step. */
            fclose($fp);
        }

        return Horde_Data::IMPORT_CSV;

    case Horde_Data::IMPORT_CSV:
        $this->storage->set('header', $this->_vars->header);

        $import_mapping = array();
        if (isset($param['import_mapping'])) {
            $import_mapping = $param['import_mapping'];
        }

        /* Write the stored upload back to a temporary file so that
         * importFile() can read it from disk. */
        $file_name = Horde_Util::getTempFile('import');
        file_put_contents($file_name, $this->storage->get('file_data'));

        $this->storage->set(
            'data',
            $this->importFile(
                $file_name,
                $this->_vars->header,
                $this->_vars->sep,
                $this->_vars->quote,
                $this->_vars->fields,
                $import_mapping,
                $this->storage->get('charset'),
                $this->storage->get('crlf')
            )
        );
        $this->storage->set('map');

        return Horde_Data::IMPORT_MAPPED;

    default:
        return parent::nextStep($action, $param);
    }
}
/**
 * Ensure $data is converted to valid UTF-8 data. Works as follows:
 * Converts to UTF-8, assuming data is in $from_charset encoding. If
 * that produces invalid UTF-8, attempt to convert from the most common
 * alternative encodings. If that *still* fails, strip out non 7-bit
 * characters...and force encoding to UTF-8 from $from_charset as a last
 * resort.
 *
 * @param string $data          The string data to convert to UTF-8.
 * @param string $from_charset  The character set to assume $data is encoded
 *                              in.
 *
 * @return string  A valid UTF-8 encoded string.
 */
public static function ensureUtf8($data, $from_charset)
{
    $text = Horde_String::convertCharset($data, $from_charset, 'UTF-8');
    if (Horde_String::validUtf8($text)) {
        return $text;
    }

    $test_charsets = array('windows-1252', 'UTF-8');
    foreach ($test_charsets as $charset) {
        /* Skip the charset that was already tried above. Charset names
         * are compared case-insensitively so that e.g. 'utf-8' is not
         * retried as 'UTF-8'. */
        if (strcasecmp($charset, (string)$from_charset) === 0) {
            continue;
        }
        $text = Horde_String::convertCharset($data, $charset, 'UTF-8');
        if (Horde_String::validUtf8($text)) {
            return $text;
        }
    }

    // Invalid UTF-8 still found. Strip out non 7-bit characters, or if
    // that fails, force a conversion to UTF-8 as a last resort. Need
    // to break string into smaller chunks to avoid hitting
    // https://bugs.php.net/bug.php?id=37793
    $chunk_size = 4000;
    $text = '';

    // Walk over a copy so that the untouched original is still available
    // for the forced conversion below; consuming $data directly would
    // drop every chunk that had already been processed.
    $remaining = $data;
    while ($remaining !== false && strlen($remaining)) {
        $test = self::_stripNon7BitChars(substr($remaining, 0, $chunk_size));
        if ($test === false) {
            // Stripping failed for this chunk: give up on chunking and
            // force-convert the complete input instead.
            return Horde_String::convertCharset($data, $from_charset, 'UTF-8', true);
        }
        $text .= $test;
        $remaining = substr($remaining, $chunk_size);
    }

    return $text;
}