Example #1
0
 /**
  * Perform sanity checking on a raw header (e.g. handle 8-bit characters).
  *
  * @param string $data  The header data.
  *
  * @return string  The cleaned header data.
  */
 protected function _sanityCheck($data)
 {
     /* Header data that is already valid UTF-8 is returned untouched. */
     if (Horde_String::validUtf8($data)) {
         return $data;
     }

     /* Work on a fresh copy of the string: there appears to be a PHP
      * issue with the internal string structure that prevents accurate
      * manipulation of the original value; copying it works around that. */
     $data = substr($data, 0);

     /* Candidate source charsets, tried in order. Broken headers are
      * assumed to be Windows-1252 (ISO-8859-1 superset) most of the time,
      * so that is attempted before the configured default charset. This
      * is a Western-centric heuristic, but better than nothing. */
     $candidates = array('windows-1252', self::$defaultCharset);

     foreach ($candidates as $candidate) {
         $converted = Horde_String::convertCharset($data, $candidate, 'UTF-8');
         if (Horde_String::validUtf8($converted)) {
             return $converted;
         }
     }

     /* No candidate produced valid UTF-8; hand back the copied data as-is. */
     return $data;
 }
Example #2
0
File: Csv.php Project: horde/horde
 /**
  * Takes all necessary actions for the given import step, parameters and
  * form values and returns the next necessary step.
  *
  * @param integer $action  The current step. One of the IMPORT_* constants.
  * @param array $param     An associative array containing needed
  *                         parameters for the current step. Keys for this
  *                         driver:
  *   - check_charset: (boolean) Do some checks to see if the correct
  *                    charset has been provided. Throws charset exception
  *                    on error.
  *   - import_mapping: TODO
  *
  * @return mixed  Either the next step as an integer constant or imported
  *                data set after the final step.
  * @throws Horde_Data_Exception
  * @throws Horde_Data_Exception_Charset
  */
 public function nextStep($action, array $param = array())
 {
     switch ($action) {
         case Horde_Data::IMPORT_FILE:
             parent::nextStep($action, $param);

             /* Read the uploaded file into memory. The contents are kept
              * in storage (see below) so that the next step can read them
              * again after the user gave some format details. */
             $file_name = $_FILES['import_file']['tmp_name'];
             if (($file_data = file_get_contents($file_name)) === false) {
                 throw new Horde_Data_Exception(Horde_Data_Translation::t("The uploaded file could not be saved."));
             }

             /* Do charset checking now, if requested. check_charset is a
              * boolean flag, so an explicit false must skip the check;
              * isset() alone would treat false as "requested". */
             if (!empty($param['check_charset'])) {
                 $charset = isset($this->_vars->charset) ? Horde_String::lower($this->_vars->charset) : 'utf-8';
                 switch ($charset) {
                     case 'utf-8':
                         $error = !Horde_String::validUtf8($file_data);
                         break;
                     default:
                         /* Convert to UTF-8 and back again: the data only
                          * survives the round trip unchanged if the given
                          * charset fits. Compare strictly so that PHP does
                          * not apply numeric-string comparison rules. */
                         $round_trip = Horde_String::convertCharset(Horde_String::convertCharset($file_data, $charset, 'UTF-8'), 'UTF-8', $charset);
                         $error = $file_data !== $round_trip;
                         break;
                 }
                 if ($error) {
                     $e = new Horde_Data_Exception_Charset(Horde_Data_Translation::t("Incorrect charset given for the data."));
                     $e->badCharset = $charset;
                     throw $e;
                 }
             }
             $this->storage->set('charset', $this->_vars->charset);
             $this->storage->set('file_data', $file_data);

             /* Read up to the first three lines of the file to show them
              * to the user. */
             $first_lines = '';
             $fp = @fopen($file_name, 'r');
             if ($fp) {
                 /* fgets() returns false at end of file; testing for that
                  * explicitly keeps a line such as "0" from ending the
                  * preview early. */
                 for ($line_no = 1, $line = fgets($fp);
                      $line_no <= 3 && $line !== false;
                      $line_no++, $line = fgets($fp)) {
                     $line = Horde_String::convertCharset($line, $this->_vars->charset, 'UTF-8');
                     $first_lines .= Horde_String::truncate($line);
                     if (Horde_String::length($line) > 100) {
                         $first_lines .= "\n";
                     }
                 }
             }
             $this->storage->set('first_lines', $first_lines);

             if ($fp) {
                 /* Import the first line to guess the number of fields. */
                 if ($first_lines !== '') {
                     rewind($fp);
                     $line = self::getCsv($fp);
                     if ($line) {
                         $this->storage->set('fields', count($line));
                     }
                 }
                 /* Release the file handle; it is not needed any longer. */
                 fclose($fp);
             }
             return Horde_Data::IMPORT_CSV;

         case Horde_Data::IMPORT_CSV:
             $this->storage->set('header', $this->_vars->header);
             $import_mapping = isset($param['import_mapping'])
                 ? $param['import_mapping']
                 : array();

             /* Write the stored file contents to a temporary file so that
              * importFile() can read them. */
             $file_name = Horde_Util::getTempFile('import');
             if (file_put_contents($file_name, $this->storage->get('file_data')) === false) {
                 throw new Horde_Data_Exception(Horde_Data_Translation::t("The uploaded file could not be saved."));
             }
             $this->storage->set(
                 'data',
                 $this->importFile(
                     $file_name,
                     $this->_vars->header,
                     $this->_vars->sep,
                     $this->_vars->quote,
                     $this->_vars->fields,
                     $import_mapping,
                     $this->storage->get('charset'),
                     $this->storage->get('crlf')
                 )
             );
             $this->storage->set('map');
             return Horde_Data::IMPORT_MAPPED;

         default:
             return parent::nextStep($action, $param);
     }
 }
Example #3
0
 /**
  * Ensure $data is converted to valid UTF-8 data. Works as follows:
  * Converts to UTF-8, assuming data is in $from_charset encoding. If
  * that produces invalid UTF-8, attempt to convert to most common multibyte
  * encodings. If that *still* fails, strip out non 7-Bit characters...and
  * force encoding to UTF-8 from $from_charset as a last resort.
  *
  * @param string $data          The string data to convert to UTF-8.
  * @param string $from_charset  The character set to assume $data is encoded
  *                              in.
  *
  * @return string  A valid UTF-8 encoded string.
  */
 public static function ensureUtf8($data, $from_charset)
 {
     /* First attempt: trust the declared source charset. */
     $text = Horde_String::convertCharset($data, $from_charset, 'UTF-8');
     if (Horde_String::validUtf8($text)) {
         return $text;
     }

     /* Second attempt: try the fallback charsets, skipping the one that
      * was already tried above. */
     $test_charsets = array('windows-1252', 'UTF-8');
     foreach ($test_charsets as $charset) {
         if ($charset != $from_charset) {
             $text = Horde_String::convertCharset($data, $charset, 'UTF-8');
             if (Horde_String::validUtf8($text)) {
                 return $text;
             }
         }
     }

     // Invalid UTF-8 still found. Strip out non 7-bit characters, or if
     // that fails, force a conversion to UTF-8 as a last resort. Need
     // to break string into smaller chunks to avoid hitting
     // https://bugs.php.net/bug.php?id=37793
     //
     // The chunks are addressed by offset so that $data itself stays
     // intact: if stripping fails part-way through, the forced conversion
     // must be applied to the complete input, not only to the chunks that
     // have not been processed yet.
     $chunk_size = 4000;
     $length = strlen($data);
     $text = '';
     for ($offset = 0; $offset < $length; $offset += $chunk_size) {
         $test = self::_stripNon7BitChars(substr($data, $offset, $chunk_size));
         if ($test === false) {
             return Horde_String::convertCharset($data, $from_charset, 'UTF-8', true);
         }
         $text .= $test;
     }

     return $text;
 }