Exemplo n.º 1
0
 /**
  * Parses CSV lines into a two dimensional array.
  *
  * Parsing starts at $this->startByte and honours the following settings of
  * this object:
  * - skipFirstLine: the first non-empty line is dropped (column header).
  * - delimiter: the field separator; it may be longer than one byte.
  * - columnNames: if set, every row is keyed by these names. Missing fields
  *   become empty strings and surplus fields are dropped.
  * - timeout / lineLimit: parsing stops early once either is reached. In that
  *   case $this->lastLinePos is set to the iterator's current position;
  *   otherwise it is left at 0.
  *
  * Quoted fields may contain the delimiter, doubled quotes ("" yields one
  * literal quote) and line breaks. A quote that is never closed is tolerated:
  * the remainder of the input becomes the last field.
  *
  * @param \Iterator $line_iterator
  *   An Iterator object that yields line strings, e.g. ParserCSVIterator. Its
  *   rewind() is called with a byte offset and it must provide
  *   currentPosition().
  *
  * @return array
  *   Two dimensional array that contains the data in the CSV file.
  */
 public function parse(\Iterator $line_iterator)
 {
     $skip_line = $this->skipFirstLine;
     $rows = array();
     $this->lastLinePos = 0;
     // microtime(TRUE) yields a float timestamp. Without the argument it
     // returns a "msec sec" string, which cannot be used for arithmetic or
     // for the comparison further down.
     $max_time = $this->timeout ? microtime(TRUE) + $this->timeout : FALSE;
     $lines_parsed = 0;
     for ($line_iterator->rewind($this->startByte); $line_iterator->valid(); $line_iterator->next()) {
         // Make really sure we've got lines without trailing newlines.
         $line = trim($line_iterator->current(), "\r\n");
         // Skip empty lines. Compare against the empty string explicitly: a
         // loose truthiness check would also discard a line that consists of
         // the single character "0", which is a valid record.
         if ($line === '') {
             continue;
         }
         // If the first line contains column names, skip it.
         if ($skip_line) {
             $skip_line = FALSE;
             continue;
         }
         // The actual parser. explode() is unfortunately not suitable because the
         // delimiter might be located inside a quoted field, and that would break
         // the field and/or require additional effort to re-join the fields.
         $quoted = FALSE;
         $current_index = 0;
         $current_field = '';
         $fields = array();
         // We must use strlen() as we're parsing byte by byte using strpos(), so
         // drupal_strlen() will not work properly.
         $line_length = strlen($line);
         // "<=" is intentional: when the index sits exactly at the end of the
         // line (trailing delimiter or closing quote) one more pass is needed
         // to record the final, possibly empty, field.
         while ($current_index <= $line_length) {
             if ($quoted) {
                 $next_quote_index = strpos($line, '"', $current_index);
                 if ($next_quote_index === FALSE) {
                     // There's a line break before the quote is closed, so grab the rest
                     // of this line and fetch the next line.
                     $current_field .= substr($line, $current_index);
                     $line_iterator->next();
                     if (!$line_iterator->valid()) {
                         // Whoa, an unclosed quote! Well whatever, let's just ignore
                         // that shortcoming and record it nevertheless.
                         $fields[] = $current_field;
                         break;
                     }
                     // Ok, so, on with fetching the next line, as mentioned above.
                     $current_field .= "\n";
                     $line = trim($line_iterator->current(), "\r\n");
                     // The loop bound must follow the new line. With a stale length, a
                     // continuation line longer than the first one would end the loop
                     // early and lose the quoted field and everything after it.
                     $line_length = strlen($line);
                     $current_index = 0;
                     continue;
                 }
                 // There's actually another quote in this line, find out whether it's
                 // escaped or not.
                 $current_field .= substr($line, $current_index, $next_quote_index - $current_index);
                 if (isset($line[$next_quote_index + 1]) && $line[$next_quote_index + 1] === '"') {
                     // Escaped quote, add a single one to the field and proceed quoted.
                     $current_field .= '"';
                     $current_index = $next_quote_index + 2;
                 } else {
                     // End of the quoted section, close the quote and let the
                     // $quoted == FALSE block finalize the field.
                     $quoted = FALSE;
                     $current_index = $next_quote_index + 1;
                 }
             } else {
                 // First, let's find out where the next character of interest is.
                 $next_quote_index = strpos($line, '"', $current_index);
                 $next_delimiter_index = strpos($line, $this->delimiter, $current_index);
                 if ($next_quote_index === FALSE) {
                     $next_index = $next_delimiter_index;
                 } elseif ($next_delimiter_index === FALSE) {
                     $next_index = $next_quote_index;
                 } else {
                     $next_index = min($next_quote_index, $next_delimiter_index);
                 }
                 if ($next_index === FALSE) {
                     // This line is done, add the rest of it as last field.
                     $current_field .= substr($line, $current_index);
                     $fields[] = $current_field;
                     break;
                 } elseif ($line[$next_index] === $this->delimiter[0]) {
                     // $length covers the field text plus all delimiter bytes except the
                     // last one, so advancing by $length + 1 lands right behind a
                     // delimiter of any length.
                     $length = $next_index + strlen($this->delimiter) - 1 - $current_index;
                     $current_field .= substr($line, $current_index, $length);
                     $fields[] = $current_field;
                     $current_field = '';
                     $current_index += $length + 1;
                     // Continue with the next field.
                 } else {
                     $quoted = TRUE;
                     $current_field .= substr($line, $current_index, $next_index - $current_index);
                     $current_index = $next_index + 1;
                     // Continue this field in the $quoted == TRUE block.
                 }
             }
         }
         // End of CSV parser. We've now got all the fields of the line as strings
         // in the $fields array.
         if (!$this->columnNames) {
             $row = $fields;
         } else {
             $row = array();
             foreach ($this->columnNames as $column_name) {
                 // array_shift() returns NULL once the fields are exhausted; the cast
                 // turns that into an empty string.
                 $field = array_shift($fields);
                 $row[$column_name] = (string) $field;
             }
         }
         $rows[] = $row;
         // Quit parsing if timeout has been reached or requested lines have been
         // reached.
         if ($max_time && microtime(TRUE) > $max_time) {
             $this->lastLinePos = $line_iterator->currentPosition();
             break;
         }
         $lines_parsed++;
         if ($this->lineLimit && $lines_parsed >= $this->lineLimit) {
             $this->lastLinePos = $line_iterator->currentPosition();
             break;
         }
     }
     return $rows;
 }