/**
 * Parses CSV files into a two dimensional array.
 *
 * Parsing starts at $this->startByte and stops when the iterator is
 * exhausted, when $this->lineLimit rows have been collected, or when
 * $this->timeout seconds have elapsed. In the latter two cases the
 * iterator's current position is stored in $this->lastLinePos; otherwise
 * $this->lastLinePos is left at 0.
 *
 * Fields may be enclosed in double quotes. A quoted section may contain the
 * delimiter, line breaks (the field then spans several physical lines and the
 * break is stored as "\n"), and doubled quotes, which stand for one literal
 * quote character.
 *
 * @param \Iterator $line_iterator
 *   An Iterator object that yields line strings, e.g. ParserCSVIterator.
 *   Besides the \Iterator methods, it is called as rewind($position) and
 *   must provide currentPosition().
 *
 * @return array
 *   Two dimensional array that contains the data in the CSV file. Each row is
 *   a numerically indexed list of field strings, or, when $this->columnNames
 *   is set, an array keyed by those column names (missing fields become '').
 */
public function parse(\Iterator $line_iterator) {
  $skip_line = $this->skipFirstLine;
  $rows = array();

  $this->lastLinePos = 0;
  // microtime(TRUE) yields a float; the string form returned by a bare
  // microtime() call cannot be added to or compared against reliably.
  $max_time = $this->timeout ? microtime(TRUE) + $this->timeout : FALSE;
  $lines_parsed = 0;

  // Loop invariant: number of bytes to skip once a delimiter has been found.
  $delimiter_length = strlen($this->delimiter);

  for ($line_iterator->rewind($this->startByte); $line_iterator->valid(); $line_iterator->next()) {

    // Make really sure we've got lines without trailing newlines.
    $line = trim($line_iterator->current(), "\r\n");

    // Skip empty lines. Compare strictly so that a line consisting of the
    // single character "0" is still treated as data.
    if ($line === '') {
      continue;
    }
    // If the first line contains column names, skip it.
    if ($skip_line) {
      $skip_line = FALSE;
      continue;
    }

    // The actual parser. explode() is unfortunately not suitable because the
    // delimiter might be located inside a quoted field, and that would break
    // the field and/or require additional effort to re-join the fields.
    $quoted = FALSE;
    $current_index = 0;
    $current_field = '';
    $fields = array();

    // We must use strlen() as we're parsing byte by byte using strpos(), so
    // a multibyte-aware length function would not work properly.
    $line_length = strlen($line);
    // "<=" on purpose: when the index sits exactly at the end of the line
    // (after a trailing delimiter or a closing quote) one more pass is needed
    // to record the final, possibly empty, field.
    while ($current_index <= $line_length) {
      if ($quoted) {
        $next_quote_index = strpos($line, '"', $current_index);

        if ($next_quote_index === FALSE) {
          // There's a line break before the quote is closed, so grab the rest
          // of this line and fetch the next line.
          $current_field .= substr($line, $current_index);
          $line_iterator->next();

          if (!$line_iterator->valid()) {
            // An unclosed quote at the end of the input: record what has been
            // collected so far instead of dropping it.
            $fields[] = $current_field;
            break;
          }

          // Continue the same field on the next physical line.
          $current_field .= "\n";
          $line = trim($line_iterator->current(), "\r\n");
          // The loop bound must follow the new line, otherwise a continuation
          // line longer than the first one would be cut short.
          $line_length = strlen($line);
          $current_index = 0;
          continue;
        }

        // There's actually another quote in this line, find out whether it's
        // escaped or not.
        $current_field .= substr($line, $current_index, $next_quote_index - $current_index);

        if (isset($line[$next_quote_index + 1]) && $line[$next_quote_index + 1] === '"') {
          // Escaped quote, add a single one to the field and proceed quoted.
          $current_field .= '"';
          $current_index = $next_quote_index + 2;
        }
        else {
          // End of the quoted section, close the quote and let the
          // $quoted == FALSE block finalize the field.
          $quoted = FALSE;
          $current_index = $next_quote_index + 1;
        }
      }
      else {
        // First, let's find out where the next character of interest is.
        $next_quote_index = strpos($line, '"', $current_index);
        $next_delimiter_index = strpos($line, $this->delimiter, $current_index);

        if ($next_quote_index === FALSE) {
          $next_index = $next_delimiter_index;
        }
        elseif ($next_delimiter_index === FALSE) {
          $next_index = $next_quote_index;
        }
        else {
          $next_index = min($next_quote_index, $next_delimiter_index);
        }

        if ($next_index === FALSE) {
          // This line is done, add the rest of it as last field.
          $current_field .= substr($line, $current_index);
          $fields[] = $current_field;
          break;
        }
        elseif ($line[$next_index] === $this->delimiter[0]) {
          // Delimiter reached: the field ends right before it and the next
          // field starts right after the complete delimiter, so no delimiter
          // bytes end up in the field even if the delimiter is longer than
          // one byte.
          $current_field .= substr($line, $current_index, $next_index - $current_index);
          $fields[] = $current_field;
          $current_field = '';
          $current_index = $next_index + $delimiter_length;
          // Continue with the next field.
        }
        else {
          // Opening quote: keep whatever precedes it and switch to quoted
          // mode for the remainder of this field.
          $quoted = TRUE;
          $current_field .= substr($line, $current_index, $next_index - $current_index);
          $current_index = $next_index + 1;
          // Continue this field in the $quoted == TRUE block.
        }
      }
    }
    // End of CSV parser. We've now got all the fields of the line as strings
    // in the $fields array.
    if (!$this->columnNames) {
      $row = $fields;
    }
    else {
      $row = array();
      foreach ($this->columnNames as $column_name) {
        // array_shift() returns NULL once the fields run out; the cast turns
        // that into an empty string.
        $field = array_shift($fields);
        $row[$column_name] = (string) $field;
      }
    }
    $rows[] = $row;

    // Quit parsing if timeout has been reached or requested lines have been
    // reached. The position is remembered in $this->lastLinePos.
    if ($max_time && microtime(TRUE) > $max_time) {
      $this->lastLinePos = $line_iterator->currentPosition();
      break;
    }
    $lines_parsed++;
    if ($this->lineLimit && $lines_parsed >= $this->lineLimit) {
      $this->lastLinePos = $line_iterator->currentPosition();
      break;
    }
  }
  return $rows;
}