/**
 * Sniff whether a file is XML or CSV and delegate to the matching analysis class.
 *
 * The start of the file is read in 64-byte chunks until a complete first line of
 * content has been buffered, at least 1024 bytes have been read, or EOF is hit.
 * Leading whitespace is discarded and only the first line of what remains is
 * inspected: it is treated as XML when it starts with "<" or contains "?xml",
 * and as CSV otherwise. The file is then reopened and handed to the class
 * \Faker\Parser\Analysis\<UPPERCASED TYPE> for detailed analysis.
 *
 * @param mixed $filename Passed unchanged to FileFactory::create().
 *
 * @return mixed Whatever the analysis class's analyse() method returns.
 *
 * @throws AnalysisClassNotFound When no analysis class exists for the detected type.
 */
function getFormat($filename)
{
    $options = new ParseOptions();

    $fp = FileFactory::create($filename);
    $data = '';
    do {
        $data .= $fp->fread(64);
        // Search for the line end *after* leading whitespace so that blank
        // leading lines do not end the sniff before any content is seen.
        // Compare against FALSE explicitly: offset 0 is a valid match.
        $has_full_line = strpos(ltrim($data), "\n") !== FALSE;
    } while (strlen($data) < 1024 && !$has_full_line && !$fp->feof());
    $fp->fclose();
    unset($fp);

    // Keep only the first line of actual content.
    $data = ltrim($data);
    $nlpos = strpos($data, "\n");
    if ($nlpos !== FALSE) {
        $data = substr($data, 0, $nlpos);
    }

    // CSV is the default; the empty-string guard avoids an invalid offset
    // read on empty or whitespace-only files.
    $format_base_type = 'csv';
    if (($data !== '' && $data[0] === '<') || strpos($data, '?xml') !== FALSE) {
        $format_base_type = 'xml';
    }

    $options->setParser($format_base_type);

    $analysis_function = '\\Faker\\Parser\\Analysis\\' . strtoupper($format_base_type);
    if (!class_exists($analysis_function)) {
        throw new AnalysisClassNotFound($analysis_function);
    }

    // The sniffing handle was closed above; analysis gets a fresh one.
    $fp = FileFactory::create($filename);
    $class = new $analysis_function($this->dispatcher);

    return $class->analyse($fp, $options);
}
/**
 * Parse a delimited file record by record, dispatching an event for each one.
 *
 * Copies the separator, delimiter and CR-handling settings from $options onto
 * this parser, skips the configured number of leading rows, optionally reads a
 * header row (dispatching 'header_parsed'), then dispatches 'row_parsed' for
 * every record that $this->read() returns until EOF. The file is closed before
 * returning.
 *
 * Records are keyed by header name when a header row was read; fields missing
 * from a short record are NULL and fields beyond the header are dropped.
 * Without a header, keys are "FIELD1", "FIELD2", ...
 *
 * @param FileInterface $file    Open file to read from; closed by this method.
 * @param ParseOptions  $options Parsing configuration.
 *
 * @return bool Always true.
 */
public function parse(FileInterface $file, ParseOptions $options)
{
    $this->field_separator = $options->getFieldSeperator();
    $this->text_delimiter = $options->getDeliminator();
    $this->eol_ignorecr = $options->getEolIgnoreChr();

    // Skip the configured number of lines. A counted loop (rather than
    // decrementing until zero) cannot spin forever on a negative value,
    // and there is nothing left to skip once EOF is reached.
    $skip_rows = (int) $options->getSkipRows();
    for ($skipped = 0; $skipped < $skip_rows && !$file->feof(); $skipped++) {
        $this->read($file);
    }

    // Fetch the header row.
    $header = NULL;
    if ($options->getHasHeaderRow() === TRUE) {
        $header = $this->read($file);
        // Send the header to the event listeners.
        $this->event_class->dispatch('header_parsed', new HeaderParsed($header, 0));
        // A failed header read cannot name any fields; fall back to FIELDn keys.
        if (!is_array($header)) {
            $header = NULL;
        }
    }

    $row = 0;
    while (!$file->feof()) {
        $record = $this->read($file);
        if ($record === FALSE) {
            continue;
        }

        $user_record = array();
        $record_pointer = 0;
        if ($header !== NULL) {
            foreach ($header as $name) {
                // Short records yield NULL for the missing trailing fields.
                $user_record[$name] = isset($record[$record_pointer]) ? $record[$record_pointer] : NULL;
                $record_pointer++;
            }
        } else {
            foreach ($record as $value) {
                $record_pointer++;
                $user_record["FIELD" . $record_pointer] = $value;
            }
        }

        ++$row;
        // Send the record to the event listeners.
        $this->event_class->dispatch('row_parsed', new RowParsed($user_record, $row));
    }

    $file->fclose();

    return true;
}
/**
 * Guess the CSV dialect of a file from its first two lines and store it in $options.
 *
 * Reads up to two lines (4096 bytes each), closes the file, and then decides:
 *  - field separator: pipe (124) if the first line contains "|", else tab (9)
 *    if it contains a tab, else comma (44);
 *  - header row: assumed present unless the first line contains "http" or ".";
 *  - text delimiter: double quote (34) if the second line contains one, else
 *    single quote (39) if the second line starts with one, else none (0).
 * When there is no second line, the first line is used in its place.
 *
 * @param FileInterface $file    Open file to inspect; closed by this method.
 * @param ParseOptions  $options Options object that receives the detected settings.
 *
 * @return ParseOptions|string The updated $options, or the literal string
 *                             "124|0|0|1" when the first line starts with "HDR|".
 */
public function analyse(FileInterface $file, ParseOptions $options)
{
    // Initialise both lines so an empty file does not leave them undefined.
    $data1 = '';
    $data2 = '';
    if (!$file->feof()) {
        $data1 = $file->fgets(4096);
    }
    if (!$file->feof()) {
        $data2 = $file->fgets(4096);
    }
    $file->fclose();

    // Normalise a failed read to an empty string.
    $data1 = (string) $data1;
    $data2 = (string) $data2;
    if ($data2 === '') {
        $data2 = $data1;
    }
    $data1 = ltrim($data1);
    $data2 = ltrim($data2);

    // NOTE(review): this branch returns a string, unlike every other path which
    // returns ParseOptions. Left unchanged for caller compatibility; confirm
    // what consumers of the "HDR|" format expect.
    if (substr($data1, 0, 4) === "HDR|") {
        return "124|0|0|1";
    }

    // Field separator, as an ASCII code.
    if (strpos($data1, "|") !== FALSE) {
        $field_separator = 124;
    } elseif (strpos($data1, "\t") !== FALSE) {
        $field_separator = 9;
    } else {
        $field_separator = 44;
    }

    // A URL or a dot in the first line suggests it is data, not a header.
    // Compare against FALSE explicitly so a match at offset 0 still counts.
    // The earlier keyword checks (product/description/price) were dropped:
    // they could only select the value that is already the default.
    $first_line_is_data = strpos($data1, "http") !== FALSE || strpos($data1, ".") !== FALSE;
    $header_row = $first_line_is_data ? 0 : 1;

    // Text delimiter, as an ASCII code (0 means none).
    if (strpos($data2, "\"") !== FALSE) {
        $text_delimiter = 34;
    } elseif ($data2 !== '' && $data2[0] === "'") {
        $text_delimiter = 39;
    } else {
        $text_delimiter = 0;
    }

    $options->setFieldSeperator($field_separator);
    $options->setHasHeaderRow((bool) $header_row);
    $options->setDeliminator($text_delimiter);

    return $options;
}