/**
 * Scans an XML file and reports the element path that occurs most often.
 *
 * The file is fed to an XML parser in 2048-byte chunks. The registered
 * analysisStartTag / analysisEndTag / analysisCDATA callbacks are presumably
 * what populate $this->analysis_records (they are defined elsewhere); this
 * method then reads that map, keyed by element path with a "count" entry
 * per path, and picks the path with the highest count whose path does not
 * contain any of the ignored element names.
 *
 * @param FileInterface $file    Source to analyse; it is closed before returning.
 * @param ParseOptions  $options Not used by this method.
 *
 * @return string|null The most frequent non-ignored element path, or null
 *                     when no path qualifies.
 *
 * @throws PHPXmlParserError When the XML parser cannot be created.
 */
public function analyse(FileInterface $file, ParseOptions $options)
{
    // Reset the state the parser callbacks work with.
    $this->analysis_last_path = "";
    $this->analysis_path = "";
    $this->analysis_depth = 0;
    $this->analysis_length = array();
    $this->analysis_records = array();

    $parser = @xml_parser_create();
    if (!$parser) {
        throw new PHPXmlParserError();
    }

    xml_set_element_handler($parser, array($this, 'analysisStartTag'), array($this, 'analysisEndTag'));
    xml_set_character_data_handler($parser, array($this, "analysisCDATA"));

    $first = true;
    while (!$file->feof()) {
        $xml = $file->fread(2048);
        if ($first) {
            // Strip leading whitespace from the first chunk only, presumably
            // so that whitespace ahead of the XML declaration is tolerated.
            $xml = ltrim($xml);
            $first = false;
        }
        // Parse failures are deliberately not fatal here: analysis is
        // best-effort and works with whatever was counted so far.
        xml_parse($parser, $xml, false);
    }

    // The loop only ends once feof() is true, so always flush the parser.
    xml_parse($parser, "", true);

    $file->fclose();

    // Element names that must never be chosen as the repeating element.
    // Matching is by substring, so e.g. "CATEGORY" also excludes "TDCATEGORY".
    $ignore = array(
        "ARG",
        "CATEGORIES",
        "CATEGORY",
        "CONTENT",
        "DC:SUBJECT",
        "FIELD",
        "FIELDS",
        "OPTIONVALUE",
        "PAYMETHOD",
        "PRODUCTITEMDETAIL",
        "PRODUCTREF",
        "SHIPMETHOD",
        "TDCATEGORIES",
        "TDCATEGORY",
        "MEDIA:THUMBNAIL",
    );

    $repeating_element_xpath = null;
    $repeating_element_count = 0;

    foreach ($this->analysis_records as $xpath => $data) {
        // Strictly greater: on a tie the path seen first is kept.
        if ($data["count"] <= $repeating_element_count) {
            continue;
        }

        $ok_to_use = true;
        foreach ($ignore as $v) {
            if (strpos($xpath, $v) !== false) {
                $ok_to_use = false;
                break;
            }
        }

        if ($ok_to_use) {
            $repeating_element_xpath = $xpath;
            $repeating_element_count = $data["count"];
        }
    }

    return $repeating_element_xpath;
}
/**
 * Reads the next delimited record from the file, one character at a time.
 *
 * Fields are split on $this->field_separator; $this->text_delimiter toggles
 * a quoted state in which separators and newlines do not end a field. The
 * delimiter characters themselves are not stored in the field value, and a
 * delimiter of chr(0) disables quoting.
 *
 * A record that is still pending when the input ends (no trailing newline)
 * is returned instead of being discarded; FALSE is returned only when the
 * input is exhausted and nothing was collected.
 *
 * @param FileInterface $file Source positioned at the start of a record.
 *
 * @return array|false List of field strings, or FALSE when there is no
 *                     further record.
 */
public function read(FileInterface $file)
{
    $record = array();
    $field = "";
    $position = 0;    // count of data characters accepted for this record
    $inquote = false;

    while (true) {
        $char = $file->fgetc();

        // A failed (or empty) read means end of input. Stopping here also
        // guarantees termination when the handle is unusable and feof()
        // never turns true.
        if ($char === false || $char === '') {
            break;
        }

        $usechar = false;
        $commit = false;
        $done = false;

        // switch compares loosely and in this order, so "\r" and "\n" take
        // precedence over a separator or delimiter configured to the same
        // character.
        switch ($char) {
            case "\r":
                // Carriage returns are always dropped. The previous code
                // consulted $this->eol_ignorecr here, but both outcomes were
                // identical, so the setting had no effect.
                break;

            case "\n":
                // NOTE(review): "> 1" means a line holding a single data
                // character does not end the record - confirm this is
                // intended rather than "> 0".
                if ($position > 1 && !$inquote) {
                    $commit = true;
                    $done = true;
                }
                break;

            case $this->field_separator:
                if (!$inquote) {
                    $commit = true;
                } else {
                    $usechar = true;
                }
                break;

            case $this->text_delimiter:
                if ($this->text_delimiter != chr(0)) {
                    $inquote = !$inquote;
                }
                break;

            default:
                $usechar = true;
        }

        if ($usechar) {
            $position++;
            $field .= $char;
        }

        // Fields are only stored once the record has seen at least one data
        // character, so leading empty fields are skipped (unchanged).
        if ($commit && $position) {
            $record[] = $field;
            $field = "";
        }

        if ($done) {
            return $record;
        }

        if ($file->feof()) {
            break;
        }
    }

    // End of input: flush the field that was being built, using the same
    // rule as a newline-terminated record.
    if ($position) {
        $record[] = $field;
    }

    return $record ? $record : false;
}
/**
 * Reads the entire file in a single call.
 *
 * Asks the file for its size and requests exactly that many bytes.
 *
 * @param FileInterface $file Source to read from.
 *
 * @return mixed Whatever $file->fread() yields for a request of
 *               $file->filesize() bytes.
 */
public function read(FileInterface $file)
{
    $length = $file->filesize();
    $contents = $file->fread($length);

    return $contents;
}
/**
 * Streams a file through the registered XML parser.
 *
 * The file is read in 2048-byte chunks and handed to $this->parser until the
 * end of the file is reached or $this->done becomes true (presumably set by
 * a parser callback defined elsewhere). The file is closed before returning
 * and also before a parse error is reported.
 *
 * @param FileInterface $file    Source to parse; always closed by this method.
 * @param ParseOptions  $options Not used by this method.
 *
 * @return bool Always true when parsing finished without an error.
 *
 * @throws ParserException   When no parser has been registered.
 * @throws PHPXmlParserError When the parser reports an error.
 *
 * @access public
 */
public function parse(FileInterface $file, ParseOptions $options)
{
    if ($this->parser === null) {
        throw new ParserException('Parser not been registered');
    }

    $this->depth = 0;
    $this->stack = new Stack();
    $this->tree = null;
    $this->stack->push(array()); // first index is the top of the tree
    $this->done = false;

    $failed = false;
    $error = null;

    // Start iterating over the file.
    $first = true;
    while (!$file->feof() && !$this->done) {
        $xml = $file->fread(2048);
        if ($first) {
            // Only the very first chunk has its leading whitespace removed.
            $xml = ltrim($xml);
            $first = false;
        }

        if (xml_parse($this->parser, $xml, false) <= 0) {
            // Capture the message now, but close the file before throwing.
            $failed = true;
            $error = $this->getParserError();
            break;
        }
    }

    // When parsing chunk by chunk the parser must be told that the last
    // piece has been delivered. Some errors are only reported by this final
    // call, so its result is checked - unless a callback already asked to
    // stop, in which case the remainder of the document is not of interest.
    if (!$failed && $file->feof()) {
        if (xml_parse($this->parser, "", true) <= 0 && !$this->done) {
            $failed = true;
            $error = $this->getParserError();
        }
    }

    // Close the file on success and on failure alike.
    $file->fclose();

    if ($failed) {
        throw new PHPXmlParserError($error);
    }

    return true;
}
/**
 * Guesses the dialect of a delimited text file from its first two lines.
 *
 * Reads up to two lines (at most 4096 bytes each), closes the file and then
 * decides:
 *  - field separator: pipe (124) if the first line contains one, otherwise
 *    tab (9) if it contains one, otherwise comma (44);
 *  - header row: assumed present unless the first line contains "http" or
 *    a "." anywhere, which is taken as a sign that it already holds data;
 *  - text delimiter: double quote (34) if the second line contains one,
 *    single quote (39) if the second line starts with one, otherwise 0.
 *
 * A first line starting with "HDR|" short-circuits the detection: the
 * literal string "124|0|0|1" is returned and $options is left untouched.
 *
 * @param FileInterface $file    Source to inspect; it is closed by this method.
 * @param ParseOptions  $options Receives the detected settings.
 *
 * @return ParseOptions|string The populated $options, or "124|0|0|1" for
 *                             "HDR|" files.
 */
public function analyse(FileInterface $file, ParseOptions $options)
{
    // Default to empty strings so an empty or single-line file is handled
    // without touching undefined variables.
    $data1 = '';
    $data2 = '';

    if (!$file->feof()) {
        $data1 = (string) $file->fgets(4096);
    }
    if (!$file->feof()) {
        $data2 = (string) $file->fgets(4096);
    }
    $file->fclose();

    // Without a usable second line, sniff the first line instead.
    if (!$data2) {
        $data2 = $data1;
    }

    $data1 = ltrim($data1);
    $data2 = ltrim($data2);

    if (substr($data1, 0, 4) == "HDR|") {
        return "124|0|0|1";
    }

    // Field separator, as a character code: pipe wins over tab, comma is
    // the fallback.
    if (strpos($data1, "|") !== false) {
        $field_separator = 124;
    } elseif (strpos($data1, "\t") !== false) {
        $field_separator = 9;
    } else {
        $field_separator = 44;
    }

    // Header row. Compare strpos() against false explicitly so a match at
    // offset 0 is not mistaken for "not found". The former checks for
    // "product", "description" and "price" could only ever confirm the
    // default of 1 and are therefore folded into it.
    $looks_like_data = strpos($data1, "http") !== false
        || strpos($data1, ".") !== false;
    $header_row = $looks_like_data ? 0 : 1;

    // Text delimiter, as a character code (0 = none). substr() is used for
    // the first character so an empty line is safe to inspect.
    if (strpos($data2, "\"") !== false) {
        $text_delimiter = 34;
    } elseif (substr($data2, 0, 1) === "'") {
        $text_delimiter = 39;
    } else {
        $text_delimiter = 0;
    }

    $options->setFieldSeperator($field_separator);
    $options->setHasHeaderRow((bool) $header_row);
    $options->setDeliminator($text_delimiter);

    return $options;
}