/**
 * Converts the resource XML file into a content archive, one <taxon> at a time.
 *
 * The file is loaded in full and passed through iconv() so that byte sequences
 * which are not valid UTF-8 are dropped, then walked with XMLReader. Each <taxon>
 * element is materialised as SimpleXML (CDATA merged into text) and handed to
 * add_taxon_to_archive(); progress is echoed every 100 taxa.
 *
 * @return false|null false when no path is set, the XML is not flagged valid, or the
 *                    file cannot be read or loaded into the reader (in which case the
 *                    archive is not finalized); null after the archive was finalized.
 */
public function start_process()
{
    if (!$this->path_to_xml_file) {
        return false;
    }
    if (!$this->valid_xml) {
        return false;
    }

    $this->archive_builder = new \eol_schema\ContentArchiveBuilder(array('directory_path' => DOC_ROOT . 'temp/xml_to_archive/'));
    $this->taxon_ids = array();
    $this->media_ids = array();
    $this->vernacular_name_ids = array();
    $this->reference_ids = array();
    $this->agent_ids = array();

    $file = file_get_contents($this->path_to_xml_file);
    if ($file === false) {
        // Unreadable file: bail out instead of feeding `false` to the parser
        return false;
    }

    $clean = iconv("UTF-8", "UTF-8//IGNORE", $file);
    if ($clean === false) {
        // iconv() reports failure with false; fall back to the raw bytes rather than parsing nothing
        $clean = $file;
    }
    if ($clean === '') {
        // XMLReader::XML() rejects an empty document
        return false;
    }

    $reader = new \XMLReader();
    if (!$reader->XML($clean)) {
        return false;
    }

    $i = 0;
    // Parse warnings are deliberately silenced so a malformed tail does not abort the run
    while (@$reader->read()) {
        if ($reader->nodeType == \XMLReader::ELEMENT && $reader->name == "taxon") {
            $t = simplexml_load_string($reader->readOuterXML(), null, LIBXML_NOCDATA);
            if ($t) {
                $this->add_taxon_to_archive($t);
            }
            $i++;
            if ($i % 100 == 0) {
                echo "Parsed taxon {$i} : " . time_elapsed() . "\n";
            }
        }
    }
    $reader->close();

    $this->archive_builder->finalize();
}
/**
 * Runs every configured query against each <page> element of the dump.
 *
 * The result always contains one entry per query key, even when nothing matched:
 * array( 'queryKey' => array( 'title1', 'title2' ) )
 *
 * @throws RuntimeException when the dump cannot be opened
 * @return array map of query key to the list of matching page titles
 */
public function scan()
{
    $reader = $this->reader;

    if (!$reader->open($this->dumpLocation)) {
        throw new RuntimeException('Failed to open XML: ' . $this->dumpLocation);
    }

    // Seed every key up front so callers get an (empty) list for queries without matches
    $titlesByQuery = array();
    foreach ($this->query as $key => $unused) {
        $titlesByQuery[$key] = array();
    }

    // Advance until the cursor rests on the first <page>, or the input is exhausted
    do {
        $advanced = $reader->read();
    } while ($advanced && $reader->name !== 'page');

    // Hop from <page> to <page>; the node name stops being 'page' once next() runs dry
    for (; $reader->name === 'page'; $reader->next('page')) {
        $page = $this->getPageFromElement(new SimpleXMLElement($reader->readOuterXML()));

        foreach ($this->query as $key => $criteria) {
            if ($this->matchPage($page, $criteria)) {
                //TODO allow the user to choose what to return
                $titlesByQuery[$key][] = $page->getTitle()->getTitle();
            }
        }
    }

    $reader->close();

    return $titlesByQuery;
}
/**
 * Registers the 'strstream' wrapper, opens it with XMLReader and dumps the result of
 * read(), readOuterXML() and readInnerXML() for the first two reader positions.
 */
function main()
{
    $registered = stream_wrapper_register('strstream', 'StringWrapper');
    var_dump($registered);

    $reader = new XMLReader();
    $reader->open("strstream://");

    // Same three probes, once per node, for two consecutive nodes
    for ($pass = 0; $pass < 2; $pass++) {
        var_dump($reader->read());
        var_dump($reader->readOuterXML());
        var_dump($reader->readInnerXML());
    }
}
/**
 * Prints distribution statistics for the EOL resource XML at a fixed local URL.
 *
 * Each <taxon> is parsed on its own; its identifier is printed, and every dataObject
 * whose subject is the SPM "Distribution" info item is counted. At the end the number
 * of distribution objects and the number of taxa having at least one are printed.
 * If the resource cannot be opened a message is printed and nothing is counted.
 */
function eol_xml_stats()
{
    $path = "http://localhost/eol_php_code/applications/content_server/resources/218.xml";
    $distribution_subject = "http://rs.tdwg.org/ontology/voc/SPMInfoItems#Distribution";

    $reader = new \XMLReader();
    if (!$reader->open($path)) {
        // Reading from a reader that never opened is an error on current PHP versions
        print "\n cannot open xml file: [{$path}] \n";
        return;
    }

    $dist_count = 0;
    $taxa_count = 0;
    // Parse warnings are deliberately silenced so one bad node does not stop the scan
    while (@$reader->read()) {
        if ($reader->nodeType != \XMLReader::ELEMENT || $reader->name != "taxon") {
            continue;
        }

        // Turn the dc:/dwc: prefixes into plain names so they are reachable as properties
        $string = $reader->readOuterXML();
        $string = str_ireplace("dc:", "dc_", $string);
        $string = str_ireplace("dwc:", "dwc_", $string);

        $xml = simplexml_load_string($string);
        if (!$xml) {
            continue;
        }

        $taxon_id = (string) $xml->dc_identifier;
        print "[{$taxon_id}]";

        $taxa_with_dist = false;
        foreach ($xml->dataObject as $o) {
            if ((string) $o->subject == $distribution_subject) {
                $dist_count++;
                $taxa_with_dist = true;
            }
        }
        if ($taxa_with_dist) {
            $taxa_count++;
        }
    }
    $reader->close();

    print "\n\n";
    print "\n distribution: [{$dist_count}]";
    print "\n taxa with dist: [" . $taxa_count . "]";
    print "\n\n";
}
/**
 * filterFeed
 *
 * Reads the XML feed at $this->url and groups its <product> nodes by one component of
 * each product's filter element. For every product, the text of the child element named
 * $this->filter is split on $this->delimiter; the piece at position $this->index becomes
 * the group key and the product's outer XML is appended to $this->filteredXML[<key>].
 *
 * Only opening tags are considered, and a filter element that appears before any
 * <product> has been seen is ignored. A key of "0" is accepted; only a missing or
 * empty piece is treated as an error.
 *
 * @throws Exception if the feed cannot be opened, if a filter value has no non-empty
 *                   piece at the configured index, or if nothing was collected at all.
 */
function filterFeed()
{
    $reader = new XMLReader();
    if (!$reader->open($this->url)) {
        throw new Exception("Cannot open feed from the provided URL.");
    }

    // Outer XML of the product currently being walked; null until the first <product>
    $xml_node = null;

    while ($reader->read()) {
        if ($reader->nodeType !== XMLReader::ELEMENT) {
            // Closing tags carry the same name as opening ones; skip them
            continue;
        }

        if ($reader->name == "product") {
            //get the entire product node.
            $xml_node = $reader->readOuterXML();
        }

        if ($reader->name == $this->filter) {
            if ($xml_node === null) {
                continue;
            }

            // readString() yields '' for an empty element instead of a neighbour's value
            $nodeValues = explode($this->delimiter, $reader->readString());
            $key = isset($nodeValues[$this->index]) ? $nodeValues[$this->index] : '';
            if ($key === '') {
                $reader->close();
                throw new Exception("The index specified does not exist.");
            }
            $this->filteredXML[$key][] = $xml_node;

            //Go to the next product.
            $reader->next("product");
        }
    }
    $reader->close();

    //if the array has no items then the filtered node does not exist.
    if (empty($this->filteredXML)) {
        throw new Exception("{$this->filter} does not exist in the XML.");
    }
}
/**
 * Downloads the NatureServe species list and looks up every listed record.
 *
 * Steps: make sure the working directories exist, create the archive builder, fetch the
 * species list with curl, collect the EGT_UID of every DATA_RECORD, push a few fixed
 * ids to the front of the queue, de-duplicate, then hand the ids to
 * lookup_multiple_ids() in chunks while echoing progress every 100 records. The archive
 * is finalized at the end.
 *
 * If the downloaded list cannot be opened a message is echoed and only the fixed ids
 * are processed.
 */
public function get_all_taxa()
{
    $images_dir = DOC_ROOT . 'tmp/natureserve/images';
    if (!file_exists($images_dir)) {
        // Recursive, so the parent tmp/natureserve directory is created as well
        mkdir($images_dir, 0777, true);
    }

    $this->archive_builder = new \eol_schema\ContentArchiveBuilder(array('directory_path' => DOC_ROOT . "/temp/dwc_archive_test/"));

    $species_list_path = DOC_ROOT . "update_resources/connectors/files/natureserve_species_list.xml";
    // Quote every shell argument: URLs routinely contain characters the shell interprets
    shell_exec("rm -f " . escapeshellarg($species_list_path));
    shell_exec("curl " . escapeshellarg(self::SPECIES_LIST_URL) . " -o " . escapeshellarg($species_list_path));

    $records = array();
    $reader = new \XMLReader();
    if ($reader->open($species_list_path)) {
        // Parse warnings are deliberately silenced so a damaged download still yields what it can
        while (@$reader->read()) {
            if ($reader->nodeType == \XMLReader::ELEMENT && $reader->name == "DATA_RECORD") {
                $record = simplexml_load_string($reader->readOuterXML(), null, LIBXML_NOCDATA);
                if ($record !== false) {
                    $records[] = (string) $record->EGT_UID;
                }
            }
        }
        $reader->close();
    } else {
        echo "Could not open species list: {$species_list_path}\n";
    }
    echo "Total Records: " . count($records) . "\n";

    $chunk_size = 1;
    // Fixed ids are moved to the front of the queue (each unshift puts its id first)
    array_unshift($records, 'ELEMENT_GLOBAL.2.102211'); // Polar bear
    array_unshift($records, 'ELEMENT_GLOBAL.2.105926'); // American Bullfrog
    array_unshift($records, 'ELEMENT_GLOBAL.2.104777'); // White tailed deer
    array_unshift($records, 'ELEMENT_GLOBAL.2.100925'); // golden eagle
    $records = array_unique($records);
    $chunks = array_chunk($records, $chunk_size);

    $i = 0;
    $start_time = time_elapsed();
    foreach ($chunks as $chunk) {
        $this->lookup_multiple_ids($chunk);
        $i += $chunk_size;
        if ($i % 100 == 0) {
            // Linear extrapolation from the average time per record so far
            $estimated_total_time = (time_elapsed() - $start_time) / $i * count($records);
            echo "Time spent ({$i} records) " . time_elapsed() . "\n";
            echo "Estimated total seconds : {$estimated_total_time}\n";
            echo "Estimated total hours : " . $estimated_total_time / (60 * 60) . "\n";
            echo "Memory : " . memory_get_usage() . "\n";
        }
    }

    $this->archive_builder->finalize();
}
/**
 * Seeds the `cidades` table from the bundled KML file of Brazilian localities.
 *
 * First builds a map from upper-cased state name to state abbreviation out of the
 * `estados` table, then walks every <Placemark> of the KML and inserts one row per
 * placemark using a prepared statement. Success and failure counts are echoed at the
 * end; failed rows are echoed individually.
 *
 * @param PDO $db open database connection
 */
private function insert_data_mysql($db)
{
    // NOTE(review): the select is issued before the charset statements; the
    // ISO-8859-1 conversion below presumably relies on that order - confirm before reordering.
    $tbl = $db->query("select sigla, nome from estados");
    $db->query("SET CHARACTER SET utf8");
    $db->query("SET NAMES utf8");
    $tbl->setFetchMode(PDO::FETCH_OBJ);

    $arr_estados = array();
    while ($row = $tbl->fetch()) {
        // MySQL
        $arr_estados[mb_strtoupper(iconv("ISO-8859-1", "UTF-8", $row->nome), "UTF-8")] = $row->sigla;
        // PostgreSQL
        // $arr_estados[mb_strtoupper($row->nome, "UTF-8")] = $row->sigla;
    }
    var_dump($arr_estados);

    $xml = new XMLReader();
    if (!$xml->open('database/seeds/BR Localidades 2010 v1.kml')) {
        // Reading from a reader that never opened is an error on current PHP versions
        echo "Falha ao abrir o arquivo KML<br>";
        return;
    }

    // Skip the header information, up to the first data "record" ("Placemark")
    while ($xml->read() && $xml->name !== "Placemark") {
    }

    $insert_ok = 0;
    $insert_erro = 0;
    $sql = $db->prepare("insert into cidades(codigo, nome, sigla_estado, longitude, latitude, altitude) values(?, ?, ?, ?, ?, ?)");

    // Loop over every occurrence of "Placemark"
    while ($xml->name === "Placemark") {
        $cidade = new SimpleXMLElement($xml->readOuterXML());
        $nome = strval($cidade->name);

        // The attributes are addressed by their position inside <SchemaData>
        $sdata = $cidade->ExtendedData->SchemaData->SimpleData;
        $codigo = intval($sdata[9]);
        $estado = strval($sdata[13]);
        // An unknown state name yields null instead of an undefined-index notice
        $sigla_estado = isset($arr_estados[$estado]) ? $arr_estados[$estado] : null;
        $longitude = floatval($sdata[18]);
        $latitude = floatval($sdata[19]);
        $altitude = floatval($sdata[20]);

        $res = $sql->execute(array($codigo, $nome, $sigla_estado, $longitude, $latitude, $altitude));
        if ($res) {
            $insert_ok++;
        } else {
            $insert_erro++;
            echo $nome . " " . $codigo . " " . $sigla_estado . " " . $longitude . " " . $latitude . " " . $altitude . "<br>";
        }

        // Jump to the next record
        $xml->next("Placemark");
    }
    $xml->close();

    echo "Incluiu {$insert_ok} registros com sucesso<br>";
    echo "Falha na inclusão de {$insert_erro} registros<br>";
}
/**
 * Parses <Package> records from the data dump, prints supergroup statistics and
 * creates the archive.
 *
 * The method is currently wired to a fixed debug URL and stops after 21 parsed
 * records; the download step that would honour $data_dump_url is commented out below.
 *
 * @param string|false $data_dump_url dump location; not used while the download step
 *                                    is disabled
 */
function get_all_taxa($data_dump_url = false)
{
    /* working but commented during development...
    if(!$data_dump_url) $data_dump_url = $this->data_dump_url;
    $path = self::download_and_extract_remote_file($data_dump_url);
    echo "\n xml file: [$path] \n";
    */
    $path = "http://localhost/~eolit/xml_parser/bioproject.xml"; // debug

    $reader = new \XMLReader();
    if ($reader->open($path)) {
        $i = 0;
        // Parse warnings are deliberately silenced so a damaged dump still yields what it can
        while (@$reader->read()) {
            if ($reader->nodeType == \XMLReader::ELEMENT && $reader->name == "Package") {
                if ($xml = simplexml_load_string($reader->readOuterXML())) {
                    $i++;
                    self::parse_record_element($xml);
                    // debug limit: stop after the 21st parsed record
                    if ($i > 20) {
                        break;
                    }
                }
            }
        }
        $reader->close();
    } else {
        print "\n cannot open xml file: [{$path}] \n";
    }

    // No record parsed may mean the supergroup statistics were never created
    $supergroups = isset($this->stats["supergroup"]) ? array_keys($this->stats["supergroup"]) : array();
    print "\n";
    print "\n unique taxon_ids: " . count($this->taxon_ids);
    print "\n";
    print_r($supergroups);
    foreach ($supergroups as $supergroup) {
        print "\n[{$supergroup}] " . count($this->stats["supergroup"][$supergroup]);
    }
    print "\n\n";

    $this->create_archive();

    // Only a local, extracted dump can be removed; the debug URL cannot be unlinked
    if (is_file($path)) {
        unlink($path);
    }
}
<?php
// Steps through a small nested document node by node and dumps what
// readOuterXML() returns at every reader position.
$xml = <<<'EOF'
<a>
<b>0</b>
<c>1<d>2<e>3</e>4</d>5</c>
</a>
EOF;

$reader = new XMLReader();
$reader->XML($xml);

for ($more = $reader->read(); $more; $more = $reader->read()) {
    var_dump($reader->readOuterXML());
}
/**
 * Converts the content of an ODT document to an HTML fragment.
 *
 * The XML is walked once with XMLReader. Opening tags are translated to HTML tags
 * (driven by the switch below, $translation_table and the styles collected from
 * <style:style> elements) and remembered on $elements_tree, so that the matching
 * closing tags can be emitted when the ODT element ends. Footnotes and annotations are
 * collected separately and appended after the body.
 *
 * Each entry of $_ophir_odt_import_conf["features"] is compared against 0, 1 and 2:
 * 0 skips the element together with its subtree, 1 emits no tag for it but keeps
 * walking its content, 2 converts it.
 *
 * Attribute values and text obtained through readString() are HTML-escaped before they
 * are written to the output, like the plain text nodes are.
 *
 * @param string      $odt_file   path of the .odt file; also used to resolve embedded images
 * @param string|null $xml_string when given, this XML is converted instead of the
 *                                content.xml inside $odt_file (used for footnote bodies)
 * @return string|false the HTML followed by the collected footnotes, or false when the
 *                      input cannot be opened
 */
function odt2html($odt_file, $xml_string = NULL)
{
    global $_ophir_odt_import_conf;

    $xml = new XMLReader();
    if ($xml_string === NULL) {
        if (@$xml->open('zip://' . $odt_file . '#content.xml') === FALSE) {
            ophir_error("Unable to read file contents.");
            return false;
        }
    } else {
        if (@$xml->xml($xml_string) === FALSE) {
            ophir_error("Invalid file contents.");
            return false;
        }
    }

    $html = "";
    $elements_tree = array();
    // Static: styles learned in one call stay known in later (including recursive) calls
    static $styles = array("Quotations" => array("tags" => array("blockquote")));
    $footnotes = "";

    // ODT element name => HTML tag (optionally with attributes); FALSE means "skip the subtree"
    $translation_table = array();
    $translation_table['draw:frame'] = 'div class="odt-frame"';
    if ($_ophir_odt_import_conf["features"]["list"] === 0) {
        $translation_table["text:list"] = FALSE;
    } elseif ($_ophir_odt_import_conf["features"]["list"] === 2) {
        $translation_table["text:list"] = "ul";
        $translation_table["text:list-item"] = "li";
    }
    if ($_ophir_odt_import_conf["features"]["table"] === 0) {
        $translation_table["table:table"] = FALSE;
    } elseif ($_ophir_odt_import_conf["features"]["table"] === 2) {
        $translation_table["table:table"] = "table cellspacing=0 cellpadding=0 border=1";
        $translation_table["table:table-row"] = "tr";
        $translation_table["table:table-cell"] = "td";
    }
    if ($_ophir_odt_import_conf["features"]["table of contents"] === 0) {
        $translation_table['text:table-of-content'] = FALSE;
    } elseif ($_ophir_odt_import_conf["features"]["table of contents"] === 2) {
        $translation_table['text:table-of-content'] = 'div class="odt-table-of-contents"';
    }
    $translation_table['text:line-break'] = 'br';

    while ($xml->read()) {
        $opened_tags = array(); //This array will contain the HTML tags opened in every iteration

        if ($xml->nodeType === XMLReader::END_ELEMENT) { //Handle a closing tag
            if (empty($elements_tree)) {
                continue;
            }
            do {
                $element = array_pop($elements_tree);
                if ($element && $element["tags"]) {
                    //Close opened tags, innermost first
                    $element["tags"] = array_reverse($element["tags"]);
                    foreach ($element["tags"] as $HTML_tag) {
                        // Drop any attributes: only the tag name belongs in a closing tag
                        $HTML_tag = current(explode(" ", $HTML_tag));
                        $html .= "</" . $HTML_tag . ">";
                    }
                }
                // $element is tested first so an exhausted stack is never indexed
            } while ($element && $xml->name !== $element["name"]); //Close every opened tags. This should also handle malformed XML files
            continue;
        } elseif (in_array($xml->nodeType, array(XMLReader::ELEMENT, XMLReader::TEXT, XMLReader::SIGNIFICANT_WHITESPACE))) { //Handle tags
            switch ($xml->name) {
                case "#text": //Text
                    $html .= htmlspecialchars($xml->value);
                    break;

                case "text:h": //Title
                    if ($_ophir_odt_import_conf["features"]["header"] === 0) {
                        $xml->next();
                        break;
                    } elseif ($_ophir_odt_import_conf["features"]["header"] === 1) {
                        break;
                    }
                    $n = $xml->getAttribute("text:outline-level");
                    if ($n > 6) {
                        $n = 6;
                    }
                    $opened_tags[] = "h{$n}";
                    $html .= "\n\n<h{$n}>";
                    break;

                case "text:p": //Paragraph
                    //Just convert odf <text:p> to html <p>
                    $tags = @$styles[$xml->getAttribute("text:style-name")]["tags"];
                    // NOTE(review): as written, <p> is emitted when the style has no tags or its
                    // tags include "blockquote" - confirm this matches the intent stated below.
                    if (!($tags && !in_array("blockquote", $tags))) {
                        // Do not print a <p> immediatly after or before a <blockquote>
                        $opened_tags[] = "p";
                        $html .= "\n<p>";
                    }
                    break;

                case "text:a":
                    if ($_ophir_odt_import_conf["features"]["link"] === 0) {
                        $xml->next();
                        break;
                    } elseif ($_ophir_odt_import_conf["features"]["link"] === 1) {
                        break;
                    }
                    // Escaped so a quote or '&' in the target cannot break out of the attribute
                    $href = htmlspecialchars((string) $xml->getAttribute("xlink:href"), ENT_QUOTES);
                    $opened_tags[] = 'a';
                    $html .= '<a href="' . $href . '">';
                    break;

                case "draw:image":
                    if ($_ophir_odt_import_conf["features"]["image"] === 0) {
                        $xml->next();
                        break;
                    } elseif ($_ophir_odt_import_conf["features"]["image"] === 1) {
                        break;
                    }
                    $image_file = 'zip://' . $odt_file . '#' . $xml->getAttribute("xlink:href");
                    if (isset($_ophir_odt_import_conf["images_folder"]) && is_dir($_ophir_odt_import_conf["images_folder"])) {
                        if (ophir_is_image($image_file)) {
                            $image_to_save = $_ophir_odt_import_conf["images_folder"] . '/' . basename($image_file);
                            if (!($src = ophir_copy_file($image_file, $image_to_save))) {
                                ophir_error("Unable to move image file");
                                break;
                            }
                        } else {
                            ophir_error("Found invalid image file.");
                            break;
                        }
                    } else {
                        // No usable images folder: embed the image in the page as a data URL
                        $src = 'data:image;base64,' . base64_encode(file_get_contents($image_file));
                    }
                    $html .= "\n<img src=\"{$src}\" />";
                    break;

                case "style:style":
                    $name = $xml->getAttribute("style:name");
                    $parent = $xml->getAttribute("style:parent-style-name");
                    if (array_key_exists($parent, $styles)) {
                        $styles[$name] = $styles[$parent]; //Not optimal
                    }
                    if ($xml->isEmptyElement) {
                        break; //We can't handle that at the moment
                    }
                    // Consume the style definition up to its closing tag
                    while ($xml->read() && ($xml->name != "style:style" || $xml->nodeType != XMLReader::END_ELEMENT)) {
                        if ($xml->name == "style:text-properties") {
                            if ($xml->getAttribute("fo:font-style") == "italic") {
                                $styles[$name]["tags"][] = "em"; //Creates the style and add <em> to its tags
                            }
                            if ($xml->getAttribute("fo:font-weight") == "bold") {
                                $styles[$name]["tags"][] = "strong"; //Creates the style and add <strong> to its tags
                            }
                            if ($xml->getAttribute("style:text-underline-style") == "solid") {
                                $styles[$name]["tags"][] = "u"; //Creates the style and add <u> to its tags
                            }
                        }
                    }
                    break;

                case "text:note":
                    if ($_ophir_odt_import_conf["features"]["note"] === 0) {
                        $xml->next();
                        break;
                    } elseif ($_ophir_odt_import_conf["features"]["note"] === 1) {
                        break;
                    }
                    $note_id = $xml->getAttribute("text:id");
                    $note_name = "Note";
                    // Reset per note: a note without a body must not reuse the previous one
                    $note_content = "";
                    while ($xml->read() && ($xml->name != "text:note" || $xml->nodeType != XMLReader::END_ELEMENT)) {
                        if ($xml->name == "text:note-citation" && $xml->nodeType == XMLReader::ELEMENT) {
                            $note_name = htmlspecialchars($xml->readString());
                        } elseif ($xml->name == "text:note-body" && $xml->nodeType == XMLReader::ELEMENT) {
                            // The body is converted by a nested run over just that subtree
                            $note_content = odt2html($odt_file, $xml->readOuterXML());
                        }
                    }
                    $html .= "<sup><a href=\"#odt-footnote-{$note_id}\" class=\"odt-footnote-anchor\" name=\"anchor-odt-{$note_id}\">{$note_name}</a></sup>";
                    $footnotes .= "\n" . '<div class="odt-footnote" id="odt-footnote-' . $note_id . '" >';
                    $footnotes .= '<a class="footnote-name" href="#anchor-odt-' . $note_id . '">' . $note_name . ' .</a> ';
                    $footnotes .= $note_content;
                    $footnotes .= '</div>' . "\n";
                    break;

                case "office:annotation":
                    if ($_ophir_odt_import_conf["features"]["annotation"] === 0) {
                        $xml->next();
                        break;
                    } elseif ($_ophir_odt_import_conf["features"]["annotation"] === 1) {
                        break;
                    }
                    $annotation_id = isset($annotation_id) ? $annotation_id + 1 : 1;
                    $annotation_content = "";
                    $annotation_creator = "Anonymous";
                    $annotation_date = "";
                    do {
                        if (!$xml->read()) {
                            // Input ended inside the annotation: stop instead of spinning forever
                            break;
                        }
                        if ($xml->name == "dc:creator" && $xml->nodeType == XMLReader::ELEMENT) {
                            $annotation_creator = htmlspecialchars($xml->readString());
                        } elseif ($xml->name == "dc:date" && $xml->nodeType == XMLReader::ELEMENT) {
                            $annotation_date = date("jS \\of F Y, H\\h i\\m", strtotime($xml->readString()));
                        } elseif ($xml->nodeType == XMLReader::ELEMENT) {
                            $annotation_content .= htmlspecialchars($xml->readString());
                            $xml->next();
                        }
                    } while (!($xml->name === "office:annotation" && $xml->nodeType === XMLReader::END_ELEMENT)); //End of the note
                    $html .= '<sup><a href="#odt-annotation-' . $annotation_id . '" name="anchor-odt-annotation-' . $annotation_id . '" title="Annotation (' . $annotation_creator . ')">(' . $annotation_id . ')</a></sup>';
                    $footnotes .= "\n" . '<div class="odt-annotation" id="odt-annotation-' . $annotation_id . '" >';
                    $footnotes .= '<a class="annotation-name" href="#anchor-odt-annotation-' . $annotation_id . '"> (' . $annotation_id . ') </a>';
                    $footnotes .= "\n" . '<b>' . $annotation_creator . ' (<i>' . $annotation_date . '</i>)</b> :';
                    $footnotes .= "\n" . '<div class="odt-annotation-content">' . $annotation_content . '</div>';
                    $footnotes .= '</div>' . "\n";
                    break;

                default:
                    if (array_key_exists($xml->name, $translation_table)) {
                        if ($translation_table[$xml->name] === FALSE) {
                            $xml->next();
                            break;
                        }
                        //$tag[0] is the tag name, the remainder holds the attributes
                        $tag = explode(" ", $translation_table[$xml->name], 2);
                        $opened_tags[] = $tag[0];
                        $html .= "\n<" . $translation_table[$xml->name] . ">";
                    }
            }
        }

        if ($xml->nodeType === XMLReader::ELEMENT && !$xml->isEmptyElement) { //Opening tag
            $current_element_style = $xml->getAttribute("text:style-name");
            if ($current_element_style && isset($styles[$current_element_style])) {
                //Styling tags management
                foreach ($styles[$current_element_style]["tags"] as $HTML_tag) {
                    $html .= "<" . $HTML_tag . ">";
                    $opened_tags[] = $HTML_tag;
                }
            }
            // Remember what was opened so the matching closing tag can undo it
            $elements_tree[] = array("name" => $xml->name, "tags" => $opened_tags);
        }
    }

    return $html . $footnotes;
}
/**
 * Reads the next block of the import file into a new DOM document.
 *
 * The source is streamed with XMLReader and mirrored node by node into the returned
 * document. Elements named "Группа", "Товар" and "Предложение" are the counted units:
 * the first $this->offset of them are skipped, and copying stops once
 * $this->block_size of them have been collected (for "Предложение" the decision is
 * delegated to __getOffersCompare()). "ОписаниеГрупп" subtrees are never copied. The
 * groups container at "КоммерческаяИнформация/Классификатор/Группы" is loaded as a
 * whole and its "Группа" descendants are passed to __collectGroup().
 *
 * Side effects: $this->offset is advanced by the number of units collected, and
 * $this->complete is set to true when the end of the input was reached.
 *
 * @return DOMDocument the partial document built for this block
 */
protected function readDataBlock()
{
    $reader = new XMLReader();
    $reader->open($this->file_path);

    // Validate against the scheme for this import type when one is shipped
    $config = mainConfiguration::getInstance();
    $schemaPath = $config->includeParam('system.kernel') . 'subsystems/import/schemes/' . $this->type . '.xsd';
    if (is_file($schemaPath)) {
        $reader->setSchema($schemaPath);
    }

    $doc = new DOMDocument("1.0", "utf-8");
    $countedNames = array("Группа", "Товар", "Предложение");
    $skippedNames = array('ОписаниеГрупп');
    $collected = 0; // counted units copied into this block
    $seen = 0;      // counted units encountered so far, including skipped ones
    $parent = $doc; // DOM node that receives the next child

    $hasNode = $reader->read();
    while ($hasNode) {
        switch ($reader->nodeType) {
            case XMLReader::ELEMENT:
                if (in_array($reader->name, $skippedNames)) {
                    // next() already moved past the subtree. A bare `continue` would act
                    // like `break` inside a switch and the read() below would then
                    // swallow the node next() stopped on.
                    $hasNode = $reader->next();
                    continue 2;
                }

                if (in_array($reader->name, $countedNames)) {
                    if ($seen++ < $this->offset) {
                        // Already delivered in an earlier block
                        $hasNode = $reader->next();
                        continue 2;
                    }
                    if ($collected + 1 > $this->block_size) {
                        if ($reader->name == "Предложение") {
                            // loadXML() must be called on an instance; the static form is an error on PHP 8
                            $offerDoc = new DOMDocument();
                            $offerDoc->loadXML($reader->readOuterXML());
                            if ($this->__getOffersCompare($doc, $offerDoc, $collected)) {
                                break 2;
                            }
                        } else {
                            break 2;
                        }
                    }
                    $collected++;
                }

                $element = $doc->createElement($reader->name, $reader->value);
                $parent->appendChild($element);
                if (!$reader->isEmptyElement) {
                    // Descend: following nodes are children until the END_ELEMENT arrives
                    $parent = $element;
                }
                if ($reader->attributeCount) {
                    while ($reader->moveToNextAttribute()) {
                        $attribute = $doc->createAttribute($reader->name);
                        $attribute->appendChild($doc->createTextNode($reader->value));
                        $element->appendChild($attribute);
                    }
                }

                $nodePath = $this->__getNodePath($parent);
                if ($nodePath == "КоммерческаяИнформация/Классификатор/Группы") {
                    // The groups container is handled as one subtree instead of node by node
                    $groupsDoc = new DOMDocument('1.0', 'utf-8');
                    $groupsDoc->loadXML($reader->readOuterXML());
                    foreach ($groupsDoc->getElementsByTagName('Группа') as $groupNode) {
                        if ($seen++ < $this->offset) {
                            continue;
                        }
                        if ($collected + 1 > $this->block_size) {
                            break;
                        }
                        $this->__collectGroup($doc, $element, $groupNode);
                        $collected++;
                    }
                    // The subtree is skipped, so its END_ELEMENT never arrives: step back up here
                    $parent = $parent->parentNode;
                    $hasNode = $reader->next();
                    continue 2;
                }
                break;

            case XMLReader::END_ELEMENT:
                $parent = $parent->parentNode;
                break;

            case XMLReader::ATTRIBUTE:
                $attribute = $doc->createAttribute($reader->name);
                $attribute->appendChild($doc->createTextNode($reader->value));
                $parent->appendChild($attribute);
                break;

            case XMLReader::TEXT:
                $parent->appendChild($doc->createTextNode($reader->value));
                break;

            case XMLReader::CDATA:
                $parent->appendChild($doc->createCDATASection($reader->value));
                break;

            case XMLReader::NONE:
            default:
                break;
        }

        $hasNode = $reader->read();
    }
    $reader->close();

    $this->offset += $collected;
    if (!$hasNode) {
        // The loop ended because the input ran out, not because the block was full
        $this->complete = true;
    }

    return $doc;
}
/**
 * Builds the master list of higher-level taxa from the data dump.
 *
 * Every <record> contributes its species name to the species-level list and the names
 * found at genus, subfamily, family, order, class, phylum and kingdom to the
 * higher-level list (a name seen at several ranks keeps the last rank processed). The
 * higher-level list is sorted by name, echoed, written to $this->MASTER_LIST as
 * "taxon_id<TAB>name<TAB>rank" lines and finally passed to
 * reconcile_with_old_master_list().
 *
 * @param string|false $data_dump_url dump location; defaults to $this->data_dump_url
 */
function generate_higher_level_taxa_list($data_dump_url = false)
{
    $data_dump_url = $data_dump_url ? $data_dump_url : $this->data_dump_url;

    $path = self::download_and_extract_remote_file($data_dump_url, true); // true means it will use cache
    echo "\n xml file: [{$path}] \n";

    $reader = new \XMLReader();
    $reader->open($path);

    $sl_taxa = array(); // species-level taxa
    $hl_taxa = array(); // higher-level taxa

    // Processing order matters: a name reused at a later rank overwrites the earlier entry
    $higher_ranks = array("genus", "subfamily", "family", "order", "class", "phylum", "kingdom");

    while (@$reader->read()) {
        if ($reader->nodeType != \XMLReader::ELEMENT || $reader->name != "record") {
            continue;
        }
        $xml = simplexml_load_string($reader->readOuterXML());

        //for species-level taxa
        if ($sciname = @$xml->taxonomy->species->taxon->name) {
            $sl_taxa["{$sciname}"] = array(
                "rank" => "species",
                "taxon_id" => $xml->taxonomy->species->taxon->taxon_id,
            );
        }

        //for higher-level taxa
        foreach ($higher_ranks as $rank) {
            if ($sciname = @$xml->taxonomy->{$rank}->taxon->name) {
                $hl_taxa["{$sciname}"] = array(
                    "rank" => $rank,
                    "taxon_id" => $xml->taxonomy->{$rank}->taxon->taxon_id,
                );
            }
        }
    }

    unlink($path);
    ksort($hl_taxa);
    ksort($sl_taxa);
    echo "\n\n higher-level taxa count: " . count($hl_taxa);

    $fn = fopen($this->MASTER_LIST, "w");
    if (!$fn) {
        debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $this->MASTER_LIST);
        return;
    }

    $i = 0;
    foreach ($hl_taxa as $key => $value) {
        $i++;
        echo "\n {$i}. {$key} -- {$value['rank']} {$value['taxon_id']}";
        fwrite($fn, $value["taxon_id"] . "\t" . $key . "\t" . $value["rank"] . "\n");
    }
    echo "\n\n species-level taxa count: " . count($sl_taxa);
    echo "\n higher-level taxa count: " . count($hl_taxa);
    fclose($fn);

    self::reconcile_with_old_master_list($hl_taxa); // debug - uncomment in normal operation, comment when developing for quick processing
}
/**
 * Export page branches as an XML dump (all identifiers in this method are machine-obfuscated).
 *
 * - When the argument is empty (count() is 0), the root pages are selected through
 *   `new selector('pages')` and used as the branches instead.
 * - When the request parameter `as_file` is exactly the string '0', the exporter is
 *   executed once and the resulting XML is returned as a string.
 * - Otherwise the dump is written below ./sys-temp/export/, using the request
 *   parameter `param0` as base name and parent::getFileExt() as extension:
 *   an existing dump file is deleted when no runtime-cache marker for this source
 *   exists; files from the listed template/asset directories are copied into the
 *   export directory; the exporter is run with a limit read from the
 *   "exchange.export.limit" setting (25 when that value is not positive); the new
 *   result is then either merged into the already existing dump via
 *   XMLReader/XMLWriter (written to a ".tmp" file that replaces the dump) or saved
 *   as a fresh file.
 *
 * NOTE(review): `param0` is concatenated into filesystem paths with no sanitisation
 * visible in this method - confirm it is validated upstream.
 *
 * @param mixed $v92ec19ffde05e15769b1bb3ee05ad745 Branches to export; passed to count() and xmlExporter::addBranches().
 * @return string|false The XML string in the `as_file === '0'` case; otherwise false,
 *                      with $this->completed set from the exporter's isCompleted().
 */
public function export($v92ec19ffde05e15769b1bb3ee05ad745) { set_time_limit(0); if (!count($v92ec19ffde05e15769b1bb3ee05ad745)) { $v8be74552df93e31bbdd6b36ed74bdb6a = new selector('pages'); $v8be74552df93e31bbdd6b36ed74bdb6a->where('hierarchy')->page(0)->childs(0); $v92ec19ffde05e15769b1bb3ee05ad745 = $v8be74552df93e31bbdd6b36ed74bdb6a->result; } if (getRequest('as_file') === '0') { $ved780287e302ec3b9fd3c5e78771919f = new xmlExporter($this->getSourceName()); $ved780287e302ec3b9fd3c5e78771919f->addBranches($v92ec19ffde05e15769b1bb3ee05ad745); $result = $ved780287e302ec3b9fd3c5e78771919f->execute(); return $result->saveXML(); } $v857a5246dff0c3c79e476b004684f6d3 = "./sys-temp/export/"; $vb80bb7740288fda1f201890375a60c8f = getRequest('param0'); $v97fd815a3803a0588876bdd862014fed = $v857a5246dff0c3c79e476b004684f6d3 . $vb80bb7740288fda1f201890375a60c8f . "." . parent::getFileExt(); $v6990a54322d9232390a784c5c9247dd6 = $v857a5246dff0c3c79e476b004684f6d3 . $vb80bb7740288fda1f201890375a60c8f; if (!is_dir($v6990a54322d9232390a784c5c9247dd6)) { mkdir($v6990a54322d9232390a784c5c9247dd6, 0777, true); } if (file_exists($v97fd815a3803a0588876bdd862014fed) && !file_exists(CURRENT_WORKING_DIR . '/sys-temp/runtime-cache/' . 
md5($this->getSourceName()))) { unlink($v97fd815a3803a0588876bdd862014fed); } if ($v92ec19ffde05e15769b1bb3ee05ad745) { $v33030abc929f083da5f6c3f755b46034 = array('./tpls/', './xsltTpls/', './css/', './js/', './usels/', './umaps/', './templates/'); foreach ($v33030abc929f083da5f6c3f755b46034 as $v100664c6e2c0333b19a729f2f3ddb7dd) { if (is_dir($v100664c6e2c0333b19a729f2f3ddb7dd)) { $v736007832d2167baaae763fd3a3f3cf1 = new umiDirectory($v100664c6e2c0333b19a729f2f3ddb7dd); $v45b963397aa40d4a0063e0d85e4fe7a1 = $v736007832d2167baaae763fd3a3f3cf1->getAllFiles(1); foreach ($v45b963397aa40d4a0063e0d85e4fe7a1 as $vd6fe1d0be6347b8ef2427fa629c04485 => $vb068931cc450442b63f5b3d276ea4297) { $v8c7dd922ad47494fc02c388e12c00eac = new umiFile($vd6fe1d0be6347b8ef2427fa629c04485); if (!is_dir($v6990a54322d9232390a784c5c9247dd6 . ltrim($v8c7dd922ad47494fc02c388e12c00eac->getDirName(), '.'))) { mkdir($v6990a54322d9232390a784c5c9247dd6 . ltrim($v8c7dd922ad47494fc02c388e12c00eac->getDirName(), '.'), 0777, true); } copy($v8c7dd922ad47494fc02c388e12c00eac->getFilePath(), $v6990a54322d9232390a784c5c9247dd6 . $v8c7dd922ad47494fc02c388e12c00eac->getFilePath(true)); } } } } $v71b70dd1e455c477220693d84ccd5682 = $v97fd815a3803a0588876bdd862014fed . 
'.tmp'; $v480d1b61a0432d1319f7504a3d7318dd = (int) mainConfiguration::getInstance()->get("modules", "exchange.export.limit"); if ($v480d1b61a0432d1319f7504a3d7318dd <= 0) { $v480d1b61a0432d1319f7504a3d7318dd = 25; } $ved780287e302ec3b9fd3c5e78771919f = new xmlExporter($this->getSourceName(), $v480d1b61a0432d1319f7504a3d7318dd); $ved780287e302ec3b9fd3c5e78771919f->addBranches($v92ec19ffde05e15769b1bb3ee05ad745); $vdd988cfd769c9f7fbd795a0f5da8e751 = $ved780287e302ec3b9fd3c5e78771919f->execute(); if (file_exists($v97fd815a3803a0588876bdd862014fed)) { $v1de9b0a30075ae8c303eb420c103c320 = new XMLReader(); $va82feee3cc1af8bcabda979e8775ef0f = new XMLWriter(); $v1de9b0a30075ae8c303eb420c103c320->open($v97fd815a3803a0588876bdd862014fed); $va82feee3cc1af8bcabda979e8775ef0f->openURI($v71b70dd1e455c477220693d84ccd5682); $va82feee3cc1af8bcabda979e8775ef0f->startDocument('1.0', 'utf-8'); $va82feee3cc1af8bcabda979e8775ef0f->startElement('umidump'); $va82feee3cc1af8bcabda979e8775ef0f->writeAttribute('version', '2.0'); $va82feee3cc1af8bcabda979e8775ef0f->writeAttribute('xmlns:xlink', 'http://www.w3.org/TR/xlink'); $v7aa28ed115707345d0274032757e8991 = $v1de9b0a30075ae8c303eb420c103c320->read(); while ($v7aa28ed115707345d0274032757e8991) { if ($v1de9b0a30075ae8c303eb420c103c320->nodeType == XMLReader::ELEMENT) { $ve24455211a964330063a18670d943835 = $v1de9b0a30075ae8c303eb420c103c320->name; if ($ve24455211a964330063a18670d943835 != 'umidump') { $va82feee3cc1af8bcabda979e8775ef0f->startElement($ve24455211a964330063a18670d943835); if ($ve24455211a964330063a18670d943835 != 'meta') { if (!$v1de9b0a30075ae8c303eb420c103c320->isEmptyElement) { $v7852ddca47412c0d947ebf27eb83ed3a = $v1de9b0a30075ae8c303eb420c103c320->read(); while ($v7852ddca47412c0d947ebf27eb83ed3a) { if ($v1de9b0a30075ae8c303eb420c103c320->nodeType == XMLReader::ELEMENT) { $vcf7f5c76225a101e6320a96c02f92fc1 = $v1de9b0a30075ae8c303eb420c103c320->name; 
$va82feee3cc1af8bcabda979e8775ef0f->writeRaw($v1de9b0a30075ae8c303eb420c103c320->readOuterXML()); $v7852ddca47412c0d947ebf27eb83ed3a = $v1de9b0a30075ae8c303eb420c103c320->next(); } elseif ($v1de9b0a30075ae8c303eb420c103c320->nodeType == XMLReader::END_ELEMENT && $v1de9b0a30075ae8c303eb420c103c320->name == $ve24455211a964330063a18670d943835) { $v7852ddca47412c0d947ebf27eb83ed3a = false; } else { $v7852ddca47412c0d947ebf27eb83ed3a = $v1de9b0a30075ae8c303eb420c103c320->next(); } } } if ($vdd988cfd769c9f7fbd795a0f5da8e751->getElementsByTagName($ve24455211a964330063a18670d943835)->item(0)->hasChildNodes()) { $v268184c12df027f536154d099d497b31 = $vdd988cfd769c9f7fbd795a0f5da8e751->getElementsByTagName($ve24455211a964330063a18670d943835)->item(0)->childNodes; foreach ($v268184c12df027f536154d099d497b31 as $v1b7d5726533ab525a8760351e9b5e415) { $va5e171f642af8e3bd24c50cdc4d66fe3 = new DOMDocument(); $va5e171f642af8e3bd24c50cdc4d66fe3->formatOutput = true; $v36c4536996ca5615dcf9911f068786dc = $va5e171f642af8e3bd24c50cdc4d66fe3->importNode($v1b7d5726533ab525a8760351e9b5e415, true); $va5e171f642af8e3bd24c50cdc4d66fe3->appendChild($v36c4536996ca5615dcf9911f068786dc); $va82feee3cc1af8bcabda979e8775ef0f->writeRaw($va5e171f642af8e3bd24c50cdc4d66fe3->saveXML($v36c4536996ca5615dcf9911f068786dc, LIBXML_NOXMLDECL)); } } } elseif ($ve24455211a964330063a18670d943835 == 'meta') { $va82feee3cc1af8bcabda979e8775ef0f->writeRaw($v1de9b0a30075ae8c303eb420c103c320->readInnerXML()); $v92ec19ffde05e15769b1bb3ee05ad745 = $vdd988cfd769c9f7fbd795a0f5da8e751->getElementsByTagName('branches'); if ($v92ec19ffde05e15769b1bb3ee05ad745->item(0)) { $va82feee3cc1af8bcabda979e8775ef0f->writeRaw($vdd988cfd769c9f7fbd795a0f5da8e751->saveXML($v92ec19ffde05e15769b1bb3ee05ad745->item(0), LIBXML_NOXMLDECL)); } } $va82feee3cc1af8bcabda979e8775ef0f->fullEndElement(); $v7aa28ed115707345d0274032757e8991 = $v1de9b0a30075ae8c303eb420c103c320->next(); continue; } } $v7aa28ed115707345d0274032757e8991 = 
$v1de9b0a30075ae8c303eb420c103c320->read(); } $va82feee3cc1af8bcabda979e8775ef0f->fullEndElement(); $v1de9b0a30075ae8c303eb420c103c320->close(); $va82feee3cc1af8bcabda979e8775ef0f->endDocument(); $va82feee3cc1af8bcabda979e8775ef0f->flush(); unlink($v97fd815a3803a0588876bdd862014fed); rename($v71b70dd1e455c477220693d84ccd5682, $v97fd815a3803a0588876bdd862014fed); } else { file_put_contents($v97fd815a3803a0588876bdd862014fed, $vdd988cfd769c9f7fbd795a0f5da8e751->saveXML()); } $this->completed = $ved780287e302ec3b9fd3c5e78771919f->isCompleted(); return false; }
/**
 * Import products to temp table
 *
 * Streams the «Товар» nodes of the given file and inserts them into the
 * mSkladProductTemp table in batches of up to 200 rows. The method is
 * resumable: products up to $this->options['lastImportProduct'] are skipped,
 * and the loop stops early once $this->config['max_exec_time'] is about to
 * be exceeded. When a call inserts nothing, the total product count is
 * stored (or the import is flagged as finished when that count is falsy).
 *
 * @param string $filename File name inside $this->config['temp_dir']
 * @return string Progress message for the caller
 */
private function importProducts($filename)
{
    // Last product the previous call stopped at
    $lastImportProduct = $this->options['lastImportProduct'];
    $firstImport = empty($lastImportProduct) ? 1 : 0;

    // Start from an empty temp product table on a fresh import
    if ($firstImport) {
        $this->modx->exec("TRUNCATE TABLE {$this->modx->getTableName('mSkladProductTemp')}");
    }

    $reader = new XMLReader();
    $reader->open($this->config['temp_dir'] . $filename);

    // Advance to the first product node
    while ($reader->read() && $reader->name !== 'Товар') {
    }

    // Index of the product currently under the cursor
    $currentImportProduct = 0;
    $pendingRows = array();

    while ($reader->name === 'Товар') {
        if ($firstImport || $currentImportProduct > $lastImportProduct) {
            $xml = new SimpleXMLElement($reader->readOuterXML());

            // Flush a full batch BEFORE queueing the current product. The
            // previous code flushed instead of queueing, which silently
            // dropped the product that triggered the flush.
            if (count($pendingRows) >= 200) {
                $this->insertTempProductRows($pendingRows);
                $pendingRows = array();
            }
            $pendingRows[] = $this->buildTempProductRow($xml);

            $lastImportProduct = $_SESSION['lastImportProduct'] = $currentImportProduct;

            // Stop when the time budget is about to run out
            $exec_time = microtime(true) - $this->config['start_time'];
            if ($exec_time + 1 >= $this->config['max_exec_time']) {
                break;
            }
        }
        $reader->next('Товар');
        $currentImportProduct++;
    }
    $reader->close();

    if (count($pendingRows) > 0) {
        $this->insertTempProductRows($pendingRows);
    } else {
        // Nothing new was read in this call
        if ($totalProducts = $this->getTotalProducts()) {
            $this->options['totalProducts'] = $_SESSION['totalProducts'] = $totalProducts;
        } else {
            $_SESSION['importFinish'] = 1;
        }
    }

    return 'progress' . PHP_EOL . ' Выгружено товаров в временную базу ' . $lastImportProduct . PHP_EOL;
}

/**
 * Convert one «Товар» node into a row for mSkladProductTemp.
 *
 * Values are returned raw (not SQL-escaped); they are bound as statement
 * parameters by insertTempProductRows().
 *
 * @param SimpleXMLElement $xml Product node
 * @return array Values in column order: name, article, manufacturer, images, bar_code,
 *               description, features, properties, uuid, parent_uuid, status
 */
private function buildTempProductRow(SimpleXMLElement $xml)
{
    $name = isset($xml->Наименование) ? (string) $xml->Наименование : '';
    $description = isset($xml->Описание) ? (string) $xml->Описание : '';

    // Standard properties
    $article = isset($xml->Артикул) ? (string) $xml->Артикул : '';
    $manufacturer = isset($xml->Изготовитель) ? (string) $xml->Изготовитель->Наименование : '';
    $barCode = isset($xml->Штрихкод) ? (string) $xml->Штрихкод : '';

    // Additional properties. Names and values keep the addslashes() that was
    // applied before JSON encoding so the stored JSON matches what earlier
    // imports wrote.
    $properties = array();
    if (isset($xml->ЗначенияСвойств)) {
        foreach ($xml->ЗначенияСвойств->ЗначенияСвойства as $xmlProperty) {
            $propertyId = addslashes((string) $xmlProperty->Ид);
            $propertyData = isset($_SESSION['properties_mapping'][$propertyId])
                ? $_SESSION['properties_mapping'][$propertyId]
                : array();
            $propertyName = isset($propertyData['Наименование']) ? $propertyData['Наименование'] : '';
            $propertyVal = addslashes((string) $xmlProperty->Значение);
            if (isset($propertyData['Значения'])) {
                // The raw value is an id into the property's value dictionary
                $propertyVal = isset($propertyData['Значения'][$propertyVal])
                    ? $propertyData['Значения'][$propertyVal]
                    : null;
            }
            if (!empty($propertyVal)) {
                $properties[$propertyName] = $propertyVal;
            }
        }
    }
    if (isset($xml->СписокСвойствОписания)) {
        foreach ($xml->СписокСвойствОписания->СвойствоОписания as $xmlProperty) {
            $propertyName = addslashes((string) $xmlProperty->Наименование);
            $propertyVal = addslashes((string) $xmlProperty->Значение);
            if (!empty($propertyVal) && !isset($properties[$propertyName])) {
                $properties[$propertyName] = $propertyVal;
            }
        }
    }
    if (isset($xml->ЗначенияРеквизитов)) {
        foreach ($xml->ЗначенияРеквизитов->ЗначениеРеквизита as $xmlProperty) {
            $propertyName = addslashes((string) $xmlProperty->Наименование);
            $propertyVal = addslashes((string) $xmlProperty->Значение);
            if ($propertyName == 'ВидНоменклатуры' || $propertyName == 'ТипНоменклатуры') {
                continue;
            }
            if (!empty($propertyVal) && !isset($properties[$propertyName])) {
                $properties[$propertyName] = $propertyVal;
            }
        }
    }
    $propertiesJson = $this->modx->msklad->utf_json_encode($properties);

    // Image import is disabled in this version; the column receives an empty
    // string (previously an undefined variable was interpolated here).
    $images = '';
    $features = $this->modx->msklad->utf_json_encode((array) $xml->ХарактеристикиТовара);

    $uuid = isset($xml->Ид) ? (string) $xml->Ид : '';
    $parentUuid = isset($xml->Группы->Ид) ? (string) $xml->Группы->Ид : '';
    $status = isset($xml->Статус) ? (string) $xml->Статус : '';

    return array(
        $name, $article, $manufacturer, $images, $barCode, $description,
        $features, $propertiesJson, $uuid, $parentUuid, $status,
    );
}

/**
 * Insert a batch of rows into mSkladProductTemp with one multi-row INSERT.
 *
 * All values are bound as positional parameters instead of being
 * interpolated into the SQL text.
 *
 * @param array $rows List of rows as produced by buildTempProductRow()
 * @return void
 */
private function insertTempProductRows(array $rows)
{
    if (count($rows) === 0) {
        return;
    }
    $rowPlaceholder = '(' . implode(', ', array_fill(0, 11, '?')) . ')';
    $sql = "INSERT INTO " . $this->modx->getTableName('mSkladProductTemp')
        . " (`name`, `article`, `manufacturer`, `images`, `bar_code`, `description`,"
        . " `features`, `properties`, `uuid`, `parent_uuid`, `status`) VALUES "
        . implode(',', array_fill(0, count($rows), $rowPlaceholder)) . ";";

    $params = array();
    foreach ($rows as $row) {
        foreach ($row as $value) {
            $params[] = $value;
        }
    }

    $stmt = $this->modx->prepare($sql);
    $stmt->execute($params);
}
/**
 * Import a "player encyclopedia" XML stats feed into the sport_player_career table.
 *
 * $sport and $filenum fall back to URI segments 3 and 4 when empty. Handled sports:
 * nfl, nba, cbk, cfb, nhl, mlb, golf; every sport except nba and golf needs $filenum
 * and the script exits without it. The feed file is read from $this->statspath.
 *
 * For each top-level player node the method: deletes the player's existing rows,
 * writes one CSV line per stat attribute (yearly stats first, then career stats with
 * year 0) to a temporary file under $this->uploadpath, loads that file through the
 * `mysql` command line client (LOAD DATA LOCAL INFILE) and removes it again.
 * Afterwards update_log() is called with the feed file name. With $debug on (it is
 * hard-coded to true) progress HTML is echoed.
 *
 * NOTE(review): the shell command is assembled from the DB user, password and CSV
 * path without escapeshellarg() - confirm none of them can contain shell metacharacters.
 * NOTE(review): an unrecognised $sport leaves $filename/$layer1 unset before they are used.
 * NOTE(review): the career rows reuse $team_id from the last yearly node; it looks
 * unset for a player without yearly nodes - confirm.
 * NOTE(review): is_array($y) is applied to items iterated from a SimpleXMLElement, so
 * that branch is presumably never taken - confirm.
 *
 * @param string $sport   Sport code; defaults to URI segment 3
 * @param string $filenum Feed file number; defaults to URI segment 4
 * @return void
 */
public function player_enc($sport='',$filenum=''){ //$this->output->enable_profiler(TRUE); $debug=true; if(empty($sport))$sport=strtolower($this->uri->segment(3)); if(empty($filenum))$filenum=strtolower($this->uri->segment(4)); if(!empty($sport)){ if($sport=="nfl"){ if(empty($filenum)) exit; $filename="NFL_PLAYER_ENC\$".$filenum.".XML"; $url=$this->statspath.$filename; $layer1="nfl-player-encyclopedia"; $layer2="nfl-player-encyclopedia-year"; $career="nfl-player-encyclopedia-career"; $categories=array( "games"=>"nfl-player-encyclopedia-games", "offense"=>"nfl-player-encyclopedia-offense", "defense"=>"nfl-player-encyclopedia-defense", "misc"=>"nfl-player-encyclopedia-miscellaneous", "kicking"=>"nfl-player-encyclopedia-kicking", "punting"=>"nfl-player-encyclopedia-punting", "returning"=>"nfl-player-encyclopedia-returning", "offline"=>"nfl-player-encyclopedia-off-line", "fumbles"=>"nfl-player-encyclopedia-fumbles", ); }elseif($sport=="nba"){ $filename="NBA_PLAYER_ENC.XML"; $url=$this->statspath.$filename; $layer1="nba-player-encyclopedia"; $layer2="nba-player-encyclopedia-year"; $career="nba-player-encyclopedia-career"; $skip=array("team-name","team-city","team-code"); }elseif($sport=="cbk"){ if(empty($filenum)) exit; $filename="CBK_PLAYER_ENC\$".$filenum.".XML"; $url=$this->statspath.$filename; $layer1="cbk-player-encyclopedia"; $layer2="cbk-player-encyclopedia-year"; $career="cbk-player-encyclopedia-career"; $skip=array("team-name","team-city","team-code"); }elseif($sport=="cfb"){ if(empty($filenum)) exit; $filename="CFB_PLAYER_ENC\$".$filenum.".XML"; $url=$this->statspath.$filename; $layer1="cfb-player-encyclopedia"; $layer2="cfb-player-encyclopedia-year"; $career="cfb-player-encyclopedia-career"; $skip=array("team-name","team-city","team-code"); }elseif($sport=="nhl"){ if(empty($filenum)) exit; $filename="NHL_PLAYER_ENC\$".$filenum.".XML"; $url=$this->statspath.$filename; $layer1="nhl-player-encyclopedia"; $layer2="nhl-player-encyclopedia-year"; 
$career="nhl-player-encyclopedia-career"; $categories=array( "skating"=>"nhl-skating-stats", "goaltending"=>"nhl-goaltending-stats", ); $skip=array("team-name","team-city","team-code","sequence"); //DELETE FROM `ci_sport_player_career` WHERE `player_id` IN (301772,301769,543573,225414,296021,231130,178023,329643,550521,386807,170695,542954,392942,184268,549269,504293,170560,600117,171423,547716,330276,268149,504313,564333,607591,229378,172508,610878,544288,496357,329563) }elseif($sport=="mlb"){ if(empty($filenum)) exit; $filename="MLB_PLAYER_ENC\$".$filenum.".XML"; $url=$this->statspath.$filename; $layer1="baseball-mlb-player-encyclopedia"; $layer2="baseball-mlb-player-encyclopedia-year"; $career=false; $categories=array( "hitting"=>"baseball-mlb-player-encyclopedia-hitting", "pitching"=>"baseball-mlb-player-encyclopedia-pitching", "fielding"=>"baseball-mlb-player-encyclopedia-fielding", ); $skip=array("team-name","team-city","team-code","sequence"); }elseif($sport=="golf"){ $filename="GOLF_GOLFER_ENC_PGA.XML"; $url=$this->statspath.$filename; $layer1="golfer-encyclopedia"; $layer2="golfer-encyclopedia-year"; $career="golfer-encyclopedia-career"; $skip=array(); } echo ($debug)?"<a href='".site_url("statsfeed/".$filename)."'>XML</a>":""; $XMLReader = new XMLReader; $xml_file_path=$this->statspath.$filename; $XMLReader->open($xml_file_path); $player_ids=array(); while ($XMLReader->read() && $XMLReader->name !==$layer1); while ($XMLReader->name === $layer1) { $node = new SimpleXMLElement($XMLReader->readOuterXML()); if($sport=='golf'){ $player_id=(int)$node->{'golfer-code'}->attributes()->{'global-id'}; }else{ $player_id=(int)$node->{'player-code'}->attributes()->{'global-id'}; } //Delete this user's existing data $this->db->where('player_id',$player_id); $this->db->delete('sport_player_career'); //Create a temporary csv file $csv_path=$this->uploadpath.$player_id.".csv"; $handle=fopen($csv_path,"c"); echo ($debug)?"<hr><h1 
style='background:green;color:white;margin:0;padding:.25em;'>PLAYER_ID: ".$player_id."</h1>":""; $player_ids[]=$player_id; foreach($node->{$layer2} as $yearly){ //echo "<pre>".print_r($yearly,true)."</pre>"; $year=$yearly->attributes()->year; if(isset($yearly->{'team-code'})) $team_id=$yearly->{'team-code'}->attributes()->{'global-id'}; else $team_id=0; echo ($debug)?"<h2 style='background:yellow;margin:0;padding:.25em;'>YEAR: ".$year." - TEAM: ".$team_id."</h2>":""; if($sport=="nfl" || $sport=="nhl" || $sport=="mlb"){ foreach($categories as $category=>$catkey){ if($catkey!=false){ if(isset($yearly->{$catkey})){ $cats=$yearly->{$catkey}; echo ($debug)?"<h3 style='background:grey;margin:0;padding:.25em;'>CATEGORY: ".$category."</h3>":""; foreach($cats as $catstats){ //echo "<pre>".print_r($atts,true)."</pre>"; foreach($catstats as $key=>$atts){ foreach($atts->attributes() as $key2=>$value){ $uglykeycheck=explode('-',$key); if($key!=$key2 && $uglykeycheck[count($uglykeycheck)-1]!=$key2) $k=$key."-".$key2; else $k=$key; $value=(float)$value; $insert=array( "player_id"=>$player_id, "sport"=>$sport, "year"=>$year, "team_id"=>$team_id, "category"=>$category, "statkey"=>$k, "statvalue"=>$value ); fputcsv($handle, $insert); //echo ($debug)?"<h4>(1) ".$k." 
: ".$value."</h4>":""; } } } } } } }else{ foreach($yearly as $key1=>$y){ if(!in_array($key1,$skip)){ //echo ($debug)?"<h3 style='background:grey;margin:0;padding:.25em;'>CATEGORY: ".$category."</h3>":""; //echo ($debug)?"<h4>".$key1."</h4><pre>".print_r($y,true)."</pre>":""; if(is_array($y)){ foreach($y as $key15=>$y2){ foreach($y->attributes() as $key2=>$value){ $uglykeycheck=explode('-',$key1); if($key1!=$key2 && $key2 != $key15 && $uglykeycheck[count($uglykeycheck)-1]!=$key2) $k=$key1."-".$key15."-".$key2; elseif($key1!=$key2 && $key2 == $key15 && $uglykeycheck[count($uglykeycheck)-1]!=$key2) $k=$key1."-".$key2; elseif($key1!=$key2 && $key1 == $key15 && $uglykeycheck[count($uglykeycheck)-1]!=$key2) $k=$key15."-".$key2; elseif($key1!=$key2 && $uglykeycheck[count($uglykeycheck)-1]!=$key2) $k=$key1."-".$key2; else $k=$key1; $value=(float)$value; $insert=array( "player_id"=>$player_id, "sport"=>$sport, "year"=>$year, "team_id"=>$team_id, "category"=>"n/a", "statkey"=>$k, "statvalue"=>$value ); fputcsv($handle, $insert); //echo ($debug)?"<h4>(2) ".$k." : ".$value."</h4>":""; } } }else{ foreach($y->attributes() as $key2=>$value){ $uglykeycheck=explode('-',$key1); if($key1!=$key2 && $uglykeycheck[count($uglykeycheck)-1]!=$key2) $k=$key1."-".$key2; else $k=$key1; $value=(float)$value; $insert=array( "player_id"=>$player_id, "sport"=>$sport, "year"=>$year, "team_id"=>$team_id, "category"=>"n/a", "statkey"=>$k, "statvalue"=>$value ); fputcsv($handle, $insert); //echo ($debug)?"<h4>(3) ".$k." 
: ".$value."</h4>":""; } } } } } } //Get the career stats here if($career !== false && isset($node->{$career})){ $cats=$node->{$career}; $category="career"; echo ($debug)?"<h2 style='background:yellow;margin:0;padding:.25em;'>CAREER</h2>":""; //echo ($debug)?"<pre>".print_r($cats,true)."</pre>":""; if($sport=="nfl" || $sport=="nhl"){ foreach($categories as $category=>$catkey){ if($catkey!=false){ $cats=$node->{$career}->{$catkey}; echo ($debug)?"<h3 style='background:grey;margin:0;padding:.25em;'>CATEGORY: ".$category."</h3>":""; //echo "<pre>".print_r($cats,true)."</pre>"; foreach($cats as $catstats){ //echo "<pre>".print_r($atts,true)."</pre>"; foreach($catstats as $key=>$atts){ foreach($atts->attributes() as $key2=>$value){ $uglykeycheck=explode('-',$key); if($key!=$key2 && $uglykeycheck[count($uglykeycheck)-1]!=$key2) $k=$key."-".$key2; else $k=$key; $value=(float)$value; $insert=array( "player_id"=>$player_id, "sport"=>$sport, "year"=>0, "team_id"=>$team_id, "category"=>$category, "statkey"=>$k, "statvalue"=>$value ); fputcsv($handle, $insert); //echo ($debug)?"<h4>(4) ".$k." : ".$value."</h4>":""; } } } } } }else{ foreach($cats as $catstats){ foreach($catstats as $key1=>$y) if(!in_array($key1,$skip)){ //echo ($debug)?"<h3 style='background:grey;margin:0;padding:.25em;'>CATEGORY: ".$category."</h3>":""; foreach($y->attributes() as $key2=>$value){ $uglykeycheck=explode('-',$key1); if($key1!=$key2 && $uglykeycheck[count($uglykeycheck)-1]!=$key2) $k=$key1."-".$key2; else $k=$key1; $value=(float)$value; $insert=array( "player_id"=>$player_id, "sport"=>$sport, "year"=>0, "team_id"=>$team_id, "category"=>$category, "statkey"=>$k, "statvalue"=>$value ); fputcsv($handle, $insert); echo ($debug)?"<h4>(5) ".$k." : ".$value."</h4>":""; } } } } } fclose($handle); //exit; $command="mysql -u".$this->db->username." -p".$this->db->password." -e\"LOAD DATA LOCAL INFILE '".$csv_path."' ". "INTO TABLE ".$this->db->dbprefix('sport_player_career')." ". "FIELDS TERMINATED BY ',' ". 
"LINES TERMINATED BY '".htmlspecialchars('\\')."n' ". "(player_id, sport, year, team_id, category, statkey, statvalue);\" ".$this->db->database; //echo $command; $execresult=exec($command); //var_dump($execresult); //exit; unlink($csv_path); $XMLReader->next($layer1); } echo ($debug)?implode(',',$player_ids):""; $this->update_log($filename); } }
/**
 * READS THE GIVEN DOCX FILE INTO HTML FORMAT
 *
 * Walks the document XML, and for every w:p element emits either a <p> or,
 * when the paragraph style is Heading1..Heading6, the matching <h1>..<h6>.
 * Runs, list properties, drawings and hyperlinks inside the paragraph are
 * rendered by the helper methods of this class.
 *
 * @param String $filename The DOCX file name
 * @return String With HTML code
 */
public function readDocument($filename)
{
    $this->readZipPart($filename);

    $reader = new XMLReader();
    $reader->XML($this->doc_xml->saveXML());

    $text = '';
    $list_format = "";
    $formatting['header'] = 0;

    // loop through docx xml dom
    while ($reader->read()) {
        // Only paragraphs are of interest. Serialising the node and creating
        // a sub-reader is deferred until we know it is a w:p (the old code
        // did both for every single node).
        if ($reader->nodeType != XMLReader::ELEMENT || $reader->name !== 'w:p') {
            continue;
        }

        // parse the paragraph independently with its own reader
        $p = $reader->readOuterXML();
        $paragraph = new XMLReader();
        $paragraph->xml($p);

        // Heading level is decided per paragraph. Without the reset every
        // paragraph following a heading was emitted as a heading too.
        $formatting['header'] = 0;
        if (preg_match('/<w:pStyle w:val="(Heading.*?[1-6])"/', $p, $matches)
            && preg_match('/^Heading([1-6])\z/', $matches[1], $level)
        ) {
            $formatting['header'] = (int) $level[1];
        }

        // open h-tag or paragraph
        $text .= $formatting['header'] > 0 ? '<h' . $formatting['header'] . '>' : '<p>';

        // loop through paragraph dom; cursor movement is intentionally kept
        // exactly as before because the helpers receive the reader itself
        while ($paragraph->read()) {
            if ($paragraph->nodeType != XMLReader::ELEMENT) {
                continue;
            }
            if ($paragraph->name === 'w:r') {
                if ($list_format == "") {
                    $text .= $this->checkFormating($paragraph);
                } else {
                    $text .= $list_format['open'];
                    $text .= $this->checkFormating($paragraph);
                    $text .= $list_format['close'];
                }
                $list_format = "";
                $paragraph->next();
            } elseif ($paragraph->name === 'w:pPr') {
                // lists
                $list_format = $this->getListFormating($paragraph);
                $paragraph->next();
            } elseif ($paragraph->name === 'w:drawing') {
                // images
                $text .= $this->checkImageFormating($paragraph);
                $paragraph->next();
            } elseif ($paragraph->name === 'w:hyperlink') {
                $hyperlink = $this->getHyperlink($paragraph);
                $text .= $hyperlink['open'];
                $text .= $this->checkFormating($paragraph);
                $text .= $hyperlink['close'];
                $paragraph->next();
            }
        }
        $paragraph->close();

        // close h-tag or paragraph
        $text .= $formatting['header'] > 0 ? '</h' . $formatting['header'] . '>' : '</p>';
    }
    $reader->close();

    if ($this->debug) {
        echo "<div style='width:100%; height: 200px;'>";
        echo iconv("ISO-8859-1", "UTF-8", $text);
        echo "</div>";
    }

    return iconv("ISO-8859-1", "UTF-8", $text);
}
/**
 * Validate a resource XML against the schema and collect content-level
 * errors and warnings per scientific name.
 *
 * After SchemaValidator::validate() succeeds, every <taxon> element is
 * checked: missing dc:identifier / dc:source on the taxon and missing
 * dc:identifier / license on its data objects are warnings; text objects
 * without description or subject and non-text objects without mediaURL
 * are errors.
 *
 * @param string $uri Location of the XML resource
 * @return array|false false when $uri is empty; an empty array when schema
 *                     validation does not return true; otherwise
 *                     array($errors, $warnings)
 */
public static function eol_schema_validate($uri)
{
    if (!$uri) {
        return false;
    }
    $valid = SchemaValidator::validate($uri);
    if ($valid !== true) {
        return array();
    }

    $errors = array();
    $warnings = array();

    $reader = new \XMLReader();
    $reader->open($uri);
    while (@$reader->read()) {
        if ($reader->nodeType != \XMLReader::ELEMENT || $reader->name != "taxon") {
            continue;
        }
        $t = simplexml_load_string($reader->readOuterXML(), null, LIBXML_NOCDATA);
        if ($t === false) {
            continue; // unparsable taxon: nothing to inspect
        }

        $t_dc = $t->children("http://purl.org/dc/elements/1.1/");
        $t_dwc = $t->children("http://rs.tdwg.org/dwc/dwcore/");
        $identifier = Functions::import_decode($t_dc->identifier);
        $source_url = Functions::import_decode($t_dc->source);
        $scientific_name = Functions::import_decode($t_dwc->ScientificName);

        if (!$identifier) {
            $warnings[$scientific_name]["taxon without dc:identifier"] = true;
        }
        if (!$source_url) {
            $warnings[$scientific_name]["taxon without dc:source"] = true;
        }

        foreach ($t->dataObject as $d) {
            $d_dc = $d->children("http://purl.org/dc/elements/1.1/");
            $identifier = Functions::import_decode($d_dc->identifier);
            $data_type = Functions::import_decode($d->dataType);
            $license = Functions::import_decode($d->license);
            $source_url = Functions::import_decode($d_dc->source);
            $description = Functions::import_decode($d_dc->description, 0, 0);
            $object_url = Functions::import_decode($d->mediaURL);
            $subjects = array();
            foreach ($d->subject as $s) {
                $subjects[] = trim((string) $s);
            }

            /* Checking requirements */
            if (!$identifier) {
                $warnings[$scientific_name]["data object without dc:identifier"] = true;
            }
            if (!$license) {
                $warnings[$scientific_name]["data object without license"] = true;
            }
            // if text: must have description
            if ($data_type == "http://purl.org/dc/dcmitype/Text" && !$description) {
                $errors[$scientific_name]["text without dc:description"] = true;
            }
            // if text: must have subject
            if ($data_type == "http://purl.org/dc/dcmitype/Text" && !$subjects) {
                $errors[$scientific_name]["text without subject"] = true;
            }
            // if image, movie or sound: must have object_url
            if ($data_type != "http://purl.org/dc/dcmitype/Text" && !$object_url) {
                $errors[$scientific_name]["media without mediaURL"] = true;
            }
        }
        // The former "$xml->taxon[$i] = null" clean-up was removed: $xml does
        // not exist in this streaming version, and the parsed taxon is freed
        // when $t is reassigned on the next iteration.
    }
    $reader->close();

    return array($errors, $warnings);
}
/**
 * Parse the import file and push its categories and products to the store.
 *
 * Streams DIR_CACHE . 'exchange1c/import.xml'. Every «Группы» element is
 * passed as XML text to inserCategory(); every «Товар» element is converted
 * to a data array and passed to setProduct().
 *
 * @return void
 */
public function parseImport()
{
    $importFile = DIR_CACHE . 'exchange1c/import.xml';

    $reader = new XMLReader();
    $reader->open($importFile);

    $this->load->model('dataexchange/exchange1c');

    // next() already positions the cursor on the following node, so it must
    // not be followed by read(): that combination skipped one sibling.
    $hasNode = $reader->read();
    while ($hasNode) {
        if ($reader->nodeType == XMLReader::ELEMENT) {
            if ($reader->name === 'Группы') {
                // Category tree (sub-groups are handled by inserCategory)
                $this->inserCategory($reader->readOuterXML());
                $hasNode = $reader->next();
                continue;
            }
            if ($reader->name === 'Товар') {
                // Add / update the product, then skip its subtree so the
                // product's own «Группы» reference is not mistaken for a
                // category definition.
                $this->setProduct($this->readProductData($reader->readOuterXML()));
                $hasNode = $reader->next();
                continue;
            }
        }
        $hasNode = $reader->read();
    }
    $reader->close();
}

/**
 * Extract the product fields used by setProduct() from one «Товар» fragment.
 *
 * @param string $productXml Outer XML of a «Товар» element
 * @return array Any of the keys id, name, image, category_1c_id, model, description, status
 */
private function readProductData($productXml)
{
    $data = array();

    $product = new XMLReader();
    $product->XML($productXml);

    $hasNode = $product->read();
    while ($hasNode) {
        $skipSubtree = false;
        if ($product->nodeType == XMLReader::ELEMENT) {
            switch ($product->name) {
                case 'Ид':
                    $data['id'] = $product->readString();
                    break;
                case 'Наименование':
                    $data['name'] = $product->readString();
                    break;
                case 'Картинка': // image
                    $data['image'] = $product->readString();
                    break;
                case 'Группы':
                    $groups = simplexml_load_string($product->readOuterXML());
                    if ($groups !== false) {
                        $data['category_1c_id'] = strval($groups->Ид);
                    }
                    $skipSubtree = true;
                    break;
                case 'Модель':
                    $data['model'] = $product->readString();
                    break;
                case 'Описание':
                    $data['description'] = $product->readString();
                    break;
                case 'ЗначенияСвойств':
                case 'ХарактеристикиТовара':
                case 'СтавкиНалогов':
                case 'ЗначенияРеквизитов':
                    // Not imported; skipped so nested «Ид»/«Наименование»
                    // elements do not overwrite the product's own fields.
                    $skipSubtree = true;
                    break;
                case 'Статус':
                    $data['status'] = $product->readString();
                    break;
            }
        }
        $hasNode = $skipSubtree ? $product->next() : $product->read();
    }
    $product->close();

    return $data;
}
$text = ''; $formatting['bold'] = 'closed'; $formatting['italic'] = 'closed'; $formatting['underline'] = 'closed'; $formatting['header'] = 0; $for_image = $xmlFile2; $status = false; // loop through docx xml dom while ($reader->read()) { // look for new paragraphs if ($reader->nodeType == XMLREADER::ELEMENT && $reader->name === 'w:p') { // set up new instance of XMLReader for parsing paragraph independantly $paragraph = new XMLReader; $p = $reader->readOuterXML(); $paragraph->xml($p); // search for heading preg_match('/<w:pStyle w:val="(Heading.*?[1-6])"/', $p, $matches); switch ($matches[1]) { case 'Heading1': $formatting['header'] = 1; break; case 'Heading2': $formatting['header'] = 2; break; case 'Heading3': $formatting['header'] = 3; break; case 'Heading4': $formatting['header'] = 4; break; case 'Heading5': $formatting['header'] = 5; break;
/**
 * Collect the best COI-5P sequence per taxon from the big XML dump and save
 * the result as JSON.
 *
 * For every <record> that has sequences, the taxon id is taken from the
 * lowest rank (species first, kingdom last) that provides one. Per taxon the
 * longest COI-5P nucleotide string ("s") and the running total of sequence
 * entries seen in contributing records ("c") are kept. The result is written
 * with save_to_json_file() to $this->SAVED_SEQUENCES_FILE and the downloaded
 * file is deleted.
 *
 * @return void
 */
function save_dna_sequence_from_big_xml()
{
    echo "\n\n saving dna sequence from big xml file...\n";
    // from 212.php this file will always be re-created
    require_library('connectors/BoldsImagesAPIv2');
    $func = new BoldsImagesAPIv2();
    $path = $func->download_and_extract_remote_file();
    echo "\n\n {$path}";

    // Most specific rank first: the first rank carrying a taxon_id wins.
    $ranks = array('species', 'genus', 'subfamily', 'family', 'order', 'class', 'phylum', 'kingdom');

    $reader = new \XMLReader();
    $reader->open($path);
    $taxa_sequences = array();
    while (@$reader->read()) {
        if ($reader->nodeType != \XMLReader::ELEMENT || $reader->name != "record") {
            continue;
        }
        $xml = simplexml_load_string($reader->readOuterXML());
        if ($xml === false || !isset($xml->sequences->sequence)) {
            continue;
        }

        $taxon_id = '';
        foreach ($ranks as $rank) {
            if (!isset($xml->taxonomy->{$rank}->taxon->taxon_id)) {
                continue;
            }
            $taxon_id = trim((string) $xml->taxonomy->{$rank}->taxon->taxon_id);
            if ($taxon_id !== '') {
                break;
            }
        }
        if ($taxon_id === '') {
            // Without a taxon id the sequence cannot be attributed; it used
            // to be pooled under an empty key.
            continue;
        }

        $i = 0; // number of sequence entries in this record (any marker)
        $best_sequence = "";
        foreach ($xml->sequences->sequence as $sequence) {
            $i++;
            if ($sequence->markercode == "COI-5P") {
                $nucleotides = trim((string) $sequence->nucleotides);
                if (strlen($best_sequence) < strlen($nucleotides)) {
                    $best_sequence = $nucleotides;
                }
            }
        }
        if ($best_sequence === "") {
            continue;
        }

        if (isset($taxa_sequences[$taxon_id])) {
            if (strlen($taxa_sequences[$taxon_id]["s"]) < strlen($best_sequence)) {
                $taxa_sequences[$taxon_id]["s"] = $best_sequence;
            }
            $taxa_sequences[$taxon_id]["c"] += $i;
        } else {
            $taxa_sequences[$taxon_id] = array("s" => $best_sequence, "c" => $i);
        }
    }
    $reader->close(); // release the handle before the file is deleted

    self::save_to_json_file($taxa_sequences, $this->SAVED_SEQUENCES_FILE);
    unlink($path);
}
/**
 * {@inheritdoc}
 *
 * Wraps the node under the reader's cursor in a SimpleXMLElement and turns
 * it into a model instance.
 */
public function current()
{
    $outerXml = $this->reader->readOuterXML();
    $element = new SimpleXMLElement($outerXml);

    return $this->createModelInstance($element);
}
/**
 * Import one XML file of the exchange folder per call.
 *
 * The *.xml files below $directory/$this->xml_folder are addressed by their
 * index in the glob() result. The file at index $position (0 when omitted)
 * is processed: each element child of <root> is parsed as a product and
 * loaded together with its nomenclature entries.
 *
 * @param string   $directory Base directory of the exchange data
 * @param array    $size_post Stored in $this->size_post
 * @param int|null $position  Index of the file to process
 * @return array Keys: 'log', 'errors', 'counter' (product / nomenclature
 *               counts) and 'position' - index of the next file, TRUE when
 *               the processed file was the last one, NULL when no file was
 *               processed
 * @throws Exception When the file cannot be opened or its first node is not <root>
 */
public function run($directory, array $size_post, $position = NULL)
{
    $this->size_post = $size_post;
    $position = (int) $position;

    $list = glob($directory . DIRECTORY_SEPARATOR . $this->xml_folder . DIRECTORY_SEPARATOR . '*.xml');
    if ($list === FALSE) {
        $list = array(); // glob() reports errors with FALSE
    }

    $return = array(
        'log' => array(),
        'errors' => FALSE,
        'position' => NULL,
        'counter' => array('product' => 0, 'nomenclature' => 0),
    );

    // A negative position behaves like 0, as it did with the pointer walk.
    $index = max(0, $position);
    if (isset($list[$index])) {
        $xml_reader = new XMLReader();
        if (!$xml_reader->open($list[$index])) {
            throw new Exception('Invalid XML-file format');
        }
        $xml_reader->read();
        if ($xml_reader->name !== 'root') {
            $xml_reader->close();
            throw new Exception('Invalid XML-file format');
        }

        // Step into <root>, then walk its children sibling by sibling.
        $xml_reader->read();
        do {
            if ($xml_reader->nodeType !== XMLReader::ELEMENT) {
                continue;
            }
            $product_xml = simplexml_load_string($xml_reader->readOuterXML());
            if ($product_xml === FALSE) {
                continue; // unparsable product node
            }
            $product_data = $this->parse_product($product_xml);
            unset($product_xml);

            if (empty($product_data['category_id']) or empty($product_data['code']) or empty($product_data['name'])) {
                continue;
            }
            $product_orm = $this->load_product($product_data, $directory);
            if ($product_orm === NULL) {
                continue;
            }
            foreach ($product_data['nomenclature'] as $_nomen_xml) {
                $_nomen_data = $this->parse_nomenclature($_nomen_xml);
                $_nomen_orm = $this->load_nomenclature($_nomen_data, $product_orm->id);
                if ($_nomen_orm !== NULL) {
                    $return['counter']['nomenclature']++;
                }
            }
            $return['counter']['product']++;
        } while ($xml_reader->next());
        $xml_reader->close();

        $return['position'] = isset($list[$index + 1]) ? $index + 1 : TRUE;
    }

    $return['log'] = $this->log;
    $return['errors'] = $this->errors;

    return $return;
}
$z = new XMLReader(); $z->open('xml/' . $source); $feeds = array(); $doc = new DOMDocument(); require_once 'models/feed.php'; function x_attribute($object, $attribute) { if (isset($object[$attribute])) { return (string) $object[$attribute]; } } while ($z->read() && $z->name !== 'source') { } while ($z->name === 'source') { $node = new SimpleXMLElement($z->readOuterXML()); $feed = new Feed(); $feed->setTitle($node->title); $feed->setURL($node->feed_url); $feed->setPicture($node->picture); $feeds[] = $feed; if (!file_exists('xml/feeds/' . $feed->title . '.xml')) { file_put_contents('xml/feeds/' . $feed->title . ".xml", file_get_contents($feed->feed_url)); } else { $ttl = 1800; $age = time() - filemtime('xml/feeds/' . $feed->title . '.xml'); if ($age >= $ttl) { unlink('xml/feeds/' . $feed->title . '.xml'); file_put_contents('xml/feeds/' . $feed->title . ".xml", file_get_contents($feed->feed_url)); } }
/**
 * Reads FAQ entries from an XML export made of <Row> elements and saves each one
 * through scraperwiki::save(), keyed by its URL.
 *
 * The first <Row> is skipped. For every following row the <Item> children are read by
 * position: 0 = URL, 1 = question, 2 = answer (entity-encoded HTML), 3 = ranking,
 * 4 = last update, 5 = topic, 6 = subtopic.
 *
 * @param string $xml_file_path location of the XML export
 *
 * @return void nothing is returned; when the file cannot be opened no record is saved
 */
function get_sources($xml_file_path)
{
    // Tidy configuration used to clean up the answer HTML.
    $config = array('indent' => true, 'output-xhtml' => false, 'output-html' => true, 'show-warnings' => false, 'show-body-only' => true, 'wrap' => 200);

    // Markup that is turned into line breaks / spaces for the text-only answer.
    $search = array('<BR>', '</LI>', '</P>', '</UL>', ' ');
    $replace = array(" \n", "</LI> \n", "</P> \n\n", "</UL> \n\n", ' ');

    // Marker that precedes the FAQ id inside the record URL.
    $faq_marker = '?p_faq_id=';

    $XMLReader = new XMLReader();
    if (!$XMLReader->open($xml_file_path)) {
        // open() has already raised a warning; without a document there is nothing to import.
        return;
    }

    // Move to the first "Row" node in the file.
    while ($XMLReader->read() && $XMLReader->name !== "Row") {
    }

    // Hop from "Row" to "Row" until the end of the tree/file.
    $count = 1;
    while ($XMLReader->name === "Row") {
        // The first row is not imported.
        if ($count > 1) {
            $dom = new simple_html_dom();
            $dom->load($XMLReader->readOuterXML());

            $record = array();
            $record['url'] = $dom->find("Item", 0)->plaintext;

            // Only cut the id out when the marker is really present; otherwise the
            // offset arithmetic would return an arbitrary slice of the URL.
            $marker_pos = strpos($record['url'], $faq_marker);
            $record['faq_id'] = $marker_pos === false
                ? null
                : substr($record['url'], $marker_pos + strlen($faq_marker));

            $record['question'] = $dom->find("Item", 1)->plaintext;

            $answer_html = html_entity_decode($dom->find("Item", 2)->innertext);
            $record['answer_text'] = strip_tags(str_replace($search, $replace, $answer_html));

            $tidy = new tidy();
            $tidy->parseString($answer_html, $config, 'utf8');
            $tidy->cleanRepair();
            $record['answer_html'] = $tidy->value;

            $record['ranking'] = $dom->find("Item", 3)->plaintext;

            $last_updated = $dom->find("Item", 4)->plaintext;
            $record['last_updated'] = $last_updated ? date(DATE_ATOM, strtotime($last_updated)) : null;

            $record['topic'] = $dom->find("Item", 5)->plaintext;
            $record['subtopic'] = $dom->find("Item", 6)->plaintext;

            // Set empty strings as null
            array_walk($record, 'check_null');
            scraperwiki::save(array('url'), $record);
        }

        // Skip to the next node of interest.
        $XMLReader->next("Row");
        $count++;
    }

    $XMLReader->close();
}
/**
 * Builds a lookup table of service descriptions from an XML document of <record> elements.
 *
 * The <protocol>, <number> and <description> children of every <record> are read as
 * strings and stored as $services[protocol][number] = description.
 *
 * @param string $xmlLocation path or URI of the XML document
 *
 * @return array descriptions indexed by protocol and number; empty when the document has no <record>
 *
 * @throws \RuntimeException when the document cannot be opened
 */
function loadXML($xmlLocation)
{
    $services = array();

    $xml = new \XMLReader();
    if (!$xml->open($xmlLocation)) {
        // Reading from a reader that failed to open is an error; fail with a clear message instead.
        throw new \RuntimeException('Failed to open XML: ' . $xmlLocation);
    }

    try {
        // Move to the first "record" node
        while ($xml->read() && $xml->name !== 'record') {
        }

        // Iterate through each "record" until the end of the tree
        while ($xml->name === 'record') {
            // Import the node into a simple XML element
            $service = new \SimpleXMLElement($xml->readOuterXML());
            $xml->next('record');

            $services[(string) $service->protocol][(string) $service->number] = (string) $service->description;
        }
    } catch (\Exception $e) {
        // Release the underlying stream before propagating the failure.
        $xml->close();
        throw $e;
    }

    $xml->close();

    return $services;
}
/**
 * Streams the 1C exchange file DIR_CACHE/exchange1c/import.xml.
 *
 * Every "Группы" element met by the main reader is passed (as an XML string) to
 * inserCategory(); every "Товар" element is converted into a data array and passed
 * to setProduct().
 *
 * NOTE(review): after a "Товар" element has been handled the main reader still walks
 * into its children, so a "Группы" element nested inside a product is also passed to
 * inserCategory(). That is the original behaviour and is kept - confirm it is intended.
 *
 * @return void
 */
public function parseImport()
{
    $importFile = DIR_CACHE . 'exchange1c/import.xml';

    $this->load->model('dataexchange/exchange1c');

    $reader = new XMLReader();
    if (!$reader->open($importFile)) {
        // open() has already raised a warning; there is nothing to import.
        return;
    }

    $hasNode = $reader->read();
    while ($hasNode) {
        if ($reader->nodeType == XMLReader::ELEMENT) {
            if ($reader->name === 'Группы') {
                // Nested insertion of groups
                $this->inserCategory($reader->readOuterXML());
                // next() leaves the cursor on the node that follows the subtree; calling
                // read() on top of it would silently skip that node.
                $hasNode = $reader->next();
                continue;
            }

            if ($reader->name === 'Товар') {
                // Add/update the product
                $this->setProduct($this->parseImportProduct($reader->readOuterXML()));
            }
        }

        $hasNode = $reader->read();
    }

    $reader->close();
}

/**
 * Converts the XML of a single "Товар" element into the data array expected by setProduct().
 *
 * @param string $productXml outer XML of the product element
 *
 * @return array product fields found in the element (empty when the XML cannot be read)
 */
private function parseImportProduct($productXml)
{
    $data = array();
    if ($productXml === '') {
        return $data;
    }

    // NOTE(review): the original code tested isset($PROPERTIES[...]) against a variable that is
    // never defined in this method, so no property was ever mapped. The empty map keeps that
    // behaviour explicit; fill it with "property id => field name" pairs to enable the mapping.
    $PROPERTIES = array();

    $product = new XMLReader();
    if (!$product->XML($productXml)) {
        return $data;
    }

    $hasNode = $product->read();
    while ($hasNode) {
        // Set for containers that are handled (or ignored) as a whole.
        $skipSubtree = false;

        if ($product->nodeType == XMLReader::ELEMENT) {
            switch ($product->name) {
                case 'Ид':
                    // Take the first part of the uuid because values may look like uuid#id
                    $uuid = explode("#", $product->readString());
                    $data['id'] = $uuid[0];
                    $data['uuid'] = $uuid[0];
                    break;

                case 'Наименование':
                    $data['name'] = $product->readString();
                    break;

                case 'Картинка':
                    // The first picture is the main image, further ones are additional images
                    if (isset($data['image'])) {
                        $data['product_image'][] = $product->readString();
                    } else {
                        $data['image'] = $product->readString();
                    }
                    break;

                case 'Группы':
                    $xml = simplexml_load_string($product->readOuterXML());
                    if ($xml !== false) {
                        $data['category_1c_id'] = strval($xml->Ид);
                    }
                    unset($xml);
                    $skipSubtree = true;
                    break;

                case 'Модель':
                    $data['model'] = $product->readString();
                    break;

                case 'Описание':
                    $data['description'] = $product->readString();
                    break;

                case 'ЗначенияСвойств':
                    $xml = simplexml_load_string($product->readOuterXML());
                    if ($xml !== false) {
                        foreach ($xml as $property) {
                            $propertyId = (string) $property->Ид;
                            if (!isset($PROPERTIES[$propertyId])) {
                                continue;
                            }
                            // Plain strings stay valid after the SimpleXML document is released.
                            $value = (string) $property->Значение;

                            switch ($PROPERTIES[$propertyId]) {
                                case 'Псевдоним':
                                    $data['keyword'] = $value;
                                    break;

                                case 'Производитель':
                                    // The value comes from the import file, so it must be escaped.
                                    // NOTE(review): assumes the DB object offers escape() as the
                                    // stock OpenCart driver does - confirm.
                                    $query = $this->db->query("SELECT manufacturer_id FROM " . DB_PREFIX . "manufacturer WHERE name='" . $this->db->escape($value) . "'");
                                    if ($query->num_rows) {
                                        $data['manufacturer_id'] = $query->row['manufacturer_id'];
                                    } else {
                                        $data['manufacturer_id'] = 0;
                                    }
                                    break;

                                case 'h1':
                                    $data['h1'] = $value;
                                    break;

                                case 'title':
                                    $data['title'] = $value;
                                    break;

                                case 'Сортировка':
                                    $data['sort_order'] = $value;
                                    break;
                            }
                        }
                    }
                    unset($xml);
                    $skipSubtree = true;
                    break;

                case 'ХарактеристикиТовара':
                case 'СтавкиНалогов':
                case 'ЗначенияРеквизитов':
                    // Not imported: skip the whole container.
                    $skipSubtree = true;
                    break;

                case 'Статус':
                    $data['status'] = $product->readString();
                    break;
            }
        }

        // Either jump over the handled subtree or step to the next node, never both:
        // next() followed by read() would drop the element right after a skipped container.
        $hasNode = $skipSubtree ? $product->next() : $product->read();
    }

    $product->close();

    return $data;
}
/**
 * Parses every price-list URL configured in $this->settings->url (one URL per line).
 *
 * For each URL the <categories> element is passed to parseCategories() and <offer>
 * elements are handed to getItems(). When a feed produced items, rows of the shop that
 * do not carry the current version are marked as deleted.
 *
 * @return bool TRUE on success, FALSE when an exception was caught (its message is echoed)
 */
public function parse_cron()
{
    try {
        // Set a UTF-8 locale so that string handling works correctly
        setlocale(LC_ALL, 'ru_RU.UTF-8');

        foreach (explode("\n", trim($this->settings->url)) as $url) {
            $url = trim($url);

            if (empty($this->settings->shop_id)) {
                $this->addLog($this->settings->name, 'Не указан магазин для прайса');
                continue;
            }

            // get_headers() returns FALSE for sources it cannot query; such sources are still
            // attempted below. The status line is matched by its code so that any HTTP
            // version and any spelling of the reason phrase is recognised.
            $url_headers = @get_headers($url);
            if (is_array($url_headers) && isset($url_headers[0]) && preg_match('#^HTTP/\S+\s+404(\s|$)#i', $url_headers[0])) {
                $this->addLog($this->settings->name, 'Ссылка для парсинга ' . $url . ' не правильная');
                continue;
            }

            $this->getShop($this->settings->shop_id);
            $this->addLog($this->settings->name, 'Ссылка для парсинга ' . $url);

            $reader = new XMLReader();
            if (!$reader->open($url)) {
                // Reading from an unopened reader raises an Error that the catch block below
                // would not intercept, so log the failure and go on with the next link.
                $this->addLog($this->settings->name, 'Не удалось открыть ссылку для парсинга ' . $url);
                continue;
            }

            // Start every feed with a clean slate so that a feed without offers does not
            // reuse the result of the previous one.
            $items = NULL;

            while ($reader->read()) {
                if ($reader->nodeType !== XMLReader::ELEMENT) {
                    continue;
                }

                if ($reader->name === 'categories') {
                    $xml_categories = new SimpleXMLElement($reader->readOuterXML());
                    $this->parseCategories($xml_categories);
                }

                if ($reader->name === 'offer') {
                    $items = $this->getItems($reader);
                }
            }

            $reader->close();

            // Mark rows that are missing from the price list as deleted
            if (!empty($items)) {
                $query = $this->db->placehold("UPDATE __price SET status='deleted', product_status='updated' WHERE shop_id=? AND ver!=? AND status!='deleted'", $this->settings->shop_id, $this->ver);
                $this->db->query($query);
            }
        }
    } catch (\Exception $e) {
        echo $e->getMessage() . " (" . $e->getFile() . ", " . $e->getLine() . ")", "\n";

        return FALSE;
    }

    return TRUE;
}
/**
 * Checks an EOL schema document for taxa and data objects that lack identifying fields.
 *
 * The document is first validated with SchemaValidator::validate(); only a document that
 * passes is streamed taxon by taxon. Warnings are grouped by scientific name.
 *
 * @param string $uri location of the XML document
 *
 * @return array|false FALSE when no URI is given, an empty array when schema validation does
 *                     not return TRUE, otherwise array($errors, $warnings) - $errors is
 *                     always empty here, $warnings is [scientific name][message] => true
 */
public static function eol_schema_validate($uri)
{
    if (!$uri) {
        return false;
    }

    $valid = SchemaValidator::validate($uri);
    if ($valid !== true) {
        return array();
    }

    $errors = array();
    $warnings = array();

    $reader = new \XMLReader();
    if (!$reader->open($uri)) {
        // Nothing can be inspected; report the (empty) result instead of reading a closed reader.
        return array($errors, $warnings);
    }

    // Parser warnings are deliberately suppressed: this pass is best-effort.
    while (@$reader->read()) {
        if ($reader->nodeType != \XMLReader::ELEMENT || $reader->name != "taxon") {
            continue;
        }

        $t = simplexml_load_string($reader->readOuterXML(), null, LIBXML_NOCDATA);
        if ($t === false) {
            // The fragment could not be parsed on its own, so it cannot be inspected.
            continue;
        }

        $t_dc = $t->children("http://purl.org/dc/elements/1.1/");
        $t_dwc = $t->children("http://rs.tdwg.org/dwc/dwcore/");

        $identifier = Functions::import_decode($t_dc->identifier);
        $source_url = Functions::import_decode($t_dc->source);
        $scientific_name = Functions::import_decode($t_dwc->ScientificName);

        if (!$identifier) {
            $warnings[$scientific_name]["taxon without dc:identifier"] = true;
        }
        if (!$source_url) {
            $warnings[$scientific_name]["taxon without dc:source"] = true;
        }

        foreach ($t->dataObject as $d) {
            $d_dc = $d->children("http://purl.org/dc/elements/1.1/");
            $object_identifier = Functions::import_decode($d_dc->identifier);

            /* Checking requirements */
            if (!$object_identifier) {
                $warnings[$scientific_name]["data object without dc:identifier"] = true;
            }
        }

        // The former "$xml->taxon[$i] = null" wrote to a variable that does not exist in this
        // method and is fatal on PHP 8. Each taxon is parsed from its own string, so there is
        // no document-wide tree to release.
    }

    $reader->close();

    return array($errors, $warnings);
}
/**
 * Removes from $p_aUrls every listing URL that is still present in the given XML feeds
 * and counts the listings that were seen.
 *
 * Two kinds of feeds are handled:
 *  - the ilsa.ru dealer index: the local copy "dealers.xml" lists per-dealer feeds, each of
 *    which is downloaded to "autoi.xml" and scanned for <Vehicle> elements;
 *  - every other feed: a local copy (see processXmls_localFileName()) is scanned for
 *    $p_sListingTagName elements whose $p_sUrlTagName child holds the listing URL; the URL
 *    is normalised per source before it is looked up in $p_aUrls.
 *
 * @param array  $p_aUrls           map keyed by listing URL; keys found in the feeds are unset (by reference)
 * @param array  $p_aXmls           feed URLs to process
 * @param mixed  $p_iGZIP           truthy when the local copies are gzip-compressed
 * @param string $p_sListingTagName element name of a single listing
 * @param string $p_sUrlTagName     child element holding the listing URL
 * @param string $p_sTerritory      territory code; normalisation rules exist for 'ru' only
 * @param string $p_sSection        'vehicles', 'realestate' or 'jobs'
 * @param string $p_sSourceID       source identifier selecting the normalisation rule
 * @param int    $total_in_xmls     receives the number of listings found (by reference)
 *
 * @return void
 */
function processXmls(&$p_aUrls, $p_aXmls, $p_iGZIP, $p_sListingTagName, $p_sUrlTagName, $p_sTerritory, $p_sSection, $p_sSourceID, &$total_in_xmls)
{
    $total_in_xmls = 0;

    foreach ($p_aXmls as $l_sXml) {
        if ($l_sXml == 'http://api.ilsa.ru/sale/v1/dealers.xml') {
            // --- Dealer index: collect the per-dealer feed links first ---
            $newfile = substr($l_sXml, strrpos($l_sXml, '/') + 1);
            $l_aXmls = array();

            $xml = processXmls_openReader("compress.zlib://" . $newfile);
            if ($xml !== NULL) {
                while ($xml->read()) {
                    if ($xml->name == "Dealer" && $xml->nodeType == XMLReader::ELEMENT) {
                        $l_oDealer = simplexml_load_string($xml->readOuterXML());
                        if ($l_oDealer !== FALSE && isset($l_oDealer->Offers->Link)) {
                            $l_aXmls[] = trim($l_oDealer->Offers->Link);
                        }
                    }
                }
                $xml->close();
            }
            unset($xml);

            // Every dealer feed is downloaded into the same scratch file.
            $local_file = 'autoi.xml';

            foreach ($l_aXmls as $dealer) {
                print "\n" . date("d/m/y : H:i:s", time()) . " pid: " . getmypid() . " parsing: " . $dealer;

                // NOTE(review): credential stored in source code - consider moving it to configuration.
                $access_key = "YzQ2OTg1MWQ2YWU1Y2MwMGZlYTc5MzQ5YTliMGY4OWZlNWVjOGRmOGQ2M2EyNjFkY2MxMDcwMGYyMmQ3NTdhNg";
                $curl = curl_init($dealer);
                curl_setopt($curl, CURLOPT_HTTPHEADER, array('Authorization: Bearer ' . $access_key));
                curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
                $source = curl_exec($curl);
                curl_close($curl);

                if ($source === FALSE) {
                    print "\n" . date("d/m/y : H:i:s", time()) . " pid: " . getmypid() . " failed to copy {$dealer}...";
                    sleep(5);
                    continue;
                }

                if (file_put_contents($local_file, $source) === FALSE) {
                    print "\n" . date("d/m/y : H:i:s", time()) . " pid: " . getmypid() . " failed to save {$dealer}...";
                    continue;
                }

                $xml = processXmls_openReader($local_file);
                if ($xml === NULL) {
                    continue;
                }

                while ($xml->read()) {
                    if ($xml->name == "Vehicle" && $xml->nodeType == XMLReader::ELEMENT) {
                        $total_in_xmls++;

                        $l_oListing = simplexml_load_string($xml->readOuterXML());
                        if ($l_oListing === FALSE) {
                            // Counted, but without a parsable element there is no id to look up.
                            continue;
                        }

                        $url = 'http://auto.ilsa.ru/car/' . $l_oListing->attributes()->Id;
                        if (isset($p_aUrls[$url])) {
                            unset($p_aUrls[$url]);
                        }
                        unset($l_oListing);
                    }
                }

                $xml->close();
                unset($xml);
            }
        } else {
            // --- Regular feed: scan the local copy ---
            $newfile = processXmls_localFileName($l_sXml);
            printer("Reading " . $newfile);

            if ($p_iGZIP) {
                $linkToXmlFile = "compress.zlib://" . $newfile;
            } else {
                $linkToXmlFile = $newfile;
            }

            $xml = processXmls_openReader($linkToXmlFile);
            if ($xml !== NULL) {
                while ($xml->read()) {
                    if ($xml->name == $p_sListingTagName && $xml->nodeType == XMLReader::ELEMENT) {
                        $total_in_xmls++;

                        $l_oListing = simplexml_load_string($xml->readOuterXML());
                        if ($l_oListing === FALSE) {
                            // Counted, but without a parsable element there is no URL to look up.
                            continue;
                        }

                        $url = trim($l_oListing->{$p_sUrlTagName});
                        $url = processXmls_normalizeUrl($url, $l_oListing, $l_sXml, $p_sTerritory, $p_sSection, $p_sSourceID);

                        if (isset($p_aUrls[$url])) {
                            unset($p_aUrls[$url]);
                        }
                    }
                }
                $xml->close();
            }

            unset($xml);
            mem();
        }
    }
}

/**
 * Opens an XMLReader on the given location; prints a message and returns NULL on failure.
 */
function processXmls_openReader($p_sLocation)
{
    $l_oReader = new XMLReader();
    if (!$l_oReader->open($p_sLocation)) {
        print "\n" . date("d/m/y : H:i:s", time()) . " pid: " . getmypid() . " failed to open " . $p_sLocation;

        return NULL;
    }

    return $l_oReader;
}

/**
 * Maps a feed URL to the name of its local copy (last path segment unless listed explicitly).
 */
function processXmls_localFileName($l_sXml)
{
    // Feeds whose URL ends with "/" (or is otherwise unsuitable) get an explicit file name.
    $l_aKnown = array(
        'http://xml.jcat.ru/export/pingola/realty-vip/' => 'jcat_realty_vip.xml',
        'http://xml.jcat.ru/export/pingola/cars-vip/' => 'jcat_cars_vip.xml',
        'http://xml.jcat.ru/export/pingola/jobs-vip/' => 'jcat_jobs_vip.xml',
        'http://xml.jcat.ru/export/pingola/realty/' => 'jcat_realty.xml',
        'http://xml.jcat.ru/export/pingola/cars/' => 'jcat_cars.xml',
        'http://xml.jcat.ru/export/pingola/jobs/' => 'jcat_jobs.xml',
        'http://arenda-kvartir.ndv.ru/flib/xml_pingola.php' => 'xml_pingola_rent.xml',
        'http://www.trucksale.ru/cron/export-pingola/' => 'trucksale.xml',
        'http://www.mjobs.ru/rss/yvlAll/' => 'mjobs.xml',
        'http://110km.ru/871230y231/' => '110km.xml',
    );

    if (is_string($l_sXml) && isset($l_aKnown[$l_sXml])) {
        return $l_aKnown[$l_sXml];
    }

    return substr($l_sXml, strrpos($l_sXml, '/') + 1);
}

/**
 * Returns the part of $p_sUrl that precedes the first $p_sMarker, or $p_sUrl itself when the marker is absent.
 */
function processXmls_cutAt($p_sUrl, $p_sMarker)
{
    $l_iPos = strpos($p_sUrl, $p_sMarker);

    return $l_iPos === FALSE ? $p_sUrl : substr($p_sUrl, 0, $l_iPos);
}

/**
 * Applies the source-specific rewrite that makes a feed URL comparable with the keys of the URL map.
 *
 * The rules are evaluated in a fixed order and the first matching one wins; the order matters
 * for the carcopy.ru rule, which is selected by feed location rather than by source id.
 */
function processXmls_normalizeUrl($url, $l_oListing, $l_sXml, $p_sTerritory, $p_sSection, $p_sSourceID)
{
    if (!($p_sTerritory == 'ru')) {
        return $url;
    }

    $l_bVehicles = ($p_sSection == 'vehicles');
    $l_bRealestate = ($p_sSection == 'realestate');
    $l_bJobs = ($p_sSection == 'jobs');

    if ($l_bVehicles && $p_sSourceID == '224') {
        // Only the last path segment identifies the listing.
        $url = substr($url, strrpos($url, '/') + 1);
    } elseif (($l_bVehicles && $p_sSourceID == '382') || ($l_bRealestate && $p_sSourceID == '5597') || ($l_bJobs && $p_sSourceID == '5637')) {
        // These feeds may publish URLs without a scheme.
        if (strpos($url, 'http://') === FALSE) {
            $url = 'http://' . $url;
        }
    } elseif ($l_bVehicles && $p_sSourceID == '404') {
        $url = str_replace('http://automobile.ru/', 'http://www.automobile.ru/', $url);
        $url = str_replace('?utm', '/?utm', $url);
    } elseif ($l_bVehicles && strpos($l_sXml, 'ftp://carcopy.ru/') !== FALSE) {
        // The feed carries only the vehicle id.
        $url = 'http://zaavto.ru/search/car/detail.php?id_vehicle=' . $url;
    } elseif ($l_bVehicles && $p_sSourceID == '1364') {
        $url = processXmls_cutAt($url, '?');
    } elseif ($l_bRealestate && $p_sSourceID == '1189') {
        $url = processXmls_cutAt($url, '&');
    } elseif ($l_bRealestate && $p_sSourceID == '2387') {
        $url = str_replace('http://domus-finance.ru/', 'http://www.domus-finance.ru/', $url);
    } elseif ($l_bRealestate && $p_sSourceID == '1369') {
        $url = processXmls_cutAt($url, '#');
        $url = processXmls_cutAt($url, '?');
        if (strpos($url, '-') !== FALSE) {
            $url = substr($url, strrpos($url, '-') + 1);
        } else {
            $url = substr($url, strrpos($url, '/', -5) + 1);
        }
    } elseif ($l_bRealestate && $p_sSourceID == '1577') {
        $url = processXmls_cutAt($url, '#');
    } elseif ($l_bRealestate && $p_sSourceID == '188') {
        $url = processXmls_cutAt($url, '#');
        $url = str_replace('http://realestate.ru/', 'http://www.realestate.ru/', $url);
    } elseif ($l_bRealestate && $p_sSourceID == '4587') {
        $url = str_replace('http://http://', 'http://', $url);
    } elseif ($l_bJobs && $p_sSourceID == '290') {
        // Keep what follows "/vacancy" up to the first "-" (or up to ".html" when there is none).
        $url = substr($url, strpos($url, '/vacancy') + strlen('/vacancy'));
        if (strpos($url, '-') !== FALSE) {
            $url = substr($url, 0, strpos($url, '-'));
        } else {
            $url = substr($url, 0, strpos($url, '.html'));
        }
    } elseif (($l_bJobs && $p_sSourceID == '197') || ($l_bRealestate && $p_sSourceID == '1394') || ($l_bVehicles && ($p_sSourceID == '96' || $p_sSourceID == '1709'))) {
        $url = processXmls_cutAt($url, '?');
    } elseif ($l_bRealestate && $p_sSourceID == '5474') {
        // The listing URL is rebuilt from the "internal-id" attribute of the element.
        $tmp = 'internal-id';
        $url = 'http://soft-estate.ru/announcements/view?id=' . trim($l_oListing->attributes()->{$tmp});
    }

    return $url;
}
/**
 * Splits an XML backup into one chunk file per item and queues them in the chunks folder.
 *
 * For every element name listed in $this->xml_paths the file is streamed and each matching
 * element is written to "<chunks folder>/import_chunk_xml_<md5 of the element>". Inside the
 * chunk the element is renamed to "item" and "content:encoded" becomes "content". A chunk
 * that already exists is left untouched.
 *
 * @param string $filename path of the XML file to import
 *
 * @return array array('error' => message) when the file does not exist, otherwise
 *               array('success' => message) stating how many items were found
 */
public function queue_import_xml($filename)
{
    only_admin_access();

    if (!is_file($filename)) {
        return array('error' => "You have not provided a existing backup to restore.");
    }

    libxml_use_internal_errors(true);

    $chunks_folder = $this->get_chunks_location();
    $rss_stub = '<?xml version="1.0"?>' . "\n";
    $i = 0;

    foreach ($this->xml_paths as $xml_path) {
        $XMLReader = new \XMLReader();
        if (!$XMLReader->open($filename)) {
            // open() has already raised a warning; nothing can be read for this path.
            continue;
        }

        // Move to the first "[item name]" node in the file.
        while ($XMLReader->read() && $XMLReader->name != $xml_path) {
        }

        // Hop from one "[item name]" node to the next until the end of tree/file.
        while ($XMLReader->name === $xml_path) {
            $xml_str = $XMLReader->readOuterXML();

            if ($xml_str != '') {
                // The chunk name is derived from the untouched element so that it stays stable.
                $file_location = $chunks_folder . 'import_chunk_xml_' . md5($xml_str);

                if (!is_file($file_location)) {
                    $content_batch = str_replace('content:encoded', 'content', $xml_str);
                    $content_batch = str_replace('<' . $xml_path, '<item', $content_batch);
                    $content_batch = str_replace('</' . $xml_path, '</item', $content_batch);
                    file_put_contents($file_location, $rss_stub . $content_batch);
                }

                $i++;
            }

            // Always advance, including when the node produced no XML, otherwise the loop
            // would keep inspecting the same node forever.
            $XMLReader->next($xml_path);
        }

        $XMLReader->close();
    }

    // The former trailing write always produced an empty chunk file named after md5('');
    // every item is already written inside the loop, so no extra file is created here.

    return array('success' => $i . " xml items will be imported");
}