/**
 * Create a PHP array from an XML file or from a string containing XML.
 *
 * @param string $xmlToParse Path to an XML file, or a string containing the xml to parse
 *
 * @return array The converted configuration; an empty array when the XML content is the empty string
 *
 * @throws InvalidArgumentException if the argument is not a string or its content does not start with '<'
 * @throws \Propel\Common\Config\Exception\XmlParseException if parse errors occur
 */
public static function convert($xmlToParse)
{
    if (!is_string($xmlToParse)) {
        throw new InvalidArgumentException("XmlToArrayConverter::convert method expects an xml file to parse, or a string containing valid xml");
    }

    if (file_exists($xmlToParse)) {
        $xmlToParse = file_get_contents($xmlToParse);
        if (false === $xmlToParse) {
            // The path exists but could not be read (e.g. it is a directory).
            throw new InvalidArgumentException('Invalid xml content');
        }
    }

    //Empty xml file returns empty array
    if ('' === $xmlToParse) {
        return array();
    }

    if ($xmlToParse[0] !== '<') {
        throw new InvalidArgumentException('Invalid xml content');
    }

    // libxml_disable_entity_loader() is deprecated as of PHP 8.0 and is only
    // needed for libxml < 2.9.0, where external entities were loaded by default.
    $toggleEntityLoader = \LIBXML_VERSION < 20900;
    if ($toggleEntityLoader) {
        $currentEntityLoader = libxml_disable_entity_loader(true);
    }
    $currentInternalErrors = libxml_use_internal_errors(true);

    $xml = simplexml_load_string($xmlToParse);
    $errors = libxml_get_errors();

    // Put the global libxml state back before doing anything that may throw.
    libxml_clear_errors();
    libxml_use_internal_errors($currentInternalErrors);
    if ($toggleEntityLoader) {
        libxml_disable_entity_loader($currentEntityLoader);
    }

    if (count($errors) > 0) {
        throw new XmlParseException($errors);
    }

    return self::simpleXmlToArray($xml);
}
/**
 * Checks to see if some XML is valid.
 *
 * The string is parsed with SimpleXML; on failure the collected libxml errors
 * are reported through _debug().
 *
 * @param string $string The XML, as a string.
 * @return boolean true if the XML could be parsed.
 */
private function isValidXml($string)
{
    $orig_error_setting = libxml_use_internal_errors(true);

    // See security note elsewhere in this file and http://php.net/manual/en/function.libxml-disable-entity-loader.php
    // Supported from 5.2.11, so allow for older versions to work as well.
    // NOTE(review): passing false ENABLES the external entity loader for the
    // duration of the parse - confirm this matches the security note referred to.
    if (function_exists('libxml_disable_entity_loader')) {
        $original_el_setting = libxml_disable_entity_loader(false);
    }

    // Suppress anything PHP might moan about.
    $temp = @simplexml_load_string($string);

    // Compare strictly against false: a valid element without children or
    // attributes (e.g. "<a/>") is a SimpleXMLElement that evaluates to false.
    $ok = ($temp !== false);

    if (!$ok) {
        $errors = array();
        foreach (libxml_get_errors() as $libXMLError) {
            $errors[] = $libXMLError->file . ' : line ' . $libXMLError->line . ', col:' . $libXMLError->column . ', message:' . $libXMLError->message;
        }
        _debug("Error detected in XML : " . implode(',', $errors));
    }

    // Always empty the buffer so warnings from a successful parse do not pile up.
    libxml_clear_errors();

    if (function_exists('libxml_disable_entity_loader')) {
        libxml_disable_entity_loader($original_el_setting);
    }
    libxml_use_internal_errors($orig_error_setting);

    return $ok;
}
/**
 * Renders a static HTML snapshot of the page for the given escaped fragment,
 * echoes it and terminates the script.
 *
 * The default page is loaded into a DOM document; for every state row whose
 * URL resolves to a class, the API output is imported into the element named
 * by the row's 'div' entry and the row's stylesheets are appended to <head>.
 *
 * @param mixed $_escaped_fragment_ Requested state; a falsy value selects the default state.
 */
public static function static__escaped_fragment_($_escaped_fragment_)
{
    \libxml_use_internal_errors(true);

    $page = new \DOMDocument();
    $page->loadHTML(static::default_page($_escaped_fragment_ ?: true));
    // Parse errors are discarded on purpose (error logging is disabled here).
    if (\libxml_get_last_error()) {
        \libxml_clear_errors();
    }

    $rows = \SYSTEM\PAGE\State::get(
        static::get_apigroup(),
        $_escaped_fragment_ ?: static::get_default_state(),
        false
    );

    foreach ($rows as $row) {
        parse_str(\parse_url($row['url'], PHP_URL_QUERY), $params);
        $class = static::get_class($params);
        if (!$class) {
            continue;
        }

        $fragment = new \DOMDocument();
        $fragment->loadHTML(
            \SYSTEM\API\api::run('\\SYSTEM\\API\\verify', $class, static::get_params($params), static::get_apigroup(), true, false)
        );
        if (\libxml_get_last_error()) {
            \libxml_clear_errors();
        }

        // $row['div'] carries a one-character prefix that is dropped to get the element id.
        $target = $page->getElementById(substr($row['div'], 1));
        $target->appendChild($page->importNode($fragment->documentElement, true));

        // Load subpage css
        foreach ($row['css'] as $href) {
            $link = new \DOMDocument();
            $link->loadHTML('<link href="' . $href . '" rel="stylesheet" type="text/css">');
            $head = $page->getElementsByTagName('head')->item(0);
            $head->appendChild($page->importNode($link->documentElement, true));
        }
    }

    echo $page->saveHTML();
    new \SYSTEM\LOG\COUNTER("API was called sucessfully.");
    die;
}
/**
 * Creates and returns the XmlScheme object for an addon.
 *
 * Schemes are cached per addon id in self::$schemas. When addon.xml cannot be
 * read, the collected libxml errors are shown as an error notification.
 *
 * @param  string $addon_id Addon name
 * @param  string $path     Path to addons; defaults to the 'config.dir.addons' registry value
 * @return AXmlScheme|false Scheme object, or false when addon.xml could not be read
 */
public static function getScheme($addon_id, $path = '')
{
    if (empty($path)) {
        $path = Registry::get('config.dir.addons');
    }

    // NOTE(review): internal error collection is switched on and never restored -
    // confirm whether later XML handling relies on it staying enabled.
    libxml_use_internal_errors(true);

    if (isset(self::$schemas[$addon_id])) {
        return self::$schemas[$addon_id];
    }

    $_xml = self::readXml($path . $addon_id . '/addon.xml');

    if ($_xml === false) {
        $messages = array();
        foreach (libxml_get_errors() as $xml_error) {
            $messages[] = self::displayXmlError($xml_error, $_xml);
        }
        libxml_clear_errors();

        if (!empty($messages)) {
            fn_set_notification('E', __('xml_error'), '<br/>' . implode('<br/>', $messages));
        }

        return false;
    }

    // The root element's "scheme" attribute selects the scheme class; 1.0 is the default.
    $versions = self::getVersionDefinition();
    $version = isset($_xml['scheme']) ? (string) $_xml['scheme'] : '1.0';
    $scheme_class = $versions[$version];

    self::$schemas[$addon_id] = new $scheme_class($_xml);

    return self::$schemas[$addon_id];
}
/**
 * Fetches a URL and returns the serialized <body> element with all
 * <script> elements removed.
 *
 * @param string $url URL to fetch.
 * @return string|null '' when the response is empty, null when the document
 *                     has no <body>, otherwise the body serialized as XML.
 */
private function tempGetContent($url)
{
    $curl = new \Curl\Curl();
    $curl->get($url);
    $content = $curl->response;
    if (empty($content)) {
        return '';
    }

    $dom = new \DOMDocument('1.0', 'utf-8');

    // Collect markup warnings internally, then restore the caller's libxml
    // setting instead of leaving error collection switched on globally.
    $previousErrorMode = libxml_use_internal_errors(true);
    $dom->loadHTML($content);
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    $dom->preserveWhiteSpace = false; // remove redundant white spaces

    $body = $dom->getElementsByTagName('body');
    $bodyContent = null;

    if ($body && $body->length > 0) {
        // remove scripts; the node list is live, so it shrinks as nodes are removed
        $scripts = $dom->getElementsByTagName('script');
        while ($scripts && $scripts->length) {
            $script = $scripts->item(0);
            $script->parentNode->removeChild($script);
        }

        // saveXML() is used because saveHTML() does not accept a node argument
        // on PHP 5.3.5 or less, see: http://www.php.net/manual/de/domdocument.savehtml.php
        $bodyContent = $dom->saveXML($body->item(0));
    }

    return $bodyContent;
}
/**
 * {@inheritdoc}
 *
 * Parses an XML string. A document without child elements is returned as a
 * string, or as an array of '@attribute' entries plus a '#' entry for the text
 * when the root has attributes; any other document is handed to parseXml().
 *
 * @throws UnexpectedValueException if the data is empty, cannot be parsed, or contains a DOCTYPE
 */
public function decode($data, $format)
{
    if ('' === trim((string) $data)) {
        throw new UnexpectedValueException('Invalid XML data, it can not be empty.');
    }

    $internalErrors = libxml_use_internal_errors(true);
    // libxml_disable_entity_loader() is deprecated as of PHP 8.0 and only
    // needed for libxml < 2.9.0, where external entities were loaded by default.
    $toggleEntityLoader = \LIBXML_VERSION < 20900;
    if ($toggleEntityLoader) {
        $disableEntities = libxml_disable_entity_loader(true);
    }
    libxml_clear_errors();

    $dom = new \DOMDocument();
    $dom->loadXML($data, LIBXML_NONET);

    libxml_use_internal_errors($internalErrors);
    if ($toggleEntityLoader) {
        libxml_disable_entity_loader($disableEntities);
    }

    // Report parse errors before touching the (possibly empty) document, and
    // empty the buffer so the error does not outlive this call.
    if ($error = libxml_get_last_error()) {
        libxml_clear_errors();

        throw new UnexpectedValueException($error->message);
    }

    foreach ($dom->childNodes as $child) {
        if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) {
            throw new UnexpectedValueException('Document types are not allowed.');
        }
    }

    $xml = simplexml_import_dom($dom);

    if ($error = libxml_get_last_error()) {
        libxml_clear_errors();

        throw new UnexpectedValueException($error->message);
    }

    if (!$xml->count()) {
        if (!$xml->attributes()) {
            return (string) $xml;
        }

        $data = array();
        foreach ($xml->attributes() as $attrkey => $attr) {
            $data['@' . $attrkey] = (string) $attr;
        }
        $data['#'] = (string) $xml;

        return $data;
    }

    return $this->parseXml($xml);
}
/**
 * Loads the xml config file and selects the requested config set.
 *
 * @param string $strConfigSet Id of the config set to load; when empty, the
 *                             value of getStrDefaultConfigSet() is used.
 *
 * @throws Svn2RssException if the file cannot be parsed, no default set is
 *                          defined, or the requested set does not exist
 */
public function __construct($strConfigSet = "")
{
    // Collection must be enabled explicitly, otherwise libxml_get_errors()
    // stays empty and parse problems surface as plain PHP warnings.
    $bitPreviousErrorMode = libxml_use_internal_errors(true);
    $this->objSimpleXml = simplexml_load_file(SVN2RSS_PROJECT_ROOT . "/" . SVN2RSS_CONFIG_FILE);
    $arrParseErrors = libxml_get_errors();
    libxml_clear_errors();
    libxml_use_internal_errors($bitPreviousErrorMode);

    if ($this->objSimpleXml === false || count($arrParseErrors) > 0) {
        // LibXMLError objects cannot be imploded directly; build readable lines.
        $arrMessages = array();
        foreach ($arrParseErrors as $objOneError) {
            $arrMessages[] = trim($objOneError->message) . " (line " . $objOneError->line . ")";
        }
        throw new Svn2RssException("Error parsing xml-config-file " . SVN2RSS_CONFIG_FILE . ".\nErrors:\n" . implode("\n", $arrMessages));
    }

    if ($strConfigSet == "") {
        $strConfigSet = $this->getStrDefaultConfigSet();
    }

    if ($strConfigSet == "") {
        throw new Svn2RssException("No default config-set defined in " . SVN2RSS_CONFIG_FILE);
    }

    //load the config-set requested
    $this->strConfigSetName = $strConfigSet;
    foreach ($this->objSimpleXml->configSets->configSet as $objOneConfigSet) {
        $arrAttributes = $objOneConfigSet->attributes();
        // No break: when several sets share the id, the last one wins.
        if ($arrAttributes->id . "" == $strConfigSet) {
            $this->objCurrentConfigSetXml = $objOneConfigSet;
        }
    }

    if ($this->objCurrentConfigSetXml == null) {
        throw new Svn2RssException("Loading of config set " . $strConfigSet . " failed.");
    }
}
/**
 * Returns the RSS items, parsing the feed on first use and reusing the parsed
 * list on later calls.
 *
 * @param int  $max_length         Maximum number of items to read from the feed
 * @param bool $force_cache_update Passed through to getXML()
 * @return array List of NewsFlashRSSItem objects (empty when there is no XML)
 */
public function getItems($max_length = 10, $force_cache_update = false)
{
    if ($this->items !== null) {
        return $this->items;
    }

    $this->items = array();

    $xml = $this->getXML($max_length, $force_cache_update);
    if ($xml == '') {
        return $this->items;
    }

    $previous_mode = libxml_use_internal_errors(true);

    try {
        $document = new DOMDocument();
        $document->loadXML($xml);

        $xpath = new DOMXPath($document);
        $this->registerXPathNamespaces($xpath);

        $seen = 0;
        foreach ($xpath->query('//rss/channel/item') as $item_node) {
            if (++$seen > $max_length) {
                break;
            }
            $this->items[] = new NewsFlashRSSItem($xpath, $item_node);
        }
    } catch (Exception $e) {
        // ignore XML parsing exception, just return the items read so far.
    }

    libxml_clear_errors();
    libxml_use_internal_errors($previous_mode);

    return $this->items;
}
/**
 * Prints every libxml error currently buffered, one "Internal: <message>"
 * line per error, and then empties the libxml error buffer.
 */
function show_internal_errors()
{
    $buffered = libxml_get_errors();

    for ($i = 0, $total = count($buffered); $i < $total; $i++) {
        echo sprintf("Internal: %s\n", $buffered[$i]->message);
    }

    libxml_clear_errors();
}
/**
 * Builds a DOMDocument from an XML string, rejecting documents that carry a
 * DOCTYPE node.
 *
 * @param string $xml Non-empty XML string
 *
 * @return \DOMDocument
 *
 * @throws InvalidArgumentException if $xml is not a non-empty string
 * @throws UnparseableXmlException  if the XML cannot be loaded
 * @throws RuntimeException         if the document contains a DOCTYPE node
 */
public static function fromString($xml)
{
    if (!is_string($xml) || trim($xml) === '') {
        throw InvalidArgumentException::invalidType('non-empty string', $xml);
    }

    $entityLoader = libxml_disable_entity_loader(true);
    $internalErrors = libxml_use_internal_errors(true);
    libxml_clear_errors();

    $domDocument = self::create();
    $options = LIBXML_DTDLOAD | LIBXML_DTDATTR | LIBXML_NONET;
    // defined() expects the constant's NAME; passing the constant itself
    // (its integer value) never matched, so the flag was silently skipped.
    if (defined('LIBXML_COMPACT')) {
        $options |= LIBXML_COMPACT;
    }

    $loaded = $domDocument->loadXML($xml, $options);

    libxml_use_internal_errors($internalErrors);
    libxml_disable_entity_loader($entityLoader);

    if (!$loaded) {
        $error = libxml_get_last_error();
        libxml_clear_errors();

        throw new UnparseableXmlException($error);
    }

    libxml_clear_errors();

    foreach ($domDocument->childNodes as $child) {
        if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) {
            throw new RuntimeException('Dangerous XML detected, DOCTYPE nodes are not allowed in the XML body');
        }
    }

    return $domDocument;
}
/**
 * Loads the reddit front page (from the 'reddit_resp' transient, or freshly
 * fetched and cached for six hours) and builds the sorted headline list from
 * the children of the "siteTable" element.
 */
public function __construct()
{
    //new DOM Object
    $this->redditDOM = new DOMDocument();
    $this->headlines = array();

    //get reddit source from transient or load current source
    if (false === ($reddit_body = get_transient('reddit_resp'))) {
        $resp = wp_remote_get('http://www.reddit.com');
        // A failed request does not yield an array, so check before indexing.
        $reddit_body = (is_array($resp) && isset($resp['body'])) ? $resp['body'] : '';
        // Do not cache a failed/empty response for six hours.
        if ('' !== $reddit_body) {
            set_transient('reddit_resp', $reddit_body, 60 * 60 * 6);
        }
    }

    // Nothing to parse: leave the headline list empty.
    if (!is_string($reddit_body) || '' === $reddit_body) {
        return;
    }

    //HTML 5 doesn't include a DTD which confuses libxml so suppress errors
    $previous_error_mode = libxml_use_internal_errors(true);
    $this->redditDOM->loadHTML($reddit_body);
    //clear libXML errors and restore the caller's setting
    libxml_clear_errors();
    libxml_use_internal_errors($previous_error_mode);

    //load headlines from reddit source
    $headlinestable = $this->redditDOM->getElementById("siteTable");
    if (null === $headlinestable) {
        return;
    }

    foreach ($headlinestable->childNodes as $child) {
        // Text and comment nodes have no getElementsByTagName(); skip them.
        if (!($child instanceof DOMElement) || $child->textContent == NULL) {
            continue;
        }

        $second_link = $child->getElementsByTagName("a")->item(1);
        $link_label = $second_link ? $second_link->nodeValue : null;

        if ($link_label != "random subreddit") {
            array_push($this->headlines, new RedditHeadline($child));
        }
    }

    usort($this->headlines, "compareHeadlines");
}
/**
 * Initialize DOM document
 *
 * Loads the registered document into a fresh DOMDocument (as XML for DOC_XML,
 * as HTML for every other type) and stores any libxml errors raised while
 * parsing in $this->_documentErrors.
 *
 * @return $this
 * @throws Zend_Dom_Exception if no document is registered or parsing fails
 */
public function initDomDocument()
{
    if (null === ($document = $this->getDocument())) {
        #require_once 'Zend/Dom/Exception.php';
        throw new Zend_Dom_Exception('Cannot query; no document registered');
    }

    // Remember the caller's setting so it can be restored rather than forced off.
    $previousErrorMode = libxml_use_internal_errors(true);

    $this->_domDocument = new DOMDocument();
    switch ($this->getDocumentType()) {
        case self::DOC_XML:
            $success = $this->_domDocument->loadXML($document);
            break;
        case self::DOC_HTML:
        case self::DOC_XHTML:
        default:
            $success = $this->_domDocument->loadHTML($document);
            break;
    }

    $errors = libxml_get_errors();
    if (!empty($errors)) {
        $this->_documentErrors = $errors;
        libxml_clear_errors();
    }
    libxml_use_internal_errors($previousErrorMode);

    if (!$success) {
        throw new Zend_Dom_Exception(sprintf('Error parsing document (type == %s)', $this->getDocumentType()));
    }

    return $this;
}
/**
 * Import the xml document from the stream into the repository
 *
 * The document is imported as a system view when its first node is sv:node,
 * and as a document view otherwise.
 *
 * @param NodeInterface              $parentNode   as in importXML
 * @param NamespaceRegistryInterface $ns           as in importXML
 * @param string                     $uri          as in importXML
 * @param integer                    $uuidBehavior as in importXML
 *
 * @throws \RuntimeException              if the file does not exist
 * @throws InvalidSerializedDataException if the file cannot be opened as xml
 *
 * @see PHPCR\SessionInterface::importXML
 */
public static function importXML(NodeInterface $parentNode, NamespaceRegistryInterface $ns, $uri, $uuidBehavior)
{
    // Check the file before touching the libxml settings so that this early
    // exit cannot leave internal error handling switched on.
    if (!file_exists($uri)) {
        throw new \RuntimeException("File {$uri} does not exist or is not readable");
    }

    $use_errors = libxml_use_internal_errors(true);
    libxml_clear_errors();

    $xml = new FilteredXMLReader();
    $xml->open($uri);
    if (libxml_get_errors()) {
        libxml_clear_errors();
        libxml_use_internal_errors($use_errors);
        throw new InvalidSerializedDataException("Invalid xml file {$uri}");
    }

    try {
        $xml->read();

        if ('node' == $xml->localName && NamespaceRegistryInterface::NAMESPACE_SV == $xml->namespaceURI) {
            // TODO: validate with DTD?
            self::importSystemView($parentNode, $ns, $xml, $uuidBehavior);
        } else {
            self::importDocumentView($parentNode, $ns, $xml, $uuidBehavior);
        }
    } catch (\Exception $e) {
        // restore libxml setting
        libxml_use_internal_errors($use_errors);
        // and rethrow exception to not hide it
        throw $e;
    }

    libxml_use_internal_errors($use_errors);
}
/**
 * sanitize the html
 *
 * Loads the markup into a DOM document, optionally removes every element
 * listed in $this->_htmlTags and strips the attributes listed in
 * $this->_htmlAttributes from the direct children of <body>, then returns
 * the markup of the first child of <body>.
 *
 * @since 2.2.0
 *
 * @param string $html target with html
 * @param boolean $filter optional filter nodes
 *
 * @return string
 */
public function sanitize($html = null, $filter = true)
{
    /* nothing to parse */
    if ((string) $html === '') {
        return '';
    }

    /* collect parser warnings internally for the whole operation */
    $previousErrorMode = libxml_use_internal_errors(true);

    $doc = new DOMDocument();
    $doc->loadHTML($html);
    $body = $doc->getElementsByTagName('body')->item(0);

    /* filter nodes */
    if ($filter === true) {
        /* process tags */
        foreach ($this->_htmlTags as $tag) {
            /* snapshot the live node list, removing while iterating it skips nodes */
            $matches = iterator_to_array($doc->getElementsByTagName($tag));
            foreach ($matches as $match) {
                if ($match->parentNode !== null) {
                    $match->parentNode->removeChild($match);
                }
            }
        }

        /* process attributes */
        /* NOTE(review): only direct children of body are processed, nested elements keep their attributes - confirm this is intended */
        if ($body !== null) {
            foreach ($body->childNodes as $childNode) {
                /* text and comment nodes have no attributes */
                if (!($childNode instanceof DOMElement)) {
                    continue;
                }
                foreach ($this->_htmlAttributes as $attribute) {
                    if ($childNode->hasAttribute($attribute)) {
                        $childNode->removeAttribute($attribute);
                    }
                }
            }
        }
    }

    /* clear errors and restore the previous libxml setting */
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    if ($body === null) {
        return '';
    }
    $output = $doc->saveHTML($body->childNodes->item(0));
    return $output;
}
/**
 * Convert string with xml data to php array.
 *
 * The string is first checked with SimpleXML; the first libxml error found is
 * reported as an exception. The array itself is built from an XMLReader by
 * xmlToArray().
 *
 * @throws Exception if the XML cannot be parsed
 *
 * @param string $string
 *
 * @return array
 */
public function read($string)
{
    // Both settings are process-wide: remember them so they can be restored
    // on every exit path instead of leaking into unrelated XML handling.
    $previousErrorMode = libxml_use_internal_errors(true);
    $previousEntityLoader = libxml_disable_entity_loader(true);

    try {
        $result = simplexml_load_string($string, null, LIBXML_IMPORT_FLAGS);

        if (!$result) {
            $errors = libxml_get_errors();
            libxml_clear_errors();

            // Only the first error is reported: the loop throws immediately.
            foreach ($errors as $error) {
                $text = '';

                switch ($error->level) {
                    case LIBXML_ERR_WARNING:
                        $text .= _s('XML file contains warning %1$s:', $error->code);
                        break;
                    case LIBXML_ERR_ERROR:
                        $text .= _s('XML file contains error %1$s:', $error->code);
                        break;
                    case LIBXML_ERR_FATAL:
                        $text .= _s('XML file contains fatal error %1$s:', $error->code);
                        break;
                }

                $text .= trim($error->message) . ' [ Line: ' . $error->line . ' | Column: ' . $error->column . ' ]';
                throw new Exception($text);
            }
        }

        $xml = new XMLReader();
        $xml->xml($string);
        $array = $this->xmlToArray($xml);
        $xml->close();
    } catch (Exception $e) {
        libxml_disable_entity_loader($previousEntityLoader);
        libxml_use_internal_errors($previousErrorMode);
        throw $e;
    }

    libxml_disable_entity_loader($previousEntityLoader);
    libxml_use_internal_errors($previousErrorMode);

    return $array;
}
/**
 * Fetches a page and extracts the linked entries of its first table body.
 *
 * For each row of the first <tbody>, the first link inside the first cell
 * yields one entry. Rows without such a link are skipped.
 *
 * @param string $url Page to fetch via get_html().
 * @return array|false List of array('name' => ..., 'link' => ...) entries,
 *                     or false (after echoing 'connection error') when the
 *                     page could not be fetched.
 */
function get_commitees($url)
{
    $html = get_html($url);
    if ($html === false) {
        echo 'connection error';
        return false;
    }

    $oldSetting = libxml_use_internal_errors(true);
    libxml_clear_errors();

    $dom = new DOMDocument();
    if ((string) $html !== '') {
        $dom->loadHTML($html);
    }

    $commitees = array();
    $tbody = $dom->getElementsByTagName('tbody')->item(0);

    if ($tbody !== null) {
        foreach ($tbody->getElementsByTagName('tr') as $tr) {
            $firstCell = $tr->getElementsByTagName('td')->item(0);
            if ($firstCell === null) {
                continue; // e.g. a header row
            }

            $anchor = $firstCell->getElementsByTagName('a')->item(0);
            if ($anchor === null) {
                // No link: skip, rather than re-appending the previous row's entry.
                continue;
            }

            $commitees[] = array(
                'name' => $anchor->nodeValue,
                'link' => $anchor->getAttribute('href'),
            );
        }
    }

    libxml_clear_errors();
    libxml_use_internal_errors($oldSetting);

    return $commitees;
}
/**
 * Check a given XML file against the DRV rules
 *
 * Despite the method name, the return value reports ERRORS: it is true when
 * the schema file is missing or (outside debug mode) validation fails.
 *
 * @param string $pathToFile full path to the XML file
 * @return bool if there were any errors during processing
 */
private function is_valid_drv_file($pathToFile)
{
    $hasErrors = false;

    // Enable user error handling, remembering the previous setting
    $previousErrorMode = libxml_use_internal_errors(true);

    $xml = new \DOMDocument();
    $xml->load($pathToFile);

    $pathToSchema = realpath($this->get('kernel')->getRootDir() . '/Resources/drv_import/meldungen_2010.xsd');
    // realpath() yields false for a missing file
    if (false === $pathToSchema || !file_exists($pathToSchema)) {
        $message = 'Konnte DRV-Schema auf Server nicht finden!';
        $this->addFlash('error', $message);
        $this->get('logger')->warning($message . ' Gesuchter Pfad: ' . $pathToSchema);
        $hasErrors = true;
    }

    if (!$hasErrors && !$xml->schemaValidate($pathToSchema)) {
        if (self::DRV_DEBUG) {
            // NOTE(review): in debug mode a failed validation is printed but
            // NOT reported through the return value - confirm this is intended.
            print '<b>DOMDocument::schemaValidate() generated Errors!</b>' . "\n";
            $errors = libxml_get_errors();
            foreach ($errors as $error) {
                print '<<<<<<<<<<<<<<<<<<<<<<<<<' . "\n";
                print $this->libxml_display_error($error);
                print_r($error);
                print '>>>>>>>>>>>>>>>>>>>>>>>>>' . "\n";
            }
        } else {
            $this->addFlash('error', 'Nur XML-Export-Dateien vom DRV sind erlaubt!');
            $hasErrors = true;
        }
    }

    // Empty the buffer on every path and hand back the caller's setting
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    return $hasErrors;
}
/**
 * Applies this template to a document through the processor.
 *
 * @param \sylma\dom\handler $doc  Document to transform
 * @param bool               $bXML When true the result is wrapped in a dom
 *                                 handler, otherwise the result of
 *                                 transformToXML() is returned as is
 * @return \sylma\dom\handler|mixed
 */
public function parseDocument(dom\handler $doc, $bXML = true)
{
    // WARNING, XML_Document typed can cause crashes
    $mResult = null;
    $dom = $this->getControler();

    if ($doc->isEmpty()) {
        $doc->throwException('Cannot parse empty document');
    }

    if ($this->isEmpty()) {
        $this->throwException(t('Cannot parse empty template'));
    }

    $this->includeExternals();

    // Remember the caller's setting so it is restored instead of forced off.
    $bPreviousErrors = libxml_use_internal_errors(true);

    $this->getProcessor()->importStylesheet($this->getDocument());
    $this->retrieveErrors();
    libxml_clear_errors();

    if ($bXML) {
        $doc->getRoot();
        $mResult = $this->getProcessor()->transformToDoc($doc->getDocument());

        if ($mResult && $mResult->documentElement) {
            $mResult = $dom->create('handler', array($mResult));
        } else {
            // Restore before reporting, in case throwException() does not return.
            libxml_use_internal_errors($bPreviousErrors);
            $this->throwException('No result on parsing');
        }
    } else {
        $mResult = $this->getProcessor()->transformToXML($doc->getDocument());
    }

    $this->retrieveErrors();
    libxml_clear_errors();
    libxml_use_internal_errors($bPreviousErrors);

    $dom->addStat('parse', array($this, $doc));

    return $mResult;
}
/**
 * {@inheritdoc}
 *
 * Reads the <message> entries of the <context> whose <name> equals the
 * domain and adds every non-empty translation to the catalogue.
 *
 * @api
 */
public function load($resource, $locale, $domain = 'messages')
{
    if (!stream_is_local($resource)) {
        throw new InvalidResourceException(sprintf('This is not a local file "%s".', $resource));
    }

    if (!file_exists($resource)) {
        throw new NotFoundResourceException(sprintf('File "%s" not found.', $resource));
    }

    try {
        $dom = XmlUtils::loadFile($resource);
    } catch (\InvalidArgumentException $e) {
        throw new InvalidResourceException(sprintf('Unable to load "%s".', $resource), $e->getCode(), $e);
    }

    $internalErrors = libxml_use_internal_errors(true);
    libxml_clear_errors();

    $xpath = new \DOMXPath($dom);
    $contextNames = $xpath->evaluate(sprintf('//TS/context/name[text()="%s"]', $domain));

    $catalogue = new MessageCatalogue($locale);

    // Only a single, unambiguous context is loaded.
    if (1 == $contextNames->length) {
        $root = $contextNames->item(0)->nextSibling->parentNode->parentNode;

        foreach ($root->getElementsByTagName('message') as $translation) {
            $translationValue = (string) $translation->getElementsByTagName('translation')->item(0)->nodeValue;

            if (!empty($translationValue)) {
                $source = (string) $translation->getElementsByTagName('source')->item(0)->nodeValue;
                $catalogue->set($source, $translationValue, $domain);
            }

            // Kept from the original; the loop variable is overwritten on the next iteration.
            $translation = $translation->nextSibling;
        }

        $catalogue->addResource(new FileResource($resource));
    }

    libxml_use_internal_errors($internalErrors);

    return $catalogue;
}
/**
 * Builds a JobModel for every item of the RSS feed.
 *
 * The item title is split on ':' into company name and position. The
 * company's twitter handle is scraped from the job page (li.twitter) and left
 * empty when the page cannot be fetched.
 *
 * @return JobModel[]
 */
public function getJobs()
{
    $jobs = [];
    $tz = new \DateTimeZone('Europe/London');

    foreach ($this->getRss()->channel->item as $job) {
        $jobClass = new JobModel();
        $explodedTitle = explode(':', (string) $job->title);

        $jobClass->applyurl = (string) $job->guid;
        $jobClass->position = count($explodedTitle) > 1
            ? trim($explodedTitle[1])
            : trim((string) $job->title);
        $jobClass->dateadded = (new \DateTime((string) $job->pubDate))
            ->setTimezone($tz)
            ->format('Y-m-d H:i:s');
        $jobClass->description = (string) $job->description;
        $jobClass->sourceid = self::SOURCE_ID;
        $jobClass->company->name = trim($explodedTitle[0]);

        $twitter = '';
        $html = file_get_contents($jobClass->applyurl);

        // A failed or empty download must not reach loadHTML(): it rejects
        // empty input, which would abort the whole feed.
        if (is_string($html) && '' !== $html) {
            $doc = new \DOMDocument();

            // Collect markup warnings internally, then restore the caller's setting.
            $previousErrorMode = libxml_use_internal_errors(true);
            $doc->loadHTML($html);
            libxml_clear_errors();
            libxml_use_internal_errors($previousErrorMode);

            $xpath = new \DOMXpath($doc);
            $elements = $xpath->query("//li[@class='twitter']");
            if ($elements->length > 0) {
                $twitter = str_replace('@', '', trim($elements->item(0)->textContent));
            }
        }

        $jobClass->company->twitter = $twitter;
        $jobClass->company->logo = '';

        $jobs[] = $jobClass;
    }

    return $jobs;
}
/**
 * Loads the file and returns one SimpleXmlElement per handle name.
 *
 * Handles that occur several times in the file are merged: the children of
 * all same-named handles are concatenated and re-parsed under a single root
 * element carrying the handle name.
 *
 * @return SimpleXmlElement[] keyed by handle name
 * @throws RuntimeException in case of load error (malformed xml, etc)
 */
public function load()
{
    $this->validate();

    $previousMode = libxml_use_internal_errors(true);
    $document = simplexml_load_file($this->filePath);
    $parseErrors = libxml_get_errors();
    libxml_clear_errors();
    libxml_use_internal_errors($previousMode);

    if (false === $document) {
        $lines = array_map(
            function ($parseError) {
                return sprintf('%s, line %s, column %s', trim($parseError->message), $parseError->line, $parseError->column);
            },
            $parseErrors
        );

        throw new RuntimeException(sprintf(
            'File "%s" has a malformed xml structure: %s',
            $this->filePath,
            PHP_EOL . implode(PHP_EOL, $lines)
        ));
    }

    // Serialize the children of every handle first, because the same handle
    // name may appear several times in the xml file.
    $markupByHandle = array();
    foreach ($document->children() as $handle => $handleNode) {
        $markup = isset($markupByHandle[$handle]) ? $markupByHandle[$handle] : '';
        foreach ($handleNode->children() as $childNode) {
            $markup .= $childNode->asXml();
        }
        $markupByHandle[$handle] = $markup;
    }

    $handles = array();
    foreach ($markupByHandle as $handle => $markup) {
        $handles[$handle] = simplexml_load_string(sprintf('<%1$s>%2$s</%1$s>', $handle, $markup));
    }

    return $handles;
}
/**
 * Parses a localization pack and installs its content.
 *
 * The pack's name and version are read from the root element's attributes.
 * With an empty $selection every part (states, taxes, currencies,
 * configuration, modules, group display method and - when the language is not
 * installed yet - languages and units) is installed; otherwise only the
 * selected '_install<Name>' methods are called.
 *
 * @param string      $file                  XML content of the localization pack
 * @param array       $selection             Names of the parts to install; empty installs everything
 * @param bool        $install_mode          Passed on to the currency/language installers; also
 *                                           controls whether default language/currency are updated
 * @param string|null $iso_localization_pack ISO code of the country to load and enable
 *
 * @return bool|int false when the XML cannot be used or the country cannot be
 *                  loaded; otherwise the accumulated result of the install
 *                  steps (an integer once `&=` has been applied)
 */
public function loadLocalisationPack($file, $selection, $install_mode = false, $iso_localization_pack = null)
{
    // Parse warnings are suppressed; a failed parse (or a falsy root element) aborts.
    if (!($xml = @simplexml_load_string($file))) {
        return false;
    }
    libxml_clear_errors();

    $main_attributes = $xml->attributes();
    $this->name = (string) $main_attributes['name'];
    $this->version = (string) $main_attributes['version'];

    if ($iso_localization_pack) {
        $id_country = (int) Country::getByIso($iso_localization_pack);

        if ($id_country) {
            $country = new Country($id_country);
        }
        // $country is only defined when $id_country is non-zero; the first
        // condition short-circuits before it is read otherwise.
        if (!$id_country || !Validate::isLoadedObject($country)) {
            $this->_errors[] = Tools::displayError(sprintf('Cannot load country : %1d', $id_country));
            return false;
        }
        if (!$country->active) {
            $country->active = 1;
            // A failed update is recorded but does not abort the import.
            if (!$country->update()) {
                $this->_errors[] = Tools::displayError(sprintf('Cannot enable the associated country: %1s', $country->name));
            }
        }
    }

    $res = true;

    if (empty($selection)) {
        // Every step runs regardless of earlier failures; results are AND-ed together.
        $res &= $this->_installStates($xml);
        $res &= $this->_installTaxes($xml);
        $res &= $this->_installCurrencies($xml, $install_mode);
        $res &= $this->installConfiguration($xml);
        $res &= $this->installModules($xml);
        $res &= $this->updateDefaultGroupDisplayMethod($xml);

        if (($res || $install_mode) && isset($this->iso_code_lang)) {
            // Fall back to language id 1 when the ISO code is unknown.
            if (!($id_lang = (int) Language::getIdByIso($this->iso_code_lang, true))) {
                $id_lang = 1;
            }
            if (!$install_mode) {
                Configuration::updateValue('PS_LANG_DEFAULT', $id_lang);
            }
        } elseif (!isset($this->iso_code_lang) && $install_mode) {
            $id_lang = 1;
        }
        // NOTE(review): $id_lang is not assigned when neither branch above runs
        // (e.g. a failed step outside install mode) - confirm against callers.

        if (!Language::isInstalled(Language::getIsoById($id_lang))) {
            $res &= $this->_installLanguages($xml, $install_mode);
            $res &= $this->_installUnits($xml);
        }

        if ($install_mode && $res && isset($this->iso_currency)) {
            Cache::clean('Currency::getIdByIsoCode_*');
            $res &= Configuration::updateValue('PS_CURRENCY_DEFAULT', (int) Currency::getIdByIsoCode($this->iso_currency));
            Currency::refreshCurrencies();
        }
    } else {
        foreach ($selection as $selected) {
            // No need to specify the install_mode because if the selection mode is used, then it's not the install
            $res &= Validate::isLocalizationPackSelection($selected) ? $this->{'_install' . $selected}($xml) : false;
        }
    }

    return $res;
}
/**
 * Load the videos from a specified page. Is partly recursive: when the page
 * links to older videos, that page is loaded too and its videos are appended.
 *
 * @param string $url
 *
 * @return array|false array('videos' => list of video arrays, 'total_videos' => int),
 *                     or false when the page body is empty
 */
public function _retrieve_videos($url)
{
    $body = wp_remote_retrieve_body(wp_safe_remote_get($url));

    if ('' === $body) {
        return false;
    }

    $dom = new DOMDocument();

    // Collect markup warnings internally, then restore the caller's setting.
    $previous_error_mode = libxml_use_internal_errors(true);
    $dom->loadHTML($body);
    libxml_clear_errors();
    libxml_use_internal_errors($previous_error_mode);

    $finder = new DOMXPath($dom);
    $videos = $finder->query('//*[contains(@class, "video-list")]/li');
    $older_videos = $finder->query('//*[contains(@class, "nav-previous")]/a');

    // 'videos' must start as an array: appending to a string with [] is a fatal error.
    $data = array('videos' => array(), 'total_videos' => $videos->length);

    /** @var $video \DOMNode */
    foreach ($videos as $video) {
        $img = $finder->query('*[contains(@class, "video-thumbnail")]/img', $video)->item(0)->getAttribute('src');
        $a_text = $finder->query('*[contains(@class, "video-description")]/h4/a', $video)->item(0)->nodeValue;
        $a_href = $finder->query('*[contains(@class, "video-description")]/h4/a', $video)->item(0)->getAttribute('href');
        $event = $finder->query('*[contains(@class, "video-description")]/*[contains(@class, "video-events")]/a', $video)->item(0)->nodeValue;
        $description = $finder->query('*[contains(@class, "video-description")]/*[contains(@class, "video-excerpt")]/p', $video)->item(0)->nodeValue;

        // The first three whitespace-separated words of the excerpt are parsed as the date.
        preg_match('/^((?:\\S+\\s+){2}\\S+).*/', $description, $matches);
        $description = str_replace('—', '–', $description);

        // Without a match the current time is used, as before, but without a notice.
        $date = new DateTime(isset($matches[1]) ? $matches[1] : 'now');

        $data['videos'][] = array(
            'title'       => $a_text,
            'date'        => $date->format('Y-m-d'),
            'url'         => $a_href,
            'image'       => $img,
            'event'       => $event,
            'description' => $description,
        );
    }

    if ($older_videos->length) {
        $more_videos = $this->_retrieve_videos($older_videos->item(0)->getAttribute('href'));

        // The recursive call yields false when the older page could not be loaded.
        if (is_array($more_videos)) {
            $data['videos'] = array_merge($data['videos'], $more_videos['videos']);
            $data['total_videos'] += $more_videos['total_videos'];
        }
    }

    return $data;
}
/**
 * Validates an XML string against an XSD schema file.
 *
 * The problems found are stored in $this->_errors: LibXMLError objects for
 * parse/validation problems, or an array with a 'message' key otherwise.
 *
 * @param string $xml    XML content to validate
 * @param string $schema Path to the schema file
 * @return bool true when no errors were recorded
 */
public function validate($xml,$schema)
{
    // Start clean so a failure from an earlier call cannot fail this one.
    $this->_errors = array();

    // Enable user error handling, remembering the previous setting
    $previousErrorMode = libxml_use_internal_errors(true);
    libxml_clear_errors();

    try {
        if(empty($xml)) {
            throw new Exception("You provided an empty XML string");
        }

        // loadXML() must be called on an instance; a static call is not
        // supported on current PHP versions.
        $doc = new DOMDocument();
        if(!$doc->loadXML($xml)){
            $this->_errors = libxml_get_errors();
        } elseif(!@$doc->schemaValidate($schema)){
            $this->_errors = libxml_get_errors();
        } else {
            $doc = null; // valid: nothing to record
        }

        // A failure without any collected libxml error must still count as an error.
        if($doc !== null && empty($this->_errors)){
            $this->_errors = array(0 => array('message'=>'XML validation failed'));
        }
    } catch (Exception $e) {
        $this->_errors = array(0 => array('message'=>$e->getMessage()));
    }

    // Error Cleanup & restore the previous error handling
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    // If there are no errors, assume that it is all OK!
    return empty($this->_errors);
}
/**
 * Validate XML to be valid for import
 *
 * The XML is preprocessed in place and then parsed with SimpleXML; every
 * problem found is added to $errors (when given) under 'form-validation'.
 *
 * @param string $xml XML content, passed by reference and preprocessed
 * @param WP_Error[optional] $errors Collector for validation messages
 * @return bool Validation status
 */
public static function validateXml(&$xml, $errors = NULL)
{
    if (FALSE === $xml or '' == $xml) {
        if ($errors) {
            $errors->add('form-validation', __('WP All Import can\'t read your file.<br/><br/>Probably, you are trying to import an invalid XML feed. Try opening the XML feed in a web browser (Google Chrome is recommended for opening XML files) to see if there is an error message.<br/>Alternatively, run the feed through a validator: http://validator.w3.org/<br/>99% of the time, the reason for this error is because your XML feed isn\'t valid.<br/>If you are 100% sure you are importing a valid XML feed, please contact WP All Import support.', 'wp_all_import_plugin'));
        }
        return false;
    }

    PMXI_Import_Record::preprocessXml($xml);

    if (!function_exists('simplexml_load_string')) {
        if ($errors) {
            $errors->add('form-validation', __('Required PHP components are missing.', 'wp_all_import_plugin'));
        }
        if ($errors) {
            $errors->add('form-validation', __('WP All Import requires the SimpleXML PHP module to be installed. This is a standard feature of PHP, and is necessary for WP All Import to read the files you are trying to import.<br/>Please contact your web hosting provider and ask them to install and activate the SimpleXML PHP module.', 'wp_all_import_plugin'));
        }
        return false;
    }

    // Parse only to collect libxml's errors; the parsed document itself is not used.
    libxml_use_internal_errors(true);
    libxml_clear_errors();
    @simplexml_load_string($xml);
    $xml_errors = libxml_get_errors();
    libxml_clear_errors();

    if (!$xml_errors) {
        return true;
    }

    $error_msg = '<strong>' . __('Invalid XML', 'wp_all_import_plugin') . '</strong><ul>';
    foreach ($xml_errors as $xml_error) {
        $error_msg .= '<li>'
            . __('Line', 'wp_all_import_plugin') . ' ' . $xml_error->line . ', '
            . __('Column', 'wp_all_import_plugin') . ' ' . $xml_error->column . ', '
            . __('Code', 'wp_all_import_plugin') . ' ' . $xml_error->code . ': '
            . '<em>' . trim(esc_html($xml_error->message)) . '</em>'
            . '</li>';
    }
    $error_msg .= '</ul>';

    if ($errors) {
        $errors->add('form-validation', $error_msg);
    }

    return false;
}
/**
 * Strips blacklisted tags and attributes from content.
 *
 * The content is returned untouched when it is empty or cannot be parsed.
 *
 * See following for blacklist:
 * https://github.com/ampproject/amphtml/blob/master/spec/amp-html-format.md#html-tags
 */
public static function strip($content)
{
    if (empty($content)) {
        return $content;
    }

    $tags = self::get_blacklisted_tags();
    $attributes = self::get_blacklisted_attributes();
    $protocols = self::get_blacklisted_protocols();

    $previous_mode = libxml_use_internal_errors(true);

    // The fragment is wrapped in a dummy document: XML needs a single parent
    // node, and the body gives one place to walk and to extract the result from.
    $document = new DOMDocument();
    $loaded = $document->loadHTML(sprintf('<html><body>%s</body></html>', $content));

    libxml_clear_errors();
    libxml_use_internal_errors($previous_mode);

    if (!$loaded) {
        return $content;
    }

    $body = $document->getElementsByTagName('body')->item(0);

    self::strip_tags($body, $tags);
    self::strip_attributes_recursive($body, $attributes, $protocols);

    // Only the children of the body tag are wanted, since the input is a subset of HTML.
    $pieces = array();
    foreach ($body->childNodes as $child) {
        $pieces[] = $document->saveXML($child);
    }

    return implode('', $pieces);
}
/**
 * Validates the XML file against the XSD file and records the outcome on the
 * test property.
 *
 * The file is streamed through XMLReader; the libxml error buffer is checked
 * every Constants::XML_PROCESSESING_CHECK_ERROR_COUNT reads so that reading
 * stops soon after the first validation error.
 */
public function runTest()
{
    $previousErrorMode = libxml_use_internal_errors(true);
    // Errors left over from earlier parsing must not fail this validation.
    libxml_clear_errors();

    $xmlPath = join(DIRECTORY_SEPARATOR, array($this->directory, $this->fileName));
    $xsdPath = join(DIRECTORY_SEPARATOR, array($this->directory, $this->xsdFilename));

    $xml = new XMLReader();
    // A file that cannot be opened, or a schema that cannot be loaded, counts
    // as a failed validation.
    $ready = $xml->open($xmlPath) && $xml->setSchema($xsdPath);

    $this->logger->trace(__METHOD__);
    $this->logger->info(' XML file to test validity is ' . $this->fileName . ' using XSD file ' . $this->xsdFilename);

    if ($ready) {
        // You have to parse the XML-file if you want it to be validated
        $currentReadCount = 1;
        $validationFailed = false;

        while ($xml->read() && $validationFailed == false) {
            // Break as soon as the file is shown not to be valid. A few more
            // messages could be collected, but a manual check is best once the
            // file is known to be incorrect. Speed is what matters here!
            if ($currentReadCount++ % Constants::XML_PROCESSESING_CHECK_ERROR_COUNT == 0) {
                if (count(libxml_get_errors()) > 0) {
                    $validationFailed = true;
                }
            }
        }
    }

    if ($ready && count(libxml_get_errors()) == 0) {
        $this->testProperty->addTestResult(true);
        $this->logger->info(' RESULT Validation of [' . $this->fileName . '] against [' . $this->xsdFilename . '] succeeded');
        $this->testProperty->addTestResultDescription('Validation of [' . $this->fileName . '] against [' . $this->xsdFilename . '] succeeded');
        $this->testProperty->addTestResultReportDescription('Filen ' . $this->fileName . ' validerer mot filen ' . $this->xsdFilename);
    } else {
        $this->testProperty->addTestResult(false);
        $this->logger->error(' RESULT Validation of [' . $this->fileName . '] against [' . $this->xsdFilename . '] failed');
        $this->testProperty->addTestResultDescription('Validation of [' . $this->fileName . '] against [' . $this->xsdFilename . '] failed');
        $this->testProperty->addTestResultReportDescription('Filen ' . $this->fileName . ' validerer ikke mot filen ' . $this->xsdFilename);
    }

    $xml->close();
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);
}
/**
 * This function attempts to validate an XML string against the specified schema.
 *
 * It will parse the string into a DOM document and validate this document against the schema.
 *
 * @param string|DOMDocument $xml    The XML string or document which should be validated.
 * @param string             $schema The schema filename (relative to the schemas/ directory) which should be used.
 * @param boolean            $debug  When true, validation errors are echoed.
 *
 * @return string|DOMDocument 'unloaded_xml' or 'invalid_xml' to explain the problem, or the DOMDocument
 */
public static function validateXML($xml, $schema, $debug = false)
{
    // Expression assertions: string assertions are no longer evaluated on PHP 8.
    assert(is_string($xml) || $xml instanceof DOMDocument);
    assert(is_string($schema));

    libxml_clear_errors();
    libxml_use_internal_errors(true);

    if ($xml instanceof DOMDocument) {
        $dom = $xml;
    } else {
        $dom = new DOMDocument();
        $dom = self::loadXML($dom, $xml);
        if (!$dom) {
            return 'unloaded_xml';
        }
    }

    $schemaFile = dirname(__FILE__) . '/schemas/' . $schema;

    // The entity loader has to be enabled for the schema file to be read.
    // libxml_disable_entity_loader() is deprecated as of PHP 8.0, where the
    // toggle is no longer needed.
    $toggleEntityLoader = PHP_VERSION_ID < 80000;
    if ($toggleEntityLoader) {
        $oldEntityLoader = libxml_disable_entity_loader(false);
    }
    $res = $dom->schemaValidate($schemaFile);
    if ($toggleEntityLoader) {
        libxml_disable_entity_loader($oldEntityLoader);
    }

    if (!$res) {
        $xmlErrors = libxml_get_errors();
        syslog(LOG_INFO, 'Error validating the metadata: ' . var_export($xmlErrors, true));

        if ($debug) {
            foreach ($xmlErrors as $error) {
                echo $error->message . "\n";
            }
        }

        return 'invalid_xml';
    }

    return $dom;
}
/**
 * @inheritdoc
 *
 * Validates the file against XML/configuration.xsd, then maps its elements
 * to configuration keys and passes them to setConfiguration().
 *
 * @throws MigrationException if the file does not pass schema validation
 */
protected function doLoad($file)
{
    // Remember the caller's setting; it is restored on every exit path.
    $previousErrorMode = libxml_use_internal_errors(true);

    $xml = new \DOMDocument();
    $xml->load($file);
    if (!$xml->schemaValidate(__DIR__ . DIRECTORY_SEPARATOR . "XML" . DIRECTORY_SEPARATOR . "configuration.xsd")) {
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);
        throw MigrationException::configurationNotValid('XML configuration did not pass the validation test.');
    }

    $xml = simplexml_load_file($file, "SimpleXMLElement", LIBXML_NOCDATA);

    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    $config = [];

    if (isset($xml->name)) {
        $config['name'] = (string) $xml->name;
    }
    if (isset($xml->table['name'])) {
        $config['table_name'] = (string) $xml->table['name'];
    }
    if (isset($xml->table['column'])) {
        $config['column_name'] = (string) $xml->table['column'];
    }
    if (isset($xml->{'migrations-namespace'})) {
        $config['migrations_namespace'] = (string) $xml->{'migrations-namespace'};
    }
    if (isset($xml->{'organize-migrations'})) {
        // Passed on as the element itself, not cast to a string.
        $config['organize_migrations'] = $xml->{'organize-migrations'};
    }
    if (isset($xml->{'migrations-directory'})) {
        $config['migrations_directory'] = $this->getDirectoryRelativeToFile($file, (string) $xml->{'migrations-directory'});
    }
    if (isset($xml->migrations->migration)) {
        $config['migrations'] = $xml->migrations->migration;
    }

    $this->setConfiguration($config);
}
/**
 * Create an Array from XML
 *
 * This method sets up the SimpleXMLIterator and starts the parsing
 * of an xml body to iterate through it and transform it into
 * an array that can be used by the developers. The root element name and
 * its "type" attribute are stored in self::$_xmlRoot / self::$_responseType.
 *
 * @param string $xml
 * @return array|Frapi_Exception An array mapped to the passed xml, keyed by
 *                               the root element name; a Frapi_Exception
 *                               instance (returned, not thrown) when the xml
 *                               cannot be parsed
 */
public static function arrayFromXml($xml)
{
    // replace namespace defs
    $xml = str_replace('xmlns=', 'ns=', $xml);

    // catch libxml errors
    $previousErrorMode = libxml_use_internal_errors(true);

    try {
        $iterator = new SimpleXMLIterator($xml);
    } catch(Exception $e) {
        // Clean up BEFORE returning; statements after the return never run.
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);

        // NOTE(review): the exception is returned rather than thrown; kept
        // for backward compatibility - confirm callers check for it.
        return new Frapi_Exception(
            'Xml Parsing Failed',
            'INVALID_XML',
            400,
            'xml_parsing'
        );
    }

    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    $xmlRoot = $iterator->getName();
    $type = $iterator->attributes()->type;

    // SimpleXML provides the root information on construct
    self::$_xmlRoot = $xmlRoot;
    self::$_responseType = $type;

    // return the mapped array with the root element as the header
    return array($xmlRoot => self::_iteratorToArray($iterator));
}