/**
 * Run an XPath query against the response body held in $this->_response.
 *
 * When the DOMDocument and DOMXPath classes exist, the body is parsed as HTML
 * and the result of DOMXPath::query() is returned. Otherwise the domxml
 * functions are tried; that path dumps its intermediate values with
 * var_dump() and returns a stdClass carrying only a `length` property.
 * Unsupported setups are reported through $this->fail() and yield false.
 *
 * @param string $xpath_expression XPath expression to evaluate
 * @return DOMNodeList|stdClass|false
 */
function _testXPath($xpath_expression)
{
    if (class_exists('DOMDocument') && class_exists('DOMXPath')) {
        // The builtin DOM classes are present; the separate domxml extension
        // must not be loaded alongside them.
        if (extension_loaded('domxml')) {
            $this->fail('Please disable the domxml extension. Only php5 builtin domxml is supported');
            return false;
        }

        $document = new DOMDocument();
        $document->loadHtml($this->_response);
        $evaluator = new DOMXPath($document);

        return $evaluator->query($xpath_expression);
    }

    if (!function_exists('domxml_open_mem')) {
        $this->fail('No xpath support built in');
        return false;
    }

    $legacyDom = domxml_open_mem($this->_response);
    if (!$legacyDom) {
        $this->fail('Error parsing doc');
        return false;
    }
    var_dump($legacyDom);

    $initialised = $legacyDom->xpath_init();
    var_dump($initialised);

    $context = $legacyDom->xpath_new_context();
    var_dump($xpath_expression);

    $evaluated = $context->xpath_eval($xpath_expression);
    var_dump($evaluated);

    // Callers only look at ->length, so mimic that part of DOMNodeList.
    $summary = new stdClass();
    $summary->length = count($evaluated->nodeset);

    return $summary;
}
/**
 * Replace the content of articles from a fixed list of web comic sites with
 * the markup of the `div#comic` element found on the linked page.
 *
 * Articles whose plugin_data already carries the "af_comics,<owner>:" marker
 * are not fetched again; their content is restored from
 * $article["stored"]["content"] when that entry exists.
 *
 * @param array $article article record, modified in place
 * @return bool true when the guid belongs to one of the handled sites,
 *              false otherwise (the article is then left untouched)
 */
function process(&$article)
{
    $owner_uid = $article["owner_uid"];

    $handledHosts = array(
        "bunicomic.com",
        "buttersafe.com",
        "whompcomic.com",
        "extrafabulouscomics.com",
        "happyjar.com",
        "csectioncomics.com",
    );

    $isHandled = false;
    foreach ($handledHosts as $host) {
        if (strpos($article["guid"], $host) !== FALSE) {
            $isHandled = true;
            break;
        }
    }
    if (!$isHandled) {
        return false;
    }

    $marker = "af_comics,{$owner_uid}:";

    if (strpos($article["plugin_data"], $marker) !== FALSE) {
        // Already processed once: reuse the stored content if there is any.
        if (isset($article["stored"]["content"])) {
            $article["content"] = $article["stored"]["content"];
        }
        return true;
    }

    // The last argument looks like a browser user agent string; presumably
    // some of these sites reject unknown clients -- confirm against
    // fetch_file_contents().
    $res = fetch_file_contents($article["link"], false, false, false, false, false, 0, "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)");

    $doc = new DOMDocument();
    @$doc->loadHTML($res);

    $xpath = new DOMXPath($doc);
    $basenode = $xpath->query('//div[@id="comic"]')->item(0);

    if ($basenode) {
        $article["content"] = $doc->saveXML($basenode);
        $article["plugin_data"] = $marker . $article["plugin_data"];
    }

    return true;
}
/**
 * Constructs a new object from a DOM Document.
 *
 * Reads the weblog, the URL and the inbound blog/link counters from the
 * `//result` element of the response.
 *
 * @param DomDocument $dom the ReST fragment for this object
 * @throws Zend_Service_Technorati_Exception when the response does not
 *         contain exactly one `//result/weblog` element
 */
public function __construct(DomDocument $dom)
{
    $xpath = new DOMXPath($dom);

    // Same behaviour as blogPostTags: an URL that is not a valid weblog
    // raises an exception.
    $weblogNodes = $xpath->query('//result/weblog');
    if ($weblogNodes->length != 1) {
        throw new Zend_Service_Technorati_Exception(
            "Your URL is not a recognized Technorati weblog");
    }
    $this->_weblog = new Zend_Service_Technorati_Weblog($weblogNodes->item(0));

    $urlNodes = $xpath->query('//result/url/text()');
    if ($urlNodes->length == 1) {
        try {
            // The fetched URL often lacks a scheme, which makes the
            // normalisation below throw.
            $this->_url = Zend_Service_Technorati_Utils::normalizeUriHttp($urlNodes->item(0)->data);
        } catch (Zend_Service_Technorati_Exception $e) {
            // Fall back to the URL reported by the weblog itself.
            if ($this->getWeblog() instanceof Zend_Service_Technorati_Weblog) {
                $this->_url = $this->getWeblog()->getUrl();
            }
        }
    }

    $inboundBlogNodes = $xpath->query('//result/inboundblogs/text()');
    if ($inboundBlogNodes->length == 1) {
        $this->_inboundBlogs = (int) $inboundBlogNodes->item(0)->data;
    }

    $inboundLinkNodes = $xpath->query('//result/inboundlinks/text()');
    if ($inboundLinkNodes->length == 1) {
        $this->_inboundLinks = (int) $inboundLinkNodes->item(0)->data;
    }
}
/**
 * Generate the single-file PHP client.
 *
 * Emits the file header, then one section per enum, class and service node
 * found in $this->_doc, then the main client, and finally registers the
 * accumulated text as "KalturaClient.php".
 */
function generate()
{
    parent::generate();

    $xpath = new DOMXPath($this->_doc);

    // File header
    foreach (array('<?php', 'require_once("KalturaClientBase.php");', '') as $headerLine) {
        $this->appendLine($headerLine);
    }

    // Enums
    foreach ($xpath->query("/xml/enums/enum") as $enumNode) {
        $this->writeEnum($enumNode);
    }

    // Classes
    foreach ($xpath->query("/xml/classes/class") as $classNode) {
        $this->writeClass($classNode);
    }

    // Services; the node list is reused for the main client below.
    $serviceNodes = $xpath->query("/xml/services/service");
    foreach ($serviceNodes as $serviceNode) {
        $this->writeService($serviceNode);
    }

    $this->appendLine();
    $this->writeMainClient($serviceNodes);
    $this->appendLine();

    $this->addFile("KalturaClient.php", $this->getTextBlock());
}
/**
 * Constructs a new object from a DOM Element.
 *
 * Each supported child element is passed to its setter only when the
 * element yields exactly one text node.
 *
 * @param DomElement $dom the ReST fragment for this object
 */
public function __construct(DomElement $dom)
{
    $xpath = new DOMXPath($dom->ownerDocument);

    // child element name => setter receiving its text
    $setters = array(
        'firstname'        => 'setFirstName',
        'lastname'         => 'setLastName',
        'username'         => 'setUsername',
        'description'      => 'setDescription',
        'bio'              => 'setBio',
        'thumbnailpicture' => 'setThumbnailPicture',
    );

    foreach ($setters as $element => $setter) {
        $textNodes = $xpath->query('./' . $element . '/text()', $dom);
        if ($textNodes->length == 1) {
            $this->$setter($textNodes->item(0)->data);
        }
    }
}
/**
 * Collect key/value pairs from the first two cells of each table row in an
 * HTML file into the global $hostDict.
 *
 * Rows are taken from `//table[1]/tr` where the class attribute differs from
 * "first". The first selected cell of a pair is the key, the second the
 * value. A value is appended to $hostDict[$key] only when the key already
 * exists in $hostDict and the value is not yet listed.
 *
 * @param string $file_path path of the HTML file to parse
 * @return void
 */
function parseResinImprFile($file_path)
{
    global $hostDict;

    $doc = new DOMDocument();
    @$doc->loadHTMLFile($file_path);

    $xpath = new DOMXPath($doc);
    $cells = $xpath->query("//table[1]/tr[@class != 'first']/td[position() < 3]");

    $key = "";
    $expectingKey = true;
    foreach ($cells as $cell) {
        if ($expectingKey) {
            $key = $cell->nodeValue;
            $expectingKey = false;
            continue;
        }

        $expectingKey = true;
        $value = $cell->nodeValue;

        // bid and bidResult cannot be told apart yet, so both are collected
        // under the same key.
        $bucket = ($key == "AD_EXCHANGE.bidResult") ? "AD_EXCHANGE.bid" : $key;

        if (array_key_exists($bucket, $hostDict) && !in_array($value, $hostDict[$bucket])) {
            array_push($hostDict[$bucket], $value);
        }
    }
}
/**
 * Convert dom node tree to array
 *
 * Produces one entry per `/config/indexer` node, keyed by the node's `id`
 * attribute. Each entry starts with the attribute-derived fields and empty
 * title/description, and is then passed through convertChild() once per
 * child element.
 *
 * @param \DOMDocument $source
 * @return array
 * @throws \InvalidArgumentException declared by the original contract;
 *         presumably raised by the helper methods -- confirm
 */
public function convert($source)
{
    $xpath = new \DOMXPath($source);
    $output = [];

    /** @var $indexerNode \DOMNode */
    foreach ($xpath->evaluate('/config/indexer') as $indexerNode) {
        $indexerId = $this->getAttributeValue($indexerNode, 'id');

        $data = [
            'indexer_id'   => $indexerId,
            'primary'      => $this->getAttributeValue($indexerNode, 'primary'),
            'view_id'      => $this->getAttributeValue($indexerNode, 'view_id'),
            'action_class' => $this->getAttributeValue($indexerNode, 'class'),
            'title'        => '',
            'description'  => '',
        ];

        /** @var $childNode \DOMNode */
        foreach ($indexerNode->childNodes as $childNode) {
            // Only element children carry configuration; skip text, comments, etc.
            if ($childNode->nodeType == XML_ELEMENT_NODE) {
                /** @var $childNode \DOMElement */
                $data = $this->convertChild($childNode, $data);
            }
        }

        $output[$indexerId] = $data;
    }

    return $output;
}
/**
 * Unserializes the property.
 *
 * Builds one Response per `d:response` child of $prop. For every response
 * the href, the optional response-level status code and the properties of
 * each `d:propstat` (grouped by status code) are collected.
 *
 * @param \DOMElement $prop
 * @param array $propertyMap
 * @return DAV\IProperty
 */
static function unserialize(\DOMElement $prop, array $propertyMap)
{
    $xpath = new \DOMXPath($prop->ownerDocument);
    $xpath->registerNamespace('d', 'urn:DAV');

    $responses = [];

    foreach ($xpath->evaluate('d:response', $prop) as $xResponse) {
        $href = Href::unserialize($xResponse, $propertyMap);

        // Status line directly below d:response; only its second
        // space-separated token is kept.
        $responseStatus = $xpath->evaluate('string(d:status)', $xResponse);
        if ($responseStatus) {
            list(, $responseStatus, ) = explode(' ', $responseStatus, 3);
        }

        $properties = [];
        foreach ($xpath->query('d:propstat', $xResponse) as $xPropstat) {
            $status = $xpath->evaluate('string(d:status)', $xPropstat);
            list(, $statusCode, ) = explode(' ', $status, 3);

            // The property map is only applied to propstat blocks with status 200.
            $usedPropertyMap = $statusCode == '200' ? $propertyMap : [];

            $properties[$statusCode] = DAV\XMLUtil::parseProperties($xPropstat, $usedPropertyMap);
        }

        $responses[] = new Response($href->getHref(), $properties, $responseStatus ? $responseStatus : null);
    }

    return new self($responses);
}
/**
 * Parse a recipe page.
 *
 * Starts from the result of the microdata schema parser, then blanks photo
 * URLs that point at one of two logo images, switches thumbnail photo URLs
 * to the large variant and fills in the yield from the page markup.
 *
 * @param string $html page markup
 * @param string $url  page URL, forwarded to the microdata parser
 * @return mixed the recipe object returned by the microdata parser
 */
public static function parse($html, $url)
{
    $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

    // Turn off libxml errors to prevent mismatched tag warnings.
    libxml_use_internal_errors(true);
    $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
    $doc = new DOMDocument();
    $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
    $xpath = new DOMXPath($doc);

    // Photo -- skip the logo if it was used in place of a photo.
    foreach (array("FDC_Logo_vertical.png", "FDC_share-logo.png") as $logoFile) {
        if (strpos($recipe->photo_url, $logoFile) !== false) {
            $recipe->photo_url = '';
            break;
        }
    }
    if ($recipe->photo_url) {
        $recipe->photo_url = str_replace("/thumbs/", "/large/", $recipe->photo_url);
    }

    // Yield -- prefer an element marked 'yield', otherwise fall back to the
    // value of the 'servings' element.
    $nodes = $xpath->query('//*[@class="yield"]');
    if (!$nodes->length) {
        $nodes = $xpath->query('//*[@class="servings"]//*[@class="value"]');
    }
    if ($nodes->length) {
        $recipe->yield = RecipeParser_Text::formatYield($nodes->item(0)->nodeValue);
    }

    return $recipe;
}
/**
 * addEntriesFromFeed adds all the entries from a zotero api feed to the collection
 *
 * Every `atom:entry` element of the feed is wrapped in a phpZoteroEntry and
 * stored in $this->entries under the entry's itemUri.
 *
 * @param DOMDocument|string $feed either a DOMDocument or string xml of the feed
 * @return void
 * @throws Exception when $feed is neither a string nor a DOMDocument
 */
public function addEntriesFromFeed($feed)
{
    if (is_string($feed)) {
        // NOTE(review): this branch used to run str_replace('&', '&', $feed),
        // which replaces a string with itself and so did nothing. It was
        // probably meant to touch '&amp;'; the intended direction cannot be
        // determined from here, so no replacement is performed.
        $dom = new DOMDocument();
        $dom->loadXML($feed);
    } elseif ($feed instanceof DOMDocument) {
        // instanceof also accepts DOMDocument subclasses and, unlike
        // get_class(), is safe for non-object input such as null or arrays.
        $dom = $feed;
    } else {
        throw new Exception('Entry must be either an XML string or an ATOM feed DOMNode');
    }

    $xpath = new DOMXPath($dom);
    $xpath->registerNamespace('zxfer', 'http://zotero.org/ns/transfer');
    $xpath->registerNamespace('atom', 'http://www.w3.org/2005/Atom');

    foreach ($xpath->query('//atom:entry') as $entryNode) {
        $newEntry = new phpZoteroEntry($entryNode);
        $this->entries[$newEntry->itemUri] = $newEntry;
    }
}
/**
 * load all app files for given path and build.xml document
 *
 * Registers the build document under the app name, then walks every
 * `/build/files/file` node: files with a `class` attribute are handed to
 * the class registry, files with an `include` attribute are queued in
 * $this->_includes when they exist on disk.
 *
 * @param string $path app base path
 * @param DOMDocument $build build.xml
 * @return boolean always true
 * @throws PException when there is not exactly one `/build/app` node or it
 *         has no `name` attribute
 */
private function _doBuild($path, DOMDocument $build)
{
    $Classes = Classes::get();
    $x = new DOMXPath($build);

    $appNodes = $x->query('/build/app');
    if ($appNodes->length != 1) {
        throw new PException('App error!');
    }

    $appNode = $appNodes->item(0);
    if (!$appNode->hasAttribute('name')) {
        throw new PException('App name error!');
    }
    $this->_apps[$appNode->getAttribute('name')] = $build;

    foreach ($x->query('/build/files/file') as $file) {
        $target = $path . $file->nodeValue;

        if ($file->hasAttribute('class')) {
            // A class attribute wins over an include attribute.
            $Classes->addClass($file->getAttribute('class'), $target);
        } elseif ($file->hasAttribute('include') && file_exists($target)) {
            $this->_includes[] = $target;
        }
    }

    return true;
}
/**
 * Handle a node
 *
 * Converts the given node into a `blockquote` appended to $root. Child
 * elements named "attribution" are taken out of the source node before its
 * children are visited and are re-added afterwards as
 * `<div class="attribution"><cite>…</cite></div>` inside the blockquote.
 *
 * @param ezcDocumentElementVisitorConverter $converter
 * @param DOMElement $node
 * @param mixed $root
 * @return mixed
 */
public function handle(ezcDocumentElementVisitorConverter $converter, DOMElement $node, $root)
{
    $targetDocument = $root->ownerDocument;
    $quote = $targetDocument->createElement('blockquote');

    // Detach the optional attribution elements first, keeping a copy of
    // each, so the recursive visit below does not see them.
    $xpath = new DOMXPath($node->ownerDocument);
    $attributions = array();
    foreach ($xpath->query('*[local-name() = "attribution"]', $node) as $attributionNode) {
        $attributions[] = $attributionNode->cloneNode(true);
        $attributionNode->parentNode->removeChild($attributionNode);
    }

    // Recursively decorate the blockquote with the remaining children.
    $quote = $converter->visitChildren($node, $quote);
    $root->appendChild($quote);

    // Re-attach the attributions, if any, below the quoted content.
    foreach ($attributions as $attributionCopy) {
        $wrapper = $targetDocument->createElement('div');
        $wrapper->setAttribute('class', 'attribution');
        $quote->appendChild($wrapper);

        // createElement() does not escape its value, hence htmlspecialchars().
        $cite = $targetDocument->createElement('cite', htmlspecialchars($attributionCopy->textContent));
        $wrapper->appendChild($cite);
    }

    return $root;
}
/**
 * Find all nodes below $node that match a CSS selector.
 *
 * The selector is translated to XPath with CssSelectorConverter and
 * evaluated with $node as the context node.
 *
 * @param \DOMNode $node     context node; may be the document itself
 * @param string   $selector CSS selector
 * @return \DOMNodeList
 */
public function findAll($node, $selector)
{
    // A DOMDocument has no ownerDocument (the property is null), so use the
    // node itself as the document in that case.
    $document = $node instanceof \DOMDocument ? $node : $node->ownerDocument;

    $domXPath = new \DOMXPath($document);
    $converter = new CssSelectorConverter();

    return $domXPath->query($converter->toXPath($selector), $node);
}
/**
 * Copy the value(s) selected by an XPath expression in the parent metadata
 * XML into a property of the related filter.
 *
 * A single match is assigned as-is; several matches are joined with commas.
 * With no match the filter is left untouched and the result depends on
 * $this->allowNull.
 *
 * @param KalturaRelatedFilter $filter       filter receiving the value
 * @param KalturaObject        $parentObject must be a KalturaMetadata
 * @return bool false only when nothing matched and null is not allowed
 * @throws KalturaAPIException when $parentObject is not a KalturaMetadata or
 *         the filter lacks the configured property
 */
public function apply(KalturaRelatedFilter $filter, KalturaObject $parentObject)
{
    $filterProperty = $this->filterProperty;
    $parentProperty = $this->parentProperty;

    KalturaLog::debug("Mapping XPath {$parentProperty} to " . get_class($filter) . "::{$filterProperty}");

    if (!$parentObject instanceof KalturaMetadata) {
        throw new KalturaAPIException(KalturaErrors::INVALID_OBJECT_TYPE, get_class($parentObject));
    }
    if (!property_exists($filter, $filterProperty)) {
        throw new KalturaAPIException(KalturaErrors::PROPERTY_IS_NOT_DEFINED, $filterProperty, get_class($filter));
    }

    $doc = new KDOMDocument();
    $doc->loadXML($parentObject->xml);

    $xpath = new DOMXPath($doc);
    $matched = $xpath->query($parentProperty);

    if ($matched->length == 1) {
        $filter->{$filterProperty} = $matched->item(0)->nodeValue;
        return true;
    }

    if ($matched->length > 1) {
        $values = array();
        foreach ($matched as $element) {
            $values[] = $element->nodeValue;
        }
        $filter->{$filterProperty} = implode(',', $values);
        return true;
    }

    // Nothing matched.
    if (!$this->allowNull) {
        return false;
    }

    return true;
}
/**
 * Upgrade an unversioned thesaurus / candidate-terms pair to "2.0.0".
 *
 * Only acts when $version is empty. The `te` children of the first
 * `/thesaurus/te` element are moved directly under the thesaurus root and
 * passed to fixThesaurus2(); the now obsolete wrapper element is removed.
 * Both documents are then stamped with the new version, and the thesaurus
 * with creation and modification dates.
 *
 * @param string       $version current version; "" means unversioned
 * @param \DOMDocument $domct   document whose root receives the version attribute
 * @param \DOMDocument $domth   thesaurus document
 * @param Connection   $connbas not used by this patch
 * @param \unicode     $unicode forwarded to fixThesaurus2()
 * @return string the version after patching
 */
public function patch($version, \DOMDocument $domct, \DOMDocument $domth, Connection $connbas, \unicode $unicode)
{
    if ($version != "") {
        return $version;
    }

    $th = $domth->documentElement;
    $ct = $domct->documentElement;

    $th->setAttribute("id", "0");

    $xp = new DOMXPath($domth);
    $wrappers = $xp->query("/thesaurus/te");

    if ($wrappers->length > 0) {
        $wrapper = $wrappers->item(0);
        $th->setAttribute("nextid", $wrapper->getAttribute("nextid"));

        // Take a snapshot of the children before moving them around.
        $children = [];
        foreach ($xp->query("te", $wrapper) as $child) {
            $children[] = $child;
        }

        foreach ($children as $child) {
            $th->appendChild($child);
            $this->fixThesaurus2($domth, $child, 0, $unicode);
        }

        $wrapper->parentNode->removeChild($wrapper);
    }

    $version = "2.0.0";
    $ct->setAttribute("version", $version);
    $th->setAttribute("version", "2.0.0");

    $now = date("YmdHis");
    $th->setAttribute("creation_date", $now);
    $th->setAttribute("modification_date", $now);

    return $version;
}
/**
 * Returns array, containing detailed results for any Google search.
 *
 * Result pages are requested from the Google domain configured in the
 * GOOGLE_TLD constant. Another page is requested, after an increasing
 * delay, for as long as the response contains the `<div id="nn"></div>`
 * marker.
 *
 * @param string $query String, containing the search query. It is inserted
 *                      into the URL as given, so it must already be URL-safe.
 * @return array List of arrays with the keys 'url', 'title' and 'descr'.
 *               'descr' is an empty string when the page holds fewer
 *               description blocks than result links.
 * @throws SEOstatsException when the response refers to support answer 86640.
 */
public static function googleArray($query)
{
    $result = array();
    $pages = 1;
    $delay = 0;

    for ($start = 0; $start < $pages; $start++) {
        $url = 'http://www.google.' . GOOGLE_TLD . '/custom?q=' . $query . '&filter=0' . '&num=100' . ($start == 0 ? '' : '&start=' . $start . '00');
        $str = SEOstats::cURL($url);

        // An empty or failed response has no results and no next-page marker.
        // Stop here; DOMDocument::loadHTML() rejects an empty string on PHP 8.
        if (!is_string($str) || $str === '') {
            break;
        }

        if (preg_match("#answer=86640#i", $str)) {
            $e = 'Please read: http://www.google.com/support/websearch/' . 'bin/answer.py?&answer=86640&hl=en';
            throw new SEOstatsException($e);
        }

        $html = new DOMDocument();
        @$html->loadHtml($str);
        $xpath = new DOMXPath($html);

        $links = $xpath->query("//div[@class='g']//a");
        $descs = $xpath->query("//td[@class='j']//div[@class='std']");

        // $i counts accepted links only, pairing the n-th result link with
        // the n-th description block.
        $i = 0;
        foreach ($links as $link) {
            if (preg_match('#cache#si', $link->textContent) || preg_match('#similar#si', $link->textContent)) {
                continue;
            }

            // item() returns null past the end of the list.
            $descNode = $descs->item($i);

            $result[] = array(
                'url'   => $link->getAttribute('href'),
                'title' => utf8_decode($link->textContent),
                'descr' => $descNode !== null ? utf8_decode($descNode->textContent) : '',
            );
            $i++;
        }

        if (preg_match('#<div id="nn"><\\/div>#i', $str) || preg_match('#<div id=nn><\\/div>#i', $str)) {
            // More pages available: extend the loop and back off a little more.
            $pages += 1;
            $delay += 200000;
            usleep($delay);
        } else {
            // Shrinking the bound ends the loop after this page.
            $pages -= 1;
        }
    }

    return $result;
}
/**
 * Exclude some html parts by class inside content wrapped with
 * TYPO3SEARCH_begin and TYPO3SEARCH_end markers.
 *
 * When there is nothing to strip (blank content, no configured classes, or
 * none of them occurring in the content) the entity-decoded input is
 * returned. Otherwise every element whose class attribute contains one of
 * the configured strings is removed and the remaining markup is returned
 * without XML preamble, doctype, newlines and html/body wrappers.
 *
 * @param string $indexableContent HTML markup
 * @return string HTML
 */
public function excludeContentByClass($indexableContent)
{
    if (empty(trim($indexableContent))) {
        return html_entity_decode($indexableContent);
    }

    $excludeClasses = $this->getConfiguration()->getIndexQueuePagesExcludeContentByClassArray();

    $nothingToStrip = count($excludeClasses) === 0
        || !Util::containsOneOfTheStrings($indexableContent, $excludeClasses);
    if ($nothingToStrip) {
        return html_entity_decode($indexableContent);
    }

    $doc = new \DOMDocument('1.0', 'UTF-8');
    libxml_use_internal_errors(true);
    $doc->loadHTML('<?xml version="1.0" encoding="UTF-8"?>' . PHP_EOL . $indexableContent);
    $xpath = new \DOMXPath($doc);

    foreach ($excludeClasses as $excludePart) {
        $elements = $xpath->query("//*[contains(@class,'" . $excludePart . "')]");
        if (count($elements) != 0) {
            foreach ($elements as $element) {
                $element->parentNode->removeChild($element);
            }
        }
    }

    $html = $doc->saveHTML($doc->documentElement->parentNode);

    // remove XML-Preamble, newlines and doctype
    $html = preg_replace('/(<\\?xml[^>]+\\?>|\\r?\\n|<!DOCTYPE.+?>)/imS', '', $html);

    // drop the wrappers added by the HTML parser
    return str_replace(array('<html>', '</html>', '<body>', '</body>'), '', $html);
}
/**
 * Look up a Brazilian postal code (CEP) on the Correios search page.
 *
 * The page is requested with a POST and the first three cells of the
 * `tmptabela` result table are read as street, district and "city/state".
 *
 * @param string|int $cep CEP with or without punctuation. Non-digits are
 *                        dropped and the value is left-padded with zeros to
 *                        8 digits, so leading zeros survive.
 * @return array|false array with the keys 'uf', 'cidade', 'bairro' and
 *                     'logradouro', or false when the CEP contains no digit,
 *                     the request fails, or the page holds no result row
 */
function ConsultarCEP($cep)
{
    $url = 'http://www.buscacep.correios.com.br/sistemas/buscacep/resultadoBuscaCepEndereco.cfm';

    // intval() would turn "01001-000" into 1001; keep every digit instead.
    $digits = preg_replace('/\D+/', '', (string) $cep);
    if ($digits === '' || $digits === null) {
        return false;
    }
    $digits = str_pad($digits, 8, '0', STR_PAD_LEFT);

    // http_build_query() encodes the values and joins them without a
    // trailing separator.
    $fields_string = http_build_query(
        array('relaxation' => $digits, 'tipoCEP' => 'ALL', 'semelhante' => 'N'),
        '',
        '&'
    );

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $fields_string);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
    $response = curl_exec($ch);
    curl_close($ch);

    if (!is_string($response) || $response === '') {
        return false;
    }

    // NOTE(review): decoding from UTF-8 and then converting "from UTF-8"
    // again looks inconsistent; kept as it was because the page's real
    // encoding cannot be verified from here.
    $result = utf8_decode($response);

    $doc = new DOMDocument();
    $doc->preserveWhiteSpace = false;
    $doc->strictErrorChecking = false;
    $doc->recover = true;
    $doc->loadHTML(mb_convert_encoding($result, 'HTML-ENTITIES', 'UTF-8'));

    $xpath = new DOMXPath($doc);
    $entries = $xpath->query("//table[@class='tmptabela']//td");

    // Without at least three cells there is no address to report.
    if ($entries === false || $entries->length < 3) {
        return false;
    }

    $cityAndState = explode('/', $entries->item(2)->nodeValue);
    $cidade = $cityAndState[0];
    $uf = isset($cityAndState[1]) ? $cityAndState[1] : '';

    // The street and district cells end with two bytes that are cut off.
    $bairro = substr($entries->item(1)->nodeValue, 0, -2);
    $logradouro = substr($entries->item(0)->nodeValue, 0, -2);

    return array(
        'uf'         => trim($uf),
        'cidade'     => trim($cidade),
        'bairro'     => trim($bairro),
        'logradouro' => trim($logradouro),
    );
}
/**
 * Adjust the login related actions of an "actions" contribution node
 * according to the FTP_LOGIN_SCREEN option.
 *
 * With the option set to "TRUE" the regular "login" action is removed
 * (unless it already carries an auth_ftp_impl attribute) and
 * "dynamic_login" is renamed to "login". Otherwise the "dynamic_login" and
 * "ftp_set_data" actions are removed, stopping at the first one that is
 * not present.
 *
 * @param DOMNode $contribNode contribution node, modified in place
 * @return void
 */
protected function parseSpecificContributions(&$contribNode)
{
    parent::parseSpecificContributions($contribNode);

    if ($contribNode->nodeName != "actions") {
        return;
    }

    $actionXpath = new DOMXPath($contribNode->ownerDocument);

    $loginScreenEnabled = isset($this->options["FTP_LOGIN_SCREEN"])
        && $this->options["FTP_LOGIN_SCREEN"] == "TRUE"
        && $this->options["FTP_LOGIN_SCREEN"] !== false;

    if ($loginScreenEnabled) {
        // Replace "login" by "dynamic_login"
        $loginList = $actionXpath->query('action[@name="login"]', $contribNode);
        if ($loginList->length && $loginList->item(0)->getAttribute("auth_ftp_impl") == null) {
            $contribNode->removeChild($loginList->item(0));
        }

        $dynaLoginList = $actionXpath->query('action[@name="dynamic_login"]', $contribNode);
        if ($dynaLoginList->length) {
            $dynamicLogin = $dynaLoginList->item(0);
            $dynamicLogin->setAttribute("name", "login");
            $dynamicLogin->setAttribute("auth_ftp_impl", "true");
        }

        return;
    }

    // Remove the "dynamic_login" and "ftp_set_data" actions, in that order;
    // a missing action ends the processing.
    foreach (array("dynamic_login", "ftp_set_data") as $actionName) {
        $nodeList = $actionXpath->query('action[@name="' . $actionName . '"]', $contribNode);
        if (!$nodeList->length) {
            return;
        }

        unset($this->actions[$actionName]);
        $contribNode->removeChild($nodeList->item(0));
    }
}
/**
 * Add a `link` attribute to every `@license` tag whose description is a URL
 * or names a well-known license.
 *
 * A description that starts with a URL gets that URL as link, and the
 * description is reduced to the text following the URL (or to the URL
 * itself when no text follows). Any other description is compared against
 * a list of license name patterns; the first match supplies the link and
 * the description is left unchanged.
 *
 * @param \DOMDocument $xml Structure source to apply behaviour onto.
 *
 * @return \DOMDocument
 */
public function process(\DOMDocument $xml)
{
    $this->log('Linking to license URLs in @license tags');

    // license name pattern => canonical license URL; order matters because
    // the version 3 patterns also match a name without version number.
    $licenseMap = array(
        '#^\\s*(GPL|GNU General Public License)((\\s?v?|version)?2)\\s*$#i' => 'http://opensource.org/licenses/GPL-2.0',
        '#^\\s*(GPL|GNU General Public License)((\\s?v?|version)?3?)\\s*$#i' => 'http://opensource.org/licenses/GPL-3.0',
        '#^\\s*(LGPL|GNU (Lesser|Library) (General Public License|GPL))' . '((\\s?v?|version)?2(\\.1)?)\\s*$#i' => 'http://opensource.org/licenses/LGPL-2.1',
        '#^\\s*(LGPL|GNU (Lesser|Library) (General Public License|GPL))' . '((\\s?v?|version)?3?)\\s*$#i' => 'http://opensource.org/licenses/LGPL-3.0',
        '#^\\s*((new |revised |modified |three-clause |3-clause )BSD' . '( License)?)\\s*$#i' => 'http://opensource.org/licenses/BSD-3-Clause',
        '#^\\s*((simplified |two-clause |2-clause |Free)BSD)( License)?\\s*$#i' => 'http://opensource.org/licenses/BSD-2-Clause',
        '#^\\s*MIT( License)?\\s*$#i' => 'http://opensource.org/licenses/MIT',
    );

    // FIXME: migrate to '#^' . PHPDOC::LINK_REGEX . '(\s+(?P<text>.+))?$#u'
    // once that const exists
    $linkPattern = '#^(?i)\\b(?P<url>(?:https?://|www\\d{0,3}[.]|[a-z0-9.\\-]+[.]' . '[a-z]{2,4}/)(?:[^\\s()<>]+|\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\))+' . '(?:\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\)|' . '[^\\s`!()\\[\\]{};:\'".,<>?«»“”‘’]))(\\s+(?P<text>.+))?$#u';

    $xpath = new \DOMXPath($xml);

    // The query selects the description attributes, so parentNode is the tag.
    foreach ($xpath->query('//tag[@name="license"]/@description') as $descriptionAttribute) {
        $license = $descriptionAttribute->nodeValue;

        if (preg_match($linkPattern, $license, $matches)) {
            if (!isset($matches['text']) || !$matches['text']) {
                // set text to URL if not present
                $matches['text'] = $matches['url'];
            }

            $descriptionAttribute->parentNode->setAttribute('link', $matches['url']);
            $descriptionAttribute->nodeValue = $matches['text'];
        } else {
            // check map if any license matches
            foreach ($licenseMap as $regex => $url) {
                if (preg_match($regex, $license, $matches)) {
                    $descriptionAttribute->parentNode->setAttribute('link', $url);
                    break;
                }
            }
        }
    }

    return $xml;
}
/**
 * Replace the content of dilbert.com articles with the strip image found on
 * the linked page.
 *
 * Articles from other sites are returned unchanged. Articles already
 * carrying the "dilbert,<owner>:" marker in plugin_data are not fetched
 * again; their content is restored from $article["stored"]["content"] when
 * that entry exists.
 *
 * @param array $article article record
 * @return array the (possibly modified) article record
 */
function hook_article_filter($article)
{
    $owner_uid = $article["owner_uid"];

    if (strpos($article["guid"], "dilbert.com") === FALSE) {
        return $article;
    }

    $marker = "dilbert,{$owner_uid}:";

    if (strpos($article["plugin_data"], $marker) !== FALSE) {
        if (isset($article["stored"]["content"])) {
            $article["content"] = $article["stored"]["content"];
        }
        return $article;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML(fetch_file_contents($article["link"]));

    $xpath = new DOMXPath($doc);

    // The first image whose src ends in the zoomed strip file name is the
    // comic. (A check for img[@class='strip'] might work as well.)
    $basenode = false;
    $matches = array();
    foreach ($xpath->query('(//img[@src])') as $image) {
        if (preg_match("/dyn\\/str_strip\\/.*zoom\\.gif\$/", $image->getAttribute("src"), $matches)) {
            $image->setAttribute("src", rewrite_relative_url("http://dilbert.com/", $matches[0]));
            $basenode = $image;
            break;
        }
    }

    if ($basenode) {
        $article["content"] = $doc->saveXML($basenode);
        $article["plugin_data"] = $marker . $article["plugin_data"];
    }

    return $article;
}
/**
 * Transform an XML file with an XSL stylesheet.
 *
 * When no stylesheet is given, the one bound through the document's
 * `<?xml-stylesheet ... href="..."?>` processing instruction is used; its
 * href is resolved against the XML_FOLDER constant.
 *
 * @param string      $filename path of the XML document
 * @param string|null $xslname  path of the stylesheet, or null to use the
 *                              stylesheet bound inside the document
 * @return string|false the transformation result; false when the transform
 *                      fails or no bound stylesheet can be found
 */
function xsl_transform($filename, $xslname = null)
{
    // Get the original XML document
    $xml = new DOMDocument();
    $xml->load($filename);

    if ($xslname == null) {
        // extract bound stylesheet from embedded link
        $xp = new DOMXPath($xml);
        $pi = $xp->evaluate('/processing-instruction("xml-stylesheet")')->item(0);

        // The href pseudo-attribute may appear anywhere in the instruction
        // and use either quote style, so match it instead of slicing by
        // fixed offsets.
        if ($pi === null || !preg_match('/\bhref\s*=\s*(["\'])(.*?)\1/', $pi->data, $hrefMatch)) {
            return false;
        }

        $xslname = XML_FOLDER . $hrefMatch[2];
    }

    // load the XSL stylesheet
    $xsl = new DOMDocument();
    $xsl->load($xslname);

    // prime the transform engine
    $xslt = new XSLTProcessor();
    $xslt->importStyleSheet($xsl);

    // and away we go!
    return $xslt->transformToXml($xml);
}
/**
 * Constructs a new object from a DOM Document.
 *
 * Sets the author when the document holds exactly one `//result` element
 * and appends one weblog object per `//item/weblog` element.
 *
 * @param DomDocument $dom the ReST fragment for this object
 */
public function __construct(DomDocument $dom)
{
    $xpath = new DOMXPath($dom);

    /**
     * @see Zend_Service_Technorati_Author
     */
    require_once 'Zend/Service/Technorati/Author.php';

    $authorNodes = $xpath->query('//result');
    if ($authorNodes->length == 1) {
        $this->_author = new Zend_Service_Technorati_Author($authorNodes->item(0));
    }

    /**
     * @see Zend_Service_Technorati_Weblog
     */
    require_once 'Zend/Service/Technorati/Weblog.php';

    foreach ($xpath->query('//item/weblog') as $weblogNode) {
        $this->_weblogs[] = new Zend_Service_Technorati_Weblog($weblogNode);
    }
}
/**
 * Resolve character ids to names through /eve/CharacterName.xml.aspx.
 *
 * @param array $ids   character ids to look up
 * @param array $names receives the id => name map; it is reset to an empty
 *                     array first
 * @return bool true when the request succeeded and every distinct id was
 *              resolved, false otherwise
 */
public function getNames($ids, &$names)
{
    $names = array();

    $params = array(
        "version" => "2",
        "ids"     => implode(",", $ids),
    );

    if (!$this->request("/eve/CharacterName.xml.aspx", $params)) {
        return false;
    }

    $domPath = new DOMXPath($this->document);
    foreach ($domPath->query("descendant::rowset[@name='characters']/row") as $row) {
        $names[$row->getAttribute("characterID")] = $row->getAttribute("name");
    }

    // $names is keyed by id, so repeated ids collapse into one entry;
    // compare against the number of distinct ids requested.
    return count(array_unique($ids)) == count($names);
}
/**
 * Extract the related-media table of a media page and store it.
 *
 * Existing relation rows of the media are deleted, then one row is built
 * per anime/manga link found in each row of the related table. The rows
 * are inserted into 'mediarelation' and exposed as $context->relationData.
 *
 * @param array $documents documents indexed by URL constant
 * @param mixed $context   processing context; relationData is set on it
 * @return void
 * @throws BadProcessorDocumentException when a row names an unknown
 *         relation type
 */
public function process(array $documents, &$context)
{
    $document = $documents[self::URL_MEDIA];
    $dom = self::getDOM($document);
    $xpath = new DOMXPath($dom);

    Database::delete('mediarelation', ['media_id' => $context->media->id]);

    // label shown on the page => relation type
    $relationTypes = [
        'adaptation'          => MediaRelation::Adaptation,
        'alternative setting' => MediaRelation::AlternativeSetting,
        'alternative version' => MediaRelation::AlternativeVersion,
        'character'           => MediaRelation::Character,
        'full story'          => MediaRelation::FullStory,
        'other'               => MediaRelation::Other,
        'parent story'        => MediaRelation::ParentStory,
        'prequel'             => MediaRelation::Prequel,
        'sequel'              => MediaRelation::Sequel,
        'side story'          => MediaRelation::SideStory,
        'spin-off'            => MediaRelation::SpinOff,
        'summary'             => MediaRelation::Summary,
    ];

    $data = [];

    foreach ($xpath->query('//table[@class=\'anime_detail_related_anime\']/tr') as $row) {
        // First cell: relation label; second cell: the related media links.
        $typeMal = strtolower(Strings::removeSpaces($row->childNodes[0]->textContent));
        $type = Strings::makeEnum($typeMal, $relationTypes, null);

        if ($type === null) {
            throw new BadProcessorDocumentException($document, 'unknown relation type: ' . $typeMal);
        }

        foreach ($row->childNodes[1]->getElementsByTagName('a') as $anchor) {
            $href = $anchor->getAttribute('href');

            if (!preg_match('#^/(anime|manga)/([0-9]+)/#', $href, $matches)) {
                continue;
            }

            $idMal = Strings::makeInteger($matches[2]);

            if ($matches[1] === 'anime') {
                $media = Media::Anime;
            } elseif ($matches[1] === 'manga') {
                $media = Media::Manga;
            }

            $data[] = [
                'media_id' => $context->media->id,
                'mal_id'   => $idMal,
                'media'    => $media,
                'type'     => $type,
            ];
        }
    }

    Database::insert('mediarelation', $data);
    $context->relationData = $data;
}
/**
 * Register a trusted client with the PIDS service.
 *
 * Calls the `addClient` endpoint and interprets its XML response.
 *
 * @param string $ip    client IP address
 * @param string $desc  client description
 * @param string $appId application id; only sent when it is 40 characters long
 * @return array on success the key 'app_id'; on failure the key
 *               'errorMessages'. The array is empty when the response type
 *               is neither SUCCESS nor FAILURE, or when a FAILURE response
 *               carries no message.
 */
function addTrustedClient($ip, $desc, $appId)
{
    // NOTE(review): $ip and $desc are placed into the query string without
    // URL-encoding. Left as it was because callers may already pass encoded
    // values -- confirm and encode here if they do not.
    $requestURI = $this->PIDS_SERVICE_BASE_URI . 'addClient';
    $requestURI .= "?ip=" . $ip . "&desc=" . $desc;
    $requestURI .= strlen($appId) == 40 ? "&appId=" . $appId : '';

    $response = file_get_contents($requestURI);
    $result_array = array();

    if (!$response) {
        $result_array['errorMessages'] = "Error whilst attempting to fetch from URI: " . $this->PIDS_SERVICE_BASE_URI;
        return $result_array;
    }

    $responseDOMDoc = new DOMDocument();
    $responseNode = null;
    if ($responseDOMDoc->loadXML($response)) {
        $responseNode = $responseDOMDoc->getElementsByTagName("response")->item(0);
    }

    // Well-formed XML without a <response> element is as unusable as XML
    // that does not parse at all.
    if ($responseNode === null) {
        $result_array['errorMessages'] = "Error whilst attempting to load XML response. Response could not be parsed.";
        return $result_array;
    }

    $messageType = strtoupper($responseNode->getAttribute("type"));

    if ($messageType == 'SUCCESS') {
        $xPath = new DOMXPath($responseDOMDoc);
        $propertyNode = $xPath->query("//property[@name='appId']")->item(0);
        $appId = $propertyNode !== null ? $propertyNode->getAttribute("value") : '';

        if (strlen($appId) == 40) {
            $result_array['app_id'] = $appId;
        } else {
            $result_array['errorMessages'] = "Could not extract appId. Status of request unknown.<br/>";
        }
    } elseif ($messageType == 'FAILURE') {
        // Collect every message instead of keeping only the last one.
        $messages = '';
        foreach ($responseNode->getElementsByTagName("message") as $message) {
            $messages .= $message->nodeValue . "<br/>";
        }
        if ($messages !== '') {
            $result_array['errorMessages'] = $messages;
        }
    }

    return $result_array;
}
/**
 * Write the violations of an analysis as a PMD style XML report.
 *
 * Violations are grouped below one `file` element per resource, in the
 * order in which each resource first appears.
 *
 * @param Analysis $analysis analysis to describe
 * @param array    $options  must contain the key 'output', an object with a
 *                           writeln() method
 * @return void
 */
protected function describeAnalysis(Analysis $analysis, array $options = array())
{
    $output = $options['output'];

    $xml = new \DOMDocument('1.0', 'UTF-8');
    $xml->formatOutput = true;
    $xml->preserveWhiteSpace = true;

    $pmd = $xml->createElement('pmd');
    $pmd->setAttribute('timestamp', $analysis->getEndAt()->format('c'));
    $xml->appendChild($pmd);

    // file name => <file> element. A plain lookup table avoids building an
    // XPath expression from the file name, which breaks on names that
    // contain a double quote.
    $fileNodes = array();

    foreach ($analysis->getViolations() as $violation) {
        /**
         * @var $violation \SensioLabs\Insight\Sdk\Model\Violation
         */
        $filename = $violation->getResource();
        $fileKey = (string) $filename;

        if (!isset($fileNodes[$fileKey])) {
            $fileNode = $xml->createElement('file');
            $fileNode->setAttribute('name', $filename);
            $pmd->appendChild($fileNode);
            $fileNodes[$fileKey] = $fileNode;
        }

        // The value argument of createElement() is not escaped, so a message
        // containing '&' would be cut off; a text node escapes it properly.
        $violationNode = $xml->createElement('violation');
        $violationNode->appendChild($xml->createTextNode((string) $violation->getMessage()));
        $fileNodes[$fileKey]->appendChild($violationNode);

        $violationNode->setAttribute('beginline', $violation->getLine());
        $violationNode->setAttribute('endline', $violation->getLine());
        $violationNode->setAttribute('rule', $violation->getTitle());
        $violationNode->setAttribute('ruleset', $violation->getCategory());
        $violationNode->setAttribute('priority', $this->getPriority($violation));
    }

    $output->writeln($xml->saveXML());
}
/**
 * get rendered menu and adds drop down markup
 *
 * Every anchor whose class attribute starts with $class and that has
 * exactly one sibling `ul` is turned into a drop down: the `ul` gets the
 * class "dropdown-menu", the parent item gets an id (the anchor's href
 * without its first character) and the class "dropdown", and the anchor
 * gets the class "dropdown-toggle" plus a trailing caret element.
 *
 * @param string $html   rendered navigation; must be well-formed XML
 * @param string $class  css class to check for adding drop down
 * @param bool   $toggle whether to add data-toggle="dropdown" to the anchor
 *
 * @return string
 */
public function __invoke($html, $class = 'toplevel', $toggle = false)
{
    $domDoc = new \DOMDocument('1.0', 'utf-8');
    $domDoc->loadXML('<?xml version="1.0" encoding="utf-8"?>' . $html);
    $xpath = new \DOMXPath($domDoc);

    $anchors = $xpath->query('//a[starts-with(@class, "' . $class . '")]');

    foreach ($anchors as $anchor) {
        $subMenus = $xpath->query('../ul', $anchor);
        if ($subMenus->length !== 1) {
            continue;
        }

        $subMenus->item(0)->setAttribute('class', 'dropdown-menu');

        $listItem = $anchor->parentNode;
        $listItem->setAttribute('id', substr($anchor->getAttribute('href'), 1));

        $listItemClass = $listItem->getAttribute('class');
        $listItem->setAttribute('class', $listItemClass !== '' ? $listItemClass . ' dropdown' : 'dropdown');

        if ($toggle) {
            $anchor->setAttribute('data-toggle', 'dropdown');
        }

        $anchorClass = $anchor->getAttribute('class');
        $anchor->setAttribute('class', $anchorClass !== '' ? $anchorClass . ' dropdown-toggle' : 'dropdown-toggle');

        // A space followed by the caret element, appended inside the anchor.
        $anchor->appendChild($domDoc->createTextNode(' '));

        $caret = $domDoc->createElement('b', '');
        $caret->setAttribute('class', 'caret');
        $anchor->appendChild($caret);
    }

    return $domDoc->saveXML($xpath->query('/ul')->item(0), LIBXML_NOEMPTYTAG);
}
/**
 * Filter XHtml document
 *
 * Removes tables that look like layout tables: tables without any cell,
 * tables whose share of cells with content is below $this->threshold, and
 * tables with only one column. Tables that contain other tables are left
 * alone.
 *
 * @param DOMDocument $document document to filter; modified in place
 * @return DOMDocument the same document instance
 */
public function filter(DOMDocument $document)
{
    $xpath = new DOMXPath($document);

    // Find all tables
    $tables = $xpath->query('//*[local-name() = "table"]');

    foreach ($tables as $table) {
        // Ignore tables, which again contain tables, as these most
        // probably contain the website content somehow.
        if ($xpath->query('.//*[local-name() = "table"]', $table)->length > 0) {
            continue;
        }

        // Extract all cells from the table and check what they contain
        $cells = $xpath->query('.//*[local-name() = "td"] | .//*[local-name() = "th"]', $table);
        $cellCount = $cells->length;

        // A table without cells carries no content. Remove it up front
        // instead of dividing by zero below.
        if ($cellCount === 0) {
            $table->parentNode->removeChild($table);
            continue;
        }

        $cellContentCount = 0;
        foreach ($cells as $cell) {
            $cellContentCount += (int) $this->cellHasContent($cell);
        }

        // Completely remove table, if it does not meet the configured
        // expectations
        if ($cellContentCount / $cellCount < $this->threshold) {
            $table->parentNode->removeChild($table);
            continue;
        }

        // Tables with only one column are most probably also used only for
        // layout. We remove them, too.
        if ($xpath->query('.//*[local-name() = "tr"]', $table)->length >= $cellCount) {
            $table->parentNode->removeChild($table);
            continue;
        }
    }

    return $document;
}
/**
 * Replace the content of titanic-magazin.de articles with the cleaned body
 * text of the linked page.
 *
 * Scripts, noscript blocks, forms, the form anchor, the newsticker link and
 * paragraphs consisting of "bezahlte Anzeige" are removed from the page
 * before the first `div.tt_news-bodytext` is serialised as the new content.
 * Articles from other sites are returned unchanged.
 *
 * @param array $article article record
 * @return array the (possibly modified) article record
 */
function hook_article_filter($article)
{
    if (strpos($article["link"], "titanic-magazin.de") === FALSE) {
        return $article;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML(mb_convert_encoding(fetch_file_contents($article["link"]), 'HTML-ENTITIES', "UTF-8"));

    $xpath = new DOMXPath($doc);

    // first remove advertisement + tracking stuff
    $candidates = $xpath->query('(//script)|(//noscript)|(//form)|(//a[@name="form"])|(//p)|(//a[@href="newsticker.html"])');
    foreach ($candidates as $candidate) {
        // Paragraphs are only dropped when they are the ad label itself.
        $isParagraph = $candidate->localName === "p";
        if (!$isParagraph || $candidate->textContent == "bezahlte Anzeige") {
            $candidate->parentNode->removeChild($candidate);
        }
    }

    // now get the (cleaned) article
    $basenode = $xpath->query('(//div[@class="tt_news-bodytext"])')->item(0);
    if ($basenode) {
        $article["content"] = $doc->saveXML($basenode);
    }

    return $article;
}