PHP AsteFeed\Parser XmlParser Examples

Programming Language: PHP

Namespace/Package Name: AsteFeed\Parser

Class/Type: XmlParser

Examples at hotexamples.com: 12

PHP AsteFeed\Parser XmlParser - 12 examples found. These are the top-rated real-world PHP examples of AsteFeed\Parser\XmlParser, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

getXPathResult(3)

getHtmlDocument(3)

getDomDocument(2)

getSimpleXml(2)

getEncodingFromMetaTag(1)

getEncodingFromXmlTag(1)

getErrors(1)

htmlToXml(1)

Example #1

Show file

File: Html.php Project: aduroo/feed-api

 /**
  * Build the filter for an HTML fragment.
  *
  * All input data must already be encoded in UTF-8.
  *
  * @param string $html    HTML content; stored after conversion by XmlParser::htmlToXml()
  * @param string $website Site URL (used to build absolute URL)
  */
 public function __construct($html, $website)
 {
     // One Config instance is kept on the filter and handed to the Tag helper.
     $config = new Config();
     $this->config = $config;

     $this->input = XmlParser::htmlToXml($html);
     $this->output = '';
     $this->tag = new Tag($config);

     $this->website = $website;
     $siteUrl = new Url($website);
     $this->attribute = new Attribute($siteUrl);
 }

Example #2

Show file

File: Import.php Project: aduroo/feed-api

 /**
  * Parse the OPML content held by this importer.
  *
  * The document is accepted only when it parses as XML, its root element
  * is named "opml" and a body element is present.
  *
  * @return array|false The collected items, or false when the document is rejected
  */
 public function execute()
 {
     $caller = get_called_class();
     Logger::setMessage($caller . ': start importation');

     $xml = XmlParser::getSimpleXml(trim($this->content));
     $isOpml = $xml !== false && $xml->getName() === 'opml' && isset($xml->body);

     if (!$isOpml) {
         Logger::setMessage($caller . ': OPML tag not found or malformed XML document');

         return false;
     }

     $this->parseEntries($xml->body);
     Logger::setMessage($caller . ': ' . count($this->items) . ' subscriptions found');

     return $this->items;
 }

Example #3

Show file

File: RuleParser.php Project: aduroo/feed-api

 /**
  * Store the rules and prepare a DOM document and XPath evaluator for the HTML.
  *
  * @param string $html  HTML content to load
  * @param array  $rules Rules kept on the instance
  */
 public function __construct($html, array $rules)
 {
     $this->rules = $rules;

     // NOTE(review): the prepended declaration presumably hints the UTF-8
     // encoding to the HTML loader - confirm against XmlParser::getHtmlDocument().
     $document = '<?xml version="1.0" encoding="UTF-8">' . $html;

     $this->dom = XmlParser::getHtmlDocument($document);
     $this->xpath = new DOMXPath($this->dom);
 }

Example #4

Show file

File: Reader.php Project: aduroo/feed-api

 /**
  * Detect the feed format.
  *
  * Each query registered in $this->formats is evaluated against the content;
  * the first one matching exactly one node decides the format.
  *
  * @param string $content Feed content
  *
  * @return string Key of the matching format, or an empty string when none matches
  */
 public function detectFormat($content)
 {
     $xpath = new DOMXPath(XmlParser::getHtmlDocument($content));
     $detected = '';

     foreach ($this->formats as $parser_name => $query) {
         if ($xpath->query($query)->length === 1) {
             $detected = $parser_name;
             break;
         }
     }

     return $detected;
 }

Example #5

Show file

File: Tag.php Project: aduroo/feed-api

 /**
  * Remove every node matched by the tag blacklist.
  *
  * The entries of $this->tag_blacklist are joined into a single XPath union
  * expression, and each matching node is detached from its parent.
  *
  * @param string $data Input data
  *
  * @return string Serialized document, or an empty string when the input cannot be loaded
  */
 public function removeBlacklistedTags($data)
 {
     $dom = XmlParser::getDomDocument($data);

     if ($dom !== false) {
         $xpath = new DOMXpath($dom);
         $expression = implode(' | ', $this->tag_blacklist);

         foreach ($xpath->query($expression) as $blacklisted) {
             $blacklisted->parentNode->removeChild($blacklisted);
         }

         return $dom->saveXML();
     }

     return '';
 }

Example #6

Show file

File: Parser.php Project: aduroo/feed-api

 /**
  * Parse the document.
  *
  * The content is loaded as XML; if that fails, it is normalized with
  * Filter::normalizeData() and loaded a second time. A Feed is then filled
  * by the find*()/check*() steps, and one Item is built and appended to
  * $feed->items for every entry returned by getItemsTree().
  *
  * @return \AsteFeed\Parser\Feed
  *
  * @throws MalformedXmlException When the content still cannot be parsed after normalization
  */
 public function execute()
 {
     Logger::setMessage(get_called_class() . ': begin parsing');
     $xml = XmlParser::getSimpleXml($this->content);
     if ($xml === false) {
         // First attempt failed: normalize the raw content and retry once.
         Logger::setMessage(get_called_class() . ': Applying XML workarounds');
         $this->content = Filter::normalizeData($this->content);
         $xml = XmlParser::getSimpleXml($this->content);
         if ($xml === false) {
             // Second failure: log the parser errors and give up.
             Logger::setMessage(get_called_class() . ': XML parsing error');
             Logger::setMessage(XmlParser::getErrors());
             throw new MalformedXmlException('XML parsing error');
         }
     }
     // Keep the namespaces declared in the document; they are copied to every item below.
     $this->used_namespaces = $xml->getNamespaces(true);
     $xml = $this->registerSupportedNamespaces($xml);
     // Feed-level properties.
     $feed = new Feed();
     $this->findFeedUrl($xml, $feed);
     $this->checkFeedUrl($feed);
     $this->findSiteUrl($xml, $feed);
     $this->checkSiteUrl($feed);
     $this->findFeedTitle($xml, $feed);
     $this->findFeedDescription($xml, $feed);
     $this->findFeedLanguage($xml, $feed);
     $this->findFeedId($xml, $feed);
     $this->findFeedDate($xml, $feed);
     $this->findFeedLogo($xml, $feed);
     $this->findFeedIcon($xml, $feed);
     // Item-level properties, one Item per entry of the items tree.
     foreach ($this->getItemsTree($xml) as $entry) {
         $entry = $this->registerSupportedNamespaces($entry);
         $item = new Item();
         $item->xml = $entry;
         $item->namespaces = $this->used_namespaces;
         $this->findItemAuthor($xml, $entry, $item);
         $this->findItemUrl($entry, $item);
         $this->checkItemUrl($feed, $item);
         $this->findItemTitle($entry, $item);
         $this->findItemContent($entry, $item);
         // Id generation can use the item url/title/content (order is important)
         $this->findItemId($entry, $item, $feed);
         $this->findItemDate($entry, $item, $feed);
         $this->findItemEnclosure($entry, $item, $feed);
         $this->findItemLanguage($entry, $item, $feed);
         $this->findItemMedia($entry, $item, $feed);
         // Order is important (avoid double filtering)
         $this->filterItemContent($feed, $item);
         $this->scrapWebsite($item);
         $feed->items[] = $item;
     }
     // The feed is concatenated into the log message, i.e. converted to a string.
     Logger::setMessage(get_called_class() . PHP_EOL . $feed);
     return $feed;
 }

Example #7

Show file

File: Atom.php Project: aduroo/feed-api

 /**
  * Get the entry content.
  *
  * Lookup order: the child markup of the content element, then its text,
  * then the text of the summary element. Each element is searched with the
  * "atom:" prefix first and without prefix as a fallback.
  *
  * @param SimpleXMLElement $entry XML Entry
  *
  * @return string
  */
 private function getContent(SimpleXMLElement $entry)
 {
     $contentNodes = XmlParser::getXPathResult($entry, 'atom:content', $this->namespaces);
     if (!$contentNodes) {
         $contentNodes = XmlParser::getXPathResult($entry, 'content');
     }
     $content = current($contentNodes);

     // Content carrying child elements: return the children serialized back to XML.
     if (!empty($content) && count($content->children())) {
         $markup = '';
         foreach ($content->children() as $child) {
             $markup .= $child->asXML();
         }

         return $markup;
     }

     // Plain-text content, unless it is blank.
     $text = (string) $content;
     if (trim($text) !== '') {
         return $text;
     }

     // Nothing usable in the content element: fall back to the summary.
     $summaryNodes = XmlParser::getXPathResult($entry, 'atom:summary', $this->namespaces);
     if (!$summaryNodes) {
         $summaryNodes = XmlParser::getXPathResult($entry, 'summary');
     }

     return (string) current($summaryNodes);
 }

Example #8

Show file

File: Rss10.php Project: aduroo/feed-api

 /**
  * Find the item language.
  *
  * Uses the first dc:language value of the entry; when that value is
  * missing or falsy as a string, the feed language is used instead.
  *
  * @param SimpleXMLElement      $entry Feed item
  * @param \AsteFeed\Parser\Item $item  Item object
  * @param \AsteFeed\Parser\Feed $feed  Feed object
  */
 public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed)
 {
     $matches = XmlParser::getXPathResult($entry, 'dc:language', $this->namespaces);
     $entryLanguage = (string) current($matches);

     if ($entryLanguage) {
         $item->language = $entryLanguage;
     } else {
         $item->language = $feed->language;
     }
 }

Example #9

Show file

File: Scraper.php Project: aduroo/feed-api

 /**
  * Normalize encoding and strip head tag.
  *
  * The encoding read from the HTML meta tag is preferred; $this->encoding
  * is used when the meta tag yields nothing.
  */
 public function prepareHtml()
 {
     $html_encoding = XmlParser::getEncodingFromMetaTag($this->html);
     $source_encoding = $html_encoding ?: $this->encoding;

     $this->html = Encoding::convert($this->html, $source_encoding);
     $this->html = Filter::stripHeadTags($this->html);

     $message = get_called_class() . ': HTTP Encoding "' . $this->encoding . '" ; HTML Encoding "' . $html_encoding . '"';
     Logger::setMessage($message);
 }

Example #10

Show file

File: Favicon.php Project: aduroo/feed-api

 /**
  * Extract the icon links from the HTML.
  *
  * Collects the href attribute of every link element whose rel attribute
  * contains "icon" but not "apple".
  *
  * @param string $html HTML
  *
  * @return array List of href values; empty when $html is empty
  */
 public function extract($html)
 {
     if (empty($html)) {
         return [];
     }

     $xpath = new DOMXpath(XmlParser::getHtmlDocument($html));
     $links = $xpath->query("//link[contains(@rel, 'icon') and not(contains(@rel, 'apple'))]");

     $icons = [];
     foreach ($links as $link) {
         $icons[] = $link->getAttribute('href');
     }

     return $icons;
 }

Example #11

Show file

File: Rss20.php Project: aduroo/feed-api

 /**
  * Find the item media.
  *
  * Every element of the entry matching "media:*" is turned into an array of
  * its attributes, plus a "content" key holding the element text when that
  * text is not empty. Thumbnails are accumulated in $item->media->thumbnails;
  * any other element is stored on $item->media under its own tag name, so a
  * later element with the same name replaces an earlier one.
  *
  * @param SimpleXMLElement      $entry Feed item
  * @param \AsteFeed\Parser\Item $item  Item object
  * @param \AsteFeed\Parser\Feed $feed  Feed object (not used by this method)
  */
 public function findItemMedia(SimpleXMLElement $entry, Item $item, Feed $feed)
 {
     $item->media = new Media();
     $mediaTags = XmlParser::getXPathResult($entry, 'media:*', $this->namespaces);
     foreach ($mediaTags as $mediaTag) {
         $name = $mediaTag->getName();
         $array = [];
         foreach ($mediaTag->attributes() as $key => $value) {
             $array[$key] = (string) $value;
         }
         // Keep the element text. The previous code only read $array['content']
         // without assigning it, which stored nothing and raised an
         // undefined-index notice.
         $text = (string) $mediaTag;
         if ($text !== '') {
             $array['content'] = $text;
         }
         if ($name === 'thumbnail') {
             $item->media->thumbnails[] = $array;
         } else {
             $item->media->{$name} = $array;
         }
     }
 }

Example #12

Show file

File: CandidateParser.php Project: aduroo/feed-api

 /**
  * Strip useless tags.
  *
  * Runs stripTags() and stripAttributes() on the parsed content and returns
  * the serialized document element.
  *
  * @param string $content
  *
  * @return string Cleaned markup, or the untouched input when it cannot be loaded
  */
 public function stripGarbage($content)
 {
     $dom = XmlParser::getDomDocument($content);

     if ($dom === false) {
         return $content;
     }

     $xpath = new DOMXPath($dom);
     $this->stripTags($xpath);
     $this->stripAttributes($dom, $xpath);

     return $dom->saveXML($dom->documentElement);
 }