/**
 * Build a DOMDocument from the raw content held in $this->_content.
 *
 * The parser is chosen by $this->_domType: "xml" goes through loadXML(),
 * "html" through loadHTML(). Parser warnings are suppressed with @, so a
 * malformed payload yields whatever the parser managed to build rather than
 * an error.
 *
 * @return \DomDocument the document the content was loaded into
 * @throws \Exception when $this->_domType is neither "xml" nor "html"
 */
private function getDomObject()
{
    if ($this->_verbose > 2) {
        $this->log('IN:[' . __FUNCTION__ . ']', 0, "purple");
    }
    if ($this->_verbose > 1) {
        $this->log("Content length: " . strlen($this->_content), 0, "light_purple");
    }

    $DOM = new \DomDocument();
    switch ($this->_domType) {
        case "xml":
            if ($this->_verbose > 2) {
                $this->log('<' . __LINE__ . '> DOMDocument->loadXML', 0, "purple");
            }
            @$DOM->loadXML($this->_content);
            break;

        case "html":
            if ($this->_verbose > 2) {
                // Name the call that actually runs here (loadHTML, not loadXML)
                // and pass the same level/colour arguments as the XML branch.
                $this->log('<' . __LINE__ . '> DOMDocument->loadHTML', 0, "purple");
            }
            @$DOM->loadHtml($this->_content);
            break;

        default:
            // No break needed: throw leaves the switch.
            throw new \Exception("Unknown DOM type used to load content \"" . $this->_domType . "\"");
    }

    return $DOM;
}
/**
 * Insert the HTML in $str immediately before the node this action is bound to.
 *
 * The target is located through the node's id-based path, falling back to
 * the stored $this->node->_path. When nothing matches, the document is left
 * untouched. After insertion the stored path is refreshed from the sibling
 * that follows the inserted node.
 *
 * @param string|null $str HTML snippet to insert; a value loosely equal to NULL is a no-op
 * @return $this
 */
public function run($str = NULL)
{
    if ($str == NULL) {
        return $this;
    }

    $owner = jqm_use($this->node->_parentElement);
    $target = $owner->_DOM;
    if ($target->doctype) {
        $target->removeChild($target->doctype);
    }

    $expression = $this->node->getPathById($target);
    if (!$expression) {
        $expression = $this->node->_path;
    }

    $finder = new DomXpath($target);
    $matches = $finder->query($expression);
    if ($matches->length <= 0) {
        return $this;
    }

    // Parse the snippet in a scratch document of its own.
    $parsed = new DomDocument();
    $parsed->loadHtml($str);
    if ($parsed->doctype) {
        $parsed->removeChild($parsed->doctype);
    }
    $parsed->normalize();

    // Three levels down from the scratch document: presumably html > body >
    // first snippet node, since loadHTML normally adds those wrappers.
    $imported = $target->importNode($parsed->firstChild->firstChild->firstChild, true);

    $anchor = $matches->item(0);
    $inserted = $anchor->parentNode->insertBefore($imported, $anchor);

    $this->node->_path = $inserted->nextSibling->getNodePath();
    $owner->_DOM = $target;

    return $this;
}
/**
 * Parse an HTML string and evaluate an XPath expression against it.
 *
 * @param string $html  markup handed to DOMDocument::loadHTML()
 * @param string $xpath XPath expression to evaluate
 * @return mixed whatever DOMXPath::query() returns for the expression
 */
protected function xpathQuery($html, $xpath)
{
    $document = new \DomDocument();
    $document->loadHtml($html);

    $evaluator = new \DomXPath($document);
    $result = $evaluator->query($xpath);

    return $result;
}
/**
 * Insert the HTML in $str immediately after the node this action is bound to.
 *
 * The snippet is appended to the node's recorded "last DOM" string, parsed
 * in a scratch document, imported into the owning document, and its first
 * child is placed before the target's next sibling. When the target cannot
 * be found, the owning document is not modified.
 *
 * @param string|null $str HTML snippet to insert; a value loosely equal to NULL is a no-op
 * @return $this
 */
public function run($str = NULL)
{
    if ($str == NULL) {
        return $this;
    }

    $current = $this->node->dom()->get();
    $this->node->_lastDom = $current->lastdom . $str;

    $parsed = new DomDocument();
    $parsed->loadHtml($str);
    if ($parsed->doctype) {
        $parsed->removeChild($parsed->doctype);
    }

    $owner = jqm_use($this->node->_parentElement);
    $target = $owner->_DOM;
    if ($target->doctype) {
        $target->removeChild($target->doctype);
    }

    // First child of the scratch document's first node, imported with its subtree.
    $snippetRoot = $parsed->childNodes->item(0)->firstChild;
    $imported = $target->importNode($snippetRoot, true);

    $finder = new DomXpath($target);
    $matches = $finder->query($this->node->getPathByID($target));
    if ($matches->length <= 0) {
        return $this;
    }

    $anchor = $matches->item(0);
    $inserted = $anchor->parentNode->insertBefore($imported->firstChild, $anchor->nextSibling);

    $this->node->_path = $inserted->previousSibling->getNodePath();
    $owner->_DOM = $target;

    return $this;
}
/**
 * A <b> element is converted to Markdown strong emphasis.
 */
public function testConvertsBoldTag()
{
    $html = '<b>some text</b>';

    $doc = new \DomDocument();
    $doc->loadHtml($html);
    $tag = $doc->getElementsByTagName('b')->item(0);

    $parser = new \Markdownable\Tag\B();

    // assertSame() takes (expected, actual); this order keeps failure output the right way round.
    $this->assertSame('**some text**', $parser->parse($tag));
}
/**
 * A <p> element is converted to its text preceded by two line breaks and
 * followed by one.
 */
public function testConvertsPTag()
{
    $html = '<p>A paragraph of text</p>';

    $doc = new \DomDocument();
    $doc->loadHtml($html);
    $tag = $doc->getElementsByTagName('p')->item(0);

    $parser = new \Markdownable\Tag\P();

    // assertSame() takes (expected, actual).
    $expected = PHP_EOL . PHP_EOL . 'A paragraph of text' . PHP_EOL;
    $this->assertSame($expected, $parser->parse($tag));
}
/**
 * An <em> element is converted to Markdown emphasis.
 */
public function testConvertsEmTag()
{
    $html = '<em>text</em>';

    $doc = new \DomDocument();
    $doc->loadHtml($html);
    $tag = $doc->getElementsByTagName('em')->item(0);

    $parser = new \Markdownable\Tag\Em();

    // assertSame() takes (expected, actual).
    $this->assertSame('*text*', $parser->parse($tag));
}
/**
 * A <br> element is converted to a single line break.
 */
public function testConvertsHtmlBrTag()
{
    $html = '<br>';

    $doc = new \DomDocument();
    $doc->loadHtml($html);
    $tag = $doc->getElementsByTagName('br')->item(0);

    $parser = new \Markdownable\Tag\Br();

    // assertSame() takes (expected, actual).
    $this->assertSame(PHP_EOL, $parser->parse($tag));
}
/**
 * An <a> element carrying a title attribute is converted to a Markdown link
 * with a quoted title.
 */
public function testConvertsAnchorTagWithTitle()
{
    $html = '<a href="http://example.com" title="A Link">link</a>';

    $doc = new \DomDocument();
    $doc->loadHtml($html);
    $tag = $doc->getElementsByTagName('a')->item(0);

    $parser = new \Markdownable\Tag\A();

    // assertSame() takes (expected, actual).
    $this->assertSame('[link](http://example.com "A Link")', $parser->parse($tag));
}
/**
 * An <img> element carrying alt and title attributes is converted to a
 * Markdown image with a quoted title.
 */
public function testConvertsImgTagWithTitle()
{
    $html = '<img src="img.png" alt="alt text" title="An Image" />';

    $doc = new \DomDocument();
    $doc->loadHtml($html);
    $tag = $doc->getElementsByTagName('img')->item(0);

    $parser = new \Markdownable\Tag\Img();

    // assertSame() takes (expected, actual).
    $this->assertSame('![alt text](img.png "An Image")', $parser->parse($tag));
}
/**
 * An <h6> element is converted to a level-6 ATX header surrounded by blank
 * lines.
 */
public function testConvertsH6Tag()
{
    $html = '<h6>Header 6</h6>';

    $doc = new \DomDocument();
    $doc->loadHtml($html);
    $tag = $doc->getElementsByTagName('h6')->item(0);

    $parser = new \Markdownable\Tag\H6();

    // assertSame() takes (expected, actual).
    $expected = PHP_EOL . PHP_EOL . '###### Header 6' . PHP_EOL . PHP_EOL;
    $this->assertSame($expected, $parser->parse($tag));
}
/**
 * Parse the given XML attribute string into an array.
 *
 * The string is embedded in a throw-away <html> tag, parsed with
 * DOMDocument::loadHTML(), and the attributes of the resulting root element
 * are read back.
 *
 * @author troelskn
 * @link http://stackoverflow.com/a/1083821/172068
 * @param string $input raw attribute text, e.g. 'a="1" b="2"'
 * @return array attribute values keyed by attribute name
 */
public function parseAttributes($input)
{
    $probe = new DomDocument();
    $probe->loadHtml("<html {$input} />");

    $parsed = array();
    foreach ($probe->documentElement->attributes as $key => $attribute) {
        $parsed[$key] = $attribute->value;
    }

    return $parsed;
}
/**
 * Download $url (through a 600-second APC cache) and parse it as HTML.
 *
 * A failed or empty download is neither cached nor handed to the parser;
 * in that case an empty DomDocument is returned, so callers always receive
 * a document object.
 *
 * @param string $url address to fetch; also used as the APC cache key
 * @return DomDocument parsed document, empty when no content was available
 */
function getDataFromFeed($url)
{
    $data = apc_fetch($url);
    if (!$data) {
        error_log('CACHE MISS: ' . $url);
        $data = file_get_contents($url);

        // Caching a failed download would only store a useless entry.
        if ($data !== false && $data !== '') {
            apc_store($url, $data, 600);
        }
    }

    $doc = new DomDocument();

    // loadHTML() rejects an empty source (ValueError on PHP 8), so only
    // parse when there is something to parse.
    $html = (string) $data;
    if ($html !== '') {
        @$doc->loadHtml($html);
    }

    return $doc;
}
/**
 * Extracts information from the file $fileName associated with the url $url.
 *
 * The document type for this document is given in $type, and the images on
 * disk should be in the directory named $imagePath. The urls where the
 * images link to should be in $imageUrlPath.
 *
 * The file is parsed as HTML and walked element by element. Named anchors
 * move the "current URL" to "$url#name"; whenever the current URL changes,
 * the text collected so far is emitted as one ezcSearchSimpleArticle.
 * Every <img> yields one document through self::extractImage().
 *
 * Side effect: prints one " - src => url" line per image to standard output.
 *
 * @param string $fileName path of the HTML file to read
 * @param string $type document type passed through to each article
 * @param string $url base URL of the document
 * @param string $imagePath directory holding the images on disk, or null to
 *                          resolve them relative to dirname($fileName)
 * @param string $imageUrlPath URL prefix for images, or null to resolve them
 *                             relative to $url
 * @return array(ezcSearchDocument)
 */
public static function extract($fileName, $type, $url, $imagePath = null, $imageUrlPath = null)
{
    // The file's modification time is used as the publication date of every article.
    $published = filemtime($fileName);
    $converted = file_get_contents($fileName);
    $dom = new DomDocument();
    // Parser warnings for imperfect markup are suppressed.
    @$dom->loadHtml($converted);
    $tbody = $dom->getElementsByTagName('div')->item(0);
    $xpath = new DOMXPath($dom);
    // The expression starts with "//", so it searches the whole document; the
    // context node passed alongside does not narrow it.
    $tocElem = $xpath->evaluate("//h1[@class='title']", $tbody)->item(0);
    $title = $tocElem ? $tocElem->nodeValue : 'no title';
    $docs = array();
    // NOTE(review): $body and $urls are assigned here (and $body again below)
    // but never read in this method.
    $body = $urls = array();
    $currentUrl = $url;
    $lastUrl = $url;
    $currentBody = '';
    // child::*[self::p or self::h1]
    $xpath = new DOMXPath($dom);
    $tbody = $xpath->evaluate("//p|//h1|//ol|//ul|//dl|//img|//a", $tbody);
    $body = '';
    foreach ($tbody as $item) {
        // NOTE(review): <ol> and <ul> are selected above but have no case
        // below, so they contribute nothing themselves.
        switch ($item->tagName) {
            case 'a':
                // A named anchor starts a new section addressed by its fragment.
                $name = $item->getAttribute('name');
                if (strlen($name)) {
                    $currentUrl = $url . '#' . $name;
                }
                break;
            case 'img':
                $alt = $item->getAttribute('alt');
                $src = $item->getAttribute('src');
                // With an explicit image directory, leading "../" runs are
                // stripped from the src before it is appended.
                $location = $imagePath == null ? dirname($fileName) . '/' . $src : $imagePath . '/' . preg_replace('@(\\.\\./)+@', '', $src);
                // Root-relative srcs are used as-is for the image URL.
                // NOTE(review): $src[0] assumes src is non-empty.
                $imgurl = $src[0] == '/' ? $src : ($imageUrlPath === null ? $url . '/' . $src : $imageUrlPath . '/' . preg_replace('@(\\.\\./)+@', '', $src));
                echo " - {$src} => {$imgurl}\n";
                $docs[] = self::extractImage($alt, $location, $imgurl);
                break;
            case 'p':
            case 'h1':
            case 'dl':
                // The section changed since the last text element: flush the
                // text gathered so far under the previous URL.
                if ($lastUrl !== $currentUrl) {
                    $docs[] = new ezcSearchSimpleArticle(null, $title, $currentBody, $published, $lastUrl, $type);
                    $currentBody = '';
                    $lastUrl = $currentUrl;
                }
                // Append the element's text (markup stripped) plus a blank line.
                $currentBody .= strip_tags($dom->saveXml($item)) . 
"\n\n";
                break;
        }
    }
    // Flush whatever text remains for the final section.
    if ($currentBody != '') {
        $docs[] = new ezcSearchSimpleArticle(null, $title, $currentBody, $published, $lastUrl, $type);
    }
    return $docs;
}
/**
 * Download $url (through a 600-second APC cache) and parse it as HTML.
 *
 * The cache is bypassed when the request carries an "ignore_cache" query
 * parameter. A failed or empty download is neither cached nor handed to the
 * parser; an empty DomDocument is returned in that case.
 *
 * @param string $url address to fetch; also used as the APC cache key
 * @return DomDocument parsed document, empty when no content was available
 */
function getDataFromHtml($url)
{
    $ignoreCache = isset($_GET['ignore_cache']);

    $data = apc_fetch($url);
    if (!$data || $ignoreCache) {
        error_log('CACHE MISS: ' . $url);
        $data = file_get_contents($url);

        // Caching a failed download would only store a useless entry.
        if ($data !== false && $data !== '') {
            apc_store($url, $data, 600);
        }
    }

    $doc = new DomDocument();

    // loadHTML() rejects an empty source (ValueError on PHP 8), so only
    // parse when there is something to parse.
    $html = (string) $data;
    if ($html !== '') {
        @$doc->loadHtml($html);
    }

    return $doc;
}
/**
 * Every line inside a <blockquote> is prefixed with "> " and the block is
 * surrounded by blank lines.
 */
public function testConvertsMultilineBlockquoteTag()
{
    $html = <<<EOD
<blockquote>
Some line of text
Another line of text
</blockquote>
EOD;

    $doc = new \DomDocument();
    $doc->loadHtml($html);
    $tag = $doc->getElementsByTagName('blockquote')->item(0);

    $parser = new \Markdownable\Tag\Blockquote();

    // assertSame() takes (expected, actual).
    $expected = PHP_EOL . PHP_EOL . '> Some line of text' . PHP_EOL . '> Another line of text' . PHP_EOL . PHP_EOL;
    $this->assertSame($expected, $parser->parse($tag));
}
/**
 * Replace Binumi embed links in a chunk of page HTML with iframe embeds.
 *
 * Looks for <a class="binumi-embed"> elements (as added by the chooser or
 * the moodle file picker) whose href matches the embed-URL pattern and swaps
 * each one for the iframe markup built from that href and the dimensions of
 * the link's first child.
 *
 * Input that is empty, not a string, or does not mention the Binumi host is
 * returned unchanged.
 *
 * @param string $html
 * @param array $options
 * @return string
 */
public function filter($html, array $options = array())
{
    global $COURSE;
    $courseid = isset($COURSE->id) ? $COURSE->id : null;

    if (empty($html) || !is_string($html)) {
        return $html;
    }
    if (strpos($html, $this->_binumi_client->get_host()) === false) {
        return $html;
    }

    $dom = new DomDocument();
    $encoded = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8');

    // Parse as a bare fragment when this libxml build offers the flags for it.
    $fragmentFlagsAvailable = defined('LIBXML_HTML_NOIMPLIED') && defined('LIBXML_HTML_NODEFDTD');
    if ($fragmentFlagsAvailable) {
        @$dom->loadHtml($encoded, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
    } else {
        @$dom->loadHtml($encoded);
    }

    $finder = new DOMXPath($dom);
    foreach ($finder->query('//a') as $node) {
        $href = $node->getAttribute('href');
        $class = $node->getAttribute('class');

        if (empty($href) || $class != 'binumi-embed') {
            continue;
        }
        if (!preg_match($this->_re_embed_url, $href)) {
            continue;
        }

        $newnode = $dom->createDocumentFragment();
        $imgnode = $node->firstChild;
        $href = htmlspecialchars($href);

        // extract() brings $width and $height into scope. The locals that stay
        // in use after it keep their original names, so the set of names it
        // can overwrite is unchanged.
        extract($this->_get_image_elem_dimensions($imgnode));
        $html = $this->_get_iframe_embed_html($href, $width, $height);

        $newnode->appendXML($html);
        $node->parentNode->replaceChild($newnode, $node);
    }

    return $dom->saveHTML();
}
/**
 * Collect the href value of every <a> element in an HTML string.
 *
 * The markup is parsed with DOMDocument::loadHTML() and queried directly
 * with DOMXPath. The previous implementation went through saveXML() and
 * SimpleXML, and searched only the XHTML namespace: documents without an
 * xmlns declaration yielded no links, and XML that failed to re-parse
 * caused a fatal error.
 *
 * @param string $html markup to scan
 * @return array href strings in document order ('' for anchors without an
 *               href); an empty array for empty or non-string input
 */
function getUrlsFromHtml($html)
{
    $urls = array();

    // loadHTML() rejects an empty source (ValueError on PHP 8).
    if (!is_string($html) || trim($html) === '') {
        return $urls;
    }

    $document = new \DomDocument();
    @$document->loadHtml($html);

    // The HTML parser does not apply XML namespaces, so a plain //a matches
    // anchors whether or not the page declares the XHTML namespace.
    $xpath = new \DOMXPath($document);
    foreach ($xpath->query('//a') as $anchor) {
        $urls[] = $anchor->getAttribute('href');
    }

    return $urls;
}
/**
 * Copy the body text and metadata of an HTML page into a module XML document.
 *
 * From $html it reads the text of <body>, the <title>, and the "description"
 * and "Author" <meta> contents. In $xml it sets the title/description/author
 * attributes on every <ModulePrefs> element and replaces the value of every
 * <Content> element with the body text.
 *
 * Metadata missing from the page is written as an empty string; previously
 * the corresponding variables were undefined.
 *
 * @param string $html source HTML page
 * @param string $xml  XML document to fill in
 * @return string an XML declaration followed by the serialized document
 */
function bady2cdata($html, $xml)
{
    // Defaults for pages that lack <title> or the expected <meta> tags.
    $title = '';
    $description = '';
    $author = '';

    // Get XML params
    $dom = new DomDocument();
    $dom->preserveWhiteSpace = FALSE;
    $dom->loadHtml($html);

    // When several elements match, the last one wins.
    foreach ($dom->getElementsByTagName('body') as $bodyElement) {
        $html = $bodyElement->nodeValue;
    }
    foreach ($dom->getElementsByTagName('title') as $titleElement) {
        $title = $titleElement->nodeValue;
    }
    foreach ($dom->getElementsByTagName('meta') as $metaElement) {
        $metaName = $metaElement->getAttribute('name');
        if ($metaName == 'description') {
            $description = $metaElement->getAttribute('content');
        }
        if ($metaName == 'Author') {
            $author = $metaElement->getAttribute('content');
        }
    }

    // Write to XML
    $dom = new DomDocument();
    $dom->preserveWhiteSpace = FALSE;
    $dom->loadXML($xml);

    foreach ($dom->getElementsByTagName('ModulePrefs') as $prefs) {
        $prefs->setAttribute('title', $title);
        $prefs->setAttribute('description', $description);
        $prefs->setAttribute('author', $author);
    }
    foreach ($dom->getElementsByTagName('Content') as $contentElement) {
        $contentElement->nodeValue = $html;
    }

    $s = '<?xml version="1.0" encoding="UTF-8" ?>';
    $s .= $dom->saveHTML();

    return $s;
}
/**
 * Render a preview of the template for $post.
 *
 * The base HTML is run through the content, email and subscriber tag
 * processors. When the DOM extension is available, every <p> additionally
 * gets fixed top/bottom margins appended to its inline style.
 *
 * @param object $post post whose ID is passed to the tag processors
 * @return string rendered HTML
 */
static function render_template_example($post)
{
    $markup = self::render_html_base_by_post($post);
    $markup = spnl_do_content_tags($markup, $post->ID, $post->ID, 0, true);
    $markup = spnl_do_email_tags($markup, $post->ID, $post->ID, 0, true);
    $markup = spnl_do_subscriber_tags($markup, $post->ID, $post->ID, 0, true);

    if (!class_exists("DomDocument")) {
        return $markup;
    }

    $document = new DomDocument();
    $document->strictErrorChecking = false;
    @$document->loadHtml($markup);

    foreach ($document->getElementsByTagName('p') as $paragraph) {
        $existingStyle = $paragraph->getAttribute('style');
        $paragraph->setAttribute('style', $existingStyle . ' margin-top:0;margin-bottom:10px;');
    }

    return $document->saveHtml();
}
/**
 * The <li> children of an <ol> are converted to a numbered Markdown list.
 */
public function testConvertsOlListTag()
{
    $html = <<<EOD
<ol>
<li>Item 1</li>
<li>Item 2</li>
<li>Item 3</li>
</ol>
EOD;

    $doc = new \DomDocument();
    $doc->loadHtml($html);
    $tag = $doc->getElementsByTagName('ol')->item(0);

    $parser = new \Markdownable\Tag\Li();

    // Only element children are fed to the parser; whitespace text nodes
    // between the items are skipped.
    $output = '';
    foreach ($tag->childNodes as $child) {
        if ($child instanceof \DOMElement) {
            $output .= $parser->parse($child);
        }
    }

    // assertSame() takes (expected, actual).
    $expected = '1. Item 1' . PHP_EOL . '2. Item 2' . PHP_EOL . '3. Item 3' . PHP_EOL;
    $this->assertSame($expected, $output);
}
<?php
// Renders the element with id "content" of the page named by ?page=...
// inside a jQuery Mobile page shell. Every link of the loaded page is
// rewritten to go back through content.php.
include "inc/main.inc";
include "inc/header.inc";

// Request parameters; anything missing or non-string is treated as empty.
$page = (isset($_GET['page']) && is_string($_GET['page'])) ? $_GET['page'] : '';
$title = (isset($_GET['title']) && is_string($_GET['title'])) ? $_GET['title'] : '';

// NOTE(review): $page is caller-controlled and is passed straight to
// file_get_contents(), so any local file or URL the server can reach is
// readable through this script. Restrict it to the intended content
// directory or host list once that is known.
$pageHtml = ($page !== '') ? file_get_contents($page) : false;

$doc = new DomDocument();
// No DTD validation is requested while parsing.
$doc->validateOnParse = false;
// loadHTML() rejects an empty source, so only parse real content.
if ($pageHtml !== false && $pageHtml !== '') {
    $doc->loadHtml($pageHtml);
}
?>
<div data-role="page" id="content" data-add-back-btn="true">

	<div data-role="header">
		<h1><?php echo htmlspecialchars($title, ENT_QUOTES, 'UTF-8'); ?></h1>
	</div><!-- /header -->

	<div data-role="content">
		<p>
<?php
// Route every link of the loaded page back through this script.
$links = $doc->getElementsByTagName("a");
foreach ($links as $link) {
    $href = $link->getAttribute("href");
    $newLink = "content.php?page=" . $href;
    $link->setAttribute("href", $newLink);
}

// Output only the element with id "content", if the page has one.
$elemContent = $doc->getElementById('content');
if ($elemContent !== null) {
    $elemText = $elemContent->ownerDocument->saveXML($elemContent);
    echo $elemText;
}
?>
		</p>
/**
 * Rewrites the rendered "component" buffer of the admin page so that the
 * module parameter panels are replaced/extended with the Offlajn dashboard
 * and parameter tabs.
 *
 * Does nothing unless OFFLAJNADMIN is defined, and does nothing for requests
 * with output=json. Reads the globals $offlajnParams and $offlajnDashboard.
 * Which container element is looked up, and how the result is spliced back
 * into the buffer, depends on JVERSION and on whether the current option is
 * com_advancedmodules.
 */
function onAfterDispatch() {
    global $offlajnParams, $offlajnDashboard;
    $app = JFactory::getApplication();
    if (!defined('OFFLAJNADMIN') || isset($_REQUEST['output']) && $_REQUEST['output'] == 'json') {
        return;
    }
    $doc = JFactory::getDocument();
    $c = $doc->getBuffer('component');
    $dom = new DomDocument();
    // Load the buffer wrapped in a <div>, with non-ASCII characters turned
    // into entities first; the fallback branch is used when mbstring is missing.
    if (function_exists("mb_convert_encoding")) {
        @$dom->loadHtml('<?xml encoding="UTF-8"><div>' . mb_convert_encoding($c, 'HTML-ENTITIES', "UTF-8") . '</div>');
    } else {
        @$dom->loadHtml('<?xml encoding="UTF-8"><div>' . htmlspecialchars_decode(utf8_decode(htmlentities($c, ENT_COMPAT, 'utf-8', false))) . '</div>');
    }
    // NOTE(review): $lis and $app are assigned but not read in this method.
    $lis = array();
    $moduleparams = "";
    $advanced = JRequest::getCmd('option') == 'com_advancedmodules';
    // Locate the element that will hold the parameter panels.
    if (version_compare(JVERSION, '3.0.0', 'ge') && !$this->getElementById($dom, 'module-sliders')) {
        // Joomla 3.0.3 fix
        if (version_compare(JVERSION, '3.1.99', 'ge')) {
            $moduleparams = $this->getElementByClass($dom, 'span9');
        } elseif (version_compare(JVERSION, '3.0.3', 'ge')) {
            $moduleparams = $this->getElementById($dom, 'collapse0');
        } else {
            $moduleparams = $this->getElementById($dom, 'options-basic');
        }
        if ($advanced) {
            $moduleparams = version_compare(JVERSION, '3.2.2', 'ge') ? 
$this->getElementByClass($dom, 'span9') : $this->getElementByClass($dom, 'span6', 1);
        }
        if ($moduleparams) {
            // Build div#content-box > div#module-sliders.pane-sliders inside the
            // container and continue working on the innermost div.
            $element = $dom->createElement('div');
            $element->setAttribute('id', 'content-box');
            $moduleparams->appendChild($element);
            $moduleparams = $element;
            $element = $dom->createElement('div');
            $element->setAttribute('id', 'module-sliders');
            $element->setAttribute('class', 'pane-sliders');
            $moduleparams->appendChild($element);
            $moduleparams = $element;
        }
    } elseif (version_compare(JVERSION, '1.6.0', 'ge')) {
        $moduleparams = $this->getElementById($dom, 'module-sliders');
    } else {
        $moduleparams = $this->getElementById($dom, 'menu-pane');
    }
    if ($moduleparams) {
        // Detach all existing children, remembering them in order.
        $removed = array();
        while ($cNode = $moduleparams->firstChild) {
            $removed[] = $moduleparams->removeChild($cNode);
        }
        // Drop the first two (1.6+) or the first one (older) of the detached
        // nodes; the rest are re-attached below as "legacy" panels.
        if (version_compare(JVERSION, '1.6.0', 'ge')) {
            array_splice($removed, 0, 2);
        } else {
            array_splice($removed, 0, 1);
        }
        // Assemble the replacement markup from the globals.
        $html = '<div>';
        $html .= isset($offlajnDashboard) ? $offlajnDashboard : '';
        $html .= isset($offlajnParams['first']) && is_array($offlajnParams['first']) ? implode("\n", $offlajnParams['first']) : '';
        $html .= isset($offlajnParams['last']) && is_array($offlajnParams['last']) ? implode("\n", $offlajnParams['last']) : '';
        $html .= '</div>';
        $tabsDom = new DomDocument();
        if (function_exists("mb_convert_encoding")) {
            @$tabsDom->loadHtml('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
        } else {
            @$tabsDom->loadHtml('<?xml encoding="UTF-8">' . htmlspecialchars_decode(utf8_decode(htmlentities($html, ENT_COMPAT, 'utf-8', false))));
        }
        $node = $dom->importNode($tabsDom->getElementsByTagName('div')->item(0), true);
        // Move the imported wrapper's <div> children into the container and
        // discard every other child; @ silences the access on nodes without a
        // tagName (e.g. text nodes).
        while ($cNode = $node->firstChild) {
            if (@$cNode->tagName == 'div') {
                $moduleparams->appendChild($cNode);
            } else {
                $node->removeChild($cNode);
            }
        }
        if (count($removed) > 0) {
            foreach ($removed as $r) {
                if ($r instanceof DOMElement) {
                    $r->setAttribute("class", $r->getAttribute("class") . 
" legacy");
                    $moduleparams->appendChild($r);
                }
            }
        }
        if (!version_compare(JVERSION, '1.6.0', 'ge')) {
            $tables = $dom->getElementsByTagName('table');
            foreach ($tables as $table) {
                $table->setAttribute("cellspacing", "0");
            }
        }
        // Wrap the last word of each panel title (first <span> inside each <h3>)
        // in <b>...</b>, written back as a CDATA section.
        // NOTE(review): assumes every <h3> contains a <span> with a first child.
        $params = $moduleparams->getElementsByTagName('h3');
        foreach ($params as $param) {
            $span = $param->getElementsByTagName('span')->item(0);
            $titleWords = explode(" ", $span->textContent);
            $titleWords[count($titleWords) - 1] = "<b>" . $titleWords[count($titleWords) - 1] . "</b>";
            $newTitle = implode(' ', $titleWords);
            $span->removeChild($span->firstChild);
            $newText = $dom->createCDATASection($newTitle);
            $span->appendChild($newText);
        }
        // Number the panels: offlajnpanel-0, offlajnpanel-1, ...
        // NOTE(review): setAttribute() is called on every child node; this
        // presumes all children are elements at this point.
        $j = 0;
        foreach ($moduleparams->childNodes as $param) {
            $param->setAttribute("id", "offlajnpanel-" . $j);
            $j++;
        }
    }
    if (!isset($doc->_script['text/javascript'])) {
        $doc->_script['text/javascript'] = array();
    }
    // Strip the stock pane-toggler initialisation from the inline script and
    // append a domready hook that makes the form validator always pass.
    // NOTE(review): when the entry was just initialised to array() above,
    // preg_replace() returns an array and the .= below concatenates onto it.
    $doc->_script['text/javascript'] = preg_replace("/window.addEvent.*?pane-toggler.*?\\}\\);.*?\\}\\);/i", '', $doc->_script['text/javascript']);
    $doc->_script['text/javascript'] .= ' window.addEvent("domready", function(){ if(document.formvalidator) document.formvalidator.isValid = function() {return true;}; });';
    if (version_compare(JVERSION, '3.0.0', 'ge')) {
        if ($moduleparams && $moduleparams->parentNode) {
            // NOTE(review): this declares a global function inside the method;
            // reaching this branch a second time in one request would redeclare it.
            function getInnerHTML($Node) {
                $Document = new DOMDocument();
                $Document->appendChild($Document->importNode($Node, true));
                return $Document->saveHTML();
            }
            $nc = getInnerHTML($moduleparams->parentNode);
        } else {
            $nc = $dom->saveHTML();
        }
        // Keep only what lies between <body> and </body>.
        $nc = preg_replace("/.*?<body>/si", '', $nc, 1);
        $nc = preg_replace("/<\\/body>.*/si", '', $nc, 1);
        // Opening tag (in the ORIGINAL buffer) after which the new markup is
        // inserted; the tag depends on the Joomla version.
        $pattern = '/<div\\s*class="tab-pane"\\s*id="options-basic".*?>/';
        if (version_compare(JVERSION, '3.1.99', 'ge')) {
            $pattern = '/<div\\s*class="span9".*?>/';
        } elseif (version_compare(JVERSION, '3.0.3', 'ge')) {
            $pattern = '/<div\\s*class="accordion-body collapse in"\\s*id="collapse0".*?>/';
        }
        if ($advanced) {
            $pattern = version_compare(JVERSION, '3.2.2', 'ge') ? 
'/<div\\s*class="span9".*?>/' : '/<\\/div>\\s*<div\\s*class="span6".*?>/';
        }
        preg_match($pattern, $c, $matches);
        if (count($matches) > 0) {
            // str_replace() replaces every occurrence of the matched tag text.
            $c = str_replace($matches[0], $matches[0] . $nc, $c);
        } else {
            $c = $nc;
        }
    } else {
        // Pre-3.0: serialize the whole document and strip the wrappers that
        // were added when loading (<body><div> ... </div></body>).
        $c = $dom->saveHtml();
        $c = preg_replace("/.*?<body><div>/si", '', $c, 1);
        $c = preg_replace("/<\\/div><\\/body>.*/si", '', $c, 1);
    }
    $doc->setBuffer($c, 'component');
}
/**
 * Filter the page html and look for an <a><img> element added by the chooser
 * or an <a> element added by the moodle file picker, replacing each matching
 * link with embed markup.
 *
 * Three kinds of href are recognised, tried in this order:
 *  - embed urls ($_re_embed_url): replaced by an iframe embed; when an LTI
 *    configuration exists and a course id is known, the href is first turned
 *    into a generated embed url, otherwise it is HTML-escaped;
 *  - thumbnail html from the Chooser / links from the old filepicker
 *    ($_re_api1_public_urls): sized from the link's first child;
 *  - links from the new repository filepicker plugin
 *    ($_re_api2_public_urls): sized with the default thumbnail dimensions.
 *
 * Input that is empty, not a string, or does not mention the client host is
 * returned unchanged.
 *
 * @param string $html
 * @param array $options
 * @return string
 */
public function filter($html, array $options = array())
{
    global $COURSE;
    $courseid = isset($COURSE->id) ? $COURSE->id : null;

    if (empty($html) || !is_string($html)) {
        return $html;
    }
    if (strpos($html, $this->_mcore_client->get_host()) === false) {
        return $html;
    }

    $dom = new DomDocument();
    @$dom->loadHtml(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));

    $finder = new DOMXPath($dom);
    foreach ($finder->query('//a') as $node) {
        $href = $node->getAttribute('href');
        if (empty($href)) {
            continue;
        }

        // Locals that stay in use after extract() keep their original names,
        // so the set of names it can overwrite is unchanged.
        if (preg_match($this->_re_embed_url, $href)) {
            $newnode = $dom->createDocumentFragment();
            $imgnode = $node->firstChild;
            if ($this->_mcore_client->has_lti_config() && !is_null($courseid)) {
                $href = $this->_generate_embed_url($href, $courseid);
            } else {
                $href = htmlspecialchars($href);
            }
            extract($this->_get_image_elem_dimensions($imgnode));
            $html = $this->_get_iframe_embed_html($href, $width, $height);
        } elseif (preg_match($this->_re_api1_public_urls, $href)) {
            $newnode = $dom->createDocumentFragment();
            $imgnode = $node->firstChild;
            extract($this->_get_image_elem_dimensions($imgnode));
            $html = $this->_get_embed_html_from_api1_public_url($href, $width, $height, $courseid);
        } elseif (preg_match($this->_re_api2_public_urls, $href)) {
            $newnode = $dom->createDocumentFragment();
            $width = $this->_default_thumb_width;
            $height = $this->_default_thumb_height;
            $html = $this->_get_embed_html_from_api2_public_url($href, $width, $height, $courseid);
        } else {
            // Not one of ours: leave the link alone.
            continue;
        }

        $newnode->appendXML($html);
        $node->parentNode->replaceChild($newnode, $node);
    }

    return $dom->saveHTML();
}
// Scrape company details: collect "/company/" links from every search page,
// load each company page, read the contact fields by id/class, then save and
// print one record per company.

// Start from empty lists unless earlier code already populated them, so the
// loops below never iterate over an undefined variable.
if (!isset($companypages) || !is_array($companypages)) {
    $companypages = array();
}
if (!isset($companies) || !is_array($companies)) {
    $companies = array();
}

foreach ($searchpages as $searchpage) {
    $original_file = file_get_contents("{$searchpage}");
    if ($original_file === false) {
        // Search page could not be downloaded; nothing to extract from it.
        continue;
    }
    $stripped_file = strip_tags($original_file, "<a>");
    preg_match_all("/<a(?:[^>]*)href=\"([^\"]*)\"(?:[^>]*)>(?:[^<]*)<\\/a>/is", $stripped_file, $matches);
    //extract pages
    foreach ($matches[1] as $key => $value) {
        if (strstr($value, "/company/")) {
            $companypages[] = $value;
        }
    }
}
#print_r($companypages);

foreach ($companypages as $key => $value) {
    $html = file_get_contents($value);
    $dom = new DomDocument();
    // loadHTML() rejects an empty source; a failed download leaves the
    // document empty and every scraped field blank. Warnings about imperfect
    // markup are suppressed.
    if ($html !== false && $html !== '') {
        @$dom->loadHtml($html);
    }
    $xpath = new DomXpath($dom);

    // Trimmed text of the first node matching $query, or '' when none matches.
    $firstText = function ($query) use ($xpath) {
        $matched = $xpath->query($query);
        $first = $matched ? $matched->item(0) : null;
        return $first ? trim($first->nodeValue) : '';
    };

    $companies[$key]['name'] = $firstText('//*[@id="or-name"]');
    $companies[$key]['title'] = $firstText('//*[@id="or-job-title"]');
    #$companies[$key]['company']= $xpath->query('//*[@title]')->item(0);
    // The company name is derived from the last URL segment: "acme-inc" => "Acme Inc".
    $companies[$key]['company'] = ucwords(str_replace('-', ' ', substr($value, strrpos($value, '/') + 1)));
    $companies[$key]['address'] = $firstText('//*[@class="street-address"]');
    $companies[$key]['city'] = $firstText('//*[@class="locality"]');
    $companies[$key]['state'] = $firstText('//*[@class="region"]');
    $companies[$key]['zip'] = $firstText('//*[@class="postal-code"]');
    $companies[$key]['country'] = $firstText('//*[@class="country-name"]');
}

foreach ($companies as $value) {
    scraperwiki::save(array('name', 'title', 'company', 'address', 'city', 'state', 'zip', 'country'), $value);
    echo implode(',', $value) . PHP_EOL;
}
<?php
// Downloads an imgur tag page, repairs it with tidy, and walks its images
// while building a small XML document rooted at <holder>.
// NOTE(review): this script looks unfinished; see the notes below.
header('Access-Control-Allow-Origin: *');
// Output document: <holder> is the root element.
$xml = new DomDocument("1.0", "UTF-8");
$container = $xml->createElement("holder");
$xml->appendChild($container);
$html = file_get_contents('http://imgur.com/t/pizza');
// Let tidy repair the markup before it is handed to the DOM parser.
$tidy = new tidy();
$html = $tidy->repairString($html);
$d = new DomDocument();
$d->loadHtml($html);
$p = new DomXPath($d);
$images = array();
// NOTE(review): 'img' is a relative path evaluated from the document node,
// so it only matches an <img> that is the root element; '//img' was
// presumably intended. Confirm before changing.
foreach ($p->query('img') as $i) {
    $images[] = $i;
    // NOTE(review): $i is a DOM node here, while createTextNode() takes a string.
    $container->appendChild($xml->createTextNode($i));
}
// NOTE(review): $out is never assigned in the visible code.
foreach ($images as $i) {
    echo $out;
}
/* for($i=1; $i<10;$i++){ $hold = $xml->createElement("image"); $src = $xml->createAttribute("src"); $tsrc = $xml->createTextNode($images[$i]); $src->appendChild($tsrc); $hold->appendChild($src); $container->appendChild($hold); }
/**
 * Build the final HTML body of this email for the current subscriber.
 *
 * Steps, in order:
 *  1. Obtain the base HTML: from SendPress_Email_Cache when it has an entry,
 *     otherwise from the 'sendpress_report_body_html_<id>' transient,
 *     regenerating (and re-storing) it when the transient is missing or
 *     purge() is true.
 *  2. Substitute the subscriber placeholders (*|FNAME|*, *|LNAME|*,
 *     *|EMAIL|*, *|ID|*) when a subscriber record exists.
 *  3. Insert a 1x1 open-tracking image before </body>.
 *  4. Run the subscriber tag processor.
 *  5. When the DOM extension is available: append fixed margins to every <p>
 *     and, if tracker() is true, rewrite link hrefs to tracking URLs.
 *  6. Replace *|SP:UNSUBSCRIBE|* and *|SP:MANAGE|* with the old or new style
 *     of unsubscribe/manage links.
 *  7. Substitute the subscriber placeholders once more.
 *
 * @return string the rendered HTML
 */
function html() {
    $post_template = $this->id();
    global $wpdb;
    // Get any existing copy of our transient data
    if (SendPress_Email_Cache::get($this->id()) != null) {
        $body_html = SendPress_Email_Cache::get($this->id());
        $post_template = get_post_meta($this->id(), '_sendpress_template', true);
        $body_html = spnl_do_email_tags($body_html, $post_template, $this->id(), $this->subscriber_id(), true);
    } else {
        if (false === ($body_html = get_transient('sendpress_report_body_html_' . $this->id())) || $this->purge() == true) {
            // It wasn't there, so regenerate the data and save the transient
            if (!$this->post_info) {
                $this->post_info = get_post($this->id());
            }
            // cache() with no argument reads; with an argument it stores.
            if ($this->cache() !== false) {
                $body_html = $this->cache();
            } else {
                $body_html = SendPress_Template::get_instance()->render($this->id(), false, false, $this->remove_links());
                $this->cache($body_html);
            }
            // Keep the regenerated body for two hours.
            set_transient('sendpress_report_body_html_' . $this->id(), $body_html, 60 * 60 * 2);
        }
    }
    $subscriber = SendPress_Data::get_subscriber($this->subscriber_id());
    if (!is_null($subscriber)) {
        $body_html = str_replace("*|FNAME|*", $subscriber->firstname, $body_html);
        $body_html = str_replace("*|LNAME|*", $subscriber->lastname, $body_html);
        $body_html = str_replace("*|EMAIL|*", $subscriber->email, $body_html);
        $body_html = str_replace("*|ID|*", $subscriber->subscriberID, $body_html);
    }
    // Open tracking: an encrypted payload identifying subscriber and report
    // becomes the URL of a 1x1 image placed right before </body>.
    $open_info = array("id" => $this->subscriber_id(), "report" => $this->id(), "view" => "open");
    $code = SendPress_Data::encrypt($open_info);
    $link = SendPress_Manager::public_url($code);
    $tracker = "<img src='" . $link . 
"' width='1' height='1'/></body>";
    $body_html = str_replace("</body>", $tracker, $body_html);
    // Link colour used for the unsubscribe/manage links further down.
    $body_link = get_post_meta($this->id(), 'body_link', true);
    $body_html = spnl_do_subscriber_tags($body_html, $post_template, $this->id(), $this->subscriber_id(), true);
    if (class_exists("DomDocument")) {
        $dom = new DomDocument();
        $dom->strictErrorChecking = false;
        // Parser warnings for imperfect email markup are suppressed.
        @$dom->loadHtml($body_html);
        $pTags = $dom->getElementsByTagName('p');
        foreach ($pTags as $pElement) {
            $px = $pElement->getAttribute('style');
            $pElement->setAttribute('style', $px . ' margin-top:0;margin-bottom:10px;');
        }
        if ($this->tracker()) {
            $aTags = $dom->getElementsByTagName('a');
            foreach ($aTags as $aElement) {
                $href = $aElement->getAttribute('href');
                // Leave hrefs alone when they still contain a "*|" placeholder
                // or when their last "#" is at position 0 (e.g. "#top").
                if (strrpos($href, "*|") === false && strrpos($href, "#") !== 0) {
                    // Optionally leave hrefs containing "mailto" untouched.
                    if (SendPress_Option::get('skip_mailto', false) == true && strrpos($href, "mailto") !== false) {
                        continue;
                    }
                    // Replace the href with a public URL whose encrypted
                    // payload carries the original target.
                    $link = array("id" => $this->subscriber_id(), "report" => $this->id(), "view" => "tracker", "url" => $href);
                    $code = SendPress_Data::encrypt($link);
                    $link = SendPress_Manager::public_url($code);
                    $href = $link;
                    $aElement->setAttribute('href', $href);
                }
            }
        }
        $body_html = $dom->saveHtml();
    }
    // Unsubscribe link (payload action "unsubscribe"); used by both link styles below.
    $link_data = array("id" => $this->subscriber_id(), "report" => $this->id(), "urlID" => '0', "view" => "manage", "listID" => $this->list_id(), "action" => "unsubscribe");
    $code = SendPress_Data::encrypt($link_data);
    $link = SendPress_Manager::public_url($code);
    if (SendPress_Option::get('old_unsubscribe_link', false) === true) {
        // Old style: one sentence with an unsubscribe link, no manage link.
        $start_text = __("Not interested anymore?", "sendpress");
        $unsubscribe = __("Unsubscribe", "sendpress");
        $instantly = __("Instantly", "sendpress");
        $remove_me_old = $start_text . ' <a href="' . $link . '" style="color: ' . $body_link . ';" >' . $unsubscribe . '</a> ' . $instantly . '.';
        $body_html = str_replace("*|SP:UNSUBSCRIBE|*", $remove_me_old, $body_html);
        $body_html = str_replace("*|SP:MANAGE|*", '', $body_html);
    } else {
        // New style: "Unsubscribe | Manage Subscription"; the manage link uses
        // the same payload with an empty action.
        $link_data = array("id" => $this->subscriber_id(), "report" => $this->id(), "urlID" => '0', "view" => "manage", "listID" => $this->list_id(), "action" => "");
        $code = SendPress_Data::encrypt($link_data);
        $manage_link = SendPress_Manager::public_url($code);
        $unsubscribe = __("Unsubscribe", "sendpress");
        $manage = __("Manage Subscription", "sendpress");
        $remove_me = ' <a href="' . $link . 
'" style="color: ' . $body_link . ';" >' . $unsubscribe . '</a> | ';
        $manage = ' <a href="' . $manage_link . '" style="color: ' . $body_link . ';" >' . $manage . '</a> ';
        $body_html = str_replace("*|SP:UNSUBSCRIBE|*", $remove_me, $body_html);
        $body_html = str_replace("*|SP:MANAGE|*", $manage, $body_html);
    }
    // Second placeholder pass, run after the tag processing and link
    // insertion above. NOTE(review): presumably catches placeholders
    // introduced by those steps; confirm it is still needed.
    if (!is_null($subscriber)) {
        $body_html = str_replace("*|FNAME|*", $subscriber->firstname, $body_html);
        $body_html = str_replace("*|LNAME|*", $subscriber->lastname, $body_html);
        $body_html = str_replace("*|EMAIL|*", $subscriber->email, $body_html);
        $body_html = str_replace("*|ID|*", $subscriber->subscriberID, $body_html);
    }
    return $body_html;
}
/**
 * Append an inline "color" declaration to every <a> in an HTML fragment.
 *
 * The fragment is parsed with DOMDocument (after converting it to HTML
 * entities when mbstring is available). The doctype/html/body wrapper the
 * serializer adds is stripped again, and "%7B"/"%7D" are turned back into
 * braces. Without the DOM extension the content is returned untouched.
 *
 * @param string $color   CSS colour value to apply
 * @param string $content HTML fragment
 * @return string the fragment with recoloured links
 */
static function link_style($color, $content)
{
    if (!class_exists("DomDocument")) {
        return $content;
    }

    $document = new DomDocument('1.0', 'UTF-8');
    if (function_exists('mb_convert_encoding')) {
        $content = mb_convert_encoding($content, 'HTML-ENTITIES', 'UTF-8');
    }
    $document->strictErrorChecking = false;
    @$document->loadHtml($content);

    foreach ($document->getElementsByTagName('a') as $anchor) {
        $anchor->setAttribute('style', $anchor->getAttribute('style') . ' color: ' . $color . '; ');
    }

    // Remove the wrapper that saveHTML() adds around the fragment.
    $wrapperPatterns = array("/^\\<\\!DOCTYPE.*?<html><body>/si", "!</body></html>\$!si");
    $content = preg_replace($wrapperPatterns, "", $document->saveHTML());

    // Restore braces that come back percent-encoded.
    return str_replace(array("%7B", "%7D"), array("{", "}"), $content);
}
/**
 * Extract a single review into a review instance.
 *
 * The node is serialized and re-parsed as a standalone UTF-8 document, then
 * each field is looked up with its own XPath query. A field is only set when
 * its query matches exactly one node.
 *
 * @param DomElement $node
 * @return Review
 */
public function extractReview(DomElement $node)
{
    $review = new Review();

    $fragment = new DomDocument();
    $markup = $node->ownerDocument->saveHtml($node);
    $fragment->loadHtml('<?xml encoding="utf-8" ?>' . $markup);
    $finder = new DOMXpath($fragment);

    // Author username: member_info / * / username / span
    $authorNodes = $finder->query("//div[contains(@class, 'member_info')]/*/div[contains(@class, 'username')]/span");
    if ($authorNodes->length === 1) {
        $review->setAuthor(trim($authorNodes->item(0)->nodeValue));
    }

    // Author location
    $locationNodes = $finder->query("//div[contains(@class, 'member_info')]/div[contains(@class, 'location')]");
    if ($locationNodes->length === 1) {
        $review->setAuthorLocation(trim($locationNodes->item(0)->nodeValue));
    }

    // Review permalink. The title links to an absolute path without
    // scheme/host, and carries a fragment that should be removed.
    $linkNodes = $finder->query("//div[contains(@class, 'quote')]/a");
    if ($linkNodes->length === 1) {
        $relativePath = trim($linkNodes->item(0)->getAttribute('href'));
        $base = new Uri($this->getOptions()->getUrl());
        $absolute = sprintf('%s://%s/%s', $base->getScheme(), $base->getHost(), ltrim($relativePath, '/'));
        $permalink = new Uri($absolute);
        $permalink->setFragment(null);
        $review->setUrl((string) $permalink);
    }

    // Title quote
    $titleNodes = $finder->query("//div[contains(@class, 'quote')]/a/span[contains(@class, 'noQuotes')]");
    if ($titleNodes->length === 1) {
        $review->setTitle(trim($titleNodes->item(0)->nodeValue));
    }

    // Rating: two numbers separated by one word, read from the image's alt text
    $ratingNodes = $finder->query("//div[contains(@class, 'rating')]/span[contains(@class, 'rate')]/img");
    if ($ratingNodes->length === 1) {
        $altText = $ratingNodes->item(0)->getAttribute('alt');
        if (preg_match('/([0-9\\.]+)\\s[\\w]+\\s([0-9\\.]+)/', $altText, $ratingParts)) {
            $review->setStarRating((float) $ratingParts[1]);
            $review->setMaxStarRating((float) $ratingParts[2]);
        }
    }

    // Rating date: the title attribute when present, otherwise a
    // "d Month yyyy" pattern found in the span text.
    $dateNodes = $finder->query("//div[contains(@class, 'rating')]/span[contains(@class, 
'ratingDate')]");
    if ($dateNodes->length === 1) {
        $dateSpan = $dateNodes->item(0);
        $dateText = null;
        if ($dateSpan->hasAttribute('title')) {
            $dateText = $dateSpan->getAttribute('title');
        } elseif (preg_match('/([0-9]{1,2}\\s[\\w]+\\s[0-9]{4})/i', trim($dateSpan->nodeValue), $dateParts)) {
            $dateText = $dateParts[1];
        }
        if ($dateText !== null) {
            $parsedDate = DateTime::createFromFormat('d F Y', $dateText);
            if ($parsedDate !== false) {
                $review->setDate($parsedDate);
            }
        }
    }

    // Review excerpt: only the first child node's value, to skip the 'more...' links
    $excerptNodes = $finder->query("//div[contains(@class, 'entry')]/p[contains(@class, 'partial_entry')]");
    if ($excerptNodes->length === 1) {
        $review->setExcerpt(trim($excerptNodes->item(0)->childNodes->item(0)->nodeValue));
    }

    return $review;
}
/**
 * Converts a raw solr result into a document using the definition $def
 *
 * A string response is treated as an error page: the text of its first
 * <pre> element becomes the result's error message. When the page has no
 * <pre> element (or the string is empty) the raw response itself is used,
 * so the error is never silently lost.
 *
 * Otherwise the status, timing and counters are copied from the response,
 * every hit is converted through createDataForHit() and indexed by its id
 * property, and highlighting and facet data are attached when present.
 *
 * @param ezcSearchDocumentDefinition $def
 * @param mixed $response
 * @return ezcSearchResult
 */
private function createResponseFromData(ezcSearchDocumentDefinition $def, $response)
{
    if (is_string($response)) {
        // try to find the error message and return that
        $s = new ezcSearchResult();
        $error = $response;

        // loadHTML() rejects an empty source, so only parse real content.
        if ($response !== '') {
            $dom = new DomDocument();
            @$dom->loadHtml($response);
            $xpath = new DOMXPath($dom);
            // "//pre" is absolute, so no context node is needed.
            $preElem = $xpath->evaluate('//pre')->item(0);
            if ($preElem) {
                $error = $preElem->nodeValue;
            }
        }

        $s->error = $error;
        return $s;
    }

    $s = new ezcSearchResult();
    $s->status = $response->responseHeader->status;
    $s->queryTime = $response->responseHeader->QTime;
    $s->resultCount = $response->response->numFound;
    $s->start = $response->response->start;

    $idProperty = $def->idProperty;
    foreach ($response->response->docs as $document) {
        $resultDocument = $this->createDataForHit($document, $def);
        $s->documents[$resultDocument->document->{$idProperty}] = $resultDocument;
    }

    // process highlighting
    if (isset($response->highlighting) && count($s->documents)) {
        foreach ($s->documents as $id => $document) {
            $document->highlight = array();
            if (isset($response->highlighting->{$id})) {
                foreach ($def->fields as $field) {
                    $fieldName = $this->mapFieldType($field->field, $field->type);
                    if ($field->highlight && isset($response->highlighting->{$id}->{$fieldName})) {
                        $document->highlight[$field->field] = $response->highlighting->{$id}->{$fieldName};
                    }
                }
            }
        }
    }

    // process facets
    if (isset($response->facet_counts) && isset($response->facet_counts->facet_fields)) {
        $facets = $response->facet_counts->facet_fields;
        foreach ($def->fields as $field) {
            $fieldName = $this->mapFieldType($field->field, $field->type);
            if (isset($facets->{$fieldName})) {
                // The facet arrives as a flat list [value, count, value, count, ...];
                // fold it into value => count.
                $facetValues = array();
                $facet = $facets->{$fieldName};
                $facetLength = count($facet);
                for ($i = 0; $i < $facetLength; $i += 2) {
                    $facetValues[$facet[$i]] = 
$facet[$i + 1];
                }
                $s->facets[$field->field] = $facetValues;
            }
        }
    }

    return $s;
}