/**
 * Collects the absolute URL of every link found in the body of $this->htmlCode.
 *
 * Each href is passed through $this->encodeURL() and then resolved against
 * $this->url with url_to_absolute().
 *
 * @return array De-duplicated URLs. Keys are the ones left behind by
 *               array_unique(), so they are not guaranteed to be consecutive.
 */
public function getAllURLFromHTML()
{
    $output = array();

    // DOMDocument::loadHTML() refuses an empty string (ValueError on PHP 8,
    // warning on earlier versions); there is nothing to extract in that case.
    if (!is_string($this->htmlCode) || trim($this->htmlCode) === '') {
        return $output;
    }

    $dom = new DOMDocument();

    // Collect libxml's complaints about sloppy markup instead of silencing
    // everything with "@", then put the global setting back as it was.
    $previousSetting = libxml_use_internal_errors(true);
    $dom->loadHTML($this->htmlCode);
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);

    $xpath = new DomXPath($dom);

    // Restrict to anchors that actually carry an href: getAttribute() returns
    // '' for a missing attribute, which would otherwise be resolved as if the
    // anchor linked to the base URL.
    foreach ($xpath->query('/html/body//a[@href]') as $anchor) {
        $href = $this->encodeURL($anchor->getAttribute('href'));
        $output[] = url_to_absolute($this->url, $href);
    }

    return array_unique($output);
}
/**
 * Hook handler: optionally re-imports price/size data for the product and,
 * when the "a" cookie is present, renders up.tpl.
 *
 * Request parameters read through Tools::getValue():
 *  - "u":     when present, the stored page content is parsed again and the
 *             product's base price (and, where sizes are found, its
 *             combinations) are updated.
 *  - "url":   only looked at together with "u"; a non-empty value is written
 *             to content/<id>_u.html, after which the stored URL is fetched
 *             with getProductContent() and cached in content/<id>_c.html.
 *  - "admin": any value sets the "a" cookie, the value "-1" removes it again.
 *
 * NOTE(review): nothing in this method checks who sends these parameters, so
 * as far as this block shows any visitor can trigger a fetch of an arbitrary
 * URL and a rewrite of the product's prices. Confirm that access is restricted
 * elsewhere before relying on it.
 *
 * @param array $params Hook parameters; $params['product'] is the product object.
 *
 * @return mixed Result of $this->display() for up.tpl when the "a" cookie is
 *               set, otherwise nothing.
 */
public function hookDisplayProductDeliveryTime($params)
{
    $product = $params['product'];
    $contentDir = dirname(__FILE__) . '/content/';
    $file = $contentDir . $product->id . '_c.html';
    $fileu = $contentDir . $product->id . '_u.html';

    // Always defined, so the template never receives an undefined variable.
    $messages = array();

    if (Tools::getValue("u", false) !== false) {
        $content = '';

        if (Tools::getValue('url', false) !== false) {
            $url = Tools::getValue('url');
            if ($url != '') {
                file_put_contents($fileu, $url);
            }
            // An empty "url" parameter means "re-fetch the URL stored earlier".
            $url = $this->readContentFile($fileu);
            if ($url != '') {
                $content = $this->getProductContent($url);
                file_put_contents($file, $content);
                $messages[] = "load from url";
            } else {
                $messages[] = "Error: url not found";
            }
        }

        if ($content == '') {
            $messages[] = "update content";
            $content = $this->readContentFile($file);
        }

        if ($content == '') {
            // DOMDocument::loadHTML() cannot parse an empty string.
            $messages[] = "Error: content not found";
        } else {
            $messages = array_merge(
                $messages,
                $this->importPricesFromContent($product, (string) $content)
            );
        }
    }

    // Built once for every branch, so messages are no longer lost when only
    // the base price was updated. The separator is reproduced exactly as the
    // original wrote it.
    $messageTextErr = "";
    foreach ($messages as $message) {
        $messageTextErr .= $message . "\\r\\n";
    }

    $admin = Tools::getValue("admin", false);
    if ($admin !== false) {
        $this->context->cookie->__set('a', 1);
    }
    if ($admin == "-1") {
        $this->context->cookie->__unset('a');
    }

    if ($this->context->cookie->__isset('a')) {
        $url = $this->readContentFile($fileu);
        $this->smarty->assign(array('messageTextErr' => $messageTextErr, 'url' => $url));

        return $this->display(__FILE__, 'up.tpl');
    }
}

/**
 * Reads a cached file, treating a missing or unreadable file as empty.
 *
 * @param string $path Absolute path of the file.
 *
 * @return string File contents, or '' when the file cannot be read.
 */
private function readContentFile($path)
{
    if (!is_file($path) || !is_readable($path)) {
        return '';
    }
    $data = file_get_contents($path);

    return $data === false ? '' : $data;
}

/**
 * Parses the fetched page and updates the product's prices from it.
 *
 * Three page layouts are handled, in this order:
 *  1. <select id="sku-variants"> options carrying a JSON "data-price" attribute;
 *  2. a plain price inside <div id="price-container"> (base price only);
 *  3. a JSON blob inside <div id="complectData"> listing offers.
 *
 * @param object $product Product being updated.
 * @param string $content Non-empty HTML of the source page.
 *
 * @return array Messages describing problems found while importing.
 */
private function importPricesFromContent($product, $content)
{
    $messages = array();

    $dom = new DomDocument();
    libxml_use_internal_errors(true);
    $dom->loadHTML($content);
    // Drop the buffered parse errors; nothing reads them.
    libxml_clear_errors();
    $xpath = new DomXPath($dom);

    $nodes = $xpath->query("//select[@id='sku-variants']/option");

    if ($nodes->length > 0) {
        $tab = array(array());
        $prices = array();
        foreach ($nodes as $key => $node) {
            $product_data = json_decode($node->getAttribute('data-price'));
            if ($key == 0) {
                // The first option defines the base price; every other
                // option is stored as an impact relative to it.
                $this->basePrice = $product_data->VALUE;
            }
            $size = str_replace(' ', '', $node->getAttribute('value'));
            $id_attr = egormprod::getAttributeId($size);
            if ($id_attr) {
                $tab[0][] = $id_attr;
                $prices[] = $product_data->VALUE - $this->basePrice;
            } else {
                $messages[] = "Error: size " . $size . " not found ";
            }
        }
        $this->regenerateCombinations($product, $tab, $prices);

        return $messages;
    }

    $price = preg_replace(
        "/[^0-9]/",
        '',
        $xpath->evaluate("string(//div[@id='price-container']/span/text())")
    );
    $oldprice = preg_replace(
        "/[^0-9]/",
        '',
        $xpath->evaluate("string(//div[@id='price-container']/del/text())")
    );

    if (!($price == "" && $oldprice == "")) {
        // update only base price (the original comment calls this case "pillows")
        $this->basePrice = $oldprice > 0 ? $oldprice : $price;
        $this->updateProductBasePrice($product->id);

        return $messages;
    }

    $json = $xpath->evaluate("string(//div[@id='complectData']/text())");
    $product_data = json_decode($json);
    $offers = isset($product_data->items[0]->OFFERS) ? $product_data->items[0]->OFFERS : null;
    if (!is_array($offers) && !is_object($offers)) {
        $messages[] = "Error: offers not found";

        return $messages;
    }

    $tab = array(array());
    $prices = array();
    foreach ($offers as $offer) {
        $size = $offer->SHIRINA . '×' . $offer->DLINA;
        $id_attr = egormprod::getAttributeId($size);
        if ($id_attr) {
            $tab[0][] = $id_attr;
            $prices[] = $offer->BASE_PRICE;
        } else {
            $messages[] = "Error: size " . $size . " not found ";
        }
    }

    if (empty($prices)) {
        // Without a single known size there is no cheapest offer to use as
        // base price; leave the product untouched instead of failing in
        // array_multisort() / $prices[0].
        $messages[] = "Error: no known sizes found";

        return $messages;
    }

    // Cheapest offer first, keeping the attribute ids aligned with the prices.
    array_multisort($prices, SORT_ASC, SORT_NUMERIC, $tab[0]);
    $this->basePrice = $prices[0];
    foreach ($prices as $key => $price_tmp) {
        $prices[$key] = $price_tmp - $this->basePrice;
    }
    $this->regenerateCombinations($product, $tab, $prices);

    return $messages;
}

/**
 * Stores the base price and replaces the product's combinations.
 *
 * @param object $product Product being updated.
 * @param array  $tab     Attribute ids, grouped as built by the caller.
 * @param array  $prices  Price impacts relative to $this->basePrice, in the
 *                        same order as $tab[0].
 *
 * @return void
 */
private function regenerateCombinations($product, array $tab, array $prices)
{
    $this->updateProductBasePrice($product->id);
    egormprod::setAttributesImpacts($product->id, $tab);
    $combinations = $this->getCombination($tab);
    $values = $this->getMap($product->id, $combinations, $prices);

    // Price rules are switched off while the combinations are replaced and
    // applied once afterwards.
    SpecificPriceRule::disableAnyApplication();
    $product->deleteProductAttributes();
    $product->generateMultipleCombinations($values, $combinations);
    SpecificPriceRule::enableAnyApplication();
    SpecificPriceRule::applyAllRules(array((int) $product->id));
}
/**
 * Imports the entries of an RDF (RSS 1.0), Atom or RSS feed as items.
 *
 * The feed-level title (or a fallback) becomes the author name, every entry
 * is converted into an item array and handed to item_store() unless an item
 * with the same uri already exists for the importing user. Entries are
 * processed in reverse document order.
 *
 * @param string $xml      Raw feed document; nothing happens when it is empty.
 * @param array  $importer Importing user; only $importer["uid"] is read.
 * @param array  $contact  Feed contact; the keys "url", "name", "thumb", "id",
 *                         "notify", "fetch_further_information" and
 *                         "ffi_keyword_blacklist" are read.
 * @param mixed  $hub      Not used by this function.
 *
 * @return void
 */
function feed_import($xml, $importer, &$contact, &$hub)
{
    // Kept from the original; the returned value is not used below.
    $a = get_app();

    logger("Import Atom/RSS feed", LOGGER_DEBUG);

    if ($xml == "") {
        return;
    }

    $doc = new DOMDocument();
    // Collect parser errors instead of suppressing them with "@", then
    // restore the previous global setting.
    $previous_setting = libxml_use_internal_errors(true);
    $doc->loadXML($xml);
    libxml_clear_errors();
    libxml_use_internal_errors($previous_setting);

    $xpath = new DomXPath($doc);
    $xpath->registerNamespace('atom', "http://www.w3.org/2005/Atom");
    $xpath->registerNamespace('dc', "http://purl.org/dc/elements/1.1/");
    $xpath->registerNamespace('content', "http://purl.org/rss/1.0/modules/content/");
    $xpath->registerNamespace('rdf', "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
    $xpath->registerNamespace('rss', "http://purl.org/rss/1.0/");
    $xpath->registerNamespace('media', "http://search.yahoo.com/mrss/");

    // Value of the first node matched by $query (relative to $context when
    // given), or null when nothing matches. Most feeds lack most of the
    // optional elements, so the missing-node case is the normal one.
    $first_value = function ($query, $context = null) use ($xpath) {
        $nodes = ($context === null) ? $xpath->query($query) : $xpath->query($query, $context);
        if (!is_object($nodes) || $nodes->length == 0) {
            return null;
        }
        return $nodes->item(0)->nodeValue;
    };

    // Tries the queries in order and returns the first value that is not
    // empty; when all are empty the value of the last query is returned.
    $first_non_empty = function (array $queries, $context = null) use ($first_value) {
        $value = null;
        foreach ($queries as $query) {
            $value = $first_value($query, $context);
            if ($value != "") {
                break;
            }
        }
        return $value;
    };

    // Attribute map of the first element matched by $query, or null.
    $first_attributes = function ($query, $context) use ($xpath) {
        $nodes = $xpath->query($query, $context);
        if (!is_object($nodes) || $nodes->length == 0) {
            return null;
        }
        return $nodes->item(0)->attributes;
    };

    $author = array();
    $entries = null;

    // The three checks are deliberately independent (not elseif): when more
    // than one matches, the later one overrides the earlier one.

    // Is it RDF?
    if ($xpath->query('/rdf:RDF/rss:channel')->length > 0) {
        $author["author-name"] = $first_non_empty(array(
            '/rdf:RDF/rss:channel/rss:title/text()',
            '/rdf:RDF/rss:channel/rss:description/text()',
        ));
        $entries = $xpath->query('/rdf:RDF/rss:item');
    }

    // Is it Atom?
    if ($xpath->query('/atom:feed/atom:entry')->length > 0) {
        $author["author-name"] = $first_non_empty(array(
            '/atom:feed/atom:title/text()',
            '/atom:feed/atom:subtitle/text()',
            '/atom:feed/atom:author/atom:name/text()',
        ));
        $author["edited"] = $author["created"] = $first_value('/atom:feed/atom:updated/text()');
        $author["app"] = $first_value('/atom:feed/atom:generator/text()');
        $entries = $xpath->query('/atom:feed/atom:entry');
    }

    // Is it RSS?
    if ($xpath->query('/rss/channel')->length > 0) {
        $author["author-name"] = $first_non_empty(array(
            '/rss/channel/title/text()',
            '/rss/channel/copyright/text()',
            '/rss/channel/description/text()',
        ));
        $author["edited"] = $author["created"] = $first_value('/rss/channel/pubDate/text()');
        $author["app"] = $first_value('/rss/channel/generator/text()');
        $entries = $xpath->query('/rss/channel/item');
    }

    if (!isset($author["author-name"]) || $author["author-name"] == "") {
        $author["author-name"] = $contact["name"];
    }

    $author["owner-link"] = $contact["url"];
    $author["owner-name"] = $contact["name"];
    $author["owner-avatar"] = $contact["thumb"];

    $header = array();
    $header["uid"] = $importer["uid"];
    $header["network"] = NETWORK_FEED;
    $header["type"] = "remote";
    $header["wall"] = 0;
    $header["origin"] = 0;
    $header["gravity"] = GRAVITY_PARENT;
    $header["private"] = 2;
    $header["verb"] = ACTIVITY_POST;
    $header["object-type"] = ACTIVITY_OBJ_NOTE;
    $header["contact-id"] = $contact["id"];

    if (!strlen($contact["notify"])) {
        // one way feed - no remote comment ability
        $header["last-child"] = 0;
    }

    // None of the known feed formats matched.
    if (!is_object($entries)) {
        return;
    }

    $entrylist = array();
    foreach ($entries as $entry) {
        $entrylist[] = $entry;
    }

    foreach (array_reverse($entrylist) as $entry) {
        $item = array_merge($header, $author);

        $item["title"] = $first_non_empty(
            array('atom:title/text()', 'title/text()', 'rss:title/text()'),
            $entry
        );

        // Permalink: prefer the Atom "alternate" link, then any Atom link,
        // then the RSS / RDF <link> element.
        $item["plink"] = "";
        $alternate = $first_attributes("atom:link[@rel='alternate']", $entry);
        if (!is_object($alternate)) {
            $alternate = $first_attributes("atom:link", $entry);
        }
        if (is_object($alternate)) {
            foreach ($alternate as $attribute) {
                if ($attribute->name == "href") {
                    $item["plink"] = $attribute->textContent;
                }
            }
        }
        if ($item["plink"] == "") {
            $item["plink"] = $first_non_empty(array('link/text()', 'rss:link/text()'), $entry);
        }
        $item["plink"] = original_url($item["plink"]);

        $item["uri"] = $first_non_empty(array('atom:id/text()', 'guid/text()'), $entry);
        if ($item["uri"] == "") {
            $item["uri"] = $item["plink"];
        }
        $item["parent-uri"] = $item["uri"];

        $published = $first_non_empty(
            array('atom:published/text()', 'pubDate/text()', 'dc:date/text()'),
            $entry
        );
        $updated = $first_value('atom:updated/text()', $entry);
        if ($updated == "") {
            $updated = $published;
        }
        if ($published != "") {
            $item["created"] = $published;
        }
        if ($updated != "") {
            $item["edited"] = $updated;
        }

        // dc:creator takes precedence over <author> and atom:author. This is
        // the net effect of the original, which looked dc:creator up a
        // second time and let it overwrite the other two.
        $creator = $first_non_empty(
            array('dc:creator/text()', 'author/text()', 'atom:author/atom:name/text()'),
            $entry
        );
        if ($creator != "") {
            $item["author-name"] = $creator;
        }

        $r = q("SELECT `id` FROM `item` WHERE `uid` = %d AND `uri` = '%s' AND `network` IN ('%s', '%s')",
            intval($importer["uid"]), dbesc($item["uri"]), dbesc(NETWORK_FEED), dbesc(NETWORK_DFRN));
        if ($r) {
            logger("Item with uri " . $item["uri"] . " for user " . $importer["uid"] . " already existed under id " . $r[0]["id"], LOGGER_DEBUG);
            continue;
        }

        // To-Do?
        // <category>Ausland</category>
        // <media:thumbnail width="152" height="76" url="http://www.taz.de/picture/667875/192/14388767.jpg"/>
        $attachments = array();
        $attach_tags = array();
        foreach ($xpath->query("enclosure", $entry) as $enclosure) {
            $href = "";
            $length = "";
            $type = "";
            foreach ($enclosure->attributes as $attribute) {
                if ($attribute->name == "url") {
                    $href = $attribute->textContent;
                } elseif ($attribute->name == "length") {
                    $length = $attribute->textContent;
                } elseif ($attribute->name == "type") {
                    $type = $attribute->textContent;
                }
            }
            $attachments[] = array("link" => $href, "type" => $type, "length" => $length);
            $attach_tags[] = '[attach]href="' . $href . '" length="' . $length . '" type="' . $type . '"[/attach]';
        }
        // "attach" is only present on the item when the entry has enclosures.
        if (count($attach_tags) > 0) {
            $item["attach"] = implode(',', $attach_tags);
        }

        if ($contact["fetch_further_information"]) {
            $preview = "";

            // Handle enclosures and treat them as preview picture
            foreach ($attachments as $attachment) {
                if ($attachment["type"] == "image/jpeg") {
                    $preview = $attachment["link"];
                }
            }

            $item["body"] = $item["title"] . add_page_info($item["plink"], false, $preview, $contact["fetch_further_information"] == 2, $contact["ffi_keyword_blacklist"]);
            $item["tag"] = add_page_keywords($item["plink"], false, $preview, $contact["fetch_further_information"] == 2, $contact["ffi_keyword_blacklist"]);
            $item["title"] = "";
            $item["object-type"] = ACTIVITY_OBJ_BOOKMARK;
            unset($item["attach"]);
        } else {
            // First candidate that is not empty after trimming wins.
            $body = "";
            $body_queries = array(
                'atom:content/text()',
                'content:encoded/text()',
                'description/text()',
                'atom:summary/text()',
            );
            foreach ($body_queries as $query) {
                $body = trim((string) $first_value($query, $entry));
                if ($body != "") {
                    break;
                }
            }

            // remove the content of the title if it is identically to the body
            // This helps with auto generated titles e.g. from tumblr
            if (title_is_body($item["title"], $body)) {
                $item["title"] = "";
            }

            $item["body"] = html2bbcode($body);
        }

        logger("Stored feed: " . print_r($item, true), LOGGER_DEBUG);

        $notify = item_is_remote_self($contact, $item);
        $id = item_store($item, false, $notify);

        logger("Feed for contact " . $contact["url"] . " stored under id " . $id);
    }
}
/**
 * Downloads an article page and extracts its title and main content.
 *
 * The main content is the first <div> whose id contains "mainentrycontent";
 * <script> elements and any <div> whose class contains "slideshow" are removed
 * before it is serialised. The result is passed through addHR() and wrapped
 * with a timestamp, an optional cover image and a link back to the source.
 *
 * Side effect: sets the default timezone to America/Los_Angeles.
 *
 * NOTE(review): $link is inserted into the returned HTML without escaping;
 * confirm that callers only pass trusted URLs.
 *
 * @param string $link URL (or path) of the article page.
 *
 * @return array array($title, $html); array('', '') when the page contains no
 *               main article container.
 */
function getHuffingtonPostArticle($link)
{
    date_default_timezone_set("America/Los_Angeles");

    $dom = new DOMDocument();
    libxml_use_internal_errors(true);

    // loads html via link; a failed download leaves the document empty, which
    // the article check below turns into the array('', '') result.
    @$dom->loadHTMLFile($link);
    // Drop the buffered parse errors; nothing reads them.
    libxml_clear_errors();

    $xpath = new DomXPath($dom);

    // get title: the last <h1> of the page wins, '' when there is none
    $title = '';
    foreach ($dom->getElementsByTagName("h1") as $node) {
        $title = $node->nodeValue;
    }

    // check if article exist
    $classname = "mainentrycontent";
    if ($xpath->evaluate("boolean(//div[contains(@id, '{$classname}')])") == false) {
        return array('', '');
    }

    // Cover image: only taken from the dedicated "main visual" container.
    // When that container is missing no cover image is emitted.
    $img_classname = "main-visual group embedded-image";
    $coverImg = "";
    $cover = $xpath->query("//div[contains(@class, '{$img_classname}')]//img/@src");
    if ($cover->length > 0) {
        // Serialising the attribute node yields the src="..." fragment.
        $img_src = $dom->saveHTML($cover->item(0));
        $coverImg = "<img {$img_src}></img>";
    }

    // removes scripts; getElementsByTagName() returns a live list, so the
    // first element is removed until the list is empty
    while (($r = $dom->getElementsByTagName("script")) && $r->length) {
        $r->item(0)->parentNode->removeChild($r->item(0));
    }

    // filter unwanted content
    $toBeRemoved = "slideshow";
    foreach ($xpath->query("//div[contains(@class,'{$toBeRemoved}')] ") as $r) {
        if ($r->parentNode) {
            $r->parentNode->removeChild($r);
        }
    }

    // gets the main article; it can be gone by now if it sat inside a removed
    // slideshow container, and saveHTML(null) would dump the whole page
    $article = $xpath->query("//div[contains(@id, '{$classname}')]")->item(0);
    if ($article === null) {
        return array('', '');
    }
    $htmlString = $dom->saveHTML($article);

    // adds HR tag to be used in joomla
    $htmlString = addHR($htmlString);

    $string = "<p>" . date("m/d/y @ h:ia") . "</p>" . $coverImg . $htmlString . "<p> Original article: <a href='{$link}'> " . $link . "</a></p>";

    return array($title, $string);
}