/**
 * Article filter for hltv.org news items.
 *
 * Fetches the linked page and replaces the article content with the serialized
 * div#newsContent node. Articles whose link does not contain "hltv.org/news/",
 * whose page cannot be fetched, or whose page lacks that node are returned unchanged.
 *
 * @param array $article Article data; "link" is read, "content" may be overwritten.
 * @return array The (possibly modified) article.
 */
function hook_article_filter($article) {
    if (strpos($article["link"], "hltv.org/news/") === false) {
        return $article;
    }

    // DOMDocument::loadHTML() throws on an empty source in PHP 8, so stop on a failed fetch.
    $html = fetch_file_contents($article["link"]);
    if (!$html) {
        return $article;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', "auto"));
    $xpath = new DOMXPath($doc);

    // first remove advertisement stuff
    foreach ($xpath->query('(//div[@id="_mcePaste"])') as $unwanted) {
        $unwanted->parentNode->removeChild($unwanted);
    }

    // Only the first matching node is used.
    $body = $xpath->query('(//div[@id="newsContent"])')->item(0);
    if ($body) {
        $article["content"] = $doc->saveXML($body);
    }

    return $article;
}
/**
 * Article filter for tagesschau.de.
 *
 * Fetches the linked page, strips scripts and several boilerplate nodes, and
 * replaces the article content with the first div whose class is exactly "box".
 * The article is returned unchanged when the link does not match, the fetch
 * fails, or no such div exists.
 *
 * @param array $article Article data; "link" is read, "content" may be overwritten.
 * @return array The (possibly modified) article.
 */
function hook_article_filter($article) {
    if (strpos($article["link"], "tagesschau.de") === false) {
        return $article;
    }

    // DOMDocument::loadHTML() throws on an empty source in PHP 8, so stop on a failed fetch.
    $html = fetch_file_contents($article["link"]);
    if (!$html) {
        return $article;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
    $xpath = new DOMXPath($doc);

    // first remove header, footer
    $unwanted = $xpath->query('(//script)|(//noscript)|(//h3[@class="headline"])|(//div[@class="infokasten"])|(//div[@class="socialMedia"])|(//div[@class="linklist"])|(//img[@title="galerie"])');
    foreach ($unwanted as $node) {
        $node->parentNode->removeChild($node);
    }

    // Only the first matching node is used.
    $body = $xpath->query('(//div[@class="box"])')->item(0);
    if ($body) {
        $article["content"] = $doc->saveXML($body);
    }

    return $article;
}
/**
 * Article filter for dilbert.com strips.
 *
 * On first sight of an article (no "dilbert,<owner_uid>:" marker in plugin_data)
 * the linked page is fetched and the first <img> whose src matches the strip
 * pattern becomes the article content; the marker is then prepended to
 * plugin_data. Articles already carrying the marker get their stored content back.
 *
 * @param array $article Article data; reads "owner_uid", "guid", "link",
 *                       "plugin_data" and optionally "stored"["content"].
 * @return array The (possibly modified) article.
 */
function hook_article_filter($article) {
    $owner_uid = $article["owner_uid"];

    if (strpos($article["guid"], "dilbert.com") === false) {
        return $article;
    }

    $marker = "dilbert,{$owner_uid}:";
    if (strpos($article["plugin_data"], $marker) !== false) {
        // Already processed for this owner: keep what was stored earlier.
        if (isset($article["stored"]["content"])) {
            $article["content"] = $article["stored"]["content"];
        }
        return $article;
    }

    // DOMDocument::loadHTML() throws on an empty source in PHP 8, so stop on a failed fetch.
    $html = fetch_file_contents($article["link"]);
    if (!$html) {
        return $article;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML($html);
    $xpath = new DOMXPath($doc);

    // we might also check for img[@class='strip'] I guess...
    foreach ($xpath->query('(//img[@src])') as $img) {
        $matches = array();
        if (preg_match("/dyn\\/str_strip\\/.*zoom\\.gif\$/", $img->getAttribute("src"), $matches)) {
            // Rewrite the matched path relative to the site root.
            $img->setAttribute("src", rewrite_relative_url("http://dilbert.com/", $matches[0]));
            $article["content"] = $doc->saveXML($img);
            $article["plugin_data"] = $marker . $article["plugin_data"];
            break;
        }
    }

    return $article;
}
/**
 * Article filter for gulli.com.
 *
 * Fetches the linked page, removes scripts and several ad/navigation containers,
 * and replaces the article content with the first div#_contentLeft. The article
 * is returned unchanged when the link does not match, the fetch fails, or the
 * node is missing.
 *
 * @param array $article Article data; "link" is read, "content" may be overwritten.
 * @return array The (possibly modified) article.
 */
function hook_article_filter($article) {
    if (strpos($article["link"], "gulli.com") === false) {
        return $article;
    }

    // DOMDocument::loadHTML() throws on an empty source in PHP 8, so stop on a failed fetch.
    $html = fetch_file_contents($article["link"]);
    if (!$html) {
        return $article;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
    $xpath = new DOMXPath($doc);

    // first remove advertisement stuff
    $unwanted = $xpath->query('(//script)|(//noscript)|(//div[@class="adsenseContainer"])|(//div[@class="_newsCrumb"])|(//div[@class="_forumBox"])|(//div[@class="nointelliTXT"])');
    foreach ($unwanted as $node) {
        $node->parentNode->removeChild($node);
    }

    // now get the (cleaned) article
    $body = $xpath->query('(//div[@id="_contentLeft"])')->item(0);
    if ($body) {
        $article["content"] = $doc->saveXML($body);
    }

    return $article;
}
/**
 * Article filter for emsvechtewelle.de.
 *
 * Fetches the linked page, removes script/noscript/style nodes, and replaces the
 * article content with the first <p> directly below div#content. The article is
 * returned unchanged when the link does not match, the fetch fails, or no such
 * paragraph exists.
 *
 * @param array $article Article data; "link" is read, "content" may be overwritten.
 * @return array The (possibly modified) article.
 */
function hook_article_filter($article) {
    if (strpos($article["link"], "emsvechtewelle.de") === false) {
        return $article;
    }

    // DOMDocument::loadHTML() throws on an empty source in PHP 8, so stop on a failed fetch.
    $html = fetch_file_contents($article["link"]);
    if (!$html) {
        return $article;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
    $xpath = new DOMXPath($doc);

    // first remove advertisement stuff
    foreach ($xpath->query('(//script)|(//noscript)|(//style)') as $node) {
        $node->parentNode->removeChild($node);
    }

    // Only the first matching paragraph is used.
    $body = $xpath->query('(//div[@id="content"]/p)')->item(0);
    if ($body) {
        $article["content"] = $doc->saveXML($body);
    }

    return $article;
}
/**
 * Article filter for daujones.com.
 *
 * Fetches the linked page, removes forms, spans, links, centered blocks and a few
 * named containers, and replaces the article content with the first
 * div.maincontent. The article is returned unchanged when the link does not
 * match, the fetch fails, or the node is missing.
 *
 * @param array $article Article data; "link" is read, "content" may be overwritten.
 * @return array The (possibly modified) article.
 */
function hook_article_filter($article) {
    if (strpos($article["link"], "daujones.com") === false) {
        return $article;
    }

    // DOMDocument::loadHTML() throws on an empty source in PHP 8, so stop on a failed fetch.
    $html = fetch_file_contents($article["link"]);
    if (!$html) {
        return $article;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
    $xpath = new DOMXPath($doc);

    // first remove advertisement stuff
    $unwanted = $xpath->query('(//form)|(//span)|(//div[@class="rightnav"])|(//div[@class="kommentare"])|(//a)|(//h2[@style])|(//center)');
    foreach ($unwanted as $node) {
        $node->parentNode->removeChild($node);
    }

    // Only the first matching node is used.
    $body = $xpath->query('(//div[@class="maincontent"])')->item(0);
    if ($body) {
        $article["content"] = $doc->saveXML($body);
    }

    return $article;
}
/**
 * Article filter for raumfahrer.net.
 *
 * Fetches the linked page, removes div.druckansicht and td.head nodes, and
 * replaces the article content with the grandparent of the first td.tab_text.
 * The article is returned unchanged when the link does not match, the fetch
 * fails, or that node cannot be found.
 *
 * @param array $article Article data; "link" is read, "content" may be overwritten.
 * @return array The (possibly modified) article.
 */
function hook_article_filter($article) {
    if (strpos($article["link"], "raumfahrer.net") === false) {
        return $article;
    }

    // DOMDocument::loadHTML() throws on an empty source in PHP 8, so stop on a failed fetch.
    $html = fetch_file_contents($article["link"]);
    if (!$html) {
        return $article;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML($html);
    $xpath = new DOMXPath($doc);

    // TODO: Add Express mp3 as attachment/enclosure once plugins are able to do that
    foreach ($xpath->query('(//div[@class="druckansicht"])|(//td[@class="head"])') as $node) {
        $node->parentNode->removeChild($node);
    }

    $cell = $xpath->query('(//td[@class="tab_text"])')->item(0);
    if ($cell) {
        // The serialized container is two levels above the text cell.
        $container = $cell->parentNode->parentNode;
        if ($container) {
            $article["content"] = $doc->saveXML($container);
        }
    }

    return $article;
}
/**
 * Article filter for volksstimme.de.
 *
 * Fetches the linked page and replaces the article content with the concatenated
 * serialization of every div carrying itemprop="image" or itemprop="articleBody",
 * in document order. The article is returned unchanged when the link does not
 * match, the fetch fails, or nothing matches.
 *
 * @param array $article Article data; "link" is read, "content" may be overwritten.
 * @return array The (possibly modified) article.
 */
function hook_article_filter($article) {
    if (strpos($article["link"], "volksstimme.de") === false) {
        return $article;
    }

    // DOMDocument::loadHTML() throws on an empty source in PHP 8, so stop on a failed fetch.
    $html = fetch_file_contents($article["link"]);
    if (!$html) {
        return $article;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', "auto"));
    $xpath = new DOMXPath($doc);

    $collected = "";
    foreach ($xpath->query('(//div[@itemprop="image"]|//div[@itemprop="articleBody"])') as $part) {
        _debug("Muh, found stuff...");
        $collected .= $doc->saveXML($part);
        _debug("Length of basenode: " . strlen($collected));
    }

    if (!empty($collected)) {
        $article["content"] = $collected;
    }

    return $article;
}
/**
 * Article filter for buttersafe.com comics.
 *
 * On first sight of an article (no "buttersafe,<owner_uid>:" marker in
 * plugin_data) the linked page is fetched and the first <img> whose src matches
 * the comics URL pattern becomes the article content; the marker is then
 * prepended to plugin_data. Articles already carrying the marker get their
 * stored content back.
 *
 * @param array $article Article data; reads "owner_uid", "guid", "link",
 *                       "plugin_data" and optionally "stored"["content"].
 * @return array The (possibly modified) article.
 */
function hook_article_filter($article) {
    $owner_uid = $article["owner_uid"];

    if (strpos($article["guid"], "buttersafe.com") === false) {
        return $article;
    }

    $marker = "buttersafe,{$owner_uid}:";
    if (strpos($article["plugin_data"], $marker) !== false) {
        // Already processed for this owner: keep what was stored earlier.
        if (isset($article["stored"]["content"])) {
            $article["content"] = $article["stored"]["content"];
        }
        return $article;
    }

    // DOMDocument::loadHTML() throws on an empty source in PHP 8, so stop on a failed fetch.
    $html = fetch_file_contents($article["link"]);
    if (!$html) {
        return $article;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML($html);
    $xpath = new DOMXPath($doc);

    foreach ($xpath->query('(//img[@src])') as $img) {
        if (preg_match("/(http:\\/\\/buttersafe.com\\/comics\\/\\d{4}.*)/i", $img->getAttribute("src"))) {
            $article["content"] = $doc->saveXML($img, LIBXML_NOEMPTYTAG);
            $article["plugin_data"] = $marker . $article["plugin_data"];
            break;
        }
    }

    return $article;
}
/**
 * Article filter for sz.de.
 *
 * Fetches the linked page, removes scripts, ads, header/footer sections and image
 * labels, and replaces the article content with the first <article> element. The
 * article is returned unchanged when the link does not match, the fetch fails,
 * or no <article> exists.
 *
 * @param array $article Article data; "link" is read, "content" may be overwritten.
 * @return array The (possibly modified) article.
 */
function hook_article_filter($article) {
    if (strpos($article["link"], "sz.de") === false) {
        return $article;
    }

    // DOMDocument::loadHTML() throws on an empty source in PHP 8, so stop on a failed fetch.
    $html = fetch_file_contents($article["link"]);
    if (!$html) {
        return $article;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
    $xpath = new DOMXPath($doc);

    // first remove header, footer
    $unwanted = $xpath->query('(//script)|(//noscript)|(//div[@class="ad"])|(//section[@class="header"])|(//section[@class="footer"])|(//span[@class="imagelabel"])');
    foreach ($unwanted as $node) {
        $node->parentNode->removeChild($node);
    }

    // Only the first matching node is used.
    $body = $xpath->query('(//article)')->item(0);
    if ($body) {
        $article["content"] = $doc->saveXML($body);
    }

    return $article;
}
/**
 * Article filter for tagesschau.de.
 *
 * Fetches the linked page, removes scripts, iframes and several boilerplate
 * containers, and replaces the article content with the first div whose class
 * attribute contains "sectionZ". The article is returned unchanged when the link
 * does not match, the fetch fails, or no such div exists.
 *
 * @param array $article Article data; "link" is read, "content" may be overwritten.
 * @return array The (possibly modified) article.
 */
function hook_article_filter($article) {
    if (strpos($article["link"], "tagesschau.de") === false) {
        return $article;
    }

    // DOMDocument::loadHTML() throws on an empty source in PHP 8, so stop on a failed fetch.
    $html = fetch_file_contents($article["link"]);
    if (!$html) {
        return $article;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
    $xpath = new DOMXPath($doc);

    // first remove header, footer
    $unwanted = $xpath->query('(//script)|(//noscript)|(//iframe)|(//div[contains(@class, "infokasten")])|(//div[@class="teaser"])|(//div[@class="socialMedia"])|(//div[contains(@class, "linklist")])|(//div[@class="metablockwrapper"])');
    foreach ($unwanted as $node) {
        $node->parentNode->removeChild($node);
    }

    // Only the first matching node is used.
    $body = $xpath->query('(//div[contains(@class, "sectionZ")])')->item(0);
    if ($body) {
        $article["content"] = $doc->saveXML($body);
    }

    return $article;
}
/**
 * Article filter for winfuture.de.
 *
 * Fetches the linked page, collapses doubled <br> tags, removes scripts, ad
 * containers and 1px-wide images, and replaces the article content with the
 * first div#news_content. The article is returned unchanged when the link does
 * not match, the fetch fails, or the node is missing.
 *
 * @param array $article Article data; "link" is read, "content" may be overwritten.
 * @return array The (possibly modified) article.
 */
function hook_article_filter($article) {
    if (strpos($article["link"], "winfuture.de") === false) {
        return $article;
    }

    // DOMDocument::loadHTML() throws on an empty source in PHP 8, so stop on a failed fetch.
    $html = fetch_file_contents($article["link"]);
    if (!$html) {
        return $article;
    }

    // remove double linebreaks
    $html = preg_replace("/(<[\\ ]*br[\\/\\ ]*>){2}/", "<br />", $html);

    $doc = new DOMDocument();
    @$doc->loadHTML($html);
    $xpath = new DOMXPath($doc);

    // first remove advertisement + tracking stuff
    $unwanted = $xpath->query('(//script)|(//noscript)|(//div[@id="wf_ContentAd"])|(//div[@id="wf_SingleAd"])|(//img[@width="1"])');
    foreach ($unwanted as $node) {
        $node->parentNode->removeChild($node);
    }

    // now get the (cleaned) article
    $body = $xpath->query('(//div[@id="news_content"])')->item(0);
    if ($body) {
        $article["content"] = $doc->saveXML($body);
    }

    return $article;
}
/**
 * Article filter for taz.de.
 *
 * Fetches the linked page, removes scripts, iframes, styles and several named
 * containers (logging each removed node via _debug), and replaces the article
 * content with the first div.sectbody. The article is returned unchanged when
 * the link does not match, the fetch fails, or the node is missing.
 *
 * @param array $article Article data; "link" is read, "content" may be overwritten.
 * @return array The (possibly modified) article.
 */
function hook_article_filter($article) {
    if (strpos($article["link"], "taz.de") === false) {
        return $article;
    }

    // DOMDocument::loadHTML() throws on an empty source in PHP 8, so stop on a failed fetch.
    $html = fetch_file_contents($article["link"]);
    if (!$html) {
        return $article;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
    $xpath = new DOMXPath($doc);

    // first remove advertisement stuff
    $unwanted = $xpath->query('(//script)|(//noscript)|(//iframe)|(//style)|(//div[@class="sectfoot"])|(//div[@id="tzi_paywall"])|(//div[contains(@class, "rack")])');
    foreach ($unwanted as $node) {
        _debug("Remove1: " . $doc->saveXML($node));
        $node->parentNode->removeChild($node);
    }

    // Only the first matching node is used.
    $body = $xpath->query('(//div[@class="sectbody"])')->item(0);
    if ($body) {
        _debug("Result: " . $doc->saveXML($body));
        $article["content"] = $doc->saveXML($body);
    }

    return $article;
}
/**
 * Article filter for nationalgeographic.com.
 *
 * On first sight of an article (no "natgeo,<owner_uid>:" marker in plugin_data)
 * the linked page is fetched, sidebar/comment/social nodes are removed, and the
 * element with id "content_mainA" becomes the article content; the marker is
 * then prepended to plugin_data. Articles already carrying the marker get their
 * stored content back.
 *
 * @param array $article Article data; reads "owner_uid", "link", "plugin_data"
 *                       and optionally "stored"["content"].
 * @return array The (possibly modified) article.
 */
function hook_article_filter($article) {
    $owner_uid = $article["owner_uid"];

    if (strpos($article["link"], "nationalgeographic.com") === false) {
        return $article;
    }

    $marker = "natgeo,{$owner_uid}:";
    if (strpos($article["plugin_data"], $marker) !== false) {
        // Already processed for this owner: keep what was stored earlier.
        if (isset($article["stored"]["content"])) {
            $article["content"] = $article["stored"]["content"];
        }
        return $article;
    }

    // DOMDocument::loadHTML() throws on an empty source in PHP 8, so stop on a failed fetch.
    $html = fetch_file_contents($article["link"]);
    if (!$html) {
        return $article;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML($html);
    $xpath = new DOMXPath($doc);

    // Resolve the content node first, then prune unwanted nodes from the document.
    $main = $doc->getElementById("content_mainA");

    $trash = $xpath->query("//*[@class='aside' or @id='livefyre' or @id='powered_by_livefyre' or @class='social_buttons']");
    foreach ($trash as $node) {
        $node->parentNode->removeChild($node);
    }

    if ($main) {
        $article["content"] = $doc->saveXML($main);
        $article["plugin_data"] = $marker . $article["plugin_data"];
    }

    return $article;
}
/**
 * Comic handler for threewordphrase.com.
 *
 * On first sight of an article (no "af_comics,<owner_uid>:" marker in
 * plugin_data) the linked page is fetched and the first //td/center/img node
 * becomes the article content; the marker is then prepended to plugin_data.
 * Articles already carrying the marker get their stored content back.
 *
 * @param array $article Article data, modified in place.
 * @return bool True when the link belongs to this site, false otherwise.
 */
function process(&$article) {
    $owner_uid = $article["owner_uid"];

    if (strpos($article["link"], "threewordphrase.com") === false) {
        return false;
    }

    $marker = "af_comics,{$owner_uid}:";
    if (strpos($article["plugin_data"], $marker) !== false) {
        // Already processed for this owner: keep what was stored earlier.
        if (isset($article["stored"]["content"])) {
            $article["content"] = $article["stored"]["content"];
        }
        return true;
    }

    // DOMDocument::loadHTML() throws on an empty source in PHP 8, so stop on a failed fetch.
    $html = fetch_file_contents($article["link"]);
    if (!$html) {
        return true;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML($html);
    $xpath = new DOMXPath($doc);

    $comic = $xpath->query("//td/center/img")->item(0);
    if ($comic) {
        $article["content"] = $doc->saveXML($comic);
        $article["plugin_data"] = $marker . $article["plugin_data"];
    }

    return true;
}
/**
 * Comic handler for erzaehlmirnix.wordpress.com.
 *
 * On first sight of an article (no "af_comics,<owner_uid>:" marker in
 * plugin_data) the linked page is fetched and the LAST <img> whose src contains
 * "erzaehlmirnix.files.wordpress.com" becomes the article content; the marker is
 * then prepended to plugin_data. Articles already carrying the marker get their
 * stored content back.
 *
 * @param array $article Article data, modified in place.
 * @return bool True when the link belongs to this site, false otherwise.
 */
function process(&$article) {
    $owner_uid = $article["owner_uid"];

    if (strpos($article["link"], "erzaehlmirnix.wordpress.com/") === false) {
        return false;
    }

    $marker = "af_comics,{$owner_uid}:";
    if (strpos($article["plugin_data"], $marker) !== false) {
        // Already processed for this owner: keep what was stored earlier.
        if (isset($article["stored"]["content"])) {
            $article["content"] = $article["stored"]["content"];
        }
        return true;
    }

    // DOMDocument::loadHTML() throws on an empty source in PHP 8, so stop on a failed fetch.
    $html = fetch_file_contents($article["link"]);
    if (!$html) {
        return true;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML($html);
    $xpath = new DOMXPath($doc);

    // No break here: the loop deliberately ends on the last matching image.
    $comic = false;
    foreach ($xpath->query('(//img[contains(@src, "erzaehlmirnix.files.wordpress.com")])') as $img) {
        $comic = $img;
    }

    if ($comic) {
        $article["content"] = $doc->saveXML($comic);
        $article["plugin_data"] = $marker . $article["plugin_data"];
    }

    return true;
}
/**
 * Comic handler for userfriendly.org cartoons.
 *
 * On first sight of an article (no "af_comics,<owner_uid>:" marker in
 * plugin_data) the linked page is fetched and the first <img> whose alt text
 * contains "Strip for" becomes the article content; the marker is then prepended
 * to plugin_data. Articles already carrying the marker get their stored content back.
 *
 * @param array $article Article data, modified in place.
 * @return bool True when the link belongs to this site, false otherwise.
 */
function process(&$article) {
    $owner_uid = $article["owner_uid"];

    if (strpos($article["link"], "userfriendly.org/cartoons") === false) {
        return false;
    }

    $marker = "af_comics,{$owner_uid}:";
    if (strpos($article["plugin_data"], $marker) !== false) {
        // Already processed for this owner: keep what was stored earlier.
        if (isset($article["stored"]["content"])) {
            $article["content"] = $article["stored"]["content"];
        }
        return true;
    }

    // DOMDocument::loadHTML() throws on an empty source in PHP 8, so stop on a failed fetch.
    $html = fetch_file_contents($article["link"]);
    if (!$html) {
        return true;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
    $xpath = new DOMXPath($doc);

    foreach ($xpath->query('(//img[@alt])') as $img) {
        if (strpos($img->getAttribute('alt'), 'Strip for') !== false) {
            $article["content"] = $doc->saveXML($img);
            $article["plugin_data"] = $marker . $article["plugin_data"];
            break;
        }
    }

    return true;
}
/**
 * Article filter for rockpapershotgun.com.
 *
 * Matches on the article guid, fetches the linked page, removes share/social
 * containers and iframes, and replaces the article content with the first
 * div.entry. The article is returned unchanged when the guid does not match,
 * the fetch fails, or the node is missing.
 *
 * @param array $article Article data; "guid" and "link" are read, "content" may be overwritten.
 * @return array The (possibly modified) article.
 */
function hook_article_filter($article) {
    if (strpos($article["guid"], "rockpapershotgun.com") === false) {
        return $article;
    }

    // DOMDocument::loadHTML() throws on an empty source in PHP 8, so stop on a failed fetch.
    $html = fetch_file_contents($article["link"]);
    if (!$html) {
        return $article;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
    $xpath = new DOMXPath($doc);

    // remove category stuff
    foreach ($xpath->query('(//div[@class="dd_post_share"])|(//div[@class="social"])|(//iframe)') as $node) {
        $node->parentNode->removeChild($node);
    }

    // Only the first matching node is used.
    $body = $xpath->query('(//div[@class="entry"])')->item(0);
    if ($body) {
        $article["content"] = $doc->saveXML($body);
    }

    return $article;
}
/**
 * Comic handler for explosm.net comics.
 *
 * On first sight of an article (no "af_comics,<owner_uid>:" marker in
 * plugin_data) the linked page is fetched and the first <img> whose src matches
 * the /db/files/Comics/ URL pattern becomes the article content; the marker is
 * then prepended to plugin_data. Articles already carrying the marker get their
 * stored content back.
 *
 * @param array $article Article data, modified in place.
 * @return bool True when the link belongs to this site, false otherwise.
 */
function process(&$article) {
    $owner_uid = $article["owner_uid"];

    if (strpos($article["link"], "explosm.net/comics") === false) {
        return false;
    }

    $marker = "af_comics,{$owner_uid}:";
    if (strpos($article["plugin_data"], $marker) !== false) {
        // Already processed for this owner: keep what was stored earlier.
        if (isset($article["stored"]["content"])) {
            $article["content"] = $article["stored"]["content"];
        }
        return true;
    }

    // DOMDocument::loadHTML() throws on an empty source in PHP 8, so stop on a failed fetch.
    $html = fetch_file_contents($article["link"]);
    if (!$html) {
        return true;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML($html);
    $xpath = new DOMXPath($doc);

    // we might also check for img[@class='strip'] I guess...
    foreach ($xpath->query('(//img[@src])') as $img) {
        if (preg_match("/(http:\\/\\/.*\\/db\\/files\\/Comics\\/.*)/i", $img->getAttribute("src"))) {
            $article["content"] = $doc->saveXML($img);
            $article["plugin_data"] = $marker . $article["plugin_data"];
            break;
        }
    }

    return true;
}
/**
 * Comic handler for several sites that host their strips under /wp-content/uploads/.
 *
 * Applies to links containing blog.beetlebum.de, fb-kundendienst.de,
 * marydeathcomics.com or commitstrip.com. On first sight of an article (no
 * "af_comics,<owner_uid>:" marker in plugin_data) the linked page is fetched and
 * the first <img> inside div.entry-content or div#content-wrapper whose src
 * matches the uploads URL pattern becomes the article content; the marker is
 * then prepended to plugin_data. Articles already carrying the marker get their
 * stored content back.
 *
 * @param array $article Article data, modified in place.
 * @return bool True when the link belongs to one of the sites, false otherwise.
 */
function process(&$article) {
    $owner_uid = $article["owner_uid"];

    $supported = false;
    foreach (array("blog.beetlebum.de", "fb-kundendienst.de", "marydeathcomics.com", "commitstrip.com") as $host) {
        if (strpos($article["link"], $host) !== false) {
            $supported = true;
            break;
        }
    }
    if (!$supported) {
        return false;
    }

    $marker = "af_comics,{$owner_uid}:";
    if (strpos($article["plugin_data"], $marker) !== false) {
        // Already processed for this owner: keep what was stored earlier.
        if (isset($article["stored"]["content"])) {
            $article["content"] = $article["stored"]["content"];
        }
        return true;
    }

    // DOMDocument::loadHTML() throws on an empty source in PHP 8, so stop on a failed fetch.
    $html = fetch_file_contents($article["link"]);
    if (!$html) {
        return true;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML($html);
    $xpath = new DOMXPath($doc);

    $candidates = $xpath->query('(//div[@class="entry-content"]//img[@src])|(//div[@id="content-wrapper"]//img[@src])');
    foreach ($candidates as $img) {
        if (preg_match("/(https?:\\/\\/.*\\/wp-content\\/uploads\\/.*)/i", $img->getAttribute("src"))) {
            $article["content"] = $doc->saveXML($img);
            $article["plugin_data"] = $marker . $article["plugin_data"];
            break;
        }
    }

    return true;
}
/**
 * Article filter for stimmthaltnicht.de.
 *
 * Fetches the linked page and replaces the article content with the first
 * div.entry-content. The article is returned unchanged when the link does not
 * match, the fetch fails, or the node is missing.
 *
 * @param array $article Article data; "link" is read, "content" may be overwritten.
 * @return array The (possibly modified) article.
 */
function hook_article_filter($article) {
    if (strpos($article["link"], "stimmthaltnicht.de") === false) {
        return $article;
    }

    // DOMDocument::loadHTML() throws on an empty source in PHP 8, so stop on a failed fetch.
    $html = fetch_file_contents($article["link"]);
    if (!$html) {
        return $article;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
    $xpath = new DOMXPath($doc);

    // Only the first matching node is used.
    $body = $xpath->query('(//div[@class="entry-content"])')->item(0);
    if ($body) {
        $article["content"] = $doc->saveXML($body);
    }

    return $article;
}
/**
 * Comic handler for cad-comic.com.
 *
 * For articles whose title does not contain "News:" and that carry no
 * "af_comics,<owner_uid>:" marker in plugin_data, the linked page is fetched and
 * the first <img> whose src contains "/comics/cad-" becomes the article content;
 * the marker is then prepended to plugin_data. In every other matching case
 * (news posts, or already-marked articles) the stored content, if any, is restored.
 *
 * @param array $article Article data, modified in place.
 * @return bool True when the link belongs to this site, false otherwise.
 */
function process(&$article) {
    $owner_uid = $article["owner_uid"];

    if (strpos($article["link"], "cad-comic.com/cad/") === false) {
        return false;
    }

    $marker = "af_comics,{$owner_uid}:";
    $is_news = strpos($article["title"], "News:") !== false;
    if ($is_news || strpos($article["plugin_data"], $marker) !== false) {
        if (isset($article["stored"]["content"])) {
            $article["content"] = $article["stored"]["content"];
        }
        return true;
    }

    // DOMDocument::loadHTML() throws on an empty source in PHP 8, so stop on a failed fetch.
    $html = fetch_file_contents($article["link"]);
    if (!$html) {
        return true;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML($html);
    $xpath = new DOMXPath($doc);

    $comic = $xpath->query('(//img[contains(@src, "/comics/cad-")])')->item(0);
    if ($comic) {
        $article["content"] = $doc->saveXML($comic);
        $article["plugin_data"] = $marker . $article["plugin_data"];
    }

    return true;
}
/**
 * Comic handler for several sites that wrap their strip in div#comic.
 *
 * Applies to guids containing bunicomic.com, buttersafe.com, whompcomic.com,
 * extrafabulouscomics.com, happyjar.com or csectioncomics.com. On first sight of
 * an article (no "af_comics,<owner_uid>:" marker in plugin_data) the linked page
 * is fetched with a browser-like user agent string and the first div#comic
 * becomes the article content; the marker is then prepended to plugin_data.
 * Articles already carrying the marker get their stored content back.
 *
 * @param array $article Article data, modified in place.
 * @return bool True when the guid belongs to one of the sites, false otherwise.
 */
function process(&$article) {
    $owner_uid = $article["owner_uid"];

    $sites = array("bunicomic.com", "buttersafe.com", "whompcomic.com",
        "extrafabulouscomics.com", "happyjar.com", "csectioncomics.com");
    $supported = false;
    foreach ($sites as $site) {
        if (strpos($article["guid"], $site) !== false) {
            $supported = true;
            break;
        }
    }
    if (!$supported) {
        return false;
    }

    $marker = "af_comics,{$owner_uid}:";
    if (strpos($article["plugin_data"], $marker) !== false) {
        // Already processed for this owner: keep what was stored earlier.
        if (isset($article["stored"]["content"])) {
            $article["content"] = $article["stored"]["content"];
        }
        return true;
    }

    // lol at people who block clients by user agent
    // oh noes my ad revenue Q_Q
    $res = fetch_file_contents($article["link"], false, false, false,
        false, false, 0,
        "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)");

    // DOMDocument::loadHTML() throws on an empty source in PHP 8, so stop on a failed fetch.
    if (!$res) {
        return true;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML($res);
    $xpath = new DOMXPath($doc);

    $comic = $xpath->query('//div[@id="comic"]')->item(0);
    if ($comic) {
        $article["content"] = $doc->saveXML($comic);
        $article["plugin_data"] = $marker . $article["plugin_data"];
    }

    return true;
}
/**
 * Article filter for titanic-magazin.de.
 *
 * Fetches the linked page, removes scripts, forms, the form anchor, the
 * newsticker link and any paragraph whose text is exactly "bezahlte Anzeige",
 * and replaces the article content with the first div.tt_news-bodytext. The
 * article is returned unchanged when the link does not match, the fetch fails,
 * or the node is missing.
 *
 * @param array $article Article data; "link" is read, "content" may be overwritten.
 * @return array The (possibly modified) article.
 */
function hook_article_filter($article) {
    if (strpos($article["link"], "titanic-magazin.de") === false) {
        return $article;
    }

    // DOMDocument::loadHTML() throws on an empty source in PHP 8, so stop on a failed fetch.
    $html = fetch_file_contents($article["link"]);
    if (!$html) {
        return $article;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
    $xpath = new DOMXPath($doc);

    // first remove advertisement + tracking stuff
    $candidates = $xpath->query('(//script)|(//noscript)|(//form)|(//a[@name="form"])|(//p)|(//a[@href="newsticker.html"])');
    foreach ($candidates as $node) {
        // Paragraphs are only dropped when they are the ad label; everything else always goes.
        if ($node->localName === "p" && $node->textContent !== "bezahlte Anzeige") {
            continue;
        }
        $node->parentNode->removeChild($node);
    }

    // now get the (cleaned) article
    $body = $xpath->query('(//div[@class="tt_news-bodytext"])')->item(0);
    if ($body) {
        $article["content"] = $doc->saveXML($body);
    }

    return $article;
}
/**
 * Article filter for gameone.de.
 *
 * Fetches the linked page and replaces the article content with the first div
 * whose class attribute is exactly "post single". The article is returned
 * unchanged when the link does not match, the fetch fails, or the node is missing.
 *
 * @param array $article Article data; "link" is read, "content" may be overwritten.
 * @return array The (possibly modified) article.
 */
function hook_article_filter($article) {
    if (strpos($article["link"], "gameone.de") === false) {
        return $article;
    }

    // DOMDocument::loadHTML() throws on an empty source in PHP 8, so stop on a failed fetch.
    $html = fetch_file_contents($article["link"]);
    if (!$html) {
        return $article;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', "auto"));
    $xpath = new DOMXPath($doc);

    // Only the first matching node is used.
    $body = $xpath->query('(//div[@class="post single"])')->item(0);
    if ($body) {
        $article["content"] = $doc->saveXML($body);
    }

    return $article;
}
/**
 * Article filter for Penny Arcade comics.
 *
 * Applies only when the link contains "penny-arcade.com" and the title contains
 * "Comic:". On first sight of an article (no "pennyarcade,<owner_uid>:" marker in
 * plugin_data) the linked page is fetched and the first <img> whose src matches
 * the art.penny-arcade.com URL pattern becomes the article content; the marker is
 * then prepended to plugin_data. Articles already carrying the marker get their
 * stored content back.
 *
 * @param array $article Article data; reads "owner_uid", "link", "title",
 *                       "plugin_data" and optionally "stored"["content"].
 * @return array The (possibly modified) article.
 */
function hook_article_filter($article) {
    $owner_uid = $article["owner_uid"];

    if (strpos($article["link"], "penny-arcade.com") === false
            || strpos($article["title"], "Comic:") === false) {
        return $article;
    }

    $marker = "pennyarcade,{$owner_uid}:";
    if (strpos($article["plugin_data"], $marker) !== false) {
        // Already processed for this owner: keep what was stored earlier.
        if (isset($article["stored"]["content"])) {
            $article["content"] = $article["stored"]["content"];
        }
        return $article;
    }

    // DOMDocument::loadHTML() throws on an empty source in PHP 8, so stop on a failed fetch.
    $html = fetch_file_contents($article["link"]);
    if (!$html) {
        return $article;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML($html);
    $xpath = new DOMXPath($doc);

    // we might also check for img[@class='strip'] I guess...
    foreach ($xpath->query('(//img[@src])') as $img) {
        if (preg_match("/(http:\\/\\/art.penny-arcade.com\\/.*)/i", $img->getAttribute("src"))) {
            $article["content"] = $doc->saveXML($img, LIBXML_NOEMPTYTAG);
            $article["plugin_data"] = $marker . $article["plugin_data"];
            break;
        }
    }

    return $article;
}
/**
 * Handler for Penny Arcade comics and news posts.
 *
 * Applies when the link contains "penny-arcade.com" and the title contains
 * either "Comic:" or "News Post:" (the comic case wins if both appear).
 *  - Comic: the first div#comicFrame of the linked page becomes the content.
 *  - News post: the last div.post becomes the content after the first div.meta,
 *    the first h2 and div.comicPost below div.postBody, and all div.avatar nodes
 *    are removed; the first avatar image, if any, is moved to the front of the post.
 * The article is left untouched when the page cannot be fetched or the expected
 * node is missing.
 *
 * @param array $article Article data, modified in place.
 * @return bool True when the article is a Penny Arcade comic or news post.
 */
function process(&$article) {
    if (strpos($article["link"], "penny-arcade.com") === false) {
        return false;
    }

    $is_comic = strpos($article["title"], "Comic:") !== false;
    $is_news = strpos($article["title"], "News Post:") !== false;
    if (!$is_comic && !$is_news) {
        return false;
    }

    // DOMDocument::loadHTML() throws on an empty source in PHP 8, so stop on a failed fetch.
    $html = fetch_file_contents($article["link"]);
    if (!$html) {
        return true;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML($html);
    $xpath = new DOMXPath($doc);

    if ($is_comic) {
        $frame = $xpath->query('(//div[@id="comicFrame"])')->item(0);
        if ($frame) {
            $article["content"] = $doc->saveXML($frame);
        }
        return true;
    }

    // News post: the last div.post in document order is the one that is kept.
    $post = null;
    foreach ($xpath->query('(//div[@class="post"])') as $candidate) {
        $post = $candidate;
    }

    // Drop the first occurrence of each decoration; any of them may be absent.
    $decorations = array(
        '(//div[@class="meta"])',
        '(//div[@class="postBody"]/h2)',
        '(//div[@class="postBody"]/div[@class="comicPost"])',
    );
    foreach ($decorations as $expression) {
        $node = $xpath->query($expression)->item(0);
        if ($node && $node->parentNode) {
            $node->parentNode->removeChild($node);
        }
    }

    // Rescue the avatar image before its wrapper is thrown away below.
    $avatar = $xpath->query('(//div[@class="avatar"]//img)')->item(0);
    if ($post && $avatar) {
        $post->insertBefore($avatar, $post->firstChild);
    }

    foreach ($xpath->query('(//div[@class="avatar"])') as $wrapper) {
        $wrapper->parentNode->removeChild($wrapper);
    }

    if ($post) {
        $article["content"] = $doc->saveXML($post);
    }

    return true;
}
/**
 * Article filter for Penny Arcade comics and news posts.
 *
 * Applies when the link contains "penny-arcade.com". The title is checked for
 * "Comic:" and then for "News Post:"; each matching rule is handled in turn:
 *  - Comic: the last div with class "post comic" becomes the content.
 *  - News post: all div.heading nodes are removed and the last div.post becomes
 *    the content.
 * After a successful extraction the "pennyarcade,<owner_uid>:" marker is
 * prepended to plugin_data. When the marker is already present the stored
 * content, if any, is restored instead of fetching.
 *
 * @param array $article Article data; reads "owner_uid", "link", "title",
 *                       "plugin_data" and optionally "stored"["content"].
 * @return array The (possibly modified) article.
 */
function hook_article_filter($article) {
    $owner_uid = $article["owner_uid"];

    if (strpos($article["link"], "penny-arcade.com") === false) {
        return $article;
    }

    $marker = "pennyarcade,{$owner_uid}:";

    // title tag => array(XPath of the node to keep, XPath of nodes to drop or null)
    $rules = array(
        "Comic:" => array('(//div[@class="post comic"])', null),
        "News Post:" => array('(//div[@class="post"])', '(//div[@class="heading"])'),
    );

    foreach ($rules as $title_tag => $rule) {
        if (strpos($article["title"], $title_tag) === false) {
            continue;
        }

        if (strpos($article["plugin_data"], $marker) !== false) {
            // Already processed for this owner: keep what was stored earlier.
            if (isset($article["stored"]["content"])) {
                $article["content"] = $article["stored"]["content"];
            }
            continue;
        }

        // DOMDocument::loadHTML() throws on an empty source in PHP 8, so skip a failed fetch.
        $html = fetch_file_contents($article["link"]);
        if (!$html) {
            continue;
        }

        $doc = new DOMDocument();
        @$doc->loadHTML($html);
        $xpath = new DOMXPath($doc);

        // The last match in document order is the one that is kept.
        $post = null;
        foreach ($xpath->query($rule[0]) as $candidate) {
            $post = $candidate;
        }

        if ($rule[1] !== null) {
            foreach ($xpath->query($rule[1]) as $unwanted) {
                $unwanted->parentNode->removeChild($unwanted);
            }
        }

        if ($post) {
            $article["content"] = $doc->saveXML($post);
            $article["plugin_data"] = $marker . $article["plugin_data"];
        }
    }

    return $article;
}
/**
 * Comic handler for threewordphrase.com.
 *
 * Fetches the linked page and replaces the article content with the first
 * //td/center/img node. The content is left untouched when the page cannot be
 * fetched or the image is missing.
 *
 * @param array $article Article data, modified in place.
 * @return bool True when the link belongs to this site, false otherwise.
 */
function process(&$article) {
    if (strpos($article["link"], "threewordphrase.com") === false) {
        return false;
    }

    // DOMDocument::loadHTML() throws on an empty source in PHP 8, so stop on a failed fetch.
    $html = fetch_file_contents($article["link"]);
    if (!$html) {
        return true;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML($html);
    $xpath = new DOMXPath($doc);

    $comic = $xpath->query("//td/center/img")->item(0);
    if ($comic) {
        $article["content"] = $doc->saveXML($comic);
    }

    return true;
}
/**
 * Loads one page of a golem.de article and returns its cleaned markup.
 *
 * The "-rss" part is stripped from the link before fetching. When the page
 * links to a following page (a#atoc_next inside table#table-jtoc), that page is
 * loaded recursively and its markup is appended. Scripts, ad containers, the
 * table of contents and the cluster header title are removed before the first
 * <article> element is serialized.
 *
 * @param string $link URL of the page to load.
 * @return string|false Serialized <article> markup (plus any following pages),
 *                      or false when the page cannot be fetched or has no <article>.
 */
protected function load_page($link) {
    # curl does not follow the 301?
    $url = str_replace("-rss", "", $link);

    $html = fetch_file_contents($url, false, false, false,
        false, false, 0,
        "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)");

    // DOMDocument::loadHTML() throws on an empty source in PHP 8, so stop on a failed fetch.
    if (!$html) {
        return false;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
    $xpath = new DOMXPath($doc);

    // Follow the "next page" link before the table that holds it is removed below.
    $add_content = "";
    $nextpage = $xpath->query('//table[@id="table-jtoc"]/tr/td/a[@id="atoc_next"]');
    if ($nextpage && $nextpage->length > 0) {
        $href = $nextpage->item(0)->getAttribute("href");
        if ($href !== "") {
            $add_content = $this->load_page("http://www.golem.de" . $href);
        }
    }

    // first remove advertisement stuff
    // (the XPath function is contains(); a misspelt name invalidates the whole expression)
    $stuff = $xpath->query('(//script)|(//noscript)|(//div[contains(@class, "iqad")])|(//ol[@id="list-jtoc"])|(//table[@id="table-jtoc"])|(//header[@class="cluster-header"]/h1)');
    foreach ($stuff as $removethis) {
        $removethis->parentNode->removeChild($removethis);
    }

    // now get the (cleaned) article
    $basenode = $xpath->query('(//article)')->item(0);
    if (!$basenode) {
        return false;
    }

    // A failed follow-up page yields false, which concatenates as an empty string.
    return $doc->saveXML($basenode) . $add_content;
}