Example #1
0
 /**
  * Replaces hltv.org news items with the article body scraped from the linked page.
  *
  * The page behind $article["link"] is downloaded, every div with id
  * "_mcePaste" is dropped and the first div with id "newsContent" is
  * serialized into $article["content"].
  *
  * @param array $article Article data; "link" is read, "content" may be replaced.
  * @return array The article; unchanged when the link does not match, the page
  *               could not be loaded or no content node was found.
  */
 function hook_article_filter($article)
 {
     if (strpos($article["link"], "hltv.org/news/") === FALSE) {
         return $article;
     }

     // fetch_file_contents() is defined outside this block; a falsy result is
     // taken to mean the download failed. An empty source makes loadHTML()
     // throw a ValueError on PHP 8, so stop before parsing.
     $html = fetch_file_contents($article["link"]);
     if ($html) {
         $html = mb_convert_encoding($html, 'HTML-ENTITIES', "auto");
     }

     $doc = new DOMDocument();
     // "@" silences libxml warnings about sloppy real-world markup.
     if (!$html || !@$doc->loadHTML($html)) {
         return $article;
     }

     $xpath = new DOMXPath($doc);

     // first remove advertisement stuff
     foreach ($xpath->query('(//div[@id="_mcePaste"])') as $unwanted) {
         $unwanted->parentNode->removeChild($unwanted);
     }

     $content = $xpath->query('(//div[@id="newsContent"])')->item(0);
     if ($content) {
         $article["content"] = $doc->saveXML($content);
     }

     return $article;
 }
 /**
  * Replaces tagesschau.de items with the first div of class "box" from the linked page.
  *
  * Scripts, noscript blocks, h3.headline, the "infokasten", "socialMedia"
  * and "linklist" divs and images titled "galerie" are removed before the
  * node is serialized into $article["content"]. Iframes are left as they are.
  *
  * @param array $article Article data; "link" is read, "content" may be replaced.
  * @return array The article; unchanged when the link does not match, the page
  *               could not be loaded or no content node was found.
  */
 function hook_article_filter($article)
 {
     if (strpos($article["link"], "tagesschau.de") === FALSE) {
         return $article;
     }

     // fetch_file_contents() is defined outside this block; a falsy result is
     // taken to mean the download failed. An empty source makes loadHTML()
     // throw a ValueError on PHP 8, so stop before parsing.
     $html = fetch_file_contents($article["link"]);
     if ($html) {
         $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     }

     $doc = new DOMDocument();
     // "@" silences libxml warnings about sloppy real-world markup.
     if (!$html || !@$doc->loadHTML($html)) {
         return $article;
     }

     $xpath = new DOMXPath($doc);

     // first remove header, footer
     $unwanted = $xpath->query('(//script)|(//noscript)|(//h3[@class="headline"])|(//div[@class="infokasten"])|(//div[@class="socialMedia"])|(//div[@class="linklist"])|(//img[@title="galerie"])');
     foreach ($unwanted as $node) {
         $node->parentNode->removeChild($node);
     }

     $content = $xpath->query('(//div[@class="box"])')->item(0);
     if ($content) {
         $article["content"] = $doc->saveXML($content);
     }

     return $article;
 }
 /**
  * Replaces dilbert.com items with the comic strip image from the linked page.
  *
  * The first img whose src ends in ".../dyn/str_strip/...zoom.gif" gets its
  * src passed through rewrite_relative_url() against "http://dilbert.com/"
  * and becomes the content. A "dilbert,<owner_uid>:" marker is then
  * prepended to $article["plugin_data"]; when that marker is already
  * present the page is not fetched and $article["stored"]["content"] is
  * reused if set.
  *
  * @param array $article Article data; reads "owner_uid", "guid", "link",
  *                       "plugin_data" and optionally "stored".
  * @return array The article, with "content"/"plugin_data" updated on success.
  */
 function hook_article_filter($article)
 {
     $owner_uid = $article["owner_uid"];
     if (strpos($article["guid"], "dilbert.com") === FALSE) {
         return $article;
     }

     $marker = "dilbert,{$owner_uid}:";
     if (strpos($article["plugin_data"], $marker) !== FALSE) {
         // Marker present: reuse the stored content instead of downloading again.
         if (isset($article["stored"]["content"])) {
             $article["content"] = $article["stored"]["content"];
         }
         return $article;
     }

     // fetch_file_contents() is defined outside this block; a falsy result is
     // taken to mean the download failed. An empty source makes loadHTML()
     // throw a ValueError on PHP 8, so stop before parsing.
     $html = fetch_file_contents($article["link"]);
     $doc = new DOMDocument();
     // "@" silences libxml warnings about sloppy real-world markup.
     if (!$html || !@$doc->loadHTML($html)) {
         return $article;
     }

     $xpath = new DOMXPath($doc);
     // we might also check for img[@class='strip'] I guess...
     foreach ($xpath->query('(//img[@src])') as $img) {
         $matches = array();
         if (preg_match("/dyn\\/str_strip\\/.*zoom\\.gif\$/", $img->getAttribute("src"), $matches)) {
             $img->setAttribute("src", rewrite_relative_url("http://dilbert.com/", $matches[0]));
             $article["content"] = $doc->saveXML($img);
             $article["plugin_data"] = $marker . $article["plugin_data"];
             break;
         }
     }

     return $article;
 }
 /**
  * Replaces gulli.com items with the div with id "_contentLeft" from the linked page.
  *
  * Scripts, noscript blocks and the "adsenseContainer", "_newsCrumb",
  * "_forumBox" and "nointelliTXT" divs are removed before the node is
  * serialized into $article["content"].
  *
  * @param array $article Article data; "link" is read, "content" may be replaced.
  * @return array The article; unchanged when the link does not match, the page
  *               could not be loaded or no content node was found.
  */
 function hook_article_filter($article)
 {
     if (strpos($article["link"], "gulli.com") === FALSE) {
         return $article;
     }

     // fetch_file_contents() is defined outside this block; a falsy result is
     // taken to mean the download failed. An empty source makes loadHTML()
     // throw a ValueError on PHP 8, so stop before parsing.
     $html = fetch_file_contents($article["link"]);
     if ($html) {
         $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     }

     $doc = new DOMDocument();
     // "@" silences libxml warnings about sloppy real-world markup.
     if (!$html || !@$doc->loadHTML($html)) {
         return $article;
     }

     $xpath = new DOMXPath($doc);

     // first remove advertisement stuff
     $unwanted = $xpath->query('(//script)|(//noscript)|(//div[@class="adsenseContainer"])|(//div[@class="_newsCrumb"])|(//div[@class="_forumBox"])|(//div[@class="nointelliTXT"])');
     foreach ($unwanted as $node) {
         $node->parentNode->removeChild($node);
     }

     // now get the (cleaned) article
     $content = $xpath->query('(//div[@id="_contentLeft"])')->item(0);
     if ($content) {
         $article["content"] = $doc->saveXML($content);
     }

     return $article;
 }
 /**
  * Replaces emsvechtewelle.de items with the first paragraph of the linked page.
  *
  * Script, noscript and style elements are removed, then the first p that is
  * a direct child of the div with id "content" is serialized into
  * $article["content"].
  *
  * @param array $article Article data; "link" is read, "content" may be replaced.
  * @return array The article; unchanged when the link does not match, the page
  *               could not be loaded or no matching paragraph was found.
  */
 function hook_article_filter($article)
 {
     if (strpos($article["link"], "emsvechtewelle.de") === FALSE) {
         return $article;
     }

     // fetch_file_contents() is defined outside this block; a falsy result is
     // taken to mean the download failed. An empty source makes loadHTML()
     // throw a ValueError on PHP 8, so stop before parsing.
     $html = fetch_file_contents($article["link"]);
     if ($html) {
         $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     }

     $doc = new DOMDocument();
     // "@" silences libxml warnings about sloppy real-world markup.
     if (!$html || !@$doc->loadHTML($html)) {
         return $article;
     }

     $xpath = new DOMXPath($doc);

     // first remove advertisement stuff
     foreach ($xpath->query('(//script)|(//noscript)|(//style)') as $node) {
         $node->parentNode->removeChild($node);
     }

     $content = $xpath->query('(//div[@id="content"]/p)')->item(0);
     if ($content) {
         $article["content"] = $doc->saveXML($content);
     }

     return $article;
 }
 /**
  * Replaces daujones.com items with the div of class "maincontent" from the linked page.
  *
  * All form, span, a and center elements, h2 elements carrying a style
  * attribute and the "rightnav" and "kommentare" divs are removed before
  * the node is serialized into $article["content"].
  *
  * @param array $article Article data; "link" is read, "content" may be replaced.
  * @return array The article; unchanged when the link does not match, the page
  *               could not be loaded or no content node was found.
  */
 function hook_article_filter($article)
 {
     if (strpos($article["link"], "daujones.com") === FALSE) {
         return $article;
     }

     // fetch_file_contents() is defined outside this block; a falsy result is
     // taken to mean the download failed. An empty source makes loadHTML()
     // throw a ValueError on PHP 8, so stop before parsing.
     $html = fetch_file_contents($article["link"]);
     if ($html) {
         $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     }

     $doc = new DOMDocument();
     // "@" silences libxml warnings about sloppy real-world markup.
     if (!$html || !@$doc->loadHTML($html)) {
         return $article;
     }

     $xpath = new DOMXPath($doc);

     // first remove advertisement stuff
     $unwanted = $xpath->query('(//form)|(//span)|(//div[@class="rightnav"])|(//div[@class="kommentare"])|(//a)|(//h2[@style])|(//center)');
     foreach ($unwanted as $node) {
         $node->parentNode->removeChild($node);
     }

     $content = $xpath->query('(//div[@class="maincontent"])')->item(0);
     if ($content) {
         $article["content"] = $doc->saveXML($content);
     }

     return $article;
 }
 /**
  * Replaces raumfahrer.net items with the table surrounding the article text.
  *
  * The "druckansicht" divs and "head" table cells are removed, then the
  * grandparent of the first td of class "tab_text" is serialized into
  * $article["content"].
  *
  * @param array $article Article data; "link" is read, "content" may be replaced.
  * @return array The article; unchanged when the link does not match, the page
  *               could not be loaded or no text cell was found.
  */
 function hook_article_filter($article)
 {
     if (strpos($article["link"], "raumfahrer.net") === FALSE) {
         return $article;
     }

     // fetch_file_contents() is defined outside this block; a falsy result is
     // taken to mean the download failed. An empty source makes loadHTML()
     // throw a ValueError on PHP 8, so stop before parsing.
     $html = fetch_file_contents($article["link"]);
     $doc = new DOMDocument();
     // "@" silences libxml warnings about sloppy real-world markup.
     if (!$html || !@$doc->loadHTML($html)) {
         return $article;
     }

     // TODO: Add Express mp3 as attachment/enclosure once plugins are able to do that
     $xpath = new DOMXPath($doc);

     foreach ($xpath->query('(//div[@class="druckansicht"])|(//td[@class="head"])') as $node) {
         $node->parentNode->removeChild($node);
     }

     $cell = $xpath->query('(//td[@class="tab_text"])')->item(0);
     // Two levels up from the text cell, as in the original selection.
     $content = $cell ? $cell->parentNode->parentNode : false;
     if ($content) {
         $article["content"] = $doc->saveXML($content);
     }

     return $article;
 }
Example #8
0
 /**
  * Replaces volksstimme.de items with the image and body blocks of the linked page.
  *
  * Every div with itemprop "image" or "articleBody" is serialized, in
  * document order, and the concatenation becomes $article["content"].
  * No cleanup pass is applied to the page beforehand.
  *
  * @param array $article Article data; "link" is read, "content" may be replaced.
  * @return array The article; unchanged when the link does not match, the page
  *               could not be loaded or nothing was collected.
  */
 function hook_article_filter($article)
 {
     if (strpos($article["link"], "volksstimme.de") === FALSE) {
         return $article;
     }

     // fetch_file_contents() is defined outside this block; a falsy result is
     // taken to mean the download failed. An empty source makes loadHTML()
     // throw a ValueError on PHP 8, so stop before parsing.
     $html = fetch_file_contents($article["link"]);
     if ($html) {
         $html = mb_convert_encoding($html, 'HTML-ENTITIES', "auto");
     }

     $doc = new DOMDocument();
     // "@" silences libxml warnings about sloppy real-world markup.
     if (!$html || !@$doc->loadHTML($html)) {
         return $article;
     }

     $xpath = new DOMXPath($doc);
     $collected = "";
     foreach ($xpath->query('(//div[@itemprop="image"]|//div[@itemprop="articleBody"])') as $part) {
         _debug("Muh, found stuff...");
         $collected .= $doc->saveXML($part);
         _debug("Length of basenode: " . strlen($collected));
     }

     if (!empty($collected)) {
         $article["content"] = $collected;
     }

     return $article;
 }
 /**
  * Replaces buttersafe.com items with the comic image from the linked page.
  *
  * The first img whose src matches "http://buttersafe.com/comics/<4 digits>..."
  * is serialized (with LIBXML_NOEMPTYTAG) into $article["content"] and a
  * "buttersafe,<owner_uid>:" marker is prepended to $article["plugin_data"].
  * When that marker is already present the page is not fetched and
  * $article["stored"]["content"] is reused if set.
  *
  * @param array $article Article data; reads "owner_uid", "guid", "link",
  *                       "plugin_data" and optionally "stored".
  * @return array The article, with "content"/"plugin_data" updated on success.
  */
 function hook_article_filter($article)
 {
     $owner_uid = $article["owner_uid"];
     if (strpos($article["guid"], "buttersafe.com") === FALSE) {
         return $article;
     }

     $marker = "buttersafe,{$owner_uid}:";
     if (strpos($article["plugin_data"], $marker) !== FALSE) {
         // Marker present: reuse the stored content instead of downloading again.
         if (isset($article["stored"]["content"])) {
             $article["content"] = $article["stored"]["content"];
         }
         return $article;
     }

     // fetch_file_contents() is defined outside this block; a falsy result is
     // taken to mean the download failed. An empty source makes loadHTML()
     // throw a ValueError on PHP 8, so stop before parsing.
     $html = fetch_file_contents($article["link"]);
     $doc = new DOMDocument();
     // "@" silences libxml warnings about sloppy real-world markup.
     if (!$html || !@$doc->loadHTML($html)) {
         return $article;
     }

     $xpath = new DOMXPath($doc);
     foreach ($xpath->query('(//img[@src])') as $img) {
         if (preg_match("/(http:\\/\\/buttersafe.com\\/comics\\/\\d{4}.*)/i", $img->getAttribute("src"))) {
             $article["content"] = $doc->saveXML($img, LIBXML_NOEMPTYTAG);
             $article["plugin_data"] = $marker . $article["plugin_data"];
             break;
         }
     }

     return $article;
 }
Example #10
0
 /**
  * Replaces sz.de items with the first article element of the linked page.
  *
  * Scripts, noscript blocks, div.ad, the "header" and "footer" sections and
  * span.imagelabel elements are removed before the node is serialized into
  * $article["content"].
  *
  * @param array $article Article data; "link" is read, "content" may be replaced.
  * @return array The article; unchanged when the link does not match, the page
  *               could not be loaded or no article element was found.
  */
 function hook_article_filter($article)
 {
     if (strpos($article["link"], "sz.de") === FALSE) {
         return $article;
     }

     // fetch_file_contents() is defined outside this block; a falsy result is
     // taken to mean the download failed. An empty source makes loadHTML()
     // throw a ValueError on PHP 8, so stop before parsing.
     $html = fetch_file_contents($article["link"]);
     if ($html) {
         $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     }

     $doc = new DOMDocument();
     // "@" silences libxml warnings about sloppy real-world markup.
     if (!$html || !@$doc->loadHTML($html)) {
         return $article;
     }

     $xpath = new DOMXPath($doc);

     // first remove header, footer
     $unwanted = $xpath->query('(//script)|(//noscript)|(//div[@class="ad"])|(//section[@class="header"])|(//section[@class="footer"])|(//span[@class="imagelabel"])');
     foreach ($unwanted as $node) {
         $node->parentNode->removeChild($node);
     }

     $content = $xpath->query('(//article)')->item(0);
     if ($content) {
         $article["content"] = $doc->saveXML($content);
     }

     return $article;
 }
Example #11
0
 /**
  * Replaces tagesschau.de items with the first "sectionZ" div of the linked page.
  *
  * Scripts, noscript blocks, iframes and the "infokasten", "teaser",
  * "socialMedia", "linklist" and "metablockwrapper" divs are removed before
  * the node is serialized into $article["content"].
  *
  * @param array $article Article data; "link" is read, "content" may be replaced.
  * @return array The article; unchanged when the link does not match, the page
  *               could not be loaded or no content node was found.
  */
 function hook_article_filter($article)
 {
     if (strpos($article["link"], "tagesschau.de") === FALSE) {
         return $article;
     }

     // fetch_file_contents() is defined outside this block; a falsy result is
     // taken to mean the download failed. An empty source makes loadHTML()
     // throw a ValueError on PHP 8, so stop before parsing.
     $html = fetch_file_contents($article["link"]);
     if ($html) {
         $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     }

     $doc = new DOMDocument();
     // "@" silences libxml warnings about sloppy real-world markup.
     if (!$html || !@$doc->loadHTML($html)) {
         return $article;
     }

     $xpath = new DOMXPath($doc);

     // first remove header, footer
     $unwanted = $xpath->query('(//script)|(//noscript)|(//iframe)|(//div[contains(@class, "infokasten")])|(//div[@class="teaser"])|(//div[@class="socialMedia"])|(//div[contains(@class, "linklist")])|(//div[@class="metablockwrapper"])');
     foreach ($unwanted as $node) {
         $node->parentNode->removeChild($node);
     }

     $content = $xpath->query('(//div[contains(@class, "sectionZ")])')->item(0);
     if ($content) {
         $article["content"] = $doc->saveXML($content);
     }

     return $article;
 }
Example #12
0
 /**
  * Replaces winfuture.de items with the div with id "news_content" from the linked page.
  *
  * Two consecutive br tags in the raw markup are collapsed into one, then
  * scripts, noscript blocks, the "wf_ContentAd" and "wf_SingleAd" divs and
  * images with width="1" are removed before the node is serialized into
  * $article["content"].
  *
  * @param array $article Article data; "link" is read, "content" may be replaced.
  * @return array The article; unchanged when the link does not match, the page
  *               could not be loaded or no content node was found.
  */
 function hook_article_filter($article)
 {
     if (strpos($article["link"], "winfuture.de") === FALSE) {
         return $article;
     }

     // fetch_file_contents() is defined outside this block; a falsy result is
     // taken to mean the download failed. An empty source makes loadHTML()
     // throw a ValueError on PHP 8, so stop before parsing.
     $html = fetch_file_contents($article["link"]);
     if ($html) {
         // remove double linebreaks
         $html = preg_replace("/(<[\\ ]*br[\\/\\ ]*>){2}/", "<br />", $html);
     }

     $doc = new DOMDocument();
     // "@" silences libxml warnings about sloppy real-world markup.
     if (!$html || !@$doc->loadHTML($html)) {
         return $article;
     }

     $xpath = new DOMXPath($doc);

     // first remove advertisement + tracking stuff
     $unwanted = $xpath->query('(//script)|(//noscript)|(//div[@id="wf_ContentAd"])|(//div[@id="wf_SingleAd"])|(//img[@width="1"])');
     foreach ($unwanted as $node) {
         $node->parentNode->removeChild($node);
     }

     // now get the (cleaned) article
     $content = $xpath->query('(//div[@id="news_content"])')->item(0);
     if ($content) {
         $article["content"] = $doc->saveXML($content);
     }

     return $article;
 }
Example #13
0
 /**
  * Replaces taz.de items with the first div of class "sectbody" from the linked page.
  *
  * Scripts, noscript blocks, iframes, style elements, div.sectfoot, the
  * "tzi_paywall" div and every div whose class contains "rack" are removed
  * first. Each removed node and the final result are passed to _debug().
  *
  * @param array $article Article data; "link" is read, "content" may be replaced.
  * @return array The article; unchanged when the link does not match, the page
  *               could not be loaded or no content node was found.
  */
 function hook_article_filter($article)
 {
     if (strpos($article["link"], "taz.de") === FALSE) {
         return $article;
     }

     // fetch_file_contents() is defined outside this block; a falsy result is
     // taken to mean the download failed. An empty source makes loadHTML()
     // throw a ValueError on PHP 8, so stop before parsing.
     $html = fetch_file_contents($article["link"]);
     if ($html) {
         $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     }

     $doc = new DOMDocument();
     // "@" silences libxml warnings about sloppy real-world markup.
     if (!$html || !@$doc->loadHTML($html)) {
         return $article;
     }

     $xpath = new DOMXPath($doc);

     // first remove advertisement stuff
     $unwanted = $xpath->query('(//script)|(//noscript)|(//iframe)|(//style)|(//div[@class="sectfoot"])|(//div[@id="tzi_paywall"])|(//div[contains(@class, "rack")])');
     foreach ($unwanted as $node) {
         _debug("Remove1: " . $doc->saveXML($node));
         $node->parentNode->removeChild($node);
     }

     $content = $xpath->query('(//div[@class="sectbody"])')->item(0);
     if ($content) {
         _debug("Result: " . $doc->saveXML($content));
         $article["content"] = $doc->saveXML($content);
     }

     return $article;
 }
Example #14
0
 /**
  * Replaces nationalgeographic.com items with the "content_mainA" element of the linked page.
  *
  * Elements with class "aside" or "social_buttons", or with id "livefyre" or
  * "powered_by_livefyre", are removed before serialization. On success a
  * "natgeo,<owner_uid>:" marker is prepended to $article["plugin_data"];
  * when that marker is already present the page is not fetched and
  * $article["stored"]["content"] is reused if set.
  *
  * @param array $article Article data; reads "owner_uid", "link",
  *                       "plugin_data" and optionally "stored".
  * @return array The article, with "content"/"plugin_data" updated on success.
  */
 function hook_article_filter($article)
 {
     $owner_uid = $article["owner_uid"];
     if (strpos($article["link"], "nationalgeographic.com") === FALSE) {
         return $article;
     }

     $marker = "natgeo,{$owner_uid}:";
     if (strpos($article["plugin_data"], $marker) !== FALSE) {
         // Marker present: reuse the stored content instead of downloading again.
         if (isset($article["stored"]["content"])) {
             $article["content"] = $article["stored"]["content"];
         }
         return $article;
     }

     // fetch_file_contents() is defined outside this block; a falsy result is
     // taken to mean the download failed. An empty source makes loadHTML()
     // throw a ValueError on PHP 8, so stop before parsing.
     $html = fetch_file_contents($article["link"]);
     $doc = new DOMDocument();
     // "@" silences libxml warnings about sloppy real-world markup.
     if (!$html || !@$doc->loadHTML($html)) {
         return $article;
     }

     $xpath = new DOMXPath($doc);
     // Look the container up before pruning, as the original did.
     $content = $doc->getElementById("content_mainA");

     $trash = $xpath->query("//*[@class='aside' or @id='livefyre' or @id='powered_by_livefyre' or @class='social_buttons']");
     foreach ($trash as $node) {
         $node->parentNode->removeChild($node);
     }

     if ($content) {
         $article["content"] = $doc->saveXML($content);
         $article["plugin_data"] = $marker . $article["plugin_data"];
     }

     return $article;
 }
Example #15
0
 /**
  * Handles threewordphrase.com items: uses the comic image as article content.
  *
  * The first img matching "//td/center/img" on the linked page is serialized
  * into $article["content"] and an "af_comics,<owner_uid>:" marker is
  * prepended to $article["plugin_data"]. When that marker is already present
  * the page is not fetched and $article["stored"]["content"] is reused if set.
  *
  * @param array $article Article data, modified in place.
  * @return bool true when the link belongs to this site (whether or not the
  *              content could be replaced), false otherwise.
  */
 function process(&$article)
 {
     $owner_uid = $article["owner_uid"];
     if (strpos($article["link"], "threewordphrase.com") === FALSE) {
         return false;
     }

     $marker = "af_comics,{$owner_uid}:";
     if (strpos($article["plugin_data"], $marker) !== FALSE) {
         // Marker present: reuse the stored content instead of downloading again.
         if (isset($article["stored"]["content"])) {
             $article["content"] = $article["stored"]["content"];
         }
         return true;
     }

     // fetch_file_contents() is defined outside this block; a falsy result is
     // taken to mean the download failed. An empty source makes loadHTML()
     // throw a ValueError on PHP 8, so stop before parsing.
     $html = fetch_file_contents($article["link"]);
     $doc = new DOMDocument();
     // "@" silences libxml warnings about sloppy real-world markup.
     if (!$html || !@$doc->loadHTML($html)) {
         return true;
     }

     $xpath = new DOMXPath($doc);
     $img = $xpath->query("//td/center/img")->item(0);
     if ($img) {
         $article["content"] = $doc->saveXML($img);
         $article["plugin_data"] = $marker . $article["plugin_data"];
     }

     return true;
 }
 /**
  * Handles erzaehlmirnix.wordpress.com items: uses the comic image as article content.
  *
  * Of all img elements whose src contains "erzaehlmirnix.files.wordpress.com"
  * the LAST one in document order is serialized into $article["content"],
  * and an "af_comics,<owner_uid>:" marker is prepended to
  * $article["plugin_data"]. When that marker is already present the page is
  * not fetched and $article["stored"]["content"] is reused if set.
  *
  * @param array $article Article data, modified in place.
  * @return bool true when the link belongs to this site (whether or not the
  *              content could be replaced), false otherwise.
  */
 function process(&$article)
 {
     $owner_uid = $article["owner_uid"];
     if (strpos($article["link"], "erzaehlmirnix.wordpress.com/") === FALSE) {
         return false;
     }

     $marker = "af_comics,{$owner_uid}:";
     if (strpos($article["plugin_data"], $marker) !== FALSE) {
         // Marker present: reuse the stored content instead of downloading again.
         if (isset($article["stored"]["content"])) {
             $article["content"] = $article["stored"]["content"];
         }
         return true;
     }

     // fetch_file_contents() is defined outside this block; a falsy result is
     // taken to mean the download failed. An empty source makes loadHTML()
     // throw a ValueError on PHP 8, so stop before parsing.
     $html = fetch_file_contents($article["link"]);
     $doc = new DOMDocument();
     // "@" silences libxml warnings about sloppy real-world markup.
     if (!$html || !@$doc->loadHTML($html)) {
         return true;
     }

     $xpath = new DOMXPath($doc);
     $images = $xpath->query('(//img[contains(@src, "erzaehlmirnix.files.wordpress.com")])');
     // The original loop ran to the end without a break, i.e. the last match wins.
     $img = $images->length > 0 ? $images->item($images->length - 1) : false;
     if ($img) {
         $article["content"] = $doc->saveXML($img);
         $article["plugin_data"] = $marker . $article["plugin_data"];
     }

     return true;
 }
 /**
  * Handles userfriendly.org cartoon items: uses the strip image as article content.
  *
  * The first img whose alt text contains "Strip for" is serialized into
  * $article["content"] and an "af_comics,<owner_uid>:" marker is prepended
  * to $article["plugin_data"]. When that marker is already present the page
  * is not fetched and $article["stored"]["content"] is reused if set.
  *
  * @param array $article Article data, modified in place.
  * @return bool true when the link belongs to this site (whether or not the
  *              content could be replaced), false otherwise.
  */
 function process(&$article)
 {
     $owner_uid = $article["owner_uid"];
     if (strpos($article["link"], "userfriendly.org/cartoons") === FALSE) {
         return false;
     }

     $marker = "af_comics,{$owner_uid}:";
     if (strpos($article["plugin_data"], $marker) !== FALSE) {
         // Marker present: reuse the stored content instead of downloading again.
         if (isset($article["stored"]["content"])) {
             $article["content"] = $article["stored"]["content"];
         }
         return true;
     }

     // fetch_file_contents() is defined outside this block; a falsy result is
     // taken to mean the download failed. An empty source makes loadHTML()
     // throw a ValueError on PHP 8, so stop before parsing.
     $html = fetch_file_contents($article["link"]);
     if ($html) {
         $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     }

     $doc = new DOMDocument();
     // "@" silences libxml warnings about sloppy real-world markup.
     if (!$html || !@$doc->loadHTML($html)) {
         return true;
     }

     $xpath = new DOMXPath($doc);
     foreach ($xpath->query('(//img[@alt])') as $img) {
         if (strpos($img->getAttribute('alt'), 'Strip for') !== false) {
             $article["content"] = $doc->saveXML($img);
             $article["plugin_data"] = $marker . $article["plugin_data"];
             break;
         }
     }

     return true;
 }
Example #18
0
 /**
  * Replaces rockpapershotgun.com items with the first div of class "entry" from the linked page.
  *
  * The site is recognized by $article["guid"], while the page is fetched from
  * $article["link"]. The "dd_post_share" and "social" divs and all iframes
  * are removed before the node is serialized into $article["content"].
  *
  * @param array $article Article data; "guid" and "link" are read, "content" may be replaced.
  * @return array The article; unchanged when the guid does not match, the page
  *               could not be loaded or no content node was found.
  */
 function hook_article_filter($article)
 {
     if (strpos($article["guid"], "rockpapershotgun.com") === FALSE) {
         return $article;
     }

     // fetch_file_contents() is defined outside this block; a falsy result is
     // taken to mean the download failed. An empty source makes loadHTML()
     // throw a ValueError on PHP 8, so stop before parsing.
     $html = fetch_file_contents($article["link"]);
     if ($html) {
         $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     }

     $doc = new DOMDocument();
     // "@" silences libxml warnings about sloppy real-world markup.
     if (!$html || !@$doc->loadHTML($html)) {
         return $article;
     }

     $xpath = new DOMXPath($doc);

     // remove category stuff
     foreach ($xpath->query('(//div[@class="dd_post_share"])|(//div[@class="social"])|(//iframe)') as $node) {
         $node->parentNode->removeChild($node);
     }

     $content = $xpath->query('(//div[@class="entry"])')->item(0);
     if ($content) {
         $article["content"] = $doc->saveXML($content);
     }

     return $article;
 }
 /**
  * Handles explosm.net comic items: uses the comic image as article content.
  *
  * The first img whose src matches "http://.../db/files/Comics/..." is
  * serialized into $article["content"] and an "af_comics,<owner_uid>:"
  * marker is prepended to $article["plugin_data"]. When that marker is
  * already present the page is not fetched and
  * $article["stored"]["content"] is reused if set.
  *
  * @param array $article Article data, modified in place.
  * @return bool true when the link belongs to this site (whether or not the
  *              content could be replaced), false otherwise.
  */
 function process(&$article)
 {
     $owner_uid = $article["owner_uid"];
     if (strpos($article["link"], "explosm.net/comics") === FALSE) {
         return false;
     }

     $marker = "af_comics,{$owner_uid}:";
     if (strpos($article["plugin_data"], $marker) !== FALSE) {
         // Marker present: reuse the stored content instead of downloading again.
         if (isset($article["stored"]["content"])) {
             $article["content"] = $article["stored"]["content"];
         }
         return true;
     }

     // fetch_file_contents() is defined outside this block; a falsy result is
     // taken to mean the download failed. An empty source makes loadHTML()
     // throw a ValueError on PHP 8, so stop before parsing.
     $html = fetch_file_contents($article["link"]);
     $doc = new DOMDocument();
     // "@" silences libxml warnings about sloppy real-world markup.
     if (!$html || !@$doc->loadHTML($html)) {
         return true;
     }

     $xpath = new DOMXPath($doc);
     // we might also check for img[@class='strip'] I guess...
     foreach ($xpath->query('(//img[@src])') as $img) {
         if (preg_match("/(http:\\/\\/.*\\/db\\/files\\/Comics\\/.*)/i", $img->getAttribute("src"))) {
             $article["content"] = $doc->saveXML($img);
             $article["plugin_data"] = $marker . $article["plugin_data"];
             break;
         }
     }

     return true;
 }
 /**
  * Handles several WordPress-hosted comics: uses the uploaded comic image as content.
  *
  * Applies to links containing "blog.beetlebum.de", "fb-kundendienst.de",
  * "marydeathcomics.com" or "commitstrip.com". The first img inside
  * div.entry-content or div#content-wrapper whose src matches
  * "http(s)://.../wp-content/uploads/..." is serialized into
  * $article["content"] and an "af_comics,<owner_uid>:" marker is prepended
  * to $article["plugin_data"]. When that marker is already present the page
  * is not fetched and $article["stored"]["content"] is reused if set.
  *
  * @param array $article Article data, modified in place.
  * @return bool true when the link belongs to one of these sites (whether or
  *              not the content could be replaced), false otherwise.
  */
 function process(&$article)
 {
     $owner_uid = $article["owner_uid"];

     $handled = false;
     foreach (array("blog.beetlebum.de", "fb-kundendienst.de", "marydeathcomics.com", "commitstrip.com") as $site) {
         if (strpos($article["link"], $site) !== FALSE) {
             $handled = true;
             break;
         }
     }
     if (!$handled) {
         return false;
     }

     $marker = "af_comics,{$owner_uid}:";
     if (strpos($article["plugin_data"], $marker) !== FALSE) {
         // Marker present: reuse the stored content instead of downloading again.
         if (isset($article["stored"]["content"])) {
             $article["content"] = $article["stored"]["content"];
         }
         return true;
     }

     // fetch_file_contents() is defined outside this block; a falsy result is
     // taken to mean the download failed. An empty source makes loadHTML()
     // throw a ValueError on PHP 8, so stop before parsing.
     $html = fetch_file_contents($article["link"]);
     $doc = new DOMDocument();
     // "@" silences libxml warnings about sloppy real-world markup.
     if (!$html || !@$doc->loadHTML($html)) {
         return true;
     }

     $xpath = new DOMXPath($doc);
     $images = $xpath->query('(//div[@class="entry-content"]//img[@src])|(//div[@id="content-wrapper"]//img[@src])');
     foreach ($images as $img) {
         if (preg_match("/(https?:\\/\\/.*\\/wp-content\\/uploads\\/.*)/i", $img->getAttribute("src"))) {
             $article["content"] = $doc->saveXML($img);
             $article["plugin_data"] = $marker . $article["plugin_data"];
             break;
         }
     }

     return true;
 }
Example #21
0
 /**
  * Replaces stimmthaltnicht.de items with the first div of class "entry-content" from the linked page.
  *
  * No cleanup pass is applied; the node is serialized as found into
  * $article["content"].
  *
  * @param array $article Article data; "link" is read, "content" may be replaced.
  * @return array The article; unchanged when the link does not match, the page
  *               could not be loaded or no content node was found.
  */
 function hook_article_filter($article)
 {
     if (strpos($article["link"], "stimmthaltnicht.de") === FALSE) {
         return $article;
     }

     // fetch_file_contents() is defined outside this block; a falsy result is
     // taken to mean the download failed. An empty source makes loadHTML()
     // throw a ValueError on PHP 8, so stop before parsing.
     $html = fetch_file_contents($article["link"]);
     if ($html) {
         $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     }

     $doc = new DOMDocument();
     // "@" silences libxml warnings about sloppy real-world markup.
     if (!$html || !@$doc->loadHTML($html)) {
         return $article;
     }

     $xpath = new DOMXPath($doc);
     $content = $xpath->query('(//div[@class="entry-content"])')->item(0);
     if ($content) {
         $article["content"] = $doc->saveXML($content);
     }

     return $article;
 }
Example #22
0
 /**
  * Handles cad-comic.com items: uses the comic image as article content.
  *
  * For items whose title does not contain "News:" and that do not yet carry
  * the "af_comics,<owner_uid>:" marker, the first img whose src contains
  * "/comics/cad-" is serialized into $article["content"] and the marker is
  * prepended to $article["plugin_data"]. In every other case (news item or
  * marker present) $article["stored"]["content"] is reused if set.
  *
  * @param array $article Article data, modified in place.
  * @return bool true when the link belongs to this site (whether or not the
  *              content could be replaced), false otherwise.
  */
 function process(&$article)
 {
     $owner_uid = $article["owner_uid"];
     if (strpos($article["link"], "cad-comic.com/cad/") === FALSE) {
         return false;
     }

     $marker = "af_comics,{$owner_uid}:";
     $is_news = strpos($article["title"], "News:") !== FALSE;
     if ($is_news || strpos($article["plugin_data"], $marker) !== FALSE) {
         if (isset($article["stored"]["content"])) {
             $article["content"] = $article["stored"]["content"];
         }
         return true;
     }

     // fetch_file_contents() is defined outside this block; a falsy result is
     // taken to mean the download failed. An empty source makes loadHTML()
     // throw a ValueError on PHP 8, so stop before parsing.
     $html = fetch_file_contents($article["link"]);
     $doc = new DOMDocument();
     // "@" silences libxml warnings about sloppy real-world markup.
     if (!$html || !@$doc->loadHTML($html)) {
         return true;
     }

     $xpath = new DOMXPath($doc);
     $img = $xpath->query('(//img[contains(@src, "/comics/cad-")])')->item(0);
     if ($img) {
         $article["content"] = $doc->saveXML($img);
         $article["plugin_data"] = $marker . $article["plugin_data"];
     }

     return true;
 }
 /**
  * Handles several comic sites that wrap the strip in a div with id "comic".
  *
  * Applies to guids containing "bunicomic.com", "buttersafe.com",
  * "whompcomic.com", "extrafabulouscomics.com", "happyjar.com" or
  * "csectioncomics.com". The page behind $article["link"] is requested with
  * a browser-like user agent string, the first div#comic is serialized into
  * $article["content"] and an "af_comics,<owner_uid>:" marker is prepended
  * to $article["plugin_data"]. When that marker is already present the page
  * is not fetched and $article["stored"]["content"] is reused if set.
  *
  * @param array $article Article data, modified in place.
  * @return bool true when the guid belongs to one of these sites (whether or
  *              not the content could be replaced), false otherwise.
  */
 function process(&$article)
 {
     $owner_uid = $article["owner_uid"];

     $handled = false;
     foreach (array("bunicomic.com", "buttersafe.com", "whompcomic.com", "extrafabulouscomics.com", "happyjar.com", "csectioncomics.com") as $site) {
         if (strpos($article["guid"], $site) !== FALSE) {
             $handled = true;
             break;
         }
     }
     if (!$handled) {
         return false;
     }

     $marker = "af_comics,{$owner_uid}:";
     if (strpos($article["plugin_data"], $marker) !== FALSE) {
         // Marker present: reuse the stored content instead of downloading again.
         if (isset($article["stored"]["content"])) {
             $article["content"] = $article["stored"]["content"];
         }
         return true;
     }

     // lol at people who block clients by user agent
     // oh noes my ad revenue Q_Q
     $res = fetch_file_contents($article["link"], false, false, false, false, false, 0, "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)");

     // fetch_file_contents() is defined outside this block; a falsy result is
     // taken to mean the download failed. An empty source makes loadHTML()
     // throw a ValueError on PHP 8, so stop before parsing.
     $doc = new DOMDocument();
     // "@" silences libxml warnings about sloppy real-world markup.
     if (!$res || !@$doc->loadHTML($res)) {
         return true;
     }

     $xpath = new DOMXPath($doc);
     $comic = $xpath->query('//div[@id="comic"]')->item(0);
     if ($comic) {
         $article["content"] = $doc->saveXML($comic);
         $article["plugin_data"] = $marker . $article["plugin_data"];
     }

     return true;
 }
Example #24
0
 /**
  * Replaces titanic-magazin.de items with the "tt_news-bodytext" div of the linked page.
  *
  * Scripts, noscript blocks, forms, the a[name="form"] anchor and links to
  * "newsticker.html" are removed. Paragraphs are removed only when their
  * text is exactly "bezahlte Anzeige"; all other paragraphs are kept.
  *
  * @param array $article Article data; "link" is read, "content" may be replaced.
  * @return array The article; unchanged when the link does not match, the page
  *               could not be loaded or no content node was found.
  */
 function hook_article_filter($article)
 {
     if (strpos($article["link"], "titanic-magazin.de") === FALSE) {
         return $article;
     }

     // fetch_file_contents() is defined outside this block; a falsy result is
     // taken to mean the download failed. An empty source makes loadHTML()
     // throw a ValueError on PHP 8, so stop before parsing.
     $html = fetch_file_contents($article["link"]);
     if ($html) {
         $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     }

     $doc = new DOMDocument();
     // "@" silences libxml warnings about sloppy real-world markup.
     if (!$html || !@$doc->loadHTML($html)) {
         return $article;
     }

     $xpath = new DOMXPath($doc);

     // first remove advertisement + tracking stuff
     $candidates = $xpath->query('(//script)|(//noscript)|(//form)|(//a[@name="form"])|(//p)|(//a[@href="newsticker.html"])');
     foreach ($candidates as $node) {
         // Every p is a candidate, but only the advertisement label is dropped.
         if ($node->localName === "p" && $node->textContent != "bezahlte Anzeige") {
             continue;
         }
         $node->parentNode->removeChild($node);
     }

     // now get the (cleaned) article
     $content = $xpath->query('(//div[@class="tt_news-bodytext"])')->item(0);
     if ($content) {
         $article["content"] = $doc->saveXML($content);
     }

     return $article;
 }
Example #25
0
 /**
  * Replaces gameone.de items with the first div of class "post single" from the linked page.
  *
  * No cleanup pass is applied; the node is serialized as found into
  * $article["content"].
  *
  * @param array $article Article data; "link" is read, "content" may be replaced.
  * @return array The article; unchanged when the link does not match, the page
  *               could not be loaded or no content node was found.
  */
 function hook_article_filter($article)
 {
     if (strpos($article["link"], "gameone.de") === FALSE) {
         return $article;
     }

     // fetch_file_contents() is defined outside this block; a falsy result is
     // taken to mean the download failed. An empty source makes loadHTML()
     // throw a ValueError on PHP 8, so stop before parsing.
     $html = fetch_file_contents($article["link"]);
     if ($html) {
         $html = mb_convert_encoding($html, 'HTML-ENTITIES', "auto");
     }

     $doc = new DOMDocument();
     // "@" silences libxml warnings about sloppy real-world markup.
     if (!$html || !@$doc->loadHTML($html)) {
         return $article;
     }

     $xpath = new DOMXPath($doc);
     $content = $xpath->query('(//div[@class="post single"])')->item(0);
     if ($content) {
         $article["content"] = $doc->saveXML($content);
     }

     return $article;
 }
 /**
  * Replaces penny-arcade.com comic items with the comic image from the linked page.
  *
  * Only items whose title contains "Comic:" are handled. The first img whose
  * src matches "http://art.penny-arcade.com/..." is serialized (with
  * LIBXML_NOEMPTYTAG) into $article["content"] and a
  * "pennyarcade,<owner_uid>:" marker is prepended to
  * $article["plugin_data"]. When that marker is already present the page is
  * not fetched and $article["stored"]["content"] is reused if set.
  *
  * @param array $article Article data; reads "owner_uid", "link", "title",
  *                       "plugin_data" and optionally "stored".
  * @return array The article, with "content"/"plugin_data" updated on success.
  */
 function hook_article_filter($article)
 {
     $owner_uid = $article["owner_uid"];
     if (strpos($article["link"], "penny-arcade.com") === FALSE || strpos($article["title"], "Comic:") === FALSE) {
         return $article;
     }

     $marker = "pennyarcade,{$owner_uid}:";
     if (strpos($article["plugin_data"], $marker) !== FALSE) {
         // Marker present: reuse the stored content instead of downloading again.
         if (isset($article["stored"]["content"])) {
             $article["content"] = $article["stored"]["content"];
         }
         return $article;
     }

     // fetch_file_contents() is defined outside this block; a falsy result is
     // taken to mean the download failed. An empty source makes loadHTML()
     // throw a ValueError on PHP 8, so stop before parsing.
     $html = fetch_file_contents($article["link"]);
     $doc = new DOMDocument();
     // "@" silences libxml warnings about sloppy real-world markup.
     if (!$html || !@$doc->loadHTML($html)) {
         return $article;
     }

     $xpath = new DOMXPath($doc);
     // we might also check for img[@class='strip'] I guess...
     foreach ($xpath->query('(//img[@src])') as $img) {
         if (preg_match("/(http:\\/\\/art.penny-arcade.com\\/.*)/i", $img->getAttribute("src"))) {
             $article["content"] = $doc->saveXML($img, LIBXML_NOEMPTYTAG);
             $article["plugin_data"] = $marker . $article["plugin_data"];
             break;
         }
     }

     return $article;
 }
Example #27
0
 /**
  * Handles penny-arcade.com items: comics and news posts.
  *
  * - Title contains "Comic:": the div with id "comicFrame" of the linked
  *   page becomes the content.
  * - Title contains "News Post:": the LAST div of class "post" becomes the
  *   content, after removing the first div.meta, the first h2 and the first
  *   div.comicPost directly inside div.postBody, and every div.avatar. The
  *   first avatar image is moved to the top of the post first.
  *
  * Page elements that are missing are skipped instead of being dereferenced.
  *
  * @param array $article Article data, modified in place; reads "link" and
  *                       "title", may replace "content".
  * @return bool true when the item is a penny-arcade comic or news post
  *              (whether or not the content could be replaced), false otherwise.
  */
 function process(&$article)
 {
     // NOTE(review): the original read $debug_enabled without ever defining
     // it in this scope, so the debug lines could never fire. It is presumably
     // a global flag set by the host application - confirm.
     global $debug_enabled;

     if (strpos($article["link"], "penny-arcade.com") === FALSE) {
         return false;
     }

     $is_comic = strpos($article["title"], "Comic:") !== FALSE;
     $is_news = !$is_comic && strpos($article["title"], "News Post:") !== FALSE;
     if (!$is_comic && !$is_news) {
         return false;
     }

     if ($debug_enabled) {
         _debug($is_comic ? "af_pennyarcade: Processing comic" : "af_pennyarcade: Processing news post");
     }

     // fetch_file_contents() is defined outside this block; a falsy result is
     // taken to mean the download failed. An empty source makes loadHTML()
     // throw a ValueError on PHP 8, so stop before parsing.
     $html = fetch_file_contents($article["link"]);
     $doc = new DOMDocument();
     // "@" silences libxml warnings about sloppy real-world markup.
     if (!$html || !@$doc->loadHTML($html)) {
         return true;
     }

     $xpath = new DOMXPath($doc);

     if ($is_comic) {
         $basenode = $xpath->query('(//div[@id="comicFrame"])')->item(0);
         if ($basenode) {
             $article["content"] = $doc->saveXML($basenode);
         }
         return true;
     }

     // The original loop ran to the end without a break, i.e. the last post wins.
     $posts = $xpath->query('(//div[@class="post"])');
     $basenode = $posts->length > 0 ? $posts->item($posts->length - 1) : false;

     // Drop the first occurrence of each of these, if the page has it;
     // item(0) yields null when nothing matched.
     $single_removals = array(
         '(//div[@class="meta"])',
         '(//div[@class="postBody"]/h2)',
         '(//div[@class="postBody"]/div[@class="comicPost"])',
     );
     foreach ($single_removals as $expression) {
         $node = $xpath->query($expression)->item(0);
         if ($node && $node->parentNode) {
             $node->parentNode->removeChild($node);
         }
     }

     // Move the author's avatar image to the top of the post before the
     // avatar containers are thrown away. insertBefore() rejects null, so
     // only do this when an avatar exists.
     $avatar = $xpath->query('(//div[@class="avatar"]//img)')->item(0);
     if ($basenode && $avatar) {
         $basenode->insertBefore($avatar, $basenode->firstChild);
     }

     foreach ($xpath->query('(//div[@class="avatar"])') as $container) {
         $container->parentNode->removeChild($container);
     }

     if ($basenode) {
         $article["content"] = $doc->saveXML($basenode);
     }

     return true;
 }
 /**
  * Article filter for Penny Arcade feed items.
  *
  * For items whose link contains "penny-arcade.com" and whose title contains
  * "Comic:" and/or "News Post:", the linked page is fetched and the matching
  * post element replaces the article content. A successfully processed item
  * gets "pennyarcade,<owner_uid>:" prepended to its "plugin_data"; an item
  * that already carries that marker has its content restored from
  * $article["stored"]["content"] (when set) instead of being fetched again.
  *
  * @param array $article Feed item; reads "owner_uid", "link", "title",
  *                       "plugin_data" and "stored".
  * @return array The (possibly modified) article.
  */
 function hook_article_filter($article)
 {
     // The flag used to be read without being defined in this scope.
     // NOTE(review): presumably a global debug switch -- confirm with the host application.
     global $debug_enabled;

     if (strpos($article["link"], "penny-arcade.com") === false) {
         return $article;
     }

     $owner_uid = $article["owner_uid"];
     $marker = "pennyarcade,{$owner_uid}:";

     // title needle => (debug message, XPath of the node to keep, XPath of nodes to drop or null).
     // The rules are applied in this order, one after the other.
     $rules = array(
         "Comic:" => array("af_pennyarcade: Processing comic", '(//div[@class="post comic"])', null),
         "News Post:" => array("af_pennyarcade: Processing news post", '(//div[@class="post"])', '(//div[@class="heading"])'),
     );

     foreach ($rules as $needle => $rule) {
         if (strpos($article["title"], $needle) === false) {
             continue;
         }
         list($message, $keep_query, $drop_query) = $rule;

         // Re-read on every pass: an earlier rule may have added the marker.
         $plugin_data = isset($article["plugin_data"]) ? (string) $article["plugin_data"] : "";

         if (strpos($plugin_data, $marker) !== false) {
             // Already processed for this owner: reuse the stored content.
             if (isset($article["stored"]["content"])) {
                 $article["content"] = $article["stored"]["content"];
             }
             continue;
         }

         if (!empty($debug_enabled)) {
             _debug($message);
         }

         // A failed fetch must not reach loadHTML(): PHP 8 throws on an empty source.
         $html = fetch_file_contents($article["link"]);
         if (!is_string($html) || $html === "") {
             continue;
         }

         // Collect libxml parse complaints internally instead of emitting warnings.
         $doc = new DOMDocument();
         $previous_setting = libxml_use_internal_errors(true);
         $loaded = $doc->loadHTML($html);
         libxml_clear_errors();
         libxml_use_internal_errors($previous_setting);
         if (!$loaded) {
             continue;
         }

         $xpath = new DOMXPath($doc);

         // When several elements match, the last one is kept.
         $basenode = null;
         foreach ($xpath->query($keep_query) as $entry) {
             $basenode = $entry;
         }

         if ($drop_query !== null) {
             foreach ($xpath->query($drop_query) as $unwanted) {
                 if ($unwanted->parentNode) {
                     $unwanted->parentNode->removeChild($unwanted);
                 }
             }
         }

         if ($basenode) {
             $article["content"] = $doc->saveXML($basenode);
             $article["plugin_data"] = $marker . $plugin_data;
         }
     }

     return $article;
 }
Example #29
0
 /**
  * Replace the content of a threewordphrase.com item with the first image
  * found at //td/center/img on the linked page.
  *
  * @param array $article Feed item, modified in place. Reads "link"; may
  *                       overwrite "content".
  * @return bool true when the link contains "threewordphrase.com" (even if
  *              no image could be extracted), false otherwise.
  */
 function process(&$article)
 {
     if (strpos($article["link"], "threewordphrase.com") === false) {
         return false;
     }

     // A failed fetch must not reach loadHTML(): PHP 8 throws on an empty
     // source, and the @ operator does not silence exceptions.
     $html = fetch_file_contents($article["link"]);
     if (!is_string($html) || $html === "") {
         return true;
     }

     // Collect libxml parse complaints internally instead of emitting warnings.
     $doc = new DOMDocument();
     $previous_setting = libxml_use_internal_errors(true);
     $loaded = $doc->loadHTML($html);
     libxml_clear_errors();
     libxml_use_internal_errors($previous_setting);
     if (!$loaded) {
         return true;
     }

     $xpath = new DOMXPath($doc);
     $image = $xpath->query("//td/center/img")->item(0);
     if ($image) {
         $article["content"] = $doc->saveXML($image);
     }
     return true;
 }
Example #30
-3
 /**
  * Fetch one page of an article and return its <article> element as markup,
  * followed by the markup of any further pages linked through the
  * "atoc_next" anchor (loaded recursively from http://www.golem.de).
  *
  * Before serialising, script/noscript elements, "iqad" divs, the page
  * navigation (list-jtoc / table-jtoc) and the cluster header h1 are removed.
  *
  * @param string $link Page URL; any "-rss" in it is stripped before fetching.
  * @return string|false Serialised markup, or false when the page could not
  *                      be fetched/parsed or contains no <article> element.
  */
 protected function load_page($link)
 {
     # curl does not follow the 301?
     $url = str_replace("-rss", "", $link);
     $html = fetch_file_contents($url, false, false, false, false, false, 0, "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)");
     if (!is_string($html) || $html === "") {
         return false;
     }

     // NOTE(review): the 'HTML-ENTITIES' target is deprecated as of PHP 8.2;
     // kept as-is here to leave the encoding handling untouched.
     $html_enc = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     if (!is_string($html_enc) || $html_enc === "") {
         // loadHTML() throws on an empty source in PHP 8.
         return false;
     }

     // Collect libxml parse complaints internally instead of emitting warnings.
     $doc = new DOMDocument();
     $previous_setting = libxml_use_internal_errors(true);
     $loaded = $doc->loadHTML($html_enc);
     libxml_clear_errors();
     libxml_use_internal_errors($previous_setting);
     if (!$loaded) {
         return false;
     }

     $xpath = new DOMXPath($doc);

     // Follow the "next page" link first: its table is removed by the clean-up below.
     $add_content = "";
     $nextpage = $xpath->query('//table[@id="table-jtoc"]/tr/td/a[@id="atoc_next"]');
     if ($nextpage && $nextpage->length > 0) {
         $href = $nextpage->item(0)->getAttribute("href");
         if ($href !== "") {
             // A failed sub-page yields false, which contributes nothing.
             $add_content = (string) $this->load_page("http://www.golem.de" . $href);
         }
     }

     // first remove advertisement stuff
     // (the XPath function is contains(); the misspelt "contain" made the
     // whole query fail, so nothing was ever removed)
     $stuff = $xpath->query('(//script)|(//noscript)|(//div[contains(@class, "iqad")])|(//ol[@id="list-jtoc"])|(//table[@id="table-jtoc"])|(//header[@class="cluster-header"]/h1)');
     if ($stuff) {
         foreach ($stuff as $removethis) {
             if ($removethis->parentNode) {
                 $removethis->parentNode->removeChild($removethis);
             }
         }
     }

     // now get the (cleaned) article: the first <article> element wins
     $entries = $xpath->query('(//article)');
     $basenode = $entries ? $entries->item(0) : null;
     if (!$basenode) {
         return false;
     }
     return $doc->saveXML($basenode) . $add_content;
 }