/**
 * Smoke-tests the article pipeline against a fixed list of live article URLs.
 *
 * For each URL a fresh DOMDocument is created, handed to downloadArticle()
 * together with the URL, and then passed to parseArticle(). Both helpers are
 * defined elsewhere; presumably they populate the document and record failures
 * in $GLOBALS["error"] -- confirm against their definitions.
 *
 * Output is echoed (one progress line per URL, plus an error line when
 * $GLOBALS["error"] is set after the run). Nothing is returned or asserted.
 */
public function test_realArticles()
{
    $urls = array(
        "http://spectrum.ieee.org/cars-that-think/transportation/self-driving/california-may-be-making-testing-selfdriving-cars-easier",
        "http://feedproxy.google.com/~r/cnx-software/blog/~3/j4GR4BG3ptY/",
        "http://feeds.sciencedaily.com/~r/sciencedaily/top_news/top_science/~3/vGzKzIZOy4E/160930144424.htm",
        "http://arstechnica.com/science/2016/10/hurricane-matthew-may-strike-the-florida-space-coast-threaten-iconic-nasa-buildings/",
        "http://arstechnica.com/gadgets/2016/10/galaxy-note-7-recall-part-2-samsung-admits-replacement-units-are-unsafe/",
    );

    foreach ($urls as $url) {
        // Clear any error left over from an earlier page (or from before this
        // method ran); otherwise a single failure would be re-reported for
        // every URL that follows it.
        unset($GLOBALS["error"]);

        // Terminate the progress line so it does not run into the next message.
        echo "Testing against page: " . $url . "\n";

        $doc = new DOMDocument();
        $doc->preserveWhiteSpace = false;

        downloadArticle($doc, $url);
        parseArticle($doc);

        if (isset($GLOBALS["error"])) {
            echo "Error: " . $GLOBALS["error"] . "\n";
        }
    }
}
// Request handling: build a DOM document for the requested article, parse it,
// then start emitting the HTML page.
$doc = new DOMDocument();
$doc->preserveWhiteSpace = false;

// ?debug=<truthy value> switches on the global debug flag.
if (isset($_GET["debug"]) && $_GET["debug"] == true) {
    $GLOBALS["debug"] = 1;
}

// The academic route is taken only when ?academic is present AND truthy; every
// other request downloads the page directly. With the check split into nested
// ifs, a present-but-falsy value (e.g. ?academic=0) matched neither branch and
// parseArticle() was handed an empty document.
if (isset($_GET["academic"]) && $_GET["academic"] == true) {
    echo "<p>Was an academic source, getting access past pay-wall...</p>";
    //echo "<p>Get Academic Returned: ".getAcademicPage($_GET["targetUrl"])."</p>";
    // "@" is deliberate: we don't want to see every parse fail.
    @$doc->loadHTML(getAcademicPage($_GET["targetUrl"]));
} else {
    downloadArticle($doc, $_GET["targetUrl"]);
}

parseArticle($doc);
} // closes a block that is opened outside this excerpt
?>
<!DOCTYPE html>
<html lang="en">
<head>
<link rel="stylesheet" type="text/css" href="styles/main.css">
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<?php
// The title is presumably taken from the fetched (untrusted) page, so it is
// escaped before being placed in the markup; a missing title yields an empty
// <title> instead of an undefined-index notice.
$pageTitle = isset($GLOBALS["title"]) ? (string) $GLOBALS["title"] : "";
echo "<title>" . htmlspecialchars($pageTitle, ENT_QUOTES, "UTF-8") . "</title>";
?>
</head>