/**
 * Remove operation: deletes from a model on the server every statement
 * contained in the posted data.
 *
 * The posted data is parsed into a temporary model using the parser that
 * getParser() returns for the given content type. Each statement of that
 * temporary model is then removed from the target model, and a confirmation
 * line is echoed once the iteration has finished.
 *
 * @version $Id: remove.php 268 2006-05-15 05:28:09Z tgauss $
 * @author Phil Dawes <*****@*****.**>
 *
 * @package netapi
 * @todo nothing
 * @access public
 *
 * @param object $model       Target model; its remove() method is called once per parsed statement.
 * @param mixed  $contenttype Handed to getParser() to select the parser.
 * @param mixed  $postdata    Data handed to the parser's parse2model() method.
 * @return void
 */
function removeFromModel($model, $contenttype, $postdata)
{
    $parsedModel = getParser($contenttype)->parse2model($postdata);

    // Walk the parsed statements and drop each one from the target model.
    for ($statements = $parsedModel->getStatementIterator(); $statements->hasNext();) {
        $model->remove($statements->next());
    }

    echo "200 - The data has been removed from the model.";
}
/**
 * Add operation: inserts into a model on the server every statement
 * contained in the posted data.
 *
 * The parser returned by getParser() for the given content type turns the
 * posted data into a temporary model; its statements are then added to the
 * target model one by one, and a confirmation line is echoed at the end.
 *
 * @version $Id: add.php 268 2006-05-15 05:28:09Z tgauss $
 * @author Phil Dawes <*****@*****.**>
 *
 * @package netapi
 * @todo nothing
 * @access public
 *
 * @param object $model       Target model; its add() method is called once per parsed statement.
 * @param mixed  $contenttype Handed to getParser() to select the parser.
 * @param mixed  $postdata    Data handed to the parser's parse2model() method.
 * @return void
 */
function addStatementsToModel($model, $contenttype, $postdata)
{
    $parser = getParser($contenttype);
    $incoming = $parser->parse2model($postdata);
    $cursor = $incoming->getStatementIterator();

    // Copy each parsed statement into the target model.
    while ($cursor->hasNext()) {
        $model->add($cursor->next());
    }

    echo "200 - The data has been added to the model.";
}
/**
 * Download a page using a randomly chosen User-Agent header and hand the
 * resulting DOM to the parser selected for the link.
 *
 * The page is fetched with file_get_contents() over a stream context that
 * sets the User-Agent, converted to a DOM with str_get_html(), and passed to
 * the parse() method of the object returned by getParser($link).
 *
 * Failure handling: an empty array is returned when the download fails, when
 * the content cannot be turned into a DOM, or when an Exception is thrown
 * while doing either. All logging is optional and skipped when no logger is
 * supplied.
 *
 * @param string      $link   Location of the page to fetch.
 * @param object|null $logger Optional logger exposing an info($message) method.
 * @return mixed Whatever the parser's parse() returns, or an empty array on failure.
 */
function scrape($link = 'http://google.com', $logger = null)
{
    $browsers = array(
        'Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10.4; en; rv:1.9.0.19) Gecko/2011091218 Camino/2.0.9 (like Firefox/3.0.19)',
        'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.22 (KHTML, like Gecko) Chrome/25.0.1364.97 Safari/537.22',
        'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.12 (KHTML, like Gecko) Chrome/24.0.1273.0 Safari/537.12',
        'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/420+ (KHTML, like Gecko)',
        'Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10.5; en-US; rv:1.9.0.3) Gecko/2008092414 Firefox/3.0.3',
        'Opera/10.00 (X11; Linux i686 ; U; en) Presto/2.2.0',
        'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 8.00',
        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; en-us) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31',
    );
    $browser = $browsers[array_rand($browsers)];

    if ($logger) {
        $logger->info('scraping ' . $link . ' with ' . $browser);
    }

    // create HTML DOM
    try {
        $opts = array(
            'http' => array(
                'method' => "GET",
                'header' => "User-Agent: " . $browser . "\r\n",
            ),
        );
        $context = stream_context_create($opts);

        // file_get_contents() reports failure by returning false (plus a
        // warning), not by throwing, so it has to be checked explicitly.
        $urlContents = file_get_contents($link, false, $context);
        if ($urlContents === false) {
            if ($logger) {
                $logger->info('error unable to fetch ' . $link);
            }
            return array();
        }

        // str_get_html() can hand back false instead of a DOM object; calling
        // clear() on that further down would be a fatal error.
        $html = str_get_html($urlContents);
        if (!is_object($html)) {
            if ($logger) {
                $logger->info('error unable to build html dom for ' . $link);
            }
            return array();
        }

        if ($logger) {
            $logger->info('html content ' . $html);
        }
    } catch (Exception $e) {
        // The logger is optional here as well: it defaults to null.
        if ($logger) {
            $logger->info('error ' . $e->getMessage());
        }
        return array();
    }

    $parser = getParser($link);
    $annonces = $parser->parse($html);

    // Release the DOM explicitly once parsing is done.
    $html->clear();
    unset($html);

    return $annonces;
}