/**
  * Scrape the given topic url for all the posts in a topic and return as xml.
  *
  * Every text field is XML-escaped and the post body is wrapped in CDATA
  * (with any embedded "]]>" split across two sections), so the fragment
  * stays well-formed whatever characters the scraped posts contain.
  *
  * @param string $link URL of the topic page to fetch.
  *
  * @return string One <post> element per scraped post, concatenated. Empty
  *                when the topic scraper does not return an array; an error
  *                line is printed in that case.
  */
 private function scrape_topic($link)
 {
     $scraper = new Scraper($link);
     $scraper->run();
     $topic_scraper = new GoogleGroupsTopicScraper($scraper->html);
     $topic = $topic_scraper->run();

     if (!is_array($topic)) {
         print "ERROR: bad topic (url={$link})\n";
         return '';
     }

     // ENT_SUBSTITUTE keeps a field with invalid UTF-8 from collapsing to ''.
     // NOTE(review): assumes the scraper hands back raw, not already
     // entity-encoded, text -- confirm against GoogleGroupsTopicScraper.
     $flags = ENT_XML1 | ENT_QUOTES | ENT_SUBSTITUTE;

     $xml = '';
     $i = 0;
     foreach ($topic as $detail) {
         // "]]>" would terminate the CDATA section early; close the section
         // after "]]" and reopen it before ">" so the text survives intact.
         $body = str_replace(']]>', ']]]]><![CDATA[>', (string) $detail['body']);

         $xml .= "      <post idx=\"{$i}\">\n";
         $xml .= '        <author>' . htmlspecialchars((string) $detail['author'], $flags, 'UTF-8') . "</author>\n";
         $xml .= '        <email>' . htmlspecialchars((string) $detail['email'], $flags, 'UTF-8') . "</email>\n";
         $xml .= '        <date>' . htmlspecialchars((string) $detail['date'], $flags, 'UTF-8') . "</date>\n";
         $xml .= '        <timestamp>' . htmlspecialchars((string) $detail['timestamp'], $flags, 'UTF-8') . "</timestamp>\n";
         $xml .= "        <body>\n";
         $xml .= "<![CDATA[\n" . $body . "\n]]>\n";
         $xml .= "        </body>\n";
         $xml .= "      </post>\n";
         $i++;
     }

     return $xml;
 }
Exemplo n.º 2
0
        //
        curl_setopt($ch, CURLOPT_AUTOREFERER, true);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        //
        curl_setopt($ch, CURLOPT_HTTPPROXYTUNNEL, true);
        curl_setopt($ch, CURLOPT_CURLOPT_MAXREDIRS, 10);
        $this->result = curl_exec($ch);
        curl_close($ch);
    }
    /**
     * Fetch $this->url with file_get_contents() and keep whatever it returns
     * (the contents, or false on failure) in $this->result.
     */
    private function exec_FGC()
    {
        $contents = file_get_contents($this->url);
        $this->result = $contents;
    }
    /**
     * Perform the fetch: via cURL when CURL_ENABLED is truthy, otherwise via
     * file_get_contents().
     */
    public function run()
    {
        if (CURL_ENABLED) {
            $this->exec_CURL();
        } else {
            $this->exec_FGC();
        }
    }
}
// Demo entry point: fetch the page named by the `url` query parameter and
// display its raw source inside a <pre> block.
$url = (isset($_GET['url']) && is_string($_GET['url'])) ? trim($_GET['url']) : '';

// Accept absolute http(s) URLs only. The non-cURL path passes the value to
// file_get_contents(), which would otherwise happily read local files or
// other stream wrappers chosen by the requester.
$scheme = strtolower((string) parse_url($url, PHP_URL_SCHEME));
$is_http_url = filter_var($url, FILTER_VALIDATE_URL) !== false
    && in_array($scheme, array('http', 'https'), true);

if (!$is_http_url) {
    http_response_code(400);
    echo 'ERROR: the "url" query parameter must be an absolute http(s) URL.';
} else {
    $scraper = new Scraper($url);
    $scraper->run();
    echo '<pre>';
    // The fetched document is untrusted markup: escape it so it is shown as
    // text instead of being rendered/executed in this page.
    echo htmlspecialchars(print_r($scraper->result, true), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    echo '</pre>';
}