$lines = $c->getalllineswhere('>>'); $c->close(); unset($c); //echo '$lines:', htmlspecialchars(print_r($lines, true)), '<br />'; flush(); foreach ($lines as $line) { $bigC++; if ($bigC >= $fromC) { $link = Crawler::extract($line, 'href="', '"'); echo 'Opening ', $bigC, ' ', $link, '<br />'; flush(); $c = new Crawler($link); if ($c->stream) { $c->go2linewhere('time SG_txtc'); $time = $c->getbetween('>(', ')<'); $blines = $c->getalllineswhere('/orignal/'); $c->close(); unset($c); //echo '$blines:', htmlspecialchars(print_r($blines, true)),'<br />'; flush(); foreach ($blines as $bline) { if (strpos($bline, 'url=') === false) { $blink = Crawler::extract($bline, 'HREF="', '"'); } else { if (strpos($bline, 'url=') !== false) { $blink = Crawler::extract($bline, 'url=', '"'); } } $blink = str_replace('&690', '', $blink); echo '<a href="', $blink, '">', $time, "</a><br />\n"; //echo '<a href="', $blink, '">', Crawler::n($bigC, 3), "</a><br />\n";
require_once "crawler.php";

/*
 * Crawls the numbered album pages "$start$i" for $i = $istart .. $ifinish and,
 * for every line of a page that contains 'pt_border', prints an HTML link to
 * the image URL derived from the first <a href="..."> found on that line.
 *
 * Request parameters (GET or POST; POST wins when both are present):
 *   istart  - first page number to crawl            (default 1)
 *   ifinish - last page number to crawl, inclusive  (default 12)
 *   start   - URL prefix the page number is appended to
 *   fromC   - entries whose running counter is below this value are not printed
 *
 * The running counter restarts at ($i * 1000) on every page and is incremented
 * once per matching line, so fromC = 3005 resumes at the 5th entry of page 3.
 */

$istart  = 1;
$ifinish = 12;
$start   = 'http://photo.blog.sina.com.cn/u/1264425925/page';
$fromC   = 0;

// Import only the four known settings instead of extract()-ing the whole
// request, which would let a caller overwrite arbitrary variables.
// $_GET is applied first and $_POST second so POST keeps its precedence.
foreach (array($_GET, $_POST) as $request) {
    if (isset($request['istart']) && is_scalar($request['istart'])) {
        $istart = (int) $request['istart'];
    }
    if (isset($request['ifinish']) && is_scalar($request['ifinish'])) {
        $ifinish = (int) $request['ifinish'];
    }
    if (isset($request['fromC']) && is_scalar($request['fromC'])) {
        $fromC = (int) $request['fromC'];
    }
    if (isset($request['start']) && is_scalar($request['start'])) {
        // NOTE(review): a request-supplied URL prefix is handed to Crawler as-is;
        // confirm Crawler only opens http(s) URLs if this script is ever exposed
        // to untrusted callers.
        $start = (string) $request['start'];
    }
}

$bigC = 0;

for ($i = $istart; $i <= $ifinish; $i++) {
    // Counter namespace for this page: page 3 yields 3001, 3002, ...
    $bigC = $i * 1000;
    $turl = $start . $i;

    // $turl may contain request-supplied text, so escape it for HTML output.
    echo $i, htmlspecialchars($turl, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'), "<br />\n";
    flush();

    $c = new Crawler($turl);
    if ($c->stream) {
        $lines = $c->getalllineswhere('pt_border');
        $c->close();

        foreach ($lines as $line) {
            $bigC++;
            if ($bigC < $fromC) {
                continue;
            }

            $link = Crawler::extract($line, '<a href="', '"');
            // Presumably extract() yields an empty/non-string value when the
            // markers are absent (verify against crawler.php); such a line has
            // no usable link, so skip it rather than print an empty anchor.
            if (!is_string($link) || $link === '') {
                continue;
            }

            // Rewrite the photo page URL into the URL of the image itself:
            // switch to the static host and to the '/orignal/' path (spelling
            // as used in the target URLs).
            $imgurl = str_replace('photo.blog.sina.com.cn', 'static10.photo.sina.com.cn', $link);
            $imgurl = str_replace('/photo/', '/orignal/', $imgurl);

            // Crawled content is untrusted: escape it for both the attribute
            // and the link text.
            $href  = htmlspecialchars($imgurl, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            $label = htmlspecialchars(basename($imgurl), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            echo "<a href='{$href}'>" . $label . "</a><br/>\n";
        }
    }
    flush();
}