/**
 * Walks a paginated gallery starting at $base and passes every image it finds
 * to save_to(), using one running counter across all pages for the file names.
 *
 * A page is considered to have a successor when the line the crawler stops on
 * contains ':: next ::'; the href on that line is appended to the ORIGINAL
 * $base to build the next page URL.
 *
 * @param string $base        URL of the first gallery page.
 * @param string $destination Path prefix under which the numbered files are saved.
 */
private function download_all($base, $destination)
{
    // Not referenced anywhere in this method.
    $sitename = "http://www.imagefap.com";

    $rootUrl = $base;
    $counter = 1;
    $morePages = true;

    while ($morePages) {
        $crawler = new Crawler($base);
        echo $base . "\n";

        // Presumably stops at whichever of the two markers appears first.
        $crawler->go_to(array('<table style=', ':: next ::'));

        $morePages = (bool) Crawler::is_there($crawler->curline, ':: next ::');
        if ($morePages) {
            $nextHref = Crawler::extract($crawler->curline, 'href="', '"');
            $base = $rootUrl . html_entity_decode($nextHref);
            // The pager line came first; advance to the start of the table.
            $crawler->go_to('<table style=');
        }

        while ($row = $crawler->readline()) {
            if (!Crawler::is_there($row, 'border=0')) {
                // A closing form tag on a non-image line ends this page.
                if (Crawler::is_there($row, '</form>')) {
                    break;
                }
                continue;
            }

            // Turn the thumbnail address into the full-size one.
            $imageUrl = Crawler::extract($row, 'src="', '"');
            $imageUrl = str_replace('/thumb/', '/full/', $imageUrl);
            $imageUrl = preg_replace('/\\/x\\d\\./', '/', $imageUrl);

            $suffix = Crawler::cutfromlast(basename($imageUrl), '.');
            $number = Crawler::n($counter++, 4);
            $this->save_to($imageUrl, "{$destination}/{$number}{$suffix}");
        }

        $crawler->close();
    }
}
// Prints, as HTML links, every image found on the viewer line of each page of
// an eatmanga.com chapter. Pagination continues while that line contains ' ›'.
// NOTE(review): $base, $prefix and $chapter are not assigned in this fragment;
// they are assumed to be set by code that runs before it.
$sitename = "http://eatmanga.com";

// The request value is untrusted: guard against it being absent and never echo
// anything derived from it without escaping.
$pref = isset($_POST['base']) ? $_POST['base'] : '';
if (!Crawler::is_there($pref, '/index.php/')) {
    $pref = str_replace($sitename . '/Manga', $sitename . '/index.php/Manga', $pref);
}

if ($base) {
    $page = 1;
    $finish = false;
    while (!$finish) {
        echo htmlspecialchars($base, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "<br/>\n";
        flush();

        $c = new Crawler($base);
        $c->go2linewhere('mangaviewer_toppest_navig');
        if (Crawler::is_there($c->curline, ' ›')) {
            // Subsequent pages are addressed relative to the posted base URL.
            $base = $pref . '/?page=' . ++$page;
        } else {
            $finish = true;
        }
        $ledak = explode('<img src="', $c->curline);
        $c->close();

        // Element 0 is the text before the first image tag, so start at 1.
        $total = count($ledak);
        for ($i = 1; $i < $total; ++$i) {
            $parturl = Crawler::cutuntil($ledak[$i], '"');
            $parturl = str_replace('index.php', 'mangas', $parturl);
            $parturl = str_replace('?action=thumb', '', $parturl);

            // The URL was lifted out of an HTML attribute, so entities it already
            // contains are left alone (double_encode = false) while stray markup
            // characters are neutralised.
            $href = htmlspecialchars($sitename . $parturl, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
            $label = htmlspecialchars(
                $prefix . '-' . Crawler::n($chapter, 3) . '-' . basename($parturl),
                ENT_QUOTES | ENT_SUBSTITUTE,
                'UTF-8',
                false
            );
            echo '<a href="' . $href . '">' . $label . '</a><br/>' . "\n";
            flush();
        }
    }
}
/**
 * Prints one HTML link per page image of a chapter and returns the same
 * information as an array.
 *
 * The page list is taken from the line that follows the one containing
 * 'headerSelect': every value="..." on it is treated as a page URL, and URLs
 * that do not start with 'http://' are prefixed with the global $bas. Each
 * page is then fetched and the first <img src="..."> found at or right after
 * the line containing id="readerPage" is used as the image URL.
 *
 * A page whose image cannot be found is retried a limited number of times and
 * then skipped with a message to the error log, so a permanently broken page
 * can no longer make the function loop forever.
 *
 * Reads the globals $prefix (file-name prefix) and $bas (base for relative
 * page URLs).
 *
 * @param string $start_url Chapter URL; its last path segment is used as the
 *                          chapter number in the generated file names.
 * @return array Map of generated file name => image URL for every page found.
 */
function crawl_1_page($start_url)
{
    global $prefix;
    global $bas;

    // Upper bound on fetches per page before giving up on it.
    $max_attempts = 5;

    $cr = new Crawler($start_url);
    $cr->go2linewhere('headerSelect');
    $cr->readline();
    $line = $cr->curline;
    $cr->close();

    // Chapter label: pad the integer part and keep any ".5"-style suffix.
    $chap = Crawler::cutfromlast1($start_url, '/');
    if (strpos($chap, '.') === false) {
        $chap = Crawler::n($chap, 3);
    } else {
        $a = explode('.', $chap);
        $a[0] = Crawler::n($a[0], 3);
        $chap = implode('.', $a);
    }

    // Element 0 is the text before the first value="...", so start at 1.
    $ledak = explode('value="', $line);
    $total = count($ledak);
    $pages = array();
    for ($i = 1; $i < $total; $i++) {
        $uurl = Crawler::cutuntil($ledak[$i], '"');
        $key = Crawler::cutfromlast1($uurl, '/');
        $pages[$key] = strpos($uurl, 'http://') === 0 ? $uurl : $bas . $uurl;
    }

    $results = array();
    foreach ($pages as $new_url) {
        $img_url = '';
        for ($attempt = 1; $attempt <= $max_attempts && empty($img_url); $attempt++) {
            $cr = new Crawler($new_url);
            $cr->go2linewhere('id="readerPage"');
            // The image tag may sit on the line after the marker.
            if ($cr->strpos('<img src="') === false) {
                $cr->readline();
            }
            $line = $cr->curline;
            $cr->close();

            $img_url = Crawler::extract($line, '<img src="', '"');
        }

        if (empty($img_url)) {
            error_log("crawl_1_page: no image found at {$new_url} after {$max_attempts} attempts");
            continue;
        }

        $filename = $prefix . '-' . $chap . '-' . urldecode(basename($img_url));
        $results[$filename] = $img_url;

        echo '<a href="' . $img_url . '">' . $filename . "</a><br />\n";
        flush();
    }

    return $results;
}
/**
 * Pads a number to a target length via Crawler::n().
 *
 * When $ic is true and $n contains a '.', only the part before the first dot
 * is padded and the remainder (as returned by Crawler::cutfrom) is appended
 * unchanged, so that "305.5" with length 4 becomes "0305.5". In every other
 * case the whole of $n is handed to Crawler::n().
 *
 * @param mixed $n  Number to be padded.
 * @param int   $l  Goal length.
 * @param bool  $ic Is chapter? Enables the dotted-number handling above.
 * @return mixed Whatever Crawler::n() produces, plus the untouched suffix.
 */
public static function pad($n, $l, $ic = true)
{
    $hasFraction = $ic && strpos($n, '.') !== false;
    if (!$hasFraction) {
        return Crawler::n($n, $l);
    }

    $whole = Crawler::cutuntil($n, '.');

    return Crawler::n($whole, $l) . Crawler::cutfrom($n, '.');
}
/**
 * Scans a page and prints an HTML link for each picture it contains.
 *
 * A picture is recognised by a line matching "pic dashedOn"; the image URL is
 * taken from the <img src="..."> on the line that follows it. Scanning stops
 * at the first line matching "commentButton" that comes after at least one
 * picture marker has been seen.
 *
 * If the line after a marker holds no <img src="...">, no link is printed for
 * it (previously this read an undefined match and printed an empty href).
 *
 * @param string $url  Page to crawl.
 * @param string $text Link text used for every image found.
 */
public function crawl_page($url, $text)
{
    echo "Entering '{$url}'<br/>\n";
    flush();

    $c = new Crawler($url, true);
    $dah_gambar = false; // set once a picture marker has been seen

    while ($line = $c->readline()) {
        if (preg_match('/pic dashedOn/i', $line)) {
            $dah_gambar = true;
            // readline() may yield a non-string at end of input; cast for preg_match.
            $line = (string) $c->readline();
            if (preg_match('/<img src="([^"]+)"/i', $line, $matches)) {
                $img = $matches[1];
                echo "<a href='{$img}'>{$text}</a><br/>\n";
                flush();
            }
        } elseif ($dah_gambar && preg_match('/commentButton/i', $line)) {
            break;
        }
    }

    $c->close();
}
/**
 * Prints a numbered HTML link for every image linked from an ichan.org thread.
 *
 * Example thread:  http://ichan.org/l/res/33005.html
 * Example link as it appears in the page:
 *   <a href="http://c3a56840.linkbucks.com/url/http://ichan.org/l/src/128001023914.jpg">
 *
 * Lines are considered from the first 'class="filesize"' marker onwards and
 * scanning ends at the first non-image line containing '"footerbg"'. The
 * redirector prefix ("http://<host>/url/") is stripped from each link.
 *
 * @param string $start_url URL of the thread page.
 */
function ichan_realm($start_url)
{
    $crawler = new Crawler($start_url);
    $crawler->go_to('class="filesize"');

    $count = 0;
    while ($row = $crawler->readline()) {
        $isImageLink = Crawler::is_there($row, '<a href')
            && Crawler::is_there($row, 'http://ichan.org/')
            && Crawler::is_there($row, '/src/');

        if (!$isImageLink) {
            if (Crawler::is_there($row, '"footerbg"')) {
                break;
            }
            continue;
        }

        // First double-quoted value on the line is taken as the link target.
        $wrapped = Crawler::extract($row, '"', '"');
        $direct = preg_replace('/http:\\/\\/[\\w\\.]+\\/url\\//', '', $wrapped);
        $label = Crawler::n(++$count, 3) . '.jpg';
        echo "<a href='{$direct}'>{$label}</a><br/>\n";
    }

    $crawler->close();
}