/**
 * Print every link found on the link line of the start page.
 *
 * Opens $this->url with a Crawler, advances to the first line containing
 * '<p><a href="', and echoes one HTML anchor per '<a href="' occurrence on
 * that line. The hentaifromhell.net redirect prefix is stripped from each
 * target before it is printed.
 *
 * Output is flushed after every link so results appear progressively.
 *
 * @return void
 */
public function go()
{
    $c = new Crawler($this->url);
    $c->go2linewhere('<p><a href="');
    $c->close();

    // Element 0 is whatever precedes the first anchor, so iteration starts at 1.
    $ledak = explode('<a href="', $c->curline);
    $total = count($ledak);

    for ($i = 1; $i < $total; ++$i) {
        // Crawler::cutuntil() is defined elsewhere; presumably it returns the
        // text before the first '"' (i.e. the href value) -- TODO confirm.
        $aurl = Crawler::cutuntil($ledak[$i], '"');
        $aurl = str_replace('http://hentaifromhell.net/redirect.html?', '', $aurl);

        // The URL comes from a remote page: escape it so quotes or markup in
        // the scraped value cannot break out of the attribute or inject HTML.
        $safe = htmlspecialchars($aurl, ENT_QUOTES, 'UTF-8');
        echo "<a href='{$safe}'>{$safe}</a><br />\n";
        flush();
    }

    // A second stage (following each link to the line containing
    // id="thepic" and skipping hosts listed in $this->blacklist) was
    // commented out in the original and has been removed as dead code.
}
} break; case 'phase4': // folder yg -original$ dan -reprint$ digabung jdai satu // filenamenya diprefix ^original- dan ^reprint- $dirname = 'Carl Barks'; $subdirs = scandir($dirname); $n = count($subdirs); $x = 0; for ($i = 0; $i < $n; ++$i) { // jika berakhiran -original$, // asumsi terurut alfabet menurun jadi -original dulu baru persis di bawahnya -reprint if (preg_match('/\\-original$/', $subdirs[$i]) && preg_match('/\\-reprint/', $subdirs[$i + 1])) { $orig = $subdirs[$i]; $repr = $subdirs[$i + 1]; $cleanname = Crawler::cutuntil($orig, '-original'); exec("mkdir \"{$dirname}\\{$cleanname}\""); echo $cleanname . "\n"; //debug // baca isi original dan move ke tempat dan nama baru $origfiles = scandir($dirname . '\\' . $orig); foreach ($origfiles as $filename) { if ($filename != '.' && $filename != '..') { $from = $dirname . '\\' . $orig . '\\' . $filename; $to = $dirname . '\\' . $cleanname . '\\original-' . $filename; exec("move \"{$from}\" \"{$to}\""); echo "From {$from}; To {$to}; \n"; } } exec("rmdir \"{$dirname}\\{$orig}\""); $reprfiles = scandir($dirname . '\\' . $repr);
$ch = Crawler::extract($line, "value='", "'"); if ($ch) { $chapters[$ch] = $ch; } } else { if (Crawler::is_there($line, '</select>')) { break; } } } } else { $c->go_to('name="chapter"'); $ledak = explode('value="', $c->curline); $chapters = array(); for ($i = 1; $i < count($ledak); $i++) { $ch = Crawler::cutuntil($ledak[$i], '"'); $chapters[$ch] = $ch; } } } $c->close(); foreach ($chapters as $key => $val) { echo '<input type="checkbox" name="chapters[' . $key . ']" value="' . $val . '" />' . $key . '<input type="text" name="infixs[' . $key . ']" value="' . $key . '" />' . '<br/>' . "\n"; flush(); } } else { if (@$stage2) { foreach ($chapters as $key => $val) { echo '<input type="checkbox" name="chapters[' . $key . ']" value="' . $val . '" checked="checked"/>' . $val . '<input type="text" name="infixs[' . $key . ']" value="' . $infixs[$key] . '" />' . '<br/>' . "\n"; flush(); }
// Site root; image paths scraped from the viewer page are appended to it.
$sitename = "http://eatmanga.com";

// Guarded read: a request without "base" no longer raises an undefined-index
// notice. The resulting value (null) is the same as before.
$pref = isset($_POST['base']) ? $_POST['base'] : null;

// Normalise the chapter address to its "/index.php/Manga" form, which is the
// form used below to build the paginated URLs.
if (!Crawler::is_there($pref, '/index.php/')) {
    $pref = str_replace($sitename . '/Manga', $sitename . '/index.php/Manga', $pref);
}

if ($base) {
    $finish = false;
    $page = 1;

    // Walk the viewer pages until the navigation line no longer contains ' ›'
    // (presumably the "next page" marker -- TODO confirm against the site).
    while (!$finish) {
        // $base originates from the request, so escape it before echoing.
        echo htmlspecialchars($base, ENT_QUOTES, 'UTF-8') . "<br/>\n";
        flush();

        $c = new Crawler($base);
        $c->go2linewhere('mangaviewer_toppest_navig');

        if (Crawler::is_there($c->curline, ' ›')) {
            // Queue the next page; the current line is still processed below.
            $base = $pref . '/?page=' . ++$page;
        } else {
            $finish = true;
        }

        $ledak = explode('<img src="', $c->curline);
        $c->close();

        // Element 0 is the text before the first image tag; skip it.
        $total = count($ledak);
        for ($i = 1; $i < $total; ++$i) {
            $segm = $ledak[$i];
            $parturl = Crawler::cutuntil($segm, '"');

            // Turn the thumbnail address into the full-size image address.
            $parturl = str_replace('index.php', 'mangas', $parturl);
            $parturl = str_replace('?action=thumb', '', $parturl);

            // Scraped and request-derived values are escaped for HTML output.
            $href = htmlspecialchars($sitename . $parturl, ENT_QUOTES, 'UTF-8');
            $text = htmlspecialchars(
                $prefix . '-' . Crawler::n($chapter, 3) . '-' . basename($parturl),
                ENT_QUOTES,
                'UTF-8'
            );
            echo '<a href="' . $href . '">' . $text . '</a><br/>' . "\n";
            flush();
        }
    }
}
</form> <?php $masih = true; while ($masih) { echo "{$start_url}<br/>\n"; flush(); $craw = new Crawler($start_url); //first get the pictures $craw->go2linewhere('class="photogallery-celeb"'); $craw->readline(); $line = $craw->curline; //echo $line; //echo '<br />HOI<br />'; $ledakan = explode('<img src="', $line); for ($i = 1; $i < count($ledakan); $i++) { $imgurl = str_replace('/t/', '/', Crawler::cutuntil($ledakan[$i], '"')); $file = basename($imgurl); echo "<a href='{$imgurl}'>{$file}</a><br />\n"; } //then check the next link $craw->go_to('class="arrow">»</a>'); $url = $craw->getbetweenlast('<a href="', '"'); if ($url == '#') { $masih = false; } else { $start_url = dirname($start_url . 'a') . '/' . $url; } /* $craw->go2lineor(array('<span class="global_pref_next_no_link">»', 'class="global_pref_next">»')); if (strpos($craw->curline, '<span class="global_pref_next_no_link">»') !== false) { $masih = false;
/**
 * Crawl one chapter and print a download link for every page image.
 *
 * The chapter page at $start_url is opened and the line following the one
 * containing 'headerSelect' is read; every value="..." on that line is
 * treated as the address of one page of the chapter. Each page is then
 * fetched, the image on (or right after) the line containing
 * id="readerPage" is extracted, and an anchor "<prefix>-<chapter>-<file>"
 * pointing at the image is echoed.
 *
 * Uses the globals $prefix (file-name prefix) and $bas (prepended to page
 * addresses that do not start with "http://").
 *
 * @param string $start_url Address of the chapter; its last '/'-separated
 *                          segment is used as the chapter label.
 *
 * @return array Map of generated file name => image URL for every page whose
 *               image was found. (The original built this map but discarded
 *               it; returning it is backward compatible.)
 */
function crawl_1_page($start_url)
{
    global $prefix;
    global $bas;

    // Upper bound on fetch attempts per page. The original retried forever,
    // which hangs the script when a page permanently lacks the image marker.
    $max_attempts = 5;

    $cr = new Crawler($start_url);
    $cr->go2linewhere('headerSelect');
    $cr->readline();
    $line = $cr->curline;
    $cr->close();

    // Chapter label: last URL segment, with its integer part passed through
    // Crawler::n(..., 3) (presumably zero-padding to 3 digits -- TODO confirm).
    // A label such as "12.5" keeps its fractional part untouched.
    $chap = Crawler::cutfromlast1($start_url, '/');
    if (strpos($chap, '.') === false) {
        $chap = Crawler::n($chap, 3);
    } else {
        $a = explode('.', $chap);
        $a[0] = Crawler::n($a[0], 3);
        $chap = implode('.', $a);
    }

    // Collect page addresses keyed by their last URL segment; element 0 of the
    // split is the text before the first value="..." and is skipped.
    $ledak = explode('value="', $line);
    $total = count($ledak);
    $pages = array();
    for ($i = 1; $i < $total; $i++) {
        $uurl = Crawler::cutuntil($ledak[$i], '"');
        $key = Crawler::cutfromlast1($uurl, '/');
        $pages[$key] = strpos($uurl, 'http://') === 0 ? $uurl : $bas . $uurl;
    }

    $results = array();
    foreach ($pages as $new_url) {
        $img_url = '';

        for ($attempt = 1; $attempt <= $max_attempts && empty($img_url); ++$attempt) {
            $cr = new Crawler($new_url);
            $cr->go2linewhere('id="readerPage"');
            // The <img> tag may sit on the line after the marker.
            if ($cr->strpos('<img src="') === false) {
                $cr->readline();
            }
            $line = $cr->curline;
            $cr->close();

            $img_url = Crawler::extract($line, '<img src="', '"');
        }

        if (empty($img_url)) {
            // Give up on this page but keep going with the remaining ones.
            echo 'No image found at ' . htmlspecialchars($new_url, ENT_QUOTES, 'UTF-8') . "<br />\n";
            flush();
            continue;
        }

        $filename = $prefix . '-' . $chap . '-' . urldecode(basename($img_url));
        $results[$filename] = $img_url;

        // Scraped values are escaped so they cannot break the generated markup.
        echo '<a href="' . htmlspecialchars($img_url, ENT_QUOTES, 'UTF-8') . '">'
            . htmlspecialchars($filename, ENT_QUOTES, 'UTF-8')
            . "</a><br />\n";
        flush();
    }

    return $results;
}
// Open the progress cell; one "<infix>.. " token is appended per chapter found.
echo '<tr><td colspan="2">Progress.. ';

// Parallel lists filled in page order: link target, full description, and the
// first space-delimited word of the description.
$chapters = array();
$descriptions = array();
$infix = array();

$c = new Crawler($base);
$c->go_to('class="datarow"');

while ($line = $c->readline()) {
    if (!Crawler::is_there($line, '"datarow-1"')) {
        // Not a chapter row: stop at the end of the table, otherwise keep reading.
        if (Crawler::is_there($line, '</table>')) {
            break;
        }
        continue;
    }

    // The description is on the matched line; the link is two lines below it.
    $line1 = $line;
    $c->readline();
    $line2 = $c->readline();

    $chapters[] = Crawler::extract($line2, 'href="', '"');

    $desc = Crawler::extract($line1, '>', '</td>');
    $descriptions[] = $desc;

    $ifx = Crawler::cutuntil($desc, ' ');
    $infix[] = $ifx;

    echo $ifx . '.. ';
    flush();
}

$c->close();

echo 'End</td></tr>';
flush();

// Reversal of the three lists is intentionally left disabled:
//$chapters = array_reverse($chapters);
//$descriptions = array_reverse($descriptions);
//$infix = array_reverse($infix);
/**
 * Collect every substring of $string that follows an occurrence of $from.
 *
 * $string is split on $from; for each piece after the first, the part
 * selected by Crawler::cutuntil($piece, $to) is appended to the result
 * (presumably the text before the first $to -- TODO confirm against
 * cutuntil()).
 *
 * @param string      $string Text to scan.
 * @param string|null $from   Opening delimiter. When null or empty there is
 *                            nothing to split on and an empty array is
 *                            returned.
 * @param string|null $to     Closing delimiter handed to Crawler::cutuntil().
 *
 * @return array List of extracted substrings, in order of appearance.
 */
public static function extract_to_array($string, $from = null, $to = null)
{
    // explode() rejects an empty separator: it throws ValueError on PHP 8 and
    // emitted a warning (yielding no results) before. Return the same empty
    // result explicitly instead of relying on that failure path.
    if ($from === null || $from === '') {
        return array();
    }

    $ledak = explode($from, $string);
    $n = count($ledak);
    $temp = array();

    // Piece 0 is the text before the first $from, so it is skipped.
    for ($i = 1; $i < $n; ++$i) {
        $temp[] = Crawler::cutuntil($ledak[$i], $to);
    }

    return $temp;
}
/**
 * Print download links for an animephile.com gallery.
 *
 * Two page layouts are handled:
 *  - URLs containing '/hentai-doujinshi/': the page count is read from the
 *    "viewerLabel" text on the line containing id="mainimage", then every
 *    "?page=N" is fetched and the src of its main image is linked (relative
 *    to http://www.animephile.com), labelled with the URL's base name up to
 *    the first '.'.
 *  - anything else: every src="..." on the line containing id="gallery" is
 *    linked with '/thumbs/' removed from the path, labelled with the name of
 *    the image's parent directory.
 *
 * @param string $url Address of the gallery page.
 *
 * @return void
 */
function animephile_realm($url)
{
    $base = 'http://www.animephile.com';
    $name = basename($url);
    $name = Crawler::cutuntil($name, '.');

    // Compare against false explicitly: a match at offset 0 is still a match.
    if (strpos($url, '/hentai-doujinshi/') !== false) {
        $c = new Crawler($url);
        $c->go_to('id="mainimage"');
        $line = $c->curline;
        // Release the connection; the original never closed its crawlers.
        $c->close();

        // Without a page count there is nothing to iterate over.
        if (!preg_match('/"viewerLabel"> of (\\d+)<\\//', $line, $m)) {
            return;
        }
        $max = (int) $m[1];
        $label = htmlspecialchars($name, ENT_QUOTES, 'UTF-8');

        for ($i = 1; $i <= $max; $i++) {
            $c = new Crawler($url . '?page=' . $i);
            $c->go_to('id="mainimage"');
            $line = $c->curline;
            $c->close();

            // Skip pages whose main image cannot be located instead of
            // emitting a link built from an undefined match.
            if (!preg_match('/id="mainimage" src="([^"]+)"/', $line, $m)) {
                continue;
            }

            $href = htmlspecialchars($base . $m[1], ENT_QUOTES, 'UTF-8');
            echo "<a href=\"{$href}\">{$label}</a><br />\n";
        }

        return;
    }

    $c = new Crawler($url);
    $c->go_to('id="gallery"');
    $line = $c->curline;
    $c->close();

    $raw = Crawler::extract_to_array($line, 'src="', '"');
    foreach ($raw as $r) {
        // Thumbnails live under /thumbs/; dropping that segment gives the
        // address of the full-size image.
        $r = str_replace('/thumbs/', '/', $r);
        $name = basename(dirname($r));

        // Scraped values are escaped before being placed into markup.
        $href = htmlspecialchars($r, ENT_QUOTES, 'UTF-8');
        $label = htmlspecialchars($name, ENT_QUOTES, 'UTF-8');
        echo "<a href=\"{$href}\">{$label}</a><br />\n";
    }
}
} ?> <html> <head><meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" /> </head> <body> <form action="" method="post"> Start url: <input type="text" name="start_url" value="<?php echo isset($start_url) ? $start_url : ''; ?> " /> <input type="submit" value="Submit" /> </form> <?php if ($start_url) { $start_url = Crawler::is_there($start_url, '#') ? Crawler::cutuntil($start_url, '#') : $start_url; $ledak = explode('/', $start_url); if (count($ledak) == 5) { // 1 album crawl_album($start_url); } else { if (count($ledak) == 4) { // 1 gallery $c = new Crawler($start_url); $c->go_to('<noscript>'); $c->go_to('<noscript>'); $links = array(); while ($line = $c->readline()) { if (Crawler::is_there($line, '<a href="')) { $url = Crawler::extract($line, '<a href="', '"'); $c->readline();
<body>
<form action="" method="post">
    Start url: <input type="text" name="start_url" value="<?php echo isset($start_url) ? htmlspecialchars($start_url, ENT_QUOTES, 'UTF-8') : ''; ?>" />
    <input type="submit" />
</form>
<?php
// Site root prepended to the first-level links, which are written below as
// if they were site-relative.
$basename = 'http://bluelaguna.net';

// !empty() has the same truthiness as the original test but does not raise a
// notice when $start_url was never set.
if (!empty($start_url)) {
    // First level: the line containing the "rectangle" iframe holds the links
    // to the sub-pages.
    $c = new Crawler($start_url);
    $c->go2linewhere('<iframe id="rectangle"');
    $c->close();
    $ledak = explode('<a href="', $c->curline);
    $big = array();

    // Element 0 of each split is the text before the first anchor; skip it.
    $total = count($ledak);
    for ($i = 1; $i < $total; ++$i) {
        $aurl = $basename . Crawler::cutuntil($ledak[$i], '"');
        // Scraped text is escaped before being written into the page.
        echo htmlspecialchars($aurl, ENT_QUOTES, 'UTF-8') . '<br />';

        // Second level: same marker line on the sub-page holds the file links.
        $c = new Crawler($aurl);
        $c->go2linewhere('<iframe id="rectangle"');
        $c->close();
        $ledak2 = explode('<a href="', $c->curline);

        $total2 = count($ledak2);
        for ($j = 1; $j < $total2; ++$j) {
            $burl = Crawler::cutuntil($ledak2[$j], '"');
            echo '<a href="' . htmlspecialchars($burl, ENT_QUOTES, 'UTF-8') . '">'
                . htmlspecialchars(basename($burl), ENT_QUOTES, 'UTF-8')
                . "</a><br />\n";
        }

        echo "<br />\n";
        flush();
    }
}