/**
 * Prints one HTML link per page of a single chapter.
 *
 * Collects the `value` attribute of every line containing `<option` that
 * follows the `name="pagejump"` marker (stopping at `</select>`). It then
 * moves to the `id="nextpage"` marker, advances one more line and takes the
 * text between `src="` and `"` as a sample image address. The directory part
 * of that address is the base of every emitted link: each link targets
 * `<dir>/<page>.jpg` and is labelled `<prefix>-<chapter>-<page>.jpg`.
 *
 * Reads the global $prefix for the label.
 *
 * @param string $url     Address of the chapter page to open.
 * @param mixed  $chapter Chapter identifier; passed through Crawler::pad(…, 3).
 */
function crawl_1_chapter($url, $chapter)
{
    global $sitename, $prefix;

    $crawler = new Crawler($url);
    $crawler->go_to('name="pagejump"');

    // Gather the page identifiers listed in the page-jump drop-down.
    $page_ids = array();
    while ($row = $crawler->readline()) {
        if (Crawler::is_there($row, '<option')) {
            $page_ids[] = Crawler::extract($row, 'value="', '"');
            continue;
        }
        if (Crawler::is_there($row, '</select>')) {
            break;
        }
    }

    // One sample image is enough: all pages are assumed to share its folder.
    $crawler->go_to('id="nextpage"');
    $crawler->readline();
    $sample_image = $crawler->getbetween('src="', '"');
    $crawler->close();

    $folder = dirname($sample_image);
    $suffix = '.jpg';
    $chapter = Crawler::pad($chapter, 3);

    foreach ($page_ids as $id) {
        $target = $folder . '/' . $id . $suffix;
        $label = $prefix . '-' . $chapter . '-' . $id . $suffix;
        echo "<a href='" . $target . "'>" . $label . "</a><br/>\n";
        flush();
    }
}
/**
 * Echoes the given URL, then a link to the image found on that page.
 *
 * The image address is the text between `<img src="` and `"` once the
 * crawler has been moved to the line containing `<a accesskey="v"`.
 * The link label is the basename of that address.
 *
 * @param string $url Page to open with Crawler.
 */
public function extract_page($url)
{
    echo $url . "<br />\n";
    flush();

    $page = new Crawler($url);
    $page->go2linewhere('<a accesskey="v"');
    $image = $page->getbetween('<img src="', '"');

    echo '<a href="', $image, '">', basename($image), '</a>', "<br />\n";
    flush();

    $page->close();
}
/**
 * Builds an HTML link to the file referenced by an Indowebster page.
 *
 * Moves the crawler with go2lineregexor(), takes the address assigned to
 * `location.href='…'` on that line, and assembles the link from the
 * directory of that address plus its `path=` value. The link text is the
 * URL-decoded remainder after `file_orig=`.
 *
 * @param string $url Page to open with Crawler.
 *
 * @return string An `<a href="…">…</a>` element.
 */
function crawl_indowebster($url)
{
    $page = new Crawler($url);
    $page->go2lineregexor('/(<\\/div><\\/a><\\/div><\\/div>)/', 1, 'href="#idws7"');
    $redirect = $page->getbetween('location.href=\'', '\'');

    $path = Crawler::extract($redirect, 'path=', '&');
    $original_name = Crawler::cutafter($redirect, 'file_orig=');
    $page->close();

    $href = dirname($redirect) . '/' . $path;
    $label = rawurldecode($original_name);

    return '<a href="' . $href . '">' . $label . '</a>';
}
/**
 * Resolves the image of one reader page and returns it keyed by a file name.
 *
 * The image address is the text between `src="` and `"` on the line the
 * crawler reaches via go_to('width="800"'). The generated name is
 * `<prefix>-<chapter>-<image>`, where `<image>` is the trailing
 * `digits.extension` part of the image basename. When the basename does not
 * end that way, the whole basename is used instead of reading an undefined
 * match group.
 *
 * @param string $fil     Source handed to Crawler (file or address).
 * @param string $url     Not used by this method.
 * @param string $prefix  Leading part of the generated name.
 * @param mixed  $chapter Chapter identifier; passed through Crawler::pad(…, 3).
 *
 * @return array Single-element map of generated name => image address.
 */
public function mangareader_1_page($fil, $url, $prefix, $chapter)
{
    $chapter = Crawler::pad($chapter, 3);

    $c = new Crawler($fil);
    $c->go_to('width="800"');
    $img = $c->getbetween('src="', '"');
    $c->close();

    // preg_match() leaves $m empty on a miss, so fall back to the full
    // basename rather than indexing a group that does not exist.
    $base = basename((string) $img);
    $iname = preg_match('/(\\d+\\.\\w+)$/', $base, $m) ? $m[1] : $base;

    $name = $prefix . '-' . $chapter . '-' . $iname;

    return array($name => $img);
}
/**
 * Echoes a list item linking to the image of one reader page.
 *
 * The image address is the text between `src="` and `"` on the line the
 * crawler reaches via go_to('width="800"'). The link text is
 * `<prefix>-<chapter>-<image>`, where `<prefix>` comes from $this->prefix and
 * `<image>` is the trailing `digits.extension` part of the image basename.
 * When the basename does not end that way, the whole basename is used
 * instead of reading an undefined match group.
 *
 * @param string $fil     Source handed to Crawler (file or address).
 * @param string $url     Not used by this method.
 * @param mixed  $chapter Chapter identifier; passed through Crawler::pad(…, 3).
 */
public function mangareader_1_page($fil, $url, $chapter)
{
    $prefix = $this->prefix;
    $chapter = Crawler::pad($chapter, 3);

    $c = new Crawler($fil);
    $c->go_to('width="800"');
    $img = $c->getbetween('src="', '"');
    $c->close();

    // preg_match() leaves $m empty on a miss, so fall back to the full
    // basename rather than indexing a group that does not exist.
    $base = basename((string) $img);
    $iname = preg_match('/(\\d+\\.\\w+)$/', $base, $m) ? $m[1] : $base;

    echo '<li><a href="' . $img . '">' . $prefix . '-' . $chapter . '-' . $iname . '</a>' . "</li>\n";
}
/**
 * Collects the entries listed between two dashed separator lines of a page.
 *
 * The crawler is moved to the second separator line and one line past it.
 * From there, every line up to (not including) the next separator
 * contributes the text found between two spaces.
 *
 * NOTE(review): the loop only ends when a separator line is seen; behaviour
 * at end of input depends on Crawler — confirm.
 *
 * @param string $url Page to open with Crawler.
 *
 * @return array Extracted entries in page order.
 */
function crawl_one_page($url)
{
    $separator = '------------------------------------------';

    $page = new Crawler($url);
    $page->go2linewhere($separator);
    $page->go2linewhere($separator);
    $page->readline();

    $entries = array();
    for (; $page->strpos($separator) === false; $page->readline()) {
        $entries[] = $page->getbetween(' ', ' ');
    }

    $page->close();

    return $entries;
}
/**
 * Prints one link per page image of the gallery at $this->url.
 *
 * Appends `/read` to $this->url when it does not already end with it, then
 * fetches that page through curl with a browser user agent. The page's
 * `var data = {…}` line is decoded as JSON to learn how many pages exist
 * (one per entry of `thumbs`). The image address template is taken from the
 * `return '…';` line inside the page's `imgpath(` function; the
 * `' + x + '` placeholder is replaced with the zero-padded page number.
 *
 * Every link is labelled with the name of the parent path segment of the
 * reader URL. Nothing is printed when the data line cannot be decoded or
 * has no `thumbs` collection.
 *
 * Example reader address: http://www.fakku.net/viewonline.php?id=2589
 */
public function go()
{
    if (!preg_match('/\\/read$/', $this->url)) {
        $this->url .= '/read';
    }

    $craw = new Crawler($this->url, array(
        'use_curl' => true,
        'agent' => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.2.13) Gecko/20101203 Firefox/3.6.13',
    ));

    $craw->go_to('var data = {');
    $json = Crawler::extract($craw->curline, ' = ', ';');
    $obj = json_decode($json);

    // 2012-05-06: the site changed; the image path is now built by a
    // JavaScript imgpath() function instead of a "mirror" variable.
    $craw->go_to('function imgpath(');
    $craw->go_to('return \'');
    $imgpath = $craw->getbetween("return '", "';");
    $craw->close();

    // json_decode() yields null on malformed input; without this guard the
    // loop below would read a property of a non-object.
    if (!is_object($obj) || !isset($obj->thumbs)) {
        return;
    }
    if (!is_array($obj->thumbs) && !is_object($obj->thumbs)) {
        return;
    }

    $dir = basename(dirname($this->url));

    foreach ($obj->thumbs as $key => $val) {
        // Page numbers are 1-based while the thumbs keys start at 0.
        $filename = Crawler::pad($key + 1, 3);
        $img = str_replace("' + x + '", $filename, $imgpath);
        echo "<a href='{$img}'>{$dir}</a><br/>\n";
        flush();
    }
}
/**
 * Prints links to every file listed in the directory behind a gallery page.
 *
 * Step 1: open $url, move to the first `ngg-gallery-thumbnail` block and
 * take the `href` found one line later as a sample file address.
 * Step 2: derive the containing directory from that sample, URL-encoding its
 * last path segment, and open that directory address.
 * Step 3: after the `<ul>` line (skipping one further line), emit a link for
 * each following line until a line contains `</ul>` or `"thumbs/"`.
 *
 * Progress lines ("Entering …", "Dir:…") are echoed along the way.
 *
 * @param string $url Gallery page to open with Crawler.
 */
function crawl1page($url)
{
    echo 'Entering ' . $url . '<br/>';
    flush();

    $c = new Crawler($url);
    $c->go2linewhere('<div class="ngg-gallery-thumbnail"');
    $c->readline();
    $sample = $c->getbetween('href="', '"');
    $c->close();

    $dir = dirname($sample);
    if (!$dir) {
        return;
    }

    // Re-encode only the last path segment of the directory.
    $folder = substr($dir, strrpos($dir, '/') + 1);
    $dir = dirname($dir) . '/' . rawurlencode($folder) . '/';
    echo 'Dir:' . $dir . '<br/>' . "\n";
    flush();

    $c = new Crawler($dir);
    $c->go2linewhere('<ul>');
    $c->readline();
    while ($line = $c->readline()) {
        // strpos() returns 0 for a match at the start of the line, so both
        // stop markers must be compared against false explicitly.
        if (strpos($line, '</ul>') !== false || strpos($line, '"thumbs/"') !== false) {
            break;
        }

        $filename = Crawler::extract($line, 'href="', '"');
        echo '<a href="' . $dir . $filename . '">' . rawurldecode($filename) . '</a><br/>' . "\n";
        flush();
    }
    $c->close();

    echo '<br/>' . "\n";
    flush();
}
/**
 * Prints a link for every page image of every given chapter.
 *
 * For each chapter the first page (`<base>/<chapter>/page/01`) is opened to
 * read the page list from the `id="pages"` drop-down. Each listed page is
 * then opened and the `<img src="…">` found after the `class="page"` marker
 * is printed as a link labelled `<prefix>-<infix>-<image basename>`.
 *
 * A page that throws while being read is retried, but only up to
 * $max_attempts times; after that a notice is printed and the page is
 * skipped, so a permanently failing page can no longer stall the run.
 *
 * Side effect: the global $base is overwritten with its third-level parent
 * directory and stays that way after the call. The progress markers
 * `1`, `2`, `3` are echoed while each page is processed.
 *
 * @param array $chapters     Chapter path segments, keyed like $infixs.
 * @param array $infixs       Label infix for each chapter key.
 * @param int   $max_attempts Maximum tries per page (values below 1 mean 1).
 */
function mangatopia_chapters($chapters, $infixs, $max_attempts = 10)
{
    global $base, $sitename, $prefix;

    $max_attempts = max(1, (int) $max_attempts);
    $base = dirname(dirname(dirname($base)));

    foreach ($chapters as $key => $val) {
        $url = $base . '/' . $val . '/page/01';
        echo "{$url}<br/>";

        // Read the list of page identifiers from the first page.
        $c = new Crawler($url);
        $c->go_to('id="pages"');
        $pages = array();
        while ($line = $c->readline()) {
            if (Crawler::is_there($line, 'value=')) {
                $pages[] = Crawler::extract($line, 'value="', '"');
            } elseif (Crawler::is_there($line, '</select>')) {
                break;
            }
        }
        $c->close();

        $url = dirname($url);

        foreach ($pages as $page) {
            $berhasil = false;
            for ($attempt = 1; !$berhasil && $attempt <= $max_attempts; $attempt++) {
                try {
                    $c = new Crawler($url . '/' . $page);
                    echo '1';
                    flush();
                    $c->go_to('class="page"');
                    echo '2';
                    flush();
                    $img = $c->getbetween('<img src="', '"');
                    echo '3';
                    flush();
                    $ifx = $infixs[$key];
                    echo '<a href="' . $sitename . '/' . $img . '">' . $prefix . '-' . $ifx . '-' . basename($img) . '</a>' . "<br/>\n";
                    flush();
                    $c->close();
                    $berhasil = true;
                } catch (Exception $e) {
                    $berhasil = false;
                }
            }

            if (!$berhasil) {
                echo 'Giving up on ' . $url . '/' . $page . ' after ' . $max_attempts . " attempts<br/>\n";
                flush();
            }
        }
    }
}
<?php
require 'crawler.php';

// Source site: http://www.viraindo.com/
// Walks the index page after the `WIDTH=273` marker. Every line holding a
// link is followed, and the first `<img src="…">` of the linked page is
// printed as a link labelled with the index entry's text. Stops at the line
// containing `<p></TD></TR>`.
$site = 'http://www.viraindo.com/';

$c = new Crawler($site);
$c->go_to('WIDTH=273');

while ($line = $c->readline()) {
    if (!Crawler::is_there($line, 'href="')) {
        if (Crawler::is_there($line, '<p></TD></TR>')) {
            break;
        }
        continue;
    }

    $page = Crawler::extract($line, 'href="', '"');
    $ket = Crawler::extract($line, '">', '</a');

    // Open the linked page and pick up its first image.
    $d = new Crawler($site . $page);
    $d->go_to('<img src="');
    $img = $d->getbetween('<img src="', '"');

    echo "<a href='" . $site . $img . "'>" . $ket . "</a><br/>\n";
    flush();
    $d->close();
}

$c->close();
if ($c->stream) { $lines = $c->getalllineswhere('>>'); $c->close(); unset($c); //echo '$lines:', htmlspecialchars(print_r($lines, true)), '<br />'; flush(); foreach ($lines as $line) { $bigC++; if ($bigC >= $fromC) { $link = Crawler::extract($line, 'href="', '"'); echo 'Opening ', $bigC, ' ', $link, '<br />'; flush(); $c = new Crawler($link); if ($c->stream) { $c->go2linewhere('time SG_txtc'); $time = $c->getbetween('>(', ')<'); $blines = $c->getalllineswhere('/orignal/'); $c->close(); unset($c); //echo '$blines:', htmlspecialchars(print_r($blines, true)),'<br />'; flush(); foreach ($blines as $bline) { if (strpos($bline, 'url=') === false) { $blink = Crawler::extract($bline, 'HREF="', '"'); } else { if (strpos($bline, 'url=') !== false) { $blink = Crawler::extract($bline, 'url=', '"'); } } $blink = str_replace('&690', '', $blink); echo '<a href="', $blink, '">', $time, "</a><br />\n";
<?php
require_once 'crawler.php'; // provides class Crawler

// Lists every image of every folder found in a directory index.
// Pass 1 reads the folder names from the base listing; pass 2 opens each
// folder and prints one link per file, labelled with the folder name minus
// its last character.
$base = 'http://gravure.ecchi-squad.net/images/gravure/';

// Pass 1: folder entries run from the first folder icon up to `</pre>`.
$folders = array();
$craw = new Crawler($base);
$craw->go2linewhere('<img src="/icons/folder.gif"');
for (; strpos($craw->curline, '</pre>') === false; $craw->readline()) {
    $folders[] = $craw->getbetween('<a href="', '"');
}
$craw->close();

// Pass 2: file entries run from the first image icon up to `</pre>`.
foreach ($folders as $folder) {
    unset($craw);
    $craw = new Crawler($base . $folder);

    $files = array();
    $craw->go2linewhere('<img src="/icons/image2.gif"');
    for (; strpos($craw->curline, '</pre>') === false; $craw->readline()) {
        $files[] = $craw->getbetween('<a href="', '"');
    }
    $craw->close();

    $fold = substr($folder, 0, -1);
    foreach ($files as $file) {
        echo '<a href="' . $base . $folder . $file . '">' . $fold . "</a><br />\n";
    }
    flush();
}
<html>
<body>
<?php
require_once "crawler.php";

// Walks profile pages ID = $istart … $ifinish and prints, for each page that
// could be opened, one link per image found on its "Thumbnail" line. Every
// link is labelled with the name read from the page's lblName span.

$istart = 1;
$ifinish = 399;
$start = 'http://asianchicki.com/Girl.aspx?ID=';

// Only these three settings may be overridden from the request; POST takes
// precedence over GET. Pulling every request key into scope with extract()
// would let a caller overwrite arbitrary variables.
// NOTE(review): an overridden $start still makes the server fetch a
// caller-chosen address — restrict it if this script is publicly reachable.
$request = array_merge($_GET, $_POST);
if (isset($request['istart']) && is_scalar($request['istart'])) {
    $istart = (int) $request['istart'];
}
if (isset($request['ifinish']) && is_scalar($request['ifinish'])) {
    $ifinish = (int) $request['ifinish'];
}
if (isset($request['start']) && is_string($request['start'])) {
    $start = $request['start'];
}

for ($i = $istart; $i <= $ifinish; $i++) {
    $turl = $start . $i;
    $c = new Crawler($turl);
    if ($c->stream) {
        $c->go2linewhere('Thumbnail');
        $nama = $c->getbetween('ctl00_ContentPlaceHolder1_lblName">', '</span');
        $thumb_line = $c->curline;
        // Close only after the last read from the crawler.
        $c->close();

        // Each piece after the first starts right behind a `FileName="` marker.
        $ledak = explode('FileName="', $thumb_line);
        $ccount = count($ledak);
        for ($j = 1; $j < $ccount; $j++) {
            $iurl = Crawler::extract($ledak[$j], 'src="', '"');
            // Swap the thumbnail handler for the full-size viewer.
            $iurl = str_replace('Thumbnail', 'Viewer', $iurl);
            echo '<a href="' . $iurl . '">' . $nama . '</a>' . "<br />\n";
        }
    }
    flush();
}
<?php require_once "crawler.php"; $base = 'http://www.ez-wallpaper.org'; $berhenti = 0; $url = $base; while (!$berhenti) { echo "\nURL:{$url}\n"; $c = new Crawler($url); $c->readline(); while ($line = $c->readline()) { if ($c->strpos('nodeTitle') !== false) { $href = $c->getbetween('<a href="', '"'); $c2 = new Crawler($base . $href); $c2->go2linewhere('pageTitle'); $title = $c2->getbetween('>', '<'); $c2->go2linewhere('node_images'); $ledak = explode('<a href="', $c2->curline); for ($i = 1; $i < count($ledak); $i++) { $ahref = substr($ledak[$i], 0, strpos($ledak[$i], '"')); echo "<a href='{$ahref}'>{$title}</a><br />\n"; } //echo $c2->curline; $c2->close(); } else { if ($c->strpos('Go to next page') !== false) { echo "\nADA NEXT\n"; $url = $base . $c->getbetweenlast('</span><a href="', '"'); break; } else { if ($c->strpos('Go to previous page') !== false) {
require_once "crawler.php";

// Follows the "previous strip" links starting at $start_date and prints one
// image link per strip, labelled with the strip's date. Stops at the first
// page that has no STR_Prev marker before its STR_Content marker.

$start_date = '2009-03-10';
$base_url = 'http://www.dilbert.com';
$middle_url = '/strips/comic/';

// Only these three settings may be overridden from the query string.
// Pulling every GET key into scope with extract() would let a caller
// overwrite arbitrary variables.
if (isset($_GET['start_date']) && is_string($_GET['start_date'])) {
    $start_date = $_GET['start_date'];
}
if (isset($_GET['base_url']) && is_string($_GET['base_url'])) {
    $base_url = $_GET['base_url'];
}
if (isset($_GET['middle_url']) && is_string($_GET['middle_url'])) {
    $middle_url = $_GET['middle_url'];
}

$selesai = false;
$url = $base_url . $middle_url . $start_date;

while (!$selesai) {
    $c = new Crawler($url);
    echo "URL is {$url}<br />\n";
    flush();

    $c->go2lineor(array('STR_Content', 'STR_Prev'));
    if ($c->strpos('STR_Prev') !== false) {
        // There is still another strip to visit: remember its address,
        // then move on to this page's content block.
        $url = $base_url . $c->getbetween('<a href="', '"');
        $c->go2linewhere('STR_Content');
    } else {
        $selesai = true;
    }

    $c->readline();
    $img = $c->getbetween('<img src="', '"');
    echo "<a href='{$base_url}{$img}'>{$start_date}</a><br />\n";

    // The date of the next strip is embedded in its address.
    $start_date = Crawler::extract($url, 'comic/', '/');
    $c->close();
    echo "Closed\n";
    flush();
}