/**
 * Build an HTML download link from an Indowebster page.
 *
 * Opens the page with a Crawler, seeks with go2lineregexor(), then reads the
 * URL found between `location.href='` and the closing quote. The link target
 * is that URL's dirname() joined with its `path=` value; the link text is the
 * URL-decoded `file_orig=` value.
 *
 * Both pieces come from a remote page, so they are HTML-escaped before being
 * placed in the markup.
 *
 * @param string $url Address of the page to crawl.
 *
 * @return string An `<a href="...">...</a>` element.
 */
function crawl_indowebster($url)
{
    $craw = new Crawler($url);

    // NOTE(review): presumably moves the crawler to the line holding the
    // download button; the exact seek semantics live in Crawler -- confirm.
    $craw->go2lineregexor('/(<\\/div><\\/a><\\/div><\\/div>)/', 1, 'href="#idws7"');

    $setring = $craw->getbetween('location.href=\'', '\'');
    $path = Crawler::extract($setring, 'path=', '&');
    $file_orig = Crawler::cutafter($setring, 'file_orig=');
    $craw->close();

    $flags = ENT_QUOTES | ENT_SUBSTITUTE;

    // Escape for the attribute and text contexts so quotes, `<` or `&` in the
    // scraped values cannot break out of the generated anchor.
    $href = htmlspecialchars(dirname($setring) . '/' . $path, $flags, 'UTF-8');
    $label = htmlspecialchars(rawurldecode($file_orig), $flags, 'UTF-8');

    return '<a href="' . $href . '">' . $label . '</a>';
}
/**
 * Walk a paginated lu.scio.us gallery starting at $this->url and print one
 * HTML link per image found, following the pager until no next page exists.
 *
 * Output is echoed and flushed as it is produced: first the URL of the page
 * being read, then an `<a>` for every line containing `src="` up to the
 * closing `</ul>`.
 *
 * Reads $_GET['big']: when truthy the `thumb_100_` marker is stripped from
 * the image URL, otherwise it is replaced with `normal__`.
 *
 * @return void
 */
public function go()
{
    $host = 'http://lu.scio.us';
    $flags = ENT_QUOTES | ENT_SUBSTITUTE;
    $url = $this->url;

    // The link text is the path segment right before "/page/1" in the start
    // URL. Fall back to an empty label when the URL has no such segment
    // instead of reading a missing match group.
    $text = preg_match('/\\/([^\\/]+)\\/page\\/1/', $url, $m) ? $m[1] : '';

    $replacement = !empty($_GET['big']) ? '' : 'normal__';

    // Scraped attribute values may already contain entities such as &amp;,
    // so escaping is done with double_encode disabled.
    $label = htmlspecialchars($text, $flags, 'UTF-8', false);

    $finish = false;
    while (!$finish) {
        echo htmlspecialchars($url, $flags, 'UTF-8', false) . "<br/>\n";
        flush();

        $c = new Crawler($url);
        // NOTE(review): presumably seeks to the first thumbnail entry -- confirm in Crawler.
        $c->go_to('id="pid_');

        while ($line = $c->readline()) {
            if (Crawler::is_there($line, 'src="')) {
                $img = Crawler::extract($line, 'src="', '"');
                $img = str_replace('thumb_100_', $replacement, $img);
                $href = htmlspecialchars($img, $flags, 'UTF-8', false);

                echo "<a href='{$href}'>{$label}</a><br/>\n";
                flush();
            } elseif (Crawler::is_there($line, '</ul>')) {
                // End of the thumbnail list for this page.
                break;
            }
        }

        // The line read after the pager marker is checked for a "next" link.
        $c->go_to('class="pager"');
        $c->readline();

        if (Crawler::is_there($c->curline, 'Pager_next')) {
            $url = $host . Crawler::extract($c->curline, '<a rel="next" href="', '"');
        } else {
            $finish = true;
        }

        $c->close();
    }
}
echo @$prefix; ?> "><br /> <input type="submit"> </form> <?php //http://mangahead.com/Manga-English-Scan/History-Strongest-Disciple-Kenichi/Historys-Strongest-Disciple-Kenichi-392-English-Scan $sitename = "http://mangahead.com"; $pref = @$_POST['base']; if (!Crawler::is_there($pref, '/index.php/')) { $pref = str_replace($sitename . '/Manga', $sitename . '/index.php/Manga', $pref); } if (@$base) { $finish = false; if (Crawler::is_there($pref, '?page=')) { $page = Crawler::cutafter($pref, '?page='); $pref = Crawler::cutuntil($pref, '?page='); } else { $page = 1; } while (!$finish) { echo "{$base}<br/>\n"; flush(); $c = new Crawler($base); $c->go2linewhere('mangaviewer_toppest_navig'); if (Crawler::is_there($c->curline, ' ›')) { $finish = false; $base = $pref . '/?page=' . ++$page; } else { $finish = true; }
/**
 * Return the part of $full that Crawler::cutafter() yields when given the
 * result of Crawler::site_name($full) as its marker.
 *
 * NOTE(review): presumably this is the path/query that follows the site
 * name in a URL -- confirm against cutafter() and site_name().
 *
 * @param string $full The full URL to split.
 *
 * @return mixed Whatever Crawler::cutafter() returns for these arguments.
 */
public static function back_url($full)
{
    $site = Crawler::site_name($full);

    return Crawler::cutafter($full, $site);
}