Esempio n. 1
0
/**
 * Print one HTML link per page of a single chapter.
 *
 * Reads the option values listed after the 'name="pagejump"' marker, takes the
 * directory of the image found after the 'id="nextpage"' marker, and echoes a
 * link "<image dir>/<page>.jpg" labelled "<prefix>-<chapter>-<page>.jpg" for
 * every collected value.
 *
 * @param string $url     Address of a page of the chapter.
 * @param mixed  $chapter Chapter identifier; passed through Crawler::pad(..., 3).
 */
function crawl_1_chapter($url, $chapter)
{
    global $sitename, $prefix;

    $crawler = new Crawler($url);

    // Gather every option value until the closing </select> is seen.
    $crawler->go_to('name="pagejump"');
    $pageIds = array();
    while ($row = $crawler->readline()) {
        if (Crawler::is_there($row, '<option')) {
            $pageIds[] = Crawler::extract($row, 'value="', '"');
            continue;
        }
        if (Crawler::is_there($row, '</select>')) {
            break;
        }
    }

    // The image is read from the line following the 'id="nextpage"' marker.
    $crawler->go_to('id="nextpage"');
    $crawler->readline();
    $sampleImage = $crawler->getbetween('src="', '"');
    $crawler->close();

    $imageDir = dirname($sampleImage);
    $suffix = '.jpg';
    $chapterLabel = Crawler::pad($chapter, 3);
    foreach ($pageIds as $pageId) {
        echo "<a href='{$imageDir}/{$pageId}{$suffix}'>{$prefix}-{$chapterLabel}-{$pageId}{$suffix}</a><br/>\n";
        flush();
    }
}
 /**
  * Echo the page address, then a link to the image found on that page.
  *
  * The image address is taken from the line containing '<a accesskey="v"';
  * the link text is the image's base name.
  *
  * @param string $url Address of the page to inspect.
  */
 public function extract_page($url)
 {
     echo $url, "<br />\n";
     flush();

     $page = new Crawler($url);
     $page->go2linewhere('<a accesskey="v"');
     $imageUrl = $page->getbetween('<img src="', '"');
     $label = basename($imageUrl);

     echo "<a href=\"{$imageUrl}\">{$label}</a><br />\n";
     flush();
     $page->close();
 }
Esempio n. 3
0
/**
 * Build an HTML link to the file behind a download page.
 *
 * Locates the "location.href='...'" redirect target on the page, then combines
 * the directory of that target with its "path=" value for the href and uses
 * the URL-decoded "file_orig=" value as the link text.
 *
 * @param string $url Address of the download page.
 * @return string HTML anchor element.
 */
function crawl_indowebster($url)
{
    $page = new Crawler($url);
    $page->go2lineregexor('/(<\\/div><\\/a><\\/div><\\/div>)/', 1, 'href="#idws7"');
    $target = $page->getbetween('location.href=\'', '\'');

    $path = Crawler::extract($target, 'path=', '&');
    $originalName = Crawler::cutafter($target, 'file_orig=');
    $page->close();

    return sprintf(
        '<a href="%s/%s">%s</a>',
        dirname($target),
        $path,
        rawurldecode($originalName)
    );
}
 /**
  * Extract the image of one reader page and pair it with a download name.
  *
  * The download name is "<prefix>-<chapter>-<image name>", where the image
  * name is the trailing "<digits>.<extension>" part of the image's base name.
  * When the base name does not end that way, the whole base name is used
  * instead of reading an undefined match group.
  *
  * @param string $fil     Source handed to Crawler (file or address).
  * @param string $url     Unused here; kept for signature compatibility.
  * @param string $prefix  Prefix for the generated name.
  * @param mixed  $chapter Chapter identifier; passed through Crawler::pad(..., 3).
  * @return array Single-element map of generated name => image address.
  */
 public function mangareader_1_page($fil, $url, $prefix, $chapter)
 {
     $chapter = Crawler::pad($chapter, 3);

     $c = new Crawler($fil);
     $c->go_to('width="800"');
     $img = $c->getbetween('src="', '"');
     $c->close();

     // preg_match() returns 0 (no match) or false (error); only trust $m on 1.
     $base = basename((string) $img);
     $iname = preg_match('/(\\d+\\.\\w+)$/', $base, $m) === 1 ? $m[1] : $base;

     $name = $prefix . '-' . $chapter . '-' . $iname;
     return array($name => $img);
 }
Esempio n. 5
0
 /**
  * Echo a list item linking to the image of one reader page.
  *
  * The link text is "<prefix>-<chapter>-<image name>", where the prefix comes
  * from $this->prefix and the image name is the trailing "<digits>.<extension>"
  * part of the image's base name. When the base name does not end that way,
  * the whole base name is used instead of reading an undefined match group.
  *
  * @param string $fil     Source handed to Crawler (file or address).
  * @param string $url     Unused here; kept for signature compatibility.
  * @param mixed  $chapter Chapter identifier; passed through Crawler::pad(..., 3).
  */
 public function mangareader_1_page($fil, $url, $chapter)
 {
     $prefix = $this->prefix;
     $chapter = Crawler::pad($chapter, 3);

     $c = new Crawler($fil);
     $c->go_to('width="800"');
     $img = $c->getbetween('src="', '"');
     $c->close();

     // preg_match() returns 0 (no match) or false (error); only trust $m on 1.
     $base = basename((string) $img);
     $iname = preg_match('/(\\d+\\.\\w+)$/', $base, $m) === 1 ? $m[1] : $base;

     echo '<li><a href="' . $img . '">' . $prefix . '-' . $chapter . '-' . $iname . '</a>' . "</li>\n";
 }
Esempio n. 6
0
/**
 * Collect one value per line from the section that follows the second
 * dashed separator line on the page.
 *
 * Each value is whatever lies between the first single space and the next
 * double space of the current line. Collection stops at the next line that
 * contains the separator.
 *
 * NOTE(review): the loop only ends when a further separator line is seen;
 * behaviour at end of input depends on Crawler and is not visible here.
 *
 * @param string $url Address of the page to read.
 * @return array Extracted values in page order.
 */
function crawl_one_page($url)
{
    $separator = '------------------------------------------';
    $names = array();

    $reader = new Crawler($url);
    // Skip ahead to the second separator line.
    $reader->go2linewhere($separator);
    $reader->go2linewhere($separator);

    for ($reader->readline(); $reader->strpos($separator) === false; $reader->readline()) {
        $names[] = $reader->getbetween(' ', '  ');
    }
    $reader->close();

    return $names;
}
Esempio n. 7
0
 /**
  * Echo one link per page image of the gallery at $this->url.
  *
  * Steps:
  *  - make sure $this->url ends in "/read" (appending it when missing);
  *  - fetch the page through Crawler with curl and a browser user agent;
  *  - decode the "var data = {...};" JSON and read its "thumbs" list;
  *  - take the URL template returned by the page's imgpath() function and
  *    substitute each zero-padded page number for the "' + x + '" placeholder.
  *
  * Every link uses the gallery directory name as its text. When the JSON
  * cannot be decoded or has no iterable "thumbs" member, a short message is
  * printed and nothing else is emitted.
  */
 public function go()
 {
     // Example address: http://www.fakku.net/viewonline.php?id=2589
     if (!preg_match('/\\/read$/', $this->url)) {
         $this->url .= '/read';
     }

     // Fetched with curl.
     $craw = new Crawler($this->url, array('use_curl' => true, 'agent' => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.2.13) Gecko/20101203 Firefox/3.6.13'));
     $craw->go_to('var data = {');
     $json = Crawler::extract($craw->curline, ' = ', ';');

     // 2012-05-06: the site changed; image paths now come from imgpath().
     $craw->go_to('function imgpath(');
     $craw->go_to('return \'');
     $imgpath = $craw->getbetween("return '", "';");
     $craw->close();

     // json_decode() yields null on malformed input; do not iterate blindly.
     $obj = json_decode((string) $json);
     $thumbs = is_object($obj) && isset($obj->thumbs) ? $obj->thumbs : null;
     if (!is_array($thumbs) && !is_object($thumbs)) {
         echo 'Could not read the page list from ' . htmlspecialchars($this->url, ENT_QUOTES) . "<br/>\n";
         flush();
         return;
     }

     $dir = basename(dirname($this->url));
     foreach ($thumbs as $key => $val) {
         // Page numbers are 1-based and padded to three characters.
         $filename = Crawler::pad($key + 1, 3);
         $img = str_replace("' + x + '", $filename, $imgpath);
         echo "<a href='{$img}'>{$dir}</a><br/>\n";
         flush();
     }
 }
Esempio n. 8
0
/**
 * Echo links to every file listed in the directory that holds a gallery's images.
 *
 * The directory is derived from the first link found after the
 * '<div class="ngg-gallery-thumbnail"' marker: its last path segment is
 * URL-encoded and the resulting directory address is fetched. Each line of the
 * listing between '<ul>' and '</ul>' then yields one link, stopping early at
 * the '"thumbs/"' entry.
 *
 * @param string $url Address of the gallery page.
 */
function crawl1page($url)
{
    echo 'Entering ' . $url . '<br/>';
    flush();

    $c = new Crawler($url);
    $c->go2linewhere('<div class="ngg-gallery-thumbnail"');
    $c->readline();
    $sample = $c->getbetween('href="', '"');
    $c->close();

    $dir = dirname($sample);
    if (!$dir) {
        return;
    }
    // Re-encode only the last path segment of the directory.
    $folder = substr($dir, strrpos($dir, '/') + 1);
    $dir = dirname($dir) . '/' . rawurlencode($folder) . '/';
    echo 'Dir:' . $dir . '<br/>' . "\n";
    flush();

    $c = new Crawler($dir);
    $c->go2linewhere('<ul>');
    $c->readline();
    while ($line = $c->readline()) {
        // Compare strpos() against false explicitly: a match at offset 0 is
        // falsy and would otherwise be treated as "not found".
        if (strpos($line, '</ul>') !== false || strpos($line, '"thumbs/"') !== false) {
            break;
        }
        $filename = Crawler::extract($line, 'href="', '"');
        echo '<a href="' . $dir . $filename . '">' . rawurldecode($filename) . '</a><br/>' . "\n";
        flush();
    }
    $c->close();

    echo '<br/>' . "\n";
    flush();
}
Esempio n. 9
0
/**
 * Echo one link per page image for each of the given chapters.
 *
 * For every chapter the page list is read from the options following the
 * 'id="pages"' marker of "<base>/<chapter>/page/01"; each page is then fetched
 * and the image after the 'class="page"' marker is linked as
 * "<prefix>-<infix>-<image base name>". The characters '1', '2' and '3' are
 * echoed as progress markers while a page is being processed.
 *
 * A page whose fetch throws is retried, but only up to $max_attempts times;
 * after that a failure line is printed and processing moves on, rather than
 * retrying forever.
 *
 * NOTE(review): the global $base is overwritten with its third-level parent
 * directory on every call, so a second call starts from a different base.
 * Left as is because callers may rely on it — confirm.
 *
 * @param array $chapters     Chapter path segments, keyed like $infixs.
 * @param array $infixs       Per-chapter label used in the link text.
 * @param int   $max_attempts Maximum tries per page (values below 1 mean 1).
 */
function mangatopia_chapters($chapters, $infixs, $max_attempts = 5)
{
    global $base;
    global $sitename;
    global $prefix;
    $base = dirname(dirname(dirname($base)));
    $max_attempts = max(1, (int) $max_attempts);

    foreach ($chapters as $key => $val) {
        $url = $base . '/' . $val . '/page/01';
        echo "{$url}<br/>";

        // Collect the page identifiers of this chapter.
        $c = new Crawler($url);
        $c->go_to('id="pages"');
        $pages = array();
        while ($line = $c->readline()) {
            if (Crawler::is_there($line, 'value=')) {
                $pages[] = Crawler::extract($line, 'value="', '"');
            } elseif (Crawler::is_there($line, '</select>')) {
                break;
            }
        }
        $c->close();

        $url = dirname($url);
        foreach ($pages as $page) {
            $attempt = 0;
            do {
                $attempt++;
                $c = null;
                try {
                    $c = new Crawler($url . '/' . $page);
                    echo '1';
                    flush();
                    $c->go_to('class="page"');
                    echo '2';
                    flush();
                    $img = $c->getbetween('<img src="', '"');
                    echo '3';
                    flush();
                    $ifx = $infixs[$key];
                    echo '<a href="' . $sitename . '/' . $img . '">' . $prefix . '-' . $ifx . '-' . basename($img) . '</a>' . "<br/>\n";
                    flush();
                    $c->close();
                    $succeeded = true;
                } catch (Exception $e) {
                    $succeeded = false;
                    // Best-effort cleanup so a failed attempt does not leak the crawler.
                    if ($c !== null) {
                        try {
                            $c->close();
                        } catch (Exception $ignored) {
                            // Nothing more can be done for this attempt.
                        }
                    }
                }
            } while (!$succeeded && $attempt < $max_attempts);

            if (!$succeeded) {
                echo "Failed to fetch {$url}/{$page} after {$max_attempts} attempts<br/>\n";
                flush();
            }
        }
    }
}
Esempio n. 10
0
<?php

require 'crawler.php';

// Walks the links listed after the 'WIDTH=273' marker on the front page of
// http://www.viraindo.com/ and, for each linked page, prints a link to the
// first image found there, labelled with the original link text.
$site = 'http://www.viraindo.com/';

$c = new Crawler($site);
$c->go_to('WIDTH=273');
while ($line = $c->readline()) {
    if (!Crawler::is_there($line, 'href="')) {
        // A line without a link either ends the list or is skipped.
        if (Crawler::is_there($line, '<p></TD></TR>')) {
            break;
        }
        continue;
    }

    $page = Crawler::extract($line, 'href="', '"');
    $ket = Crawler::extract($line, '">', '</a');

    // Open the linked page and pick up its first image.
    $d = new Crawler($site . $page);
    $d->go_to('<img src="');
    $img = $d->getbetween('<img src="', '"');
    echo '<a href=\'' . $site . $img . '\'>' . $ket . "</a><br/>\n";
    flush();
    $d->close();
}
$c->close();
Esempio n. 11
0
 if ($c->stream) {
     $lines = $c->getalllineswhere('>>');
     $c->close();
     unset($c);
     //echo '$lines:', htmlspecialchars(print_r($lines, true)), '<br />';
     flush();
     foreach ($lines as $line) {
         $bigC++;
         if ($bigC >= $fromC) {
             $link = Crawler::extract($line, 'href="', '"');
             echo 'Opening ', $bigC, ' ', $link, '<br />';
             flush();
             $c = new Crawler($link);
             if ($c->stream) {
                 $c->go2linewhere('time SG_txtc');
                 $time = $c->getbetween('>(', ')<');
                 $blines = $c->getalllineswhere('/orignal/');
                 $c->close();
                 unset($c);
                 //echo '$blines:', htmlspecialchars(print_r($blines, true)),'<br />';
                 flush();
                 foreach ($blines as $bline) {
                     if (strpos($bline, 'url=') === false) {
                         $blink = Crawler::extract($bline, 'HREF="', '"');
                     } else {
                         if (strpos($bline, 'url=') !== false) {
                             $blink = Crawler::extract($bline, 'url=', '"');
                         }
                     }
                     $blink = str_replace('&amp;690', '', $blink);
                     echo '<a href="', $blink, '">', $time, "</a><br />\n";
<?php

require_once 'crawler.php';

// Lists every folder under $base, then every image file inside each folder,
// and prints one link per file labelled with the folder name (minus its last
// character — presumably a trailing slash; verify against the listing).
$base = 'http://gravure.ecchi-squad.net/images/gravure/';

$craw = new Crawler($base);
$craw->go2linewhere('<img src="/icons/folder.gif"');
$folders = array();
// One entry per line until the closing </pre> of the listing.
for (; strpos($craw->curline, '</pre>') === false; $craw->readline()) {
    $folders[] = $craw->getbetween('<a href="', '"');
}
$craw->close();

foreach ($folders as $folder) {
    unset($craw);
    $craw = new Crawler($base . $folder);
    $craw->go2linewhere('<img src="/icons/image2.gif"');
    $files = array();
    for (; strpos($craw->curline, '</pre>') === false; $craw->readline()) {
        $files[] = $craw->getbetween('<a href="', '"');
    }
    $craw->close();

    $fold = substr($folder, 0, strlen($folder) - 1);
    foreach ($files as $file) {
        echo '<a href="' . $base . $folder . $file . '">' . $fold . "</a><br />\n";
    }
    flush();
}
Esempio n. 13
0
<html>
<body>
<?php
require_once "crawler.php";

// Visits "<start><i>" for every i in [istart, ifinish] and prints one link per
// thumbnail found on the page, pointing at the 'Viewer' variant of the
// thumbnail address and labelled with the name shown on the page.
$istart = 1;
$ifinish = 399;
$start = 'http://asianchicki.com/Girl.aspx?ID=';

// Only these three settings may be overridden by the request. A blanket
// extract() would let any request key overwrite any script variable.
// POST is applied after GET, matching the previous precedence.
// NOTE(review): 'start' lets the caller choose the address that is fetched —
// confirm this script is not exposed to untrusted users.
foreach (array($_GET, $_POST) as $request) {
    if (isset($request['istart']) && is_numeric($request['istart'])) {
        $istart = (int) $request['istart'];
    }
    if (isset($request['ifinish']) && is_numeric($request['ifinish'])) {
        $ifinish = (int) $request['ifinish'];
    }
    if (isset($request['start']) && is_string($request['start'])) {
        $start = $request['start'];
    }
}

for ($i = $istart; $i <= $ifinish; $i++) {
    $turl = $start . $i;
    $c = new Crawler($turl);
    if ($c->stream) {
        $c->go2linewhere('Thumbnail');
        $nama = $c->getbetween('ctl00_ContentPlaceHolder1_lblName">', '</span');
        // Every piece after the first starts right behind a FileName=" attribute.
        $ledak = explode('FileName="', $c->curline);
        // Close only after the last read from the crawler.
        $c->close();

        $ccount = count($ledak);
        for ($j = 1; $j < $ccount; $j++) {
            $iurl = Crawler::extract($ledak[$j], 'src="', '"');
            $iurl = str_replace('Thumbnail', 'Viewer', $iurl);
            echo '<a href="' . $iurl . '">' . $nama . '</a>' . "<br />\n";
        }
    }
    flush();
}
Esempio n. 14
0
<?php

require_once "crawler.php";
$base = 'http://www.ez-wallpaper.org';
$berhenti = 0;
$url = $base;
while (!$berhenti) {
    echo "\nURL:{$url}\n";
    $c = new Crawler($url);
    $c->readline();
    while ($line = $c->readline()) {
        if ($c->strpos('nodeTitle') !== false) {
            $href = $c->getbetween('<a href="', '"');
            $c2 = new Crawler($base . $href);
            $c2->go2linewhere('pageTitle');
            $title = $c2->getbetween('>', '<');
            $c2->go2linewhere('node_images');
            $ledak = explode('<a href="', $c2->curline);
            for ($i = 1; $i < count($ledak); $i++) {
                $ahref = substr($ledak[$i], 0, strpos($ledak[$i], '"'));
                echo "<a href='{$ahref}'>{$title}</a><br />\n";
            }
            //echo $c2->curline;
            $c2->close();
        } else {
            if ($c->strpos('Go to next page') !== false) {
                echo "\nADA NEXT\n";
                $url = $base . $c->getbetweenlast('</span><a href="', '"');
                break;
            } else {
                if ($c->strpos('Go to previous page') !== false) {
Esempio n. 15
0
require_once "crawler.php";

// Starting at <base_url><middle_url><start_date>, prints a link to the strip
// image of each page and follows the page's 'STR_Prev' link until a page
// without one is reached.
$start_date = '2009-03-10';
$base_url = 'http://www.dilbert.com';
$middle_url = '/strips/comic/';

// Only these three settings may be overridden by the query string. A blanket
// extract($_GET) would let any request key overwrite any script variable.
// NOTE(review): 'base_url' lets the caller choose the host that is fetched —
// confirm this script is not exposed to untrusted users.
if (isset($_GET['start_date']) && is_string($_GET['start_date'])) {
    $start_date = $_GET['start_date'];
}
if (isset($_GET['base_url']) && is_string($_GET['base_url'])) {
    $base_url = $_GET['base_url'];
}
if (isset($_GET['middle_url']) && is_string($_GET['middle_url'])) {
    $middle_url = $_GET['middle_url'];
}

$selesai = false;
$url = $base_url . $middle_url . $start_date;
while (!$selesai) {
    $c = new Crawler($url);
    echo "URL is {$url}<br />\n";
    flush();

    // Stop at whichever marker comes first.
    $c->go2lineor(array('STR_Content', 'STR_Prev'));
    if ($c->strpos('STR_Prev') !== false) {
        // There is another page to visit: remember its address, then move on
        // to the content marker of the current page.
        $url = $base_url . $c->getbetween('<a href="', '"');
        $c->go2linewhere('STR_Content');
    } else {
        // No further link: this is the last page.
        $selesai = true;
    }

    // The image is on the line after the content marker.
    $c->readline();
    $img = $c->getbetween('<img src="', '"');
    echo "<a href='{$base_url}{$img}'>{$start_date}</a><br />\n";

    // Label for the next iteration, taken from the address just stored.
    $start_date = Crawler::extract($url, 'comic/', '/');
    $c->close();
    echo "Closed\n";
    flush();
}