コード例 #1
0
ファイル: mangashare.php プロジェクト: JerryMaheswara/crawler
/**
 * Print one HTML link per page of a single chapter.
 *
 * The page identifiers are read from the <option> lines that follow the
 * 'name="pagejump"' marker; the image directory is derived from the first
 * src="..." found after the 'id="nextpage"' marker. Every page is assumed
 * to be a ".jpg" inside that directory.
 *
 * @param string $url     Address of the chapter page to crawl.
 * @param mixed  $chapter Chapter number; passed through Crawler::pad($chapter, 3)
 *                        before being used in the link text.
 */
function crawl_1_chapter($url, $chapter)
{
    global $sitename;
    global $prefix;

    $reader = new Crawler($url);

    // Collect the value of every <option> until the select box is closed.
    $reader->go_to('name="pagejump"');
    $pageIds = array();
    while ($row = $reader->readline()) {
        if (Crawler::is_there($row, '<option')) {
            $pageIds[] = Crawler::extract($row, 'value="', '"');
            continue;
        }
        if (Crawler::is_there($row, '</select>')) {
            break;
        }
    }

    // One sample image is enough: only its directory part is reused.
    $reader->go_to('id="nextpage"');
    $reader->readline();
    $sampleSrc = $reader->getbetween('src="', '"');
    $reader->close();

    $folder = dirname($sampleSrc);
    $suffix = '.jpg';
    $chapter = Crawler::pad($chapter, 3);

    foreach ($pageIds as $pageId) {
        $href = "{$folder}/{$pageId}{$suffix}";
        $label = "{$prefix}-{$chapter}-{$pageId}{$suffix}";
        echo "<a href='{$href}'>{$label}</a><br/>\n";
        flush();
    }
}
コード例 #2
0
/**
 * Print a link for every href found inside the page's "entry" div.
 *
 * The link text is the same for all links: the last path segment of $url
 * (as returned by Crawler::cutfromlast1 after the final character of $url
 * has been dropped), HTML-entity decoded.
 *
 * @param string $url Address of the page to crawl.
 */
function crawl_1_page($url)
{
    echo "URL2 {$url} <br/>\n";
    flush();

    $label = html_entity_decode(Crawler::cutfromlast1(substr($url, 0, -1), '/'));

    $reader = new Crawler($url);
    $reader->go_to('<div class="entry">');
    while ($row = $reader->readline()) {
        // Single-quoted hrefs are tried first, then double-quoted ones.
        if (Crawler::is_there($row, "href='")) {
            $quote = "'";
        } elseif (Crawler::is_there($row, 'href="')) {
            $quote = '"';
        } elseif (Crawler::is_there($row, '</div>')) {
            break;
        } else {
            continue;
        }

        $target = Crawler::extract($row, 'href=' . $quote, $quote);
        echo "<a href='{$target}'>{$label}</a><br/>\n";
        flush();
    }
    $reader->close();
}
コード例 #3
0
 /**
  * Walk a paginated gallery starting at $this->url and print one HTML link
  * per image found on every page.
  *
  * On each page the image list starts at the 'id="pid_' marker and ends at
  * '</ul>'. Thumbnail URLs are rewritten by replacing 'thumb_100_' with
  * 'normal__', or with nothing when the "big" query parameter is set.
  * Paging continues while the line after 'class="pager"' contains
  * 'Pager_next'.
  */
 public function go()
 {
     $host = 'http://lu.scio.us';
     $url = $this->url;

     // The link text is the path segment right before "/page/1". If the
     // start URL does not have that shape, fall back to its last segment
     // instead of reading an undefined match group.
     if (preg_match('/\\/([^\\/]+)\\/page\\/1/', $url, $m)) {
         $text = $m[1];
     } else {
         $text = basename($url);
     }

     // Same truthiness test as before, without the @ error suppression.
     $replacement = empty($_GET['big']) ? 'normal__' : '';

     $finish = false;
     while (!$finish) {
         echo $url . "<br/>\n";
         flush();
         $c = new Crawler($url);
         $c->go_to('id="pid_');
         while ($line = $c->readline()) {
             if (Crawler::is_there($line, 'src="')) {
                 $img = Crawler::extract($line, 'src="', '"');
                 $img = str_replace('thumb_100_', $replacement, $img);
                 echo "<a href='{$img}'>{$text}</a><br/>\n";
                 flush();
             } elseif (Crawler::is_there($line, '</ul>')) {
                 break;
             }
         }

         // The pager line tells us whether another page follows.
         $c->go_to('class="pager"');
         $c->readline();
         if (Crawler::is_there($c->curline, 'Pager_next')) {
             $url = $host . Crawler::extract($c->curline, '<a rel="next" href="', '"');
         } else {
             $finish = true;
         }
         $c->close();
     }
 }
コード例 #4
0
ファイル: picasaweb.php プロジェクト: JerryMaheswara/crawler
/**
 * Print a download link for every thumbnail listed in an album page.
 *
 * The thumbnails are taken from the src="..." attributes found between the
 * line after the second '<noscript>' marker and the closing '</noscript>'.
 * Each thumbnail URL is turned into a download URL by replacing '/s128/'
 * with '/d/'.
 *
 * @param string $url   Address of the album page.
 * @param mixed  $alias When truthy, used as the text of every link;
 *                      otherwise the URL-decoded file name is used.
 */
function crawl_album($url, $alias = false)
{
    $page = new Crawler($url);
    $page->go_to('<noscript>');
    $page->go_to('<noscript>');
    $page->readline();

    // Glue the trimmed lines of the <noscript> section into one string.
    $markup = '';
    while ($row = $page->readline()) {
        if (Crawler::is_there($row, '</noscript>')) {
            break;
        }
        $markup .= trim($row);
    }
    $thumbs = Crawler::extract_to_array($markup, 'src="', '"');
    $page->close();

    /* Alternative kept from the original: use the description as the file name
    	preg_match_all('/<img src="([^"]+)"><\\/a><p><a [^>]+>([^<]+)<\\/a>/', $markup, $match);
    	foreach ($match[1] as $i => $uri) {
    		$info = pathinfo(basename($uri));
    		$ext = $info['extension'];
    		$name = $match[2][$i];
    		$img = str_replace('/s128/', '/', $uri);
    		echo "<a href='$img'>$name.$ext</a><br />\n";
    	}
    	exit;
    	*/

    foreach ($thumbs as $thumb) {
        $download = str_replace('/s128/', '/d/', $thumb);
        $label = $alias ? $alias : urldecode(basename($download));
        echo "<a href='{$download}'>{$label}</a><br/>\n";
        flush();
    }
}
コード例 #5
0
 /**
  * Collect the images of every page of one chapter.
  *
  * Page addresses come from the <option> values that follow the
  * 'id="pageMenu"' marker, each prefixed with $this->sitename($chapter_url).
  * Every page is handed to mangareader_1_page() and the returned arrays are
  * combined with the array union operator, so on a key collision the entry
  * collected first is kept.
  *
  * @param string $chapter_url Address of the chapter's first page.
  * @param mixed  $prefix      Forwarded unchanged to mangareader_1_page().
  * @param mixed  $infix       Forwarded unchanged to mangareader_1_page().
  * @return array Union of all per-page results.
  */
 public function get_images($chapter_url, $prefix, $infix)
 {
     $root = $this->sitename($chapter_url);

     $reader = new Crawler($chapter_url);
     $reader->go_to('id="pageMenu"');
     $pageUrls = array();
     while ($row = $reader->readline()) {
         if (Crawler::is_there($row, '<option')) {
             $pageUrls[] = $root . Crawler::extract($row, 'value="', '"');
             continue;
         }
         if (Crawler::is_there($row, '</select>')) {
             break;
         }
     }
     $reader->close();

     $result = array();
     foreach ($pageUrls as $pageUrl) {
         $result += $this->mangareader_1_page($pageUrl, $pageUrl, $prefix, $infix);
     }

     return $result;
 }
コード例 #6
0
ファイル: foolreader.php プロジェクト: JerryMaheswara/crawler
/**
 * Print one HTML link per image of a chapter.
 *
 * The image paths are read from the 'imageArray[...]' lines that follow the
 * 'imageArray = new Array' marker; scanning stops at 'function loadImage'.
 * A path that is not already an absolute http:// or https:// URL is
 * prefixed with the global $sitename.
 *
 * @param string $url     Address of the chapter page.
 * @param mixed  $chapter Chapter number; passed through Crawler::pad($chapter, 3)
 *                        before being used in the link text.
 */
function foolreader_1_chapter($url, $chapter)
{
    global $sitename;
    global $prefix;
    $chapter = Crawler::pad($chapter, 3);
    $c = new Crawler($url);
    $c->go_to('imageArray = new Array');
    while ($line = $c->readline()) {
        if (Crawler::is_there($line, 'imageArray[')) {
            $img = Crawler::extract($line, "'", "'");
            // Only relative paths need the site prefix. The previous check
            // recognised "http://" only, so https:// URLs were mangled.
            if (!preg_match('#^https?://#i', $img)) {
                $img = $sitename . $img;
            }
            $fname = basename($img);
            echo "<a href='{$img}'>{$prefix}-{$chapter}-{$fname}</a><br/>\n";
        } elseif (Crawler::is_there($line, 'function loadImage')) {
            break;
        }
    }
    $c->close();
    /*
    // @TODO
    $pages = array();
    while ($line = $c->readline()) {
    	if (Crawler::is_there($line, '<option')) {
    		$pages[] = $sitename . Crawler::extract($line, 'value=\'', "'");
    	} else if (Crawler::is_there($line, '</select>')) {
    		break;
    	}
    }
    //$pages = Crawler::extract_to_array($c->curline, 'value="', '"');
    $c->close();
    
    //Crawler::multiProcess(4, $pages, 'foolreader_1_page', array($chapter));
    */
}
コード例 #7
0
 /**
  * Crawl every page of one chapter, wrapping the output in a <ul> element.
  *
  * Page addresses come from the <option> values that follow the
  * 'id="pageMenu"' marker, each prefixed with $this->sitename. Every page
  * is then processed by mangareader_1_page().
  *
  * @param array $v Chapter descriptor; the keys 'url' and 'infix' are read.
  */
 public function crawl_chapter($v)
 {
     $reader = new Crawler($v['url']);
     $reader->go_to('id="pageMenu"');

     $pageUrls = array();
     while ($row = $reader->readline()) {
         if (Crawler::is_there($row, '<option')) {
             $pageUrls[] = $this->sitename . Crawler::extract($row, 'value="', '"');
             continue;
         }
         if (Crawler::is_there($row, '</select>')) {
             break;
         }
     }
     $reader->close();

     echo '<ul>';
     // Pages are processed one after another (a multi-process variant was
     // sketched in the original but left disabled).
     foreach ($pageUrls as $pageUrl) {
         $this->mangareader_1_page($pageUrl, $pageUrl, $v['infix']);
     }
     echo '</ul>';
 }
コード例 #8
0
 /**
  * Save every full-size image of a multi-page gallery.
  *
  * For each gallery page the thumbnails (lines containing 'border=0') are
  * rewritten to their full-size URL and stored through save_to() under a
  * running counter formatted by Crawler::n($i, 4), keeping the extension
  * returned by Crawler::cutfromlast(). Paging follows the ':: next ::' link
  * until a page without one is reached.
  *
  * @param string $base        Address of the first gallery page; also the
  *                            prefix prepended to each "next" link.
  * @param string $destination Directory part of the target file names.
  */
 private function download_all($base, $destination)
 {
     $firstbase = $base;
     $counter = 1;

     do {
         $page = new Crawler($base);
         echo $base . "\n";

         // Stop at whichever comes first: the image table or the next link.
         $page->go_to(array('<table style=', ':: next ::'));
         $hasNext = Crawler::is_there($page->curline, ':: next ::');
         if ($hasNext) {
             $nextHref = Crawler::extract($page->curline, 'href="', '"');
             $base = $firstbase . html_entity_decode($nextHref);
             $page->go_to('<table style=');
         }

         while ($row = $page->readline()) {
             if (Crawler::is_there($row, '</form>') && !Crawler::is_there($row, 'border=0')) {
                 break;
             }
             if (!Crawler::is_there($row, 'border=0')) {
                 continue;
             }

             $img = Crawler::extract($row, 'src="', '"');
             $img = str_replace('/thumb/', '/full/', $img);
             $img = preg_replace('/\\/x\\d\\./', '/', $img);
             $ext = Crawler::cutfromlast(basename($img), '.');
             $text = Crawler::n($counter++, 4);
             $this->save_to($img, "{$destination}/{$text}{$ext}");
         }
         $page->close();
     } while ($hasNext);
 }
コード例 #9
0
                     if (!Crawler::is_there($line, '<a href')) {
                         $line = $c->readline();
                         // Title dan link
                     }
                 }
                 // ada yg berupa original/reprint, ...
                 if (preg_match('/class="red">(.*)<\\/h4>.*class="blue" href="([^"]*)">original<.*href="([^"]*)">reprint</', $line, $matches)) {
                     $result[$name][strip_tags($matches[1]) . '-original'] = html_entity_decode($matches[2]);
                     $result[$name][strip_tags($matches[1]) . '-reprint'] = html_entity_decode($matches[3]);
                 } else {
                     if (preg_match('/href="([^"]*)">(.*)<\\/a>/', $line, $matches)) {
                         $result[$name][strip_tags($matches[2])] = html_entity_decode($matches[1]);
                     }
                 }
             } else {
                 if (Crawler::is_there($line, '</tbody>')) {
                     break;
                 }
             }
         }
         $c->close();
     }
     ob_start();
     echo "<?php\n";
     echo '$a = ';
     var_export($result);
     echo ';';
     file_put_contents('disneycomics.phase1', ob_get_clean());
     break;
 case 'phase2':
     require 'disneycomics.phase1';
コード例 #10
0
ファイル: keishou.php プロジェクト: JerryMaheswara/crawler
/**
 * Print one HTML link per page for each of the given chapters.
 *
 * For every chapter the page list is read from the <option> lines after
 * the "name='page'" marker (option value => option text), and the image
 * directory is taken from the src="..." on the "class='manga-img'" line.
 * Each page is assumed to be "<option text>.png" inside that directory.
 *
 * @param array $chapters Chapter ids, appended to the global $base as "&c=<id>".
 * @param array $infixs   Per-chapter numbers, matched to $chapters by key and
 *                        passed through Crawler::pad(..., 3) for the link text.
 */
function omfggscans_chapters($chapters, $infixs)
{
    global $base;
    global $sitename;
    global $prefix;

    $suffix = '.png';

    foreach ($chapters as $idx => $chapterId) {
        $chapterUrl = $base . "&c={$chapterId}";
        $infix = Crawler::pad($infixs[$idx], 3);
        echo "{$chapterUrl}<br/>\n";

        $reader = new Crawler($chapterUrl);

        // Page list: keyed by option value, so repeated values collapse.
        $reader->go_to("name='page'");
        $pageTexts = array();
        while ($row = $reader->readline()) {
            if (Crawler::is_there($row, '<option')) {
                $value = Crawler::extract($row, "value='", "'");
                $pageTexts[$value] = Crawler::extract($row, "'>", "</");
                continue;
            }
            if (Crawler::is_there($row, '</select>')) {
                break;
            }
        }

        // A single sample image supplies the directory for all pages.
        $reader->go_to("class='manga-img'");
        $sample = Crawler::extract($reader->curline, 'src="', '"');
        $folder = dirname($sample) . '/';
        $reader->close();

        foreach ($pageTexts as $pageText) {
            $href = $folder . $pageText . $suffix;
            $label = "{$prefix}-{$infix}-{$pageText}{$suffix}";
            echo "<a href='{$href}'>{$label}</a><br />\n";
        }
    }
}
コード例 #11
0
ファイル: eatmanga.php プロジェクト: JerryMaheswara/crawler
// Request parameters: 'base' is the starting URL, 'prefix' is used in the
// link text printed further down.
$base = $_POST['base'];
$prefix = $_POST['prefix'];
$sitename = "http://eatmanga.com";
// $pref is the URL stem used to build the "?page=N" addresses of follow-up
// pages. If the submitted URL lacks the '/index.php/' segment, the
// "<site>/Manga" prefix is rewritten to "<site>/index.php/Manga".
$pref = $_POST['base'];
if (!Crawler::is_there($pref, '/index.php/')) {
    $pref = str_replace($sitename . '/Manga', $sitename . '/index.php/Manga', $pref);
}
if ($base) {
    $finish = false;
    $page = 1;
    while (!$finish) {
        echo "{$base}<br/>\n";
        flush();
        $c = new Crawler($base);
        $c->go2linewhere('mangaviewer_toppest_navig');
        if (Crawler::is_there($c->curline, '&nbsp;&nbsp;&rsaquo;')) {
            $finish = false;
            $base = $pref . '/?page=' . ++$page;
        } else {
            $finish = true;
        }
        $ledak = explode('<img src="', $c->curline);
        $c->close();
        for ($i = 1; $i < count($ledak); ++$i) {
            $segm = $ledak[$i];
            $parturl = Crawler::cutuntil($segm, '"');
            $parturl = str_replace('index.php', 'mangas', $parturl);
            $parturl = str_replace('?action=thumb', '', $parturl);
            echo '<a href="' . $sitename . $parturl . '">' . $prefix . '-' . Crawler::n($chapter, 3) . '-' . basename($parturl) . '</a><br/>' . "\n";
            flush();
        }
コード例 #12
0
ファイル: viraindo.php プロジェクト: JerryMaheswara/crawler
<?php

require 'crawler.php';
//http://www.viraindo.com/
// Walk the link list that follows the 'WIDTH=273' marker on the front page.
// For each linked page, print a link to the first <img src="..."> found on
// it, labelled with the text of the originating link.
$site = 'http://www.viraindo.com/';
$c = new Crawler($site);
$c->go_to('WIDTH=273');
while ($line = $c->readline()) {
    if (!Crawler::is_there($line, 'href="')) {
        // A non-link line ends the list only when it carries the end marker.
        if (Crawler::is_there($line, '<p></TD></TR>')) {
            break;
        }
        continue;
    }

    $page = Crawler::extract($line, 'href="', '"');
    $ket = Crawler::extract($line, '">', '</a');

    // Open the linked page just long enough to read its first image.
    $d = new Crawler($site . $page);
    $d->go_to('<img src="');
    $img = $d->getbetween('<img src="', '"');
    echo "<a href='{$site}{$img}'>{$ket}</a><br/>\n";
    flush();
    $d->close();
}
$c->close();
コード例 #13
0
ファイル: kode.php プロジェクト: JerryMaheswara/crawler
// 2 Go to the line that contains the CSS definition of one of the pieces
$c->go_to('/#.+position.+width.+height.+top.+left/', '', true);
// 3 Iterate until the closing line (the one containing '-->') is found
while ($line = $c->readline()) {
    if (preg_match('/#(\\w+) .+width:(\\d+).*height:(\\d+).*top:(\\d+).*left:(\\d+)/', $line, $match)) {
        // 3a Extract the id, z-index, height, width, left and top of each piece
        list($all, $id, $width, $height, $top, $left) = $match;
        // z-index is optional in the rule; pieces without one get 0
        if (preg_match('/z-index:(\\d+)/', $line, $match)) {
            $zindex = $match[1];
        } else {
            $zindex = 0;
        }
        // 3b Store it in the array (var $imgs), keyed by the piece id
        $imgs[$id] = array('id' => $id, 'zindex' => $zindex, 'width' => $width, 'height' => $height, 'top' => $top, 'left' => $left);
    } else {
        if (Crawler::is_there($line, '-->')) {
            break;
        }
    }
}
// 4 Go to the line that contains the total size of the image
$reg = '/<div .+position:relative.+width:(\\d+).+height:(\\d+)/';
$c->go_to($reg, '', true);
// Re-run the same pattern on the current line to capture the two numbers
preg_match($reg, $c->curline, $match);
// 5 Take $tot_width and $tot_height from this line
list($all, $tot_width, $tot_height) = $match;
// 6 Iterasi hingga ketemu baris penutup (regex '/^\\s+<\\/div>/')
while ($line = $c->readline()) {
    if (preg_match('/<div id="([^"]+)".+src="([^"]+)"/', $line, $match)) {
        // 6a Ambil informasi id, src tiap potongan
        list($all, $id, $src) = $match;
コード例 #14
0
// Cookie
// Collect the full-size image URLs of every gallery in $targets, following
// the "Next" link of each gallery until a page without one is reached.
// The result ($imgs, keyed like $targets) is dumped with var_export().
$base = 'http://www.comicgirls.net';
$imgs = array();
foreach ($targets as $k => $url) {
    $imgs[$k] = array();
    do {
        echo "{$url}<br />\n";
        $c = new Crawler($url);
        // Is there a next page?
        $next = false;
        $c->go_to('>Navigation');
        while (true) {
            $line = $c->readline();
            // Stop at the end of the navigation block, or at end of input.
            // Without the end-of-input guard a page lacking the '<i>('
            // marker would loop forever. NOTE(review): assumes readline()
            // returns false or null once the input is exhausted - confirm.
            if ($line === false || $line === null || Crawler::is_there($line, '<i>(')) {
                break;
            }
            if (Crawler::is_there($line, '>Next<')) {
                $next = true;
                $url = $base . Crawler::extract($line, "href='", "'");
                break;
            }
        }
        // Grab the gallery
        $c->go_to("'catThumb'");
        while (true) {
            $line = $c->readline();
            // Same end-of-input guard as above, here for a missing '</table>'.
            if ($line === false || $line === null || Crawler::is_there($line, '</table>')) {
                break;
            }
            if (Crawler::is_there($line, 'src=')) {
                $raw = $base . html_entity_decode(Crawler::extract($line, "src='", "'"));
                // Replace the thumbnail size limit with a large one and
                // switch thumbnail mode off.
                $new = preg_replace('/&max_size=.*$/', '&max_size=6000&thumb=NO', $raw);
                $imgs[$k][] = $new;
            }
        }
        $c->close();
    } while ($next);
}
var_export($imgs);
コード例 #15
0
ファイル: h.php プロジェクト: JerryMaheswara/crawler
/**
 * Print an "Image Only" link for every entry of a paginated image list.
 *
 * Each page is scanned from the "id='image-list'" marker up to '<footer>'.
 * Paging follows the "Next" link found on the "id='Navigationleft'" line
 * and stops on the first page that has none.
 *
 * @param string $url Address of the first list page. The link text used
 *                    for every image is the URL-decoded name of the parent
 *                    path segment of this address.
 */
function rule34($url)
{
    $text = rawurldecode(basename(dirname($url)));
    $site = 'http://rule34.paheal.net';
    $continue = true;
    while ($continue) {
        echo "{$url}<br/>";
        $c = new Crawler($url);
        $c->go_to("id='Navigationleft'");
        $line = $c->curline;
        // Remember the next page now; the current page is still processed below.
        if (preg_match('/<a href="([^\'"]+)">Next/', $line, $m)) {
            $url = $site . $m[1];
        } else {
            $continue = false;
        }
        $c->go_to("id='image-list'");
        while ($line = $c->readline()) {
            if (Crawler::is_there($line, '>Image Only<')) {
                $href = Crawler::extract($line, '<br><a href="', '"');
                echo "<a href='{$href}'>{$text}</a><br/>\n";
            } elseif (Crawler::is_there($line, '<footer>')) {
                break;
            }
        }
        // Release the page before opening the next one; previously every
        // iteration left its Crawler open.
        $c->close();
    }
}
コード例 #16
0
ファイル: gehentai.php プロジェクト: JerryMaheswara/crawler
             break;
         }
     }
 }
 $c->close();
 foreach ($pages as $url) {
     echo "URL:{$url}<br/>\n";
     $c = new Crawler($url, array('use_curl' => true));
     $c->go_to('</span>');
     // ambil image source
     $raws = Crawler::extract_to_array($c->curline, 'src="', '"');
     echo '<pre>';
     print_r($raws);
     echo '</pre>';
     // gambar image biasanya berada di $raws[4] atau $raws[5]
     if (Crawler::is_there($raws[0], '/n/next.png')) {
         array_shift($raws);
     }
     // gambar image namanya lebih panjang
     $base1 = basename($raws[4]);
     $base2 = basename($raws[5]);
     if (strlen($base1) > strlen($base2)) {
         $img = $raws[4];
     } else {
         $img = $raws[5];
     }
     $fname = basename($img);
     echo "<a href='{$img}'>{$fname}</a><br/>\n";
     // Download original
     if (preg_match('/href="([^"]+)">Download original/', $c->curline, $matches)) {
         $img = $matches[1];