Example #1
0
 /**
  * Crawl one comic page and print a download link for its image.
  *
  * The link text is "<year>_<month>_<day>_<title><ext>": the title is taken
  * from the page's <title> line, the date from the line reached after the
  * 'date_left.gif' marker, and the extension from the image file name.
  *
  * @param string $url Address of the comic page to crawl.
  * @return void The link is echoed. Nothing is printed when the date or the
  *              image address cannot be found on the page.
  */
 public function crawl_page($url)
 {
     $c = new Crawler($url);
     // Title: strip the site prefix and make the rest filename-safe.
     $c->go_to('<title>');
     $title = Crawler::extract($c->curline, 'PHD Comics: ', '</title>');
     $title = preg_replace('/\\W/', '_', $title);
     // Date: three slash-separated numbers, read as month/day/year.
     $c->go_to('date_left.gif');
     $c->readline(2);
     $has_date = preg_match('/([0-9]+)\\/([0-9]+)\\/([0-9]+)/mi', $c->curline, $date_parts);
     // Image address: stop at a space, a quote or '>' so the closing
     // delimiter of the attribute is not captured as part of the URL.
     $c->go2linewhere('<td bgcolor=#FFFFFF');
     $has_img = preg_match('/<img src=["\']?([^ "\'>]+)/i', $c->curline, $img_parts);
     $c->close();
     unset($c);
     if (!$has_date || !$has_img) {
         // Page layout not recognised; skip instead of printing a broken link.
         return;
     }
     $month = str_pad($date_parts[1], 2, '0', STR_PAD_LEFT);
     $date = str_pad($date_parts[2], 2, '0', STR_PAD_LEFT);
     $year = $date_parts[3];
     $fileprefix = "{$year}_{$month}_{$date}_{$title}";
     $img = $img_parts[1];
     $filename = basename($img);
     // Keep the extension including its dot; none when the name has no dot.
     $dot = strrpos($filename, '.');
     $ext = $dot === false ? '' : substr($filename, $dot);
     echo "<a href='{$img}'>" . $fileprefix . $ext . "</a><br/>";
     flush();
 }
/**
 * Print a link for every href found after the page's entry <div>.
 *
 * Lines are read until one containing '</div>' (and no href) is reached.
 * Both single-quoted and double-quoted href attributes are recognised;
 * single quotes are checked first. The link text is derived from the URL
 * with its final character removed.
 *
 * @param string $url Page to crawl.
 * @return void Links are echoed.
 */
function crawl_1_page($url)
{
    echo "URL2 {$url} <br/>\n";
    flush();
    $without_last_char = substr($url, 0, strlen($url) - 1);
    $dirname = html_entity_decode(Crawler::cutfromlast1($without_last_char, '/'));
    $c = new Crawler($url);
    $c->go_to('<div class="entry">');
    while ($line = $c->readline()) {
        // Decide which quote character delimits the href on this line.
        $quote = null;
        if (Crawler::is_there($line, "href='")) {
            $quote = "'";
        } elseif (Crawler::is_there($line, 'href="')) {
            $quote = '"';
        }
        if ($quote === null) {
            if (Crawler::is_there($line, '</div>')) {
                break;
            }
            continue;
        }
        $img = Crawler::extract($line, 'href=' . $quote, $quote);
        echo "<a href='{$img}'>{$dirname}</a><br/>\n";
        flush();
    }
    $c->close();
}
Example #3
0
/**
 * Print download links for every page of one chapter.
 *
 * Page identifiers are collected from the <option> lines that follow the
 * 'name="pagejump"' marker. The image directory is taken from the src
 * attribute on the line after the 'id="nextpage"' marker, and every page is
 * assumed to be a '.jpg' inside that directory.
 *
 * Uses the global $prefix for the link text.
 *
 * @param string     $url     Address of the chapter's first page.
 * @param int|string $chapter Chapter number; padded via Crawler::pad(.., 3).
 * @return void Links are echoed.
 */
function crawl_1_chapter($url, $chapter)
{
    global $prefix;
    $c = new Crawler($url);
    $c->go_to('name="pagejump"');
    $pages = array();
    while ($line = $c->readline()) {
        if (Crawler::is_there($line, '<option')) {
            $pages[] = Crawler::extract($line, 'value="', '"');
            continue;
        }
        if (Crawler::is_there($line, '</select>')) {
            break;
        }
    }
    $c->go_to('id="nextpage"');
    $c->readline();
    $img = $c->getbetween('src="', '"');
    $c->close();
    $img_base = dirname($img);
    $ext = '.jpg';
    $chapter = Crawler::pad($chapter, 3);
    foreach ($pages as $page) {
        echo "<a href='{$img_base}/{$page}{$ext}'>{$prefix}-{$chapter}-{$page}{$ext}</a><br/>\n";
        flush();
    }
}
Example #4
0
/**
 * Build an HTML anchor pointing at the download target of a page.
 *
 * The redirect address is read from the "location.href='...'" assignment on
 * the line located by go2lineregexor(). The anchor's href is the directory
 * of that address plus its 'path=' value; the anchor text is the URL-decoded
 * remainder after 'file_orig='.
 *
 * @param string $url Page to crawl.
 * @return string The anchor markup.
 */
function crawl_indowebster($url)
{
    $craw = new Crawler($url);
    $craw->go2lineregexor('/(<\\/div><\\/a><\\/div><\\/div>)/', 1, 'href="#idws7"');
    $redirect = $craw->getbetween('location.href=\'', '\'');
    $path = Crawler::extract($redirect, 'path=', '&');
    $original_name = Crawler::cutafter($redirect, 'file_orig=');
    $craw->close();

    $href = dirname($redirect) . '/' . $path;
    $label = rawurldecode($original_name);
    return '<a href="' . $href . '">' . $label . '</a>';
}
Example #5
0
/**
 * Print a download link for the image on one reader page, retrying on failure.
 *
 * The first attempt reads from $fil; each retry reads from $url. The image
 * address is the src attribute on the line after the 'id="img"' marker.
 * Retries are bounded so that a page which never contains an image cannot
 * recurse or loop without end.
 *
 * Uses the global $prefix for the link text.
 *
 * @param string     $fil          Source used for the first attempt.
 * @param string     $url          Source used for every retry.
 * @param int|string $chapter      Chapter label used in the link text.
 * @param int        $max_attempts Total number of attempts before giving up.
 * @return void The link is echoed; nothing is printed if all attempts fail.
 */
function crawl_1_page($fil, $url, $chapter, $max_attempts = 10)
{
    global $prefix;
    $source = $fil;
    for ($attempt = 1; $attempt <= $max_attempts; $attempt++) {
        $c = new Crawler($source);
        $c->go_to('id="img"');
        $c->readline();
        $c->close();
        $img = Crawler::extract($c->curline, 'src="', '"');
        if ($img) {
            $fname = Crawler::fix_filename(basename($img));
            echo "<a href='{$img}'>{$prefix}-{$chapter}-{$fname}</a><br />\n";
            return;
        }
        // Retry, this time from the URL rather than the initial source.
        $source = $url;
    }
}
 /**
  * Walk a paginated thumbnail gallery and print a link to each full image.
  *
  * Starting at $this->url, every line after the '<li class="thumbnail">'
  * marker is scanned. Thumbnail addresses are rewritten ('/thumbs/' becomes
  * '/images/', and a '/small/<digits>-' segment becomes '/'). A line with
  * class="pagNext" supplies the next page; a '</table>' line ends the page.
  *
  * @return void Links are echoed.
  */
 public function go()
 {
     $start_url = $this->url;
     $finish = false;
     while (!$finish) {
         // Assume this is the last page unless a "next" link is found.
         $finish = true;
         echo $start_url, "<br />\n";
         flush();
         $c = new Crawler($start_url);
         $c->go2linewhere('<li class="thumbnail">');
         while ($line = $c->readline()) {
             if (strpos($line, 'src="') !== false) {
                 // Turn the thumbnail address into the full-image address.
                 $uri = Crawler::extract($line, 'src="', '"');
                 $uri = str_replace('/thumbs/', '/images/', $uri);
                 // Only strip the size segment when it is actually present.
                 if (preg_match('/(\\/small\\/\\d+-)/', $uri, $matches)) {
                     $uri = str_replace($matches[1], '/', $uri);
                 }
                 $file = basename(dirname($uri));
                 echo "<a href='{$uri}'>{$file}</a><br/>\n";
                 flush();
             } elseif (strpos($line, 'class="pagNext"') !== false) {
                 // Next page found: keep looping from its address.
                 $finish = false;
                 $start_url = html_entity_decode(Crawler::extract($line, 'class="pagNext" href="', '"'));
                 break;
             } elseif (strpos($line, '</table>') !== false) {
                 // End of the thumbnail table.
                 break;
             }
         }
         $c->close();
     }
 }
 /**
  * Walk a paginated gallery and print a link for each image.
  *
  * Every thumbnail src after the 'id="pid_' marker is rewritten by replacing
  * 'thumb_100_' with 'normal__', or with nothing when the request carries a
  * truthy 'big' query parameter. The link text is the URL path segment that
  * precedes '/page/1' (empty when the start URL has no such segment).
  * Paging continues while the line after the 'class="pager"' marker contains
  * 'Pager_next'.
  *
  * @return void Links are echoed.
  */
 public function go()
 {
     $host = 'http://lu.scio.us';
     $url = $this->url;
     $text = preg_match('/\\/([^\\/]+)\\/page\\/1/', $url, $m) ? $m[1] : '';
     $replacement = !empty($_GET['big']) ? '' : 'normal__';
     $finish = false;
     while (!$finish) {
         echo $url . "<br/>\n";
         flush();
         $c = new Crawler($url);
         $c->go_to('id="pid_');
         while ($line = $c->readline()) {
             if (Crawler::is_there($line, 'src="')) {
                 $img = Crawler::extract($line, 'src="', '"');
                 $img = str_replace('thumb_100_', $replacement, $img);
                 echo "<a href='{$img}'>{$text}</a><br/>\n";
                 flush();
             } elseif (Crawler::is_there($line, '</ul>')) {
                 break;
             }
         }
         // The pager line decides whether another page follows.
         $c->go_to('class="pager"');
         $c->readline();
         if (Crawler::is_there($c->curline, 'Pager_next')) {
             $url = $host . Crawler::extract($c->curline, '<a rel="next" href="', '"');
         } else {
             $finish = true;
         }
         $c->close();
     }
 }
Example #8
0
 /**
  * Print a link for every page image of an online reader page.
  *
  * '/read' is appended to $this->url when missing. The page is fetched with
  * curl and a browser user agent. The page list comes from the JSON object
  * assigned on the 'var data = {' line (its 'thumbs' member), and the image
  * address template from the "return '...';" line inside 'function imgpath('.
  * The "' + x + '" placeholder in that template is replaced by the
  * zero-padded page number.
  *
  * Example start URL: http://www.fakku.net/viewonline.php?id=2589
  *
  * @return void Links are echoed; nothing is printed when the page data
  *              cannot be decoded.
  */
 public function go()
 {
     if (!preg_match('/\\/read$/', $this->url)) {
         $this->url .= '/read';
     }
     $craw = new Crawler($this->url, array('use_curl' => true, 'agent' => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.2.13) Gecko/20101203 Firefox/3.6.13'));
     $craw->go_to('var data = {');
     $json = Crawler::extract($craw->curline, ' = ', ';');
     // Since the 2012-05-06 site change the image path comes from imgpath().
     $craw->go_to('function imgpath(');
     $craw->go_to('return \'');
     $imgpath = $craw->getbetween("return '", "';");
     $craw->close();

     $obj = json_decode($json);
     // json_decode() yields null on malformed input; avoid iterating over it.
     if (!is_object($obj) || !isset($obj->thumbs)
         || (!is_array($obj->thumbs) && !is_object($obj->thumbs))) {
         return;
     }
     $dir = basename(dirname($this->url));
     foreach ($obj->thumbs as $key => $val) {
         $filename = Crawler::pad($key + 1, 3);
         $img = str_replace("' + x + '", $filename, $imgpath);
         echo "<a href='{$img}'>{$dir}</a><br/>\n";
         flush();
     }
 }
 /**
  * Collect the images of every page in one chapter.
  *
  * Page addresses are the <option> values that follow the 'id="pageMenu"'
  * marker, each prefixed with the site name derived from $chapter_url. Each
  * page is handed to mangareader_1_page() and the returned arrays are merged
  * with the array union operator, so an earlier page's key wins over a later
  * duplicate.
  *
  * @param string $chapter_url Address of the chapter.
  * @param mixed  $prefix      Passed through to mangareader_1_page().
  * @param mixed  $infix       Passed through to mangareader_1_page().
  * @return array Union of the per-page results.
  */
 public function get_images($chapter_url, $prefix, $infix)
 {
     $sitename = $this->sitename($chapter_url);
     $c = new Crawler($chapter_url);
     $c->go_to('id="pageMenu"');
     $pages = array();
     while ($line = $c->readline()) {
         if (Crawler::is_there($line, '<option')) {
             $pages[] = $sitename . Crawler::extract($line, 'value="', '"');
             continue;
         }
         if (Crawler::is_there($line, '</select>')) {
             break;
         }
     }
     $c->close();

     $result = array();
     foreach ($pages as $page_url) {
         $result += $this->mangareader_1_page($page_url, $page_url, $prefix, $infix);
     }
     return $result;
 }
Example #10
0
/**
 * Print a download link for every image listed in a chapter's imageArray.
 *
 * Lines after the 'imageArray = new Array' marker are scanned until one
 * containing 'function loadImage' is reached. Each 'imageArray[' line yields
 * the text between its first pair of single quotes; addresses that do not
 * start with 'http://' are prefixed with the global $sitename.
 *
 * Uses the globals $sitename and $prefix.
 *
 * @param string     $url     Address of the chapter.
 * @param int|string $chapter Chapter number; padded via Crawler::pad(.., 3).
 * @return void Links are echoed.
 */
function foolreader_1_chapter($url, $chapter)
{
    global $sitename, $prefix;
    $chapter = Crawler::pad($chapter, 3);
    $c = new Crawler($url);
    $c->go_to('imageArray = new Array');
    while ($line = $c->readline()) {
        if (!Crawler::is_there($line, 'imageArray[')) {
            if (Crawler::is_there($line, 'function loadImage')) {
                break;
            }
            continue;
        }
        $img = Crawler::extract($line, "'", "'");
        $is_absolute = strpos($img, 'http://') === 0;
        if (!$is_absolute) {
            $img = $sitename . $img;
        }
        $fname = basename($img);
        echo "<a href='{$img}'>{$prefix}-{$chapter}-{$fname}</a><br/>\n";
    }
    $c->close();
    // @TODO: alternative approach not yet enabled - collect the page
    // addresses from the <option value='...'> lines up to '</select>' and
    // process each page with foolreader_1_page (possibly in parallel via
    // Crawler::multiProcess).
}
Example #11
0
 /**
  * Print, as an HTML list, the output for every page of one chapter.
  *
  * Page addresses are the <option> values that follow the 'id="pageMenu"'
  * marker, each prefixed with $this->sitename. Every page is passed to
  * mangareader_1_page() between an opening '<ul>' and a closing '</ul>'.
  *
  * @param array $v Chapter descriptor; the 'url' and 'infix' keys are read.
  * @return void Output is echoed.
  */
 public function crawl_chapter($v)
 {
     $c = new Crawler($v['url']);
     $c->go_to('id="pageMenu"');
     $pages = array();
     while ($line = $c->readline()) {
         if (Crawler::is_there($line, '<option')) {
             $pages[] = $this->sitename . Crawler::extract($line, 'value="', '"');
             continue;
         }
         if (Crawler::is_there($line, '</select>')) {
             break;
         }
     }
     $c->close();

     echo '<ul>';
     foreach ($pages as $page_url) {
         $this->mangareader_1_page($page_url, $page_url, $v['infix']);
     }
     echo '</ul>';
 }
Example #12
0
/**
 * Print links to every file listed in the directory behind a gallery page.
 *
 * The first thumbnail link on the page (the line after the
 * '<div class="ngg-gallery-thumbnail"' marker) gives a sample file address.
 * Its directory, with the last path segment URL-encoded, is then fetched
 * and every href in its '<ul>' listing is printed. The listing ends at a
 * line containing '</ul>' or '"thumbs/"'.
 *
 * @param string $url Gallery page to crawl.
 * @return void Output is echoed; returns early when no directory is found.
 */
function crawl1page($url)
{
    echo 'Entering ' . $url . '<br/>';
    flush();
    $c = new Crawler($url);
    $c->go2linewhere('<div class="ngg-gallery-thumbnail"');
    $c->readline();
    $sample = $c->getbetween('href="', '"');
    $c->close();
    $dir = dirname($sample);
    if (!$dir) {
        return;
    }
    // Re-encode only the last path segment of the directory address.
    $folder = substr($dir, strrpos($dir, '/') + 1);
    $dir = dirname($dir) . '/' . rawurlencode($folder) . '/';
    echo 'Dir:' . $dir . '<br/>' . "\n";
    flush();
    $c = new Crawler($dir);
    $c->go2linewhere('<ul>');
    $c->readline();
    while ($line = $c->readline()) {
        // strpos() returns 0 for a match at the start of the line, so both
        // markers must be compared against false explicitly.
        if (strpos($line, '</ul>') !== false || strpos($line, '"thumbs/"') !== false) {
            break;
        }
        $filename = Crawler::extract($line, 'href="', '"');
        echo '<a href="' . $dir . $filename . '">' . rawurldecode($filename) . '</a><br/>' . "\n";
        flush();
    }
    $c->close();
    echo '<br/>' . "\n";
    flush();
}
 /**
  * Walk a paginated gallery and print a link for each 'showimg.php' entry.
  *
  * Every href on a line containing 'showimg.php?c=' is printed with that
  * marker removed; the link text is the name of the address's parent
  * directory. A 'Next&raquo;' line that carries a link supplies the next
  * page. Crawling stops when that line has no link, or when a page has no
  * 'Next&raquo;' line at all, so the same page is never fetched again
  * without end.
  *
  * @return void Links are echoed.
  */
 public function go()
 {
     $start_url = $this->url;
     if (preg_match('/gallery1\\.hentaifromhell\\.net/', $start_url)) {
         $base = 'http://gallery1.hentaifromhell.net';
     } else {
         $base = 'http://gallery.hentaifromhell.net';
     }
     $selesai = false;
     while (!$selesai) {
         // Treat this page as the last one unless a next link is found.
         $selesai = true;
         echo "{$start_url}<br/>\n";
         $craw = new Crawler($start_url);
         $craw->go2linewhere('showimg.php?c=');
         while ($line = $craw->readline()) {
             if (strpos($line, 'showimg.php?c=') !== false) {
                 $raw = Crawler::extract_to_array($line, '<a href="', '"');
                 foreach ($raw as $r) {
                     $href = str_replace('showimg.php?c=', '', $r);
                     $text = basename(dirname($href));
                     echo '<a href="' . $href . '">' . $text . '</a>' . "<br />\n";
                 }
             } elseif (strpos($line, 'Next&raquo;') !== false) {
                 if (strpos($line, '<a href') !== false) {
                     $start_url = $base . Crawler::extract($line, '<a href="', '"');
                     $selesai = false;
                 }
                 break;
             }
         }
         $craw->close();
     }
 }
 /**
  * Save every full-size image of a paginated gallery into $destination.
  *
  * Each page is scanned from its '<table style=' marker up to '</form>'.
  * Lines containing 'border=0' supply a thumbnail src, which is rewritten
  * ('/thumb/' becomes '/full/', and a '/x<digit>.' segment becomes '/').
  * Files are named with a running counter formatted by Crawler::n(.., 4)
  * plus the original extension. A ':: next ::' line supplies the next page
  * address, which is appended to the initial $base.
  *
  * @param string $base        Address of the first gallery page.
  * @param string $destination Directory prefix for the saved files.
  * @return void
  */
 private function download_all($base, $destination)
 {
     $firstbase = $base;
     $i = 1;
     do {
         $c = new Crawler($base);
         echo $base . "\n";
         $c->go_to(array('<table style=', ':: next ::'));
         $has_next = Crawler::is_there($c->curline, ':: next ::');
         if ($has_next) {
             $urld = Crawler::extract($c->curline, 'href="', '"');
             $base = $firstbase . html_entity_decode($urld);
             $c->go_to('<table style=');
         }
         while ($line = $c->readline()) {
             if (!Crawler::is_there($line, 'border=0')) {
                 if (Crawler::is_there($line, '</form>')) {
                     break;
                 }
                 continue;
             }
             $img = Crawler::extract($line, 'src="', '"');
             $img = str_replace('/thumb/', '/full/', $img);
             $img = preg_replace('/\\/x\\d\\./', '/', $img);
             $ext = Crawler::cutfromlast(basename($img), '.');
             $text = Crawler::n($i++, 4);
             $this->save_to($img, "{$destination}/{$text}{$ext}");
         }
         $c->close();
     } while ($has_next);
 }
Example #15
0
// Comic strip crawler script: starting from $start_date, prints a link to the
// strip image of each page and keeps following the link found on the
// 'STR_Prev' line until a page without that marker is reached.
require_once "crawler.php";
$start_date = '2009-03-10';
$base_url = 'http://www.dilbert.com';
$middle_url = '/strips/comic/';
// NOTE(review): extract($_GET) lets any query parameter overwrite any of the
// variables defined above (including $base_url). Presumably only start_date
// is meant to be overridable - confirm, and consider reading that key
// explicitly instead.
extract($_GET);
// $selesai ("finished") ends the main loop.
$selesai = false;
$url = $base_url . $middle_url . $start_date;
while (!$selesai) {
    // $ada_next ("has next") is assigned below but never read in this script.
    $ada_next = false;
    $c = new Crawler($url);
    echo "URL is {$url}<br />\n";
    flush();
    // Stop at whichever marker comes first: the strip content or the
    // previous-strip link.
    $c->go2lineor(array('STR_Content', 'STR_Prev'));
    //echo "go2lineor finished\n";flush();
    if ($c->strpos('STR_Prev') !== false) {
        // There is still another strip to visit: remember its address for the
        // next iteration, then move on to this page's content marker.
        $ada_next = true;
        $url = $base_url . $c->getbetween('<a href="', '"');
        $c->go2linewhere('STR_Content');
    } else {
        $ada_next = false;
        $selesai = true;
    }
    // The image tag is read from the line after the content marker.
    $c->readline();
    $img = $c->getbetween('<img src="', '"');
    echo "<a href='{$base_url}{$img}'>{$start_date}</a><br />\n";
    // The label for the next iteration is taken from the (possibly updated) URL.
    $start_date = Crawler::extract($url, 'comic/', '/');
    $c->close();
    echo "Closed\n";
    flush();
}
Example #16
0
    $ledak = explode('/', $start_url);
    if (count($ledak) == 5) {
        // 1 album
        crawl_album($start_url);
    } else {
        if (count($ledak) == 4) {
            // 1 gallery
            $c = new Crawler($start_url);
            $c->go_to('<noscript>');
            $c->go_to('<noscript>');
            $links = array();
            while ($line = $c->readline()) {
                if (Crawler::is_there($line, '<a href="')) {
                    $url = Crawler::extract($line, '<a href="', '"');
                    $c->readline();
                    $key = trim(Crawler::extract($c->curline, '<p>', '</p>'));
                    //$key = basename($url);
                    $links[$key] = $url;
                } else {
                    if (Crawler::is_there($line, '</noscript>')) {
                        break;
                    }
                }
            }
            $c->close();
            foreach ($links as $key => $val) {
                crawl_album($val, $key);
            }
        }
    }
}
Example #17
0
// Cookie
// Collects image addresses for every gallery in $targets (defined outside this
// excerpt), following each gallery's 'Next' link page by page, and dumps the
// collected list with var_export().
$base = 'http://www.comicgirls.net';
$imgs = array();
foreach ($targets as $k => $url) {
    $imgs[$k] = array();
    do {
        echo "{$url}<br />\n";
        $c = new Crawler($url);
        // Is there a next page?
        $next = false;
        $c->go_to('>Navigation');
        // NOTE(review): this loop only ends on a line containing '<i>(' or
        // '>Next<'. If readline() keeps returning a falsy value at end of
        // input, it may never terminate - confirm Crawler's behaviour.
        while (!Crawler::is_there($line = $c->readline(), '<i>(')) {
            if (Crawler::is_there($line, '>Next<')) {
                $next = true;
                $url = $base . Crawler::extract($line, "href='", "'");
                break;
            }
        }
        // Grab the gallery
        $c->go_to("'catThumb'");
        // NOTE(review): same end-of-input concern as above; the only exit is a
        // line containing '</table>'.
        while (!Crawler::is_there($line = $c->readline(), '</table>')) {
            if (Crawler::is_there($line, 'src=')) {
                $raw = $base . html_entity_decode(Crawler::extract($line, "src='", "'"));
                // Replace everything from '&max_size=' onwards with fixed
                // size/thumbnail parameters.
                $new = preg_replace('/&max_size=.*$/', '&max_size=6000&thumb=NO', $raw);
                $imgs[$k][] = $new;
            }
        }
        $c->close();
    } while ($next);
}
var_export($imgs);
Example #18
0
<?php

require 'crawler.php';
//http://www.viraindo.com/
// For every link found after the 'WIDTH=273' marker on the front page, open
// the linked page and print a link to its first '<img src="' target, labelled
// with the link's own text. Scanning stops at the '<p></TD></TR>' line.
$site = 'http://www.viraindo.com/';
$c = new Crawler($site);
$c->go_to('WIDTH=273');
while ($line = $c->readline()) {
    if (!Crawler::is_there($line, 'href="')) {
        if (Crawler::is_there($line, '<p></TD></TR>')) {
            break;
        }
        continue;
    }
    $page = Crawler::extract($line, 'href="', '"');
    $ket = Crawler::extract($line, '">', '</a');
    // Visit the linked page to find its image.
    $d = new Crawler($site . $page);
    $d->go_to('<img src="');
    $img = $d->getbetween('<img src="', '"');
    echo "<a href='{$site}{$img}'>{$ket}</a><br/>\n";
    flush();
    $d->close();
}
$c->close();
Example #19
0
<html>
<body>
<?php 
require_once "crawler.php";
// Visits the pages $start . $istart through $start . $ifinish and prints, for
// each page, one link per 'FileName="' entry found on its 'Thumbnail' line.
$istart = 1;
$ifinish = 399;
$start = 'http://asianchicki.com/Girl.aspx?ID=';
// NOTE(review): extract() on $_GET/$_POST lets request parameters overwrite
// any variable above (presumably intended for istart/ifinish/start only) -
// confirm and consider reading those keys explicitly.
extract($_GET);
extract($_POST);
for ($i = $istart; $i <= $ifinish; $i++) {
    $turl = $start . $i;
    $c = new Crawler($turl);
    if ($c->stream) {
        $c->go2linewhere('Thumbnail');
        // NOTE(review): the crawler is closed before getbetween()/curline are
        // read; this presumably works because both use the already-read
        // current line - confirm against Crawler.
        $c->close();
        $nama = $c->getbetween('ctl00_ContentPlaceHolder1_lblName">', '</span');
        // Split the line at every 'FileName="'; element 0 is the text before
        // the first occurrence, so the loop below starts at index 1.
        $ledak = explode('FileName="', $c->curline);
        //echo "<pre>{$c->curline}</pre><br />\n";
        $ccount = count($ledak);
        for ($j = 1; $j < $ccount; $j++) {
            $iurl = Crawler::extract($ledak[$j], 'src="', '"');
            $iurl = str_replace('Thumbnail', 'Viewer', $iurl);
            // $parsed is not used afterwards in this excerpt.
            $parsed = Crawler::parse_url($iurl);
            echo '<a href="' . $iurl . '">' . $nama . '</a>' . "<br />\n";
        }
    }
    flush();
}
Example #20
0
// http://g.e-hentai.org/codegen.php?gid=205508&t=900f2d2c1a&s=1-m-y&type=html
$sitename = "http://g.e-hentai.org";
$cookie = "__utma=185428086.1551282410.1291405578.1291422608.1291427459.4; __utmz=185428086.1291405578.1.1.utmcsr=google|utmccn=(organic)|utmcmd=organic|utmctr=sofia%20webber%20wiki; ipb_member_id=479443; ipb_pass_hash=af8cb1500880244286676a513f1a1ba4; uconfig=tl_m-uh_y-cats_0-ts_m-tr_1-prn_y-dm_l-rx_0-ry_0-sa_y-oi_n-qb_n-tf_n-hp_-hk_; lv=1291414385-1291422236; tips=1; __utmb=185428086.3.10.1291427459; __utmc=185428086; ipb_session_id=770adfca602081684d15ce9b0528d9b9";
if ($base) {
    // parse $base
    preg_match('/\\/g\\/([^\\/]+)\\/([^\\/]+)/', $base, $matches);
    $gid = $matches[1];
    $t = $matches[2];
    $codegen = "http://g.e-hentai.org/codegen.php?gid={$gid}&t={$t}&s=1-m-y&type=html";
    $c = new Crawler($codegen, array('use_curl' => true));
    echo $codegen . "<br/>";
    $c->go_to('class="ehggt"');
    $pages = array();
    while ($line = $c->readline()) {
        if (Crawler::is_there($line, '"ehga"')) {
            $pages[] = Crawler::extract($line, 'href="', '"');
        } else {
            if (Crawler::is_there($line, '</table>')) {
                break;
            }
        }
    }
    $c->close();
    foreach ($pages as $url) {
        echo "URL:{$url}<br/>\n";
        $c = new Crawler($url, array('use_curl' => true));
        $c->go_to('</span>');
        // ambil image source
        $raws = Crawler::extract_to_array($c->curline, 'src="', '"');
        echo '<pre>';
        print_r($raws);
Example #21
0
/**
 * Print a download link for every page image of one chapter.
 *
 * The page list is read from the line after the 'headerSelect' marker of
 * $start_url (one 'value="..."' entry per page). Each page is then fetched
 * and the src of the image at the 'id="readerPage"' marker is printed as a
 * link labelled "<prefix>-<chapter>-<file name>".
 *
 * Uses the globals $prefix (link text prefix) and $bas (prepended to page
 * addresses that do not start with 'http://').
 *
 * @param string $start_url Address of the chapter's first page.
 * @return void Links are written to the output.
 */
function crawl_1_page($start_url)
{
    global $prefix;
    global $bas;
    $cr = new Crawler($start_url);
    /*
    	echo $cr->readline();flush();
    while(!feof($cr->stream)) {
    }
    exit(0);
    */
    $cr->go2linewhere('headerSelect');
    $cr->readline();
    $line = $cr->curline;
    $cr->close();
    // Chapter label: last URL segment, with the part before any '.' passed
    // through Crawler::n(.., 3).
    $chap = Crawler::cutfromlast1($start_url, '/');
    if (strpos($chap, '.') === false) {
        $chap = Crawler::n($chap, 3);
    } else {
        $a = explode('.', $chap);
        $a[0] = Crawler::n($a[0], 3);
        $chap = implode('.', $a);
    }
    // $pi is not used below.
    $pi = 1;
    // page i
    // Element 0 of the split is the text before the first 'value="', so the
    // loop starts at index 1.
    $ledak = explode('value="', $line);
    $pages = array();
    for ($i = 1; $i < count($ledak); $i++) {
        $uurl = Crawler::cutuntil($ledak[$i], '"');
        $key = Crawler::cutfromlast1($uurl, '/');
        $pages[$key] = strpos($uurl, 'http://') === 0 ? $uurl : $bas . $uurl;
    }
    //print_r($pages);flush();
    // $results is filled below but never returned or read in this function.
    $results = array();
    foreach ($pages as $pagenum => $new_url) {
        // $berhasil ("succeeded"): the page is re-fetched until an image
        // address is found.
        // NOTE(review): there is no retry limit, so a page that never yields
        // an image address would loop forever - confirm whether that can occur.
        $berhasil = false;
        while (!$berhasil) {
            $cr = new Crawler($new_url);
            //echo "URL:$new_url<br/>\n";flush();
            $cr->go2linewhere('id="readerPage"');
            // If the marker line has no image tag, use the following line.
            if ($cr->strpos('<img src="') === false) {
                $cr->readline();
            }
            $line = $cr->curline;
            $cr->close();
            $img_url = Crawler::extract($line, '<img src="', '"');
            //echo "IMG:$img_url<br/>\n";flush();
            $real_name = basename($img_url);
            // $ext is only referenced by the commented-out filename below.
            $ext = Crawler::cutfromlast($img_url, '.');
            //$filename = $prefix . '-' . $chap . '-' . Crawler::n($pagenum, 2) . $ext;
            $filename = $prefix . '-' . $chap . '-' . urldecode($real_name);
            $val = $img_url;
            if (!empty($val)) {
                $berhasil = true;
                $key = $filename;
                $results[$filename] = $img_url;
                ?>
				<a href="<?php 
                echo $val;
                ?>
"><?php 
                echo $key;
                ?>
</a><br />
				<?php 
                flush();
            } else {
            }
        }
    }
}
Example #22
0
                flush();
                $c = new Crawler($link);
                if ($c->stream) {
                    $c->go2linewhere('time SG_txtc');
                    $time = $c->getbetween('>(', ')<');
                    $blines = $c->getalllineswhere('/orignal/');
                    $c->close();
                    unset($c);
                    //echo '$blines:', htmlspecialchars(print_r($blines, true)),'<br />';
                    flush();
                    foreach ($blines as $bline) {
                        if (strpos($bline, 'url=') === false) {
                            $blink = Crawler::extract($bline, 'HREF="', '"');
                        } else {
                            if (strpos($bline, 'url=') !== false) {
                                $blink = Crawler::extract($bline, 'url=', '"');
                            }
                        }
                        $blink = str_replace('&amp;690', '', $blink);
                        echo '<a href="', $blink, '">', $time, "</a><br />\n";
                        //echo '<a href="', $blink, '">', Crawler::n($bigC, 3), "</a><br />\n";
                        flush();
                    }
                    flush();
                }
            }
        }
    }
    flush();
}
?>
Example #23
0
/**
 * Print download links for every page of each given chapter.
 *
 * For each chapter the page labels are read from the <option> lines that
 * follow the "name='page'" marker (keyed by option value, so a repeated
 * value keeps only its last label). The image directory is taken from the
 * src attribute on the "class='manga-img'" line, and every page is assumed
 * to be "<label>.png" inside that directory.
 *
 * Uses the globals $base (chapter URL prefix) and $prefix (link text prefix).
 *
 * @param array $chapters Chapter identifiers, appended to $base as "&c=<id>".
 * @param array $infixs   Chapter labels indexed like $chapters; each is
 *                        padded via Crawler::pad(.., 3).
 * @return void Links are echoed.
 */
function omfggscans_chapters($chapters, $infixs)
{
    global $base;
    global $prefix;
    foreach ($chapters as $key => $val) {
        $url = $base . "&c={$val}";
        $ifx = Crawler::pad($infixs[$key], 3);
        echo "{$url}<br/>\n";
        $c = new Crawler($url);
        // Collect the page labels from the page selector.
        $c->go_to("name='page'");
        $pages = array();
        while ($line = $c->readline()) {
            if (Crawler::is_there($line, '<option')) {
                $pg = Crawler::extract($line, "value='", "'");
                $pages[$pg] = Crawler::extract($line, "'>", "</");
            } elseif (Crawler::is_there($line, '</select>')) {
                break;
            }
        }
        // Use the displayed image as a sample to learn the image directory.
        $c->go_to("class='manga-img'");
        $src = Crawler::extract($c->curline, 'src="', '"');
        $c->close();
        $pre_src = dirname($src) . '/';
        $post_src = '.png';
        foreach ($pages as $v) {
            $href = $pre_src . $v . $post_src;
            $text = "{$prefix}-{$ifx}-{$v}{$post_src}";
            echo "<a href='{$href}'>{$text}</a><br />\n";
        }
    }
}
Example #24
0
/**
 * Walk a paginated image list and print a link for each 'Image Only' entry.
 *
 * The link text is the URL-decoded name of the start URL's parent path
 * segment. On every page the "id='Navigationleft'" line is checked for a
 * 'Next' link to decide whether another page follows; entries are then read
 * after the "id='image-list'" marker until a '<footer>' line.
 *
 * @param string $url Address of the first list page.
 * @return void Links are echoed.
 */
function rule34($url)
{
    $text = rawurldecode(basename(dirname($url)));
    $site = 'http://rule34.paheal.net';
    $continue = true;
    while ($continue) {
        echo "{$url}<br/>";
        $c = new Crawler($url);
        $c->go_to("id='Navigationleft'");
        $line = $c->curline;
        if (preg_match('/<a href="([^\'"]+)">Next/', $line, $m)) {
            $url = $site . $m[1];
        } else {
            $continue = false;
        }
        $c->go_to("id='image-list'");
        while ($line = $c->readline()) {
            if (Crawler::is_there($line, '>Image Only<')) {
                $href = Crawler::extract($line, '<br><a href="', '"');
                echo "<a href='{$href}'>{$text}</a><br/>\n";
            } elseif (Crawler::is_there($line, '<footer>')) {
                break;
            }
        }
        // Release the page before fetching the next one.
        $c->close();
    }
}
Example #25
0
// Visits the album pages $start . $istart through $start . $ifinish and, for
// every 'pt_border' line, prints a link to the image address derived from the
// line's first '<a href="' target.
$start = 'http://photo.blog.sina.com.cn/u/1264425925/page';
$fromC = 0;
// NOTE(review): $istart and $ifinish are not assigned in this excerpt;
// presumably they arrive through extract() below or are defined earlier in the
// file - confirm. extract() on request data can also overwrite $start and
// $fromC.
extract($_GET);
extract($_POST);
$bigC = 0;
for ($i = $istart; $i <= $ifinish; $i++) {
    // Running item counter, restarted at page number * 1000 on every page.
    $bigC = $i * 1000;
    $turl = $start . $i;
    echo $i, $turl, "<br />\n";
    flush();
    $c = new Crawler($turl);
    if ($c->stream) {
        $lines = $c->getalllineswhere('pt_border');
        $c->close();
        //echo '$lines:', htmlspecialchars(print_r($lines, true)), '<br />';
        flush();
        foreach ($lines as $line) {
            $bigC++;
            // Items whose counter is below $fromC are skipped.
            if ($bigC >= $fromC) {
                $link = Crawler::extract($line, '<a href="', '"');
                // Rewrite the page address into the image address: swap the
                // host, then replace the '/photo/' segment.
                $imgurl = str_replace('photo.blog.sina.com.cn', 'static10.photo.sina.com.cn', $link);
                $imgurl = str_replace('/photo/', '/orignal/', $imgurl);
                echo "<a href='{$imgurl}'>" . basename($imgurl) . "</a><br/>\n";
            }
        }
    }
    flush();
}
?>
</body>
</html>
Example #26
0
 /**
  * Return what Crawler::extract() finds in $full between 'http://' and the
  * next '/'.
  *
  * @param string $full Full address to inspect.
  * @return mixed Result of Crawler::extract().
  */
 public static function site_name($full)
 {
     $host = Crawler::extract($full, 'http://', '/');
     return $host;
 }