/**
 * Collect the chapter entries listed on the page at $base.
 *
 * Reading starts at the 'id="listing"' marker and stops at the first line
 * containing '</table>' (or when the crawler has no more lines). Every line
 * containing 'class="chico_' produces one entry.
 *
 * @param string $base Address handed to the Crawler and to $this->sitename().
 * @return array List of array('url' =>, 'infix' =>, 'desc' =>) entries, in the
 *               reverse of the order in which they appear on the page.
 */
public function get_info($base)
 {
     $site = $this->sitename($base);
     $reader = new Crawler($base);
     $reader->go_to('id="listing"');
     $entries = array();
     while ($row = $reader->readline()) {
         if (!Crawler::is_there($row, 'class="chico_')) {
             // Not a chapter row: only the closing table tag is of interest.
             if (Crawler::is_there($row, '</table>')) {
                 break;
             }
             continue;
         }
         // When the marker line carries no link, the link is taken from the next line.
         if (!Crawler::is_there($row, ' href="')) {
             $row = $reader->readline();
         }
         $href = Crawler::extract($row, 'href="', '"');
         // Infix = last path segment of the link without 'chapter-' and '.html'.
         $infix = str_replace(array('chapter-', '.html'), '', Crawler::cutfromlast1($href, '/'));
         $label = strip_tags(Crawler::extract($row, '">', '</td>'));
         $entries[] = array(
             'url' => $site . $href,
             'infix' => $infix,
             'desc' => $label,
         );
     }
     $reader->close();
     return array_reverse($entries);
 }
 /**
  * Collect the chapter entries listed on the page at $base, echoing progress.
  *
  * Output is wrapped in a table row: 'Progress.. ' first, then each entry's
  * infix followed by '.. ', and finally 'End'. Reading starts at the
  * 'id="listing"' marker and stops at the first line containing '</table>'.
  *
  * @param string $base Address handed to the Crawler.
  * @return array List of array('url' =>, 'infix' =>, 'desc' =>) entries in page order.
  */
 public function extract_info($base)
 {
     echo '<tr><td colspan="3">Progress.. ';
     $reader = new Crawler($base);
     $reader->go_to('id="listing"');
     $entries = array();
     while ($row = $reader->readline()) {
         $is_chapter_row = Crawler::is_there($row, 'class="chico_');
         if (!$is_chapter_row) {
             // Not a chapter row: stop at the end of the table, otherwise keep reading.
             if (Crawler::is_there($row, '</table>')) {
                 break;
             }
             continue;
         }
         // When the marker line carries no link, the link is taken from the next line.
         if (!Crawler::is_there($row, ' href="')) {
             $row = $reader->readline();
         }
         $href = Crawler::extract($row, 'href="', '"');
         // Infix = last path segment of the link without 'chapter-' and '.html'.
         $infix = str_replace(array('chapter-', '.html'), '', Crawler::cutfromlast1($href, '/'));
         $label = strip_tags(Crawler::extract($row, ': ', '</td>'));
         $entries[] = array(
             'url' => $this->sitename . $href,
             'infix' => $infix,
             'desc' => $label,
         );
         echo $infix . '.. ';
     }
     $reader->close();
     echo 'End</td></tr>';
     return $entries;
 }
/**
 * Print an HTML link for every href found inside the '<div class="entry">'
 * section of the page at $url.
 *
 * Each link is labelled with the entity-decoded last path segment of $url
 * (computed after dropping the final character of $url, presumably a
 * trailing slash - confirm against callers). Scanning stops at the first
 * line that contains '</div>' and no href.
 *
 * Scraped values are HTML-escaped before being echoed so that quotes or
 * markup in the remote page cannot break the generated links.
 *
 * @param string $url Page address to crawl.
 * @return void
 */
function crawl_1_page($url)
{
    $flags = ENT_QUOTES | ENT_SUBSTITUTE;
    // double_encode = false: the value may already contain HTML entities.
    echo 'URL2 ' . htmlspecialchars($url, $flags, 'UTF-8', false) . " <br/>\n";
    flush();
    $dirname = html_entity_decode(Crawler::cutfromlast1(substr($url, 0, -1), '/'));
    // $dirname was decoded above, so it needs a full (re-)encoding for output.
    $label = htmlspecialchars($dirname, $flags, 'UTF-8');
    $c = new Crawler($url);
    $c->go_to('<div class="entry">');
    while ($line = $c->readline()) {
        // Single-quoted hrefs take precedence over double-quoted ones.
        if (Crawler::is_there($line, "href='")) {
            $img = Crawler::extract($line, "href='", "'");
        } elseif (Crawler::is_there($line, 'href="')) {
            $img = Crawler::extract($line, 'href="', '"');
        } elseif (Crawler::is_there($line, '</div>')) {
            break;
        } else {
            continue;
        }
        // The href comes straight from HTML source, so existing entities are
        // kept as they are while quotes and angle brackets are escaped.
        echo "<a href='" . htmlspecialchars($img, $flags, 'UTF-8', false) . "'>" . $label . "</a><br/>\n";
        flush();
    }
    $c->close();
}
/**
 * Crawl every page of one chapter and print a link to each page image.
 *
 * The page list is read from the 'value="..."' attributes on the line that
 * follows the 'headerSelect' marker of $start_url. Values that are not
 * absolute http(s) URLs are prefixed with the global $bas. For each page the
 * image URL is taken from the '<img src="' found at (or on the line after)
 * the 'id="readerPage"' marker.
 *
 * Uses the globals $prefix (file name prefix) and $bas (base URL).
 *
 * NOTE(review): a page is re-fetched until an image URL is found, with no
 * retry limit and no delay; a page that never yields an image makes this
 * loop run forever.
 *
 * @param string $start_url Address of the chapter's first page; its last
 *                          path segment is used as the chapter identifier.
 * @return array Map of generated file name => image URL (the original
 *               version built this map but returned nothing).
 */
function crawl_1_page($start_url)
{
    global $prefix;
    global $bas;
    $cr = new Crawler($start_url);
    $cr->go2linewhere('headerSelect');
    $cr->readline();
    $line = $cr->curline;
    $cr->close();

    // Only the part before the first '.' goes through Crawler::n()
    // (presumably zero-padding to 3 digits - confirm in Crawler).
    $parts = explode('.', Crawler::cutfromlast1($start_url, '/'));
    $parts[0] = Crawler::n($parts[0], 3);
    $chap = implode('.', $parts);

    // Everything before the first 'value="' is not an option value: skip it.
    $pages = array();
    foreach (array_slice(explode('value="', $line), 1) as $chunk) {
        $uurl = Crawler::cutuntil($chunk, '"');
        $key = Crawler::cutfromlast1($uurl, '/');
        // Accept https as well as http when deciding whether the URL is absolute.
        $pages[$key] = preg_match('#^https?://#i', $uurl) ? $uurl : $bas . $uurl;
    }

    $results = array();
    foreach ($pages as $new_url) {
        $found = false;
        while (!$found) {
            $cr = new Crawler($new_url);
            $cr->go2linewhere('id="readerPage"');
            if ($cr->strpos('<img src="') === false) {
                $cr->readline();
            }
            $line = $cr->curline;
            $cr->close();
            $img_url = Crawler::extract($line, '<img src="', '"');
            if (empty($img_url)) {
                // No image found on this attempt: fetch the page again.
                continue;
            }
            $found = true;
            $filename = $prefix . '-' . $chap . '-' . urldecode(basename($img_url));
            $results[$filename] = $img_url;
            // The image URL comes from HTML source (entities kept as they are);
            // the file name was url-decoded and needs full escaping.
            echo "\t\t\t\t<a href=\""
                . htmlspecialchars($img_url, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false)
                . "\">"
                . htmlspecialchars($filename, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
                . "</a><br />\n\t\t\t\t";
            flush();
        }
    }
    return $results;
}
Exemple #5
0
}
// 4 Go to the line that contains the total image size
$reg = '/<div .+position:relative.+width:(\\d+).+height:(\\d+)/';
$c->go_to($reg, '', true);
// NOTE(review): the result of preg_match() is not checked; if the current line
// does not match, $match is empty and the list() assignment below reads
// missing offsets.
preg_match($reg, $c->curline, $match);
// 5 Take $tot_width and $tot_height from this line
list($all, $tot_width, $tot_height) = $match;
// 6 Iterate until the closing line is found (regex '/^\\s+<\\/div>/')
while ($line = $c->readline()) {
    if (preg_match('/<div id="([^"]+)".+src="([^"]+)"/', $line, $match)) {
        // 6a Take the id and src of each piece
        list($all, $id, $src) = $match;
        // 6b Merge into the earlier array (var $imgs)
        $imgs[$id]['src'] = $src;
        $imgs[$id]['filename'] = basename($src);
        // Lower-cased part of the file name after its last '.' (presumably the
        // extension without the dot - confirm Crawler::cutfromlast1).
        $imgs[$id]['ext'] = strtolower(Crawler::cutfromlast1(basename($src), '.'));
    } else {
        if (preg_match('/^\\s+<\\/div>/', $line)) {
            break;
        }
    }
}
// 7 Once all piece information has been collected, sort ascending by z-index
/**
 * usort() comparator that orders two entries ascending by their 'zindex'
 * value, compared as integers.
 *
 * Both values are cast to int once and the same integers are used for the
 * equality test and for the ordering test, so the comparator is consistent:
 * values that differ only in a non-numeric suffix or a fractional part
 * (e.g. '10px' vs '10') compare as equal instead of each being reported as
 * greater than the other.
 *
 * @param array $a Entry with a 'zindex' key.
 * @param array $b Entry with a 'zindex' key.
 * @return int -1, 0 or 1.
 */
function the_comp($a, $b)
{
    $left = (int) $a['zindex'];
    $right = (int) $b['zindex'];
    if ($left === $right) {
        return 0;
    }
    return $left < $right ? -1 : 1;
}
// Sort the pieces in place with the_comp(). usort() assigns new integer keys,
// so the id keys of $imgs are not preserved after this call.
usort($imgs, 'the_comp');