/**
 * Build the chapter list for the listing page at $base.
 *
 * Starts reading after the crawler has been moved to the `id="listing"`
 * marker. Every line containing `class="chico_` contributes one entry; the
 * scan ends at the first other line containing `</table>`, or as soon as the
 * crawler returns a falsy line.
 *
 * @param string $base URL passed to both $this->sitename() and Crawler.
 *
 * @return array List of array('url' => ..., 'infix' => ..., 'desc' => ...),
 *               in the reverse of the order the rows were encountered.
 */
public function get_info($base)
{
    $site = $this->sitename($base);

    $crawler = new Crawler($base);
    $crawler->go_to('id="listing"');

    $chapters = array();
    while ($row = $crawler->readline()) {
        if (!Crawler::is_there($row, 'class="chico_')) {
            // Not a chapter row: only interesting if it closes the table.
            if (Crawler::is_there($row, '</table>')) {
                break;
            }
            continue;
        }

        // When the marker line carries no link, the link is taken from the next line.
        if (!Crawler::is_there($row, ' href="')) {
            $row = $crawler->readline();
        }

        $href = Crawler::extract($row, 'href="', '"');

        // Last path segment with the "chapter-" prefix and ".html" suffix removed.
        $infix = str_replace(
            array('chapter-', '.html'),
            '',
            Crawler::cutfromlast1($href, '/')
        );

        $chapters[] = array(
            'url' => $site . $href,
            'infix' => $infix,
            'desc' => strip_tags(Crawler::extract($row, '">', '</td>')),
        );
    }
    $crawler->close();

    return array_reverse($chapters);
}
/**
 * Build the chapter list for the listing page at $base, echoing progress.
 *
 * Emits an HTML table row that starts with "Progress.. ", then each chapter
 * infix followed by ".. " as it is collected, and finally "End". Every line
 * containing `class="chico_` contributes one entry; the scan ends at the
 * first other line containing `</table>`, or as soon as the crawler returns
 * a falsy line.
 *
 * @param string $base URL passed to Crawler.
 *
 * @return array List of array('url' => ..., 'infix' => ..., 'desc' => ...),
 *               in the order the rows were encountered.
 */
public function extract_info($base)
{
    echo '<tr><td colspan="3">Progress.. ';

    $crawler = new Crawler($base);
    $crawler->go_to('id="listing"');

    $chapters = array();
    while ($row = $crawler->readline()) {
        if (!Crawler::is_there($row, 'class="chico_')) {
            // Not a chapter row: only interesting if it closes the table.
            if (Crawler::is_there($row, '</table>')) {
                break;
            }
            continue;
        }

        // When the marker line carries no link, the link is taken from the next line.
        if (!Crawler::is_there($row, ' href="')) {
            $row = $crawler->readline();
        }

        $href = Crawler::extract($row, 'href="', '"');

        // Last path segment with the "chapter-" prefix and ".html" suffix removed.
        $infix = str_replace(
            array('chapter-', '.html'),
            '',
            Crawler::cutfromlast1($href, '/')
        );

        $chapters[] = array(
            'url' => $this->sitename . $href,
            'infix' => $infix,
            'desc' => strip_tags(Crawler::extract($row, ': ', '</td>')),
        );

        echo $infix . '.. ';
    }
    $crawler->close();

    echo 'End</td></tr>';

    return $chapters;
}
/**
 * Crawl one page and print an HTML link for every href found in its entry block.
 *
 * Reading starts after the crawler has been moved to `<div class="entry">`.
 * Each line containing a single- or double-quoted href produces one
 * `<a>` element whose text is the last path segment of $url. The scan ends
 * at the first href-less line containing `</div>`, or as soon as the crawler
 * returns a falsy line.
 *
 * Changes from the previous version:
 *  - the link label was entity-decoded and then echoed raw; it is now escaped
 *    again before being written into the HTML output;
 *  - href values (which may contain `'` when taken from a double-quoted
 *    attribute) are escaped before being placed in the single-quoted
 *    attribute, without double-encoding entities that are already present;
 *  - the trailing slash is removed with rtrim(), so a URL that has no
 *    trailing slash no longer loses the last character of its final segment.
 *
 * @param string $url Page URL; presumably still entity-encoded as taken from
 *                    HTML (the label is decoded from it) - confirm with callers.
 *
 * @return void
 */
function crawl_1_page($url)
{
    // double_encode = false keeps entities already present in the URL intact.
    echo 'URL2 ' . htmlspecialchars($url, ENT_QUOTES, 'UTF-8', false) . " <br/>\n";
    flush();

    $dirname = html_entity_decode(Crawler::cutfromlast1(rtrim($url, '/'), '/'));
    // The label was just decoded, so it must be fully re-escaped for output.
    $label = htmlspecialchars($dirname, ENT_QUOTES, 'UTF-8');

    $c = new Crawler($url);
    $c->go_to('<div class="entry">');
    while ($line = $c->readline()) {
        if (Crawler::is_there($line, "href='")) {
            $img = Crawler::extract($line, "href='", "'");
        } elseif (Crawler::is_there($line, 'href="')) {
            $img = Crawler::extract($line, 'href="', '"');
        } elseif (Crawler::is_there($line, '</div>')) {
            break;
        } else {
            continue;
        }

        $href = htmlspecialchars($img, ENT_QUOTES, 'UTF-8', false);
        echo "<a href='{$href}'>{$label}</a><br/>\n";
        flush();
    }
    $c->close();
}
/**
 * Crawl every page of one chapter and print a link to each page image.
 *
 * The page list is taken from the `value="..."` attributes on the line that
 * follows the first line containing `headerSelect` on $start_url. Each page
 * is then fetched and the `<img src="...">` at (or on the line after) the
 * `id="readerPage"` marker is extracted.
 *
 * Uses the globals $prefix (file name prefix) and $bas (base prepended to
 * relative page URLs).
 *
 * Changes from the previous version:
 *  - absolute `https://` page URLs are no longer prefixed with $bas;
 *  - fetching a page is retried at most $max_attempts times instead of
 *    looping forever when no image can be found;
 *  - the collected file name => image URL map is returned (it used to be
 *    built and discarded);
 *  - values are escaped before being written into the HTML output, and the
 *    stray space inside the href attribute is gone;
 *  - unused locals and commented-out debug code were removed.
 *
 * @param string $start_url    URL of the chapter's first page; its last path
 *                             segment is used as the chapter identifier.
 * @param int    $max_attempts Maximum number of fetches per page before the
 *                             page is skipped.
 *
 * @return array Map of generated file name => image URL for every page whose
 *               image was found.
 */
function crawl_1_page($start_url, $max_attempts = 10)
{
    global $prefix;
    global $bas;

    // The line after the 'headerSelect' marker holds the page selector options.
    $cr = new Crawler($start_url);
    $cr->go2linewhere('headerSelect');
    $cr->readline();
    $line = $cr->curline;
    $cr->close();

    // Format only the part before the first dot with Crawler::n(..., 3)
    // (e.g. "12.5" keeps its ".5"). explode() yields a single element when
    // there is no dot, so both cases share this path.
    $parts = explode('.', Crawler::cutfromlast1($start_url, '/'));
    $parts[0] = Crawler::n($parts[0], 3);
    $chap = implode('.', $parts);

    // Everything before the first 'value="' is not an option, hence the slice.
    $pages = array();
    foreach (array_slice(explode('value="', $line), 1) as $chunk) {
        $uurl = Crawler::cutuntil($chunk, '"');
        $key = Crawler::cutfromlast1($uurl, '/');
        $pages[$key] = preg_match('#^https?://#i', $uurl) ? $uurl : $bas . $uurl;
    }

    $results = array();
    foreach ($pages as $new_url) {
        $img_url = '';
        for ($attempt = 1; $attempt <= $max_attempts && empty($img_url); $attempt++) {
            $cr = new Crawler($new_url);
            $cr->go2linewhere('id="readerPage"');
            // The image tag may be on the line after the marker.
            if ($cr->strpos('<img src="') === false) {
                $cr->readline();
            }
            $line = $cr->curline;
            $cr->close();

            $img_url = Crawler::extract($line, '<img src="', '"');
        }

        if (empty($img_url)) {
            echo 'No image found: '
                . htmlspecialchars($new_url, ENT_QUOTES, 'UTF-8', false)
                . "<br />\n";
            flush();
            continue;
        }

        $filename = $prefix . '-' . $chap . '-' . urldecode(basename($img_url));
        $results[$filename] = $img_url;

        // double_encode = false keeps entities already present in the scraped values.
        echo '<a href="' . htmlspecialchars($img_url, ENT_QUOTES, 'UTF-8', false) . '">'
            . htmlspecialchars($filename, ENT_QUOTES, 'UTF-8', false)
            . "</a><br />\n";
        flush();
    }

    return $results;
}
} // 4 Pergi ke baris yang berisi ukuran total gambar $reg = '/<div .+position:relative.+width:(\\d+).+height:(\\d+)/'; $c->go_to($reg, '', true); preg_match($reg, $c->curline, $match); // 5 Ambil $tot_width dan $tot_height dari baris ini list($all, $tot_width, $tot_height) = $match; // 6 Iterasi hingga ketemu baris penutup (regex '/^\\s+<\\/div>/') while ($line = $c->readline()) { if (preg_match('/<div id="([^"]+)".+src="([^"]+)"/', $line, $match)) { // 6a Ambil informasi id, src tiap potongan list($all, $id, $src) = $match; // 6b Gabungkan ke array tadi (var $imgs) $imgs[$id]['src'] = $src; $imgs[$id]['filename'] = basename($src); $imgs[$id]['ext'] = strtolower(Crawler::cutfromlast1(basename($src), '.')); } else { if (preg_match('/^\\s+<\\/div>/', $line)) { break; } } } // 7 Setelah seluruh informasi potongan didapat, urutkan ascending berdasarkan z-index function the_comp($a, $b) { if ($a['zindex'] == $b['zindex']) { return 0; } return (int) $a['zindex'] < (int) $b['zindex'] ? -1 : 1; } usort($imgs, 'the_comp');