/**
 * Download the latest chapter of a manga, given its series page.
 *
 * The series page (e.g. http://submanga.com/Naruto) is scanned for the first
 * chapter link, the chapter page is then scanned for the page-image URL, and
 * numbered images (1.jpg, 2.jpg, ...) are handed to saveImg() until it
 * reports a failure.
 *
 * @param string $manga_url Series page URL.
 *
 * @return string "<serie>-<capi>", the same value passed to saveImg() as its
 *                third argument for every page.
 *
 * @throws RuntimeException When a page cannot be downloaded or does not
 *                          contain the expected table, link or image.
 */
function last($manga_url)
{
    // Fetches a URL and parses it as (possibly malformed) HTML. Parser
    // warnings are collected by libxml instead of being silenced with "@".
    $loadDom = function ($url) {
        $html = file_get_contents($url);
        if ($html === false || $html === '') {
            throw new RuntimeException("Could not download {$url}");
        }

        $previous = libxml_use_internal_errors(true);
        $dom = new DOMDocument();
        $dom->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        return $dom;
    };

    // --- Step 1: find the newest chapter on the series page ---------------
    $table = $loadDom($manga_url)->getElementsByTagName('table')->item(0);
    if ($table === null) {
        throw new RuntimeException("No chapter table found at {$manga_url}");
    }

    $rows = $table->getElementsByTagName('tr');
    $serie = null;
    $capi = null;
    $urlmanga = null;

    // Only the first two rows are inspected; the first link of the last row
    // that has one wins (the newest chapter is listed first).
    for ($rowIndex = 0; $rowIndex < 2; $rowIndex++) {
        $row = $rows->item($rowIndex);
        if ($row === null) {
            break;
        }

        $link = $row->getElementsByTagName('a')->item(0);
        if ($link === null) {
            continue;
        }

        $href = $link->getAttribute('href');
        $serie = urlParameters(3, $href);
        $capi = urlParameters(4, $href);
        $urlmanga = "http://submanga.com/c/" . urlParameters(5, $href);
    }

    if ($urlmanga === null) {
        throw new RuntimeException("No chapter link found at {$manga_url}");
    }

    // --- Step 2: find the image base URL on the chapter page --------------
    // $urlmanga now looks like http://submanga.com/c/107463
    $image = $loadDom($urlmanga)->getElementsByTagName('img')->item(3);
    if ($image === null) {
        throw new RuntimeException("No page image found at {$urlmanga}");
    }

    // The <img> at index 3 carries the page image, e.g.
    // http://img2.submanga.com/pages/107/1074632bf/1.jpg
    $imageurl = $image->getAttribute('src');

    // The first six URL segments are the same for every page, so build the
    // prefix once. NOTE(review): assumes urlParameters() is a pure URL
    // splitter - confirm against its definition.
    $segments = [];
    for ($k = 0; $k <= 5; $k++) {
        $segments[] = urlParameters($k, $imageurl);
    }
    $baseUrl = implode("/", $segments);

    // --- Step 3: save pages until saveImg() reports an error --------------
    for ($i = 1; ; $i++) {
        $saved = saveImg($baseUrl . "/" . $i . ".jpg", $i . ".jpg", $serie . "-" . $capi);

        // Give every page its own time budget.
        set_time_limit(20);

        // Loose comparison kept on purpose: any value equal to 0 means failure.
        if ($saved == 0) {
            break;
        }
    }

    return $serie . "-" . $capi;
}
/**
 * Scrape the knowyourmeme.com "popular memes" listing page by page.
 *
 * For every entry on a listing page this reads the title and image URL from
 * the listing markup, fetches the entry's own page for the favourite and view
 * counts, downloads the image through saveImg() and stores the record through
 * a Mog object (CSV or database, depending on $toCSV).
 *
 * Entries whose title, image URL or page link cannot be extracted are skipped
 * instead of being stored with data left over from the previous entry.
 *
 * @return void
 */
function mainExecute()
{
    $page_count = 50;
    $csvFileName = 'mogmaster.csv';
    $toCSV = true;

    // Next meme id; also passed to saveImg() as the local image name.
    $memeIDCounter = 1;

    // Regexes applied to the extracted markup fragments.
    $rgx_src = '/data-src="(.*)" src/'; // image url
    $rgx_title = '/title="(.*)"/';      // meme title
    $rgx_pg_href = '/href="(.*)">/';    // url of the meme's own page
    $rgx_faves = '/>(.*)</';            // favourites count
    $rgx_views = '/>(.*)</';            // view count

    // Selectors handed to extractContent().
    $meme_img_path = '.entry_list .photo img';
    $meme_url_path = '.entry_list h2 > a';
    $meme_faves_path = '.num';
    $meme_views_path = 'dd.views a';
    $meme_nsfw_path = 'span.label-nsfw';

    // Returns the first capture group, or null when the pattern does not match.
    $firstGroup = function ($pattern, $subject) {
        return preg_match($pattern, $subject, $found) === 1 ? $found[1] : null;
    };

    // Recreate the CSV file.
    createCSV($csvFileName);

    // Listing pages are numbered from 1; exactly $page_count pages are read.
    for ($page = 1; $page <= $page_count; $page++) {
        $html = getDOM('http://knowyourmeme.com/memes/popular/page/' . $page);
        delay();

        // These two arrays are expected to describe the same memes at the
        // same positions: image/title markup and the link to the meme page.
        $img_content = extractContent($html, $meme_img_path);
        $meme_href = extractContent($html, $meme_url_path);

        // Position of the current entry inside $meme_href.
        $j = 0;

        foreach ($img_content as $curr) {
            $hrefMarkup = isset($meme_href[$j]) ? $meme_href[$j] : null;
            $j++;

            // An NSFW label on the entry marks it as inactive.
            $meme_nsfw = extractContent($curr, $meme_nsfw_path);
            $active = empty($meme_nsfw) ? 1 : 0;
            echo "Meme NSFW: {$active}" . PHP_EOL;

            $meme_name = $firstGroup($rgx_title, $curr);
            $meme_img_url = $firstGroup($rgx_src, $curr);
            $meme_path = $hrefMarkup === null ? null : $firstGroup($rgx_pg_href, $hrefMarkup);

            if ($meme_name === null || $meme_img_url === null || $meme_path === null) {
                echo "Skipping entry: title, image URL or page link not found." . PHP_EOL;
                continue;
            }

            echo "Meme Name: " . $meme_name . PHP_EOL;
            echo "Meme IMG URL: " . $meme_img_url . PHP_EOL;

            $meme_learn_more = "http://knowyourmeme.com" . $meme_path;
            echo "Meme Main URL: " . $meme_learn_more . PHP_EOL;

            // The meme's own page holds the favourite and view counts.
            $alt_dom = getDOM($meme_learn_more);
            delay();

            // Counts stay null when the page does not expose them.
            $fave_segment = extractContent($alt_dom, $meme_faves_path);
            $meme_faves = isset($fave_segment[0]) ? $firstGroup($rgx_faves, $fave_segment[0]) : null;
            echo "Meme Favorite Count: " . $meme_faves . PHP_EOL;

            $view_segment = extractContent($alt_dom, $meme_views_path);
            $meme_views = isset($view_segment[0]) ? $firstGroup($rgx_views, $view_segment[0]) : null;
            echo "Meme Views: " . $meme_views . PHP_EOL;

            // Save the image file locally under the current meme id.
            saveImg($meme_img_url, $memeIDCounter);
            echo PHP_EOL;

            // Create the Mog object and persist it.
            $mog = new Mog($meme_name, $meme_img_url, $meme_views, $meme_faves, $meme_learn_more, $active);
            if ($toCSV) {
                $mog->saveToCSV($csvFileName);
            } else {
                $mog->saveToDB();
            }

            // Report before advancing the id so the total is not one too high.
            echo "Total Memes Scraped: {$memeIDCounter}" . PHP_EOL;
            $memeIDCounter++;
        }

        echo 'Page ' . $page . ' Done...' . PHP_EOL;
        echo PHP_EOL;

        // Random pause between listing pages; pointless after the last one.
        if ($page < $page_count) {
            sleep(rand(15, 60));
        }
    }

    echo "all pages done!" . PHP_EOL;
}
<?php
/**
 * Download a remote file into ../../database/img/mogs/<index>.
 *
 * The target path is relative, so it is resolved against the current working
 * directory. HTTP error responses are treated as failures, and a failed
 * download does not leave an empty or partial file behind.
 *
 * @param string     $given URL of the image to download.
 * @param int|string $index File name to store the image under.
 *
 * @return bool True when the download completed, false otherwise.
 */
function saveImg($given, $index)
{
    $target = "../../database/img/mogs/{$index}";

    $fp = fopen($target, 'wb');
    if ($fp === false) {
        return false;
    }

    $ch = curl_init($given);
    if ($ch === false) {
        fclose($fp);
        unlink($target);
        return false;
    }

    curl_setopt($ch, CURLOPT_FILE, $fp);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    // Do not write a 4xx/5xx error page into the image file.
    curl_setopt($ch, CURLOPT_FAILONERROR, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);

    $ok = curl_exec($ch) !== false;

    curl_close($ch);
    fclose($fp);

    if (!$ok) {
        // Remove the truncated file created by fopen().
        unlink($target);
    }

    return $ok;
}

saveImg("http://i3.kym-cdn.com/entries/icons/medium/000/002/994/1277081081470.jpg", 1);
echo "Done";
} $columns = array("image.resource{source:master}"); $terms = array("irn", "{$irn}"); try { $res = $search->search("ecatalogue", $columns, $terms); } catch (Exception $e) { //echo "$e"; sendError(406); } if (isset($res->rows[0]["image"]["resource"])) { $img = $res->rows[0]["image"]["resource"]; } else { sendError(404); } $temp_img = tempnam(sys_get_temp_dir(), 'IMU'); saveImg($temp_img, $img); $fn = $img["identifier"]; $mime = $img["mimeFormat"]; sendImage($temp_img, $mime, $fn); function saveImg($newloc, $image) { // Save a copy of the resource $temp = $image['file']; $copy = fopen($newloc, 'wb'); for (;;) { $data = fread($temp, 4096); // read 4K at a time if ($data === false || strlen($data) == 0) { break; } fwrite($copy, $data);
/**
 * Replace every row's "image" entry with the URL of a saved temporary copy.
 *
 * For each row that carries an image resource with a non-empty identifier,
 * the resource is written to IMuTmpImageLoc() . identifier through saveImg()
 * and the row's "image" value becomes IMuTmpImageURL() . identifier. Rows
 * without an image, or with an empty identifier, get null.
 *
 * @param object $result Search result whose ->rows array is rewritten in place.
 *
 * @return object The same $result object.
 */
function processResult($result)
{
    $rows = $result->rows;

    foreach ($rows as $key => $r) {
        // A row is not guaranteed to carry an image resource; isset() keeps
        // such rows from raising undefined-index warnings.
        $image = isset($r['image']['resource']) ? $r['image']['resource'] : null;
        $imgname = isset($image['identifier']) ? $image['identifier'] : null;

        $imgurl = null;
        if (!empty($imgname)) {
            $imgloc = IMuTmpImageLoc() . $imgname;
            $imgurl = IMuTmpImageURL() . $imgname;
            saveImg($imgloc, $image);
        }

        $result->rows[$key]["image"] = $imgurl;
    }

    return $result;
}
/**
 * Crop a region out of a source image and save one resized copy per size.
 *
 * The output file is named <dir><source filename><suffix>.<extension>; when
 * that file already exists, "_1", "_2", ... is appended until a free name is
 * found. The numbering restarts for every size.
 *
 * The function may be called any number of times per request: the image
 * writer is a closure rather than a named function declared inside the body.
 *
 * @param string $src   Source image path, relative to ROOT_PATH. PNG, JPEG
 *                      and GIF are supported (extension is case-insensitive).
 * @param array  $crop  Crop rectangle as [x1, y1, x2, y2] in source pixels.
 * @param array  $sizes Objects exposing ->w, ->h and ->suffix.
 * @param string $dir   Output path prefix, relative to ROOT_PATH. It is
 *                      concatenated as-is, so include the trailing slash.
 *
 * @return object stdClass mapping each size suffix to "/" . <saved path>.
 *                Empty when the source is missing, unsupported or unreadable;
 *                an E_USER_WARNING is raised in those cases.
 */
function resizeImg($src, $crop, $sizes, $dir)
{
    $savedImgs = (object) [];

    if (!file_exists(ROOT_PATH . $src)) {
        trigger_error("Source image not found.", E_USER_WARNING);
        return $savedImgs;
    }

    // Pick the loader and the matching writer from the file extension.
    switch (strtolower(pathinfo($src, PATHINFO_EXTENSION))) {
        case 'png':
            $sourceImg = imagecreatefrompng(ROOT_PATH . $src);
            $writeImg = function ($image, $path) {
                return imagepng($image, ROOT_PATH . $path);
            };
            break;
        case 'jpg':
        case 'jpeg':
            $sourceImg = imagecreatefromjpeg(ROOT_PATH . $src);
            $writeImg = function ($image, $path) {
                return imagejpeg($image, ROOT_PATH . $path, 80);
            };
            break;
        case 'gif':
            $sourceImg = imagecreatefromgif(ROOT_PATH . $src);
            $writeImg = function ($image, $path) {
                return imagegif($image, ROOT_PATH . $path);
            };
            break;
        default:
            trigger_error("Image file extension not allowed.", E_USER_WARNING);
            return $savedImgs;
    }

    if ($sourceImg === false) {
        trigger_error("Source image could not be read.", E_USER_WARNING);
        return $savedImgs;
    }

    $sourceW = $crop[2] - $crop[0];
    $sourceH = $crop[3] - $crop[1];
    $pathInfo = pathinfo($src);

    foreach ($sizes as $size) {
        $destImg = imagecreatetruecolor($size->w, $size->h);
        imagecopyresampled(
            $destImg,
            $sourceImg,
            0,
            0,
            $crop[0],
            $crop[1],
            $size->w,
            $size->h,
            $sourceW,
            $sourceH
        );

        // Never overwrite an existing file: append a counter until free.
        $stem = $dir . $pathInfo['filename'] . $size->suffix;
        $filePath = $stem . "." . $pathInfo['extension'];
        for ($counter = 1; file_exists(ROOT_PATH . $filePath); $counter++) {
            $filePath = $stem . "_" . $counter . "." . $pathInfo['extension'];
        }

        $writeImg($destImg, $filePath);
        $savedImgs->{$size->suffix} = "/" . $filePath;
    }

    return $savedImgs;
}
/**
 * Reshape the first row of a search result and return it.
 *
 * The row stored in $result->rows is rewritten in place:
 *  - "is_holder" is set to false;
 *  - "Creator" becomes a list of ['Name' => ..., 'Role' => ...] pairs, the
 *    role taken from the parallel "Role" column ('' when absent), and "Role"
 *    is removed;
 *  - "MesType"/"W"/"H"/"D" are merged into "Measurements" and removed;
 *  - "image" becomes the URL of a copy saved through saveImg(), or null when
 *    the resource has no identifier;
 *  - "NotesA"/"NotesB" are joined with a newline into "Notes" and removed.
 *
 * Only the first row is processed: the function returns from inside the
 * first loop iteration.
 *
 * @param object $result Search result exposing a ->rows array.
 *
 * @return array|null The reshaped first row, or null when there are no rows.
 */
function formatResults($result)
{
    $sourceRows = $result->rows;

    foreach ($sourceRows as $key => $r) {
        // Is not a holder.
        $result->rows[$key]['is_holder'] = false;

        // Creators: pair every name with the role at the same index.
        $roles = $r["Role"];
        $creators = array();
        foreach ($r["Creator"] as $idx => $person) {
            $creators[$idx] = array(
                'Name' => $person['Name'],
                'Role' => isset($roles[$idx]) ? $roles[$idx] : '',
            );
        }
        $result->rows[$key]["Creator"] = $creators;
        unset($result->rows[$key]['Role']);

        // Measurements: one entry per measurement type.
        $heights = $r["H"];
        $widths = $r["W"];
        $depths = $r["D"];
        $measurements = array();
        foreach ($r['MesType'] as $idx => $type) {
            $measurements[$idx] = array(
                'Type' => $type,
                "Width" => tryHash($widths, $idx),
                "Height" => tryHash($heights, $idx),
                "Depth" => tryHash($depths, $idx),
            );
        }
        $result->rows[$key]['Measurements'] = $measurements;
        unset(
            $result->rows[$key]['MesType'],
            $result->rows[$key]['W'],
            $result->rows[$key]['H'],
            $result->rows[$key]['D']
        );

        // Image: save a copy and expose its URL, or null without a name.
        $resource = $r['image']["resource"];
        $imgname = $resource['identifier'];
        if (!$imgname) {
            $result->rows[$key]['image'] = null;
        } else {
            saveImg(IMuImageLoc() . $imgname, $resource);
            $result->rows[$key]['image'] = IMuImageURL() . urlencode($imgname);
        }

        // Notes: join whichever of the two note fields are present.
        $noteParts = array();
        foreach (array('NotesA', 'NotesB') as $field) {
            if (isset($r[$field])) {
                $noteParts[] = $r[$field];
                unset($result->rows[$key][$field]);
            }
        }
        $result->rows[$key]['Notes'] = implode("\n", $noteParts);

        return $result->rows[$key];
    }

    return null;
}