/**
 * Parse the episode list page of a title into Episode objects.
 *
 * @param string $contents The HTML source of the episode list page.
 *
 * @return array One Episode per "episode-list-data" row, empty when no row matches.
 */
public static function parse($contents)
{
    $crawler = new Crawler();
    $crawler->addHTMLContent($contents, 'UTF-8');

    $rows = $crawler->filter('table[class="mt8 episode_list js-watch-episode-list ascend"] tr[class="episode-list-data"]');
    $result = array();

    foreach ($rows as $episodeItem) {
        $row = new Crawler($episodeItem);
        $episode = new Episode();

        $episode->setNumber($row->filter('td[class="episode-number nowrap"]')->text());
        $episode->setTitle($row->filter('td[class="episode-title"] a')->text());

        // MAL does not always provide the air date; the cell then reads "N/A".
        $date = $row->filter('td[class="episode-aired"]')->text();
        if ($date !== 'N/A') {
            $episode->setAirDate(DateTime::createFromFormat('M j, Y', $date));
        }

        // count() has to be checked first: text() must not be called on an
        // empty node list.
        $extracted = $row->filter('td[class="episode-title"] span[class="di-ib"]');
        if ($extracted->count() > 0 && $extracted->text() !== '') {
            // Start from an empty array for every row so that a Japanese title
            // of a previous episode can never leak into this one.
            $otherTitles = array();
            $parts = explode('(', $extracted->text());

            // English: everything before the first "(", with the bytes of a
            // UTF-8 non-breaking space (0xC2 0xA0) stripped from both ends.
            $otherTitles['english'] = array(trim($parts[0], chr(0xc2) . chr(0xa0)));

            // Japanese: the part after the first "(", without closing brackets.
            if (count($parts) > 1) {
                $otherTitles['japanese'] = array(trim(str_replace(')', '', $parts[1])));
            }

            $episode->setOtherTitles($otherTitles);
        }

        $result[] = $episode;
    }

    return $result;
}
/**
 * Build the history entries of a single anime or manga from its history page.
 *
 * Every "spaceit_pad" div becomes one entry with the keys "item" (an Anime or
 * Manga carrying id, title and progress), "type" and "time_updated".
 *
 * @param string $contents The HTML source of the history page.
 * @param mixed  $id       The id of the title; cast to int before it is stored.
 * @param string $type     'anime' selects Anime items, anything else Manga items.
 *
 * @return array The list of history entries.
 */
public static function parse($contents, $id, $type)
{
    $page = new Crawler();
    $page->addHTMLContent($contents, 'UTF-8');

    $entries = $page->filter('div[class="spaceit_pad"]');

    // The replacement refers to a group ("$2") the pattern does not define, so
    // the matched " <word> Details" part is simply removed from the header.
    $title = preg_replace('/ (\\w+?) Details/', '$2', $page->filter('div[class="normal_header"]')->text());

    $isAnime = ($type === 'anime');
    $result = array();

    foreach ($entries as $entryNode) {
        $entry = new Crawler($entryNode);
        // Progress is the word at index 1; words 4 and 6 are handed to
        // Date::formatTime() joined by a space.
        $words = explode(' ', $entry->text());

        if ($isAnime) {
            $item = new Anime();
        } else {
            $item = new Manga();
        }

        $item->setId((int) $id);
        $item->setTitle($title);

        if ($isAnime) {
            $item->setWatchedEpisodes((int) $words[1]);
        } else {
            $item->setChaptersRead((int) $words[1]);
        }

        $result[] = array(
            'item' => $item,
            'type' => $type,
            'time_updated' => Date::formatTime($words[4] . ' ' . $words[6]),
        );
    }

    return $result;
}
/**
 * Render the home view with the funding progress scraped from the Startnext widget.
 *
 * The closure downloads the widget HTML, reads the bar width from the inline
 * style of ".bar.bar-1" and the first ".status-text span" text. Whenever a
 * value cannot be read, "percent" falls back to 0 and "progressText" to null.
 *
 * @return mixed The result of view('home', ...).
 */
public function show()
{
    $options = Config::get('onepager.options');

    // NOTE(review): Cache::get() receives a closure plus a third argument (5).
    // Confirm the result is really stored; Cache::remember() may be intended.
    $progressBar = Cache::get('progressBar', function () {
        $curl = curl_init('https://www.startnext.com/sanktionsfrei/widget/?w=200&h=300&l=de');
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
        $html = curl_exec($curl);

        $error = curl_error($curl);
        if ($error) {
            // Release the handle before aborting the request.
            curl_close($curl);
            die($error);
        }

        $status = curl_getinfo($curl, CURLINFO_HTTP_CODE);
        curl_close($curl);

        // Defaults used when the widget is unavailable or its markup changed.
        $percent = 0;
        $progressText = null;

        if ($status == 200) {
            $crawler = new Crawler();
            $crawler->addHTMLContent($html, 'UTF-8');

            // Percentage: the inline style is split at ":" and the last two
            // characters of the value part are cut off.
            $bar = $crawler->filter('.bar.bar-1');
            if ($bar->count() > 0) {
                $stringArray = explode(':', (string) $bar->attr('style'));
                if (isset($stringArray[1])) {
                    $percent = substr($stringArray[1], 0, -2);
                }
            }

            // Text shown next to the progress bar.
            $textArray = $crawler->filter('.status-text span')->extract(['_text']);
            if (isset($textArray[0])) {
                $progressText = $textArray[0];
            }
        }

        return ['percent' => $percent, 'progressText' => $progressText];
    }, 5);

    return view('home', [
        'options' => $options,
        'percent' => $progressBar['percent'],
        'progressText' => $progressBar['progressText'],
    ]);
}
/**
 * Collect every review block of a page.
 *
 * @param string $contents The HTML source of the page.
 * @param mixed  $type     Passed through unchanged to parseReviews().
 *
 * @return array One parseReviews() result per matching element.
 */
public static function parse($contents, $type)
{
    $document = new Crawler();
    $document->addHTMLContent($contents, 'UTF-8');

    $reviews = array();

    // Note the space in the selector: it matches descendants of a div that
    // carry exactly this class attribute.
    foreach ($document->filter('div [class="borderDark pt4 pb8 pl4 pr4 mb8"]') as $reviewNode) {
        $reviews[] = self::parseReviews($reviewNode, $type);
    }

    return $reviews;
}
/**
 * Parse a ranking page into a list of records.
 *
 * @param string $contents The HTML source of the ranking page.
 * @param mixed  $type     Passed through unchanged to parseRecord().
 *
 * @return array One parseRecord() result per "ranking-list" row, empty when no row matches.
 */
public static function parse($contents, $type)
{
    // Initialised up front so that a page without rows yields an empty array
    // instead of an undefined variable.
    $resultset = array();

    $crawler = new Crawler();
    $crawler->addHTMLContent($contents, 'UTF-8');

    // Every table row with the class "ranking-list" is one record.
    $mediaitems = $crawler->filter('tr[class="ranking-list"]');

    foreach ($mediaitems as $item) {
        $resultset[] = self::parseRecord($item, $type);
    }

    return $resultset;
}
/**
 * Get the CSRF Token.
 *
 * Fetches /login.php and reads the "content" attribute of its
 * <meta name="csrf_token"> tag. If several such tags carry a content
 * attribute, the last one wins.
 *
 * @return string|null The CSRF token required for login, or null when the page has no usable csrf_token meta tag
 */
private function getCsrfToken()
{
    $loginPageContent = $this->fetch('/login.php');

    $crawler = new Crawler();
    $crawler->addHTMLContent($loginPageContent, 'UTF-8');

    $token = null;

    // The selector already restricts the result to name="csrf_token", so the
    // name does not need to be checked again per tag.
    foreach ($crawler->filter('meta[name="csrf_token"]') as $tag) {
        $content = $tag->attributes->getNamedItem('content');

        // A meta tag without a content attribute is skipped instead of being
        // dereferenced.
        if ($content !== null) {
            $token = $content->value;
        }
    }

    return $token;
}
/**
 * Parse every <entry> element of the given markup.
 *
 * @param string $contents The markup containing the entries.
 * @param string $type     'anime' routes entries to parseAnime(), anything else to parseManga().
 *
 * @return array One parsed record per entry.
 */
public static function parse($contents, $type)
{
    $feed = new Crawler();
    $feed->addHTMLContent($contents, 'UTF-8');

    $isAnime = ($type === 'anime');
    $parsed = array();

    foreach ($feed->filter('entry') as $entry) {
        $parsed[] = $isAnime ? self::parseAnime($entry) : self::parseManga($entry);
    }

    return $parsed;
}
/**
 * Parse the "borderClass" blocks of a page into recommendation entries.
 *
 * Each entry is an array with the keys "item" (an Anime carrying id, image
 * URL and title) and "recommendations" (the parseInformation() result).
 *
 * @param string $contents The HTML source of the page.
 *
 * @return array The list of entries.
 */
public static function parse($contents)
{
    $page = new Crawler();
    $page->addHTMLContent($contents, 'UTF-8');

    $result = array();

    foreach ($page->filter('div[class="borderClass"]') as $blockNode) {
        $block = new Crawler($blockNode);
        $anime = new Anime();

        // The id is the anchor's id attribute with "#raArea1" removed.
        $anchorId = $block->filter('a')->attr('id');
        $anime->setId(str_replace('#raArea1', '', $anchorId));

        // The part of data-src matched by the pattern is replaced with its
        // second capture group.
        $imageSource = $block->filter('img')->attr('data-src');
        $anime->setImageUrl(preg_replace('/r(.+?)\\/(.+?)\\?(.+?)$/', '$2', $imageSource));

        $anime->setTitle($block->filter('strong')->text());

        $result[] = array(
            'item' => $anime,
            'recommendations' => self::parseInformation($block),
        );
    }

    return $result;
}
/**
 * Parse the schedule page into one list per day section.
 *
 * @param string $contents The HTML source of the schedule page.
 *
 * @return array ScheduleParser::parseDay() results keyed by 'monday' .. 'sunday', 'other' and 'unknown'.
 */
public static function parse($contents)
{
    $crawler = new Crawler();
    $crawler->addHTMLContent($contents, 'UTF-8');

    // The class attribute of a section is this prefix + key + " clearfix".
    $classDay = 'seasonal-anime-list js-seasonal-anime-list js-seasonal-anime-list-key-';
    $recordContainer = 'div[class="seasonal-anime js-seasonal-anime"]';

    $sections = array(
        'monday',
        'tuesday',
        'wednesday',
        'thursday',
        'friday',
        'saturday',
        'sunday',
        'other',
        'unknown',
    );

    $result = array();

    foreach ($sections as $section) {
        $selector = 'div[class="' . $classDay . $section . ' clearfix"] ' . $recordContainer;
        $result[$section] = ScheduleParser::parseDay($crawler->filter($selector));
    }

    return $result;
}
/**
 * Parse the rows of the content table into records.
 *
 * @param string $contents The HTML source of the page.
 * @param mixed  $type     Passed through unchanged to parseRecord().
 *
 * @return array One parseRecord() result per table row after the first one.
 */
public static function parse($contents, $type)
{
    $page = new Crawler();
    $page->addHTMLContent($contents, 'UTF-8');

    $rows = $page->filterXPath('//div[@id="content"]/div/table/tr');

    $records = array();
    $menuBarSkipped = false;

    foreach ($rows as $row) {
        // The menu bar is a <tr> as well and always comes first: drop it once.
        if (!$menuBarSkipped) {
            $menuBarSkipped = true;
            continue;
        }

        $records[] = self::parseRecord($row, $type);
    }

    return $records;
}
/**
 * Parse a single private message page.
 *
 * @param string $contents The HTML source of the message page.
 * @param mixed  $id       The id of the message; cast to int before it is stored.
 *
 * @return Messages The message with id, action id, thread id, username, time,
 *                  subject and body set as far as they were found.
 */
public static function parseMessage($contents, $id)
{
    $crawler = new Crawler();
    // Line breaks are stripped before parsing.
    $crawler->addHTMLContent(str_replace('<br />', '', $contents), 'UTF-8');

    $message = new Messages();
    $message->setId((int) $id);

    # Action id of the message.
    # Taken from the "id" input of the form whose action contains "delete".
    $actionId = $crawler->filterXPath('//form[contains(@action,"delete")]/input[@name="id"]');
    $message->setActionId((int) $actionId->attr('value'));

    # Thread id of the message.
    # Taken from the onclick handler of the Reply button.
    # Example:
    # <input type="button" onclick="document.location='/mymessages.php?go=send&replyid=21193061&threadid=16092543&toname=Ratan12'" class="inputButton" value=" Reply ">
    $replyButton = $crawler->filterXPath('//div/input[contains(@value,"Reply")]');
    if (preg_match('/threadid=([\\d]*)/', (string) $replyButton->attr('onclick'), $threadMatches)) {
        $message->setThreadId((int) $threadMatches[1]);
    }

    # Username of the sender.
    # Example:
    # <a href="http://myanimelist.net/profile/ratan12">ratan12</a>
    $message->setUsername($crawler->filterXPath('//td[@class="dialog-text"]/h2/a')->text());

    # Time of the received message.
    # Example:
    # <small>50 minutes ago</small>
    $time = $crawler->filterXPath('//td[@class="dialog-text"]/div[contains(@class,"lightLink")]');
    if (count($time) > 0) {
        $message->setTime($time->text());
    }

    # Subject.
    # Example:
    # <div style="margin-bottom: 4px; font-weight: bold;">re: coolmessage</div>
    $messageSubject = $crawler->filterXPath('//td[@class="dialog-text"]/div[contains(@class,"fw-b")]')->text();
    $message->setSubject($messageSubject);

    # Message.
    # The body is the markup between the closing tag of the subject div and the
    # next <div. The anchor is built from the actual subject; a hard-coded
    # subject would only ever match messages carrying exactly that subject.
    # NOTE(review): assumes html() escapes &, < and > in text the same way as
    # htmlspecialchars(ENT_NOQUOTES) - confirm against real message pages.
    $messageText = $crawler->filterXPath('//td[@class="dialog-text"]');
    $subjectPattern = preg_quote(htmlspecialchars(trim($messageSubject), ENT_NOQUOTES, 'UTF-8'), '/');
    if (preg_match('/' . $subjectPattern . '\\s*<\\/div>(.*?)<div/s', $messageText->html(), $messageBody)) {
        $message->setMessage($messageBody[1]);
    }

    return $message;
}
/**
 * Parse the characters and staff tables of a page.
 *
 * A table that contains exactly one td[width="27"] cell is handed to
 * parseCharacters(); the children of any other table go to parseStaff().
 *
 * @param string $contents The HTML source of the page.
 *
 * @return array Keys 'Characters' and 'Staff'; each value is a list, or null when nothing was found for it.
 */
public static function parse($contents)
{
    $page = new Crawler();
    $page->addHTMLContent($contents, 'UTF-8');

    // Both stay null (not an empty array) when no matching table exists.
    $characters = null;
    $staff = null;

    $tables = $page->filter('td[style="padding-left: 5px;"] table[width="100%"]');

    foreach ($tables as $tableNode) {
        $table = new Crawler($tableNode);

        if ($table->filter('td[width="27"]')->count() == 1) {
            $characters[] = self::parseCharacters($table);
            continue;
        }

        // Everything else is treated as a table of staff members.
        foreach ($table->children() as $staffRow) {
            $staff[] = self::parseStaff($staffRow);
        }
    }

    return array('Characters' => $characters, 'Staff' => $staff);
}
/**
 * Parse a forum topic page.
 *
 * @param string $contents The HTML source of the topic page.
 *
 * @return array 'pages' (int, the number found in brackets inside the pager,
 *               1 when there is no pager or no such number) and 'list' (one
 *               parseTopicDetails() result per post, empty when no post matches).
 */
public static function parseTopic($contents)
{
    $crawler = new Crawler();
    $crawler->addHTMLContent($contents, 'UTF-8');

    // Initialised up front so that a page without posts yields an empty list.
    $set = array();
    foreach ($crawler->filter('div[class="forum_border_around "]') as $item) {
        $set[] = self::parseTopicDetails($item);
    }

    $result = array();
    $result['pages'] = 1;

    // The pager is looked up before its text is read; text() must not be
    // called on an empty node list. The page total is the first number
    // wrapped in brackets.
    $pager = $crawler->filter('div[class="fl-r pb4"]');
    if ($pager->count() > 0 && preg_match('/\((\d+)\)/', $pager->text(), $pageMatch)) {
        $result['pages'] = (int) $pageMatch[1];
    }

    $result['list'] = $set;

    return $result;
}
/**
 * Remove every widget item that is the last child of its parent.
 *
 * The parsed content is copied into a standalone DOM document, the elements
 * matching "<widgetItem>:last-child" are detached there, and the crawler is
 * then reloaded from that document.
 *
 * @param $content string
 *
 * @return string The inner HTML of the first body element after the removal.
 */
private function removeLastItem($content)
{
    $crawler = new Crawler();
    $crawler->addHTMLContent($content, \Yii::$app->charset);

    // Import the crawler's first node below a synthetic "_root" element.
    $document = new \DOMDocument('1.0', \Yii::$app->charset);
    $root = $document->appendChild($document->createElement('_root'));
    $crawler->rewind();
    $root->appendChild($document->importNode($crawler->current(), true));

    $xpath = new \DOMXPath($document);
    $lastItems = $xpath->query(CssSelector::toXPath($this->widgetItem . ':last-child'));

    foreach ($lastItems as $lastItem) {
        $lastItem->parentNode->removeChild($lastItem);
    }

    // Swap the crawler's content for the modified document.
    $crawler->clear();
    $crawler->add($document);

    return $crawler->filter('body')->eq(0)->html();
}
/**
 * Clear HTML widgetBody. Required to work with zero or more items.
 *
 * Every element matching the widget item selector is detached from its parent.
 *
 * @param string $content
 *
 * @return string The crawler's html() after the items were removed.
 */
private function removeItems($content)
{
    $crawler = new Crawler();
    $crawler->addHTMLContent($content, \Yii::$app->charset);

    // Detaches each node it is given from that node's parent.
    $detach = function ($matched) {
        foreach ($matched as $element) {
            $element->parentNode->removeChild($element);
        }
    };

    $items = $crawler->filter($this->widgetItem);
    $items->each($detach);

    return $crawler->html();
}
/**
 * Fill an Anime with the personal details found on the anime edit form.
 *
 * The fields are located by their "add_anime_*" element ids: tags, start and
 * finish date, priority, storage type and value, rewatching flag, rewatch
 * count, rewatch value and comments.
 *
 * @param string $contents The HTML source of the edit form page.
 * @param Anime  $anime    The record to update.
 *
 * @return Anime The $anime instance that was passed in.
 */
public static function parseExtendedPersonal($contents, Anime $anime)
{
    $crawler = new Crawler();
    $crawler->addHTMLContent($contents, 'UTF-8');

    // Personal tags: a comma separated list inside the tags textarea.
    $personalTags = $crawler->filter('textarea[id="add_anime_tags"]')->text();
    if (strlen($personalTags) > 0) {
        $anime->setPersonalTags(array_map('trim', explode(',', $personalTags)));
    }

    // Start and finish dates. A date is skipped when its "unknown date"
    // checkbox is ticked.
    $isStarted = $crawler->filter('input[id="unknown_start"]')->attr('checked');
    $isEnded = $crawler->filter('input[id="unknown_end"]')->attr('checked');

    if ($isStarted != 'checked') {
        $start = self::readPartialDate($crawler, 'add_anime_start_date');
        if ($start !== null) {
            $anime->setWatchingStart($start);
        }
    }

    if ($isEnded != 'checked') {
        $end = self::readPartialDate($crawler, 'add_anime_finish_date');
        if ($end !== null) {
            $anime->setWatchingEnd($end);
        }
    }

    // Priority. Fetched in two stages like the other selects, so a form
    // without a selected option does not trigger a lookup on an empty list.
    $priority = $crawler->filter('select[id="add_anime_priority"] option:selected');
    if (count($priority)) {
        $anime->setPriority($priority->attr('value'));
    }

    // Storage. If storage isn't defined, nothing is marked as selected, so the
    // value is fetched in two stages as well.
    $storage = $crawler->filter('select[id="add_anime_storage_type"] option:selected');
    if (count($storage)) {
        $anime->setStorage($storage->attr('value'));
    }

    // Storage value - either number of discs or size in GB.
    $storageval = (double) $crawler->filter('input[id="add_anime_storage_value"]')->attr('value');
    if ($storageval > 0) {
        $anime->setStorageValue($storageval);
    }

    // Rewatching: true as soon as the checkbox carries a "checked" attribute.
    $rewatch = $crawler->filter('input[id="add_anime_is_rewatching"]')->attr('checked');
    $anime->setRewatching($rewatch != null);

    // Times rewatched.
    $rewatchCount = $crawler->filter('input[id="add_anime_num_watched_times"]')->attr('value');
    if ($rewatchCount > 0) {
        $anime->setRewatchCount($rewatchCount);
    }

    // Rewatch value.
    $rewatchValue = $crawler->filter('select[id="add_anime_rewatch_value"] option:selected');
    if (count($rewatchValue)) {
        $anime->setRewatchValue($rewatchValue->attr('value'));
    }

    // Comments.
    $comments = trim($crawler->filter('textarea[id="add_anime_comments"]')->text());
    if (strlen($comments)) {
        $anime->setPersonalComments($comments);
    }

    return $anime;
}

/**
 * Read a possibly partial date from the "<prefix>_year/_month/_day" selects.
 *
 * MAL allows users to put in just a year, a year and a month, or all three
 * values. A missing or empty month falls back to 6 and a missing or empty day
 * to 15. The day is only looked at when a month option is selected.
 *
 * @param Crawler $crawler  The crawler holding the edit form.
 * @param string  $idPrefix The common id prefix of the three selects.
 *
 * @return DateTime|false|null Null when no (non-empty) year is selected,
 *                             otherwise the result of DateTime::createFromFormat().
 */
private static function readPartialDate(Crawler $crawler, $idPrefix)
{
    $yearOption = $crawler->filter('select[id="' . $idPrefix . '_year"] option:selected');
    if ($yearOption->count() === 0) {
        return null;
    }

    $year = $yearOption->attr('value');
    if ($year === null || $year === '') {
        return null;
    }

    $month = 6;
    $day = 15;

    $monthOption = $crawler->filter('select[id="' . $idPrefix . '_month"] option:selected');
    if ($monthOption->count() > 0) {
        $selectedMonth = $monthOption->attr('value');
        if ($selectedMonth !== null && $selectedMonth !== '') {
            $month = $selectedMonth;
        }

        $dayOption = $crawler->filter('select[id="' . $idPrefix . '_day"] option:selected');
        if ($dayOption->count() > 0) {
            $selectedDay = $dayOption->attr('value');
            if ($selectedDay !== null && $selectedDay !== '') {
                $day = $selectedDay;
            }
        }
    }

    return DateTime::createFromFormat('Y-n-j', "{$year}-{$month}-{$day}");
}
/**
 * Fill a Manga with the personal details found on the manga edit form.
 *
 * The fields are located by their "add_manga_*" element ids: tags, start and
 * finish date, priority, rereading flag, reread count, reread value and
 * comments.
 *
 * @param string $contents The HTML source of the edit form page.
 * @param Manga  $manga    The record to update.
 *
 * @return Manga The $manga instance that was passed in.
 */
public static function parseExtendedPersonal($contents, Manga $manga)
{
    $crawler = new Crawler();
    $crawler->addHTMLContent($contents, 'UTF-8');

    // Personal tags: a comma separated list inside the tags textarea.
    $personalTags = $crawler->filter('textarea[id="add_manga_tags"]')->text();
    if (strlen($personalTags) > 0) {
        $manga->setPersonalTags(array_map('trim', explode(',', $personalTags)));
    }

    // Start and finish dates. A date is skipped when its "unknown date"
    // checkbox is ticked.
    $isStarted = $crawler->filter('input[id="unknown_start"]')->attr('checked');
    $isEnded = $crawler->filter('input[id="unknown_end"]')->attr('checked');

    if ($isStarted != 'checked') {
        $start = self::readPartialDate($crawler, 'add_manga_start_date');
        if ($start !== null) {
            $manga->setReadingStart($start);
        }
    }

    if ($isEnded != 'checked') {
        $end = self::readPartialDate($crawler, 'add_manga_finish_date');
        if ($end !== null) {
            $manga->setReadingEnd($end);
        }
    }

    // Priority: only set when an option is actually selected.
    $priorityList = $crawler->filter('select[id="add_manga_priority"] option:selected');
    if (count($priorityList)) {
        $manga->setPriority($priorityList->attr('value'));
    }

    // Rereading: true as soon as the checkbox carries a "checked" attribute.
    $reread = $crawler->filter('input[id="add_manga_is_rereading"]')->attr('checked');
    $manga->setRereading($reread != null);

    // Times reread.
    $rereadCount = $crawler->filter('input[id="add_manga_num_read_times"]')->attr('value');
    if ($rereadCount > 0) {
        $manga->setRereadCount($rereadCount);
    }

    // Reread value.
    $rereadValue = $crawler->filter('select[id="add_manga_reread_value"] option:selected');
    if (count($rereadValue)) {
        $manga->setRereadValue($rereadValue->attr('value'));
    }

    // Comments.
    $comments = trim($crawler->filter('textarea[id="add_manga_comments"]')->text());
    if (strlen($comments)) {
        $manga->setPersonalComments($comments);
    }

    return $manga;
}

/**
 * Read a possibly partial date from the "<prefix>_year/_month/_day" selects.
 *
 * MAL allows users to put in just a year, a year and a month, or all three
 * values. A missing or empty month falls back to 6 and a missing or empty day
 * to 15, so a selected but empty option no longer yields a date string such as
 * "2013--" that cannot be parsed. The day is only looked at when a month
 * option is selected.
 *
 * @param Crawler $crawler  The crawler holding the edit form.
 * @param string  $idPrefix The common id prefix of the three selects.
 *
 * @return DateTime|false|null Null when no (non-empty) year is selected,
 *                             otherwise the result of DateTime::createFromFormat().
 */
private static function readPartialDate(Crawler $crawler, $idPrefix)
{
    $yearOption = $crawler->filter('select[id="' . $idPrefix . '_year"] option:selected');
    if ($yearOption->count() === 0) {
        return null;
    }

    $year = $yearOption->attr('value');
    if ($year === null || $year === '') {
        return null;
    }

    $month = 6;
    $day = 15;

    $monthOption = $crawler->filter('select[id="' . $idPrefix . '_month"] option:selected');
    if ($monthOption->count() > 0) {
        $selectedMonth = $monthOption->attr('value');
        if ($selectedMonth !== null && $selectedMonth !== '') {
            $month = $selectedMonth;
        }

        $dayOption = $crawler->filter('select[id="' . $idPrefix . '_day"] option:selected');
        if ($dayOption->count() > 0) {
            $selectedDay = $dayOption->attr('value');
            if ($selectedDay !== null && $selectedDay !== '') {
                $day = $selectedDay;
            }
        }
    }

    return DateTime::createFromFormat('Y-n-j', "{$year}-{$month}-{$day}");
}
/**
 * Parse the HTML of a person page into a Person record.
 *
 * Fills in the id, name, image, the left-column details (given/family name,
 * alternate names, birthday, website, favorites count, "more details") and
 * the right-column sections (voice acting roles, anime staff positions and
 * published manga).
 *
 * @param string $contents The HTML source of the person page.
 *
 * @return Person
 */
public static function parse($contents)
{
    $crawler = new Crawler();
    $crawler->addHTMLContent($contents, 'UTF-8');

    $personrecord = new Person();

    // Rewrites an image URL taken from a "data-src" attribute: the part from
    // the first matching "r.../" up to the "?..." suffix is replaced by the
    // second captured group only.
    $imagePattern = '/r(.+?)\\/(.+?)\\?(.+?)$/';

    # Person ID.
    # Example:
    # <input type="hidden" name="vaid" value="185">
    $personrecord->setId((int) $crawler->filter('input[name="vaid"]')->attr('value'));

    # Name
    # Example:
    # <div><h1 class="h1">Hanazawa, Kana</h1></div>
    $personrecord->setName(trim($crawler->filter('div h1')->text()));

    # Image
    # Example:
    # <a href="/people/185/Kana_Hanazawa/pictures"><img src="http://cdn.myanimelist.net/images/voiceactors/3/43500.jpg" alt="Hanazawa, Kana"></a>
    $personrecord->setImageUrl($crawler->filter('div#content tr td div img')->attr('src'));

    $leftcolumn = $crawler->filterXPath('//div[@id="content"]/table/tr/td[@class="borderClass"]');

    # Given name
    $extracted = $leftcolumn->filterXPath('//span[text()="Given name:"]');
    if ($extracted->count() > 0) {
        $personrecord->setGivenName(trim(str_replace($extracted->text(), '', $extracted->parents()->text())));
    }

    # Family name
    # MAL messed this field up. It's not wrapped in a div, so the text is floating out in the td.
    $extracted = $leftcolumn->filterXPath('//span[text()="Family name:"]');
    if ($extracted->count() > 0) {
        $matches = array();

        # This regex matches "Family name:..." until it hits Birthday/Website/Alternate [name], one of which should
        # be the field following the Family name field
        preg_match('/Family name:.*?(?:(?!Birthday|Website|Alternate).)*/', $leftcolumn->text(), $matches);

        if (count($matches) > 0) {
            $personrecord->setFamilyName(trim(str_replace($extracted->text(), '', $matches[0])));
        }
    }

    # Alternate names
    $extracted = $leftcolumn->filterXPath('//span[text()="Alternate names:"]');
    if ($extracted->count() > 0) {
        $text = trim(str_replace($extracted->text(), '', $extracted->parents()->text()));
        $alternateNames = explode(', ', $text);
        $personrecord->setAlternateNames($alternateNames);
    }

    # Birthday
    $extracted = $leftcolumn->filterXPath('//span[text()="Birthday:"]');
    if ($extracted->count() > 0) {
        $dateStr = trim(str_replace($extracted->text(), '', $extracted->parents()->text()));

        // Collapse runs of spaces into a single one; MAL seems to add 2 spaces in some dates.
        // Without this, explode(' ') below yields an extra empty component and the date is dropped.
        $dateStr = preg_replace('/ {2,}/', ' ', $dateStr);
        $dateComponents = explode(' ', $dateStr);

        if (strpos($dateStr, ',') === false) {
            if (strlen($dateStr) === 4) {
                // Only a year, Example ID 11746
                $personrecord->setBirthday(DateTime::createFromFormat('Y m d', $dateStr . ' 01 01'), 'year');
            } elseif (count($dateComponents) === 2) {
                // Month + Year, i.e. "Feb 1999", Example ID 7277
                $month = $dateComponents[0];
                $year = $dateComponents[1];
                $personrecord->setBirthday(DateTime::createFromFormat('M Y d', $month . ' ' . $year . ' 01'), 'month');
            }
        } else {
            if (count($dateComponents) === 2) {
                // Month + Day, i.e. "Jun 15,", Example ID 2608
                $month = $dateComponents[0];
                // Drop the trailing comma from the day component.
                $day = substr($dateComponents[1], 0, -1);
                $personrecord->setBirthday(DateTime::createFromFormat('M d Y', $month . ' ' . $day . ' 1970'), 'dayMonth');
            } elseif (count($dateComponents) === 3) {
                // Full date, i.e. "Feb 25, 1989", Example ID 185
                $personrecord->setBirthday(DateTime::createFromFormat('M j, Y', $dateStr), 'day');
            }
        }
    }

    # Website
    # This isn't in a div, but the anchor element is the next sibling of the span
    $extracted = $leftcolumn->filterXPath('//span[text()="Website:"]');
    if ($extracted->count() > 0) {
        $personrecord->setWebsiteUrl(trim(str_replace($extracted->text(), '', $extracted->nextAll()->attr("href"))));
    }

    # Favorites count
    $extracted = $leftcolumn->filterXPath('//span[text()="Member Favorites:"]');
    if ($extracted->count() > 0) {
        $personrecord->setFavoritedCount(trim(str_replace($extracted->text(), '', $extracted->parents()->text())));
    }

    # More Details
    # Note: CSS classes are misspelled, need to keep an eye on this
    $extracted = $leftcolumn->filter('div[class="people-informantion-more js-people-informantion-more"]');
    if ($extracted->count() > 0) {
        $personrecord->setMoreDetails($extracted->html());
    }

    # Extract from sections on the right column: Voice acting roles, anime staff positions, published manga
    $rightcolumn = $crawler->filter('div[id="content"] td[style="padding-left: 5px;"]');

    // Voice acting roles
    $vaRoles = $rightcolumn->filterXPath('//div[text()="Voice Acting Roles"]');
    if ($vaRoles->count() > 0) {
        $rolesArray = array();

        // Iterate over each row in the table
        foreach ($vaRoles->nextAll()->children() as $item) {
            $node = new Crawler($item);

            // Fill in the character details
            $characterAnchor = $node->filterXPath('//td[3]/a');
            $characterName = $characterAnchor->text();
            $characterUrl = $characterAnchor->attr('href');
            $isMainCharacter = strpos($node->filterXPath('//td[3]/div')->text(), "Main") !== false;
            $characterImage = preg_replace($imagePattern, '$2', $node->filterXPath('//td[4]/div/a/img')->attr('data-src'));

            // Reset for every row so an unparsable URL never inherits the id of the previous row.
            $characterId = null;
            if (preg_match('/\\/(character)\\/(\\d+)\\/.*?/', $characterUrl, $urlParts) === 1) {
                $characterId = (int) $urlParts[2];
            }

            $itemArray = array(
                'id' => $characterId,
                'name' => $characterName,
                'image_url' => $characterImage,
                'main_role' => $isMainCharacter
            );

            $itemArray['anime'] = new Anime();
            $animeDetails = $node->filterXPath('//td[2]/a');

            // Fill in the anime details
            $itemArray['anime']->setImageUrl(preg_replace($imagePattern, '$2', $node->filterXPath('//td[1]/div/a/img')->attr('data-src')));
            $itemArray['anime']->setTitle($animeDetails->text());

            if (preg_match('/\\/(anime)\\/(\\d+)\\/.*?/', $animeDetails->attr('href'), $urlParts) === 1) {
                $itemArray['anime']->setId((int) $urlParts[2]);
            }

            $rolesArray[] = $itemArray;
        }

        $personrecord->setVoiceActingRoles($rolesArray);
    }

    // Anime staff positions
    $staffPositions = $rightcolumn->filterXPath('//div[text()="Anime Staff Positions"]');
    if ($staffPositions->count() > 0) {
        $positionsArray = array();

        // Iterate over each row in the table
        foreach ($staffPositions->nextAll()->children() as $item) {
            $node = new Crawler($item);

            // Fill in the position details
            $position = $node->filterXPath('//td[2]/div/small')->text();
            $positionDetails = $node->filterXPath('//td[2]/div')->text();

            // Details are wrapped in parenthesis, so we'll find those parens and grab what's inside
            // Sometimes what comes back is malformed - i.e. "ep. 1-15)," - it appears malformed on the site too
            if (preg_match('/\\((.*)\\)/', $positionDetails, $positionDetailsParts) === 1) {
                $positionDetails = $positionDetailsParts[1];
            } else {
                $positionDetails = null;
            }

            $itemArray = array('position' => $position, 'details' => $positionDetails);

            $itemArray['anime'] = new Anime();
            $animeDetails = $node->filterXPath('//td[2]/a');

            // Fill in the anime details
            $itemArray['anime']->setImageUrl(preg_replace($imagePattern, '$2', $node->filterXPath('//td[1]/div/a/img')->attr('data-src')));
            $itemArray['anime']->setTitle($animeDetails->text());

            if (preg_match('/\\/(anime)\\/(\\d+)\\/.*?/', $animeDetails->attr('href'), $urlParts) === 1) {
                $itemArray['anime']->setId((int) $urlParts[2]);
            }

            $positionsArray[] = $itemArray;
        }

        $personrecord->setAnimeStaffPositions($positionsArray);
    }

    // Published manga
    $mangaPositions = $rightcolumn->filterXPath('//div[text()="Published Manga"]');
    if ($mangaPositions->count() > 0) {
        $positionsArray = array();

        // Iterate over each row in the table
        foreach ($mangaPositions->nextAll()->children() as $item) {
            $node = new Crawler($item);

            // Fill in the position
            $position = $node->filterXPath('//td[2]/div/small')->text();

            $itemArray = array('position' => $position);

            $itemArray['manga'] = new Manga();
            $mangaDetails = $node->filterXPath('//td[2]/a');

            // Fill in the manga details
            $itemArray['manga']->setImageUrl(preg_replace($imagePattern, '$2', $node->filterXPath('//td[1]/div/a/img')->attr('data-src')));
            $itemArray['manga']->setTitle($mangaDetails->text());

            if (preg_match('/\\/(manga)\\/(\\d+)\\/.*?/', $mangaDetails->attr('href'), $urlParts) === 1) {
                $itemArray['manga']->setId((int) $urlParts[2]);
            }

            $positionsArray[] = $itemArray;
        }

        $personrecord->setPublishedManga($positionsArray);
    }

    return $personrecord;
}
/**
 * Remember the timezone selected on the MAL settings page.
 *
 * Loads the given HTML, looks up the "option[selected]" element and stores
 * its text in the static $timeZone property. Per the original notes, MAL
 * picks this setting based on the country the user joined from.
 *
 * @param string $settings The HTML source of the settings page that contains the timezone.
 *
 * @return void
 */
public static function setTimeZone($settings)
{
    $settingsPage = new Crawler();
    $settingsPage->addHTMLContent($settings, 'UTF-8');

    $selectedOption = $settingsPage->filter('option[selected]');

    self::$timeZone = $selectedOption->text();
}
/**
 * Parse a history page into a list of history entries.
 *
 * Every table row that contains a link produces one entry with the keys
 * "item" (an Anime or Manga carrying the title, id and progress count),
 * "type" ("anime" or "manga") and "time_updated" (formatted by Date::formatTime).
 *
 * @param string $contents The HTML source of the history page.
 *
 * @return array The history entries; an empty array when no row qualifies.
 */
public static function parseHistory($contents)
{
    $page = new Crawler();
    $page->addHTMLContent($contents, 'UTF-8');

    // Start from an empty array so callers always get a non-null result.
    $entries = array();

    foreach ($page->filter('table')->filter('tr') as $rowElement) {
        $row = new Crawler($rowElement);
        $link = $row->filter('a');

        // Rows without a link are the MAL generated strings; skip them.
        if ($link->count() <= 0) {
            continue;
        }

        $href = $link->attr('href');

        // The link target tells us which kind of entry this row describes.
        if (strpos($href, 'anime') !== false) {
            $type = 'anime';
            $item = new Anime();
            $item->setWatchedEpisodes((int) $row->filter('strong')->text());
        } else {
            $type = 'manga';
            $item = new Manga();
            $item->setChaptersRead((int) $row->filter('strong')->text());
        }

        $item->setTitle($link->text());
        $item->setId((int) str_replace('/' . $type . '.php?id=', '', $href));

        // The second cell holds the timestamp; its first character is dropped before formatting.
        $updatedAt = Date::formatTime(substr($row->filter('td')->eq(1)->text(), 1));

        $entries[] = array(
            'item' => $item,
            'type' => $type,
            'time_updated' => $updatedAt
        );
    }

    return $entries;
}