/**
 * Parse the episode list table of an anime page into Episode objects.
 *
 * @param string $contents HTML of the episode list page (handled as UTF-8)
 *
 * @return array One Episode per "episode-list-data" table row, in document order
 */
public static function parse($contents)
 {
     $crawler = new Crawler();
     $crawler->addHTMLContent($contents, 'UTF-8');
     $rows = $crawler->filter('table[class="mt8 episode_list js-watch-episode-list ascend"] tr[class="episode-list-data"]');
     $result = array();
     foreach ($rows as $episodeItem) {
         $row = new Crawler($episodeItem);
         $episode = new Episode();
         $episode->setNumber($row->filter('td[class="episode-number nowrap"]')->text());
         $episode->setTitle($row->filter('td[class="episode-title"] a')->text());
         // MAL does not always provide the air date!
         $date = $row->filter('td[class="episode-aired"]')->text();
         if ($date !== 'N/A') {
             // createFromFormat is static, no throwaway DateTime instance is needed.
             $episode->setAirDate(DateTime::createFromFormat('M j, Y', $date));
         }
         $extracted = $row->filter('td[class="episode-title"] span[class="di-ib"]');
         // Check count() first: text() must not be called on an empty node list.
         if ($extracted->count() > 0 && $extracted->text() !== '') {
             // Start from a clean array so titles of a previous row never leak into this one.
             $otherTitles = array();
             // The span text is split on "(": the part before is used as the English title,
             // the part after (closing parenthesis removed) as the Japanese title.
             $parts = explode('(', $extracted->text());
             // explode() always yields at least one element; strip the NBSP bytes around it.
             $otherTitles['english'] = array(trim($parts[0], chr(0xc2) . chr(0xa0)));
             if (count($parts) > 1) {
                 $otherTitles['japanese'] = array(trim(str_replace(')', '', $parts[1])));
             }
             $episode->setOtherTitles($otherTitles);
         }
         $result[] = $episode;
     }
     return $result;
 }
 /**
  * Parse the history page of a single anime or manga.
  *
  * @param string $contents HTML of the history page (handled as UTF-8)
  * @param mixed  $id       Identifier of the title; cast to int before it is stored
  * @param string $type     'anime' builds Anime items, any other value builds Manga items
  *
  * @return array List of arrays with the keys 'item', 'type' and 'time_updated'
  */
 public static function parse($contents, $id, $type)
 {
     $document = new Crawler();
     $document->addHTMLContent($contents, 'UTF-8');
     $entries = $document->filter('div[class="spaceit_pad"]');
     // '$2' refers to a group the pattern does not define, so the matched
     // " <word> Details" part is simply removed from the header text.
     $heading = $document->filter('div[class="normal_header"]')->text();
     $title = preg_replace('/ (\\w+?) Details/', '$2', $heading);
     $isAnime = $type === 'anime';
     $history = array();
     foreach ($entries as $entry) {
         $row = new Crawler($entry);
         // The row text is split on single spaces and read by word position:
         // word 1 is the progress number, words 4 and 6 feed the timestamp.
         $words = explode(' ', $row->text());
         if ($isAnime) {
             $item = new Anime();
         } else {
             $item = new Manga();
         }
         $item->setId((int) $id);
         $item->setTitle($title);
         if ($isAnime) {
             $item->setWatchedEpisodes((int) $words[1]);
         } else {
             $item->setChaptersRead((int) $words[1]);
         }
         $history[] = array(
             'item' => $item,
             'type' => $type,
             'time_updated' => Date::formatTime($words[4] . ' ' . $words[6]),
         );
     }
     return $history;
 }
 /**
  * Render the home view together with the crowdfunding progress bar data.
  *
  * The progress data is scraped from the startnext widget page: the percentage
  * comes from the inline style of the ".bar.bar-1" element, the label from the
  * first ".status-text span". When the widget does not answer with HTTP 200, or
  * the expected markup is missing, the percentage stays 0 and the text stays null.
  *
  * @return mixed Result of view('home', ...)
  */
 public function show()
 {
     $options = Config::get('onepager.options');
     // NOTE(review): the trailing 5 looks like a cache lifetime, which suggests
     // Cache::remember() was intended; confirm against the cache API in use.
     $progressBar = Cache::get('progressBar', function () {
         $c = curl_init('https://www.startnext.com/sanktionsfrei/widget/?w=200&h=300&l=de');
         curl_setopt($c, CURLOPT_RETURNTRANSFER, true);
         $html = curl_exec($c);
         if (curl_error($c)) {
             die(curl_error($c));
         }
         $status = curl_getinfo($c, CURLINFO_HTTP_CODE);
         curl_close($c);
         // Defaults used whenever the widget cannot be read.
         $percent = 0;
         $progressText = null;
         if ($status == 200) {
             $crawler = new Crawler();
             $crawler->addHTMLContent($html, 'UTF-8');
             // get the percentage for the progressbar
             $styleString = $crawler->filter('.bar.bar-1')->attr('style');
             $stringArray = explode(':', $styleString);
             if (isset($stringArray[1])) {
                 // Drop the last two characters of the style value (presumably a unit plus ";").
                 $percent = substr($stringArray[1], 0, -2);
             }
             // get the text for the progressbar
             $textArray = $crawler->filter('.status-text span')->extract(['_text']);
             if (isset($textArray[0])) {
                 $progressText = $textArray[0];
             }
         }
         return ['percent' => $percent, 'progressText' => $progressText];
     }, 5);
     return view('home', ['options' => $options, 'percent' => $progressBar['percent'], 'progressText' => $progressBar['progressText']]);
 }
 /**
  * Parse every review container found in the given HTML.
  *
  * @param string $contents HTML of the page (handled as UTF-8)
  * @param mixed  $type     Passed through unchanged to parseReviews()
  *
  * @return array One parseReviews() result per matched container
  */
 public static function parse($contents, $type)
 {
     $document = new Crawler();
     $document->addHTMLContent($contents, 'UTF-8');
     $reviews = array();
     foreach ($document->filter('div [class="borderDark pt4 pb8 pl4 pr4 mb8"]') as $reviewNode) {
         $reviews[] = self::parseReviews($reviewNode, $type);
     }
     return $reviews;
 }
Exemple #5
0
 /**
  * Parse every "ranking-list" table row found in the given HTML.
  *
  * @param string $contents HTML of the page (handled as UTF-8)
  * @param mixed  $type     Passed through unchanged to parseRecord()
  *
  * @return array One parseRecord() result per row; empty array when no row matches
  */
 public static function parse($contents, $type)
 {
     // Initialise the list so a page without matching rows returns an empty
     // array instead of an undefined variable.
     $resultset = array();
     $crawler = new Crawler();
     $crawler->addHTMLContent($contents, 'UTF-8');
     //Filter into a set of table rows from the source HTML table
     $mediaitems = $crawler->filter('tr[class="ranking-list"]');
     foreach ($mediaitems as $item) {
         $resultset[] = self::parseRecord($item, $type);
     }
     return $resultset;
 }
 /**
  * Fetch the login page and read the CSRF token from its meta tag.
  *
  * @return string|null Content of the csrf_token meta tag, or null when the page has none
  */
 private function getCsrfToken()
 {
     $loginPage = $this->fetch('/login.php');
     $page = new Crawler();
     $page->addHTMLContent($loginPage, 'UTF-8');
     $csrfToken = null;
     foreach ($page->filter('meta[name="csrf_token"]') as $meta) {
         $nameAttribute = $meta->attributes->getNamedItem('name');
         if ($nameAttribute === null || $nameAttribute->value != 'csrf_token') {
             continue;
         }
         // No early exit: when several tags match, the last one wins.
         $csrfToken = $meta->attributes->getNamedItem('content')->value;
     }
     return $csrfToken;
 }
 /**
  * Parse every <entry> element of the given markup.
  *
  * @param string $contents Markup to parse (handled as UTF-8)
  * @param string $type     'anime' uses parseAnime(), any other value uses parseManga()
  *
  * @return array One parsed item per <entry> element
  */
 public static function parse($contents, $type)
 {
     $document = new Crawler();
     $document->addHTMLContent($contents, 'UTF-8');
     $isAnime = $type === 'anime';
     $parsed = array();
     foreach ($document->filter('entry') as $entry) {
         $parsed[] = $isAnime ? self::parseAnime($entry) : self::parseManga($entry);
     }
     return $parsed;
 }
 /**
  * Parse a recommendations page into anime items with their recommendations.
  *
  * @param string $contents HTML of the page (handled as UTF-8)
  *
  * @return array List of arrays with the keys 'item' (Anime) and 'recommendations'
  */
 public static function parse($contents)
 {
     $document = new Crawler();
     $document->addHTMLContent($contents, 'UTF-8');
     $blocks = $document->filter('div[class="borderClass"]');
     $recommendations = array();
     foreach ($blocks as $block) {
         $entry = new Crawler($block);
         // The id is the anchor's id attribute with the "#raArea1" marker removed.
         $anchorId = $entry->filter('a')->attr('id');
         // Only the second captured group of the lazy-loaded image URL is kept.
         $imageSource = $entry->filter('img')->attr('data-src');
         $anime = new Anime();
         $anime->setId(str_replace('#raArea1', '', $anchorId));
         $anime->setImageUrl(preg_replace('/r(.+?)\\/(.+?)\\?(.+?)$/', '$2', $imageSource));
         $anime->setTitle($entry->filter('strong')->text());
         $recommendations[] = array(
             'item' => $anime,
             'recommendations' => self::parseInformation($entry),
         );
     }
     return $recommendations;
 }
 /**
  * Parse the schedule page into one list of records per day bucket.
  *
  * @param string $contents HTML of the schedule page (handled as UTF-8)
  *
  * @return array ScheduleParser::parseDay() results keyed by 'monday' .. 'sunday',
  *               'other' and 'unknown', in that order
  */
 public static function parse($contents)
 {
     $document = new Crawler();
     $document->addHTMLContent($contents, 'UTF-8');
     // Every bucket shares the same class prefix and the same record container.
     $classPrefix = 'seasonal-anime-list js-seasonal-anime-list js-seasonal-anime-list-key-';
     $recordSelector = 'div[class="seasonal-anime js-seasonal-anime"]';
     $buckets = array('monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday', 'other', 'unknown');
     $schedule = array();
     foreach ($buckets as $bucket) {
         $selector = 'div[class="' . $classPrefix . $bucket . ' clearfix"] ' . $recordSelector;
         $schedule[$bucket] = ScheduleParser::parseDay($document->filter($selector));
     }
     return $schedule;
 }
 /**
  * Parse the rows of the content table, ignoring the first row.
  *
  * @param string $contents HTML of the page (handled as UTF-8)
  * @param mixed  $type     Passed through unchanged to parseRecord()
  *
  * @return array One parseRecord() result per row after the first
  */
 public static function parse($contents, $type)
 {
     $document = new Crawler();
     $document->addHTMLContent($contents, 'UTF-8');
     $tableRows = $document->filterXPath('//div[@id="content"]/div/table/tr');
     $records = array();
     $headerSkipped = false;
     foreach ($tableRows as $tableRow) {
         // The first <tr> is the menu bar, not a record.
         if (!$headerSkipped) {
             $headerSkipped = true;
             continue;
         }
         $records[] = self::parseRecord($tableRow, $type);
     }
     return $records;
 }
 /**
  * Parse a single private message page into a Messages object.
  *
  * @param string $contents HTML of the message page; "<br />" tags are removed before parsing
  * @param mixed  $id       Identifier of the message; cast to int before it is stored
  *
  * @return Messages The populated message
  */
 public static function parseMessage($contents, $id)
 {
     $crawler = new Crawler();
     $crawler->addHTMLContent(str_replace('<br />', '', $contents), 'UTF-8');
     $message = new Messages();
     $message->setId((int) $id);
     # Action id of the message: value of the "id" input inside the delete form.
     $actionId = $crawler->filterXPath('//form[contains(@action,"delete")]/input[@name="id"]');
     $message->setActionId((int) $actionId->attr('value'));
     # Thread id of the message: taken from the onclick handler of the Reply button.
     # Example:
     # <input type="button" onclick="document.location='/mymessages.php?go=send&replyid=21193061&threadid=16092543&toname=Ratan12'" class="inputButton" value=" Reply ">
     $replyButton = $crawler->filterXPath('//div/input[contains(@value,"Reply")]');
     $onClick = $replyButton->attr('onclick');
     if (preg_match('/threadid=([\\d]*)/', $onClick, $threadMatches)) {
         $message->setThreadId((int) $threadMatches[1]);
     }
     # Username of the sender.
     # Example:
     # <a href="http://myanimelist.net/profile/ratan12">ratan12</a>
     $message->setUsername($crawler->filterXPath('//td[@class="dialog-text"]/h2/a')->text());
     # Time of the received message.
     # Example:
     # <small>50 minutes ago</small>
     $time = $crawler->filterXPath('//td[@class="dialog-text"]/div[contains(@class,"lightLink")]');
     if (count($time) > 0) {
         $message->setTime($time->text());
     }
     # Subject.
     # Example:
     # <div style="margin-bottom: 4px; font-weight: bold;">re: coolmessage</div>
     $messageSubject = $crawler->filterXPath('//td[@class="dialog-text"]/div[contains(@class,"fw-b")]')->text();
     $message->setSubject($messageSubject);
     # Message: everything between the end of the subject div and the next div.
     # The subject div is located by its "fw-b" class (the same marker the subject
     # lookup uses) instead of a hard-coded subject text, so the body is found
     # for every message and not only for one literally titled "Test Test".
     $messageText = $crawler->filterXPath('//td[@class="dialog-text"]');
     $bodyPattern = '#<div[^>]*\bclass="[^"]*fw-b[^"]*"[^>]*>.*?</div>(.*?)<div#s';
     if (preg_match($bodyPattern, $messageText->html(), $messageBody)) {
         $message->setMessage($messageBody[1]);
     }
     return $message;
 }
 /**
  * Parse the characters and staff tables of a page.
  *
  * @param string $contents HTML of the page (handled as UTF-8)
  *
  * @return array Array with the keys 'Characters' and 'Staff'; each value is a list
  *               of parsed entries, or null when no entry of that kind was found
  */
 public static function parse($contents)
 {
     $document = new Crawler();
     $document->addHTMLContent($contents, 'UTF-8');
     $tables = $document->filter('td[style="padding-left: 5px;"] table[width="100%"]');
     // Both stay null unless at least one entry is appended below.
     $characters = null;
     $staff = null;
     foreach ($tables as $table) {
         $section = new Crawler($table);
         // A table with exactly one 27px wide cell is a character table;
         // everything else is treated as the staff table.
         if ($section->filter('td[width="27"]')->count() == 1) {
             $characters[] = self::parseCharacters($section);
             continue;
         }
         foreach ($section->children() as $member) {
             $staff[] = self::parseStaff($member);
         }
     }
     return array('Characters' => $characters, 'Staff' => $staff);
 }
 /**
  * Parse a forum topic page into its posts and the number of pages.
  *
  * @param string $contents HTML of the topic page (handled as UTF-8)
  *
  * @return array Array with the keys 'pages' (int, at least 1) and 'list'
  *               (one parseTopicDetails() result per post, empty when none match)
  */
 public static function parseTopic($contents)
 {
     $crawler = new Crawler();
     $crawler->addHTMLContent($contents, 'UTF-8');
     // Initialise the list so a page without posts yields an empty list
     // instead of an undefined variable.
     $set = array();
     $topicitems = $crawler->filter('div[class="forum_border_around "]');
     foreach ($topicitems as $item) {
         $set[] = self::parseTopicDetails($item);
     }
     $result = array();
     // Default for pages without (readable) pagination.
     $result['pages'] = 1;
     $pagination = $crawler->filter('div[class="fl-r pb4"]');
     // Only read the text when the pagination element exists.
     if ($pagination->count() > 0) {
         // The page count is the number that follows " (" in the pagination text
         // (presumably formatted like "Pages (12)"; verify against a live page).
         if (preg_match('/ \\((\\d+)/', $pagination->text(), $pageMatches)) {
             $result['pages'] = (int) $pageMatches[1];
         }
     }
     $result['list'] = $set;
     return $result;
 }
 /**
  * Remove the elements matching "<widgetItem>:last-child" from an HTML fragment.
  *
  * The first node of the parsed content is imported into a fresh document below
  * a "_root" element, the matching elements are detached from their parents and
  * the inner HTML of the first <body> of the result is returned.
  *
  * @param $content string
  *
  * @return string
  */
 private function removeLastItem($content)
 {
     $source = new Crawler();
     $source->addHTMLContent($content, \Yii::$app->charset);
     $dom = new \DOMDocument('1.0', \Yii::$app->charset);
     $wrapper = $dom->appendChild($dom->createElement('_root'));
     // Copy the crawler's first node (deep) into the working document.
     $source->rewind();
     $wrapper->appendChild($dom->importNode($source->current(), true));
     $xpath = new \DOMXPath($dom);
     $lastItems = $xpath->query(CssSelector::toXPath($this->widgetItem . ':last-child'));
     foreach ($lastItems as $lastItem) {
         $lastItem->parentNode->removeChild($lastItem);
     }
     // Point the crawler at the modified document before serialising.
     $source->clear();
     $source->add($dom);
     return $source->filter('body')->eq(0)->html();
 }
 /**
  * Strip every widget item from the HTML widget body, so the remaining markup
  * can be used with zero or more items.
  *
  * @param string $content
  *
  * @return string HTML of the crawler after all elements matching widgetItem were removed
  */
 private function removeItems($content)
 {
     $page = new Crawler();
     $page->addHTMLContent($content, \Yii::$app->charset);
     $matches = $page->filter($this->widgetItem);
     $matches->each(function ($matched) {
         // Detach every node of the match from its parent.
         foreach ($matched as $element) {
             $element->parentNode->removeChild($element);
         }
     });
     return $page->html();
 }
 /**
  * Read the personal list details from the anime edit form and store them on $anime.
  *
  * The values are taken from the form controls with these ids: add_anime_tags,
  * unknown_start / unknown_end, add_anime_start_date_* / add_anime_finish_date_*,
  * add_anime_priority, add_anime_storage_type, add_anime_storage_value,
  * add_anime_is_rewatching, add_anime_num_watched_times, add_anime_rewatch_value
  * and add_anime_comments.
  *
  * @param string $contents HTML of the edit form (handled as UTF-8)
  * @param Anime  $anime    Object that receives the parsed values
  *
  * @return Anime The same $anime instance
  */
 public static function parseExtendedPersonal($contents, Anime $anime)
 {
     $crawler = new Crawler();
     $crawler->addHTMLContent($contents, 'UTF-8');
     #Personal tags: comma separated list inside a textarea, e.g. "action, sci-fi"
     $personalTags = $crawler->filter('textarea[id="add_anime_tags"]')->text();
     if (strlen($personalTags) > 0) {
         $anime->setPersonalTags(array_map('trim', explode(',', $personalTags)));
     }
     #Start and Finish Dates
     #Each date has a year, month and day select plus an "unknown date" checkbox.
     $isStarted = $crawler->filter('input[id="unknown_start"]')->attr('checked');
     $isEnded = $crawler->filter('input[id="unknown_end"]')->attr('checked');
     if ($isStarted != 'checked') {
         $watchingStart = self::parsePartialListDate($crawler, 'add_anime_start_date');
         if ($watchingStart !== null) {
             $anime->setWatchingStart($watchingStart);
         }
     }
     if ($isEnded != 'checked') {
         $watchingEnd = self::parsePartialListDate($crawler, 'add_anime_finish_date');
         if ($watchingEnd !== null) {
             $anime->setWatchingEnd($watchingEnd);
         }
     }
     #Priority
     $priority = $crawler->filter('select[id="add_anime_priority"] option:selected')->attr('value');
     $anime->setPriority($priority);
     #Storage
     //Note that if storage isn't defined, nothing will be marked as selected. We thus have to get the value in two stages to avoid raising an exception.
     $storage = $crawler->filter('select[id="add_anime_storage_type"] option:selected');
     if (count($storage)) {
         $anime->setStorage($storage->attr('value'));
     }
     #Storage Value - Either number of discs or size in GB
     $storageval = (double) $crawler->filter('input[id="add_anime_storage_value"]')->attr('value');
     if ($storageval > 0) {
         $anime->setStorageValue($storageval);
     }
     #Rewatching
     #<label><input type="checkbox" id="add_anime_is_rewatching" name="add_anime[is_rewatching]" value="1" checked="checked">
     $rewatch = $crawler->filter('input[id="add_anime_is_rewatching"]')->attr('checked');
     $anime->setRewatching($rewatch != null);
     #Times Rewatched
     $rewatchCount = $crawler->filter('input[id="add_anime_num_watched_times"]')->attr('value');
     if ($rewatchCount > 0) {
         $anime->setRewatchCount($rewatchCount);
     }
     #Rewatch Value - like storage, possibly without a selected option
     $rewatchValue = $crawler->filter('select[id="add_anime_rewatch_value"] option:selected');
     if (count($rewatchValue)) {
         $anime->setRewatchValue($rewatchValue->attr('value'));
     }
     #Comments
     $comments = trim($crawler->filter('textarea[id="add_anime_comments"]')->text());
     if (strlen($comments)) {
         $anime->setPersonalComments($comments);
     }
     return $anime;
 }

 /**
  * Build a date from the "<prefix>_year", "<prefix>_month" and "<prefix>_day" selects.
  *
  * MAL allows users to put in just a year, a year and a month, or all three values.
  * A missing or empty month falls back to 6 and a missing or empty day to 15; the
  * day select is only consulted when a month option is selected.
  *
  * @param Crawler $crawler  Crawler holding the edit form
  * @param string  $idPrefix Common id prefix of the three selects
  *
  * @return DateTime|false|null Null when no year is selected or its value is empty,
  *                             otherwise the result of DateTime::createFromFormat()
  */
 private static function parsePartialListDate(Crawler $crawler, $idPrefix)
 {
     $yearOption = $crawler->filter('select[id="' . $idPrefix . '_year"] option:selected');
     if ($yearOption->count() === 0) {
         return null;
     }
     $year = $yearOption->attr('value');
     if ($year === '') {
         return null;
     }
     $month = 6;
     $day = 15;
     $monthOption = $crawler->filter('select[id="' . $idPrefix . '_month"] option:selected');
     if ($monthOption->count() > 0) {
         $month = $monthOption->attr('value');
         if ($month === '') {
             $month = 6;
         }
         $dayOption = $crawler->filter('select[id="' . $idPrefix . '_day"] option:selected');
         if ($dayOption->count() > 0) {
             $day = $dayOption->attr('value');
             if ($day === '') {
                 $day = 15;
             }
         }
     }
     return DateTime::createFromFormat('Y-n-j', "{$year}-{$month}-{$day}");
 }
 public static function parseExtendedPersonal($contents, Manga $manga)
 {
     $crawler = new Crawler();
     $crawler->addHTMLContent($contents, 'UTF-8');
     #Personal tags
     #<td align="left" class="borderClass"><textarea name="tags" rows="2" id="tagtext" cols="45" class="textarea"></textarea><div class="spaceit_pad"><small>Popular tags: <a href="javascript:void(0);" onclick="detailedadd_addTag('cooking');">cooking</a>, <a href="javascript:void(0);" onclick="detailedadd_addTag('seinen');">seinen</a>, <a href="javascript:void(0);" onclick="detailedadd_addTag('drama');">drama</a>, <a href="javascript:void(0);" onclick="detailedadd_addTag('slice of life');">slice of life</a></small></div></td>
     $personalTags = $crawler->filter('textarea[id="add_manga_tags"]')->text();
     if (strlen($personalTags) > 0) {
         $personalTags = explode(',', $personalTags);
         foreach ($personalTags as $tag) {
             $tagArray[] = trim($tag);
         }
         $manga->setPersonalTags($tagArray);
     }
     #Start and Finish Dates
     #<tr>
     #    <td align="left" class="borderClass">Start Date</td>
     #                <td align="left" class="borderClass">
     #    Month:
     #    <select name="startMonth" id="smonth"  class="inputtext">
     #        <option value="00">
     #        <option value="01" >Jan<option value="02" >Feb<option value="03" >Mar<option value="04" >Apr<option value="05" >May<option value="06" >Jun<option value="07" >Jul<option value="08" >Aug<option value="09" selected>Sep<option value="10" >Oct<option value="11" >Nov<option value="12" >Dec			</select>
     #    Day:
     #    <select name="startDay"  class="inputtext">
     #        <option value="00">
     #        <option value="01" >1<option value="02" >2<option value="03" >3<option value="04" >4<option value="05" >5<option value="06" >6<option value="07" >7<option value="08" >8<option value="09" >9<option value="10" >10<option value="11" >11<option value="12" >12<option value="13" >13<option value="14" >14<option value="15" >15<option value="16" >16<option value="17" >17<option value="18" >18<option value="19" >19<option value="20" >20<option value="21" >21<option value="22" >22<option value="23" >23<option value="24" >24<option value="25" selected>25<option value="26" >26<option value="27" >27<option value="28" >28<option value="29" >29<option value="30" >30<option value="31" >31			</select>
     #    Year:
     #    <select name="startYear"  class="inputtext">
     #        <option value="0000">
     #        <option value="2014" >2014<option value="2013" selected>2013<option value="2012" >2012<option value="2011" >2011<option value="2010" >2010<option value="2009" >2009<option value="2008" >2008<option value="2007" >2007<option value="2006" >2006<option value="2005" >2005<option value="2004" >2004<option value="2003" >2003<option value="2002" >2002<option value="2001" >2001<option value="2000" >2000<option value="1999" >1999<option value="1998" >1998<option value="1997" >1997<option value="1996" >1996<option value="1995" >1995<option value="1994" >1994<option value="1993" >1993<option value="1992" >1992<option value="1991" >1991<option value="1990" >1990<option value="1989" >1989<option value="1988" >1988<option value="1987" >1987<option value="1986" >1986<option value="1985" >1985<option value="1984" >1984			</select>
     #    &nbsp;
     #    <label><input type="checkbox"  onchange="ChangeStartDate();" name="unknownStart" value="1"> <small>Unknown Date</label><br>Start Date represents the date you started watching the Anime <a href="javascript:setToday(1);">Insert Today</a></small>
     #    </td>
     #</tr>
     #<tr>
     #    <td align="left" class="borderClass">Finish Date</td>
     #                <td align="left" class="borderClass">
     #    Month:
     #    <select name="endMonth" id="emonth" class="inputtext" >
     #        <option value="00">
     #        <option value="01" >Jan<option value="02" >Feb<option value="03" >Mar<option value="04" >Apr<option value="05" >May<option value="06" >Jun<option value="07" >Jul<option value="08" >Aug<option value="09" >Sep<option value="10" selected>Oct<option value="11" >Nov<option value="12" >Dec			</select>
     #    Day:
     #    <select name="endDay" class="inputtext" >
     #        <option value="00">
     #        <option value="01" >1<option value="02" >2<option value="03" >3<option value="04" >4<option value="05" >5<option value="06" >6<option value="07" >7<option value="08" >8<option value="09" >9<option value="10" >10<option value="11" selected>11<option value="12" >12<option value="13" >13<option value="14" >14<option value="15" >15<option value="16" >16<option value="17" >17<option value="18" >18<option value="19" >19<option value="20" >20<option value="21" >21<option value="22" >22<option value="23" >23<option value="24" >24<option value="25" >25<option value="26" >26<option value="27" >27<option value="28" >28<option value="29" >29<option value="30" >30<option value="31" >31			</select>
     #    Year:
     #    <select name="endYear" class="inputtext" >
     #        <option value="0000">
     #        <option value="2014" >2014<option value="2013" selected>2013<option value="2012" >2012<option value="2011" >2011<option value="2010" >2010<option value="2009" >2009<option value="2008" >2008<option value="2007" >2007<option value="2006" >2006<option value="2005" >2005<option value="2004" >2004<option value="2003" >2003<option value="2002" >2002<option value="2001" >2001<option value="2000" >2000<option value="1999" >1999<option value="1998" >1998<option value="1997" >1997<option value="1996" >1996<option value="1995" >1995<option value="1994" >1994<option value="1993" >1993<option value="1992" >1992<option value="1991" >1991<option value="1990" >1990<option value="1989" >1989<option value="1988" >1988<option value="1987" >1987<option value="1986" >1986<option value="1985" >1985<option value="1984" >1984			</select>
     #    &nbsp;
     #    <small><label><input type="checkbox" onchange="ChangeEndDate();"  name="unknownEnd" value="1"> Unknown Date</label><br>Do <u>not</u> fill out the Finish Date unless status is <em>Completed</em> <a href="javascript:setToday(2);">Insert Today</a></small>
     #    </td>
     #</tr>
     $isStarted = $crawler->filter('input[id="unknown_start"]')->attr('checked');
     $isEnded = $crawler->filter('input[id="unknown_end"]')->attr('checked');
     if ($isStarted != 'checked') {
         //So, MAL allows users to put in just years, just years and months, or all three values.
         //This mess here is to try and avoid things breaking.
         if ($crawler->filter('select[id="add_manga_start_date_year"] option:selected')->count() > 0) {
             $startYear = $crawler->filter('select[id="add_manga_start_date_year"] option:selected')->attr('value');
             $startMonth = 6;
             $startDay = 15;
             if ($startYear !== '') {
                 if ($crawler->filter('select[id="add_manga_start_date_month"] option:selected')->count() > 0) {
                     $startMonth = $crawler->filter('select[id="add_manga_start_date_month"] option:selected')->attr('value');
                     if ($crawler->filter('select[id="add_manga_start_date_day"] option:selected')->count() > 0) {
                         $startDay = $crawler->filter('select[id="add_manga_start_date_day"] option:selected')->attr('value');
                     }
                 }
                 $manga->setReadingStart(DateTime::createFromFormat('Y-n-j', "{$startYear}-{$startMonth}-{$startDay}"));
             }
         }
     }
     if ($isEnded != 'checked') {
         //Same here, avoid breaking MAL's allowing of partial dates.
         if ($crawler->filter('select[id="add_manga_finish_date_year"] option:selected')->count() > 0) {
             $endYear = $crawler->filter('select[id="add_manga_finish_date_year"] option:selected')->attr('value');
             $endMonth = 6;
             $endDay = 15;
             if ($endYear !== '') {
                 if ($crawler->filter('select[id="add_manga_finish_date_month"] option:selected')->count() > 0) {
                     $endMonth = $crawler->filter('select[id="add_manga_finish_date_month"] option:selected')->attr('value');
                     if ($crawler->filter('select[id="add_manga_finish_date_day"] option:selected')->count() > 0) {
                         $endDay = $crawler->filter('select[id="add_manga_finish_date_day"] option:selected')->attr('value');
                     }
                 }
                 $manga->setReadingEnd(DateTime::createFromFormat('Y-n-j', "{$endYear}-{$endMonth}-{$endDay}"));
             }
         }
     }
     #Priority
     #<td align="left" class="borderClass">Priority</td>
     #<td align="left" class="borderClass"><select name="priority" class="inputtext">
     #<option value="0">Select</option>
     #<option value="0" selected>Low<option value="1" >Medium<option value="2" >High                </select>
     #<div style="margin-top 3px;"><small>What is your priority level to read this manga?</small></div></td>
     $priorityList = $crawler->filter('select[id="add_manga_priority"] option:selected');
     if (count($priorityList)) {
         $priority = $priorityList->attr('value');
         $manga->setPriority($priority);
     }
     #Rewatched
     #<label><input type="checkbox" id="add_manga_is_rereading" name="add_manga[is_rereading]" value="1" checked="checked">
     $reread = $crawler->filter('input[id="add_manga_is_rereading"]')->attr('checked');
     if ($reread == null) {
         $manga->setRereading(false);
     } else {
         $manga->setRereading(true);
     }
     #Times Reread
     #<td align="left" class="borderClass"><input type="text" class="inputtext" size="4" value="0" name="times_read">
     $rereadCount = $crawler->filter('input[id="add_manga_num_read_times"]')->attr('value');
     if ($rereadCount > 0) {
         $manga->setRereadCount($rereadCount);
     }
     #Reread Value
     #<td align="left" class="borderClass"><select class="inputtext" name="reread_value">
     #	<option value="0">Select reread value</option><option value="1">Very Low</option><option value="2">Low</option><option value="3">Medium</option><option value="4">High</option><option value="5">Very High			</option></select>
     $rereadValue = $crawler->filter('select[id="add_manga_reread_value"] option:selected');
     if (count($rereadValue)) {
         $manga->setRereadValue($rereadValue->attr('value'));
     }
     #Comments
     #<td align="left" class="borderClass"><textarea class="textarea" cols="45" rows="5" name="comments"></textarea></td>
     $comments = trim($crawler->filter('textarea[id="add_manga_comments"]')->text());
     if (strlen($comments)) {
         $manga->setPersonalComments($comments);
     }
     return $manga;
 }
 /**
  * Parse a MAL person page into a Person record.
  *
  * Reads the id, name and image from the page, the labelled fields (given,
  * family and alternate names, birthday, website, favorites count, more
  * details) from the left column, and the "Voice Acting Roles", "Anime Staff
  * Positions" and "Published Manga" tables from the right column.
  *
  * NOTE(review): id, name and image are read without a prior count() check;
  * presumably the Crawler accessors throw when nothing matches - confirm.
  *
  * @param string $contents The HTML source of the person page.
  *
  * @return Person The populated person record.
  */
 public static function parse($contents)
 {
     $crawler = new Crawler();
     $crawler->addHTMLContent($contents, 'UTF-8');
     $personrecord = new Person();
     # Person ID.
     # Example:
     # <input type="hidden" name="vaid" value="185">
     $personrecord->setId((int) $crawler->filter('input[name="vaid"]')->attr('value'));
     # Name
     # Example:
     # <div><h1 class="h1">Hanazawa, Kana</h1></div>
     $personrecord->setName(trim($crawler->filter('div h1')->text()));
     # Image
     # Example:
     # <a href="/people/185/Kana_Hanazawa/pictures"><img src="http://cdn.myanimelist.net/images/voiceactors/3/43500.jpg" alt="Hanazawa, Kana"></a>
     $personrecord->setImageUrl($crawler->filter('div#content tr td div img')->attr('src'));
     $leftcolumn = $crawler->filterXPath('//div[@id="content"]/table/tr/td[@class="borderClass"]');
     # Given name
     # The label span's own text is stripped from its parent's text to leave only the value.
     $extracted = $leftcolumn->filterXPath('//span[text()="Given name:"]');
     if ($extracted->count() > 0) {
         $personrecord->setGivenName(trim(str_replace($extracted->text(), '', $extracted->parents()->text())));
     }
     # Family name
     # MAL messed this field up. It's not wrapped in a div, so the text is floating out in the td.
     $extracted = $leftcolumn->filterXPath('//span[text()="Family name:"]');
     if ($extracted->count() > 0) {
         $matches = array();
         # This regex matches "Family name:..." until it hits Birthday/Website/Alternate [name], one of which should
         # be the field following the Family name field
         preg_match('/Family name:.*?(?:(?!Birthday|Website|Alternate).)*/', $leftcolumn->text(), $matches);
         if (count($matches) > 0) {
             $personrecord->setFamilyName(trim(str_replace($extracted->text(), '', $matches[0])));
         }
     }
     # Alternate names
     # The value is a single ", "-separated string that is split into a list.
     $extracted = $leftcolumn->filterXPath('//span[text()="Alternate names:"]');
     if ($extracted->count() > 0) {
         $text = trim(str_replace($extracted->text(), '', $extracted->parents()->text()));
         $alternateNames = explode(', ', $text);
         $personrecord->setAlternateNames($alternateNames);
     }
     # Birthday
     # The second argument to setBirthday() tags which parts of the date were
     # actually present on the page ('year', 'month', 'dayMonth' or 'day').
     $extracted = $leftcolumn->filterXPath('//span[text()="Birthday:"]');
     if ($extracted->count() > 0) {
         $dateStr = trim(str_replace($extracted->text(), '', $extracted->parents()->text()));
         // Replace 2 spaces with 1, MAL seems to add 2 spaces in some dates
         $dateStr = str_replace('  ', ' ', $dateStr);
         if (strpos($dateStr, ',') === false) {
             if (strlen($dateStr) === 4) {
                 // Only a year, Example ID 11746
                 $personrecord->setBirthday(DateTime::createFromFormat('Y m d', $dateStr . ' 01 01'), 'year');
             } elseif (count(explode(' ', $dateStr)) == 2) {
                 // Month + Year, i.e. "Feb 1999", Example ID 7277
                 $dateComponents = explode(' ', $dateStr);
                 $month = $dateComponents[0];
                 $year = $dateComponents[1];
                 $personrecord->setBirthday(DateTime::createFromFormat('M Y d', $month . ' ' . $year . ' 01'), 'month');
             }
         } else {
             $dateComponents = explode(' ', $dateStr);
             if (count($dateComponents) == 2) {
                 // Month + Day, i.e. "Jun 15,", Example ID 2608
                 $month = $dateComponents[0];
                 // Drop the trailing comma from the day component
                 $day = substr($dateComponents[1], 0, -1);
                 // No year is known, so 1970 is used as a placeholder
                 $personrecord->setBirthday(DateTime::createFromFormat('M d Y', $month . ' ' . $day . ' 1970'), 'dayMonth');
             } elseif (count($dateComponents) == 3) {
                 // Full date, i.e. "Feb 25, 1989", Example ID 185
                 $personrecord->setBirthday(DateTime::createFromFormat('M j, Y', $dateStr), 'day');
             }
         }
     }
     # Website
     # This isn't in a div, but the anchor element is the next sibling of the span
     $extracted = $leftcolumn->filterXPath('//span[text()="Website:"]');
     if ($extracted->count() > 0) {
         $personrecord->setWebsiteUrl(trim(str_replace($extracted->text(), '', $extracted->nextAll()->attr("href"))));
     }
     # Favorites count
     $extracted = $leftcolumn->filterXPath('//span[text()="Member Favorites:"]');
     if ($extracted->count() > 0) {
         $personrecord->setFavoritedCount(trim(str_replace($extracted->text(), '', $extracted->parents()->text())));
     }
     # More Details
     # Note: CSS classes are misspelled, need to keep an eye on this
     $extracted = $leftcolumn->filter('div[class="people-informantion-more js-people-informantion-more"]');
     if ($extracted->count() > 0) {
         $personrecord->setMoreDetails($extracted->html());
     }
     # Extract from sections on the right column: Voice acting roles, anime staff positions, published manga
     $rightcolumn = $crawler->filter('div[id="content"] td[style="padding-left: 5px;"]');
     // Voice acting roles
     $vaRoles = $rightcolumn->filterXPath('//div[text()="Voice Acting Roles"]');
     if ($vaRoles->count() > 0) {
         $rolesArray = array();
         // Iterate over each row in the table
         foreach ($vaRoles->nextAll()->children() as $item) {
             $node = new Crawler($item);
             // Fill in the character details
             $characterAnchor = $node->filterXPath('//td[3]/a');
             $characterName = $characterAnchor->text();
             $characterUrl = $characterAnchor->attr('href');
             $isMainCharacter = strpos($node->filterXPath('//td[3]/div')->text(), "Main") !== false;
             $characterImage = preg_replace('/r(.+?)\\/(.+?)\\?(.+?)$/', '$2', $node->filterXPath('//td[4]/div/a/img')->attr('data-src'));
             // Reset on every row: without this, a row whose URL does not match
             // would read an undefined variable or reuse the previous row's id.
             $characterId = null;
             $match = preg_match('/\\/(character)\\/(\\d+)\\/.*?/', $characterUrl, $urlParts);
             if ($match !== false && $match !== 0) {
                 $characterId = (int) $urlParts[2];
             }
             $itemArray = array('id' => $characterId, 'name' => $characterName, 'image_url' => $characterImage, 'main_role' => $isMainCharacter);
             $itemArray['anime'] = new Anime();
             $animeDetails = $node->filterXPath('//td[2]/a');
             // Fill in the anime details
             $itemArray['anime']->setImageUrl(preg_replace('/r(.+?)\\/(.+?)\\?(.+?)$/', '$2', $node->filterXPath('//td[1]/div/a/img')->attr('data-src')));
             $itemArray['anime']->setTitle($animeDetails->text());
             $match = preg_match('/\\/(anime)\\/(\\d+)\\/.*?/', $animeDetails->attr('href'), $urlParts);
             if ($match !== false && $match !== 0) {
                 $itemArray['anime']->setId($urlParts[2]);
             }
             $rolesArray[] = $itemArray;
         }
         $personrecord->setVoiceActingRoles($rolesArray);
     }
     // Anime staff positions
     $staffPositions = $rightcolumn->filterXPath('//div[text()="Anime Staff Positions"]');
     if ($staffPositions->count() > 0) {
         $positionsArray = array();
         // Iterate over each row in the table
         foreach ($staffPositions->nextAll()->children() as $item) {
             $node = new Crawler($item);
             // Fill in the position details
             $position = $node->filterXPath('//td[2]/div/small')->text();
             $positionDetails = $node->filterXPath('//td[2]/div')->text();
             // Details are wrapped in parenthesis, so we'll find those parens and grab what's inside
             // Sometimes what comes back is malformed - i.e. "ep. 1-15)," - it appears malformed on the site too
             $match = preg_match('/\\((.*)\\)/', $positionDetails, $positionDetailsParts);
             if ($match !== false && $match !== 0) {
                 $positionDetails = $positionDetailsParts[1];
             } else {
                 $positionDetails = null;
             }
             $itemArray = array('position' => $position, 'details' => $positionDetails);
             $itemArray['anime'] = new Anime();
             $animeDetails = $node->filterXPath('//td[2]/a');
             // Fill in the anime details
             $itemArray['anime']->setImageUrl(preg_replace('/r(.+?)\\/(.+?)\\?(.+?)$/', '$2', $node->filterXPath('//td[1]/div/a/img')->attr('data-src')));
             $itemArray['anime']->setTitle($animeDetails->text());
             $match = preg_match('/\\/(anime)\\/(\\d+)\\/.*?/', $animeDetails->attr('href'), $urlParts);
             if ($match !== false && $match !== 0) {
                 $itemArray['anime']->setId($urlParts[2]);
             }
             $positionsArray[] = $itemArray;
         }
         $personrecord->setAnimeStaffPositions($positionsArray);
     }
     // Published manga
     $mangaPositions = $rightcolumn->filterXPath('//div[text()="Published Manga"]');
     if ($mangaPositions->count() > 0) {
         $positionsArray = array();
         // Iterate over each row in the table
         foreach ($mangaPositions->nextAll()->children() as $item) {
             $node = new Crawler($item);
             // Fill in the position
             $position = $node->filterXPath('//td[2]/div/small')->text();
             $itemArray = array('position' => $position);
             $itemArray['manga'] = new Manga();
             $mangaDetails = $node->filterXPath('//td[2]/a');
             // Fill in the manga details
             $itemArray['manga']->setImageUrl(preg_replace('/r(.+?)\\/(.+?)\\?(.+?)$/', '$2', $node->filterXPath('//td[1]/div/a/img')->attr('data-src')));
             $itemArray['manga']->setTitle($mangaDetails->text());
             $match = preg_match('/\\/(manga)\\/(\\d+)\\/.*?/', $mangaDetails->attr('href'), $urlParts);
             if ($match !== false && $match !== 0) {
                 $itemArray['manga']->setId($urlParts[2]);
             }
             $positionsArray[] = $itemArray;
         }
         $personrecord->setPublishedManga($positionsArray);
     }
     return $personrecord;
 }
Exemple #19
0
 /**
  * Remember the timezone found on the MAL settings page.
  *
  * Loads the given settings HTML, takes the text of the `option[selected]`
  * match and stores it in the static $timeZone property. Nothing is returned.
  *
  * NOTE(review): the original description says the value depends on the
  * country the user joined from; that cannot be verified from this code.
  *
  * @param string $settings The HTML source of the settings page that contains the timezone.
  *
  * @return void
  */
 public static function setTimeZone($settings)
 {
     $settingsPage = new Crawler();
     $settingsPage->addHTMLContent($settings, 'UTF-8');
     $selectedOption = $settingsPage->filter('option[selected]');
     self::$timeZone = $selectedOption->text();
 }
Exemple #20
0
 /**
  * Turn a MAL history page into a list of history entries.
  *
  * Every table row that contains a link yields one entry with the keys
  * 'item' (an Anime or Manga carrying title, id and progress count),
  * 'type' ('anime' or 'manga') and 'time_updated' (the second cell's text,
  * minus its first character, passed through Date::formatTime()).
  *
  * @param string $contents The HTML source of the history page.
  *
  * @return array The history entries; an empty array when no row qualifies.
  */
 public static function parseHistory($contents)
 {
     $page = new Crawler();
     $page->addHTMLContent($contents, 'UTF-8');
     // Start from an empty list so the caller always receives an array.
     $entries = array();
     foreach ($page->filter('table')->filter('tr') as $rowNode) {
         $row = new Crawler($rowNode);
         $link = $row->filter('a');
         // Rows without a link are MAL generated strings, not history entries.
         if ($link->count() === 0) {
             continue;
         }
         $href = $link->attr('href');
         // The link target decides whether this is an anime or a manga entry.
         $type = strpos($href, 'anime') !== false ? 'anime' : 'manga';
         if ($type === 'anime') {
             $item = new Anime();
             $item->setWatchedEpisodes((int) $row->filter('strong')->text());
         } else {
             $item = new Manga();
             $item->setChaptersRead((int) $row->filter('strong')->text());
         }
         $item->setTitle($link->text());
         // Strip the "/<type>.php?id=" prefix so only the numeric id is left.
         $item->setId((int) str_replace('/' . $type . '.php?id=', '', $href));
         $entries[] = array(
             'item' => $item,
             'type' => $type,
             'time_updated' => Date::formatTime(substr($row->filter('td')->eq(1)->text(), 1)),
         );
     }
     return $entries;
 }