コード例 #1
0
 /**
  * Builds a list of Anime records from a collection of listing entries.
  *
  * Each entry is wrapped in a Crawler and scraped for its ID, title, producers,
  * episode count, genres, image URL, synopsis, type, member count and score.
  *
  * @param iterable $rows Nodes accepted by the Crawler constructor, one per listed title.
  *
  * @return Anime[] One Anime per entry, in iteration order.
  */
 public static function parseDay($rows)
 {
     $result = array();
     foreach ($rows as $item) {
         $crawler = new Crawler($item);
         $anime = new Anime();
         $titleLink = $crawler->filter('a[class="link-title"]');
         $url = $titleLink->attr('href');
         // preg_match() yields 1 on a match, 0 on no match and false on error. Only a real
         // match populates $urlParts[2], so the ID is set exclusively in that case.
         if (preg_match('/\\/(anime|manga)\\/(\\d+)\\/.*?/', (string) $url, $urlParts) === 1) {
             $anime->setId((int) $urlParts[2]);
         }
         $anime->setTitle(trim($titleLink->text()));
         $producer = $crawler->filter('span[class="producer"] a');
         if ($producer->count() > 0) {
             // text() reads the first matched link only; its content is split on ", ".
             $anime->setProducers(explode(', ', $producer->text()));
         }
         $anime->setEpisodes((int) str_replace(' eps', '', $crawler->filter('div[class="eps"] span')->text()));
         $genres = $crawler->filter('div[class="genres-inner js-genre-inner"] a');
         $genreArray = array();
         foreach ($genres as $genre) {
             $genreCrawler = new Crawler($genre);
             $genreArray[] = $genreCrawler->text();
         }
         $anime->setGenres($genreArray);
         $anime->setImageUrl($crawler->filter('div[class="image lazyload"]')->attr('data-bg'));
         $anime->setSynopsis(trim($crawler->filter('div[class="synopsis js-synopsis"]')->text()));
         // The info block is split on "-"; the part before the first hyphen is used as the type.
         $detail = explode('-', $crawler->filter('div[class="info"]')->text());
         $anime->setType(trim($detail[0]));
         // Thousands separators are stripped before the integer cast.
         $anime->setMembersCount((int) str_replace(',', '', trim($crawler->filter('span[class="member fl-r"]')->text())));
         $anime->setMembersScore((float) trim($crawler->filter('span[class="score"]')->text()));
         $result[] = $anime;
     }
     return $result;
 }
コード例 #2
0
 /**
  * Converts a single entry node into an Anime record.
  *
  * Reads the id, title, english, synonyms, episodes, score, status, start_date,
  * end_date, synopsis and image child elements of the given node.
  *
  * @param mixed $item Node accepted by the Crawler constructor.
  *
  * @return Anime The populated record.
  */
 private static function parseAnime($item)
 {
     $crawler = new Crawler($item);
     $anime = new Anime();
     $anime->setId($crawler->filter('id')->text());
     $anime->setTitle($crawler->filter('title')->text());
     $otherTitles = array();
     // explode() always returns at least one element, so an empty source string shows up
     // as a single empty first entry. That is what must be tested, not the array itself.
     $english = explode('; ', $crawler->filter('english')->text());
     if ($english[0] !== '') {
         $otherTitles['english'] = $english;
     }
     $synonyms = explode('; ', $crawler->filter('synonyms')->text());
     if ($synonyms[0] !== '') {
         $otherTitles['synonyms'] = $synonyms;
     }
     $anime->setOtherTitles($otherTitles);
     $anime->setEpisodes($crawler->filter('episodes')->text());
     $anime->setMembersScore($crawler->filter('score')->text());
     $anime->setStatus($crawler->filter('status')->text());
     // "0000-00-00" marks an unknown date and is skipped, as is any value that
     // createFromFormat() cannot parse (it returns false in that case).
     $startDate = $crawler->filter('start_date')->text();
     if ($startDate !== '0000-00-00') {
         $parsedStart = \DateTime::createFromFormat('Y-m-d', $startDate);
         if ($parsedStart !== false) {
             $anime->setStartDate($parsedStart);
         }
     }
     $endDate = $crawler->filter('end_date')->text();
     if ($endDate !== '0000-00-00') {
         $parsedEnd = \DateTime::createFromFormat('Y-m-d', $endDate);
         if ($parsedEnd !== false) {
             $anime->setEndDate($parsedEnd);
         }
     }
     $anime->setSynopsis($crawler->filter('synopsis')->text());
     $anime->setImageUrl($crawler->filter('image')->text());
     return $anime;
 }
コード例 #3
0
 /**
  * Parses the HTML of an anime detail page into an Anime record.
  *
  * The left column supplies alternative titles, information and statistics; the right
  * column supplies the synopsis and related titles. Personal list data is read only when
  * the "Edit Status" box is present. Background, broadcast, duration, external links,
  * preview, theme songs and recommendations are read only for API version 2.1 and above.
  *
  * @param string $contents   HTML of the detail page, loaded as UTF-8.
  * @param string $apiVersion Requested API version, compared against '2.1'.
  *
  * @return Anime The populated record.
  */
 public static function parse($contents, $apiVersion)
 {
     $crawler = new Crawler();
     $crawler->addHTMLContent($contents, 'UTF-8');
     $animerecord = new Anime();
     # Anime ID.
     # Example:
     # <input type="hidden" name="aid" value="790">
     $animerecord->setId((int) $crawler->filter('input[name="aid"]')->attr('value'));
     # Title and rank.
     # Example:
     # <span itemprop="name">One Piece</span>
     $animerecord->setTitle(trim($crawler->filter('span[itemprop="name"]')->text()));
     $rank = $crawler->filterXPath('//span[contains(@class, "ranked")]');
     if (count($rank) > 0) {
         $animerecord->setRank((int) str_replace('Ranked #', '', $rank->text()));
     }
     # Title Image
     # Example:
     # <a href="http://myanimelist.net/anime/16353/Love_Lab/pic&pid=50257"><img src="http://cdn.myanimelist.net/images/anime/12/50257.jpg" alt="Love Lab" align="center"></a>
     $animerecord->setImageUrl(str_replace('t.jpg', '.jpg', $crawler->filter('div#content tr td div img')->attr('src')));
     # Alternative Titles section.
     # Example:
     # <h2>Alternative Titles</h2>
     # <div class="spaceit_pad"><span class="dark_text">English:</span> Lucky Star</div>
     # <div class="spaceit_pad"><span class="dark_text">Synonyms:</span> Lucky Star, Raki ☆ Suta</div>
     # <div class="spaceit_pad"><span class="dark_text">Japanese:</span> らき すた</div>
     $leftcolumn = $crawler->filterXPath('//div[@id="content"]/table/tr/td[@class="borderClass"]');
     // Accumulates the alternative titles; re-sent to the record after each addition.
     $other_titles = array();
     # English:
     $extracted = $leftcolumn->filterXPath('//span[text()="English:"]');
     if ($extracted->count() > 0) {
         $text = trim(str_replace($extracted->text(), '', $extracted->parents()->text()));
         $other_titles['english'] = explode(', ', $text);
         $animerecord->setOtherTitles($other_titles);
     }
     # Synonyms:
     $extracted = $leftcolumn->filterXPath('//span[text()="Synonyms:"]');
     if ($extracted->count() > 0) {
         $text = trim(str_replace($extracted->text(), '', $extracted->parents()->text()));
         $other_titles['synonyms'] = explode(', ', $text);
         $animerecord->setOtherTitles($other_titles);
     }
     # Japanese:
     $extracted = $leftcolumn->filterXPath('//span[text()="Japanese:"]');
     if ($extracted->count() > 0) {
         $text = trim(str_replace($extracted->text(), '', $extracted->parents()->text()));
         $other_titles['japanese'] = explode(', ', $text);
         $animerecord->setOtherTitles($other_titles);
     }
     # Information section.
     # Example:
     # <h2>Information</h2>
     # <div><span class="dark_text">Type:</span> TV</div>
     # <div class="spaceit"><span class="dark_text">Episodes:</span> 24</div>
     # <div><span class="dark_text">Status:</span> Finished Airing</div>
     # <div class="spaceit"><span class="dark_text">Aired:</span> Apr  9, 2007 to Sep  17, 2007</div>
     # <div>
     #   <span class="dark_text">Producers:</span>
     #   <a href="http://myanimelist.net/anime.php?p=2">Kyoto Animation</a>,
     #   <a href="http://myanimelist.net/anime.php?p=104">Lantis</a>,
     #   <a href="http://myanimelist.net/anime.php?p=262">Kadokawa Pictures USA</a><sup><small>L</small></sup>,
     #   <a href="http://myanimelist.net/anime.php?p=286">Bang Zoom! Entertainment</a>
     # </div>
     # <div class="spaceit">
     #   <span class="dark_text">Genres:</span>
     #   <a href="http://myanimelist.net/anime.php?genre[]=4">Comedy</a>,
     #   <a href="http://myanimelist.net/anime.php?genre[]=20">Parody</a>,
     #   <a href="http://myanimelist.net/anime.php?genre[]=23">School</a>,
     #   <a href="http://myanimelist.net/anime.php?genre[]=36">Slice of Life</a>
     # </div>
     # <div><span class="dark_text">Duration:</span> 24 min. per episode</div>
     # <div class="spaceit"><span class="dark_text">Rating:</span> PG-13 - Teens 13 or older</div>
     # Type:
     $extracted = $leftcolumn->filterXPath('//span[text()="Type:"]');
     if ($extracted->count() > 0) {
         $animerecord->setType(trim(str_replace($extracted->text(), '', $extracted->parents()->text())));
     }
     # Episodes:
     $extracted = $leftcolumn->filterXPath('//span[text()="Episodes:"]');
     if ($extracted->count() > 0) {
         $episodeCount = trim(str_replace($extracted->text(), '', $extracted->parents()->text()));
         if (is_numeric($episodeCount)) {
             $animerecord->setEpisodes((int) $episodeCount);
         }
     }
     # Status:
     $extracted = $leftcolumn->filterXPath('//span[text()="Status:"]');
     if ($extracted->count() > 0) {
         $animerecord->setStatus(strtolower(trim(str_replace($extracted->text(), '', $extracted->parents()->text()))));
     }
     # Aired:
     $extracted = $leftcolumn->filterXPath('//span[text()="Aired:"]');
     if ($extracted->count() > 0) {
         /*
          * NOTE: The Ruby API has a bug where yet-to-air shows that only have one date
          * get that listed as the "end_date", not the "start_date". The code below fixes
          * this and in doing so deliberately breaks compatibility in order to present the
          * data properly.
          */
         $daterange = explode(' to ', trim(str_replace($extracted->text(), '', $extracted->parents()->text())));
         //MAL always provides record dates in US-style format.
         if (strpos($daterange[0], ',') === false) {
             if (strlen($daterange[0]) === 4) {
                 $animerecord->setStartDate(DateTime::createFromFormat('Y m d', $daterange[0] . ' 01 01'), 'year');
                 //Example ID 6535 or 9951
             } elseif ($daterange[0] !== 'Not available') {
                 $animerecord->setStartDate(DateTime::createFromFormat('M Y d', $daterange[0] . ' 01'), 'month');
                 //Example ID 22535 (check upcoming list)
             }
         } else {
             if (count(explode(' ', $daterange[0])) == 2) {
                 //MAL has been showing a comma with month and year (Jan, 2016), so catch that
                 $dateComponents = explode(' ', $daterange[0]);
                 $month = substr($dateComponents[0], 0, -1);
                 $year = $dateComponents[1];
                 $animerecord->setStartDate(DateTime::createFromFormat('M Y d', $month . ' ' . $year . ' 01'), 'month');
             } elseif (strlen($daterange[0]) !== 7 && strlen($daterange[0]) !== 8) {
                 $animerecord->setStartDate(DateTime::createFromFormat('M j, Y', $daterange[0]), 'day');
             }
         }
         //Series not yet to air won't list a range at all while currently airing series will use a "?"
         //For these, we should return a null
         if (count($daterange) > 1 && $daterange[1] !== '?') {
             //MAL always provides record dates in US-style format.
             //Not all dates are full, so we have to figure out how to properly parse them
             $dateParts = explode(' ', $daterange[1]);
             $firstIsNumber = is_numeric($dateParts[0]);
             $hasComma = strpos($dateParts[0], ',');
             if (count($dateParts) == 3) {
                 //Full date, normal processing
                 $endDate = DateTime::createFromFormat('M j, Y', $daterange[1]);
                 $animerecord->setEndDate($endDate, 'day');
             } elseif (count($dateParts) == 2) {
                 //We only have two parts, figure out what we were given
                 if ($firstIsNumber === false && $hasComma !== false) {
                     //So, it looks like month and year, because MAL adds the comma regardless.
                     $endDate = DateTime::createFromFormat('M, Y d', $daterange[1] . ' 01');
                     //Example ID 21275
                     $animerecord->setEndDate($endDate, 'month');
                 }
             } else {
                 if (count($dateParts) == 1 && $firstIsNumber) {
                     //Most likely just a year.
                     $endDate = DateTime::createFromFormat('Y m d', $daterange[1] . ' 01 01');
                     $animerecord->setEndDate($endDate, 'year');
                     //Example ID 11836
                 }
             }
         }
     }
     # Producers:
     $extracted = $leftcolumn->filterXPath('//span[text()="Producers:"]');
     // The node count is checked first so parents()/text() are never called on an empty
     // node list (NOTE(review): DomCrawler is expected to throw in that case - confirm
     // against the installed version).
     if ($extracted->count() > 0 && strpos($extracted->parents()->text(), 'None found') === false) {
         $producers = array();
         $records = $extracted->parents()->first()->filter('a');
         foreach ($records as $rItem) {
             $producers[] = $rItem->nodeValue;
         }
         $animerecord->setProducers($producers);
     }
     # Genres:
     $extracted = $leftcolumn->filterXPath('//span[text()="Genres:"]');
     if ($extracted->count() > 0) {
         $genres = array();
         $records = $extracted->parents()->first()->filter('a');
         foreach ($records as $rItem) {
             $genres[] = $rItem->nodeValue;
         }
         if (count($genres) > 0) {
             $animerecord->setGenres($genres);
         }
     }
     # Classification:
     $extracted = $leftcolumn->filterXPath('//span[text()="Rating:"]');
     if ($extracted->count() > 0) {
         $animerecord->setClassification(trim(str_replace($extracted->text(), '', $extracted->parents()->text())));
     }
     # Statistics
     # Example:
     # <h2>Statistics</h2>
     # <div>
     #   <span class="dark_text">Score:</span> 8.41<sup><small>1</small></sup>
     #   <small>(scored by 22601 users)</small>
     # </div>
     # <div class="spaceit"><span class="dark_text">Ranked:</span> #96<sup><small>2</small></sup></div>
     # <div><span class="dark_text">Popularity:</span> #15</div>
     # <div class="spaceit"><span class="dark_text">Members:</span> 36,961</div>
     # <div><span class="dark_text">Favorites:</span> 2,874</div>
     //TODO: Rewrite to properly clean up excess tags.
     # Score:
     $extracted = $leftcolumn->filterXPath('//span[text()="Score:"]');
     if ($extracted->count() > 0) {
         $extracted = str_replace($extracted->text(), '', $extracted->parents()->text());
         //Remove the parenthetical at the end of the string
         $extracted = trim(str_replace(strstr($extracted, '('), '', $extracted));
         //Sometimes there is a superscript number at the end from a note.
         //Scores are only two decimals, so number_format should chop off the excess, hopefully.
         if (strpos($extracted, 'N/A') === false) {
             // number_format() expects a float, so the scraped string is cast explicitly.
             $animerecord->setMembersScore((float) number_format((float) $extracted, 2));
         }
     }
     # Popularity:
     $extracted = $leftcolumn->filterXPath('//span[text()="Popularity:"]');
     if ($extracted->count() > 0) {
         $extracted = str_replace($extracted->text(), '', $extracted->parents()->text());
         //Remove the hash at the front of the string and trim whitespace. Needed so we can cast to an int.
         $extracted = trim(str_replace('#', '', $extracted));
         $animerecord->setPopularityRank((int) $extracted);
     }
     # Members:
     $extracted = $leftcolumn->filterXPath('//span[text()="Members:"]');
     if ($extracted->count() > 0) {
         $extracted = str_replace($extracted->text(), '', $extracted->parents()->text());
         //PHP doesn't like commas in integers. Remove it.
         $extracted = trim(str_replace(',', '', $extracted));
         $animerecord->setMembersCount((int) $extracted);
     }
     # Favorites:
     $extracted = $leftcolumn->filterXPath('//span[text()="Favorites:"]');
     if ($extracted->count() > 0) {
         $extracted = str_replace($extracted->text(), '', $extracted->parents()->text());
         //PHP doesn't like commas in integers. Remove it.
         $extracted = trim(str_replace(',', '', $extracted));
         $animerecord->setFavoritedCount((int) $extracted);
     }
     # -
     # Extract from sections on the right column: Synopsis, Related Anime, Characters & Voice Actors, Reviews
     # Recommendations.
     # -
     $rightcolumn = $crawler->filterXPath('//div[@id="content"]/table/tr/td[2]');
     # Synopsis
     # Example:
     # <td>
     # <h2>Synopsis</h2>
     # Having fun in school, doing homework together, cooking and eating, playing videogames, watching anime. All those little things make up the daily life of the anime- and chocolate-loving Izumi Konata and her friends. Sometimes relaxing but more than often simply funny! <br />
     # -From AniDB
     $description = $crawler->filterXPath('//span[@itemprop="description"]');
     //Compatibility Note: We don't convert extended characters to HTML entities, we just
     //use the output directly from MAL. This should be okay as our return charset is UTF-8.
     $animerecord->setSynopsis('There is currently no synopsis for this title.');
     if ($description->count() > 0) {
         $animerecord->setSynopsis($description->html());
     }
     # Related Anime
     # Example:
     #<table class="anime_detail_related_anime" style="border-spacing:0px;">
     #  <tr>
     #    <td class="ar fw-n borderClass" nowrap="" valign="top">Adaptation:</td>
     #    <td class="borderClass" width="100%"><a href="/manga/587/Lucky☆Star">Lucky☆Star</a></td>
     #  </tr>
     #  <tr>
     #    <td class="ar fw-n borderClass" nowrap="" valign="top">Character:</td>
     #    <td class="borderClass" width="100%"><a href="/anime/3080/Anime_Tenchou">Anime Tenchou</a></td>
     #  </tr>
     #</table>
     $related = $rightcolumn->filter('table.anime_detail_related_anime');
     //NOTE: Not all relations are currently supported.
     if (iterator_count($related)) {
         $rows = $related->children();
         foreach ($rows as $row) {
             $rowItem = $row->firstChild;
             // Skip rows that lack the label cell or the cell holding the related items.
             if ($rowItem === null || $rowItem->nextSibling === null) {
                 continue;
             }
             $relationType = strtr(strtolower(rtrim($rowItem->nodeValue, ':')), ' ', '_');
             // Walk every child node of the second cell; only <a> elements describe a relation.
             for ($relatedItem = $rowItem->nextSibling->firstChild; $relatedItem !== null; $relatedItem = $relatedItem->nextSibling) {
                 if ($relatedItem->nodeType !== XML_ELEMENT_NODE || $relatedItem->tagName !== 'a') {
                     continue;
                 }
                 $url = $relatedItem->getAttribute('href');
                 // Only links of the form /anime/<id>/... or /manga/<id>/... are recorded.
                 if (preg_match('/\\/(anime|manga)\\/(\\d+)\\/.*?/', $url, $urlParts) !== 1) {
                     continue;
                 }
                 $itemId = (int) $urlParts[2];
                 $itemArray = array();
                 if ($urlParts[1] === 'anime') {
                     $itemArray['anime_id'] = $itemId;
                 } else {
                     $itemArray['manga_id'] = $itemId;
                 }
                 $itemArray['title'] = $relatedItem->textContent;
                 $itemArray['url'] = 'https://myanimelist.net' . $url;
                 $animerecord->addRelation($itemArray, $relationType);
             }
         }
     }
     # Personal Info
     $userPersonalInfo = $crawler->filterXPath('//h2[text()="Edit Status"]');
     // Only try to parse personal info if the box is there
     if ($userPersonalInfo->count() > 0) {
         #Watched Status - Only available when user is authenticated
         $my_data = $crawler->filter('select#myinfo_status');
         if (iterator_count($my_data) && iterator_count($my_data->filter('option[selected="selected"]'))) {
             $animerecord->setWatchedStatus($my_data->filter('option[selected="selected"]')->attr('value'));
         }
         #Watched Episodes - Only available when user is authenticated
         $my_data = $crawler->filter('input#myinfo_watchedeps');
         if (iterator_count($my_data)) {
             $animerecord->setWatchedEpisodes((int) $my_data->attr('value'));
         }
         #User's Score - Only available when user is authenticated
         $my_data = $crawler->filter('select#myinfo_score');
         if (iterator_count($my_data) && iterator_count($my_data->filter('option[selected="selected"]'))) {
             $animerecord->setScore((int) $my_data->filter('option[selected="selected"]')->attr('value'));
         }
         #Listed ID (?) - Only available when user is authenticated
         $my_data = $crawler->filterXPath('//a[text()="Edit Details"]');
         if (iterator_count($my_data)) {
             // A separate variable receives the match so the crawler is not overwritten.
             if (preg_match('/id=(\\d+)/', $my_data->attr('href'), $listedIdMatch)) {
                 $animerecord->setListedAnimeId((int) $listedIdMatch[1]);
             }
         }
     }
     if ($apiVersion >= '2.1') {
         # Background
         // Read only when the description span exists and the surrounding markup contains a
         // background section; otherwise the background is left untouched.
         if ($description->count() > 0
             && preg_match('/div>Background<\\/h2>(.+?)<div/s', $description->parents()->html(), $matches) === 1
         ) {
             if (strpos($matches[0], 'No background information') !== false) {
                 $animerecord->setBackground('No background information has been added to this title.');
             } else {
                 $animerecord->setBackground(trim($matches[1]));
             }
         }
         # Broadcast:
         $extracted = $leftcolumn->filterXPath('//span[text()="Broadcast:"]');
         if ($extracted->count() > 0) {
             $animerecord->setBroadcast(trim(preg_replace('/(\\w.+)s at(\\s\\d.+)\\((\\w.+)\\)/', '$1$2$3', str_replace($extracted->text(), '', $extracted->parents()->text()))));
         }
         # Duration:
         $extracted = $leftcolumn->filterXPath('//span[text()="Duration:"]');
         if ($extracted->count() > 0) {
             $duration = trim(str_replace($extracted->text(), '', $extracted->parents()->text()));
             // Handle variations that include minutes
             if (strpos($duration, 'min.') !== false) {
                 if (strpos($duration, 'hr.') !== false) {
                     //contains hours and minutes
                     if (preg_match('/([0-9]+) hr\\. ([0-9]+) min\\./', $duration, $durationParts) === 1) {
                         //This could all be done in one line, but it's more understandable and maintainable broken up.
                         $hours = (int) $durationParts[1];
                         $minutes = (int) $durationParts[2];
                         $animerecord->setDuration($hours * 60 + $minutes);
                     }
                 } elseif (preg_match('/([0-9]+) min\\./', $duration, $durationParts) === 1) {
                     //contains only minutes
                     $animerecord->setDuration((int) $durationParts[1]);
                 }
                 //Handle hour-only durations
             } elseif (strpos($duration, 'hr.') !== false) {
                 if (preg_match('/([0-9]+) hr\\./', $duration, $durationParts) === 1) {
                     $animerecord->setDuration((int) $durationParts[1] * 60);
                 }
             }
             // Any other format (such as just "Unknown") isn't understood and is ignored
         }
         // External links are only visible when a user has logged in and may be hidden on some records.
         $externalLinks = $crawler->filterXPath('//h2[text()="External Links"]');
         if ($externalLinks->count() > 0) {
             $extracted = $externalLinks->nextAll()->filter('a');
             # External Links:
             foreach ($extracted as $externalLinkRow) {
                 $animerecord->setExternalLinks($externalLinkRow->nodeValue, $externalLinkRow->getAttribute('href'));
             }
         }
         # Preview:
         $extracted = $crawler->filter('div[class="video-promotion"] a');
         if ($extracted->count() > 0) {
             // Drops the query string from the preview link.
             $animerecord->setPreview(preg_replace('/\\?(.+?)$/', '', $extracted->attr('href')));
         }
         # Opening Theme:
         $extracted = $crawler->filter('div[class="theme-songs js-theme-songs opnening"] span');
         foreach ($extracted as $openingRow) {
             $animerecord->setOpeningTheme($openingRow->nodeValue);
         }
         # Ending Theme:
         $extracted = $crawler->filter('div[class="theme-songs js-theme-songs ending"] span');
         foreach ($extracted as $endingRow) {
             $animerecord->setEndingTheme($endingRow->nodeValue);
         }
         # Recommendations
         $extracted = $crawler->filter('div[id="anime_recommendation"] li[class="btn-anime"]');
         // NOTE(review): assumes getId() returns the ID stored by setId() above - confirm.
         $currentId = (string) $animerecord->getId();
         foreach ($extracted as $recommendationsRow) {
             $recommendationsCrawler = new Crawler($recommendationsRow);
             $anime = new Anime();
             // The last path segment holds both IDs separated by a hyphen; the current
             // title's ID can be on either side.
             $tempId = preg_replace('/.*\\//', '', $recommendationsCrawler->filter('a')->attr('href'));
             // Pick the part that is not the current ID. Comparing whole parts avoids
             // corrupting IDs that merely contain the current ID's digits (e.g. 1 vs 21).
             $recommendedId = '';
             foreach (explode('-', $tempId) as $candidateId) {
                 if ($candidateId !== $currentId) {
                     $recommendedId = $candidateId;
                     break;
                 }
             }
             $anime->setId($recommendedId);
             $anime->setTitle($recommendationsCrawler->filter('span')->text());
             $anime->setImageUrl(preg_replace('/r(.+?)\\/(.+?)\\?(.+?)$/', '$2', $recommendationsCrawler->filter('img')->attr('data-src')));
             $animerecord->setRecommendations($anime);
         }
     }
     return $animerecord;
 }
コード例 #4
0
 /**
  * Checks that the episode count passed to setEpisodes() is what getEpisodes() returns.
  */
 public function testEpisodes()
 {
     // A random integer serves as the episode count under test.
     $expectedEpisodes = rand();
     $subject = new Anime();
     $subject->setEpisodes($expectedEpisodes);
     $actualEpisodes = $subject->getEpisodes();
     $this->assertEquals($expectedEpisodes, $actualEpisodes);
 }