예제 #1
0
 /**
  * Replaces the stored relations of the current media entry with those listed
  * in the related-entries table of the media page.
  *
  * All `mediarelation` rows of the media are deleted up front; the table rows
  * are then parsed and the resulting records are inserted and also published
  * as `$context->relationData`.
  *
  * @param array $documents Documents keyed by URL constant; only self::URL_MEDIA is read.
  * @param mixed $context   Reads `media->id`, receives `relationData`.
  *
  * @throws BadProcessorDocumentException When Strings::makeEnum() returns null for a row's label.
  */
 public function process(array $documents, &$context)
 {
     $page = $documents[self::URL_MEDIA];
     $finder = new DOMXPath(self::getDOM($page));
     $ownerId = $context->media->id;
     Database::delete('mediarelation', ['media_id' => $ownerId]);

     // Lower-cased label of the first cell => relation constant.
     $relationByLabel = [
         'adaptation' => MediaRelation::Adaptation,
         'alternative setting' => MediaRelation::AlternativeSetting,
         'alternative version' => MediaRelation::AlternativeVersion,
         'character' => MediaRelation::Character,
         'full story' => MediaRelation::FullStory,
         'other' => MediaRelation::Other,
         'parent story' => MediaRelation::ParentStory,
         'prequel' => MediaRelation::Prequel,
         'sequel' => MediaRelation::Sequel,
         'side story' => MediaRelation::SideStory,
         'spin-off' => MediaRelation::SpinOff,
         'summary' => MediaRelation::Summary,
     ];

     $records = [];
     $tableRows = $finder->query('//table[@class=\'anime_detail_related_anime\']/tr');
     foreach ($tableRows as $tableRow) {
         // First cell holds the relation label, second cell the links.
         $label = strtolower(Strings::removeSpaces($tableRow->childNodes[0]->textContent));
         $relation = Strings::makeEnum($label, $relationByLabel, null);
         if ($relation === null) {
             throw new BadProcessorDocumentException($page, 'unknown relation type: ' . $label);
         }

         foreach ($tableRow->childNodes[1]->getElementsByTagName('a') as $anchor) {
             $href = $anchor->getAttribute('href');
             // Only site-relative /anime/<id>/ and /manga/<id>/ links count.
             if (!preg_match('#^/(anime|manga)/([0-9]+)/#', $href, $parts)) {
                 continue;
             }
             $targetId = Strings::makeInteger($parts[2]);
             // The pattern admits only these two prefixes.
             $targetMedia = $parts[1] === 'anime' ? Media::Anime : Media::Manga;
             $records[] = [
                 'media_id' => $ownerId,
                 'mal_id' => $targetId,
                 'media' => $targetMedia,
                 'type' => $relation,
             ];
         }
     }

     Database::insert('mediarelation', $records);
     $context->relationData = $records;
 }
예제 #2
0
 /**
  * Parses the media page and stores the scraped attributes on the media record.
  *
  * Reads title, type, picture, score, ranking, popularity, member and favourite
  * counts, publishing status and the aired/published date range from the
  * document keyed by self::URL_MEDIA, copies them onto `$context->media`,
  * stamps `processed` with the current time and persists it with R::store().
  *
  * @param array $documents Documents keyed by URL constant; only self::URL_MEDIA is read.
  * @param mixed $context   Reads `key`, updates and stores `media`.
  *
  * @throws BadProcessorKeyException      When the page contains a "404 Not Found" heading.
  * @throws BadProcessorDocumentException When the title is empty, or Strings::makeEnum()
  *                                       returns null for the type or the status label.
  */
 public function process(array $documents, &$context)
 {
     $document = $documents[self::URL_MEDIA];
     $dom = self::getDOM($document);
     $xpath = new DOMXPath($dom);
     if ($xpath->query('//h1[text() = \'404 Not Found\']')->length >= 1) {
         throw new BadProcessorKeyException($context->key);
     }

     $title = Strings::removeSpaces(self::getNodeValue($xpath, '//h1//span'));
     if (empty($title)) {
         throw new BadProcessorDocumentException($document, 'empty title');
     }

     // Sub type: the text node following the "Type" label.
     $typeMal = strtolower(Strings::removeSpaces(self::getNodeValue($xpath, '//span[starts-with(text(), \'Type\')]/following-sibling::node()[self::text()]')));
     $type = Strings::makeEnum($typeMal, ['tv' => AnimeMediaType::TV, 'ova' => AnimeMediaType::OVA, 'movie' => AnimeMediaType::Movie, 'special' => AnimeMediaType::Special, 'ona' => AnimeMediaType::ONA, 'music' => AnimeMediaType::Music, 'manga' => MangaMediaType::Manga, 'novel' => MangaMediaType::Novel, 'one-shot' => MangaMediaType::Oneshot, 'doujinshi' => MangaMediaType::Doujinshi, 'manhwa' => MangaMediaType::Manhwa, 'manhua' => MangaMediaType::Manhua, 'oel' => MangaMediaType::OEL, 'unknown' => $this->media == Media::Manga ? MangaMediaType::Unknown : AnimeMediaType::Unknown], null);
     if ($type === null) {
         throw new BadProcessorDocumentException($document, 'empty sub type');
     }

     // Picture and statistics.
     $image = self::getNodeValue($xpath, '//meta[@property = \'og:image\']', null, 'content');
     $score = Strings::makeFloat(self::getNodeValue($xpath, '//span[@itemprop = \'ratingValue\']'));
     $scoredByUsers = Strings::makeInteger(self::getNodeValue($xpath, '//span[@itemprop = \'ratingCount\']'));
     $ranked = Strings::makeInteger(self::getNodeValue($xpath, '//span[starts-with(text(), \'Ranked\')]/following-sibling::node()[self::text()]'));
     $popularity = Strings::makeInteger(self::getNodeValue($xpath, '//span[starts-with(text(), \'Popularity\')]/following-sibling::node()[self::text()]'));
     $members = Strings::makeInteger(self::getNodeValue($xpath, '//span[starts-with(text(), \'Members\')]/following-sibling::node()[self::text()]'));
     $favorites = Strings::makeInteger(self::getNodeValue($xpath, '//span[starts-with(text(), \'Favorites\')]/following-sibling::node()[self::text()]'));

     // Publishing status; anime and manga wordings map onto the same constants.
     $statusMal = strtolower(Strings::removeSpaces(self::getNodeValue($xpath, '//span[starts-with(text(), \'Status\')]/following-sibling::node()[self::text()]')));
     $status = Strings::makeEnum($statusMal, ['not yet published' => MediaStatus::NotYetPublished, 'not yet aired' => MediaStatus::NotYetPublished, 'publishing' => MediaStatus::Publishing, 'currently airing' => MediaStatus::Publishing, 'finished' => MediaStatus::Finished, 'finished airing' => MediaStatus::Finished], null);
     if ($status === null) {
         // Report the label that was actually read; the previous code referenced
         // an undefined variable here, so the message never showed the status.
         throw new BadProcessorDocumentException($document, 'unknown status: ' . $statusMal);
     }

     // Date range: "<from> to <until>", or a single date used for both ends.
     $publishedString = Strings::removeSpaces(self::getNodeValue($xpath, '//span[starts-with(text(), \'Aired\') or starts-with(text(), \'Published\')]/following-sibling::node()[self::text()]'));
     $position = strrpos($publishedString, ' to ');
     if ($position !== false) {
         $publishedFrom = Strings::makeDate(substr($publishedString, 0, $position));
         // Skip the 4-character ' to ' separator.
         $publishedTo = Strings::makeDate(substr($publishedString, $position + 4));
     } else {
         $publishedFrom = Strings::makeDate($publishedString);
         $publishedTo = Strings::makeDate($publishedString);
     }

     $media =& $context->media;
     $media->media = $this->media;
     $media->title = $title;
     $media->sub_type = $type;
     $media->picture_url = $image;
     $media->average_score = $score;
     $media->average_score_users = $scoredByUsers;
     $media->publishing_status = $status;
     $media->popularity = $popularity;
     $media->members = $members;
     $media->favorites = $favorites;
     $media->ranking = $ranked;
     $media->published_from = $publishedFrom;
     $media->published_to = $publishedTo;
     $media->processed = date('Y-m-d H:i:s');
     R::store($media);
 }
예제 #3
0
 /**
  * Rebuilds the user's list entries and per-media statistics from the
  * downloaded list documents.
  *
  * The user's `usermedia` rows are deleted and the `cool` flag is reset first.
  * Then, for every value of Media::getConstList(), the matching info document
  * is parsed into rows that are inserted into `usermedia`, and the
  * `<media>_days_spent` / `<media>_private` properties are written and the
  * user is stored. `cool` is switched on once any media has at least 50 rated
  * entries with a standard deviation of at least 1.5.
  *
  * @param array $documents Documents keyed by the URL_*LIST and URL_*INFO constants.
  * @param mixed $context   Reads `user->id`, updates and stores `user`.
  *
  * @throws BadProcessorDocumentException When the info document has no `myinfo` element
  *                                       or lacks the closing `</myanimelist>` tag.
  * @throws BadMediaException             When a media value is neither anime nor manga.
  */
 public function process(array $documents, &$context)
 {
     Database::delete('usermedia', ['user_id' => $context->user->id]);
     $context->user->cool = false;

     foreach (Media::getConstList() as $media) {
         $isAnime = $media == Media::Anime;

         // The list page is consulted only for the privacy notice.
         $listDocument = $documents[$isAnime ? self::URL_ANIMELIST : self::URL_MANGALIST];
         $isPrivate = strpos($listDocument->content, 'This list has been made private by the owner') !== false;

         $infoDocument = $documents[$isAnime ? self::URL_ANIMEINFO : self::URL_MANGAINFO];
         $finder = new DOMXPath(self::getDOM($infoDocument));
         if ($finder->query('//myinfo')->length == 0) {
             throw new BadProcessorDocumentException($infoDocument, 'myinfo block is missing');
         }
         // A missing closing root tag means the download was cut short.
         if (strpos($infoDocument->content, '</myanimelist>') === false) {
             throw new BadProcessorDocumentException($infoDocument, 'list is only partially downloaded');
         }

         $rows = [];
         foreach ($finder->query('//anime | //manga') as $entry) {
             $targetId = Strings::makeInteger(self::getNodeValue($finder, 'series_animedb_id | series_mangadb_id', $entry));
             $rating = Strings::makeInteger(self::getNodeValue($finder, 'my_score', $entry));
             $startedOn = Strings::makeDate(self::getNodeValue($finder, 'my_start_date', $entry));
             $finishedOn = Strings::makeDate(self::getNodeValue($finder, 'my_finish_date', $entry));
             $listStatus = Strings::makeEnum(
                 self::getNodeValue($finder, 'my_status', $entry),
                 [
                     1 => UserListStatus::Completing,
                     2 => UserListStatus::Finished,
                     3 => UserListStatus::OnHold,
                     4 => UserListStatus::Dropped,
                     6 => UserListStatus::Planned,
                 ],
                 UserListStatus::Unknown
             );

             // Progress counters that do not apply to the media stay null.
             $episodes = null;
             $chapters = null;
             $volumes = null;
             if ($isAnime) {
                 $episodes = Strings::makeInteger(self::getNodeValue($finder, 'my_watched_episodes', $entry));
             } elseif ($media == Media::Manga) {
                 $chapters = Strings::makeInteger(self::getNodeValue($finder, 'my_read_chapters', $entry));
                 $volumes = Strings::makeInteger(self::getNodeValue($finder, 'my_read_volumes', $entry));
             } else {
                 throw new BadMediaException();
             }

             $rows[] = [
                 'user_id' => $context->user->id,
                 'mal_id' => $targetId,
                 'media' => $media,
                 'score' => $rating,
                 'start_date' => $startedOn,
                 'end_date' => $finishedOn,
                 'finished_episodes' => $episodes,
                 'finished_chapters' => $chapters,
                 'finished_volumes' => $volumes,
                 'status' => $listStatus,
             ];
         }
         Database::insert('usermedia', $rows);

         $distribution = RatingDistribution::fromEntries(ReflectionHelper::arraysToClasses($rows));
         $daysSpent = Strings::makeFloat(self::getNodeValue($finder, '//user_days_spent_watching'));

         $owner =& $context->user;
         $prefix = Media::toString($media);
         $owner->{$prefix . '_days_spent'} = $daysSpent;
         $owner->{$prefix . '_private'} = $isPrivate;
         // Bitwise OR keeps the flag set once any media qualified.
         $owner->cool |= ($distribution->getRatedCount() >= 50 && $distribution->getStandardDeviation() >= 1.5);
         R::store($owner);
     }
 }
예제 #4
0
 /**
  * Parses the user's profile page (desktop and mobile variants) and stores the
  * scraped attributes on the user record.
  *
  * Name, picture, join date, id, forum post count, birthday, location, website
  * and gender come from the document keyed by self::URL_PROFILE; the anime and
  * manga list view counts come from the one keyed by self::URL_PROFILE_MOBILE.
  * The values are copied onto `$context->user`, `processed` is stamped with the
  * current time and the record is persisted with R::store().
  *
  * @param array $documents Documents keyed by URL constant.
  * @param mixed $context   Reads `key`, updates and stores `user`.
  *
  * @throws BadProcessorKeyException      When the page contains a "404 Not Found" heading.
  * @throws BadProcessorDocumentException When no user name can be extracted from the heading.
  */
 public function process(array $documents, &$context)
 {
     $document = $documents[self::URL_PROFILE];
     $documentMobile = $documents[self::URL_PROFILE_MOBILE];
     $dom = self::getDOM($document);
     $domMobile = self::getDOM($documentMobile);
     $xpath = new DOMXPath($dom);
     $xpathMobile = new DOMXPath($domMobile);
     if ($xpath->query('//h1[text() = \'404 Not Found\']')->length >= 1) {
         throw new BadProcessorKeyException($context->key);
     }

     // The heading reads "<name>'s Profile"; keep only the part before the suffix.
     $name = Strings::removeSpaces(self::getNodeValue($xpath, '//h1//span'));
     $suffixPosition = strpos($name, '\'s Profile');
     // strpos() yields false when the suffix is absent; treat that explicitly as
     // "no name" instead of handing false to substr() as a length.
     $name = Strings::removeSpaces($suffixPosition === false ? '' : substr($name, 0, $suffixPosition));
     if (empty($name)) {
         throw new BadProcessorDocumentException($document, 'Username missing');
     }

     $image = self::getNodeValue($xpath, '//div[contains(@class, \'user-image\')]//img', null, 'src');
     $joinDate = Strings::makeDate(self::getNodeValue($xpath, '//span[text() = \'Joined\']/following-sibling::span'));
     $malId = Strings::makeInteger(self::getNodeValue($xpath, '//input[@name = \'profileMemId\']', null, 'value'));
     // NOTE(review): $name is embedded unescaped in a single-quoted XPath literal;
     // this assumes names never contain a single quote - confirm.
     $postCount = Strings::makeInteger(self::getNodeValue($xpath, '//a[@href=\'https://myanimelist.net/forum/index.php?action=search&u=' . $name . '&q=&uloc=1&loc=-1\']/span[2]'));
     $birthday = Strings::makeDate(self::getNodeValue($xpath, '//span[text() = \'Birthday\']/following-sibling::span'));
     $location = Strings::removeSpaces(self::getNodeValue($xpath, '//span[text() = \'Location\']/following-sibling::span'));

     // First "Also Available at" link, or an empty string when there is none.
     $websiteNode = $xpath->query('//h4[text() = \'Also Available at\']/following-sibling::div/a/@href');
     if ($websiteNode->length >= 1) {
         $website = $websiteNode->item(0)->nodeValue;
     } else {
         $website = "";
     }

     $gender = Strings::makeEnum(self::getNodeValue($xpath, '//span[text() = \'Gender\']/following-sibling::span'), ['Female' => UserGender::Female, 'Male' => UserGender::Male], UserGender::Unknown);

     // List view counters are read from the mobile document.
     $animeViewCount = Strings::makeInteger(self::getNodeValue($xpathMobile, '//td[text() = \'Anime List Views\']/following-sibling::td'));
     $mangaViewCount = Strings::makeInteger(self::getNodeValue($xpathMobile, '//td[text() = \'Manga List Views\']/following-sibling::td'));

     $user =& $context->user;
     $user->name = $name;
     $user->picture_url = $image;
     $user->join_date = $joinDate;
     $user->mal_id = $malId;
     $user->posts = $postCount;
     $user->birthday = $birthday;
     $user->location = $location;
     $user->website = $website;
     $user->gender = $gender;
     $user->anime_views = $animeViewCount;
     $user->manga_views = $mangaViewCount;
     $user->processed = date('Y-m-d H:i:s');
     R::store($user);
 }
 /**
  * Replaces the stored relations of the current media entry with the links
  * found in the "Related ..." section of the media page.
  *
  * All `mediarelation` rows of the media are deleted up front. The siblings of
  * the "Related" heading are then walked: every anchor is classified by the
  * text of the node preceding it (a lone comma repeats the previous relation)
  * and turned into a record. The records are inserted and also published as
  * `$context->relationData`.
  *
  * @param array $documents Documents keyed by URL constant; only self::URL_MEDIA is read.
  * @param mixed $context   Reads `media->id`, receives `relationData`.
  *
  * @throws BadProcessorDocumentException When Strings::makeEnum() returns null for a relation label.
  */
 public function process(array $documents, &$context)
 {
     $page = $documents[self::URL_MEDIA];
     $finder = new DOMXPath(self::getDOM($page));
     Database::delete('mediarelation', ['media_id' => $context->media->id]);

     // Lower-cased label preceding a link => relation constant.
     $relationByLabel = [
         'sequel' => MediaRelation::Sequel,
         'prequel' => MediaRelation::Prequel,
         'side story' => MediaRelation::SideStory,
         'parent story' => MediaRelation::ParentStory,
         'adaptation' => MediaRelation::Adaptation,
         'alternative version' => MediaRelation::AlternativeVersion,
         'summary' => MediaRelation::Summary,
         'character' => MediaRelation::Character,
         'spin-off' => MediaRelation::SpinOff,
         'alternative setting' => MediaRelation::AlternativeSetting,
         'other' => MediaRelation::Other,
         'full story' => MediaRelation::FullStory,
     ];

     $records = [];
     $previousRelation = '';
     foreach ($finder->query('//h2[starts-with(text(), \'Related\')]/../*') as $element) {
         $tag = $element->nodeName;

         // Stop at the first heading that no longer belongs to the relations section.
         if ($tag == 'h2') {
             $heading = $element->textContent;
             if (strpos($heading, 'Related') === false || $heading == 'Related Clubs') {
                 break;
             }
         }
         if ($tag != 'a') {
             continue;
         }

         $href = $element->attributes->getNamedItem('href')->nodeValue;

         // Relation type: resolved before the link is inspected, so the remembered
         // relation is updated even for links that end up being skipped.
         $label = strtolower(Strings::removeSpaces($element->previousSibling->textContent));
         if ($label == ',') {
             $relation = $previousRelation;
         } else {
             $relation = Strings::makeEnum($label, $relationByLabel, null);
             if ($relation === null) {
                 throw new BadProcessorDocumentException($page, 'unknown relation type: ' . $label);
             }
             $previousRelation = $relation;
         }

         // Target id: the first run of digits in the link.
         preg_match('/([0-9]+)/', $href, $digits);
         if (!isset($digits[0])) {
             continue;
         }
         $targetId = Strings::makeInteger($digits[0]);

         // Target media: decided by the path fragment contained in the link.
         if (strpos($href, '/anime') !== false) {
             $targetMedia = Media::Anime;
         } elseif (strpos($href, '/manga') !== false) {
             $targetMedia = Media::Manga;
         } else {
             continue;
         }

         $records[] = [
             'media_id' => $context->media->id,
             'mal_id' => $targetId,
             'media' => $targetMedia,
             'type' => $relation,
         ];
     }

     Database::insert('mediarelation', $records);
     $context->relationData = $records;
 }
 /**
  * Parses the user's profile page and stores the scraped attributes on the
  * user record.
  *
  * Name, picture, join date, id, list view counts, comment and forum post
  * counts, birthday, location, website and gender are read from the document
  * keyed by self::URL_PROFILE, copied onto `$context->user`, `processed` is
  * stamped with the current time and the record is persisted with R::store().
  *
  * @param array $documents Documents keyed by URL constant; only self::URL_PROFILE is read.
  * @param mixed $context   Reads `key`, updates and stores `user`.
  *
  * @throws BadProcessorKeyException      When the page title is "Invalid User".
  * @throws BadProcessorDocumentException When no user name can be extracted from the title.
  */
 public function process(array $documents, &$context)
 {
     $doc = $documents[self::URL_PROFILE];
     $dom = self::getDOM($doc);
     $xpath = new DOMXPath($dom);
     if ($xpath->query('//title[text() = \'Invalid User\']')->length >= 1) {
         throw new BadProcessorKeyException($context->key);
     }

     // The title contains "<name>'s Profile", optionally prefixed with "Top - ".
     $userName = Strings::removeSpaces(self::getNodeValue($xpath, '//title'));
     $suffixPosition = strpos($userName, '\'s Profile');
     // strpos() yields false when the suffix is absent; treat that explicitly as
     // "no name" instead of handing false to substr() as a length.
     $userName = $suffixPosition === false ? '' : substr($userName, 0, $suffixPosition);
     $userName = str_replace('Top - ', '', $userName);
     $userName = Strings::removeSpaces($userName);
     if (empty($userName)) {
         throw new BadProcessorDocumentException($doc, 'User name missing');
     }

     $pictureUrl = self::getNodeValue($xpath, '//td[@class = \'profile_leftcell\']//img', null, 'src');
     $joinDate = Strings::makeDate(self::getNodeValue($xpath, '//td[text() = \'Join Date\']/following-sibling::td'));
     $malId = Strings::makeInteger(self::getNodeValue($xpath, '//input[@name = \'profileMemId\']', null, 'value'));

     // Each statistic sits in the cell that follows its label cell.
     $animeViewCount = Strings::makeInteger(self::getNodeValue($xpath, '//td[text() = \'Anime List Views\']/following-sibling::td'));
     $mangaViewCount = Strings::makeInteger(self::getNodeValue($xpath, '//td[text() = \'Manga List Views\']/following-sibling::td'));
     $commentCount = Strings::makeInteger(self::getNodeValue($xpath, '//td[text() = \'Comments\']/following-sibling::td'));
     $postCount = Strings::makeInteger(self::getNodeValue($xpath, '//td[text() = \'Forum Posts\']/following-sibling::td'));
     $birthday = Strings::makeDate(self::getNodeValue($xpath, '//td[text() = \'Birthday\']/following-sibling::td'));
     $location = Strings::removeSpaces(self::getNodeValue($xpath, '//td[text() = \'Location\']/following-sibling::td'));
     $website = Strings::removeSpaces(self::getNodeValue($xpath, '//td[text() = \'Website\']/following-sibling::td'));
     $gender = Strings::makeEnum(self::getNodeValue($xpath, '//td[text() = \'Gender\']/following-sibling::td'), ['Female' => UserGender::Female, 'Male' => UserGender::Male], UserGender::Unknown);

     $user =& $context->user;
     $user->name = $userName;
     $user->picture_url = $pictureUrl;
     $user->join_date = $joinDate;
     $user->mal_id = $malId;
     $user->comments = $commentCount;
     $user->posts = $postCount;
     $user->birthday = $birthday;
     $user->location = $location;
     $user->website = $website;
     $user->gender = $gender;
     $user->anime_views = $animeViewCount;
     $user->manga_views = $mangaViewCount;
     $user->processed = date('Y-m-d H:i:s');
     R::store($user);
 }
 /**
  * Parses the media page and stores the scraped attributes on the media record.
  *
  * Title, sub type, id, picture, score, ranking, popularity, member and
  * favourite counts, publishing status and the aired/published date range are
  * read from the document keyed by self::URL_MEDIA, copied onto
  * `$context->media`, `processed` is stamped with the current time and the
  * record is persisted with R::store().
  *
  * @param array $documents Documents keyed by URL constant; only self::URL_MEDIA is read.
  * @param mixed $context   Reads `key`, updates and stores `media`.
  *
  * @throws BadProcessorKeyException      When the page contains a `badresult` block.
  * @throws BadProcessorDocumentException When the title is empty, or Strings::makeEnum()
  *                                       returns null for the type or the status label.
  */
 public function process(array $documents, &$context)
 {
     $page = $documents[self::URL_MEDIA];
     $finder = new DOMXPath(self::getDOM($page));
     if ($finder->query('//div[@class = \'badresult\']')->length >= 1) {
         throw new BadProcessorKeyException($context->key);
     }

     // Title: the text node right after the heading's child element.
     $name = Strings::removeSpaces(self::getNodeValue($finder, '//h1/*/following-sibling::node()[1][self::text()]'));
     if (empty($name)) {
         throw new BadProcessorDocumentException($page, 'empty title');
     }

     // Sub type; an empty label maps to the "unknown" constant of the processed media.
     $unknownKind = $this->media == Media::Manga ? MangaMediaType::Unknown : AnimeMediaType::Unknown;
     $kindLabel = strtolower(Strings::removeSpaces(self::getNodeValue($finder, '//span[starts-with(text(), \'Type\')]/following-sibling::node()[self::text()]')));
     $kind = Strings::makeEnum(
         $kindLabel,
         [
             'tv' => AnimeMediaType::TV,
             'ova' => AnimeMediaType::OVA,
             'movie' => AnimeMediaType::Movie,
             'special' => AnimeMediaType::Special,
             'ona' => AnimeMediaType::ONA,
             'music' => AnimeMediaType::Music,
             'manga' => MangaMediaType::Manga,
             'novel' => MangaMediaType::Novel,
             'one shot' => MangaMediaType::OneShot,
             'doujin' => MangaMediaType::Doujin,
             'manhwa' => MangaMediaType::Manhwa,
             'manhua' => MangaMediaType::Manhua,
             'oel' => MangaMediaType::OEL,
             '' => $unknownKind,
         ],
         null
     );
     if ($kind === null) {
         throw new BadProcessorDocumentException($page, 'empty sub type');
     }

     // Identifier and picture.
     $siteId = self::getNodeValue($finder, '//input[starts-with(@id, \'myinfo_\')]', null, 'value');
     $picture = self::getNodeValue($finder, '//td[@class = \'borderClass\']//img', null, 'src');

     // Statistics: each value is the text node following its label.
     $meanScore = Strings::makeFloat(self::getNodeValue($finder, '//span[starts-with(text(), \'Score\')]/following-sibling::node()[self::text()]'));
     $meanScoreVoters = Strings::extractInteger(self::getNodeValue($finder, '//small[starts-with(text(), \'(scored by\')]'));
     $rank = Strings::makeInteger(self::getNodeValue($finder, '//span[starts-with(text(), \'Ranked\')]/following-sibling::node()[self::text()]'));
     $popularityRank = Strings::makeInteger(self::getNodeValue($finder, '//span[starts-with(text(), \'Popularity\')]/following-sibling::node()[self::text()]'));
     $members = Strings::makeInteger(self::getNodeValue($finder, '//span[starts-with(text(), \'Members\')]/following-sibling::node()[self::text()]'));
     $favorites = Strings::makeInteger(self::getNodeValue($finder, '//span[starts-with(text(), \'Favorites\')]/following-sibling::node()[self::text()]'));

     // Publishing status; anime and manga wordings map onto the same constants.
     $stateLabel = strtolower(Strings::removeSpaces(self::getNodeValue($finder, '//span[starts-with(text(), \'Status\')]/following-sibling::node()[self::text()]')));
     $state = Strings::makeEnum(
         $stateLabel,
         [
             'not yet published' => MediaStatus::NotYetPublished,
             'not yet aired' => MediaStatus::NotYetPublished,
             'publishing' => MediaStatus::Publishing,
             'currently airing' => MediaStatus::Publishing,
             'finished' => MediaStatus::Finished,
             'finished airing' => MediaStatus::Finished,
         ],
         null
     );
     if ($state === null) {
         throw new BadProcessorDocumentException($page, 'unknown status: ' . $stateLabel);
     }

     // Date range: "<from> to <until>", or a single date used for both ends.
     $period = Strings::removeSpaces(self::getNodeValue($finder, '//span[starts-with(text(), \'Aired\') or starts-with(text(), \'Published\')]/following-sibling::node()[self::text()]'));
     $separatorAt = strrpos($period, ' to ');
     if ($separatorAt === false) {
         $fromText = $period;
         $untilText = $period;
     } else {
         $fromText = substr($period, 0, $separatorAt);
         // Skip the 4-character ' to ' separator.
         $untilText = substr($period, $separatorAt + 4);
     }
     $startsOn = Strings::makeDate($fromText);
     $endsOn = Strings::makeDate($untilText);

     $record =& $context->media;
     $record->mal_id = $siteId;
     $record->media = $this->media;
     $record->title = $name;
     $record->sub_type = $kind;
     $record->picture_url = $picture;
     $record->average_score = $meanScore;
     $record->average_score_users = $meanScoreVoters;
     $record->publishing_status = $state;
     $record->popularity = $popularityRank;
     $record->members = $members;
     $record->favorites = $favorites;
     $record->ranking = $rank;
     $record->published_from = $startsOn;
     $record->published_to = $endsOn;
     $record->processed = date('Y-m-d H:i:s');
     R::store($record);
 }