/**
 * Rebuilds the "mediarelation" rows of the current media from its page.
 *
 * Rows already stored for $context->media->id are deleted, every <tr> of the
 * "anime_detail_related_anime" table is scanned for links of the form
 * /anime/<id>/ or /manga/<id>/, and the collected rows are inserted and also
 * published as $context->relationData.
 *
 * @param array $documents downloaded documents; the one under self::URL_MEDIA is read
 * @param mixed $context   processing context; ->media->id is read, ->relationData is written
 *
 * @throws BadProcessorDocumentException when a row's relation label is not in the map below
 */
public function process(array $documents, &$context)
{
    $document = $documents[self::URL_MEDIA];
    $xpath = new DOMXPath(self::getDOM($document));

    Database::delete('mediarelation', ['media_id' => $context->media->id]);

    // Lower-cased label found on the page => MediaRelation constant.
    $relationMap = [
        'adaptation' => MediaRelation::Adaptation,
        'alternative setting' => MediaRelation::AlternativeSetting,
        'alternative version' => MediaRelation::AlternativeVersion,
        'character' => MediaRelation::Character,
        'full story' => MediaRelation::FullStory,
        'other' => MediaRelation::Other,
        'parent story' => MediaRelation::ParentStory,
        'prequel' => MediaRelation::Prequel,
        'sequel' => MediaRelation::Sequel,
        'side story' => MediaRelation::SideStory,
        'spin-off' => MediaRelation::SpinOff,
        'summary' => MediaRelation::Summary,
    ];

    $rows = [];
    foreach ($xpath->query('//table[@class=\'anime_detail_related_anime\']/tr') as $tableRow) {
        // The first child node carries the relation label, the second one the links.
        $label = strtolower(Strings::removeSpaces($tableRow->childNodes->item(0)->textContent));
        $relation = Strings::makeEnum($label, $relationMap, null);
        if ($relation === null) {
            throw new BadProcessorDocumentException($document, 'unknown relation type: ' . $label);
        }

        foreach ($tableRow->childNodes->item(1)->getElementsByTagName('a') as $anchor) {
            $href = $anchor->getAttribute('href');
            // Only links that start with /anime/<digits>/ or /manga/<digits>/ are recorded.
            if (!preg_match('#^/(anime|manga)/([0-9]+)/#', $href, $parts)) {
                continue;
            }

            $relatedMalId = Strings::makeInteger($parts[2]);
            // The pattern only lets "anime" or "manga" through, so two outcomes suffice.
            $relatedMedia = $parts[1] === 'anime' ? Media::Anime : Media::Manga;

            $rows[] = [
                'media_id' => $context->media->id,
                'mal_id' => $relatedMalId,
                'media' => $relatedMedia,
                'type' => $relation,
            ];
        }
    }

    Database::insert('mediarelation', $rows);
    $context->relationData = $rows;
}
/**
 * Parses the media page and stores the extracted details on $context->media.
 *
 * Reads the document under self::URL_MEDIA, extracts title, sub type, picture,
 * score figures, ranking figures, publishing status and the published date
 * range, assigns them to the media bean and persists it with R::store().
 *
 * @param array $documents downloaded documents; the one under self::URL_MEDIA is read
 * @param mixed $context   processing context; ->key is read, ->media is updated and stored
 *
 * @throws BadProcessorKeyException      when the page contains an <h1> reading "404 Not Found"
 * @throws BadProcessorDocumentException when the title is empty, or the type or status label
 *                                       is not present in the respective map
 */
public function process(array $documents, &$context)
{
    $document = $documents[self::URL_MEDIA];
    $dom = self::getDOM($document);
    $xpath = new DOMXPath($dom);

    if ($xpath->query('//h1[text() = \'404 Not Found\']')->length >= 1) {
        throw new BadProcessorKeyException($context->key);
    }

    $title = Strings::removeSpaces(self::getNodeValue($xpath, '//h1//span'));
    if (empty($title)) {
        throw new BadProcessorDocumentException($document, 'empty title');
    }

    // Sub type: one map serves both media kinds; only "unknown" depends on $this->media.
    $typeMal = strtolower(Strings::removeSpaces(self::getNodeValue($xpath, '//span[starts-with(text(), \'Type\')]/following-sibling::node()[self::text()]')));
    $type = Strings::makeEnum($typeMal, [
        'tv' => AnimeMediaType::TV,
        'ova' => AnimeMediaType::OVA,
        'movie' => AnimeMediaType::Movie,
        'special' => AnimeMediaType::Special,
        'ona' => AnimeMediaType::ONA,
        'music' => AnimeMediaType::Music,
        'manga' => MangaMediaType::Manga,
        'novel' => MangaMediaType::Novel,
        'one-shot' => MangaMediaType::Oneshot,
        'doujinshi' => MangaMediaType::Doujinshi,
        'manhwa' => MangaMediaType::Manhwa,
        'manhua' => MangaMediaType::Manhua,
        'oel' => MangaMediaType::OEL,
        'unknown' => $this->media == Media::Manga ? MangaMediaType::Unknown : AnimeMediaType::Unknown,
    ], null);
    if ($type === null) {
        throw new BadProcessorDocumentException($document, 'empty sub type');
    }

    $image = self::getNodeValue($xpath, '//meta[@property = \'og:image\']', null, 'content');

    // Score and ranking figures.
    $score = Strings::makeFloat(self::getNodeValue($xpath, '//span[@itemprop = \'ratingValue\']'));
    $scoredByUsers = Strings::makeInteger(self::getNodeValue($xpath, '//span[@itemprop = \'ratingCount\']'));
    $ranked = Strings::makeInteger(self::getNodeValue($xpath, '//span[starts-with(text(), \'Ranked\')]/following-sibling::node()[self::text()]'));
    $popularity = Strings::makeInteger(self::getNodeValue($xpath, '//span[starts-with(text(), \'Popularity\')]/following-sibling::node()[self::text()]'));
    $members = Strings::makeInteger(self::getNodeValue($xpath, '//span[starts-with(text(), \'Members\')]/following-sibling::node()[self::text()]'));
    $favorites = Strings::makeInteger(self::getNodeValue($xpath, '//span[starts-with(text(), \'Favorites\')]/following-sibling::node()[self::text()]'));

    // Publishing status: anime and manga wordings map onto the same constants.
    $statusMal = strtolower(Strings::removeSpaces(self::getNodeValue($xpath, '//span[starts-with(text(), \'Status\')]/following-sibling::node()[self::text()]')));
    $status = Strings::makeEnum($statusMal, [
        'not yet published' => MediaStatus::NotYetPublished,
        'not yet aired' => MediaStatus::NotYetPublished,
        'publishing' => MediaStatus::Publishing,
        'currently airing' => MediaStatus::Publishing,
        'finished' => MediaStatus::Finished,
        'finished airing' => MediaStatus::Finished,
    ], null);
    if ($status === null) {
        // Report the label that was actually parsed; this used to reference an
        // undefined variable, so the message never contained the offending value.
        throw new BadProcessorDocumentException($document, 'unknown status: ' . $statusMal);
    }

    // Published range: "<from> to <to>"; without the separator the same string
    // is used for both ends.
    $publishedString = Strings::removeSpaces(self::getNodeValue($xpath, '//span[starts-with(text(), \'Aired\') or starts-with(text(), \'Published\')]/following-sibling::node()[self::text()]'));
    $position = strrpos($publishedString, ' to ');
    if ($position !== false) {
        $publishedFrom = Strings::makeDate(substr($publishedString, 0, $position));
        // 4 is the length of the ' to ' separator.
        $publishedTo = Strings::makeDate(substr($publishedString, $position + 4));
    } else {
        $publishedFrom = Strings::makeDate($publishedString);
        $publishedTo = Strings::makeDate($publishedString);
    }

    $media =& $context->media;
    $media->media = $this->media;
    $media->title = $title;
    $media->sub_type = $type;
    $media->picture_url = $image;
    $media->average_score = $score;
    $media->average_score_users = $scoredByUsers;
    $media->publishing_status = $status;
    $media->popularity = $popularity;
    $media->members = $members;
    $media->favorites = $favorites;
    $media->ranking = $ranked;
    $media->published_from = $publishedFrom;
    $media->published_to = $publishedTo;
    $media->processed = date('Y-m-d H:i:s');
    R::store($media);
}
/**
 * Rebuilds the "usermedia" rows of the current user from the list documents.
 *
 * All rows stored for $context->user->id are deleted first. Then, for every
 * value returned by Media::getConstList(), the matching list document is
 * checked for the "made private" notice, the matching info document is parsed
 * into rows that are inserted into "usermedia", and the per-media
 * "<media>_days_spent" / "<media>_private" fields plus the "cool" flag are
 * updated on the user, which is stored after each media kind.
 *
 * @param array $documents downloaded documents, keyed by the self::URL_* constants
 * @param mixed $context   processing context; ->user is read, updated and stored
 *
 * @throws BadProcessorDocumentException when the info document has no <myinfo> element
 *                                       or lacks the closing </myanimelist> tag
 * @throws BadMediaException             when a media value is neither Media::Anime nor Media::Manga
 */
public function process(array $documents, &$context)
{
    Database::delete('usermedia', ['user_id' => $context->user->id]);
    $context->user->cool = false;

    // Raw my_status value => UserListStatus constant; anything else becomes Unknown.
    $statusMap = [
        1 => UserListStatus::Completing,
        2 => UserListStatus::Finished,
        3 => UserListStatus::OnHold,
        4 => UserListStatus::Dropped,
        6 => UserListStatus::Planned,
    ];

    foreach (Media::getConstList() as $media) {
        $isAnime = $media == Media::Anime;

        // The list page is only inspected for the privacy notice.
        $listDocument = $documents[$isAnime ? self::URL_ANIMELIST : self::URL_MANGALIST];
        $isPrivate = strpos($listDocument->content, 'This list has been made private by the owner') !== false;

        // The info document carries the actual entries.
        $doc = $documents[$isAnime ? self::URL_ANIMEINFO : self::URL_MANGAINFO];
        $xpath = new DOMXPath(self::getDOM($doc));

        if ($xpath->query('//myinfo')->length == 0) {
            throw new BadProcessorDocumentException($doc, 'myinfo block is missing');
        }
        if (strpos($doc->content, '</myanimelist>') === false) {
            throw new BadProcessorDocumentException($doc, 'list is only partially downloaded');
        }

        $entries = [];
        foreach ($xpath->query('//anime | //manga') as $entryNode) {
            $mediaMalId = Strings::makeInteger(self::getNodeValue($xpath, 'series_animedb_id | series_mangadb_id', $entryNode));
            $score = Strings::makeInteger(self::getNodeValue($xpath, 'my_score', $entryNode));
            $startDate = Strings::makeDate(self::getNodeValue($xpath, 'my_start_date', $entryNode));
            $finishDate = Strings::makeDate(self::getNodeValue($xpath, 'my_finish_date', $entryNode));
            $status = Strings::makeEnum(self::getNodeValue($xpath, 'my_status', $entryNode), $statusMap, UserListStatus::Unknown);

            // Progress counters: only those belonging to the current media kind are filled.
            $episodes = null;
            $chapters = null;
            $volumes = null;
            if ($media == Media::Anime) {
                $episodes = Strings::makeInteger(self::getNodeValue($xpath, 'my_watched_episodes', $entryNode));
            } elseif ($media == Media::Manga) {
                $chapters = Strings::makeInteger(self::getNodeValue($xpath, 'my_read_chapters', $entryNode));
                $volumes = Strings::makeInteger(self::getNodeValue($xpath, 'my_read_volumes', $entryNode));
            } else {
                throw new BadMediaException();
            }

            $entries[] = [
                'user_id' => $context->user->id,
                'mal_id' => $mediaMalId,
                'media' => $media,
                'score' => $score,
                'start_date' => $startDate,
                'end_date' => $finishDate,
                'finished_episodes' => $episodes,
                'finished_chapters' => $chapters,
                'finished_volumes' => $volumes,
                'status' => $status,
            ];
        }
        Database::insert('usermedia', $entries);

        $distribution = RatingDistribution::fromEntries(ReflectionHelper::arraysToClasses($entries));
        $daysSpent = Strings::makeFloat(self::getNodeValue($xpath, '//user_days_spent_watching'));

        $user =& $context->user;
        $fieldPrefix = Media::toString($media);
        $user->{$fieldPrefix . '_days_spent'} = $daysSpent;
        $user->{$fieldPrefix . '_private'} = $isPrivate;
        // The flag accumulates across media kinds: one qualifying list is enough.
        $user->cool |= ($distribution->getRatedCount() >= 50 && $distribution->getStandardDeviation() >= 1.5);
        R::store($user);
    }
}
/**
 * Parses the user's profile pages and stores the details on $context->user.
 *
 * Most fields come from the document under self::URL_PROFILE; the anime and
 * manga list view counters come from the one under self::URL_PROFILE_MOBILE.
 * The user bean is persisted with R::store().
 *
 * @param array $documents downloaded documents, keyed by the self::URL_* constants
 * @param mixed $context   processing context; ->key is read, ->user is updated and stored
 *
 * @throws BadProcessorKeyException      when the profile page contains an <h1> reading "404 Not Found"
 * @throws BadProcessorDocumentException when no user name can be extracted from the heading
 */
public function process(array $documents, &$context)
{
    $document = $documents[self::URL_PROFILE];
    $documentMobile = $documents[self::URL_PROFILE_MOBILE];
    $dom = self::getDOM($document);
    $domMobile = self::getDOM($documentMobile);
    $xpath = new DOMXPath($dom);
    $xpathMobile = new DOMXPath($domMobile);

    if ($xpath->query('//h1[text() = \'404 Not Found\']')->length >= 1) {
        throw new BadProcessorKeyException($context->key);
    }

    // The heading reads "<name>'s Profile"; keep only what precedes the suffix.
    // If the suffix is absent the name ends up empty and is rejected below.
    $name = Strings::removeSpaces(self::getNodeValue($xpath, '//h1//span'));
    $name = substr($name, 0, strpos($name, '\'s Profile'));
    $name = Strings::removeSpaces($name);
    if (empty($name)) {
        throw new BadProcessorDocumentException($document, 'Username missing');
    }

    $image = self::getNodeValue($xpath, '//div[contains(@class, \'user-image\')]//img', null, 'src');
    $joinDate = Strings::makeDate(self::getNodeValue($xpath, '//span[text() = \'Joined\']/following-sibling::span'));
    $malId = Strings::makeInteger(self::getNodeValue($xpath, '//input[@name = \'profileMemId\']', null, 'value'));
    // The forum post counter sits inside the link to the user's forum search.
    $postCount = Strings::makeInteger(self::getNodeValue($xpath, '//a[@href=\'https://myanimelist.net/forum/index.php?action=search&u=' . $name . '&q=&uloc=1&loc=-1\']/span[2]'));
    $birthday = Strings::makeDate(self::getNodeValue($xpath, '//span[text() = \'Birthday\']/following-sibling::span'));
    $location = Strings::removespaces(self::getNodeValue($xpath, '//span[text() = \'Location\']/following-sibling::span'));

    // Website: first link of the "Also Available at" section, or an empty string.
    // The node list is queried once and reused rather than running the XPath twice.
    $websiteNode = $xpath->query('//h4[text() = \'Also Available at\']/following-sibling::div/a/@href');
    if ($websiteNode->length >= 1) {
        $website = $websiteNode->item(0)->nodeValue;
    } else {
        $website = '';
    }

    $gender = Strings::makeEnum(
        self::getNodeValue($xpath, '//span[text() = \'Gender\']/following-sibling::span'),
        ['Female' => UserGender::Female, 'Male' => UserGender::Male],
        UserGender::Unknown
    );

    // List view counters are read from the mobile profile document.
    $animeViewCount = Strings::makeInteger(self::getNodeValue($xpathMobile, '//td[text() = \'Anime List Views\']/following-sibling::td'));
    $mangaViewCount = Strings::makeInteger(self::getNodeValue($xpathMobile, '//td[text() = \'Manga List Views\']/following-sibling::td'));

    $user =& $context->user;
    $user->name = $name;
    $user->picture_url = $image;
    $user->join_date = $joinDate;
    $user->mal_id = $malId;
    $user->posts = $postCount;
    $user->birthday = $birthday;
    $user->location = $location;
    $user->website = $website;
    $user->gender = $gender;
    $user->anime_views = $animeViewCount;
    $user->manga_views = $mangaViewCount;
    $user->processed = date('Y-m-d H:i:s');
    R::store($user);
}
/**
 * Rebuilds the "mediarelation" rows of the current media from the "Related"
 * section of its page.
 *
 * Rows already stored for $context->media->id are deleted. The siblings of
 * the "Related…" heading are walked in document order; each <a> contributes
 * one row whose relation type comes from the text of the node preceding it.
 * A preceding "," means "same type as the previous link". The collected rows
 * are inserted and also published as $context->relationData.
 *
 * @param array $documents downloaded documents; the one under self::URL_MEDIA is read
 * @param mixed $context   processing context; ->media->id is read, ->relationData is written
 *
 * @throws BadProcessorDocumentException when a relation label is not in the map below
 */
public function process(array $documents, &$context)
{
    $doc = $documents[self::URL_MEDIA];
    $xpath = new DOMXPath(self::getDOM($doc));

    Database::delete('mediarelation', ['media_id' => $context->media->id]);

    // Lower-cased label found on the page => MediaRelation constant.
    $relationMap = [
        'sequel' => MediaRelation::Sequel,
        'prequel' => MediaRelation::Prequel,
        'side story' => MediaRelation::SideStory,
        'parent story' => MediaRelation::ParentStory,
        'adaptation' => MediaRelation::Adaptation,
        'alternative version' => MediaRelation::AlternativeVersion,
        'summary' => MediaRelation::Summary,
        'character' => MediaRelation::Character,
        'spin-off' => MediaRelation::SpinOff,
        'alternative setting' => MediaRelation::AlternativeSetting,
        'other' => MediaRelation::Other,
        'full story' => MediaRelation::FullStory,
    ];

    $rows = [];
    $previousType = '';
    foreach ($xpath->query('//h2[starts-with(text(), \'Related\')]/../*') as $element) {
        // Stop at the first heading that is not a "Related" one, or at "Related Clubs".
        if ($element->nodeName == 'h2'
            && (strpos($element->textContent, 'Related') === false || $element->textContent == 'Related Clubs')
        ) {
            break;
        }
        if ($element->nodeName != 'a') {
            continue;
        }

        $href = $element->attributes->getNamedItem('href')->nodeValue;

        // Relation type: taken from the node right before the link.
        $label = strtolower(Strings::removeSpaces($element->previousSibling->textContent));
        if ($label == ',') {
            $relation = $previousType;
        } else {
            $relation = Strings::makeEnum($label, $relationMap, null);
            if ($relation === null) {
                throw new BadProcessorDocumentException($doc, 'unknown relation type: ' . $label);
            }
            $previousType = $relation;
        }

        // Relation id: the first run of digits in the link.
        if (!preg_match('/([0-9]+)/', $href, $digits)) {
            continue;
        }
        $relatedMalId = Strings::makeInteger($digits[0]);

        // Relation media: decided by the path fragment; other links are skipped.
        if (strpos($href, '/anime') !== false) {
            $relatedMedia = Media::Anime;
        } elseif (strpos($href, '/manga') !== false) {
            $relatedMedia = Media::Manga;
        } else {
            continue;
        }

        $rows[] = [
            'media_id' => $context->media->id,
            'mal_id' => $relatedMalId,
            'media' => $relatedMedia,
            'type' => $relation,
        ];
    }

    Database::insert('mediarelation', $rows);
    $context->relationData = $rows;
}
/**
 * Parses the user's profile page and stores the details on $context->user.
 *
 * Reads the document under self::URL_PROFILE, extracts the user name from the
 * page title and the remaining fields from the picture, the hidden member-id
 * input and the labelled table cells, then persists the bean with R::store().
 *
 * @param array $documents downloaded documents; the one under self::URL_PROFILE is read
 * @param mixed $context   processing context; ->key is read, ->user is updated and stored
 *
 * @throws BadProcessorKeyException      when the page title is exactly "Invalid User"
 * @throws BadProcessorDocumentException when no user name can be extracted from the title
 */
public function process(array $documents, &$context)
{
    $doc = $documents[self::URL_PROFILE];
    $xpath = new DOMXPath(self::getDOM($doc));

    if ($xpath->query('//title[text() = \'Invalid User\']')->length >= 1) {
        throw new BadProcessorKeyException($context->key);
    }

    // Title handling: cut at "'s Profile", then drop a "Top - " prefix if present.
    $pageTitle = Strings::removeSpaces(self::getNodeValue($xpath, '//title'));
    $suffixAt = strpos($pageTitle, '\'s Profile');
    $userName = substr($pageTitle, 0, $suffixAt);
    $userName = Strings::removeSpaces(str_replace('Top - ', '', $userName));
    if (empty($userName)) {
        throw new BadProcessorDocumentException($doc, 'User name missing');
    }

    // Value of the cell that follows the <td> whose text equals the given label.
    $cell = function ($label) use ($xpath) {
        return self::getNodeValue($xpath, '//td[text() = \'' . $label . '\']/following-sibling::td');
    };

    $pictureUrl = self::getNodeValue($xpath, '//td[@class = \'profile_leftcell\']//img', null, 'src');
    $joinDate = Strings::makeDate($cell('Join Date'));
    $malId = Strings::makeInteger(self::getNodeValue($xpath, '//input[@name = \'profileMemId\']', null, 'value'));
    $animeViewCount = Strings::makeInteger($cell('Anime List Views'));
    $mangaViewCount = Strings::makeInteger($cell('Manga List Views'));
    $commentCount = Strings::makeInteger($cell('Comments'));
    $postCount = Strings::makeInteger($cell('Forum Posts'));
    $birthday = Strings::makeDate($cell('Birthday'));
    $location = Strings::removespaces($cell('Location'));
    $website = Strings::removeSpaces($cell('Website'));
    $gender = Strings::makeEnum(
        $cell('Gender'),
        ['Female' => UserGender::Female, 'Male' => UserGender::Male],
        UserGender::Unknown
    );

    $user =& $context->user;
    $user->name = $userName;
    $user->picture_url = $pictureUrl;
    $user->join_date = $joinDate;
    $user->mal_id = $malId;
    $user->comments = $commentCount;
    $user->posts = $postCount;
    $user->birthday = $birthday;
    $user->location = $location;
    $user->website = $website;
    $user->gender = $gender;
    $user->anime_views = $animeViewCount;
    $user->manga_views = $mangaViewCount;
    $user->processed = date('Y-m-d H:i:s');
    R::store($user);
}
/**
 * Parses the media page and stores the extracted details on $context->media.
 *
 * Reads the document under self::URL_MEDIA, extracts title, sub type, MAL id,
 * picture, score figures, ranking figures, publishing status and the
 * published date range, assigns them to the media bean and persists it with
 * R::store().
 *
 * @param array $documents downloaded documents; the one under self::URL_MEDIA is read
 * @param mixed $context   processing context; ->key is read, ->media is updated and stored
 *
 * @throws BadProcessorKeyException      when the page contains a <div class="badresult">
 * @throws BadProcessorDocumentException when the title is empty, or the sub type or status
 *                                       label is not present in the respective map
 */
public function process(array $documents, &$context)
{
    $doc = $documents[self::URL_MEDIA];
    $xpath = new DOMXPath(self::getDOM($doc));

    if ($xpath->query('//div[@class = \'badresult\']')->length >= 1) {
        throw new BadProcessorKeyException($context->key);
    }

    $title = Strings::removeSpaces(self::getNodeValue($xpath, '//h1/*/following-sibling::node()[1][self::text()]'));
    if (empty($title)) {
        throw new BadProcessorDocumentException($doc, 'empty title');
    }

    // Text that follows the <span> whose text starts with the given label.
    $afterLabel = function ($label) use ($xpath) {
        return self::getNodeValue($xpath, '//span[starts-with(text(), \'' . $label . '\')]/following-sibling::node()[self::text()]');
    };

    // Sub type: one map serves both media kinds; an empty label maps to the
    // "Unknown" constant of whichever kind this processor handles.
    $malSubType = strtolower(Strings::removeSpaces($afterLabel('Type')));
    $unknownSubType = $this->media == Media::Manga ? MangaMediaType::Unknown : AnimeMediaType::Unknown;
    $subType = Strings::makeEnum($malSubType, [
        'tv' => AnimeMediaType::TV,
        'ova' => AnimeMediaType::OVA,
        'movie' => AnimeMediaType::Movie,
        'special' => AnimeMediaType::Special,
        'ona' => AnimeMediaType::ONA,
        'music' => AnimeMediaType::Music,
        'manga' => MangaMediaType::Manga,
        'novel' => MangaMediaType::Novel,
        'one shot' => MangaMediaType::OneShot,
        'doujin' => MangaMediaType::Doujin,
        'manhwa' => MangaMediaType::Manhwa,
        'manhua' => MangaMediaType::Manhua,
        'oel' => MangaMediaType::OEL,
        '' => $unknownSubType,
    ], null);
    if ($subType === null) {
        throw new BadProcessorDocumentException($doc, 'empty sub type');
    }

    // MAL id and picture.
    $malId = self::getNodeValue($xpath, '//input[starts-with(@id, \'myinfo_\')]', null, 'value');
    $pictureUrl = self::getNodeValue($xpath, '//td[@class = \'borderClass\']//img', null, 'src');

    // Score and ranking figures.
    $averageScore = Strings::makeFloat($afterLabel('Score'));
    $averageScoreUsers = Strings::extractInteger(self::getNodeValue($xpath, '//small[starts-with(text(), \'(scored by\')]'));
    $ranking = Strings::makeInteger($afterLabel('Ranked'));
    $popularity = Strings::makeInteger($afterLabel('Popularity'));
    $memberCount = Strings::makeInteger($afterLabel('Members'));
    $favoriteCount = Strings::makeInteger($afterLabel('Favorites'));

    // Publishing status: anime and manga wordings map onto the same constants.
    $malStatus = strtolower(Strings::removeSpaces($afterLabel('Status')));
    $status = Strings::makeEnum($malStatus, [
        'not yet published' => MediaStatus::NotYetPublished,
        'not yet aired' => MediaStatus::NotYetPublished,
        'publishing' => MediaStatus::Publishing,
        'currently airing' => MediaStatus::Publishing,
        'finished' => MediaStatus::Finished,
        'finished airing' => MediaStatus::Finished,
    ], null);
    if ($status === null) {
        throw new BadProcessorDocumentException($doc, 'unknown status: ' . $malStatus);
    }

    // Air dates: "<from> to <to>"; without the separator the same string is
    // used for both ends.
    $publishedString = Strings::removeSpaces(self::getNodeValue($xpath, '//span[starts-with(text(), \'Aired\') or starts-with(text(), \'Published\')]/following-sibling::node()[self::text()]'));
    $separatorAt = strrpos($publishedString, ' to ');
    if ($separatorAt === false) {
        $publishedFrom = Strings::makeDate($publishedString);
        $publishedTo = Strings::makeDate($publishedString);
    } else {
        $publishedFrom = Strings::makeDate(substr($publishedString, 0, $separatorAt));
        // 4 is the length of the ' to ' separator.
        $publishedTo = Strings::makeDate(substr($publishedString, $separatorAt + 4));
    }

    $media =& $context->media;
    $media->mal_id = $malId;
    $media->media = $this->media;
    $media->title = $title;
    $media->sub_type = $subType;
    $media->picture_url = $pictureUrl;
    $media->average_score = $averageScore;
    $media->average_score_users = $averageScoreUsers;
    $media->publishing_status = $status;
    $media->popularity = $popularity;
    $media->members = $memberCount;
    $media->favorites = $favoriteCount;
    $media->ranking = $ranking;
    $media->published_from = $publishedFrom;
    $media->published_to = $publishedTo;
    $media->processed = date('Y-m-d H:i:s');
    R::store($media);
}