/**
 * Looks up citizens matching a search query.
 *
 * Issues a cookie-less GET request to `main/search/` with the `q` and `page`
 * query parameters, then maps every row after the first one of the `bestof`
 * table to an `['id' => int, 'name' => string]` pair.
 *
 * @param string $searchQuery Text sent as the `q` query parameter.
 * @param int    $page        Result page sent as the `page` query parameter.
 * @return array Empty array when `$page` is greater than 1 and the paginator
 *               reports it as out of range; otherwise whatever the row
 *               collection's `map()` produces for the matched rows.
 */
public function search($searchQuery, $page = 1)
{
    $request = $this->getClient()->get('main/search/');
    $request->disableCookies();

    $params = $request->getQuery();
    $params->set('q', $searchQuery);
    $params->set('page', $page);

    $xs = $request->send()->xpath();

    // Only pages beyond the first are rejected when out of range; page 1 is
    // always parsed, whatever the paginator says.
    $paginator = new OldSelector\Paginator($xs);
    if ($paginator->isOutOfRange($page) && $page > 1) {
        return [];
    }

    $table = $xs->find('//table[@class="bestof"]');
    $rows = $table->findAll('tr[position()>1]');

    return $rows->map(function (Node $tr) {
        $href = $tr->find('td[2]/div[1]/div[2]/a/@href')->extract();

        // The citizen ID is the trailing path segment of the profile link.
        $lastSlash = strrpos($href, '/');
        $citizenId = (int) substr($href, $lastSlash + 1);

        $citizenName = $tr->find('td[2]/div[1]/div[2]/a')->extract();

        return ['id' => $citizenId, 'name' => $citizenName];
    });
}
/**
 * Scrapes a newspaper's header information and its article listing.
 *
 * Requests `newspaper/{id}`, follows the redirect location it returns, reads
 * the newspaper header from that page, and then walks the article listing
 * pages from 1 up to the paginator's last page (or `$pageLimit`, if lower).
 *
 * @param int      $id        Newspaper ID used to build the initial request.
 * @param int|null $pageLimit Upper bound on the number of listing pages to
 *                            fetch; null fetches up to the last page.
 * @return array Keys: `director` (`id`, `name`), `name`, `url`, `avatar`,
 *               `country`, `subscribers`, `article_count` and `articles`
 *               (list of `title`, `url`, `votes`, `comments`, `date`,
 *               `category`).
 * @throws NotFoundException When the redirect location is `/en`.
 * @throws ScrapeException   When the initial response is not a redirect or
 *                           the article count cannot be read from the page's
 *                           description meta tag.
 */
public function getNewspaper($id, $pageLimit = null)
{
    $redirect = $this->getClient()->get("newspaper/{$id}")->send();
    if (!$redirect->isRedirect()) {
        throw new ScrapeException();
    }

    $location = $redirect->getLocation();
    if ($location == '/en') {
        throw new NotFoundException("Newspaper ID:{$id} does not exist.");
    }

    $xs = $this->getClient()->get($location)->send()->xpath();
    $paginator = new Paginator($xs);

    $head = $xs->find('//div[@class="newspaper_head"]');
    $avatarSrc = $head->find('//img[@class="avatar"]/@src')->extract();

    // The fourth '/'-separated piece of the title link is reused below to
    // build the listing page URLs.
    $titleHref = $head->find('div[@class="info"]/h1/a[1]/@href')->extract();
    $slug = explode('/', $titleHref)[3];

    $directorLink = $head->find('div[2]/ul[1]/li[1]/a[1]');

    // The total article count is only read from the description meta tag.
    $description = $xs->find('//meta[@name="description"]/@content')->extract();
    $matched = preg_match('/has (\\d+) articles/', $description, $countMatch);
    if (!$matched) {
        throw new ScrapeException();
    }

    $countries = EntityManager::getInstance()->getRepository(Country::class);

    $directorHref = $directorLink->find('@href')->extract();
    $director = [
        'id' => (int) explode('/', $directorHref)[4],
        'name' => $directorLink->find('@title')->extract(),
    ];

    $newspaper = [
        'director' => $director,
        'name' => $head->find('//h1/a/@title')->extract(),
        'url' => Uri::resolve($this->getClient()->getBaseUri(), $location),
        // NOTE(review): presumably swaps the thumbnail for a larger avatar variant.
        'avatar' => str_replace('55x55', '100x100', $avatarSrc),
        'country' => $countries->findOneByName($head->find('div[1]/a[1]/img[2]/@title')->extract()),
        'subscribers' => (int) $head->find('div[@class="actions"]')->extract(),
        'article_count' => (int) $countMatch[1],
        'articles' => [],
    ];

    // Clamp the number of listing pages to the caller's limit, if one was given.
    $lastPage = $paginator->getLastPage();
    $pages = ($pageLimit !== null && $lastPage > $pageLimit) ? $pageLimit : $lastPage;

    $page = 1;
    while ($page <= $pages) {
        $listing = $this->getClient()->get('newspaper/' . $slug . '/' . $page)->send()->xpath();

        foreach ($listing->findAll('//div[@class="post"]') as $post) {
            $headline = $post->find('div[2]/h2/a')->extract();
            $link = 'http://www.erepublik.com' . $post->find('div[2]/h2/a/@href')->extract();
            $voteCount = (int) $post->find('div[1]/div[1]/strong')->extract();
            $commentCount = (int) $post->find('div[2]/div[1]/a[1]')->extract();
            $published = $post->find('div[2]/div[1]/em')->extract();

            // The category link is optional: a missing node yields null.
            try {
                $category = trim($post->find('div[2]/div[1]/a[3]')->extract());
            } catch (NodeNotFoundException $e) {
                $category = null;
            }

            $newspaper['articles'][] = [
                'title' => $headline,
                'url' => $link,
                'votes' => $voteCount,
                'comments' => $commentCount,
                'date' => self::parseDate($published),
                'category' => $category,
            ];
        }

        $page++;
    }

    return $newspaper;
}