Ejemplo n.º 1
0
 /**
  * Runs a citizen search and returns the matches found on one result page.
  *
  * The query text and page number are sent as the "q" and "page" query
  * parameters of a cookie-less request to "main/search/".
  *
  * @param string $searchQuery Text to search for
  * @param int $page Result page to request
  * @return array Empty array when $page is greater than 1 and the paginator
  *               reports it as out of range; otherwise the mapped result rows,
  *               each shaped as ['id' => int, 'name' => extracted link text]
  */
 public function search($searchQuery, $page = 1)
 {
     $request = $this->getClient()->get('main/search/');
     $request->disableCookies();

     $params = $request->getQuery();
     $params->set('q', $searchQuery);
     $params->set('page', $page);

     $document = $request->send()->xpath();

     // The first page is always parsed, even if the paginator considers it out of range.
     $pager = new OldSelector\Paginator($document);
     if ($pager->isOutOfRange($page) && $page > 1) {
         return [];
     }

     // position()>1 skips the first row of the table (presumably the header row).
     $table = $document->find('//table[@class="bestof"]');
     $matches = $table->findAll('tr[position()>1]');

     return $matches->map(function (Node $row) {
         $profileHref = $row->find('td[2]/div[1]/div[2]/a/@href')->extract();

         // The citizen ID is whatever follows the last slash of the profile link.
         $lastSlash = strrpos($profileHref, '/');
         $citizenId = (int) substr($profileHref, $lastSlash + 1);

         return [
             'id' => $citizenId,
             'name' => $row->find('td[2]/div[1]/div[2]/a')->extract(),
         ];
     });
 }
Ejemplo n.º 2
0
 /**
  * Scrapes a newspaper's header data and its article listing.
  *
  * Requests "newspaper/{$id}", which must answer with a redirect. The redirect
  * target is fetched to read the header data, then each listing page
  * "newspaper/{slug}/{page}" is fetched to collect the articles.
  *
  * @param int $id Newspaper ID
  * @param int|null $pageLimit Maximum number of listing pages to fetch;
  *                            null fetches up to the paginator's last page
  * @return array Keys: director (id, name), name, url, avatar, country,
  *               subscribers, article_count and articles (a list of arrays
  *               with title, url, votes, comments, date, category)
  * @throws NotFoundException When the redirect points to "/en"
  * @throws ScrapeException When the first response is not a redirect, the
  *                         newspaper slug cannot be read from the header link,
  *                         or the article count is missing from the meta description
  */
 public function getNewspaper($id, $pageLimit = null)
 {
     $response = $this->getClient()->get("newspaper/{$id}")->send();
     if (!$response->isRedirect()) {
         throw new ScrapeException("Expected a redirect for newspaper ID:{$id}.");
     }

     $location = $response->getLocation();
     if ($location === '/en') {
         throw new NotFoundException("Newspaper ID:{$id} does not exist.");
     }

     $xs = $this->getClient()->get($location)->send()->xpath();
     $paginator = new Paginator($xs);
     $info = $xs->find('//div[@class="newspaper_head"]');
     $avatar = $info->find('//img[@class="avatar"]/@src')->extract();

     // The newspaper slug is the fourth "/"-separated segment of the header link.
     // Fail loudly when it is missing instead of requesting "newspaper//{page}" below.
     $hrefParts = explode('/', $info->find('div[@class="info"]/h1/a[1]/@href')->extract());
     if (!isset($hrefParts[3]) || $hrefParts[3] === '') {
         throw new ScrapeException("Could not determine the URL slug of newspaper ID:{$id}.");
     }
     $url = $hrefParts[3];

     $director = $info->find('div[2]/ul[1]/li[1]/a[1]');

     // The total article count is only read from the page's meta description.
     $desc = $xs->find('//meta[@name="description"]/@content')->extract();
     if (!preg_match('/has (\\d+) articles/', $desc, $articlesCount)) {
         throw new ScrapeException("Could not read the article count of newspaper ID:{$id}.");
     }

     $em = EntityManager::getInstance();
     $countries = $em->getRepository(Country::class);

     $result = [
         'director' => [
             // Director ID is the fifth "/"-separated segment of the profile link.
             'id' => (int) explode('/', $director->find('@href')->extract())[4],
             'name' => $director->find('@title')->extract(),
         ],
         'name' => $info->find('//h1/a/@title')->extract(),
         'url' => Uri::resolve($this->getClient()->getBaseUri(), $location),
         // Swap the 55x55 avatar size token in the image URL for 100x100.
         'avatar' => str_replace('55x55', '100x100', $avatar),
         'country' => $countries->findOneByName($info->find('div[1]/a[1]/img[2]/@title')->extract()),
         'subscribers' => (int) $info->find('div[@class="actions"]')->extract(),
         'article_count' => (int) $articlesCount[1],
         'articles' => [],
     ];

     // Cap the number of listing pages when the caller asked for a limit.
     $pages = $paginator->getLastPage();
     if ($pageLimit !== null && $pages > $pageLimit) {
         $pages = $pageLimit;
     }

     for ($page = 1; $page <= $pages; $page++) {
         $xs = $this->getClient()->get('newspaper/' . $url . '/' . $page)->send()->xpath();
         foreach ($xs->findAll('//div[@class="post"]') as $art) {
             $title = $art->find('div[2]/h2/a')->extract();
             // NOTE(review): host is hard-coded here while 'url' above is resolved
             // against the client's base URI - confirm whether they should agree.
             $artUrl = 'http://www.erepublik.com' . $art->find('div[2]/h2/a/@href')->extract();
             $votes = $art->find('div[1]/div[1]/strong')->extract();
             $comments = $art->find('div[2]/div[1]/a[1]')->extract();
             $date = $art->find('div[2]/div[1]/em')->extract();

             // The category link is optional; a missing node yields a null category.
             try {
                 $category = trim($art->find('div[2]/div[1]/a[3]')->extract());
             } catch (NodeNotFoundException $e) {
                 $category = null;
             }

             $result['articles'][] = [
                 'title' => $title,
                 'url' => $artUrl,
                 'votes' => (int) $votes,
                 'comments' => (int) $comments,
                 'date' => self::parseDate($date),
                 'category' => $category,
             ];
         }
     }

     return $result;
 }