예제 #1
0
 /**
  * Checks that each() hands every <option> of the "changelang-langs" select
  * to the callback, by collecting the extracted values and comparing the
  * comma-joined result with the expected language list.
  */
 public function testEach()
 {
     $options = $this->xs->findAll('//select[@id="changelang-langs"]/option');
     $labels = [];
     // The callback appends to $labels by reference so the values survive the closure scope.
     $options->each(function (Node $option) use (&$labels) {
         $labels[] = $option->extract();
     });
     $expected = 'English, Brazilian Portuguese, Chinese (Simplified), French, German, '
         . 'Italian, Japanese, Romanian, Russian, Spanish, Turkish, Other';
     $joined = implode(', ', $labels);
     $this->assertEquals($expected, $joined);
 }
예제 #2
0
    /**
     * Checks that innerHTML() of a selector loaded from Resources/test.xml
     * returns the <bookstore> markup listed below.
     */
    public function testInnerHtml()
    {
        $xmlPath = __DIR__ . '/Resources/test.xml';
        $xs = Selector::load($xmlPath);
        // Whitespace-sensitive literal: the line breaks and indentation inside
        // the quotes are part of the expected value and must not be reformatted.
        $expected = '<bookstore>
    <book category="COOKING">
        <title lang="en">Everyday Italian</title>
        <author>Giada De Laurentiis</author>
        <year>2005</year>
        <price>30.00</price>
    </book>
    <book category="CHILDREN">
        <title lang="en">Harry Potter</title>
        <author>J K. Rowling</author>
        <year>2005</year>
        <price>29.99</price>
    </book>
    <book category="WEB">
        <title lang="en">XQuery Kick Start</title>
        <author>James McGovern</author>
        <author>Per Bothner</author>
        <author>Kurt Cagle</author>
        <author>James Linn</author>
        <author>Vaidyanathan Nagarajan</author>
        <year>2003</year>
        <price>49.99</price>
    </book>
    <book category="WEB">
        <title lang="en">Learning XML</title>
        <author>Erik T. Ray</author>
        <year>2003</year>
        <price>39.95</price>
    </book>
</bookstore>';
        // PHPUnit expects assertEquals($expected, $actual); passing them in
        // this order keeps the failure message and diff the right way round.
        $this->assertEquals($expected, $xs->innerHTML());
    }
예제 #3
0
 /**
  * Search Youmagine for designs matching the submitted query and render the results.
  *
  * Renders the "results.none" view when the scraper cannot find the expected
  * nodes, otherwise "results.show" with the scraped links, names and images.
  *
  * @param Request $request Request carrying the required "query" field (at most 128 characters).
  * @return \Illuminate\Http\Response
  */
 public function search(Request $request)
 {
     // Ensure that the search query exists.
     $this->validate($request, ['query' => 'required|max:128']);
     $client = new Client();
     // The query is user input: encode it so spaces, "&", "#" and similar
     // characters cannot break or extend the query string of the outgoing URL.
     $query = urlencode((string) $request->get('query'));
     // Load Youmagine into DOM
     $response = $client->get('https://www.youmagine.com/search/designs?utf8=%E2%9C%93&search=' . $query);
     $code = $response->getBody();
     $xs = Selector::loadHTML($code);
     // Scrape Youmagine results from DOM
     try {
         $links = $xs->findAll('//*[@id="js-results"]/div[1]/a')->map(function ($node) {
             return $node->find('@href')->extract();
         });
         $names = $xs->findAll('//*[@id="js-results"]/div[1]/a/div[*]')->map(function ($node) {
             return $node->find('h1')->extract();
         });
         $images = $xs->findAll('//*[@id="js-results"]/div[1]/a/div[*]/div[2]')->map(function ($node) {
             $styleTag = $node->find('@style')->extract();
             // The image URL is the first single-quoted string in the style attribute.
             preg_match('/\'(.*?)\'/', $styleTag, $styleMatches);
             // Yield null instead of reading an undefined offset when the
             // style attribute carries no quoted URL.
             return isset($styleMatches[1]) ? $styleMatches[1] : null;
         });
     } catch (NodeNotFoundException $e) {
         return response()->view('results.none');
     }
     return response()->view('results.show', compact('links', 'images', 'names'));
 }
예제 #4
0
 /**
  * Display the specified resource.
  *
  * Scrapes the Youmagine design page for $link, resolves the download URL of
  * each listed file, caches the files under public/cache, uploads them to a
  * newly created transient Autodesk bucket, registers the first file with the
  * viewing service and renders the "thing.show" view. Renders "thing.none"
  * when the page cannot be scraped or no downloadable file is found.
  *
  * @param string $link Path segment appended to the Youmagine "designs/" URL.
  * @return \Illuminate\Http\Response
  */
 public function show($link)
 {
     $client = new Client();
     $baseUrl = 'https://www.youmagine.com/';
     $response = $client->get($baseUrl . 'designs/' . $link);
     $code = $response->getBody();
     $xs = Selector::loadHTML($code);
     // Scrape Youmagine thing information from DOM
     try {
         $name = $xs->find('/html/body/div[2]/div/h1')->extract();
         $description = $xs->find('//*[@id="information"]/div[2]')->extract();
         $files = $xs->findAll('//*[@id="documents"]/div/ul/li[*]/div[3]/div[1]/a')->map(function ($node) {
             return $node->find('@href')->extract();
         });
     } catch (NodeNotFoundException $e) {
         return response()->view('thing.none');
     }
     // Get files
     $downloadLinks = [];
     foreach ($files as $file) {
         // Redirects are not followed; the download URL is taken to be the
         // first double-quoted string in the response body.
         $response = $client->get($baseUrl . $file, ['allow_redirects' => false]);
         if (preg_match('/"(.*?)"/', (string) $response->getBody(), $downloadLinkMatch)) {
             $downloadLinks[] = $downloadLinkMatch[1];
         }
     }
     // Everything below reads the first download link / first uploaded file,
     // so without any file there is nothing to show.
     if (empty($downloadLinks)) {
         return response()->view('thing.none');
     }
     // Get access token
     $response = $client->request('POST', 'https://developer.api.autodesk.com/authentication/v1/authenticate', ['form_params' => ['client_id' => env('AUTODESK_CLIENT_ID', ''), 'client_secret' => env('AUTODESK_CLIENT_SECRET', ''), 'grant_type' => 'client_credentials']]);
     $authToken = json_decode((string) $response->getBody())->access_token;
     $authHeaders = ['Authorization' => 'Bearer ' . $authToken];
     // Create a bucket
     $bucketKey = Str::lower(Str::random(16));
     $response = $client->request('POST', 'https://developer.api.autodesk.com/oss/v2/buckets', ['json' => ['bucketKey' => $bucketKey, 'policyKey' => 'transient'], 'headers' => $authHeaders]);
     // Use the key echoed back by the API from here on.
     $bucketKey = json_decode((string) $response->getBody())->bucketKey;
     // Upload to bucket
     $bucket = [];
     foreach ($downloadLinks as $downloadLink) {
         $fileName = pathinfo($downloadLink)['basename'];
         $cachePath = base_path('public/cache/' . $fileName);
         // Download into the local cache.
         $sink = fopen($cachePath, 'w');
         $client->get($downloadLink, ['sink' => $sink]);
         // Release the write handle before reopening the file for reading;
         // the guard covers the case where the HTTP client already closed it.
         if (is_resource($sink)) {
             fclose($sink);
         }
         // Stream the cached copy to the bucket.
         $upload = fopen($cachePath, 'r');
         $response = $client->request('PUT', 'https://developer.api.autodesk.com/oss/v2/buckets/' . $bucketKey . '/objects/' . $fileName, ['body' => $upload, 'headers' => $authHeaders]);
         if (is_resource($upload)) {
             fclose($upload);
         }
         $body = json_decode((string) $response->getBody());
         $bucket[] = ['filename' => $body->objectKey, 'urn' => $body->objectId];
     }
     // Set up references: the first uploaded file is the master, every file
     // with a different filename becomes one of its dependencies.
     $references = ['master' => $bucket[0]['urn'], 'dependencies' => []];
     foreach ($bucket as $file) {
         if ($file['filename'] === $bucket[0]['filename']) {
             continue;
         }
         $references['dependencies'][] = ['file' => $file['urn'], 'metadata' => ['childPath' => $file['filename'], 'parentPath' => $bucket[0]['filename']]];
     }
     $client->post('https://developer.api.autodesk.com/references/v1/setreference', ['json' => $references, 'headers' => $authHeaders]);
     // Register data with the viewing services
     $urn = base64_encode($bucket[0]['urn']);
     $client->post('https://developer.api.autodesk.com/viewingservice/v1/register', ['json' => ['urn' => $urn], 'headers' => $authHeaders]);
     return response()->view('thing.show', compact('name', 'description', 'urn', 'authToken') + ['pollUrl' => 'https://developer.api.autodesk.com/viewingservice/v1/' . $urn, 'dl' => $downloadLinks[0]]);
 }
예제 #5
0
 /**
  * Checks that Selector::loadHTML() returns a Selector instance when given
  * the contents of the Resources/test.html fixture.
  */
 public function testLoadHTML()
 {
     $fixturePath = __DIR__ . '/Resources/test.html';
     $markup = file_get_contents($fixturePath);
     $this->assertInstanceOf('XPathSelector\\Selector', Selector::loadHTML($markup));
 }
예제 #6
0
 /**
  * Parses citizen profile HTML page and returns useful information
  *
  * The result holds the keys id, name, birth, avatar, online, ban, alive,
  * level, experience, division, elite_citizen, national_rank, military,
  * citizenship, residence, about, party, newspaper, top_damage, true_patriot
  * and medals. Optional sections that are missing from the page are reported
  * as null (medals: empty array).
  *
  * NOTE(review): find() calls outside the try blocks presumably let
  * NodeNotFoundException propagate when the page layout differs - confirm.
  *
  * @param string $html HTML source of citizen profile page
  * @return array
  * @throws ScrapeException when citizenship, residence country or region cannot
  *                         be resolved, or when the top damage / true patriot
  *                         text does not have the expected format
  */
 public static function parseProfile($html)
 {
     $em = EntityManager::getInstance();
     $countries = $em->getRepository(Country::class);
     $regions = $em->getRepository(Region::class);
     // Parses the part before "/" of strings such as "1,234 / 5,000",
     // dropping thousands separators.
     $parseStat = function ($string, $float = false) {
         $string = trim($string);
         $slash = strpos($string, '/');
         // Without a "/" the whole string is the value; cutting at a
         // false offset would leave an empty string and always yield 0.
         if ($slash !== false) {
             $string = substr($string, 0, $slash);
         }
         $string = str_ireplace(',', '', $string);
         return $float ? (double) $string : (int) $string;
     };
     $xs = Selector::loadHTML($html);
     $result = [];
     $content = $xs->find('//div[@id="content"][1]');
     $sidebar = $content->find('//div[@class="citizen_sidebar"][1]');
     $second = $content->find('//div[@class="citizen_second"]');
     $state = $content->find('//div[@class="citizen_state"]');
     /**
      * BASIC DATA
      */
     try {
         $viewFriends = $content->find('//a[@class="view_friends"][1]/@href');
         // The citizen id is the trailing number of the "view friends" link;
         // an unexpected link format is reported like a missing link.
         if (preg_match('@^/[^/]+/main/citizen-friends/([0-9]+)$@', $viewFriends->extract(), $matches)) {
             $result['id'] = (double) $matches[1];
         } else {
             $result['id'] = null;
         }
     } catch (NodeNotFoundException $e) {
         $result['id'] = null;
     }
     $result['name'] = $content->find('//img[@class="citizen_avatar"]/@alt')->extract();
     $birth = new DateTime(trim($second->find('p[2]')->extract()));
     $result['birth'] = $birth->format('Y-m-d');
     $avatar = $content->find('//img[@class="citizen_avatar"][1]/@style')->extract();
     $avatar = OldSelector\RegEx::find($avatar, '/background-image\\: url\\(([^)]+)\\);/i');
     $result['avatar'] = $avatar->group(0);
     $result['online'] = $content->findOneOrNull('//span[@class="online_status on"][1]') != null;
     /**
      * BAN/DEAD
      */
     try {
         $ban = $state->find('div/span/img[contains(@src, "perm_banned")]/../..');
         $result['ban'] = ['type' => trim($ban->find('span')->extract()), 'reason' => $ban->find('@title')->extract()];
     } catch (NodeNotFoundException $e) {
         $result['ban'] = null;
     }
     $result['alive'] = $state->findOneOrNull('div/span/img[contains(@src, "dead_citizen")]/../..') == null;
     $exp = $content->find('//strong[@class="citizen_level"][1]');
     $result['level'] = (int) trim($exp->extract());
     $result['experience'] = $parseStat(str_replace('<strong>Experience Level</strong><br />', '', $exp->find('@title')->extract()));
     $result['division'] = Helpers::getDivision($result['level']);
     $result['elite_citizen'] = $content->findOneOrNull('//span[@title="eRepublik Elite Citizen"][1]') !== null;
     $result['national_rank'] = (int) $second->find('small[3]/strong')->extract();
     // Strength, perception, ground/air rank and the base hit derived from them.
     $military = function ($eliteCitizen) use ($content, $parseStat) {
         $arr = [];
         $str = $content->find('//div[@class="citizen_military_box"][2]/span[2]')->extract();
         $perc = $content->find('//div[@class="citizen_military_box"][4]/span[2]')->extract();
         $arr['strength'] = (double) str_ireplace(',', '', trim($str));
         $arr['rank'] = new Rank($parseStat($content->find('//span[@class="rank_numbers"]')->extract(), true));
         $arr['base_hit'] = Helpers::getHit($arr['strength'], $arr['rank']->getLevel(), 0, $eliteCitizen);
         $arr['perception'] = (double) str_ireplace(',', '', trim($perc));
         $arr['air_rank'] = new AirRank($parseStat($content->find('//div[@class="citizen_military_box_wide"][2]/span[2]/span[@class="rank_numbers"]')->extract(), true));
         return $arr;
     };
     // Guerrilla matches won/lost; both null when the section is absent.
     $guerrilla = function () use ($content) {
         $div = $content->findOneOrNull('//div[@class="guerilla_fights_history"][1]');
         if ($div) {
             return ['won' => (int) $div->find('div[@title="Guerrilla matches won"][1]/span[1]')->extract(), 'lost' => (int) $div->find('div[@title="Guerrilla matches lost"][1]/span[1]')->extract()];
         } else {
             return ['won' => null, 'lost' => null];
         }
     };
     // Small/big bombs used; both 0 when the section is absent.
     $bombs = function () use ($content) {
         $massDestruction = $content->findOneOrNull('//div[@class="citizen_mass_destruction"][1]');
         if ($massDestruction) {
             return ['small_bombs' => (int) $massDestruction->find('strong/img[@title="Small Bombs used"]/../b[1]')->extract(), 'big_bombs' => (int) $massDestruction->find('strong/img[@title="Big Bombs used"]/../b[1]')->extract()];
         } else {
             return ['small_bombs' => 0, 'big_bombs' => 0];
         }
     };
     $result['military'] = $military($result['elite_citizen']);
     // Guerrilla statistics
     $result['military']['guerrilla'] = $guerrilla();
     // Bombs statistics
     $result['military']['mass_destruction'] = $bombs();
     // Residence and citizenship
     $info = $sidebar->find('div[1]');
     $result['citizenship'] = $countries->findOneByName((string) $info->find('a[3]/img[1]/@title')->extract());
     $result['residence'] = ['country' => $countries->findOneByName($info->find('a[1]/@title')->extract()), 'region' => $regions->findOneByName($info->find('a[2]/@title')->extract())];
     // isset() is false for null, i.e. when any of the repository lookups found nothing.
     if (!isset($result['residence']['country'], $result['residence']['region'], $result['citizenship'])) {
         throw new ScrapeException();
     }
     // About me
     try {
         $about = $content->find('//div[@class="about_message profile_section"]/p');
         $result['about'] = strip_tags($about->extract());
     } catch (NodeNotFoundException $e) {
         $result['about'] = null;
     }
     // The activity places are read by position: party, military unit, newspaper.
     $places = $content->findAll('//div[@class="citizen_activity"]/div[@class="place"]');
     // Political Party
     $party = $places->item(0);
     $class = $party->findOneOrNull('h3/@class');
     if ($class == null || $class->extract() != 'noactivity') {
         $url = $party->findOneOrNull('div/span/a/@href');
         if ($url == null) {
             $result['party'] = null;
         } else {
             $url = $url->extract();
             // The id is the number between the last "-" and the last "/" of the URL.
             $start = strrpos($url, '-') + 1;
             $length = strrpos($url, '/') - $start;
             $result['party'] = ['id' => (int) substr($url, $start, $length), 'name' => trim($party->find('div[1]/span/a')->extract()), 'avatar' => $party->find('div/img/@src')->extract(), 'role' => trim($party->find('h3[1]')->extract())];
         }
     } else {
         $result['party'] = null;
     }
     // Military Unit
     $unit = $places->item(1);
     if ($unit->findOneOrNull('div[1]')) {
         $url = $unit->find('div[1]/a[1]/@href')->extract();
         $avatar = $unit->find('div[1]/a[1]/img[1]/@src')->extract();
         // The creation date is taken from the YYYY/MM/DD path segment of the avatar URL.
         $createdAt = preg_replace('#.*([0-9]{4})/([0-9]{2})/([0-9]{2}).*#', '\\1-\\2-\\3', $avatar);
         $result['military']['unit'] = ['id' => (int) substr($url, strrpos($url, '/') + 1), 'name' => $unit->find('div[1]/a[1]/span[1]')->extract(), 'created_at' => $createdAt, 'avatar' => $avatar, 'role' => trim($unit->find('h3[1]')->extract())];
     } else {
         $result['military']['unit'] = null;
     }
     // Newspaper
     $newspaper = $places->item(2);
     if ($newspaper->findOneOrNull('div[1]')) {
         $url = $newspaper->find('div[1]/a[1]/@href')->extract();
         $start = strrpos($url, '-') + 1;
         $length = strrpos($url, '/') - $start;
         $result['newspaper'] = ['id' => (int) substr($url, $start, $length), 'name' => $newspaper->find('div[1]/a/@title')->extract(), 'avatar' => $newspaper->find('div[1]/a[1]/img[1]/@src')->extract(), 'role' => trim($newspaper->find('h3[1]')->extract())];
     } else {
         $result['newspaper'] = null;
     }
     $citizenContent = $content->find('div[@class="citizen_content"][1]');
     // Top Damage
     $topDamage = $citizenContent->findOneOrNull('h3/img[@title="Top damage is only updated at the end of the campaign"]' . '/../following-sibling::div[@class="citizen_military"][1]');
     if ($topDamage) {
         $damage = (double) str_replace(',', '', trim(str_replace('for', '', $topDamage->find('h4')->extract())));
         $stat = $topDamage->find('div[@class="stat"]/small')->extract();
         if (preg_match('/Achieved while .*? on day ([0-9,]+)/', $stat, $matches)) {
             $dateTime = DateTime::createFromDay((int) str_replace(',', '', $matches[1]));
             $result['top_damage'] = ['damage' => $damage, 'date' => $dateTime->format('Y-m-d'), 'message' => trim($stat, " \n")];
         } else {
             throw new ScrapeException();
         }
     } else {
         $result['top_damage'] = null;
     }
     // True Patriot
     $truePatriot = $citizenContent->findOneOrNull('h3[normalize-space(text())="True Patriot"]/following-sibling::div[@class="citizen_military"][1]');
     if ($truePatriot) {
         $damage = (double) str_replace(',', '', trim(str_replace('for', '', $truePatriot->find('h4')->extract())));
         $tip = $truePatriot->find('preceding-sibling::h3[1]/img[1]/@title')->extract();
         if (preg_match('/day ([0-9]+)/', $tip, $since)) {
             $dateTime = DateTime::createFromDay($since[1]);
             $result['true_patriot'] = ['damage' => $damage, 'since' => $dateTime->format('Y-m-d')];
         } else {
             throw new ScrapeException();
         }
     } else {
         $result['true_patriot'] = null;
     }
     // Medals
     // Start from an empty list so the key exists and ksort() below receives
     // an array even when the page lists no medal.
     $result['medals'] = [];
     $medals = $content->findAll('//ul[@id="achievment"]/li');
     foreach ($medals as $li) {
         /**
          * @var Node $li
          */
         $type = $li->findOneOrNull('div[contains(@class,"hinter")]/span/p/strong');
         if ($type == null) {
             continue;
         }
         // Medal name becomes the key: lower-cased, spaces replaced by underscores.
         $type = strtr(strtolower($type->extract()), [' ' => '_']);
         $count = $li->findOneOrNull('div[@class="counter"]');
         $result['medals'][$type] = $count ? (int) $count->extract() : 0;
     }
     ksort($result['medals']);
     return $result;
 }
예제 #7
0
 /**
  * Fetches one page of a citizen's friends list and maps every table row of
  * the returned HTML fragment to a friend record with the keys citizenId,
  * citizenName, isDead, avatarUrl and, when the row has an actions cell,
  * removeUrl.
  *
  * A 404 response yields an empty array; any other ClientException is rethrown.
  *
  * @param int $citizenId
  * @param int $page
  * @return array
  * @throws InvalidArgumentException
  * @throws ScrapeException
  */
 public function listFriendsbyPage($citizenId, $page)
 {
     try {
         $response = $this->getClient()->get("main/citizen-friends/{$citizenId}/{$page}/list")->send();
     } catch (ClientException $ex) {
         if ($ex->getCode() == 404) {
             return [];
         }
         // No response is available for other client errors, so the code
         // below cannot run; surface the original failure to the caller.
         throw $ex;
     }
     // The friends markup is delivered in the "content" field of the JSON payload.
     $hxs = \XPathSelector\Selector::loadHTML($response->json()['content']);
     return $hxs->findAll('//tr')->map(function (Node $friendItem) {
         $link = $friendItem->find('td[@class="friend_info"]/a/@href')->extract();
         // The citizen id is whatever follows the last "/" of the profile link.
         $citizenId = substr($link, strrpos($link, '/') + 1);
         $citizenName = $friendItem->find('td[@class="friend_info"]/a/@title')->extract();
         // A row is flagged dead through its class attribute, which may be absent.
         $rowClass = $friendItem->findOneOrNull('@class');
         $isDead = $rowClass !== null && trim($rowClass->extract()) == 'dead';
         $avatarUrl = $friendItem->find('td[@class="friend_info"]/a/img/@src')->extract();
         $friend = ['citizenId' => (int) $citizenId, 'citizenName' => $citizenName, 'isDead' => (bool) $isDead, 'avatarUrl' => $avatarUrl];
         // The remove link is only looked up when the row has an actions cell.
         if ($friendItem->findOneOrNull('td[@class="actions"]') != null) {
             $removeUrl = $friendItem->find('td[@class="actions"]/div/a[@class="act remove"]/@href')->extract();
             if ($removeUrl !== null) {
                 $friend['removeUrl'] = $removeUrl;
             }
         }
         return $friend;
     });
 }
예제 #8
0
파일: Response.php 프로젝트: erpk/harvester
 /**
  * Loads this response's body (obtained via getBody(true)) as HTML into an
  * XPath selector.
  *
  * @return Selector
  */
 public function xpath()
 {
     $html = $this->getBody(true);

     return Selector::loadHTML($html);
 }