<?php
/**
 * Example script: build pages through BuilderFactory and print, for each
 * page, the scraped titles paired with their links.
 *
 * The scraped values come from a remote site, so they are HTML-escaped before
 * being written inside the <pre> element.
 */

// Collect libxml errors internally instead of emitting them as PHP warnings.
libxml_use_internal_errors(true);

// require (not include): nothing below can work without the autoloader.
require __DIR__ . '/../vendor/autoload.php';

use PageScraper\Builder\BuilderFactory;
use PageScraper\Director\PageBuilderDirector;

// returns the PageBuilderCollection
$builder = BuilderFactory::get(array(
    array(
        'url' => 'https://news.ycombinator.com',
        // result name => XPath query evaluated against the fetched page
        'data_config' => array(
            'titles' => '//td[@class="title"]//a/text()',
            'links' => '//td[@class="title"]//a/@href',
        ),
    ),
));

// always use director to fetch the page
$director = new PageBuilderDirector($builder);

// returns PageBuilderCollection
$pages = $director->buildPage();

foreach ($pages as $page) {
    $data = $page->getData();
    $titles = isset($data['titles']) ? $data['titles'] : null;
    $links = isset($data['links']) ? $data['links'] : null;

    echo '<pre>';
    // array_combine() fails when the two lists differ in length, which happens
    // whenever the two XPath queries do not match the same number of nodes.
    if (is_array($titles) && is_array($links) && count($titles) === count($links)) {
        // Escape the dump: titles and links are untrusted remote content.
        echo htmlspecialchars(
            print_r(array_combine($titles, $links), true),
            ENT_QUOTES | ENT_SUBSTITUTE,
            'UTF-8'
        );
    } else {
        echo 'Could not pair titles with links: the result sets are missing or differ in size.';
    }
    echo '</pre>';
}
<?php
/**
 * Example script: configure a single Page by hand, let the director drive the
 * builder, and print the scraped titles paired with their links.
 *
 * The scraped values come from a remote site, so they are HTML-escaped before
 * being written inside the <pre> element.
 */

// Collect libxml errors internally instead of emitting them as PHP warnings.
libxml_use_internal_errors(true);

// require (not include): nothing below can work without the autoloader.
require __DIR__ . '/../vendor/autoload.php';

use PageScraper\Page\Page;
use PageScraper\Builder\PageBuilder;
use PageScraper\Director\PageBuilderDirector;

// create a page object
$page = new Page();

// set the url that needs to be fetched
$page->setUrl('https://news.ycombinator.com');

// builder contains the logic to fetch the remote page
// by default there is one builder right now
// which uses file_get_contents to fetch the remote page
// we can add more builders which can use CURL or other
// technique to fetch the remote page
$builder = new PageBuilder($page);

// set the data that need to be retrieved from the remote page
// (result name => XPath query)
$builder->setDataConfig(array(
    'titles' => '//td[@class="title"]//a/text()',
    'links' => '//td[@class="title"]//a/@href',
));

// use the director to instruct the builder to configure the page object
$director = new PageBuilderDirector($builder);

// finally fetch the remote page and configure the Page object
$director->buildPage();

// get the queried data
$data = $page->getData();
$titles = isset($data['titles']) ? $data['titles'] : null;
$links = isset($data['links']) ? $data['links'] : null;

// display it
echo '<pre>';
// array_combine() fails when the two lists differ in length, which happens
// whenever the two XPath queries do not match the same number of nodes.
if (is_array($titles) && is_array($links) && count($titles) === count($links)) {
    // Escape the dump: titles and links are untrusted remote content.
    echo htmlspecialchars(
        print_r(array_combine($titles, $links), true),
        ENT_QUOTES | ENT_SUBSTITUTE,
        'UTF-8'
    );
} else {
    echo 'Could not pair titles with links: the result sets are missing or differ in size.';
}
echo '</pre>';