Example #1
0
<?php

// Collect libxml parse errors internally instead of emitting PHP warnings.
libxml_use_internal_errors(true);
include __DIR__ . '/../vendor/autoload.php';
use PageScraper\Builder\BuilderFactory;
use PageScraper\Director\PageBuilderDirector;
// returns the PageBuilderCollection
// each entry is one page: the url to fetch plus a map of
// result name => XPath expression to evaluate against that page
$builder = BuilderFactory::get(array(
    array(
        'url' => 'https://news.ycombinator.com',
        'data_config' => array(
            'titles' => '//td[@class="title"]//a/text()',
            'links' => '//td[@class="title"]//a/@href',
        ),
    ),
));
// always use director to fetch the page
$director = new PageBuilderDirector($builder);
// returns PageBuilderCollection
$pages = $director->buildPage();
foreach ($pages as $page) {
    $data = $page->getData();
    // Either query may be missing from the result, so fall back to empty lists.
    $titles = isset($data['titles']) ? $data['titles'] : array();
    $links = isset($data['links']) ? $data['links'] : array();
    if (count($titles) > 0 && count($titles) === count($links)) {
        // Both lists line up one-to-one: show them as title => link.
        $report = print_r(array_combine($titles, $links), true);
    } else {
        // array_combine() fails when the lists differ in length (ValueError on
        // PHP 8, false + warning before that), and pairing them anyway would
        // misalign titles and links, so show the raw lists instead.
        $report = print_r(array('titles' => $titles, 'links' => $links), true);
    }
    echo '<pre>';
    // The text comes from a remote page: escape it before emitting it as HTML.
    echo htmlspecialchars($report, ENT_QUOTES, 'UTF-8');
    echo '</pre>';
}
Example #2
0
<?php

// Collect libxml parse errors internally instead of emitting PHP warnings.
libxml_use_internal_errors(true);
include __DIR__ . '/../vendor/autoload.php';
use PageScraper\Page\Page;
use PageScraper\Builder\PageBuilder;
use PageScraper\Director\PageBuilderDirector;
// create a page object
$page = new Page();
// set the url that needs to be fetched
$page->setUrl('https://news.ycombinator.com');
// builder contains the logic to fetch the remote page
// by default there is one builder right now
// which uses file_get_contents to fetch the remote page
// we can add more builders which can use CURL or other
// technique to fetch the remote page
$builder = new PageBuilder($page);
// set the data that need to be retrieved from the remote page
// (result name => XPath expression)
$builder->setDataConfig(array(
    'titles' => '//td[@class="title"]//a/text()',
    'links' => '//td[@class="title"]//a/@href',
));
// use the director to instruct the builder to configure the page object
$director = new PageBuilderDirector($builder);
// finally fetch the remote page and configure the Page object
$director->buildPage();
// get the queried data
$data = $page->getData();
// Either query may be missing from the result, so fall back to empty lists.
$titles = isset($data['titles']) ? $data['titles'] : array();
$links = isset($data['links']) ? $data['links'] : array();
if (count($titles) > 0 && count($titles) === count($links)) {
    // Both lists line up one-to-one: show them as title => link.
    $report = print_r(array_combine($titles, $links), true);
} else {
    // array_combine() fails when the lists differ in length (ValueError on
    // PHP 8, false + warning before that), and pairing them anyway would
    // misalign titles and links, so show the raw lists instead.
    $report = print_r(array('titles' => $titles, 'links' => $links), true);
}
// display it
echo '<pre>';
// The text comes from a remote page: escape it before emitting it as HTML.
echo htmlspecialchars($report, ENT_QUOTES, 'UTF-8');
echo '</pre>';