示例#1
0
// Remember when the crawl started (Unix timestamp as a float, in seconds).
$timeStart = microtime(true);
// Crawl the listing page by page for as long as $found stays truthy.
// NOTE(review): $found, $parser, $parseUrl, $pattern, $url, $pageLimit and
// $cars are all defined outside this excerpt, and the loop body is cut off
// before its closing braces.
while ($found) {
    try {
        // Fetch and parse the current listing page.
        $data = $parser->parseUrl($parseUrl, $pattern);
        // Detail-page URLs collected from this page only.
        $bulkUrls = [];
        if ($data && $data['cars']) {
            foreach ($data['cars'] as $car) {
                // Take everything that precedes "руб" in the price text and
                // strip the spaces from it; stays '' when the text has no "руб".
                $price = '';
                if (preg_match('/(.*)руб/', $car['price'][0], $m)) {
                    $price = str_replace(' ', '', isset($m[1]) ? $m[1] : $price);
                }
                // The parsed 'url' value is appended to the site origin.
                $carUrl = 'https://www.avito.ru' . $car['url'][0];
                // Results are keyed by URL so the description can be attached below.
                // NOTE(review): dateRusToEn() is not visible here — presumably it
                // rewrites a Russian date string into one strtotime() accepts; confirm.
                $cars[$carUrl] = ['url' => $carUrl, 'photo' => $car['photo'][0], 'title' => $car['title'][0], 'price' => $price, 'date' => date('Y-m-d H:i:s', strtotime(dateRusToEn($car['date'][0])))];
                $bulkUrls[] = $carUrl;
            }
            // Request every detail page found on this listing page in one bulk
            // call and copy the first '#desc_text' match into the matching entry.
            $bulkData = $parser->bulkParse($bulkUrls, ['description' => '#desc_text']);
            foreach ($bulkData as $carUrl => $carDescription) {
                $cars[$carUrl]['description'] = $carDescription['description'][0];
            }
        } else {
            // Nothing parsed or no cars on the page: mark the crawl as finished.
            $found = false;
        }
        // NOTE(review): this line also runs when $data was falsy above, in which
        // case $data['current'][0] is read from an empty result — confirm intended.
        $currentPage = (int) $data['current'][0];
        echo 'Parsed page ' . $currentPage . '...' . PHP_EOL;
        // Stop as soon as the configured page limit has been reached.
        if ($currentPage == $pageLimit) {
            break;
        }
        // Move on to the next page, or stop when the page number is unknown (0).
        if ($found && $currentPage) {
            $parseUrl = $url . '?p=' . ($currentPage + 1);
        } else {
            $found = false;
示例#2
0
/**
 * Created by PhpStorm.
 * User: yakov
 * Date: 05.06.16
 * Time: 15:30
 */
require __DIR__ . '/../vendor/autoload.php';
// Demo: read the headline links from the lenta.ru front page, then fetch all
// linked articles with one bulk request and print title, image and text.
$parser = new \jakulov\HyperParser\Parser();
$url1 = 'http://lenta.ru';
$pattern1 = ['links' => '.b-yellow-box .item a|href'];
$data1 = $parser->parseUrl($url1, $pattern1);

// The front-page request can fail or the markup can change, so fall back to
// an empty list instead of iterating over a missing 'links' key.
$links = isset($data1['links']) ? $data1['links'] : [];

$urls = [];
foreach ($links as $link) {
    if (!$link) {
        continue;
    }
    if (preg_match('#^https?://#i', $link)) {
        // Already absolute: prefixing the site origin would corrupt it.
        $urls[] = $link;
    } elseif (strpos($link, '//') === 0) {
        // Protocol-relative href: only the scheme is missing.
        $urls[] = 'http:' . $link;
    } else {
        // Site-relative href: join with exactly one slash.
        $urls[] = rtrim($url1, '/') . '/' . ltrim($link, '/');
    }
}

$pattern2 = ['title' => 'title', 'img' => '.b-topic__title-image img|src', 'text' => '.b-text|innertext'];
// Skip the bulk request entirely when there is nothing to fetch.
// NOTE(review): the meaning of the third bulkParse() argument (false) is not
// visible in this file — kept exactly as in the original call.
$data2 = $urls ? $parser->bulkParse($urls, $pattern2, false) : [];

// Returns the first match for $key, or '' when the selector matched nothing,
// so a missing field prints as empty instead of raising a notice.
$first = function ($news, $key) {
    return isset($news[$key][0]) ? $news[$key][0] : '';
};

foreach ($data2 as $url => $news) {
    if (is_array($news)) {
        echo PHP_EOL . '==============' . PHP_EOL;
        echo $url . PHP_EOL;
        echo 'Title: ' . $first($news, 'title') . PHP_EOL;
        echo 'IMG: ' . $first($news, 'img') . PHP_EOL . PHP_EOL;
        echo 'Text: ' . $first($news, 'text') . PHP_EOL . PHP_EOL;
    } else {
        // A non-array entry is printed as the error message for that URL.
        echo 'ERROR: ' . $news . PHP_EOL . PHP_EOL;
    }
}