$timeStart = microtime(true); while ($found) { try { $data = $parser->parseUrl($parseUrl, $pattern); $bulkUrls = []; if ($data && $data['cars']) { foreach ($data['cars'] as $car) { $price = ''; if (preg_match('/(.*)руб/', $car['price'][0], $m)) { $price = str_replace(' ', '', isset($m[1]) ? $m[1] : $price); } $carUrl = 'https://www.avito.ru' . $car['url'][0]; $cars[$carUrl] = ['url' => $carUrl, 'photo' => $car['photo'][0], 'title' => $car['title'][0], 'price' => $price, 'date' => date('Y-m-d H:i:s', strtotime(dateRusToEn($car['date'][0])))]; $bulkUrls[] = $carUrl; } $bulkData = $parser->bulkParse($bulkUrls, ['description' => '#desc_text']); foreach ($bulkData as $carUrl => $carDescription) { $cars[$carUrl]['description'] = $carDescription['description'][0]; } } else { $found = false; } $currentPage = (int) $data['current'][0]; echo 'Parsed page ' . $currentPage . '...' . PHP_EOL; if ($currentPage == $pageLimit) { break; } if ($found && $currentPage) { $parseUrl = $url . '?p=' . ($currentPage + 1); } else { $found = false;
/**
 * Example script: read the lenta.ru front page, collect the article links
 * from its "yellow box", then fetch every linked article in one bulk call
 * and print its title, lead image URL and body text.
 *
 * Created by PhpStorm.
 * User: yakov
 * Date: 05.06.16
 * Time: 15:30
 */
require __DIR__ . '/../vendor/autoload.php';

$parser = new \jakulov\HyperParser\Parser();

// Step 1: pull the href of every link inside the front page's yellow box.
$url1 = 'http://lenta.ru';
$pattern1 = ['links' => '.b-yellow-box .item a|href'];
$data1 = $parser->parseUrl($url1, $pattern1);

// The parse result may be empty or lack the 'links' key (failed request,
// changed markup), so fall back to an empty list instead of iterating null.
$links = (is_array($data1) && isset($data1['links']) && is_array($data1['links']))
    ? $data1['links']
    : [];

// Step 2: turn every non-empty href into an absolute URL.
$urls = [];
foreach ($links as $link) {
    if (!$link) {
        continue;
    }

    $link = (string) $link;
    if (preg_match('#^https?://#i', $link)) {
        // Already absolute: prefixing the site root would corrupt it.
        $urls[] = $link;
    } elseif (strpos($link, '//') === 0) {
        // Protocol-relative link: reuse the scheme of the front page URL.
        $urls[] = 'http:' . $link;
    } else {
        // Site-relative link: join with exactly one slash between the parts.
        $urls[] = rtrim($url1, '/') . '/' . ltrim($link, '/');
    }
}

if (!$urls) {
    echo 'ERROR: no article links found at ' . $url1 . PHP_EOL . PHP_EOL;
}

// Step 3: fetch all articles at once.
// NOTE(review): the meaning of the third bulkParse() argument (false) is not
// visible from this script - confirm against the HyperParser documentation.
$pattern2 = ['title' => 'title', 'img' => '.b-topic__title-image img|src', 'text' => '.b-text|innertext'];
$data2 = $urls ? $parser->bulkParse($urls, $pattern2, false) : [];

// Step 4: print each article. A non-array entry is reported as an error for
// that URL; a selector that matched nothing is printed as an empty string.
foreach ($data2 as $url => $news) {
    if (!is_array($news)) {
        echo 'ERROR: ' . $news . PHP_EOL . PHP_EOL;
        continue;
    }

    $title = isset($news['title'][0]) ? $news['title'][0] : '';
    $img = isset($news['img'][0]) ? $news['img'][0] : '';
    $text = isset($news['text'][0]) ? $news['text'][0] : '';

    echo PHP_EOL . '==============' . PHP_EOL;
    echo $url . PHP_EOL;
    echo 'Title: ' . $title . PHP_EOL;
    echo 'IMG: ' . $img . PHP_EOL . PHP_EOL;
    echo 'Text: ' . $text . PHP_EOL . PHP_EOL;
}