/**
 * Imports car listings from the OLX RSS feeds.
 *
 * One feed is requested per (subcategory, city) pair from
 * http://olx.ua/transport/<slug>/<city>/rss/. Every feed item whose link is
 * accepted by parent::productUnique() and whose description contains a
 * "Цена: <digits> грн" fragment is saved as an Items record.
 *
 * A feed that cannot be downloaded or parsed is reported with error_log() and
 * skipped, so a single broken feed does not abort the remaining ones.
 *
 * @return void
 */
public static function saveParseOlxAuto()
{
    foreach (self::$urls as $id => $auto) {
        foreach (parent::$cities as $city) {
            $feedUrl = 'http://olx.ua/transport/' . $auto . '/' . $city . '/rss/';

            try {
                $xml = file_get_contents($feedUrl);
                if ($xml === false) {
                    error_log('OLX feed could not be downloaded: ' . $feedUrl);
                    continue;
                }
                $feed = new \SimpleXMLElement($xml, LIBXML_NOCDATA);
            } catch (\Exception $e) {
                // Covers both an unparsable document and a warning that an
                // error handler has converted into an exception.
                error_log('OLX feed skipped (' . $feedUrl . '): ' . $e->getMessage());
                continue;
            }
            unset($xml);

            foreach ($feed->channel->item as $item) {
                $link = trim((string) $item->link);
                if (!parent::productUnique($link)) {
                    continue;
                }

                // Items without a recognisable price fragment are not stored.
                if (!preg_match("/Цена:\\s\\d(.*?)\\sгрн/", (string) $item->description, $m)) {
                    continue;
                }

                // Keep only the digits of the matched fragment, e.g. "12 500" -> "12500".
                preg_match_all("/\\d/", $m[0], $digits);

                $model = new Items();
                $model->product = trim((string) $item->title);
                $model->price = implode('', $digits[0]);
                $model->url = $link;
                $model->store = self::STORE;
                $model->phone = parent::getPhoneNumber($link);
                $model->subcategory_id = $id;
                // json_encode() escapes the city value; the flags keep the
                // output identical to the previous hand-built string for plain values.
                $model->options = json_encode(
                    ['b/u' => '1', 'city' => (string) $city],
                    JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE
                );
                $model->save();
            }
        }
    }
}
/**
 * Crawls a foxmart.ua catalogue listing and all of its following pages.
 *
 * Each ".item" element on the page becomes an Items record when its link
 * passes self::productUnique(). Paging continues with ".../<n+1>.html"
 * (or ".../2.html" for the first page) until a page yields no ".item"
 * elements, the HTML cannot be loaded, or no next URL can be derived.
 *
 * The pages are walked in a loop instead of by recursion so the call stack
 * does not grow with the number of pages.
 *
 * @param string $link        URL of the listing page to start from
 * @param mixed  $category_id value stored in Items::subcategory_id
 * @param mixed  $optionType  value stored under "type" in Items::options
 * @return void
 */
public static function processLink($link, $category_id, $optionType)
{
    while (true) {
        $page = self::getPage($link);
        $document = new \DOMDocument();
        try {
            $document->loadHTML($page);
        } catch (\Exception $e) {
            return;
        }
        self::logFile('file.log', $page);

        $products = self::findByDoc($document, '.item');
        echo "founded {$products->length} elements in {$link} \n";
        if (!$products->length) {
            return;
        }

        foreach ($products as $node) {
            /** @var \DOMNode $node */
            $a = self::findByDoc($document, 'h4 a', $node)->item(0);
            // NOTE(review): the trailing "\n" ends up in the stored URL and in the
            // uniqueness check. It looks like a debugging leftover, but it is kept
            // because already stored URLs presumably carry it too - confirm before removing.
            $href = $a ? $a->getAttribute('href') . "\n" : null;

            try {
                if ($href && self::productUnique('http://foxmart.ua' . $href)) {
                    $name = trim(explode('.', $a->textContent)[0]);
                    $name = self::decode(trim(explode('+', $name)[0]));

                    $price = self::findByDoc($document, '.price_prod', $node)->item(0)->textContent;
                    $price = preg_replace('/[^\\d]+/', '', $price);

                    $diagonal = self::findByDoc($document, '.product_view', $node)->item(0)->textContent;
                    // NOTE(review): the label in this pattern is mis-encoded in the source
                    // file; the intended text cannot be recovered from here, so it is kept as found.
                    preg_match('/����: ([\\d\\.]+)/', $diagonal, $matchesDia);

                    $item = new Items();
                    $item->url = 'http://foxmart.ua' . $href;
                    $item->store = self::STORE;
                    $item->price = $price;
                    $item->product = $name;
                    $item->phone = self::PHONE;
                    $item->subcategory_id = $category_id;
                    $item->options = json_encode(array_merge(
                        ['type' => $optionType],
                        isset($matchesDia[1]) ? ['display' => $matchesDia[1]] : []
                    ));
                    $item->save();
                }
            } catch (\Exception $e) {
                echo $e->getTraceAsString();
            }
        }

        // Derive the URL of the next page.
        if (preg_match('/\\/([\\d]+)\\.html/', $link, $matchCurrent)) {
            $next = preg_replace('/\\/[\\d]+\\.html/', '/' . ($matchCurrent[1] + 1) . '.html', $link);
        } else {
            $next = preg_replace('/\\.html/', '/2.html', $link);
        }

        // If the link has no ".html" part the replacement changes nothing;
        // stop instead of re-processing the same page forever.
        if ($next === null || $next === $link) {
            return;
        }
        $link = $next;
    }
}
/**
 * Crawls a catalogue listing that is paginated with a "/p-<n>" URL segment.
 *
 * Each ".item" element on the page becomes an Items record when its link
 * passes self::productUnique(). Paging continues with "/p-<n+1>" until a
 * page yields no ".item" elements, the HTML cannot be loaded, or the next
 * URL cannot be derived from the current one.
 *
 * The pages are walked in a loop instead of by recursion so the call stack
 * does not grow with the number of pages.
 *
 * @param string $link        URL of the listing page to start from
 * @param mixed  $category_id value stored in Items::subcategory_id
 * @param mixed  $optionType  value stored under "type" in Items::options
 * @return void
 */
public static function processLink($link, $category_id, $optionType)
{
    while (true) {
        $page = self::getPage($link);
        $document = new \DOMDocument();
        try {
            $document->loadHTML($page);
        } catch (\Exception $e) {
            return;
        }

        $products = self::findByDoc($document, '.item');
        echo "founded {$products->length} elements in {$link} \n";
        if (!$products->length) {
            return;
        }

        foreach ($products as $node) {
            /** @var \DOMNode $node */
            $a = self::findByDoc($document, 'a.product-name', $node)->item(0);
            // NOTE(review): the trailing "\n" ends up in the stored URL and in the
            // uniqueness check. It looks like a debugging leftover, but it is kept
            // because already stored URLs presumably carry it too - confirm before removing.
            $href = $a ? $a->getAttribute('href') . "\n" : null;

            try {
                if ($href && self::productUnique($href)) {
                    $name = trim(explode('.', $a->textContent)[0]);
                    $name = self::decode(trim(explode('+', $name)[0]));

                    $price = self::findByDoc($document, '.price .sum', $node)->item(0)->textContent;
                    $price = preg_replace('/[^\\d]+/', '', $price);

                    // A number directly followed by a double quote is taken as the diagonal.
                    $diagonal = self::findByDoc($document, '.attr-container', $node)->item(0)->textContent;
                    preg_match('/([\\d\\.]+)"/', $diagonal, $matchesDia);

                    $item = new Items();
                    $item->url = $href;
                    $item->store = self::STORE;
                    $item->price = $price;
                    $item->product = $name;
                    $item->phone = self::PHONE;
                    $item->subcategory_id = $category_id;
                    $item->options = json_encode(array_merge(
                        ['type' => $optionType],
                        isset($matchesDia[1]) ? ['display' => $matchesDia[1]] : []
                    ));
                    $item->save();
                }
            } catch (\Exception $e) {
                // One broken product must not stop the crawl, but it should be visible.
                echo "skipped product in {$link}: " . $e->getMessage() . "\n";
            }
        }

        // Derive the URL of the next page; a link without "/p-<n>" counts as page 1.
        preg_match('/\\/p-([\\d]+)/', $link, $matchCurrent);
        $current = isset($matchCurrent[1]) ? $matchCurrent[1] : 1;
        $next = preg_replace('/\\/p-[\\d]+/', '/p-' . ($current + 1), $link);

        // Without a "/p-<n>" segment the replacement changes nothing. Where the
        // segment belongs cannot be inferred here, so stop instead of
        // re-processing the same page forever.
        if ($next === null || $next === $link) {
            return;
        }
        $link = $next;
    }
}
/**
 * Creates a new Items model.
 *
 * On a successful save the posted "ItemsTr" rows (if any) are inserted into
 * the items_tr table with the new item's id, and the browser is redirected
 * to the 'view' page. Otherwise the 'create' form is rendered.
 *
 * @return mixed
 */
public function actionCreate()
{
    $model = new Items();

    if ($model->load(Yii::$app->request->post()) && $model->save()) {
        // Request::post() returns null for a missing key, unlike a direct
        // $_POST['ItemsTr'] read which raises an "undefined index" error.
        $translations = Yii::$app->request->post('ItemsTr');

        if (is_array($translations)) {
            $rows = [];
            foreach ($translations as $translation) {
                if (!is_array($translation)) {
                    continue;
                }
                // Build each row positionally so the values always line up with
                // the column list, whatever order the form fields arrived in.
                $rows[] = [
                    isset($translation['name']) ? $translation['name'] : null,
                    isset($translation['type']) ? $translation['type'] : null,
                    isset($translation['lang']) ? $translation['lang'] : null,
                    $model->id,
                ];
            }

            if ($rows) {
                Yii::$app->db->createCommand()
                    ->batchInsert('items_tr', ['name', 'type', 'lang', 'item_id'], $rows)
                    ->execute();
            }
        }

        return $this->redirect(['view', 'id' => $model->id]);
    }

    // Only the form needs the translation model and the trip list.
    $model_tr = new \common\models\ItemsTr();
    $trips_array = ArrayHelper::map(
        \common\models\Trips::find()->orderBy('from')->all(),
        'id',
        function ($trip, $defaultValue) {
            return $trip->from . '-' . $trip->to;
        }
    );

    return $this->render('create', [
        'model' => $model,
        'model_tr' => $model_tr,
        'trips_array' => $trips_array,
    ]);
}
/**
 * Parses the OLX jobs catalogue.
 *
 * One feed is requested per (subcategory, city) pair from
 * http://olx.ua/rabota/<slug>/<city>/rss/. Every feed item whose link is
 * accepted by parent::productUnique() is saved as an Items record; the price
 * is the first element returned by self::getPrice() for the item link.
 *
 * A feed that cannot be downloaded or parsed is reported with error_log() and
 * skipped, so a single broken feed does not abort the remaining ones.
 *
 * @return void
 */
public static function saveParseOlxWork()
{
    foreach (self::$category_work as $id => $work) {
        foreach (parent::$cities as $city) {
            $feedUrl = 'http://olx.ua/rabota/' . $work . '/' . $city . '/rss/';

            try {
                $xml = file_get_contents($feedUrl);
                if ($xml === false) {
                    error_log('OLX feed could not be downloaded: ' . $feedUrl);
                    continue;
                }
                $feed = new \SimpleXMLElement($xml, LIBXML_NOCDATA);
            } catch (\Exception $e) {
                // Covers both an unparsable document and a warning that an
                // error handler has converted into an exception.
                error_log('OLX feed skipped (' . $feedUrl . '): ' . $e->getMessage());
                continue;
            }
            unset($xml);

            foreach ($feed->channel->item as $item) {
                $link = trim((string) $item->link);
                if (!parent::productUnique($link)) {
                    continue;
                }

                $model = new Items();
                $model->product = trim((string) $item->title);
                $model->price = self::getPrice($link)[0];
                $model->url = $link;
                $model->store = self::STORE;
                $model->phone = parent::getPhoneNumber($link);
                $model->subcategory_id = $id;
                // json_encode() escapes the city value; the flag keeps the output
                // identical to the previous hand-built string for plain values.
                $model->options = json_encode(
                    ['city' => (string) $city],
                    JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE
                );
                $model->save();
            }
        }
    }
}
/**
 * Crawls rst.ua listing pages and stores every new advert as an Items record.
 *
 * Requests $baseUrl with "&start=1" .. "&start=1000". On each page every
 * "div.rst-ocb-i" card except the last one is read; cards whose link passes
 * $this->productUnique() are saved with price, phone and a JSON options
 * string (year, fuel, transmission, running, city, b/u).
 *
 * @param string $baseUrl     listing URL; "&start=<n>" is appended to it
 * @param mixed  $subcategory value stored in Items::subcategory_id
 * @return void
 */
private function saveParseRst($baseUrl, $subcategory)
{
    set_time_limit(0);
    // Kept from the original implementation: notices are silenced process-wide.
    error_reporting(E_ALL & ~E_NOTICE);

    for ($j = 1; $j <= 1000; $j++) {
        $url = $baseUrl . '&start=' . $j;
        $parser = new HtmlDomParser();
        $html = iconv('windows-1251', 'UTF-8//IGNORE', $parser->file_get_html($url));
        $dom = $parser->str_get_html($html);
        if (!$dom) {
            // Page could not be fetched or parsed; try the next one.
            continue;
        }

        // Look the cards up once per page instead of on every loop test.
        $cards = $dom->find('div[class=rst-ocb-i]');
        // The last card on a page is deliberately left out, as before.
        $cardCount = count($cards) - 1;

        for ($i = 0; $i < $cardCount; $i++) {
            $dparser = new HtmlDomParser();
            $ddom = $dparser->str_get_html($cards[$i]->innertext);
            if (!$ddom) {
                continue;
            }

            $anchors = $ddom->find('a.rst-ocb-i-a');
            if (!isset($anchors[0])) {
                continue;
            }
            $link = 'http://rst.ua' . $anchors[0]->href;

            // Check uniqueness first so the advert page is only downloaded
            // for adverts that will actually be stored.
            if (!$this->productUnique($link)) {
                continue;
            }

            $details = $ddom->find('li[class=rst-ocb-i-d-l-i]');
            $cityNodes = $ddom->find('li[class=rst-ocb-i-d-l-j]');
            $titleNodes = $ddom->find('h3[class=rst-ocb-i-h]');
            $priceNodes = $ddom->find('span[class=rst-ocb-i-d-l-i-s rst-ocb-i-d-l-i-s-p]');

            // "/(.*)/" keeps only the first line of each text block.
            preg_match("/(.*)/", isset($details[1]) ? $details[1]->plaintext : '', $yearLine);
            preg_match("/(.*)/", isset($details[2]) ? $details[2]->plaintext : '', $fuelLine);
            preg_match("/(.*)/", isset($cityNodes[0]) ? $cityNodes[0]->plaintext : '', $cityLine);
            $product = isset($titleNodes[0]) ? $titleNodes[0]->plaintext : null;
            $priceText = str_replace("'", "", isset($priceNodes[0]) ? $priceNodes[0]->plaintext : '');

            // Phone: first run of digits found on the advert page. The paragraph
            // block is preferred, the table cell is the fallback. Digits are ASCII
            // in every encoding involved, so no re-encoding is needed. The value is
            // stored as a string; previously an array was assigned to the model.
            $phone = '';
            $adPage = $dparser->file_get_html($link);
            if ($adPage) {
                $phoneText = '';
                $phoneSelectors = [
                    'p[class=rst-page-oldcars-item-option-block-container]',
                    'div.rst-page-oldcars-item-option-block-container td',
                ];
                foreach ($phoneSelectors as $selector) {
                    $nodes = $adPage->find($selector);
                    if (isset($nodes[0]) && trim($nodes[0]->plaintext) !== '') {
                        $phoneText = $nodes[0]->plaintext;
                        break;
                    }
                }
                if (preg_match("/\\d+/", $phoneText, $phoneMatch)) {
                    $phone = $phoneMatch[0];
                }
            }
            unset($adPage);

            preg_match("/\\((\\d+).*?\\)/", $yearLine[0], $running);
            preg_match("/\\((.*?)\\)/", $fuelLine[0], $transmission);
            preg_match("/\\d+/", $yearLine[0], $year);
            preg_match("/\\-?\\d+(\\.\\d{0,})?(.*?)\\(/", $fuelLine[0], $fuel);
            preg_match("/(\\d+)/", $priceText, $price);
            $cityParts = explode(":", $cityLine[0]);

            $model = new Items();
            $model->product = $product;
            if (!empty($price)) {
                $model->price = $price[0];
            } else {
                // NOTE(review): this prefix makes the options value invalid JSON for
                // adverts without a numeric price. It is kept because consumers may
                // rely on it - confirm and move it into the JSON if possible.
                $model->options = "договорная";
                $model->price = "0";
            }
            $model->url = $link;
            $model->store = 'Rst';
            $model->phone = $phone;
            $model->subcategory_id = $subcategory;
            // json_encode() escapes quotes in scraped text; the flags keep the
            // output identical to the previous hand-built string for plain values.
            $model->options .= json_encode(
                [
                    'year' => isset($year[0]) ? trim($year[0]) : '',
                    'fuel' => isset($fuel[2]) ? trim($fuel[2]) : '',
                    'transmission' => isset($transmission[1]) ? trim($transmission[1]) : '',
                    'running' => isset($running[1]) ? trim($running[1]) : '',
                    'city' => isset($cityParts[1]) ? trim($cityParts[1]) : '',
                    'b/u' => '1',
                ],
                JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE
            );
            $model->save();
        }
    }
}
/**
 * Stores a single Microtron product as an Items record.
 *
 * Nothing is saved when the product URL is rejected by self::productUnique()
 * or when the price or the title is not set. The stored price consists of
 * the digits of $item['price'] only.
 *
 * @param array $item expects the keys 'url', 'price' and 'product'
 * @return void
 */
public function saveMicrotronShit($item)
{
    if (!self::productUnique($item['url'])) {
        return;
    }

    $link = $item['url'];
    $rawPrice = $item['price'];
    $name = $item['product'];

    if (!isset($rawPrice) || !isset($name)) {
        return;
    }

    // Collect every digit of the price text; $digitMatches[0] is the list of them.
    $compactPrice = str_replace(" ", "", $rawPrice);
    preg_match_all("/\\d/", $compactPrice, $digitMatches);

    $record = new Items();
    $record->product = trim((string) $name);
    $record->price = implode('', $digitMatches[0]);
    $record->url = $link;
    $record->store = 'Microtron';
    $record->phone = self::PHONE;
    $record->subcategory_id = 8;
    $record->options = '-';
    $record->save();
}
/**
 * Imports camera bag listings from the OLX RSS feeds.
 *
 * One feed is requested per city. Every feed item whose link is accepted by
 * self::productUnique() and whose description contains a
 * "Цена: <digits> грн" fragment is saved as an Items record in subcategory 69.
 *
 * A feed that cannot be downloaded or parsed is reported with error_log() and
 * skipped, so a single broken feed does not abort the remaining ones.
 *
 * @return void
 */
public function saveParseOlxSumki()
{
    foreach (parent::$cities as $city) {
        $feedUrl = 'http://olx.ua/elektronika/foto-video/aksessuary-dlya-foto-videokamer/sumki/' . $city . '/rss/';

        try {
            $xml = file_get_contents($feedUrl);
            if ($xml === false) {
                error_log('OLX feed could not be downloaded: ' . $feedUrl);
                continue;
            }
            $feed = new \SimpleXMLElement($xml, LIBXML_NOCDATA);
        } catch (\Exception $e) {
            // Covers both an unparsable document and a warning that an
            // error handler has converted into an exception.
            error_log('OLX feed skipped (' . $feedUrl . '): ' . $e->getMessage());
            continue;
        }
        unset($xml);

        foreach ($feed->channel->item as $item) {
            $link = trim((string) $item->link);
            if (!self::productUnique($link)) {
                continue;
            }

            // Items without a recognisable price fragment are not stored.
            if (!preg_match("/Цена:\\s\\d(.*?)\\sгрн/", (string) $item->description, $m)) {
                continue;
            }

            // Keep only the digits of the matched fragment, e.g. "12 500" -> "12500".
            preg_match_all("/\\d/", $m[0], $digits);

            $model = new Items();
            $model->product = trim((string) $item->title);
            $model->price = implode('', $digits[0]);
            $model->url = $link;
            $model->store = 'Olx';
            $model->phone = parent::getPhoneNumber($link);
            $model->subcategory_id = 69;
            // json_encode() escapes the city value; the flags keep the output
            // identical to the previous hand-built string for plain values.
            $model->options = json_encode(
                ['type' => 'sumki-photo', 'b/u' => '1', 'city' => (string) $city],
                JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE
            );
            $model->save();
        }
    }
}
/**
 * Parses washing machines from comfy.ua.
 *
 * Collects every washing-machine product URL from the site's sitemap, then
 * loads each page that passes self::productUnique() and stores it as an
 * Items record in subcategory 4. A page is stored only when its
 * "informerText" span has text and price, title, machine type and maximum
 * load were all found.
 *
 * @return void
 */
private function saveParseComfyWasher()
{
    set_time_limit(0);
    $file = 'http://comfy.ua/media/sitemap.xml';
    $links = [];

    // Build the list of product URLs, then drop the sitemap from memory.
    $xml = simplexml_load_file($file, "SimpleXMLElement", LIBXML_NOCDATA);
    if ($xml === false) {
        // Sitemap unavailable or not valid XML: nothing to import.
        return;
    }
    foreach ($xml->url as $entry) {
        $loc = trim((string) $entry->loc);
        if (preg_match("/(http\\:\\/\\/comfy\\.ua\\/stiral\\-naja\\-mashina)(.*?)(\\.html)/", $loc)) {
            $links[] = $loc;
        }
    }
    unset($xml);

    // Walk through the collected URLs.
    foreach ($links as $link) {
        if (!self::productUnique($link)) {
            continue;
        }

        $page = new AdvancedHtmlDom();
        $page->load_file($link);

        $not = $page->find('span[class=informerText]');
        if (!$not->text()) {
            continue;
        }

        $price = $page->find('span[class=price-value]')[0]->text();
        $title = $page->find('h1[class=product-name]')[0]->text();

        $type_washer = null;
        $max_load = null;

        // Query the feature list once instead of on every loop step.
        $features = $page->find('li.features__item dl');
        $featureCount = $features->count();
        for ($i = 0; $i < $featureCount; $i++) {
            $text = $features[$i]->text();
            // stripos() returns 0 for a match at the very start of the text,
            // so the result must be compared against false explicitly.
            if (stripos($text, 'Тип стиральной машины') !== false) {
                $type_washer = $text;
            }
            if (stripos($text, 'Макс. загрузка') !== false) {
                $max_load = $text;
            }
            if (!empty($type_washer) && !empty($max_load)) {
                break;
            }
        }
        unset($page);

        if (isset($price) && isset($title) && $type_washer != null && $max_load != null) {
            // explode() replaces split(), which no longer exists in PHP 7+.
            $typeParts = explode(":", $type_washer);
            $loadParts = explode(":", $max_load);

            // The stored price is the digits of the price text only.
            preg_match_all("/\\d/", $price, $digits);

            $model = new Items();
            $model->product = trim((string) $title);
            $model->price = implode('', $digits[0]);
            $model->url = $link;
            $model->store = 'Comfy';
            $model->phone = self::PHONE;
            $model->subcategory_id = 4;
            // json_encode() escapes quotes in scraped text; the flags keep the
            // output identical to the previous hand-built string for plain values.
            $model->options = json_encode(
                [
                    'type' => 'washer',
                    'type_washer' => isset($typeParts[1]) ? trim($typeParts[1]) : '',
                    'max_load' => isset($loadParts[1]) ? trim($loadParts[1]) : '',
                    'b/u' => '0',
                ],
                JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE
            );
            $model->save();
        }
    }
}