예제 #1
0
 /**
  * Crawls the Olx "transport" RSS feed for every (category, city) pair and
  * stores each previously unseen listing that carries a price as an Items row.
  *
  * A feed that cannot be downloaded or parsed is logged and skipped, so one
  * bad category/city combination no longer aborts the rest of the crawl.
  *
  * @return void
  */
 public static function saveParseOlxAuto()
 {
     foreach (self::$urls as $id => $auto) {
         foreach (parent::$cities as $city) {
             $feedUrl = 'http://olx.ua/transport/' . $auto . '/' . $city . '/rss/';
             try {
                 $xml = file_get_contents($feedUrl);
                 if ($xml === false) {
                     error_log('Olx feed could not be downloaded: ' . $feedUrl);
                     continue;
                 }
                 $feed = new \SimpleXMLElement($xml, LIBXML_NOCDATA);
             } catch (\Exception $e) {
                 // Covers malformed XML as well as warnings that an error
                 // handler may have promoted to exceptions.
                 error_log('Olx feed skipped (' . $feedUrl . '): ' . $e->getMessage());
                 continue;
             }
             unset($xml);
             foreach ($feed->channel->item as $item) {
                 $link = trim((string) $item->link);
                 if (!parent::productUnique($link)) {
                     continue;
                 }
                 // Listings without a "Цена: ... грн" fragment are ignored.
                 if (!preg_match("/Цена:\\s\\d(.*?)\\sгрн/", (string) $item->description, $m)) {
                     continue;
                 }
                 // Without capture groups preg_match_all() yields a single list of
                 // full matches, so the price is simply those digits joined.
                 preg_match_all("/\\d/", str_replace(" ", "", $m[0]), $digits);
                 $model = new Items();
                 $model->product = trim((string) $item->title);
                 $model->price = implode('', $digits[0]);
                 $model->url = $link;
                 $model->store = self::STORE;
                 $model->phone = parent::getPhoneNumber($link);
                 $model->subcategory_id = $id;
                 // Same bytes as the former hand-built string for plain values,
                 // but correctly escaped should a city ever contain a quote.
                 $model->options = json_encode(
                     ['b/u' => '1', 'city' => (string) $city],
                     JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE
                 );
                 $model->save();
             }
         }
     }
 }
예제 #2
0
 /**
  * Crawls a foxmart.ua listing page and all following pages, storing every
  * previously unseen product as an Items row.
  *
  * Pages are walked in a loop (one iteration per page) instead of one
  * recursive call per page. The walk stops when a page cannot be parsed,
  * contains no ".item" nodes, or when no new next-page URL can be derived.
  *
  * @param string $link        URL of the listing page to start from
  * @param mixed  $category_id value stored in Items::subcategory_id
  * @param mixed  $optionType  value stored under the "type" key of Items::options
  * @return void
  */
 public static function processLink($link, $category_id, $optionType)
 {
     while (true) {
         $page = self::getPage($link);
         $document = new \DOMDocument();
         try {
             $document->loadHTML($page);
         } catch (\Exception $e) {
             return;
         }
         self::logFile('file.log', $page);
         $products = self::findByDoc($document, '.item');
         echo "founded {$products->length} elements in {$link} \n";
         if (!$products->length) {
             return;
         }
         foreach ($products as $node) {
             /**
              * @var $node \DOMNode
              */
             $a = self::findByDoc($document, 'h4 a', $node)->item(0);
             // NOTE(review): the appended "\n" becomes part of the URL that is
             // checked and stored. Kept because rows already stored may contain
             // it; confirm and migrate existing data before trimming.
             $href = $a ? $a->getAttribute('href') . "\n" : null;
             try {
                 if ($href && self::productUnique('http://foxmart.ua' . $href)) {
                     // Product name is the anchor text up to the first "." or "+".
                     $name = trim(explode('.', $a->textContent)[0]);
                     $name = self::decode(trim(explode('+', $name)[0]));
                     $price = self::findByDoc($document, '.price_prod', $node)->item(0)->textContent;
                     $price = preg_replace('/[^\\d]+/', '', $price);
                     $diagonal = self::findByDoc($document, '.product_view', $node)->item(0)->textContent;
                     // NOTE(review): the label in this pattern is mis-encoded in the
                     // source file and probably never matches; restore the original text.
                     preg_match('/����: ([\\d\\.]+)/', $diagonal, $matchesDia);
                     $item = new Items();
                     $item->url = 'http://foxmart.ua' . $href;
                     $item->store = self::STORE;
                     $item->price = $price;
                     $item->product = $name;
                     $item->phone = self::PHONE;
                     $item->subcategory_id = $category_id;
                     $item->options = json_encode(array_merge(['type' => $optionType], isset($matchesDia[1]) ? ['display' => $matchesDia[1]] : []));
                     $item->save();
                 }
             } catch (\Exception $e) {
                 echo $e->getTraceAsString();
             }
         }
         // Derive the next page: ".../N.html" -> ".../N+1.html", otherwise
         // "....html" -> ".../2.html". Dots are escaped so only a literal
         // ".html" is rewritten.
         if (preg_match('/\\/([\\d]+)\\.html/', $link, $matchCurrent)) {
             $next = preg_replace('/\\/[\\d]+\\.html/', '/' . ($matchCurrent[1] + 1) . '.html', $link);
         } else {
             $next = preg_replace('/\\.html/', '/2.html', $link);
         }
         // A link without ".html" would be fetched again and again; stop instead.
         if (!is_string($next) || $next === $link) {
             return;
         }
         $link = $next;
     }
 }
예제 #3
0
 /**
  * Crawls a listing page paginated with a "/p-N" URL segment and all
  * following pages, storing every previously unseen product as an Items row.
  *
  * Pages are walked in a loop instead of one recursive call per page. The
  * walk stops when a page cannot be parsed, contains no ".item" nodes, or
  * when the link has no "/p-N" segment to advance (the original re-requested
  * the same URL indefinitely in that case).
  *
  * @param string $link        URL of the listing page to start from
  * @param mixed  $category_id value stored in Items::subcategory_id
  * @param mixed  $optionType  value stored under the "type" key of Items::options
  * @return void
  */
 public static function processLink($link, $category_id, $optionType)
 {
     while (true) {
         $page = self::getPage($link);
         $document = new \DOMDocument();
         try {
             $document->loadHTML($page);
         } catch (\Exception $e) {
             return;
         }
         $products = self::findByDoc($document, '.item');
         echo "founded {$products->length} elements in {$link} \n";
         if (!$products->length) {
             return;
         }
         foreach ($products as $node) {
             /**
              * @var $node \DOMNode
              */
             $a = self::findByDoc($document, 'a.product-name', $node)->item(0);
             // NOTE(review): the appended "\n" becomes part of the URL that is
             // checked and stored. Kept because rows already stored may contain
             // it; confirm and migrate existing data before trimming.
             $href = $a ? $a->getAttribute('href') . "\n" : null;
             try {
                 if ($href && self::productUnique($href)) {
                     // Product name is the anchor text up to the first "." or "+".
                     $name = trim(explode('.', $a->textContent)[0]);
                     $name = self::decode(trim(explode('+', $name)[0]));
                     $price = self::findByDoc($document, '.price .sum', $node)->item(0)->textContent;
                     $price = preg_replace('/[^\\d]+/', '', $price);
                     $diagonal = self::findByDoc($document, '.attr-container', $node)->item(0)->textContent;
                     // Display size: a number immediately followed by an inch mark.
                     preg_match('/([\\d\\.]+)"/', $diagonal, $matchesDia);
                     $item = new Items();
                     $item->url = $href;
                     $item->store = self::STORE;
                     $item->price = $price;
                     $item->product = $name;
                     $item->phone = self::PHONE;
                     $item->subcategory_id = $category_id;
                     $item->options = json_encode(array_merge(['type' => $optionType], isset($matchesDia[1]) ? ['display' => $matchesDia[1]] : []));
                     $item->save();
                 }
             } catch (\Exception $e) {
                 // Still best-effort per product, but no longer silent.
                 echo $e->getMessage() . "\n";
             }
         }
         preg_match('/\\/p-([\\d]+)/', $link, $matchCurrent);
         $current = isset($matchCurrent[1]) ? $matchCurrent[1] : 1;
         $next = preg_replace('/\\/p-[\\d]+/', '/p-' . ($current + 1), $link);
         // Without a "/p-N" segment nothing is replaced and the same page would
         // be fetched forever; stop instead.
         if (!is_string($next) || $next === $link) {
             return;
         }
         $link = $next;
     }
 }
예제 #4
0
 /**
  * Creates a new Items model together with its translation rows.
  * If creation is successful, the posted 'ItemsTr' entries (if any) are
  * inserted into 'items_tr' and the browser is redirected to the 'view' page;
  * otherwise the 'create' form is rendered.
  * @return mixed
  */
 public function actionCreate()
 {
     $model = new Items();
     if ($model->load(Yii::$app->request->post()) && $model->save()) {
         // Read through the request component: a missing 'ItemsTr' key yields
         // null here instead of an undefined-index error after the save.
         $tr = Yii::$app->request->post('ItemsTr');
         if (is_array($tr)) {
             $columns = ['name', 'type', 'lang', 'item_id'];
             $rows = [];
             foreach ($tr as $lang) {
                 if (!is_array($lang)) {
                     continue;
                 }
                 $lang['item_id'] = $model->id;
                 // batchInsert() binds row values by position, so each row is
                 // rebuilt in column order instead of trusting the key order
                 // (and key set) sent by the client.
                 $row = [];
                 foreach ($columns as $column) {
                     $row[] = isset($lang[$column]) ? $lang[$column] : null;
                 }
                 $rows[] = $row;
             }
             if ($rows) {
                 Yii::$app->db->createCommand()->batchInsert('items_tr', $columns, $rows)->execute();
             }
         }
         return $this->redirect(['view', 'id' => $model->id]);
     }
     // Only the form needs these, so they are built after the redirect path.
     $model_tr = new \common\models\ItemsTr();
     $trips_array = ArrayHelper::map(\common\models\Trips::find()->orderBy('from')->all(), 'id', function ($model, $defaultValue) {
         return $model->from . '-' . $model->to;
     });
     return $this->render('create', ['model' => $model, 'model_tr' => $model_tr, 'trips_array' => $trips_array]);
 }
예제 #5
0
 /**
  * Parses the Olx jobs ("rabota") catalogue.
  *
  * Reads the RSS feed of every (category, city) pair and stores each
  * previously unseen listing as an Items row. A feed that cannot be
  * downloaded or parsed is logged and skipped so the remaining feeds are
  * still processed.
  *
  * @return void
  */
 public static function saveParseOlxWork()
 {
     foreach (self::$category_work as $id => $work) {
         foreach (parent::$cities as $city) {
             $feedUrl = 'http://olx.ua/rabota/' . $work . '/' . $city . '/rss/';
             try {
                 $xml = file_get_contents($feedUrl);
                 if ($xml === false) {
                     error_log('Olx feed could not be downloaded: ' . $feedUrl);
                     continue;
                 }
                 $feed = new \SimpleXMLElement($xml, LIBXML_NOCDATA);
             } catch (\Exception $e) {
                 // Covers malformed XML as well as warnings that an error
                 // handler may have promoted to exceptions.
                 error_log('Olx feed skipped (' . $feedUrl . '): ' . $e->getMessage());
                 continue;
             }
             unset($xml);
             foreach ($feed->channel->item as $item) {
                 $link = trim((string) $item->link);
                 if (!parent::productUnique($link)) {
                     continue;
                 }
                 $model = new Items();
                 $model->product = trim((string) $item->title);
                 // First element of whatever getPrice() returns for the listing page.
                 $model->price = self::getPrice($link)[0];
                 $model->url = $link;
                 $model->store = self::STORE;
                 $model->phone = parent::getPhoneNumber($link);
                 $model->subcategory_id = $id;
                 // Same bytes as the former hand-built string for plain values,
                 // but correctly escaped should a city ever contain a quote.
                 $model->options = json_encode(
                     ['city' => (string) $city],
                     JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE
                 );
                 $model->save();
             }
         }
     }
 }
예제 #6
0
파일: Rst.php 프로젝트: Sywooch/find-parser
 /**
  * Walks listing pages 1..1000 of an rst.ua search and stores every
  * previously unseen advert as an Items row.
  *
  * @param $baseUrl     listing URL; '&start=<page>' is appended for each page
  * @param $subcategory value stored in Items::subcategory_id
  */
 private function saveParseRst($baseUrl, $subcategory)
 {
     set_time_limit(0);
     // Presumably notices are silenced because the scraping below indexes into
     // find()/preg_match() results that may be missing — TODO confirm.
     error_reporting(E_ALL & ~E_NOTICE);
     for ($j = 1; $j <= 1000; $j++) {
         $url = $baseUrl . '&start=' . $j;
         $parser = new HtmlDomParser();
         $html = iconv('windows-1251', 'UTF-8//IGNORE', $parser->file_get_html($url));
         $dom = $parser->str_get_html($html);
         if (!$dom) {
             // Page could not be loaded into a DOM; try the next one.
             continue;
         }
         // Query the advert blocks once per page instead of on every loop test.
         $ads = $dom->find('div[class=rst-ocb-i]');
         // NOTE(review): the original bound skips the last matched block; kept
         // as is because it may be intentional — confirm.
         $limit = count($ads) - 1;
         for ($i = 0; $i < $limit; $i++) {
             $dparser = new HtmlDomParser();
             $ddom = $dparser->str_get_html($ads[$i]->innertext);
             if (!$ddom) {
                 continue;
             }
             $link = 'http://rst.ua' . $ddom->find('a.rst-ocb-i-a')[0]->href;
             // Check uniqueness first so the advert's detail page is not
             // downloaded for adverts that are already stored.
             if (!$this->productUnique($link)) {
                 continue;
             }
             $details = $ddom->find('li[class=rst-ocb-i-d-l-i]');
             preg_match("/(.*)/", $details[1]->plaintext, $year);
             preg_match("/(.*)/", $details[2]->plaintext, $fuel);
             preg_match("/(.*)/", $ddom->find('li[class=rst-ocb-i-d-l-j]')[0]->plaintext, $city);
             $product = $ddom->find('h3[class=rst-ocb-i-h]')[0]->plaintext;

             // The phone is taken from the advert's own page: first from the <p>
             // container and, when that is empty, from the table-cell fallback.
             $phoneText = '';
             $phonePage = $dparser->file_get_html($link);
             if ($phonePage) {
                 $phoneText = utf8_encode($phonePage->find('p[class=rst-page-oldcars-item-option-block-container]')[0]->plaintext);
                 if (empty($phoneText)) {
                     $phoneText = utf8_encode($phonePage->find('div.rst-page-oldcars-item-option-block-container td')[0]->plaintext);
                 }
             }
             unset($phonePage);
             // Store the first run of digits as a string. The original indexed
             // the string with [0] (its first character) and ended up assigning
             // the preg_match() result array to the model.
             $phone = preg_match("/\\d+/", $phoneText, $phoneMatch) ? $phoneMatch[0] : '';

             $price = str_replace("'", "", $ddom->find('span[class=rst-ocb-i-d-l-i-s rst-ocb-i-d-l-i-s-p]')[0]->plaintext);
             unset($dparser);

             // $year[0] and $fuel[0] hold the raw text of the two detail items;
             // the parenthesised parts are extracted before they are overwritten.
             preg_match("/\\((\\d+).*?\\)/", $year[0], $running);
             preg_match("/\\((.*?)\\)/", $fuel[0], $transmission);
             preg_match("/\\d+/", $year[0], $year);
             preg_match("/\\-?\\d+(\\.\\d{0,})?(.*?)\\(/", $fuel[0], $fuel);
             preg_match("/(\\d+)/", $price, $price);
             $city = explode(":", $city[0]);

             $model = new Items();
             $model->product = $product;
             if (!empty($price)) {
                 $model->price = $price[0];
             } else {
                 // NOTE(review): this prefix makes the options value invalid JSON
                 // once the object below is appended; behaviour kept, confirm intent.
                 $model->options = "договорная";
                 $model->price = "0";
             }
             $model->url = $link;
             $model->store = 'Rst';
             $model->phone = $phone;
             $model->subcategory_id = $subcategory;
             $model->options .= '{"year":"' . trim($year[0]) . '","fuel":"' . trim($fuel[2]) . '","transmission":"' . trim($transmission[1]) . '","running":"' . trim($running[1]) . '","city":"' . trim($city[1]) . '","b/u":"1"}';
             $model->save();
         }
     }
 }
예제 #7
0
 /**
  * Stores one scraped Microtron product as an Items row, provided the item
  * carries a URL, a price and a title and its URL has not been stored yet.
  *
  * @param array $item scraped product; the keys 'url', 'price' and 'product' are read
  * @return void
  */
 public function saveMicrotronShit($item)
 {
     // Test the keys themselves: the original copied them into locals first,
     // so a missing key raised an error before its isset() guard could run.
     if (!isset($item['url'], $item['price'], $item['product'])) {
         return;
     }
     if (!self::productUnique($item['url'])) {
         return;
     }
     // The price is the digits of the scraped price text, joined together.
     preg_match_all("/\\d/", str_replace(" ", "", $item['price']), $digits);
     $model = new Items();
     $model->product = trim((string) $item['product']);
     $model->price = implode('', $digits[0]);
     $model->url = $item['url'];
     $model->store = 'Microtron';
     $model->phone = self::PHONE;
     $model->subcategory_id = 8;
     $model->options = '-';
     $model->save();
 }
예제 #8
0
 /**
  * Crawls the Olx photo/video bag ("sumki") RSS feed of every city and stores
  * each previously unseen listing that carries a price as an Items row.
  *
  * A feed that cannot be downloaded or parsed is logged and skipped, so one
  * bad city no longer aborts the rest of the crawl.
  *
  * @return void
  */
 public function saveParseOlxSumki()
 {
     foreach (parent::$cities as $city) {
         $feedUrl = 'http://olx.ua/elektronika/foto-video/aksessuary-dlya-foto-videokamer/sumki/' . $city . '/rss/';
         try {
             $xml = file_get_contents($feedUrl);
             if ($xml === false) {
                 error_log('Olx feed could not be downloaded: ' . $feedUrl);
                 continue;
             }
             $feed = new \SimpleXMLElement($xml, LIBXML_NOCDATA);
         } catch (\Exception $e) {
             // Covers malformed XML as well as warnings that an error handler
             // may have promoted to exceptions.
             error_log('Olx feed skipped (' . $feedUrl . '): ' . $e->getMessage());
             continue;
         }
         unset($xml);
         foreach ($feed->channel->item as $item) {
             $link = trim((string) $item->link);
             if (!self::productUnique($link)) {
                 continue;
             }
             // Listings without a "Цена: ... грн" fragment are ignored.
             if (!preg_match("/Цена:\\s\\d(.*?)\\sгрн/", (string) $item->description, $m)) {
                 continue;
             }
             // Without capture groups preg_match_all() yields a single list of
             // full matches, so the price is simply those digits joined.
             preg_match_all("/\\d/", str_replace(" ", "", $m[0]), $digits);
             $model = new Items();
             $model->product = trim((string) $item->title);
             $model->price = implode('', $digits[0]);
             $model->url = $link;
             $model->store = 'Olx';
             $model->phone = parent::getPhoneNumber($link);
             $model->subcategory_id = 69;
             // Same bytes as the former hand-built string for plain values, but
             // correctly escaped should a city ever contain a quote.
             $model->options = json_encode(
                 ['type' => 'sumki-photo', 'b/u' => '1', 'city' => (string) $city],
                 JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE
             );
             $model->save();
         }
     }
 }
예제 #9
0
 /**
  * Parses washing machines from comfy.ua.
  *
  * Collects the washing-machine product URLs from the site's sitemap, then
  * loads every URL that is not stored yet and saves it as an Items row when
  * price, title, washer type and maximum load were all found on the page.
  */
 private function saveParseComfyWasher()
 {
     set_time_limit(0);
     $file = 'http://comfy.ua/media/sitemap.xml';
     $links = [];
     // Build the list of URLs, then drop the XML.
     $xml = simplexml_load_file($file, "SimpleXMLElement", LIBXML_NOCDATA);
     if ($xml === false) {
         // Sitemap unavailable or malformed: nothing to crawl.
         return;
     }
     foreach ($xml->url as $item) {
         if (preg_match("/(http\\:\\/\\/comfy\\.ua\\/stiral\\-naja\\-mashina)(.*?)(\\.html)/", (string) $item->loc)) {
             $links[] = trim((string) $item->loc);
         }
     }
     unset($xml);
     // Walk the list of URLs.
     foreach ($links as $link) {
         if (!self::productUnique($link)) {
             continue;
         }
         $page = new AdvancedHtmlDom();
         $page->load_file($link);
         // Only pages whose "informerText" span has text are processed.
         $not = $page->find('span[class=informerText]');
         if (!$not->text()) {
             continue;
         }
         $price = $page->find('span[class=price-value]')[0]->text();
         $title = $page->find('h1[class=product-name]')[0]->text();
         $type_washer = null;
         $max_load = null;
         // Query the feature list once instead of on every loop step.
         $features = $page->find('li.features__item dl');
         $total = $features->count();
         for ($i = 0; $i < $total; $i++) {
             $text = $features[$i]->text();
             // stripos() returns 0 for a label at the very start of the text,
             // so compare against false instead of relying on truthiness.
             if (stripos($text, 'Тип стиральной машины') !== false) {
                 $type_washer = $text;
             }
             if (stripos($text, 'Макс. загрузка') !== false) {
                 $max_load = $text;
             }
             if (!empty($type_washer) && !empty($max_load)) {
                 break;
             }
         }
         unset($page);
         if (isset($price) && isset($title) && $type_washer != null && $max_load != null) {
             // split() no longer exists in PHP 7+; explode() gives the same
             // result for a literal ":" separator. The value follows the label.
             $type_washer = explode(":", $type_washer);
             $max_load = explode(":", $max_load);
             // NOTE(review): only digits are kept, so any decimal separator in
             // the scraped price is dropped (the former ","->"." replacement
             // never had anything to replace) — confirm prices are whole numbers.
             preg_match_all("/\\d/", str_replace(" ", "", $price), $digits);
             $model = new Items();
             $model->product = trim((string) $title);
             $model->price = implode('', $digits[0]);
             $model->url = $link;
             $model->store = 'Comfy';
             $model->phone = self::PHONE;
             $model->subcategory_id = 4;
             // Scraped text may contain quotes or backslashes, so let
             // json_encode() do the escaping; the flags keep Cyrillic and "/"
             // readable exactly as the hand-built string did.
             $model->options = json_encode(
                 [
                     'type' => 'washer',
                     'type_washer' => isset($type_washer[1]) ? trim($type_washer[1]) : '',
                     'max_load' => isset($max_load[1]) ? trim($max_load[1]) : '',
                     'b/u' => '0',
                 ],
                 JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE
             );
             $model->save();
         }
     }
 }