/**
 * Debug action: parses a URL submitted via POST and renders the outcome.
 *
 * All POST fields are forwarded to the `test` view. When a `url` field is
 * present, the page is loaded, run through NewsParserComponent and the result
 * is exposed to the view as `parserResult`.
 *
 * NOTE(review): the URL comes straight from the request and is fetched
 * server-side, and the parser is always built on ParserQueue record #1 —
 * presumably acceptable for a diagnostic page; confirm access is restricted.
 *
 * @return mixed Whatever `render()` returns for the `test` view.
 */
public function actionTest()
{
    $data = [];

    // Read the POST payload once instead of querying the request twice.
    $post = Yii::$app->request->post();
    if ($post) {
        $data = $post;

        if (isset($data['url'])) {
            // `Source` is optional in the submitted form: reading it unguarded
            // raised an undefined-index error. Without an id there is nothing
            // to look up, so the parser receives null.
            $source = isset($data['Source']) ? Sources::findOne($data['Source']) : null;

            $parser = new NewsParserComponent(ParserQueue::findOne(1));
            $data['parserResult'] = $parser->parse(
                PageLoaderComponent::load($data['url']),
                $data['url'],
                $source
            );
        }
    }

    return $this->render('test', $data);
}
/**
 * Queue consumer callback: decodes a JSON job and runs the news parser for it.
 *
 * The message body is expected to be a JSON object carrying `pq_id`
 * (ParserQueue id) and `pn_id` (PendingNews id). The parser runs only when
 * both records are found. If processing throws with a code other than 505,
 * the original body is posted again via RabbitMQComponent so the job is
 * retried. The message is acknowledged at the end in every case.
 *
 * @param object $msg Message exposing `body` and
 *                    `delivery_info['channel']` / `delivery_info['delivery_tag']`
 *                    (presumably a php-amqplib AMQPMessage — confirm).
 * @return void
 */
public static function processMessage($msg)
{
    try {
        $params = json_decode($msg->body);
        print_r($params);

        // A body that is not a JSON object with both ids can never be
        // processed. Dereferencing it used to raise an error which, once
        // surfaced as an exception, re-queued the same broken message over
        // and over. Such messages are now simply acknowledged and dropped.
        if (is_object($params) && isset($params->pq_id, $params->pn_id)) {
            $pqItem = ParserQueue::findOne(["id" => $params->pq_id]);
            $pnItem = PendingNews::findOne(["id" => $params->pn_id]);

            if ($pqItem && $pnItem) {
                $newsParser = new NewsParserComponent($pqItem, $pnItem);
                $newsParser->run();
            }
        }
    } catch (Exception $e) {
        // NOTE(review): the unqualified `Exception` resolves against this
        // file's namespace/imports, which are not visible here — confirm it
        // is the class that is meant to be caught.
        echo $e->getMessage();

        // Code 505 marks failures that must not be retried; everything else
        // is posted back for another attempt.
        if ($e->getCode() != 505) {
            $mq = new RabbitMQComponent();
            $mq->postMessage("parse", "parse_rss", $msg->body);
        }
    }

    $msg->delivery_info['channel']->basic_ack($msg->delivery_info['delivery_tag']);
}
/**
 * Loads the source's RSS feed and queues every item that is not queued yet.
 *
 * For each node matched by one of `$this->itemPatterns` (XPath expressions),
 * the item's child elements are mapped to title / content / link / category /
 * image using the configured `*Pattern` element names. Items whose link is
 * not yet present in ParserQueue get a new ParserQueue row plus a PendingNews
 * entry. Items without a link are skipped.
 *
 * Image lookup: the first png/jpg URL found in the `imagePattern` element
 * wins; otherwise the `url` attribute of an `<enclosure>` element is used;
 * otherwise the image is an empty string.
 *
 * @return void
 */
public function run()
{
    $rss = trim((string) PageLoaderComponent::load($this->source->url));
    if ($rss === '') {
        // Nothing was loaded; DOMDocument::loadXML() rejects empty input.
        return;
    }

    // Replace the opening <rss ...> tag, including all its attributes, with a bare <rss>.
    $rss = preg_replace("/<rss([^>]+)>/mi", "<rss>", $rss);

    // Pick the declared encoding from the XML prolog. The value is matched
    // lazily up to the same quote character that opened it, so attributes
    // following `encoding` and single-quoted declarations are handled.
    $charset = "utf-8";
    if (preg_match('/<\\?xml.*?encoding=(\'|")(.*?)\\1/i', $rss, $matches) && $matches[2] !== '') {
        $charset = $matches[2];
    }

    $doc = new \DOMDocument("1.1", $charset);
    $doc->preserveWhiteSpace = false;

    libxml_use_internal_errors(true);
    $loaded = $doc->loadXML($rss);
    // Parse errors are buffered by libxml and never inspected here; drop them
    // so the buffer does not keep growing across runs.
    libxml_clear_errors();
    if (!$loaded) {
        return;
    }

    $xpath = new \DOMXpath($doc);
    $imageRegex = '/(https?:\\/\\/[a-z0-9\\/_а-я\\-\\.]*\\.(?:png|jpg))/i';

    foreach ($this->itemPatterns as $pattern) {
        $newsList = $xpath->query($pattern->value);
        if (!$newsList) {
            continue;
        }

        foreach ($newsList as $news) {
            // Every field gets a default so that items lacking an element do
            // not trigger undefined-index errors further down.
            $newsParams = [
                'data'      => [],
                'source'    => $this->source->source,
                'title'     => null,
                'content'   => null,
                'link'      => null,
                'image_src' => "",
            ];
            if ($this->source->category_id) {
                $newsParams['data']['category_id'] = $this->source->category_id;
            }

            foreach ($news->childNodes as $node) {
                $nodeName = $node->nodeName;

                if ($this->titlePattern == $nodeName) {
                    $newsParams['title'] = $node->nodeValue;
                }
                if ($this->contentPattern == $nodeName) {
                    $newsParams['content'] = $node->nodeValue;
                }
                if ($this->linkPattern == $nodeName) {
                    $newsParams['link'] = $node->nodeValue;
                }
                if ($this->categoryPattern == $nodeName) {
                    $newsParams['data']['category'] = $node->nodeValue;
                }
                if ($this->imagePattern == $nodeName
                    && preg_match($imageRegex, $node->nodeValue, $images)
                ) {
                    $newsParams['image_src'] = $images[1];
                }
                // Fallback to <enclosure url="...">. Previously the empty
                // default was assigned inside this loop, so after the first
                // child node the fallback could never trigger.
                if ($newsParams['image_src'] === ""
                    && $nodeName == 'enclosure'
                    && preg_match($imageRegex, $node->getAttribute('url'), $images)
                ) {
                    $newsParams['image_src'] = $images[1];
                }
            }

            // The link is the queue's de-duplication key; without it the item
            // cannot be queued meaningfully.
            if ($newsParams['link'] === null || $newsParams['link'] === '') {
                continue;
            }

            try {
                $pqItem = ParserQueue::findOne(['url' => $newsParams['link']]);
                if (!$pqItem) {
                    $pqItem = new ParserQueue();
                    $pqItem->source_id = $this->source->source_id;
                    $pqItem->url = $newsParams['link'];
                    $pqItem->status = ParserQueue::STATUS_INPROCESS;
                    $pqItem->created_at = new \yii\db\Expression('NOW()');
                    $pqItem->updated_at = new \yii\db\Expression('NOW()');

                    if ($pqItem->save()) {
                        PendingNews::add(
                            $newsParams['source'],
                            $newsParams['title'],
                            $newsParams['content'],
                            $newsParams['image_src'],
                            PendingNews::STATUS_NEW,
                            $pqItem,
                            $newsParams['data']
                        );
                    }
                }
            } catch (Exception $e) {
                // Best effort: a failure on one item must not stop the
                // remaining items of the feed from being queued.
                // NOTE(review): the unqualified `Exception` resolves against
                // this file's namespace/imports, which are not visible here —
                // confirm it is the class that is meant to be caught.
            }
        }
    }
}