/**
  * Re-queues compilation for stored pending news.
  *
  * Walks every PendingNews row matching "id <= 4293" and, for each row that has
  * search content, posts a "compile" message carrying the row id.
  */
 public function actionIndex()
 {
     $rows = PendingNews::find()->where("id <= 4293")->each();
     foreach ($rows as $pn) {
         // Rows without search content are skipped.
         if (!$pn->search_content) {
             continue;
         }
         $payload = json_encode(["pn_id" => $pn->id]);
         $mq = new RabbitMQComponent();
         $mq->postMessage("compile", "compile", $payload);
     }
 }
Beispiel #2
0
 /**
  * Posts a single hard-coded message (pn_id 311, pq_id 1434) to the "parse" queue
  * with the "parse_rss" route.
  *
  * @return int always 1
  */
 public function actionIndex()
 {
     $payload = json_encode(["pn_id" => "311", "pq_id" => "1434"]);
     $publisher = new RabbitMQComponent();
     $publisher->postMessage("parse", "parse_rss", $payload);
     return 1;
 }
 /**
  * Consumer callback for "parse" messages.
  *
  * Decodes the JSON body, loads the referenced ParserQueue and PendingNews rows and,
  * when both exist, runs a NewsParserComponent on them. When an exception with a code
  * other than 505 is caught, the original body is re-posted to the "parse" queue.
  * The incoming message is acknowledged afterwards.
  *
  * @param object $msg message exposing `body` and `delivery_info` (with 'channel' and
  *                    'delivery_tag' entries) — presumably a php-amqplib message; confirm
  */
 public static function processMessage($msg)
 {
     try {
         $params = json_decode($msg->body);
         print_r($params);
         // A body that is not a JSON object carrying both ids can never be processed.
         // Skip the lookups so it is only acknowledged below; dereferencing it could raise
         // an error that the catch branch would answer by re-posting the same bad body.
         if (is_object($params) && isset($params->pq_id, $params->pn_id)) {
             $pqItem = ParserQueue::findOne(["id" => $params->pq_id]);
             $pnItem = PendingNews::findOne(["id" => $params->pn_id]);
             if ($pqItem && $pnItem) {
                 $newsParser = new NewsParserComponent($pqItem, $pnItem);
                 $newsParser->run();
             }
         }
     } catch (Exception $e) {
         echo $e->getMessage();
         // Code 505 marks failures that must not be retried; everything else is re-posted.
         // NOTE(review): there is no retry limit, so a persistently failing message cycles forever.
         if ($e->getCode() != 505) {
             $mq = new RabbitMQComponent();
             $mq->postMessage("parse", "parse_rss", $msg->body);
         }
     }
     $msg->delivery_info['channel']->basic_ack($msg->delivery_info['delivery_tag']);
 }
 /**
  * Loads the page at $this->url, extracts its content and stores it as pending news.
  *
  * Steps: load the HTML, convert it to UTF-8 (charset from a <meta> tag, otherwise the
  * source's `default_encoding` setting, otherwise the mbstring default), strip <script>
  * tags, read the content through Readabillity, apply stop-word / URL / exclusion
  * processing, reject posts whose detected publish date is not today, reject empty or
  * too-short texts, then either update $this->pendingNews or create a new PendingNews.
  * The parser queue item is marked DONE on success and FAIL on every failure path.
  *
  * @return bool|null true when the pending news row was saved, null otherwise
  */
 public function run()
 {
     echo "Try parse `{$this->url}`\n";
     $html = PageLoaderComponent::load($this->url);
     if (!$html) {
         $this->markQueueFailed();
         return null;
     }
     // The charset value may be double-quoted, single-quoted or unquoted. Capture only the
     // token itself so an unquoted value cannot swallow markup up to the next quote.
     preg_match('/<meta[^>]*?charset\s*=\s*["\']?\s*([^"\'\s\/>;]*)/i', $html, $matches);
     if (isset($matches[1])) {
         if ($charset = $matches[1]) {
             $html = mb_convert_encoding($html, "UTF-8", $charset);
         } else {
             echo "ERROR ON ENCODING DETECTING";
         }
     } else {
         if ($defaultEncoding = SourcesSettings::findOne(['source_id' => $this->source->id, 'name' => 'default_encoding'])) {
             $html = mb_convert_encoding($html, "UTF-8", $defaultEncoding->value);
         } else {
             $html = mb_convert_encoding($html, "UTF-8");
         }
     }
     try {
         $html = $this->stripTagWithContent($html, "script");
         $readability = new Readabillity($this->url);
         // NOTE(review): the title is a hard-coded placeholder; it is only used when a new
         // PendingNews row is created below.
         $title = "test title";
         $title = $this->processTitleStopWords($title);
         $content = $readability->getContent();
         $content = $this->processContentStopWords($content);
         $content = preg_replace('/\\n/', ' ', $content);
         $content = $this->fixUrls($content);
         $content = $this->processExcludeElements($content);
         // Only posts published today are accepted when a publish date can be detected.
         $date = $this->processPublishDate($content);
         if ($date && !(date("Y-m-d") == date("Y-m-d", $date))) {
             throw new Exception("Old post");
         }
         // An empty text means no content was extracted. Raise here (handled below as a
         // failure) so the queue item does not keep its previous status.
         $searchContent = trim(strip_tags($content));
         if (!$searchContent) {
             throw new Exception('Looks like we couldn\'t find the content. :(');
         }
         $searchContent = preg_replace('/\\n/', ' ', $searchContent);
         // A missing `news_min_length` setting means no minimum is enforced.
         $minLengthSetting = Settings::findOne(['name' => 'news_min_length']);
         $minLength = $minLengthSetting ? $minLengthSetting->value : 0;
         if (count(explode(" ", $searchContent)) < $minLength) {
             $this->markQueueFailed();
             return null;
         }
         if ($this->pendingNews) {
             // Update the existing pending news row.
             $this->pendingNews->content = $content;
             $this->pendingNews->search_content = $searchContent;
             $this->pendingNews->status = PendingNews::STATUS_NEW;
             if (!$this->pendingNews->thumb_src) {
                 if ($thumbUrl = $this->detectThumb($html, $content)) {
                     $this->pendingNews->thumb_src = $thumbUrl;
                 }
             }
             if (!$this->pendingNews->save()) {
                 print_r($this->pendingNews->getErrors());
                 $this->markQueueFailed();
                 return null;
             }
             // Tagging is best-effort: a failure is printed and does not stop the flow.
             try {
                 PendingNews::fillTags($this->pendingNews->search_content, $this->pendingNews->id);
             } catch (\Exception $e) {
                 print_r($e->getMessage());
             }
             $mq = new RabbitMQComponent();
             $mq->postMessage("compile", "compile", json_encode(["pn_id" => $this->pendingNews->id]));
             $this->parserQueue->status = ParserQueue::STATUS_DONE;
             $this->parserQueue->save();
             return true;
         }
         // No pending news was supplied: create a new row.
         echo PHP_EOL . "NEWS CREATION" . PHP_EOL;
         $pn = new PendingNews();
         $pn->source_id = $this->source->id;
         $pn->title = $title;
         $pn->content = $content;
         $pn->search_content = $searchContent;
         $pn->status = PendingNews::STATUS_NEW;
         $pn->group_hash = md5(time());
         $pn->thumb_src = $this->detectThumb($html, $content);
         $pn->pq_id = $this->parserQueue->id;
         $pn->created_at = new \yii\db\Expression("NOW()");
         if ($pn->save()) {
             $this->parserQueue->status = ParserQueue::STATUS_DONE;
             $this->parserQueue->save();
             return true;
         }
         echo PHP_EOL . "ERROR" . PHP_EOL;
         print_r($pn->getErrors());
         $this->markQueueFailed();
     } catch (Exception $e) {
         print_r($e);
         $this->markQueueFailed();
     }
     return null;
 }

 /**
  * Sets the current parser queue item to STATUS_FAIL and saves it.
  */
 private function markQueueFailed()
 {
     $this->parserQueue->status = ParserQueue::STATUS_FAIL;
     $this->parserQueue->save();
 }
Beispiel #5
0
 /**
  * Error action that tries to recover missing "uploads" images.
  *
  * When the requested URL contains "uploads" and its sixth "/"-separated segment
  * identifies a News row, the request is redirected to that row's thumbnail and an
  * "image" message is posted. If the row has no thumbnail, an image search is queried
  * with the news title and its first result is stored as the thumbnail. Every other
  * case redirects to "/".
  *
  * Each redirect is returned immediately so a later redirect("/") cannot replace the
  * thumbnail target.
  *
  * @return mixed the result of $this->redirect()
  */
 public function actionError()
 {
     $url = Yii::$app->request->url;
     // Compare against false explicitly: strpos() returns 0 for a match at the start.
     if (strpos($url, "uploads") === false) {
         return $this->redirect("/");
     }
     $aUrl = explode("/", $url);
     $news = isset($aUrl[5]) ? News::findOne($aUrl[5]) : null;
     if (!$news) {
         return $this->redirect("/");
     }
     if ($news->thumb) {
         $mq = new RabbitMQComponent();
         $mq->postMessage("image", "image", json_encode(["news_id" => $news->id, "src" => $news->thumb]));
         return $this->redirect($news->thumb);
     }
     // No stored thumbnail: look one up by title and keep the first hit.
     $giData = PageLoaderComponent::load("https://ajax.googleapis.com/ajax/services/search/images?v=1.0&q=" . urlencode($news->title) . "&userip=127.0.0.1&imgsz=large");
     $data = $giData ? json_decode($giData) : null;
     if (isset($data->responseData->results[0])) {
         $thumbUrl = $data->responseData->results[0]->unescapedUrl;
         $news->thumb = $thumbUrl;
         $news->save();
         $mq = new RabbitMQComponent();
         $mq->postMessage("image", "image", json_encode(["news_id" => $news->id, "src" => $thumbUrl]));
         return $this->redirect($thumbUrl);
     }
     return $this->redirect("/");
 }
Beispiel #6
0
 /**
  * After-save hook: re-tags the record and queues it for compilation.
  *
  * The work runs when either the `search_content` entry of $changedAttributes is
  * non-empty and not '&nbsp;', or the record was just inserted and its trimmed
  * search content is longer than 6 characters. Tagging errors are ignored; the
  * "compile" message is posted either way.
  *
  * @param bool  $insert            whether the save was an insert
  * @param array $changedAttributes attribute values passed by the save routine
  */
 public function afterSave($insert, $changedAttributes)
 {
     $changedContent = isset($changedAttributes['search_content']) ? $changedAttributes['search_content'] : null;
     $hasChangedContent = !empty($changedContent) && $changedContent != '&nbsp;';
     // The length check is evaluated only when the first condition does not already hold.
     if ($hasChangedContent || ($insert && mb_strlen(trim($this->search_content), "utf-8") > 6)) {
         try {
             self::fillTags($this->search_content, $this->id);
         } catch (\Exception $e) {
             // Tagging is best-effort; failures are intentionally ignored.
         }
         $payload = json_encode(["pn_id" => $this->id]);
         $mq = new RabbitMQComponent();
         $mq->postMessage("compile", "compile", $payload);
     }
     return parent::afterSave($insert, $changedAttributes);
 }
 /**
  * Gives a news item without a thumbnail the thumbnail source of a pending news row
  * and posts an "image" message for it.
  *
  * Does nothing when the news row does not exist, already has a thumbnail, or the
  * pending news row does not exist.
  *
  * @param mixed $news_id primary key passed to News::findOne()
  * @param mixed $pn_id   primary key passed to PendingNews::findOne()
  */
 protected function checkNewsThumb($news_id, $pn_id)
 {
     $news = News::findOne($news_id);
     // findOne() may yield no row; without this guard the property read below would fail.
     if (!$news || $news->thumb) {
         return;
     }
     $pn = PendingNews::findOne($pn_id);
     if (!$pn) {
         return;
     }
     $news->thumb = $pn->thumb_src;
     $news->save();
     $mq = new RabbitMQComponent();
     $mq->postMessage("image", "image", json_encode(["news_id" => $news->id, "src" => $news->thumb]));
 }