/**
  * @desc Process Lyrics wiki Songs
  *
  * Walks over the songs collected from the Artist's page. For each song whose
  * wiki article can be resolved, the scraped article data is merged into the
  * song entry, the entry is sanitized and added to the result; it is also
  * saved to Solr when it has both an id and lyrics. Songs detected as
  * translations are skipped entirely (not returned, not saved). Songs without
  * a resolvable article are still returned, sanitized, but never saved.
  *
  * @param $artistData - Artist data
  * @param $albumData - Full Album data
  * @param $leanSongsData - songs data collected from Artist's page; each entry
  *                         is read by its 'song' key and, when present, its 'title' key
  * @return array - Full songs data
  */
 function processSongs($artistData, $albumData, $leanSongsData)
 {
     $songsData = [];
     foreach ($leanSongsData as $songData) {
         // empty() rather than a bare index read: an entry with no 'title' key
         // is handled as "no wiki title" without raising an undefined-index notice.
         $songArticle = !empty($songData['title'])
             ? $this->articleFromTitle($songData['title'])
             : null;

         if (is_null($songArticle)) {
             // No wiki title, or the title did not resolve to an article
             self::log("\t\t\tSONG NOT FOUND: " . $songData['song'] . PHP_EOL);
             // Add song to songs list
             $songsData[] = $this->songScraper->sanitizeData($songData, $this->songScraper->getDataMap());
             continue;
         }

         if ($this->songScraper->isSongTraslation($songArticle)) {
             self::log("\t\t\tSONG IS A TRANSLATION: " . $songData['song'] . " ...SKIPPING" . PHP_EOL);
             continue;
         }

         // Song article exists
         self::log("\t\t\tSONG: " . $songData['title'] . PHP_EOL);
         $songData = array_merge($songData, $this->songScraper->processArticle($songArticle));
         $songData = $this->songScraper->sanitizeData($songData, $this->songScraper->getDataMap());
         // Add song to songs list
         $songsData[] = $songData;

         // Save only songs we have as Wiki pages and have lyrics
         if (isset($songData['id']) && !empty($songData['lyrics'])) {
             $this->articlesProcessed++;
             $this->solr->saveSong($artistData, $albumData, $songData);
         }
     }
     return $songsData;
 }
Example #2
0
 /**
  * @desc Scrapes data from articles from yesterday
  *
  * Fetches the pages changed yesterday, maps them to artist pages, re-scrapes
  * every one of those artists and finally asks Solr to delete documents for
  * those artists using the timestamp taken just before the scraping started.
  * Does nothing beyond logging when no pages were changed.
  */
 public function doScrapeArticlesFromYesterday()
 {
     $yesterdayTs = strtotime('-1 day');
     $this->output('Scraping articles from ' . date("Y-m-d", $yesterdayTs) . PHP_EOL);

     $changedPages = $this->getRecentChangedPages(date("Ymd", $yesterdayTs));
     $this->addToLogContext('user_updated_articles', count($changedPages));
     if (empty($changedPages)) {
         // Nothing was edited yesterday
         return;
     }

     $artistPageIds = $this->convertIntoArtistPages($changedPages);
     $this->addToLogContext('updated_artists', count($artistPageIds));

     // Taken before the scrape loop so it marks the beginning of this run;
     // presumably documents older than this are the stale ones - verify against
     // delDocsByArtistsAndDate().
     // NOTE(review): date() formats in the default timezone while the format
     // appends a literal "Z", and "u" is always 000000 with date() - confirm
     // the default timezone is UTC.
     $start = date('Y-m-d\\TH:i:s.u\\Z');

     foreach ($artistPageIds as $artistPageId) {
         $this->setArticleId($artistPageId);
         $this->doScrapeArtist();
     }

     $this->solr->delDocsByArtistsAndDate(
         $this->getArtistTitlesFromIds($artistPageIds),
         $start
     );
 }