/**
 * @desc Build the data array for an Album article page
 *
 * Combines the article id with the header data, adds the lower-cased album
 * name and the genres list, and finally merges in the footer data.
 *
 * @param Article $article
 * @return array
 */
public function processArticle(Article $article) {
    // Header values are merged over the article id entry (array_merge semantics).
    $data = array_merge(
        ['article_id' => $article->getId()],
        $this->getHeader($article)
    );

    $data['album_lowercase'] = LyricsUtils::lowercase($data['Album']);
    $data['genres'] = $this->getGenres($article);

    // Add the header's Genre value to the genres list unless it is already there.
    if (isset($data['Genre'])) {
        $alreadyListed = in_array($data['Genre'], $data['genres']);
        if (!$alreadyListed) {
            $data['genres'][] = $data['Genre'];
        }
    }

    $footer = $this->getFooter($article);

    return array_merge($data, $footer);
}
/**
 * @desc Build the data array for a Song article page
 *
 * The result holds the article id, the footer data and the lyrics. The song
 * name (plus its lower-cased form) is added only when it can be derived from
 * the article title; otherwise a debug log entry is written.
 *
 * @param Article $article
 * @return array
 */
public function processArticle(Article $article) {
    $songArticleId = $article->getId();

    $songData = array_merge(
        ['article_id' => $songArticleId],
        $this->getFooter($article)
    );
    $songData['lyrics'] = $this->getLyrics($article);

    // MOB-1367 - make sure the song name is the same as song's article title
    $title = $article->getTitle();
    $songName = null;
    if ($title !== null) {
        $songName = $this->getSongFromArtistTitle($title->getText());
    }

    // No usable name: log it and return the data without the song keys.
    if ($songName === null) {
        wfDebugLog(
            __METHOD__,
            sprintf('Scraped song without title (%d) or with invalid name', $songArticleId)
        );

        return $songData;
    }

    $songData['song'] = $songName;
    $songData['song_lowercase'] = LyricsUtils::lowercase($songName);

    return $songData;
}
/**
 * @desc Fetches a field via getField() and returns it passed through LyricsUtils::lowercase()
 *
 * @param $fieldName - name of the field to read
 * @return string
 */
public function getLowerCaseField($fieldName) {
    $fieldValue = $this->getField($fieldName);

    return LyricsUtils::lowercase($fieldValue);
}
/**
 * @desc Extract album data from section heading
 *
 * Example heading:
 * ==[[Entombed:Serpent Saints The Ten Amendments (2007)|Serpent Saints - The Ten Amendments (2007)]]==
 *
 * The heading is stripped of surrounding ']', '[', '=' and space characters and
 * split on '|'. When a pipe is present, the part before it becomes 'title' and
 * the part after it is parsed for the album name and year; otherwise 'title'
 * stays false and the heading as passed in is parsed.
 *
 * @param string $heading - section heading
 * @return array with keys:
 *   'title'         - text before the pipe, or false when there is no pipe
 *   'year'          - digits found in trailing parentheses, or '' when none found
 *   'Album'         - album name (text before the "(year)" part, or the whole heading)
 *   'album_name_lc' - 'Album' passed through LyricsUtils::lowercase()
 */
public function getAlbumData($heading) {
    // 'year' is initialised up front so the key is present on every code path;
    // previously it was missing for headings with neither a pipe nor a "(year)".
    $result = [
        'title' => false,
        'year' => '',
    ];

    $headingArr = explode('|', trim($heading, '][= '));
    if (count($headingArr) > 1) {
        $result['title'] = trim($headingArr[0]);
        $heading = $headingArr[1];
    }
    // NOTE(review): without a pipe, $heading is still the untrimmed input, so any
    // wiki markup around it ends up in 'Album' - confirm callers pass clean headings.

    // Greedy (.+) means the last "(digits)" group in the heading is taken as the year.
    if (preg_match('#(.+)\\(([\\d]+)\\)#', $heading, $matches)) {
        $result['Album'] = trim($matches[1]);
        $result['year'] = trim($matches[2]);
    } else {
        $result['Album'] = trim($heading);
    }

    $result['album_name_lc'] = LyricsUtils::lowercase($result['Album']);

    return $result;
}