/**
 * Returns information to summarize articles with a snippet of text and a
 * picture if applicable.
 *
 * Reads a comma-separated list of article IDs from the 'ids' request value and
 * stores the result in $this->summary, keyed by article ID. An ID that does not
 * resolve to a title is reported under $summary[<city id>]['error'] and does
 * not stop the remaining IDs from being summarized.
 */
public function blurb() {
    wfProfileIn(__METHOD__);

    $idStr = $this->request->getVal('ids');
    $ids = explode(',', (string) $idStr);
    $summary = array();

    # Iterate through each title per wiki ID
    foreach ($ids as $id) {
        $title = Title::newFromID($id);
        if (empty($title)) {
            $summary[$this->wg->CityId]['error'][] = "Unable to find title for ID {$id}";
            // Errors are collected in a list, so one bad ID must not drop
            // every ID that follows it.
            continue;
        }

        $service = new ArticleService($id);
        $snippet = $service->getTextSnippet();

        $imageServing = new ImageServing(array($id), 200, array('w' => 2, 'h' => 1));
        $images = $imageServing->getImages(1); // get just one image per article

        // Only read the first image when the entry really carries a URL
        $imageURL = '';
        if (!empty($images[$id][0]['url'])) {
            $imageURL = $images[$id][0]['url'];
        }

        $summary[$id] = array(
            'wiki' => $this->wg->Sitename,
            'wikiUrl' => $this->wg->Server,
            'titleDBkey' => $title->getPrefixedDBkey(),
            'titleText' => $title->getFullText(),
            'articleId' => $title->getArticleID(),
            'imageUrl' => $imageURL,
            'url' => $title->getFullURL(),
            'snippet' => $snippet,
        );
    }

    wfProfileOut(__METHOD__);

    $this->summary = $summary;
}
/**
 * Sets the response values used to render a map marker for the page named in
 * the 'title' request value: 'title', 'imgUrl', 'articleUrl' and 'textSnippet'.
 *
 * @return array|null empty array when the title cannot be resolved or does not
 *                    exist; otherwise nothing is returned and the values are
 *                    set on the response
 */
public function getForMap() {
    $pageName = $this->request->getVal('title');
    $oTitle = Title::newFromText($pageName);
    if (empty($oTitle) || !$oTitle->exists()) {
        return array();
    }

    $pageId = $oTitle->getArticleId();

    // TODO: getImages() are not cached
    $imageServing = new ImageServing(array($pageId), 100, array('w' => 1, 'h' => 1));
    $images = $imageServing->getImages(1);
    if (!empty($images[$pageId][0]['url'])) {
        $imageUrl = $images[$pageId][0]['url'];
    } else {
        $imageUrl = '';
    }

    $oArticleService = new ArticleService();
    $oArticleService->setArticleById($pageId);
    $textSnippet = $oArticleService->getTextSnippet(120);

    // Cut the snippet back to its last space. mb_strrpos() returns false when
    // there is no space at all; in that case keep the snippet whole instead of
    // truncating it to an empty string.
    $strPos = mb_strrpos($textSnippet, ' ');
    if ($strPos !== false) {
        $textSnippet = mb_substr($textSnippet, 0, $strPos);
    }
    $textSnippet .= ' ...';

    $this->setVal('title', $oTitle->getText());
    $this->setVal('imgUrl', $imageUrl);
    $this->setVal('articleUrl', $oTitle->getLocalUrl());
    $this->setVal('textSnippet', $textSnippet);
}
/**
 * Builds a text snippet for the article behind $this->commentTitle.
 *
 * @return mixed whatever ArticleService::getTextSnippet() yields with its
 *               default arguments (presumably a string)
 */
protected function getDetails() {
    $context = \RequestContext::getMain();
    $commentArticle = \Article::newFromTitle($this->commentTitle, $context);
    $snippetService = new \ArticleService($commentArticle);

    return $snippetService->getTextSnippet();
}
/**
 * Adds a "description" meta entry to the output page.
 *
 * On the main page the MediaWiki:Description message is used when it is set;
 * in every other case the description is a 100-character snippet of the
 * current article.
 *
 * @param OutputPage $out
 * @param string $text
 * @return bool always true
 */
function wfArticleMetaDescription(&$out, &$text) {
    global $wgTitle;

    wfProfileIn(__METHOD__);

    $mainPageName = wfMsgForContent('Mainpage');

    // A main page name containing a colon is compared against the full title
    // text, otherwise against the plain title text.
    $nameHasColon = strpos($mainPageName, ':') !== false;
    $currentTitleText = $nameHasColon ? $wgTitle->getFullText() : $wgTitle->getText();

    $customDescription = null;
    if (strcmp($currentTitleText, $mainPageName) == 0) {
        // we're on Main Page, check MediaWiki:Description message
        $customDescription = wfMsg("Description");
    }

    if ($customDescription == null || wfEmptyMsg("Description", $customDescription)) {
        $snippetLength = 100;
        $snippetService = new ArticleService($wgTitle->getArticleID());
        $description = $snippetService->getTextSnippet($snippetLength);
    } else {
        // MediaWiki:Description message found, use it
        $description = $customDescription;
    }

    if (!empty($description)) {
        $out->addMeta('description', htmlspecialchars($description));
    }

    wfProfileOut(__METHOD__);

    return true;
}
/**
 * Builds a text snippet for the article behind $this->postTitle.
 *
 * @return mixed whatever ArticleService::getTextSnippet() yields
 *               (presumably a string)
 */
protected function getDetails() {
    $context = \RequestContext::getMain();
    $postArticle = \Article::newFromTitle($this->postTitle, $context);
    $snippetService = new \ArticleService($postArticle);

    // Include +3 characters here for the ellipsis added when we have to truncate
    $length = 303;
    $breakLimit = 500;

    return $snippetService->getTextSnippet($length, $breakLimit);
}
/**
 * Fills in the main image and description used for the page's Open Graph
 * data, and adds the Facebook page id meta entry when one is configured.
 *
 * The 'OpenGraphMeta:beforeCustomFields' hook may supply the image title
 * and/or the description up front; anything it leaves as null is looked up
 * here.
 *
 * @param object $out output page; mMainImage / mDescription are written on it
 * @param object $parserOutput parser output whose 'mainImage' / 'description'
 *                             properties are set
 * @param mixed $data not used by this function
 */
function egOgmcParserOutputApplyValues($out, $parserOutput, $data) {
    global $wgTitle, $wgBigWikiaLogo;

    $articleId = $wgTitle->getArticleID();

    $imageTitle = null;
    $description = null;
    wfRunHooks('OpenGraphMeta:beforeCustomFields', array($articleId, &$imageTitle, &$description));

    // Only use ImageServing if no main image is already specified. This lets
    // people override the image with the parser function:
    // [[File:{{#setmainimage:Whatever.png}}]].
    if (!isset($out->mMainImage)) {
        if ($imageTitle === null) {
            // Get image from ImageServing
            // TODO: Make sure we automatically respect these restrictions from Facebook:
            // "An image URL which should represent your object within the graph.
            // The image must be at least 50px by 50px and have a maximum aspect ratio of 3:1.
            // We support PNG, JPEG and GIF formats."
            $imageServing = F::build('ImageServing', array($articleId));
            $servedImages = $imageServing->getImages(1);
            foreach ($servedImages as $articleImages) {
                $imageTitle = Title::newFromText($articleImages[0]['name'], NS_FILE);
            }
        }

        // If ImageServing was not able to deliver a good match, fall back to
        // the wiki's wordmark.
        $noImageYet = empty($imageTitle) && !is_object($imageTitle);
        if ($noImageYet && F::app()->checkSkin('oasis')) {
            $themeSettings = new ThemeSettings();
            $wordmarkSettings = $themeSettings->getSettings();
            if ($wordmarkSettings["wordmark-type"] == "graphic") {
                $imageTitle = Title::newFromText($wordmarkSettings['wordmark-image-name'], NS_FILE);
            }
        }

        if (!empty($imageTitle) && is_object($imageTitle)) {
            // We have a Title object for an image: convert it to an Image
            // object and store it in mMainImage.
            $imageFile = wfFindFile($imageTitle);
            if ($imageFile !== false) {
                $parserOutput->setProperty('mainImage', $imageFile);
                $out->mMainImage = $parserOutput->getProperty('mainImage');
            }
        } else {
            // Fall back to using a Wikia logo. There aren't any as "File:"
            // pages, so we use a new config var for one that is being added
            // to skins/common.
            $fallbackLogoUrl = wfReplaceImageServer($wgBigWikiaLogo);
            $parserOutput->setProperty('mainImage', $fallbackLogoUrl);
            $out->mMainImage = $parserOutput->getProperty('mainImage');
        }
    }

    // Get description from ArticleService
    if ($description === null) {
        $snippetLength = 100;
        $snippetService = new ArticleService($articleId);
        $description = $snippetService->getTextSnippet($snippetLength);
    }

    if (!empty($description)) {
        $parserOutput->setProperty('description', $description);
        $out->mDescription = $parserOutput->getProperty('description');
    }

    $facebookPageId = Wikia::getFacebookDomainId();
    if ($facebookPageId) {
        $out->addMeta('property:fb:page_id', $facebookPageId);
    }
}
/**
 * Fills in the description used for the page's Open Graph data.
 *
 * The 'OpenGraphMeta:beforeCustomFields' hook may supply the description up
 * front; when it leaves it as null, a 500-character snippet of the current
 * title is used instead.
 *
 * @param OutputPage $out mDescription is written on it
 * @param ParserOutput $parserOutput its 'description' property is set
 * @param mixed $data not used by this function
 */
function egOgmcParserOutputApplyValues(OutputPage $out, ParserOutput $parserOutput, $data) {
    wfProfileIn(__METHOD__);

    global $wgTitle;

    $articleId = $wgTitle->getArticleID();

    // The hook receives both slots by reference; the image slot is not used
    // any further in this function.
    $imageTitle = null;
    $description = null;
    wfRunHooks('OpenGraphMeta:beforeCustomFields', array($articleId, &$imageTitle, &$description));

    // Get description from ArticleService
    if ($description === null) {
        $snippetLength = 500;
        $snippetService = new ArticleService($wgTitle);
        $description = $snippetService->getTextSnippet($snippetLength);
    }

    if (!empty($description)) {
        $parserOutput->setProperty('description', $description);
        $out->mDescription = $parserOutput->getProperty('description');
    }

    wfProfileOut(__METHOD__);
}
/**
 * Adds a "description" meta entry to the output page, at most once per request.
 *
 * On the main page the MediaWiki:Description message is used when it is set;
 * in every other case the description is a 100-character snippet of the
 * current article.
 *
 * @param OutputPage $out
 * @param string $text
 * @return bool always true
 */
function wfArticleMetaDescription(&$out, &$text) {
    // Whether the description has already been added
    static $addedToPage = false;

    wfProfileIn(__METHOD__);

    $wg = F::app()->wg;

    // The OutputPage::addParserOutput method calls the OutputPageBeforeHTML hook which can happen
    // more than once in a request. Make sure we don't add two <meta> tags
    // https://wikia-inc.atlassian.net/browse/VID-2102
    if ($addedToPage) {
        wfProfileOut(__METHOD__);
        return true;
    }

    $mainPageName = wfMessage('Mainpage')->inContentLanguage()->text();

    // A main page name containing a colon is compared against the full title
    // text, otherwise against the plain title text.
    $nameHasColon = strpos($mainPageName, ':') !== false;
    $currentTitleText = $nameHasColon ? $wg->Title->getFullText() : $wg->Title->getText();

    $customDescription = null;
    if (strcmp($currentTitleText, $mainPageName) == 0) {
        // we're on Main Page, check MediaWiki:Description message
        $customDescription = wfMessage('Description')->text();
    }

    if ($customDescription == null || wfEmptyMsg('Description', $customDescription)) {
        $snippetLength = 100;
        $currentArticle = new Article($wg->Title);
        $snippetService = new ArticleService($currentArticle);
        $description = $snippetService->getTextSnippet($snippetLength);
    } else {
        // MediaWiki:Description message found, use it
        $description = $customDescription;
    }

    if (!empty($description)) {
        $out->addMeta('description', htmlspecialchars($description));
        $addedToPage = true;
    }

    wfProfileOut(__METHOD__);

    return true;
}
/**
 * Get a snippet of article text.
 *
 * @param int $articleId Article ID
 * @param int $length snippet length (in characters)
 * @return mixed whatever ArticleService::getTextSnippet() yields
 *               (presumably a string)
 */
public function getArticleSnippet($articleId, $length = 100) {
    $service = new ArticleService($articleId);

    // Forward the requested length; it used to be silently ignored
    return $service->getTextSnippet($length);
}
/**
 * Returns an image or a text snippet for the category with the given page id.
 *
 * Files in the category are tried first; when none can be found, up to ten
 * main-namespace articles in the category are checked for an image, and the
 * first non-empty article snippet found along the way is kept as a fallback.
 *
 * @param $iCategoryId int category pageId
 * @return array with 'imageUrl' and 'snippetText' keys; both are empty strings
 *               when the category cannot be loaded or has nothing to show
 */
protected function getCategoryImageOrSnippet($iCategoryId) {
    $title = Title::newFromID($iCategoryId);
    if (empty($title)) {
        // No title for this id: nothing to look up
        return array('imageUrl' => '', 'snippetText' => '');
    }

    $sCategoryDBKey = $title->getDBKey();
    $maxChecks = F::App()->wg->maxCategoryExhibitionSubcatChecks;

    // tries to get image from images in category
    $result = CategoryDataService::getAlphabetical($sCategoryDBKey, NS_FILE, 1);
    if (!empty($result)) {
        $counter = 0;
        foreach ($result as $item) {
            if ($counter > $maxChecks) {
                break;
            }

            $imageServing = new ImageServing(
                array($item['page_id']),
                $this->thumbWidth,
                array("w" => $this->thumbWidth, "h" => $this->thumbHeight)
            );
            $itemTitle = Title::newFromID($item['page_id']);
            $image = wfFindFile($itemTitle);
            if (!empty($image)) {
                $imageSrc = wfReplaceImageServer(
                    $image->getThumbUrl($imageServing->getCut($image->width, $image->height) . "-" . $image->getName())
                );
                return array('imageUrl' => (string) $imageSrc, 'snippetText' => '');
            }

            $counter++;
        }
    }

    // if no images found, tries to get image or snippet from article
    $result = CategoryDataService::getAlphabetical($sCategoryDBKey, NS_MAIN, 10);
    if (empty($result)) {
        return array('imageUrl' => '', 'snippetText' => '');
    }

    $counter = 0;
    $snippetText = '';
    $imageUrl = '';
    foreach ($result as $item) {
        if ($counter > $maxChecks) {
            break;
        }

        $imageUrl = $this->getImageFromPageId($item['page_id']);
        if (!empty($imageUrl)) {
            break;
        }

        // Keep the first non-empty snippet as a fallback while still looking
        // for an image in the remaining articles
        if (empty($snippetText)) {
            $snippetService = new ArticleService($item['page_id']);
            $snippetText = $snippetService->getTextSnippet();
        }

        $counter++;
    }

    return array('imageUrl' => $imageUrl, 'snippetText' => $snippetText);
}
/**
 * Produces a short text snippet for the given article, profiling the call.
 *
 * @param $articleId int Article id
 * @param $length int Desired snippet length
 * @return string
 */
protected function getArticleSnippet($articleId, $length = 150) {
    wfProfileIn(__METHOD__);

    $snippetService = new ArticleService($articleId);
    $snippet = $snippetService->getTextSnippet($length);

    wfProfileOut(__METHOD__);

    return $snippet;
}
/**
 * Returns data needed to render marker for a given place on a map with multiple places
 *
 * This method returns article's URL, text snippet and an image for current place
 *
 * TODO: add caching
 *
 * @return array|bool false when this place is empty; an empty array when the
 *                    page title cannot be loaded or does not exist; otherwise
 *                    an array with the keys 'lat', 'lan', 'label', 'imageUrl',
 *                    'articleUrl' and 'textSnippet'
 */
public function getForMap() {
    if ($this->isEmpty()) {
        return false;
    }

    wfProfileIn(__METHOD__);

    $pageId = $this->getPageId();
    $oTitle = Title::newFromID($pageId);
    if (empty($oTitle) || !$oTitle->exists()) {
        wfProfileOut(__METHOD__);
        return array();
    }

    // TODO: getImages() are not cached
    $imageServing = new ImageServing(array($pageId), 100, array('w' => 1, 'h' => 1));
    $images = $imageServing->getImages(1);
    if (!empty($images[$pageId][0]['url'])) {
        $imageUrl = $images[$pageId][0]['url'];
    } else {
        $imageUrl = '';
    }

    $oArticleService = new ArticleService();
    $oArticleService->setArticleById($pageId);
    $textSnippet = $oArticleService->getTextSnippet(120);

    // Cut the snippet back to its last space. mb_strrpos() returns false when
    // there is no space at all; in that case keep the snippet whole instead of
    // truncating it to an empty string.
    $strPos = mb_strrpos($textSnippet, ' ');
    if ($strPos !== false) {
        $textSnippet = mb_substr($textSnippet, 0, $strPos);
    }
    $textSnippet .= ' ...';

    $ret = array(
        'lat' => $this->getLat(),
        'lan' => $this->getLon(),
        'label' => $oTitle->getText(),
        'imageUrl' => $imageUrl,
        'articleUrl' => $oTitle->getLocalUrl(),
        'textSnippet' => $textSnippet,
    );

    wfProfileOut(__METHOD__);

    return $ret;
}
/**
 * Produces a text snippet for a page, after mapping the given page ID to its
 * canonical page ID.
 *
 * @param int $pageId
 * @param int $snippetLength
 * @return string
 */
public function getSnippetForPageId($pageId, $snippetLength = 250) {
    $resolvedPageId = $this->getCanonicalPageIdFromPageId($pageId);
    $snippetService = new \ArticleService($resolvedPageId);
    $snippet = $snippetService->getTextSnippet($snippetLength);

    return $snippet;
}
<?php
/**
 * Lets us test the performance of a given snippetting approach
 *
 * Usage: --id=<articleId> (or -i <articleId>), optionally --service=solr
 * (or -ssolr) to time the Solr-backed snippet instead of the default one.
 * Prints the elapsed time in seconds, the approach used and the snippet.
 *
 * @package MediaWiki
 * @addtopackage maintenance
 */
ini_set("include_path", dirname(__FILE__) . "/../../../../maintenance/");
require_once "commandLine.inc";

$options = getopt('i:s::fc::', ['id:', 'service::', 'force', 'conf::']);

// Make sure the snippet is really computed rather than served from a cache
global $wgEnableParserCache, $wgAllowMemcacheReads;
$wgEnableParserCache = false;
$wgAllowMemcacheReads = false;

// getopt() keys each value by the flag that was actually used, so the short
// and the long form of an option have to be looked up separately.
$articleId = null;
if (isset($options['id'])) {
    $articleId = $options['id'];
} elseif (isset($options['i'])) {
    $articleId = $options['i'];
}

if ($articleId === null) {
    fwrite(STDERR, "Usage: php " . basename(__FILE__) . " --id=<articleId> [--service=solr]\n");
    exit(1);
}

$serviceName = null;
if (isset($options['service'])) {
    $serviceName = $options['service'];
} elseif (isset($options['s'])) {
    $serviceName = $options['s'];
}

$mws = new Wikia\Search\MediaWikiService();
$service = new ArticleService($mws->getCanonicalPageIdFromPageId($articleId));

$start = microtime(true);
$type = 'default';
if ($serviceName == 'solr') {
    $type = 'solr';
    $response = $service->getTextSnippetFromSolr();
} else {
    $response = $service->getTextSnippet();
}

echo sprintf("%.2f (%s) %s\n", microtime(true) - $start, $type, $response);
/**
 * Collects details (title, namespace, URL, latest revision, comment count,
 * optional thumbnail and abstract) for a set of articles.
 *
 * Per-article details are read from and written to memcache; thumbnails and
 * abstracts are added afterwards so their size/length can vary without
 * touching the cached details.
 *
 * @param int|array $articleIds article ID or list of article IDs
 * @param array $articleKeys title DB keys of additional articles to include
 * @param int $width thumbnail width passed to getArticlesThumbnails()
 * @param int $height thumbnail height passed to getArticlesThumbnails()
 * @param int $abstract abstract length; no abstract is generated unless > 0
 * @param bool $strict when true a plain list is returned instead of an
 *                     array keyed by article ID
 * @return array
 */
protected function getArticlesDetails($articleIds, $articleKeys = [], $width = 0, $height = 0, $abstract = 0, $strict = false) {
    $articles = is_array($articleIds) ? $articleIds : [$articleIds];
    $ids = [];
    $collection = [];
    $resultingCollectionIds = [];
    $titles = [];

    foreach ($articles as $i) {
        //data is cached on a per-article basis
        //to avoid one article requiring purging
        //the whole collection
        $cache = $this->wg->Memc->get(self::getCacheKey($i, self::DETAILS_CACHE_ID));
        if (!is_array($cache)) {
            $ids[] = $i;
        } else {
            $collection[$i] = $cache;
            $resultingCollectionIds[] = $i;
        }
    }

    if (count($ids) > 0) {
        $titles = Title::newFromIDs($ids);
    }

    if (!empty($articleKeys)) {
        foreach ($articleKeys as $titleKey) {
            $titleObj = Title::newFromDbKey($titleKey);
            if ($titleObj instanceof Title && $titleObj->exists()) {
                $titles[] = $titleObj;
            }
        }
    }

    if (!empty($titles)) {
        foreach ($titles as $t) {
            $fileData = [];
            if ($t->getNamespace() == NS_FILE) {
                $fileData = $this->getFromFile($t->getText());
            } elseif ($t->getNamespace() == NS_MAIN) {
                $fileData = ['type' => static::ARTICLE_TYPE];
            } elseif ($t->getNamespace() == NS_CATEGORY) {
                $fileData = ['type' => static::CATEGORY_TYPE];
            }

            $id = $t->getArticleID();
            $revId = $t->getLatestRevID();
            $rev = Revision::newFromId($revId);
            if (!empty($rev)) {
                $collection[$id] = [
                    'id' => $id,
                    'title' => $t->getText(),
                    'ns' => $t->getNamespace(),
                    'url' => $t->getLocalURL(),
                    'revision' => [
                        'id' => $revId,
                        'user' => $rev->getUserText(Revision::FOR_PUBLIC),
                        'user_id' => $rev->getUser(Revision::FOR_PUBLIC),
                        'timestamp' => wfTimestamp(TS_UNIX, $rev->getTimestamp()),
                    ],
                ];
                $collection[$id]['comments'] = class_exists('ArticleCommentList')
                    ? ArticleCommentList::newFromTitle($t)->getCountAllNested()
                    : false;
                //add file data
                $collection[$id] = array_merge($collection[$id], $fileData);
                $resultingCollectionIds[] = $id;
                $this->wg->Memc->set(self::getCacheKey($id, self::DETAILS_CACHE_ID), $collection[$id], 86400);
            } else {
                $dataLog = [
                    'titleText' => $t->getText(),
                    'articleId' => $t->getArticleID(),
                    'revId' => $revId,
                ];
                WikiaLogger::instance()->info('No revision found for article', $dataLog);
            }
        }
        $titles = null;
    }

    //ImageServing has separate caching
    //so processing it separately allows to
    //make the thumbnail's size parametrical without
    //invalidating the titles details' cache
    //or the need to duplicate it
    $thumbnails = $this->getArticlesThumbnails($resultingCollectionIds, $width, $height);
    $articles = null;

    //ArticleService has separate caching
    //so processing it separately allows to
    //make the length parametrical without
    //invalidating the titles details' cache
    //or the need to duplicate it
    foreach ($collection as $id => &$details) {
        if ($abstract > 0) {
            $as = new ArticleService($id);
            $snippet = $as->getTextSnippet($abstract);
        } else {
            $snippet = null;
        }
        $details['abstract'] = $snippet;
        if (isset($thumbnails[$id])) {
            $details = array_merge($details, $thumbnails[$id]);
        }
    }
    // Break the reference so the last element of $collection is not left
    // aliased to $details for the rest of the method
    unset($details);

    $collection = $this->appendMetadata($collection);
    $thumbnails = null;

    //The collection can be in random order (depends if item was found in memcache or not)
    //lets preserve original order even if we are not using strict mode:
    //to keep things consistent over time (some other APIs that are using sorted results are using
    //ArticleApi::getDetails to fetch info about articles)

    //typecasting to convert falsy values into empty array (array_diff and
    //array_merge require arrays only)
    $requestedIds = (array) $articleIds;
    $orderedIdsFromTitles = array_diff(array_keys($collection), $requestedIds);
    $orderedIds = array_merge($requestedIds, (array) $orderedIdsFromTitles);
    $collection = $this->preserveOriginalOrder($orderedIds, $collection);

    //if strict - return array instead of associative array (dict)
    if ($strict) {
        return array_values($collection);
    } else {
        return $collection;
    }
}
/**
 * @desc Builds the description used for the article's meta tag.
 *
 * On the main page the MediaWiki:Description message wins when it is set;
 * otherwise the description is a text snippet of the article.
 * This is mostly copied from the ArticleMetaDescription extension.
 *
 * @param int $articleId
 * @param int $descLength
 * @return string
 * @throws NotFoundApiException when no article can be loaded for the given ID
 */
private function getArticleDescription($articleId, $descLength = 100) {
    $article = Article::newFromID($articleId);
    if (!($article instanceof Article)) {
        throw new NotFoundApiException();
    }

    // we're on Main Page, check MediaWiki:Description message
    $customDescription = $article->getTitle()->isMainPage()
        ? wfMessage('Description')->text()
        : null;

    if ($customDescription == null || wfEmptyMsg('Description', $customDescription)) {
        $snippetService = new ArticleService($article);
        return $snippetService->getTextSnippet($descLength);
    }

    // MediaWiki:Description message found, use it
    return $customDescription;
}
/**
 * Builds the display data for one page: image URL or, when there is no image,
 * a text snippet, plus thumbnail dimensions, title and URL.
 *
 * The result is kept in memcache for a day under a key that includes, among
 * other things, the page id and the value of getTouched() for the title.
 *
 * @param int $pageId
 * @return array|bool false when no title exists for the page id
 */
protected function getArticleData($pageId) {
    global $wgVideoHandlersVideosMigrated;

    $pageTitle = Title::newFromID($pageId);
    if (!($pageTitle instanceof Title)) {
        return false;
    }

    $memc = F::App()->wg->memc;
    $cacheKey = wfSharedMemcKey(
        'category_exhibition_category_cache_1',
        $pageId,
        F::App()->wg->cityId,
        $this->isVerify(),
        $wgVideoHandlersVideosMigrated ? 1 : 0,
        $this->getTouched($pageTitle)
    );

    $cached = $memc->get($cacheKey);
    if (!empty($cached)) {
        return $cached;
    }

    // The snippet is only needed when the page has no image to show
    $imageUrl = $this->getImageFromPageId($pageId);
    $snippetText = '';
    if (empty($imageUrl)) {
        $snippetService = new ArticleService($pageTitle);
        $snippetText = $snippetService->getTextSnippet();
    }

    $pageData = array(
        'id' => $pageId,
        'img' => $imageUrl,
        'width' => $this->thumbWidth,
        'height' => $this->thumbHeight,
        'snippet' => $snippetText,
        'title' => $this->getTitleForElement($pageTitle),
        'url' => $pageTitle->getFullURL(),
    );

    // will be purged elsewhere after edit
    $oneDayInSeconds = 60 * 60 * 24;
    $memc->set($cacheKey, $pageData, $oneDayInSeconds);

    return $pageData;
}
/**
 * Returns display data for a media title, computing it on first use and
 * keeping it in $this->mediaCache (keyed by the title's DB key) afterwards.
 *
 * @param Title $media
 * @param int $length length passed to the description snippet
 * @return array|bool array with 'title', 'desc', 'type', 'meta' and 'thumbUrl';
 *                    false when no file is found or it cannot be rendered
 */
private function getMediaDataFromCache(Title $media, $length = 256) {
    wfProfileIn(__METHOD__);

    $cacheKey = $media->getDBKey();

    if (!isset($this->mediaCache[$cacheKey])) {
        $mediaData = false;

        $file = wfFindFile($media);
        if (!empty($file) && $file->canRender()) {
            $snippetService = new ArticleService($media);
            $isVideo = WikiaFileHelper::isFileTypeVideo($file);

            // Only videos get a handler and a thumbnail
            $videoHandler = false;
            $thumb = null;
            if ($isVideo) {
                /** @var $videoHandler VideoHandler */
                $videoHandler = $file->getHandler();
                $thumb = $file->transform(array('width' => 320), 0);
            }

            $mediaData = array(
                'title' => $media->getText(),
                'desc' => $snippetService->getTextSnippet($length),
                'type' => $isVideo ? self::MEDIA_TYPE_VIDEO : self::MEDIA_TYPE_IMAGE,
                'meta' => $videoHandler
                    ? array_merge($videoHandler->getVideoMetadata(true), $videoHandler->getEmbedSrcData())
                    : array(),
                'thumbUrl' => !empty($thumb) ? $thumb->getUrl() : false,
            );
        }

        $this->mediaCache[$cacheKey] = $mediaData;
    }

    wfProfileOut(__METHOD__);

    return $this->mediaCache[$cacheKey];
}