/**
  * Builds the API interactor and the data value formatter for one Wikibase endpoint.
  *
  * @param string $wikibaseApi passed unchanged to the MediawikiApi constructor
  */
 public function __construct($wikibaseApi)
 {
     $mediawikiApi = new MediawikiApi($wikibaseApi);
     $factory = new WikibaseFactory($mediawikiApi);
     // The interactor receives both the raw API handle and a revisions getter built on it.
     $this->apiInteractor = new ApiInteractor($mediawikiApi, $factory->newRevisionsGetter());
     $this->dataValuesFormatter = new DataValuesFormatter($this->apiInteractor);
 }
 /**
  * Adds a reference for $sourceUrl to statements whose entity-id value is
  * confirmed by a microdata string matching that entity's configured regex.
  *
  * For every property in $this->propMap the non-URL microdata strings are
  * collected, then each statement of that property on $item is checked: the
  * serialized id of its value must be a key of the property's regex map and
  * the regex must match the microdata string.
  *
  * @param MicroData $microData microdata extracted from the page at $sourceUrl
  * @param mixed $item entity exposing getStatements()
  * @param string $sourceUrl URL used to build the new reference and the edit summary
  *
  * @return int number of references that were saved successfully
  */
 public function addReferences(MicroData $microData, $item, $sourceUrl)
 {
     $referenceCounter = 0;
     foreach ($this->propMap as $propertyIdString => $schemaPropertyString) {
         if (!isset($this->regexMap[$propertyIdString])) {
             // No regexes configured for this property, so nothing could ever match
             continue;
         }
         $regexMap = $this->regexMap[$propertyIdString];
         $values = array();
         foreach ($microData->getProperty($schemaPropertyString, MicroData::PROP_STRING) as $propertyValue) {
             // Don't match URLs!
             if (strpos($propertyValue, '//') !== false) {
                 continue;
             }
             $values[] = $propertyValue;
         }
         $statements = $item->getStatements()->getByPropertyId(new PropertyId($propertyIdString));
         foreach ($values as $value) {
             // Statements are objects, so iterating by value still mutates the
             // instances held by $item; no by-reference foreach is needed.
             foreach ($statements->getIterator() as $statement) {
                 $mainSnak = $statement->getMainSnak();
                 if (!$mainSnak instanceof PropertyValueSnak) {
                     // Ignore some and no value statements
                     continue;
                 }
                 if (DataModelUtils::statementHasReferenceForUrlWithSameDomain($statement, $sourceUrl)) {
                     // Ignore statements that already have this URL domain as a ref
                     continue;
                 }
                 /** @var EntityIdValue $valueEntityIdValue */
                 $valueEntityIdValue = $mainSnak->getDataValue();
                 /** @var EntityId $valueEntityId */
                 $valueEntityId = $valueEntityIdValue->getEntityId();
                 $valueEntityIdString = $valueEntityId->getSerialization();
                 if (!array_key_exists($valueEntityIdString, $regexMap)) {
                     //TODO log that this ItemId is missing?
                     continue;
                 }
                 if (!preg_match($regexMap[$valueEntityIdString], $value)) {
                     // ItemId regex didn't match this schema value
                     continue;
                 }
                 // Add the new reference!
                 $newReference = DataModelUtils::getReferenceForUrl($sourceUrl);
                 try {
                     $this->wikibaseFactory->newReferenceSetter()->set($newReference, $statement, null, new EditInfo(urldecode($sourceUrl), EditInfo::NOTMINOR, EditInfo::BOT));
                     // Keep the in-memory item in sync so later checks see this reference
                     $statement->addNewReference($newReference->getSnaks());
                     $referenceCounter++;
                 } catch (UsageException $e) {
                     // Best effort: a rejected edit is skipped and not counted
                 }
             }
         }
     }
     return $referenceCounter;
 }
 /**
  * Adds a reference for $sourceUrl to statements whose time value equals a
  * date found in the microdata.
  *
  * For every property in $this->propMap the microdata strings are parsed as
  * dates (day granularity, via the 'Y m d' format) and compared with the main
  * snak value of each statement of that property on $item.
  *
  * @param MicroData $microData microdata extracted from the page at $sourceUrl
  * @param mixed $item entity exposing getStatements()
  * @param string $sourceUrl URL used to build the new reference and the edit summary
  *
  * @return int number of references that were saved successfully
  */
 public function addReferences(MicroData $microData, $item, $sourceUrl)
 {
     $referenceCounter = 0;
     foreach ($this->propMap as $propertyIdString => $schemaPropertyString) {
         /** @var TimeValue[] $timeValues */
         $timeValues = array();
         foreach ($microData->getProperty($schemaPropertyString, MicroData::PROP_STRING) as $propertyValue) {
             $rawDate = trim($propertyValue);
             if ($rawDate === '') {
                 // new DateTime('') means "now"; a blank value must not be
                 // mistaken for today's date.
                 continue;
             }
             try {
                 $date = new DateTime($rawDate);
                 $timeValues[] = $this->timeParser->parse($date->format('Y m d'));
             } catch (Exception $e) {
                 // Ignore failed parsing
             }
         }
         $statements = $item->getStatements()->getByPropertyId(new PropertyId($propertyIdString));
         foreach ($timeValues as $timeValue) {
             // Statements are objects, so iterating by value still mutates the
             // instances held by $item; no by-reference foreach is needed.
             foreach ($statements->getIterator() as $statement) {
                 $mainSnak = $statement->getMainSnak();
                 if (!$mainSnak instanceof PropertyValueSnak) {
                     // Ignore some and no value statements
                     continue;
                 }
                 if (DataModelUtils::statementHasReferenceForUrlWithSameDomain($statement, $sourceUrl)) {
                     // Ignore statements that already have this URL domain as a ref
                     continue;
                 }
                 if (!$timeValue->equals($mainSnak->getDataValue())) {
                     continue;
                 }
                 // Add the new reference!
                 $newReference = DataModelUtils::getReferenceForUrl($sourceUrl);
                 try {
                     $this->wikibaseFactory->newReferenceSetter()->set($newReference, $statement, null, new EditInfo(urldecode($sourceUrl), EditInfo::NOTMINOR, EditInfo::BOT));
                     // Keep the in-memory item in sync so later checks see this reference
                     $statement->addNewReference($newReference->getSnaks());
                     $referenceCounter++;
                 } catch (UsageException $e) {
                     // Best effort: a rejected edit is skipped and not counted
                 }
             }
         }
     }
     return $referenceCounter;
 }
 /**
  * Adds a reference for $sourceUrl to statements whose value item carries a
  * main term equal (case-insensitively) to a string taken from the microdata.
  *
  * Each entry of $this->callbackMap maps a property id to a callable that
  * extracts candidate strings from the microdata. Value items are fetched
  * through the item lookup and kept in an in-memory cache that is reset
  * whenever a different $item is processed.
  *
  * @param MicroData $microData microdata extracted from the page at $sourceUrl
  * @param mixed $item entity exposing getId() and getStatements()
  * @param string $sourceUrl URL used to build the new reference and the edit summary
  *
  * @return int number of references that were saved successfully
  */
 public function addReferences(MicroData $microData, $item, $sourceUrl)
 {
     // Only cache entity lookup stuff per item we are adding references for!
     // (but it can be reused for multiple source URLs)
     $itemId = $item->getId();
     if (!$itemId->equals($this->lastEntityId)) {
         $this->inMemoryEntityLookup = new InMemoryEntityLookup();
         // Remember which item the cache belongs to; without this the cache
         // would be rebuilt on every call.
         $this->lastEntityId = $itemId;
     }
     $referenceCounter = 0;
     foreach ($this->callbackMap as $propertyIdString => $valueGetterFunction) {
         $values = $valueGetterFunction($microData);
         $statements = $item->getStatements()->getByPropertyId(new PropertyId($propertyIdString));
         foreach ($values as $value) {
             $lowerCaseValue = strtolower($value);
             // Statements are objects, so iterating by value still mutates the
             // instances held by $item; no by-reference foreach is needed.
             foreach ($statements->getIterator() as $statement) {
                 $mainSnak = $statement->getMainSnak();
                 if (!$mainSnak instanceof PropertyValueSnak) {
                     // Ignore some and no value statements
                     continue;
                 }
                 // Run this check before the item lookup so already-referenced
                 // statements never trigger a fetch.
                 if (DataModelUtils::statementHasReferenceForUrlWithSameDomain($statement, $sourceUrl)) {
                     // Ignore statements that already have this URL domain as a ref
                     continue;
                 }
                 /** @var EntityIdValue $valueEntityIdValue */
                 $valueEntityIdValue = $mainSnak->getDataValue();
                 /** @var ItemId $valueItemId */
                 $valueItemId = $valueEntityIdValue->getEntityId();
                 if ($this->inMemoryEntityLookup->hasEntity($valueItemId)) {
                     $valueItem = $this->inMemoryEntityLookup->getEntity($valueItemId);
                 } else {
                     $valueItem = $this->wikibaseFactory->newItemLookup()->getItemForId($valueItemId);
                     $this->inMemoryEntityLookup->addEntity($valueItem);
                 }
                 $mainTerms = DataModelUtils::getMainTermsAsLowerCaseStrings($valueItem->getFingerprint());
                 // Strict comparison: numeric-looking strings such as "1e3" and
                 // "1000" must not be treated as equal.
                 if (!in_array($lowerCaseValue, $mainTerms, true)) {
                     // Ignore things that don't appear to have the correct value
                     continue;
                 }
                 // Add the new reference!
                 $newReference = DataModelUtils::getReferenceForUrl($sourceUrl);
                 try {
                     $this->wikibaseFactory->newReferenceSetter()->set($newReference, $statement, null, new EditInfo(urldecode($sourceUrl), EditInfo::NOTMINOR, EditInfo::BOT));
                     // Keep the in-memory item in sync so later checks see this reference
                     $statement->addNewReference($newReference->getSnaks());
                     $referenceCounter++;
                 } catch (UsageException $e) {
                     // Best effort: a rejected edit is skipped and not counted
                 }
             }
         }
     }
     return $referenceCounter;
 }
 /**
  * Removes every statement of the given property from the items returned by a
  * SPARQL query for that property (capped at 10000 items).
  *
  * Reads the 'user', 'wiki', 'sparql' and 'property' options.
  *
  * @param InputInterface $input
  * @param OutputInterface $output
  *
  * @return int 0 on success, -1 when the login fails
  *
  * @throws RuntimeException when the user or wiki is not in the config, or when
  *                          the SPARQL endpoint or property option is missing
  */
 protected function execute(InputInterface $input, OutputInterface $output)
 {
     $user = $input->getOption('user');
     $userDetails = $this->appConfig->offsetGet('users.' . $user);
     if ($userDetails === null) {
         throw new RuntimeException('User not found in config');
     }
     $wiki = $input->getOption('wiki');
     $wikiDetails = $this->appConfig->offsetGet('wikis.' . $wiki);
     if ($wikiDetails === null) {
         throw new RuntimeException('Wiki not found in config');
     }
     $sparql = $input->getOption('sparql');
     if (empty($sparql)) {
         throw new RuntimeException('SPARQL endpoint must be passed');
     }
     $this->setServices($wikiDetails['url'], $sparql);
     // Validate the raw option before building the PropertyId, so a missing
     // option produces the intended error instead of a constructor failure.
     $propertyString = $input->getOption('property');
     if ($propertyString === null || $propertyString === '') {
         throw new RuntimeException('No property given');
     }
     $property = new PropertyId($propertyString);
     $output->writeln('Running SPARQL query to find items to check');
     $queryBuilder = new QueryBuilder(array('wdt' => 'http://www.wikidata.org/prop/direct/'));
     $itemIds = $this->sparqlQueryRunner->getItemIdsFromQuery($queryBuilder->select('?item')->where('?item', 'wdt:' . $propertyString, '?value')->limit(10000)->__toString());
     $loggedIn = $this->wikibaseApi->login(new ApiUser($userDetails['username'], $userDetails['password']));
     if (!$loggedIn) {
         $output->writeln('Failed to log in to wikibase wiki');
         return -1;
     }
     $itemLookup = $this->wikibaseFactory->newItemLookup();
     $statementRemover = $this->wikibaseFactory->newStatementRemover();
     foreach ($itemIds as $itemId) {
         $item = $itemLookup->getItemForId($itemId);
         foreach ($item->getStatements()->getIterator() as $statement) {
             if ($statement->getPropertyId()->equals($property)) {
                 $statementRemover->remove($statement, new EditInfo('Removing Statement'));
             }
         }
     }
     return 0;
 }
 /**
  * Parses one TSV statement line and saves it to its subject entity, tagged
  * with an "imported from" (P143) reference.
  *
  * Nothing is saved when hasClaim() reports the statement as already present.
  * When findSubStatement() finds an existing sub-statement, its GUID is reused.
  *
  * @param string $statement TSV line understood by the statement parser
  *
  * @throws Exception when the entity cannot be loaded, when the matching
  *                   sub-statement has a meaningful reference, or when other
  *                   statements of the same property exist without a match
  */
 private function addTsvStatement($statement)
 {
     // Split the line into the subject id and the statement to add
     $parsed = $this->statementParser->parseTsvStatement($statement);
     $subjectId = $parsed->getSubjectId();
     $newStatement = $parsed->getStatement();

     // Load the current revision of the subject
     $revision = $this->wikibaseFactory->newRevisionGetter()->getFromId($subjectId);
     if ($revision === false) {
         throw new Exception('Entity does not exists');
     }
     $entity = $revision->getContent()->getData();
     $allStatements = $entity->getStatements();
     $candidates = $allStatements->getByPropertyId($newStatement->getMainSnak()->getPropertyId());

     // Nothing to do when an equivalent claim is already there
     if ($this->hasClaim($candidates, $newStatement)) {
         return;
     }

     try {
         $partialMatch = $this->findSubStatement($candidates, $newStatement);
         $newStatement->setGuid($partialMatch->getGuid());
         if ($this->hasMeaningfulReference($partialMatch)) {
             throw new Exception('Substatement with meaningful reference');
         }
     } catch (OutOfBoundsException $e) {
         // No sub-statement found: only acceptable when the property is unused so far
         if (!$candidates->isEmpty()) {
             throw new Exception('Contradictory statement');
         }
     }

     // Reference: "imported from" "Freebase data dump"
     $importedFromSnak = new PropertyValueSnak(new PropertyId('P143'), new EntityIdValue(new ItemId('Q15241312')));
     $newStatement->addNewReference($importedFromSnak);

     // Save against the revision that was inspected
     $this->statementSaver->addStatementToEntity($newStatement, $entity->getId(), $revision->getId());
 }
 /**
  * Processes each item: loads it, maps its "instance of" (P31) values to
  * schema types, collects external links from its Wikipedia pages, downloads
  * those links and lets the configured referencers add references from any
  * microdata found.
  *
  * Items already marked as processed are skipped unless $force is set. An
  * item is marked as processed only after its external links were handled.
  *
  * @param OutputInterface $output
  * @param ItemId[] $itemIds
  * @param bool $force process items even when they are marked as processed
  */
 private function executeForItemIds(OutputInterface $output, array $itemIds, $force)
 {
     $itemLookup = $this->wikibaseFactory->newItemLookup();
     $processedItemIdStrings = $this->getProcessedItemIdStrings();
     $loopCounter = 0;
     /** @var FormatterHelper $formatter */
     $formatter = $this->getHelper('formatter');
     foreach ($itemIds as $itemId) {
         $loopCounter++;
         $itemIdString = $itemId->getSerialization();
         $output->writeln('----------------------------------------------------');
         // NOTE(review): this refreshes the list on every iteration EXCEPT each
         // 10th one; possibly "== 0" was intended. Left unchanged; confirm.
         if ($loopCounter % 10 != 0) {
             $processedItemIdStrings = $this->getProcessedItemIdStrings();
         }
         if (!$force && in_array($itemIdString, $processedItemIdStrings)) {
             $output->writeln($formatter->formatSection($itemIdString, 'Already processed'));
             continue;
         }
         try {
             $output->writeln($formatter->formatSection($itemIdString, 'Loading Item'));
             $item = $itemLookup->getItemForId($itemId);
         } catch (ItemLookupException $e) {
             $output->writeln($formatter->formatSection($itemIdString, 'Failed to load item (exception)', 'error'));
             continue;
         }
         if ($item === null) {
             $output->writeln($formatter->formatSection($itemIdString, 'Failed to load item (null)', 'error'));
             continue;
         }
         // Get the item types..
         $types = array();
         foreach ($item->getStatements()->getByPropertyId(new PropertyId('P31'))->toArray() as $instanceStatement) {
             $mainSnak = $instanceStatement->getMainSnak();
             if ($mainSnak instanceof PropertyValueSnak) {
                 /** @var EntityIdValue $instanceItemIdValue */
                 $instanceItemIdValue = $mainSnak->getDataValue();
                 $idSerialization = $instanceItemIdValue->getEntityId()->getSerialization();
                 if (array_key_exists($idSerialization, $this->instanceMap)) {
                     $types[] = $this->instanceMap[$idSerialization];
                 }
             }
         }
         if (empty($types)) {
             $output->writeln($formatter->formatSection($itemIdString, 'Didn\'t find any useful instance of statements', 'comment'));
             continue;
         }
         // Note: only load Wikipedias
         $siteLinkList = DataModelUtils::getSitelinksWiteSiteIdSuffix($item->getSiteLinkList(), 'wiki');
         $output->writeln($formatter->formatSection($itemIdString, $siteLinkList->count() . ' Wikipedia pages to request'));
         $parseProgressBar = new ProgressBar($output, $siteLinkList->count());
         $parseProgressBar->display();
         // Start all parse requests first and collect the results afterwards
         /** @var PromiseInterface[] $parsePromises */
         $parsePromises = array();
         foreach ($siteLinkList->getIterator() as $siteLink) {
             $siteId = $siteLink->getSiteId();
             $pageName = $item->getSiteLinkList()->getBySiteId($siteId)->getPageName();
             $sourceMwFactory = $this->wmFactoryFactory->getFactory($siteId);
             $sourceParser = $sourceMwFactory->newParser();
             $pageIdentifier = new PageIdentifier(new Title($pageName));
             $parsePromises[$siteId] = $sourceParser->parsePageAsync($pageIdentifier);
             $parseProgressBar->advance();
         }
         $links = array();
         foreach ($parsePromises as $siteId => $promise) {
             try {
                 $parseResult = $promise->wait();
                 if (array_key_exists('externallinks', $parseResult)) {
                     foreach ($parseResult['externallinks'] as $externalLink) {
                         // Ignore archive.org links
                         if (strstr($externalLink, 'archive.org') === false) {
                             $links[] = $this->normalizeExternalLink($externalLink);
                         }
                     }
                 }
             } catch (Exception $e) {
                 // Failed requests are reported and otherwise ignored
                 $parseProgressBar->clear();
                 $output->writeln($formatter->formatSection($itemIdString, $e->getMessage(), 'error'));
                 $parseProgressBar->display();
             }
         }
         $parseProgressBar->finish();
         $output->writeln('');
         $links = array_unique($links);
         shuffle($links);
         // NOTE(review): the third Request argument receives what look like
         // request options; if Request is a PSR-7 request that argument is the
         // header list. Left unchanged; confirm against the Request class used.
         /** @var Request[] $linkRequests */
         $linkRequests = array();
         foreach ($links as $link) {
             $linkRequests[] = new Request('GET', $link, array('allow_redirects' => array('track_redirects' => true), 'connect_timeout' => 3.14, 'timeout' => 10));
         }
         $output->writeln($formatter->formatSection($itemIdString, count($linkRequests) . ' External links to (download, action)'));
         if (empty($linkRequests)) {
             continue;
         }
         // Make a bunch of requests and act on the responses
         $referencesAddedToItem = 0;
         // Two progress steps per link: response received, response handled
         $externalLinkProgressBar = new ProgressBar($output, count($linkRequests) * 2);
         $externalLinkProgressBar->display();
         // $referencesAddedToItem is captured by reference so the total
         // accumulated inside the callback is visible after the pool finishes.
         $pool = new Pool($this->externalLinkClient, $linkRequests, array('fulfilled' => function ($response) use($externalLinkProgressBar, $item, $types, &$referencesAddedToItem, $output) {
             $externalLinkProgressBar->advance();
             // 1st advance point
             if ($response instanceof ResponseInterface) {
                 $link = $response->getHeaderLine('X-GUZZLE-EFFECTIVE-URL');
                 $html = $response->getBody();
                 $referencesAddedFromLink = 0;
                 foreach ($this->microDataExtractor->extract($html) as $microData) {
                     foreach ($types as $type) {
                         if ($microData->hasType($type) && array_key_exists($type, $this->referencerMap)) {
                             foreach ($this->referencerMap[$type] as $referencer) {
                                 /** @var Referencer $referencer */
                                 $addedReferences = $referencer->addReferences($microData, $item, $link);
                                 $referencesAddedToItem += $addedReferences;
                                 $referencesAddedFromLink += $addedReferences;
                             }
                         }
                     }
                 }
                 if ($referencesAddedFromLink > 0) {
                     $externalLinkProgressBar->clear();
                     $output->write("\r");
                     $output->writeln($referencesAddedFromLink . ' reference(s) added from ' . urldecode($link));
                     $externalLinkProgressBar->display();
                 }
             }
             $externalLinkProgressBar->advance();
             // 2nd advance point
         }, 'rejected' => function () use($externalLinkProgressBar) {
             // TODO add this to some kind of verbose log?
             $externalLinkProgressBar->advance();
             // 1st advance point
         }));
         $pool->promise()->wait();
         $externalLinkProgressBar->finish();
         $output->writeln('');
         $output->writeln($formatter->formatSection($itemIdString, $referencesAddedToItem . ' References added'));
         $this->markIdAsProcessed($itemId);
     }
 }
 /**
  * Ensures the page given by the 'title' option on mediawiki.org has a
  * Wikidata item carrying an "instance of" (P31) Q6805426 statement and,
  * when a known licence category is present, a licence (P275) statement.
  *
  * A new item with an English label and a 'mediawikiwiki' sitelink is created
  * when the parsed page has no 'wikibase_item' page property.
  *
  * @param InputInterface $input
  * @param OutputInterface $output
  *
  * @return int 0 on success, -1 when logging in to the target wiki fails
  *
  * @throws RuntimeException when the user is not in the config or no title was given
  */
 protected function execute(InputInterface $input, OutputInterface $output)
 {
     $user = $input->getOption('user');
     $userDetails = $this->appConfig->get('users.' . $user);
     if ($userDetails === null) {
         throw new RuntimeException('User not found in config');
     }
     $sourceTitle = $input->getOption('title');
     // Loose comparison kept on purpose: an empty title counts as missing
     if ($sourceTitle == null) {
         throw new RuntimeException('No titles was set!');
     }
     $pageIdentifier = new PageIdentifier(new Title($sourceTitle));
     $sourceApi = new MediawikiApi("https://www.mediawiki.org/w/api.php");
     $targetApi = new MediawikiApi("https://www.wikidata.org/w/api.php");
     $loggedIn = $targetApi->login(new ApiUser($userDetails['username'], $userDetails['password']));
     if (!$loggedIn) {
         $output->writeln('Failed to log in to target wiki');
         return -1;
     }
     $sourceMwFactory = new MediawikiFactory($sourceApi);
     $sourceParser = $sourceMwFactory->newParser();
     $parseResult = $sourceParser->parsePage($pageIdentifier);
     // Get the wikibase item if it exists
     $itemIdString = null;
     if (array_key_exists('properties', $parseResult)) {
         foreach ($parseResult['properties'] as $pageProp) {
             if ($pageProp['name'] == 'wikibase_item') {
                 $itemIdString = $pageProp['*'];
             }
         }
     }
     $targetWbFactory = new WikibaseFactory($targetApi, new DataValueDeserializer(array('boolean' => 'DataValues\\BooleanValue', 'number' => 'DataValues\\NumberValue', 'string' => 'DataValues\\StringValue', 'unknown' => 'DataValues\\UnknownValue', 'globecoordinate' => 'DataValues\\Geo\\Values\\GlobeCoordinateValue', 'monolingualtext' => 'DataValues\\MonolingualTextValue', 'multilingualtext' => 'DataValues\\MultilingualTextValue', 'quantity' => 'DataValues\\QuantityValue', 'time' => 'DataValues\\TimeValue', 'wikibase-entityid' => 'Wikibase\\DataModel\\Entity\\EntityIdValue')), new DataValueSerializer());
     // Create an item if there is no item yet!
     if ($itemIdString === null) {
         $output->writeln("Creating a new Item");
         $item = new Item();
         $item->setLabel('en', $sourceTitle);
         //TODO this siteid should come from somewhere?
         $item->getSiteLinkList()->setNewSiteLink('mediawikiwiki', $sourceTitle);
         $targetRevSaver = $targetWbFactory->newRevisionSaver();
         $item = $targetRevSaver->save(new Revision(new Content($item)));
     } else {
         $item = $targetWbFactory->newItemLookup()->getItemForId(new ItemId($itemIdString));
     }
     // Add instance of if not already there
     $hasInstanceOfExtension = false;
     foreach ($item->getStatements()->getByPropertyId(new PropertyId('P31'))->getMainSnaks() as $mainSnak) {
         if ($mainSnak instanceof PropertyValueSnak) {
             /** @var EntityIdValue $dataValue */
             $dataValue = $mainSnak->getDataValue();
             if ($dataValue->getEntityId()->equals(new ItemId('Q6805426'))) {
                 $hasInstanceOfExtension = true;
                 break;
             }
         }
     }
     if (!$hasInstanceOfExtension) {
         $output->writeln("Creating instance of Statement");
         $targetWbFactory->newStatementCreator()->create(new PropertyValueSnak(new PropertyId('P31'), new EntityIdValue(new ItemId('Q6805426'))), $item->getId());
     }
     // Try to add a licence: map known page categories to licence items
     $catLicenseMap = array('Public_domain_licensed_extensions' => 'Q19652');
     $extensionLicenseItemIdString = null;
     if (array_key_exists('categories', $parseResult)) {
         foreach ($parseResult['categories'] as $categoryInfo) {
             if (array_key_exists($categoryInfo['*'], $catLicenseMap)) {
                 $extensionLicenseItemIdString = $catLicenseMap[$categoryInfo['*']];
             }
         }
     }
     if ($extensionLicenseItemIdString !== null) {
         $output->writeln("Creating Licence Statement");
         $statementCreator = $targetWbFactory->newStatementCreator();
         //TODO make sure it isn't already there????
         $statementCreator->create(new PropertyValueSnak(new PropertyId('P275'), new EntityIdValue(new ItemId($extensionLicenseItemIdString))), $item->getId());
     }
     // Report success explicitly; the failure path above already returns -1
     return 0;
 }
 /**
  * Rewrites reference retrieval dates (P813) that use the Julian calendar
  * model so that they use the Gregorian one.
  *
  * Items come from the 'item' option or, when that is empty, from a SPARQL
  * query (capped at 10000 items). For each affected reference the Julian snak
  * is dropped and, if getFixedTimestamp() yields a value, replaced with a
  * Gregorian snak; otherwise the reference is saved without a retrieval date.
  * Progress is written as '.' (fixed) or 'x' (removed) per snak.
  *
  * @param InputInterface $input
  * @param OutputInterface $output
  *
  * @return int 0 on success, -1 when the login fails
  *
  * @throws RuntimeException when the user is not in the config
  */
 protected function execute(InputInterface $input, OutputInterface $output)
 {
     // Resolve the account to edit with
     $userName = $input->getOption('user');
     $userDetails = $this->appConfig->offsetGet('users.' . $userName);
     if ($userDetails === null) {
         throw new RuntimeException('User not found in config');
     }

     // Work out which items to look at
     $requestedItems = $input->getOption('item');
     if (!empty($requestedItems)) {
         /** @var ItemId[] $itemIds */
         $itemIds = array();
         foreach (array_unique($requestedItems) as $itemIdString) {
             $itemIds[] = new ItemId($itemIdString);
         }
     } else {
         $output->writeln('Running SPARQL query to find items to check');
         $queryBuilder = new QueryBuilder(array('prov' => 'http://www.w3.org/ns/prov#', 'wd' => 'http://www.wikidata.org/entity/', 'wikibase' => 'http://wikiba.se/ontology#', 'prv' => 'http://www.wikidata.org/prop/reference/value/'));
         $itemIds = $this->sparqlQueryRunner->getItemIdsFromQuery($queryBuilder->select('?item')->where('?ref', 'prv:P813', '?value')->also('?value', 'wikibase:timeCalendarModel', 'wd:Q1985786')->also('?st', 'prov:wasDerivedFrom', '?ref')->also('?item', '?pred', '?st')->limit(10000)->__toString());
     }
     $itemIds = array_unique($itemIds);
     $output->writeln('Running for ' . count($itemIds) . ' items');

     // Log in to Wikidata
     $apiUser = new ApiUser($userDetails['username'], $userDetails['password']);
     if (!$this->wikibaseApi->login($apiUser)) {
         $output->writeln('Failed to log in to wikidata wiki');
         return -1;
     }

     $itemLookup = $this->wikibaseFactory->newItemLookup();
     foreach ($itemIds as $itemId) {
         $output->write($itemId->getSerialization() . ' ');
         $item = $itemLookup->getItemForId($itemId);
         foreach ($item->getStatements()->getIterator() as $statement) {
             foreach ($statement->getReferences() as $reference) {
                 /** @var Reference $reference */
                 foreach ($reference->getSnaks()->getIterator() as $snak) {
                     if (!$snak instanceof PropertyValueSnak) {
                         continue;
                     }
                     if ($snak->getPropertyId()->getSerialization() != 'P813') {
                         continue;
                     }
                     /** @var TimeValue $retrievedValue */
                     $retrievedValue = $snak->getDataValue();
                     // We can assume ALL retrieval dates should be Gregorian!
                     if ($retrievedValue->getCalendarModel() !== TimeValue::CALENDAR_JULIAN) {
                         continue;
                     }

                     $previousHash = $reference->getHash();
                     $guid = $statement->getGuid();
                     // Work on a copy of the reference's snaks, minus the Julian one
                     $replacementSnaks = new SnakList($reference->getSnaks()->getArrayCopy());
                     $replacementSnaks->removeSnak($snak);

                     $fixedTimestamp = $this->getFixedTimestamp($retrievedValue->getTime());
                     if (!$fixedTimestamp) {
                         //TODO optionally remove rather than always doing so?
                         $editSummary = 'Removing bad reference retrieval date';
                         $output->write('x');
                     } else {
                         $gregorianValue = new TimeValue($fixedTimestamp, $retrievedValue->getTimezone(), $retrievedValue->getBefore(), $retrievedValue->getAfter(), $retrievedValue->getPrecision(), TimeValue::CALENDAR_GREGORIAN);
                         $replacementSnaks->addSnak(new PropertyValueSnak(new PropertyId('P813'), $gregorianValue));
                         $editSummary = 'Fix reference retrieval date';
                         $output->write('.');
                     }

                     try {
                         $this->wikibaseFactory->newReferenceSetter()->set(new Reference($replacementSnaks), $guid, $previousHash, new EditInfo($editSummary));
                     } catch (UsageException $e) {
                         // Report the API error and carry on with the next snak
                         $output->writeln('');
                         $output->write($e->getMessage());
                     }
                 }
             }
         }
         $output->writeln('');
     }
     return 0;
 }