/**
 * Adds a reference for $sourceUrl to statements of $item whose entity-id value is
 * confirmed by a string value found in the micro data.
 *
 * For every property in $this->propMap the matching schema property is read from
 * $microData as strings (values containing '//' are skipped as URLs). A statement
 * of that property gets the reference when the regex registered for the statement's
 * value in $this->regexMap matches one of those strings.
 *
 * @param MicroData $microData Micro data to take the schema values from
 * @param mixed $item Object whose getStatements() list is checked and updated in memory
 * @param string $sourceUrl URL to reference; its url-decoded form is used as edit summary
 *
 * @return int Number of references that were added without a UsageException
 */
public function addReferences(MicroData $microData, $item, $sourceUrl)
 {
     $referenceCounter = 0;
     foreach ($this->propMap as $propertyIdString => $schemaPropertyString) {
         // Without regexes for this property no statement value can be confirmed,
         // so skip it instead of indexing a missing key below.
         if (!isset($this->regexMap[$propertyIdString])) {
             continue;
         }
         $regexMap = $this->regexMap[$propertyIdString];

         $values = array();
         foreach ($microData->getProperty($schemaPropertyString, MicroData::PROP_STRING) as $propertyValue) {
             // Don't match URLs!
             if (strpos($propertyValue, '//') !== false) {
                 continue;
             }
             $values[] = $propertyValue;
         }

         $statements = $item->getStatements()->getByPropertyId(new PropertyId($propertyIdString));
         foreach ($values as $value) {
             // Statements are objects, so iterating by value still lets us update them in place.
             foreach ($statements->getIterator() as $statement) {
                 $mainSnak = $statement->getMainSnak();
                 if (!$mainSnak instanceof PropertyValueSnak) {
                     // Ignore some and no value statements
                     continue;
                 }
                 if (DataModelUtils::statementHasReferenceForUrlWithSameDomain($statement, $sourceUrl)) {
                     // Ignore statements that already have this URL domain as a ref
                     continue;
                 }
                 $valueEntityIdValue = $mainSnak->getDataValue();
                 // Fully qualified so the check does not depend on this file's use statements.
                 if (!$valueEntityIdValue instanceof \Wikibase\DataModel\Entity\EntityIdValue) {
                     // Only entity-id values can be looked up in the regex map
                     continue;
                 }
                 $valueEntityIdString = $valueEntityIdValue->getEntityId()->getSerialization();
                 if (!array_key_exists($valueEntityIdString, $regexMap)) {
                     //TODO log that this ItemId is missing?
                     continue;
                 }
                 if (!preg_match($regexMap[$valueEntityIdString], $value)) {
                     // ItemId regex didn't match this schema value
                     continue;
                 }
                 // Add the new reference!
                 $newReference = DataModelUtils::getReferenceForUrl($sourceUrl);
                 try {
                     $this->wikibaseFactory->newReferenceSetter()->set(
                         $newReference,
                         $statement,
                         null,
                         new EditInfo(urldecode($sourceUrl), EditInfo::NOTMINOR, EditInfo::BOT)
                     );
                     // Keep the in-memory statement up to date so the same-domain check
                     // above skips it for the remaining values.
                     $statement->addNewReference($newReference->getSnaks());
                     $referenceCounter++;
                 } catch (UsageException $e) {
                     // Best effort: a rejected edit must not stop the remaining statements
                 }
             }
         }
     }
     return $referenceCounter;
 }
 // Example #2
 // 0
 /**
  * Adds a reference for $sourceUrl to statements of $item whose time value equals a
  * date found in the micro data.
  *
  * For every property in $this->propMap the matching schema property is read from
  * $microData as strings, parsed with DateTime and converted through $this->timeParser.
  * Values that fail to parse are ignored. A statement of that property gets the
  * reference when its main snak value equals one of the parsed time values.
  *
  * @param MicroData $microData Micro data to take the schema values from
  * @param mixed $item Object whose getStatements() list is checked and updated in memory
  * @param string $sourceUrl URL to reference; its url-decoded form is used as edit summary
  *
  * @return int Number of references that were added without a UsageException
  */
 public function addReferences(MicroData $microData, $item, $sourceUrl)
 {
     $referenceCounter = 0;
     foreach ($this->propMap as $propertyIdString => $schemaPropertyString) {
         /** @var TimeValue[] $timeValues */
         $timeValues = array();
         foreach ($microData->getProperty($schemaPropertyString, MicroData::PROP_STRING) as $propertyValue) {
             $dateString = trim($propertyValue);
             // new DateTime('') silently means "now"; an empty schema value would then
             // confirm any statement that happens to carry today's date.
             if ($dateString === '') {
                 continue;
             }
             try {
                 $date = new DateTime($dateString);
                 $timeValues[] = $this->timeParser->parse($date->format('Y m d'));
             } catch (Exception $e) {
                 // Ignore failed parsing
             }
         }

         $statements = $item->getStatements()->getByPropertyId(new PropertyId($propertyIdString));
         foreach ($timeValues as $timeValue) {
             // Statements are objects, so iterating by value still lets us update them in place.
             foreach ($statements->getIterator() as $statement) {
                 $mainSnak = $statement->getMainSnak();
                 if (!$mainSnak instanceof PropertyValueSnak) {
                     // Ignore some and no value statements
                     continue;
                 }
                 if (DataModelUtils::statementHasReferenceForUrlWithSameDomain($statement, $sourceUrl)) {
                     // Ignore statements that already have this URL domain as a ref
                     continue;
                 }
                 if (!$timeValue->equals($mainSnak->getDataValue())) {
                     // The statement holds a different value than the schema date
                     continue;
                 }
                 // Add the new reference!
                 $newReference = DataModelUtils::getReferenceForUrl($sourceUrl);
                 try {
                     $this->wikibaseFactory->newReferenceSetter()->set(
                         $newReference,
                         $statement,
                         null,
                         new EditInfo(urldecode($sourceUrl), EditInfo::NOTMINOR, EditInfo::BOT)
                     );
                     // Keep the in-memory statement up to date so the same-domain check
                     // above skips it for the remaining values.
                     $statement->addNewReference($newReference->getSnaks());
                     $referenceCounter++;
                 } catch (UsageException $e) {
                     // Best effort: a rejected edit must not stop the remaining statements
                 }
             }
         }
     }
     return $referenceCounter;
 }
 /**
  * Adds a reference for $sourceUrl to statements of $item whose item value has a main
  * term equal (case-insensitively) to a value extracted from the micro data.
  *
  * For every property in $this->callbackMap the callback is invoked with $microData to
  * get candidate strings. The item a statement points to is loaded (and cached in
  * $this->inMemoryEntityLookup) and the statement gets the reference when the lowercased
  * candidate is among the terms returned by DataModelUtils::getMainTermsAsLowerCaseStrings().
  *
  * @param MicroData $microData Micro data handed to the value getter callbacks
  * @param mixed $item Object providing getId() and getStatements(); statements are updated in memory
  * @param string $sourceUrl URL to reference; its url-decoded form is used as edit summary
  *
  * @return int Number of references that were added without a UsageException
  */
 public function addReferences(MicroData $microData, $item, $sourceUrl)
 {
     // Only cache entity lookup stuff per item we are adding references for!
     // (but it can be reused for multiple source URLs of the same item)
     $itemId = $item->getId();
     if (!$itemId->equals($this->lastEntityId)) {
         $this->inMemoryEntityLookup = new InMemoryEntityLookup();
         // Remember the item so that the next call for it actually reuses the cache
         $this->lastEntityId = $itemId;
     }

     $referenceCounter = 0;
     foreach ($this->callbackMap as $propertyIdString => $valueGetterFunction) {
         $values = $valueGetterFunction($microData);
         $statements = $item->getStatements()->getByPropertyId(new PropertyId($propertyIdString));
         foreach ($values as $value) {
             // Statements are objects, so iterating by value still lets us update them in place.
             foreach ($statements->getIterator() as $statement) {
                 $mainSnak = $statement->getMainSnak();
                 if (!$mainSnak instanceof PropertyValueSnak) {
                     // Ignore some and no value statements
                     continue;
                 }
                 // Cheap check first: it avoids loading value items for statements
                 // that already have this URL domain as a ref.
                 if (DataModelUtils::statementHasReferenceForUrlWithSameDomain($statement, $sourceUrl)) {
                     continue;
                 }
                 $valueEntityIdValue = $mainSnak->getDataValue();
                 // Fully qualified so the checks do not depend on this file's use statements.
                 if (!$valueEntityIdValue instanceof \Wikibase\DataModel\Entity\EntityIdValue) {
                     // Only statements pointing at another entity can be compared by term
                     continue;
                 }
                 $valueItemId = $valueEntityIdValue->getEntityId();
                 if (!$valueItemId instanceof \Wikibase\DataModel\Entity\ItemId) {
                     // The item lookup below can only resolve item ids
                     continue;
                 }
                 if ($this->inMemoryEntityLookup->hasEntity($valueItemId)) {
                     $valueItem = $this->inMemoryEntityLookup->getEntity($valueItemId);
                 } else {
                     $valueItem = $this->wikibaseFactory->newItemLookup()->getItemForId($valueItemId);
                     if ($valueItem === null) {
                         // The value item could not be loaded, so there is nothing to compare against
                         continue;
                     }
                     $this->inMemoryEntityLookup->addEntity($valueItem);
                 }
                 $mainTerms = DataModelUtils::getMainTermsAsLowerCaseStrings($valueItem->getFingerprint());
                 // Strict comparison so numeric-looking strings such as "007" and "7" stay distinct
                 if (!in_array(strtolower($value), $mainTerms, true)) {
                     // Ignore things that don't appear to have the correct value
                     continue;
                 }
                 // Add the new reference!
                 $newReference = DataModelUtils::getReferenceForUrl($sourceUrl);
                 try {
                     $this->wikibaseFactory->newReferenceSetter()->set(
                         $newReference,
                         $statement,
                         null,
                         new EditInfo(urldecode($sourceUrl), EditInfo::NOTMINOR, EditInfo::BOT)
                     );
                     // Keep the in-memory statement up to date so the same-domain check
                     // above skips it for the remaining values.
                     $statement->addNewReference($newReference->getSnaks());
                     $referenceCounter++;
                 } catch (UsageException $e) {
                     // Best effort: a rejected edit must not stop the remaining statements
                 }
             }
         }
     }
     return $referenceCounter;
 }
 /**
  * Rewrites reference retrieval dates (P813) that use the Julian calendar model.
  *
  * The items to check come from the 'item' option or, when that is empty, from a SPARQL
  * query limited to 10000 results. For every affected reference the Julian P813 snak is
  * replaced by a Gregorian one built from getFixedTimestamp(); when no fixed timestamp is
  * available the snak is removed from the reference instead. Progress is written to
  * $output: '.' for a fixed date, 'x' for a removed one.
  *
  * @param InputInterface $input Provides the 'user' and 'item' options
  * @param OutputInterface $output Receives progress and error messages
  *
  * @return int 0 when the run completed, -1 when logging in failed
  *
  * @throws RuntimeException When the requested user has no entry in the app config
  */
 protected function execute(InputInterface $input, OutputInterface $output)
 {
     // Get options
     $user = $input->getOption('user');
     $userDetails = $this->appConfig->offsetGet('users.' . $user);
     if ($userDetails === null) {
         throw new RuntimeException('User not found in config');
     }

     $items = $input->getOption('item');
     if (empty($items)) {
         $output->writeln('Running SPARQL query to find items to check');
         $queryBuilder = new QueryBuilder(array(
             'prov' => 'http://www.w3.org/ns/prov#',
             'wd' => 'http://www.wikidata.org/entity/',
             'wikibase' => 'http://wikiba.se/ontology#',
             'prv' => 'http://www.wikidata.org/prop/reference/value/',
         ));
         $query = $queryBuilder
             ->select('?item')
             ->where('?ref', 'prv:P813', '?value')
             ->also('?value', 'wikibase:timeCalendarModel', 'wd:Q1985786')
             ->also('?st', 'prov:wasDerivedFrom', '?ref')
             ->also('?item', '?pred', '?st')
             ->limit(10000)
             ->__toString();
         $itemIds = $this->sparqlQueryRunner->getItemIdsFromQuery($query);
     } else {
         /** @var ItemId[] $itemIds */
         $itemIds = array();
         foreach (array_unique($items) as $itemIdString) {
             $itemIds[] = new ItemId($itemIdString);
         }
     }
     $itemIds = array_unique($itemIds);
     $output->writeln('Running for ' . count($itemIds) . ' items');

     // Log in to Wikidata
     $loggedIn = $this->wikibaseApi->login(new ApiUser($userDetails['username'], $userDetails['password']));
     if (!$loggedIn) {
         $output->writeln('Failed to log in to wikidata wiki');
         return -1;
     }

     $itemLookup = $this->wikibaseFactory->newItemLookup();
     foreach ($itemIds as $itemId) {
         $output->write($itemId->getSerialization() . ' ');
         $item = $itemLookup->getItemForId($itemId);
         if ($item === null) {
             // Nothing was loaded for this id; skip it rather than abort the whole run
             $output->writeln('item could not be loaded');
             continue;
         }
         foreach ($item->getStatements()->getIterator() as $statement) {
             foreach ($statement->getReferences() as $reference) {
                 /** @var Reference $reference */
                 foreach ($reference->getSnaks()->getIterator() as $snak) {
                     if (!$snak instanceof PropertyValueSnak) {
                         continue;
                     }
                     if ($snak->getPropertyId()->getSerialization() !== 'P813') {
                         continue;
                     }
                     $dataValue = $snak->getDataValue();
                     if (!$dataValue instanceof TimeValue) {
                         // A retrieval date without a time value has no calendar model to fix
                         continue;
                     }
                     // We can assume ALL retrieval dates should be Gregorian!
                     if ($dataValue->getCalendarModel() !== TimeValue::CALENDAR_JULIAN) {
                         continue;
                     }

                     $oldRefHash = $reference->getHash();
                     $statementGuid = $statement->getGuid();
                     // Work on a copy so the reference held by the loaded item is not modified
                     $snakList = new SnakList($reference->getSnaks()->getArrayCopy());
                     $snakList->removeSnak($snak);

                     $fixedTimestamp = $this->getFixedTimestamp($dataValue->getTime());
                     if ($fixedTimestamp) {
                         $snakList->addSnak(new PropertyValueSnak(
                             new PropertyId('P813'),
                             new TimeValue(
                                 $fixedTimestamp,
                                 $dataValue->getTimezone(),
                                 $dataValue->getBefore(),
                                 $dataValue->getAfter(),
                                 $dataValue->getPrecision(),
                                 TimeValue::CALENDAR_GREGORIAN
                             )
                         ));
                         $editSummary = 'Fix reference retrieval date';
                         $output->write('.');
                     } else {
                         //TODO optionally remove rather than always doing so?
                         $editSummary = 'Removing bad reference retrieval date';
                         $output->write('x');
                     }

                     try {
                         $this->wikibaseFactory->newReferenceSetter()->set(
                             new Reference($snakList),
                             $statementGuid,
                             $oldRefHash,
                             new EditInfo($editSummary)
                         );
                     } catch (UsageException $e) {
                         // Report the API error and carry on with the next snak
                         $output->writeln('');
                         $output->write($e->getMessage());
                     }
                 }
             }
         }
         $output->writeln('');
     }
     return 0;
 }