/**
 * Adds a reference for $sourceUrl to statements whose entity value is confirmed
 * by a microdata string value.
 *
 * For every (property id => schema property) pair in $this->propMap, the string
 * values of the schema property are collected (values containing "//" are
 * skipped as URLs). A statement of that property gets a new reference when:
 *  - its main snak is a PropertyValueSnak,
 *  - it has no reference for a URL on the same domain as $sourceUrl yet,
 *  - the serialization of its entity id value is a key of the regex map
 *    configured for the property, and
 *  - that regex matches one of the collected values.
 *
 * @param MicroData $microData Microdata to read the schema property values from.
 * @param mixed $item Entity whose statements are inspected; only getStatements()
 *     is called on it (presumably a Wikibase Item - confirm against callers).
 * @param string $sourceUrl URL the microdata came from. It is used to build the
 *     reference and, URL-decoded, as the first EditInfo argument.
 *
 * @return int Number of references that were added.
 */
public function addReferences(MicroData $microData, $item, $sourceUrl)
{
    $referenceCounter = 0;

    foreach ($this->propMap as $propertyIdString => $schemaPropertyString) {
        // Without a regex map nothing can match for this property. Skipping here
        // avoids an undefined index notice and array_key_exists() being called on
        // null further down (a TypeError on PHP 8).
        if (!isset($this->regexMap[$propertyIdString])) {
            continue;
        }
        $regexMap = $this->regexMap[$propertyIdString];

        $values = array();
        foreach ($microData->getProperty($schemaPropertyString, MicroData::PROP_STRING) as $propertyValue) {
            // Don't match URLs!
            if (strstr($propertyValue, '//')) {
                continue;
            }
            $values[] = $propertyValue;
        }

        $statements = $item->getStatements()->getByPropertyId(new PropertyId($propertyIdString));

        foreach ($values as $value) {
            // Statements are objects, so iterating by value still hands out the
            // same instances; no by-reference loop (and no dangling reference
            // after the loop) is needed to keep the in-memory copy up to date.
            foreach ($statements->getIterator() as $statement) {
                $mainSnak = $statement->getMainSnak();
                if (!$mainSnak instanceof PropertyValueSnak) {
                    continue; // Ignore some and no value statements
                }

                if (DataModelUtils::statementHasReferenceForUrlWithSameDomain($statement, $sourceUrl)) {
                    continue; // Ignore statements that already have this URL domain as a ref
                }

                /** @var EntityIdValue $valueEntityIdValue */
                $valueEntityIdValue = $mainSnak->getDataValue();
                /** @var EntityId $valueEntityId */
                $valueEntityId = $valueEntityIdValue->getEntityId();
                $valueEntityIdString = $valueEntityId->getSerialization();

                if (!array_key_exists($valueEntityIdString, $regexMap)) {
                    //TODO log that this ItemId is missing?
                    continue;
                }

                if (!preg_match($regexMap[$valueEntityIdString], $value)) {
                    continue; // ItemId regex didn't match this schema value
                }

                // Add the new reference!
                $newReference = DataModelUtils::getReferenceForUrl($sourceUrl);

                try {
                    $this->wikibaseFactory->newReferenceSetter()->set(
                        $newReference,
                        $statement,
                        null,
                        new EditInfo(urldecode($sourceUrl), EditInfo::NOTMINOR, EditInfo::BOT)
                    );
                    // NOTE: keep our in memory item copy up to date, so that the
                    // same-domain check above skips this statement from now on.
                    $statement->addNewReference($newReference->getSnaks());
                    $referenceCounter++;
                } catch (UsageException $e) {
                    // Deliberately best effort: a failed edit only means that this
                    // statement is not counted.
                }
            }
        }
    }

    return $referenceCounter;
}
/**
 * Adds a reference for $sourceUrl to statements whose time value equals a date
 * found in the microdata.
 *
 * For every (property id => schema property) pair in $this->propMap, the string
 * values of the schema property are parsed with DateTime, re-formatted as
 * "Y m d" and handed to $this->timeParser. Values that fail to parse are
 * ignored. A statement of that property gets a new reference when:
 *  - its main snak is a PropertyValueSnak,
 *  - it has no reference for a URL on the same domain as $sourceUrl yet, and
 *  - one of the parsed time values equals() the main snak's data value.
 *
 * @param MicroData $microData Microdata to read the schema property values from.
 * @param mixed $item Entity whose statements are inspected; only getStatements()
 *     is called on it (presumably a Wikibase Item - confirm against callers).
 * @param string $sourceUrl URL the microdata came from. It is used to build the
 *     reference and, URL-decoded, as the first EditInfo argument.
 *
 * @return int Number of references that were added.
 */
public function addReferences(MicroData $microData, $item, $sourceUrl)
{
    $referenceCounter = 0;

    foreach ($this->propMap as $propertyIdString => $schemaPropertyString) {
        /** @var TimeValue[] $timeValues */
        $timeValues = array();
        foreach ($microData->getProperty($schemaPropertyString, MicroData::PROP_STRING) as $propertyValue) {
            $dateString = trim($propertyValue);
            if ($dateString === '') {
                // new DateTime('') does not fail, it means "now". An empty value
                // must never be treated as today's date.
                continue;
            }

            try {
                // NOTE(review): DateTime reads a bare four digit string such as
                // "2015" as a time (20:15 today), not as a year - confirm whether
                // year-only values can occur here.
                $date = new DateTime($dateString);
                $timeValues[] = $this->timeParser->parse($date->format('Y m d'));
            } catch (Exception $e) {
                // Ignore failed parsing
            }
        }

        $statements = $item->getStatements()->getByPropertyId(new PropertyId($propertyIdString));

        foreach ($timeValues as $timeValue) {
            // Statements are objects, so iterating by value still hands out the
            // same instances; no by-reference loop (and no dangling reference
            // after the loop) is needed to keep the in-memory copy up to date.
            foreach ($statements->getIterator() as $statement) {
                $mainSnak = $statement->getMainSnak();
                if (!$mainSnak instanceof PropertyValueSnak) {
                    continue; // Ignore some and no value statements
                }

                if (DataModelUtils::statementHasReferenceForUrlWithSameDomain($statement, $sourceUrl)) {
                    continue; // Ignore statements that already have this URL domain as a ref
                }

                if (!$timeValue->equals($mainSnak->getDataValue())) {
                    continue; // The statement holds a different date
                }

                // Add the new reference!
                $newReference = DataModelUtils::getReferenceForUrl($sourceUrl);

                try {
                    $this->wikibaseFactory->newReferenceSetter()->set(
                        $newReference,
                        $statement,
                        null,
                        new EditInfo(urldecode($sourceUrl), EditInfo::NOTMINOR, EditInfo::BOT)
                    );
                    // NOTE: keep our in memory item copy up to date, so that the
                    // same-domain check above skips this statement from now on.
                    $statement->addNewReference($newReference->getSnaks());
                    $referenceCounter++;
                } catch (UsageException $e) {
                    // Deliberately best effort: a failed edit only means that this
                    // statement is not counted.
                }
            }
        }
    }

    return $referenceCounter;
}
/**
 * Adds a reference for $sourceUrl to statements whose value item is named by a
 * value extracted from the microdata.
 *
 * For every (property id => callback) pair in $this->callbackMap, the callback
 * is called with the microdata and returns the candidate values. A statement of
 * that property gets a new reference when:
 *  - its main snak is a PropertyValueSnak,
 *  - it has no reference for a URL on the same domain as $sourceUrl yet, and
 *  - the lower-cased candidate value is one of the strings returned by
 *    DataModelUtils::getMainTermsAsLowerCaseStrings() for the fingerprint of
 *    the item the statement points to.
 *
 * Value items are fetched through $this->wikibaseFactory and kept in
 * $this->inMemoryEntityLookup. That cache is replaced whenever the method is
 * called for a different item than on the previous call.
 *
 * @param MicroData $microData Microdata handed to the value getter callbacks.
 * @param mixed $item Entity whose statements are inspected; getId() and
 *     getStatements() are called on it (presumably a Wikibase Item - confirm
 *     against callers).
 * @param string $sourceUrl URL the microdata came from. It is used to build the
 *     reference and, URL-decoded, as the first EditInfo argument.
 *
 * @return int Number of references that were added.
 */
public function addReferences(MicroData $microData, $item, $sourceUrl)
{
    // Only cache entity lookup stuff per item we are adding references for!
    // (but it can be used for multiple source URLs of that item)
    if (!$item->getId()->equals($this->lastEntityId)) {
        $this->inMemoryEntityLookup = new InMemoryEntityLookup();
        // Remember which item the cache belongs to. Without this the comparison
        // above never succeeds and the cache is thrown away on every call.
        $this->lastEntityId = $item->getId();
    }

    $referenceCounter = 0;

    foreach ($this->callbackMap as $propertyIdString => $valueGetterFunction) {
        $values = $valueGetterFunction($microData);
        $statements = $item->getStatements()->getByPropertyId(new PropertyId($propertyIdString));

        foreach ($values as $value) {
            // Statements are objects, so iterating by value still hands out the
            // same instances; no by-reference loop (and no dangling reference
            // after the loop) is needed to keep the in-memory copy up to date.
            foreach ($statements->getIterator() as $statement) {
                $mainSnak = $statement->getMainSnak();
                if (!$mainSnak instanceof PropertyValueSnak) {
                    continue; // Ignore some and no value statements
                }

                // Cheap local check first, so that no item is fetched for a
                // statement that would be skipped anyway.
                if (DataModelUtils::statementHasReferenceForUrlWithSameDomain($statement, $sourceUrl)) {
                    continue; // Ignore statements that already have this URL domain as a ref
                }

                /** @var EntityIdValue $valueEntityIdValue */
                $valueEntityIdValue = $mainSnak->getDataValue();
                /** @var ItemId $valueItemId */
                $valueItemId = $valueEntityIdValue->getEntityId();

                if ($this->inMemoryEntityLookup->hasEntity($valueItemId)) {
                    $valueItem = $this->inMemoryEntityLookup->getEntity($valueItemId);
                } else {
                    $valueItem = $this->wikibaseFactory->newItemLookup()->getItemForId($valueItemId);
                    if ($valueItem === null) {
                        continue; // The value item could not be loaded, nothing to compare with
                    }
                    $this->inMemoryEntityLookup->addEntity($valueItem);
                }

                // Strict comparison: both sides are strings, and a loose match
                // would treat numeric-looking strings such as "1e3" and "1000"
                // as equal.
                $mainTerms = DataModelUtils::getMainTermsAsLowerCaseStrings($valueItem->getFingerprint());
                if (!in_array(strtolower($value), $mainTerms, true)) {
                    continue; // Ignore things that don't appear to have the correct value
                }

                // Add the new reference!
                $newReference = DataModelUtils::getReferenceForUrl($sourceUrl);

                try {
                    $this->wikibaseFactory->newReferenceSetter()->set(
                        $newReference,
                        $statement,
                        null,
                        new EditInfo(urldecode($sourceUrl), EditInfo::NOTMINOR, EditInfo::BOT)
                    );
                    // NOTE: keep our in memory item copy up to date, so that the
                    // same-domain check above skips this statement from now on.
                    $statement->addNewReference($newReference->getSnaks());
                    $referenceCounter++;
                } catch (UsageException $e) {
                    // Deliberately best effort: a failed edit only means that this
                    // statement is not counted.
                }
            }
        }
    }

    return $referenceCounter;
}
/**
 * Rewrites "retrieved" (P813) reference dates that use the Julian calendar
 * model so that they use the Gregorian calendar model.
 *
 * The items to check come from the "item" option. When that option is empty, a
 * SPARQL query (limited to 10000 results) looks for items with a reference
 * whose P813 value uses calendar model wd:Q1985786. Each affected reference is
 * replaced in a single edit: every Julian P813 snak is removed and, when
 * getFixedTimestamp() returns a timestamp for it, re-added with that timestamp
 * and the Gregorian calendar model. Progress is written to $output: "." for a
 * fixed snak, "x" for a snak that is only removed.
 *
 * @param InputInterface $input Provides the "user" and "item" options.
 * @param OutputInterface $output Receives progress and error messages.
 *
 * @return int 0 on success, -1 when logging in failed.
 *
 * @throws RuntimeException When the user has no entry in the app config.
 */
protected function execute(InputInterface $input, OutputInterface $output)
{
    // Get options
    $user = $input->getOption('user');
    $userDetails = $this->appConfig->offsetGet('users.' . $user);
    if ($userDetails === null) {
        throw new RuntimeException('User not found in config');
    }

    $items = $input->getOption('item');
    if (empty($items)) {
        $output->writeln('Running SPARQL query to find items to check');
        $queryBuilder = new QueryBuilder(array(
            'prov' => 'http://www.w3.org/ns/prov#',
            'wd' => 'http://www.wikidata.org/entity/',
            'wikibase' => 'http://wikiba.se/ontology#',
            'prv' => 'http://www.wikidata.org/prop/reference/value/',
        ));
        $query = $queryBuilder
            ->select('?item')
            ->where('?ref', 'prv:P813', '?value')
            ->also('?value', 'wikibase:timeCalendarModel', 'wd:Q1985786')
            ->also('?st', 'prov:wasDerivedFrom', '?ref')
            ->also('?item', '?pred', '?st')
            ->limit(10000)
            ->__toString();
        $itemIds = $this->sparqlQueryRunner->getItemIdsFromQuery($query);
    } else {
        /** @var ItemId[] $itemIds */
        $itemIds = array();
        foreach (array_unique($items) as $itemIdString) {
            $itemIds[] = new ItemId($itemIdString);
        }
    }

    // array_unique() compares the string form of the ids, so this also removes
    // ids that only became equal after being turned into ItemId objects.
    $itemIds = array_unique($itemIds);
    $output->writeln('Running for ' . count($itemIds) . ' items');

    // Log in to Wikidata
    $loggedIn = $this->wikibaseApi->login(new ApiUser($userDetails['username'], $userDetails['password']));
    if (!$loggedIn) {
        $output->writeln('Failed to log in to wikidata wiki');
        return -1;
    }

    $itemLookup = $this->wikibaseFactory->newItemLookup();

    foreach ($itemIds as $itemId) {
        $output->write($itemId->getSerialization() . ' ');

        $item = $itemLookup->getItemForId($itemId);
        if ($item === null) {
            // The item could not be loaded, so there is nothing to check.
            $output->writeln('');
            continue;
        }

        foreach ($item->getStatements()->getIterator() as $statement) {
            foreach ($statement->getReferences() as $reference) {
                /** @var Reference $reference */

                // Collect every Julian retrieval date of this reference first.
                // We can assume ALL retrieval dates should be Gregorian!
                $julianSnaks = array();
                foreach ($reference->getSnaks()->getIterator() as $snak) {
                    if (!$snak instanceof PropertyValueSnak) {
                        continue;
                    }
                    if ($snak->getPropertyId()->getSerialization() !== 'P813') {
                        continue;
                    }
                    $dataValue = $snak->getDataValue();
                    if (!$dataValue instanceof TimeValue) {
                        continue; // Not a time value, so there is no calendar model to fix
                    }
                    if ($dataValue->getCalendarModel() === TimeValue::CALENDAR_JULIAN) {
                        $julianSnaks[] = $snak;
                    }
                }

                if (empty($julianSnaks)) {
                    continue;
                }

                // Fix all of them in ONE edit. The reference is addressed by the
                // hash it had before the edit and the in-memory reference is not
                // updated, so a second edit for the same reference would reuse a
                // stale hash and a snak list without the first fix.
                $oldRefHash = $reference->getHash();
                $statementGuid = $statement->getGuid();
                $snakList = new SnakList($reference->getSnaks()->getArrayCopy());
                $fixedAny = false;

                foreach ($julianSnaks as $julianSnak) {
                    /** @var TimeValue $julianValue */
                    $julianValue = $julianSnak->getDataValue();
                    $snakList->removeSnak($julianSnak);

                    $fixedTimestamp = $this->getFixedTimestamp($julianValue->getTime());
                    if ($fixedTimestamp) {
                        $snakList->addSnak(new PropertyValueSnak(
                            new PropertyId('P813'),
                            new TimeValue(
                                $fixedTimestamp,
                                $julianValue->getTimezone(),
                                $julianValue->getBefore(),
                                $julianValue->getAfter(),
                                $julianValue->getPrecision(),
                                TimeValue::CALENDAR_GREGORIAN
                            )
                        ));
                        $fixedAny = true;
                        $output->write('.');
                    } else {
                        //TODO optionally remove rather than always doing so?
                        $output->write('x');
                    }
                }

                $editSummary = $fixedAny
                    ? 'Fix reference retrieval date'
                    : 'Removing bad reference retrieval date';

                try {
                    $this->wikibaseFactory->newReferenceSetter()->set(
                        new Reference($snakList),
                        $statementGuid,
                        $oldRefHash,
                        new EditInfo($editSummary)
                    );
                } catch (UsageException $e) {
                    // Report the failure and carry on with the next reference.
                    $output->writeln('');
                    $output->write($e->getMessage());
                }
            }
        }

        $output->writeln('');
    }

    return 0;
}