/**
 * Builds the map from Schema.org type names to the referencers that can
 * create references for items of that type.
 *
 * This method needs to be changed to allow the script to know how to create more references.
 *
 * @param WikibaseFactory $wikibaseFactory
 * @param SparqlQueryRunner $sparqlQueryRunner
 *
 * @return array like array( 'Schema.org_type' => Referencer[] )
 */
public function getReferencerMap(WikibaseFactory $wikibaseFactory, SparqlQueryRunner $sparqlQueryRunner)
{
    return array(
        'Book' => array(
            new ThingReferencer(
                $wikibaseFactory,
                array(
                    'P50' => 'author',
                    'P110' => 'illustrator',
                    'P123' => 'publisher',
                    'P136' => 'genre',
                )
            ),
            new DateReferencer($wikibaseFactory, array('P577' => 'datePublished')),
        ),
        'Person' => array(
            new ThingReferencer(
                $wikibaseFactory,
                array(
                    'P7' => 'sibling',
                    'P9' => 'sibling',
                    'P19' => 'birthPlace',
                    'P20' => 'deathPlace',
                    'P21' => 'gender',
                    'P22' => 'parent',
                    'P25' => 'parent',
                    'P26' => 'spouse',
                    'P40' => 'children',
                    'P27' => 'nationality',
                    'P734' => 'familyName',
                    'P735' => 'givenName',
                )
            ),
            new DateReferencer(
                $wikibaseFactory,
                array(
                    'P569' => 'birthDate',
                    'P570' => 'deathDate',
                )
            ),
        ),
        'Movie' => array(
            new ThingReferencer(
                $wikibaseFactory,
                array(
                    'P57' => 'director',
                    'P161' => 'actor',
                    'P162' => 'producer',
                    'P1040' => 'editor',
                    'P58' => 'author',
                    // A single property may map to several Schema.org properties.
                    'P272' => array('creator', 'productionCompany'),
                    'P364' => 'inLanguage',
                    'P674' => 'character',
                    'P840' => 'contentLocation',
                    'P166' => 'award',
                    'P1657' => 'contentRating',
                    'P2360' => 'audience',
                )
            ),
            new MultiTextReferencer(
                $wikibaseFactory,
                array('P136' => 'genre'),
                array(
                    // The regex map is built inside a closure, so the SPARQL query
                    // only runs when the closure is actually invoked.
                    'P136' => function () use ($sparqlQueryRunner) {
                        // Q201658 is presumably the "film genre" item - confirm on Wikidata.
                        $filmGenreData = $sparqlQueryRunner->getItemIdStringsAndLabelsFromInstanceOf('Q201658');
                        $filmGenreRegexMap = array();
                        foreach ($filmGenreData as $itemIdString => $label) {
                            // Labels are free text: escape regex metacharacters (and the
                            // "/" delimiter) so a label such as "drama/comedy" or
                            // "film (genre)" cannot break or change the pattern.
                            $quotedLabel = preg_quote($label, '/');

                            // Make a trailing " film"/" films" optional, or allow an
                            // optional " film" suffix when the label does not have one.
                            if (preg_match('/ films?/i', $quotedLabel)) {
                                $regex = '/^' . preg_replace('/ films?/i', '( film)?', $quotedLabel) . '$/i';
                            } else {
                                $regex = '/^' . $quotedLabel . '( film)?' . '$/i';
                            }

                            // Accept the common "sci-fi" abbreviation as well.
                            $regex = preg_replace('/science ?fiction/i', '(science ?fiction|sci-fi)', $regex);
                            $filmGenreRegexMap[$itemIdString] = $regex;
                        }

                        return $filmGenreRegexMap;
                    },
                )
            ),
            new DateReferencer($wikibaseFactory, array('P577' => 'datePublished')),
        ),
    );
}
/**
 * Entry point of the referencer command.
 *
 * Resolves the temp directory, prints the banner, works out which items to
 * process (a single explicit item or the result of a SPARQL query), logs in
 * and hands the shuffled item list over to executeForItemIds().
 *
 * @param InputInterface $input
 * @param OutputInterface $output
 *
 * @return int 0 on completion
 *
 * @throws RuntimeException when the temp dir is not writable, the user is not
 *         configured, no item source was given, or the login fails
 */
protected function execute(InputInterface $input, OutputInterface $output)
{
    $this->initServices();

    // Fall back to the system temp dir unless one was given explicitly.
    $requestedTmpDir = $input->getOption('tmpDir');
    $this->tmpDir = is_string($requestedTmpDir) ? $requestedTmpDir : sys_get_temp_dir();
    if (!is_writable($this->tmpDir)) {
        throw new RuntimeException('Temp dir: ' . $this->tmpDir . ' is not writable');
    }

    /** @var FormatterHelper $helper */
    $helper = $this->getHelper('formatter');
    $banner = array(
        'Wikidata Referencer',
        'This script is in development, If something goes wrong while you use it it is your fault!',
        'Temp file: ' . $this->getProcessedListPath(),
    );
    $output->writeln($helper->formatBlock($banner, 'info'));

    // Resolve the credentials of the requested user.
    $userName = $input->getOption('user');
    $credentials = $this->appConfig->offsetGet('users.' . $userName);
    if ($credentials === null) {
        throw new RuntimeException('User not found in config');
    }

    $queryParts = $input->getOption('sparql');
    $singleItem = $input->getOption('item');

    // Either an explicit item or some SPARQL query parts are required.
    if ($singleItem === null && empty($queryParts)) {
        throw new RuntimeException('You must pass an instance id or an item');
    }

    // An explicitly passed item is always processed with force enabled.
    $forced = $singleItem !== null;
    if ($forced) {
        $output->writeln($helper->formatSection('Init', 'Using item passed in item parameter'));
        $targets = array(new ItemId($singleItem));
    } else {
        $output->writeln($helper->formatSection('Init', 'Using items from SPARQL QUERY (running)'));
        $targets = $this->sparqlQueryRunner->getItemIdsForSimpleQueryParts($queryParts);
    }

    shuffle($targets);
    $output->writeln($helper->formatSection('Init', 'Got ' . count($targets) . ' items to investigate'));

    // Log in to Wikidata
    $apiUser = new ApiUser($credentials['username'], $credentials['password']);
    if (!$this->wikibaseApi->login($apiUser)) {
        throw new RuntimeException('Failed to log in to wikibase wiki');
    }

    $this->executeForItemIds($output, $targets, $forced);

    return 0;
}
/**
 * Fixes reference retrieval dates (P813) that use the Julian calendar model.
 *
 * Items are taken from the "item" option or, when that is empty, from a
 * SPARQL query for references whose P813 value uses calendar model Q1985786.
 * For every affected reference the P813 snak is replaced with a Gregorian
 * one when getFixedTimestamp() yields a timestamp, and removed otherwise.
 *
 * Progress is written per item: "." for a fixed date, "x" for a removed one.
 *
 * @param InputInterface $input
 * @param OutputInterface $output
 *
 * @return int 0 on completion, -1 when the login fails
 *
 * @throws RuntimeException when the requested user is not in the config
 */
protected function execute(InputInterface $input, OutputInterface $output)
{
    // Get options
    $user = $input->getOption('user');
    $userDetails = $this->appConfig->offsetGet('users.' . $user);
    if ($userDetails === null) {
        throw new RuntimeException('User not found in config');
    }

    $items = $input->getOption('item');
    if (empty($items)) {
        $output->writeln('Running SPARQL query to find items to check');
        $queryBuilder = new QueryBuilder(array(
            'prov' => 'http://www.w3.org/ns/prov#',
            'wd' => 'http://www.wikidata.org/entity/',
            'wikibase' => 'http://wikiba.se/ontology#',
            'prv' => 'http://www.wikidata.org/prop/reference/value/',
        ));
        $itemIds = $this->sparqlQueryRunner->getItemIdsFromQuery(
            $queryBuilder
                ->select('?item')
                ->where('?ref', 'prv:P813', '?value')
                ->also('?value', 'wikibase:timeCalendarModel', 'wd:Q1985786')
                ->also('?st', 'prov:wasDerivedFrom', '?ref')
                ->also('?item', '?pred', '?st')
                ->limit(10000)
                ->__toString()
        );
    } else {
        /** @var ItemId[] $itemIds */
        $itemIds = array();
        foreach (array_unique($items) as $itemIdString) {
            $itemIds[] = new ItemId($itemIdString);
        }
    }
    $itemIds = array_unique($itemIds);
    $output->writeln('Running for ' . count($itemIds) . ' items');

    // Log in to Wikidata
    $loggedIn = $this->wikibaseApi->login(new ApiUser($userDetails['username'], $userDetails['password']));
    if (!$loggedIn) {
        $output->writeln('Failed to log in to wikidata wiki');
        return -1;
    }

    $itemLookup = $this->wikibaseFactory->newItemLookup();
    // One setter is enough for the whole run; no need to build one per edit.
    $referenceSetter = $this->wikibaseFactory->newReferenceSetter();

    foreach ($itemIds as $itemId) {
        $output->write($itemId->getSerialization() . ' ');

        $item = $itemLookup->getItemForId($itemId);
        if ($item === null) {
            // The lookup yields null when the item cannot be loaded; skip it
            // instead of failing on a method call on null.
            $output->writeln('item could not be loaded, skipping');
            continue;
        }

        foreach ($item->getStatements()->getIterator() as $statement) {
            foreach ($statement->getReferences() as $reference) {
                /** @var Reference $reference */
                // NOTE(review): each matching snak triggers its own edit using the
                // hash read from the in-memory reference; a reference holding more
                // than one Julian P813 snak would reuse the pre-edit hash for the
                // second edit - presumably rare, confirm.
                foreach ($reference->getSnaks()->getIterator() as $snak) {
                    if (!($snak instanceof PropertyValueSnak)) {
                        continue;
                    }
                    if ($snak->getPropertyId()->getSerialization() !== 'P813') {
                        continue;
                    }

                    $dataValue = $snak->getDataValue();
                    if (!($dataValue instanceof TimeValue)) {
                        // Not a usable time value, nothing we can inspect or fix.
                        continue;
                    }

                    // We can assume ALL retrieval dates should be Gregorian!
                    if ($dataValue->getCalendarModel() !== TimeValue::CALENDAR_JULIAN) {
                        continue;
                    }

                    $oldRefHash = $reference->getHash();
                    $statementGuid = $statement->getGuid();

                    // Work on a copy so the reference being iterated is not mutated.
                    $snakList = new SnakList($reference->getSnaks()->getArrayCopy());
                    $snakList->removeSnak($snak);

                    $fixedTimestamp = $this->getFixedTimestamp($dataValue->getTime());
                    if ($fixedTimestamp) {
                        $snakList->addSnak(new PropertyValueSnak(
                            new PropertyId('P813'),
                            new TimeValue(
                                $fixedTimestamp,
                                $dataValue->getTimezone(),
                                $dataValue->getBefore(),
                                $dataValue->getAfter(),
                                $dataValue->getPrecision(),
                                TimeValue::CALENDAR_GREGORIAN
                            )
                        ));
                        $editSummary = 'Fix reference retrieval date';
                        $output->write('.');
                    } else {
                        //TODO optionally remove rather than always doing so?
                        $editSummary = 'Removing bad reference retrieval date';
                        $output->write('x');
                    }

                    try {
                        $referenceSetter->set(
                            new Reference($snakList),
                            $statementGuid,
                            $oldRefHash,
                            new EditInfo($editSummary)
                        );
                    } catch (UsageException $e) {
                        // Report the API error and carry on with the remaining snaks.
                        $output->writeln('');
                        $output->write($e->getMessage());
                    }
                }
            }
        }
        $output->writeln('');
    }

    return 0;
}