public function addReferences(MicroData $microData, $item, $sourceUrl)
 {
     // For every mapped property, take the string values found in the microdata
     // and add a reference for $sourceUrl to each statement whose entity value
     // has a configured regex that matches one of those strings.
     // Returns the number of references that were added.
     $addedCount = 0;
     foreach ($this->propMap as $propertyIdString => $schemaPropertyString) {
         // Entity id serialization => regex, for the current property
         $entityRegexes = $this->regexMap[$propertyIdString];

         // Gather candidate strings, leaving out anything containing '//' (URLs)
         $candidates = array();
         foreach ($microData->getProperty($schemaPropertyString, MicroData::PROP_STRING) as $rawValue) {
             if (!strstr($rawValue, '//')) {
                 $candidates[] = $rawValue;
             }
         }

         $propertyStatements = $item->getStatements()->getByPropertyId(new PropertyId($propertyIdString));
         foreach ($candidates as $candidate) {
             // Statements are objects, so changes made below are visible on the item
             foreach ($propertyStatements->getIterator() as $statement) {
                 $mainSnak = $statement->getMainSnak();

                 // Skip some/no value statements, and statements that already
                 // carry a reference to this URL's domain
                 if (!$mainSnak instanceof PropertyValueSnak
                     || DataModelUtils::statementHasReferenceForUrlWithSameDomain($statement, $sourceUrl)
                 ) {
                     continue;
                 }

                 /** @var EntityIdValue $entityIdValue */
                 $entityIdValue = $mainSnak->getDataValue();
                 $entityIdString = $entityIdValue->getEntityId()->getSerialization();

                 // Skip when no regex is configured for this entity (TODO log that?)
                 // or when the configured regex does not match the schema value
                 if (!array_key_exists($entityIdString, $entityRegexes)
                     || !preg_match($entityRegexes[$entityIdString], $candidate)
                 ) {
                     continue;
                 }

                 // Add the new reference!
                 $reference = DataModelUtils::getReferenceForUrl($sourceUrl);
                 try {
                     $referenceSetter = $this->wikibaseFactory->newReferenceSetter();
                     $editInfo = new EditInfo(urldecode($sourceUrl), EditInfo::NOTMINOR, EditInfo::BOT);
                     $referenceSetter->set($reference, $statement, null, $editInfo);
                     // Mirror the edit on the in-memory statement so later
                     // iterations see the reference that was just added
                     $statement->addNewReference($reference->getSnaks());
                     $addedCount++;
                 } catch (UsageException $e) {
                     // Deliberately ignored: a failed edit simply is not counted
                 }
             }
         }
     }

     return $addedCount;
 }
 /**
  * Adds a reference for $sourceUrl to every statement of a mapped property
  * whose value equals a date found in the given microdata.
  *
  * @param MicroData $microData microdata extracted from the source page
  * @param mixed $item item whose statements are inspected and updated in memory
  * @param string $sourceUrl URL the microdata was retrieved from
  *
  * @return int number of references added
  */
 public function addReferences(MicroData $microData, $item, $sourceUrl)
 {
     $referenceCounter = 0;
     foreach ($this->propMap as $propertyIdString => $schemaPropertyString) {
         /** @var TimeValue[] $timeValues */
         $timeValues = array();
         foreach ($microData->getProperty($schemaPropertyString, MicroData::PROP_STRING) as $propertyValue) {
             $dateString = trim($propertyValue);
             if ($dateString === '') {
                 // DateTime treats an empty string as "now"; without this guard
                 // a blank property would be turned into today's date and could
                 // wrongly match a statement dated today.
                 continue;
             }
             try {
                 $date = new DateTime($dateString);
                 $timeValues[] = $this->timeParser->parse($date->format('Y m d'));
             } catch (Exception $e) {
                 // Ignore failed parsing
             }
         }
         $statements = $item->getStatements()->getByPropertyId(new PropertyId($propertyIdString));
         foreach ($timeValues as $timeValue) {
             // Statements are objects, so no by-reference iteration is needed
             // for the in-memory update below to be visible on the item.
             foreach ($statements->getIterator() as $statement) {
                 $mainSnak = $statement->getMainSnak();
                 if (!$mainSnak instanceof PropertyValueSnak) {
                     // Ignore some and no value statements
                     continue;
                 }
                 if (DataModelUtils::statementHasReferenceForUrlWithSameDomain($statement, $sourceUrl)) {
                     // Ignore statements that already have this URL domain as a ref
                     continue;
                 }
                 if (!$timeValue->equals($mainSnak->getDataValue())) {
                     // The statement holds a different value than the parsed date
                     continue;
                 }
                 // Add the new reference!
                 $newReference = DataModelUtils::getReferenceForUrl($sourceUrl);
                 try {
                     $this->wikibaseFactory->newReferenceSetter()->set($newReference, $statement, null, new EditInfo(urldecode($sourceUrl), EditInfo::NOTMINOR, EditInfo::BOT));
                     // Keep our in-memory item copy up to date so later checks
                     // see the reference that was just added
                     $statement->addNewReference($newReference->getSnaks());
                     $referenceCounter++;
                 } catch (UsageException $e) {
                     // Deliberately ignored: a failed edit simply is not counted
                 }
             }
         }
     }
     return $referenceCounter;
 }
 /**
  * Runs the reference-adding process for each of the given items.
  *
  * For every item this loads the item, works out which mapped types it is an
  * instance of (P31), parses its Wikipedia pages to collect external links,
  * downloads those links and hands any microdata found to the referencers
  * registered for the item's types. Progress and results are written to
  * $output, and each fully handled item is marked as processed.
  *
  * @param OutputInterface $output
  * @param ItemId[] $itemIds
  * @param bool $force when true, items already marked as processed are handled again
  */
 private function executeForItemIds(OutputInterface $output, array $itemIds, $force)
 {
     $itemLookup = $this->wikibaseFactory->newItemLookup();
     $processedItemIdStrings = $this->getProcessedItemIdStrings();
     $loopCounter = 0;
     /** @var FormatterHelper $formatter */
     $formatter = $this->getHelper('formatter');
     foreach ($itemIds as $itemId) {
         $loopCounter++;
         $itemIdString = $itemId->getSerialization();
         $output->writeln('----------------------------------------------------');
         // Refresh the processed list every 10th item. The previous `!= 0`
         // check refreshed on all iterations EXCEPT every 10th, which defeated
         // the throttling the modulo is there for.
         if ($loopCounter % 10 === 0) {
             $processedItemIdStrings = $this->getProcessedItemIdStrings();
         }
         if (!$force && in_array($itemIdString, $processedItemIdStrings)) {
             $output->writeln($formatter->formatSection($itemIdString, 'Already processed'));
             continue;
         }
         try {
             $output->writeln($formatter->formatSection($itemIdString, 'Loading Item'));
             $item = $itemLookup->getItemForId($itemId);
         } catch (ItemLookupException $e) {
             $output->writeln($formatter->formatSection($itemIdString, 'Failed to load item (exception)', 'error'));
             continue;
         }
         if ($item === null) {
             $output->writeln($formatter->formatSection($itemIdString, 'Failed to load item (null)', 'error'));
             continue;
         }
         // Get the item types..
         $types = array();
         foreach ($item->getStatements()->getByPropertyId(new PropertyId('P31'))->toArray() as $instanceStatement) {
             $mainSnak = $instanceStatement->getMainSnak();
             if ($mainSnak instanceof PropertyValueSnak) {
                 /** @var EntityIdValue $instanceItemIdValue */
                 $instanceItemIdValue = $mainSnak->getDataValue();
                 $idSerialization = $instanceItemIdValue->getEntityId()->getSerialization();
                 if (array_key_exists($idSerialization, $this->instanceMap)) {
                     $types[] = $this->instanceMap[$idSerialization];
                 }
             }
         }
         if (empty($types)) {
             // The message previously contained a literal backslash ("Didn\t")
             $output->writeln($formatter->formatSection($itemIdString, 'Didn\'t find any useful instance of statements', 'comment'));
             continue;
         }
         // Note: only load Wikipedias
         $siteLinkList = DataModelUtils::getSitelinksWiteSiteIdSuffix($item->getSiteLinkList(), 'wiki');
         $output->writeln($formatter->formatSection($itemIdString, $siteLinkList->count() . ' Wikipedia pages to request'));
         $parseProgressBar = new ProgressBar($output, $siteLinkList->count());
         $parseProgressBar->display();
         // Fire off all parse requests first; they are waited on below
         /** @var PromiseInterface[] $parsePromises */
         $parsePromises = array();
         foreach ($siteLinkList->getIterator() as $siteLink) {
             $siteId = $siteLink->getSiteId();
             $pageName = $item->getSiteLinkList()->getBySiteId($siteId)->getPageName();
             $sourceMwFactory = $this->wmFactoryFactory->getFactory($siteId);
             $sourceParser = $sourceMwFactory->newParser();
             $pageIdentifier = new PageIdentifier(new Title($pageName));
             $parsePromises[$siteId] = $sourceParser->parsePageAsync($pageIdentifier);
             $parseProgressBar->advance();
         }
         // Collect the external links of every page that parsed successfully
         $links = array();
         foreach ($parsePromises as $siteId => $promise) {
             try {
                 $parseResult = $promise->wait();
                 if (array_key_exists('externallinks', $parseResult)) {
                     foreach ($parseResult['externallinks'] as $externalLink) {
                         // Ignore archive.org links
                         if (strstr($externalLink, 'archive.org') === false) {
                             $links[] = $this->normalizeExternalLink($externalLink);
                         }
                     }
                 }
             } catch (Exception $e) {
                 // Report the failure, then carry on with the remaining pages
                 $parseProgressBar->clear();
                 $output->writeln($formatter->formatSection($itemIdString, $e->getMessage(), 'error'));
                 $parseProgressBar->display();
             }
         }
         $parseProgressBar->finish();
         $output->writeln('');
         // De-duplicate the links and request them in random order
         $links = array_unique($links);
         shuffle($links);
         /** @var Request[] $linkRequests */
         $linkRequests = array();
         foreach ($links as $link) {
             $linkRequests[] = new Request('GET', $link, array('allow_redirects' => array('track_redirects' => true), 'connect_timeout' => 3.14, 'timeout' => 10));
         }
         $output->writeln($formatter->formatSection($itemIdString, count($linkRequests) . ' External links to (download, action)'));
         if (empty($linkRequests)) {
             continue;
         }
         // Make a bunch of requests and act on the responses
         $referencesAddedToItem = 0;
         // Two progress steps per request: one when the response arrives and
         // one when it has been acted upon
         $externalLinkProgressBar = new ProgressBar($output, count($linkRequests) * 2);
         $externalLinkProgressBar->display();
         // $referencesAddedToItem is captured BY REFERENCE: captured by value,
         // the closure only ever incremented its own copy and the total
         // reported after the pool finished was always 0.
         $pool = new Pool($this->externalLinkClient, $linkRequests, array('fulfilled' => function ($response) use($externalLinkProgressBar, $item, $types, &$referencesAddedToItem, $output) {
             // 1st advance point
             $externalLinkProgressBar->advance();
             if ($response instanceof ResponseInterface) {
                 $link = $response->getHeaderLine('X-GUZZLE-EFFECTIVE-URL');
                 $html = $response->getBody();
                 $referencesAddedFromLink = 0;
                 foreach ($this->microDataExtractor->extract($html) as $microData) {
                     foreach ($types as $type) {
                         if ($microData->hasType($type) && array_key_exists($type, $this->referencerMap)) {
                             foreach ($this->referencerMap[$type] as $referencer) {
                                 /** @var Referencer $referencer */
                                 $addedReferences = $referencer->addReferences($microData, $item, $link);
                                 $referencesAddedToItem += $addedReferences;
                                 $referencesAddedFromLink += $addedReferences;
                             }
                         }
                     }
                 }
                 if ($referencesAddedFromLink > 0) {
                     // Temporarily remove the bar so the message gets its own line
                     $externalLinkProgressBar->clear();
                     $output->write("\r");
                     $output->writeln($referencesAddedFromLink . ' reference(s) added from ' . urldecode($link));
                     $externalLinkProgressBar->display();
                 }
             }
             // 2nd advance point
             $externalLinkProgressBar->advance();
         }, 'rejected' => function () use($externalLinkProgressBar) {
             // TODO add this to some kind of verbose log?
             // NOTE(review): rejected requests advance the bar once while
             // fulfilled ones advance it twice; finish() below completes the
             // bar regardless.
             $externalLinkProgressBar->advance();
         }));
         $pool->promise()->wait();
         $externalLinkProgressBar->finish();
         $output->writeln('');
         $output->writeln($formatter->formatSection($itemIdString, $referencesAddedToItem . ' References added'));
         $this->markIdAsProcessed($itemId);
     }
 }
 /**
  * Builds a reference made of two snaks: P854 holding the given URL as a
  * string value, and P813 holding the current time value.
  *
  * @param string $url
  *
  * @return Reference
  */
 public static function getReferenceForUrl($url)
 {
     $urlSnak = new PropertyValueSnak(new PropertyId('P854'), new StringValue($url));
     $timeSnak = new PropertyValueSnak(new PropertyId('P813'), DataModelUtils::getCurrentTimeValue());

     return new Reference(array($urlSnak, $timeSnak));
 }
 /**
  * Adds a reference for $sourceUrl to every statement of a mapped property
  * whose value item has a main term equal (case-insensitively) to one of the
  * values obtained from the microdata.
  *
  * Value items are fetched through the item lookup and cached in memory for
  * as long as consecutive calls concern the same $item.
  *
  * @param MicroData $microData microdata extracted from the source page
  * @param mixed $item item whose statements are inspected and updated in memory
  * @param string $sourceUrl URL the microdata was retrieved from
  *
  * @return int number of references added
  */
 public function addReferences(MicroData $microData, $item, $sourceUrl)
 {
     // Only cache entity lookup stuff per item we are adding references for!
     // (but it can be reused for multiple source URLs of that item)
     if (!$item->getId()->equals($this->lastEntityId)) {
         $this->inMemoryEntityLookup = new InMemoryEntityLookup();
         // Remember which item the cache belongs to. This method never
         // recorded it before, so the cache was thrown away on every call.
         $this->lastEntityId = $item->getId();
     }
     $referenceCounter = 0;
     foreach ($this->callbackMap as $propertyIdString => $valueGetterFunction) {
         $values = $valueGetterFunction($microData);
         $statements = $item->getStatements()->getByPropertyId(new PropertyId($propertyIdString));
         foreach ($values as $value) {
             // Statements are objects, so no by-reference iteration is needed
             // for the in-memory update below to be visible on the item.
             foreach ($statements->getIterator() as $statement) {
                 $mainSnak = $statement->getMainSnak();
                 if (!$mainSnak instanceof PropertyValueSnak) {
                     // Ignore some and no value statements
                     continue;
                 }
                 if (DataModelUtils::statementHasReferenceForUrlWithSameDomain($statement, $sourceUrl)) {
                     // Ignore statements that already have this URL domain as a ref.
                     // Checked before the lookup below so no item is fetched for a
                     // statement that would be skipped anyway.
                     continue;
                 }
                 /** @var EntityIdValue $valueEntityIdValue */
                 $valueEntityIdValue = $mainSnak->getDataValue();
                 /** @var ItemId $valueItemId */
                 $valueItemId = $valueEntityIdValue->getEntityId();
                 if ($this->inMemoryEntityLookup->hasEntity($valueItemId)) {
                     $valueItem = $this->inMemoryEntityLookup->getEntity($valueItemId);
                 } else {
                     $valueItem = $this->wikibaseFactory->newItemLookup()->getItemForId($valueItemId);
                     if ($valueItem === null) {
                         // The value item could not be loaded, so there are no terms to compare
                         continue;
                     }
                     $this->inMemoryEntityLookup->addEntity($valueItem);
                 }
                 // Strict comparison so numeric-looking terms (e.g. "007" vs "7") are not treated as equal
                 if (!in_array(strtolower($value), DataModelUtils::getMainTermsAsLowerCaseStrings($valueItem->getFingerprint()), true)) {
                     // Ignore things that don't appear to have the correct value
                     continue;
                 }
                 // Add the new reference!
                 $newReference = DataModelUtils::getReferenceForUrl($sourceUrl);
                 try {
                     $this->wikibaseFactory->newReferenceSetter()->set($newReference, $statement, null, new EditInfo(urldecode($sourceUrl), EditInfo::NOTMINOR, EditInfo::BOT));
                     // Keep our in-memory item copy up to date so later checks
                     // see the reference that was just added
                     $statement->addNewReference($newReference->getSnaks());
                     $referenceCounter++;
                 } catch (UsageException $e) {
                     // Deliberately ignored: a failed edit simply is not counted
                 }
             }
         }
     }
     return $referenceCounter;
 }