/**
 * Registers a video group result in the given result set.
 *
 * The group's 'items' entry is a lazily evaluated closure that currently
 * always yields an empty array: no individual video is extracted from the node.
 *
 * @param GoogleDom        $dom       the document being parsed (not read by this parser)
 * @param \DomElement      $node      the node matched as a video group (not read by this parser)
 * @param IndexedResultSet $resultSet the result set the new item is added to
 */
public function parse(GoogleDom $dom, \DomElement $node, IndexedResultSet $resultSet)
{
    $item = [
        // Nothing is read from the node or the dom, so nothing is captured.
        'items' => function () {
            return [];
        },
    ];

    $resultSet->addItem(new BaseResult(NaturalResultType::VIDEO_GROUP, $item));
}
/**
 * Parses an "in the news" group and adds it to the result set.
 *
 * Each "card-section" div found two levels below the group node is converted
 * with parseItem() and collected, in document order, under the 'news' key.
 *
 * @param GoogleDom        $googleDOM the document being parsed
 * @param \DomElement      $group     the node holding the news cards
 * @param IndexedResultSet $resultSet the result set the new item is added to
 */
public function parse(GoogleDom $googleDOM, \DomElement $group, IndexedResultSet $resultSet)
{
    $cards = $googleDOM->getXpath()->query(
        "div/div[contains(concat(' ',normalize-space(@class),' '),' card-section ')]",
        $group
    );

    $news = [];
    foreach ($cards as $card) {
        $news[] = $this->parseItem($googleDOM, $card);
    }

    $resultSet->addItem(new BaseResult(NaturalResultType::IN_THE_NEWS, ['news' => $news]));
}
/**
 * Extracts the title, url, destination and description of a result node.
 *
 * @param GoogleDom   $dom  the document being parsed
 * @param \DomElement $node the result node to read from
 *
 * @return array|null an array with the keys 'title', 'url', 'destination' and
 *                    'description', or null when the node has no title link
 */
protected function parseNode(GoogleDom $dom, \DomElement $node)
{
    // Find the title/url link; without it there is nothing to extract.
    /* @var $titleLink \DOMElement */
    $titleLink = $dom->xpathQuery("descendant::h3[@class='r'][1]/a", $node)->item(0);
    if (!$titleLink) {
        return null;
    }

    $citeNode = $dom->cssQuery('div.f.kv>cite', $node)->item(0);
    $snippetNode = $dom->xpathQuery("descendant::span[@class='st']", $node)->item(0);

    // Both the destination and the description are optional.
    $destination = null;
    if ($citeNode) {
        $destination = $citeNode->nodeValue;
    }

    $description = null;
    if ($snippetNode) {
        $description = trim($snippetNode->nodeValue);
    }

    return [
        'title'       => $titleLink->nodeValue,
        'url'         => $dom->getUrl()->resolveAsString($titleLink->getAttribute('href')),
        'destination' => $destination,
        'description' => $description,
    ];
}
/**
 * Parses a tweets carousel and adds it to the result set.
 *
 * Nothing is added when the node has no link inside its first h3.r heading.
 *
 * @param GoogleDom        $dom       the document being parsed
 * @param \DomElement      $node      the node matched as a tweets carousel
 * @param IndexedResultSet $resultSet the result set the new item is added to
 */
public function parse(GoogleDom $dom, \DomElement $node, IndexedResultSet $resultSet)
{
    /* @var $aTag \DOMElement */
    $aTag = $dom->getXpath()->query("descendant::h3[@class='r'][1]//a", $node)->item(0);
    if (!$aTag) {
        return;
    }

    $title = $aTag->nodeValue;

    // The title is not guaranteed to contain an "@handle". Reading $match[0]
    // unconditionally raised an undefined offset error in that case, so the
    // user is only taken from the match when there is one.
    // The full match (leading "@" included) is kept for backward compatibility.
    $user = null;
    if (preg_match('/@([A-Za-z0-9_]{1,15})/', $title, $match) === 1) {
        $user = $match[0];
    }

    $data = [
        'title' => $title,
        'url'   => $aTag->getAttribute('href'),
        'user'  => $user,
    ];

    $resultSet->addItem(new BaseResult(NaturalResultType::TWEETS_CAROUSEL, $data));
}
/**
 * Builds a shopping group product result from a product node.
 *
 * Every field is a closure, so the dom is only queried when the field is
 * evaluated. Each closure yields null when its element is missing, except
 * 'url' which falls back to the document url resolved against '/'.
 *
 * @param GoogleDom $googleDOM the document being parsed
 * @param \DOMNode  $node      the product node to read from
 *
 * @return BaseResult a result typed as AdwordsResultType::SHOPPING_GROUP_PRODUCT
 */
public function parseItem(GoogleDom $googleDOM, \DOMNode $node)
{
    $data = [
        'title' => function () use ($googleDOM, $node) {
            $titleLink = $googleDOM->getXpath()
                ->query(Css::toXPath('.pla-unit-title-link'), $node)
                ->item(0);

            return $titleLink ? $titleLink->nodeValue : null;
        },
        'url' => function () use ($node, $googleDOM) {
            $titleLink = $googleDOM->getXpath()
                ->query(Css::toXPath('.pla-unit-title-link'), $node)
                ->item(0);

            if ($titleLink) {
                return $googleDOM->getUrl()->resolveAsString($titleLink->getAttribute('href'));
            }

            // No link: fall back to the root of the current document url.
            return $googleDOM->getUrl()->resolve('/');
        },
        'image' => function () use ($node, $googleDOM) {
            $image = $googleDOM->getXpath()
                ->query(Css::toXPath('.pla-unit-img-container-link img'), $node)
                ->item(0);

            return $image ? $image->getAttribute('src') : null;
        },
        'target' => function () use ($node, $googleDOM) {
            $target = $googleDOM->getXpath()
                ->query(Css::toXPath('div._mC span.a'), $node)
                ->item(0);

            return $target ? $target->nodeValue : null;
        },
        'price' => function () use ($node, $googleDOM) {
            $price = $googleDOM->getXpath()
                ->query(Css::toXPath('._QD._pvi'), $node)
                ->item(0);

            return $price ? $price->nodeValue : null;
        },
    ];

    return new BaseResult(AdwordsResultType::SHOPPING_GROUP_PRODUCT, $data);
}
/**
 * Parses an adwords ad node and adds it to the result set.
 *
 * Every field is a closure, so the dom is only queried when the field is
 * evaluated. Each closure yields null when its element is missing, except
 * 'url' which falls back to the document url resolved against '/'.
 *
 * @param GoogleDom        $googleDOM the document being parsed
 * @param \DomElement      $node      the ad node to read from
 * @param IndexedResultSet $resultSet the result set the new item is added to
 */
public function parse(GoogleDom $googleDOM, \DomElement $node, IndexedResultSet $resultSet)
{
    $data = [
        'title' => function () use ($googleDOM, $node) {
            // The title is read from the second link of the heading.
            $headingLink = $googleDOM->getXpath()->query('descendant::h3/a[2]', $node)->item(0);

            return $headingLink ? $headingLink->nodeValue : null;
        },
        'url' => function () use ($node, $googleDOM) {
            $headingLink = $googleDOM->getXpath()->query('descendant::h3/a[2]', $node)->item(0);

            if ($headingLink) {
                return $googleDOM->getUrl()->resolveAsString($headingLink->getAttribute('href'));
            }

            // No link: fall back to the root of the current document url.
            return $googleDOM->getUrl()->resolve('/');
        },
        'visurl' => function () use ($node, $googleDOM) {
            $cite = $googleDOM->getXpath()
                ->query(Css::toXPath('div.ads-visurl>cite'), $node)
                ->item(0);

            return $cite ? $cite->nodeValue : null;
        },
        'description' => function () use ($node, $googleDOM) {
            $creative = $googleDOM->getXpath()
                ->query(Css::toXPath('div.ads-creative'), $node)
                ->item(0);

            return $creative ? $creative->nodeValue : null;
        },
    ];

    $resultSet->addItem(new BaseResult(AdwordsResultType::AD, $data));
}
/**
 * Parses a large video result and adds it to the result set.
 *
 * The item is typed as both CLASSICAL_VIDEO and CLASSICAL. Its 'description'
 * and 'thumb' are always null and 'videoLarge' is always true; 'videoCover'
 * is a closure evaluated lazily.
 *
 * @param GoogleDom        $dom       the document being parsed
 * @param \DomElement      $node      the result node to read from
 * @param IndexedResultSet $resultSet the result set the new item is added to
 *
 * @return false|null false when the node has no title link, nothing otherwise
 */
public function parse(GoogleDom $dom, \DomElement $node, IndexedResultSet $resultSet)
{
    $xpath = $dom->getXpath();

    $titleLink = $xpath->query("descendant::h3[@class='r'][1]/a", $node)->item(0);
    if (!$titleLink) {
        return false;
    }

    $cite = $xpath->query("descendant::div[@class='f kv _SWb']/cite", $node)->item(0);

    $destination = null;
    if ($cite) {
        $destination = $cite->nodeValue;
    }

    $data = [
        'title'       => $titleLink->nodeValue,
        'url'         => $dom->getUrl()->resolveAsString($titleLink->getAttribute('href')),
        'destination' => $destination,
        'description' => null,
        'videoLarge'  => true,
        'thumb'       => null,
        'videoCover'  => function () use ($dom, $node) {
            $cover = $dom->cssQuery('._ELb img', $node)->item(0);
            if (!$cover) {
                return null;
            }

            // TODO 1p gif ?
            return MediaFactory::createMediaFromSrc($cover->getAttribute('src'));
        },
    ];

    $types = [NaturalResultType::CLASSICAL_VIDEO, NaturalResultType::CLASSICAL];
    $resultSet->addItem(new BaseResult($types, $data));
}
/**
 * Describes a result node as a set of lazily evaluated fields.
 *
 * Each field is a closure that queries the dom when it is evaluated and
 * yields null when the element it looks for is missing.
 *
 * @param GoogleDom   $dom  the document being parsed
 * @param \DOMElement $node the result node to read from
 *
 * @return array closures indexed by 'title', 'url', 'destination' and 'description'
 */
protected function parseNode(GoogleDom $dom, \DOMElement $node)
{
    $title = function () use ($dom, $node) {
        $link = $dom->cssQuery('.rc .r a', $node)->item(0);

        // TODO ERROR
        return $link ? $link->nodeValue : null;
    };

    $url = function () use ($dom, $node) {
        $link = $dom->cssQuery('.rc .r a', $node)->item(0);

        // TODO ERROR
        return $link ? $dom->getUrl()->resolveAsString($link->getAttribute('href')) : null;
    };

    $destination = function () use ($dom, $node) {
        $cite = $dom->cssQuery('.rc .s cite', $node)->item(0);

        // TODO ERROR
        return $cite ? $cite->nodeValue : null;
    };

    $description = function () use ($dom, $node) {
        $text = $dom->cssQuery('.mod ._Tgc', $node)->item(0);

        // TODO ERROR
        return $text ? $text->nodeValue : null;
    };

    return [
        'title'       => $title,
        'url'         => $url,
        'destination' => $destination,
        'description' => $description,
    ];
}
/**
 * Describes one local pack entry as a set of lazily evaluated fields.
 *
 * Each field is a closure that queries the dom when it is evaluated and
 * yields null when the information cannot be found.
 *
 * @param mixed     $localPack the node of the local pack entry, used as query context
 * @param GoogleDom $dom       the document being parsed
 *
 * @return array closures indexed by 'title', 'url', 'street', 'stars', 'review' and 'phone'
 */
private function parseItem($localPack, GoogleDom $dom)
{
    // Shared shape of the simple fields: text of the first node matching a css selector.
    $textOf = function ($selector) use ($localPack, $dom) {
        return function () use ($selector, $localPack, $dom) {
            $match = $dom->cssQuery($selector, $localPack)->item(0);

            return $match ? $match->nodeValue : null;
        };
    };

    return [
        'title' => $textOf('._rl'),
        'url' => function () use ($localPack, $dom) {
            // The url is taken from the second link of the entry.
            $link = $dom->getXpath()->query('descendant::a', $localPack)->item(1);

            return $link ? $link->getAttribute('href') : null;
        },
        'street' => $textOf('._iPk>span.rllt__details>div:nth-child(3)>span'),
        'stars' => $textOf('._PXi'),
        'review' => function () use ($localPack, $dom) {
            $details = $dom->cssQuery('._iPk>span.rllt__details>div:nth-child(1)', $localPack)->item(0);
            if (!$details) {
                return null;
            }

            // Only accepted when the line is empty or starts with a plain text node.
            $children = $details->childNodes;
            if ($children->length > 0 && !($children->item(0) instanceof \DOMText)) {
                return null;
            }

            // Keep what comes before the first middle dot separator.
            $parts = explode('·', $details->nodeValue);

            return trim($parts[0]);
        },
        'phone' => function () use ($localPack, $dom) {
            $details = $dom->cssQuery('._iPk>span.rllt__details>div:nth-child(3)', $localPack)->item(0);
            if (!$details) {
                return null;
            }

            // The phone is read from the second child, provided it is a text node.
            $children = $details->childNodes;
            $second = $children->length > 1 ? $children->item(1) : null;
            if ($second instanceof \DOMText) {
                return trim($second->nodeValue, ' ·');
            }

            return null;
        },
    ];
}
/**
 * Parses an image group and adds it to the result set.
 *
 * 'images' holds one parseItem() result per image link found in the node.
 * 'moreUrl' is a lazily evaluated closure that resolves the href of the
 * "_Icb _kk _wI" link, or falls back to the document url resolved against '/'.
 *
 * @param GoogleDom        $googleDOM the document being parsed
 * @param \DomElement      $node      the node matched as an image group
 * @param IndexedResultSet $resultSet the result set the new item is added to
 */
public function parse(GoogleDom $googleDOM, \DomElement $node, IndexedResultSet $resultSet)
{
    $moreUrl = function () use ($node, $googleDOM) {
        $moreLink = $googleDOM->getXpath()
            ->query('descendant::div[@class="_Icb _kk _wI"]/a', $node)
            ->item(0);

        if ($moreLink) {
            return $googleDOM->getUrl()->resolveAsString($moreLink->getAttribute('href'));
        }

        return $googleDOM->getUrl()->resolve('/');
    };

    // TODO: detect no image (google dom update)
    $images = [];
    foreach ($googleDOM->cssQuery('.rg_ul>div._ZGc a', $node) as $imageLink) {
        $images[] = $this->parseItem($googleDOM, $imageLink);
    }

    $data = [
        'images'  => $images,
        'moreUrl' => $moreUrl,
    ];

    $resultSet->addItem(new BaseResult(NaturalResultType::IMAGE_GROUP, $data));
}
/**
 * Selects every direct child of the #rso element located under div#ires.
 *
 * @inheritdoc
 */
protected function getParsableItems(GoogleDom $googleDom)
{
    return $googleDom->getXpath()->query("//div[@id = 'ires']/*[@id = 'rso']/*");
}
/**
 * Selects the nodes matched by the xpath expression held in $this->pathToItems.
 *
 * @inheritdoc
 */
protected function getParsableItems(GoogleDom $googleDom)
{
    return $googleDom->getXpath()->query($this->pathToItems);
}
/**
 * Creates the result set the parsed items are added to.
 *
 * The set is constructed with the 'start' parameter of the document url
 * (0 when absent), cast to an integer, plus one.
 *
 * @param GoogleDom $googleDom the document being parsed
 *
 * @return IndexedResultSet
 */
protected function createResultSet(GoogleDom $googleDom)
{
    $url = $googleDom->getUrl();
    $firstIndex = 1 + (int) $url->getParamValue('start', 0);

    return new IndexedResultSet($firstIndex);
}