/**
 * Loads the HTML delivered by the data source into the document and collects
 * search results, sponsored results and suggestions into a fresh Result.
 *
 * @return \Sengin\Extraction\Result
 * @throws Exception\Exception when the source data is empty or cannot be loaded as HTML
 */
public function extract()
{
    $data = $this->_dataSource->getData();
    $document = $this->getDocument();

    $message = "Can't load html data from given source";

    // DOMDocument::loadHTML() cannot parse empty input (it warns and fails, or
    // throws on newer PHP versions). Reject it up front so callers always get
    // the documented exception and the libxml error mode is never left switched.
    if ($data === null || $data === false || $data === '') {
        throw new Exception\Exception($message);
    }

    /*
     * If disable libxml errors is set to true, markup problems are collected
     * internally instead of being emitted as warnings such as:
     * Warning: DOMDocument::loadHTML(): htmlParseEntityRef: expecting ';' in Entity
     */
    $previous = libxml_use_internal_errors($this->getDisableLibXmlErrors());
    $isLoaded = $document->loadHTML($data);

    // Internally collected errors stay in memory until cleared. Only clear them
    // when the caller was not already collecting errors, so its buffer is kept.
    if (!$previous) {
        libxml_clear_errors();
    }
    libxml_use_internal_errors($previous);

    if (!$isLoaded) {
        throw new Exception\Exception($message);
    }

    $result = new Result();
    $this->extractSearchResults($result);
    $this->extractSponsoredResults($result);
    $this->extractSuggstions($result);

    return $result;
}
/**
 * Returns the data of the underlying data source, using a file cache when possible.
 *
 * The cache is bypassed when it is disabled in the options or when the data
 * source is not Cachable. Otherwise the data is stored in
 * "<cacheDir>/<cacheKey>.sengincache" and reused as long as the file's age in
 * seconds does not exceed the configured expiration time.
 *
 * @return mixed the data from the cache file or from the underlying data source
 */
public function getData()
{
    if (!$this->_options->isCacheEnabled() || !($this->_dataSource instanceof Cachable)) {
        return $this->_dataSource->getData();
    }

    $cacheFile = $this->_options->getCacheDir() . '/' . $this->_dataSource->getCacheKey() . '.sengincache';

    if (is_file($cacheFile)) {
        $expirationTime = $this->_options->getExpirationTime();
        // Age of the cache entry in seconds.
        $age = time() - filemtime($cacheFile);

        if ($age <= $expirationTime) {
            $cached = file_get_contents($cacheFile);
            if ($cached !== false) {
                // from cache
                return $cached;
            }
            // The file vanished or became unreadable after the is_file() check:
            // fall through and fetch fresh data instead of returning false.
        } else {
            // cache expired, remove cache data
            unlink($cacheFile);
        }
    }

    $data = $this->_dataSource->getData();

    // save cache data
    file_put_contents($cacheFile, $data);

    return $data;
}