/**
 * Collects the product links found on the loaded listing page and queues one
 * Gearman 'parse' task per link, then runs the whole task set.
 *
 * Links without a host component are turned into absolute URLs using the
 * scheme and host of $this->url. When at least one task was queued, an
 * existing output file in the parse directory is renamed to
 * "<name>_toupdate.csv" before the set is handed to the Gearman client.
 *
 * @return void
 */
public function processContent()
{
    $body = $this->response->getBody();
    $domDocument = new Query($body);

    // Scheme and host of the page being processed do not change per link,
    // so they are resolved once instead of on every iteration.
    $baseUrl = parse_url($this->url, PHP_URL_SCHEME) . '://' . parse_url($this->url, PHP_URL_HOST);

    foreach ($domDocument->execute('ul.prod_grd.three_per_row.prod_lst_square_ic li a.lst_a') as $link) {
        $href = (string) $link->getAttribute('href');

        // An anchor without a usable href would only re-queue the bare site root.
        if ($href === '') {
            continue;
        }

        if (!parse_url($href, PHP_URL_HOST)) {
            // Without a separating slash the host and the path would be fused
            // into one invalid host name ("example.compage.html").
            // NOTE(review): path-relative hrefs are resolved against the site
            // root here, not against the directory of $this->url - confirm
            // that is acceptable for the pages being crawled.
            if (!in_array($href[0], array('/', '?', '#'), true)) {
                $href = '/' . $href;
            }
            $href = $baseUrl . $href;
        }

        $task = new \Net_Gearman_Task(
            'parse',
            array('arg' => array('url' => $href, 'file' => $this->fileName)),
            null,
            \Net_Gearman_Task::JOB_NORMAL
        );
        $task->attachCallback($this->getTaskFinishedCallback());
        $this->set->addTask($task);
    }

    $jobCount = $this->set->count();
    if ($jobCount > 0) {
        $parseDir = $this->config->getSystemConfig()->getParseDir();
        $filename = $parseDir . $this->fileName;

        // Keep the previous result file around under a "_toupdate" name.
        if (file_exists($filename)) {
            rename($filename, $parseDir . basename($this->fileName, '.csv') . '_toupdate.csv');
        }

        $this->set->attachCallback($this->getJobFinishedCallback());
        Logger::addMessage('Jobs pushed to server: ' . $jobCount);
        $this->jobTotal = $jobCount;
        $this->client->runSet($this->set);
    }
}
/**
 * Extracts every configured field from the loaded page and appends the
 * result as a single row to the CSV file named by $this->fileName.
 *
 * Each entry returned by getFields() may define:
 *  - 'selector' + 'getter': nodes matching the selector are read either
 *    through the property named by 'getter' (when no 'attr' is given) or by
 *    calling the method named by 'getter' with 'attr' as its argument;
 *  - 'callable': name of a method on this object, invoked with the field
 *    name and the DOM query object;
 *  - 'required': when truthy, the field must have produced a value.
 *
 * Collected values are joined with ',' after literal commas inside the values
 * have been replaced with '[comma]'; fields without a value become ''.
 *
 * @return void
 *
 * @throws ExpectedParamException when a required field produced no value
 */
public function processContent()
{
    $body = $this->response->getBody();
    $domDocument = new Query($body);
    $this->content = array();

    foreach ($this->getFields() as $fieldName => $field) {
        if (isset($field['selector'], $field['getter'])) {
            $getter = $field['getter'];

            foreach ($domDocument->execute($field['selector']) as $result) {
                if (isset($field['attr'])) {
                    if (method_exists($result, $getter)) {
                        $this->content[$fieldName][] = $result->{$getter}($field['attr']);
                    }
                } elseif (property_exists($result, $getter)) {
                    // The braces must wrap the complete key lookup; bracing
                    // only $field resolves a property literally named "Array".
                    $this->content[$fieldName][] = $result->{$getter};
                }
            }
        }

        if (isset($field['callable']) && method_exists($this, $field['callable'])) {
            // Same bracing rule as above, this time for a dynamic method call.
            $this->{$field['callable']}($fieldName, $domDocument);
        }

        // !empty() rather than isset() so that 'required' => false is honoured.
        if (!empty($field['required']) && !isset($this->content[$fieldName])) {
            // Callable-only fields have no selector to report.
            $selector = isset($field['selector']) ? $field['selector'] : '';
            throw new ExpectedParamException(
                'Cant get required property ' . $fieldName . ' with ' . $selector . ' at ' . $this->url
            );
        }

        if (isset($this->content[$fieldName])) {
            // The array cast tolerates a callable that stored a single scalar value.
            $values = (array) $this->content[$fieldName];
            $this->content[$fieldName] = implode(',', str_replace(',', '[comma]', $values));
        } else {
            $this->content[$fieldName] = '';
        }
    }

    $this->content['parse_url'] = $this->url;
    CsvFile::putInFile(array($this->content), $this->fileName);
    Logger::addMessage('Successful! Remove from Queue!');
}
/**
 * Runs a single parse for the URL passed in the 'urls' argument.
 *
 * The URL is matched by the router, the resulting route is resolved through
 * processRoute(), the parse listeners are registered and the routed
 * controller action is invoked with the route and the 'file' argument.
 * Buffered log messages are saved before returning.
 *
 * @param InputInterface  $input  supplies the 'urls' and 'file' arguments and
 *                                the 'compare' and 'mail' options
 * @param OutputInterface $output not used by this method
 *
 * @return int 0 when no URL was given or an unexpected exception was caught
 *             (the job is retried), 1 when the controller action returned
 *             normally, 2 when an ExpectedParamException was caught (the URL
 *             goes to the unparsed list)
 */
protected function execute(InputInterface $input, OutputInterface $output)
{
    $url = $input->getArgument('urls');
    $fileName = $input->getArgument('file');

    // Default outcome: retry the job.
    $returnCode = 0;

    if (!$url) {
        FileLogger::save();

        return $returnCode;
    }

    try {
        Logger::addMessage('Running parse: ' . $url);

        $route = $this->registry->router->match($url);
        $route->url = $url;
        $route = $this->processRoute($route);

        $controllerClass = $route->class;
        $controller = new $controllerClass(null);

        $dispatcher = $this->registry->eventDispatcher;

        // Each registration gets its own listener instance.
        if (!$input->getOption('compare')) {
            $dispatcher->addListener('parse.finished', array(new ParseListener(), 'compare'), 10);
        }
        $dispatcher->addListener('parse.jobFinished', array(new ParseListener(), 'jobFinish'), 0);
        if (!$input->getOption('mail')) {
            $dispatcher->addListener('parse.finished', array(new ParseListener(), 'mail'), 0);
        }

        $action = $route->method;
        $controller->$action($route, $fileName);

        $returnCode = 1;
    } catch (ExpectedParamException $expected) {
        Logger::addMessage('Error: ' . $expected->getMessage());
        // Signals that the URL belongs in the unparsed list.
        $returnCode = 2;
    } catch (\Exception $unexpected) {
        // The return code stays at its retry default.
        Logger::addMessage('Error: ' . $unexpected->getMessage());
    }

    FileLogger::save();

    return $returnCode;
}
/**
 * Sends a request for the given URL through a proxy picked by the
 * configuration and returns whatever send() returns.
 *
 * The chosen proxy host and the URL are logged before the request is made.
 *
 * @param string $url address to request
 *
 * @return mixed result of $this->send()
 */
public function getZendResponse($url)
{
    $this->setUri($url);

    $config = $this->registry->config;
    $proxy = $config->getRandProxy();

    Logger::addMessage('Proxy: ' . $proxy->getHost());
    Logger::addMessage('URL: ' . $url);

    $options = array(
        'proxy_host'   => $proxy->getHost(),
        'proxy_port'   => $proxy->getPort(),
        'useragent'    => 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36',
        'timeout'      => 10,
        'adapter'      => $config->getSystemConfig()->getZendClientAdapter(),
        'ssltransport' => 'tls',
    );
    $this->setOptions($options);

    return $this->send();
}
/**
 * Builds a resolved copy of a route: the method name gets an 'Action' suffix
 * and the class name is expanded to "<path>\ContentHandler\<Class>Handler".
 *
 * The route passed in is left untouched; a clone is modified and returned.
 *
 * @param Route  $route route whose 'method' and 'class' are to be resolved
 * @param string $path  prefix placed in front of '\ContentHandler\'; it is
 *                      also written to the log
 *
 * @return Route the resolved clone
 *
 * @throws ExpectedParamException when the resolved class does not exist
 */
protected function processRoute(Route $route, $path)
{
    Logger::addMessage($path);

    $route = clone $route;

    $actionName = lcfirst($route->method) . 'Action';
    $route->method = $actionName;

    $handlerClass = $path . '\\ContentHandler\\' . ucfirst($route->class) . 'Handler';
    $route->class = $handlerClass;

    if (class_exists($route->class)) {
        return $route;
    }

    throw new ExpectedParamException("No {$route->class}");
}
/**
 * Starts Gearman parse workers according to the 'limit' argument.
 *
 * A limit above 1 spawns that many background processes, each running
 * 'php index.php worker:parse 1'. A limit of exactly 1 makes this process
 * itself a worker with the 'parse' ability; if the worker raises a
 * \Net_Gearman_Exception the message is logged and a replacement background
 * worker is spawned. Any other limit does nothing. Buffered log messages are
 * saved at the end in every case.
 *
 * @param InputInterface  $input  supplies the 'limit' argument
 * @param OutputInterface $output not used by this method
 *
 * @return void
 */
protected function execute(InputInterface $input, OutputInterface $output)
{
    $remaining = $input->getArgument('limit');

    if ($remaining > 1) {
        // Fan out: one single-worker background process per requested worker.
        while ($remaining > 0) {
            BackgroundProcess::open('php index.php worker:parse 1');
            $remaining--;
        }
    } elseif ($remaining == 1) {
        try {
            $gearmanWorker = new \Net_Gearman_Worker($this->config->getGearmanHost());
            $gearmanWorker->addAbility('parse');
            $gearmanWorker->beginWork();
        } catch (\Net_Gearman_Exception $failure) {
            Logger::addMessage($failure->getMessage());
            // Replace the failed worker with a fresh background one.
            BackgroundProcess::open('php index.php worker:parse 1');
        }
    }

    FileLogger::save();
}
/**
 * Routes the parse target to its content handler, then loads and processes
 * the page with that handler.
 *
 * @param Route       $params route carrying 'params' (matched by the router),
 *                            'path' (passed to processRoute()) and 'url' (the
 *                            address being parsed)
 * @param string|null $file   output file name; when empty it is derived from
 *                            $params->params by replacing a trailing '.html'
 *                            with '.csv'
 *
 * @return void
 *
 * @throws ExpectedParamException when the router resolves to the 'error' class
 */
public function parseAction(Route $params, $file = null)
{
    $targetFile = $file ? $file : basename($params->params, '.html') . '.csv';

    $matched = $this->router->match($params->params);
    if ($matched->class == 'error') {
        throw new ExpectedParamException('Cannot route url');
    }

    // Resolve the handler class and action for the matched route.
    $resolved = $this->processRoute($matched, $params->path);

    // Remember which URL is being parsed.
    $resolved->url = $params->url;
    Logger::addMessage($resolved->url);

    $handlerClass = $resolved->class;
    /** @var BaseHandler $handler */
    $handler = new $handlerClass($resolved->url, $targetFile);

    // Fetch the page first, then extract its content.
    $handler->loadContent();
    $handler->processContent();
}
/**
 * Writes this object's message to the logger together with the file and
 * line reported by getFile() and getLine(), terminated by a newline.
 *
 * @return void
 */
private function log()
{
    $location = " AT LINE: " . $this->getLine();
    $entry = $this->getMessage() . ". IN " . $this->getFile() . $location . PHP_EOL;

    Logger::addMessage($entry);
}