示例#1
0
 /**
  * Queues one Gearman 'parse' task per product link found on the loaded
  * listing page and then runs the collected task set.
  *
  * Links whose href has no host component are made absolute with the scheme
  * and host of $this->url. When at least one task was queued and a file named
  * $this->fileName already exists in the parse directory, that file is renamed
  * to "<basename>_toupdate.csv" before the set is run.
  *
  * @return void
  */
 public function processContent()
 {
     $body = $this->response->getBody();
     $domDocument = new Query($body);
     foreach ($domDocument->execute('ul.prod_grd.three_per_row.prod_lst_square_ic li a.lst_a') as $link) {
         $href = $link->getAttribute('href');
         if (!parse_url($href, PHP_URL_HOST)) {
             $urlHost = parse_url($this->url, PHP_URL_HOST);
             $urlScheme = parse_url($this->url, PHP_URL_SCHEME) . '://';
             // A relative href such as "page.html" has no leading slash; without a
             // separator it would be glued straight onto the host name.
             $separator = strpos($href, '/') === 0 ? '' : '/';
             $href = $urlScheme . $urlHost . $separator . $href;
         }
         $task = new \Net_Gearman_Task('parse', array('arg' => array('url' => $href, 'file' => $this->fileName)), null, \Net_Gearman_Task::JOB_NORMAL);
         $task->attachCallback($this->getTaskFinishedCallback());
         $this->set->addTask($task);
     }
     if ($this->set->count() > 0) {
         $parseDir = $this->config->getSystemConfig()->getParseDir();
         $filename = $parseDir . $this->fileName;
         if (file_exists($filename)) {
             // Move the previous result aside under the "_toupdate" name before the new run.
             rename($filename, $parseDir . basename($this->fileName, '.csv') . '_toupdate.csv');
         }
         $this->set->attachCallback($this->getJobFinishedCallback());
         Logger::addMessage('Jobs pushed to server: ' . $this->set->count());
         $this->jobTotal = $this->set->count();
         $this->client->runSet($this->set);
     }
 }
示例#2
0
 /**
  * Extracts every configured field from the loaded page and writes the
  * result as one row to the CSV file named by $this->fileName.
  *
  * Each entry returned by getFields() may carry these keys:
  *  - 'selector' + 'getter': for every node matched by the selector, read the
  *    property named by 'getter', or (when 'attr' is set) call the method
  *    named by 'getter' with 'attr' as its argument;
  *  - 'callable': name of a method on this object, invoked with the field
  *    name and the query document;
  *  - 'required': when set, the field must end up with at least one value.
  *
  * Collected values are joined with ',' after literal commas inside each
  * value have been replaced by '[comma]'; fields without values become ''.
  *
  * @return void
  *
  * @throws ExpectedParamException when a field flagged 'required' produced no value
  */
 public function processContent()
 {
     $body = $this->response->getBody();
     $domDocument = new Query($body);
     $this->content = array();
     foreach ($this->getFields() as $fieldName => $field) {
         if (isset($field['selector'], $field['getter'])) {
             $getter = $field['getter'];
             foreach ($domDocument->execute($field['selector']) as $result) {
                 if (property_exists($result, $getter) && !isset($field['attr'])) {
                     // The braces must wrap the whole array access: `{$field}['getter']`
                     // would use the array itself as the property name.
                     $this->content[$fieldName][] = $result->{$getter};
                 }
                 if (method_exists($result, $getter) && isset($field['attr'])) {
                     $this->content[$fieldName][] = $result->{$getter}($field['attr']);
                 }
             }
         }
         if (isset($field['callable']) && method_exists($this, $field['callable'])) {
             // Same precedence issue as above: call the method named by 'callable'.
             $this->{$field['callable']}($fieldName, $domDocument);
         }
         if (isset($field['required']) && !isset($this->content[$fieldName])) {
             // 'selector' is optional (callable-only fields), so guard the read.
             $selector = isset($field['selector']) ? $field['selector'] : '';
             throw new ExpectedParamException('Cant get required property ' . $fieldName . ' with ' . $selector . ' at ' . $this->url);
         }
         if (isset($this->content[$fieldName])) {
             // Cast so a scalar value stored by a 'callable' handler is joined as well.
             $this->content[$fieldName] = implode(',', str_replace(',', '[comma]', (array) $this->content[$fieldName]));
         } else {
             $this->content[$fieldName] = '';
         }
     }
     // NOTE(review): looks like leftover debug output; kept so the emitted output is unchanged.
     var_dump($this->content);
     $this->content['parse_url'] = $this->url;
     CsvFile::putInFile(array($this->content), $this->fileName);
     Logger::addMessage('Successful! Remove from Queue!');
 }
示例#3
0
文件: Parse.php 项目: sudevva/parser2
 /**
  * Runs the parse command for the URL passed in the 'urls' argument.
  *
  * The URL is matched against the router, the matched route is passed through
  * processRoute(), the route's class is instantiated and the route's method is
  * invoked with the route and the 'file' argument. Listeners for the
  * 'parse.finished' and 'parse.jobFinished' events are registered beforehand;
  * the 'compare' and 'mail' listeners are only added when the respective
  * option is falsy. The file logger is saved in every case.
  *
  * @param InputInterface  $input
  * @param OutputInterface $output
  *
  * @return int 1 after the controller action ran, 2 when an
  *             ExpectedParamException was caught, 0 otherwise
  */
 protected function execute(InputInterface $input, OutputInterface $output)
 {
     $targetUrl = $input->getArgument('urls');
     $outputFile = $input->getArgument('file');
     $exitStatus = 0;

     if ($targetUrl) {
         try {
             Logger::addMessage('Running parse: ' . $targetUrl);

             $matched = $this->registry->router->match($targetUrl);
             $matched->url = $targetUrl;
             $matched = $this->processRoute($matched);

             $controllerClass = $matched->class;
             $controller = new $controllerClass(null);

             if (!$input->getOption('compare')) {
                 $compareListener = array(new ParseListener(), 'compare');
                 $this->registry->eventDispatcher->addListener('parse.finished', $compareListener, 10);
             }

             $jobFinishListener = array(new ParseListener(), 'jobFinish');
             $this->registry->eventDispatcher->addListener('parse.jobFinished', $jobFinishListener, 0);

             if (!$input->getOption('mail')) {
                 $mailListener = array(new ParseListener(), 'mail');
                 $this->registry->eventDispatcher->addListener('parse.finished', $mailListener, 0);
             }

             $action = $matched->method;
             $controller->{$action}($matched, $outputFile);
             $exitStatus = 1;
         } catch (ExpectedParamException $expected) {
             // Expected failure: logged and reported with its own status code.
             Logger::addMessage('Error: ' . $expected->getMessage());
             $exitStatus = 2;
         } catch (\Exception $unexpected) {
             // Any other failure is only logged; the status stays at its default.
             Logger::addMessage('Error: ' . $unexpected->getMessage());
         }
     }

     FileLogger::save();

     return $exitStatus;
 }
示例#4
0
 /**
  * Sends a request for the given URL through a proxy obtained from the
  * configuration's getRandProxy() and returns whatever send() returns.
  *
  * The chosen proxy host and the URL are written to the logger first.
  *
  * @param string $url URL to request
  *
  * @return mixed result of $this->send()
  */
 public function getZendResponse($url)
 {
     $this->setUri($url);

     $proxy = $this->registry->config->getRandProxy();
     Logger::addMessage('Proxy: ' . $proxy->getHost());
     Logger::addMessage('URL: ' . $url);

     $clientOptions = array(
         'proxy_host' => $proxy->getHost(),
         'proxy_port' => $proxy->getPort(),
         'useragent' => 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36',
         'timeout' => 10,
         'adapter' => $this->registry->config->getSystemConfig()->getZendClientAdapter(),
         'ssltransport' => 'tls',
     );
     $this->setOptions($clientOptions);

     return $this->send();
 }
示例#5
0
 /**
  * Returns a copy of the route whose method and class are expanded to the
  * concrete handler names.
  *
  * The method becomes lcfirst(method) . 'Action' and the class becomes
  * "<path>\ContentHandler\<Class>Handler". The passed route is not modified;
  * the changes are applied to a clone.
  *
  * @param Route  $route matched route
  * @param string $path  namespace prefix prepended to the handler class
  *
  * @return Route the adjusted clone
  *
  * @throws ExpectedParamException when the resulting class does not exist
  */
 protected function processRoute(Route $route, $path)
 {
     Logger::addMessage($path);

     $resolved = clone $route;
     $resolved->method = lcfirst($resolved->method) . 'Action';
     $resolved->class = $path . '\\ContentHandler\\' . ucfirst($resolved->class) . 'Handler';

     if (class_exists($resolved->class)) {
         return $resolved;
     }

     throw new ExpectedParamException("No {$resolved->class}");
 }
示例#6
0
 /**
  * Starts Gearman 'parse' workers according to the 'limit' argument.
  *
  * With a limit above 1, that many background processes are opened, each
  * running this command with a limit of 1. With a limit equal to 1, a worker
  * is created in this process and begins work; if a Net_Gearman_Exception is
  * caught, its message is logged and a replacement background process is
  * opened. Any other limit does nothing. The file logger is saved at the end.
  *
  * @param InputInterface  $input
  * @param OutputInterface $output
  *
  * @return void
  */
 protected function execute(InputInterface $input, OutputInterface $output)
 {
     $remaining = $input->getArgument('limit');

     if ($remaining > 1) {
         while ($remaining > 0) {
             BackgroundProcess::open('php index.php worker:parse 1');
             $remaining--;
         }
     } elseif ($remaining == 1) {
         try {
             $gearmanWorker = new \Net_Gearman_Worker($this->config->getGearmanHost());
             $gearmanWorker->addAbility('parse');
             $gearmanWorker->beginWork();
         } catch (\Net_Gearman_Exception $failure) {
             Logger::addMessage($failure->getMessage());
             // Open a fresh single-worker process after logging the failure.
             BackgroundProcess::open('php index.php worker:parse 1');
         }
     }

     FileLogger::save();
 }
示例#7
0
 /**
  * Routes the given parameters to a content handler, then loads and
  * processes the page.
  *
  * When no file name is supplied, it is derived from $params->params by
  * taking its basename, stripping '.html' and appending '.csv'.
  *
  * @param Route       $params route carrying 'params', 'path' and 'url'
  * @param string|null $file   output file name; derived when falsy
  *
  * @return void
  *
  * @throws ExpectedParamException when the router resolves to the 'error' class
  */
 public function parseAction(Route $params, $file = null)
 {
     $file = $file ? $file : basename($params->params, '.html') . '.csv';

     $route = $this->router->match($params->params);
     if ($route->class == 'error') {
         throw new ExpectedParamException('Cannot route url');
     }

     // Expand the matched route to its concrete handler class and method.
     $route = $this->processRoute($route, $params->path);
     // Carry over the URL that is currently being parsed.
     $route->url = $params->url;
     Logger::addMessage($route->url);

     $handlerClass = $route->class;
     /** @var BaseHandler $handler */
     $handler = new $handlerClass($route->url, $file);
     // Load the page, then parse what was loaded.
     $handler->loadContent();
     $handler->processContent();
 }
示例#8
0
 /**
  * Adds this object's message, file and line to the logger as a single
  * entry terminated by PHP_EOL.
  *
  * @return void
  */
 private function log()
 {
     $message = $this->getMessage();
     $file = $this->getFile();
     $line = $this->getLine();

     Logger::addMessage("{$message}. IN {$file} AT LINE: {$line}" . PHP_EOL);
 }
示例#9
0
文件: index.php 项目: sudevva/parser2
use sys\Logger\FileLogger;
use sys\Logger\Logger;
use sys\Registry;
use sys\Router\Route;
use sys\Router\RouteCollection;
use sys\Router\Router;
use sys\Router\Loader\jsonFileLoader as RouterLoader;
use sys\Config\Loader\jsonFileLoader as ConfigLoader;
// Bootstrap: populate the shared registry with config, router and event
// dispatcher, then build the application and run it.
$registry = Registry::getInstance();

// Load the configuration from config/config.json into a fresh Config object.
$configLoader = new ConfigLoader();
$configLoader->setConfigObject(new Config());
$registry->config = $configLoader->load('config/config.json');

$registry->router = new Router();
$registry->eventDispatcher = new EventDispatcher();

// Load the routes from config/routes.json and hand the result to the router.
$routeLoader = new RouterLoader();
$routeLoader->setRouteCollection(new RouteCollection());
$routeLoader->setRouteClass(new Route());
$registry->router->setRouteCollection($routeLoader->load("config/routes.json"));

try {
    $application = new Application();
    $registry->application = $application;
    // Add the Parse, Worker and Compare objects to the application before running it.
    $application->add(new Parse());
    $application->add(new Worker());
    $application->add(new Compare());
    $application->run();
} catch (\Exception $e) {
    // On an exception reaching this level: print the collected log messages
    // inside <pre> tags and save the file log.
    echo "<pre>";
    echo Logger::toString();
    FileLogger::save();
    echo "</pre>";
}