// Bootstrap script: raises the execution time limit, pulls in the debug helpers,
// constants and class loader, builds the database configuration array, loads the
// core library classes and, when run from the console, turns `name=value`
// command-line arguments into request parameters.
set_time_limit(600);

require_once 'debug.php';
require_once 'const.php';
require_once BASE_DIR . 'classes/KDGLoader.php';

// Database connection settings.
// NOTE(review): the DB_* constants are presumably defined by const.php - confirm.
$config = array();
$config['db_user'] = DB_USERNAME;
$config['db_password'] = DB_PASSWORD;
$config['db_host'] = DB_SERVER;
$config['db_name'] = DB_DATABASE;

// Core library classes. The order is kept exactly as it was, in case later
// classes rely on earlier ones having been loaded.
KDGLoader::loadLibraryClass('KDGGeneral');
KDGLoader::loadLibraryClass('KDGSession');
KDGLoader::loadLibraryClass('KDGModel');
KDGLoader::loadLibraryClass('Encoding');
KDGLoader::loadLibraryClass('KDGMemory');
KDGLoader::loadLibraryClass('ISDString');
KDGLoader::loadLibraryClass('Inflector');
KDGLoader::loadLibraryClass('KDGEntity');
KDGLoader::loadLibraryClass('KDGParser');
KDGLoader::loadLibraryClass('KDGCrawler');
KDGLoader::loadLibraryClass('KDGDatabase');
KDGLoader::loadLibraryClass('KDGInflector');

// Console mode: every argument of the form `name=value` becomes a request parameter.
if (isConsole() && isset($argv) && $argv) {
    foreach ($argv as $val) {
        // Arguments without '=' (e.g. the script name) carry no parameter.
        if (strpos($val, '=') === false) {
            continue;
        }

        // Split on the FIRST '=' only, so that values which themselves contain
        // '=' (URLs with query strings, base64 data, ...) are passed through
        // whole instead of being cut at their first '='.
        $parts = explode('=', $val, 2);
        setRequestParameter($parts[0], $parts[1]);
    }
}
<?php KDGLoader::loadEntityClass('Product'); KDGLoader::loadModelClass('ProductModel'); KDGLoader::loadModelClass('CategoryModel'); KDGLoader::loadLibraryClass('parsers/NewhtfProductsListParser'); class NewhtfCollectProductsCrawler extends Crawler { protected function crawl() { $categories = CategoryModel::findAllMain(); foreach ($categories as $category) { $html = $this->requestProductsListPage($category->url); if ($html) { $this->parseProductsList($html, $category); } } } protected function requestProductsListPage($url) { $this->sendBuffered('category: ' . $url); return $this->makeRequest($url, false, false, false); } protected function parseProductsList($html, $category) { $parser = new NewhtfProductsListParser($html); foreach ($parser->products_list as $data) { $entity = ProductModel::findOneByUrl($data['url']); if (!$entity) { $entity = new Product(); }
<?php

KDGLoader::loadLibraryClass('KDGModel');

/**
 * Static lookup helpers for products.
 *
 * Every finder starts from the base query returned by getQuery() and narrows
 * it down with a single condition on the `p` alias.
 */
class ProductModel extends KDGModel
{
    /**
     * Creates the base product query: entity 'product', selecting from
     * 'product p'.
     *
     * @return mixed the query object produced by the KDGDatabase builder chain
     */
    public static function getQuery()
    {
        $database = KDGDatabase::create();
        $forEntity = $database->entity('product');

        return $forEntity->from('product p');
    }

    /**
     * Executes the base query without any condition.
     *
     * @return mixed whatever execute() yields for the unfiltered query
     */
    public static function findAll()
    {
        $query = self::getQuery();

        return $query->execute();
    }

    /**
     * Fetches a single product whose id matches.
     *
     * @param mixed $id value bound to the `p.id = ?` placeholder
     * @return mixed result of fetchOne()
     */
    public static function findOneById($id)
    {
        $query = self::getQuery();
        $filtered = $query->addWhere('p.id = ?', $id);

        return $filtered->fetchOne();
    }

    /**
     * Fetches a single product whose title matches.
     *
     * @param mixed $title value bound to the `p.title = ?` placeholder
     * @return mixed result of fetchOne()
     */
    public static function findOneByTitle($title)
    {
        $query = self::getQuery();
        $filtered = $query->where('p.title = ?', $title);

        return $filtered->fetchOne();
    }

    /**
     * Fetches a single product whose url matches; debug(false) is applied to
     * the query before the condition is added.
     *
     * @param mixed $url value bound to the `p.url = ?` placeholder
     * @return mixed result of fetchOne()
     */
    public static function findOneByUrl($url)
    {
        $query = self::getQuery();
        $quiet = $query->debug(false);
        $filtered = $quiet->where('p.url = ?', $url);

        return $filtered->fetchOne();
    }
}
<?php KDGLoader::loadLibraryClass('vendor/SimpleHtmlDom/simple_html_dom'); class KDGService { protected $cacheLifetime = 86400; // 24 hours protected $data = array(); protected $request = array(); protected $rawLinks = array(); protected $comparedData = array(); const SERVICE_SEARCH_STATUS_TRUE = 1; const SERVICE_SEARCH_STATUS_FALSE = 0; const SERVICE_SEARCH_STATUS_ERROR = 2; protected $ServiceHost = null; protected $ServiceCode = null; protected $ServiceTitle = null; protected $ServiceMethod = 'html'; protected $ServiceEnabled = true; protected $ServiceStatus = self::SERVICE_SEARCH_STATUS_FALSE; // not found anything by default protected $ServiceStatusErrorMessage = null; protected $company = null; protected $rawLink = null; protected $rawLinkDom = null; protected $rawLinksDom = null; protected $rawLinksHtml = null; protected $rawLinksLimit = 5; protected $rawLinkRequestAttempt = 1; protected $html_response = null; protected $html_response_object = null;
<?php

KDGLoader::loadEntityClass('Product');
KDGLoader::loadModelClass('ProductModel');
KDGLoader::loadLibraryClass('parsers/NewhtfUpdateProductParser');

/**
 * Crawler that re-visits the page of every stored product and hands the
 * downloaded HTML to NewhtfUpdateProductParser.
 */
class NewhtfUpdateProductsCrawler extends Crawler
{
    /**
     * Walks over all products returned by ProductModel::findAll(), requesting
     * and parsing each product page in turn.
     */
    protected function crawl()
    {
        $products = ProductModel::findAll();

        foreach ($products as $product) {
            // Pause 0.2 s between requests. sleep() only accepts whole seconds,
            // so the former sleep(0.2) was truncated to sleep(0) and never
            // paused; usleep() takes microseconds and gives the intended delay.
            usleep(200000);

            $html = $this->requestProductPage($product->url);
            if ($html) {
                $this->parseProduct($html, $product);
            }
        }
    }

    /**
     * Reports the URL through sendBuffered() and requests it.
     *
     * @param string $url product page URL
     * @return mixed whatever makeRequest() returns; a falsy value makes
     *               crawl() skip the product
     */
    protected function requestProductPage($url)
    {
        $this->sendBuffered('[product url]: ' . $url);

        return $this->makeRequest($url, false, false, false);
    }

    /**
     * Passes the page HTML and the product to NewhtfUpdateProductParser.
     * The parser instance is not kept, so all of its work happens in the
     * constructor call.
     *
     * @param mixed $html    response body obtained for the product page
     * @param mixed $product product the page belongs to
     */
    protected function parseProduct($html, $product)
    {
        new NewhtfUpdateProductParser($html, $product);
    }
}
<?php

// Entry script: instantiates the newhtf.ru category crawler.
require_once 'include/config.php';

// Runtime limits for this run: 256M of memory, no execution time limit.
ini_set('memory_limit', '256M');
set_time_limit(0);

KDGLoader::loadLibraryClass('crawlers/NewhtfCategoryCrawler');

$crawler = new NewhtfCategoryCrawler();

// Individual crawl stages (currently disabled):
//$crawler->crawlMainCategories();
//$crawler->crawlMiddleCategories();
//$crawler->crawlSubCategories();

// Manual check of the product parser against a single page (currently disabled):
//$product_url = 'http://newhtf.ru/catalog/paneli-interernye-svetilniki/svetilniki_serii_kub/svetodiodnyy_svetilnik_dlya_interera_htf_cub_6_plt_6w_nw.html';
//KDGLoader::loadLibraryClass('parsers/NewhtfProductParser');
//$html = file_get_contents($product_url);
//new NewhtfProductParser($html);
<?php

// Entry script: instantiates the crawler that refreshes stored products.
require_once 'include/config.php';

// Runtime limits for this run: 256M of memory, no execution time limit.
ini_set('memory_limit', '256M');
set_time_limit(0);

KDGLoader::loadLibraryClass('crawlers/NewhtfUpdateProductsCrawler');

// The instance is not kept; the script only needs the object to be constructed.
new NewhtfUpdateProductsCrawler();

// Manual check of the product parser against a single page (currently disabled):
//$product_url = 'http://newhtf.ru/catalog/paneli-interernye-svetilniki/svetilniki_serii_kub/svetodiodnyy_svetilnik_dlya_interera_htf_cub_6_plt_6w_nw.html';
//KDGLoader::loadLibraryClass('parsers/NewhtfProductParser');
//$html = file_get_contents($product_url);
//new NewhtfProductParser($html);
<?php KDGLoader::loadLibraryClass('parsers/Xbox360ContentPageParser'); class CollectXbox360ContentCrawler extends Crawler { protected $page = 1; protected $pagemax = 22; public function execute() { while ($this->page <= $this->pagemax) { $this->collectContent(); $this->page++; } } protected function collectContent() { $this->response = null; $this->requestPage(); if ($this->response) { $this->parsePage(); } } protected function strToHex($string) { $hex = ''; for ($i = 0; $i < strlen($string); $i++) { $hex .= dechex(ord($string[$i])); } return $hex; } protected function requestPage()
<?php KDGLoader::loadLibraryClass('Database'); $gdb = null; class KDGDatabase extends Database { const ACTION_SELECT = 1; const ACTION_INSERT = 2; const ACTION_UPDATE = 3; const ACTION_DELETE = 4; const ACTION_COUNT = 5; const ACTION_SCALAR = 6; const RESULTS_AS_ARRAY = 1; const RESULTS_AS_OBJECT = 2; protected $replaceQuote = "\\'"; protected $_collection = array(); protected $_entityName = null; protected $_action_type = self::ACTION_SELECT; protected $_action_debug = false; protected $_is_count_request = false; protected $_mixed_entity = false; protected $_resultType = self::RESULTS_AS_OBJECT; protected $_sql = ''; protected $_sqlParts = array('select' => array(), 'distinct' => false, 'forUpdate' => false, 'from' => array(), 'set' => array(), 'join' => array(), 'where' => array(), 'groupby' => array(), 'having' => array(), 'orderby' => array(), 'limit' => false, 'offset' => false); protected $_sqlParams = array('exec' => array(), 'join' => array(), 'where' => array(), 'set' => array(), 'having' => array()); protected static $_keywords = array('ALL', 'AND', 'ANY', 'AS', 'ASC', 'AVG', 'BETWEEN', 'BIT_LENGTH', 'BY', 'CHARACTER_LENGTH', 'CHAR_LENGTH', 'CURRENT_DATE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'DELETE', 'DESC', 'DISTINCT', 'EMPTY', 'EXISTS', 'FALSE', 'FETCH', 'FROM', 'GROUP', 'HAVING', 'IN', 'INDEXBY', 'INNER', 'IS', 'JOIN', 'LEFT', 'LIKE', 'LOWER', 'LIMIT', 'MEMBER', 'MOD', 'NEW', 'NOT', 'NULL', 'OBJECT', 'OF', 'OR', 'ORDER', 'OUTER', 'OFFSET', 'POSITION', 'SELECT', 'SOME', 'TRIM', 'TRUE', 'UNKNOWN', 'UPDATE', 'WHERE'); public function __construct($user, $password, $host, $dbname) { return parent::__construct($user, $password, $host, $dbname); } public function __destruct()
<?php KDGLoader::loadEntityClass('Category'); KDGLoader::loadModelClass('CategoryModel'); KDGLoader::loadLibraryClass('parsers/NewhtfMainCategoryParser'); KDGLoader::loadLibraryClass('parsers/NewhtfMiddleCategoryParser'); KDGLoader::loadLibraryClass('parsers/NewhtfSubCategoryParser'); class NewhtfCategoryCrawler extends Crawler { protected $mainCategories = []; protected $middleCategories = []; protected $subCategories = []; protected function crawl() { } /* MAIN CATEGORIES */ public function crawlMainCategories() { $this->response = null; $this->requestMainCategoriesPage(); if ($this->response) { $this->parseMainCategories(); } } protected function requestMainCategoriesPage() { $url = 'http://newhtf.ru/catalog/'; getDiffTime(1); getDiffMemory(1); $this->sendBuffered($url); $this->response = $this->makeRequest($url, false, false, false);
<?php

// Entry script: instantiates the crawler that collects products from category listings.
require_once 'include/config.php';

// Runtime limits for this run: 256M of memory, no execution time limit.
ini_set('memory_limit', '256M');
set_time_limit(0);

KDGLoader::loadLibraryClass('crawlers/NewhtfCollectProductsCrawler');

// The instance is not kept; the script only needs the object to be constructed.
new NewhtfCollectProductsCrawler();

// Manual check of the product parser against a single page (currently disabled):
//$product_url = 'http://newhtf.ru/catalog/paneli-interernye-svetilniki/svetilniki_serii_kub/svetodiodnyy_svetilnik_dlya_interera_htf_cub_6_plt_6w_nw.html';
//KDGLoader::loadLibraryClass('parsers/NewhtfProductParser');
//$html = file_get_contents($product_url);
//new NewhtfProductParser($html);