/**
 * Initiates a new crawler.
 *
 * Assigns an id to this crawler instance, loads every class file the crawler
 * depends on (skipping files whose class is already defined), creates the
 * helper objects held by the crawler and sets the default working directory.
 */
public function __construct()
{
    // Id of this crawler instance: process id directly followed by the current Unix timestamp
    $this->crawler_uniqid = getmypid() . time();

    // All class files are located relative to the directory of this file
    $base_dir = dirname(__FILE__);

    // Class name => class file (relative to $base_dir).
    // The order of the entries is the order in which the files get loaded.
    $class_files = array(
        // Utils
        "PHPCrawlerUtils" => "/PHPCrawlerUtils.class.php",

        // URL caches
        "PHPCrawlerURLCacheBase" => "/UrlCache/PHPCrawlerURLCacheBase.class.php",
        "PHPCrawlerMemoryURLCache" => "/UrlCache/PHPCrawlerMemoryURLCache.class.php",
        "PHPCrawlerSQLiteURLCache" => "/UrlCache/PHPCrawlerSQLiteURLCache.class.php",

        // Page request
        "PHPCrawlerHTTPRequest" => "/PHPCrawlerHTTPRequest.class.php",

        // Cookie caches
        "PHPCrawlerCookieCacheBase" => "/CookieCache/PHPCrawlerCookieCacheBase.class.php",
        "PHPCrawlerMemoryCookieCache" => "/CookieCache/PHPCrawlerMemoryCookieCache.class.php",
        "PHPCrawlerSQLiteCookieCache" => "/CookieCache/PHPCrawlerSQLiteCookieCache.class.php",

        // URL filter, robots.txt parser, process report, user send-data cache
        "PHPCrawlerURLFilter" => "/PHPCrawlerURLFilter.class.php",
        "PHPCrawlerRobotsTxtParser" => "/PHPCrawlerRobotsTxtParser.class.php",
        "PHPCrawlerProcessReport" => "/PHPCrawlerProcessReport.class.php",
        "PHPCrawlerUserSendDataCache" => "/PHPCrawlerUserSendDataCache.class.php",

        // Descriptors, document info, benchmark, crawler status
        "PHPCrawlerURLDescriptor" => "/PHPCrawlerURLDescriptor.class.php",
        "PHPCrawlerDocumentInfo" => "/PHPCrawlerDocumentInfo.class.php",
        "PHPCrawlerBenchmark" => "/PHPCrawlerBenchmark.class.php",
        "PHPCrawlerUrlPartsDescriptor" => "/PHPCrawlerUrlPartsDescriptor.class.php",
        "PHPCrawlerStatus" => "/PHPCrawlerStatus.class.php",

        // Enums
        "PHPCrawlerAbortReasons" => "/Enums/PHPCrawlerAbortReasons.class.php",
        "PHPCrawlerRequestErrors" => "/Enums/PHPCrawlerRequestErrors.class.php",
        "PHPCrawlerUrlCacheTypes" => "/Enums/PHPCrawlerUrlCacheTypes.class.php",
        "PHPCrawlerMultiProcessModes" => "/Enums/PHPCrawlerMultiProcessModes.class.php",

        // Process communication
        "PHPCrawlerProcessCommunication" => "/ProcessCommunication/PHPCrawlerProcessCommunication.class.php",
        "PHPCrawlerDocumentInfoQueue" => "/ProcessCommunication/PHPCrawlerDocumentInfoQueue.class.php",
    );

    foreach ($class_files as $class_name => $relative_path) {
        // Only pull in the file if the class is not defined yet
        if (!class_exists($class_name)) {
            include_once $base_dir . $relative_path;
        }

        // Helper objects are created right after their class got loaded,
        // before any of the following class files is included.
        switch ($class_name) {
            case "PHPCrawlerHTTPRequest":
                $this->PageRequest = new PHPCrawlerHTTPRequest();
                $this->PageRequest->setHeaderCheckCallbackFunction($this, "handleHeaderInfo");
                break;

            case "PHPCrawlerURLFilter":
                $this->UrlFilter = new PHPCrawlerURLFilter();
                break;

            case "PHPCrawlerRobotsTxtParser":
                $this->RobotsTxtParser = new PHPCrawlerRobotsTxtParser();
                break;

            case "PHPCrawlerUserSendDataCache":
                $this->UserSendDataCache = new PHPCrawlerUserSendDataCache();
                break;
        }
    }

    // Default working directory is the temp directory reported by PHPCrawlerUtils
    $this->working_base_directory = PHPCrawlerUtils::getSystemTempDir();
}