/**
 * Stores a content provider under the namespace it reports.
 *
 * If a provider is already stored for that namespace it is replaced, and a
 * warning naming the namespace is logged first.
 *
 * @param ContentProvider $provider Provider to register.
 */
public static function registerProvider(ContentProvider $provider)
{
    $namespace = $provider->getNamespace();
    $alreadyRegistered = isset(self::$providers[$namespace]);

    if ($alreadyRegistered) {
        logger::warn('Overwriting previous handler for %s', $namespace);
    }

    self::$providers[$namespace] = $provider;
}
<?php
// Demo script: loads the framework class loader, turns on full error
// reporting, writes a few sample log messages and initializes the database
// layer from an inline configuration array.
require "./framework/classloader.php";
error_reporting(E_ALL);
ini_set('display_errors', true);
ini_set('html_errors', false);
date_default_timezone_set('Asia/Chongqing');

// Initialize the class loader
$loader = new classloader();

// Logger usage
// Initialize
logger::init(array());
// Arguments: ($msg, $moduleName)
logger::info("testing mes", "index.php");
logger::warn("oh no warning", "index.php");
logger::trace("hei hei tracing", "index.php");
logger::debug("just debug", "index");
logger::debug("just debug, 只是debug,不用怕", "index");

// Configuration
$config = array("dbhost" => "localhost", "dbuser" => "root", "dbpwd" => "", "dbname" => "test", "dbcharset" => "utf-8", "pconnect" => 0);

// Initialize; a DbException is reported by echoing it instead of aborting the script
try {
    database::init($config);
} catch (DbException $dbe) {
    echo 'can not connect to database' . $dbe;
}

// The main methods:
// database::execute($sql)
// database::fetch_all($sql)
// database::fetch_one($sql)
try {
/**
 * Accepts a pending connection on this socket.
 *
 * The socket must be in the SOCKSTATE_LISTENING state; otherwise a warning
 * is logged and nothing is accepted.
 *
 * @return mixed The handle returned by socket_accept() on success, or false
 *               when no connection could be accepted or the socket is not
 *               listening.
 */
public function accept()
{
    if ($this->sockstate != SOCKSTATE_LISTENING) {
        logger::warn("Can't accept on non-listening socket");
        // This path used to fall off the end of the method (yielding null)
        // while the other failure path returned false; return false here as
        // well so callers see one consistent failure value.
        return false;
    }

    $sh = socket_accept($this->fsh);
    if (!$sh) {
        return false;
    }

    // NOTE(review): a TcpSocket wrapper is constructed, but the raw handle is
    // what gets returned. Kept as-is to preserve the return type callers see;
    // confirm whether the wrapper was meant to be returned instead.
    $sock = new TcpSocket($sh);

    return $sh;
}
/**
 * Processes the links queued in plugin_searchphp_frontend_crawler_todo.
 *
 * Repeatedly takes the first row of the todo table (ordered by id), removes
 * it from the table, requests the link and, when the response is successful
 * or a redirect and the final URI is valid and not yet known, hands it to
 * parse(). Links deeper than $this->maxLinkDepth are not requested. The loop
 * ends when no further link can be read from the todo table.
 *
 * @return void
 */
public function continueWithFoundLinks()
{
    //reset DB in case this is executed in a forked child process
    $this->db = Pimcore_Resource_Mysql::reset();

    try {
        list($nextLink, $depth, $cookieJar) = $this->fetchNextTodoEntry();
    } catch (Exception $e) {
        // probably table was already removed because crawler is finished
        logger::log(get_class($this) . ": Could not extract next link from table plugin_searchphp_frontend_crawler_todo ", Zend_Log::DEBUG);
        return;
    }

    if (empty($nextLink)) {
        return;
    }

    $client = Pimcore_Tool::getHttpClient();
    $client->setUri($nextLink);
    $client->setConfig(array('maxredirects' => $this->maxRedirects, 'keepalive' => true, 'timeout' => $this->timeout));
    $client->setCookieJar($cookieJar);
    $client->setHeaders('If-Modified-Since', null);

    while ($nextLink) {
        try {
            $this->db->delete("plugin_searchphp_frontend_crawler_todo", "id = '" . md5($nextLink) . "'");
        } catch (Exception $e) {
            logger::warn(get_class($this) . ": Could not delete from plugin_searchphp_frontend_crawler_todo - maybe forcing crawler stop right now?");
        }

        if ($depth <= $this->maxLinkDepth) {
            logger::debug(get_class($this) . ": Link depth [ {$depth} ]");

            try {
                $nextLink = $this->addEvictOutputFilterParameter($nextLink);
                $client->setUri($nextLink);
                $client->setCookieJar($cookieJar);
                $client->setHeaders('If-Modified-Since', null);

                // Reset per iteration: without this a failed request would
                // leave the previous link's response in $response and it
                // would be processed again for the current link.
                $response = null;
                try {
                    $response = $client->request();
                } catch (Zend_Http_Client_Adapter_Exception $e) {
                    logger::log(get_class($this) . ": Could not get response for Link [ {$nextLink} ] ", Zend_Log::ERR);
                }

                if ($response instanceof Zend_Http_Response and ($response->isSuccessful() or $response->isRedirect())) {
                    //we don't use port - crawler ist limited to standard port 80
                    $client->getUri()->setPort(null);

                    //update url - maybe we were redirected
                    $nextLink = $client->getUri(true);
                    $nextLink = $this->removeOutputFilterParameters($nextLink);

                    $valid = $this->validateLink($nextLink);
                    if ($valid) {
                        //see if we were redirected to a place we already have in fetch list or done
                        if ($this->isLinkAlreadyKnown($nextLink)) {
                            logger::log(get_class($this) . " Redirected to uri [ {$nextLink} ] - which has already been processed", Zend_Log::DEBUG);
                        } else {
                            try {
                                $this->parse($nextLink, $response, $client->getUri()->getHost(), $client->getCookieJar(), $depth);
                                logger::log(get_class($this) . ": parsed [ {$nextLink} ] ", Zend_Log::DEBUG);
                            } catch (Exception $e) {
                                logger::log($e, Zend_Log::ERR);
                            }
                        }
                    } else {
                        logger::log("We were redirected to an invalid Link [ {$nextLink}]", Zend_Log::DEBUG);
                    }
                } else {
                    logger::log(get_class($this) . ": Error parsing [ {$nextLink} ] ", Zend_Log::ERR);
                }
            } catch (Zend_Uri_Exception $e) {
                logger::log(get_class($this) . ": Invalid URI [ {$nextLink} ] ", Zend_Log::ERR);
            }
        } else {
            logger::alert(get_class($this) . ": Stopping with uri [ {$nextLink} ] because maximum link depth of [ {$depth} ] has been reached.");
        }

        //get next from DB
        try {
            list($nextLink, $depth, $cookieJar) = $this->fetchNextTodoEntry();
        } catch (Exception $e) {
            //wait 2 seconds then try again
            sleep(2);
            try {
                list($nextLink, $depth, $cookieJar) = $this->fetchNextTodoEntry();
            } catch (Exception $e) {
                // probably table was already removed because crawler is finished
                logger::log(get_class($this) . ": Could not extract next link from table plugin_searchphp_frontend_crawler_todo ", Zend_Log::DEBUG);
                $nextLink = false;
            }
        }
    }
}

/**
 * Reads the first row (ordered by id) of the crawler todo table.
 *
 * @return array array($uri, $depth, $cookieJar); all three are null when the
 *               query yields no row.
 * @throws Exception Whatever the DB layer throws is passed through so the
 *                   caller can decide whether to retry or stop.
 */
protected function fetchNextTodoEntry()
{
    $row = $this->db->fetchRow("SELECT * FROM plugin_searchphp_frontend_crawler_todo ORDER BY id", array());

    // Guard against a non-array result so an empty table does not lead to
    // array access on a non-array value.
    if (!is_array($row)) {
        return array(null, null, null);
    }

    // NOTE(review): unserialize() on stored data can instantiate arbitrary
    // classes; this is only safe as long as the cookiejar column is written
    // exclusively by the crawler itself - confirm.
    return array($row['uri'], $row['depth'], unserialize($row['cookiejar']));
}

/**
 * Checks whether the md5 id of a link is present in the todo, contents_temp
 * or noindex table.
 *
 * A table that cannot be queried is logged at DEBUG level and treated as
 * "not found there".
 *
 * @param string $link Link whose md5 hash is used as the row id.
 * @return bool True when at least one of the tables contains the id.
 */
protected function isLinkAlreadyKnown($link)
{
    $tables = array(
        "plugin_searchphp_frontend_crawler_todo",
        "plugin_searchphp_contents_temp",
        "plugin_searchphp_frontend_crawler_noindex",
    );

    $id = md5($link);
    $known = false;

    foreach ($tables as $table) {
        try {
            $countRow = $this->db->fetchRow("SELECT count(*) as count from " . $table . " WHERE id ='" . $id . "'");
            if (is_array($countRow) and $countRow['count'] > 0) {
                $known = true;
            }
        } catch (Exception $e) {
            logger::log(get_class($this) . ": could not fetch from " . $table, Zend_Log::DEBUG);
        }
    }

    return $known;
}
/**
 * Writes sitemap XML files for every host found in plugin_searchphp_contents.
 *
 * For each distinct host a file sitemap-<host with dots replaced by dashes>.xml
 * is written to PIMCORE_WEBSITE_PATH/var/search/sitemap, listing the URIs of
 * that host whose content is neither 'canonical' nor 'noindex'. Afterwards a
 * sitemap.xml index file referencing all host sitemaps is written. Problems
 * (directory not usable, no hosts, file not writable) are logged, not thrown.
 *
 * @return void
 */
public static function generateSitemap()
{
    $sitemapDir = PIMCORE_WEBSITE_PATH . "/var/search/sitemap";

    if (is_dir($sitemapDir)) {
        $sitemapDirAvailable = is_writable($sitemapDir);
    } else {
        $sitemapDirAvailable = mkdir($sitemapDir, 0755, true);
        if ($sitemapDirAvailable) {
            // Only touch the mode when the directory really exists now;
            // chmod() on a directory that could not be created just fails.
            chmod($sitemapDir, 0755);
        }
    }

    if (!$sitemapDirAvailable) {
        logger::emerg("SearchPhp_Tool: Cannot generate sitemap. Sitemap directory [ " . $sitemapDir . " ] not available/not writeable and cannot be created");
        return;
    }

    $db = Pimcore_Resource_Mysql::get();
    $hosts = $db->fetchAll("SELECT DISTINCT host from plugin_searchphp_contents");

    // An empty result is treated like "no hosts" as well, so the warning is
    // actually reachable and no index without entries is written.
    if (!is_array($hosts) or count($hosts) == 0) {
        logger::warn("SearchPhp_Tool: could not generate sitemaps, did not find any hosts in index.");
        return;
    }

    //create domain sitemaps
    foreach ($hosts as $hostRow) {
        $host = $hostRow['host'];

        // The host is passed as a bound parameter instead of being
        // concatenated into the statement.
        $data = $db->fetchAll("SELECT * FROM plugin_searchphp_contents WHERE host = ? AND content != 'canonical' AND content!='noindex' ORDER BY uri", array($host));

        $name = str_replace(".", "-", $host);
        $filePath = $sitemapDir . "/sitemap-" . $name . ".xml";

        $fh = fopen($filePath, 'w');
        if ($fh === false) {
            logger::warn("SearchPhp_Tool: could not open sitemap file [ " . $filePath . " ] for writing");
            continue;
        }

        fwrite($fh, '<?xml version="1.0" encoding="UTF-8"?>' . "\r\n");
        fwrite($fh, '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">');
        fwrite($fh, "\r\n");

        foreach ($data as $contentRow) {
            // strip the crawler's output-filter parameter from the stored URI
            $uri = str_replace("&pimcore_outputfilters_disabled=1", "", $contentRow['uri']);
            $uri = str_replace("?pimcore_outputfilters_disabled=1", "", $uri);

            fwrite($fh, '<url>' . "\r\n");
            fwrite($fh, ' <loc>' . htmlspecialchars($uri, ENT_QUOTES) . '</loc>' . "\r\n");
            fwrite($fh, '</url>' . "\r\n");
        }

        fwrite($fh, '</urlset>' . "\r\n");
        fclose($fh);
    }

    //create sitemap index file
    $filePath = $sitemapDir . "/sitemap.xml";
    $fh = fopen($filePath, 'w');
    if ($fh === false) {
        logger::warn("SearchPhp_Tool: could not open sitemap file [ " . $filePath . " ] for writing");
        return;
    }

    fwrite($fh, '<?xml version="1.0" encoding="UTF-8"?>' . "\r\n");
    fwrite($fh, '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">');
    fwrite($fh, "\r\n");

    //first host must be main domain - see hint in plugin settings
    $currenthost = $hosts[0]['host'];

    foreach ($hosts as $hostRow) {
        $name = str_replace(".", "-", $hostRow['host']);
        $location = 'http://' . $currenthost . "/plugin/SearchPhp/frontend/sitemap/?sitemap=sitemap-" . $name . ".xml";

        fwrite($fh, '<sitemap>' . "\r\n");
        // escaped the same way as the <loc> entries of the host sitemaps
        fwrite($fh, ' <loc>' . htmlspecialchars($location, ENT_QUOTES) . '</loc>' . "\r\n");
        fwrite($fh, '</sitemap>' . "\r\n");
    }

    fwrite($fh, '</sitemapindex>' . "\r\n");
    fclose($fh);
}