// Command-line entry point of the "phinde URL processor" (fragment).
//
// Steps visible in this statement run:
//  1. Declares the CLI with Console_CommandLine: boolean options
//     -f/--force and -s/--show-links, one required "url" argument and an
//     optional, repeatable "actions" argument restricted to "index" and
//     "crawl" (both are the default).
//  2. Parses the command line; a parse exception is passed to displayError().
//     NOTE(review): the code below reads $res unconditionally, so
//     displayError() presumably terminates the script - confirm.
//  3. Adds a schema to the URL via Helper::addSchema() and normalizes it
//     with Net_URL2::getNormalizedURL().
//  4. Logs an error and exits with status 2 when Helper::isUrlAllowed()
//     rejects the URL.
//  5. Starts building $actions, keyed by action name: a Crawler (configured
//     with the showLinksOnly option) for "crawl", an Indexer for "index".
//
// The second try block, the foreach and the else branch are not closed
// within this statement run; their remainder is not visible here.
$cc = new \Console_CommandLine(); $cc->description = 'phinde URL processor'; $cc->version = '0.0.1'; $cc->addOption('force', array('short_name' => '-f', 'long_name' => '--force', 'description' => 'Always process URL, even when it did not change', 'action' => 'StoreTrue', 'default' => false)); $cc->addOption('showLinksOnly', array('short_name' => '-s', 'long_name' => '--show-links', 'description' => 'Only show which URLs were found', 'action' => 'StoreTrue', 'default' => false)); $cc->addArgument('url', array('description' => 'URL to process', 'multiple' => false)); $cc->addArgument('actions', array('description' => 'Actions to take', 'multiple' => true, 'optional' => true, 'choices' => array('index', 'crawl'), 'default' => array('index', 'crawl'))); try { $res = $cc->parse(); } catch (\Exception $e) { $cc->displayError($e->getMessage()); } $url = $res->args['url']; $url = Helper::addSchema($url); $urlObj = new \Net_URL2($url); $url = $urlObj->getNormalizedURL(); if (!Helper::isUrlAllowed($url)) { Log::error("Domain is not allowed; not crawling"); exit(2); } try { $actions = array(); foreach ($res->args['actions'] as $action) { if ($action == 'crawl') { $crawler = new Crawler(); $crawler->setShowLinksOnly($res->options['showLinksOnly']); $actions[$action] = $crawler; } else { if ($action == 'index') { $actions[$action] = new Indexer(); }
/**
 * Builds URL and filesystem information for a file path.
 *
 * The returned array has these keys:
 *  - fileUrl:            host URL followed by the path; a path that does not
 *                        start with "/" is prefixed with the string form of
 *                        $document and a slash,
 *  - fileExtension:      extension of $path, or '' when it has none,
 *  - fileUrlNormalized:  fileUrl as normalized by Net_URL2,
 *  - filePathNormalized: PIMCORE_DOCUMENT_ROOT followed by the normalized URL
 *                        with the host URL removed.
 *
 * @param string              $path     absolute ("/css/a.css") or relative ("../a.css") path
 * @param Model\Document|null $document document used as prefix for relative paths
 * @return array
 * @throws \Exception if $document is given but is not a Model\Document
 */
public static function getNormalizedFileInfo($path, $document = null)
{
    if ($document && !($document instanceof Model\Document)) {
        throw new \Exception('$document has to be an instance of Document');
    }

    $path = (string) $path;
    $hostUrl = Tool::getHostUrl();

    // Prefix comparison instead of $path[0]: an empty path no longer raises
    // an "uninitialized string offset" notice and is still treated as relative.
    $isAbsolute = strncmp($path, '/', 1) === 0;

    $fileInfo = array();
    if ($isAbsolute) {
        $fileInfo['fileUrl'] = $hostUrl . $path;
    } else {
        // Relative, e.g. ../file.css. Relies on the string conversion of
        // $document. NOTE(review): presumably this yields the document's
        // path - confirm against Model\Document.
        $fileInfo['fileUrl'] = $hostUrl . $document . "/{$path}";
    }

    // pathinfo() yields '' for paths without an extension. The former
    // substr()/strrpos() pair returned the path minus its first character in
    // that case, because strrpos() === false made the offset false + 1 = 1.
    $fileInfo['fileExtension'] = pathinfo($path, PATHINFO_EXTENSION);

    $netUrl = new \Net_URL2($fileInfo['fileUrl']);
    $fileInfo['fileUrlNormalized'] = $netUrl->getNormalizedURL();
    $fileInfo['filePathNormalized'] = PIMCORE_DOCUMENT_ROOT
        . str_replace($hostUrl, '', $fileInfo['fileUrlNormalized']);

    return $fileInfo;
}
/**
 * Checks that a URL without userinfo is left unchanged by normalization.
 *
 * getUserinfo() must report false both before and after normalize(), and
 * both getURL() and getNormalizedURL() must return the input URL.
 *
 * Doubles as the regression test for Bug #20385.
 *
 * @covers Net_URL2::getUserinfo
 * @covers Net_URL2::normalize
 * @covers Net_URL2::getNormalizedURL
 * @return void
 * @link https://pear.php.net/bugs/bug.php?id=20385
 */
public function testNoUserinfoAndNormalize()
{
    $expected = 'http://www.example.com/';
    $subject = new Net_URL2($expected);

    // Userinfo is absent on the freshly parsed URL ...
    $userinfoBefore = $subject->getUserinfo();
    $this->assertFalse($userinfoBefore);

    // ... and must still be absent once the URL was normalized in place.
    $subject->normalize();
    $userinfoAfter = $subject->getUserinfo();
    $this->assertFalse($userinfoAfter);

    $this->assertEquals($expected, $subject->getURL());
    $this->assertEquals($expected, $subject->getNormalizedURL());
}