/**
 * Extract the request URI (path plus optional query string) from a URL.
 *
 * The URL first has CR/LF characters removed and spaces percent-encoded, is
 * then checked with getDomain(), and is finally parsed with the Pdp parser.
 * Only the path and query components are used; any fragment is ignored.
 *
 * @param string $url URL to extract the URI from.
 *
 * @return mixed The URI as a string ("/" when the URL has no path), or false
 *               when $url is empty or getDomain() rejects it.
 */
function getUri($url)
{
    if (empty($url)) {
        return false;
    }

    // Drop line breaks, then percent-encode spaces so the URL can be validated.
    $url = preg_replace("/[\n\r]/", '', $url);
    $url = str_replace(' ', '%20', $url);

    // getDomain() yields false (or an empty value) when no valid domain is found.
    if (!getDomain($url)) {
        return false;
    }

    $pslManager = new Pdp\PublicSuffixListManager();
    $urlParser = new Pdp\Parser($pslManager->getList());
    $urlData = $urlParser->parseUrl($url)->toArray();

    // Fall back to the root path *before* appending the query string, so that
    // "http://example.com?q=1" yields "/?q=1" rather than a bare "?q=1".
    $hasPath = isset($urlData['path']) && $urlData['path'] !== '';
    $path = $hasPath ? $urlData['path'] : '/';

    // Compare against '' explicitly: empty() would also discard a query of "0".
    if (isset($urlData['query']) && $urlData['query'] !== '') {
        $path .= '?' . $urlData['query'];
    }

    // Percent-encode any space that is still present in the result.
    return str_replace(' ', '%20', $path);
}
/**
 * Extract the registerable domain (domain.tld) from a URL.
 *
 * The URL first has CR/LF characters removed and spaces percent-encoded, is
 * validated with FILTER_VALIDATE_URL, and is then parsed with the Pdp parser.
 *
 * @param string $url URL to extract the domain from.
 *
 * @return mixed The registerable domain as a string, or false when $url is
 *               empty, is not a valid URL, or has no valid registerable domain.
 */
function getDomain($url)
{
    if (empty($url)) {
        return false;
    }

    // Drop line breaks, then percent-encode spaces so the URL can be validated.
    $url = preg_replace("/[\n\r]/", '', $url);
    $url = str_replace(' ', '%20', $url);

    // filter_var() returns false on failure; compare against it explicitly
    // instead of relying on how "!" and "===" happen to bind.
    if (filter_var($url, FILTER_VALIDATE_URL) === false) {
        return false;
    }

    $pslManager = new Pdp\PublicSuffixListManager();
    $urlParser = new Pdp\Parser($pslManager->getList());
    $urlData = $urlParser->parseUrl($url)->toArray();

    $domain = isset($urlData['registerableDomain']) ? $urlData['registerableDomain'] : null;

    // Reject a missing domain up front so isSuffixValid() is never handed
    // null or an empty string.
    if (empty($domain) || $urlParser->isSuffixValid($domain) === false) {
        return false;
    }

    return $domain;
}
/**
 * Parse a URL with the Pdp parser and return its components.
 *
 * @param string $url URL to parse.
 *
 * @return mixed Whatever toArray() returns for the parsed URL (presumably an
 *               array of URL components), or null when $url is empty.
 */
function getUrlData($url)
{
    // Nothing to parse: callers get null, exactly as with no return statement.
    if (empty($url)) {
        return null;
    }

    $listManager = new Pdp\PublicSuffixListManager();
    $parser = new Pdp\Parser($listManager->getList());

    return $parser->parseUrl($url)->toArray();
}
/**
 * getBaseDomain
 *
 * Resolve the domain name of a URL using the public suffix list, so that a
 * bare suffix such as 'co.uk' is not returned as a domain.
 * See https://publicsuffix.org/
 *
 * The URL is lower-cased before parsing. 'localhost' and IP addresses (v4, or
 * v6 with or without surrounding brackets) are returned as-is instead of being
 * matched against the suffix list.
 *
 * @param string  $url              URL
 * @param boolean $includeSubdomain true to return the full host when a
 *                                  registerable domain exists; default false
 *                                  returns the registerable domain only
 * @param boolean $returnObject     true to return a Pdp\Uri\Url\Host object,
 *                                  false to return the domain as a string
 *
 * @return Pdp\Uri\Url\Host|string|null the host object when $returnObject is
 *         true, otherwise the domain string; null when parsing throws or when
 *         no registerable domain is found in string mode
 */
public function getBaseDomain($url, $includeSubdomain = false, $returnObject = false)
{
    $listManager = new \Pdp\PublicSuffixListManager();
    $domainParser = new \Pdp\Parser($listManager->getList());

    $url = mb_strtolower($url, 'UTF-8');

    try {
        // let php-domain-parser split the URL; only the host part is needed
        $parsed = $domainParser->parseUrl($url);
        $fullHost = $parsed->host->host;
    } catch (\Exception $e) {
        // report the failure through the event system and give up
        $this->events()->triggerEvent('core.exception', $e);

        return null;
    }

    // special hosts that have no public suffix: localhost and IP addresses
    if (!empty($fullHost)) {
        if ($fullHost === 'localhost') {
            if ($returnObject) {
                return $parsed->host;
            }

            return $fullHost;
        }

        // IPv6 literals are bracketed in URLs (RFC 2732); validate without them
        $candidateIp = $fullHost;
        $isBracketed = substr($fullHost, 0, 1) === '[' && substr($fullHost, -1) === ']';
        if ($isBracketed) {
            $candidateIp = substr($fullHost, 1, strlen($fullHost) - 2);
        }

        if (filter_var($candidateIp, FILTER_VALIDATE_IP)) {
            if ($returnObject) {
                return new \Pdp\Uri\Url\Host(null, null, null, $candidateIp);
            }

            return $candidateIp;
        }
    }

    // regular host: registerable domain, widened to the full host on request
    $domain = $parsed->host->registerableDomain;
    if ($includeSubdomain && !empty($domain)) {
        $domain = $parsed->host->host;
    }

    // in object mode the parsed host object is returned regardless of $domain
    if ($returnObject) {
        return $parsed->host;
    }

    return $domain;
}
<h1>FH URL parser</h1>
<p>Cette page permet de parser des urls et de ne ressortir que le nom de domaine !</p>
</div>
<?php
// When the form has been submitted, print the registerable domain of every
// non-blank line of the "urls" field, one per line, inside a <pre> block.
// The is_string() check skips array-shaped input (urls[]=...), which
// explode() cannot handle.
if (isset($_POST['urls']) && is_string($_POST['urls'])) {
    echo '<div class="row">';
    echo '<div class="col-sm-12">';
    echo '<p>Voilà les domaines extraits : </p>';
    echo "<pre>";

    // Build the parser once: it does not depend on the individual URL.
    $pslManager = new Pdp\PublicSuffixListManager();
    $parser = new Pdp\Parser($pslManager->getList());

    $urls = explode("\n", $_POST['urls']);
    foreach ($urls as $urlc) {
        // trim() also removes the "\r" left behind by CRLF line endings.
        $urlc = trim($urlc);
        if ($urlc == '') {
            continue;
        }

        $url = $parser->parseUrl($urlc);
        $newdomain = $url->host->registerableDomain;

        // The domain comes from user-submitted text: escape it before it is
        // written into the HTML response. The cast covers a null domain.
        echo htmlspecialchars((string) $newdomain, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "<br />";
    }

    echo "</pre>";
    echo "</div>";
    echo "</div>";
}
?>
{!! Form::open(array('url' => '/urlparse')) !!}
<div class="form-group">
{!! Form::textarea('urls', '', array('class' => 'form-control')) !!}
<br />