Esempi in PHP per PHPCrawlerUrlPartsDescriptor::toArray

Linguaggio di programmazione: PHP

Classe/tipologia: PHPCrawlerUrlPartsDescriptor

Metodo/funzione: toArray

Esempi su hotexamples.com: 2

PHPCrawlerUrlPartsDescriptor::toArray in PHP: 2 esempi trovati. Questi sono i migliori esempi reali in PHP per PHPCrawlerUrlPartsDescriptor::toArray, estratti da progetti open source. Puoi valutare gli esempi per aiutarci a migliorarne la qualità.

Metodi utilizzati di frequente

Mostra Nascondi

fromURL(2)

toArray(2)

Esempio n. 1

Mostra file

File: PHPCrawlerUtils.class.php Progetto: luoxun/SensysPHP

 /**
  * Rebuilds a fully qualified and normalized URL from a link and the URL of the document the link was found in.
  *
  * Processing order: the supported HTML entities are decoded first, then a trailing anchor
  * ("#...") is stripped, then the link is resolved against the base URL, and finally
  * linebreaks are removed and the result is passed through self::normalizeUrl().
  * Entities are decoded before the anchor is stripped so that the "#" of a numeric
  * entity (e.g. "&#38;") is not mistaken for the start of an anchor.
  *
  * @param string $link          The link (i.e. "../page.htm")
  * @param PHPCrawlerUrlPartsDescriptor $BaseUrlParts  The parts of the URL the link was found in (i.e. "http://www.foo.com/folder/index.html")
  *
  * @return string|null The rebuilt, fully qualified and normalized URL the link is leading to (i.e. "http://www.foo.com/page.htm"),
  *                     or NULL if the link couldn't be rebuilt (empty link, pure anchor like "#top",
  *                     or a link with a scheme that is not followed by "//", e.g. "mailto:...").
  */
 public static function buildURLFromLink($link, PHPCrawlerUrlPartsDescriptor $BaseUrlParts)
 {
     $url_parts = $BaseUrlParts->toArray();
     // "protocol://host:port" prefix shared by all relative cases below
     $base = $url_parts["protocol"] . $url_parts["host"] . ":" . $url_parts["port"];
     // Entities-replacements
     $entities = array("'&(quot|#34);'i", "'&(amp|#38);'i", "'&(lt|#60);'i", "'&(gt|#62);'i", "'&(nbsp|#160);'i", "'&(iexcl|#161);'i", "'&(cent|#162);'i", "'&(pound|#163);'i", "'&(copy|#169);'i");
     $replace = array("\"", "&", "<", ">", " ", chr(161), chr(162), chr(163), chr(169));
     // Replace the HTML entities with normal text BEFORE looking for an anchor.
     // I.e.: HTML-code of the link is: <a href="index.php?x=1&#38;y=2">
     // -> link has to be "index.php?x=1&y=2", not "index.php?x=1&"
     $link = preg_replace($entities, $replace, $link);
     // Remove "#..." at end, but ONLY at the end,
     // not if # is at the beginning !
     $link = preg_replace("/^(.{1,})#.{0,}\$/", "\\1", $link);
     // Cases
     if (substr($link, 0, 2) == "//") {
         // Protocol-relative link like "//foo.com/page.htm": inherit the scheme of the base URL.
         // The protocol part is concatenated directly with the host above, so it is expected
         // to look like "http://"; fall back to "http" if it is empty.
         $scheme = rtrim((string) $url_parts["protocol"], ":/");
         $link = ($scheme != "" ? $scheme : "http") . ":" . $link;
     } elseif (substr($link, 0, 1) == "/") {
         // Absolute path on the same host
         $link = $base . $link;
     } elseif (substr($link, 0, 2) == "./") {
         // Explicitly relative to the current directory
         $link = $base . $url_parts["path"] . substr($link, 2);
     } elseif (preg_match("#^[a-z0-9]{1,}(:\\/\\/)# i", $link)) {
         // Already a full URL ("scheme://..."), nothing to rebuild
     } elseif (preg_match("/^[a-zA-Z]{0,}:[^\\/]{0,1}/", $link)) {
         // Scheme without "//" (e.g. "mailto:", "javascript:"), can't be rebuilt
         $link = "";
     } elseif (substr($link, 0, 3) == "../") {
         // Walk up one directory of the base path for every leading "../"
         $new_path = $url_parts["path"];
         while (substr($link, 0, 3) == "../") {
             $new_path = preg_replace('/\\/[^\\/]{0,}\\/$/', "/", $new_path);
             $link = substr($link, 3);
         }
         $link = $base . $new_path . $link;
     } elseif (substr($link, 0, 1) == "#") {
         // Pure anchor, points to the same document
         $link = "";
     } elseif (substr($link, 0, 1) == "?") {
         // Query only, applies to the file of the base URL
         $link = $base . $url_parts["path"] . $url_parts["file"] . $link;
     } else {
         // Plain relative link like "page.htm" or "folder/page.htm"
         $link = $base . $url_parts["path"] . $link;
     }
     if ($link == "") {
         return null;
     }
     // Replace linebreaks in the link with "" (happens if a link in the sourcecode
     // contains linebreaks)
     $link = str_replace(array("\n", "\r"), "", $link);
     // "Normalize" URL
     $link = self::normalizeUrl($link);
     return $link;
 }

Esempio n. 2

Mostra file

File: PHPCrawlerUtils.class.php Progetto: Konsul117/PhpCrawler

 /**
  * Rebuilds a fully qualified and normalized URL from a link and the URL of the document the link was found in.
  *
  * Processing order: HTML entities are decoded, a trailing anchor ("#...") is stripped,
  * the link is resolved against the base URL, linebreaks are removed and the result is
  * passed through self::normalizeUrl().
  *
  * @param string $link                           The link (i.e. "../page.htm")
  * @param PHPCrawlerUrlPartsDescriptor $BaseUrl  The base-URL the link was found in as PHPCrawlerUrlPartsDescriptor-object
  *
  * @return string|null The rebuilt, fully qualified and normalized URL the link is leading to (i.e. "http://www.foo.com/page.htm"),
  *                     or NULL if the link couldn't be rebuilt (empty link, pure anchor like "#top",
  *                     or a link with a scheme that is not followed by "//", e.g. "mailto:...").
  */
 public static function buildURLFromLink($link, PHPCrawlerUrlPartsDescriptor $BaseUrl)
 {
     $url_parts = $BaseUrl->toArray();
     // "protocol://host:port" prefix shared by all relative cases below
     $base = $url_parts["protocol"] . $url_parts["host"] . ":" . $url_parts["port"];
     // Decode HTML-entities first, so that the "#" of a numeric entity (e.g. "&#38;")
     // is not treated as the start of an anchor below
     $link = PHPCrawlerEncodingUtils::decodeHtmlEntities($link);
     // Remove anchor ("#..."), but ONLY at the end, not if # is at the beginning !
     $link = preg_replace("/^(.{1,})#.{0,}\$/", "\\1", $link);
     // Cases
     if (substr($link, 0, 2) == "//") {
         // Protocol-relative link like "//foo.com/page.htm": inherit the scheme of the base URL.
         // The protocol part is concatenated directly with the host above, so it is expected
         // to look like "http://"; fall back to "http" if it is empty.
         $scheme = rtrim((string) $url_parts["protocol"], ":/");
         $link = ($scheme != "" ? $scheme : "http") . ":" . $link;
     } elseif (substr($link, 0, 1) == "/") {
         // Absolute path on the same host
         $link = $base . $link;
     } elseif (substr($link, 0, 2) == "./") {
         // Explicitly relative to the current directory
         $link = $base . $url_parts["path"] . substr($link, 2);
     } elseif (preg_match("#^[a-z0-9-]{1,}(:\\/\\/)# i", $link)) {
         // Already a full URL ("scheme://..."), nothing to rebuild
     } elseif (preg_match("/^[a-zA-Z]{0,}:[^\\/]{0,1}/", $link)) {
         // Scheme without "//" (e.g. "mailto:", "javascript:"), can't be rebuilt
         $link = "";
     } elseif (substr($link, 0, 3) == "../") {
         // Walk up one directory of the base path for every leading "../"
         $new_path = $url_parts["path"];
         while (substr($link, 0, 3) == "../") {
             $new_path = preg_replace('/\\/[^\\/]{0,}\\/$/', "/", $new_path);
             $link = substr($link, 3);
         }
         $link = $base . $new_path . $link;
     } elseif (substr($link, 0, 1) == "#") {
         // Pure anchor, points to the same document
         $link = "";
     } elseif (substr($link, 0, 1) == "?") {
         // Query only, applies to the file of the base URL
         $link = $base . $url_parts["path"] . $url_parts["file"] . $link;
     } else {
         // Plain relative link like "page.htm" or "folder/page.htm"
         $link = $base . $url_parts["path"] . $link;
     }
     if ($link == "") {
         return null;
     }
     // Replace linebreaks in the link with "" (happens if a link in the sourcecode
     // contains linebreaks)
     $link = str_replace(array("\n", "\r"), "", $link);
     // "Normalize" URL
     $link = self::normalizeUrl($link);
     return $link;
 }