/**
 * Calculates the meta words to be associated with a given downloaded
 * document. These words will be associated with the document in the
 * index (for example, server:apache) even if the document itself did not
 * contain them.
 *
 * @param array& $site associated array containing info about a downloaded
 *     (or read from archive) document.
 * @param array $video_sources used to check if a page should be marked as
 *     having meta media:video
 * @return array of meta words to be associate with this document
 */
static function calculateMetas(&$site, $video_sources = array())
{
    $meta_ids = array();
    // handles user added meta words
    if (isset($site[CrawlConstants::META_WORDS])) {
        $meta_ids = $site[CrawlConstants::META_WORDS];
    }
    /* Handle the built-in meta words. For example store the sites the
       doc_key belongs to, so you can search by site */
    $url_sites = UrlParser::getHostPaths($site[CrawlConstants::URL]);
    $url_sites = array_merge($url_sites,
        UrlParser::getHostSubdomains($site[CrawlConstants::URL]));
    $meta_ids[] = 'site:all';
    foreach ($url_sites as $url_site) {
        if (strlen($url_site) > 0) {
            $meta_ids[] = 'site:' . $url_site;
        }
    }
    // one path: meta word for each prefix of the url's path component
    $path = UrlParser::getPath($site[CrawlConstants::URL]);
    if (strlen($path) > 0) {
        $path_parts = explode("/", $path);
        $pre_path = "";
        $meta_ids[] = 'path:all';
        $meta_ids[] = 'path:/';
        foreach ($path_parts as $part) {
            if (strlen($part) > 0) {
                $pre_path .= "/{$part}";
                $meta_ids[] = 'path:' . $pre_path;
            }
        }
    }
    // info: words allow looking a document up by url or by url hash
    $meta_ids[] = 'info:' . $site[CrawlConstants::URL];
    $meta_ids[] = 'info:' . crawlHash($site[CrawlConstants::URL]);
    $meta_ids[] = 'code:all';
    $meta_ids[] = 'code:' . $site[CrawlConstants::HTTP_CODE];
    if (UrlParser::getHost($site[CrawlConstants::URL]) . "/" ==
        $site[CrawlConstants::URL]) {
        $meta_ids[] = 'host:all'; //used to count number of distinct hosts
    }
    // bucket download size into DOWNLOAD_SIZE_INTERVAL-wide ranges
    if (isset($site[CrawlConstants::SIZE])) {
        $meta_ids[] = "size:all";
        $interval = DOWNLOAD_SIZE_INTERVAL;
        $size = floor($site[CrawlConstants::SIZE] / $interval) * $interval;
        $meta_ids[] = "size:{$size}";
    }
    // bucket total download time into DOWNLOAD_TIME_INTERVAL-wide ranges
    if (isset($site[CrawlConstants::TOTAL_TIME])) {
        $meta_ids[] = "time:all";
        $interval = DOWNLOAD_TIME_INTERVAL;
        $time = floor($site[CrawlConstants::TOTAL_TIME] / $interval) *
            $interval;
        $meta_ids[] = "time:{$time}";
    }
    // bucket dns lookup time the same way as total time
    if (isset($site[CrawlConstants::DNS_TIME])) {
        $meta_ids[] = "dns:all";
        $interval = DOWNLOAD_TIME_INTERVAL;
        $time = floor($site[CrawlConstants::DNS_TIME] / $interval) *
            $interval;
        $meta_ids[] = "dns:{$time}";
    }
    // outgoing links: searchable both by link count and by link target
    if (isset($site[CrawlConstants::LINKS])) {
        $num_links = count($site[CrawlConstants::LINKS]);
        $meta_ids[] = "numlinks:all";
        $meta_ids[] = "numlinks:{$num_links}";
        $link_urls = array_keys($site[CrawlConstants::LINKS]);
        $meta_ids[] = "link:all";
        foreach ($link_urls as $url) {
            $meta_ids[] = 'link:' . $url;
            $meta_ids[] = 'link:' . crawlHash($url);
        }
    }
    if (isset($site[CrawlConstants::LOCATION]) &&
        is_array($site[CrawlConstants::LOCATION]) &&
        $site[CrawlConstants::LOCATION] != array()) {
        /* add location:all exactly once (it used to be appended for each
           redirect location, producing duplicate meta words); the guard
           above keeps an empty LOCATION array from adding anything, as
           before */
        $meta_ids[] = 'location:all';
        foreach ($site[CrawlConstants::LOCATION] as $location) {
            $meta_ids[] = 'info:' . $location;
            $meta_ids[] = 'info:' . crawlHash($location);
            $meta_ids[] = 'location:' . $location;
        }
    }
    if (isset($site[CrawlConstants::IP_ADDRESSES])) {
        $meta_ids[] = 'ip:all';
        foreach ($site[CrawlConstants::IP_ADDRESSES] as $address) {
            $meta_ids[] = 'ip:' . $address;
        }
    }
    $meta_ids[] = 'media:all';
    /* media:{video|image|text} is only computed when a list of video
       sources was supplied by the caller */
    if ($video_sources != array()) {
        if (UrlParser::isVideoUrl($site[CrawlConstants::URL],
            $video_sources)) {
            $meta_ids[] = "media:video";
        } else {
            $meta_ids[] = stripos($site[CrawlConstants::TYPE], "image")
                !== false ? 'media:image' : 'media:text';
        }
    }
    // store the filetype info
    $url_type = UrlParser::getDocumentType($site[CrawlConstants::URL]);
    if (strlen($url_type) > 0) {
        $meta_ids[] = 'filetype:all';
        $meta_ids[] = 'filetype:' . $url_type;
    }
    if (isset($site[CrawlConstants::SERVER])) {
        $meta_ids[] = 'server:all';
        $meta_ids[] = 'server:' . strtolower($site[CrawlConstants::SERVER]);
    }
    if (isset($site[CrawlConstants::SERVER_VERSION])) {
        $meta_ids[] = 'version:all';
        $meta_ids[] = 'version:' .
            $site[CrawlConstants::SERVER_VERSION];
    }
    if (isset($site[CrawlConstants::OPERATING_SYSTEM])) {
        $meta_ids[] = 'os:all';
        $meta_ids[] = 'os:' .
            strtolower($site[CrawlConstants::OPERATING_SYSTEM]);
    }
    /* last-modified date at year, month, and day granularity so prefix
       searches like modified:2013-02 work */
    if (isset($site[CrawlConstants::MODIFIED])) {
        $modified = $site[CrawlConstants::MODIFIED];
        $meta_ids[] = 'modified:all';
        $meta_ids[] = 'modified:' . date('Y', $modified);
        $meta_ids[] = 'modified:' . date('Y-m', $modified);
        $meta_ids[] = 'modified:' . date('Y-m-d', $modified);
    }
    // crawl timestamp, down to second granularity
    if (isset($site[CrawlConstants::TIMESTAMP])) {
        $date = $site[CrawlConstants::TIMESTAMP];
        $meta_ids[] = 'date:all';
        $meta_ids[] = 'date:' . date('Y', $date);
        $meta_ids[] = 'date:' . date('Y-m', $date);
        $meta_ids[] = 'date:' . date('Y-m-d', $date);
        $meta_ids[] = 'date:' . date('Y-m-d-H', $date);
        $meta_ids[] = 'date:' . date('Y-m-d-H-i', $date);
        $meta_ids[] = 'date:' . date('Y-m-d-H-i-s', $date);
    }
    /* lang:en for a page in en-US; the full lang:en-US form is added
       only when a region subtag is present */
    if (isset($site[CrawlConstants::LANG])) {
        $meta_ids[] = 'lang:all';
        $lang_parts = explode("-", $site[CrawlConstants::LANG]);
        $meta_ids[] = 'lang:' . $lang_parts[0];
        if (isset($lang_parts[1])) {
            $meta_ids[] = 'lang:' . $site[CrawlConstants::LANG];
        }
    }
    if (isset($site[CrawlConstants::AGENT_LIST])) {
        foreach ($site[CrawlConstants::AGENT_LIST] as $agent) {
            $meta_ids[] = 'robot:' . strtolower($agent);
        }
    }
    //Add all meta word for subdoctype
    if (isset($site[CrawlConstants::SUBDOCTYPE])) {
        $meta_ids[] = $site[CrawlConstants::SUBDOCTYPE] . ':all';
    }
    return $meta_ids;
}
/**
 * Calculates the company level domain for the given url
 *
 * For www.yahoo.com the cld is yahoo.com, for
 * www.theregister.co.uk it is theregister.co.uk. It is
 * similar for organizations.
 *
 * @param string $url url to determine cld for
 * @return string the cld of $url
 */
function getCompanyLevelDomain($url)
{
    $host_parts = UrlParser::getHostSubdomains($url);
    /* getHostSubdomains on www.yahoo.com yields:
       [0] => com, [1] => .com, [2] => yahoo.com, [3] => .yahoo.com, ...
       so without at least entries 0 and 2 there is no cld to report */
    if (!isset($host_parts[0]) || !isset($host_parts[2])) {
        return "";
    }
    /* a 2-letter tld followed by a 5-char second component (e.g. co.uk)
       suggests a country-style registry, so the cld is one level deeper */
    $looks_like_country_registry = strlen($host_parts[0]) == 2 &&
        strlen($host_parts[2]) == 5;
    if ($looks_like_country_registry && isset($host_parts[4])) {
        return $host_parts[4];
    }
    return $host_parts[2];
}