/**
 * Collects links from an HTML string and appends them, as absolute URLs, to $urlList.
 *
 * Each pattern in $urlRegexList is applied to $htmlCode; whatever its first capture
 * group matched is treated as a link, resolved against $originalURL with
 * url_to_absolute() and merged into $urlList. Duplicates are dropped after every
 * pattern (array_unique keeps the key of the first occurrence).
 *
 * @param string $originalURL  URL of the page the HTML came from (base for resolution)
 * @param string $htmlCode     HTML source to scan
 * @param array  $urlRegexList PCRE patterns whose first capture group is the link
 * @param array  $urlList      list to extend; modified in place
 * @return void
 */
function getURLFromHTML($originalURL, $htmlCode, $urlRegexList, &$urlList)
{
    foreach ($urlRegexList as $pattern) {
        $found = array();
        preg_match_all($pattern, $htmlCode, $found);
        if (isset($found[1])) {
            $resolved = array();
            foreach ($found[1] as $rawLink) {
                // Percent-encode the link but keep path and query separators readable,
                // so the resolver still sees the URL structure.
                $encoded = str_replace(array('%2F', '%3F'), array('/', '?'), urlencode($rawLink));
                $resolved[] = urldecode(url_to_absolute($originalURL, $encoded));
            }
            $urlList = array_merge($urlList, $resolved);
        }
        $urlList = array_unique($urlList);
    }
}
/**
 * Prints the download box for an IITC Mobile .apk: a QR code pointing at the
 * absolute apk URL, the apk / IITC version line and the download button.
 *
 * Version data comes from getMobileVersion(); the build suffix of the IITC version
 * (".YYYYMMDD.n") is wrapped in a muted <small> element.
 *
 * @param string $apkfile path of the .apk, used verbatim in the link and tracking call
 * @return void output is written directly to the response
 */
function iitcMobileDownload($apkfile)
{
    $versionInfo = getMobileVersion($apkfile);
    $apkVersion = $versionInfo['apk_version'];
    $iitcVersion = preg_replace('/^(\\d+\\.\\d+\\.\\d+)\\.(\\d{8}\\.\\d{1,6})/', '\\1<small class="text-muted">.\\2</small>', $versionInfo['iitc_version']);
    // The QR code needs an absolute link, so rebuild the URL of the current page
    // and resolve the apk path against it.
    $scheme = isset($_SERVER['HTTPS']) && $_SERVER['HTTPS'] ? "https" : "http";
    $selfUrl = $scheme . "://" . $_SERVER['HTTP_HOST'] . $_SERVER['REQUEST_URI'];
    $absoluteApk = url_to_absolute($selfUrl, $apkfile);
    ?>


<div>

<img style="float: right; margin: 10px;" src="https://chart.googleapis.com/chart?cht=qr&chs=120x120&chld=L|2&chl=<?php echo urlencode($absoluteApk); ?>" alt="QR Code for download">

<p>
IITC Mobile version <?php echo $apkVersion; ?>, with IITC version <?php echo $iitcVersion; ?></p>

<p>
<a style="margin-right: 1em;" onclick="if(track){track('mobile','download','<?php echo $apkfile; ?>');}" class="btn btn-large btn-primary" href="<?php echo $apkfile; ?>">Download</a> or scan the QR Code
</p>

</div>
<div style="clear: both"></div>


<?php
}
 /**
  * Walks the document root looking for paths that differ from the requested one
  * by at most one edit per path segment, and records them in the global $url_fa.
  *
  * At depth $i every entry of "$docroot$dir" is compared with $path_arr[$i] using
  * levenshtein(). Entries at distance 0 or 1 are either recorded (last segment) or,
  * when they are directories, descended into for the next segment.
  *
  * @param array  $path_arr requested path split into segments
  * @param string $dir      directory (relative to $docroot, with trailing "/") being scanned
  * @param int    $i        index of the segment of $path_arr being matched
  * @return void results are appended to the global $url_fa
  */
 function check_d($path_arr, $dir = "", $i = 0)
 {
     global $docroot, $docroot_prefix, $url_fa;
     $handle = opendir(realpath($docroot . $dir));
     if ($handle === false) {
         // Unreadable or missing directory: nothing to suggest from here.
         return;
     }
     // Entry name => edit distance to the requested segment.
     $t = array();
     // Compare against false explicitly: an entry named "0" is falsy and would end the loop early.
     while (($f = readdir($handle)) !== false) {
         if ($f != "." && $f != "..") {
             $t[$f] = levenshtein($path_arr[$i], $f);
         }
     }
     closedir($handle);
     // Closest matches first.
     asort($t);
     // foreach replaces the former while/list/each() loop; each() no longer exists in PHP 8.
     foreach ($t as $rep => $val) {
         if ($val >= 0 && $val <= 1) {
             if ($i + 1 == count($path_arr)) {
                 $url_fa[] = url_to_absolute($docroot_prefix . $dir . $rep);
             } elseif (is_dir($docroot . $dir . $rep)) {
                 $this->check_d($path_arr, $dir . $rep . "/", $i + 1);
             }
         }
     }
 }
/**
 * Fetches a page and returns its HTML with links rewritten to absolute URLs.
 *
 * The body returned by get_request() is parsed with simple-html-dom; href (a, link),
 * src (img, script) and action (form) attributes are resolved against $url.
 * In-page anchors ("#...") are left alone and forms without an action get $url.
 *
 * @param string $url address of the page to fetch
 * @return string rewritten HTML, or '' when $url is empty or the body cannot be parsed
 */
function get_resource($url)
{
    if (empty($url)) {
        return '';
    }
    $response = get_request($url);
    if (!function_exists('str_get_html')) {
        require_once dirname(__FILE__) . '/../vendor/simple-html-dom/simple-html-dom.php';
    }
    if (!function_exists('url_to_absolute')) {
        require_once dirname(__FILE__) . '/../vendor/url-to-absolute/url-to-absolute.php';
    }
    $html = str_get_html($response);
    if (!$html) {
        // str_get_html() yields false for an empty or oversized body; calling
        // ->find() on that would be a fatal error.
        return '';
    }
    foreach ($html->find('a, link') as $element) {
        // substr() instead of [0] so an empty href does not raise a string-offset warning.
        if (isset($element->href) && substr((string) $element->href, 0, 1) !== '#') {
            $element->href = url_to_absolute($url, $element->href);
        }
    }
    foreach ($html->find('img, script') as $element) {
        if (isset($element->src)) {
            $element->src = url_to_absolute($url, $element->src);
        }
    }
    foreach ($html->find('form') as $element) {
        if (isset($element->action)) {
            $element->action = url_to_absolute($url, $element->action);
        } else {
            // A form without an action posts back to the page it was served from.
            $element->action = $url;
        }
    }
    return $html->save();
}
Example #5
0
 /**
  * Builds an HTML directory listing for the requested URI and publishes it as the response.
  *
  * Everything is exchanged through globals: the listing is generated for
  * "$docroot$http_uri" when the "filebrowser" setting is on and the directory is
  * readable. $rq_err is set to 200 (listing built), 403 (directory not readable) or
  * 404 (file browser disabled); on success $out_contenttype becomes "text/html" and
  * the rendered page is stored in $GLOBALS["lf"] as a static_response.
  *
  * Sorting comes from the "fbsortorder" setting and can be overridden with the
  * "sort" (date|size|name) and "order" (desc) query-string parameters. Directories
  * are listed before files and ".." is always first.
  *
  * @return void
  */
 function main()
 {
     global $http_uri, $docroot, $conf, $vhost, $rq_err, $out_contenttype, $real_uri, $out_add_headers, $accessdir, $mime, $query_string;
     // Mime-type prefix => icon, from "fbiconbytype" entries of the form "<icon> <type>".
     $icons = array();
     foreach (access_query("fbiconbytype") as $icndef) {
         $ic = explode(" ", $icndef);
         $icons[trim($ic[1])] = trim($ic[0]);
     }
     $icndef = access_query("fbicondefault", 0);
     if (!($icndir = access_query("fbicondirectory", 0))) {
         // No dedicated directory icon configured: use the default one.
         $icndir = $icndef;
     }
     // substr() tolerates an empty $http_uri, unlike a negative string offset.
     if (substr($http_uri, -1) != "/") {
         $http_uri .= "/";
     }
     $resp = "";
     if (access_query("filebrowser", 0)) {
         if (@is_readable($docroot . $http_uri)) {
             $dfmt = access_query("fbdateformat", 0);
             if (!$dfmt) {
                 $dfmt = "d-M-Y H:i:s";
             }
             $rq_err = 200;
             $out_contenttype = "text/html";
             // Generate directory listing
             $hnd = opendir(realpath($docroot . $http_uri));
             $fb_arr = array();
             // Running totals over regular files: byte size and count.
             $fb_ts = 0;
             $fb_tf = 0;
             // Explicit false check: an entry named "0" must not end the loop.
             while (($f = readdir($hnd)) !== false) {
                 $fi = stat($docroot . $http_uri . $f);
                 if (!is_array($fi)) {
                     // stat() failed (e.g. dangling symlink): keep the entry with zero size / mtime.
                     $fi = array(7 => 0, 9 => 0);
                 }
                 $fi["isdir"] = is_dir($docroot . $http_uri . $f);
                 $fi["f"] = $f;
                 $fb_arr[$f] = $fi;
                 if (!$fi["isdir"]) {
                     $fb_ts += $fi[7];
                     $fb_tf++;
                 }
             }
             closedir($hnd);
             if ($fbstmp = access_query("fbsortorder", 0)) {
                 $fbsort = explode(" ", $fbstmp);
             } else {
                 $fbsort = array("name");
             }
             // Query-string overrides for sort key and direction.
             parse_str($query_string, $ptmp);
             if (!empty($ptmp["sort"])) {
                 $fbsort[0] = $ptmp["sort"];
             }
             if (!empty($ptmp["order"])) {
                 $fbsort[1] = $ptmp["order"];
             }
             // Index into the stat() array (9 = mtime, 7 = size) or "f" for the name.
             switch ($fbsort[0]) {
                 case "date":
                     $sortidx = 9;
                     break;
                 case "size":
                     $sortidx = 7;
                     break;
                 case "name":
                 default:
                     $sortidx = "f";
             }
             $dsort = $fsort = array();
             $has_parent = false;
             foreach ($fb_arr as $fstmp) {
                 if (!$fstmp["isdir"]) {
                     $fsort[$fstmp["f"]] = $fstmp[$sortidx];
                 } else {
                     if ($fstmp["f"] != "..") {
                         $dsort[$fstmp["f"]] = $fstmp[$sortidx];
                     } else {
                         $has_parent = $fstmp[$sortidx];
                     }
                 }
             }
             if (isset($fbsort[1]) && $fbsort[1] == "desc") {
                 arsort($fsort);
                 arsort($dsort);
             } else {
                 asort($fsort);
                 asort($dsort);
             }
             if ($has_parent) {
                 // Put ".." first. Array union keeps the keys intact, whereas array_reverse()
                 // renumbers numeric keys and would lose directories with all-digit names.
                 $dsort = array(".." => $has_parent) + $dsort;
             }
             // Do other processing
             if (@is_readable($wfn = $docroot . $http_uri . $conf[$vhost]["fbwelcomefile"][0])) {
                 $wfc = implode("<br>", file($wfn));
                 $welcome_formated = "<br><font size=\"1\" face=\"fixedsys\">" . $wfc . "</font><br><br>";
             } else {
                 $welcome_formated = "";
             }
             $fhdr = array();
             $fhdr["dir_name"] = "/" . $real_uri;
             $fhdr["welcome"] = $welcome_formated;
             $fhdr["total_files"] = $fb_tf;
             $fhdr["total_files_formated"] = number_format($fb_tf);
             $fhdr["total_size"] = $fb_ts;
             $fhdr["total_size_formated"] = number_format($fb_ts);
             $resp = nw_apply_template(NW_TMPL_FB_HEADER, $fhdr);
             // Optional description file: one "<name> <description>" pair per line.
             $dfile = access_query("fbdescfile", 0);
             $fb_desc = array();
             if (@is_readable($dfcomp = realpath($accessdir . "/" . $dfile))) {
                 if ($descf = file($dfcomp)) {
                     foreach ($descf as $dfline) {
                         if (trim($dfline)) {
                             $didx = trim(substr($dfline, 0, strpos($dfline, " ")));
                             $desc = trim(substr($dfline, strpos($dfline, " ")));
                             $fb_desc[$didx] = $desc;
                         }
                     }
                 }
             }
             // Display each row: directories first
             foreach (array_keys($dsort) as $fidx) {
                 $fi = $fb_arr[$fidx];
                 $f = $fi["f"];
                 if ($f == "..") {
                     $dname = nw_apply_template(NW_TMPL_FB_PARENT, array());
                     // Link to the parent: drop the last segment of the current URI.
                     $tmpdl = explode("/", trim($real_uri, "/"));
                     array_pop($tmpdl);
                     $dlink = url_to_absolute(implode("/", $tmpdl) . "/");
                 } else {
                     $dname = $f;
                     $dlink = url_to_absolute($real_uri . rawurlencode($f) . "/");
                 }
                 // Hide dot entries unless enabled, never list ".", and no ".." at the root.
                 if ((substr($f, 0, 1) != "." || $f == ".." || $conf[$vhost]["fbshowdotfiles"][0]) && $f != "." && !($f == ".." && $http_uri == "/")) {
                     $d_row = array();
                     $d_row["icon"] = $icndir;
                     $d_row["link"] = $dlink;
                     $d_row["name"] = $dname;
                     $d_row["date"] = date($dfmt, $fi[9]);
                     $d_row["desc"] = !empty($fb_desc[$f]) ? $fb_desc[$f] : "-";
                     $resp .= nw_apply_template(NW_TMPL_FB_ROW_D, $d_row, true);
                 }
             }
             // ... then regular files
             foreach (array_keys($fsort) as $fidx) {
                 $fi = $fb_arr[$fidx];
                 $f = $fi["f"];
                 $fp = pathinfo($f);
                 // Files without an extension or with an unknown one have no mime type.
                 $ext = isset($fp["extension"]) ? strtolower($fp["extension"]) : "";
                 $t = isset($mime[$ext]) ? $mime[$ext] : "";
                 // First configured type prefix that matches wins; otherwise the default icon.
                 $icnf = $icndef;
                 foreach ($icons as $key => $val) {
                     if (strpos($t, $key) === 0) {
                         $icnf = $val;
                         break;
                     }
                 }
                 if ((substr($f, 0, 1) != "." || $f == ".." || $conf[$vhost]["fbshowdotfiles"][0]) && $f != "." && !($f == ".." && $http_uri == "/")) {
                     $f_row = array();
                     $f_row["icon"] = $icnf;
                     $f_row["link"] = url_to_absolute($real_uri . rawurlencode($f));
                     $f_row["name"] = $f;
                     $f_row["date"] = date($dfmt, $fi[9]);
                     $f_row["size"] = number_format($fi[7]);
                     $f_row["desc"] = !empty($fb_desc[$f]) ? $fb_desc[$f] : "-";
                     $resp .= nw_apply_template(NW_TMPL_FB_ROW_F, $f_row, true);
                 }
             }
             $resp .= nw_apply_template(NW_TMPL_FB_FOOTER, $fhdr);
         } else {
             $rq_err = 403;
         }
     } else {
         $rq_err = 404;
     }
     if ($resp) {
         // Plain assignment: "=& new" is a parse error since PHP 7, and objects are
         // handled by reference-like handles anyway.
         $GLOBALS["lf"] = new static_response($resp);
     }
 }
/**
 * Downloads each OPDS endpoint, validates it, lists its editions and validates the
 * XML of every edition of type "package". Progress and results are printed as HTML.
 *
 * Downloaded files are stored under "<tmp_root>package-<timestamp>/". Format
 * failures (invalid OPDS or package XML) are shown per endpoint, and once more as
 * an overall summary after all endpoints have been processed.
 *
 * @param array  $endpoints OPDS feed URLs; empty strings are ignored
 * @param mixed  $timestamp value used to name the working directory
 * @param string $tmp_root  root directory for downloads (expected to end with a separator)
 * @return void
 */
function _pugpig_package_test_endpoints($endpoints, $timestamp, $tmp_root)
{
    pugpig_interface_output_header("Pugpig - Endpoint Checker");
    print_r("<h1>Checking Pugpig End Points</h1>");
    $tmp_root = str_replace(DIRECTORY_SEPARATOR, '/', $tmp_root);
    $tmp_path = $tmp_root . 'package-' . $timestamp . '/';
    // Endpoint URL => local path the feed is saved to.
    $entries = array();
    foreach ($endpoints as $endpoint) {
        if ($endpoint != '') {
            $entries[$endpoint] = $tmp_path . 'opds/' . hash('md5', $endpoint) . '.xml';
        }
    }
    $debug = FALSE;
    $concurrent = 1;
    $entries = _pugpig_package_download_batch("OPDS Feeds", $entries, $debug, $concurrent);
    // Failures across all endpoints, for the final summary.
    $all_failures = array();
    foreach (array_keys($entries) as $entry) {
        // Failures of this endpoint only. Collected in one list per endpoint so that an
        // invalid OPDS feed is reported together with its package failures instead of
        // being discarded before it is shown.
        $format_failures = array();
        // Read the ATOM from the file. file_get_contents() also copes with an empty
        // file, where fread() with a zero length would fail.
        $opds_atom = file_get_contents($entries[$entry]);
        if ($opds_atom === false) {
            $opds_atom = '';
        }
        $msg = check_xml_is_valid($opds_atom);
        if ($msg != '') {
            $format_failures[$entry] = "OPDS XML Invalid: " . $msg;
            // Do not try to parse a feed that failed validation.
            $opds_atom = '';
        }
        $opds_ret = _pugpig_package_parse_opds($opds_atom);
        // Edition URL => local save path, and the subset of URLs that are packages.
        $edition_roots = array();
        $package_roots = array();
        print_r("<h2>" . $entry . "(" . $opds_ret['title'] . ")</h2>");
        foreach ($opds_ret['editions'] as $edition) {
            $cover = url_to_absolute($entry, $edition['cover']);
            print_r("<img class='cover " . ($edition['free'] ? "free" : "paid") . "' height='60' title='" . $edition['title'] . ': ' . $edition['summary'] . "' src='" . $cover . "' />");
            $edition_root = url_to_absolute($entry, $edition['url']);
            $edition_roots[$edition_root] = $tmp_path . $edition['type'] . '/' . hash('md5', $edition_root) . '.xml';
            if ($edition['type'] == 'package') {
                $package_roots[] = $edition_root;
            }
        }
        $edition_roots = _pugpig_package_download_batch("Edition Roots", $edition_roots, $debug, $concurrent);
        foreach ($package_roots as $package_root) {
            if (!isset($edition_roots[$package_root])) {
                // Not in the download result; presumably the batch downloader already
                // reported it — TODO confirm against _pugpig_package_download_batch().
                continue;
            }
            $package_xml_body = file_get_contents($edition_roots[$package_root]);
            if ($package_xml_body === false) {
                $package_xml_body = '';
            }
            $msg = check_xml_is_valid($package_xml_body);
            if ($msg != '') {
                $format_failures[$package_root] = "Package XML Invalid: " . $msg;
            }
        }
        // Show format errors for this endpoint
        _pugpig_package_show_failures($format_failures);
        $all_failures = $all_failures + $format_failures;
    }
    // Overall summary of every format failure found
    _pugpig_package_show_failures($all_failures);
}
Example #7
0
/**
 * Make $url absolute relative to $base, if it is not absolute yet.
 *
 * When no base is given it is detected from the current page: the <base> tag value
 * if one is set, otherwise the folder of the current blog's own skin (or the blog's
 * base URL when the skin cannot be loaded), otherwise the request host.
 *
 * @param string URL
 * @param string Base (including protocol, e.g. 'http://example.com'); autodetected when empty
 * @return string absolute URL, or the unchanged $url when it cannot be resolved
 */
function url_absolute($url, $base = NULL)
{
    load_funcs('_ext/_url_rel2abs.php');
    if (is_absolute_url($url)) {
        // Nothing to do.
        return $url;
    }
    if (empty($base)) {
        // Detect current page base
        global $Blog, $ReqHost, $base_tag_set, $baseurl;
        if ($base_tag_set) {
            // <base> tag is set
            $base = $base_tag_set;
        } elseif (empty($Blog)) {
            // General page that is not specific to a blog
            $base = $ReqHost;
        } else {
            // Use the blog's original skin, not one passed with the 'tempskin' param
            $SkinCache =& get_SkinCache();
            $Skin = $SkinCache->get_by_ID($Blog->get_skin_ID(), false);
            if ($Skin !== false) {
                $base = $Blog->get_local_skins_url() . $Skin->folder . '/';
            } else {
                // Skin not set
                $base = $Blog->gen_baseurl();
            }
        }
    }
    $resolved = url_to_absolute($url, $base);
    // Fall back to the relative URL in case of error
    return $resolved === false ? $url : $resolved;
}
Example #8
0
 /**
  * Adds every image referenced by an HTML page to the file-picker list.
  *
  * Image sources come from extract_html_urls(); each one is resolved against
  * $baseurl and appended to $list['list'] as both source and thumbnail.
  *
  * @param string $baseurl URL of the page, used to resolve relative image paths
  * @param string $content HTML of the page
  * @param array  $list    listing to extend; modified in place
  * @return void
  */
 public function analyse_page($baseurl, $content, &$list)
 {
     $urls = extract_html_urls($content);
     // Pages without any <img src> have no 'img' entry at all.
     if (empty($urls['img']['src'])) {
         return;
     }
     foreach ($urls['img']['src'] as $url) {
         $title = $this->guess_filename($url, '');
         // Resolve once; the same URL serves as source and thumbnail.
         $absolute = url_to_absolute($baseurl, $url);
         $list['list'][] = array('title' => $title, 'source' => $absolute, 'thumbnail' => $absolute, 'thumbnail_height' => 84, 'thumbnail_width' => 84);
     }
 }
Example #9
0
// rel2abs: resolve every test case against $base and count how many results
// equal the expected 'result' value. $tests, $base and $start are set earlier in the script.
list($successes, $failures) = array(0, 0);
foreach ($tests as $test) {
    if (($r = rel2abs($test['rel'], $base)) == $test['result']) {
        $successes++;
    } else {
        $failures++;
    }
}
// NOTE(review): microtime() without the `true` argument returns a "msec sec" string,
// so this subtraction does not give elapsed seconds — confirm how $start is
// initialised above and switch both to microtime(true).
$elapsed = microtime() - $start;
echo "rel2abs:         successes -> {$successes}, failures => {$failures}, elapsed time: {$elapsed}\n";
# url_to_absolute
// Resolve every test case against the base URL and count how many results equal
// the expected 'result' value.
// microtime(true) returns a float timestamp; the default string form
// ("msec sec") cannot be subtracted meaningfully.
$start = microtime(true);
$base = 'http://a/b/c/d;p?q';
list($successes, $failures) = array(0, 0);
foreach ($tests as $test) {
    if (($r = url_to_absolute($base, $test['rel'])) == $test['result']) {
        $successes++;
    } else {
        $failures++;
    }
}
// Elapsed wall-clock time in seconds.
$elapsed = microtime(true) - $start;
echo "url_to_absolute: successes -> {$successes}, failures => {$failures}, elapsed time: {$elapsed}\n";
# phpuri
$start = microtime();
$base = phpUri::parse('http://a/b/c/d;p?q');
list($successes, $failures) = array(0, 0);
foreach ($tests as $test) {
    if (($r = $base->join($test['rel'])) == $test['result']) {
        $successes++;
    } else {
/**
 * Resolves $relative_url against $base_url.
 *
 * Thin alias for url_to_absolute(); the result is passed through unchanged.
 *
 * @param string $base_url     URL to resolve against
 * @param string $relative_url URL to resolve
 * @return mixed whatever url_to_absolute() returns
 */
function absolute_url($base_url, $relative_url)
{
    $absolute = url_to_absolute($base_url, $relative_url);

    return $absolute;
}
 /**
  * Returns the absolute URLs of all anchors found in the body of $this->htmlCode.
  *
  * Each href is passed through $this->encodeURL() and resolved against $this->url.
  * Duplicates are removed; array_unique keeps the key of the first occurrence, so
  * the result may have gaps in its numeric keys.
  *
  * @return array unique absolute URLs
  */
 public function getAllURLFromHTML()
 {
     // DOMDocument::loadHTML() refuses an empty document (ValueError as of PHP 8),
     // and there is nothing to extract from it anyway.
     if ((string) $this->htmlCode === '') {
         return array();
     }
     // Collect parser complaints about real-world markup internally instead of
     // silencing the call with "@", then restore the previous libxml setting.
     $previous = libxml_use_internal_errors(true);
     $dom = new DOMDocument();
     $dom->loadHTML($this->htmlCode);
     libxml_clear_errors();
     libxml_use_internal_errors($previous);
     $xpath = new DOMXPath($dom);
     $output = array();
     foreach ($xpath->evaluate("/html/body//a") as $anchor) {
         $href = $this->encodeURL($anchor->getAttribute('href'));
         $output[] = url_to_absolute($this->url, $href);
     }
     return array_unique($output);
 }
Example #12
0
 /**
  * Appends an image to $list['list'] unless an entry with the same source exists.
  *
  * The URL is HTML-entity decoded and resolved against $baseurl; the resolved URL
  * is used both as source and as thumbnail.
  *
  * @param string $baseurl URL the image path is relative to
  * @param string $url     image URL as found in the page
  * @param array  $list    listing to extend; modified in place
  * @return void
  */
 protected function add_image_to_list($baseurl, $url, &$list)
 {
     // Make sure there is a list to look through and append to.
     if (empty($list['list'])) {
         $list['list'] = array();
     }
     $absolute = url_to_absolute($baseurl, htmlspecialchars_decode($url));
     // Already listed? Then there is nothing to add.
     foreach ($list['list'] as $known) {
         if ($known['source'] == $absolute) {
             return;
         }
     }
     $entry = array(
         'title' => $this->guess_filename($url, ''),
         'source' => $absolute,
         'thumbnail' => $absolute,
         'thumbnail_height' => 84,
         'thumbnail_width' => 84,
     );
     $list['list'][] = $entry;
 }
/**
 * Echoes a breadcrumb trail for the current page, based on the 'glowne' nav menu.
 *
 * The current URL (or $forcedUrl) is looked up among the menu items; from the
 * matching item the trail is built by following menu_item_parent links upwards.
 * For single posts / search results that are not in the menu, the post type
 * archive is looked up instead and the post title is appended as a suffix.
 * When nothing matches, the current title alone is printed (unless $noBold or
 * $forcedUrl is given).
 *
 * @param string $forcedUrl URL to build the trail for instead of the current request
 * @param bool   $noBold    when true, every level is a link and no suffix / fallback title is printed
 * @return void the trail is echoed
 */
function breadcrumbs($forcedUrl='', $noBold=false){
	global $post;
	$menuItems = wp_get_nav_menu_items('glowne');
	if (!is_array($menuItems)) {
		// wp_get_nav_menu_items() returns false when the menu does not exist
		$menuItems = array();
	}

	// Follow the scheme of the current request: with a hard-coded "http" the
	// comparison against menu URLs can never succeed on an HTTPS site.
	$scheme = is_ssl() ? "https" : "http";
	$url = $scheme."://".$_SERVER["HTTP_HOST"] . $_SERVER["REQUEST_URI"];

	// Defaults, so both are defined on every path below
	$suffix = '';
	$bold_suffix = false;

	if (  is_single() || is_search()  ){
		$url=get_permalink();

		if (!find_menu_item($menuItems, get_permalink())){
			// Post is not in the menu: anchor the trail at its post type archive
			// and show the post title after it.
			$url=get_post_type_archive_link( get_post_type($post) );
			$suffix = get_the_title();
			$bold_suffix=true;
		}
	}

	if (!empty($forcedUrl)){
		$url=$forcedUrl;
		$suffix='';
		$bold_suffix=false;
	}

	$s='';
	$baseUrl = $scheme."://".$_SERVER["HTTP_HOST"]."/";
	foreach($menuItems as $menuItem) {
		// a menu link may be stored as just "?post_type=xxx", so resolve it first
		$absoluteUrl = url_to_absolute( $baseUrl, $menuItem->url);
		if($absoluteUrl == $url ) {
			$id=$menuItem->ID;

			// Walk up the parent chain; the cap of 50 levels guards against cycles.
			for ($x=0; $x<50; $x++){
				if (empty($id))
					break;
				$item = get_menu_item($menuItems, $id);
				if (empty($item))
					break; // parent id does not refer to an item of this menu
				$parentId = $item->menu_item_parent;
				$title = htmlentities( $item->title, ENT_QUOTES ,"UTF-8" );

				$href=$item->url;
				if ( ($href=='') || ($href=='#') )
					$href = "#";

				if ( ($x==0) && (!$bold_suffix) && (!$noBold)){
					// The current page itself: plain text, not a link
					$s = '<span>'.$title.'</span>'. $s;
				}else{
					// NOTE(review): no separator is emitted for the first ancestor ($x == 1) — confirm this is intended.
					if($x != 1){
						$separator = '<span> &gt; </span>';
					}else{
						$separator = '';
					}

					$s = ' <a href="'.$href.'">'.$title.' </a>'.$separator. $s;
				}

				$id = $parentId;
			}
			if (!$noBold)
				$s.= '<span>'.$suffix.'</span>'; // append the post title
			break;
		}
	}

	// Nothing matched in the menu: fall back to the bare title
	if ( (empty($s)) && (empty($forcedUrl)) && (!$noBold) )
		$s='<span>'.get_the_title().'</span>';
	echo $s;
}
/**
 * Downloads an OPDS feed and prints a table of its editions with cover, metadata,
 * categories and test links; the first ten covers are then downloaded as a check.
 *
 * Prints an error and returns early when the feed cannot be parsed as OPDS.
 *
 * @param mixed  $user              passed through into the "TEST PAGES" link
 * @param string $opds              URL of the OPDS feed
 * @param string $edition_file_root root used by pugpig_get_local_save_path() for local copies
 * @return void output is echoed
 */
function showEditionsAndCovers($user, $opds, $edition_file_root)
{
    $entries = array();
    $save_path = pugpig_get_local_save_path($edition_file_root, $opds);
    // Remove the query string
    $save_path = preg_replace('/\\/?\\?.*/', '', $save_path);
    $entries[$opds] = $save_path;
    $entries = _pugpig_package_download_batch("OPDS Feeds", $entries);
    foreach (array_keys($entries) as $entry) {
        // Read the ATOM from the file. file_get_contents() also copes with an empty
        // file, where fread() with a zero length would fail.
        $opds_body = file_get_contents($entries[$entry]);
        if ($opds_body === false) {
            $opds_body = '';
        }
        // Parse the OPDS file
        $opds_ret = _pugpig_package_parse_opds($opds_body);
        if (!empty($opds_ret['failure'])) {
            echo "<font color='red'>Not Valid OPDS: " . $opds_ret['failure'] . "</font>";
            return;
        }
        echo "<h1>Your Editions</h1>";
        // Cover URL => local save path, limited to ten entries
        $covers = array();
        echo "<table>";
        foreach ($opds_ret['editions'] as $edition) {
            echo "<tr>";
            $cover_url = url_to_absolute($opds, $edition['cover']);
            $atom_url = url_to_absolute($opds, $edition['url']);
            $cover_save_path = pugpig_get_local_save_path($edition_file_root, $cover_url);
            if (count($covers) < 10) {
                $covers[$cover_url] = $cover_save_path;
            }
            echo "<td><img height='80' src='" . $cover_url . "' /></td>";
            echo "<td>";
            echo "<b>" . $edition['title'] . "</b><br />";
            echo "<i>" . $edition['summary'] . "</i><br />";
            $updated_ts = strtotime($edition['updated']);
            // Opening parenthesis added: the line used to start with an unbalanced "... ago)"
            echo "(" . _ago($updated_ts) . " ago) - (" . $edition['updated'] . ") ({$updated_ts})<br />";
            echo $edition['draft'] ? "<font color='orange'>DRAFT</font> " : "";
            echo ($edition['free'] ? "free" : "paid") . ($edition['samples'] ? " with samples" : "");
            echo "<br />";
            echo "</td>";
            echo "<td>";
            foreach ($edition['categories'] as $schema => $term) {
                echo "<b>{$schema}</b>: {$term}<br />";
            }
            echo "</td>";
            echo "<td>";
            if ($edition['type'] == 'atom') {
                $q = http_build_query(array('opds' => $opds, 'atom' => $atom_url, 'user' => $user));
                echo "<a href='?{$q}'>TEST PAGES</a><br />\n";
            } else {
                echo "EPUB<br />";
            }
            // $atom_url is already absolute; "</br />" was not a valid tag
            echo "<a href='" . $atom_url . "' target='_blank'>FEED</a><br />";
            echo "FLATPLAN<br />";
            echo "PREVIEW IN WEB<br />";
            // Close the last cell before closing the row
            echo "</td>";
            echo "</tr>";
        }
        echo "</table>";
        _pugpig_package_download_batch("Valdating Covers (only 10)", $covers);
    }
}
Example #15
0
 /**
  * Returns the URL of the largest image on a page, by pixel area.
  *
  * Every distinct <img src> is resolved against $url and measured with
  * getimagesize(). Images that cannot be measured count as area 0, so they are
  * only chosen when no measurable image has been seen before them.
  *
  * @param string $url address of the page to inspect
  * @return string absolute URL of the largest image, or 'No Image' when the page
  *                cannot be loaded or contains no usable image
  */
 public function GetImageUrl($url)
 {
     $html = file_get_html($url);
     if (!$html) {
         return 'No Image';
     }
     // Start below zero so that an image of unknown size can still be picked.
     $maxSize = -1;
     $biggest_img = null;
     $visited = array();
     foreach ($html->find('img') as $e) {
         $src = $e->src;
         if ($src == '') {
             continue;
         }
         // get image absolute url
         $imageurl = url_to_absolute($url, $src);
         // ignore already seen images
         if (in_array($imageurl, $visited, true)) {
             continue;
         }
         $visited[] = $imageurl;
         // Best effort: unreachable or non-image URLs make getimagesize() warn and return false.
         $image = @getimagesize($imageurl);
         $area = is_array($image) ? $image[0] * $image[1] : 0;
         if ($area > $maxSize) {
             $maxSize = $area;
             $biggest_img = $imageurl;
         }
     }
     return $biggest_img ? $biggest_img : 'No Image';
 }
/**
 * Scrapes a page, stores every link found on it and queues unseen internal pages.
 *
 * Each anchor's href (with "SID-.../" session segments removed) is classified as
 * 'internal', 'external' or 'unknown' and saved together with the page it was
 * found on. Internal links that do not point at a known binary/document type and
 * are not yet in the "pages" table are added there with processed = 0.
 *
 * @param string $location URL of the page to scan
 * @return void
 */
function findLinksOnPage($location)
{
    print "loading page " . $location . "\n";
    $html = scraperWiki::scrape($location);
    $dom = new simple_html_dom();
    $dom->load($html);
    foreach ($dom->find("a") as $anchor) {
        $href = $anchor->getAttribute('href');
        // Drop session-id path segments so the same page is not stored once per session.
        $href = preg_replace("/SID-[0-9a-fA-F]+\\-[0-9a-fA-F]+\\//i", "", $href);
        $type = "";
        // Absolute URL? "[\w-]" replaces "[\w-\d]": \d is already part of \w, and a
        // hyphen between two class escapes is rejected as an invalid range by PCRE2.
        // $m[2] holds the last "label." group, i.e. the second-level domain.
        if (preg_match("/(http|https|ftp):\\/\\/([\\w-]+\\.)+[\\w-]+/i", $href, $m)) {
            if ("bertelsmann-stiftung." == $m[2]) {
                $type = 'internal';
            } else {
                $type = 'external';
            }
        } else {
            // Anything else that contains a path is taken to be site-relative.
            if (preg_match("/(\\/[\\w~,;\\-\\.\\/?%&+#=]*)/i", $href)) {
                $type = 'internal';
                $href = url_to_absolute($location, $href);
            } else {
                $type = 'unknown';
            }
        }
        scraperwiki::save(array('href'), array('on_page' => $location, 'href' => $href, 'type' => $type, 'exported' => 0));
        // remember to follow an internal link, if not already done
        if ($type == 'internal' && !preg_match('/\\.(mpeg|exe|pdf|mp3|jpg|zip|doc|ppt)$/i', $href)) {
            // The href comes from a scraped page: bind it as a parameter instead of
            // concatenating it into the SQL text.
            $count = scraperwiki::select("count(*) as c from pages where url like ?", array($href));
            $count = $count[0]['c'];
            if ($count == 0) {
                // not processed (or reserved) yet: queue it
                scraperwiki::save_sqlite(array('url'), array('url' => $href, 'processed' => 0), "pages", 0);
            }
        }
    }
}
Example #17
0
 /**
  * Entifies the tweet using the given entities element, using the provided
  * options.
  *
  * For a retweet the original (retweeted) status is rendered. Hashtags, user
  * mentions, urls and media are replaced with hyperlinks; other entity types are
  * left as plain text. With the 'link_preview' option, urls/media are rendered as a
  * small preview (first image and page title) when the target page has a title.
  *
  * @param array $tweet the json converted to normalised array
  * @param array $options settings to be used when rendering the entities
  *                       ('encoding', 'target', 'link_preview')
  * @param array $replacements if specified, the entities and their replacements will be stored to this variable
  *                            as array('replacements' => ..., 'keys' => ...)
  * @return the tweet text with entities replaced with hyperlinks
  */
 public static function entify_with_options($tweet, $options = array(), &$replacements = array())
 {
     $default_opts = array('encoding' => 'UTF-8', 'target' => '', 'link_preview' => false);
     $opts = array_merge($default_opts, $options);
     if (isset($tweet['retweeted_status'])) {
         $tweet = $tweet['retweeted_status'];
     }
     // Checked before touching the mb encoding, so this early return cannot leave
     // the process-wide internal encoding changed.
     if (!isset($tweet['entities'])) {
         return $tweet['text'];
     }
     $encoding = mb_internal_encoding();
     mb_internal_encoding($opts['encoding']);
     $keys = array();
     $target = !empty($opts['target']) ? ' target="' . $opts['target'] . '"' : '';
     // prepare the entities
     foreach ($tweet['entities'] as $type => $things) {
         foreach ($things as $entity => $value) {
             switch ($type) {
                 case 'hashtags':
                     $href = "<a href=\"https://twitter.com/search?q=%23{$value['text']}\"{$target}>#{$value['text']}</a>";
                     break;
                 case 'user_mentions':
                     $href = "@<a href=\"https://twitter.com/{$value['screen_name']}\" title=\"{$value['name']}\"{$target}>{$value['screen_name']}</a>";
                     break;
                 case 'urls':
                 case 'media':
                     $url = empty($value['expanded_url']) ? $value['url'] : $value['expanded_url'];
                     // Normal rendering: the display url, or the url without its scheme
                     $display = isset($value['display_url']) ? $value['display_url'] : str_replace('http://', '', $url);
                     // Not all pages are served in UTF-8 so you may need to do this ...
                     $display = urldecode(str_replace('%E2%80%A6', '&hellip;', urlencode($display)));
                     if ($opts['link_preview']) {
                         // Generate link previews rather than just a URL
                         $html = file_get_html($url);
                         $site_title = $html ? $html->find('title', 0) : null;
                         // No page or no title: keep the normal rendering (the former
                         // fallback emitted a debug prefix and an unbalanced </div>)
                         if ($site_title !== null) {
                             $site_img = $html->find('img', 0);
                             $preview = "<div>";
                             if ($site_img !== null) {
                                 // We have to convert the image from relative to absolute
                                 $site_img = url_to_absolute($url, $site_img->src);
                                 $preview .= "<img style=\"width:30px;vertical-align:middle\" " . "src=\"{$site_img}\"/>";
                             }
                             $site_title = trim($site_title->innertext);
                             $preview .= "<span style=\"font-size: 2em;\">{$site_title}</span>";
                             $preview .= "</div>";
                             $display = $preview;
                         }
                     }
                     $href = "<a href=\"{$value['url']}\"{$target}>{$display}</a>";
                     break;
                 default:
                     // Unknown entity type: there is no markup for it, so leave the
                     // text alone instead of reusing the previous entity's link.
                     continue 2;
             }
             $start = $value['indices']['0'];
             $keys[$start] = mb_substr($tweet['text'], $start, $value['indices']['1'] - $start);
             $replacements[$start] = $href;
         }
     }
     // Replace from the end of the text backwards so earlier offsets stay valid.
     ksort($replacements);
     $replacements = array_reverse($replacements, true);
     $entified_tweet = $tweet['text'];
     foreach ($replacements as $k => $v) {
         // Offsets are in characters, so the replaced length must be too
         // (strlen() counts bytes and over-cuts multibyte entities).
         $entified_tweet = mb_substr($entified_tweet, 0, $k) . $v . mb_substr($entified_tweet, $k + mb_strlen($keys[$k]));
     }
     $replacements = array('replacements' => $replacements, 'keys' => $keys);
     mb_internal_encoding($encoding);
     return $entified_tweet;
 }
Example #18
0
/**
 * Uniformly cleans a link to avoid duplicates
 *
 * 1. Changes relative links to absolute (/bar to http://www.foo.com/bar)
 * 2. Removes anchor tags (foo.html#bar to foo.html)
 * 3. Adds trailing slash if directory (foo.com/bar to foo.com/bar/)
 * 4. Adds www if there is not a subdomain (foo.com to www.foo.com but not bar.foo.com)
 *
 * Steps 3 and 4 only recognise http:// URLs, and step 4 only hosts of the form
 * "name.tld" with a three-letter TLD.
 *
 * @param string $link link to clean
 * @param string $dir directory of parent (linking) page
 * @return string cleaned link
 */
function clean_link($link, $dir)
{
    // make it absolute, not relative
    $link = url_to_absolute($dir, $link);
    // remove anchors
    $hash = strpos($link, '#');
    if ($hash !== false) {
        $link = substr($link, 0, $hash);
    }
    // no trailing slash, no file extension and no query string: treat as a directory
    if (!preg_match('#(^http://(.*)/$)|http://(.*)/(.*)\\.([A-Za-z0-9]+)|http://(.*)/([^\\?\\#]*)(\\?|\\#)([^/]*)#i', $link)) {
        $link .= '/';
    }
    // Add www to bare "name.tld" hosts. The dot is escaped, the host part may not
    // contain "/" and the match is anchored; the previous pattern let "." match any
    // character and so rewrote e.g. http://localhost/ to http://www.local.ost/.
    $link = preg_replace('#^http://([^./]+)\\.([a-zA-Z]{3})/#i', 'http://www.$1.$2/', $link);
    return $link;
}
Example #19
0
 /**
  * Collects candidate images for a page.
  *
  * Every og:image meta value is added as-is (it is not resolved against
  * $url); every <img> source is resolved against $url and added unless it
  * ends in "gif".
  *
  * @param object $document object exposing getElementsByTagName() (presumably a DOMDocument - confirm with caller)
  * @param string $url      base URL used to resolve relative <img> sources
  * @return array|int map of image URL => array('src' => URL), or 0 when no image was found
  */
 function GetImage($document, $url)
 {
     $images = array();
     $meta_og_img = '';
     foreach ($document->getElementsByTagName('meta') as $meta) {
         // Only meta tags whose property attribute is og:image are of interest.
         if ($meta->getAttribute('property') == 'og:image') {
             $meta_og_img = $meta->getAttribute('content');
             $images[$meta_og_img] = array('src' => $meta_og_img);
         }
     }
     foreach ($document->getElementsByTagName('img') as $img) {
         // Resolution failures are deliberately silenced; such images are skipped.
         $src = @url_to_absolute($url, $img->getAttribute('src'));
         if (!$src) {
             continue;
         }
         // Compare URL with URL: the og:image entry is already in the list,
         // so an <img> pointing at the same address adds nothing.
         if (!$this->endsWith($src, "gif") && $meta_og_img !== $src) {
             $images[$src] = array('src' => $src);
         }
     }
     // Callers receive 0, not an empty array, when nothing was collected.
     return empty($images) ? 0 : $images;
 }
                $response['mobile']['iitc_version'] = $header['@version'];
            }
        } else {
            $response['error'] = 'Failed to find .apk file ' . $apkfile;
        }
    } else {
        // desktop - .user.js scripts
        // load main script version
        $iitc_details = loadUserScriptHeader("{$dir}/total-conversion-build.user.js");
        $response['iitc'] = array('version' => $iitc_details['@version'], 'downloadUrl' => url_to_absolute($pageurl, "{$dir}/total-conversion-build.user.js"), 'pageUrl' => url_to_absolute($pageurl, $info['web']));
        // and now the plugins
        $response['plugins'] = array();
        foreach (glob("{$dir}/plugins/*.user.js") as $path) {
            $basename = basename($path, ".user.js");
            $details = loadUserScriptHeader($path);
            $response['plugins'][$basename] = array('version' => $details['@version'], 'downloadUrl' => url_to_absolute($pageurl, "{$dir}/plugins/{$basename}.user.js"), 'pageUrl' => url_to_absolute($pageurl, $info['web'] . "#plugin-{$basename}"));
        }
    }
} else {
    $response['error'] = 'Unsupported build for version check';
}
$data = json_encode($response);
# send the response - allow either jsonp (using a 'callback' parameter), or regular json
if (array_key_exists('callback', $_GET)) {
    header('Content-Type: text/javascript; charset=utf8');
    header('Access-Control-Allow-Origin: *');
    header('Access-Control-Max-Age: 3628800');
    header('Access-Control-Allow-Methods: GET, POST');
    $callback = $_GET['callback'];
    echo $callback . '(' . $data . ');';
} else {
Example #21
0
 /**
  * Lists the external scripts referenced by the page at $url.
  *
  * The URL is cleaned with _make_clean_url() and validated; the truthy value
  * returned by _is_url_exist() is then used as the address to fetch. At most
  * 20 <script> elements are inspected (elements without a src count towards
  * that limit). A src that does not contain "//" is resolved against the
  * fetched address, which gets a trailing slash first; a script is kept only
  * when _is_url_exist() accepts its URL.
  *
  * @param string $url page address
  * @return array|bool list of script URLs, or FALSE when the URL is invalid,
  *                    cannot be fetched, or yields no script
  */
 function _parse_external_js($url)
 {
     $url = _make_clean_url($url);
     if (filter_var($url, FILTER_VALIDATE_URL) === FALSE) {
         return FALSE;
     }

     $url_header = _is_url_exist($url);
     if (!$url_header) {
         return FALSE;
     }

     // Fetch with a 3 second timeout; failures are silenced and reported as FALSE.
     $context = stream_context_create(array('http' => array('timeout' => 3)));
     $html = @file_get_html($url_header, false, $context);
     if (!$html) {
         return FALSE;
     }

     $scripts = array();
     $inspected = 0;
     $limit = 20;
     foreach ($html->find('script') as $script) {
         $src = $script->src;
         if ($src != null) {
             if (strpos($src, '//') === FALSE) {
                 // Relative source: make sure the base ends in a slash before resolving.
                 $lastIndex = strlen($url_header) - 1;
                 if (isset($url_header[$lastIndex]) && $url_header[$lastIndex] !== '/') {
                     $url_header .= '/';
                 }
                 $src = url_to_absolute($url_header, $src);
             }
             if (_is_url_exist($src)) {
                 $scripts[] = $src;
             }
         }
         $inspected++;
         if ($inspected === $limit) {
             break;
         }
     }

     return !empty($scripts) ? $scripts : FALSE;
 }
Example #22
0
 // Finds the largest image (by pixel area) referenced by the page at $link1.
 // options
 // Fallback value: stays in $biggestImage when no usable image is found.
 $biggestImage = 'path to "no image found" image';
 // process
 $maxSize = -1;
 $visited = array();
 $url = $link1;
 $html = file_get_html($url);
 // Only walk the document when the page could actually be loaded.
 if ($html) {
     foreach ($html->find('img') as $element) {
         $src = $element->src;
         if ($src == '') {
             continue;
         }
         // get image absolute url
         $imageurl = url_to_absolute($url, $src);
         // ignore already seen images, remember new ones
         if (in_array($imageurl, $visited, true)) {
             continue;
         }
         $visited[] = $imageurl;
         // Fetch the dimensions; an image that cannot be read must not take
         // part in the comparison (its "area" of 0 would beat the initial -1).
         $image = @getimagesize($imageurl);
         if (!is_array($image)) {
             continue;
         }
         $area = $image[0] * $image[1];
         if ($area > $maxSize) {
             $maxSize = $area;
             $biggestImage = $imageurl;
         }
     }
 }
 // Legacy name, kept for code that still reads $biggest_img.
 $biggest_img = $biggestImage;
Example #23
0
 /**
  * Resolves $sUrl against $sBaseUrl after turning every "&amp;" in it back
  * into a plain "&".
  *
  * @param string $sBaseUrl base URL
  * @param string $sUrl     URL to resolve, possibly with HTML-encoded ampersands
  * @return mixed whatever url_to_absolute() returns for the decoded URL
  */
 public static function absolutizeUrl($sBaseUrl, $sUrl)
 {
     return url_to_absolute($sBaseUrl, str_replace('&amp;', '&', $sUrl));
 }
 /**
  * urlFix
  *
  * Resolves $url against $baseURL by delegating to url_to_absolute().
  *
  * @param string $baseURL base URL
  * @param string $url     URL to resolve
  * @return mixed the value returned by url_to_absolute()
  */
 function urlFix($baseURL, $url)
 {
     $fixedUrl = url_to_absolute($baseURL, $url);

     return $fixedUrl;
 }
Example #25
0
//			/images/facebook.png
//			/images/twitter.png
//			/images/email.16x16.gif
//			http://feedproxy.google.com/~fc/ElectricToolboxBlog?bg=ffaf5a&amp;fg=333333&amp;anim=0
//			/images/gui/bottom.gif
?>

<?php 
//Note that the path of each image is returned exactly as it appears in the HTML; paths are not resolved to absolute URLs.
//Resolving the paths: so that it's possible to download the images, the relative URLs need to be turned into absolute URLs. I found a library to do this on the blog of Nadeau Software Consulting, but their site no longer appears to be available, so I have made the library available for download here.
//Download and extract the zipped file url_to_absolute.zip, which contains three PHP files. The url_to_absolute.php file requires the other two files.
//Here's the modified version of the above code, which now resolves all image URLs to absolute URLs that can then be used to download the images:
// Prints the absolute URL of every <img> found on the page at $url.
require_once 'c:/wamp/www/PHP/simplehtmldom/simple_html_dom.php';
require_once 'c:/wamp/www/PHP/url_to_absolute/url_to_absolute.php';
$url = 'http://www.electrictoolbox.com/php-get-meta-tags-html-file/';
$html = file_get_html($url);
if ($html) {
    foreach ($html->find('img') as $element) {
        echo url_to_absolute($url, $element->src), "\n";
    }
} else {
    // Without this guard a failed fetch ends in a fatal error on find().
    trigger_error("Could not load {$url}", E_USER_WARNING);
}
//This will now output the following from the same page, with absolute URLs:
//			http://www.electrictoolbox.com/images/icons/php.gif
//			http://manage.aff.biz/42/2882/189/
//			http://static.addtoany.com/buttons/subscribe_171_16.gif
//			http://static.addtoany.com/buttons/share_save_171_16.gif
//			http://www.electrictoolbox.com/images/gui/logo.gif
//			http://www.electrictoolbox.com/images/feed.16x16.gif
//			http://www.electrictoolbox.com/images/facebook.png
//			http://www.electrictoolbox.com/images/twitter.png
//			http://www.electrictoolbox.com/images/email.16x16.gif
//			http://feedproxy.google.com/%7Efc/ElectricToolboxBlog?bg%3Dffaf5a%26amp%3Bfg%3D333333%26amp%3Banim%3D0
//			http://www.electrictoolbox.com/images/gui/bottom.gif
 /**
  * Multiviews content negotiation for the current request.
  *
  * Looks in the requested directory for files named
  * "<requested name>.<ext>[.<ext>...]", scores each candidate from its
  * extensions against configured priorities and the request's Accept*
  * headers, and selects the best one. The choice is written to the globals
  * $http_uri and $rq_file (and $real_uri when "reflectrewriting" is enabled),
  * and the negotiation headers are added to $out_add_headers. When the client
  * sent a Negotiate header containing "vlist" or "trans", a list of all
  * variants is prepared instead (status 300, or 200 for HTTP/1.0).
  *
  * @return int|null 2 when no candidate file exists or the directory cannot
  *                  be opened; nothing when negotiation was skipped or completed
  */
 function main()
 {
     global $http_uri, $rq_file, $conf, $docroot, $htreq_headers, $mime, $mime_enc, $out_add_headers, $pri_err;
     if (access_query("reflectrewriting", 0)) {
         global $real_uri;
     }
     // quick skip: disabled by configuration, or the request names an existing regular file
     $enabled = strtolower(trim(access_query("multiviews", 0)));
     if ($enabled === "0" || $enabled == "off" || (file_exists($docroot . $http_uri) && !is_dir($docroot . $http_uri))) {
         return;
     }
     // determine file name to be worked on
     $act_path = "";
     $file = $http_uri;
     // NOTE(review): a slash at offset 0 is treated like "no slash" here, so a
     // request such as "/name" is not split - confirm whether that is intended.
     if ($last_slash = strrpos($file, "/")) {
         $act_path = substr($file, 0, $last_slash + 1);
         $file = substr($file, $last_slash + 1);
     }
     if ($file) {
         $allowed_filenames = array($file);
     } else {
         $allowed_filenames = explode(" ", access_query("directoryindex", 0));
     }
     // find files with same basename and different extensions
     $alternative_files = array();
     $dir = opendir($docroot . $act_path);
     if ($dir === false) {
         // unreadable directory: same outcome as "no variants found"
         return 2;
     }
     // strict comparison, otherwise an entry named "0" would end the scan early
     while (($filename = readdir($dir)) !== false) {
         foreach ($allowed_filenames as $filebn) {
             if (!$filebn) {
                 continue;
             }
             $filebn .= ".";
             if (substr($filename, 0, strlen($filebn)) == $filebn) {
                 $alternative_files[] = $filename;
             }
         }
     }
     closedir($dir);
     // quick skip2
     if (empty($alternative_files)) {
         return 2;
     }
     // fetch priorities; later sources override earlier ones for the same key
     $qualities = array_merge(
         array("php" => 0.75, "shtml" => 0.72, "html" => 0.71, "xhtml" => 0.7, "png" => 0.33, "jpeg" => 0.32, "gif" => 0.31),
         $this->parseQualities(implode(", ", access_query("otherpriority")), 1.3, 1.1),
         $this->parseQualities(@$htreq_headers["ACCEPT-FEATURES"], 1.15, 1),
         $this->parseQualities(strtr(access_query("languagepriority", 0), " ", ","), 0.8, 1.2),
         $this->parseQualities(@$htreq_headers["ACCEPT-LANGUAGE"], 1, 1.03),
         $this->parseQualities(@$htreq_headers["ACCEPT"], 1, 1.02),
         $this->parseQualities(strpos(@$htreq_headers["ACCEPT-ENCODING"], "gzip") !== false ? "gz" : "", 1.5, 1)
     );
     // other algorithm flags
     $accept_all = strpos(@$htreq_headers["ACCEPT"], "*/*") !== false;
     $http10 = $GLOBALS['http_version'] < "1.1";
     $negotiate_header = isset($htreq_headers["NEGOTIATE"]) ? $htreq_headers["NEGOTIATE"] : "";
     $agent_negotiation = strpos($negotiate_header, "vlist") !== false || strpos($negotiate_header, "trans") !== false;
     // will contain variants and their attributes
     $alternates = array();
     // go thru filename extensions, and sum qualities
     foreach ($alternative_files as $filename) {
         $q_mime = -1;
         $q_enc = $q_lang = $q_features = $q_else = +1;
         $file_extensions = array_slice(explode('.', $filename), 1);
         foreach ($file_extensions as $ext) {
             $ext_q = isset($qualities[$ext]) ? $qualities[$ext] : null;
             // extensions without a known quality get a small default
             if (empty($ext_q) && $ext_q !== 0) {
                 if ($accept_all) {
                     $ext_q = 0.1;
                 } else {
                     $ext_q = 0.001;
                 }
             }
             if (@$mime[$ext]) {
                 $alternates[$filename]["type"] = $mime[$ext];
                 // the first mime extension also lifts $q_mime from its -1 start value
                 $q_mime += $ext_q + ($q_mime < 0 ? +1 : +0);
             } elseif (@$mime_enc[$ext]) {
                 $alternates[$filename]["encoding"] = $mime_enc[$ext];
                 $q_enc *= $ext_q;
             } elseif (strlen($ext) == 2) {
                 // any other two-letter extension is taken to be a language code
                 $alternates[$filename]["language"] = $ext;
                 $q_lang = $ext_q;
             } else {
                 $q_else *= 0.9;
             }
             $feature = isset($this->negotiate_features[$ext]) ? $this->negotiate_features[$ext] : null;
             if ($feature) {
                 $known_features = isset($alternates[$filename]["feature"]) ? $alternates[$filename]["feature"] : "";
                 $alternates[$filename]["feature"] = $known_features . " {$feature};+1.2-0.9";
                 $q_features *= !empty($qualities[$feature]) ? 1.2 : 0.9;
                 $q_else /= 0.9;
             }
         }
         if ($q_mime < 0) {
             $q_mime = 0.005;
         }
         $alternates[$filename]["q"] = $q_mime * $q_enc * $q_lang * $q_features * $q_else;
     }
     // sort
     uasort($alternates, 'mod_multiviews_uarsort_by_q');
     // return selected variant (first key after sorting)
     list($file) = array_keys($alternates);
     $real_uri = $http_uri = $act_path . $file;
     $rq_file = pathinfo($http_uri);
     // fallback output
     $out_add_headers["TCN"] = "adhoc";
     $out_add_headers["Vary"] = "negotiate";
     if (!$agent_negotiation) {
         // server-driven negotiation
         $out_add_headers["Content-Location"] = $file;
         if (!$http10) {
             $out_add_headers["TCN"] = "choice";
         }
         $out_add_headers["Vary"] = "negotiate, accept, accept-language, accept-features";
         foreach ($mime_enc as $ext => $encoding) {
             // report file's encoding
             if (strpos($file, ".{$ext}")) {
                 $out_add_headers["Content-Encoding"] = $encoding;
                 $GLOBALS["out_encoded"] = true;
             }
         }
     } else {
         // agent-driven negotiation: let the client choose from a list
         $out_add_headers["TCN"] = "list";
         $GLOBALS["pri_err"] = $http10 ? 200 : 300;
         // HTTP 300 Choose Yourself
         $GLOBALS["out_contenttype"] = "text/html";
         $GLOBALS["add_errmsg"] = "The document you requested exists in different variants, and your browser gives you the opportunity to select one of them (or just does not support transparent content negotiation):<BR><UL>";
         // list the variant filenames (the keys of $alternates), best first
         foreach (array_keys($alternates) as $f) {
             $GLOBALS["add_errmsg"] .= '<LI><A HREF="' . url_to_absolute($act_path . $f) . '">' . $f . '</A></LI>';
         }
         $GLOBALS["add_errmsg"] .= "</UL>";
         if ($http10) {
             $out_add_headers["Refresh"] = "10; URL=" . url_to_absolute($http_uri);
         }
     }
     // add alternates-header
     $ah = " ";
     foreach ($alternates as $filename => $a) {
         // quality is written with exactly five characters, e.g. "0.750"
         $qstr = substr($a["q"], 0, 5);
         if (strpos($qstr, ".") === false) {
             $qstr .= ".";
         }
         while (strlen($qstr) < 5) {
             $qstr .= "0";
         }
         $ah .= '{ "' . $filename . '" ' . $qstr;
         unset($a["q"]);
         foreach ($a as $desc => $value) {
             $ah .= ' {' . $desc . ' ' . $value . '}';
         }
         $ah .= " },\n\t";
     }
     $ah .= "proxy-rvsa=1.0";
     $out_add_headers["Alternates"] = $ah;
     if ($GLOBALS['path_info']) {
         // may help(?)
         $out_add_headers["Content-Base"] = "/" . ($act_path ? "{$act_path}/" : "");
     }
     if ($http10) {
         $out_add_headers["Vary"] = "*";
     }
     if (is_dir($docroot . $http_uri) && !is_file($docroot . $http_uri . "/index.html") || strpos($file, ".var") !== false) {
         // very rare error / 506 Variant Also Negotiates
         $GLOBALS["pri_err"] = 506;
     }
 }
/**
 * Extracts asset URLs from the text of a cache manifest.
 *
 * Each line is read up to its first '#'. Empty lines, "*" and lines starting
 * with CACHE or NETWORK are ignored. Entries that do not start with "/" are
 * resolved against $base_url and passed through pugpig_strip_domain().
 *
 * A "# Theme assets" comment line (unless it directly follows a
 * "# Ad Package Zip Contents" / "# Package Zip Contents" line) switches
 * collection on in 'theme' mode and off in 'page' mode; any other mode
 * collects every entry.
 *
 * @param string $manifest_contents manifest text
 * @param array  $entries           URLs already collected; new ones are appended. Because the
 *                                  required $base_url follows it, callers always pass this argument.
 * @param string $base_url          URL that relative entries are resolved against
 * @param string $mode              'all' (default), 'theme' or 'page'
 * @return array $entries extended with the newly found URLs, without duplicates
 */
function _pugpig_package_get_asset_urls_from_manifest($manifest_contents, $entries = array(), $base_url, $mode = 'all')
{
    // In 'theme' mode nothing is collected until the theme marker is seen.
    $active = $mode != 'theme';
    $last_line_was_ad = false;
    $lines = preg_split('/\\n/m', $manifest_contents, 0, PREG_SPLIT_NO_EMPTY);
    foreach ($lines as $line) {
        // Temporary hacks to determine what is a theme asset
        // These will work with our Drupal and WordPress connector only
        // In the longer term, we need a better way to mark assets as Theme assets
        if (!$last_line_was_ad && startsWith($line, '# Theme assets')) {
            if ($mode == 'theme') {
                $active = true;
            }
            if ($mode == 'page') {
                $active = false;
            }
        }
        $last_line_was_ad = startsWith($line, '# Ad Package Zip Contents') || startsWith($line, '# Package Zip Contents');

        // Keep only the part of the line in front of a comment.
        preg_match('/\\s*([^#]*)/', $line, $matches);
        if (count($matches) <= 1) {
            continue;
        }
        $m = trim($matches[1]);
        // Lines before "CACHE MANIFEST" are deliberately not skipped: this
        // function is also used to scan partial manifests.
        if (empty($m) || $m == '*' || substr($m, 0, strlen('CACHE')) == 'CACHE' || substr($m, 0, strlen('NETWORK')) == 'NETWORK') {
            continue;
        }
        if (in_array($m, $entries, true)) {
            continue;
        }
        if (!startsWith($m, "/")) {
            // We have a relative URL
            $m = pugpig_strip_domain(url_to_absolute($base_url, $m));
        }
        // Check for duplicates again after normalising: a relative entry may
        // resolve to a URL that has already been collected.
        if (!empty($m) && $active && !in_array($m, $entries, true)) {
            $entries[] = $m;
        }
    }
    return $entries;
}