/**
 * Extracts links from HTML with a list of regular expressions and appends
 * them, resolved against the page URL, to $urlList.
 *
 * Only the first capture group of each expression is used. After every
 * expression the accumulated list is de-duplicated with array_unique().
 *
 * @param string $originalURL  URL of the page the HTML came from
 * @param string $htmlCode     HTML to scan
 * @param array  $urlRegexList PCRE patterns whose group 1 captures a link
 * @param array  $urlList      Receives the absolute URLs (modified in place)
 * @return void
 */
function getURLFromHTML($originalURL, $htmlCode, $urlRegexList, &$urlList)
{
    foreach ($urlRegexList as $pattern) {
        $found = array();
        preg_match_all($pattern, $htmlCode, $found);

        if (isset($found[1])) {
            $resolved = array();
            foreach ($found[1] as $rawLink) {
                // Percent-encode the link, but keep path and query separators literal.
                $encoded = str_replace(array('%2F', '%3F'), array('/', '?'), urlencode($rawLink));
                // Resolve against the page URL, then undo the encoding.
                $resolved[] = urldecode(url_to_absolute($originalURL, $encoded));
            }
            $urlList = array_merge($urlList, $resolved);
        }

        $urlList = array_unique($urlList);
    }
}
/**
 * Prints the download box for an IITC Mobile .apk: a QR code pointing at the
 * absolute download URL, the version numbers, and a download button.
 *
 * @param string $apkfile Path of the .apk file, relative to the current page
 * @return void
 */
function iitcMobileDownload($apkfile)
{
    $version = getMobileVersion($apkfile);
    $apk_version = $version['apk_version'];
    // De-emphasise the build-date suffix of the IITC version (intentionally contains markup).
    $iitc_version = preg_replace('/^(\\d+\\.\\d+\\.\\d+)\\.(\\d{8}\\.\\d{1,6})/', '\\1<small class="text-muted">.\\2</small>', $version['iitc_version']);

    # we need an absolute link for the QR Code
    # get the URL of this page itself
    // Some servers (IIS/ISAPI) set HTTPS to the string "off" for plain http requests.
    $is_https = !empty($_SERVER['HTTPS']) && strtolower($_SERVER['HTTPS']) !== 'off';
    $pageurl = ($is_https ? "https" : "http") . "://" . $_SERVER['HTTP_HOST'] . $_SERVER['REQUEST_URI'];
    $apkurl = url_to_absolute($pageurl, $apkfile);

    // Escape once per output context: HTML attribute, and JS string inside an HTML attribute.
    $apkfile_attr = htmlspecialchars($apkfile, ENT_QUOTES, 'UTF-8');
    $apkfile_js = htmlspecialchars(json_encode($apkfile), ENT_QUOTES, 'UTF-8');
    $apk_version_html = htmlspecialchars($apk_version, ENT_QUOTES, 'UTF-8');
?>
<div>
 <img style="float: right; margin: 10px;" src="https://chart.googleapis.com/chart?cht=qr&chs=120x120&chld=L|2&chl=<?php print urlencode($apkurl); ?>" alt="QR Code for download">
 <p>
  IITC Mobile version <?php print $apk_version_html; ?>, with IITC version <?php print $iitc_version; ?>
 </p>
 <p>
  <a style="margin-right: 1em;" onclick="if(track){track('mobile','download',<?php print $apkfile_js; ?>);}" class="btn btn-large btn-primary" href="<?php print $apkfile_attr; ?>">Download</a> or scan the QR Code
 </p>
</div>
<div style="clear: both"></div>
<?php
}
/**
 * Recursively looks for paths under the document root whose components are
 * within a Levenshtein distance of 1 of the requested path components, and
 * appends the matching URLs to the global $url_fa.
 *
 * Uses $this for the recursive call, so it has to live inside a class.
 *
 * @param array  $path_arr Requested path split into components
 * @param string $dir      Directory (relative to $docroot, with trailing slash) being scanned
 * @param int    $i        Index of the component of $path_arr matched at this level
 * @return void
 */
function check_d($path_arr, $dir = "", $i = 0)
{
    global $docroot, $docroot_prefix, $url_fa;

    $real_dir = realpath($docroot . $dir);
    if ($real_dir === false) {
        return;
    }
    $handle = opendir($real_dir);
    if ($handle === false) {
        return;
    }

    // Distance of every directory entry to the requested component.
    $t = array();
    // Compare against false explicitly: an entry named "0" must not end the scan.
    while (($f = readdir($handle)) !== false) {
        if ($f !== "." && $f !== "..") {
            $t[$f] = levenshtein($path_arr[$i], $f);
        }
    }
    closedir($handle);

    // Closest matches first.
    asort($t);

    foreach ($t as $rep => $val) {
        if ($val >= 0 && $val <= 1) {
            if ($i + 1 == count($path_arr)) {
                // Last component: record the candidate URL.
                $url_fa[] = url_to_absolute($docroot_prefix . $dir . $rep);
            } elseif (is_dir($docroot . $dir . $rep)) {
                // Intermediate component: descend and match the next one.
                $this->check_d($path_arr, $dir . $rep . "/", $i + 1);
            }
        }
    }
}
/**
 * Fetches a page and returns its HTML with the link, source and form-action
 * attributes rewritten to absolute URLs.
 *
 * @param string $url URL of the page to fetch
 * @return string Rewritten HTML, or '' when $url is empty or the response
 *                could not be parsed
 */
function get_resource($url)
{
    if (empty($url)) {
        return '';
    }

    $response = get_request($url);

    if (!function_exists('str_get_html')) {
        require_once dirname(__FILE__) . '/../vendor/simple-html-dom/simple-html-dom.php';
    }
    if (!function_exists('url_to_absolute')) {
        require_once dirname(__FILE__) . '/../vendor/url-to-absolute/url-to-absolute.php';
    }

    $html = str_get_html($response);
    if (!$html) {
        // str_get_html() yields false when the body cannot be parsed.
        return '';
    }

    foreach ($html->find('a, link') as $element) {
        // Leave pure in-page anchors ("#section") untouched.
        if (isset($element->href) && strncmp($element->href, '#', 1) !== 0) {
            $element->href = url_to_absolute($url, $element->href);
        }
    }

    foreach ($html->find('img, script') as $element) {
        if (isset($element->src)) {
            $element->src = url_to_absolute($url, $element->src);
        }
    }

    foreach ($html->find('form') as $element) {
        if (isset($element->action)) {
            $element->action = url_to_absolute($url, $element->action);
        } else {
            // A form without an action submits to the page itself.
            $element->action = $url;
        }
    }

    return $html->save();
}
/**
 * Builds the HTML directory listing for the requested URI.
 *
 * Sets $rq_err to 200, 403 (directory not readable) or 404 (file browser
 * disabled), and on success stores the rendered page as a static_response
 * in $GLOBALS["lf"].
 *
 * @return void
 */
function main()
{
    global $http_uri, $docroot, $conf, $vhost, $rq_err, $out_contenttype, $real_uri, $out_add_headers, $accessdir, $mime, $query_string;

    $resp = "";

    // Map of mime-type prefix => icon, from "icon type" configuration pairs.
    $icons = array();
    foreach (access_query("fbiconbytype") as $icndef) {
        $ic = explode(" ", $icndef);
        $icons[trim($ic[1])] = trim($ic[0]);
    }
    $icndef = access_query("fbicondefault", 0);
    if (!($icndir = access_query("fbicondirectory", 0))) {
        $icndir = $icndef;
    }

    if (substr($http_uri, -1) != "/") {
        $http_uri .= "/";
    }

    if (access_query("filebrowser", 0)) {
        if (@is_readable($docroot . $http_uri)) {
            $dfmt = access_query("fbdateformat", 0) or $dfmt = "d-M-Y H:i:s";
            $rq_err = 200;
            $out_contenttype = "text/html";

            // Generate directory listing
            $hnd = opendir(realpath($docroot . $http_uri));
            $fb_arr = array();
            $fb_ts = 0; // total size of regular files
            $fb_tf = 0; // number of regular files
            // Compare against false explicitly: an entry named "0" must not end the scan.
            while (($f = readdir($hnd)) !== false) {
                $fi = stat($docroot . $http_uri . $f);
                $fi["isdir"] = is_dir($docroot . $http_uri . $f);
                $fi["f"] = $f;
                $fb_arr[$f] = $fi;
                if (!$fi["isdir"]) {
                    $fb_ts += $fi[7];
                    $fb_tf++;
                }
            }

            // Sort order: configuration first, then overridden by ?sort= / ?order=.
            if ($fbstmp = access_query("fbsortorder", 0)) {
                $fbsort = explode(" ", $fbstmp);
            } else {
                $fbsort = array("name");
            }
            parse_str($query_string, $ptmp);
            if (!empty($ptmp["sort"])) {
                $fbsort[0] = $ptmp["sort"];
            }
            if (!empty($ptmp["order"])) {
                $fbsort[1] = $ptmp["order"];
            }

            // Index into the stat() array (9 = mtime, 7 = size) or the file name.
            switch ($fbsort[0]) {
                case "date":
                    $sortidx = 9;
                    break;
                case "size":
                    $sortidx = 7;
                    break;
                case "name":
                default:
                    $sortidx = "f";
            }

            $dsort = $fsort = array();
            $has_parent = false;
            foreach ($fb_arr as $fstmp) {
                if (!$fstmp["isdir"]) {
                    $fsort[$fstmp["f"]] = $fstmp[$sortidx];
                } else {
                    if ($fstmp["f"] != "..") {
                        $dsort[$fstmp["f"]] = $fstmp[$sortidx];
                    } else {
                        $has_parent = $fstmp[$sortidx];
                    }
                }
            }

            if (isset($fbsort[1]) && $fbsort[1] == "desc") {
                arsort($fsort);
                arsort($dsort);
            } else {
                asort($fsort);
                asort($dsort);
            }

            // Keep ".." as the first directory whatever the sort order.
            if ($has_parent) {
                $dsort = array_reverse($dsort);
                $dsort[".."] = $has_parent;
                $dsort = array_reverse($dsort);
            }

            // Do other processing
            if (@is_readable($wfn = $docroot . $http_uri . $conf[$vhost]["fbwelcomefile"][0])) {
                $wfc = implode("<br>", file($wfn));
                $welcome_formated = "<br><font size=\"1\" face=\"fixedsys\">" . $wfc . "</font><br><br>";
            } else {
                $welcome_formated = "";
            }

            $fhdr = array();
            $fhdr["dir_name"] = "/" . $real_uri;
            $fhdr["welcome"] = $welcome_formated;
            $fhdr["total_files"] = $fb_tf;
            $fhdr["total_files_formated"] = number_format($fb_tf);
            $fhdr["total_size"] = $fb_ts;
            $fhdr["total_size_formated"] = number_format($fb_ts);

            $resp = nw_apply_template(NW_TMPL_FB_HEADER, $fhdr);

            // Optional description file: "<name> <description>" per line.
            $dfile = access_query("fbdescfile", 0);
            $fb_desc = array();
            if (@is_readable($dfcomp = realpath($accessdir . "/" . $dfile))) {
                if ($descf = file($dfcomp)) {
                    foreach ($descf as $dfline) {
                        if (trim($dfline)) {
                            $didx = trim(substr($dfline, 0, strpos($dfline, " ")));
                            $desc = trim(substr($dfline, strpos($dfline, " ")));
                            $fb_desc[$didx] = $desc;
                        }
                    }
                }
            }

            // Display each row: directories first ...
            foreach (array_keys($dsort) as $fidx) {
                $fi = $fb_arr[$fidx];
                $f = $fi["f"];
                if ($f == "..") {
                    $dname = nw_apply_template(NW_TMPL_FB_PARENT, array());
                    $tmpdl = explode("/", trim($real_uri, "/"));
                    array_pop($tmpdl);
                    $dlink = url_to_absolute(implode("/", $tmpdl) . "/");
                } else {
                    $dname = $f;
                    $dlink = url_to_absolute($real_uri . rawurlencode($f) . "/");
                }
                // Hide ".", dot entries (unless enabled) and ".." at the root.
                if ((substr($f, 0, 1) != "." || $f == ".." || $conf[$vhost]["fbshowdotfiles"][0]) && $f != "." && !($f == ".." && $http_uri == "/")) {
                    $d_row = array();
                    $d_row["icon"] = $icndir;
                    $d_row["link"] = $dlink;
                    $d_row["name"] = $dname;
                    $d_row["date"] = date($dfmt, $fi[9]);
                    $d_row["desc"] = !empty($fb_desc[$f]) ? $fb_desc[$f] : "-";
                    $resp .= nw_apply_template(NW_TMPL_FB_ROW_D, $d_row, true);
                }
            }

            // ... then regular files.
            foreach (array_keys($fsort) as $fidx) {
                $fi = $fb_arr[$fidx];
                $f = $fi["f"];
                $fp = pathinfo($f);
                $ext = isset($fp["extension"]) ? strtolower($fp["extension"]) : "";
                $t = isset($mime[$ext]) ? $mime[$ext] : "";
                // First icon whose mime prefix matches, else the default icon.
                $icnf = $icndef;
                if ($icons) {
                    foreach ($icons as $key => $val) {
                        if (strpos($t, $key) === 0) {
                            $icnf = $val;
                            break;
                        }
                    }
                }
                if ((substr($f, 0, 1) != "." || $f == ".." || $conf[$vhost]["fbshowdotfiles"][0]) && $f != "." && !($f == ".." && $http_uri == "/")) {
                    $f_row = array();
                    $f_row["icon"] = $icnf;
                    $f_row["link"] = url_to_absolute($real_uri . rawurlencode($f));
                    $f_row["name"] = $f;
                    $f_row["date"] = date($dfmt, $fi[9]);
                    $f_row["size"] = number_format($fi[7]);
                    $f_row["desc"] = !empty($fb_desc[$f]) ? $fb_desc[$f] : "-";
                    $resp .= nw_apply_template(NW_TMPL_FB_ROW_F, $f_row, true);
                }
            }

            closedir($hnd);
            $resp .= nw_apply_template(NW_TMPL_FB_FOOTER, $fhdr);
        } else {
            $rq_err = 403;
        }
    } else {
        $rq_err = 404;
    }

    if ($resp) {
        // "=& new" is a parse error since PHP 7; objects are handles anyway.
        $GLOBALS["lf"] = new static_response($resp);
    }
}
/**
 * Downloads each OPDS endpoint, checks that the feed and every package
 * document it references are well-formed XML, and prints the covers and
 * any format failures.
 *
 * Failures are shown per feed, and all failures are shown again in a final
 * summary.
 *
 * @param array  $endpoints OPDS feed URLs (empty strings are ignored)
 * @param string $timestamp Used to name the temporary package directory
 * @param string $tmp_root  Root of the temporary download area (with trailing separator)
 * @return void
 */
function _pugpig_package_test_endpoints($endpoints, $timestamp, $tmp_root)
{
    pugpig_interface_output_header("Pugpig - Endpoint Checker");
    print_r("<h1>Checking Pugpig End Points</h1>");

    $tmp_root = str_replace(DIRECTORY_SEPARATOR, '/', $tmp_root);
    $tmp_path = $tmp_root . 'package-' . $timestamp . '/';

    // Feed URL => local file it is downloaded to.
    $entries = array();
    foreach ($endpoints as $endpoint) {
        if ($endpoint != '') {
            $entries[$endpoint] = $tmp_path . 'opds/' . hash('md5', $endpoint) . '.xml';
        }
    }

    $debug = FALSE;
    $concurrent = 1;
    $entries = _pugpig_package_download_batch("OPDS Feeds", $entries, $debug, $concurrent);

    // Failures of all feeds, for the summary at the end.
    $format_failures = array();

    foreach (array_keys($entries) as $entry) {
        // Failures of this feed only; must include the OPDS check below.
        $entry_failures = array();

        // Read the ATOM from the file (file_get_contents also copes with empty files)
        $opds_atom = file_get_contents($entries[$entry]);
        if ($opds_atom === false) {
            $opds_atom = '';
        }

        $msg = check_xml_is_valid($opds_atom);
        if ($msg != '') {
            $entry_failures[$entry] = "OPDS XML Invalid: " . $msg;
            $opds_atom = '';
        }

        $opds_ret = _pugpig_package_parse_opds($opds_atom);

        $edition_roots = array();
        $package_roots = array();

        print_r("<h2>" . $entry . "(" . $opds_ret['title'] . ")</h2>");
        foreach ($opds_ret['editions'] as $edition) {
            $cover = url_to_absolute($entry, $edition['cover']);
            print_r("<img class='cover " . ($edition['free'] ? "free" : "paid") . "' height='60' title='" . $edition['title'] . ': ' . $edition['summary'] . "' src='" . $cover . "' />");

            $edition_root = url_to_absolute($entry, $edition['url']);
            $edition_roots[$edition_root] = $tmp_path . $edition['type'] . '/' . hash('md5', $edition_root) . '.xml';
            if ($edition['type'] == 'package') {
                $package_roots[] = $edition_root;
            }
        }

        $edition_roots = _pugpig_package_download_batch("Edition Roots", $edition_roots, $debug, $concurrent);

        foreach ($package_roots as $package_root) {
            $package_xml_body = file_get_contents($edition_roots[$package_root]);
            if ($package_xml_body === false) {
                $package_xml_body = '';
            }
            $msg = check_xml_is_valid($package_xml_body);
            if ($msg != '') {
                $entry_failures[$package_root] = "Package XML Invalid: " . $msg;
            }
        }

        // Show the format errors of this feed
        _pugpig_package_show_failures($entry_failures);
        $format_failures = array_merge($format_failures, $entry_failures);
    }

    _pugpig_package_show_failures($format_failures);
}
/**
 * Returns $url as an absolute URL, resolving it against $base when needed.
 *
 * When no base is given it is detected: the <base> tag value if one is set,
 * otherwise the skin folder of the current blog (or the blog's base URL if
 * the skin cannot be loaded), otherwise the request host.
 *
 * @param string URL to make absolute
 * @param string Base (including protocol, e.g. 'http://example.com'); autodetected when empty
 * @return string Absolute URL, or the unchanged $url if it cannot be resolved
 */
function url_absolute($url, $base = NULL)
{
    load_funcs('_ext/_url_rel2abs.php');

    // Nothing to do for URLs that are absolute already.
    if (is_absolute_url($url)) {
        return $url;
    }

    if (empty($base)) {
        global $Blog, $ReqHost, $base_tag_set, $baseurl;

        if ($base_tag_set) {
            // A <base> tag has been set: resolve against it.
            $base = $base_tag_set;
        } elseif (empty($Blog)) {
            // General page that is not specific to a blog.
            $base = $ReqHost;
        } else {
            // Use the blog's original skin, not one passed with the 'tempskin' param.
            $SkinCache =& get_SkinCache();
            $Skin = $SkinCache->get_by_ID($Blog->get_skin_ID(), false);
            if ($Skin !== false) {
                $base = $Blog->get_local_skins_url() . $Skin->folder . '/';
            } else {
                // Skin not set.
                $base = $Blog->gen_baseurl();
            }
        }
    }

    $resolved = url_to_absolute($url, $base);

    // Fall back to the relative URL if resolution failed.
    return $resolved === false ? $url : $resolved;
}
/**
 * Appends one list entry per <img src> found in $content to $list['list'].
 *
 * @param string $baseurl URL the page was loaded from; relative sources are resolved against it
 * @param string $content HTML of the page
 * @param array  $list    Receives the image entries (modified in place)
 * @return void
 */
public function analyse_page($baseurl, $content, &$list)
{
    $urls = extract_html_urls($content);
    // The 'img'/'src' keys are absent when the page has no images.
    $images = isset($urls['img']['src']) ? $urls['img']['src'] : array();
    if (empty($images)) {
        return;
    }

    foreach ($images as $url) {
        $title = $this->guess_filename($url, '');
        // The image doubles as its own thumbnail, so resolve it only once.
        $absolute = url_to_absolute($baseurl, $url);
        $list['list'][] = array(
            'title' => $title,
            'source' => $absolute,
            'thumbnail' => $absolute,
            'thumbnail_height' => 84,
            'thumbnail_width' => 84,
        );
    }
}
list($successes, $failures) = array(0, 0); foreach ($tests as $test) { if (($r = rel2abs($test['rel'], $base)) == $test['result']) { $successes++; } else { $failures++; } } $elapsed = microtime() - $start; echo "rel2abs: successes -> {$successes}, failures => {$failures}, elapsed time: {$elapsed}\n"; # url_to_absolute $start = microtime(); $base = 'http://a/b/c/d;p?q'; list($successes, $failures) = array(0, 0); foreach ($tests as $test) { if (($r = url_to_absolute($base, $test['rel'])) == $test['result']) { $successes++; } else { $failures++; } } $elapsed = microtime() - $start; echo "url_to_absolute: successes -> {$successes}, failures => {$failures}, elapsed time: {$elapsed}\n"; # phpuri $start = microtime(); $base = phpUri::parse('http://a/b/c/d;p?q'); list($successes, $failures) = array(0, 0); foreach ($tests as $test) { if (($r = $base->join($test['rel'])) == $test['result']) { $successes++; } else {
/**
 * Resolves a relative URL against a base URL.
 *
 * Thin alias for url_to_absolute() with the same argument order.
 *
 * @param string $base_url     URL to resolve against
 * @param string $relative_url URL to resolve
 * @return mixed Whatever url_to_absolute() returns
 */
function absolute_url($base_url, $relative_url)
{
    $resolved = url_to_absolute($base_url, $relative_url);

    return $resolved;
}
/**
 * Returns the distinct absolute URLs of all <a> elements in the body of
 * $this->htmlCode, resolved against $this->url.
 *
 * @return array Absolute URLs (de-duplicated with array_unique(), keys preserved)
 */
public function getAllURLFromHTML()
{
    $document = new DOMDocument();
    // Real-world HTML is rarely valid; silence the parser warnings.
    @$document->loadHTML($this->htmlCode);

    $query = new DomXPath($document);
    $anchors = $query->evaluate("/html/body//a");

    $links = array();
    foreach ($anchors as $anchor) {
        $href = $this->encodeURL($anchor->getAttribute('href'));
        $links[] = url_to_absolute($this->url, $href);
    }

    return array_unique($links);
}
/**
 * Adds an image to $list['list'] unless an entry with the same absolute
 * source URL is already present.
 *
 * @param string $baseurl URL relative sources are resolved against
 * @param string $url     Image URL as found in the page (HTML entities are decoded first)
 * @param array  $list    Receives the image entry (modified in place)
 * @return void
 */
protected function add_image_to_list($baseurl, $url, &$list)
{
    if (empty($list['list'])) {
        $list['list'] = array();
    }

    $src = url_to_absolute($baseurl, htmlspecialchars_decode($url));

    // Bail out on the first entry that already points at this image.
    $alreadyListed = false;
    foreach ($list['list'] as $entry) {
        if ($entry['source'] == $src) {
            $alreadyListed = true;
            break;
        }
    }
    if ($alreadyListed) {
        return;
    }

    $list['list'][] = array(
        'title' => $this->guess_filename($url, ''),
        'source' => $src,
        'thumbnail' => $src,
        'thumbnail_height' => 84,
        'thumbnail_width' => 84,
    );
}
/**
 * Echoes a breadcrumb trail built from the 'glowne' navigation menu.
 *
 * The menu item whose absolute URL equals the current URL (or $forcedUrl)
 * is looked up and its chain of parents is rendered, outermost first.
 *
 * @param string $forcedUrl Build the trail for this URL instead of the current request
 * @param bool   $noBold    When true, the current item is rendered as a link and no
 *                          trailing title span is added
 * @return void
 */
function breadcrumbs($forcedUrl='', $noBold=false){
    global $post;

    $menuItems = wp_get_nav_menu_items('glowne');
    $url = "http://".$_SERVER["HTTP_HOST"] . $_SERVER["REQUEST_URI"];

    // Defaults, so neither variable is read while undefined further down.
    $suffix = '';
    $bold_suffix = false;

    if ( is_single() || is_search() ){
        $url=get_permalink();
        $bold_suffix = false;
        if (!find_menu_item($menuItems, get_permalink())){
            // The post itself is not in the menu: anchor the trail at its
            // post type archive and append the post title.
            $url=get_post_type_archive_link( get_post_type($post) );
            $suffix = get_the_title();
            $bold_suffix=true;
        }
    }

    if (!empty($forcedUrl)){
        $url=$forcedUrl;
        $suffix='';
        $bold_suffix=false;
    }

    $s='';
    $baseUrl = "http://".$_SERVER["HTTP_HOST"]."/";

    // wp_get_nav_menu_items() returns false when the menu does not exist.
    $menuList = is_array($menuItems) ? $menuItems : array();

    foreach($menuList as $menuItem) {
        // A menu link may be stored as just "?post_type=xxx", so resolve it first.
        $absoluteUrl = url_to_absolute( $baseUrl, $menuItem->url);
        if($absoluteUrl == $url ) {
            $id=$menuItem->ID;
            // Walk up the parents; the bound guards against cyclic menus.
            for ($x=0; $x<50; $x++){
                if (empty($id)) break;
                $item = get_menu_item($menuItems, $id);
                $parentId = $item->menu_item_parent;
                $title = $item->title;
                $title = htmlentities( $title, ENT_QUOTES ,"UTF-8" );

                $href=$item->url;
                if ( ($href=='') || ($href=='#') )
                    $href = "#";

                if ( ($x==0) && (!$bold_suffix) && (!$noBold)){
                    // The current page itself: plain text, no link.
                    $s = '<span>'.$title.'</span>'. $s;
                }else{
                    if($x != 1){
                        $separator = '<span> > </span>';
                    }else{
                        $separator = '';
                    }
                    $s = ' <a href="'.$href.'">'.$title.' </a>'.$separator. $s;
                }
                $id = $parentId;
            }
            if (!$noBold) $s.= '<span>'.$suffix.'</span>'; // append the post title
            break;
        }
    }

    // No menu item matched: fall back to the page title.
    if ( (empty($s)) && (empty($forcedUrl)) && (!$noBold) )
        $s='<span>'.get_the_title().'</span>';

    echo $s;
}
/**
 * Downloads an OPDS feed and prints a table of its editions (cover, title,
 * summary, update time, categories and test links), then downloads up to
 * ten of the covers to validate them.
 *
 * @param string $user              User name passed through to the test-page links
 * @param string $opds              URL of the OPDS feed
 * @param string $edition_file_root Root directory for downloaded files
 * @return void
 */
function showEditionsAndCovers($user, $opds, $edition_file_root)
{
    $entries = array();
    $save_path = pugpig_get_local_save_path($edition_file_root, $opds);
    // Remove the query string
    $save_path = preg_replace('/\\/?\\?.*/', '', $save_path);
    $entries[$opds] = $save_path;

    $entries = _pugpig_package_download_batch("OPDS Feeds", $entries);

    foreach (array_keys($entries) as $entry) {
        // Read the ATOM from the file (file_get_contents also copes with empty files)
        $opds_body = file_get_contents($entries[$entry]);
        if ($opds_body === false) {
            $opds_body = '';
        }

        // Parse the OPDS file
        $opds_ret = _pugpig_package_parse_opds($opds_body);
        if (!empty($opds_ret['failure'])) {
            echo "<font color='red'>Not Valid OPDS: " . $opds_ret['failure'] . "</font>";
            return;
        }

        echo "<h1>Your Editions</h1>";
        $covers = array();
        echo "<table>";
        foreach ($opds_ret['editions'] as $edition) {
            echo "<tr>";
            $cover_url = url_to_absolute($opds, $edition['cover']);
            $atom_url = url_to_absolute($opds, $edition['url']);
            $cover_save_path = pugpig_get_local_save_path($edition_file_root, $cover_url);

            // Only the first ten covers are queued for validation.
            if (count($covers) < 10) {
                $covers[$cover_url] = $cover_save_path;
            }

            echo "<td><img height='80' src='" . $cover_url . "' /></td>";

            echo "<td>";
            echo "<b>" . $edition['title'] . "</b><br />";
            echo "<i>" . $edition['summary'] . "</i><br />";
            $updated_ts = strtotime($edition['updated']);
            echo "(" . _ago($updated_ts) . " ago) - (" . $edition['updated'] . ") ({$updated_ts})<br />";
            echo $edition['draft'] ? "<font color='orange'>DRAFT</font> " : "";
            echo ($edition['free'] ? "free" : "paid") . ($edition['samples'] ? " with samples" : "");
            echo "<br />";
            echo "</td>";

            echo "<td>";
            foreach ($edition['categories'] as $schema => $term) {
                echo "<b>{$schema}</b>: {$term}<br />";
            }
            echo "</td>";

            echo "<td>";
            if ($edition['type'] == 'atom') {
                $q = http_build_query(array('opds' => $opds, 'atom' => $atom_url, 'user' => $user));
                echo "<a href='?{$q}'>TEST PAGES</a><br />\n";
            } else {
                echo "EPUB<br />";
            }
            echo "<a href='" . url_to_absolute($opds, $atom_url) . "' target='_blank'>FEED</a><br />";
            echo "FLATPLAN<br />";
            echo "PREVIEW IN WEB<br />";
            echo "</td>";
            echo "</tr>";
        }
        echo "</table>";

        $entries = _pugpig_package_download_batch("Valdating Covers (only 10)", $covers);
    }
}
/**
 * Returns the absolute URL of the image with the largest pixel area on a page.
 *
 * An image whose size cannot be determined counts as area 0, so it is still
 * returned when the page has nothing better.
 *
 * @param string $url URL of the page to inspect
 * @return string Absolute image URL, or 'No Image' when the page cannot be
 *                loaded or contains no usable <img>
 */
public function GetImageUrl($url)
{
    $html = file_get_html($url);
    if ($html === FALSE) {
        return 'No Image';
    }

    $maxSize = -1;
    $biggest_img = null;
    $visited = array();

    foreach ($html->find('img') as $e) {
        $src = $e->src;
        if ($src == '') {
            continue;
        }

        // Get the absolute URL of the image.
        $imageurl = url_to_absolute($url, $src);

        // Ignore images that were seen already.
        if (in_array($imageurl, $visited)) {
            continue;
        }
        $visited[] = $imageurl;

        // getimagesize() returns false for unreadable images; do not index into that.
        $image = @getimagesize($imageurl);
        $area = is_array($image) ? $image[0] * $image[1] : 0;

        if ($area > $maxSize) {
            $maxSize = $area;
            $biggest_img = $imageurl;
        }
    }

    return $biggest_img ? $biggest_img : 'No Image';
}
/**
 * Scrapes a page, stores every link found on it, and queues internal links
 * that have not been seen yet in the "pages" table.
 *
 * @param string $location URL of the page to scrape
 * @return void
 */
function findLinksOnPage($location)
{
    print "loading page " . $location . "\n";
    $html = scraperWiki::scrape($location);
    $dom = new simple_html_dom();
    $dom->load($html);

    foreach ($dom->find("a") as $anchor) {
        $href = $anchor->getAttribute('href');
        // Strip session ids so the same page is not stored once per session.
        $href = preg_replace('/SID-[0-9a-fA-F]+\-[0-9a-fA-F]+\//i', "", $href);

        $type = "";
        // "[\w-\d]" does not compile under PCRE2 (PHP >= 7.3); "[\w-]" matches the same characters.
        if (preg_match('/(http|https|ftp):\/\/([\w-]+\.)+[\w-]+/i', $href, $m)) {
            // $m[2] is the last host label before the top-level domain, including its dot.
            if ("bertelsmann-stiftung." == $m[2]) {
                $type = 'internal';
            } else {
                $type = 'external';
            }
        } else {
            if (preg_match('/(\/[\w~,;\-\.\/?%&+#=]*)/i', $href)) {
                $type = 'internal';
                $href = url_to_absolute($location, $href);
            } else {
                $type = 'unknown';
            }
        }

        scraperwiki::save(array('href'), array('on_page' => $location, 'href' => $href, 'type' => $type, 'exported' => 0));

        // remember to follow an internal link, if not already done
        if ($type == 'internal') {
            // Skip binary downloads.
            if (!preg_match('/\.(mpeg|exe|pdf|mp3|jpg|zip|doc|ppt)$/i', $href)) {
                // The href comes from a scraped page: double single quotes so it
                // cannot break out of the SQL string literal.
                $quoted_href = str_replace("'", "''", $href);
                $count = scraperwiki::select("count(*) as c from pages where url like '" . $quoted_href . "'");
                $count = $count[0]['c'];
                if ($count == 0) {
                    // Not processed (or reserved) yet: queue it.
                    scraperwiki::save_sqlite(array('url'), array('url' => $href, 'processed' => 0), "pages", 0);
                }
            }
        }
    }
}
/**
 * Entifies the tweet using the given entities element, using the provided
 * options.
 *
 * Supported options: 'encoding' (internal encoding used for the string
 * operations, default 'UTF-8'), 'target' (target attribute added to every
 * link) and 'link_preview' (fetch linked pages and render title/image
 * instead of the plain URL). Entity types other than hashtags,
 * user_mentions, urls and media are left untouched.
 *
 * @param array $tweet the json converted to normalised array
 * @param array $options settings to be used when rendering the entities
 * @param array $replacements if specified, the entities and their replacements will be stored to this variable
 * @return string the tweet text with entities replaced with hyperlinks
 */
public static function entify_with_options($tweet, $options = array(), &$replacements = array())
{
    $default_opts = array('encoding' => 'UTF-8', 'target' => '', 'link_preview' => false);
    $opts = array_merge($default_opts, $options);

    if (isset($tweet['retweeted_status'])) {
        $tweet = $tweet['retweeted_status'];
    }

    // Nothing to replace; return before touching the global mb encoding.
    if (!isset($tweet['entities'])) {
        return $tweet['text'];
    }

    $encoding = mb_internal_encoding();
    mb_internal_encoding($opts['encoding']);

    $keys = array();
    $target = !empty($opts['target']) ? ' target="' . $opts['target'] . '"' : '';

    // prepare the entities
    foreach ($tweet['entities'] as $type => $things) {
        foreach ($things as $entity => $value) {
            switch ($type) {
                case 'hashtags':
                    $href = "<a href=\"https://twitter.com/search?q=%23{$value['text']}\"{$target}>#{$value['text']}</a>";
                    break;
                case 'user_mentions':
                    $href = "@<a href=\"https://twitter.com/{$value['screen_name']}\" title=\"{$value['name']}\"{$target}>{$value['screen_name']}</a>";
                    break;
                case 'urls':
                case 'media':
                    $url = empty($value['expanded_url']) ? $value['url'] : $value['expanded_url'];
                    if ($opts['link_preview']) {
                        // Generate link previews rather than just a URL
                        $html = file_get_html($url);
                        // file_get_html() returns false when the page cannot be loaded.
                        $site_title = $html ? $html->find('title', 0) : null;
                        $site_img = $html ? $html->find('img', 0) : null;
                        $display = "<div>";
                        if (null != $site_img) {
                            // We have to convert the image from relative to absolute
                            $site_img = url_to_absolute($url, $site_img->src);
                            $display .= "<img style=\"width:30px;vertical-align:middle\" " . "src=\"{$site_img}\"/>";
                        }
                        if (null != $site_title) {
                            $site_title = trim($site_title->innertext);
                            $display .= "<span style=\"font-size: 2em;\">{$site_title}</span>";
                        } else {
                            // No title, fall back to normal behavior
                            $display = isset($value['display_url']) ? $value['display_url'] : str_replace('http://', '', $url);
                            // Not all pages are served in UTF-8 so you may need to do this ...
                            $display = "LAME" . urldecode(str_replace('%E2%80%A6', '…', urlencode($display)));
                        }
                        $display .= "</div>";
                    } else {
                        $display = isset($value['display_url']) ? $value['display_url'] : str_replace('http://', '', $url);
                        // Not all pages are served in UTF-8 so you may need to do this ...
                        $display = urldecode(str_replace('%E2%80%A6', '…', urlencode($display)));
                    }
                    $href = "<a href=\"{$value['url']}\"{$target}>{$display}</a>";
                    break;
                default:
                    // Unknown entity type: leave the text as it is instead of
                    // reusing the replacement of the previous entity.
                    continue 2;
            }
            $keys[$value['indices']['0']] = mb_substr($tweet['text'], $value['indices']['0'], $value['indices']['1'] - $value['indices']['0']);
            $replacements[$value['indices']['0']] = $href;
        }
    }

    // Replace from the end of the text so earlier offsets stay valid.
    ksort($replacements);
    $replacements = array_reverse($replacements, true);
    $entified_tweet = $tweet['text'];
    foreach ($replacements as $k => $v) {
        // Offsets are in characters, so the length must be counted in characters too.
        $entified_tweet = mb_substr($entified_tweet, 0, $k) . $v . mb_substr($entified_tweet, $k + mb_strlen($keys[$k]));
    }
    $replacements = array('replacements' => $replacements, 'keys' => $keys);

    mb_internal_encoding($encoding);
    return $entified_tweet;
}
/**
 * Uniformly cleans a link to avoid duplicates
 *
 * 1. Changes relative links to absolute (/bar to http://www.foo.com/bar)
 * 2. Removes anchor tags (foo.html#bar to foo.html)
 * 3. Adds trailing slash if directory (foo.com/bar to foo.com/bar/)
 * 4. Adds www if there is not a subdomain (foo.com to www.foo.com but not bar.foo.com);
 *    only applied to http:// hosts with a three-letter top-level domain
 *
 * @param string $link link to clean
 * @param string $dir directory of parent (linking) page
 * @return string cleaned link
 */
function clean_link($link, $dir)
{
    // Make the link absolute, not relative.
    $link = url_to_absolute($dir, $link);

    // Remove anchors.
    $hash = strpos($link, '#');
    if ($hash !== false) {
        $link = substr($link, 0, $hash);
    }

    // Append a slash unless the link already ends in one, names a file
    // (has an extension) or carries a query string.
    if (!preg_match('#(^http://(.*)/$)|http://(.*)/(.*)\\.([A-Za-z0-9]+)|http://(.*)/([^\\?\\#]*)(\\?|\\#)([^/]*)#i', $link)) {
        $link .= '/';
    }

    // Add "www." to bare two-label hosts. The dot must be literal and the
    // first label must not span a slash, otherwise hosts such as
    // "http://localhost/" are mangled.
    $link = preg_replace('#^http://([^./]+)\\.([a-zA-Z]{3})/#i', 'http://www.$1.$2/', $link);

    return $link;
}
/**
 * Collects candidate images of a document: the og:image meta values and
 * every non-gif <img>, keyed by URL.
 *
 * Uses $this->endsWith(), so it has to live inside a class.
 *
 * @param DOMDocument $document Parsed page
 * @param string      $url      URL of the page; <img> sources are resolved against it
 * @return array|int Map of URL => array('src' => URL), or 0 when nothing was found
 */
function GetImage($document, $url)
{
    $images = array();
    $meta_og_img = '';

    foreach ($document->getElementsByTagName('meta') as $meta) {
        // Only the Open Graph image meta tag is of interest.
        if ($meta->getAttribute('property') == 'og:image') {
            $meta_og_img = $meta->getAttribute('content');
            $images[$meta_og_img] = array('src' => $meta_og_img);
        }
    }

    foreach ($document->getElementsByTagName('img') as $img) {
        $src = @url_to_absolute($url, $img->getAttribute('src'));
        if (!$src) {
            continue;
        }
        // Skip gifs and the image already registered as og:image.
        // (Compare the URL strings; the entry array never equals a string.)
        if (!$this->endsWith($src, "gif") && $meta_og_img !== $src) {
            $images[$src] = array('src' => $src);
        }
    }

    // Callers expect 0, not an empty array, when nothing was found.
    return $images ? $images : 0;
}
$response['mobile']['iitc_version'] = $header['@version']; } } else { $response['error'] = 'Failed to find .apk file ' . $apkfile; } } else { // desktop - .user.js scripts // load main script version $iitc_details = loadUserScriptHeader("{$dir}/total-conversion-build.user.js"); $response['iitc'] = array('version' => $iitc_details['@version'], 'downloadUrl' => url_to_absolute($pageurl, "{$dir}/total-conversion-build.user.js"), 'pageUrl' => url_to_absolute($pageurl, $info['web'])); // and now the plugins $response['plugins'] = array(); foreach (glob("{$dir}/plugins/*.user.js") as $path) { $basename = basename($path, ".user.js"); $details = loadUserScriptHeader($path); $response['plugins'][$basename] = array('version' => $details['@version'], 'downloadUrl' => url_to_absolute($pageurl, "{$dir}/plugins/{$basename}.user.js"), 'pageUrl' => url_to_absolute($pageurl, $info['web'] . "#plugin-{$basename}")); } } } else { $response['error'] = 'Unsupported build for version check'; } $data = json_encode($response); # send the response - allow either jsonp (using a 'callback' parameter), or regular json if (array_key_exists('callback', $_GET)) { header('Content-Type: text/javascript; charset=utf8'); header('Access-Control-Allow-Origin: *'); header('Access-Control-Max-Age: 3628800'); header('Access-Control-Allow-Methods: GET, POST'); $callback = $_GET['callback']; echo $callback . '(' . $data . ');'; } else {
/**
 * Lists the external script URLs referenced by a page.
 *
 * At most the first 20 <script> elements are inspected. Sources without
 * "//" are resolved against the page URL, and only sources for which
 * _is_url_exist() succeeds are returned.
 *
 * @param string $url URL of the page to inspect
 * @return array|false Script URLs, or FALSE when the URL is invalid, the
 *                     page cannot be loaded or no script was found
 */
function _parse_external_js($url)
{
    $url = _make_clean_url($url);
    if (filter_var($url, FILTER_VALIDATE_URL) === FALSE) {
        return FALSE;
    }

    $url_header = _is_url_exist($url);
    if (!$url_header) {
        return FALSE;
    }

    // Give slow hosts three seconds at most.
    $context = stream_context_create(array('http' => array('timeout' => 3)));
    $html = @file_get_html($url_header, false, $context);
    if (!$html) {
        return FALSE;
    }

    $scripts = array();
    $inspected = 0;
    $limit = 20;

    foreach ($html->find('script') as $script) {
        $src = $script->src;
        if ($src != null) {
            if (strpos($src, '//') === FALSE) {
                // Relative source: make sure the base ends in a slash, then resolve.
                $last = strlen($url_header) - 1;
                if (isset($url_header[$last]) && $url_header[$last] !== '/') {
                    $url_header .= '/';
                }
                $src = url_to_absolute($url_header, $src);
            }
            if (_is_url_exist($src)) {
                $scripts[] = $src;
            }
        }

        $inspected++;
        if ($inspected === $limit) {
            break;
        }
    }

    return !empty($scripts) ? $scripts : FALSE;
}
// Finds the image with the largest pixel area on the page at $link1.
// The result is available as both $biggest_img and $biggestImage; when the
// page cannot be loaded or has no images, both hold the fallback below.

// options
$biggestImage = 'path to "no image found" image'; // Is returned when no images are found.

// process
$maxSize = -1;
$visited = array();
$biggest_img = $biggestImage;
$url = $link1;
$html = file_get_html($url);

// loop images (file_get_html() returns false when the page cannot be loaded)
if ($html) {
    foreach ($html->find('img') as $element) {
        $src = $element->src;
        if ($src == '') {
            continue;
        }

        // get image absolute url
        $imageurl = url_to_absolute($url, $src);

        // ignore already seen images, add new images
        if (in_array($imageurl, $visited)) {
            continue;
        }
        $visited[] = $imageurl;

        // getimagesize() returns false for unreadable images; count those as area 0
        $image = @getimagesize($imageurl);
        $area = is_array($image) ? $image[0] * $image[1] : 0;

        // compare sizes
        if ($area > $maxSize) {
            $maxSize = $area;
            $biggest_img = $imageurl;
        }
    }
}

// Keep both names in sync so either can be used by the code that follows.
$biggestImage = $biggest_img;
/**
 * Resolves a URL taken from HTML against a base URL.
 *
 * Ampersands that are still HTML-encoded ("&amp;") are decoded first, so the
 * query string of the result uses plain "&" separators.
 *
 * @param string $sBaseUrl URL to resolve against
 * @param string $sUrl     URL to resolve, possibly with HTML-encoded ampersands
 * @return mixed Whatever url_to_absolute() returns
 */
public static function absolutizeUrl($sBaseUrl, $sUrl)
{
    $sUrl = str_replace('&amp;', '&', $sUrl);

    return url_to_absolute($sBaseUrl, $sUrl);
}
/**
 * urlFix
 *
 * Resolves $url against $baseURL by delegating to url_to_absolute().
 *
 * @param string $baseURL URL to resolve against
 * @param string $url     URL to fix; may be relative
 * @return mixed the fixed url, as returned by url_to_absolute()
 */
function urlFix($baseURL, $url)
{
    $fixed = url_to_absolute($baseURL, $url);

    return $fixed;
}
// /images/facebook.png
// /images/twitter.png
// /images/email.16x16.gif
// http://feedproxy.google.com/~fc/ElectricToolboxBlog?bg=ffaf5a&fg=333333&anim=0
// /images/gui/bottom.gif
?> <?php
// Note that the path of each image is returned exactly as it appears in the
// HTML; paths are not resolved to be absolute.
//
// Resolving the paths
// To be able to download the images, the relative URLs need to be turned
// into absolute URLs. The library used for this comes from the blog of
// Nadeau Software Consulting; their site no longer appears to be available,
// so the library has been made available for download here.
//
// Download and extract the zipped file url_to_absolute.zip, which contains
// three PHP files. The url_to_absolute.php file requires the other two files.
//
// Here is the modified version of the above code, which now resolves all
// image URLs to absolute URLs that can then be used to download the images:
require_once 'c:/wamp/www/PHP/simplehtmldom/simple_html_dom.php';
require_once 'c:/wamp/www/PHP/url_to_absolute/url_to_absolute.php';

// Page whose images are listed; it is also the base for relative image paths.
$url = 'http://www.electrictoolbox.com/php-get-meta-tags-html-file/';
$html = file_get_html($url);

// Print the absolute URL of every <img> on the page, one per line.
foreach ($html->find('img') as $element) {
    echo url_to_absolute($url, $element->src), "\n";
}

// This will now output the following from the same page, with absolute URLs:
// http://www.electrictoolbox.com/images/icons/php.gif
// http://manage.aff.biz/42/2882/189/
// http://static.addtoany.com/buttons/subscribe_171_16.gif
// http://static.addtoany.com/buttons/share_save_171_16.gif
// http://www.electrictoolbox.com/images/gui/logo.gif
// http://www.electrictoolbox.com/images/feed.16x16.gif
// http://www.electrictoolbox.com/images/facebook.png
// http://www.electrictoolbox.com/images/twitter.png
// http://www.electrictoolbox.com/images/email.16x16.gif
// http://feedproxy.google.com/%7Efc/ElectricToolboxBlog?bg%3Dffaf5a%26amp%3Bfg%3D333333%26amp%3Banim%3D0
// http://www.electrictoolbox.com/images/gui/bottom.gif
/**
 * Multiviews content negotiation for the current request.
 *
 * When the requested URI does not name an existing regular file, this looks
 * in the URI's directory for files named "<basename>.<ext>[.<ext>...]"
 * (or "<directoryindex entry>.<ext>..." when the URI ends in a slash),
 * scores every candidate from its extensions against configured priorities
 * and the request's Accept* headers, and rewrites $http_uri / $rq_file to
 * the best-scoring variant. It also fills $out_add_headers with TCN, Vary,
 * Alternates and related headers, and may set $GLOBALS["pri_err"].
 *
 * Reads and writes server globals; takes no arguments.
 *
 * @return int|null null when negotiation is skipped or has completed,
 *                  2 when no candidate file could be found
 */
function main() {
    global $http_uri, $rq_file, $conf, $docroot, $htreq_headers, $mime, $mime_enc, $out_add_headers, $pri_err;
    if (access_query("reflectrewriting", 0)) {
        global $real_uri;
    }

    // quick skip: feature disabled, or the request already names a real file
    $enabled = strtolower(trim(access_query("multiviews", 0)));
    if ($enabled === "0" || $enabled == "off" || (file_exists($docroot . $http_uri) && !is_dir($docroot . $http_uri))) {
        return;
    }

    // determine file name to be worked on
    // strrpos() returns 0 for a slash at the very start of the URI, so the
    // result has to be compared against false; a plain truthiness test left
    // $act_path undefined for every root-level request such as "/index".
    $act_path = "";
    $file = (string) $http_uri;
    $last_slash = strrpos($file, "/");
    if ($last_slash !== false) {
        $act_path = substr($file, 0, $last_slash + 1);
        $file = (string) substr($file, $last_slash + 1);
    }
    // compare against "" so that a file literally named "0" is not mistaken
    // for a directory request
    if ($file !== "") {
        $allowed_filenames = array($file);
    } else {
        $allowed_filenames = explode(" ", access_query("directoryindex", 0));
    }

    // find files with same basename and different extensions
    $alternative_files = array();
    $dir = opendir($docroot . $act_path);
    if ($dir === false) {
        // unreadable directory: there is nothing to negotiate over
        return 2;
    }
    // readdir() must be tested with !== false, otherwise a directory entry
    // named "0" would end the scan early
    while (($filename = readdir($dir)) !== false) {
        foreach ($allowed_filenames as $filebn) {
            if ((string) $filebn === "") {
                continue;
            }
            $prefix = $filebn . ".";
            if (strncmp($filename, $prefix, strlen($prefix)) === 0) {
                $alternative_files[] = $filename;
                // one hit is enough; do not list the same file twice when
                // several index basenames are prefixes of it
                break;
            }
        }
    }
    closedir($dir);
    if (empty($alternative_files)) {
        return 2;
    } // quick skip2

    // request headers used below (null when the client did not send them)
    $h_features = isset($htreq_headers["ACCEPT-FEATURES"]) ? $htreq_headers["ACCEPT-FEATURES"] : null;
    $h_language = isset($htreq_headers["ACCEPT-LANGUAGE"]) ? $htreq_headers["ACCEPT-LANGUAGE"] : null;
    $h_accept = isset($htreq_headers["ACCEPT"]) ? $htreq_headers["ACCEPT"] : null;
    $h_encoding = isset($htreq_headers["ACCEPT-ENCODING"]) ? $htreq_headers["ACCEPT-ENCODING"] : null;
    $h_negotiate = isset($htreq_headers["NEGOTIATE"]) ? (string) $htreq_headers["NEGOTIATE"] : "";

    // fetch priorities; later arrays override earlier ones for equal keys
    $qualities = array_merge(
        array("php" => 0.75, "shtml" => 0.72, "html" => 0.71, "xhtml" => 0.7, "png" => 0.33, "jpeg" => 0.32, "gif" => 0.31),
        $this->parseQualities(implode(", ", access_query("otherpriority")), 1.3, 1.1),
        $this->parseQualities($h_features, 1.15, 1),
        $this->parseQualities(strtr(access_query("languagepriority", 0), " ", ","), 0.8, 1.2),
        $this->parseQualities($h_language, 1, 1.03),
        $this->parseQualities($h_accept, 1, 1.02),
        $this->parseQualities(strpos((string) $h_encoding, "gzip") !== false ? "gz" : "", 1.5, 1)
    );

    // other algorithm flags
    $accept_all = strpos((string) $h_accept, "*/*") !== false;
    $http10 = $GLOBALS['http_version'] < "1.1";
    $agent_negotiate = strpos($h_negotiate, "vlist") !== false || strpos($h_negotiate, "trans") !== false;

    // will contain variants and their attributes
    $alternates = array();

    // go thru filename extensions, and sum qualities
    foreach ($alternative_files as $filename) {
        $alternates[$filename] = array();
        $q_mime = -1;
        $q_enc = $q_lang = $q_features = $q_else = +1;
        $file_extensions = array_slice(explode('.', $filename), 1);
        foreach ($file_extensions as $ext) {
            $ext_q = isset($qualities[$ext]) ? $qualities[$ext] : null;
            if (empty($ext_q) && $ext_q !== 0) {
                // extension without a usable priority: small default, a bit
                // larger when the client accepts */*
                $ext_q = $accept_all ? 0.1 : 0.001;
            }
            if (!empty($mime[$ext])) {
                $alternates[$filename]["type"] = $mime[$ext];
                $q_mime += $ext_q + ($q_mime < 0 ? +1 : +0);
            } elseif (!empty($mime_enc[$ext])) {
                $alternates[$filename]["encoding"] = $mime_enc[$ext];
                $q_enc *= $ext_q;
            } elseif (strlen($ext) == 2) {
                // two-letter extensions are taken to be language codes
                $alternates[$filename]["language"] = $ext;
                $q_lang = $ext_q;
            } else {
                $q_else *= 0.9;
            }
            $feature = isset($this->negotiate_features[$ext]) ? $this->negotiate_features[$ext] : null;
            if ($feature) {
                if (!isset($alternates[$filename]["feature"])) {
                    $alternates[$filename]["feature"] = "";
                }
                $alternates[$filename]["feature"] .= " {$feature};+1.2-0.9";
                $q_features *= !empty($qualities[$feature]) ? 1.2 : 0.9;
                // undo the "unknown extension" penalty applied just above
                $q_else /= 0.9;
            }
        }
        if ($q_mime < 0) {
            // no extension mapped to a MIME type
            $q_mime = 0.005;
        }
        $alternates[$filename]["q"] = $q_mime * $q_enc * $q_lang * $q_features * $q_else;
    }

    // sort (ordering is defined by mod_multiviews_uarsort_by_q; presumably
    // descending by "q", since the first entry is used as the winner)
    uasort($alternates, 'mod_multiviews_uarsort_by_q');

    // return selected variant
    list($file) = array_keys($alternates);
    $real_uri = $http_uri = $act_path . $file;
    $rq_file = pathinfo($http_uri);

    // fallback output
    $out_add_headers["TCN"] = "adhoc";
    $out_add_headers["Vary"] = "negotiate";

    // The flag computed above is $agent_negotiate; the previous test of the
    // misspelt, never-assigned $agent_negotiation made the list branch
    // unreachable.
    if (!$agent_negotiate) {
        // server-driven negotiation
        $out_add_headers["Content-Location"] = $file;
        if (!$http10) {
            $out_add_headers["TCN"] = "choice";
        }
        $out_add_headers["Vary"] = "negotiate, accept, accept-language, accept-features";
        foreach ($mime_enc as $ext => $encoding) {
            // report file's encoding
            if (strpos($file, ".{$ext}")) {
                $out_add_headers["Content-Encoding"] = $encoding;
                $GLOBALS["out_encoded"] = true;
            }
        }
    } else {
        $out_add_headers["TCN"] = "list";
        $GLOBALS["pri_err"] = $http10 ? 200 : 300; // HTTP 300 Choose Yourself
        $GLOBALS["out_contenttype"] = "text/html";
        $GLOBALS["add_errmsg"] = "The document you requested exists in different variants, and your browser gives you the opportunity to select one of them (or just does not support transparent content negotiation):<BR><UL>";
        // list the variants by file name (the keys of $alternates), not by
        // the numeric indexes of $alternative_files
        foreach (array_keys($alternates) as $f) {
            $GLOBALS["add_errmsg"] .= '<LI><A HREF="' . url_to_absolute($act_path . $f) . '">' . $f . '</A></LI>';
        }
        $GLOBALS["add_errmsg"] .= "</UL>";
        if ($http10) {
            $out_add_headers["Refresh"] = "10; URL=" . url_to_absolute($http_uri);
        }
    }

    // add alternates-header: one { "file" q {attr value}... } group per variant
    $ah = " ";
    foreach ($alternates as $filename => $a) {
        // format q as a five character decimal, e.g. "0.710"
        $qstr = substr($a["q"], 0, 5);
        if (strpos($qstr, ".") === false) {
            $qstr .= ".";
        }
        while (strlen($qstr) < 5) {
            $qstr .= "0";
        }
        $ah .= '{ "' . $filename . '" ' . $qstr;
        unset($a["q"]);
        foreach ($a as $desc => $value) {
            $ah .= ' {' . $desc . ' ' . $value . '}';
        }
        $ah .= " },\n\t";
    }
    $ah .= "proxy-rvsa=1.0";
    $out_add_headers["Alternates"] = $ah;

    if ($GLOBALS['path_info']) {
        // may help(?)
        $out_add_headers["Content-Base"] = "/" . ($act_path ? "{$act_path}/" : "");
    }
    if ($http10) {
        $out_add_headers["Vary"] = "*";
    }
    if ((is_dir($docroot . $http_uri) && !is_file($docroot . $http_uri . "/index.html")) || strpos($file, ".var") !== false) {
        // very rare error / 506 Variant Also Negotiates
        $GLOBALS["pri_err"] = 506;
    }
}
/**
 * Collects asset URLs from the text of a cache manifest.
 *
 * Each line is cut at the first '#' and trimmed. Empty results, entries
 * beginning with "CACHE" or "NETWORK", and the "*" wildcard are ignored.
 * Entries that do not start with "/" are resolved against $base_url with
 * url_to_absolute() and then passed through pugpig_strip_domain().
 *
 * $mode selects which part of the manifest is collected, based on a
 * "# Theme assets" comment line (ignored when it directly follows a
 * "# Ad Package Zip Contents" / "# Package Zip Contents" line):
 *   - 'theme': only entries after the marker
 *   - 'page':  only entries before the marker
 *   - any other value (default 'all'): every entry
 *
 * @param string $manifest_contents raw manifest text, newline separated
 * @param array  $entries  URLs collected so far; new URLs are appended.
 *                         Although a default is declared, callers must pass
 *                         it because $base_url after it is required.
 * @param string $base_url base used to resolve relative entries
 * @param string $mode     'all', 'theme' or 'page'
 * @return array $entries with the newly found URLs appended
 */
function _pugpig_package_get_asset_urls_from_manifest($manifest_contents, $entries = array(), $base_url, $mode = 'all') {
    // Outside 'theme' mode collection starts immediately.
    $active = ($mode != 'theme');
    $last_line_was_ad = false;

    $lines = preg_split('/\\n/m', $manifest_contents, 0, PREG_SPLIT_NO_EMPTY);
    if ($lines === false) {
        // the split failed; nothing can be scanned
        return $entries;
    }

    foreach ($lines as $line) {
        // Temporary hacks to determine what is a theme asset
        // These will work with our Drupal and WordPress connector only
        // In the longer term, we need a better way to mark assets as Theme assets
        if (!$last_line_was_ad && startsWith($line, '# Theme assets')) {
            if ($mode == 'theme') {
                $active = true;
            }
            if ($mode == 'page') {
                $active = false;
            }
        }
        $last_line_was_ad = (startsWith($line, '# Ad Package Zip Contents') || startsWith($line, '# Package Zip Contents'));

        // capture everything in front of a '#' comment
        preg_match('/\\s*([^#]*)/', $line, $matches);
        if (count($matches) <= 1) {
            continue;
        }
        $m = trim($matches[1]);

        // The "CACHE MANIFEST" header is deliberately not required, because
        // this function is also used to scan partial manifests.
        if (empty($m)
            || in_array($m, $entries, true)
            || substr($m, 0, strlen('CACHE')) == 'CACHE'
            || substr($m, 0, strlen('NETWORK')) == 'NETWORK'
            || $m == '*') {
            continue;
        }

        if (!startsWith($m, "/")) {
            // We have a relative URL
            $m = pugpig_strip_domain(url_to_absolute($base_url, $m));
            // The duplicate test above saw only the raw relative form, so
            // repeat it on the resolved path; otherwise a relative entry
            // that occurs twice would be appended twice.
            if (empty($m) || in_array($m, $entries, true)) {
                continue;
            }
        }

        if ($active) {
            $entries[] = $m;
        }
    }

    return $entries;
}