/**
  * Parse a string for embed codes.
  *
  * @param string $content The text to parse.
  * @param string $base_url The base URL. Ignored.  
  * @param string $default_link_text Default link text. Ignored.
  * @return array An array of new blcLinkInstance objects. The objects will include info about the embeds found, but not about the corresponding container entity. 
  */
 function parse($content, $base_url = '', $default_link_text = '')
 {
     $results = array();
     //Candidate embeds, as reported by extract_embeds().
     $candidates = $this->extract_embeds($content);
     foreach ($candidates as $candidate) {
         $src = $candidate['attributes']['src'];
         //First-pass verification: only sources containing our search string are handled here.
         if (strpos($src, $this->url_search_string) !== false) {
             //Recover the original URL of the embedded object (this step may apply
             //stricter verification); an empty result means the embed is skipped.
             $target_url = $this->link_url_from_src($src);
             if (!empty($target_url)) {
                 $entry = new blcLinkInstance();
                 $entry->set_parser($this);
                 //The complete embed code is stored as the raw URL.
                 $entry->raw_url = $candidate['embed_code'];
                 $entry->link_text = '[' . $this->short_title . ']';
                 //Attach the link object built from the resolved URL.
                 $entry->set_link(new blcLink($target_url));
                 $results[] = $entry;
             }
         }
     }
     return $results;
 }
Exemple #2
0
 /**
  * "Parse" a URL into an instance.
  *
  * @param string $content The entire content is expected to be a single plaintext URL.
  * @param string $base_url The base URL to use for normalizing relative URLs. If omitted, the blog's root URL will be used.
  * @param string $default_link_text
  * @return array An array of new blcLinkInstance objects.  
  */
 function parse($content, $base_url = '', $default_link_text = '')
 {
     //The trimmed input is both the raw URL and the starting point for normalization.
     $raw_url = trim($content);
     $resolved = $raw_url;
     //Bail out with no instances when the URL cannot be parsed at all.
     $components = @parse_url($resolved);
     if (!$components) {
         return array();
     }
     //No scheme - likely a relative URL, so turn it into an absolute one.
     if (!isset($components['scheme'])) {
         $resolved = $this->relative2absolute($resolved, $base_url);
         //A failed or implausibly short result is treated as invalid.
         if (!$resolved || strlen($resolved) < 6) {
             return array();
         }
     }
     //The URL is acceptable: wrap it in a single link instance.
     $entry = new blcLinkInstance();
     $entry->set_parser($this);
     $entry->raw_url = $raw_url;
     $entry->link_text = $default_link_text;
     //Attach the link object built from the final URL.
     $entry->set_link(new blcLink($resolved));
     return array($entry);
 }
 /**
  * Parse a string for plaintext URLs
  *
  * @param string $content The text to parse.
  * @param string $base_url The base URL. Ignored.
  * @param string $default_link_text Default link text.
  * @return array An array of new blcLinkInstance objects.
  */
 function parse($content, $base_url = '', $default_link_text = '')
 {
     //URLs inside links or tag attributes are handled by other parsers,
     //so drop <a href="http://...">http://...</a> entirely.
     $text = preg_replace('#<a[^>]*>.*?</a>#si', '', $content);
     //Tags (and closing [/shortcode] tags) act as natural URL boundaries. Because the tags
     //are stripped next, a newline is inserted where each boundary was.
     $boundaries = array('<', '>', '[/');
     $with_breaks = array("\n<", ">\n", "\n[/");
     $text = strip_tags(str_replace($boundaries, $with_breaks, $text));
     //Collect every URL candidate.
     $results = array();
     if (!preg_match_all($this->url_regexp, $text, $matches)) {
         return $results;
     }
     //The second capture group of each match is the URL candidate.
     foreach ($matches[2] as $candidate) {
         $checked = $this->validate_url(trim($candidate));
         if ($checked != false) {
             $entry = new blcLinkInstance();
             $entry->set_parser($this);
             //The untrimmed match serves as both raw URL and link text.
             $entry->raw_url = $candidate;
             $entry->link_text = $candidate;
             //Attach the link object built from the validated URL.
             $entry->set_link(new blcLink($checked));
             $results[] = $entry;
         }
     }
     return $results;
 }
Exemple #4
0
 /**
  * Parse a string for HTML images - <img src="URL">
  *
  * @param string $content The text to parse.
  * @param string $base_url The base URL to use for normalizing relative URLs. If omitted, the blog's root URL will be used.
  * @param string $default_link_text 
  * @return array An array of new blcLinkInstance objects. The objects will include info about the links found, but not about the corresponding container entity. 
  */
 function parse($content, $base_url = '', $default_link_text = '')
 {
     global $blclog;
     //Normalize the blog charset name: "UTF8" becomes "UTF-8".
     $blog_charset = get_bloginfo('charset');
     $charset = (strtoupper($blog_charset) === 'UTF8') ? 'UTF-8' : $blog_charset;
     $blclog->info('Blog charset is "' . $charset . '"');
     $results = array();
     //Images inside <code></code> blocks are not real images, so blank those blocks out first.
     $content = preg_replace('/<code[^>]*>.+?<\\/code>/si', ' ', $content);
     //Locate the images; with no matches there is nothing to do.
     if (!preg_match_all($this->img_pattern, $content, $matches, PREG_SET_ORDER)) {
         return $results;
     }
     foreach ($matches as $image) {
         //Capture group 3 holds the SRC attribute value.
         $raw_url = $image[3];
         //FB::log($url, "Found image");
         $blclog->info('Found image. SRC attribute: "' . $raw_url . '"');
         //Decode &amp; and other entities.
         $src = html_entity_decode($raw_url, ENT_QUOTES, $charset);
         $blclog->info('Decoded image URL: "' . $src . '"');
         $src = trim($src);
         $blclog->info('Trimmed image URL: "' . $src . '"');
         //Shortcodes are allowed in image URLs.
         $src = do_shortcode($src);
         //Skip URLs that cannot be parsed.
         $components = @parse_url($src);
         if (!$components) {
             continue;
         }
         if (!isset($components['scheme'])) {
             //No scheme - likely a relative URL. Turn it into an absolute one.
             $relativeUrl = $src;
             $src = $this->relative2absolute($relativeUrl, $base_url);
             $blclog->info(sprintf('%s:%s Resolving relative URL. Relative URL = "%s", base URL = "%s", result = "%s"', __CLASS__, __FUNCTION__, $relativeUrl, $base_url, $src));
         }
         //Skip URLs that are empty or implausibly short.
         if (!$src || strlen($src) < 6) {
             continue;
         }
         $blclog->info('Final URL: "' . $src . '"');
         //The URL is acceptable: create and populate a new link instance.
         $entry = new blcLinkInstance();
         $entry->set_parser($this);
         $entry->raw_url = $raw_url;
         $entry->link_text = '';
         //Attach the link object built from the final URL.
         $entry->set_link(new blcLink($src));
         $results[] = $entry;
     }
     return $results;
 }
Exemple #5
0
 /**
  * Parse a string for plaintext URLs
  *
  * @param string $content The text to parse.
  * @param string $base_url The base URL. Ignored.  
  * @param string $default_link_text Default link text.
  * @return array An array of new blcLinkInstance objects.  
  */
 function parse($content, $base_url = '', $default_link_text = '')
 {
     //Don't want to detect URLs inside links or tag attributes -
     //there are already other parsers for that.
     //Avoid <a href="http://...">http://...</a>
     $content = preg_replace('#<a[^>]*>.*?</a>#si', '', $content);
     //HTML tags are treated as natural boundaries for plaintext URLs
     //(since we strip tags, we must place another boundary char where they were).
     //The closing tag of [shortcodes] is also treated as a boundary.
     $content = str_replace(array('<', '>', '[/'), array("\n<", ">\n", "\n[/"), $content);
     //Finally, kill all tags.
     $content = strip_tags($content);
     //Find all URLs
     $found = preg_match_all($this->url_regexp, $content, $matches);
     $instances = array();
     if ($found) {
         //Create a new instance for each match (capture group 2 holds the URL)
         foreach ($matches[2] as $match) {
             //Do a little bit of validation
             $url = esc_url_raw(trim($match));
             if (empty($url)) {
                 continue;
             }
             if (function_exists('filter_var')) {
                 //Note: filter_var() is no panacea as it accepts many invalid URLs.
                 //FILTER_FLAG_HOST_REQUIRED is intentionally not passed: it is implied by
                 //FILTER_VALIDATE_URL, was deprecated in PHP 7.3 and removed in PHP 8.0,
                 //where referencing it raises an "Undefined constant" error.
                 if (!filter_var($url, FILTER_VALIDATE_URL)) {
                     continue;
                 }
             }
             $parts = @parse_url($url);
             if (empty($parts['host'])) {
                 continue;
             }
             //The host must contain a dot, and not as its first character.
             $dot_position = strpos($parts['host'], '.');
             if ($dot_position === false || $dot_position === 0) {
                 continue;
             }
             //Create a new link instance.
             $instance = new blcLinkInstance();
             $instance->set_parser($this);
             //The untrimmed match serves as both raw URL and link text.
             $instance->raw_url = $match;
             $instance->link_text = $match;
             $link_obj = new blcLink($url);
             //Creates or loads the link
             $instance->set_link($link_obj);
             $instances[] = $instance;
         }
     }
     return $instances;
 }
 /**
  * blcHTMLLink::parser_callback()
  *
  * @access private
  *
  * @param array $link
  * @param array $params
  * @return blcLinkInstance|null
  */
 function parser_callback($link, $params)
 {
     global $blclog;
     //Every log message starts with the same "Class:function" prefix.
     $log_prefix = __CLASS__ . ':' . __FUNCTION__;
     $base_url = $params['base_url'];
     $raw_url = $link['href'];
     $href = trim($raw_url);
     //FB::log($url, "Found link");
     $blclog->info($log_prefix . ' Found a link, raw URL = "' . $raw_url . '"');
     //Links may contain shortcodes; execute them before validating.
     $href = do_shortcode($href);
     //Nothing to do for an empty URL.
     if (empty($href)) {
         $blclog->warn($log_prefix . ' Skipping the link (empty URL)');
         return null;
     }
     //Unparseable URLs are skipped as invalid.
     $components = @parse_url($href);
     if (!$components) {
         $blclog->warn($log_prefix . ' Skipping the link (parse_url failed)');
         return null;
     }
     //No scheme - likely a relative URL. Resolve it against the base URL from $params.
     if (!isset($components['scheme'])) {
         $href = $this->relative2absolute($href, $base_url);
         $blclog->info($log_prefix . ' Convert relative URL to absolute. Absolute URL = "' . $href . '"');
     }
     //Reject links that are still empty or implausibly short.
     if (!$href || strlen($href) < 6) {
         $blclog->info($log_prefix . ' Skipping the link (invalid/short URL)');
         return null;
     }
     //The URL is acceptable: create and populate a new link instance.
     $entry = new blcLinkInstance();
     $entry->set_parser($this);
     $entry->raw_url = $raw_url;
     $entry->link_text = $link['#link_text'];
     //Attach the link object built from the final URL.
     $entry->set_link(new blcLink($href));
     return $entry;
 }
 /**
  * Parse a string for HTML images - <img src="URL">
  *
  * @param string $content The text to parse.
  * @param string $base_url The base URL to use for normalizing relative URLs. If omitted, the blog's root URL will be used.
  * @param string $default_link_text 
  * @return array An array of new blcLinkInstance objects. The objects will include info about the links found, but not about the corresponding container entity. 
  */
 function parse($content, $base_url = '', $default_link_text = '')
 {
     $results = array();
     //Images inside <code></code> blocks are not real images, so blank those blocks out first.
     $content = preg_replace('/<code[^>]*>.+?<\\/code>/si', ' ', $content);
     //Locate the images; with no matches there is nothing to do.
     if (!preg_match_all($this->img_pattern, $content, $matches, PREG_SET_ORDER)) {
         return $results;
     }
     foreach ($matches as $image) {
         //Capture group 3 holds the SRC attribute value.
         $raw_url = $image[3];
         //FB::log($url, "Found image");
         //Decode &amp; and other entities, trim, then expand any shortcodes in the URL.
         $src = do_shortcode(trim(html_entity_decode($raw_url)));
         //Skip URLs that cannot be parsed.
         $components = @parse_url($src);
         if (!$components) {
             continue;
         }
         //No scheme - likely a relative URL. Turn it into an absolute one.
         if (!isset($components['scheme'])) {
             $src = $this->relative2absolute($src, $base_url);
         }
         //Only URLs that are non-empty and at least 6 characters long are kept.
         if ($src && strlen($src) >= 6) {
             $entry = new blcLinkInstance();
             $entry->set_parser($this);
             $entry->raw_url = $raw_url;
             $entry->link_text = '';
             //Attach the link object built from the final URL.
             $entry->set_link(new blcLink($src));
             $results[] = $entry;
         }
     }
     return $results;
 }
Exemple #8
0
 /**
  * Parse a metadata value.
  *
  * @param string|array $content Metadata value(s).
  * @param string $base_url The base URL to use for normalizing relative URLs. If omitted, the blog's root URL will be used.
  * @param string $default_link_text
  * @return array An array of new blcLinkInstance objects.  
  */
 function parse($content, $base_url = '', $default_link_text = '')
 {
     $instances = array();
     //Accept either a single value or a list of values.
     if (!is_array($content)) {
         $content = array($content);
     }
     foreach ($content as $value) {
         //The complete contents of the meta field are stored in raw_url.
         //This is useful for editing/unlinking, when one may need to
         //distinguish between multiple fields with the same name.
         $raw_url = $value;
         //If this is a multiline metadata field take only the first line (workaround for the 'enclosure' field).
         $lines = explode("\n", $value);
         $url = trim(reset($lines));
         //Attempt to parse the URL
         $parts = @parse_url($url);
         if (!$parts) {
             //Ignore this invalid URL, but keep processing the remaining values.
             //(Returning here would silently drop every value after the first bad one.)
             continue;
         }
         if (!isset($parts['scheme'])) {
             //No scheme - likely a relative URL. Turn it into an absolute one.
             $url = $this->relative2absolute($url, $base_url);
             //Skip invalid URLs (again) without abandoning the remaining values.
             if (!$url || strlen($url) < 6) {
                 continue;
             }
         }
         //The URL is okay, create and populate a new link instance.
         $instance = new blcLinkInstance();
         $instance->set_parser($this);
         $instance->raw_url = $raw_url;
         $instance->link_text = $default_link_text;
         $link_obj = new blcLink($url);
         //Creates or loads the link
         $instance->set_link($link_obj);
         $instances[] = $instance;
     }
     return $instances;
 }
 /**
  * blcHTMLLink::parser_callback()
  *
  * @access private
  *
  * @param array $link
  * @param array $params
  * @return blcLinkInstance|null
  */
 function parser_callback($link, $params)
 {
     $base_url = $params['base_url'];
     $raw_url = $link['href'];
     //FB::log($url, "Found link");
     //Trim the href, then execute any shortcodes it may contain.
     $href = do_shortcode(trim($raw_url));
     //Nothing to do for an empty URL.
     if (empty($href)) {
         return null;
     }
     //Unparseable URLs are skipped as invalid.
     $components = @parse_url($href);
     if (!$components) {
         return null;
     }
     //No scheme - likely a relative URL. Resolve it against the base URL from $params.
     if (!isset($components['scheme'])) {
         $href = $this->relative2absolute($href, $base_url);
     }
     //Reject links that are still empty or implausibly short.
     if (!$href || strlen($href) < 6) {
         return null;
     }
     //The URL is acceptable: create and populate a new link instance.
     $entry = new blcLinkInstance();
     $entry->set_parser($this);
     $entry->raw_url = $raw_url;
     //Link text is stored without any HTML tags.
     $entry->link_text = strip_tags($link['#link_text']);
     //Attach the link object built from the final URL.
     $entry->set_link(new blcLink($href));
     return $entry;
 }
Exemple #10
0
 /**
  * blcHTMLLink::parser_callback()
  *
  * @access private
  *
  * @param array $link
  * @param array $params
  * @return blcLinkInstance|null
  */
 function parser_callback($link, $params)
 {
     global $blclog;
     //Every log message starts with the same "Class:function" prefix.
     $log_prefix = __CLASS__ . ':' . __FUNCTION__;
     $base_url = $params['base_url'];
     $raw_url = $link['href'];
     //$blclog->debug(__CLASS__ .':' . __FUNCTION__ . ' Found a link, raw URL = "' . $raw_url . '"');
     //Trim the href, then execute any shortcodes it may contain.
     $href = do_shortcode(trim($raw_url));
     //Nothing to do for an empty URL.
     if (empty($href)) {
         $blclog->warn($log_prefix . ' Skipping the link (empty URL)');
         return null;
     }
     //Unparseable URLs are skipped as invalid.
     $components = @parse_url($href);
     if (!$components) {
         $blclog->warn($log_prefix . ' Skipping the link (parse_url failed)', $href);
         return null;
     }
     if (!isset($components['scheme'])) {
         //No scheme - likely a relative URL. Resolve it against the base URL from $params.
         //TODO: Also log the original URL and base URL.
         $href = $this->relative2absolute($href, $base_url);
         $blclog->info($log_prefix . ' Convert relative URL to absolute. Absolute URL = "' . $href . '"');
     }
     //Reject links that are still empty or implausibly short.
     if (!$href || strlen($href) < 6) {
         $blclog->info($log_prefix . ' Skipping the link (invalid/short URL)', $href);
         return null;
     }
     //Remove left-to-right marks. See: https://en.wikipedia.org/wiki/Left-to-right_mark
     //Decoding the JSON escape yields the U+200E character itself.
     $href = str_replace(json_decode('"\\u200E"'), '', $href);
     //The URL is acceptable: create and populate a new link instance.
     $entry = new blcLinkInstance();
     $entry->set_parser($this);
     $entry->raw_url = $raw_url;
     $entry->link_text = $link['#link_text'];
     //Attach the link object built from the cleaned URL.
     $entry->set_link(new blcLink($href));
     return $entry;
 }