Exemple #1
0
 /**
  * Exercise the getter/setter behaviour of MultiByte::hab_encoding().
  *
  * Reads the current encoding, swaps in a dummy value, then restores the
  * initial one, asserting after each step.
  */
 public function test_hab_encoding()
 {
     // Reading without an argument should mirror the static property.
     $initial = MultiByte::hab_encoding();
     $this->assert_equal($initial, MultiByte::$hab_enc);

     // Setting a new value is expected to hand back the encoding it replaced.
     $previous = MultiByte::hab_encoding('foo');
     $this->assert_equal($previous, $initial);

     // Put the initial encoding back and confirm that a fresh read reports it.
     MultiByte::hab_encoding($initial);
     $restored = MultiByte::hab_encoding();
     $this->assert_equal($initial, $restored);
 }
 /**
  * Compare MultiByte::substr() with mb_substr() on the shared test string.
  *
  * Diagnostic information about the string and the encodings in play is
  * printed before and after the assertions.
  */
 function test_substr()
 {
     $subject = self::$test_str;

     // Context: the string under test and every encoding involved.
     printf("Test string: %s <br>", $subject);
     printf("Habari encoding: %s <br>", MultiByte::hab_encoding());
     printf("mb_internal_encoding: %s <br>", mb_internal_encoding());
     printf("MultiByte detected encoding of test string: %s <br>", MultiByte::detect_encoding($subject));
     printf("mbstring detected encoding of test string: %s <br>", mb_detect_encoding($subject));

     // Bounded substring, mbstring using its internal encoding.
     $expected = mb_substr($subject, 1, 3);
     $this->assert_equal(MultiByte::substr($subject, 1, 3), $expected);

     // Bounded substring, mbstring told the detected encoding explicitly.
     $expected = mb_substr($subject, 1, 3, mb_detect_encoding($subject));
     $this->assert_equal(MultiByte::substr($subject, 1, 3), $expected);

     // Open-ended substring (no length given).
     $expected = mb_substr($subject, 5);
     $this->assert_equal(MultiByte::substr($subject, 5), $expected);

     // Dump the actual substrings so a failure can be inspected by eye.
     printf(" MultiByte substring (begin-1 end-3): %s <br>", MultiByte::substr($subject, 1, 3));
     printf(" MultiByte substring 2 (begin-5 end-null): %s <br>", MultiByte::substr($subject, 5));
     printf(" mbstring substring without encoding detected (begin-1 end-3): %s <br>", mb_substr($subject, 1, 3));
     printf(" mbstring substring with encoding detected (begin-1 end-3): %s <br>", mb_substr($subject, 1, 3, mb_detect_encoding($subject)));
     printf(" mbstring substring 2 without encoding detected(begin-5 end-null): %s <br>", mb_substr($subject, 5));
 }
Exemple #3
0
 /**
  * Filter a string of HTML against the element and attribute whitelists.
  *
  * The input is tokenized and each token is handled by type:
  *  - text nodes have their HTML entities decoded;
  *  - opening, empty and closing tags of elements that are not listed in
  *    self::$whitelist_elements are converted into text nodes holding the
  *    tag's string form;
  *  - attributes of permitted elements are dropped unless they appear in the
  *    global ('*') or per-element attribute whitelist and pass
  *    self::check_attr_value();
  *  - processing instructions, comments, CDATA sections, statements and any
  *    unknown token types are removed.
  * Finally, element pairs with nothing between the opening and closing tag
  * are stripped from the serialized result.
  *
  * @todo TODO must build DOM to really properly remove offending elements
  * @todo TODO properly filter URLs
  *
  * @param string $str The HTML to filter
  * @return string The filtered HTML
  */
 public static function filter_html_elements($str)
 {
     $tokenizer = new HTMLTokenizer($str);
     // tokenize, baby
     $tokens = $tokenizer->parse();
     // filter token stream
     $filtered = new HTMLTokenSet();
     // names of disallowed, non-empty elements that are currently open; this is
     // bookkeeping only - nothing in this method reads it to build the output
     $stack = array();
     foreach ($tokens as $node) {
         switch ($node['type']) {
             case HTMLTokenizer::NODE_TYPE_TEXT:
                 $node['value'] = html_entity_decode($node['value'], ENT_QUOTES, MultiByte::hab_encoding());
                 break;
             case HTMLTokenizer::NODE_TYPE_ELEMENT_OPEN:
             case HTMLTokenizer::NODE_TYPE_ELEMENT_EMPTY:
                 $name = strtolower($node['name']);
                 // is this element allowed at all?
                 if (!in_array($name, self::$whitelist_elements, true)) {
                     if (!in_array($name, self::$elements_empty, true)) {
                         array_push($stack, $node['name']);
                     }
                     // convert the node to text rather than removing it completely
                     $node = array('type' => HTMLTokenizer::NODE_TYPE_TEXT, 'name' => '#text', 'value' => HTMLTokenSet::token_to_string($node), 'attrs' => array());
                 } else {
                     // check attributes
                     foreach ($node['attrs'] as $k => $v) {
                         $attr = strtolower($k);
                         $attr_ok = false;
                         // if the attribute is in the global whitelist and validates
                         if (array_key_exists($attr, self::$whitelist_attributes['*']) && self::check_attr_value($attr, $v, self::$whitelist_attributes['*'][$attr])) {
                             $attr_ok = true;
                         }
                         // if there is a whitelist for this node and this attribute is in that list and it validates
                         if (array_key_exists($name, self::$whitelist_attributes) && array_key_exists($attr, self::$whitelist_attributes[$name]) && self::check_attr_value($attr, $v, self::$whitelist_attributes[$name][$attr])) {
                             $attr_ok = true;
                         }
                         // if it wasn't in one of the whitelists or failed its check, remove it
                         if (!$attr_ok) {
                             unset($node['attrs'][$k]);
                         }
                     }
                 }
                 break;
             case HTMLTokenizer::NODE_TYPE_ELEMENT_CLOSE:
                 $name = strtolower($node['name']);
                 if (!in_array($name, self::$whitelist_elements, true)) {
                     // only pop when the innermost open element really is the one being
                     // closed; peeking first avoids pushing a null back onto an empty
                     // stack when a stray closing tag shows up (Luke, use the DOM!)
                     $open = end($stack);
                     if ($open !== false && strtolower($open) === $name) {
                         array_pop($stack);
                     }
                     // convert the node to text rather than removing it completely
                     $node = array('type' => HTMLTokenizer::NODE_TYPE_TEXT, 'name' => '#text', 'value' => HTMLTokenSet::token_to_string($node), 'attrs' => array());
                 }
                 break;
             case HTMLTokenizer::NODE_TYPE_PI:
             case HTMLTokenizer::NODE_TYPE_COMMENT:
             case HTMLTokenizer::NODE_TYPE_CDATA_SECTION:
             case HTMLTokenizer::NODE_TYPE_STATEMENT:
             default:
                 // none of these are ever passed through
                 $node = null;
                 break;
         }
         if ($node !== null) {
             $filtered[] = $node;
         }
     }
     // rebuild our output string, dropping element pairs that ended up empty
     return preg_replace('#<([^>\\s]+)(?:\\s+[^>]+)?></\\1>#u', '', (string) $filtered);
 }
	/**
	 * Receive a Pingback via XMLRPC
	 *
	 * Resolves the target URI to a post, fetches the source document, converts
	 * it to the Habari encoding, extracts the page title and an excerpt around
	 * the link to the target, and stores the result as an unapproved pingback
	 * comment. Any XMLRPCException raised along the way is reported through its
	 * output_fault_xml() method instead of propagating to the caller.
	 *
	 * @param array $params An array of XMLRPC parameters from the remote call (source URI first, target URI second)
	 * @return string The success state of the pingback
	 */
	public function xmlrpc_pingback__ping( $params )
	{
		try {
			list( $source_uri, $target_uri )= $params;

			// This should really be done by an Habari core function
			$target_parse = InputFilter::parse_url( $target_uri );
			$target_stub = $target_parse['path'];
			$base_url = Site::get_path( 'base', true );

			if ( '/' != $base_url) {
				$target_stub = str_replace( $base_url, '', $target_stub );
			}

			$target_stub = trim( $target_stub, '/' );

			// drop anything from the first '?' onwards
			if ( strpos( $target_stub, '?' ) !== false ) {
				list( $target_stub )= explode( '?', $target_stub );
			}

			// Can this be used as a target?
			$target_slug = URL::parse( $target_stub )->named_arg_values['slug'];

			if ( $target_slug === false ) {
				throw new XMLRPCException( 33 );
			}

			// Does the target exist?
			$target_post = Post::get( array( 'slug' => $target_slug ) );

			if ( $target_post === false ) {
				throw new XMLRPCException( 32 );
			}

			// Is comment allowed?
			if ( $target_post->info->comments_disabled ) {
				throw new XMLRPCException( 33 );
			}

			// Is this Pingback already registered?
			if ( Comments::get( array( 'post_id' => $target_post->id, 'url' => $source_uri, 'type' => Comment::PINGBACK ) )->count() > 0 ) {
				throw new XMLRPCException( 48 );
			}

			// Retrieve source contents
			try {
				$rr = new RemoteRequest( $source_uri );
				$rr->execute();
				if ( ! $rr->executed() ) {
					throw new XMLRPCException( 16 );
				}
				$source_contents = $rr->get_response_body();
				$headers = $rr->get_response_headers();
			}
			catch ( XMLRPCException $e ) {
				// catch our special type of exception and re-throw it
				throw $e;
			}
			catch ( Exception $e ) {
				// any other failure while fetching is reported as a generic fault
				throw new XMLRPCException( -32300 );
			}

			// Encoding is converted into internal encoding.
			// First, detect the source string's encoding
			$habari_encoding = strtoupper( MultiByte::hab_encoding() );
			// fallback used when nothing below identifies the charset
			$source_encoding = 'Windows-1252';
			// The patterns below are single-quoted on purpose: inside a double-quoted
			// PHP string "\1" is the octal escape for byte 0x01, not a backreference,
			// which would stop the quote-matching patterns from ever matching.
			// Is the charset in the headers?
			if ( isset( $headers['Content-Type'] ) && strpos( $headers['Content-Type'], 'charset' ) !== false ) {
				// This regex should be changed to meet the HTTP spec at some point
				if ( preg_match( '/charset[\x09\x0A\x0C\x0D\x20]*=[\x09\x0A\x0C\x0D\x20]*(\'?)([A-Za-z0-9\-\_]+)\1/i', $headers['Content-Type'], $matches ) ) {
					$source_encoding = strtoupper( $matches[2] );
				}
			}
			// Can we tell the charset from the stream itself?
			else if ( ( $enc = MultiByte::detect_bom_encoding( $source_contents ) ) !== false ) {
				$source_encoding = $enc;
			}
			// Is the charset in a meta tag?
			else if ( preg_match( '/<meta[^>]+charset[\x09\x0A\x0C\x0D\x20]*=[\x09\x0A\x0C\x0D\x20]*(["\']?)([A-Za-z0-9\-\_]+)\1/i', $source_contents, $matches ) ) {
				$source_encoding = strtoupper( $matches[2] );
				// a meta tag claiming UTF-16 is treated as UTF-8 instead
				if (in_array($source_encoding, array("UTF-16", "UTF-16BE", "UTF-16LE"))) {
					$source_encoding = "UTF-8";
				}
			}
			// Then, convert the string
			$ret = MultiByte::convert_encoding( $source_contents, $habari_encoding, $source_encoding );
			if ( $ret !== false ) {
				$source_contents = $ret;
			}

			// Find the page's title; a page without one gets an empty title
			$source_title = '';
			if ( preg_match( '/<title>(.*)<\/title>/is', $source_contents, $matches ) ) {
				$source_title = $matches[1];
			}

			// Find the reciprocal links and their context; without a body there is
			// nothing to search, which ends in the "no link found" fault below
			$source_body = '';
			if ( preg_match( '/<body[^>]*>(.+)<\/body>/is', $source_contents, $matches ) ) {
				$source_body = $matches[1];
			}
			$source_contents_filtered = preg_replace( '/\s{2,}/is', ' ', strip_tags( $source_body, '<a>' ) );

			// Get rid of all the non-recriprocal links
			$ht = new HTMLTokenizer( trim( $source_contents_filtered ) );
			$set = $ht->parse();
			$all_links = $set->slice( 'a', array() );
			$keep_links = $set->slice( 'a', array( 'href' => $target_uri ) );
			$bad_links = array_diff( $all_links, $keep_links );
			foreach( $bad_links as $link ) {
				$link->tokenize_replace( '' );
				$set->replace_slice( $link );
			}
			$source_contents_filtered = (string)$set;

			// Get the excerpt. The target URI is remote input and regularly contains
			// regex metacharacters ('?', '.', '+', and the '%' delimiter itself), so it
			// has to be quoted before it is embedded in the pattern.
			$target_pattern = preg_quote( $target_uri, '%' );
			if ( !preg_match( '%.{0,100}?<a[^>]*?href\\s*=\\s*("|\'|)' . $target_pattern . '\\1[^>]*?'.'>(.+?)</a>.{0,100}%s', $source_contents_filtered, $source_excerpt ) ) {
				throw new XMLRPCException( 17 );
			}

			/** Sanitize Data */
			$source_excerpt = '&hellip;' . InputFilter::filter( $source_excerpt[0] ) . '&hellip;';
			$source_title = InputFilter::filter($source_title);
			$source_uri = InputFilter::filter($source_uri);

			/* Sanitize the URL */
			if (!empty($source_uri)) {
				$parsed = InputFilter::parse_url( $source_uri );
				if ( $parsed['is_relative'] ) {
					// guess if they meant to use an absolute link
					$parsed = InputFilter::parse_url( 'http://' . $source_uri );
					if ( ! $parsed['is_error'] ) {
						$source_uri = InputFilter::glue_url( $parsed );
					}
					else {
						// disallow relative URLs
						$source_uri = '';
					}
				}
				// NOTE(review): if the re-parse above errored but still reports an
				// http(s) scheme, the else branch below re-glues the URL and undoes
				// the blanking - confirm whether that is intended.
				if ( $parsed['is_pseudo'] || ( $parsed['scheme'] !== 'http' && $parsed['scheme'] !== 'https' ) ) {
					// allow only http(s) URLs
					$source_uri = '';
				}
				else {
					// reconstruct the URL from the error-tolerant parsing
					// http:moeffju.net/blog/ -> http://moeffju.net/blog/
					$source_uri = InputFilter::glue_url( $parsed );
				}
			}

			// Add a new pingback comment
			$pingback = new Comment( array(
				'post_id'	=>	$target_post->id,
				'name'		=>	$source_title,
				'email'		=>	'',
				'url'		=>	$source_uri,
				'ip'		=>	Utils::get_ip(),
				'content'	=>	$source_excerpt,
				'status'	=>	Comment::STATUS_UNAPPROVED,
				'date'		=>	HabariDateTime::date_create(),
				'type' 		=> 	Comment::PINGBACK,
				) );

			$pingback->insert();

			// Respond to the Pingback
			return 'The pingback has been registered';
		}
		catch ( XMLRPCException $e ) {
			// report the fault to the remote caller; nothing is returned in this case
			$e->output_fault_xml();
		}
	}
Exemple #5
0
 /**
  * Receive a Pingback via XMLRPC
  *
  * Resolves the target URI to a post, fetches the source document, converts
  * it to the Habari encoding when a differing charset is declared in a meta
  * tag, extracts the page title and an excerpt around the link to the target,
  * and stores the result as an unapproved pingback comment. Any
  * XMLRPCException raised along the way is reported through its
  * output_fault_xml() method instead of propagating to the caller.
  *
  * @param array $params An array of XMLRPC parameters from the remote call (source URI first, target URI second)
  * @return string The success state of the pingback
  */
 public function xmlrpc_pingback__ping($params)
 {
     try {
         list($source_uri, $target_uri) = $params;
         // This should really be done by an Habari core function
         $target_parse = InputFilter::parse_url($target_uri);
         $target_stub = $target_parse['path'];
         $base_url = Site::get_path('base', TRUE);
         if ('/' != $base_url) {
             $target_stub = str_replace($base_url, '', $target_stub);
         }
         $target_stub = trim($target_stub, '/');
         // drop anything from the first '?' onwards
         if (strpos($target_stub, '?') !== FALSE) {
             list($target_stub) = explode('?', $target_stub);
         }
         // Can this be used as a target?
         $target_slug = URL::parse($target_stub)->named_arg_values['slug'];
         if ($target_slug === FALSE) {
             throw new XMLRPCException(33);
         }
         // Does the target exist?
         $target_post = Post::get(array('slug' => $target_slug));
         if ($target_post === FALSE) {
             throw new XMLRPCException(32);
         }
         // Is comment allowed?
         if ($target_post->info->comments_disabled) {
             throw new XMLRPCException(33);
         }
         // Is this Pingback already registered?
         if (Comments::get(array('post_id' => $target_post->id, 'url' => $source_uri, 'type' => Comment::PINGBACK))->count() > 0) {
             throw new XMLRPCException(48);
         }
         // Retrieve source contents
         $rr = new RemoteRequest($source_uri);
         $rr->execute();
         if (!$rr->executed()) {
             throw new XMLRPCException(16);
         }
         $source_contents = $rr->get_response_body();
         // encoding is converted into internal encoding.
         // @todo check BOM at beginning of file before checking for a charset attribute
         $habari_encoding = MultiByte::hab_encoding();
         // preg_match() returns 0 (not FALSE) when nothing matches, so test for an
         // actual match before touching $matches[1]; otherwise a page without a
         // charset declaration would be converted from an undefined encoding
         if (preg_match("/<meta[^>]+charset=([A-Za-z0-9\\-\\_]+)/i", $source_contents, $matches) && strtolower($habari_encoding) != strtolower($matches[1])) {
             $ret = MultiByte::convert_encoding($source_contents, $habari_encoding, $matches[1]);
             if ($ret !== FALSE) {
                 $source_contents = $ret;
             }
         }
         // Find the page's title; a page without one gets an empty title
         $source_title = '';
         if (preg_match('/<title>(.*)<\\/title>/is', $source_contents, $matches)) {
             $source_title = $matches[1];
         }
         // Find the reciprocal links and their context; without a body there is
         // nothing to search, which ends in the "no link found" fault below
         $source_body = '';
         if (preg_match('/<body[^>]*>(.+)<\\/body>/is', $source_contents, $matches)) {
             $source_body = $matches[1];
         }
         $source_contents_filtered = preg_replace('/\\s{2,}/is', ' ', strip_tags($source_body, '<a>'));
         // The target URI is remote input and regularly contains regex
         // metacharacters ('?', '.', '+', and the '%' delimiter itself), so it has
         // to be quoted before it is embedded in the pattern.
         $target_pattern = preg_quote($target_uri, '%');
         if (!preg_match('%.{0,100}?<a[^>]*?href\\s*=\\s*("|\'|)' . $target_pattern . '\\1[^>]*?' . '>(.+?)</a>.{0,100}%s', $source_contents_filtered, $source_excerpt)) {
             throw new XMLRPCException(17);
         }
         /** Sanitize Data */
         $source_excerpt = '...' . InputFilter::filter($source_excerpt[0]) . '...';
         $source_title = InputFilter::filter($source_title);
         $source_uri = InputFilter::filter($source_uri);
         /* Sanitize the URL */
         if (!empty($source_uri)) {
             $parsed = InputFilter::parse_url($source_uri);
             if ($parsed['is_relative']) {
                 // guess if they meant to use an absolute link
                 $parsed = InputFilter::parse_url('http://' . $source_uri);
                 if (!$parsed['is_error']) {
                     $source_uri = InputFilter::glue_url($parsed);
                 } else {
                     // disallow relative URLs
                     $source_uri = '';
                 }
             }
             // NOTE(review): if the re-parse above errored but still reports an
             // http(s) scheme, the else branch below re-glues the URL and undoes
             // the blanking - confirm whether that is intended.
             if ($parsed['is_pseudo'] || ($parsed['scheme'] !== 'http' && $parsed['scheme'] !== 'https')) {
                 // allow only http(s) URLs
                 $source_uri = '';
             } else {
                 // reconstruct the URL from the error-tolerant parsing
                 // http:moeffju.net/blog/ -> http://moeffju.net/blog/
                 $source_uri = InputFilter::glue_url($parsed);
             }
         }
         // Add a new pingback comment
         // NOTE(review): ip2long() only understands IPv4 addresses - confirm how
         // IPv6 clients should be recorded.
         $pingback = new Comment(array('post_id' => $target_post->id, 'name' => $source_title, 'email' => '', 'url' => $source_uri, 'ip' => sprintf("%u", ip2long($_SERVER['REMOTE_ADDR'])), 'content' => $source_excerpt, 'status' => Comment::STATUS_UNAPPROVED, 'date' => HabariDateTime::date_create(), 'type' => Comment::PINGBACK));
         $pingback->insert();
         // Respond to the Pingback
         return 'The pingback has been registered';
     } catch (XMLRPCException $e) {
         // report the fault to the remote caller; nothing is returned in this case
         $e->output_fault_xml();
     }
 }
Exemple #6
0
 /**
  * Assign the default variables that would be used in every template
  *
  * Each of locale, charset, user, loggedin, page and page_title is only
  * filled in when it has not been set already. Afterwards the
  * 'add_template_vars' plugin action is fired if a handler is available, and
  * the added_template_vars flag is raised.
  */
 public function add_template_vars()
 {
     // set the locale and character set that habari is configured to use presently
     if (!isset($this->locale)) {
         // default to 'en' just in case we somehow don't have one?
         $this->locale = Options::get('locale', 'en');
     }
     if (!isset($this->charset)) {
         $this->charset = MultiByte::hab_encoding();
     }
     if (!isset($this->user)) {
         $this->user = User::identify();
     }
     if (!isset($this->loggedin)) {
         $this->loggedin = User::identify()->loggedin;
     }
     if (!isset($this->page)) {
         // the guard has already established that no page is set, so there is
         // nothing to fall back on other than the first page
         $this->page = 1;
     }
     if (!isset($this->page_title)) {
         // This calls theme_page_title via the theme_* plugin hook.
         $this->page_title = $this->page_title();
     }
     $handler = Controller::get_handler();
     if (isset($handler)) {
         Plugins::act('add_template_vars', $this, $handler->handler_vars);
     }
     $this->added_template_vars = true;
 }
Exemple #7
0
	/**
	 * MultiByte::hab_encoding() called without arguments should report UTF-8.
	 */
	public function testHab_encoding()
	{
		// expected value first, actual value second, so that a failure message
		// labels the two sides correctly
		$this->assertEquals( 'UTF-8', MultiByte::hab_encoding() );
	}
Exemple #8
0
	/**
	 * Populate the variables every template relies on.
	 *
	 * Locale and charset are stored on this object when they are not set yet;
	 * user, loggedin and page are assigned only when the template engine does
	 * not report them as assigned. The 'add_template_vars' plugin action is
	 * fired last, provided a handler is available.
	 */
	public function add_template_vars()
	{
		// Locale and character set habari is currently configured with.
		if ( !isset( $this->locale ) ) {
			// 'en' is the fallback in case no locale option exists.
			$this->locale = Options::get('locale', 'en');
		}
		if ( !isset( $this->charset ) ) {
			$this->charset = MultiByte::hab_encoding();
		}

		// The current user, unless one was already assigned.
		if ( !$this->template_engine->assigned( 'user' ) ) {
			$this->assign( 'user', User::identify() );
		}

		// Whether that user is logged in.
		if ( !$this->template_engine->assigned( 'loggedin' ) ) {
			$this->assign( 'loggedin', User::identify()->loggedin );
		}

		// The page number, falling back to the first page.
		if ( !$this->template_engine->assigned( 'page' ) ) {
			if ( isset( $this->page ) ) {
				$current_page = $this->page;
			}
			else {
				$current_page = 1;
			}
			$this->assign( 'page', $current_page );
		}

		// Let plugins contribute their own variables.
		$handler = Controller::get_handler();
		if ( $handler !== null ) {
			Plugins::act( 'add_template_vars', $this, $handler->handler_vars );
		}
	}