/**
 * Exercise the getter/setter round trip of MultiByte::hab_encoding().
 *
 * Reads the current encoding, compares it with MultiByte::$hab_enc, swaps in
 * a dummy value while checking that the call hands back the prior encoding,
 * and finally restores the starting value.
 */
public function test_hab_encoding()
{
    // Snapshot whatever encoding is in effect right now.
    $initial = MultiByte::hab_encoding();

    // The getter has to agree with the static property.
    $this->assert_equal($initial, MultiByte::$hab_enc);

    // Setting a new value is expected to return the one it replaced.
    $previous = MultiByte::hab_encoding('foo');
    $this->assert_equal($previous, $initial);

    // Restore the starting encoding and confirm the getter reports it again.
    MultiByte::hab_encoding($initial);
    $this->assert_equal($initial, MultiByte::hab_encoding());
}
/**
 * Compare MultiByte::substr() with mb_substr() on the shared test string.
 *
 * Diagnostic lines (encodings in play, resulting substrings) are printed
 * before and after the assertions.
 */
function test_substr()
{
    $subject = self::$test_str;

    // Report the encodings involved.
    printf("Test string: %s <br>", $subject);
    printf("Habari encoding: %s <br>", MultiByte::hab_encoding());
    printf("mb_internal_encoding: %s <br>", mb_internal_encoding());
    printf("MultiByte detected encoding of test string: %s <br>", MultiByte::detect_encoding($subject));
    printf("mbstring detected encoding of test string: %s <br>", mb_detect_encoding($subject));

    // Offset 1, length 3: mbstring using its default encoding.
    $actual = MultiByte::substr($subject, 1, 3);
    $expected = mb_substr($subject, 1, 3);
    $this->assert_equal($actual, $expected);

    // Offset 1, length 3: mbstring told the detected encoding explicitly.
    $actual = MultiByte::substr($subject, 1, 3);
    $expected = mb_substr($subject, 1, 3, mb_detect_encoding($subject));
    $this->assert_equal($actual, $expected);

    // Offset 5 through the end of the string.
    $actual = MultiByte::substr($subject, 5);
    $expected = mb_substr($subject, 5);
    $this->assert_equal($actual, $expected);

    // Show the substrings themselves for visual inspection.
    printf(" MultiByte substring (begin-1 end-3): %s <br>", MultiByte::substr($subject, 1, 3));
    printf(" MultiByte substring 2 (begin-5 end-null): %s <br>", MultiByte::substr($subject, 5));
    printf(" mbstring substring without encoding detected (begin-1 end-3): %s <br>", mb_substr($subject, 1, 3));
    printf(
        " mbstring substring with encoding detected (begin-1 end-3): %s <br>",
        mb_substr($subject, 1, 3, mb_detect_encoding($subject))
    );
    printf(" mbstring substring 2 without encoding detected(begin-5 end-null): %s <br>", mb_substr($subject, 5));
}
/**
 * Filter an HTML string against the element and attribute whitelists.
 *
 * The input is tokenized and each token is handled by type:
 *  - text nodes have their entities decoded using Habari's encoding;
 *  - opening/empty/closing tags of elements that are not in
 *    self::$whitelist_elements are turned into plain text nodes;
 *  - on whitelisted elements, attributes that are in neither the global ('*')
 *    nor the per-element attribute whitelist, or that fail
 *    self::check_attr_value(), are removed;
 *  - processing instructions, comments, CDATA sections, statements and any
 *    unknown node type are dropped.
 * After serialization, tag pairs with nothing between them are removed.
 *
 * @todo TODO must build DOM to really properly remove offending elements
 * @todo TODO properly filter URLs
 *
 * @param string $str The HTML to filter
 * @return string The filtered HTML, as returned by preg_replace()
 */
public static function filter_html_elements($str)
{
    $tokenizer = new HTMLTokenizer($str);

    // tokenize, baby
    $tokens = $tokenizer->parse();

    // filter token stream
    $filtered = new HTMLTokenSet();
    // Names of non-whitelisted, non-empty elements that are currently open.
    $stack = array();

    foreach ($tokens as $node) {
        switch ($node['type']) {
            case HTMLTokenizer::NODE_TYPE_TEXT:
                $node['value'] = html_entity_decode($node['value'], ENT_QUOTES, MultiByte::hab_encoding());
                break;

            case HTMLTokenizer::NODE_TYPE_ELEMENT_OPEN:
            case HTMLTokenizer::NODE_TYPE_ELEMENT_EMPTY:
                $element = strtolower($node['name']);

                // is this element allowed at all?
                if (!in_array($element, self::$whitelist_elements, true)) {
                    if (!in_array($element, self::$elements_empty, true)) {
                        array_push($stack, $node['name']);
                    }
                    // convert the node to text instead of removing it completely
                    $node = array(
                        'type' => HTMLTokenizer::NODE_TYPE_TEXT,
                        'name' => '#text',
                        'value' => HTMLTokenSet::token_to_string($node),
                        'attrs' => array(),
                    );
                } else {
                    // check attributes
                    foreach ($node['attrs'] as $k => $v) {
                        $attr = strtolower($k);
                        $attr_ok = false;

                        // if the attribute is in the global whitelist and validates
                        if (array_key_exists($attr, self::$whitelist_attributes['*'])
                            && self::check_attr_value($attr, $v, self::$whitelist_attributes['*'][$attr])) {
                            $attr_ok = true;
                        }

                        // if there is a whitelist for this node and this attribute is in that list and it validates
                        if (array_key_exists($element, self::$whitelist_attributes)
                            && array_key_exists($attr, self::$whitelist_attributes[$element])
                            && self::check_attr_value($attr, $v, self::$whitelist_attributes[$element][$attr])) {
                            $attr_ok = true;
                        }

                        // if it wasn't in one of the whitelists or failed its check, remove it
                        if (!$attr_ok) {
                            unset($node['attrs'][$k]);
                        }
                    }
                }
                break;

            case HTMLTokenizer::NODE_TYPE_ELEMENT_CLOSE:
                $element = strtolower($node['name']);

                if (!in_array($element, self::$whitelist_elements, true)) {
                    $temp = array_pop($stack);
                    // array_pop() returns null for an empty stack; only a real,
                    // non-matching entry is put back (Luke, use the DOM!).
                    if ($temp !== null && strtolower($temp) !== $element) {
                        array_push($stack, $temp);
                    }
                    // convert the node to text instead of removing it completely
                    $node = array(
                        'type' => HTMLTokenizer::NODE_TYPE_TEXT,
                        'name' => '#text',
                        'value' => HTMLTokenSet::token_to_string($node),
                        'attrs' => array(),
                    );
                }
                break;

            case HTMLTokenizer::NODE_TYPE_PI:
            case HTMLTokenizer::NODE_TYPE_COMMENT:
            case HTMLTokenizer::NODE_TYPE_CDATA_SECTION:
            case HTMLTokenizer::NODE_TYPE_STATEMENT:
            default:
                // never let these through
                $node = null;
                break;
        }

        if ($node !== null) {
            $filtered[] = $node;
        }
    }

    // rebuild our output string, dropping tag pairs that have no content
    return preg_replace('#<([^>\\s]+)(?:\\s+[^>]+)?></\\1>#u', '', (string) $filtered);
}
/**
 * Receive a Pingback via XMLRPC
 *
 * Resolves the target URI to a post, fetches the source page, converts it to
 * Habari's encoding, extracts the page title and an excerpt around the link
 * to the target, and inserts the result as an unapproved pingback comment.
 * Any XMLRPCException raised along the way is caught and emitted through
 * output_fault_xml(); in that case nothing is returned.
 *
 * @param array $params An array of XMLRPC parameters from the remote call
 *                      (source URI first, target URI second)
 * @return string The success state of the pingback
 */
public function xmlrpc_pingback__ping( $params )
{
    try {
        list( $source_uri, $target_uri ) = $params;

        // This should really be done by an Habari core function
        $target_parse = InputFilter::parse_url( $target_uri );
        $target_stub = $target_parse['path'];
        $base_url = Site::get_path( 'base', true );

        if ( '/' != $base_url ) {
            $target_stub = str_replace( $base_url, '', $target_stub );
        }

        $target_stub = trim( $target_stub, '/' );

        if ( strpos( $target_stub, '?' ) !== false ) {
            // Keep only the part before the query string.
            $stub_parts = explode( '?', $target_stub, 2 );
            $target_stub = $stub_parts[0];
        }

        // Can this be used as a target?
        $target_slug = URL::parse( $target_stub )->named_arg_values['slug'];

        if ( $target_slug === false ) {
            throw new XMLRPCException( 33 );
        }

        // Does the target exist?
        $target_post = Post::get( array( 'slug' => $target_slug ) );

        if ( $target_post === false ) {
            throw new XMLRPCException( 32 );
        }

        // Is comment allowed?
        if ( $target_post->info->comments_disabled ) {
            throw new XMLRPCException( 33 );
        }

        // Is this Pingback already registered?
        if ( Comments::get( array( 'post_id' => $target_post->id, 'url' => $source_uri, 'type' => Comment::PINGBACK ) )->count() > 0 ) {
            throw new XMLRPCException( 48 );
        }

        // Retrieve source contents
        try {
            $rr = new RemoteRequest( $source_uri );
            $rr->execute();
            if ( ! $rr->executed() ) {
                throw new XMLRPCException( 16 );
            }
            $source_contents = $rr->get_response_body();
            $headers = $rr->get_response_headers();
        }
        catch ( XMLRPCException $e ) {
            // catch our special type of exception and re-throw it
            throw $e;
        }
        catch ( Exception $e ) {
            throw new XMLRPCException( -32300 );
        }

        // Encoding is converted into internal encoding.
        // First, detect the source string's encoding
        $habari_encoding = strtoupper( MultiByte::hab_encoding() );
        $source_encoding = 'Windows-1252';

        // The patterns below are single-quoted on purpose: inside a
        // double-quoted PHP string "\1" is an octal escape (byte 0x01), not a
        // regex backreference, which would keep them from matching.
        $ws = '[\x09\x0A\x0C\x0D\x20]*';

        // Is the charset in the headers?
        if ( isset( $headers['Content-Type'] ) && strpos( $headers['Content-Type'], 'charset' ) !== false ) {
            // This regex should be changed to meet the HTTP spec at some point
            if ( preg_match( '/charset' . $ws . '=' . $ws . '(\'?)([A-Za-z0-9\-\_]+)\1/i', $headers['Content-Type'], $matches ) ) {
                $source_encoding = strtoupper( $matches[2] );
            }
        }
        // Can we tell the charset from the stream itself?
        else if ( ( $enc = MultiByte::detect_bom_encoding( $source_contents ) ) !== false ) {
            $source_encoding = $enc;
        }
        // Is the charset in a meta tag?
        else if ( preg_match( '/<meta[^>]+charset' . $ws . '=' . $ws . '(["\']?)([A-Za-z0-9\-\_]+)\1/i', $source_contents, $matches ) ) {
            $source_encoding = strtoupper( $matches[2] );
            // A UTF-16 label read from the meta tag is replaced with UTF-8.
            if ( in_array( $source_encoding, array( "UTF-16", "UTF-16BE", "UTF-16LE" ) ) ) {
                $source_encoding = "UTF-8";
            }
        }

        // Then, convert the string
        $ret = MultiByte::convert_encoding( $source_contents, $habari_encoding, $source_encoding );
        if ( $ret !== false ) {
            $source_contents = $ret;
        }

        // Find the page's title (empty when the page has none)
        $source_title = '';
        if ( preg_match( '/<title>(.*)<\/title>/is', $source_contents, $matches ) ) {
            $source_title = $matches[1];
        }

        // Find the reciprocal links and their context (empty when there is no body)
        $source_body = '';
        if ( preg_match( '/<body[^>]*>(.+)<\/body>/is', $source_contents, $matches ) ) {
            $source_body = $matches[1];
        }
        $source_contents_filtered = preg_replace( '/\s{2,}/is', ' ', strip_tags( $source_body, '<a>' ) );

        // Get rid of all the non-recriprocal links
        $ht = new HTMLTokenizer( trim( $source_contents_filtered ) );
        $set = $ht->parse();
        $all_links = $set->slice( 'a', array() );
        $keep_links = $set->slice( 'a', array( 'href' => $target_uri ) );
        $bad_links = array_diff( $all_links, $keep_links );
        foreach ( $bad_links as $link ) {
            $link->tokenize_replace( '' );
            $set->replace_slice( $link );
        }
        $source_contents_filtered = (string)$set;

        // Get the excerpt. The target URI is remote-supplied text, so it is
        // quoted before being embedded in the pattern ('%' is the delimiter).
        $excerpt_pattern = '%.{0,100}?<a[^>]*?href\\s*=\\s*("|\'|)' . preg_quote( $target_uri, '%' ) . '\\1[^>]*?' . '>(.+?)</a>.{0,100}%s';
        if ( !preg_match( $excerpt_pattern, $source_contents_filtered, $source_excerpt ) ) {
            throw new XMLRPCException( 17 );
        }

        /** Sanitize Data */
        $source_excerpt = '…' . InputFilter::filter( $source_excerpt[0] ) . '…';
        $source_title = InputFilter::filter( $source_title );
        $source_uri = InputFilter::filter( $source_uri );

        /* Sanitize the URL */
        if ( !empty( $source_uri ) ) {
            $parsed = InputFilter::parse_url( $source_uri );
            if ( $parsed['is_relative'] ) {
                // guess if they meant to use an absolute link
                $parsed = InputFilter::parse_url( 'http://' . $source_uri );
                if ( ! $parsed['is_error'] ) {
                    $source_uri = InputFilter::glue_url( $parsed );
                }
                else {
                    // disallow relative URLs
                    $source_uri = '';
                }
            }
            if ( $parsed['is_pseudo'] || ( $parsed['scheme'] !== 'http' && $parsed['scheme'] !== 'https' ) ) {
                // allow only http(s) URLs
                $source_uri = '';
            }
            else {
                // reconstruct the URL from the error-tolerant parsing
                // http:moeffju.net/blog/ -> http://moeffju.net/blog/
                $source_uri = InputFilter::glue_url( $parsed );
            }
        }

        // Add a new pingback comment
        $pingback = new Comment( array(
            'post_id' => $target_post->id,
            'name' => $source_title,
            'email' => '',
            'url' => $source_uri,
            'ip' => Utils::get_ip(),
            'content' => $source_excerpt,
            'status' => Comment::STATUS_UNAPPROVED,
            'date' => HabariDateTime::date_create(),
            'type' => Comment::PINGBACK,
        ) );

        $pingback->insert();

        // Respond to the Pingback
        return 'The pingback has been registered';
    }
    catch ( XMLRPCException $e ) {
        $e->output_fault_xml();
    }
}
/**
 * Receive a Pingback via XMLRPC
 *
 * Resolves the target URI to a post, fetches the source page, converts it to
 * Habari's encoding when a differing meta charset is found, extracts the page
 * title and an excerpt around the link to the target, and inserts the result
 * as an unapproved pingback comment. Any XMLRPCException raised along the way
 * is caught and emitted through output_fault_xml(); in that case nothing is
 * returned.
 *
 * @param array $params An array of XMLRPC parameters from the remote call
 *                      (source URI first, target URI second)
 * @return string The success state of the pingback
 */
public function xmlrpc_pingback__ping($params)
{
    try {
        list($source_uri, $target_uri) = $params;

        // This should really be done by an Habari core function
        $target_parse = InputFilter::parse_url($target_uri);
        $target_stub = $target_parse['path'];
        $base_url = Site::get_path('base', TRUE);

        if ('/' != $base_url) {
            $target_stub = str_replace($base_url, '', $target_stub);
        }

        $target_stub = trim($target_stub, '/');

        if (strpos($target_stub, '?') !== FALSE) {
            // Keep only the part before the query string.
            $stub_parts = explode('?', $target_stub, 2);
            $target_stub = $stub_parts[0];
        }

        // Can this be used as a target?
        $target_slug = URL::parse($target_stub)->named_arg_values['slug'];

        if ($target_slug === FALSE) {
            throw new XMLRPCException(33);
        }

        // Does the target exist?
        $target_post = Post::get(array('slug' => $target_slug));

        if ($target_post === FALSE) {
            throw new XMLRPCException(32);
        }

        // Is comment allowed?
        if ($target_post->info->comments_disabled) {
            throw new XMLRPCException(33);
        }

        // Is this Pingback already registered?
        if (Comments::get(array('post_id' => $target_post->id, 'url' => $source_uri, 'type' => Comment::PINGBACK))->count() > 0) {
            throw new XMLRPCException(48);
        }

        // Retrieve source contents
        $rr = new RemoteRequest($source_uri);
        $rr->execute();
        if (!$rr->executed()) {
            throw new XMLRPCException(16);
        }
        $source_contents = $rr->get_response_body();

        // encoding is converted into internal encoding.
        // @todo check BOM at beginning of file before checking for a charset attribute
        $habari_encoding = MultiByte::hab_encoding();
        // preg_match() returns 0 (not FALSE) when nothing matches, so test for
        // an actual match before reading $matches[1].
        if (preg_match("/<meta[^>]+charset=([A-Za-z0-9\\-\\_]+)/i", $source_contents, $matches) === 1
            && strtolower($habari_encoding) != strtolower($matches[1])) {
            $ret = MultiByte::convert_encoding($source_contents, $habari_encoding, $matches[1]);
            if ($ret !== FALSE) {
                $source_contents = $ret;
            }
        }

        // Find the page's title (empty when the page has none)
        $source_title = '';
        if (preg_match('/<title>(.*)<\\/title>/is', $source_contents, $matches)) {
            $source_title = $matches[1];
        }

        // Find the reciprocal links and their context (empty when there is no body)
        $source_body = '';
        if (preg_match('/<body[^>]*>(.+)<\\/body>/is', $source_contents, $matches)) {
            $source_body = $matches[1];
        }
        $source_contents_filtered = preg_replace('/\\s{2,}/is', ' ', strip_tags($source_body, '<a>'));

        // The target URI is remote-supplied text, so it is quoted before being
        // embedded in the pattern ('%' is the delimiter).
        $excerpt_pattern = '%.{0,100}?<a[^>]*?href\\s*=\\s*("|\'|)' . preg_quote($target_uri, '%') . '\\1[^>]*?' . '>(.+?)</a>.{0,100}%s';
        if (!preg_match($excerpt_pattern, $source_contents_filtered, $source_excerpt)) {
            throw new XMLRPCException(17);
        }

        /** Sanitize Data */
        $source_excerpt = '...' . InputFilter::filter($source_excerpt[0]) . '...';
        $source_title = InputFilter::filter($source_title);
        $source_uri = InputFilter::filter($source_uri);

        /* Sanitize the URL */
        if (!empty($source_uri)) {
            $parsed = InputFilter::parse_url($source_uri);
            if ($parsed['is_relative']) {
                // guess if they meant to use an absolute link
                $parsed = InputFilter::parse_url('http://' . $source_uri);
                if (!$parsed['is_error']) {
                    $source_uri = InputFilter::glue_url($parsed);
                } else {
                    // disallow relative URLs
                    $source_uri = '';
                }
            }
            // Parentheses spell out the precedence the expression already had.
            if ($parsed['is_pseudo'] || ($parsed['scheme'] !== 'http' && $parsed['scheme'] !== 'https')) {
                // allow only http(s) URLs
                $source_uri = '';
            } else {
                // reconstruct the URL from the error-tolerant parsing
                // http:moeffju.net/blog/ -> http://moeffju.net/blog/
                $source_uri = InputFilter::glue_url($parsed);
            }
        }

        // Add a new pingback comment
        // NOTE(review): ip2long() only understands IPv4 dotted addresses;
        // confirm how IPv6 clients should be recorded.
        $pingback = new Comment(array(
            'post_id' => $target_post->id,
            'name' => $source_title,
            'email' => '',
            'url' => $source_uri,
            'ip' => sprintf("%u", ip2long($_SERVER['REMOTE_ADDR'])),
            'content' => $source_excerpt,
            'status' => Comment::STATUS_UNAPPROVED,
            'date' => HabariDateTime::date_create(),
            'type' => Comment::PINGBACK,
        ));

        $pingback->insert();

        // Respond to the Pingback
        return 'The pingback has been registered';
    } catch (XMLRPCException $e) {
        $e->output_fault_xml();
    }
}
/**
 * Assign the default variables that would be used in every template
 *
 * Each of locale, charset, user, loggedin, page and page_title is only set
 * when it is not already set on this object. Afterwards the
 * 'add_template_vars' plugin action is fired if a handler is available, and
 * the added_template_vars flag is raised.
 */
public function add_template_vars()
{
    // set the locale and character set that habari is configured to use presently
    if (!isset($this->locale)) {
        // default to 'en' just in case we somehow don't have one?
        $this->locale = Options::get('locale', 'en');
    }
    if (!isset($this->charset)) {
        $this->charset = MultiByte::hab_encoding();
    }

    if (!isset($this->user)) {
        $this->user = User::identify();
    }
    if (!isset($this->loggedin)) {
        $this->loggedin = User::identify()->loggedin;
    }

    if (!isset($this->page)) {
        // Inside this guard the page is known to be unset, so the default is
        // the only value that can apply.
        $this->page = 1;
    }
    if (!isset($this->page_title)) {
        // This calls theme_page_title via the theme_* plugin hook.
        $this->page_title = $this->page_title();
    }

    $handler = Controller::get_handler();
    if (isset($handler)) {
        Plugins::act('add_template_vars', $this, $handler->handler_vars);
    }

    $this->added_template_vars = true;
}
/**
 * MultiByte::hab_encoding() is expected to report 'UTF-8'.
 */
public function testHab_encoding()
{
    // Expected value first, actual second (PHPUnit's assertEquals() argument
    // order), so a failure message reports the two the right way round.
    $this->assertEquals( 'UTF-8', MultiByte::hab_encoding() );
}
/**
 * Assign the default variables that would be used in every template
 *
 * locale and charset are stored on this object when not already set; user,
 * loggedin and page are assigned to the template engine when it has no value
 * for them yet. The 'add_template_vars' plugin action is fired last, provided
 * a handler is available.
 */
public function add_template_vars()
{
    // Locale and character set habari is presently configured to use.
    if ( !isset( $this->locale ) ) {
        // 'en' is the fallback in case no locale option exists.
        $this->locale = Options::get( 'locale', 'en' );
    }
    if ( !isset( $this->charset ) ) {
        $this->charset = MultiByte::hab_encoding();
    }

    // Values the template engine has not been given yet.
    if ( !$this->template_engine->assigned( 'user' ) ) {
        $this->assign( 'user', User::identify() );
    }
    if ( !$this->template_engine->assigned( 'loggedin' ) ) {
        $this->assign( 'loggedin', User::identify()->loggedin );
    }
    if ( !$this->template_engine->assigned( 'page' ) ) {
        // First page unless this object already carries a page value.
        if ( isset( $this->page ) ) {
            $this->assign( 'page', $this->page );
        }
        else {
            $this->assign( 'page', 1 );
        }
    }

    // Give plugins a chance to contribute their own template variables.
    $active_handler = Controller::get_handler();
    if ( $active_handler !== null ) {
        Plugins::act( 'add_template_vars', $this, $active_handler->handler_vars );
    }
}