/**
 * Receive a Pingback via XMLRPC.
 *
 * Resolves the target URI to a post, fetches the source page, converts it to
 * the internal encoding, extracts the page title and an excerpt surrounding
 * the link to the target, and stores the result as an unapproved pingback
 * comment on the target post.
 *
 * Failures are reported by throwing an XMLRPCException, which is caught at the
 * end of the method and emitted as fault XML. Fault codes thrown here:
 *   33     - no slug could be derived from the target URI, or comments are disabled on the post
 *   32     - no post matches the slug
 *   48     - a pingback comment for this source URI already exists on the post
 *   16     - the remote request for the source URI was not executed
 *   -32300 - any other exception was raised while fetching the source
 *   17     - the source page contains no usable link to the target URI
 *
 * @param array $params An array of XMLRPC parameters from the remote call: element 0 is the source URI, element 1 the target URI
 * @return string The success state of the pingback; nothing is returned when a fault is output
 */
public function xmlrpc_pingback__ping( $params )
{
	try {
		list( $source_uri, $target_uri ) = $params;

		// This should really be done by an Habari core function
		$target_parse = InputFilter::parse_url( $target_uri );
		$target_stub = $target_parse['path'];
		$base_url = Site::get_path( 'base', true );

		// NOTE(review): this removes every occurrence of the base path, not only a leading one - confirm that is intended
		if ( '/' != $base_url ) {
			$target_stub = str_replace( $base_url, '', $target_stub );
		}

		$target_stub = trim( $target_stub, '/' );

		// Drop anything after a query-string separator; only the path part is matched against the rewrite rules
		if ( strpos( $target_stub, '?' ) !== false ) {
			$stub_parts = explode( '?', $target_stub, 2 );
			$target_stub = $stub_parts[0];
		}

		// Can this be used as a target?
		// Guard the lookup: dereferencing a non-object result or a missing 'slug' key
		// would raise a PHP error instead of the intended XMLRPC fault.
		$matched_rule = URL::parse( $target_stub );
		$named_args = is_object( $matched_rule ) ? $matched_rule->named_arg_values : array();
		if ( ! isset( $named_args['slug'] ) || $named_args['slug'] === false ) {
			throw new XMLRPCException( 33 );
		}
		$target_slug = $named_args['slug'];

		// Does the target exist?
		$target_post = Post::get( array( 'slug' => $target_slug ) );

		if ( $target_post === false ) {
			throw new XMLRPCException( 32 );
		}

		// Is comment allowed?
		if ( $target_post->info->comments_disabled ) {
			throw new XMLRPCException( 33 );
		}

		// Is this Pingback already registered?
		$existing = Comments::get( array( 'post_id' => $target_post->id, 'url' => $source_uri, 'type' => Comment::PINGBACK ) );
		if ( $existing->count() > 0 ) {
			throw new XMLRPCException( 48 );
		}

		// Retrieve source contents
		try {
			$rr = new RemoteRequest( $source_uri );
			$rr->execute();
			if ( ! $rr->executed() ) {
				throw new XMLRPCException( 16 );
			}
			$source_contents = $rr->get_response_body();
			$headers = $rr->get_response_headers();
		}
		catch ( XMLRPCException $e ) {
			// catch our special type of exception and re-throw it
			throw $e;
		}
		catch ( Exception $e ) {
			throw new XMLRPCException( -32300 );
		}

		// Encoding is converted into internal encoding.
		// First, detect the source string's encoding
		$habari_encoding = strtoupper( MultiByte::hab_encoding() );
		$source_encoding = 'Windows-1252';

		// The patterns are assembled from single-quoted strings on purpose: inside a
		// double-quoted PHP string "\1" is an octal escape (byte 0x01), not a PCRE
		// backreference, which would make the patterns unmatchable on real input.
		$charset_ws = '[\x09\x0A\x0C\x0D\x20]*';
		$charset_value = 'charset' . $charset_ws . '=' . $charset_ws . '(["\']?)([A-Za-z0-9\-_]+)\1';

		// Is the charset in the headers?
		if ( isset( $headers['Content-Type'] ) && strpos( $headers['Content-Type'], 'charset' ) !== false ) {
			// This regex should be changed to meet the HTTP spec at some point
			if ( preg_match( '/' . $charset_value . '/i', $headers['Content-Type'], $matches ) ) {
				$source_encoding = strtoupper( $matches[2] );
			}
		}
		// Can we tell the charset from the stream itself?
		else if ( ( $enc = MultiByte::detect_bom_encoding( $source_contents ) ) !== false ) {
			$source_encoding = $enc;
		}
		// Is the charset in a meta tag?
		else if ( preg_match( '/<meta[^>]+' . $charset_value . '/i', $source_contents, $matches ) ) {
			$source_encoding = strtoupper( $matches[2] );
			// A meta tag that could be read as ASCII-compatible bytes cannot really be UTF-16
			if ( in_array( $source_encoding, array( 'UTF-16', 'UTF-16BE', 'UTF-16LE' ), true ) ) {
				$source_encoding = 'UTF-8';
			}
		}

		// Then, convert the string
		$ret = MultiByte::convert_encoding( $source_contents, $habari_encoding, $source_encoding );
		if ( $ret !== false ) {
			$source_contents = $ret;
		}

		// Find the page's title; a page without one gets an empty title
		$source_title = '';
		if ( preg_match( '/<title\b[^>]*>(.*?)<\/title>/is', $source_contents, $matches ) ) {
			$source_title = $matches[1];
		}

		// Find the reciprocal links and their context.
		// Without a body there is nothing to search, so no link to the target can be found.
		if ( ! preg_match( '/<body[^>]*>(.+)<\/body>/is', $source_contents, $matches ) ) {
			throw new XMLRPCException( 17 );
		}
		$source_contents_filtered = preg_replace( '/\s{2,}/is', ' ', strip_tags( $matches[1], '<a>' ) );

		// Get rid of all the non-reciprocal links
		$ht = new HTMLTokenizer( trim( $source_contents_filtered ) );
		$set = $ht->parse();
		$all_links = $set->slice( 'a', array() );
		$keep_links = $set->slice( 'a', array( 'href' => $target_uri ) );
		$bad_links = array_diff( $all_links, $keep_links );
		foreach ( $bad_links as $link ) {
			$link->tokenize_replace( '' );
			$set->replace_slice( $link );
		}
		$source_contents_filtered = (string)$set;

		// Get the excerpt: up to 100 characters either side of the link to the target.
		// The target URI is quoted so that characters such as '.', '?' or '%' are matched literally.
		$excerpt_pattern = '%.{0,100}?<a[^>]*?href\\s*=\\s*("|\'|)' . preg_quote( $target_uri, '%' ) . '\\1[^>]*?>(.+?)</a>.{0,100}%s';
		if ( ! preg_match( $excerpt_pattern, $source_contents_filtered, $source_excerpt ) ) {
			throw new XMLRPCException( 17 );
		}

		/** Sanitize Data */
		$source_excerpt = '…' . InputFilter::filter( $source_excerpt[0] ) . '…';
		$source_title = InputFilter::filter( $source_title );
		$source_uri = InputFilter::filter( $source_uri );

		/* Sanitize the URL */
		if ( ! empty( $source_uri ) ) {
			$parsed = InputFilter::parse_url( $source_uri );
			$rejected = false;

			if ( $parsed['is_relative'] ) {
				// guess if they meant to use an absolute link
				$parsed = InputFilter::parse_url( 'http://' . $source_uri );
				if ( $parsed['is_error'] ) {
					// disallow relative URLs; remembered in a flag so the failed parse is not re-glued below
					$rejected = true;
				}
			}

			if ( $rejected || $parsed['is_pseudo'] || ( $parsed['scheme'] !== 'http' && $parsed['scheme'] !== 'https' ) ) {
				// allow only http(s) URLs
				$source_uri = '';
			}
			else {
				// reconstruct the URL from the error-tolerant parsing
				// http:moeffju.net/blog/ -> http://moeffju.net/blog/
				$source_uri = InputFilter::glue_url( $parsed );
			}
		}

		// Add a new pingback comment
		$pingback = new Comment( array(
			'post_id' => $target_post->id,
			'name' => $source_title,
			'email' => '',
			'url' => $source_uri,
			'ip' => Utils::get_ip(),
			'content' => $source_excerpt,
			'status' => Comment::STATUS_UNAPPROVED,
			'date' => HabariDateTime::date_create(),
			'type' => Comment::PINGBACK,
		) );

		$pingback->insert();

		// Respond to the Pingback
		return 'The pingback has been registered';
	}
	catch ( XMLRPCException $e ) {
		$e->output_fault_xml();
	}
}