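/**
  * Parse social-DSL markup with tidy and return the output produced while
  * walking the parsed tree.
  *
  * Script, link and style elements are renamed to rs:-prefixed tags before
  * parsing and turned back into plain HTML tags afterwards.
  *
  * @param string $text markup to parse
  *
  * @return string resulting markup; an empty value when nothing was produced
  */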
 public function parseString($text)
 {
     // Rename script/link/style elements to rs:-prefixed tags and wrap their
     // contents in CDATA before the text is handed to tidy.
     $escaped_text = preg_replace(
         array(
             '/<script([^>]*)>(.*?)<\\/script>/s',
             '/<link([^>]*)>(.*?)<\\/link>/s',
             '/<link([^>]*)\\/>/s',
             '/<style([^>]*)>(.*?)<\\/style>/s',
         ),
         array(
             '<rs:script$1><![CDATA[$2]]></rs:script>',
             '<rs:link$1><![CDATA[$2]]></rs:link>',
             '<rs:link$1/>',
             '<rs:style$1><![CDATA[$2]]></rs:style>',
         ),
         $text
     );
     // fb:share-button blocks are rewritten by RingsideSocialDslParser::_replace_meta_and_links.
     $escaped_text = preg_replace_callback('/<fb:share-button[^>]*>.*<\\/fb:share-button>/s', array('RingsideSocialDslParser', '_replace_meta_and_links'), $escaped_text);

     // Wrap everything in a single rs:social-dsl root element; it is stripped again below.
     $textToParse = "<rs:social-dsl>{$escaped_text}</rs:social-dsl>";
     $tagRegistry = Social_Dsl_TagRegistry::getInstance();
     $tagRegistry->scanForNewTags($textToParse);

     $tidy = new tidy();
     $tidy->parseString($textToParse, $tagRegistry->getTidyConfiguration());

     // walk_nodes() writes its result to the output stream, so capture it in a buffer.
     ob_start();
     try {
         $this->walk_nodes($tidy->root());
     } catch (Exception $e) {
         // Throw away the partial output and stop here. Falling through to
         // ob_get_clean() after the buffer has already been closed would close
         // (and steal the contents of) an enclosing buffer owned by the caller.
         ob_end_clean();
         error_log($e->getMessage());
         error_log($e->getTraceAsString());
         return '';
     }
     $pre_text = ob_get_clean();
     if (empty($pre_text)) {
         return $pre_text;
     }

     // We can allow directly <rs:script tags, and replace them with standard script tags
     // TODO review this and make this a handler?
     $final_text = preg_replace(
         array(
             '/<rs:script([^>]*)>(.*?)<!\\[CDATA\\[(.*?)]]>(.*?)<\\/rs:script>/s',
             '/<rs:link([^>]*)>(.*?)<!\\[CDATA\\[(.*?)]]>(.*?)<\\/rs:link>/s',
             '/<rs:link([^>]*)><\\/rs:link>/s',
             '/<rs:link([^>]*)\\/>/s',
             '/<rs:style([^>]*)>(.*?)<!\\[CDATA\\[(.*?)]]>(.*?)<\\/rs:style>/s',
         ),
         array(
             '<script$1>$2$3$4</script>',
             '<link$1>$2$3$4</link>',
             '<link$1/>',
             '<link$1/>',
             '<style$1>$2$3$4</style>',
         ),
         $pre_text
     );

     $result = '';
     if (strstr(strtolower($text), '<html>')) {
         // If the input has the HTML wrapper, emit the whole result (minus the rs:social-dsl wrapper)
         $result = preg_replace('/<rs:social-dsl>(.*)<\\/rs:social-dsl>/s', '$1', $final_text);
     } else {
         // Otherwise keep only what sits inside the rs:social-dsl wrapper.
         $matches = array();
         preg_match('/<rs:social-dsl>(.*)<\\/rs:social-dsl>/s', $final_text, $matches);
         if (isset($matches[1])) {
             $result = $matches[1];
         }
     }

     // Replace truly-empty rs:social-dsl node (tidy does this)
     return preg_replace('/<rs:social-dsl *\\/>/', '', $result);
 }
Example #2
0
 /**
  * Echo the body segments
  *
  * @return void
  */
 public function outputBody()
 {
     // Join the collected body segments into one string.
     $markup = implode('', $this->_body);

     // Tidy switched off: emit the joined segments untouched.
     if (!$this->_enableTidy) {
         echo $markup;
         return;
     }

     // Parse only; cleanRepair() is not called here.
     $parser = new tidy();
     $parser->parseString($markup, $this->_config, $this->_tidyEncoding);

     $reportErrors = $this->_echoTidyErrors && $parser->errorBuffer;
     if (!$reportErrors) {
         // No errors to report (or reporting disabled): print the parsed document.
         echo $parser->root();
         return;
     }

     // Errors are to be reported: print the raw body followed by tidy's
     // diagnostics wrapped in an HTML comment.
     echo $markup,
         "\n\n\n<!--\n",
         "tidy detected the following errors:\n",
         $parser->errorBuffer,
         "\n-->";
 }
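 /**
  * Fetch the page at the given path and return its cleaned-up HTML.
  *
  * @param string $path path to request through the HTTP client
  *
  * @return string tidied HTML converted to UTF-8, or an empty string when
  *                the response is not a 200 or has no body
  *
  * @throws \RuntimeException when the response is reported as an error
  */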
 public function get($path)
 {
     // Request the path, convert the body from windows-1251 to UTF-8, run it
     // through tidy and strip <noembed>/<noindex> blocks from the result.

     /* @var $response \Guzzle\Http\Message\Response */
     $response = $this->client->get($path)->send();
     if ($response->isError()) {
         throw new \RuntimeException('Failed to query the server ' . $this->host);
     }
     // Anything other than a 200 with a non-empty body yields no content.
     if ($response->getStatusCode() != 200 || !($html = $response->getBody(true))) {
         return '';
     }

     // iconv() returns false when the body cannot be converted; treat that
     // like an unusable body instead of feeding false into tidy.
     $html = iconv('windows-1251', 'utf-8', $html);
     if ($html === false) {
         return '';
     }

     // clean content
     $config = array(
         'output-xhtml' => true,
         'indent' => true,
         'indent-spaces' => 0,
         'fix-backslash' => true,
         'hide-comments' => true,
         'drop-empty-paras' => true,
         'wrap' => false,
     );
     $this->tidy->parseString($html, $config, 'utf8');
     $this->tidy->cleanRepair();
     $html = $this->tidy->root()->value;

     // ignore blocks
     foreach (array('/<noembed>.*?<\\/noembed>/is', '/<noindex>.*?<\\/noindex>/is') as $pattern) {
         $stripped = preg_replace($pattern, '', $html);
         // preg_replace() returns null on a PCRE error (e.g. backtrack limit);
         // keep the text as it was rather than returning null.
         if ($stripped !== null) {
             $html = $stripped;
         }
     }

     return $html;
 }
Example #4
0
 /**
  * (non-PHPdoc)
  * @see PHPCPD_Detector_Tokenizer_AbstractTokenizer::cpd()
  */
 public function cpd(PHPCPD_Detector_Strategy $strategy, $file)
 {
     // Count line endings in the raw file and reset the token list.
     $contents = file_get_contents($file);
     $this->_iLines = substr_count($contents, PHP_EOL);
     $this->_aTokens = array();

     // Let tidy load and normalise the file, then hand its root node to the
     // token helper (presumably this fills $this->_aTokens; confirm in _tokenHelper).
     $tidyOptions = array(
         'clean' => true,
         'drop-proprietary-attributes' => true,
         'output-xhtml' => true,
         'hide-comments' => true,
         'show-body-only' => true,
         'word-2000' => true,
         'wrap' => '0',
     );
     $document = new tidy($file, $tidyOptions);
     $rootNode = $document->root();
     $this->_tokenHelper($rootNode);
     unset($document, $rootNode, $contents);

     // Build the position list and the signature string from every token that
     // is not on the ignore list.
     $positions = array();
     $signature = '';
     foreach ($this->_aTokens as $entry) {
         if (isset($this->tokensIgnoreList[$entry[0]])) {
             continue;
         }
         $positions[] = $entry[2];
         // One byte from the token id followed by the 4-byte CRC32 of the token text.
         $signature .= chr($entry[0] & 255);
         $signature .= pack('N*', crc32($entry[1]));
     }

     $configured = $strategy->tokenFactor($this->_fTokenFactor, $this->_iMinLines);
     $configured->processFile($file, $positions, $signature);

     return $this;
 }