/**
 * Parse a markup string with tidy and render it through walk_nodes().
 *
 * Steps performed:
 *  - script/link/style elements are renamed to rs:script/rs:link/rs:style and
 *    their content is wrapped in CDATA before tidy sees it;
 *  - fb:share-button elements are rewritten via
 *    RingsideSocialDslParser::_replace_meta_and_links;
 *  - the text is wrapped in an rs:social-dsl element, parsed by tidy using the
 *    tag registry's configuration, and the resulting tree is handed to
 *    walk_nodes(), whose output is captured in an output buffer;
 *  - the rs:* placeholders are turned back into plain script/link/style tags
 *    and the rs:social-dsl wrapper is removed.
 *
 * If walk_nodes() throws, the exception is logged and an empty string is
 * returned; output buffers belonging to the caller are left untouched.
 *
 * @param string $text string to parse.
 *
 * @return string the rendered markup (empty when nothing was rendered).
 */
public function parseString($text)
{
    $tidy = new tidy();

    $escaped_text = preg_replace(
        array(
            '/<script([^>]*)>(.*?)<\\/script>/s',
            '/<link([^>]*)>(.*?)<\\/link>/s',
            '/<link([^>]*)\\/>/s',
            '/<style([^>]*)>(.*?)<\\/style>/s'
        ),
        array(
            '<rs:script$1><![CDATA[$2]]></rs:script>',
            '<rs:link$1><![CDATA[$2]]></rs:link>',
            '<rs:link$1/>',
            '<rs:style$1><![CDATA[$2]]></rs:style>'
        ),
        $text
    );
    $escaped_text = preg_replace_callback(
        '/<fb:share-button[^>]*>.*<\\/fb:share-button>/s',
        array('RingsideSocialDslParser', '_replace_meta_and_links'),
        $escaped_text
    );

    $textToParse = "<rs:social-dsl>{$escaped_text}</rs:social-dsl>";

    $tagRegistry = Social_Dsl_TagRegistry::getInstance();
    $tagRegistry->scanForNewTags($textToParse);
    $tidy->parseString($textToParse, $tagRegistry->getTidyConfiguration());

    // Capture everything walk_nodes() prints. The buffer level is recorded so
    // that, on failure, only buffers opened from here on are discarded.
    $outerLevel = ob_get_level();
    ob_start();
    try {
        $this->walk_nodes($tidy->root());
        $pre_text = ob_get_clean();
    } catch (Exception $e) {
        // Drop our buffer (and any left open below it) exactly once. Calling
        // ob_get_clean() afterwards would pop a buffer owned by the caller.
        while (ob_get_level() > $outerLevel) {
            ob_end_clean();
        }
        error_log($e->getMessage());
        error_log($e->getTraceAsString());
        $pre_text = '';
    }

    if (empty($pre_text)) {
        return $pre_text;
    }

    // We can allow directly <rs:script tags, and replace them with standard script tags
    // TODO review this and make this a handler?
    $final_text = preg_replace(
        array(
            '/<rs:script([^>]*)>(.*?)<!\\[CDATA\\[(.*?)]]>(.*?)<\\/rs:script>/s',
            '/<rs:link([^>]*)>(.*?)<!\\[CDATA\\[(.*?)]]>(.*?)<\\/rs:link>/s',
            '/<rs:link([^>]*)><\\/rs:link>/s',
            '/<rs:link([^>]*)\\/>/s',
            '/<rs:style([^>]*)>(.*?)<!\\[CDATA\\[(.*?)]]>(.*?)<\\/rs:style>/s'
        ),
        array(
            '<script$1>$2$3$4</script>',
            '<link$1>$2$3$4</link>',
            '<link$1/>',
            '<link$1/>',
            '<style$1>$2$3$4</style>'
        ),
        $pre_text
    );

    $result = '';
    if (stripos($text, '<html>') !== false) {
        // The input carried its own HTML wrapper: keep the whole result and
        // only strip the rs:social-dsl wrapper element.
        $result = preg_replace('/<rs:social-dsl>(.*)<\\/rs:social-dsl>/s', '$1', $final_text);
    } else {
        // Otherwise keep only what is inside the rs:social-dsl wrapper.
        $matches = array();
        preg_match('/<rs:social-dsl>(.*)<\\/rs:social-dsl>/s', $final_text, $matches);
        if (isset($matches[1])) {
            $result = $matches[1];
        }
    }

    // Replace truly-empty rs:social-dsl node (tidy does this)
    $result = preg_replace('/<rs:social-dsl *\\/>/', '', $result);

    return $result;
}
/**
 * Echo the body segments.
 *
 * The entries of $this->_body are joined into one string. When tidy is
 * disabled that string is printed unchanged. Otherwise it is parsed with tidy
 * using $this->_config and $this->_tidyEncoding, and one of two things is
 * printed:
 *  - if error echoing is enabled and tidy reported something, the original
 *    body followed by an HTML comment containing tidy's error buffer;
 *  - in every other case, the root node of the parsed document.
 *
 * @return void
 */
public function outputBody()
{
    $markup = implode('', $this->_body);

    if (!$this->_enableTidy) {
        echo $markup;
        return;
    }

    // Only parsing is performed here; cleanRepair() is not invoked.
    $parser = new tidy();
    $parser->parseString($markup, $this->_config, $this->_tidyEncoding);

    $reportErrors = $this->_echoTidyErrors && $parser->errorBuffer;
    if (!$reportErrors) {
        echo $parser->root();
        return;
    }

    echo $markup,
        "\n\n\n<!--\n",
        "tidy detected the following errors:\n",
        $parser->errorBuffer,
        "\n-->";
}
/**
 * Get data from path.
 *
 * Requests $path through $this->client, converts the response body from
 * windows-1251 to UTF-8, runs it through tidy (XHTML output, comments hidden,
 * empty paragraphs dropped, no wrapping) and finally strips <noembed> and
 * <noindex> blocks from the resulting markup.
 *
 * An empty string is returned when the status code is not 200, when the body
 * is empty, or when the body cannot be converted to UTF-8.
 *
 * @param string $path
 *
 * @return string
 *
 * @throws \RuntimeException when the response is flagged as an error.
 */
public function get($path)
{
    /* @var $response \Guzzle\Http\Message\Response */
    $response = $this->client->get($path)->send();

    if ($response->isError()) {
        throw new \RuntimeException('Failed to query the server ' . $this->host);
    }

    if ($response->getStatusCode() != 200 || !($html = $response->getBody(true))) {
        return '';
    }

    // iconv() returns false when the input contains a sequence it cannot
    // convert. Passing that on to tidy would yield a skeleton document, so a
    // failed conversion is treated the same way as an empty body.
    $html = iconv('windows-1251', 'utf-8', $html);
    if ($html === false) {
        return '';
    }

    // clean content
    $config = array(
        'output-xhtml' => true,
        'indent' => true,
        'indent-spaces' => 0,
        'fix-backslash' => true,
        'hide-comments' => true,
        'drop-empty-paras' => true,
        'wrap' => false
    );

    $this->tidy->parseString($html, $config, 'utf8');
    $this->tidy->cleanRepair();
    $html = $this->tidy->root()->value;

    // ignore blocks
    $html = preg_replace('/<noembed>.*?<\\/noembed>/is', '', $html);
    $html = preg_replace('/<noindex>.*?<\\/noindex>/is', '', $html);

    return $html;
}
/**
 * (non-PHPdoc)
 * @see PHPCPD_Detector_Tokenizer_AbstractTokenizer::cpd()
 *
 * Builds a token signature for $file from its tidy parse tree and passes it
 * to the given strategy.
 *
 * For every entry of $this->_aTokens whose first element is not a key of
 * $this->tokensIgnoreList, the third element is recorded as the token's
 * position, and the signature grows by five bytes: the low byte of the first
 * element followed by the big-endian CRC32 of the second element.
 *
 * @param PHPCPD_Detector_Strategy $strategy receives the positions and signature.
 * @param string                   $file     path of the file to analyse.
 *
 * @return $this
 */
public function cpd(PHPCPD_Detector_Strategy $strategy, $file)
{
    // The line count is the number of PHP_EOL sequences in the raw file.
    $contents = file_get_contents($file);
    $this->_iLines = substr_count($contents, PHP_EOL);
    unset($contents);

    $this->_aTokens = array();

    // tidy reads and parses the file itself; _tokenHelper() is given the root
    // node (presumably it fills $this->_aTokens -- confirm in its definition).
    $document = new tidy(
        $file,
        array(
            'clean' => true,
            'drop-proprietary-attributes' => true,
            'output-xhtml' => true,
            'hide-comments' => true,
            'show-body-only' => true,
            'word-2000' => true,
            'wrap' => '0'
        )
    );
    $this->_tokenHelper($document->root());
    unset($document);

    $positions = array();
    $signature = '';
    foreach ($this->_aTokens as $token) {
        if (isset($this->tokensIgnoreList[$token[0]])) {
            continue;
        }

        $positions[] = $token[2];
        $signature .= chr($token[0] & 255) . pack('N*', crc32($token[1]));
    }

    $configured = $strategy->tokenFactor($this->_fTokenFactor, $this->_iMinLines);
    $configured->processFile($file, $positions, $signature);

    return $this;
}