Filters an HTML snippet/document to be XSS-free and standards-compliant.
public purify ( string $html, HTMLPurifier_Config $config = null ) : string
$html | string | String of HTML to purify |
$config | HTMLPurifier_Config | Config object for this operation; if omitted, it defaults to the config object specified during this object's construction. The parameter can also be any type that HTMLPurifier_Config::create() supports. |
return | string | Purified HTML |
/**
 * Purifies an HTML string, or every element of an array of HTML strings,
 * with HTMLPurifier.
 *
 * @param string|array $dirty_html HTML to purify; arrays are processed
 *                                 recursively and their keys are preserved.
 * @param string|bool  $config     Name of the configuration preset to use:
 *                                 'comment' for the WYSIWYG settings, or
 *                                 FALSE (any empty value) for the defaults.
 *                                 Any other value is reported via show_error().
 *
 * @return string|array Purified HTML, in the same shape as the input.
 */
function html_purify($dirty_html, $config = FALSE)
{
    require_once APPPATH . 'third_party/htmlpurifier-4.6.0-standalone/HTMLPurifier.standalone.php';

    if (is_array($dirty_html)) {
        // Initialise the result so an empty input array yields an empty
        // array instead of an undefined variable.
        $clean_html = array();
        foreach ($dirty_html as $key => $val) {
            $clean_html[$key] = html_purify($val, $config);
        }

        return $clean_html;
    }

    $ci =& get_instance();

    // Strict match on the preset name: with a loose comparison TRUE (and,
    // before PHP 8, the integer 0) would silently select the 'comment' preset.
    $is_comment = ($config === 'comment');

    if ($is_comment || !$config) {
        $preset = HTMLPurifier_Config::createDefault();
        $preset->set('Core.Encoding', $ci->config->item('charset'));
        $preset->set('HTML.Doctype', 'XHTML 1.0 Strict');

        if ($is_comment) {
            // settings for the WYSIWYG
            $preset->set('HTML.Allowed', 'a[href|title],img[title|src|alt],em,strong,cite,blockquote,code,ul,ol,li,dl,dt,dd,p,br,h1,h2,h3,h4,h5,h6,span,*[style]');
            $preset->set('AutoFormat.AutoParagraph', TRUE);
            $preset->set('AutoFormat.Linkify', TRUE);
            $preset->set('AutoFormat.RemoveEmpty', TRUE);
        }

        $config = $preset;
    } else {
        show_error('The HTMLPurifier configuration labeled "' . htmlentities($config, ENT_QUOTES, 'UTF-8') . '" could not be found.');
    }

    $purifier = new HTMLPurifier($config);

    return $purifier->purify($dirty_html);
}
/**
 * Purifies the given string and then strips every remaining tag from it.
 *
 * @param string $string
 *
 * @return string
 *
 * @throws TransformationFailedException if $string is null.
 */
public function reverseTransform($string)
{
    if (null !== $string) {
        $purified = $this->purifier->purify($string);

        return strip_tags($purified);
    }

    throw new TransformationFailedException("Field is empty!");
}
/**
 * Strips all tags from the description and then purifies what is left.
 *
 * @param string $description
 *
 * @return string
 *
 * @throws TransformationFailedException if $description is null.
 */
public function reverseTransform($description)
{
    if (null !== $description) {
        $plainText = strip_tags($description);

        return $this->purifier->purify($plainText);
    }

    throw new TransformationFailedException("Description field is empty!");
}
/**
 * @inheritdoc
 *
 * Null is passed through untouched, scalars are cast to string, and any
 * other type is rejected before the value reaches the purifier.
 */
public function transform($value)
{
    if ($value === null) {
        return $value;
    }

    $text = is_scalar($value) ? (string) $value : $value;

    if (!is_string($text)) {
        $message = sprintf('Expected a string to transform, got %s instead', json_encode($text));

        throw new TransformationFailedException($message);
    }

    // purify to remove really obscure html
    return $this->purifier->purify($text);
}
/**
 * Escape any comment for being placed inside HTML, but preserve simple links (<a href="...">).
 *
 * The purifier is built on the first call and kept in self::$purifier for
 * every later call.
 *
 * @param string $comment
 *
 * @return string
 */
public function escapeComment($comment)
{
    if (self::$purifier !== null) {
        return self::$purifier->purify($comment);
    }

    require_once 'HTMLPurifier/Bootstrap.php';
    require_once 'HTMLPurifier.php';
    require_once 'HTMLPurifier.autoload.php';

    $settings = HTMLPurifier_Config::createDefault();
    $settings->set('Core.EscapeNonASCIICharacters', true);
    // only anchors with an href survive
    $settings->set('HTML.Allowed', 'a[href]');
    $settings->set('Cache.DefinitionImpl', null);

    self::$purifier = new HTMLPurifier($settings);

    return self::$purifier->purify($comment);
}
/**
 * Filters submitted data and reassembles it according to the model field types.
 *
 * - 'editor' fields are entity-decoded and run through HTMLPurifier.
 * - 'position' and 'checkbox' fields are joined into a comma-separated string.
 * - Every other field is returned unchanged.
 *
 * @param array $data       Submitted values keyed by field name.
 * @param array $modelfield Model field definitions, passed to parseModelField().
 *
 * @return array The data with the affected fields replaced.
 */
public function filterData($data = array(), $modelfield = array())
{
    $newmodelfield = $this->parseModelField($modelfield);
    $newdata = $data;
    // Built lazily and shared by all editor fields instead of once per field.
    $purifier = null;

    foreach ($data as $k => $d) {
        // Skip fields without a definition or without a type (avoids an
        // undefined-index notice on incomplete definitions).
        if (!isset($newmodelfield[$k]['type'])) {
            continue;
        }

        switch ($newmodelfield[$k]['type']) {
            case 'editor':
                // Rich-text editor content: filter XSS.
                if ($purifier === null) {
                    Vendor('Htmlpurifier.library.HTMLPurifier#auto');
                    $purifier = new \HTMLPurifier(\HTMLPurifier_Config::createDefault());
                }
                $newdata[$k] = $purifier->purify(htmlspecialchars_decode($d));
                break;
            case 'position': // recommended positions
            case 'checkbox':
                // The cast lets a single scalar value pass through implode()
                // instead of raising an error.
                $newdata[$k] = implode(',', (array) $d);
                break;
        }
    }

    return $newdata;
}
/**
 * Value sanitation. Sanitize input and output with ease using one of the sanitation types below.
 *
 * Supported types:
 *  - 'purestring': strips HTML tags (can have issues with invalid tags).
 *  - 'atoz':       keeps only the letters a-z in either case.
 *  - 'integer':    drops every non-digit and passes the rest through intval().
 * Any other type (including the default '') only runs the HTML purifier.
 *
 * @param string $data the string/value you wish to sanitize
 * @param string $type the type of sanitation you wish to use.
 * @return string the sanitized string
 */
public function sanitize($data, $type = '')
{
    // HTML Purifier helps remove malicious scripts and code.
    require_once 'htmlpurifier/HTMLPurifier.standalone.php';

    $config = HTMLPurifier_Config::createDefault();
    $config->set('Core.Encoding', 'UTF-8');
    // The purifier must be given the config; previously it was created
    // before the config existed, so the settings above were ignored.
    $purifier = new HTMLPurifier($config);

    switch ($type) {
        case 'purestring':
            $data = strip_tags($data);
            break;
        case 'atoz':
            $data = preg_replace('/[^a-zA-Z]+/', '', $data);
            break;
        case 'integer':
            $data = intval(preg_replace('/[^0-9]+/', '', $data));
            break;
    }

    // Purify in every case to help prevent XSS in case anything slipped through.
    return $purifier->purify($data);
}
/**
 * Validates the posted article form and stores the article.
 *
 * An invalid form is put into the 'appSession' registry entry and the
 * request is forwarded to the index action. A valid form is written to an
 * existing row (when an 'id' parameter is given) or to a new row, with the
 * content field purified first, and the user is redirected to /news.
 */
public function saveAction()
{
    $form = new News_Form_Article();
    $postData = $this->_request->getPost();
    $form->populate($postData);

    if (!$form->isValid($postData)) {
        Zend_Registry::get('appSession')->articleForm = $form;
        $this->_forward('index');

        return;
    }

    $newsModel = new News_Model_News();

    if (!$this->_getParam('id')) {
        $article = $newsModel->createRow();
    } else {
        $article = $newsModel->getRowInstance($this->_getParam('id'));

        if (!$article) {
            $notice = $this->view->translate('The article doesn\'t exist.');
            $this->_helper->FlashMessenger->addMessage($notice);
            $this->_redirect('/news');

            return;
        }
    }

    require_once 'htmlpurifier/library/HTMLPurifier.auto.php';

    $purifier = new HTMLPurifier(HTMLPurifier_Config::createDefault());
    $cleanHtml = $purifier->purify($form->getValue('content'));

    // only the content field is purified; the others are stored as the form returns them
    $article->title = $form->getValue('title');
    $article->date = $form->getValue('date');
    $article->excerpt = $form->getValue('excerpt');
    $article->content = $cleanHtml;
    $article->save();

    $notice = $this->view->translate('The article has been saved.');
    $this->_helper->FlashMessenger->addMessage($notice);
    $this->_redirect('/news');
}
/**
 * Purifies an HTML string, or every element of an array of HTML strings,
 * with HTMLPurifier.
 *
 * @param string|array $dirty_html HTML to purify; arrays are processed
 *                                 recursively and their keys are preserved.
 * @param string|bool  $config     Name of the configuration preset to use:
 *                                 'comment' for the restricted comment
 *                                 markup, or FALSE (any empty value) for the
 *                                 defaults. Any other value is reported via
 *                                 show_error().
 *
 * @return string|array Purified HTML, in the same shape as the input.
 */
function html_purify($dirty_html, $config = FALSE)
{
    require_once APPPATH . 'third_party/htmlpurifier-4.6.0-standalone/HTMLPurifier.standalone.php';

    if (is_array($dirty_html)) {
        // Initialise the result so an empty input array yields an empty
        // array instead of an undefined variable.
        $clean_html = array();
        foreach ($dirty_html as $key => $val) {
            $clean_html[$key] = html_purify($val, $config);
        }

        return $clean_html;
    }

    $ci =& get_instance();

    // Strict match on the preset name: with a loose comparison TRUE (and,
    // before PHP 8, the integer 0) would silently select the 'comment' preset.
    $is_comment = ($config === 'comment');

    if ($is_comment || !$config) {
        $preset = HTMLPurifier_Config::createDefault();
        $preset->set('Core.Encoding', $ci->config->item('charset'));
        $preset->set('HTML.Doctype', 'XHTML 1.0 Strict');

        if ($is_comment) {
            $preset->set('HTML.Allowed', 'p,a[href|title],abbr[title],acronym[title],b,strong,blockquote[cite],code,em,i,strike');
            $preset->set('AutoFormat.AutoParagraph', TRUE);
            $preset->set('AutoFormat.Linkify', TRUE);
            $preset->set('AutoFormat.RemoveEmpty', TRUE);
        }

        $config = $preset;
    } else {
        show_error('The HTMLPurifier configuration labeled "' . htmlentities($config, ENT_QUOTES, 'UTF-8') . '" could not be found.');
    }

    $purifier = new HTMLPurifier($config);

    return $purifier->purify($dirty_html);
}
/**
 * Transform a raw field value.
 *
 * Applies, in order, every transformation listed under the field's
 * 'transformations' key in the schema. Recognised names (case-insensitive)
 * are "purify", "escape", "purge" and "trim"; unknown names are ignored.
 * A field that is missing from the schema, or that has no transformations,
 * is returned unchanged.
 *
 * @param string $name The name of the field to transform, as specified in the schema.
 * @param string $value The value to be transformed.
 * @return string The transformed value.
 */
public function transformField($name, $value)
{
    $schemaFields = $this->schema->getSchema();

    // empty() covers "field not in schema", "no transformations key" and
    // "empty list" without raising an undefined-index notice.
    if (empty($schemaFields[$name]['transformations'])) {
        return $value;
    }

    $transformedValue = $value;

    foreach ($schemaFields[$name]['transformations'] as $transformation) {
        switch (strtolower($transformation)) {
            case "purify":
                $transformedValue = $this->purifier->purify($transformedValue);
                break;
            case "escape":
                $transformedValue = $this->escapeHtmlCharacters($transformedValue);
                break;
            case "purge":
                $transformedValue = $this->purgeHtmlCharacters($transformedValue);
                break;
            case "trim":
                $transformedValue = $this->trim($transformedValue);
                break;
            default:
                // unknown transformation names are skipped
                break;
        }
    }

    return $transformedValue;
}
/**
 * Renders the list of user messages as an HTML fragment.
 *
 * Each message becomes one "alert" div containing a close button, the
 * subject and the body. Subject and body are translated and then run
 * through the HTML purifier before being concatenated into the markup.
 *
 * @param string $userId             Inserted as-is into the ng-hide and ng-click attributes
 * @param array  $messages           User message objects; each must offer getMessage() and getId()
 * @param bool   $showDefaultMessage Not read anywhere in this method body
 *
 * @return string
 */
protected function render($userId, $messages, $showDefaultMessage = false)
{
    $messageHtml = '';
    $messageHtml .= '<div class="rcmMessage userMessageList" data-ng-controller="rcmMessageList">';
    foreach ($messages as $userMessage) {
        /** @var \RcmMessage\Entity\Message $message */
        $message = $userMessage->getMessage();
        // The CSS name is appended directly after "alert" with no separator,
        // so getCssName() presumably returns a value with a leading space - TODO confirm
        $cssName = $this->getCssName($message->getLevel());
        $messageSubject = $message->getSubject();
        $messageBody = $message->getMessage();
        // NOTE(review): $userId and $userMessage->getId() are concatenated
        // into the attributes below without escaping; only subject and body
        // go through the purifier.
        $messageHtml .= ' <div class="alert' . $cssName
            . '" ng-hide="hiddenUserMessageIds[\'' . $userId . ':' . $userMessage->getId()
            . '\']" role="alert"> <button type="button" class="close" ng-click="dismissUserMessage(' . $userId . ', ' . $userMessage->getId()
            . ')" aria-label="Close"> <span aria-hidden="true">×</span> </button> <span class="subject"> ' . $this->htmlPurifier->purify($this->translator->translate($messageSubject))
            . ': </span> <span class="body"> ' . $this->htmlPurifier->purify($this->translator->translate($messageBody))
            . ' </span> </div> ';
    }
    $messageHtml .= '</div>';
    return $messageHtml;
}
/**
 * Purifies the message text, fills in the project, client and user ids,
 * saves the message and reports the creation to the ActivityManager.
 *
 * @return mixed whatever parent::save() returns
 */
function save()
{
    $this->import_parameters();

    $this->load_library('htmlpurifier-4.5.0-lite/library/HTMLPurifier.auto');
    $purifier = new HTMLPurifier(HTMLPurifier_Config::createDefault());
    $this->set('message', $purifier->purify(html_entity_decode($this->message)));

    $target = new $this->reference_object($this->reference_id);

    // A message posted on a project uses its own reference_id as the project
    // id; for any other object the project id is read from that object.
    if ($this->reference_object == 'project') {
        $project_id = $this->reference_id;
    } else {
        $project_id = isset($target->project_id) ? $target->project_id : false;
    }

    if ($project_id) {
        $this->set('project_id', $project_id);
    }

    if (isset($target->client_id)) {
        $this->set('client_id', $target->client_id);
    }

    $this->set('user_id', current_user()->id);

    // These two are only set when the activity is logged, which happens
    // after the save; clear them anyway to be sure.
    $this->unset_param('linked_object');
    $this->unset_param('linked_object_title');

    $saved = parent::save();

    ActivityManager::message_created($this);

    return $saved;
}
/**
 * Returns null when the value equals the example text; otherwise purifies
 * the value if XSS prevention is enabled and HTMLPurifier is available.
 *
 * The purifier instance is created once and reused on later calls.
 *
 * @param mixed $val
 *
 * @return mixed
 */
protected function _purifyValue($val)
{
    static $purifier = null;

    if ($val == $this->_example) {
        return null;
    }

    if (!$this->_prevent_xss || empty($val)) {
        return $val;
    }

    if ($purifier == null && class_exists('HTMLPurifier')) {
        $internalEncoding = iconv_get_encoding("internal_encoding");

        if ($internalEncoding != "UTF-8") {
            // tell the purifier about the non-UTF-8 internal encoding
            $config = HTMLPurifier_Config::createDefault();
            $config->set('Core.Encoding', iconv_get_encoding("internal_encoding"));
            $purifier = new HTMLPurifier($config);
        } else {
            $purifier = new HTMLPurifier();
        }
    }

    // without the HTMLPurifier class the value is returned as it came in
    return ($purifier != null) ? $purifier->purify($val) : $val;
}
/**
 * Converts a given string to our xml friendly text.
 *
 * The string is UTF-8 encoded and purified; optionally converted from HTML
 * to Markdown with links rewritten to plain text; then stripped of control
 * characters, trimmed and HTML-escaped.
 *
 * @param String $string              Input string to reformat
 * @param bool   $convert_to_markdown Whether to convert the purified HTML to
 *                                    Markdown (default: true)
 * @return String Reformatted string (optional HTML -> Markdown, UTF-8)
 */
public function xml_ready($string, $convert_to_markdown = true)
{
    // Both helpers are created on the first call and reused afterwards.
    static $purifier = null;
    static $markdown = null;

    if ($purifier === null) {
        $purifier_config = HTMLPurifier_Config::createDefault();
        $purifier_config->set('Cache.SerializerPath', realpath($GLOBALS['TMP_PATH']));
        $purifier = new HTMLPurifier($purifier_config);

        $markdown = new HTML_To_Markdown();
        $markdown->set_option('strip_tags', true);
    }

    $string = studip_utf8encode($string);
    $string = $purifier->purify($string);

    if ($convert_to_markdown) {
        $string = $markdown->convert($string);

        // drop links that have no text: [](url)
        $string = preg_replace('/\\[\\]\\((\\w+:\\/\\/.*?)\\)/', '', $string);
        // [url](url "title") becomes "title: url"
        $string = preg_replace('/\\[(\\w+:\\/\\/.*?)\\/?\\]\\(\\1\\/?\\s+"(.*?)"\\)/isxm', '$2: $1', $string);
        // [url](url) becomes "url"
        $string = preg_replace('/\\[(\\w+:\\/\\/.*?)\\/?\\]\\(\\1\\/?\\)/isxm', '$1', $string);
        // [text](url) becomes "text: url"
        $string = preg_replace('/\\[(.*?)\\]\\((\\w+:\\/\\/.*?)\\)/', '$1: $2', $string);
    }

    // remove control characters, keeping tab, line feed and carriage return
    $string = preg_replace('/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f]/', '', $string);
    $string = trim($string);
    $string = htmlspecialchars($string, ENT_QUOTES, 'UTF-8');

    return $string;
}
/**
 * Shows the contact edit page, or - when the edit form was submitted -
 * purifies the posted content, stores it, sets a status notice and sends a
 * redirect header to the contact overview.
 */
public function edit_contact()
{
    if (!isset($_POST['edit_contact_btn'])) {
        $view_data = array();
        $view_data['contact'] = $this->Contact->get_contact();
        $view_data['subView'] = '/manage_site/contact/edit_contact_layout';
        $view_data['title'] = "Cập nhật thông tin liên hệ";
        // the sub view receives a copy of everything collected so far
        $view_data['subData'] = $view_data;
        $this->load->view('/main/main_layout', $view_data);

        return;
    }

    $posted = $this->input->post();

    $this->load->helper('HTMLPurifier');
    $purifier = new HTMLPurifier(HTMLPurifier_Config::createDefault());

    $changes = array();
    $changes['content'] = $purifier->purify($posted['content_contact']);

    if ($this->Contact->update($changes)) {
        $status = SUCCESS_STATUS;
        $notice = 'Cập nhật thông tin liên lạc thành công.';
    } else {
        $status = FAILED_STATUS;
        $notice = 'Cập nhật thông tin liên lạc thất bại.';
    }

    // both outcomes end on the same page
    set_notice('status', $status, $notice);
    header('location:' . base_url() . 'index.php/_admin/manage_site/contact/show_contact');
}
/**
 * Downloads a page, purifies its HTML and extracts data with a selector.
 *
 * @param string $url   Address passed to file_get_contents().
 * @param string $path  Selector passed to the parsed document's find().
 * @param string $parse Which property of the match to return: 'tag',
 *                      'outertext', 'innertext' or 'plaintext'; any other
 *                      value returns the raw find() result.
 *
 * @return mixed|null The requested data, or null when the page could not be
 *                    downloaded or parsed.
 */
function scrape($url, $path, $parse)
{
    $dirty_html = file_get_contents($url);
    if ($dirty_html === false) {
        // download failed; nothing to purify or parse
        return null;
    }

    $config = HTMLPurifier_Config::createDefault();
    $config->set('Core.Encoding', 'UTF-8'); // encoding of output
    $config->set('HTML.Doctype', 'XHTML 1.1'); // doctype of output
    $purifier = new HTMLPurifier($config);

    $html = str_get_html($purifier->purify($dirty_html));
    if (!$html) {
        // Without this guard the find() calls below would be a fatal
        // method call on a non-object.
        return null;
    }

    // NOTE(review): the properties are read directly from the find()
    // result; confirm find() returns a single node for a one-argument call.
    switch ($parse) {
        case 'tag':
            $ret = $html->find($path)->tag;
            break;
        case 'outertext':
            $ret = $html->find($path)->outertext;
            break;
        case 'innertext':
            $ret = $html->find($path)->innertext;
            break;
        case 'plaintext':
            $ret = $html->find($path)->plaintext;
            break;
        default:
            $ret = $html->find($path);
            break;
    }

    // clean up memory
    $html->clear();

    return $ret;
}
/**
 * Gets the selected HTML Filter & filters the content.
 *
 * A marker comment is appended to purified output; a marker already present
 * in the input is removed first, so the result carries at most one marker
 * at the end.
 *
 * @param string $html input to be cleaned
 * @TODO allow the webmasters to select which HTML Filter they want to use such as
 * HTMLPurifier, HTMLLawed etc, for now we just have HTMLPurifier.
 * @return string
 **/
public function filterHTML($html)
{
    $icmsConfigPurifier = icms::$config->getConfigsByCat(ICMS_CONF_PURIFIER);
    $fcomment = '<!-- filtered with htmlpurifier -->';

    $purified = strpos($html, $fcomment);
    if ($purified !== FALSE) {
        $html = str_replace($fcomment, '', $html);
    }

    // NOTE(review): strict comparison against integer 0 - if the config
    // values are stored as strings these checks are always true; confirm.
    if ($icmsConfigPurifier['enable_purifier'] !== 0) {
        require_once ICMS_LIBRARIES_PATH . '/htmlpurifier/HTMLPurifier.standalone.php';
        require_once ICMS_LIBRARIES_PATH . '/htmlpurifier/HTMLPurifier.autoload.php';

        if ($icmsConfigPurifier['purifier_Filter_ExtractStyleBlocks'] !== 0) {
            require_once ICMS_PLUGINS_PATH . '/csstidy/class.csstidy.php';
        }

        // get the Config Data
        $icmsPurifyConf = self::getHTMLFilterConfig();
        // uncomment for specific config debug info
        //parent::filterDebugInfo('icmsPurifyConf', $icmsPurifyConf);

        $purifier = new HTMLPurifier($icmsPurifyConf);
        $html = $purifier->purify($html);
        $html .= $fcomment;
    }

    return $html;
}
/**
 * Purifies HTML with a fixed set of directives that callers may override.
 *
 * @param string $html    HTML to purify; an empty value returns ''.
 * @param array  $options Extra HTMLPurifier directives (name => value),
 *                        applied after the built-in ones.
 *
 * @return string
 */
public function purify($html, $options = array())
{
    if (empty($html)) {
        return '';
    }

    require_once Config::get('HTML_PURIFIER');
    require_once 'HTMLPurifier.func.php';

    $html = Util\toUTF8String($html);

    // AutoFormat.RemoveEmpty and AutoFormat.RemoveSpansWithoutAttributes are
    // deliberately not set: they were found to slow down HTML parsing.
    $directives = array(
        'AutoFormat.AutoParagraph' => false,
        'AutoFormat.RemoveEmpty.RemoveNbsp' => true,
        'HTML.ForbiddenElements' => array('head'),
        'HTML.SafeIframe' => true,
        'HTML.TargetBlank' => true,
        'URI.DefaultScheme' => 'https',
        'Attr.EnableID' => true,
    );

    $config = \HTMLPurifier_Config::createDefault();
    foreach ($directives as $directive => $setting) {
        $config->set($directive, $setting);
    }

    if (!empty($options)) {
        foreach ($options as $directive => $setting) {
            $config->set($directive, $setting);
        }
    }

    $purifier = new \HTMLPurifier($config);

    // Reserve 1 MiB that is freed again on error
    Cache::set('memory', str_repeat('*', 1024 * 1024));
    register_shutdown_function(array($this, 'onScriptShutdown'));

    $html = $purifier->purify($html);

    Cache::remove('memory');

    return str_replace('/preview/#', '#', $html);
}
/**
 * Runs the submitted data through HTML Purifier.
 *
 * Reads this control's value from the form's HTTP data (non-scalar or
 * missing values become NULL), purifies it with a restricted tag list and
 * stores the result as the control value.
 *
 * @return void
 */
public function loadHttpData()
{
    $httpData = $this->getForm()->getHttpData();
    $controlName = $this->getName();

    $raw = NULL;
    if (isset($httpData[$controlName]) && is_scalar($httpData[$controlName])) {
        $raw = $httpData[$controlName];
    }

    $config = HTMLPurifier_Config::createDefault();
    $config->set('Core.Encoding', $this->encoding);
    if ($this->docType !== NULL) {
        $config->set('HTML.Doctype', $this->docType);
    }
    $config->set('HTML.Allowed', 'p,a[href],strong,em,b,i,ul,ol,li,h1,h2,h3,h4,h5,div[class],span[class],br,sup,table[border],tr,td,th,thead,tbody,img[src],img[style]');

    // Allow arbitrary content in the href attribute of links, so that latte
    // links can be used.
    $config->set('HTML.DefinitionID', 'enduser-customize.html tutorial');
    $definition = $config->getHTMLDefinition(true);
    $definition->addAttribute('a', 'href*', 'Text');

    $purifier = new HTMLPurifier($config);

    // CKEditor escapes the submitted markup; the value is passed on as
    // received (no html_entity_decode) and the purifier escapes it properly.
    $this->setValue($purifier->purify($raw));
}
/**
 * Cleans the comment text of HTML so it can be used as submitted text.
 *
 * Depending on the app.yml configuration this uses the htmlpurifier library
 * or a plain strip_tags() call.
 *
 * @param String $text the text to be cleaned
 *
 * @return String
 *
 * @author Guglielmo Celata
 * @see http://htmlpurifier.org/
 **/
public static function clean($text)
{
    $allowed_html_tags = sfConfig::get('app_deppPropelActAsCommentableBehaviorPlugin_allowed_tags', array());
    $use_htmlpurifier = sfConfig::get('app_deppPropelActAsCommentableBehaviorPlugin_use_htmlpurifier', false);

    if (!$use_htmlpurifier) {
        // strip_tags path: the configured values are concatenated into the
        // allowed-tags argument
        $allowed = "";
        foreach ($allowed_html_tags as $allowed_tag) {
            $allowed .= "{$allowed_tag}";
        }

        return strip_tags($text, $allowed);
    }

    $default_path = SF_ROOT_DIR . DIRECTORY_SEPARATOR . 'lib' . DIRECTORY_SEPARATOR . 'htmlpurifier' . DIRECTORY_SEPARATOR . 'library' . DIRECTORY_SEPARATOR;
    $htmlpurifier_path = sfConfig::get('app_deppPropelActAsCommentableBehaviorPlugin_htmlpurifier_path', $default_path);
    require_once $htmlpurifier_path . 'HTMLPurifier.auto.php';

    $has_links = isset($allowed_html_tags['a']);
    $has_paragraphs = isset($allowed_html_tags['p']);

    $config = HTMLPurifier_Config::createDefault();
    $config->set('HTML', 'Doctype', 'XHTML 1.0 Strict');
    // purifier path: the array keys are the tag names
    $config->set('HTML', 'Allowed', implode(',', array_keys($allowed_html_tags)));

    if ($has_links) {
        $config->set('HTML', 'AllowedAttributes', 'a.href');
        $config->set('AutoFormat', 'Linkify', true);
    }

    if ($has_paragraphs) {
        $config->set('AutoFormat', 'AutoParagraph', true);
    }

    $purifier = new HTMLPurifier($config);

    return $purifier->purify($text);
}
/**
 * Purify session description
 *
 * Walks all session descriptions in batches of self::QUERY_LIMIT ordered by
 * id, purifies each one and stores it back when the purified text differs.
 * The entity manager is flushed and cleared after every batch.
 *
 * @param OutputInterface $output Receives the progress bar and summary lines.
 */
protected function purifySessionDescription(OutputInterface $output)
{
    $cleaned = 0;
    // Offsets are zero-based; starting at 1 skipped the first description.
    $offset = 0;
    $limit = self::QUERY_LIMIT;
    $total = $this->sessionDescriptionManager->getTotalSessionDescriptionCount();

    $progress = new ProgressBar($output, $total);
    $progress->setRedrawFrequency(208);
    $output->writeln("<info>Starting cleanup of session descriptions...</info>");
    $progress->start();

    do {
        $descriptions = $this->sessionDescriptionManager->findBy(array(), array('id' => 'ASC'), $limit, $offset);

        foreach ($descriptions as $description) {
            $original = $description->getDescription();
            $clean = $this->purifier->purify($original);

            if ($original != $clean) {
                $cleaned++;
                $description->setDescription($clean);
                // second argument false: the flush happens once per batch below
                $this->sessionDescriptionManager->update($description, false);
            }

            $progress->advance();
        }

        $offset += $limit;
        $this->em->flush();
        $this->em->clear();
        // a short batch means the last page has been processed
    } while (count($descriptions) == $limit);

    $progress->finish();
    $output->writeln('');
    $output->writeln("<info>{$cleaned} Session Descriptions updated.</info>");
}
/**
 * Sanitizes a value according to the requested type.
 *
 * Types: "int", "string", "url" and "email" use the matching filter_var()
 * sanitize filter (email is additionally lower-cased); "markdown" runs the
 * input through HTMLPurifier; "comment" converts it with htmlentities().
 * The default type "old" prints a message and stops the script. Any other
 * type returns the input unchanged.
 *
 * @param mixed  $input value to sanitize
 * @param string $type  one of the types listed above
 *
 * @return mixed the sanitized value
 */
function sanitize($input, $type = "old")
{
    if ($type == "int") {
        return filter_var($input, FILTER_SANITIZE_NUMBER_INT);
    }

    if ($type == "string") {
        return filter_var($input, FILTER_SANITIZE_STRING);
    }

    if ($type == "url") {
        return filter_var($input, FILTER_SANITIZE_URL);
    }

    if ($type == "email") {
        $address = filter_var($input, FILTER_SANITIZE_EMAIL);

        return strtolower($address);
    }

    if ($type == "markdown") {
        include_once ROOT . DS . 'libraries' . DS . 'purifier' . DS . 'HTMLPurifier.auto.php';
        $purifier = new HTMLPurifier();

        return $purifier->purify($input);
    }

    if ($type == "comment") {
        return htmlentities($input, ENT_QUOTES, "UTF-8");
    }

    if ($type == "old") {
        // callers that never chose a type are stopped here
        echo "Old version of sanitize called";
        exit;
    }

    return $input;
}
/**
 * Converts markdown to HTML and purifies the result.
 *
 * The content is first passed to transform(); the HTML it returns is then
 * cleaned by an HTMLPurifier instance built from $this->purifierOptions,
 * with the application's runtime path as serializer cache path, to avoid
 * XSS attacks.
 *
 * @param string $content the markdown content
 * @return string the purified HTML content
 */
public function safeTransform($content)
{
    $html = $this->transform($content);

    $purifier = new HTMLPurifier($this->purifierOptions);
    $cachePath = Yii::app()->getRuntimePath();
    $purifier->config->set('Cache.SerializerPath', $cachePath);

    return $purifier->purify($html);
}
/**
 * Smarty modifier: purifies HTML with HTMLPurifier.
 *
 * When the encoding is not UTF-8 and mbstring is available, the HTML is
 * converted to UTF-8 for purification and converted back afterwards.
 *
 * @param string      $html    HTML to purify.
 * @param string|null $ecoding Character encoding of $html; falls back to
 *                             _CHARSET when empty. (The misspelled name is
 *                             part of the existing signature and is kept.)
 * @param string|null $doctype One of "HTML 4.01 Strict", "HTML 4.01
 *                             Transitional", "XHTML 1.0 Strict", "XHTML 1.0
 *                             Transitional" or "XHTML 1.1"; other values
 *                             leave the purifier's doctype setting untouched.
 *
 * @return string Purified HTML in the original encoding.
 */
function smarty_modifier_xoops_html_purifier($html, $ecoding = null, $doctype = null)
{
    require_once XOOPS_LIBRARY_PATH . '/htmlpurifier/library/HTMLPurifier.auto.php';

    // Read the argument that is actually declared; the body used to read an
    // undefined $encoding, so the caller's encoding was always ignored.
    $encoding = $ecoding ? $ecoding : _CHARSET;

    $doctypeArr = array("HTML 4.01 Strict", "HTML 4.01 Transitional", "XHTML 1.0 Strict", "XHTML 1.0 Transitional", "XHTML 1.1");

    $config = HTMLPurifier_Config::createDefault();
    if (in_array($doctype, $doctypeArr)) {
        $config->set('HTML.Doctype', $doctype);
    }

    // true when the HTML has to be converted to UTF-8 and back
    $_conv = ($encoding !== 'UTF-8' && function_exists('mb_convert_encoding'));

    if ($_conv) {
        // silently drop characters that cannot be converted
        $_substitute = mb_substitute_character();
        mb_substitute_character('none');
        $html = mb_convert_encoding($html, 'UTF-8', $encoding);
        $config->set('Core.Encoding', 'UTF-8');
    } else {
        $config->set('Core.Encoding', $encoding);
    }

    $purifier = new HTMLPurifier($config);
    $html = $purifier->purify($html);

    if ($_conv) {
        $html = mb_convert_encoding($html, $encoding, 'UTF-8');
        // restore the previous substitute character
        mb_substitute_character($_substitute);
    }

    return $html;
}
/**
 * Convert user input to HTML
 * Do not call this function recursively.
 *
 * This implementation only purifies $text with a default HTMLPurifier
 * configuration; the remaining arguments are not read in the method body.
 *
 * @param string $text Text we want to parse
 * @param Title $title
 * @param ParserOptions $options
 * @param bool $lineStart
 * @param bool $clearState
 * @param int $revId Number to pass in {{REVISIONID}}
 * @return ParserOutput wrapping the purified text
 */
public function parse($text, Title $title, ParserOptions $options, $lineStart = true, $clearState = true, $revId = null)
{
    $purifier = new HTMLPurifier(HTMLPurifier_Config::createDefault());
    $purified = $purifier->purify($text);

    return new ParserOutput($purified);
}
/**
 * {@inheritDoc}
 *
 * A non-empty $config must implement ConfigurationInterface; otherwise a
 * \BadMethodCallException is thrown before the parent is called.
 */
public function purify($content, $config = null)
{
    $acceptable = !$config || $config instanceof ConfigurationInterface;

    if ($acceptable) {
        return parent::purify($content, $config);
    }

    $message = sprintf('Argument 2 must be an instance of %s\\ConfigurationInterface or null', __NAMESPACE__);

    throw new \BadMethodCallException($message);
}
/**
 * Generates a poem, stores it and reroutes to its page.
 *
 * Picks one of two models at random, posts a sampling command to the
 * service on port 1337, purifies the response body, saves it as JSON
 * together with the seed and model, posts the new id to the service on
 * port 1338 and finally reroutes to /poem/<id>.
 *
 * @param object $f3 Only its reroute() method is used here.
 */
public function generate($f3)
{
    require_once '***/libs/htmlpurifier/library/HTMLPurifier.auto.php';
    $purifier = new HTMLPurifier();

    make_seed();

    $models = array('cv2/lm_lstm_epoch50.00_0.5080.t7', 'cv/lm_lstm_epoch46.00_0.7940.t7');
    $model = $models[array_rand($models, 1)];
    $seed = round(rand());

    $cmd = 'cd ***/char-rnn && th ***/char-rnn/sample.lua -verbose 0 -temperature 0.8 -gpuid -1 -seed ' . $seed . ' -length 2048 -primetext "<poem><html><head><meta charset=\\"utf-8\\"><style>body{background-color:#000;color:#0c0;}</style></head><body>" /home/drakh/klingon-poetry/' . $model;

    // ask the sampling service to run the command
    $sampleRequest = array(
        'method' => 'POST',
        'content' => http_build_query(array('cmd' => $cmd)),
    );
    $sampleResponse = \Web::instance()->request('http://127.0.0.1:1337', $sampleRequest);

    $clean_html = $purifier->purify($sampleResponse['body']);
    $poem = nl2br(trim($clean_html));

    $record = array('seed' => $seed, 'model' => $model, 'poem' => $poem);
    $this->model->poem = json_encode($record, JSON_UNESCAPED_UNICODE);
    $this->model->written_date = date('d.m.Y H:i:s');
    $this->model->save();

    $id = $this->model->id_poem;

    // tell the second service about the stored poem; its response is not used
    $notifyRequest = array(
        'method' => 'POST',
        'content' => http_build_query(array('id' => $id)),
    );
    \Web::instance()->request('http://127.0.0.1:1338', $notifyRequest);

    $f3->reroute('/poem/' . $id);
}
/**
 * @inheritdoc
 *
 * Converts an HTML value to Markdown. Null is returned untouched, scalars
 * are cast to string and any other type raises a
 * TransformationFailedException. The HTML is purified and normalised with a
 * series of regular expressions, handed to the converter, and the resulting
 * Markdown is tidied up (bullets, trailing whitespace, blank lines).
 */
public function transform($value)
{
    if (is_null($value)) {
        return $value;
    }
    if (is_scalar($value)) {
        $value = (string) $value;
    }
    if (!is_string($value)) {
        throw new TransformationFailedException(sprintf('Expected a string to transform, got %s instead', json_encode($value)));
    }
    // replace non-breaking spaces, somehow this results in a question mark when markdownifying
    // NOTE(review): as written here both search strings look like ordinary
    // spaces, which would make this call a no-op; presumably they should be
    // the non-breaking-space entity and character - confirm against the
    // file's actual bytes.
    $value = str_replace([' ', " "], ' ', $value);
    // remove leading spaces/tabs on every line
    $value = preg_replace('/^[ \\t]+/m', '', $value);
    // purify the html first
    $value = $this->purifier->purify($value);
    // perform some replacements, in this order:
    //  1. remove whitespace between tags and before closing tags
    //  2. remove whitespace around <br>
    //  3. turn a newline between two non-tag characters into <br>
    //  4. drop a <br> directly after an opening <p>/<li>
    //  5. drop a <br> directly before a closing </p>/</li>
    $replacements = [[['/>\\s+</', '/\\s+<\\//'], ['><', '</']], [['/\\s+<br\\/?>/', '/<br\\/?>\\s+/'], '<br>'], ['/([^>])\\n([^<])/', '\\1<br>\\2'], ['/(<(p|li)>)<br\\s?\\/?>/i', '\\1'], ['/<br\\s?\\/?>(<\\/(p|li)>)/i', '\\1']];
    foreach ($replacements as list($search, $replace)) {
        $value = preg_replace($search, $replace, $value);
    }
    // strip tags in headings (h1 to h6); <br> inside a heading becomes a space
    foreach (range(1, 6) as $headingSize) {
        $value = preg_replace_callback('/(<h' . $headingSize . '>)(.*)(<\\/h' . $headingSize . '>)/iU', function ($matches) {
            // leave the match alone unless all three groups were captured
            if (count($matches) !== 4) {
                return $matches[0];
            }
            return $matches[1] . trim(strip_tags(str_replace('<br>', ' ', $matches[2]))) . $matches[3];
        }, $value);
    }
    // remove any double bullets
    $value = preg_replace('/(<li>\\s*)[\\*|\\-]{1}/im', '\\1', $value);
    // convert to markdown; errors raised by the converter are suppressed with @
    $value = @$this->converter->parseString($value);
    // Fix different types of bullets. What this does is check each line if it starts with any of "-ו○",
    // not followed by another bullet, and normalizes it to "* text".
    $value = preg_replace('/^[\\-ו○]\\s*([^\\-ו○])/mu', '* $1', $value);
    // Now make sure there's a newline before 2 consecutive lines that start with a bullet.
    // This could lead to superfluous newlines, but they will be corrected later on.
    $value = preg_replace('/(\\n\\* [^\\n]+){2,}/', "\n\$0", "\n" . $value);
    // remove trailing spaces/tabs
    $value = preg_replace('/[ \\t]+$/m', '', $value);
    // remove excessive newlines
    $value = preg_replace('/\\n{3,}/m', "\n\n", $value);
    return trim($value);
}
/**
 * Extracts event data from an HTML fragment that uses "vevent" class names.
 *
 * Looks inside every element whose class contains "vevent" for descendants
 * classed "dtstart", "dtend", "description" and "location".
 *
 * @param string $h HTML fragment; it is wrapped in <html><body> before parsing.
 *
 * @return array May contain 'start', 'finish', 'adjust', 'desc' and
 *               'location'; empty when the fragment cannot be parsed.
 */
function parse_event($h)
{
    require_once 'include/Scrape.php';
    require_once 'library/HTMLPurifier.auto.php';
    // NOTE(review): unlike the other includes this path has no ".php"
    // extension - confirm that the file name is intentional.
    require_once 'include/html2bbcode';

    $h = '<html><body>' . $h . '</body></html>';
    $ret = array();

    // Defined up front so the check below is safe after a caught parse error.
    $dom = null;
    try {
        $dom = HTML5_Parser::parse($h);
    } catch (DOMException $e) {
        logger('parse_event: parse error: ' . $e);
    }

    if (!$dom) {
        return $ret;
    }

    $items = $dom->getElementsByTagName('*');
    foreach ($items as $item) {
        if (!attribute_contains($item->getAttribute('class'), 'vevent')) {
            continue;
        }

        $level2 = $item->getElementsByTagName('*');
        foreach ($level2 as $x) {
            $class = $x->getAttribute('class');

            if (attribute_contains($class, 'dtstart') && $x->getAttribute('title')) {
                $ret['start'] = $x->getAttribute('title');
                // Explicit false check: a "Z" at position 0 is still a match.
                if (strpos($ret['start'], 'Z') === false) {
                    $ret['adjust'] = true;
                }
            }
            if (attribute_contains($class, 'dtend') && $x->getAttribute('title')) {
                $ret['finish'] = $x->getAttribute('title');
            }
            if (attribute_contains($class, 'description')) {
                $ret['desc'] = $x->textContent;
            }
            if (attribute_contains($class, 'location')) {
                $ret['location'] = $x->textContent;
            }
        }
    }

    // sanitise: text fields that still contain angle brackets are purified
    // and converted to BBCode; one purifier serves both fields
    $purifier = null;
    foreach (array('desc', 'location') as $field) {
        if (!x($ret, $field)) {
            continue;
        }
        if (strpos($ret[$field], '<') === false && strpos($ret[$field], '>') === false) {
            continue;
        }
        if ($purifier === null) {
            $config = HTMLPurifier_Config::createDefault();
            $config->set('Cache.DefinitionImpl', null);
            $purifier = new HTMLPurifier($config);
        }
        $ret[$field] = html2bbcode($purifier->purify($ret[$field]));
    }

    if (x($ret, 'start')) {
        $ret['start'] = datetime_convert('UTC', 'UTC', $ret['start']);
    }
    if (x($ret, 'finish')) {
        $ret['finish'] = datetime_convert('UTC', 'UTC', $ret['finish']);
    }

    return $ret;
}
/**
 * Purify HTML.
 *
 * Function-style shortcut: one HTMLPurifier instance is created on the
 * first call and reused by every later call.
 *
 * @param $html String HTML to purify
 * @param $config Configuration to use, can be any value accepted by
 *        HTMLPurifier_Config::create()
 */
function HTMLPurifier($html, $config = null)
{
    static $purifier = false;

    if ($purifier) {
        return $purifier->purify($html, $config);
    }

    $purifier = new HTMLPurifier();

    return $purifier->purify($html, $config);
}