/**
 * Sanitizes an HTML string by running it through the htmLawed library.
 *
 * Defaults are resolved as follows:
 *  - a null `$config` is replaced by the class default config;
 *  - a falsy `$spec` is replaced by `$config['spec']` when that key is set;
 *  - a `$spec` that is still exactly null is replaced by the class default spec.
 *
 * @param string $html The markup to filter.
 * @param array|null $config htmLawed config settings, or null to use the defaults.
 * @param string|array|null $spec A specification to further limit the allowed attribute values in the html.
 * @return string Returns the filtered html.
 * @see http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/htmLawed_README.htm
 */
public static function filter($html, array $config = null, $spec = null) {
    require_once __DIR__ . '/htmLawed/htmLawed.php';

    $config = ($config === null) ? self::$defaultConfig : $config;

    // Any falsy $spec (null, '', empty array, ...) defers to a spec carried in the config.
    if (!$spec && isset($config['spec'])) {
        $spec = $config['spec'];
    }

    // NOTE(review): the default config is read via self:: while the default spec
    // is read via static:: - confirm whether the difference is intentional.
    $spec = ($spec === null) ? static::$defaultSpec : $spec;

    return htmLawed::hl($html, $config, $spec);
}
/**
 * Filter possible XSS out of a text string using htmLawed.
 *
 * The text is returned untouched when the 'enabled' entry of this object's
 * config is falsy. Otherwise it is passed to htmLawed::hl() together with the
 * 'htmlawed_config' and 'htmlawed_spec' config entries.
 *
 * An earlier hand-rolled regex pass (rewriting "javascript:", "about:" and
 * "xss:" prefixes) is no longer executed; htmLawed does all the filtering.
 *
 * @param string $text text string to filter
 *
 * @return mixed the original text when disabled, otherwise the htmLawed result
 */
public function applyFilter($text)
{
    if ($this->config['enabled']) {
        $htmlawedConfig = $this->config['htmlawed_config'];
        $htmlawedSpec = $this->config['htmlawed_spec'];

        return \htmLawed::hl($text, $htmlawedConfig, $htmlawedSpec);
    }

    return $text;
}
$html = preg_replace('!<p>[\\s\\h\\v]*</p>!u', '', $html); if ($links == 'remove') { $html = preg_replace('!</?a[^>]*>!', '', $html); } // get text sample for language detection $text_sample = strip_tags(substr($html, 0, 500)); if ($options->message_to_prepend) { $html = make_substitutions($options->message_to_prepend) . $html; } if ($options->message_to_append) { $html .= make_substitutions($options->message_to_append); } // filter XSS if ($xss_filter) { debug('Filtering HTML to remove XSS'); $html = htmLawed::hl($html, array('safe' => 1, 'deny_attribute' => 'style', 'comment' => 1, 'cdata' => 1)); } set_cached($permalink, $html); } } } $newitem->addElement('guid', $item->get_permalink(), array('isPermaLink' => 'true')); // add content if ($options->summary === true) { // get summary $summary = ''; if (!$do_content_extraction) { $summary = $html; } else { // Try to get first few paragraphs if (isset($content_block) && $content_block instanceof DOMElement) {
/**
 * Stores each feed item as a blog-type source item and collects the new IDs.
 *
 * For every item a data array (title, link, published, content) is built,
 * its categories are gathered as tags, and the whole is handed to addItem().
 * Processing stops once more than 100 IDs have been collected.
 *
 * @param mixed $items Iterable collection of feed entries (exact type not visible here).
 * @return array IDs returned by addItem() that evaluated to true.
 */
private function processItems($items) {
    $savedIds = array();

    foreach ($items as $item) {
        // Title
        $title = $this->getFirstFeedNode($item->title());

        // Link: take the first entry of a list, and unwrap an 'href' key when present
        $href = $item->link;
        if (is_array($href)) {
            $href = $href[0];
        }
        if (isset($href['href'])) {
            $href = $href['href'];
        }

        // Date: the largest of the RSS (pubDate) and Atom (published, updated) values
        $published = max(
            strtotime((string) $item->pubDate),
            strtotime((string) $item->published),
            strtotime((string) $item->updated)
        );

        // Content: whichever of content/description is longer (content wins ties)
        $body = (string) $item->content;
        $description = (string) $item->description;
        $longest = (strlen($description) > strlen($body)) ? $description : $body;

        $entry = array(
            'title'     => $title,
            'link'      => $href,
            'published' => $published,
            'content'   => htmLawed::tidy($longest, array('safe' => 1, 'tidy' => '2s0n')),
        );

        // Categories become tags; a failure here is deliberately ignored and
        // whatever tags were gathered so far are kept.
        $tags = array();
        try {
            $categories = $item->category();
            if ($categories && count($categories) > 0) {
                foreach ($categories as $category) {
                    $tags[] = $category->nodeValue;
                }
            }
        } catch (Exception $e) {
        }

        // Persist the item and remember its ID when one is returned
        $id = $this->addItem($entry, $entry['published'], SourceItem::BLOG_TYPE, $tags, false, false, $entry['title']);
        if ($id) {
            $savedIds[] = $id;
        }

        if (count($savedIds) > 100) {
            break;
        }
    }

    return $savedIds;
}
/**
 * kses-compatible entry point that delegates to htmLawed::hl().
 *
 * Builds an htmLawed config from the kses-style arguments: the keys of $h
 * become the allowed element list (or '-*' when $h is empty), $p becomes the
 * scheme list for all attributes, and 'htmLawed::kses_hook' is set as hook.
 *
 * @param mixed $t Input passed through to htmLawed::hl() as the text to filter.
 * @param array $h Map keyed by element name; also passed on as the htmLawed spec.
 * @param array $p URL schemes joined into the '*:' schemes setting.
 * @return mixed Whatever htmLawed::hl() returns.
 */
public static function kses($t, $h, $p = array('http', 'https', 'ftp', 'news', 'nntp', 'telnet', 'gopher', 'mailto'))
{
    // kses compat: flag every element entry with ['n']['*'] = 1.
    // NOTE(review): presumably this denies attributes not listed for the
    // element - confirm against htmLawed's spec handling.
    foreach ($h as $element => $unused) {
        $h[$element]['n']['*'] = 1;
    }

    if (count($h)) {
        $elements = strtolower(implode(',', array_keys($h)));
    } else {
        $elements = '-*';
    }

    $C = array(
        'unique_ids'         => 0,
        'no_deprecated_attr' => 0,
        'make_tag_strict'    => 0,
        'comment'            => 0,
        'cdata'              => 0,
        'keep_bad'           => 1,
        'elements'           => $elements,
        'hook'               => 'htmLawed::kses_hook',
        'schemes'            => '*:' . implode(',', $p),
    );

    return htmLawed::hl($t, $C, $h);
}
/**
 * Stores each feed item as a link-type source item and collects the new IDs.
 *
 * For every item a data array (title, link, published, note, content) is
 * built and handed to addItem() together with a timestamp.
 *
 * @param mixed $items Iterable collection of feed entries (exact type not visible here).
 * @return array IDs returned by addItem() that evaluated to true.
 */
private function processItems($items) {
    $savedIds = array();

    foreach ($items as $item) {
        $entry = array('title' => $item->title());

        // Prefer the first element returned by link(); fall back to the link property
        if ($item->link() && count($item->link()) > 0) {
            $links = $item->link();
            $first = $links[0];
            $entry['link'] = is_object($first) ? (string) $first->getAttribute('href') : "";
        } else {
            $rawLink = $item->link;
            $entry['link'] = (string) $rawLink['href'];
        }

        $content = $item->content();
        $entry['published'] = $item->published();
        $entry['note'] = $item->{'gr:annotation'}->content;

        // Use the crawl timestamp attribute when present, else parse the published date.
        // NOTE(review): keeping the first 10 characters presumably truncates a
        // millisecond epoch to seconds - confirm.
        $crawlMsec = (string) $item->getDom()->getAttribute("gr:crawl-timestamp-msec");
        if ($crawlMsec != "") {
            $timestamp = substr($crawlMsec, 0, 10);
        } else {
            $timestamp = strtotime((string) $entry['published']);
        }

        $entry['content'] = htmLawed::tidy($content, array('safe' => 1, 'tidy' => '2s0n'));

        $id = $this->addItem($entry, $timestamp, SourceItem::LINK_TYPE, false, false, false, $entry['title']);
        if ($id) {
            $savedIds[] = $id;
        }

        unset($entry);
    }

    return $savedIds;
}
/**
 * Sanitizes a string by running it through htmLawed with the 'safe' option on.
 *
 * The htmLawed vendor library is loaded through App::import() on each call.
 * Earlier regex-based variants that stripped link/script/style tags and
 * comments have been superseded by the htmLawed call.
 *
 * @param string $str String to sanitize
 * @return mixed Result of htmLawed::hl()
 * @access public
 * @static
 */
function stripScripts($str) {
    App::import('Vendor', 'htmlawed' . DS . 'htmlawed');

    $safeOnly = array('safe' => 1);

    return htmLawed::hl($str, $safeOnly);
}