Ejemplo n.º 1
0
 /**
  * Sanitizes an HTML string by running it through the htmLawed library.
  *
  * Fallbacks are resolved in this order:
  *  - a null $config is replaced by self::$defaultConfig;
  *  - a falsy $spec is replaced by $config['spec'] when that key is set;
  *  - a $spec that is still null afterwards is replaced by static::$defaultSpec.
  *
  * NOTE(review): the default config is read through self:: while the default
  * spec is read through static:: - confirm whether that difference is intended.
  *
  * @param string $html The markup to filter.
  * @param array|null $config htmLawed config settings, or null to use the class default.
  * @param string|array|null $spec A specification to further limit the allowed attribute values in the html.
  * @return string Returns the filtered html.
  * @see http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/htmLawed_README.htm
  */
 public static function filter($html, array $config = null, $spec = null)
 {
     require_once __DIR__ . '/htmLawed/htmLawed.php';

     $config = $config === null ? self::$defaultConfig : $config;

     // A spec embedded in the config is only used when the caller supplied no usable spec.
     if (!$spec && isset($config['spec'])) {
         $spec = $config['spec'];
     }

     $effectiveSpec = $spec === null ? static::$defaultSpec : $spec;

     return htmLawed::hl($html, $config, $effectiveSpec);
 }
Ejemplo n.º 2
0
 /**
  * Filters possible XSS out of a text string using htmLawed.
  *
  * When the 'enabled' config entry is falsy the text is returned untouched.
  * Otherwise it is handed to htmLawed::hl() together with the
  * 'htmlawed_config' and 'htmlawed_spec' config entries.
  *
  * A regex-based replacement of "javascript:", "about:" and "xss:" prefixes
  * used to sit here as commented-out code; it was never executed and has been
  * removed.
  *
  * @param string $text text string to filter
  *
  * @return mixed the unchanged text, or whatever htmLawed::hl() returns
  */
 public function applyFilter($text)
 {
     if ($this->config['enabled']) {
         return \htmLawed::hl(
             $text,
             $this->config['htmlawed_config'],
             $this->config['htmlawed_spec']
         );
     }

     return $text;
 }
Ejemplo n.º 3
0
             $html = preg_replace('!<p>[\\s\\h\\v]*</p>!u', '', $html);
             if ($links == 'remove') {
                 $html = preg_replace('!</?a[^>]*>!', '', $html);
             }
             // get text sample for language detection
             $text_sample = strip_tags(substr($html, 0, 500));
             if ($options->message_to_prepend) {
                 $html = make_substitutions($options->message_to_prepend) . $html;
             }
             if ($options->message_to_append) {
                 $html .= make_substitutions($options->message_to_append);
             }
             // filter XSS
             if ($xss_filter) {
                 debug('Filtering HTML to remove XSS');
                 $html = htmLawed::hl($html, array('safe' => 1, 'deny_attribute' => 'style', 'comment' => 1, 'cdata' => 1));
             }
             set_cached($permalink, $html);
         }
     }
 }
 $newitem->addElement('guid', $item->get_permalink(), array('isPermaLink' => 'true'));
 // add content
 if ($options->summary === true) {
     // get summary
     $summary = '';
     if (!$do_content_extraction) {
         $summary = $html;
     } else {
         // Try to get first few paragraphs
         if (isset($content_block) && $content_block instanceof DOMElement) {
Ejemplo n.º 4
0
 /**
  * Turns feed entries into stored items and collects the ids of the saved ones.
  *
  * For every entry the title, link, publication time, content and category
  * tags are gathered, the content is passed through htmLawed::tidy(), and the
  * result is handed to addItem(). The loop stops once more than 100 ids have
  * been collected.
  *
  * @param mixed $items Iterable collection of feed entries
  *                     (NOTE(review): entry type is not visible here - confirm).
  * @return array Ids returned by addItem() that evaluated as truthy.
  */
 private function processItems($items)
 {
     $ids = array();

     foreach ($items as $item) {
         $title = $this->getFirstFeedNode($item->title());

         // The link may be a list of links and/or a structure carrying an 'href' entry.
         $href = $item->link;
         if (is_array($href)) {
             $href = $href[0];
         }
         if (isset($href['href'])) {
             $href = $href['href'];
         }

         // RSS entries expose pubDate; Atom entries expose published and updated.
         // The largest of the three parsed values is used as the publication time.
         $rssDate = strtotime((string) $item->pubDate);
         $atomPublished = strtotime((string) $item->published);
         $atomUpdated = strtotime((string) $item->updated);
         $when = max($rssDate, $atomPublished, $atomUpdated);

         // Use whichever of content / description is longer (content wins ties).
         $body = (string) $item->content;
         $description = (string) $item->description;
         $body = strlen($description) > strlen($body) ? $description : $body;

         $data = array(
             'title' => $title,
             'link' => $href,
             'published' => $when,
             'content' => htmLawed::tidy($body, array('safe' => 1, 'tidy' => '2s0n')),
         );

         // Categories become tags; any exception while reading them is ignored
         // and the tags gathered so far are kept.
         $tags = array();
         try {
             $categories = $item->category();
             if ($categories && count($categories) > 0) {
                 foreach ($categories as $node) {
                     $tags[] = $node->nodeValue;
                 }
             }
         } catch (Exception $e) {
         }

         // Save the item in the database
         $id = $this->addItem($data, $data['published'], SourceItem::BLOG_TYPE, $tags, false, false, $data['title']);
         if ($id) {
             $ids[] = $id;
         }

         if (count($ids) > 100) {
             break;
         }
     }

     return $ids;
 }
Ejemplo n.º 5
0
 /**
  * kses-compatible entry point implemented on top of htmLawed::hl().
  *
  * @param mixed $t Text to filter; passed to htmLawed::hl() unchanged.
  * @param array $h Map keyed by element name. Every entry gets ['n']['*'] = 1
  *                 added and the map is then passed to htmLawed::hl() as the
  *                 spec; its lower-cased keys form the allowed-elements list.
  * @param array $p URL schemes, joined into the '*:scheme,scheme' config value.
  * @return mixed Whatever htmLawed::hl() returns.
  */
 public static function kses($t, $h, $p = array('http', 'https', 'ftp', 'news', 'nntp', 'telnet', 'gopher', 'mailto'))
 {
     // kses compat
     foreach ($h as $element => $unused) {
         $h[$element]['n']['*'] = 1;
     }

     if (count($h)) {
         $elements = strtolower(implode(',', array_keys($h)));
     } else {
         // No elements supplied.
         $elements = '-*';
     }

     $config = array(
         'unique_ids' => 0,
         'no_deprecated_attr' => 0,
         'make_tag_strict' => 0,
         'comment' => 0,
         'cdata' => 0,
         'keep_bad' => 1,
         'elements' => $elements,
         'hook' => 'htmLawed::kses_hook',
         'schemes' => '*:' . implode(',', $p),
     );

     return htmLawed::hl($t, $config, $h);
 }
Ejemplo n.º 6
0
 /**
  * Turns feed entries into stored link items and collects the ids of the saved ones.
  *
  * For every entry the title, link, published value, annotation note and
  * content are gathered, the content is passed through htmLawed::tidy(), and
  * the result is handed to addItem().
  *
  * @param mixed $items Iterable collection of feed entries
  *                     (NOTE(review): entry type is not visible here - confirm).
  * @return array Ids returned by addItem() that evaluated as truthy.
  */
 private function processItems($items)
 {
     $ids = array();

     foreach ($items as $item) {
         $entry = array('title' => $item->title());

         if ($item->link() && count($item->link()) > 0) {
             // Use the href attribute of the first link when it is an object.
             $links = $item->link();
             $first = $links[0];
             $entry['link'] = is_object($first) ? (string) $first->getAttribute('href') : "";
         } else {
             $first = $item->link;
             $entry['link'] = (string) $first['href'];
         }

         $body = $item->content();
         $entry['published'] = $item->published();
         $entry['note'] = $item->{'gr:annotation'}->content;

         // Prefer the crawl timestamp: the first 10 characters of the
         // millisecond value are kept. Otherwise parse the published value.
         $crawlMsec = (string) $item->getDom()->getAttribute("gr:crawl-timestamp-msec");
         if ($crawlMsec != "") {
             $timestamp = substr($crawlMsec, 0, 10);
         } else {
             $timestamp = strtotime((string) $entry['published']);
         }

         $entry['content'] = htmLawed::tidy($body, array('safe' => 1, 'tidy' => '2s0n'));

         $id = $this->addItem($entry, $timestamp, SourceItem::LINK_TYPE, false, false, false, $entry['title']);
         if ($id) {
             $ids[] = $id;
         }

         unset($entry);
     }

     return $ids;
 }
Ejemplo n.º 7
0
 /**
  * Sanitizes a string by passing it through htmLawed with the 'safe' option enabled.
  *
  * The htmLawed vendor library is loaded through App::import() before it is
  * called. Earlier regex-based variants (stripping stylesheet links, script
  * and style elements, and HTML comments) only existed as unreachable
  * commented-out code and have been removed.
  *
  * @param string $str String to sanitize
  * @return mixed The value returned by htmLawed::hl() (presumably the sanitized string)
  * @access public
  * @static
  */
 function stripScripts($str)
 {
     App::import('Vendor', 'htmlawed' . DS . 'htmlawed');

     $options = array('safe' => 1);
     $sanitized = htmLawed::hl($str, $options);

     return $sanitized;
 }