/**
 * Fetches a page repeatedly, repairs it with tidy and saves the <body> markup.
 *
 * On every loop pass the page at $uri is downloaded with get_page(), repaired
 * with tidy, and the contents of its <body> element are stored in $results
 * under a title produced by some_words(). Once the loop is done, every
 * collected entry is handed to save_item().
 *
 * NOTE(review): $uri is never assigned in this function - every candidate
 * assignment below is commented out - so get_page() receives an undefined
 * variable. Confirm which URL is intended before running this.
 */
function sphsc3()
{
    $results = array();
    $i = 0;
    // The body runs 21 times (post-increment compared against 20). Nothing in
    // the body depends on $i, so each pass requests the same $uri; results are
    // keyed by title, so identical titles overwrite each other.
    do {
        // Candidate URLs (the trailing numbers are the original author's notes):
        // 'https://content.sphsc.washington.edu/sphintra/web2/factoidsaud.asp'; //7-8
        // 'https://content.sphsc.washington.edu/sphintra/web2/res_blurb.asp';  // 11
        // 'https://content.sphsc.washington.edu/sphintra/web2/factoidscore.asp'; // 14
        // 'https://content.sphsc.washington.edu/sphintra/web2/factoidsphd.asp'; //15
        // 'https://content.sphsc.washington.edu/sphintra/web2/factoidspb.asp';  // 16, 19
        // 'https://content.sphsc.washington.edu/sphintra/web2/factoidsmed.asp';  // 17, 20
        // 'https://content.sphsc.washington.edu/sphintra/web2/factoidsug.asp'; // 18
        // 'https://content.sphsc.washington.edu/sphintra/web2/res_stone_blurb.asp';
        //$uri = 'https://content.sphsc.washington.edu/sphintra/web2/factoidscore.asp'; // 19
        // No's:
        // http://content.sphsc.washington.edu/sphintra/web2/clinic_msg.asp
        // http://content.sphsc.washington.edu/sphintra/web2/home_right.asp
        // https://content.sphsc.washington.edu/sphintra/web2/res_stone_blurb.asp
        // http://content.sphsc.washington.edu/sphintra/web2/outreach_msg.asp

        // Reset on every pass and never filled here, so save_item() below
        // always receives an empty array.
        $tids = array();
        $c = array('indent' => true, 'output-xhtml' => true, 'wrap' => 200, 'hide-comments' => true);
        $response = get_page($uri);
        $tidy = new tidy();
        $tidy->parseString($response, $c, 'utf8');
        $tidy->cleanRepair();
        // The tidy object is used as the regex subject, which relies on its
        // string conversion. Captures everything between <body ...> and </body>.
        preg_match("/<body[^>]*>(.*?)<\\/body>/is", $tidy, $a);
        // NOTE(review): $a[1] is unset when the pattern does not match.
        $body = str_replace(array("\n", "\r"), '', $a[1]);
        // Title derived from the tag-free body text; some_words() is defined elsewhere.
        $title = some_words(strip_tags($body));
        $results[$title] = $body;
    } while ($i++ < 20);
    // Persist each distinct title/body pair.
    foreach ($results as $k => $v) {
        save_item($k, $v, $tids);
    }
}
Example #2
0
 /**
  * Repairs the markup held in self::$html with tidy.
  *
  * The markup is parsed using self::$tidy_config and self::$encoding, run
  * through cleanRepair(), and the resulting tidy document is stored back in
  * self::$html.
  */
 function tidyClean()
 {
     $document = new tidy();
     $document->parseString(self::$html, self::$tidy_config, self::$encoding);
     $document->cleanRepair();

     // The tidy object itself (not a plain string) replaces the previous value.
     self::$html = $document;
 }
Example #3
0
 /**
  * Runs markup through tidy and returns the repaired result as a string.
  *
  * @param string $markup Markup to repair; parsed as UTF-8 with self::$config.
  *
  * @return string Output of the tidy document after cleanRepair().
  */
 public function repair($markup)
 {
     $document = new \tidy();
     $document->parseString($markup, self::$config, 'utf8');
     $document->cleanRepair();

     return (string) $document;
 }
 /**
  * Creates a "text" content element for an item from its tl_content markup.
  *
  * Does nothing when $objItem->tl_content is empty. Otherwise the markup is
  * wrapped in a minimal HTML document, passed through nl2p() and tidy, and
  * the resulting body is post-processed (images removed, bare URLs linked,
  * <b> turned into <strong>, e-mail addresses turned into insert tags,
  * disallowed tags and style/class/id attributes stripped) before it is
  * saved as a new \ContentModel row whose pid is the item's id.
  *
  * @param object $objItem Item providing the tl_content and id properties.
  *
  * @return void
  */
 protected function createContentElements(&$objItem)
 {
     if (!$objItem->tl_content) {
         return;
     }
     // need to wrap <p> around text for contao
     $tidyConfig = array('enclose-text' => true, 'drop-font-tags' => true, 'drop-proprietary-attributes' => true, 'quote-ampersand' => true, 'clean' => false);
     $bodyText = '<!DOCTYPE html><head><title></title></head><body>' . $objItem->tl_content . '</body></html>';
     //			$bodyText = $this->convert_external_link_tags($bodyText);
     //			$bodyText = $this->convert_internal_link_tags($bodyText);
     $bodyText = $this->nl2p($bodyText);
     $tidy = new \tidy();
     $tidy->parseString($bodyText, $tidyConfig, $GLOBALS['TL_CONFIG']['dbCharset']);
     $body = $tidy->body();
     $objContent = new \ContentModel();
     $objContent->text = trim(str_replace(array('<body>', '</body>'), '', $body));
     // strip images
     $objContent->text = preg_replace("/<img[^>]+\\>/i", "", $objContent->text);
     // create links from text
     $objContent->text = preg_replace_callback(
         '!(\\s|^)((https?://|www\\.)+[a-z0-9_./?=&-]+)!i',
         function ($match) {
             $url = $match[2];
             // Prepend a scheme only when the matched text has none; adding it
             // unconditionally produced href="http://http://..." for URLs that
             // already started with http:// or https://.
             $href = preg_match('!^https?://!i', $url) ? $url : 'http://' . $url;

             return ' <a href="' . $href . '" target="_blank">' . $url . '</a>';
         },
         $objContent->text
     );
     // replace <b> by <strong>
     $objContent->text = preg_replace('!<b(.*?)>(.*?)</b>!i', '<strong>$2</strong>', $objContent->text);
     // replace emails with inserttags
     $objContent->text = preg_replace('/([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\\.([A-Z]{2,4})(\\((.+?)\\))?/i', "{{email::\$1@\$2.\$3}}", $objContent->text);
     // strip not allowed tags
     $objContent->text = strip_tags($objContent->text, \Config::get('allowedTags'));
     $objContent->text = $this->stripAttributes($objContent->text, array('style', 'class', 'id'));
     $objContent->ptable = static::$strTable;
     $objContent->pid = $objItem->id;
     $objContent->sorting = 16;
     $objContent->tstamp = time();
     $objContent->type = 'text';
     $objContent->save();
 }
 /**
  * Detects feed types and instantiates appropriate objects.
  *
  * This method takes care of detecting feed types and instantiating
  * appropriate classes. For now we're going to treat Atom 0.3 as Atom 1.0
  * but raise a warning. I do not intend to introduce full support for
  * Atom 0.3 as it has been deprecated, but others are welcome to.
  *
  * The feed is loaded into $model. When that fails and tidy use was
  * requested (and the extension is loaded), a tidy-repaired copy of the
  * feed is tried once before giving up.
  *
  * @param    DOMDocument $model  Document the feed is loaded into
  * @param    string    $feed    XML serialization of the feed
  * @param    bool    $strict    Whether or not to validate the feed
  * @param    bool    $suppressWarnings Trigger errors for deprecated feed types?
  * @param    bool    $tidy    Whether or not to try and use the tidy library on input
  *
  * @return   object   Instance of the class chosen by determineClass()
  *
  * @throws   XML_Feed_Parser_Exception When the feed is empty or is not valid XML
  */
 function build(DOMDocument $model, $feed, $strict = false, $suppressWarnings = false, $tidy = false)
 {
     $options = 0;
     if ($suppressWarnings) {
         $options |= LIBXML_NOWARNING;
         $options |= LIBXML_NOERROR;
     }
     if (empty($feed)) {
         throw new XML_Feed_Parser_Exception('Invalid input: file is empty');
     }
     if (!$model->loadXML($feed, $options)) {
         if (!($tidy && extension_loaded('tidy'))) {
             throw new XML_Feed_Parser_Exception('Invalid input: this is not valid XML');
         }
         $repaired = new tidy();
         $repaired->parseString($feed, array('input-xml' => true, 'output-xml' => true));
         $repaired->cleanRepair();
         // Pass the same libxml options as the first attempt so that warning
         // suppression also applies when loading the repaired markup.
         if (!$model->loadXML((string) $repaired, $options)) {
             throw new XML_Feed_Parser_Exception('Invalid input: this is not valid XML');
         }
     }
     /* detect feed type */
     $doc_element = $model->documentElement;
     $class = $this->determineClass($doc_element, $suppressWarnings);
     /* Instantiate feed object */
     $parser = new $class($model, $strict);
     $parser->setSanitizer(new XML_Feed_Parser_Unsafe_Sanitizer());
     return $parser;
 }
 /**
  * Mails an instance utilization report as HTML.
  *
  * With $game_cfg === null a multipart summary is sent: the tidy-repaired
  * HTML report plus a CSV produced by ReportCollector::generateCSV().
  * Otherwise a plain HTML mail is sent for the given game. Recipients are
  * supplied only through the Bcc header; the To address is empty.
  *
  * @param array      $server_cfg Server configuration; "sender" and (for the
  *                               summary) "instance_report_mail_recipients" are read.
  * @param array|null $game_cfg   Game configuration ("name" and
  *                               "instance_report_mail_recipients" are read) or
  *                               null for the summary mail.
  * @param string     $report     Location of the HTML report, read with file_get_contents().
  *
  * @return void
  */
 private static function send_mail($server_cfg, $game_cfg, $report)
 {
     $to = "";
     $random_hash = md5(date('r', time()));
     $mime_boundary = "==Multipart_Boundary_x{$random_hash}x";
     if ($game_cfg === null) {
         $subject = "Instance Utilization Summary";
         $bcc = $server_cfg["instance_report_mail_recipients"];
         // Every header must end with CRLF. Without the one after the boundary
         // parameter, "X-Mailer" was glued onto the Content-Type header.
         $headers = 'From: ' . $server_cfg["sender"] . "\r\n" . 'Bcc: ' . $bcc . "\r\n" . "Content-Type: multipart/mixed;" . " boundary=\"{$mime_boundary}\"" . "\r\n" . 'X-Mailer: PHP/' . phpversion();
         $HTMLMessage = file_get_contents($report);
         $tidy = new tidy();
         $tidy->parseString($HTMLMessage);
         $tidy->cleanRepair();
         // First part: the repaired HTML report.
         $message = "\n\n" . "--{$mime_boundary}\n" . "Content-Type:text/html; charset=\"iso-8859-1\"\n" . "Content-Transfer-Encoding: 7bit\n\n" . (string) $tidy . "\n\n";
         $reportClass = new ReportCollector($server_cfg);
         $data = $reportClass->generateCSV();
         // Second part: the CSV data, followed by the closing boundary.
         $message .= "--{$mime_boundary}\n" . "Content-Type:text/csv; \n" . " name=zPerfmonUtilTrend_" . date("MjY") . ".csv \n" . "Content-Transfer-Encoding: 7bit\n\n" . $data . "\n\n" . "--{$mime_boundary}--\n";
         mail($to, $subject, $message, $headers);
     } else {
         $subject = "Instance Utilization report for {$game_cfg['name']}";
         $bcc = $game_cfg["instance_report_mail_recipients"];
         $headers = 'From: ' . $server_cfg["sender"] . "\r\n" . 'Bcc: ' . $bcc . "\r\n" . 'Content-Type: text/HTML' . "\r\n" . 'X-Mailer: PHP/' . phpversion();
         $message = file_get_contents($report);
         $tidy = new tidy();
         $tidy->parseString($message);
         $tidy->cleanRepair();
         // mail() expects a string body, so convert the tidy document explicitly.
         mail($to, $subject, (string) $tidy, $headers);
     }
 }
 /**
  * Loads markup into $this->simpleXML, cleaning it with tidy when possible.
  *
  * Three strategies are tried in order: the tidy PHP extension, the "tidy"
  * command line tool, and finally the untouched input. Output of either tidy
  * variant has the XHTML namespace declaration and "&#160;" entities removed
  * before it is handed to simplexml_load_string().
  *
  * @param string $content Markup to parse.
  *
  * @throws Exception When the (possibly cleaned) markup cannot be loaded.
  */
 public function __construct($content)
 {
     // Fragments removed from tidy output, in this order.
     $unwanted = array('xmlns="http://www.w3.org/1999/xhtml"', '&#160;');

     if (extension_loaded('tidy')) {
         // using the tidy php extension
         $document = new tidy();
         $document->parseString($content, array('output-xhtml' => true, 'numeric-entities' => true, 'wrap' => 0), 'utf8');
         $document->cleanRepair();
         $markup = str_replace($unwanted, '', $document);
     } elseif (@shell_exec('which tidy')) {
         // using tidy through the command line
         $CLI_content = escapeshellarg($content);
         $cliOutput = shell_exec("echo {$CLI_content} | tidy --force-output 1 -n -q -utf8 -asxhtml -w 0 2> /dev/null");
         $markup = str_replace($unwanted, '', $cliOutput);
     } else {
         // no tidy library found, hence no sanitizing
         $markup = $content;
     }

     $this->simpleXML = @simplexml_load_string($markup, 'SimpleXMLElement', LIBXML_NOWARNING);
     if (!$this->simpleXML) {
         throw new Exception('CSSContentParser::__construct(): Could not parse content. Please check the PHP extension tidy is installed.');
     }
     parent::__construct();
 }
Example #8
0
 /**
  * Filter through Tidy
  *
  * Replaces $body with its tidy-repaired XHTML form. Nothing is returned;
  * the result is delivered through the $body reference.
  *
  * @param  array  $headers  Not read or modified by this method.
  * @param  string $body     Markup to repair; overwritten with tidy's output.
  * @param  bool   $uncached Not used by this method.
  * @return void
  */
 public static function tidy(array &$headers, &$body, $uncached)
 {
     $settings = array(
         'clean' => 1,
         'bare' => 1,
         'hide-comments' => 1,
         'doctype' => 'omit',
         'indent-spaces' => 0,
         'tab-size' => 0,
         'wrap' => 0,
         'quote-ampersand' => 0,
         'output-xhtml' => true,
         'quiet' => 1,
     );

     $document = new tidy();
     $document->parseString($body, $settings, 'utf8');
     $document->cleanRepair();

     $body = tidy_get_output($document);
 }
 /**
  * Filters an HTML string and returns the content produced by the XML parser.
  *
  * The string is first normalised (through the tidy extension when it is
  * available, otherwise through miniTidy()). Comments, CDATA sections and
  * processing-instruction delimiters are then neutralised, entities are
  * decoded via html::decodeEntities(), and the result is fed to xml_parse()
  * wrapped in an <all> element.
  *
  * @param string $str HTML to filter.
  *
  * @return string Value of $this->content once xml_parse() has run.
  */
 public function apply($str)
 {
     if (extension_loaded('tidy') && class_exists('tidy')) {
         $config = array('doctype' => 'strict', 'drop-proprietary-attributes' => true, 'drop-font-tags' => true, 'escape-cdata' => true, 'indent' => false, 'join-classes' => false, 'join-styles' => true, 'lower-literals' => true, 'output-xhtml' => true, 'show-body-only' => true, 'wrap' => 80);
         # A dummy paragraph is put in front of the input and removed again
         # after tidy has run (the original author notes it "fixes a big issue").
         $str = '<p>tt</p>' . $str;
         $tidy = new tidy();
         $tidy->parseString($str, $config, 'utf8');
         $tidy->cleanRepair();
         $str = (string) $tidy;
         $str = preg_replace('#^<p>tt</p>\\s?#', '', $str);
     } else {
         $str = $this->miniTidy($str);
     }
     # Removing open comments, open CDATA and processing instructions
     $str = preg_replace('%<!--.*?-->%msu', '', $str);
     $str = str_replace('<!--', '', $str);
     $str = preg_replace('%<!\\[CDATA\\[.*?\\]\\]>%msu', '', $str);
     $str = str_replace('<![CDATA[', '', $str);
     # Transform processing instructions: "<" must become &lt; and ">" must
     # become &gt; (the two entities used to be swapped).
     $str = str_replace('<?', '&lt;?', $str);
     $str = str_replace('?>', '?&gt;', $str);
     $str = html::decodeEntities($str, true);
     $this->content = '';
     xml_parse($this->parser, '<all>' . $str . '</all>');
     return $this->content;
 }
 /**
  * 
  * Uses the tidy library to tidy HTML output.
  * 
  * @access public
  * 
  * @param string $buffer The source text to be filtered.
  *
  * @return string The repaired markup as produced by tidy.
  *
  */
 public function filter($buffer)
 {
     $tidy = new tidy();
     $config = array('indent' => true, 'output-xhtml' => true, 'wrap' => 200);
     $tidy->parseString($buffer, $config);
     $tidy->cleanRepair();
     // The tidy class has no get_output() method; the cleaned markup is
     // obtained through the tidy_get_output() function instead.
     return tidy_get_output($tidy);
 }
Example #11
0
/**
 * Parses and repairs HTML with tidy, targeting strict XHTML output.
 *
 * @param string $html Markup to repair; treated as UTF-8.
 *
 * @return tidy The repaired tidy document (cast it to string for the markup).
 */
function tidy_html($html)
{
    $document = new tidy();
    $document->parseString(
        $html,
        array('indent' => 2, 'output-xhtml' => true, 'doctype' => 'strict', 'wrap' => 120),
        'utf8'
    );
    $document->cleanRepair();

    return $document;
}
 /**
  * Repairs $content in place with tidy.
  *
  * The input is wrapped by gethtml(), parsed and repaired as UTF-8 XHTML,
  * and the string form of the result is reduced by getbody() before being
  * written back into $content.
  *
  * @param string $content Markup to clean; replaced with the cleaned result.
  *
  * @return void
  */
 public function filter(&$content)
 {
     $document = new tidy();
     $document->parseString(
         $this->gethtml($content),
         array('clean' => true, 'enclose-block-text' => true, 'enclose-text' => true, 'preserve-entities' => true, 'logical-emphasis' => true, 'char-encoding' => 'utf8', 'indent' => 'auto', 'output-xhtml' => true, 'wrap' => 200),
         'utf8'
     );
     $document->cleanRepair();

     $content = $this->getbody((string) $document);
 }
Example #13
0
 /**
  * Converts a response body to repaired XHTML.
  *
  * Every "&nbsp;" entity is replaced by a plain space before the text is
  * parsed by tidy as UTF-8.
  *
  * @param string $response Raw markup.
  *
  * @return string Repaired XHTML.
  */
 protected function tidy($response)
 {
     $withoutNbsp = str_replace('&nbsp;', ' ', $response);

     $document = new tidy();
     $document->parseString($withoutNbsp, array('output-xhtml' => true), 'utf8');
     $document->cleanRepair();

     return (string) $document;
 }
Example #14
0
 /**
  * Starts an output buffer whose contents are passed through tidy.
  *
  * The buffer callback treats the output as XML and returns the repaired,
  * indented, unwrapped result of tidy::repairString().
  *
  * @return void
  *
  * @throws \ErrorException When ob_start() fails.
  */
 public static function init()
 {
     $started = ob_start(function ($buffer) {
         $tidy = new \tidy();
         return $tidy->repairString($buffer, ['input-xml' => true, 'indent' => true, 'wrap' => 0, 'output-xml' => true]);
     });
     if (!$started) {
         // "new" was missing, which turned this into a call to an undefined
         // function; code and severity are integers, so null is not passed.
         throw new \ErrorException("ob_start failed", 0, E_ERROR, __FILE__, __LINE__);
     }
 }
 /**
  * Returns a tidy document built from placeholder data.
  *
  * The $url argument is not used here: the data is a hard-coded sample
  * string (the original author notes that the real code downloads it from
  * a site). It is parsed as UTF-8 with $this->tidyconfig and repaired.
  *
  * @param string $url Unused in this snippet.
  *
  * @return tidy The repaired tidy document.
  */
 public function getURL($url)
 {
     // in my code, $data is downloaded from a site
     $data = "awerawer";

     $document = new tidy();
     $document->parseString($data, $this->tidyconfig, 'utf8');
     $document->cleanRepair();

     return $document;
 }
Example #16
0
 /**
  * Passes content through tidy and returns the document's value.
  *
  * Options are merged in three layers: built-in defaults, then the caller's
  * $options, then a set of enforced options that always win.
  *
  * @param string $content Markup to process.
  * @param array  $options Tidy options overriding the defaults.
  *
  * @return string|null The "value" property of the repaired tidy document.
  */
 public static function minify($content, $options = array())
 {
     $defaults = array('clean' => false, 'hide-comments' => true, 'wrap' => 0, 'input-encoding' => 'utf8', 'output-encoding' => 'utf8', 'preserve-entities' => true);
     $enforced = array('show-errors' => 0, 'show-warnings' => false, 'force-output' => true, 'tidy-mark' => false);

     $document = new tidy();
     $document->parseString($content, array_merge($defaults, $options, $enforced));
     $document->cleanRepair();

     return $document->value;
 }
Example #17
0
 /**
  * Cleans the HTML.
  *
  * The input first goes through Html::BurnerClean() and is then parsed and
  * repaired by tidy (body only, XHTML output, default alt text for images).
  *
  * @param string $html String containing the HTML to clean
  * @return tidy Cleaned and tidied document; cast to string for the markup
  */
 public static function Clean($html)
 {
     $prepared = Html::BurnerClean($html);

     $document = new tidy();
     $document->parseString(
         $prepared,
         array("show-body-only" => true, "alt-text" => "Pic without description", "hide-endtags" => false, "output-xhtml" => true),
         'utf8'
     );
     $document->cleanRepair();

     return $document;
 }
Example #18
0
 /**
  * Repairs an HTML fragment with tidy and returns the tidy document.
  *
  * Only the body is emitted, the doctype is omitted, and output is forced
  * even when tidy reports errors.
  *
  * @param string $input Markup to repair; treated as UTF-8.
  *
  * @return tidy The repaired tidy document.
  */
 function tidyhtml($input)
 {
     $settings = array(
         'show-errors' => 0,
         'show-warnings' => false,
         'break-before-br' => true,
         'indent' => true,
         'indent-attributes' => true,
         'add-xml-decl' => false,
         'force-output' => true,
         'fix-backslash' => false,
         'merge-divs' => false,
         'merge-spans' => false,
         'doctype' => 'omit',
         'enclose-block-text' => true,
         'drop-empty-paras' => false,
         'output-html' => true,
         'show-body-only' => true,
         'wrap' => 1,
     );

     $document = new tidy();
     $document->parseString($input, $settings, 'utf8');
     $document->cleanRepair();

     return $document;
 }
Example #19
0
/**
 * Repairs HTML with tidy and returns part of the second child of <html>.
 *
 * @param string $html Markup to repair; treated as UTF-8.
 *
 * @return string The node's markup with its first and last seven bytes removed.
 */
function tidyHtml($html)
{
    $document = new tidy();
    $document->parseString($html, ["indent" => 2, "clean" => false, "char-encoding" => "utf8"], 'utf8');
    $document->cleanRepair();

    // Second child of the <html> node - presumably <body>; verify for
    // documents with an unusual structure.
    $markup = $document->html()->child[1]->value;

    // Drops seven bytes at each end - presumably the "<body>" opening tag
    // plus a newline, and the closing "</body>" tag.
    return substr($markup, 7, -7);
}
Example #20
0
 /**
  * Sends an HTML content-type header and returns the output repaired by tidy.
  *
  * @param string $output Markup to repair; parsed as UTF-8.
  * @return \tidy The repaired tidy document
  */
 public function preView($output)
 {
     // Options are listed in their original order.
     $settings = [
         'show-body-only' => false,
         'clean' => true,
         'char-encoding' => 'UTF8',
         'add-xml-decl' => true,
         'add-xml-space' => true,
         'output-html' => false,
         'output-xml' => false,
         'output-xhtml' => true,
         'numeric-entities' => false,
         'ascii-chars' => false,
         'doctype' => 'auto',
         'bare' => true,
         'fix-uri' => true,
         'indent' => true,
         'indent-spaces' => 4,
         'tab-size' => 4,
         'wrap-attributes' => true,
         'wrap' => 0,
         'indent-attributes' => true,
         'join-classes' => true,
         'join-styles' => false,
         'enclose-block-text' => true,
         'fix-bad-comments' => true,
         'fix-backslash' => true,
         'replace-color' => false,
         'wrap-asp' => false,
         'wrap-jste' => false,
         'wrap-php' => false,
         'write-back' => true,
         'drop-proprietary-attributes' => false,
         'hide-comments' => true,
         'hide-endtags' => false,
         'literal-attributes' => false,
         'drop-empty-paras' => true,
         'enclose-text' => true,
         'quote-ampersand' => true,
         'quote-marks' => false,
         'quote-nbsp' => true,
         'vertical-space' => true,
         'wrap-script-literals' => false,
         'tidy-mark' => true,
         'merge-divs' => false,
         'repeated-attributes' => 'keep-last',
         'break-before-br' => true,
     ];

     header('Content-type: text/html; charset=utf-8');

     $document = new \tidy();
     $document->parseString($output, $settings, 'UTF8');
     $document->cleanRepair();

     return $document;
 }
Example #21
0
 /**
  * Repairs $params['content'] as XHTML when the tidy class is available.
  *
  * (non-PHPdoc)
  * @see app/Lib/Browser/Mixin/Browser\Mixin.Iface::after()
  */
 public function after(\stdClass $Object, array $params = [], array $arguments = [])
 {
     // Without tidy the parameters are handed back untouched.
     if (!class_exists('\\tidy')) {
         return $params;
     }

     $repairer = new \tidy();
     $params['content'] = $repairer->repairString($params['content'], ['output-xhtml' => true], 'utf8');
     unset($repairer);

     // return the processed data
     return $params;
 }
Example #22
0
 /**
  * Repairs text with tidy and returns the markup of its body node.
  *
  * @param string $text Markup to repair; parsed as UTF-8 with default options.
  *
  * @return string The "value" of the document's body node.
  */
 protected function _tidy($text)
 {
     $document = new tidy();
     $document->parseString($text, array(), 'utf8');
     $document->cleanRepair();

     // Only the body portion of the repaired document is of interest.
     return $document->body()->value;
 }
Example #23
0
 /**
  * Re-indents an HTML fragment with tidy when the extension is available.
  *
  * @param string $html Markup to repair.
  *
  * @return string Repaired body markup, or the input unchanged when the
  *                tidy extension is not loaded.
  */
 public static function tidyHTML($html)
 {
     // The extension name must be a string; the bare word "tidy" is an
     // undefined constant, which is an Error on PHP 8.
     if (!extension_loaded('tidy')) {
         return $html;
     }

     $tidy = new \tidy();

     return $tidy->repairString($html, array('indent' => true, 'indent-spaces' => 2, 'show-body-only' => true, 'merge-divs' => false));
 }
 /**
  * Converts an HTML page containing a table into an RSS 2.0 string.
  *
  * The markup is repaired by tidy as XHTML and loaded into SimpleXML. Each
  * row of the first <tbody> of the first <table> in <body> becomes one
  * <item>: the first line of the first cell is the title, lines starting
  * with "von" / "bis" go into <valid>, the remaining lines form the
  * description, and the row's data-lat / data-lon attributes become
  * <lat> / <lon>.
  *
  * @param string $data HTML markup to convert.
  *
  * @return string The generated RSS document.
  */
 public function parse($data)
 {
     // Specify configuration
     $config = array('indent' => true, 'output-xhtml' => true, 'wrap' => 200);
     // Tidy
     $tidy = new tidy();
     $tidy->parseString($data, $config, 'utf8');
     $tidy->cleanRepair();
     // NOTE(review): $S is never used in this method.
     $S = new HTMLSlicer();
     // Concatenating with "" turns the tidy document into a string.
     $XML = new SimpleXMLElement($tidy . "");
     // Channel header; <item> elements are appended per table row below.
     $rss = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<rss version=\"2.0\">\n\t<channel>\n\t\t<title>Notdienste</title>\n\t\t<link>notdienst-portal.de</link>\n\t\t<description>Notdienste</description>\n\t\t<language>de-de</language>\n\t\t<copyright>Bayerische Landesapothekerkammer</copyright>\n\t\t<pubDate>" . date("r") . "</pubDate>\n\n";
     foreach ($XML->body->table[0]->tbody[0]->tr as $row) {
         // Attribute names contain a hyphen, so they are held in variables
         // and accessed dynamically further down.
         $lat = "data-lat";
         $lon = "data-lon";
         #print_r();
         #print_r($row->attributes()->$lon);
         // Per-row accumulators.
         $von = "";
         $bis = "";
         $title = "";
         $content = "";
         // Index of the current cell within the row.
         $i = 0;
         foreach ($row->td as $cell) {
             // Plain text of the cell with the word "Entfernung" removed.
             $td = trim(strip_tags($cell->asXML()));
             $td = trim(str_replace("Entfernung", "", $td));
             // Split into lines and trim each of them.
             $ex = explode("\n", $td);
             foreach ($ex as $k => $line) {
                 $ex[$k] = trim($line);
             }
             // The first line of the first cell is the item title.
             if ($i == 0) {
                 $title = trim($ex[0]);
                 unset($ex[0]);
             }
             // Lines starting with "von" / "bis" (case-insensitive) are moved
             // out of the content; a later match overwrites an earlier one.
             foreach ($ex as $k => $l) {
                 if (stripos($l, "von") === 0) {
                     $von = $ex[$k];
                     unset($ex[$k]);
                 }
                 if (stripos($l, "bis") === 0) {
                     $bis = $ex[$k];
                     unset($ex[$k]);
                 }
             }
             // Whatever is left of the cell is appended to the description.
             $content .= implode("\n", $ex) . "\n";
             $i++;
         }
         $content = nl2br(trim($content));
         // One <item> per row; runs of spaces in the description are collapsed.
         $rss .= "\n\t\t<item>\n\t\t\t<title><![CDATA[" . html_entity_decode($title) . "]]></title>\n\t\t\t<description><![CDATA[" . preg_replace('/ +/', ' ', html_entity_decode($content)) . "]]></description>\n\t\t\t<link></link>\n\t\t\t<author></author>\n\t\t\t<guid></guid>\n\t\t\t<valid>{$von} {$bis}</valid>\n\t\t\t<lat>" . $row->attributes()->{$lat} . "</lat>\n\t\t\t<lon>" . $row->attributes()->{$lon} . "</lon>\n\t\t\t<pubDate>" . date("r") . "</pubDate>\n\t\t</item>";
         #print_r($content);
     }
     #$X = $XML->xpath("//tr");
     #var_dump($X);
     // Output
     $rss .= "\n\t</channel>\n</rss>";
     return $rss;
 }
 /**
  * Repairs HTML with tidy (XML output) and loads it into SimpleXML.
  *
  * Comments are dropped and entities are written numerically so that the
  * result can be parsed as XML.
  *
  * @param string $html Markup to convert; parsed as UTF-8.
  *
  * @return SimpleXMLElement Element built from the document's html node.
  */
 function GetXML($html)
 {
     $document = new tidy();
     $document->parseString(
         $html,
         array('output-xml' => true, 'numeric-entities' => true, 'hide-comments' => true),
         'utf8'
     );
     $document->cleanRepair();

     // The html node is handed over as is and converted to a string by the
     // SimpleXMLElement constructor.
     return new SimpleXMLElement($document->html());
 }
Example #26
0
 /**
  * Returns the HTML unchanged; the tidy-based clean-up is switched off.
  *
  * The condition below is hard-wired to false, so the tidy branch never
  * runs and the input is always returned as is. The $tags argument is not
  * used.
  *
  * @param string     $html Markup to purify.
  * @param mixed|null $tags Unused.
  *
  * @return string The input markup.
  */
 private function purifyHtml($html, $tags = null)
 {
     // "&& false" keeps this branch disabled while class_exists() is still evaluated.
     if (class_exists('Tidy') && false) {
         $document = new tidy();
         $document->parseString($html, array('output-xhtml' => true, 'indent' => false), 'utf8');

         return $this->getStringBetween((string) $document, '<body>');
     }

     return $html;
 }
Example #27
0
 /**
  * Repairs content with tidy.
  *
  * When no content is passed, the output of renderChildren() is used.
  *
  * @param string $content Markup to repair, or NULL to use the rendered children
  * @return string Result of tidy::repairString()
  * @throws Exception When the tidy class is not available
  */
 public function render($content = NULL)
 {
     $markup = ($content === NULL) ? $this->renderChildren() : $content;

     if (!class_exists('tidy')) {
         throw new Exception('TidyViewHelper requires the PHP extension "tidy" which is not installed or not loaded.', 1352059753);
     }

     $repairer = new tidy();

     return $repairer->repairString($markup);
 }
 /**
  * Converts the stored HTML value to plain text.
  *
  * Entities are decoded using the instance charset, the markup is parsed
  * by tidy, and nodeToPlainText() fills the result string from the body node.
  *
  * @param Io_Charset|null $outputCharset_ Output charset; the instance
  *                                        charset is used when omitted.
  *
  * @return string
  */
 public function toPlainText(Io_Charset $outputCharset_ = null)
 {
     $targetCharset = (null === $outputCharset_) ? $this->m_charset : $outputCharset_;

     $decoded = html_entity_decode($this->m_value, ENT_QUOTES, $this->m_charset->name());

     $document = new \tidy();
     $document->parseString($decoded, [
         'char-encoding' => $this->m_charset->name(),
         'input-encoding' => $this->m_charset->name(),
         'output-encoding' => $targetCharset->name(),
     ]);

     // nodeToPlainText() receives the body node together with $plain.
     $plain = '';
     $this->nodeToPlainText($document->body(), $plain);

     return $plain;
 }
Example #29
0
 /**
  * Returns the article content, shortened if configured, plus a "continue" link.
  *
  * The limit is read from Setting::find(1)->value. A non-zero limit cuts
  * content longer than that many bytes and appends "...". The text is then
  * repaired by tidy (body only) and a link to the article is appended.
  *
  * @return string Repaired excerpt followed by the "Continue reading" link.
  */
 public function getAdjustedArticleContent()
 {
     $limit = Setting::find(1)->value;

     $excerpt = $this->content;
     if ($limit != 0 && strlen($this->content) > $limit) {
         $excerpt = substr($this->content, 0, $limit) . '...';
     }

     $document = new tidy();
     $document->parseString($excerpt, array('show-body-only' => true), 'utf8');
     $document->cleanRepair();

     $repaired = (string) $document;

     return $repaired . '<a href="' . $this->getArticleUrl() . '">[ Continue reading ]</a>';
 }
 /**
  * Send content
  *
  * Echoes the response content after it has been parsed by tidy with
  * $this->config, then marks the content as sent on the event. Nothing is
  * done when the event reports that content was already sent.
  *
  * @param  SendResponseEvent $event
  * @return $this
  */
 public function sendContent(SendResponseEvent $event)
 {
     if (!$event->contentSent()) {
         $document = new \tidy();
         $document->parseString($event->getResponse()->getContent(), $this->config);
         // cleanRepair() is not called here; the parsed document is echoed as is.
         echo $document;
         $event->setContentSent();
     }

     return $this;
 }