/**
 * Re-parse the form's print template and persist everything derived from it.
 *
 * The template is loaded into simple_html_dom, every <ic> element is handed
 * to refactor() together with the item configuration, and the resulting
 * element list is cached (in-object and through CacheUtil). The processed
 * markup, and the item counter when it changed, are then written back
 * through FlowFormType.
 *
 * @param bool $isUpdate When true, existing data-id attributes are stripped
 *                       from the template and the item counter restarts at 0;
 *                       otherwise the stored template is used as-is and the
 *                       counter starts from $this->itemmax.
 * @return void
 */
public function parse($isUpdate = false)
{
    Ibos::import("application.extensions.simple_html_dom", true);

    $template = $isUpdate
        ? preg_replace("/\\s+data-id\\s?=\\s?\"?\\d+\"?/i", "", $this->printmodel)
        : $this->printmodel;
    $counter = $isUpdate ? 0 : intval($this->itemmax);

    $collected = array();
    $dom = new simple_html_dom();
    $dom->load($template, true, true, CHARSET);

    $itemNodes = $dom->find("ic");
    $itemConfig = $this->getItemConfig();
    $nothingToDo = empty($itemNodes) || empty($itemConfig);
    if (!$nothingToDo) {
        // $counter and $collected are handed over as variables so refactor() can update them.
        $this->refactor($itemNodes, $itemConfig, $counter, $collected);
    }

    $rendered = $dom->save();

    $this->_cache = $collected;
    CacheUtil::set("form_" . $this->ID, $collected);

    $changes = array("printmodelshort" => $rendered);
    if ($counter != $this->itemmax) {
        $changes["itemmax"] = $counter;
    }

    $dom->clear();
    FlowFormType::model()->modify($this->ID, $changes);
}
/**
 * Insert a button into the share table identified by $position.
 *
 * When the fragment has no element with class $position, an empty share
 * table is created first (prepended for 'myApiShareTop', appended otherwise).
 * The button is then added as the first cell of the table's '.myApiButtons'
 * row, creating that row when needed. Finally, if a '.myApiShareBottom' table
 * has both a '.myApiCommentsCell' and a button row, the comments cell's
 * colspan is set to the number of <td> elements found under the button row.
 *
 * @param string $text     HTML fragment to modify.
 * @param string $position CSS class of the target share table.
 * @param string $button   HTML of the button to insert.
 * @return string The modified HTML.
 */
function addToTable($text, $position, $button)
{
    $tableSelector = '.' . $position;

    $dom = new simple_html_dom();
    $dom->load($text);

    if (!$dom->find($tableSelector, 0)) {
        $emptyTable = '<table class="' . $position . ' myApiShareTable"></table>';
        if ($position == 'myApiShareTop') {
            $text = $emptyTable . $text;
        } else {
            $text = $text . $emptyTable;
        }
        $dom->load($text);
    }

    // Round-trip through save()/load() so later lookups see the current markup.
    $text = $dom->save();
    $dom->load($text);

    $shareTable = $dom->find($tableSelector, 0);
    $buttonsRow = $shareTable->find('.myApiButtons', 0);
    if ($buttonsRow) {
        $innerRow = $buttonsRow->find('table', 0)->find('tr', 0);
        $innerRow->innertext = '<td>' . $button . '</td>' . $innerRow->innertext;
    } else {
        $newRow = '<tr class="myApiButtons"><td><table><tr><td>' . $button . '</td></tr></table></td></tr>';
        $shareTable->innertext = $newRow . $shareTable->innertext;
    }

    $text = $dom->save();
    $dom->load($text);

    $bottomTable = $dom->find('.myApiShareBottom', 0);
    $commentsCell = $bottomTable ? $bottomTable->find('.myApiCommentsCell', 0) : null;
    $bottomButtons = $commentsCell ? $bottomTable->find('.myApiButtons', 0) : null;
    if ($bottomButtons) {
        $commentsCell->colspan = sizeof($bottomButtons->find('td'));
        $text = $dom->save();
    }

    $dom->clear();
    unset($dom);

    return $text;
}
/**
 * Render the current template into the admin preview document.
 *
 * The template named by $this->_template is executed with output buffering,
 * the <php> / </php> pseudo tags in its output are turned into real PHP
 * open/close tags, a <base> element pointing at THIS_DOMAIN is injected into
 * <head> when the page does not already have one, and the result is written
 * to reflex/admin/documents/dont_touch_this_file.php.
 *
 * @return void
 */
function buffer()
{
    ob_start();
    require(ROOT . DS . MAIN . DS . 'reflex' . DS . 'templates' . DS . $this->_template . '.php');
    $html_string = ob_get_clean();

    $html_string = str_replace('<php>', '<?php ', $html_string);
    $html_string = str_replace('</php>', ' ?>', $html_string);

    // require_once: a plain require re-declares the library's classes (fatal
    // error) when buffer() runs twice or the library was loaded elsewhere.
    require_once(ROOT . DS . MAIN . DS . 'library' . DS . 'simple_html_dom.php');

    $html = new simple_html_dom();
    $html->load($html_string);

    if (!$html->find('base', 0)) {
        $head = $html->find('head', 0);
        // A template without <head> is written out unchanged instead of crashing.
        if ($head) {
            $head->innertext = '<base href="http://' . THIS_DOMAIN . '/"/>' . $head->innertext;
            $html_string = $html->save();
        }
    }

    $html->clear();
    unset($html);

    $target = ROOT . DS . MAIN . DS . 'reflex' . DS . 'admin' . DS . 'documents' . DS . 'dont_touch_this_file.php';
    if (file_put_contents($target, $html_string) === false) {
        trigger_error('buffer(): unable to write ' . $target, E_USER_WARNING);
    }
}
<li>Coffee</li> <li>Tea</li> <li>Milk</li> </ul> <li>Tea</li> <li>Milk</li> </ul> <li>Tea</li> <li>Milk</li> </ul> </body> </html>'; //creating DOM object $domObject1 = new simple_html_dom($htmlText); //dumping it to a string $str = $domObject1->save(); echo $str; //Printing nested elements foreach ($domObject1->find('ul') as $ul) { foreach ($ul->find('li') as $li) { echo $li->plaintext . "\n"; } } echo "\n"; //Accesing direct descendant elements $domObject1->find('ul ul ul li', 2)->plaintext = 'Not enough milk! :('; echo $domObject1->find('ul')[0]->children()[0]->plaintext . "\n"; //prints 'Coffee' echo $domObject1->find('ul', 0)->children(0)->plaintext . "\n"; //prints the same as the last line echo $domObject1->find('ul', 2)->children(2)->plaintext . "\n";
/**
 * Inline linked stylesheets into the document body and rewrite URLs.
 *
 * Steps:
 *  1. Collapse all whitespace runs so the single-line regexes below work.
 *  2. Download every <link type="text/css"> stylesheet referenced in <head>
 *     and parse it with $this->_parseCss().
 *  3. Keep the a:link / a:hover / a:focus / a:visited rules in a <style>
 *     block inside <head>.
 *  4. Write every remaining rule into the style attribute of the matching
 *     body elements (stylesheet declarations first, then any existing inline
 *     style).
 *  5. Pass every <a href> and <form action> through $this->_url().
 *  6. Re-insert the body and put tags back on separate lines.
 *
 * @param string $html Full HTML document.
 * @return string The document with inlined styles.
 */
function inlineCss($html)
{
    // Remove redundant whitespace and newlines so the markup is easier to parse.
    $html = preg_replace('!\\s+!', ' ', trim($html));

    // Collect the CSS files referenced from <head>.
    $css = '';
    if (preg_match('/<head>.*<\\/head>/', $html, $match)) {
        $dom = new simple_html_dom();
        $dom->load($match[0], true);
        foreach ($dom->find('link[type=text/css]') as $e) {
            if (is_object($e)) {
                $url = 'http://' . env('HTTP_HOST') . $e->href;
                $css .= file_get_contents($url);
            }
        }
        $dom->clear();
    }

    // Parse the CSS.
    $styles = $this->_parseCss($css);

    // The a:* pseudo-class rules stay in the header.
    $css = '<style type="text/css"> <![CDATA[' . "\n";
    $links = array('a:link', 'a:hover', 'a:focus', 'a:visited');
    foreach ($links as $link) {
        if (isset($styles[$link])) {
            $css .= $link . '{' . $styles[$link] . '}' . "\n";
            unset($styles[$link]);
        }
    }
    $css .= ']]> </style>';
    // Plain string replacement: as a preg_replace() replacement, "$1" or "\1"
    // inside the CSS would be treated as a back-reference.
    $html = str_replace('</head>', $css . ' </head>', $html);

    // Extract the body; without one there is nothing to inline.
    if (preg_match('/<body[^>]*>.*<\\/body>/', $html, $match)) {
        $dom = new simple_html_dom();
        $dom->load($match[0], true);

        // Inline the styles.
        foreach ($styles as $element => $style) {
            foreach ($dom->find($element) as $e) {
                if (!is_object($e)) {
                    continue;
                }
                // Build the value per element. Appending to $style itself would
                // carry one element's inline style over to every later match.
                $merged = $style;
                if (isset($e->attr['style'])) {
                    $merged .= str_replace('"', '', $e->attr['style']);
                }
                $e->attr = array_merge($e->attr, array('style' => '"' . $merged . '"'));
            }
        }

        // Append the session id to links and form targets.
        $targets = array('a' => 'href', 'form' => 'action');
        foreach ($targets as $target => $attribute) {
            foreach ($dom->find($target) as $e) {
                if (isset($e->attr[$attribute])) {
                    $e->attr[$attribute] = $this->_url($e->attr[$attribute]);
                }
            }
        }

        // Rebuild the document. The exact matched string is replaced, so a
        // <body> tag carrying attributes is handled too.
        $body = $dom->save();
        $dom->clear();
        $html = str_replace($match[0], $body, $html);
    }

    $html = preg_replace("/> /", ">\n", $html);
    $html = preg_replace("/ </", "\n<", $html);

    return $html;
}
/**
 * Replace <object> tags that embed .flv / .mp4 files with JW Player markup.
 *
 * Every <object> whose data attribute ends in ".flv" or ".mp4" (compared
 * case-insensitively) is replaced by an <object>/<embed> pair that loads
 * media/jwplayer/player.swf with the original file passed in flashvars.
 *
 * @param string $texte_a_formater HTML to process.
 * @return string The rewritten HTML, or the untouched input when no matching
 *                <object> was found.
 */
function flash_replace($texte_a_formater)
{
    $html = new simple_html_dom();
    $html->load($texte_a_formater);

    $flv_found = 0;
    foreach ($html->find("object") as $element) {
        // Only objects that point at an FLV or MP4 file are rewritten.
        $extension = substr(strtoupper($element->data), -4);
        if ($extension == ".FLV" || $extension == ".MP4") {
            $flv_found++;
            $flv_file = $element->data;

            $element->outertext =
                '<object id="player" classid="clsid:D27CDB6E-AE6D-11cf-96B8-444553540000" name="player" width="100%" height="385">'
                . ' <param name="movie" value="media/jwplayer/player.swf" />'
                . ' <param name="allowfullscreen" value="true" />'
                . ' <param name="allowscriptaccess" value="always" />'
                . ' <param name="flashvars" value="file=' . $flv_file . '&image=" />'
                . ' <embed type="application/x-shockwave-flash" id="player2" name="player2" src="media/jwplayer/player.swf" width="100%" height="385" allowscriptaccess="always" allowfullscreen="true" flashvars="file=' . $flv_file . '&image=" />'
                . ' </object>';
        }
    }

    $actu_texte_format = ($flv_found > 0) ? $html->save() : $texte_a_formater;

    // Release the DOM before returning; placed after the return statement
    // this cleanup would never run.
    $html->clear();
    unset($html);

    return $actu_texte_format;
}
$html_dom->load($buf);

// Build a nested <ul> table of contents from the document's headings and give
// every heading an id so the TOC entries can link to it.
$toc = '';
// Heading levels of the lists that are currently open, outermost first.
// Tracking them keeps the markup balanced when levels are skipped
// (e.g. h1 followed by h3) or when the document does not start at h1.
$open_levels = array();
foreach ($html_dom->find('h1,h2,h3,h4,h5,h6') as $h) {
    $inner_text = trim($h->innertext);
    $id = str_replace(' ', '_', $inner_text);
    $id = preg_replace('%[^a-zA-Z0-9_-]%', '', $id);
    $h->id = $id; // add id attribute so we can jump to this element

    $level = intval($h->tag[1]);

    // Close every list that is deeper than the current heading.
    while (!empty($open_levels) && end($open_levels) > $level) {
        array_pop($open_levels);
        $toc .= '</li></ul>';
    }

    if (empty($open_levels) || end($open_levels) < $level) {
        // Going deeper (or first heading): open a new list inside the current item.
        $toc .= '<ul>';
        $open_levels[] = $level;
    } else {
        // Same level: close the previous sibling item.
        $toc .= '</li>';
    }

    $toc .= "<li><a href='#{$id}'>{$inner_text}</a>";
}
$toc .= str_repeat('</li></ul>', count($open_levels));

// Replace placeholder with this (skipped when the page has no placeholder):
$toc_placeholder = $html_dom->find('div[id=word-table-of-contents]', 0);
if ($toc_placeholder) {
    $toc_placeholder->innertext = $toc;
}
$html_with_toc = $html_dom->save();
$html_dom->clear();
unset($html_dom);
print $html_with_toc;
?>
</div>
</body>
</html>
/**
 * Purify and normalise HTML so WYMEditor always receives and produces the same structure.
 *
 * The input is first run through BB_HTMLPurify() with a restricted tag
 * whitelist, then normalised with simple_html_dom: newlines outside <pre>
 * become spaces, stray text and inline elements are wrapped in <p>, <li>
 * content is reduced to a small tag set, non-breaking spaces become plain
 * spaces, images are validated (shortcode placeholders or downloadable
 * images, depending on $options), misused special classes are removed and
 * empty class-less elements are dropped.
 *
 * Options read by this function: "shortcodes", "shortcode_placeholder",
 * "shortcode_ids", "validate_img", "validate_img_protocol",
 * "validate_img_allow_gif", "validate_img_allow_jpg",
 * "validate_img_allow_png", "allowed_classes", "additional_specials".
 *
 * @param string $data    Raw HTML from the editor.
 * @param array  $options Processing options (see above).
 * @return string The cleaned HTML (inner HTML of the temporary <body> wrapper).
 */
function BB_HTMLPurifyForWYMEditor($data, $options)
{
    // Normalise the two feature switches so plain isset() checks work below.
    if (isset($options["shortcodes"]) && (!$options["shortcodes"] || !isset($options["shortcode_placeholder"]) || !isset($options["shortcode_ids"]))) {
        unset($options["shortcodes"]);
    }
    if (isset($options["validate_img"]) && !$options["validate_img"]) {
        unset($options["validate_img"]);
    }

    // Let HTML Purifier do the heavy-lifting (removes XSS, etc).
    // If the 'p' tag ever accepts more than 'class', the 'class' extraction code while generating pretty HTML will need rewriting.
    $config = array(
        "Attr.EnableID" => isset($options["shortcodes"]),
        "HTML.Allowed" => "p[class],strong,em,sup,sub,a[title|href],ul[class],ol[class],li[class],h1[class],h2[class],h3[class],h4[class],h5[class],h6[class],pre[class],blockquote[class],img[" . (isset($options["shortcodes"]) ? "id|class|" : "") . "src|alt]"
    );
    if (isset($options["allowed_classes"]) && is_array($options["allowed_classes"])) {
        $config["Attr.AllowedClasses"] = $options["allowed_classes"];
    }
    $data = BB_HTMLPurify($data, $config);

    // Replace newlines outside of 'pre' tags with spaces.
    $data2 = "";
    $lastpos = 0;
    $pos = strpos($data, "<pre");
    $pos2 = strpos($data, "</pre>");
    // Only search for the end of the opening tag when there is one.
    $pos3 = ($pos === false ? false : strpos($data, ">", $pos));
    while ($pos !== false && $pos2 !== false && $pos3 !== false && $pos3 < $pos2) {
        $data2 .= Str::ReplaceNewlines(" ", substr($data, $lastpos, $pos3 + 1 - $lastpos));
        $data2 .= Str::ReplaceNewlines("\n", substr($data, $pos3 + 1, $pos2 - $pos3 - 1));
        $data2 .= "</pre>";
        $lastpos = $pos2 + 6;
        $pos = strpos($data, "<pre", $lastpos);
        $pos2 = strpos($data, "</pre>", $lastpos);
        $pos3 = ($pos === false ? false : strpos($data, ">", $pos));
    }
    $data = $data2 . Str::ReplaceNewlines(" ", substr($data, $lastpos));

    // Process the DOM to create consistent input and output.
    require_once ROOT_PATH . "/" . SUPPORT_PATH . "/simple_html_dom.php";
    $html = new simple_html_dom();
    $html2 = new simple_html_dom();

    // Make sure all elements and text are inside a top-level tag.
    $html->load("<body>" . $data . "</body>");
    $bodytags = array("p" => true, "ul" => true, "ol" => true, "h1" => true, "h2" => true, "h3" => true, "h4" => true, "h5" => true, "h6" => true, "pre" => true, "blockquote" => true);
    $rows = $html->find("body text");
    foreach ($rows as $row) {
        // Walk up to the ancestor that is a direct child of <body>.
        $row2 = $row;
        while ($row2->parent()->tag != "body") {
            $row2 = $row2->parent();
        }
        if (!isset($bodytags[$row2->tag])) {
            $row2->outertext = "<p>" . $row2->outertext . "</p>";
        }
    }
    $html->load($html->save());

    $body = $html->find("body", 0);
    $rows = $body->children();
    foreach ($rows as $row) {
        if (!isset($bodytags[$row->tag])) {
            $row->outertext = "<p>" . $row->outertext . "</p>";
        }
    }
    $html->load($html->save());

    $rows = $html->find("blockquote text");
    foreach ($rows as $row) {
        // Same as above, relative to the enclosing <blockquote>.
        $row2 = $row;
        while ($row2->parent()->tag != "blockquote") {
            $row2 = $row2->parent();
        }
        if (!isset($bodytags[$row2->tag])) {
            $row2->outertext = "<p>" . $row2->outertext . "</p>";
        }
    }
    $html->load($html->save());

    // Clean up 'li' elements. WYMEditor only allows a limited number of tags (a good thing).
    $rows = $html->find("li");
    foreach ($rows as $row) {
        $row->innertext = strip_tags($row->innertext, "<strong><em><sup><sub><a><img><ul><ol><li>");
    }

    // Replace non-breaking spaces with plain spaces. The needles are written
    // as entities and byte escapes so they survive editors and re-encoding:
    // the named and numeric entity, the double-encoded UTF-8 form ("\xC3\x82"
    // followed by the NBSP bytes), and the UTF-8 NBSP itself. The
    // double-encoded form must come before the plain one.
    $data = $html->save();
    $data = str_replace(array("&nbsp;", "&#160;", "\xC3\x82\xC2\xA0", "\xC2\xA0"), " ", $data);
    $html->load($data);

    // Process shortcodes or images.
    if (isset($options["shortcodes"])) {
        // Remove invalid 'img' tags.
        $rows = $html->find("img");
        foreach ($rows as $row) {
            if (!isset($row->class) || $row->class != $options["shortcode_placeholder"] || !isset($row->id) || !isset($options["shortcode_ids"][$row->id])) {
                $row->outertext = "";
            } else {
                $row->src = $options["shortcode_ids"][$row->id];
            }
        }
        $html->load($html->save());

        // Move text inside the special 'p.wrap-shortcode' class to separate 'p' tags.
        $rows = $html->find("p.wrap-shortcode img");
        foreach ($rows as $row) {
            $str = $row->parent()->innertext;
            $pos = strpos($str, "<img ");
            $pos2 = strpos($str, "/>", $pos);
            $str2 = substr($str, 0, $pos);
            $str3 = substr($str, $pos2 + 2);
            $str = substr($str, $pos, $pos2 + 2 - $pos);
            if ($str2 != "" || $str3 != "") {
                $row->parent()->outertext = ($str2 == "" ? "" : "<p>" . $str2 . "</p>") . "<p class=\"" . $row->parent()->class . "\">" . $str . "</p>" . ($str3 == "" ? "" : "<p>" . $str3 . "</p>");
            }
        }
        $html->load($html->save());
    } else {
        if (isset($options["validate_img"])) {
            // Download each 'img' 'src' and check them for valid web output (only allow JPEG, PNG, and GIF).
            $imgopts = array(
                "protocol" => isset($options["validate_img_protocol"]) ? $options["validate_img_protocol"] : "",
                "allow_gif" => isset($options["validate_img_allow_gif"]) ? $options["validate_img_allow_gif"] : true,
                "allow_jpg" => isset($options["validate_img_allow_jpg"]) ? $options["validate_img_allow_jpg"] : true,
                "allow_png" => isset($options["validate_img_allow_png"]) ? $options["validate_img_allow_png"] : true
            );
            $rows = $html->find("img");
            foreach ($rows as $row) {
                if (!isset($row->src)) {
                    $row->outertext = "";
                } else {
                    $imginfo = BB_IsValidHTMLImage($row->src, $imgopts);
                    if (!$imginfo["success"]) {
                        $row->outertext = "";
                    }
                }
            }
            $html->load($html->save());
        }
    }

    // Remove special classes that are improperly used.
    // Map of class => allowed tag, or class => array(allowed tag, required descendant selector).
    $specials = array("wrap-start" => "p", "wrap-end" => "p", "table-row" => "p", "table-cell" => "p", "table-end" => "p");
    if (isset($options["shortcodes"])) {
        $specials["wrap-shortcode"] = array("p", "img");
    }
    if (isset($options["additional_specials"])) {
        $specials = array_merge($specials, $options["additional_specials"]);
    }
    foreach ($specials as $class => $tags) {
        $rows = $html->find("." . $class);
        foreach ($rows as $row) {
            if (is_string($tags)) {
                $valid = true;
            } else {
                $html2->load($row->innertext);
                $row2 = $html2->find($tags[1], 0);
                $valid = $row2 ? true : false;
            }
            $valid = $valid && (is_string($tags) && $row->tag == $tags || is_array($tags) && $row->tag == $tags[0]);
            if (!$valid) {
                $row->class = BB_HTMLRemoveClass($row->class, $class);
            }
            if ($row->class == "") {
                unset($row->class);
            }
        }
        $html->load($html->save());
    }

    // Remove empty elements without a class attribute.
    // Iterative depth-first walk; repeated until a full pass changes nothing,
    // because removing a child can leave its parent empty.
    do {
        $found = false;
        $stack = array();
        $body = $html->find("body", 0);
        $stack[] = array("rows" => $body->children(), "pos" => 0);
        while (count($stack)) {
            $pos = count($stack) - 1;
            if ($stack[$pos]["pos"] >= count($stack[$pos]["rows"])) {
                // All children of this level are done: pop and post-process the parent.
                $stack = array_slice($stack, 0, -1);
                if (count($stack)) {
                    $pos = count($stack) - 1;
                    $row = $stack[$pos]["rows"][$stack[$pos]["pos"]];
                    if (!$found && trim($row->innertext) !== $row->innertext) {
                        $row->innertext = trim($row->innertext);
                        $found = true;
                    }
                    $stack[$pos]["pos"]++;
                }
            } else {
                $row = $stack[$pos]["rows"][$stack[$pos]["pos"]];
                $rows = $row->children();
                if (count($rows)) {
                    $stack[] = array("rows" => $rows, "pos" => 0);
                } else {
                    if (!isset($row->class) && trim($row->innertext) == "") {
                        $row->outertext = "";
                        $found = true;
                    } else {
                        if (trim($row->innertext) !== $row->innertext) {
                            $row->innertext = trim($row->innertext);
                            $found = true;
                        }
                    }
                    $stack[$pos]["pos"]++;
                }
            }
        }
        $html->load($html->save());
    } while ($found);

    $body = $html->find("body", 0);
    $data = $body->innertext;

    // Both parsers are done; release their node trees.
    $html->clear();
    $html2->clear();

    // Finalize 'li' tag cleanup.
    $data = preg_replace('/<\\/li>\\s+/', "</li>", $data);

    return $data;
}
/**
 * Add Bootstrap classes and wrappers to the comment form's default fields.
 *
 * For each of the 'author', 'email' and 'url' fields present in $arrFields,
 * the field's <input> receives the 'form-control' class. When the theme mod
 * 'cc2_comment_form_orientation' is 'horizontal' (the default), the label
 * additionally gets the grid classes, the input's parent element is wrapped
 * in a 'col-md-10 col-lg-10' div and the whole field in a 'form-group' div.
 *
 * Fields whose markup does not contain the expected input (for example
 * because another plugin replaced it) are returned untouched.
 *
 * @param array $arrFields Comment form fields, keyed by field name.
 * @return array The fields with Bootstrap markup applied.
 */
function cc2_bootstrap_comment_form_fields($arrFields = array())
{
    $return = $arrFields;

    // avoid nasty errors if out of some unknown reason the simple_html_dom class failed to be included
    if (!class_exists('simple_html_dom')) {
        return $return;
    }

    $is_horizontal = get_theme_mod('cc2_comment_form_orientation', 'horizontal') == 'horizontal';

    // Only the names of the default fields matter here; the markup that is
    // parsed is always the one passed in.
    $arrFieldNames = array('author', 'email', 'url');

    $dom = new simple_html_dom();

    foreach ($arrFieldNames as $strFieldName) {
        if (!isset($return[$strFieldName])) {
            continue;
        }

        // load snippet and find the input tag
        $dom->load($return[$strFieldName]);
        $elem = $dom->find('input#' . $strFieldName, 0);
        if (!$elem) {
            // Customised field without the expected input: leave it as it is.
            continue;
        }

        // add class if not already set
        $elem->class = cc2_htmlClass::addClass($elem->class, 'form-control');

        if ($is_horizontal) {
            // find label and add the grid classes
            $label_elem = $dom->find('label', 0);
            if ($label_elem) {
                $label_elem->class = cc2_htmlClass::addClass($label_elem->class, array('col-md-2', 'col-lg-2', 'control-label'));
            }

            // wrap parent element
            $parent_elem = $elem->parent();
            if ($parent_elem) {
                $parent_elem->outertext = '<div class="col-md-10 col-lg-10">' . $parent_elem->outertext . '</div>';
            }
        }

        $strEditedHTML = $dom->save();

        // Optionally group field
        if ($is_horizontal) {
            $strEditedHTML = sprintf('<div class="form-group">%s</div>', $strEditedHTML);
        }

        if (!empty($strEditedHTML)) {
            $return[$strFieldName] = $strEditedHTML;
        }
    }

    $dom->clear();

    return $return;
}
/**
 * Build a self-contained HTML page for an article.
 *
 * The article content is wrapped in a page template (styles scoped by the
 * md5 of the article URL, article metadata embedded as JSON), every image
 * with a known extension is downloaded and inlined as a data: URL, and the
 * link targets found in the content are listed in a footnote list that is
 * shown in print.
 *
 * Side effects on $article: ->md5 is set and ->content is removed.
 *
 * NOTE(review): title, author, url and content are interpolated into the
 * page without escaping; this assumes they were sanitised upstream (confirm).
 *
 * @param object $article Object with url, domain, title, author and content.
 * @return string|null The generated page, or null when the article has no content.
 */
function process_content($article)
{
    $image_types = array(
        'jpg' => 'image/jpeg',
        'jpeg' => 'image/jpeg',
        'gif' => 'image/gif',
        'png' => 'image/png',
        'bmp' => 'image/bmp'
    );

    if (empty($article->content)) {
        return null;
    }

    $md5 = md5($article->url);
    $article->md5 = $md5;

    // The content is embedded in the page body, not in the JSON metadata.
    $content = $article->content;
    unset($article->content);
    $json = json_encode($article);

    $domain = str_replace('www.', '', $article->domain);
    $domain = "<a href=\"{$article->url}\">{$domain}</a>";
    $meta = $article->author
        ? "By {$article->author}<span class=\"short-url\"> / {$domain}</span><div class=\"full-url\">{$article->url}</div>"
        : "<div class=\"short-url\">{$domain}</div><div class=\"full-url\">{$article->url}</div>";

    $content = <<<END
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>{$article->title}</title>
<meta content="width=device-width, initial-scale=1" name="viewport">
<style>
body { text-align: center; margin: 0; }
#article-{$md5} { font: 16px/24px serif; width: 100%; text-align: left; margin: 0 auto; padding-bottom: 25px; }
#article-{$md5} a { color: #03C; }
#article-{$md5} img { display: block; max-width: 100%; width: auto; height: auto; }
#article-{$md5} > h1 { font: bold 24px/24px helvetica neue, helvetica, sans-serif; margin: 25px 0 10px 0; }
#article-{$md5} > .meta { color: #666; font: 11px verdana, sans-serif; text-transform: uppercase; border-top: 1px solid #ccc; padding-top: 10px; padding-bottom: 10px; }
#article-{$md5} > .meta .full-url { display: none; }
#article-{$md5} > .meta a { color: #666; }
#article-{$md5} > h1, #article-{$md5} > .meta, #article-{$md5} p { padding-left: 15px; padding-right: 15px; }
#article-{$md5}-links { display: none; font-size: 9px; text-align: left; padding-left: 0; list-style-position: inside; border-top: 1px dotted #ccc; padding-top: 10px; margin-top: 20px; }
#article-{$md5}-links li { margin-top: 5px; margin-left: 5px; padding-left: 0; }
@media only screen and (min-width: 580px) {
#article-{$md5} { width: 550px; padding-bottom: 50px; }
#article-{$md5} > h1 { font: bold 30px/30px helvetica neue, helvetica, sans-serif; margin: 50px 0 10px 0; }
#article-{$md5} { font: 16px/24px serif; }
#article-{$md5} > h1, #article-{$md5} > .meta, #article-{$md5} p { padding-left: 0; padding-right: 0; }
}
@media print {
#article-{$md5} { font: 12px/18px serif; padding-top: 0; padding-bottom: 0; column-count: 2; column-gap: 20px; -moz-column-count: 2; -moz-column-gap: 20px; -webkit-column-count: 2; -webkit-column-gap: 20px; counter-reset: link; }
#article-{$md5}-links { display: block; }
#article-{$md5} a[href] { color: #000; text-decoration: none; border-bottom: 1px dotted #ccc; }
#article-{$md5} a[href]:after { counter-increment: link; content: counter(link); vertical-align: super; font-size: 50%; line-height: 0; }
#article-{$md5} > h1 { font: bold 18px/18px helvetica neue, helvetica, sans-serif; margin-top: 0; }
#article-{$md5} > .meta { font-size: 9px; text-transform: none; }
#article-{$md5} > .meta .short-url { display: none; }
#article-{$md5} > .meta .full-url { display: block; }
#article-{$md5} > h1, #article-{$md5} > .meta, #article-{$md5} p { padding-left: 0; padding-right: 0; }
}
</style>
</head>
<body>
<!-- occupy.here meta start -->
<script id="meta">
var meta = {$json};
</script>
<!-- occupy.here meta end -->
<div id="article-{$md5}">
<!-- occupy.here article start -->
<h1>{$article->title}</h1>
<div class="meta">
{$meta}
</div>
<div class="content">
{$content}
</div>
<!-- occupy.here article end -->
</div>
<ol id="article-{$md5}-links" class="article-links">
</ol>
</body>
</html>
END;

    $html = new simple_html_dom();
    $html->load($content, true, false);

    // Inline images as data: URLs. The <img> nodes are walked directly rather
    // than re-queried with an img[src=...] selector, which fails for encoded
    // URLs and for URLs containing selector metacharacters. Each distinct URL
    // is downloaded at most once.
    $inlined = array(); // decoded URL => array('src' => ..., 'size' => ...) or false
    foreach ($html->find('img') as $img) {
        $url = urldecode($img->src);

        if (!isset($inlined[$url])) {
            $inlined[$url] = false;
            if (preg_match('/\\.(\\w+)$/', $url, $matches)) {
                $ext = strtolower($matches[1]);
                if (!empty($image_types[$ext])) {
                    $data = download_file($url);
                    if (!empty($data)) {
                        $inlined[$url] = array(
                            'src' => "data:{$image_types[$ext]};base64," . base64_encode($data),
                            // false when the bytes are not a readable image
                            'size' => getimagesizefromstring($data)
                        );
                    }
                }
            }
        }

        if ($inlined[$url] === false) {
            // Unknown type or failed download: keep the original reference.
            continue;
        }

        $img->src = $inlined[$url]['src'];
        if ($inlined[$url]['size'] !== false) {
            $img->width = $inlined[$url]['size'][0];
            $img->height = $inlined[$url]['size'][1];
        }
    }

    // Footnote list of link targets (visible in print only).
    $links = array();
    foreach ($html->find("#article-{$md5} .content a[href]") as $link) {
        // Decode for readability, then escape again: the text is inserted as
        // HTML and a decoded URL may contain markup characters.
        $target = urldecode(html_entity_decode($link->href, ENT_QUOTES, 'UTF-8'));
        $links[] = htmlspecialchars($target, ENT_QUOTES, 'UTF-8');
    }

    $ol = $html->find("#article-{$md5}-links", 0);
    if ($ol) {
        if (!empty($links)) {
            $ol->innertext = '<li>' . implode("</li>\n<li>", $links) . "</li>\n";
        } else {
            $ol->outertext = '';
        }
    }

    $result = $html->save();
    $html->clear();

    return $result;
}
/**
 * Turn a crawled article page into a Model_Horoscope_XungHop record.
 *
 * The publication date is read from div.datetime, the body from
 * div#content_document (comments, links, author and source paragraphs are
 * stripped). When both signs are known, the thumbnail and every inline image
 * are downloaded next to the record and the markup is rewritten to point at
 * the local copies. On success the source link is marked as processed.
 *
 * @param string $_HTML_CONTENT Raw HTML of the source page.
 * @param string $_Cung1        First sign; '-' marks it as unknown.
 * @param string $_Cung2        Second sign; '-' marks it as unknown.
 * @param string $_Summary      Summary text.
 * @param string $_SourceUri    URL the page was fetched from.
 * @param mixed  $_LinkId       Id of the crawl-queue row to mark as done.
 * @param string $_ImageLink    URL of the thumbnail image.
 * @return false|null false when there is nothing to process or the content
 *                    block is missing; null otherwise (including when saving
 *                    the record failed, which is only logged).
 */
public static function _Process_Recieved_Content($_HTML_CONTENT, $_Cung1, $_Cung2, $_Summary, $_SourceUri, $_LinkId, $_ImageLink)
{
    if ($_HTML_CONTENT == '') {
        self::_print_to_console('-> nothing to do');
        return false;
    }

    // Create a DOM object and load the HTML from the string
    require_once Kohana::find_file('classes', 'vendor/simple_html_dom');
    $html = new simple_html_dom();
    $html->load($_HTML_CONTENT);
    unset($_HTML_CONTENT);

    // Records with an unknown sign are stored but flagged as unchecked, and
    // no images are downloaded for them.
    $ktra = !($_Cung1 == '-' || $_Cung2 == '-');

    $story = new Model_Horoscope_XungHop();
    $story->cung_1 = $_Cung1;
    $story->cung_2 = $_Cung2;
    $story->alias = $_Cung1 . '_' . $_Cung2;
    if (self::CheckRecordByAlias($story->alias)) {
        $story->alias = $_Cung1 . '__' . $_Cung2;
    }
    $story->tom_tat = $_Summary;
    $story->ngay_tao = date("Y-m-d");
    $story->url_nguon = $_SourceUri;
    $story->auto_get = true;

    // Publication date: the text after the comma is read as "d/m/Y H:i".
    // Falls back to the current time when it is missing or unparsable.
    // 24-hour "H" is used: 12-hour "h" without AM/PM loses half of the day.
    $story->source_date = date("Y-m-d H:i:s");
    $date = $html->find('div[class="datetime"]', 0);
    if ($date) {
        $d = explode(',', $date->plaintext);
        if (isset($d[1])) {
            $d1 = explode(' ', trim($d[1]));
            $dmy = explode('/', $d1[0]);
            if (count($dmy) >= 3) {
                list($ngay, $thang, $nam) = $dmy;
                $time = isset($d1[1]) ? $d1[1] : '00:00';
                $timestamp = strtotime($nam . '-' . $thang . '-' . $ngay . ' ' . $time . ':00');
                if ($timestamp !== false) {
                    $story->source_date = date("Y-m-d H:i:s", $timestamp);
                }
            }
        }
    }

    // find content
    $content = $html->find('div[id="content_document"]', 0);
    if (!$content) {
        self::_print_to_console('-> content not found');
        $html->clear();
        unset($html);
        return false;
    }

    $string = $content->innertext;
    // The source DOM is not needed past this point.
    $html->clear();
    unset($html);

    // remove line breaks and tabs, comments, link wrappers, author/source paragraphs
    $string = str_replace(array("\r\n", "\r", "\n", "\t"), '', $string);
    $string = preg_replace('/(<!--.+?-->)/s', '', $string);
    $string = preg_replace('@<a[^>]*>(.*)</a>@ismUx', '$1', $string);
    $string = preg_replace('/<p[ ]class="pAuthor">.*<\\/p>/ismxU', '', $string);
    $string = preg_replace('/<p[ ]class="pSource">.*<\\/p>/ismxU', '', $string);

    $story->noi_dung = $string;
    $story->kiem_tra = $ktra;
    $story->save();

    if (!$story->identifier()) {
        self::_print_to_console('Fail:' . $_SourceUri);
        return;
    }

    // Default to the remote thumbnail; replaced below by a local copy when possible.
    $story->hinh_anh = $_ImageLink;

    if ($ktra) {
        // get image thumb => save to disk => update record in db
        $path = 'assets/horoscope/xung-hop/' . $story->alias . '/';
        $img = Vendor_Crawler::get_file_from_url_by_curl($_ImageLink, $path, $story->alias . '-thumb');
        if ($img) {
            // An empty file means the download failed: best-effort fallback
            // to the placeholder thumbnail.
            if (filesize($img) == 0) {
                @copy('assets/horoscope/thumb_140.jpg', $img);
            }
            $story->hinh_anh = '/' . $img;
        }

        // Download inline images and point the markup at the local copies.
        $html2 = new simple_html_dom();
        $html2->load($story->noi_dung);
        foreach ($html2->find('img') as $i => $image) {
            unset($image->onclick);

            // Parentheses are required: "." and "+" do not group this way on
            // their own in every PHP version.
            $file_name = 'anh_' . ($i + 1);
            $get_file = Vendor_Crawler::get_file_from_url_by_curl($image->src, $path, $file_name);
            $local_file = $get_file ? ltrim($get_file, '/') : '';

            if ($local_file === '' || !is_file($local_file) || filesize($local_file) == 0) {
                // Drop images that could not be fetched from the stored markup.
                $image->outertext = '';
            } else {
                $image->src = '/' . $get_file;
            }
        }
        $story->noi_dung = $html2->save();
        $html2->clear();
        unset($html2);
    }

    $story->save();

    // insert done => update from tmp table
    Model_Horoscope_XungHopLinkBLL::UpdateRecordStatus($_LinkId);
    self::_print_to_console('Done: ' . $_SourceUri);
}
/**
 * Convert Embedded CSS to Inline
 *
 * The rules of the first <style> block are parsed, grouped by selector and
 * appended to the style attribute of every matching element. The document is
 * re-serialised without its <head>.
 *
 * @param string $document    HTML document
 * @param bool   $strip_class strip the class and id attributes from the result
 * @return string The document with inline styles, or the unchanged document
 *                when it has no <style> block or no parsable rule.
 */
function convert($document, $strip_class = false)
{
    // Debug mode will output selectors and styles that are detected in the embedded CSS
    $debug = false;

    // Extract the CSS. "[^>]*" so a bare <style> tag without attributes matches too.
    if (!preg_match('/<style[^>]*>(?<css>[^<]+)<\\/style>/s', $document, $matches)) {
        return $document;
    }

    // Strip out extra newlines and tabs from CSS
    $css = preg_replace("/[\n\r\t]+/s", "", $matches['css']);

    // Extract each CSS declaration:
    // index 1 is the selector (list), index 2 the declaration block.
    preg_match_all('/([-a-zA-Z0-9_ ,#\\.]+){([^}]+)}/s', $css, $rules, PREG_SET_ORDER);

    $styles = array();
    foreach ($rules as $rule) {
        $declarations = trim($rule[2]);
        if ($declarations === '') {
            continue;
        }

        // A rule may list several selectors; each gets its own entry.
        foreach (explode(',', $rule[1]) as $selector) {
            $selector = trim($selector);
            if ($selector === '') {
                continue;
            }

            if (!isset($styles[$selector])) {
                $styles[$selector] = '';
            } elseif (substr(rtrim($styles[$selector]), -1) !== ';') {
                // Keep declarations of repeated selectors separated.
                $styles[$selector] .= ';';
            }
            $styles[$selector] .= $declarations;

            if ($debug) {
                echo $selector . ' { ' . $declarations . ' }<br/>';
            }
        }
    }

    // DEBUG: Show selector and declaration
    if ($debug) {
        echo '<pre>';
        foreach ($styles as $selector => $styling) {
            echo $selector . ':<br>';
            echo $styling . '<br/><br/>';
        }
        echo '</pre><hr/>';
    }

    // Nothing to inline: hand the document back untouched.
    if (empty($styles)) {
        return $document;
    }

    // Load Simple HTML DOM helper
    require_once 'simple_html_dom.php';
    $html_dom = new simple_html_dom();

    // Load in the HTML without the head and style definitions
    $html_dom->load(preg_replace('/\\<head\\>(.+?)\\<\\/head>/s', '', $document));

    // For each style declaration, find the selector in the HTML and add the inline CSS
    foreach ($styles as $selector => $styling) {
        foreach ($html_dom->find($selector) as $element) {
            $existing = (string) $element->style;

            // Check to make sure the style doesn't already exist
            if (stripos($existing, $styling) !== false) {
                continue;
            }

            // If there is any existing style, append to it
            if (strlen($existing) > 0 && substr(rtrim($existing), -1) !== ';') {
                $existing .= ';';
            }
            $element->style = $existing . $styling;
        }
    }

    $inline_css_message = $html_dom->save();
    $html_dom->clear();

    // Strip class and id attributes
    if ($strip_class === true) {
        // One pass removes at most one attribute per tag, so repeat until
        // nothing is left. The unquoted alternative stops at ">" so it cannot
        // swallow the end of the tag.
        $pattern = '~(<[a-z0-9][^>]*)(\\s(?:class|id)\\s*=\\s*(([\'"]).*?\\4|[^\\s>]*))~usi';
        do {
            $stripped = preg_replace($pattern, '\\1', $inline_css_message, -1, $count);
            if ($stripped === null) {
                // PCRE error (e.g. invalid UTF-8): keep what we have.
                break;
            }
            $inline_css_message = $stripped;
        } while ($count > 0);
    }

    return $inline_css_message;
}
/**
 * Remove style and/or script elements from an HTML string.
 *
 * @param string $html    HTML to clean.
 * @param bool   $scripts Remove <script> elements.
 * @param bool   $styles  Remove <style> and <link> elements.
 * @return string The HTML without the selected elements.
 */
public static function strip_html($html, $scripts = true, $styles = true)
{
    $selectors = array();
    if ($styles) {
        $selectors[] = "style";
        $selectors[] = "link";
    }
    if ($scripts) {
        $selectors[] = "script";
    }

    $obj = new simple_html_dom();
    $obj->load($html);

    foreach ($selectors as $selector) {
        foreach ($obj->find($selector) as $elem) {
            $elem->outertext = "";
        }
    }

    $result = $obj->save();

    // Release the parser's node tree before returning.
    $obj->clear();

    return $result;
}
/**
 * Render the current post as a PDF and store it in the plugin cache directory.
 *
 * The file is written to WPPTOPDFENH_CACHE_DIR/<post_name>.pdf. Page format,
 * fonts, margins, header data and the optional meta lines (author, categories,
 * tags, date, featured image) are all driven by $this->options.
 *
 * @param int  $id            Post id supplied by the caller.
 *                            NOTE(review): not used here, the post comes from
 *                            get_post() without an argument - confirm intended.
 * @param bool $forceDownload when true the PDF is output with mode 'FI'
 *                            instead of 'F'
 * @return false|null false when the shortcode gate rejects the post,
 *                    otherwise nothing
 */
function generate_pdf_file($id, $forceDownload = false)
{
    $post = get_post();

    // NOTE(review): `the_content` does not look like a WP_Post field (the body
    // lives in `post_content`), so this gate may never trigger. Left untouched
    // because the intended gating rule cannot be derived from this function.
    $content = $post->the_content;
    if (has_shortcode($content, 'wpptopdfenh')) {
        if (!$this->options[$post->post_type]) {
            return false;
        }
    }

    // class_exists() guards avoid duplicate declaration errors when another
    // plugin already loaded the same library.
    if (!class_exists('TCPDF')) {
        require_once WPPTOPDFENH_PATH . '/tcpdf/tcpdf.php';
    }
    if (!class_exists('MYPDF')) {
        require_once WPPTOPDFENH_PATH . '/wpptopdfenh_header.php';
    }
    // conflict with Lightbox Plus v2.4.6
    if (!class_exists('simple_html_dom')) {
        require_once WPPTOPDFENH_PATH . '/simplehtmldom/simple-html-dom.php';
    }

    $filePath = WPPTOPDFENH_CACHE_DIR . '/' . $post->post_name . '.pdf';

    // Page setup: configured value or the TCPDF default.
    $pagesize = isset($this->options['pageSize']) ? $this->options['pageSize'] : PDF_PAGE_FORMAT;
    $unit = isset($this->options['unitMeasure']) ? $this->options['unitMeasure'] : PDF_UNIT;
    // The fallback must land in $orientation; it used to overwrite $unit and
    // leave $orientation undefined.
    $orientation = isset($this->options['orientation']) ? $this->options['orientation'] : PDF_PAGE_ORIENTATION;

    $pdf = new MYPDF($orientation, $unit, $pagesize, true, 'UTF-8', false);

    // Let other filters modify the content if selected.
    if (isset($this->options['otherPlugin'])) {
        $post->post_content = apply_filters('the_content', $post->post_content);
    } else {
        $post->post_content = wpautop($post->post_content);
    }

    // Process shortcodes if selected, otherwise drop them.
    if (isset($this->options['processShortcodes'])) {
        $post->post_content = do_shortcode($post->post_content);
    } else {
        $post->post_content = strip_shortcodes($post->post_content);
    }

    // Document information.
    $pdf->SetCreator('WP Post to PDF Enhanced plugin by Lewis Rosenthal (http://www.2rosenthals.net/wordpress/help/general-help/wp-post-to-pdf-enhanced/) with ' . PDF_CREATOR);
    $pdf->SetAuthor(get_bloginfo('name'));
    $pdf->SetTitle(apply_filters('the_title', $post->post_title));

    // Header logo. The defaults are null, which is what the previously
    // undefined variables evaluated to when no logo was configured.
    $logo = null;
    $logowidth = null;
    if (isset($this->options['headerlogoImage'])) {
        $logo = PDF_HEADER_LOGO;
        $logodata = getimagesize(PDF_HEADER_LOGO);
        $factor = isset($this->options['headerlogoImageFactor']) ? $this->options['headerlogoImageFactor'] : 14;
        // Scale the width from the aspect ratio; an unreadable image or a
        // zero height must not end in a division by zero.
        if ($logodata !== false && $logodata[1] > 0) {
            $logowidth = (int) ($factor * $logodata[0] / $logodata[1]);
        } else {
            $logowidth = 0;
        }
    }

    // Header text / separator colours are not configurable yet; pass null
    // explicitly rather than relying on undefined variables.
    $header_text_color = null;
    $header_line_color = null;

    // ENT_HTML401 only exists from PHP 5.4 on.
    $entity_flags = ENT_COMPAT | ENT_QUOTES;
    if (!version_compare(phpversion(), '5.4.0', '<')) {
        $entity_flags = ENT_COMPAT | ENT_HTML401 | ENT_QUOTES;
    }
    $pdf->SetHeaderData(
        $logo,
        $logowidth,
        html_entity_decode(get_bloginfo('name'), $entity_flags),
        html_entity_decode(get_bloginfo('description') . "\n" . home_url(), $entity_flags),
        $header_text_color,
        $header_line_color
    );

    // Header and footer fonts.
    $pdf->setHeaderFont(array($this->options['headerFont'], '', $this->options['headerFontSize']));
    $pdf->setFooterFont(array($this->options['footerFont'], '', $this->options['footerFontSize']));

    // Default monospaced font.
    $pdf->SetDefaultMonospacedFont(PDF_FONT_MONOSPACED);

    // Margins: a positive configured value wins over the TCPDF default.
    $pdf->SetLeftMargin($this->options['marginLeft'] > 0 ? $this->options['marginLeft'] : PDF_MARGIN_LEFT);
    $pdf->SetRightMargin($this->options['marginRight'] > 0 ? $this->options['marginRight'] : PDF_MARGIN_RIGHT);
    $pdf->SetTopMargin($this->options['marginTop'] > 0 ? $this->options['marginTop'] : PDF_MARGIN_TOP);
    $pdf->SetHeaderMargin($this->options['marginHeader'] > 0 ? $this->options['marginHeader'] : PDF_MARGIN_HEADER);
    $pdf->SetFooterMargin($this->options['marginFooter'] > 0 ? $this->options['marginFooter'] : PDF_MARGIN_FOOTER);

    // Automatic page breaks.
    $pdf->SetAutoPageBreak(TRUE, PDF_MARGIN_BOTTOM);

    // Image scale factor.
    $pdf->setImageScale($this->options['imageScale'] > 0 ? $this->options['imageScale'] : PDF_IMAGE_SCALE_RATIO);

    // Default LI image, if specified.
    if (isset($this->options['liSymbol'])) {
        $lisymbol = 'img|' . $this->options['liSymbolType'] . '|' . $this->options['liSymbolWidth'] . '|' . $this->options['liSymbolHeight'] . '|' . WP_CONTENT_DIR . '/uploads/' . $this->options['liSymbolFile'];
        $pdf->setLIsymbol($lisymbol);
    }

    // Default font subsetting mode and content font.
    $pdf->setFontSubsetting(true);
    $pdf->SetFont($this->options['contentFont'], '', $this->options['contentFontSize'], '', true);

    $pdf->AddPage();

    // Assemble the HTML that is handed to TCPDF.
    $html = '';

    // Global CSS, if set in config.
    if ($this->options['applyCSS']) {
        $html .= '<style>' . $this->options['customCss'] . '</style>';
    }

    $html .= '<h1>' . html_entity_decode($post->post_title, ENT_QUOTES) . '</h1>';

    // Author line, if configured with a non-empty field name.
    if (!empty($this->options['authorDetail'])) {
        $author = get_the_author_meta($this->options['authorDetail'], $post->post_author);
        $html .= '<p><strong>Author : </strong>' . $author . '</p>';
    }

    // Category list, if configured.
    if (isset($this->options['postCategories'])) {
        $categories = get_the_category_list(', ', '', $post);
        if ($categories) {
            $html .= '<p><strong>Categories : </strong>' . $categories . '</p>';
        }
    }

    // Tag list, if configured.
    if (isset($this->options['postTags'])) {
        $tags = get_the_tags($post->ID);
        if ($tags && !is_wp_error($tags)) {
            // Build the links first and join them, instead of steering the
            // comma placement with next() inside the foreach.
            $tag_links = array();
            foreach ($tags as $tag) {
                $tag_links[] = '<a href="' . get_tag_link($tag->term_id) . '">' . $tag->name . '</a>';
            }
            $html .= '<p><strong>Tagged as : </strong>' . implode(', ', $tag_links) . '</p>';
        }
    }

    // Date, if configured (default date format of the site).
    if (isset($this->options['postDate'])) {
        $html .= '<p><strong>Date : </strong>' . get_the_date() . '</p>';
    }

    // Featured image, if configured and present.
    if (isset($this->options['featuredImage']) && has_post_thumbnail($post->ID)) {
        $html .= get_the_post_thumbnail($post->ID);
    }

    $html .= htmlspecialchars_decode(htmlentities($post->post_content, ENT_NOQUOTES, 'UTF-8', false), ENT_NOQUOTES);

    $dom = new simple_html_dom();
    $dom->load($html);

    foreach ($dom->find('img') as $e) {
        // Derive the alignment from the class WordPress put on the image.
        // HTML uses "middle" where CSS says "center", hence two values for
        // that case. $htmlimgalign is reset for every image so that one
        // centred image does not force "middle" onto all following ones.
        $htmlimgalign = null;
        if (preg_match('/alignleft/i', $e->class)) {
            $imgalign = 'left';
        } elseif (preg_match('/alignright/i', $e->class)) {
            $imgalign = 'right';
        } elseif (preg_match('/aligncenter/i', $e->class)) {
            $imgalign = 'center';
            $htmlimgalign = 'middle';
        } else {
            $imgalign = 'none';
        }

        // Drop the class, then set an align attribute (HTML) and a style
        // attribute (CSS) on the img tag.
        $e->class = null;
        $e->align = $imgalign;
        $e->style = 'float:' . (isset($htmlimgalign) ? $htmlimgalign : $imgalign);

        // SVG images are swapped for a textual placeholder; every other
        // image is wrapped in a div that forces its horizontal position.
        if (strtolower(substr($e->src, -4)) == '.svg') {
            $e->src = null;
            $e->outertext = '<div style="text-align:' . $imgalign . '">[ SVG: ' . $e->alt . ' ]</div><br/>';
        } else {
            $e->outertext = '<div style="text-align:' . $imgalign . '">' . $e->outertext . '</div>';
        }
    }

    $html = $dom->save();
    $dom->clear();

    // Default form properties (PDF forms support is experimental).
    $pdf->setFormDefaultProp(array('lineWidth' => 1, 'borderStyle' => 'solid', 'fillColor' => array(255, 255, 200), 'strokeColor' => array(255, 128, 128)));

    // Print the content.
    $pdf->writeHTML($html, true, 0, true, 0);

    // Create the cache directory if it does not exist yet.
    if (!is_dir(WPPTOPDFENH_CACHE_DIR)) {
        mkdir(WPPTOPDFENH_CACHE_DIR, 0777, true);
    }

    // Close and output the PDF document.
    if ($forceDownload) {
        $pdf->Output($filePath, 'FI');
    } else {
        $pdf->Output($filePath, 'F');
    }
}
/**
 * Add the Bootstrap button classes to comment reply links and to the
 * comment submit button found in the given content.
 *
 * Server-side counterpart of
 *   jQuery( '.comment-reply-link' ).addClass( 'btn btn-primary' );
 *   jQuery( '#commentsubmit' ).addClass( 'btn btn-primary' );
 *
 * @see http://codex.wordpress.org/Function_Reference/comment_form#Default_.24args_array
 *
 * @param string|null $content markup to process
 * @return string|null the processed markup, or the input unchanged when it
 *                     is empty or contains no "<" / ">" at all
 */
public function filter_the_content($content = null)
{
    $return = $content;

    // Sanity check: do not parse content that cannot contain HTML tags.
    if (empty($return) || strpos($return, '<') === false || strpos($return, '>') === false) {
        return $return;
    }

    $dom = new simple_html_dom();
    $dom->load($return);

    foreach ($dom->find('.comment-reply-link, #commentsubmit') as $elem) {
        // A missing attribute does not read back as a string; treat that
        // the same as an empty class="" so that every match gets the button
        // classes. Previously an empty class attribute was skipped.
        $current = is_string($elem->class) ? trim($elem->class) : '';

        if (stripos($current, 'btn btn-primary') === false) {
            $elem->class = trim($current . ' btn btn-primary');
        }
    }

    $return = $dom->save();

    // Release the node graph explicitly.
    $dom->clear();
    unset($dom);

    return $return;
}
/**
 * Remove the given attributes from every element of an HTML string.
 *
 * @param string $html    markup to clean
 * @param array  $attribs attribute names to remove
 * @return string the markup after removal, parsed and serialised once more
 */
public function stripAttributes($html, $attribs)
{
    $document = new \simple_html_dom();
    $document->load($html);

    foreach ($attribs as $name) {
        // Every element that carries the attribute, whatever its tag.
        $carriers = $document->find("*[{$name}]");
        foreach ($carriers as $node) {
            $node->{$name} = null;
        }
    }

    // Serialise, parse the result again and serialise a second time.
    $stripped = $document->save();
    $document->load($stripped);

    return $document->save();
}
/**
 * Replace every gallery placeholder image in the content with the rendered
 * frontend view of the displayed gallery it refers to.
 *
 * @param string $content markup that may contain placeholder images
 * @return string the content with placeholders substituted; the input is
 *                returned untouched when it is empty or has no placeholder
 */
function substitute_placeholder_imgs($content)
{
    // Load html into parser
    $doc = new simple_html_dom();

    if (!$content) {
        return $content;
    }
    $doc->load($content);

    // Find all placeholder images
    $placeholders = $doc->find("img[class='ngg_displayed_gallery']");
    if (!$placeholders) {
        return $content;
    }

    // Get some utilities
    $mapper = $this->get_registry()->get_utility('I_Displayed_Gallery_Mapper');
    $router = $this->get_registry()->get_utility('I_Router');

    // To match ATP entries the stored url is compared against a generic path.
    // Both the permalink and the non-permalink (index.php/) form are covered;
    // the scheme (http or https) is handled by the pattern below.
    $preview_parts = parse_url(
        $router->join_paths(
            $router->remove_url_segment('index.php', $router->get_base_url()),
            '/nextgen-attach_to_post/preview'
        )
    );
    $preview_url = preg_quote($preview_parts['host'] . $preview_parts['path'], '#');

    $alt_preview_parts = parse_url(
        $router->join_paths(
            $router->remove_url_segment('index.php', $router->get_base_url()),
            'index.php/nextgen-attach_to_post/preview'
        )
    );
    $alt_preview_url = preg_quote($alt_preview_parts['host'] . $alt_preview_parts['path'], '#');

    $pattern = "#http(s)?://({$preview_url}|{$alt_preview_url})/id--(\\d+)#";

    // Substitute each image for the gallery type frontend content
    foreach ($placeholders as $img) {
        // The placeholder MUST have a gallery instance id
        if (!preg_match($pattern, $img->src, $match)) {
            continue;
        }

        // Find the displayed gallery
        $displayed_gallery = $mapper->find($match[3], TRUE);

        // Fallback text, replaced by the rendered gallery when it exists
        $replacement = '<p>' . _('Invalid Displayed Gallery') . '</p>';
        if ($displayed_gallery) {
            $renderer = $this->get_registry()->get_utility('I_Displayed_Gallery_Renderer');
            $replacement = $renderer->render($displayed_gallery, TRUE);
        }

        // Replace the placeholder with the displayed gallery content
        $img->outertext = $replacement;
    }

    return (string) $doc->save();
}
/**
 * Rewrite the href of every anchor whose target ends in
 * "/" . PUGPIG_HTML_FILE_NAME by passing it through
 * url_create_deep_dot_url().
 *
 * @param string $markup HTML to process
 * @return string the re-serialised markup
 */
function _pugpig_rewrite_pugpig_html_links($markup)
{
    $dom = new simple_html_dom();
    $dom->load($markup, false, false);

    // Attribute selector: href ends with "/<file name>".
    $selector = 'a[href$="/' . PUGPIG_HTML_FILE_NAME . '"]';

    foreach ($dom->find($selector) as $link) {
        $link->href = url_create_deep_dot_url($link->href);
    }

    return $dom->save();
}
/**
 * Edit action for a story.
 *
 * On a valid POST the story fields are updated and saved. When the alias
 * changed, the images referenced in the story body are fetched into the
 * directory of the new alias and the body is rewritten to point at the
 * local copies. Afterwards the request is redirected to the index page.
 * On an invalid POST the validation errors are passed to the edit view.
 *
 * @param mixed $story_id identifier handed to the story finder
 * @return void
 */
public function action_sua($story_id)
{
    $this->template->title = __('Sửa bài viết: xung - hợp cung');
    $this->template->section_title = __('Sửa bài viết: xung - hợp cung');
    $data = array();

    $story = Model_Horoscope_XungHopBLL::getInstance()->find($story_id);
    if (!$story) {
        Request::instance()->redirect('admin/horoscope_xunghop/index');
        return;
    }

    if (Request::$method == 'POST') {
        $post = $story->validate_update($_POST);

        if ($post->check()) {
            $post_values = $post->as_array();
            $old_alias = $story->alias;

            // A changed alias means a changed image directory, so images
            // referenced in the content have to be fetched again below.
            $story->hinh_anh = $post_values['hinh_anh'];
            $story->alias = $post_values['alias'];
            $story->cung_1 = $post_values['cung_1'];
            $story->cung_2 = $post_values['cung_2'];
            $story->tom_tat = trim($post_values['tom_tat']);
            $story->noi_dung = $post_values['noi_dung'];
            $story->kiem_tra = true;
            $story->save();

            if ($old_alias != $post_values['alias']) {
                // Best effort removal of the old folder.
                @rmdir('assets/horoscope/xung-hop/' . $old_alias . '/');

                require_once Kohana::find_file('classes', 'vendor/simple_html_dom');
                $html2 = new simple_html_dom();
                $html2->load($story->noi_dung);

                $images = $html2->find('img');
                if ($images) {
                    $path = 'assets/horoscope/xung-hop/' . $story->alias . '/';
                    $i = 1;
                    foreach ($images as $image) {
                        unset($image->onclick);

                        $get_file = Vendor_Crawler::get_file_from_url_by_curl($image->src, $path, 'anh_' . $i);

                        if (!$get_file || !is_file($get_file) || filesize($get_file) == 0) {
                            // Download failed: take the element out of the
                            // document. unset($image) only destroyed the loop
                            // variable and left the <img> in the content.
                            $image->outertext = '';
                        } else {
                            $image->src = '/' . $get_file;
                        }
                        $i++;
                    }
                }

                $story->noi_dung = $html2->save();
                $html2->clear();
                unset($html2);
                $story->save();
            }

            Request::instance()->redirect('admin/horoscope_xunghop/index');
        } else {
            // Keep the submitted values and expose the errors for display.
            $_POST = $post->as_array();
            $data['errors'] = $post->errors();
        }
    }

    $data['story'] = $story->toArray();
    $this->template->content = View::factory('horoscope/admin/xung-hop/sua', $data);
}
// Round-trip check for simple_html_dom: every sample is stored under ./html/,
// parsed and serialised again; the parse time and the memory usage are
// printed, and a serialisation that differs from the stored file is saved
// next to it as "<name>.error".
error_reporting(E_ALL);
include_once '../simple_html_dom.php';

$dir = './html/';

// Sample list: target file name and the source it is fetched from
// (an empty url produces an empty file).
$files = array(
    array('name' => 'empty.htm', 'url' => ''),
    array('name' => 'smarty_1.htm', 'url' => 'guestbook.tpl'),
    array('name' => 'smarty_2.htm', 'url' => 'guestbook_form.tpl'),
    array('name' => 'google.htm', 'url' => 'http://www.google.com/'),
    array('name' => 'senate.htm', 'url' => 'http://www.senate.gov/legislative/LIS/roll_call_lists/roll_call_vote_cfm.cfm?congress=101&session=2&vote=00317'),
    array('name' => 'cyberciti.htm', 'url' => 'http://www.cyberciti.biz/tips/configure-ubuntu-grub-to-load-freebsd.html'),
    array('name' => 'myspace.htm', 'url' => 'http://www.myspace.com/'),
    array('name' => 'mootools.htm', 'url' => 'http://www.mootools.net/'),
    array('name' => 'jquery.htm', 'url' => 'http://jquery.com/'),
    array('name' => 'scriptaculo.htm', 'url' => 'http://script.aculo.us/'),
    array('name' => 'apache.htm', 'url' => 'http://www.apache.org/'),
    array('name' => 'microsoft.htm', 'url' => 'http://www.microsoft.com/'),
    array('name' => 'slashdot.htm', 'url' => 'http://www.slashdot.org/'),
    array('name' => 'ror.htm', 'url' => 'http://www.rubyonrails.org/'),
    array('name' => 'yahoo.htm', 'url' => 'http://www.yahoo.com/'),
    array('name' => 'phpbb.htm', 'url' => 'http://www.phpbb.com/'),
    array('name' => 'python.htm', 'url' => 'http://www.python.org/'),
    array('name' => 'lua.htm', 'url' => 'http://www.lua.org/'),
    array('name' => 'php.htm', 'url' => 'http://www.php.net/'),
    array('name' => 'ibm.htm', 'url' => 'http://www.ibm.com/'),
    array('name' => 'java.htm', 'url' => 'http://java.sun.com/'),
    array('name' => 'flickr.htm', 'url' => 'http://www.flickr.com/tour/upload/'),
    array('name' => 'amazon.htm', 'url' => 'http://www.amazon.com/'),
    array('name' => 'youtube.htm', 'url' => 'http://www.youtube.com/watch?v=kib05Ip6GSo&feature=bz302'),
);

echo 'memory: ' . memory_get_usage() . '<br>';

$dom = new simple_html_dom();

foreach ($files as $sample) {
    $target = $dir . $sample['name'];

    // Fetch the sample, or create an empty file when no url is given.
    $payload = ($sample['url'] != '') ? file_get_contents($sample['url']) : '';
    file_put_contents($target, $payload);

    // Time the parse; microtime() yields "usec sec" as a string.
    $started = microtime();
    $dom->load(file_get_contents($target), false);
    list($end_usec, $end_sec) = explode(' ', microtime());
    list($begin_usec, $begin_sec) = explode(' ', $started);
    $elapsed_ms = ((double) $end_usec + (double) $end_sec - (double) $begin_usec - (double) $begin_sec) * 1000;
    echo sprintf('(%.1f)', $elapsed_ms) . '<br>';

    // The serialised DOM has to reproduce the stored file.
    $round_trip_ok = (file_get_contents($target) == $dom->save());
    if ($round_trip_ok) {
        echo "[success] " . $sample['name'] . "<br>";
    } else {
        echo "[<font color='red'>failed</font>] " . $sample['name'] . "<br>";
        $dom->save($target . '.error');
    }

    echo 'memory: ' . memory_get_usage() . '<br>';
    flush();
    set_time_limit(0);
}

$dom->clear();
unset($dom);

echo '<br>memory: ' . memory_get_usage() . '<br>';
/**
 * Parse page content, walk the DOM and decorate image links with the
 * attributes Lightbox Plus needs.
 *
 * Depending on the options either every link to an image file is handled
 * (text_links = 1) or only links that wrap an <img>. Each handled link gets
 * the configured class (use_class_method = 1) and a grouping attribute:
 * "data-<data_name>" when output_htmlv = 1, otherwise "rel". Titles are
 * cleared (no_display_title = 1) or filled in from the image, the post title
 * or a neighbouring caption element.
 *
 * @param mixed $html_content markup to process
 * @param mixed $unq_id       value appended to the post id inside the group name
 * @return mixed the processed markup
 */
function lightboxPlusReplace($html_content, $unq_id)
{
    global $post;

    // Stays null when no options are stored, which is what the previously
    // undefined variable evaluated to when handed to setMissingOptions().
    $lightboxPlusOptions = null;
    if (!empty($this->lightboxOptions)) {
        $lightboxPlusOptions = $this->getAdminOptions($this->lightboxOptionsName);
    }

    // Remove the following line after a few versions, or once 2.6 is the
    // prevalent version.
    $lightboxPlusOptions = $this->setMissingOptions($lightboxPlusOptions);

    $postGroupID = $post->ID;
    $postGroupTitle = $post->post_title;

    // Option flags, read once. "== 1" mirrors the loose matching of the
    // switch statements this code replaces.
    $allLinks = isset($lightboxPlusOptions['text_links']) && $lightboxPlusOptions['text_links'] == 1;
    $useHtml5 = isset($lightboxPlusOptions['output_htmlv']) && $lightboxPlusOptions['output_htmlv'] == 1;
    $useClassMethod = isset($lightboxPlusOptions['use_class_method']) && $lightboxPlusOptions['use_class_method'] == 1;
    $hideTitle = isset($lightboxPlusOptions['no_display_title']) && $lightboxPlusOptions['no_display_title'] == 1;
    $useCaptionTitle = !empty($lightboxPlusOptions['use_caption_title']);

    // Grouping attribute: HTML5 data attribute or the classic rel attribute.
    $groupAttr = $useHtml5 ? 'data-' . $lightboxPlusOptions['data_name'] : 'rel';
    $groupValue = 'lightbox[' . $postGroupID . $unq_id . ']';

    // Whole-token class tests for the caption elements.
    $captionPattern = '/(^|\\s)wp-caption-text(\\s|$)/';
    $galleryCaptionPattern = '/(^|\\s)gallery-caption(\\s|$)/';

    $html = new simple_html_dom();
    $html->load($html_content, false, false);

    if ($allLinks) {
        // Every link (text or image) that points at an image file.
        foreach ($html->find('a[href*=jpg$], a[href*=gif$], a[href*=png$], a[href*=jpeg$], a[href*=bmp$]') as $e) {
            if ($useClassMethod) {
                $className = $lightboxPlusOptions['class_name'];
                if ($e->class && $e->class != $className) {
                    $e->class .= ' ' . $className;
                } else {
                    $e->class = $className;
                }
            }
            if (!$e->{$groupAttr}) {
                $e->{$groupAttr} = $groupValue;
            }

            if ($hideTitle) {
                $e->title = null;
                continue;
            }

            // No title yet: use the alt text of the first child, else the
            // post title.
            if (!$e->title && $e->first_child()) {
                if ($e->first_child()->alt) {
                    $e->title = $e->first_child()->alt;
                } else {
                    $e->title = $postGroupTitle;
                }
            }

            // Caption as title. The class is compared, not assigned: the old
            // "->class = '...'" conditions overwrote the neighbour's class,
            // were always true and failed when there was no neighbour.
            if ($useCaptionTitle) {
                $sibling = $e->next_sibling();
                $parent = $e->parent();
                $parentSibling = $parent ? $parent->next_sibling() : null;

                if ($sibling && preg_match($captionPattern, (string) $sibling->class)) {
                    $e->title = $sibling->innertext;
                } elseif ($parentSibling && preg_match($galleryCaptionPattern, (string) $parentSibling->class)) {
                    $e->title = $parentSibling->innertext;
                }
            }
        }
    } else {
        // Only links that contain an image; $e is the <img>, its parent
        // element is decorated.
        foreach ($html->find('a[href*=jpg$] img, a[href*=gif$] img, a[href*=png$] img, a[href*=jpeg$] img, a[href*=bmp$] img') as $e) {
            $link = $e->parent();
            if (!$link) {
                continue;
            }

            if ($useClassMethod) {
                $className = $lightboxPlusOptions['class_name'];
                if ($link->class && $link->class != $className) {
                    $link->class .= ' ' . $className;
                } else {
                    $link->class = $className;
                }
            }
            if (!$link->{$groupAttr}) {
                $link->{$groupAttr} = $groupValue;
            }

            if ($hideTitle) {
                $link->title = null;
                continue;
            }

            if (!$link->title) {
                if ($e->title) {
                    $link->title = $e->title;
                } else {
                    $link->title = $postGroupTitle;
                }
            }

            if ($useCaptionTitle) {
                // NOTE(review): $e is an <img> here, so a search for images
                // below it presumably never matches and this caption lookup
                // does not fire - confirm what was intended.
                if ($e->find('img[src*=jpg$], img[src*=gif$], img[src*=png$], img[src*=jpeg$], img[src*=bmp$]')) {
                    $sibling = $e->next_sibling();
                    $parentSibling = $link->next_sibling();

                    if ($sibling && preg_match($captionPattern, (string) $sibling->class)) {
                        $e->title = $sibling->innertext;
                    } elseif ($parentSibling && preg_match($galleryCaptionPattern, (string) $parentSibling->class)) {
                        $e->title = $parentSibling->innertext;
                    }
                }
            }
        }
    }

    $content = $html->save();
    $html->clear();
    unset($html);

    return $content;
}
<?php // $Rev: 115 $ // ----------------------------------------------------------------------------- // setup error_reporting(E_ALL); require_once '../simple_html_dom.php'; $dom = new simple_html_dom(); // ----------------------------------------------------------------------------- // empty test $str = ''; $dom->load($str); assert($dom->save() == $str); // ----------------------------------------------- $str = null; $dom->load($str); assert($dom->save() == $str); // ----------------------------------------------------------------------------- // text test $str = <<<HTML <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"></html> HTML; $dom->load($str); assert(count($dom->find('unknown')) == 1); assert(count($dom->find('text')) == 1); // ----------------------------------------------------------------------------- // string quote test $str = <<<HTML <div class="class0" id="id0" >
/**
 * Output filter for the whole page: relocate Meta Slider <link> elements
 * (id starting with "metaslider") from the body into the <head>.
 *
 * @param string $buffer complete page markup
 * @return string the original buffer when the body holds no such link,
 *                otherwise the re-serialised document
 */
public function resource_manager($buffer)
{
    $default_selector = 'body link[id^="metaslider"]';

    // Build a DOM from the buffered page.
    $dom = new simple_html_dom();
    $dom->load($buffer, true, false);

    // Nothing to move: hand the buffer back untouched.
    if (!$dom->find($default_selector)) {
        return $buffer;
    }

    // Selectors that locate the Meta Slider links (filterable).
    $selectors = apply_filters("metaslider_resource_manager_selectors", array($default_selector));

    $head = $dom->find('head', 0);
    if ($head) {
        foreach ($selectors as $selector) {
            $links = $dom->find($selector);
            foreach ($links as $link) {
                // Append a copy to the head, then blank the original.
                $head->innertext .= "\t" . $link->outertext . "\n";
                $link->outertext = '';
            }
        }
    }

    return $dom->save();
}
/**
 * Content plugin hook: append Facebook comments (an inline comment box or an
 * "add comment" link that opens a modal) to the article text.
 *
 * Comments are shown when the article's section or category is configured
 * for comments, or when comments are configured for "all", and the current
 * user passes the configured access level. The markup is placed in a row of
 * the ".myApiShareBottom" table, which is created when missing.
 *
 * @param object $article    article being rendered; its text is modified in place
 * @param object $params     article parameters
 * @param int    $limitstart not used by this handler
 * @return void
 */
function onBeforeDisplayContent( &$article, &$params, $limitstart )
{
    // array_key_exists() must not be handed an object; use property_exists() there.
    $hasCategory = is_object($article)
        ? property_exists($article, 'category')
        : (is_array($article) && array_key_exists('category', $article));

    if (!file_exists(JPATH_SITE.DS.'plugins'.DS.'system'.DS.'myApiConnectFacebook.php') || (!$hasCategory && !isset($params->showK2Plugins))) {
        return;
    }

    // This may fire from a component other than com_content.
    if (!is_object($article) || !isset($article->id) || !($article->id != '')) {
        return;
    }
    if (isset($_POST['fb_sig_api_key']) && !($_POST['fb_sig_api_key'] == '')) {
        return;
    }
    if (!class_exists('plgSystemmyApiConnect')) {
        return;
    }

    JPlugin::loadLanguage( 'plg_content_myapicomment' , JPATH_ADMINISTRATOR );
    $facebook = plgSystemmyApiConnect::getFacebook();
    $xid = urlencode('articlecomment'.$article->id);

    require_once(JPATH_SITE.DS.'components'.DS.'com_content'.DS.'helpers'.DS.'route.php');

    // Work out the route of the article (com_content or K2).
    if (isset($article->slug)) {
        $link = ContentHelperRoute::getArticleRoute($article->slug, $article->catslug, $article->sectionid);
    } elseif (method_exists('K2HelperRoute','getItemRoute')) {
        $link = K2HelperRoute::getItemRoute($article->id.':'.urlencode($article->alias),$article->catid.':'.urlencode($article->category->alias));
    } else {
        error_log('myApi unable to calculate link for the article id '.$article->id);
        return;
    }

    $u =& JURI::getInstance( JURI::base().$link );
    $port = ($u->getPort() == '') ? '' : ":".$u->getPort();
    $commentURL = 'http://'.$u->getHost().$port.$u->getPath().'?'.$u->getQuery();

    $doc =& JFactory::getDocument();
    JHTML::_('behavior.mootools');

    // Load plugin params info
    $plugin =& JPluginHelper::getPlugin('content', 'myApiComment');
    $myapiparama = new JParameter($plugin->params);
    $comment_sections = $myapiparama->get('comment_sections');
    $comment_categories = $myapiparama->get('comment_categories');
    $comments_show_on = $myapiparama->get('comments_show_on');
    $comments_access = $myapiparama->get('comments_access');
    $comments_width = $myapiparama->get('comments_width');
    $comments_numposts = $myapiparama->get('comments_numposts');
    $comments_scheme = $myapiparama->get('comments_scheme');
    $comments_view_article = $myapiparama->get('comments_view_article');
    $comments_view_list = $myapiparama->get('comments_view_list');
    $comments_view_blog = $myapiparama->get('comments_view_blog');

    // Is the article's section or category enabled for comments?
    // (in_array() without the strict flag matches the former "==" loops.)
    $comment_show = false;
    if (isset($article->sectionid)) {
        if (is_array($comment_sections)) {
            if (in_array($article->sectionid, $comment_sections)) {
                $comment_show = true;
            }
        } elseif ($comment_sections == $article->sectionid) {
            $comment_show = true;
        }
    }
    if (isset($article->category)) {
        if (is_array($comment_categories)) {
            if (in_array($article->category, $comment_categories)) {
                $comment_show = true;
            }
        } elseif ($comment_categories == $article->category) {
            $comment_show = true;
        }
    }

    // Access check. $hasAccess starts out false; it used to be undefined
    // whenever none of the branches below granted access.
    $user = JFactory::getUser();
    $hasAccess = false;
    if ($comments_access == '29') {
        $hasAccess = true;
    } elseif ($comments_access == '30') {
        if (($user->gid == '23') || ($user->gid == '24') || ($user->gid == '25')) {
            $hasAccess = true;
        }
    } elseif ($user->gid >= $comments_access) {
        $hasAccess = true;
    }
    if ($comments_access == $user->gid) {
        $hasAccess = true;
    }

    if ($comments_show_on == 'all') {
        $comment_show = true;
    }

    if (!$comment_show || !$hasAccess) {
        return;
    }

    $comment_box = '<fb:comments app_id="'.$facebook->getAppId().'" migrated="1" xid="'.$xid.'" url="'.$commentURL.'" numposts="'.$comments_numposts.'" width="'.$comments_width.'" colorscheme="'.$comments_scheme.'"></fb:comments>';
    $comment_link = "<br /><a id='".$xid."commentLink' class='' href='#'>".JText::_('ADD_COMMENT')."</a><br />";
    $js = "window.addEvent('domready',function(){ $('".$xid."commentLink').addEvent('click',function(){ myApiModal.open(\"".JText::_('COMMENT_PROMPT')."\",null,\"<fb:comments app_id=\'".$facebook->getAppId()."\' migrated=\'1\' xid=\'".$xid."\' url=\'".$commentURL."\' numposts=\'5\' width=\'693\'></fb:comments>\"); }); });";

    // Pick "box" or "link" presentation for the current view.
    if (JRequest::getVar('view','','get') == 'article') {
        $viewType = ($comments_view_article == 1) ? 'box' : 'link';

        // Only add noscript comments for article view
        $cache = & JFactory::getCache('plgContentmyApiComment - Comments for SEO');
        $cache->setCaching( 1 );
        $cache->setLifeTime(60*60*24*2);
        $comments = $cache->call( array( 'plgContentmyApiComment', 'getComments'),$xid);
        $article->text .= "<noscript><h3>Comments for ".$article->title."</h3>".$comments."</noscript>";
    } elseif ((JRequest::getVar('layout','','get') == 'blog') || (JRequest::getVar('view','','get') == 'frontpage')) {
        $viewType = ($comments_view_blog == 1) ? 'box' : 'link';
    } else {
        $viewType = ($comments_view_list == 1) ? 'box' : 'link';
    }

    require_once(JPATH_SITE.DS.'plugins'.DS.'system'.DS.'myApiDom.php');
    $dom = new simple_html_dom();
    $dom->load($article->text);

    // Make sure the bottom share table exists.
    $tableEl = $dom->find('.myApiShareBottom',0);
    if (!$tableEl) {
        $table = '<table class="myApiShareBottom myApiShareTable"></table>';
        $article->text = $article->text.$table;
        $dom->load($article->text);
    }

    $commentEl = ($viewType == 'box') ? $comment_box : $comment_link;
    $tr = '<tr class="myApiComments"><td class="myApiCommentsCell">'.$commentEl.'</td></tr>';

    $row = $dom->find('.myApiShareBottom',0);
    if ($row) {
        $row->innertext .= $tr;
    }

    if ($viewType == 'link') {
        $doc->addScriptDeclaration($js);
        plgContentmyApiComment::addFbJs($xid);
    }

    $article->text = $dom->save();
    $dom->clear();
    unset($dom);
}