/**
  * Build the final CAS 2.0 XML payload.
  *
  * Creates a fresh `serviceResponse` element, nests the stored response node
  * inside it (when one is set), makes that element the only child of the
  * response document and serializes the document.
  *
  * @return string CAS 2.0+ server response as an XML string.
  */
 public function prepare()
 {
     $envelope = $this->createElement('serviceResponse');
     if (!empty($this->response)) {
         $envelope->appendChild($this->response);
     }
     // Empty the response document before attaching the envelope.
     for ($child = $this->document->firstChild; $child; $child = $this->document->firstChild) {
         $this->document->removeChild($child);
     }
     $this->document->appendChild($envelope);
     return $this->document->saveXML();
 }
Example #2
0
/**
 * Tries to convert the given HTML into a plain text format - best suited for
 * e-mail display, etc.
 *
 * <p>In particular, it tries to maintain the following features:
 * <ul>
 *   <li>Links are maintained, with the 'href' copied over
 *   <li>Information in the &lt;head&gt; is lost
 * </ul>
 *
 * @param string $html  the input HTML
 * @param int    $width width handed to the renderer (defaults to 74)
 * @return string the HTML converted, as best as possible, to text; when the
 *                markup cannot be loaded, the (newline-fixed, encoding-prefixed)
 *                input is returned instead
 */
function convert_html_to_text($html, $width = 74)
{
    $html = fix_newlines($html);
    $doc = new DOMDocument('1.0', 'utf-8');
    // Prepend an encoding declaration unless the input already carries an
    // XML declaration, so the parser reads the markup as UTF-8.
    if (strpos($html, '<?xml ') === false) {
        $html = '<?xml encoding="utf-8"?>' . $html;
    }
    # <?php (4vim)
    if (!@$doc->loadHTML($html)) {
        return $html;
    }
    // Thanks, http://us3.php.net/manual/en/domdocument.loadhtml.php#95251
    // dirty fix -- remove the inserted processing instruction
    foreach ($doc->childNodes as $item) {
        if ($item->nodeType == XML_PI_NODE) {
            $doc->removeChild($item);
            // remove hack; stop right away because the list was just modified
            break;
        }
    }
    $elements = identify_node($doc);
    if (!is_object($elements)) {
        // Nothing to style or render: the value itself is the output.
        // (Previously getRoot() was called before this check, which made
        // this case a fatal error.)
        return trim($elements);
    }
    // Add the default stylesheet
    $elements->getRoot()->addStylesheet(HtmlStylesheet::fromArray(array('html' => array('white-space' => 'pre'), 'p' => array('margin-bottom' => '1em'), 'pre' => array('white-space' => 'pre'))));
    $options = array();
    return trim($elements->render($width, $options));
}
 /**
  * {@inheritdoc}
  *
  * Parses $html into a new \DOMDocument. Every entry of $this->config is
  * applied as a property on the document first. Unless $encoding is false, an
  * XML encoding declaration is prepended so the parser knows which charset to
  * use, and the processing instruction it leaves behind is removed afterwards.
  *
  * @param string       $html     Markup to parse.
  * @param string|false $encoding Charset of $html. An empty value falls back to
  *                               the encoding already set on the document;
  *                               false disables the encoding hint entirely.
  *
  * @return \DOMDocument
  */
 public function parseHtml($html, $encoding = 'UTF-8')
 {
     $document = new \DOMDocument();
     foreach ($this->config as $name => $value) {
         $document->{$name} = $value;
     }
     $useEncodingHint = $encoding !== false;
     if ($useEncodingHint) {
         if ($encoding) {
             // Only assign a real charset name; false or an empty value is
             // not a valid encoding for DOMDocument::$encoding.
             $document->encoding = $encoding;
         }
         // Tell the parser which charset to use
         $encoding = $encoding ?: $document->encoding;
         $html = '<?xml encoding="' . $encoding . '" ?>' . $html;
     }
     // @codingStandardsIgnoreStart
     @$document->loadHTML($html);
     // @codingStandardsIgnoreEnd
     if ($useEncodingHint) {
         // Iterate over a snapshot: removing a node while walking the live
         // childNodes list can cut the iteration short.
         foreach (iterator_to_array($document->childNodes) as $item) {
             if ($item->nodeType == XML_PI_NODE) {
                 $document->removeChild($item);
             }
         }
     }
     return $document;
 }
Example #4
0
 /**
  * Add a CSS class to every element of a given type inside $content.
  *
  * - An \Element object gets the class added when it is an instance of
  *   \AbstractElement\$elementType, and its ->contents entries (if set) are
  *   processed recursively.
  * - A raw HTML string is parsed (only when $fixRawHtml is true), the class is
  *   appended to every tag named by the element class' `tag` constant, and
  *   the markup is returned with the doctype dropped and the <html> wrapper
  *   replaced by its first child.
  * - Anything else is returned unchanged.
  *
  * @param mixed  $content     Element object, raw HTML string, or other value.
  * @param string $elementType Class name below \AbstractElement\.
  * @param string $class       CSS class to add.
  * @param bool   $fixRawHtml  Whether raw HTML strings are rewritten too.
  *
  * @return mixed The processed content.
  */
 static function fixChildrenClass($content, $elementType, $class, $fixRawHtml = true)
 {
     $classPath = '\\AbstractElement\\' . $elementType;
     // is this an \Element object?
     if (is_a($content, '\\Element')) {
         // is this of the right element type?
         if (is_a($content, $classPath)) {
             $content->addClass($class);
         }
         if (isset($content->contents)) {
             foreach ($content->contents as $index => $value) {
                 // Pass $fixRawHtml down so the caller's choice also applies
                 // to nested content (it used to be reset to the default).
                 $content->contents[$index] = AbstractElement\Helper::fixChildrenClass($value, $elementType, $class, $fixRawHtml);
             }
         }
         return $content;
     }
     if (is_string($content) && $content !== '' && $fixRawHtml) {
         $dom = new \DOMDocument();
         $dom->loadHTML($content);
         $htmlNode = $dom->documentElement;
         if ($htmlNode === null || $htmlNode->firstChild === null) {
             // Nothing was parsed into the document; leave the input alone.
             return $content;
         }
         $reflectionClass = new \ReflectionClass($classPath);
         $elements = $dom->getElementsByTagName($reflectionClass->getConstant('tag'));
         foreach ($elements as $element) {
             $element->setAttribute('class', $element->getAttribute('class') . ' ' . $class);
         }
         // Strip the doctype the parser adds, when there is one.
         if ($dom->doctype) {
             $dom->removeChild($dom->doctype);
         }
         // Replace the <html> wrapper by its first child.
         $dom->replaceChild($htmlNode->firstChild, $htmlNode);
         return $dom->saveHTML();
     }
     // Not an element and not rewritable HTML: hand the value back untouched
     // instead of returning null (which would wipe the caller's content).
     return $content;
 }
 /**
  * Make every external link of an HTML fragment open in a new window.
  *
  * A link counts as external when its href has a host that differs from the
  * host of the blog URL reported by get_bloginfo("url"). Links without a host
  * (relative URLs, anchors, unparsable hrefs) are left untouched.
  *
  * @param string $content HTML fragment.
  *
  * @return string The fragment with target="_blank" set on external links.
  */
 function parse($content)
 {
     $doc = new DOMDocument();
     // The temporary <div> keeps the fragment together while it is parsed.
     @$doc->loadHTML("<div>" . $content . "</div>");
     $links = $doc->getElementsByTagName('a');
     $blogurl = get_bloginfo("url");
     $components = parse_url($blogurl);
     // parse_url() returns false or an array without "host" for malformed or
     // relative URLs; fall back to an empty host instead of an undefined index.
     $host = isset($components["host"]) ? $components["host"] : '';
     /* Set target attribute of all external links to "_blank" */
     foreach ($links as $link) {
         $href = $link->getAttribute("href");
         $components = parse_url($href);
         if (!isset($components["host"])) {
             continue;
         }
         // Host names are case-insensitive, so "Example.com" is not external
         // to "example.com".
         if (strcasecmp($components["host"], $host) !== 0) {
             $link->setAttribute("target", "_blank");
         }
     }
     /*
      * Extract the HTML fragment.
      * Credits: http://stackoverflow.com/questions/29493678/loadhtml-libxml-html-noimplied-on-an-html-fragment-generates-incorrect-tags
      */
     $temporary_wrapper = $doc->getElementsByTagName('div')->item(0);
     $temporary_wrapper = $temporary_wrapper->parentNode->removeChild($temporary_wrapper);
     // Drop everything the parser added (doctype, <html>, <body>) ...
     while ($doc->firstChild) {
         $doc->removeChild($doc->firstChild);
     }
     // ... and move the fragment's own nodes to the top level.
     while ($temporary_wrapper->firstChild) {
         $doc->appendChild($temporary_wrapper->firstChild);
     }
     /* Return HTML */
     return trim($doc->saveHTML());
 }
Example #6
0
 /**
  * Prepare every <img> of an HTML string for lazy loading.
  *
  * For each image a <noscript> element holding a shallow copy of the original
  * tag is inserted right before it; the image itself gets its source moved to
  * `data-src`, a 1x1 blank GIF as `src` and the extra class `lazy`.
  *
  * @param string $content HTML markup to process.
  * @param mixed  $tags    Not used by this function.
  *
  * @return string The serialized document, or $content when the markup cannot
  *                be loaded or serialized.
  */
 function preg_replace_html($content, $tags)
 {
     $document = new DOMDocument();
     $parsed = @$document->loadHTML('<?xml encoding="UTF-8">' . $content);
     if (!$parsed) {
         return $content;
     }
     // Drop the processing instruction left behind by the encoding hint.
     foreach ($document->childNodes as $child) {
         if ($child->nodeType === XML_PI_NODE) {
             $document->removeChild($child);
             break;
         }
     }
     $document->encoding = 'UTF-8';
     $placeholder = 'data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7';
     $imageList = $document->getElementsByTagName('img');
     // Walk the live list from the end: each inserted copy is itself an <img>
     // and lands in front of the position being handled.
     $position = $imageList->length;
     while (--$position >= 0) {
         $image = $imageList->item($position);
         $copy = $image->cloneNode();
         $fallback = $document->createElement('noscript');
         $fallback->appendChild($copy);
         $image->parentNode->insertBefore($fallback, $image);
         $originalSource = $image->getAttribute('src');
         $image->setAttribute('data-src', $originalSource);
         $image->setAttribute('src', $placeholder);
         $classes = trim($image->getAttribute('class') . ' lazy');
         $image->setAttribute('class', $classes);
     }
     $rendered = $document->saveHTML();
     return $rendered ? $rendered : $content;
 }
Example #7
0
 /**
  * Produce (and cache) the plain-text rendering of the stored HTML.
  *
  * A non-empty cached text is returned as is. Otherwise the HTML is
  * preprocessed, loaded into a DOMDocument, its <body> is turned into text,
  * runs of three or more newlines are collapsed to two, the text is wrapped
  * at 80 and the generated link list is appended.
  *
  * @return string Text representation
  */
 public function convert()
 {
     if (empty($this->text)) {
         $markup = $this->preprocess($this->html);
         $this->document = new DOMDocument();
         // Route errors raised while loading to the class' silencer.
         $silencer = array(__CLASS__, 'silence_errors');
         set_error_handler($silencer);
         $this->document->loadHTML($markup);
         restore_error_handler();
         // Remove the DOCTYPE
         // Seems to cause segfaulting if we don't do this
         $leadingNode = $this->document->firstChild;
         if ($leadingNode instanceof DOMDocumentType) {
             $this->document->removeChild($leadingNode);
         }
         $body = $this->document->getElementsByTagName('body')->item(0);
         $this->text = $this->parse_children($body);
         $this->text = preg_replace("#\n{3,}#", "\n\n", $this->text);
         $this->text = $this->wrap($this->text, 80);
         $this->text .= $this->generate_links();
     }
     return $this->text;
 }
Example #8
0
 /**
  * Parse a markup string into a DOMDocument and record whether it consists
  * of a single root node.
  *
  * The instance registers itself through jqm_var() under a generated
  * namespace, loads the markup, works out the "root" schema information,
  * strips the doctype and stores the document and selector.
  *
  * @param string $document Markup to load (must be a scalar).
  * @param string $tagname  Tag name handed to getElementsByTagName() and
  *                         stored as the selector; '*' matches every element.
  *
  * @throws Exception When $document is not a scalar value.
  */
 public function __construct($document, $tagname = '*')
 {
     if (!is_scalar($document)) {
         throw new Exception('Not a valid {JQMDoc} object');
     }
     $this->_namespace = microtime(true) . uniqid();
     jqm_var($this->_namespace, $this);
     //$document = preg_replace('/\s+/',' ',$document);
     //Detect if $document is a valid full document
     $hasHTML = stripos($document, '<html') !== false;
     $this->__documentMap = array();
     $DOM = new DOMDocument();
     $DOM->recover = true;
     $DOM->preserveWhiteSpace = true;
     $DOM->substituteEntities = true;
     $DOM->formatOutput = true;
     $DOM->encoding = 'utf-8';
     // NOTE(review): the 'HTML-ENTITIES' mbstring encoding is deprecated as of
     // PHP 8.2; kept here because it defines how non-ASCII input is loaded.
     $DOM->loadHTML(mb_convert_encoding($document, 'HTML-ENTITIES', 'UTF-8'));
     $DOM->normalizeDocument();
     $html = $DOM->getElementsByTagName($tagname);
     //Determine root / pieced map
     $hasRoot = false;
     // Always defined, so the schema below never reads an unset variable.
     $root = null;
     $first = $html->item(0);
     // Root detection only applies when the first match is the <html> element
     // of a fragment (no <html> tag in the input). For any other first match
     // the previous code read ->childNodes on the node list itself, which
     // never detected a root either but raised notices on the way.
     if ($first !== null && $first->childNodes->length > 0 && $first->tagName == 'html' && !$hasHTML) {
         $container = $first->childNodes->item(0);
         $children = $container->childNodes;
         if ($children && $children->length == 1) {
             $hasRoot = true;
             $only = $children->item(0);
             // Prefer the path of the single node's first child when it has one.
             $target = $only->firstChild ? $only->firstChild : $only;
             $root = $target->getNodePath();
         }
     }
     $this->__schema['root'] = $hasRoot;
     $this->__schema['rootPath'] = $root;
     $this->__mapLength = false;
     $this->_length = false;
     $this->length = false;
     if ($DOM->doctype) {
         //$this->__schema['doctype'] = $DOM->saveHTML($DOM->doctype);
         $DOM->removeChild($DOM->doctype);
     }
     //$output = $DOM->saveHTML();
     //$this->__documentRaw = $output;
     $this->_DOM = $DOM;
     $this->_selector = $tagname;
 }
Example #9
0
    /**
     * Add width/height attributes to the images of an article.
     *
     * Every <img> with a src attribute is requested via cURL (first 32 KiB
     * only), the response is written to a temporary file and measured with
     * getimagesize(). When at least one image could be measured, the article
     * content is replaced by the re-serialized document.
     *
     * @param array $article Article data; "content" holds the HTML.
     *
     * @return array The article, with "content" rewritten when sizes were found.
     */
    function hook_article_filter($article)
    {
        if (defined('NO_CURL') || !function_exists("curl_init")) {
            return $article;
        }
        // Prepended so the parser treats the content as UTF-8.
        $charset_hack = '<head>
			<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
		</head>';
        $doc = new DOMDocument();
        // Check the load result; the document object itself is always truthy.
        if (!$doc->loadHTML($charset_hack . $article["content"])) {
            return $article;
        }
        $found = false;
        $xpath = new DOMXpath($doc);
        $images = $xpath->query('(//img[@src])');
        foreach ($images as $img) {
            $src = $img->getAttribute("src");
            $ch = curl_init($src);
            if (!$ch) {
                // No handle, nothing to fetch for this image.
                continue;
            }
            curl_setopt($ch, CURLOPT_HEADER, 0);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($ch, CURLOPT_BINARYTRANSFER, 1);
            // The image header is enough to determine the dimensions.
            curl_setopt($ch, CURLOPT_RANGE, "0-32768");
            @($result = curl_exec($ch));
            $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
            if (!$result || ($http_code != 200 && $http_code != 206)) {
                continue;
            }
            $filename = tempnam(sys_get_temp_dir(), "ttsizecheck");
            if (!$filename) {
                continue;
            }
            $fh = fopen($filename, "w");
            if ($fh) {
                fwrite($fh, $result);
                fclose($fh);
                @($info = getimagesize($filename));
                if ($info && $info[0] > 0 && $info[1] > 0) {
                    $img->setAttribute("width", $info[0]);
                    $img->setAttribute("height", $info[1]);
                    $found = true;
                }
            }
            // tempnam() already created the file, so remove it even when it
            // could not be opened for writing.
            unlink($filename);
        }
        if ($found) {
            // remove doctype (only if the first node really is one)
            if ($doc->firstChild instanceof DOMDocumentType) {
                $doc->removeChild($doc->firstChild);
            }
            $article["content"] = $doc->saveHTML();
        }
        return $article;
    }
 /**
  * Tag the first link of a menu item with a `data-bwrkonepage-id` attribute.
  *
  * The attribute value is 'bwrk_onepage_' followed by $this->I['uid']. The
  * item is parsed as HTML, the first <a> receives the attribute and the
  * markup is serialized again without doctype, <html> and <body> wrappers.
  *
  * @param string $item Rendered menu item markup.
  * @param int $key Not used by this method.
  * @return string The rewritten markup; $item unchanged when it is empty or
  *                contains no <a> element.
  */
 public function extProc_beforeAllWrap($item, $key)
 {
     if (empty($item)) {
         // Hand the empty item back instead of implicitly returning null.
         return $item;
     }
     $dom = new \DOMDocument();
     $dom->loadHTML(mb_convert_encoding($item, 'HTML-ENTITIES', 'UTF-8'));
     $anchor = $dom->getElementsByTagName('a')->item(0);
     if ($anchor === null) {
         // No link to annotate; calling methods on null would be fatal.
         return $item;
     }
     $anchor->setAttribute('data-bwrkonepage-id', 'bwrk_onepage_' . $this->I['uid']);
     // Strip the doctype the parser adds, when there is one.
     if ($dom->doctype) {
         $dom->removeChild($dom->doctype);
     }
     // Replace <html> by the first node inside <body>.
     $dom->replaceChild($dom->firstChild->firstChild->firstChild, $dom->firstChild);
     return $dom->saveHTML();
 }
 /**
  * Append product recommendations to Autocomplete block html
  *
  * The recommendations markup for the current search query is converted to
  * a DOM node and appended to the first <ul> of $html. The result is
  * serialized without doctype, <html> and <body> wrappers.
  *
  * @param string $html
  * @return string The extended markup; $html unchanged when there are no
  *                recommendations, $html is empty, or it contains no <ul>.
  */
 protected function _appendTopRecommendations($html)
 {
     $recommendationsModel = Mage::getModel('autocompleterecommendations/recommendation');
     $query = Mage::helper('catalogsearch')->getQuery();
     $productRecommendationsHtml = $recommendationsModel->getProductRecommendationsHtml($query);
     if (!$productRecommendationsHtml || (string) $html === '') {
         // Nothing to add, or nothing to add it to (loadHTML() rejects empty input).
         return $html;
     }
     $dom = new DOMDocument('1.0', 'utf8');
     $dom->loadHTML($html);
     $ul = $dom->getElementsByTagName('ul')->item(0);
     if ($ul === null) {
         // Without a list there is no place to attach the recommendations.
         return $html;
     }
     $productRecommendationsDom = $recommendationsModel->getRecommendationsDom($productRecommendationsHtml);
     // Nodes from another document must be imported before they can be appended.
     $productRecommendationsDom = $dom->importNode($productRecommendationsDom, true);
     $ul->appendChild($productRecommendationsDom);
     if ($dom->doctype) {
         $dom->removeChild($dom->doctype);
     }
     // Replace <html> by the first node inside <body>.
     $dom->replaceChild($dom->firstChild->firstChild->firstChild, $dom->firstChild);
     return $dom->saveHTML();
 }
Example #12
0
    /**
     * Switch tumblr images of an article to their 1280px variant when it exists.
     *
     * For every <img> whose src contains 'media.tumblr.com', the three-digit
     * size suffix (e.g. "_500.jpg") is replaced by "_1280". The candidate URL
     * is probed with a HEAD-style cURL request and used when it answers 200.
     *
     * @param array $article Article data; "content" holds the HTML.
     *
     * @return array The article, with "content" rewritten when a src was replaced.
     */
    function hook_article_filter($article)
    {
        if (!function_exists("curl_init") || ini_get("open_basedir")) {
            return $article;
        }
        // Prepended so the parser treats the content as UTF-8.
        $charset_hack = '<head>
			<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
		</head>';
        $doc = new DOMDocument();
        // Check the load result; the document object itself is always truthy.
        if (!$doc->loadHTML($charset_hack . $article["content"])) {
            return $article;
        }
        $found = false;
        $xpath = new DOMXpath($doc);
        $images = $xpath->query('(//img[contains(@src, \'media.tumblr.com\')])');
        foreach ($images as $img) {
            $src = $img->getAttribute("src");
            // The dot before the extension is escaped so it only matches a
            // literal "." and not an arbitrary character.
            $test_src = preg_replace("/_\\d{3}\\.(jpg|gif|png)/", "_1280.\$1", $src);
            if ($src == $test_src) {
                continue;
            }
            $ch = curl_init($test_src);
            if (!$ch) {
                continue;
            }
            curl_setopt($ch, CURLOPT_TIMEOUT, 5);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
            // Headers only: the status code is all that is needed.
            curl_setopt($ch, CURLOPT_HEADER, true);
            curl_setopt($ch, CURLOPT_NOBODY, true);
            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
            curl_setopt($ch, CURLOPT_USERAGENT, SELF_USER_AGENT);
            @($result = curl_exec($ch));
            $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
            if ($result && $http_code == 200) {
                $img->setAttribute("src", $test_src);
                $found = true;
            }
        }
        if ($found) {
            // remove doctype (only if the first node really is one)
            if ($doc->firstChild instanceof DOMDocumentType) {
                $doc->removeChild($doc->firstChild);
            }
            $article["content"] = $doc->saveHTML();
        }
        return $article;
    }
Example #13
0
/**
 * Force every link of an HTML fragment to open in a new window.
 *
 * The fragment is parsed inside a temporary <div>, each <a> gets
 * target="_blank", and the nodes inside the wrapper are serialized again.
 * libxml errors other than warnings are reported through fof_log().
 *
 * @param string $content HTML fragment (UTF-8).
 * @return string The fragment with link targets set.
 */
function fof_item_targets($content)
{
    /* collect parser messages internally instead of emitting warnings */
    $previous_setting = libxml_use_internal_errors(true);
    $document = new DOMDocument();
    /*
    	The div wrapper keeps bare text from being wrapped in p elements by
    	the parser; it is left out again when the result is assembled.
    */
    $wrapped = '<div>' . mb_convert_encoding($content, 'HTML-ENTITIES', "UTF-8") . '</div>';
    $document->loadHTML($wrapped);
    /* drop the <!DOCTYPE> node DOMDocument adds */
    $document->removeChild($document->firstChild);
    /* replace <html> by our div (html > body > div) */
    $html_element = $document->firstChild;
    $document->replaceChild($html_element->firstChild->firstChild, $html_element);
    /* replace or add link targets */
    $finder = new DOMXPath($document);
    foreach ($finder->query('//a') as $anchor) {
        $anchor->setAttribute('target', '_blank');
    }
    /* serialize each node directly inside the wrapper div */
    $pieces = array();
    for ($child = $document->firstChild->firstChild; $child; $child = $child->nextSibling) {
        $pieces[] = $document->saveHTML($child);
    }
    $content_out = implode('', $pieces);
    /* report everything that is more serious than a warning */
    foreach (libxml_get_errors() as $error) {
        if ($error->level !== LIBXML_ERR_WARNING) {
            fof_log(__FUNCTION__ . ': ' . $error->message);
        }
    }
    libxml_clear_errors();
    libxml_use_internal_errors($previous_setting);
    return $content_out;
}
Example #14
0
 /**
  * Render the view file with all view elements applied.
  *
  * Reads the view file, loads it as HTML, optionally strips the doctype and
  * the <html>/<body> wrappers added by loadHTML(), lets every view element
  * rewrite the document and stores the trimmed result in $this->_html.
  *
  * @param bool $replaceHtml Whether to strip doctype and <html>/<body>.
  *
  * @return string The trimmed, serialized markup.
  */
 public function create($replaceHtml = false)
 {
     // The view file path is relative to the root directory.
     $this->_html = file_get_contents($this->getViewFile());
     $document = new \DOMDocument();
     $document->loadHTML($this->_html);
     if ($replaceHtml) {
         // Get rid of the doctype loadHTML() puts in front ...
         $doctype = $document->firstChild;
         $document->removeChild($doctype);
         // ... and swap <html><body></body></html> for the first body node.
         $htmlNode = $document->firstChild;
         $document->replaceChild($htmlNode->firstChild->firstChild, $htmlNode);
     }
     foreach ($this->getViewElements() as $element) {
         $element->replaceHtml($document, $this->getViewElementValue());
     }
     $rendered = trim($document->saveHTML());
     $this->_html = $rendered;
     return $rendered;
 }
Example #15
0
 /**
  * Load an HTML string that carries no charset information as UTF-8.
  *
  * An XML encoding declaration is prepended so the parser reads the markup
  * as UTF-8; the processing instruction it leaves behind is removed again.
  * The resulting document (empty when $htmlString is empty) is passed to
  * loadDom().
  *
  * @param string $htmlString HTML markup without a charset declaration.
  *
  * @return void
  */
 public function loadHtmlNoCharset($htmlString = '')
 {
     $dom = new DOMDocument('1.0', 'UTF-8');
     $dom->preserveWhiteSpace = false;
     if (strlen($htmlString)) {
         // Silence parser warnings for this load only and remember the
         // previous setting so the global libxml state is not left changed.
         $previousSetting = libxml_use_internal_errors(true);
         $dom->loadHTML('<?xml encoding="UTF-8">' . $htmlString);
         // dirty fix
         foreach ($dom->childNodes as $item) {
             if ($item->nodeType == XML_PI_NODE) {
                 $dom->removeChild($item);
                 // remove hack; stop right away because the list was just modified
                 break;
             }
         }
         $dom->encoding = 'UTF-8';
         // insert proper
         libxml_clear_errors();
         libxml_use_internal_errors($previousSetting);
     }
     $this->loadDom($dom);
 }
 /**
  * Append the alt text of the first captioned image to mrlovenstein.com articles.
  *
  * Articles from other sites are ignored. An article already marked in
  * "plugin_data" gets its stored content restored (when available);
  * otherwise a <br> and the alt text are appended after the first <img>
  * that has an alt attribute, and the article is marked as processed.
  *
  * @param array $article Article data, modified in place.
  *
  * @return bool true when the article belongs to mrlovenstein.com, false otherwise.
  */
 function process(&$article)
 {
     $owner_uid = $article["owner_uid"];
     if (strpos($article["link"], "mrlovenstein.com") === FALSE) {
         return false;
     }
     $marker = "af_comics,{$owner_uid}:";
     if (strpos($article["plugin_data"], $marker) !== FALSE) {
         // Already handled earlier: reuse the stored content if there is any.
         if (isset($article["stored"]["content"])) {
             $article["content"] = $article["stored"]["content"];
         }
         return true;
     }
     $doc = new DOMDocument();
     @$doc->loadHTML($article["content"]);
     $finder = new DOMXPath($doc);
     // Only the first image with an alt attribute is of interest.
     $image = $finder->query('(//img[@alt])')->item(0);
     if ($image) {
         $container = $image->parentNode;
         // add linebreak
         $container->appendChild($doc->createElement("br"));
         // add text
         $caption = $doc->createTextNode($image->getAttribute("alt"));
         $container->appendChild($caption);
         $doc->removeChild($doc->firstChild);
         $article["content"] = $doc->saveHTML();
         $article["plugin_data"] = $marker . $article["plugin_data"];
     }
     return true;
 }
Example #17
0
 /**
  * Builds the event of CdbXML guide example 6.2 through the object API,
  * serializes it and compares the result with the sample file
  * 'cdbxml-guide-example-6-2.xml' (default namespace declaration stripped).
  */
 public function testCreateCdbXMLGuideExample6Dot2()
 {
     // Event-level attributes.
     $cdbEvent = new CultureFeed_Cdb_Item_Event();
     $cdbEvent->setAvailableFrom('2010-02-25T00:00:00');
     $cdbEvent->setAvailableTo('2010-08-09T00:00:00');
     $cdbEvent->setCdbId('ea37cae2-c91e-4810-89ab-e060432d2b78');
     $cdbEvent->setCreatedBy('mverdoodt');
     $cdbEvent->setCreationDate('2010-07-05T18:28:18');
     $cdbEvent->setExternalId('SKB Import:SKB00001_216413');
     $cdbEvent->setIsParent(false);
     $cdbEvent->setLastUpdated('2010-07-28T13:58:55');
     $cdbEvent->setLastUpdatedBy('mverdoodt');
     $cdbEvent->setOwner('SKB Import');
     $cdbEvent->setPctComplete(80);
     $cdbEvent->setPublished(true);
     $cdbEvent->setValidator('SKB');
     $cdbEvent->setWfStatus('approved');
     $cdbEvent->setAgeFrom(18);
     $cdbEvent->setPrivate(false);

     // Calendar: a single timestamp.
     $timestamps = new CultureFeed_Cdb_Data_Calendar_TimestampList();
     $timestamps->add(new CultureFeed_Cdb_Data_Calendar_Timestamp('2010-08-01', '21:00:00.0000000'));
     $cdbEvent->setCalendar($timestamps);

     // Categories.
     $categoryList = new CultureFeed_Cdb_Data_CategoryList();
     $categoryList->add(new CultureFeed_Cdb_Data_Category(CultureFeed_Cdb_Data_Category::CATEGORY_TYPE_EVENT_TYPE, '0.50.4.0.0', 'Concert'));
     $categoryList->add(new CultureFeed_Cdb_Data_Category(CultureFeed_Cdb_Data_Category::CATEGORY_TYPE_THEME, '1.8.2.0.0', 'Jazz en blues'));
     $categoryList->add(new CultureFeed_Cdb_Data_Category(CultureFeed_Cdb_Data_Category::CATEGORY_TYPE_PUBLICSCOPE, '6.2.0.0.0', 'Regionaal'));
     $cdbEvent->setCategories($categoryList);

     // Contact information.
     $contact = new CultureFeed_Cdb_Data_ContactInfo();
     $contact->addMail(new CultureFeed_Cdb_Data_Mail('*****@*****.**', null, null));
     $contact->addPhone(new CultureFeed_Cdb_Data_Phone('0487-62.22.31'));
     $website = new CultureFeed_Cdb_Data_Url('http://www.bonnefooi.be');
     $website->setMain();
     $contact->addUrl($website);
     $cdbEvent->setContactInfo($contact);

     // Event details: Dutch (full) and English (short description only).
     $detailList = new CultureFeed_Cdb_Data_EventDetailList();
     $dutchDetail = new CultureFeed_Cdb_Data_EventDetail();
     $dutchDetail->setLanguage('nl');
     $dutchDetail->setTitle('The Bonnefooi Acoustic Jam');
     $dutchDetail->setCalendarSummary('zo 01/08/10 om 21:00');
     $performerList = new CultureFeed_Cdb_Data_PerformerList();
     $performerList->add(new CultureFeed_Cdb_Data_Performer('Muzikant', 'Matt, the Englishman in Brussels'));
     $dutchDetail->setPerformers($performerList);
     $dutchDetail->setLongDescription('Weggelaten voor leesbaarheid...');
     $image = new CultureFeed_Cdb_Data_File();
     $image->setMain();
     $image->setCopyright('Bonnefooi');
     $image->setHLink('http://www.bonnefooi.be/images/sized/site/images/uploads/Jeroen_Jamming-453x604.jpg');
     $image->setMediaType(CultureFeed_Cdb_Data_File::MEDIA_TYPE_IMAGEWEB);
     $image->setTitle('Jeroen Jamming');
     $dutchDetail->getMedia()->add($image);
     $entrance = new CultureFeed_Cdb_Data_Price(0);
     $entrance->setTitle('The Bonnefooi Acoustic Jam');
     $dutchDetail->setPrice($entrance);
     $dutchDetail->setShortDescription('Korte omschrijving.');
     $detailList->add($dutchDetail);
     $englishDetail = new CultureFeed_Cdb_Data_EventDetail();
     $englishDetail->setLanguage('en');
     $englishDetail->setShortDescription('Short description.');
     $detailList->add($englishDetail);
     $cdbEvent->setDetails($detailList);

     // @todo Add headings.
     $cdbEvent->addKeyword('Free Jazz, Acoustisch');

     // Location and organiser.
     $venueAddress = new CultureFeed_Cdb_Data_Address();
     $streetAddress = new CultureFeed_Cdb_Data_Address_PhysicalAddress();
     $streetAddress->setCity('Brussel');
     $streetAddress->setCountry('BE');
     $streetAddress->setHouseNumber(8);
     $streetAddress->setStreet('Steenstraat');
     $streetAddress->setZip(1000);
     $venueAddress->setPhysicalAddress($streetAddress);
     $venue = new CultureFeed_Cdb_Data_Location($venueAddress);
     $venue->setLabel('Café Bonnefooi');
     $venue->setCdbid('920e9755-94a0-42c1-8c8c-9d17f693d0be');
     $cdbEvent->setLocation($venue);
     $host = new CultureFeed_Cdb_Data_Organiser();
     $host->setLabel('Café Bonnefooi');
     $cdbEvent->setOrganiser($host);

     // Spoken languages.
     $languageList = new CultureFeed_Cdb_Data_LanguageList();
     $languageList->add(new CultureFeed_Cdb_Data_Language('Nederlands', CultureFeed_Cdb_Data_Language::TYPE_SPOKEN));
     $languageList->add(new CultureFeed_Cdb_Data_Language('Frans', CultureFeed_Cdb_Data_Language::TYPE_SPOKEN));
     $cdbEvent->setLanguages($languageList);

     // Serialize the event: it is first appended below a temporary 'cdbxml'
     // element, then that wrapper is swapped for the event element itself.
     $actualDom = new DOMDocument('1.0', 'UTF-8');
     $actualDom->preserveWhiteSpace = false;
     $actualDom->formatOutput = true;
     $wrapper = $actualDom->createElementNS(CultureFeed_Cdb_Xml::namespaceUri(), 'cdbxml');
     $actualDom->appendChild($wrapper);
     $cdbEvent->appendToDOM($wrapper);
     $finder = new DOMXPath($actualDom);
     $matches = $finder->query('//event');
     $this->assertEquals(1, $matches->length);
     $eventNode = $matches->item(0);
     $actualDom->removeChild($wrapper);
     $actualDom->appendChild($eventNode);
     // @todo Put xmlns attribute first.
     $actualXml = $actualDom->saveXML();

     // Load the sample with its default namespace declaration removed and
     // re-serialize it with the same formatting settings.
     $expectedDom = new DOMDocument('1.0', 'UTF-8');
     $sample = file_get_contents($this->samplePath('cdbxml-guide-example-6-2.xml'));
     $sample = str_replace('xmlns="http://www.cultuurdatabank.com/XMLSchema/CdbXSD/3.2/FINAL" ', '', $sample);
     $expectedDom->preserveWhiteSpace = false;
     $expectedDom->formatOutput = true;
     $expectedDom->loadXML($sample);
     $expectedDom->preserveWhiteSpace = false;
     $expectedDom->formatOutput = true;
     $expectedXml = $expectedDom->saveXML();

     $this->assertEquals($expectedXml, $actualXml);
 }
Example #18
0
 /**
  * Log a click to the temporary click-log and report whether it is a double click.
  *
  * The log is the XML file '~localstat.xml' in the registry's 'temp_dir'. It
  * stores, per document, per hashed client IP and per file, the time of the
  * last access. The client IP is read from $_SERVER['REMOTE_ADDR'].
  *
  * @param $documentId id of documents table
  * @param $fileId id of document_files table
  * @param $time time of the click
  * @return bool is it a double click
  * @throws Opus_Model_Exception when the click-log cannot be loaded or saved
  */
 public function logClick($documentId, $fileId, $time)
 {
     $ip = array_key_exists('REMOTE_ADDR', $_SERVER) ? $_SERVER['REMOTE_ADDR'] : '';
     $tempDir = Zend_Registry::getInstance()->get('temp_dir');
     $logFile = $tempDir . '~localstat.xml';
     // The hashed IP is used as an element name, hence the letter prefix.
     $ipTag = "h" . md5($ip);
     //TODO determine file type of file id
     $filetype = 'pdf';

     // Open the existing log or start a new one with an <access> root.
     $dom = new DOMDocument();
     if (file_exists($logFile) === FALSE) {
         $dom->appendChild($dom->createElement('access'));
     } else {
         $dom->load($logFile);
     }
     $xmlAccess = $dom->getElementsByTagName("access")->item(0);
     if (is_null($xmlAccess)) {
         throw new Opus_Model_Exception('Error loading click-log "' . $logFile . '"');
     }

     $longestInterval = max($this->doubleClickIntervalHtml, $this->doubleClickIntervalPdf);

     // If the global access timestamp is too old, the whole log is discarded.
     $xmlTime = $dom->getElementsByTagName("time")->item(0);
     if ($xmlTime != null && $time - $xmlTime->nodeValue > $longestInterval) {
         $dom->removeChild($xmlAccess);
         $xmlAccess = $dom->createElement('access');
         $dom->appendChild($xmlAccess);
     }

     // Refresh the global timestamp.
     $xmlTime = $xmlAccess->getElementsByTagName('time')->item(0);
     if ($xmlTime != null) {
         $xmlAccess->removeChild($xmlTime);
     }
     $xmlAccess->appendChild($dom->createElement('time', $time));

     // Find or create the document / ip / file chain of elements.
     $documentTag = 'document' . $documentId;
     $xmlDocumentId = $dom->getElementsByTagName($documentTag)->item(0);
     if ($xmlDocumentId == null) {
         $xmlDocumentId = $dom->createElement($documentTag);
         $xmlAccess->appendChild($xmlDocumentId);
     }
     $xmlIp = $xmlDocumentId->getElementsByTagName($ipTag)->item(0);
     if ($xmlIp == null) {
         $xmlIp = $dom->createElement($ipTag);
         $xmlDocumentId->appendChild($xmlIp);
     }
     $fileTag = 'file' . $fileId;
     $xmlFileId = $xmlIp->getElementsByTagName($fileTag)->item(0);
     if ($xmlFileId == null) {
         $xmlFileId = $dom->createElement($fileTag);
         $xmlIp->appendChild($xmlFileId);
     }

     // Compare with the last access recorded for this file.
     $fileIdTime = $xmlFileId->getAttribute('lastAccess');
     $doubleClick = false;
     // No previous access, or one too long ago, is never a double click.
     $isFirstOrStale = $fileIdTime == null || $time - $fileIdTime > $longestInterval;
     if (!$isFirstOrStale) {
         $elapsed = $time - $fileIdTime;
         if ($elapsed <= $this->doubleClickIntervalHtml && ($filetype == 'html' || $fileId == -1)) {
             //html file double click
             $doubleClick = true;
         } elseif ($elapsed <= $this->doubleClickIntervalPdf && $filetype == 'pdf' && $fileId != -1) {
             //pdf file double click
             $doubleClick = true;
         }
     }
     $xmlFileId->setAttribute('lastAccess', $time);

     if ($dom->save($logFile) === false) {
         throw new Opus_Model_Exception('Error saving click-log "' . $logFile . '"');
     }
     return $doubleClick;
 }
Example #19
0
    $set->appendChild($node);
    $filename = "{$ac['srcdir']}/.manual.{$ac['PARTIAL']}.xml";
    $dom->save($filename);
    echo "done.\n";
    echo "Partial manual saved to {$filename}. To build it, run 'phd -d {$filename}'\n";
    exit(0);
}
// }}}
$mxml = $ac["OUTPUT_FILENAME"];
if ($dom->validate()) {
    echo "done.\n";
    printf("\nAll good. Saving %s... ", basename($ac["OUTPUT_FILENAME"]));
    flush();
    if ($ac["SEGFAULT_SPEED"] == "yes") {
        $t = $dom->doctype;
        $dom->removeChild($t);
    }
    $dom->save($mxml);
    echo "done.\n";
    echo "All you have to do now is run 'phd -d {$mxml}'\n";
    echo "If the script hangs here, you can abort with ^C.\n";
    echo <<<CAT
         _ _..._ __
        \\)`    (` /
         /      `\\
        |  d  b   |
        =\\  Y    =/--..-="````"-.
          '.=__.-'               `\\
             o/                 /\\ \\
              |                 | \\ \\   / )
               \\    .--""`\\    <   \\ '-' /
 /**
  * Passes the alt and title attributes of every <img> in an HTML fragment
  * through wpml_ctt_prepare_string().
  *
  * @param $template forwarded unchanged to wpml_ctt_prepare_string()
  * @param $string   HTML fragment to process
  * @param $lang     forwarded unchanged to wpml_ctt_prepare_string()
  *
  * @return string the rewritten fragment, or $string untouched when it could not be parsed
  */
 private function add_language_name_to_images($template, $string, $lang)
 {
     $document = new DOMDocument();
     // The fragment is wrapped in a <div> so that tag-less text does not get an extra <p>.
     if (!@$document->loadHTML('<div>' . $string . '</div>')) {
         return $string;
     }
     foreach ($document->getElementsByTagName('img') as $img) {
         // alt first, then title
         foreach (array('alt', 'title') as $attribute) {
             if ($img->hasAttribute($attribute)) {
                 $img->setAttribute($attribute, wpml_ctt_prepare_string($template, $img->getAttribute($attribute), $lang));
             }
         }
     }
     // The first top-level node is the doctype; drop it.
     $document->removeChild($document->firstChild);
     // Strip the html/body/div scaffolding that was added around the fragment.
     $scaffolding = array('<html>', '</html>', '<body><div>', '</div></body>');
     return str_replace($scaffolding, '', $document->saveHTML());
 }
 /**
  * Deletes every node matched by an XPath expression from an HTML fragment,
  * e.g. all <script> elements.
  *
  * The fragment is parsed inside a <div class="form-group"> wrapper and that
  * wrapper is part of the returned markup.
  *
  * @param $html        HTML fragment to clean
  * @param $xpathString XPath expression selecting the nodes to delete
  * @return string serialized markup without the matched nodes
  */
 private function removeDomNodes($html, $xpathString)
 {
     $document = new DOMDocument();
     // The LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD flags are not used because
     // they are not available on all servers (Libxml < 2.7.8); the doctype and the
     // <html><body> wrappers are stripped by hand below instead.
     $document->loadHtml('<div class="form-group">' . $html . '</div>');
     $document->removeChild($document->doctype);
     // Promote the wrapper <div> (html > body > div) to be the document's root node.
     $htmlElement = $document->firstChild;
     $wrapper = $htmlElement->firstChild->firstChild;
     $document->replaceChild($wrapper, $htmlElement);
     $finder = new DOMXPath($document);
     // The query is re-run after each removal and only its first hit is used.
     $match = $finder->query($xpathString)->item(0);
     while ($match) {
         $match->parentNode->removeChild($match);
         $match = $finder->query($xpathString)->item(0);
     }
     return $document->saveHTML();
 }
Example #22
0
 /**
  * Return a document object based on a string containing the contents of
  * a web page
  *
  * When the page has no <html> tag, a simplified document is built from it:
  * the head keeps only title/meta/base tags and the body keeps a fixed list
  * of common tags, everything else being stripped with strip_tags().
  *
  * @param string $page   a web page
  *
  * @return object  document object (DOMDocument with its encoding set to UTF-8)
  */
 static function dom($page)
 {
     /*
         first do a crude check to see if we have at least an <html> tag
         otherwise try to make a simplified html document from what we got
     */
     if (!stristr($page, "<html")) {
         $head_tags = "<title><meta><base>";
         $head = strip_tags($page, $head_tags);
         $body_tags = "<frameset><frame><noscript><img><span><b><i><em>" . "<strong><h1><h2><h3><h4><h5><h6><p><div>" . "<a><table><tr><td><th><dt><dir><dl><dd>";
         $body = strip_tags($page, $body_tags);
         $page = "<html><head>{$head}</head><body>{$body}</body></html>";
     }
     $dom = new DOMDocument();
     // The XML processing instruction is a hack (modified from php.net) that
     // makes loadHTML() read the markup as UTF-8.
     @$dom->loadHTML('<?xml encoding="UTF-8">' . $page);
     // Remove the hack again. The child list is copied first: removing nodes
     // from $dom->childNodes while iterating that same list can end the loop
     // early or skip nodes, leaving processing instructions behind.
     foreach (iterator_to_array($dom->childNodes) as $item) {
         if ($item->nodeType == XML_PI_NODE) {
             $dom->removeChild($item);
         }
     }
     // insert the proper encoding
     $dom->encoding = "UTF-8";
     return $dom;
 }
 /**
  * Replaces $this->content with the markup of the first node matched by an
  * XPath expression, re-serialized as a UTF-8 HTML document.
  *
  * NOTE(review): when the expression matches nothing, item(0) is null and
  * saveHTML(null) serializes the whole document, so the content is only
  * normalized and true is still returned - confirm callers expect that.
  *
  * @param string|null $xpath XPath expression selecting the node to keep
  * @return bool true when the content was rewritten, false when $xpath is empty
  */
 public function stripExtra($xpath = null)
 {
     if (empty($xpath)) {
         return false;
     }
     $tempDOM = new DOMDocument();
     // The XML processing instruction makes loadHTML() read the markup as UTF-8.
     @$tempDOM->loadHTML('<?xml encoding="UTF-8">' . $this->content);
     // Remove the hack again. The child list is copied first: removing nodes
     // from childNodes while iterating that same list can skip nodes.
     foreach (iterator_to_array($tempDOM->childNodes) as $item) {
         if ($item->nodeType == XML_PI_NODE) {
             $tempDOM->removeChild($item);
         }
     }
     // insert the proper encoding
     $tempDOM->encoding = 'UTF-8';
     $tempXPath = new DOMXPath($tempDOM);
     $temp_content = $tempXPath->query($xpath)->item(0);
     $temp_content = $tempDOM->saveHTML($temp_content);
     // Second pass: load the extracted markup into a fresh document so that it
     // is saved as a complete HTML document.
     $newTempDOM = new DOMDocument();
     @$newTempDOM->loadHTML('<?xml encoding="UTF-8">' . $temp_content);
     foreach (iterator_to_array($newTempDOM->childNodes) as $item) {
         if ($item->nodeType == XML_PI_NODE) {
             $newTempDOM->removeChild($item);
         }
     }
     $newTempDOM->encoding = 'UTF-8';
     $this->content = $newTempDOM->saveHTML();
     return true;
 }
Example #24
0
 /**
  * Re-serializes an HTML string through DOMDocument with UTF-8 forced as the
  * input and output encoding.
  *
  * @param string $html markup to normalize
  * @return string the document as saved by DOMDocument::saveHTML()
  */
 private function fixEncoding($html)
 {
     $doc = new \DOMDocument();
     // The XML processing instruction makes loadHTML() read the markup as UTF-8.
     @$doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     // Remove the hack again. The child list is copied first: removing nodes
     // from childNodes while iterating that same list can skip nodes.
     foreach (iterator_to_array($doc->childNodes) as $item) {
         if ($item->nodeType == XML_PI_NODE) {
             $doc->removeChild($item);
         }
     }
     $doc->encoding = 'UTF-8';
     return $doc->saveHTML();
 }
Example #25
0
 /**
  * Compiles raw template source into PHP/HTML.
  *
  * Processing order:
  *  1. <% IGNORE %>...<% IGNORE_END %> blocks are swapped for placeholders
  *     and restored verbatim at the end.
  *  2. <%= ... %> and <% ... %> code blocks are converted to PHP and swapped
  *     for placeholders; <% render ... %> becomes a Template::render() call.
  *  3. For templates with the 'html' extension, forms carrying
  *     data-helper="simphpfy" get their action/method filled in, and
  *     input/select/textarea elements are passed to parseInput().
  *  4. When variable parsing is enabled, ${name}$ and $name$ become echo
  *     statements.
  *  5. All placeholders are replaced by their stored content.
  *
  * @param string $content raw template source
  * @return string the compiled template
  * @throws InvalidTemplateException on a malformed render tag or form helper
  */
 private function parse($content)
 {
     $extension = $this->getExtension();
     $path = $this->getPath();
     $parseVariable = $this->isParseVariable();
     $options = $this->getOptions();
     /*
      * Everything inside a <% IGNORE %>...<% IGNORE_END %> block
      * will be ignored from being parsed
      */
     $ignoreBlock = array();
     $content = preg_replace_callback('@(<% *IGNORE *%>(.*?)<% *IGNORE_END *%>)@s', function ($matches) use(&$ignoreBlock) {
         $ignoreBlock[] = $matches[2];
         return '<!--SimPHPfyIgnoreBlock#' . count($ignoreBlock) . '#SimPHPfyIgnoreBlock-->';
     }, $content);
     /*
      * Code block is in the form <% code %>, code can be any valid
      * PHP statements.
      * A special form of code block is <%= ${variable_name} %>
      * which is equivalent to echo a variable
      *
      * Every code snippet is stored inside the $codeBlock Array to
      * prevent variable parsing from corrupting the whole document
      */
     $codeBlock = array();
     $content = preg_replace_callback('@(<%= *(.*?[^\\\\]) *%>)@s', function ($matches) use(&$codeBlock) {
         $codeSnippet = $matches[2];
         // ${name} / $name (optionally with quoted array keys) are echoed only when set;
         // any other expression is stored unchanged.
         if (preg_match('@^\\${([a-zA-Z_\\x7f-\\xff][\\[\\]\'"a-zA-Z0-9_\\x7f-\\xff*]*(\\[[\'"][a-zA-Z0-9_\\x7f-\\xff*]*[\'"]\\])*)}$@', $codeSnippet)) {
             $codeSnippet = "<?php if (isset({$codeSnippet})) { echo {$codeSnippet}; } ?>";
         } elseif (preg_match('@^\\$([a-zA-Z_\\x7f-\\xff][a-zA-Z0-9_\\x7f-\\xff*]*(\\[[\'"][a-zA-Z0-9_\\x7f-\\xff*]*[\'"]\\])*)$@', $codeSnippet)) {
             $codeSnippet = "<?php if (isset({$codeSnippet})) { echo {$codeSnippet}; } ?>";
         }
         $codeBlock[] = $codeSnippet;
         return '<!--SimPHPfyCodeBlock#' . count($codeBlock) . '#SimPHPfyCodeBlock-->';
     }, $content);
     $content = preg_replace_callback('@(<% *(.*?[^\\\\]) *%>)@s', function ($matches) use(&$codeBlock, $path) {
         $codeSnippet = $matches[2];
         if ($codeSnippet == 'IGNORE_MINIFY' || $codeSnippet == 'IGNORE_MINIFY_END') {
             return $matches[0];
         } elseif (preg_match('@^render.*$@', $codeSnippet)) {
             /*
              * Layout rendering
              */
             $codeSnippet = '<' . $codeSnippet . ' />';
             $dom = new DOMDocument();
             @$dom->loadXML($codeSnippet);
             /*
              * extract the attributes of render tag
              */
             $render = $dom->getElementsByTagName('render')->item(0);
             if ($render === null) {
                 // The snippet did not parse into a <render .../> element; fail
                 // with a template error instead of a fatal error on null.
                 throw new InvalidTemplateException(array($path, 'Malformed layout rendering tag'));
             }
             // <% render file='header.html' directory='' static='' dynamic='' %>
             $file = $directory = $dynamic = $format = $parsedDirectory = '';
             if (($file = $render->getAttribute('file')) == '') {
                 throw new InvalidTemplateException(array($path, 'Missing `file` for layout rendering'));
             }
             $directory = ASSERT;
             $parsedDirectory = TEMP_VIEW . 'Assert' . DS;
             // The assignment is parenthesized so that $dirAttr receives the
             // attribute value rather than the result of the comparison.
             if (($dirAttr = $render->getAttribute('directory')) != '') {
                 $directory .= $dirAttr . DS;
                 $parsedDirectory = TEMP_VIEW . $dirAttr . DS;
             }
             /*
              * If `controller` attribute is specified, the layout is
              * inside the View/:Controller
              */
             if (($controllerAttr = $render->getAttribute('controller')) != '') {
                 $directory = VIEW . $controllerAttr . DS;
                 $parsedDirectory = TEMP_VIEW . $controllerAttr . DS;
             }
             if (($staticAttr = $render->getAttribute('static')) != '') {
                 $dynamic = !$staticAttr;
             }
             /*
              * `dynamic` attribute always overwrites `static` attribute,
              * though it is not recommended to set both attributes
              */
             if (($dynamicAttr = $render->getAttribute('dynamic')) != '') {
                 $dynamic = $dynamicAttr;
             }
             /*
              * The behaviour of render. When a render code is parsed,
              * should it return a URL to the rendered file or directly
              * render that file, possibilities: 'url' | 'direct'
              * default: 'direct'
              */
             if (($behaviourAttr = $render->getAttribute('behaviour')) == '') {
                 $behaviour = 'direct';
             } else {
                 $behaviour = $behaviourAttr;
             }
             $format = $render->getAttribute('format');
             // Build the source text of the options array passed to Template::render();
             // $i counts the entries written so far to decide on the separator.
             $optionArray = 'array(';
             $i = 0;
             if ($format != '') {
                 $optionArray .= "'format' => {$format}";
                 $i++;
             }
             if ($parsedDirectory != '') {
                 $optionArray .= ($i++ == 0 ? '' : ', ') . "'parsedDirectory' => '{$parsedDirectory}'";
             }
             if ($dynamic !== '') {
                 $optionArray .= ($i++ == 0 ? '' : ', ') . '\'dynamic\' => ' . ($dynamic ? 'TRUE' : 'FALSE');
             } else {
                 $dynamic = TRUE;
             }
             $optionArray .= ')';
             $extension = substr(strrchr($file, "."), 1);
             if ($behaviour == 'direct' || $dynamic) {
                 if ($extension == 'html') {
                     $codeSnippet = "<?php include Template::render('{$file}', '{$directory}', {$optionArray}); ?>";
                 } elseif ($extension == 'js') {
                     $codeSnippet = "<script><?php include Template::render('{$file}', '{$directory}', {$optionArray}); ?></script>";
                 } elseif ($extension == 'css') {
                     $codeSnippet = "<style><?php include Template::render('{$file}', '{$directory}', {$optionArray}); ?></style>";
                 }
             } else {
                 if ($extension == 'html') {
                     $codeSnippet = "<?php include Template::render('{$file}', '{$directory}', {$optionArray}); ?>";
                 } else {
                     $optionArray = substr($optionArray, 0, -1) . ($i++ == 0 ? '' : ', ') . '\'relative\' => TRUE)';
                     if ($extension == 'js') {
                         $codeSnippet = "<script src=\"<?php echo Template::render('{$file}', '{$directory}', {$optionArray}); ?>\"></script>";
                     } elseif ($extension == 'css') {
                         // The result of render is echoed into href, matching the script branch above.
                         $codeSnippet = "<link rel=\"stylesheet\" href=\"<?php echo Template::render('{$file}', '{$directory}', {$optionArray}); ?>\" />";
                     }
                 }
             }
         } else {
             $codeSnippet = '<?php ' . str_replace('\\%>', '%>', $codeSnippet) . ' ?>';
         }
         $codeBlock[] = $codeSnippet;
         return '<!--SimPHPfyCodeBlock#' . count($codeBlock) . '#SimPHPfyCodeBlock-->';
     }, $content);
     /*
      * Perform HTML specific parsing
      */
     if ($extension == 'html') {
         $htmlBlock = array();
         /*
          * guarantee the form tag is not inside a comment by examining the
          * DOM Tree instead of pure regular expression
          */
         $dom = new DOMDocument();
         @$dom->loadHTML($content);
         $forms = $dom->getElementsByTagName('form');
         for ($i = 0; $i < $forms->length; $i++) {
             /*
              * Re-create the DOM because the previous iteration has
              * changed the content of $content
              */
             $dom = new DOMDocument();
             @$dom->loadHTML($content);
             $form = $dom->getElementsByTagName('form')->item($i);
             if ($form->textContent == '') {
                 /*
                  * Create dummy block to make the form tag recognizable
                  */
                 $dummyString = '<!--SimPHPfyDummyBlock#' . $i . '#SimPHPfyDummyBlock-->';
                 $textNode = $dom->createTextNode($dummyString);
                 $form->appendChild($textNode);
                 $content = $dom->saveHTML();
                 $pattern = '@(< *form.*?>)(' . $dummyString . ')@';
             } else {
                 /*
                  * Save the form HTML content into the $htmlBlock
                  */
                 $htmlContent = '';
                 while ($form->hasChildNodes()) {
                     /*
                      * Loop through the children so that the parent form tag
                      * itself is not included in the stored $htmlBlock
                      */
                     $node = $form->childNodes->item(0);
                     $htmlContent .= $dom->saveXML($node);
                     $form->removeChild($node);
                 }
                 $htmlBlock[] = $htmlContent;
                 $commentNode = $dom->createComment('SimPHPfyHTMLBlock#' . count($htmlBlock) . '#SimPHPfyHTMLBlock');
                 $htmlBlockString = '<!--SimPHPfyHTMLBlock#' . count($htmlBlock) . '#SimPHPfyHTMLBlock-->';
                 $form->appendChild($commentNode);
                 $content = $dom->saveHTML();
                 $pattern = '@(< *form.*?>)(' . $htmlBlockString . ')@';
             }
             $content = preg_replace_callback($pattern, function ($matches) use($options, $path) {
                 $formDom = new DOMDocument();
                 @$formDom->loadHTML($matches[1]);
                 $formTags = @$formDom->getElementsByTagName('form');
                 $formTag = $formTags->item(0);
                 $controller = $action = '';
                 /*
                  * data-controller and data-action attributes determine which
                  * controller and action the form should send to
                  *
                  * A table of the possible values of data-* is as follows:
                  * data-controller  data-action     result
                  * String           String          $controller/$action
                  * Omitted          String          :Controller/$action
                  * String           Omitted         Disallowed
                  * Omitted          Omitted         :Controller/:Action
                  */
                 if ($formTag->getAttribute('data-helper') == 'simphpfy') {
                     if ($formTag->getAttribute('data-controller') == '') {
                         if ($formTag->getAttribute('data-action') == '') {
                             if (isset($options['controller'])) {
                                 $controller = $options['controller']->getController();
                                 $action = $options['controller']->getAction();
                             } else {
                                 throw new InvalidTemplateException(array($path, 'missing controller and/or action for form helper'));
                             }
                         } else {
                             if (isset($options['controller'])) {
                                 $controller = $options['controller']->getController();
                                 $action = $formTag->getAttribute('data-action');
                             } else {
                                 throw new InvalidTemplateException(array($path, 'missing controller for form helper'));
                             }
                         }
                     } else {
                         if ($formTag->getAttribute('data-action') == '') {
                             throw new InvalidTemplateException(array($path, 'malformed form helper'));
                         } else {
                             $controller = $formTag->getAttribute('data-controller');
                             $action = $formTag->getAttribute('data-action');
                         }
                     }
                     if ($controller && $action) {
                         $actionAttr = DIRECTORY_PREFIX . $controller . DS . $action;
                         if (($id = $formTag->getAttribute('data-id')) != '') {
                             $actionAttr .= DS . $id;
                         }
                         $formTag->setAttribute('action', $actionAttr);
                     } else {
                         throw new InvalidTemplateException(array($path, 'malformed form helper'));
                     }
                     // check if method is PUT or DELETE
                     if ($formTag->getAttribute('data-method') != '') {
                         $method = strtoupper($formTag->getAttribute('data-method'));
                         if ($method == "PUT" || $method == "DELETE") {
                             $formTag->setAttribute('method', 'POST');
                         } elseif ($method == 'GET' || $method == 'POST') {
                             $formTag->setAttribute('method', $method);
                         }
                         // The requested method is carried in a hidden _method field.
                         $inputMethod = $formDom->createElement('input');
                         $inputMethod->setAttribute('type', 'hidden');
                         $inputMethod->setAttribute('name', '_method');
                         $inputMethod->setAttribute('value', $method);
                         $formTag->appendChild($inputMethod);
                     }
                     // remove <!DOCTYPE
                     $formDom->removeChild($formDom->doctype);
                     // remove <html><body></body></html>
                     $formDom->replaceChild($formDom->firstChild->firstChild->firstChild, $formDom->firstChild);
                     // Drop the closing tag so the original form body (placeholder) follows the opening tag.
                     return str_replace('</form>', '', $formDom->saveHTML()) . $matches[2];
                 } else {
                     return $matches[0];
                 }
             }, $content);
         }
         $content = preg_replace_callback('@<!--SimPHPfyHTMLBlock#([0-9]+)#SimPHPfyHTMLBlock-->@', function ($matches) use($htmlBlock) {
             return $htmlBlock[(int) $matches[1] - 1];
         }, $content);
         $dom = new DOMDocument();
         @$dom->loadHTML($content);
         $this->parseInput($dom->getElementsByTagName('input'), $codeBlock);
         $this->parseInput($dom->getElementsByTagName('select'), $codeBlock);
         $this->parseInput($dom->getElementsByTagName('textarea'), $codeBlock);
         $content = $dom->saveHTML();
     }
     if ($parseVariable) {
         // ${name}$ and $name$ become echo statements unless the leading `$` is
         // escaped with a backslash. Group 1 is the character in front of the
         // variable (or the start of the content) and is written back so that
         // it is not deleted from the output.
         $content = preg_replace('@([^\\\\]|^)\\${([a-zA-Z_\\x7f-\\xff][a-zA-Z0-9_\\x7f-\\xff]*)}\\$@', '\\1<?php echo $\\2; ?>', $content);
         $content = preg_replace('@([^\\\\]|^)\\$([a-zA-Z_\\x7f-\\xff][a-zA-Z0-9_\\x7f-\\xff]*)\\$@', '\\1<?php echo $\\2; ?>', $content);
     }
     // Put the stored blocks back in place of their placeholders.
     $content = preg_replace_callback('@<!--SimPHPfyIgnoreBlock#([0-9]+)#SimPHPfyIgnoreBlock-->@', function ($matches) use($ignoreBlock) {
         return $ignoreBlock[(int) $matches[1] - 1];
     }, $content);
     $content = preg_replace_callback('@<!--SimPHPfyCodeBlock#([0-9]+)#SimPHPfyCodeBlock-->@', function ($matches) use($codeBlock) {
         return $codeBlock[(int) $matches[1] - 1];
     }, $content);
     $content = preg_replace('@<!--SimPHPfyDummyBlock#([0-9]+)#SimPHPfyDummyBlock-->@', '', $content);
     return $content;
 }
Example #26
0
File: Doc.php Project: h0gar/xpath
 /**
  * Convert xml/html code to a DOMXpath object.
  *
  * For HTML, the encoding is forced with a temporary processing instruction
  * that is removed again after loading. For XML, an XML declaration carrying
  * the encoding is prepended only when the code does not start with one.
  * Parse warnings are suppressed; unparsable code yields an XPath object
  * over an empty document.
  *
  * @param String code The html/xml code to be parsed.
  * @param String type html|xml
  * @param String encoding
  *
  * @return \DOMXPath
  */
 protected static function toXpath($code, $type = 'html', $encoding = 'UTF-8')
 {
     $doc = new \DOMDocument();
     if ($type == 'xml') {
         // '<?xml encoding="...">' is not a valid XML declaration (no version,
         // no closing '?>'), so loadXML() rejects it. Use a well-formed
         // declaration instead, and only when the code does not bring its own.
         $code = ltrim($code);
         if (!preg_match('/^<\?xml\s/', $code)) {
             $code = '<?xml version="1.0" encoding="' . $encoding . '"?>' . $code;
         }
         @$doc->loadXML($code);
         // Nothing was injected as a node here, so there is nothing to remove.
         return new \DOMXPath($doc);
     }
     @$doc->loadHTML('<?xml encoding="' . $encoding . '">' . $code);
     // Remove the encoding node. The child list is copied first: removing nodes
     // from childNodes while iterating that same list can skip nodes.
     foreach (iterator_to_array($doc->childNodes) as $item) {
         if ($item->nodeType == XML_PI_NODE) {
             $doc->removeChild($item);
         }
     }
     return new \DOMXPath($doc);
 }
Example #27
0
[expect]
root
nodeType: 1
child
nodeType: 1

[file]
<?php 
/*
 * Checks that a node obtained through XPath can still be read after the
 * document's root element has been removed. Both cases are expected to print
 * the node name followed by "nodeType: 1".
 */
/* Node is preserved from removeChild */
$dom = new DOMDocument();
$dom->loadXML('<root><child/></root>');
$xpath = new DOMXpath($dom);
// Case 1: $node is the root element itself, i.e. the node that gets removed.
$node = $xpath->query('/root')->item(0);
echo $node->nodeName . "\n";
// The node to remove is looked up through $GLOBALS rather than through $dom.
$dom->removeChild($GLOBALS['dom']->firstChild);
echo "nodeType: " . $node->nodeType . "\n";
/* Node gets destroyed during removeChild */
// Reload the same markup into the same DOMDocument instance.
$dom->loadXML('<root><child/></root>');
$xpath = new DOMXpath($dom);
// Case 2: $node is <child>, a descendant of the root element that gets removed.
$node = $xpath->query('//child')->item(0);
echo $node->nodeName . "\n";
$GLOBALS['dom']->removeChild($GLOBALS['dom']->firstChild);
// Reading a property of the descendant after its ancestor was removed.
echo "nodeType: " . $node->nodeType . "\n";
Example #28
0
 /**
  * Collect the top-level <?xyl-meta?> processing-instructions of a document.
  *
  * The data of each instruction is wrapped in an Xml\Attribute and appended
  * to $this->_metas; the instruction is then removed from the document.
  *
  * @param   \DOMDocument  $ownerDocument    Document that owns the PIs.
  * @return  void
  */
 protected function computeMeta(\DOMDocument $ownerDocument)
 {
     $finder = new \DOMXPath($ownerDocument);
     $instructions = $finder->query('/processing-instruction(\'xyl-meta\')');
     unset($finder);
     // The count is read once up front; with no match the loop body never runs.
     $total = $instructions->length;
     $index = 0;
     while ($index < $total) {
         $instruction = $instructions->item($index);
         $this->_metas[] = new Xml\Attribute($instruction->data);
         $ownerDocument->removeChild($instruction);
         ++$index;
     }
 }
Example #29
0
 /**
  * Rebuilds the markup of one HTML element, processing its inner content.
  *
  * When the element carries markdown="1", its inner markup is run through
  * $this->text() and the attribute is removed. Otherwise each child element
  * whose name is not listed in $this->textLevelElements is processed
  * recursively, and every other child is kept as serialized.
  *
  * @param string $elementMarkup markup of a single element
  * @return string the element's markup with its processed inner content
  */
 protected function processTag($elementMarkup)
 {
     # http://stackoverflow.com/q/1148928/200145
     libxml_use_internal_errors(true);
     $DOMDocument = new DOMDocument();
     # http://stackoverflow.com/q/11309194/200145
     $elementMarkup = mb_convert_encoding($elementMarkup, 'HTML-ENTITIES', 'UTF-8');
     # http://stackoverflow.com/q/4879946/200145
     $DOMDocument->loadHTML($elementMarkup);
     $DOMDocument->removeChild($DOMDocument->doctype);
     # promote the element (html > body > element) to be the document element
     $htmlWrapper = $DOMDocument->firstChild;
     $DOMDocument->replaceChild($htmlWrapper->firstChild->firstChild, $htmlWrapper);
     $Root = $DOMDocument->documentElement;
     $isMarkdown = $Root->getAttribute('markdown') === '1';
     $innerMarkup = '';
     foreach ($Root->childNodes as $Child) {
         $childMarkup = $DOMDocument->saveHTML($Child);
         # recursion only applies outside markdown="1" elements
         $recurse = !$isMarkdown && $Child instanceof DOMElement && !in_array($Child->nodeName, $this->textLevelElements);
         $innerMarkup .= $recurse ? $this->processTag($childMarkup) : $childMarkup;
     }
     if ($isMarkdown) {
         $Root->removeAttribute('markdown');
         $innerMarkup = "\n" . $this->text($innerMarkup) . "\n";
     }
     # the inner markup is spliced in as a string afterwards,
     # because we don't want for markup to get encoded
     $Root->nodeValue = 'placeholder';
     $shell = $DOMDocument->saveHTML($Root);
     return str_replace('placeholder', $innerMarkup, $shell);
 }
Example #30
0
 /**
  * Converts an HTML string by handing its <body> to self::processBlock().
  *
  * Before conversion, every <a href="#..."> inside the body is collected and
  * mapped to a "ref:"-prefixed slug; the map is stored in self::$_refs.
  *
  * @param  string $html
  * @param  array $options OPTIONAL (not read by this method)
  * @return string the result of self::processBlock(), or '' when the document has no body
  */
 public static function filter($html, array $options = null)
 {
     // Collect libxml parse errors internally while loading, then restore the
     // previous setting.
     $errors = libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     // The XML processing instruction makes loadHTML() read the markup as UTF-8.
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     libxml_clear_errors();
     libxml_use_internal_errors($errors);
     // Remove the hack again. The child list is copied first: removing nodes
     // from childNodes while iterating that same list can skip nodes.
     foreach (iterator_to_array($doc->childNodes) as $item) {
         if ($item->nodeType == XML_PI_NODE) {
             $doc->removeChild($item);
         }
     }
     $doc->encoding = 'UTF-8';
     $body = $doc->getElementsByTagName('body')->item(0);
     // Developer switch: set to 1 to dump the parsed HTML and the result as plain text.
     $debug = 0;
     if ($debug) {
         header('Content-Type: text/plain; charset=utf-8');
         echo $doc->saveHTML(), "\n\n";
     }
     if ($body) {
         $elems = array($body);
         $refs = array();
         $filter = new Zefram_Filter_Slug();
         // FIXME dependency!
         // extract all referenced ids of elements, they will be used for internal links creation
         // (breadth-first walk: $elems is the queue of elements still to visit)
         while ($elem = array_shift($elems)) {
             foreach ($elem->childNodes as $item) {
                 if ($item->nodeType === XML_ELEMENT_NODE) {
                     $elems[] = $item;
                 }
             }
             if ($elem->nodeType === XML_ELEMENT_NODE && strtoupper($elem->tagName) === 'A') {
                 $href = trim($elem->getAttribute('href'));
                 if (strlen($href) && $href[0] === '#') {
                     $id = substr($href, 1);
                     // Any existing "ref:" is stripped before slugging so the prefix appears once.
                     $refs[$id] = 'ref:' . $filter->filter(str_ireplace('ref:', '', $id));
                 }
             }
         }
         self::$_refs = $refs;
         // TODO create IDs map
         $latex = self::processBlock($body, self::TRIM);
         if ($debug) {
             header('Content-Type: text/plain; charset=utf-8');
             echo $latex;
             exit;
         }
         return $latex;
     }
     return '';
 }