Example #1
0
 /**
  * Checks that a ProxyClient with an output file configured writes the
  * response to that file and leaves $client->content unset.
  *
  * The output file is read as: header lines, a blank line, then the body.
  * Only the body is compared. Both the fixture and the body are round-tripped
  * through DOMDocument so both sides are serialised the same way.
  */
 function testRequestToOutputFile()
 {
     $client = new ProxyClient();
     $client->URL = df_absolute_url('tests/test_ProxyClient/test1.html');
     $outputFile = tempnam(sys_get_temp_dir(), 'test_ProxyClient');
     $client->outputFile = $outputFile;
     $client->process();
     $this->assertEquals(null, $client->content, 'Content should be written to output file, not saved to variable.');

     // Expected markup: the fixture as DOMDocument serialises it.
     $doc = new DOMDocument();
     @$doc->loadHtml(file_get_contents('tests/test_ProxyClient/test1.html'));
     $expected = $doc->saveHtml();
     unset($doc);

     $fh = fopen($outputFile, 'r');
     $this->assertTrue($fh !== false, 'Output file should be readable.');
     // Skip the headers: consume lines up to and including the first blank one.
     while (!feof($fh) and trim($line = fgets($fh, 1024))) {
     }
     // Everything left in the stream is the response body.
     $actual = stream_get_contents($fh);
     fclose($fh);
     // tempnam() created a real file; do not leave it behind.
     unlink($outputFile);

     $doc = new DOMDocument();
     @$doc->loadHtml($actual);
     $actual = $doc->saveHtml();
     unset($doc);

     $this->assertEquals($expected, $actual);
 }
Example #2
0
 /**
  * Serialises a node together with its whole subtree as HTML.
  *
  * A deep clone of the node is imported into a scratch document, so the
  * node passed in is not modified.
  *
  * @param \DOMNode $node node to serialise
  * @return string the serialised HTML after StringHelper::safeEncodeStr()
  */
 public static function getOuterHtml(\DOMNode $node)
 {
     $scratch = new \DOMDocument('1.0');
     $copy = $scratch->importNode($node->cloneNode(true), true);
     $scratch->appendChild($copy);

     return StringHelper::safeEncodeStr($scratch->saveHtml());
 }
 /**
  * Echoes a <script> element carrying every entry of $this->scriptAttributes
  * as an attribute (key = attribute name, value = attribute value).
  *
  * @access public
  * @return void
  */
 public function printScript()
 {
     $document = new DOMDocument();
     $script = $document->appendChild($document->createElement('script'));

     foreach ($this->scriptAttributes as $name => $attributeValue) {
         $attribute = $document->createAttribute($name);
         $attribute->value = $attributeValue;
         $script->appendChild($attribute);
     }

     echo $document->saveHtml();
 }
Example #4
0
 /**
  * Builds the HTML snippet used to embed this document: a <div> wrapping an
  * <iframe> whose src is the 'docs/embed' URL for $this->slug.
  *
  * @return string serialised <div> element
  */
 public function getEmbedCode()
 {
     $dom = new \DOMDocument();

     $frameAttributes = array(
         'id' => '__ogFrame',
         'width' => 300,
         'height' => 500,
         'src' => URL::to('docs/embed', $this->slug),
         'frameBorder' => 0,
     );

     $frame = $dom->createElement('iframe');
     foreach ($frameAttributes as $name => $value) {
         $frame->setAttribute($name, $value);
     }

     $wrapper = $dom->createElement('div');
     $wrapper->appendChild($frame);

     // Only the wrapper subtree is serialised; it is never attached to $dom.
     return $dom->saveHtml($wrapper);
 }
 /**
  * Parses an HTML string and hands every <div class="section results">
  * block to parseEntries().
  *
  * libxml's internal error handling is enabled only while parsing and the
  * previous setting is restored afterwards, so this method no longer changes
  * global libxml state for the rest of the request.
  *
  * @param string $input HTML to parse; empty or whitespace-only input yields an empty array
  * @return array whatever parseEntries() collected in the result array
  *               (presumably it receives the array by reference - confirm against parseEntries())
  */
 function get($input)
 {
     $resultArray = array();
     if (trim((string) $input) === '') {
         // Nothing to parse; DOMDocument::loadHTML() rejects an empty string.
         return $resultArray;
     }

     $previousSetting = libxml_use_internal_errors(true);
     libxml_clear_errors();

     $doc = new DOMDocument();
     $loaded = $doc->loadHtml($input);

     // Drop the collected parse errors so the buffer does not grow, then
     // put the error mode back the way the caller had it.
     libxml_clear_errors();
     libxml_use_internal_errors($previousSetting);

     if (!$loaded) {
         return $resultArray;
     }

     $xpath = new DOMXPath($doc);
     // Matches only when the class attribute is exactly "section results".
     $mainElements = $xpath->query("//div[@class='section results']");
     foreach ($mainElements as $mainElement) {
         $this->parseEntries($doc->saveHtml($mainElement), $resultArray);
     }

     return $resultArray;
 }
Example #6
0
 /**
  * Returns the HTML of the first node in $this->dom matching an XPath
  * expression, converted from HTML entities to $this->charset.
  *
  * @param string $xp XPath expression
  * @return string serialised node, or '' when nothing matches or the
  *                expression cannot be evaluated
  */
 public function find($xp)
 {
     $xpath = new DOMXpath($this->dom);
     $matches = $xpath->query($xp);

     // query() returns false for a malformed expression; treat it as "no match".
     if ($matches === false || $matches->length === 0) {
         return '';
     }

     // Copy the node into its own document so only that subtree is serialised.
     $fragment = new DOMDocument();
     $fragment->formatOutput = true;
     $fragment->appendChild($fragment->importNode($matches->item(0), true));
     $html = $fragment->saveHtml();

     // NOTE(review): the 'HTML-ENTITIES' pseudo-encoding is deprecated as of
     // PHP 8.2; kept here because callers rely on the decoded output.
     return mb_convert_encoding($html, $this->charset, 'HTML-ENTITIES');
 }
Example #7
0
 /**
  * Merges several HTML pages into one document.
  *
  * Everything before the first "<body" of the first non-empty page is used
  * as the head of the result. The body contents of every page are
  * serialised and joined with $separator.
  *
  * Pages that are empty or yield no <body> element are skipped.
  *
  * @param array  $pages     HTML documents as strings
  * @param string $separator markup placed between consecutive page bodies
  * @return string merged HTML document
  */
 public function merge(array $pages, $separator = '')
 {
     $head = '';
     $bodies = array();
     $previousSetting = libxml_use_internal_errors(true);

     foreach ($pages as $page) {
         $page = (string) $page;
         if (trim($page) === '') {
             continue;
         }
         if (!$head) {
             // Limit 2: only the text before the first "<body" is needed.
             $parts = preg_split('/<body/i', $page, 2);
             $head = $parts[0];
         }

         $document = new \DOMDocument();
         $document->loadHTML($page);
         $body = $document->getElementsByTagName('body')->item(0);
         if ($body === null) {
             continue;
         }

         $bodyOnlyDocument = new \DOMDocument();
         foreach ($body->childNodes as $child) {
             $bodyOnlyDocument->appendChild($bodyOnlyDocument->importNode($child, true));
         }
         $bodies[] = $bodyOnlyDocument->saveHtml();
     }

     if (!$previousSetting) {
         // We enabled buffering ourselves, so the buffered errors are ours to drop.
         libxml_clear_errors();
     }
     libxml_use_internal_errors($previousSetting);

     // implode() puts the separator only between bodies. The earlier
     // rtrim($output, $separator) treated the separator as a character mask
     // and could strip trailing characters of the last page.
     return $head . '<body>' . implode($separator, $bodies) . '</body></html>';
 }
Example #8
0
 /**
  * Registers the "site1" test website, sends a GET for its index page
  * through a buffering ProxyServer, and normalises both the stored expected
  * output and the proxied output through DOMDocument.
  *
  * The final comparison is disabled (see the note at the end), so this test
  * currently only checks that the request runs through.
  */
 function testPage()
 {
     $siteValues = array(
         'website_url' => df_absolute_url(DATAFACE_SITE_URL . '/tests/testsites/site1/'),
         'source_language' => 'en',
         'target_language' => 'fr',
         'website_name' => 'Site 1 French',
         'active' => 1,
         'base_path' => DATAFACE_SITE_URL . '/proxies/site1/',
         'host' => $_SERVER['HTTP_HOST'],
     );
     $site = new Dataface_Record('websites', array());
     $site->setValues($siteValues);
     $site->save();

     // Start without any text filters for the freshly saved site.
     df_q("delete from site_text_filters where website_id='" . addslashes($site->val('website_id')) . "'");

     $server = new ProxyServer();
     $server->site = SweteSite::loadSiteById($site->val('website_id'));
     $server->SERVER = array('REQUEST_METHOD' => 'get');
     $server->URL = df_absolute_url(DATAFACE_SITE_URL . '/proxies/site1/index.html');
     $server->buffer = true;
     $server->handleRequest();

     $expectedDoc = new DOMDocument();
     $expectedDoc->loadHtml(file_get_contents('tests/testsites/site1_output/index.out.html'));
     $expected = $expectedDoc->saveHtml();

     $actualDoc = new DOMDocument();
     $actualDoc->loadHtml($server->contentBuffer);
     $actual = $actualDoc->saveHtml();

     // Comparison disabled by the original author: it failed even when the
     // actual output was copied into the expected file and compared to itself.
     //$this->assertEquals(trim($expected), trim($actual));
 }
Example #9
0
<?php

// Fetch the Gradle "breaking changes" wiki page and echo every element whose
// class attribute is exactly "wiki-content".
$xmlDoc = new DOMDocument();

// Buffer parser diagnostics so they are not mixed into the echoed markup.
$previousLibxmlSetting = libxml_use_internal_errors(true);
$loaded = $xmlDoc->loadHTMLFile("http://docs.codehaus.org/display/GRADLE/Gradle+0.9+Breaking+Changes");
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);

if ($loaded === false) {
    // Network or parse failure: report it instead of silently printing nothing.
    trigger_error('Could not load the Gradle breaking-changes page', E_USER_WARNING);
} else {
    $xpath = new DOMXPath($xmlDoc);
    $entries = $xpath->query("//*[@class='wiki-content']");
    foreach ($entries as $entry) {
        // Copy each match into its own document so only that subtree is printed.
        $copyDoc = new DOMDocument();
        $copyDoc->appendChild($copyDoc->importNode($entry, true));
        echo $copyDoc->saveHtml();
    }
}
Example #10
0
 /**
  * Renders $this->content for the theme.
  *
  * The content goes through CMarkdownParser::safeTransform(). Every link
  * whose href is non-empty and does not start with "/" then gets
  * rel="nofollow" and target="_blank". The result is passed through
  * safeTransform() once more.
  *
  * @return string
  */
 public function getSafeOutput()
 {
     $md = new CMarkdownParser();

     $dom = new DOMDocument();
     // The XML declaration prefix makes loadHtml() read the markup as UTF-8.
     $dom->loadHtml('<?xml encoding="UTF-8">' . $md->safeTransform($this->content));

     $finder = new DOMXPath($dom);
     foreach ($finder->query('//a') as $anchor) {
         $href = $anchor->getAttribute('href');
         if (!isset($href[0]) || $href[0] === "/") {
             // Empty href or site-relative link: leave untouched.
             continue;
         }
         $anchor->setAttribute('rel', 'nofollow');
         $anchor->setAttribute('target', '_blank');
     }

     return $md->safeTransform($dom->saveHtml());
 }
Example #11
0
 /**
  * Serialises the wrapped document as HTML, with surrounding whitespace
  * removed.
  *
  * @return string
  */
 public function html()
 {
     $markup = $this->document->saveHtml();

     return trim($markup);
 }
Example #12
0
/**
 * Find a tag by ID and replace, prepend to, or append to its content.
 *
 * source: http://stackoverflow.com/a/17661043/1829145
 * Thanks to Rodolfo Buaiz (brasofilo)
 *
 * Mode selection:
 *  - $bAppend = false, $bInsert = false: the whole element is replaced by
 *    $sHtml, or removed when $sHtml is not a valid string.
 *  - $bAppend = true,  $bInsert = true:  the element's inner content is
 *    replaced by $sHtml.
 *  - only $bInsert: $sHtml is placed before the existing content.
 *  - only $bAppend: $sHtml is placed after the existing content.
 *    In these two modes $bAddToOuter decides whether "existing content"
 *    means the element itself (true) or its inner HTML (false).
 *
 * @param DOMDocument|null $oDoc document used for parsing (passed by reference); created when not an object
 * @param string $s source html (passed by reference!); overwritten with the result on success
 * @param string $sId id of the tag to find
 * @param string $sHtml html code to insert
 * @param boolean $bAppend append new code?
 * @param boolean $bInsert insert new code before the existing content?
 * @param boolean $bAddToOuter place the new code outside the element instead of inside it
 * @return boolean true when the element was found and $s was updated
 */
function brasofilo_suSetHtmlElementById(&$oDoc, &$s, $sId, $sHtml, $bAppend = false, $bInsert = false, $bAddToOuter = false)
{
    if (!brasofilo_suIsValidString($s) || !brasofilo_suIsValidString($sId)) {
        return false;
    }
    if (is_object($oDoc)) {
        if (!$oDoc instanceof DOMDocument) {
            return false;
        }
    } else {
        $oDoc = new DOMDocument();
    }

    // Silence parser warnings while loading, then restore the caller's
    // libxml setting rather than forcing it to false.
    $bPreviousSetting = libxml_use_internal_errors(true);
    $oDoc->loadHTML($s);
    if (!$bPreviousSetting) {
        // Buffering was enabled by us, so the buffered errors are ours to drop.
        libxml_clear_errors();
    }
    libxml_use_internal_errors($bPreviousSetting);

    $oNode = $oDoc->getElementById($sId);
    if (!is_object($oNode)) {
        return false;
    }

    // A unique marker is put into the DOM and swapped for the raw HTML after
    // serialisation, so $sHtml is never re-parsed by DOMDocument.
    $sMarker = uniqid('NVCMS_SHEBI-');
    $aMarkerComments = array("<!-- {$sMarker} -->", "<!--{$sMarker}-->");

    $bReplaceOuter = !$bAppend && !$bInsert;
    if ($bReplaceOuter) {
        if (brasofilo_suIsValidString($sHtml)) {
            $oNode->parentNode->replaceChild($oDoc->createComment($sMarker), $oNode);
            $s = str_replace($aMarkerComments, $sHtml, $oDoc->saveHTML());
        } else {
            $oNode->parentNode->removeChild($oNode);
            $s = $oDoc->saveHTML();
        }
        return true;
    }

    $bReplaceInner = $bAppend && $bInsert;
    $sThis = null;
    if (!$bReplaceInner) {
        $sThis = $oDoc->saveHTML($oNode);
        if (!$bAddToOuter) {
            // Inner HTML: drop the opening tag and the closing "</name>" tag.
            $sThis = substr($sThis, strpos($sThis, '>') + 1, -(strlen($oNode->nodeName) + 3));
        }
        $sThis = ($bInsert ? $sHtml : '') . $sThis . ($bAppend ? $sHtml : '');
    }

    if (!$bReplaceInner && $bAddToOuter) {
        // The element itself is part of $sThis, so replace the whole element.
        $oNode->parentNode->replaceChild($oDoc->createComment($sMarker), $oNode);
        $mSearch = $aMarkerComments;
    } else {
        // Keep the element and put the marker in as its only content.
        $oNode->nodeValue = $sMarker;
        $mSearch = $sMarker;
    }
    $s = str_replace($mSearch, $bReplaceInner ? $sHtml : $sThis, $oDoc->saveHTML());
    return true;
}
Example #13
0
 /**
  * Populate values in HTML forms.
  *
  * For every form reported by the response object, each field value is
  * written into the first element of that form whose name attribute equals
  * the field name:
  *  - input[type=checkbox]: checked when the value is non-zero
  *  - other input:          value attribute
  *  - textarea:             text content
  *  - select:               child elements whose text equals the value get "selected"
  *
  * @param string $body HTML document
  * @return string HTML with the values filled in, or $body unchanged when
  *                the response holds no forms
  * @throws Exception when a field is missing from the form or has an unsupported tag
  * @todo fix for radio buttons and dropdown boxes!
  */
 protected function populateForms($body)
 {
     $forms = $this->response->getForms();
     // If response object holds no form data, we are done.
     if (count($forms) === 0) {
         return $body;
     }

     $dom = new \DOMDocument();
     $dom->loadHtml($body);
     $query = new \DOMXPath($dom);

     foreach ($forms as $form) {
         foreach ($this->response->getFormValues($form) as $field => $value) {
             // NOTE(review): form and field names are concatenated into the
             // expression unescaped; a name containing a single quote breaks it.
             $nodes = $query->evaluate("//form[@name='" . $form . "']//*[@name='" . $field . "']");
             if ($nodes->length == 0) {
                 throw new Exception('No field ' . $field . ' in form ' . $form);
             }
             $node = $nodes->item(0);

             switch ($node->nodeName) {
                 case 'input':
                     if ($node->getAttribute('type') == 'checkbox') {
                         if ($value != 0) {
                             // Attribute values are strings; "checked" is the
                             // conventional value for this boolean attribute.
                             $node->setAttribute('checked', 'checked');
                         }
                     } else {
                         $node->setAttribute('value', $value);
                     }
                     break;
                 case 'textarea':
                     if ($node->firstChild === null) {
                         // An empty <textarea></textarea> has no text node yet.
                         $node->appendChild($dom->createTextNode((string) $value));
                     } else {
                         $node->firstChild->nodeValue = $value;
                     }
                     break;
                 case 'select':
                     foreach ($node->childNodes as $child) {
                         // Whitespace between options shows up as text nodes,
                         // which cannot carry attributes.
                         if ($child instanceof \DOMElement && $child->nodeValue == $value) {
                             $child->setAttribute('selected', 'selected');
                         }
                     }
                     break;
                 default:
                     throw new Exception('Unknown tag ' . $node->nodeName . ' in form ' . $form);
                     // @todo DEV only.
             }
         }
     }

     /*
         // Patch in field error messages.
         foreach ($this->response->getFieldErrors($form) as $field => $errorMessages) {
             foreach ($errorMessages as $errorMessage) {
                 $nodes = $query->evaluate("//form[@name='" . $form . "']//*[@name='" . $field . "']");

                 if ($nodes->length == 0) {
                     throw new Exception('No field ' . $field . ' in form ' . $form);
                 }

                 $node = $nodes->item(0);

                 $message = new \DOMElement('div', $errorMessage->getMessage());
                 //$node->parentNode->appendChild($message);
                 $node->nextSibling->insertBefore($message);
             }
         }
     */
     return $dom->saveHtml();
 }
<?php

// Builds <head><title id="my-title" data-charset="utf-8">Hello, World!</title></head>
// and dumps it three ways: whole document as XML, the <title> node as XML,
// and the whole document as HTML.
$dom = new DOMDocument();
//$dom = new DOMDocument('1.0', 'utf-8');

// Element tree
$head = $dom->createElement('head');
$title = $dom->createElement('title');
$node = $dom->createTextNode('Hello, World!');
$head->appendChild($title);
$title->appendChild($node);

// Attributes: one created through the document, one constructed directly
$attr = $dom->createAttribute('id');
$attr->value = 'my-title';
$title->setAttributeNode($attr);
$title->setAttributeNode(new DOMAttr('data-charset', 'utf-8'));

// Attach to the document and render
$dom->appendChild($head);
var_dump(
    $dom->saveXML(),
    $dom->saveXML($title),
    $dom->saveHtml()
);
 /**
  * Extract all interactions by finding each interaction node and its choices,
  * then collect the right answers and map choice identifiers to choice values.
  *
  * Every interaction found is appended to $this->interactions.
  * Output example of item interactions:
  * array (
  *   [...],
  *   array(
  *      "id" => "56e7d1397ad57",
  *      "type" => "Match",
  *      "choices" => array (
  *          "M" => "Mouse",
  *          "S" => "Soda",
  *          "W" => "Wheel",
  *          "D" => "DarthVader",
  *          "A" => "Astronaut",
  *          "C" => "Computer",
  *          "P" => "Plane",
  *          "N" => "Number",
  *      ),
  *      "responses" => array (
  *          0 => "M C"
  *      ),
  *      "responseIdentifier" => "RESPONSE"
  *   )
  * )
  *
  * @return $this
  */
 protected function extractInteractions()
 {
     // Interaction type label => how to locate the interaction nodes
     // ('domInteraction' tag name or 'xpathInteraction' expression) and,
     // optionally, how to locate their choices.
     $elements = [
         'Choice' => ['domInteraction' => 'choiceInteraction', 'xpathChoice' => './/qti:simpleChoice'],
         'Order' => ['domInteraction' => 'orderInteraction', 'xpathChoice' => './/qti:simpleChoice'],
         'Match' => ['domInteraction' => 'matchInteraction', 'xpathChoice' => './/qti:simpleAssociableChoice'],
         'Associate' => ['domInteraction' => 'associateInteraction', 'xpathChoice' => './/qti:simpleAssociableChoice'],
         'Gap Match' => ['domInteraction' => 'gapMatchInteraction', 'xpathChoice' => './/qti:gapText'],
         'Hot text' => ['domInteraction' => 'hottextInteraction', 'xpathChoice' => './/qti:hottext'],
         'Inline choice' => ['domInteraction' => 'inlineChoiceInteraction', 'xpathChoice' => './/qti:inlineChoice'],
         'Graphic hotspot' => ['domInteraction' => 'hotspotInteraction', 'xpathChoice' => './/qti:hotspotChoice'],
         'Graphic order' => ['domInteraction' => 'graphicOrderInteraction', 'xpathChoice' => './/qti:hotspotChoice'],
         'Graphic associate' => ['domInteraction' => 'graphicAssociateInteraction', 'xpathChoice' => './/qti:associableHotspot'],
         'Graphic gap match' => ['domInteraction' => 'graphicGapMatchInteraction', 'xpathChoice' => './/qti:gapImg'],
         'ScaffHolding' => ['xpathInteraction' => '//*[@customInteractionTypeIdentifier="adaptiveChoiceInteraction"]', 'xpathChoice' => 'descendant::*[@class="qti-choice"]'],
         'Extended text' => ['domInteraction' => 'extendedTextInteraction'],
         'Slider' => ['domInteraction' => 'sliderInteraction'],
         'Upload file' => ['domInteraction' => 'uploadInteraction'],
         'Text entry' => ['domInteraction' => 'textEntryInteraction'],
         'End attempt' => ['domInteraction' => 'endAttemptInteraction'],
     ];

     // Walk every interaction type.
     foreach ($elements as $element => $parser) {
         if (isset($parser['domInteraction'])) {
             $interactionNodes = $this->dom->getElementsByTagName($parser['domInteraction']);
         } elseif (isset($parser['xpathInteraction'])) {
             $interactionNodes = $this->xpath->query($parser['xpathInteraction']);
         } else {
             continue;
         }
         // query() yields false when the expression cannot be evaluated.
         if (!$interactionNodes || $interactionNodes->length === 0) {
             continue;
         }

         // Walk every interaction of this type found in the document.
         for ($i = 0; $i < $interactionNodes->length; $i++) {
             $interactionNode = $interactionNodes->item($i);
             $interaction = [
                 'id' => uniqid(),
                 'type' => $element,
                 'choices' => [],
                 'responses' => [],
             ];

             // Right answers: the text of the matching responseDeclaration,
             // one answer per non-blank line.
             $interaction['responseIdentifier'] = $interactionNode->getAttribute('responseIdentifier');
             $rightAnswer = $this->xpath->query('./qti:responseDeclaration[@identifier="' . $interaction['responseIdentifier'] . '"]');
             if ($rightAnswer && $rightAnswer->length > 0) {
                 // Split on any line-ending style. PHP_EOL is the line ending
                 // of the server, not necessarily of the XML being read.
                 $lines = preg_split('/\r\n|\r|\n/', $rightAnswer->item(0)->textContent);
                 foreach ($lines as $answer) {
                     // Compare against '' so that a literal "0" answer is kept.
                     if (trim($answer) !== '') {
                         $interaction['responses'][] = $answer;
                     }
                 }
             }

             // Choices: by tag name across the document ('domChoice') or by an
             // XPath evaluated relative to the interaction node ('xpathChoice').
             $choiceNodes = null;
             if (!empty($parser['domChoice'])) {
                 $choiceNodes = $this->dom->getElementsByTagName($parser['domChoice']);
             } elseif (!empty($parser['xpathChoice'])) {
                 $choiceNodes = $this->xpath->query($parser['xpathChoice'], $interactionNode);
             }
             if ($choiceNodes) {
                 for ($j = 0; $j < $choiceNodes->length; $j++) {
                     $choice = $choiceNodes->item($j);
                     $identifier = $choice->getAttribute('identifier');
                     $value = $this->sanitizeNodeToValue($this->dom->saveHtml($choice));
                     // Image-only choice: fall back to a label built from the image source.
                     if ($value === '') {
                         $imgNode = $this->xpath->query('./qti:img/@src', $choice);
                         if ($imgNode && $imgNode->length > 0) {
                             $value = 'image' . $j . '_' . $imgNode->item(0)->value;
                         }
                     }
                     $interaction['choices'][$identifier] = $value;
                 }
             }

             $this->interactions[] = $interaction;
         }
     }
     return $this;
 }
Example #16
0
 /**
  * Builds a table of contents from the h1-h4 headings of a document body
  * and adds deep-link anchors to those headings.
  *
  * Each heading receives an id derived from its text (made unique with a
  * numeric suffix) and an appended <a class="anchor" href="#id">#</a>.
  * A trailing <span> inside a heading is treated as its description and is
  * excluded from the title and the id.
  *
  * The rewritten body and the TOC tree are stored on the document through
  * setBody() and setToc(). An empty or whitespace-only body is left alone.
  *
  * @param object $document object exposing getBody(), setBody() and setToc()
  * @return void
  */
 private function buildToc($document)
 {
     $body = $document->getBody();
     // Nothing to index. The explicit empty check keeps an empty body from
     // being replaced by serializer residue.
     if ($body === null || $body === '' || preg_match('/^(\\s|\\n)+$/', $body)) {
         return;
     }

     $dom = new \DOMDocument();
     // The XML declaration prefix makes loadHtml() read the body as UTF-8.
     $dom->loadHtml('<?xml encoding="UTF-8">' . $body);
     $xpath = new \DOMXPath($dom);

     $toc = array();
     $ids = array();

     // True when $node is a <span> element; null-safe so that headings
     // without any child nodes are handled.
     $isSpan = function ($node) {
         return $node !== null && XML_ELEMENT_NODE === $node->nodeType && 'span' === $node->tagName;
     };

     // Derives a unique slug from the heading text; the second use of a
     // slug gets "-2", the third "-3", and so on.
     $genId = function ($node) use(&$ids, $isSpan) {
         $count = 0;
         do {
             if ($isSpan($node->lastChild)) {
                 // Work on a copy so the description span stays in the document.
                 $node = clone $node;
                 $node->removeChild($node->lastChild);
             }
             $id = preg_replace('{[^a-z0-9]}i', '-', strtolower(trim($node->nodeValue)));
             $id = preg_replace('{-+}', '-', $id);
             if ($count) {
                 $id .= '-' . ($count + 1);
             }
             $count++;
         } while (isset($ids[$id]));
         $ids[$id] = true;
         return $id;
     };

     // Text of the trailing <span>, or null when the heading has none.
     $getDesc = function ($node) use($isSpan) {
         if ($isSpan($node->lastChild)) {
             return $node->lastChild->nodeValue;
         }
         return null;
     };

     // Heading text without the trailing description <span>.
     $getTitle = function ($node) use($isSpan) {
         if ($isSpan($node->lastChild)) {
             $node = clone $node;
             $node->removeChild($node->lastChild);
         }
         return $node->nodeValue;
     };

     // build TOC & deep links
     $h1 = $h2 = $h3 = $h4 = 0;
     $nodes = $xpath->query('//*[self::h1 or self::h2 or self::h3 or self::h4]');
     foreach ($nodes as $node) {
         // Read id/title/description before the anchor link is appended, so
         // the "#" link text does not leak into them.
         $id = $genId($node);
         $entry = array('title' => $getTitle($node), 'id' => $id, 'desc' => $getDesc($node));

         // set id and add anchor link
         $node->setAttribute('id', $id);
         $link = $dom->createElement('a', '#');
         $link->setAttribute('href', '#' . $id);
         $link->setAttribute('class', 'anchor');
         $node->appendChild($link);

         // parse into a tree; the counters only ever grow, so keys stay unique
         switch ($node->nodeName) {
             case 'h1':
                 $toc[++$h1] = $entry;
                 break;
             case 'h2':
                 $toc[$h1][++$h2] = $entry;
                 break;
             case 'h3':
                 $toc[$h1][$h2][++$h3] = $entry;
                 break;
             case 'h4':
                 $toc[$h1][$h2][$h3][++$h4] = $entry;
                 break;
         }
     }

     // save new body with IDs: keep only what is inside <body>...</body>
     $body = $dom->saveHtml();
     $body = preg_replace('{.*<body>(.*)</body>.*}is', '$1', $body);
     $document->setToc($toc);
     $document->setBody($body);
 }
Example #17
0
        $key = $_GET['page'];
        if (!isset($pages[$key])) {
            die("Could not find page");
        }
        $url = $pages[$key];
    }
}
if (!$url) {
    // No target selected: render an index with one <img> per known image
    // and one <iframe> per known page, each pointing back at this script.
    $imageTags = array();
    foreach ($images as $img => $imgurl) {
        $imageTags[] = '<img src="?image=' . urlencode($img) . '"/>';
    }
    $frameTags = array();
    foreach ($pages as $pg => $pgurl) {
        $frameTags[] = '<iframe src="?page=' . urlencode($pg) . '" width="300" height="200"/>';
    }
    $html = '<div style="overflow:scroll">' . implode('', $imageTags) . '</div>' . implode('', $frameTags);

    // Round-trip through DOMDocument so a complete HTML document is emitted.
    $doc = new DOMDocument();
    @$doc->loadHtml($html);
    echo $doc->saveHtml();
} else {
    // Proxy the selected URL: replay the upstream headers, then the content.
    //$url = 'http://ca2.php.net/images/php.gif';
    $client = new ProxyClient();
    $client->URL = $url;
    $client->flushableContentTypeRegex = '#html|css#';
    $client->afterFlushCallback = 'flushCallback';
    $client->process();
    foreach ($client->headers as $h) {
        header($h);
    }
    echo $client->content;
}
Example #18
0
        $link->setAttribute('class', 'anchor');
        $node->appendChild($link);
        if (empty($firstTitle)) {
            $firstTitle = $title;
        }
        // parse into a tree
        switch ($node->nodeName) {
            case 'h1':
                $toc[++$h1] = array('title' => $title, 'id' => $id, 'desc' => $desc);
                break;
            case 'h2':
                $toc[$h1][++$h2] = array('title' => $title, 'id' => $id, 'desc' => $desc);
                break;
            case 'h3':
                $toc[$h1][$h2][++$h3] = array('title' => $title, 'id' => $id, 'desc' => $desc);
                break;
            case 'h4':
                $toc[$h1][$h2][$h3][++$h4] = array('title' => $title, 'id' => $id, 'desc' => $desc);
                break;
        }
    }
    // save new content with IDs
    $content = $dom->saveHtml();
    $content = preg_replace('{.*<body>(.*)</body>.*}is', '$1', $content);
    // add class to footer nav
    $content = preg_replace('{<p>(&larr;.+?|.+?&rarr;)</p>}', '<p class="prev-next">$1</p>', $content);
    return $app['twig']->render('doc.show.html.twig', array('doc' => $content, 'file' => $page, 'page' => $page == '00-intro.md' ? 'getting-started' : 'docs', 'toc' => $toc, 'title' => $firstTitle));
})->assert('page', '[a-z0-9/\'-]+\\.md')->bind('docs.view');
// Short URL: /commit-deps redirects to the FAQ entry about committing the
// vendor directory.
$app->get('/commit-deps', function () use($app) {
    $target = $app['url_generator']->generate(
        'docs.view',
        array('page' => 'faqs/should-i-commit-the-dependencies-in-my-vendor-directory.md')
    );

    return new RedirectResponse($target);
});
Example #19
0
// Render the content's markdown and harden its links before output.
$md = new CMarkdownParser();
$dom = new DOMDocument();
// The XML declaration prefix makes loadHtml() read the markup as UTF-8.
$dom->loadHtml('<?xml encoding="UTF-8">' . $md->safeTransform($content->content));
$x = new DOMXPath($dom);
foreach ($x->query('//a') as $node) {
    $element = $node->getAttribute('href');
    // Don't follow links outside of this site, and always open them in a new tab.
    // isset() guards anchors with a missing or empty href, where reading
    // offset 0 would raise an "uninitialized string offset" error.
    if (isset($element[0]) && $element[0] !== "/") {
        $node->setAttribute('rel', 'nofollow');
        $node->setAttribute('target', '_blank');
    }
}
?>

				<div id="md-output"><?php 
echo $md->safeTransform($dom->saveHtml());
?>
</div>
				<textarea id="markdown" style="display:none;"><?php 
echo $content->content;
?>
</textarea>
				
				
		</div>
	    <div style="clear:both;"><br /></div>
	</div>
</div>

<div class="comments <?php 
echo Cii::getConfig('useDisqusComments') ? 'disqus' : NULL;
Example #20
0
 // Executing the cURL request and assigning the returned data to the $data variable
 curl_close($ch);
 echo $data;
 // Parse the fetched page, pull out title / description / keywords, strip
 // all <script> elements, and lower-case everything for keyword matching.
 $doc = new DOMDocument();
 if (is_string($data) && $data !== '') {
     // loadHTML() rejects an empty string, e.g. after a failed request.
     @$doc->loadHTML($data);
 }
 $nodes = $doc->getElementsByTagName('title');
 $body = $doc->getElementsByTagName('body');
 //get and display what you need:
 // A page without a <title> yields an empty title.
 $titleNode = $nodes->item(0);
 $title = $titleNode !== null ? $titleNode->nodeValue : '';
 $metas = $doc->getElementsByTagName('meta');

 // Remove every <script>. The node list is live, so walk it backwards.
 $nodeList = $doc->getElementsByTagName('script');
 for ($nodeIdx = $nodeList->length; --$nodeIdx >= 0;) {
     $node = $nodeList->item($nodeIdx);
     $node->parentNode->removeChild($node);
 }
 $data = $doc->saveHtml();

 // Default to '' so pages lacking these meta tags do not leave the
 // variables undefined; values set earlier in the script are kept.
 $description = isset($description) ? $description : '';
 $keywords = isset($keywords) ? $keywords : '';
 foreach ($metas as $meta) {
     $metaName = $meta->getAttribute('name');
     if ($metaName == 'description') {
         $description = $meta->getAttribute('content');
     }
     if ($metaName == 'keywords') {
         $keywords = $meta->getAttribute('content');
     }
 }

 $keywords = strtolower($keywords);
 $title = strtolower($title);
 $description = strtolower($description);
 $data = strtolower($data);
 $sportlength = count($sportskeywords);
 $i = 0;
Example #21
0
    foreach (scandir($dir) as $file) {
        if (in_array($file, $ignored)) {
            continue;
        }
        $files[$file] = filemtime($dir . '/' . $file);
    }
    arsort($files);
    $files = array_keys($files);
    return $files ? $files : false;
}
// Pick the $postNumber-th most recently modified file, skip over the page
// currently being viewed, and echo the contents of its main-container <div>.
$files = scan_dir($dir);
// scan_dir() returns false when nothing was found.
$total_files = $files ? count($files) : 0;
// Indexes are zero-based, so $postNumber == $total_files is already out of range.
if ($postNumber >= $total_files) {
    exit;
}
$html = $files[$postNumber];
// Strip the "blog/" prefix so the page name is comparable to a bare file
// name. The result has to be assigned; preg_replace() does not modify its
// argument.
$pageName = preg_replace('.blog\\/*.', '', $pageName);
if ($html == $pageName) {
    // Do not serve the page the visitor is already on; take the next one.
    if (!isset($files[$postNumber + 1])) {
        exit;
    }
    $html = $files[$postNumber + 1];
}
$classname = 'main-container';
$dom = new DOMDocument();
// NOTE(review): $html is a bare file name as returned by scan_dir(), so it is
// resolved against the working directory rather than $dir - confirm that is intended.
@$dom->loadHTMLFile($html);
$xpath = new DOMXPath($dom);
$result = '';
//$results = $xpath->query('//*[@class="main-container"]');
foreach ($xpath->evaluate('//div[@class="main-container"]/node()') as $childNode) {
    $result .= $dom->saveHtml($childNode);
}
echo $result;
exit;
    /**
     * Extracts the translatable strings of an HTML document and replaces each
     * of them with a numbered placeholder of the form {{$N$}}.
     *
     * The strings are gathered, in this order, from:
     *  - the "title" and "alt" attributes and any attribute named in
     *    "data-swete-translate-attrs",
     *  - the inner HTML of elements carrying a "translate" attribute
     *    (optionally split on the delimiters in "data-swete-delimiters"),
     *  - runs of sibling text nodes and inline elements,
     *  - the content of the "keywords" and "description" meta tags.
     *
     * The collected strings are stored in $this->strings (passed through
     * cleanString()); N in a placeholder is the index of the string in that
     * array.
     *
     * @param string $html The HTML markup to process.
     * @return string The serialised document with placeholders in place of the
     *                extracted strings.
     */
    public function extractStrings($html)
    {
        $dom = null;
        if ($this->useHtml5Parser) {
            $intro = substr($html, 0, 255);
            if (stripos($intro, '<!DOCTYPE html>') !== false) {
                // this is html5 so we'll use the html5
                require_once 'lib/HTML5.php';
                $options = new StdClass();
                $options->decorateDocument = function (DOMDocument $dom) {
                    $dom->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
                };
                $dom = HTML5::loadHTML($html, $options);
                // noscripts contents are treated like text which causes problems when
                // filters/replacements are run on them.  Let's just remove them.
                // The node list is live, so iterate over a snapshot of it:
                // removing nodes while walking the live list skips entries.
                foreach (iterator_to_array($dom->getElementsByTagName('noscript')) as $noscript) {
                    if ($noscript->parentNode) {
                        $noscript->parentNode->removeChild($noscript);
                    }
                }
            }
        }
        if (!isset($dom)) {
            $dom = new DOMDocument();
            $dom->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
            // The XML declaration is prepended only to make the parser read
            // the markup as UTF-8; it is removed again right below.
            @$dom->loadHtml('<?xml encoding="UTF-8">' . $html);
            // dirty fix: drop the processing instruction(s) added above.
            // Iterate over a snapshot because childNodes is a live list.
            foreach (iterator_to_array($dom->childNodes) as $item) {
                if ($item->nodeType == XML_PI_NODE) {
                    $dom->removeChild($item);
                }
            }
            // remove hack
            $dom->encoding = 'UTF-8';
            // insert proper
        }
        $strings = array();
        // $this->strings aliases the local array so both stay in sync.
        $this->strings =& $strings;
        // Maps a normalised string to its index in $strings.
        $stringsIndex = array();
        $xpath = new DOMXPath($dom);
        $this->translateDates($xpath);

        // --- 1. Translatable attributes --------------------------------------
        $translateAttrs = $xpath->query('//*[@data-swete-translate-attrs or @alt or @title]');
        $otherAtts = array('title', 'alt');
        foreach ($translateAttrs as $el) {
            if ($el->hasAttribute('data-swete-translate-attrs')) {
                $attNames = explode(' ', $el->getAttribute('data-swete-translate-attrs'));
            } else {
                $attNames = array();
            }
            foreach ($otherAtts as $attName) {
                if ($el->hasAttribute($attName)) {
                    $attNames[] = $attName;
                }
            }
            foreach ($attNames as $attName) {
                $attVal = $el->getAttribute($attName);
                if ($attVal and trim($attVal)) {
                    $index = count($strings);
                    $strings[] = trim(_n($attVal));
                    $stringsIndex[trim(_n($attVal))] = $index;
                    $el->setAttribute($attName, '{{$' . $index . '$}}');
                }
            }
        }

        // --- 2. Elements explicitly marked with [translate] ------------------
        $translatables = $xpath->query('//*[@translate]');
        foreach ($translatables as $tr) {
            $index = count($strings);
            $trStr = trim(_n($tr->innerHTML));
            if ($tr->hasAttribute('data-swete-delimiters')) {
                $delim = trim($tr->getAttribute('data-swete-delimiters'));
                if ($delim) {
                    // The first character of the attribute is the separator
                    // between the individual delimiters.
                    $delimSplitter = $delim[0];
                    $delimiters = explode($delimSplitter, $delim);
                    $delimiters2 = array();
                    foreach ($delimiters as $delimiterIdx => $delimiter) {
                        if (!trim($delimiter)) {
                            continue;
                        }
                        $delimiters2[] = '(' . preg_quote($delimiter, '/') . ')';
                    }
                    $delimiters = $delimiters2;
                    $pattern = '/' . implode('|', $delimiters) . '/';
                    $toks = preg_split($pattern, $trStr, -1, PREG_SPLIT_DELIM_CAPTURE);
                    $innerHTML = array();
                    foreach ($toks as $tokIdx => $tok) {
                        if (!trim($tok)) {
                            $innerHTML[] = $tok;
                        } else {
                            if ($tokIdx % 2 === 1) {
                                // It is a delimiter
                                $innerHTML[] = $tok;
                            } else {
                                $strings[] = trim(_n($tok));
                                $stringsIndex[trim(_n($tok))] = $index;
                                $innerHTML[] = '{{$' . $index . '$}}';
                                $index++;
                                // Keep a trailing space so the placeholder
                                // does not run into the following delimiter.
                                if ($tok[strlen($tok) - 1] === ' ') {
                                    $innerHTML[] = ' ';
                                }
                            }
                        }
                    }
                    $tr->innerHTML = implode('', $innerHTML);
                    // Already handled token by token; skip the whole-element path.
                    $trStr = '';
                }
            }
            if ($trStr) {
                $strings[] = trim(_n($trStr));
                $stringsIndex[trim(_n($trStr))] = $index;
                $tr->innerHTML = '{{$' . $index . '$}}';
            }
            // Flag the direct text children so the text pass below skips them.
            // NOTE(review): this sets an ad-hoc property on DOM text nodes;
            // confirm isCovered() can still see it on the PHP versions in use.
            $gchildren = $xpath->query('./text()', $tr);
            foreach ($gchildren as $gchild) {
                $gchild->isCovered = 1;
            }
        }

        // --- 3. Elements marked [notranslate]: flag their text as covered ----
        $untranslatables = $xpath->query('//*[@notranslate]');
        foreach ($untranslatables as $tr) {
            $gchildren = $xpath->query('./text()', $tr);
            foreach ($gchildren as $gchild) {
                $gchild->isCovered = 1;
            }
        }

        // --- 4. Free text, grouped with its inline siblings -------------------
        $textX = $xpath->query('//text()[not(ancestor::script | ancestor::style | ancestor::*[@notranslate] | ancestor::*[@translate])]');
        // Copy the result into a plain array because the document is modified
        // while the text nodes are processed.
        $text = array();
        foreach ($textX as $x) {
            $text[] = $x;
        }
        // Rewrites self-closing tags produced by saveXml(): tags listed in
        // $xhtmlTags stay self-closing, every other tag gets an explicit
        // closing tag.
        $xhtmlTags = array("br", "hr", "input", "frame", "img", "area", "link", "col", "base", "basefont", "param");
        $expandSelfClosing = function ($m) use ($xhtmlTags) {
            return in_array($m[1], $xhtmlTags, true) ? "<{$m[1]}{$m[2]}/>" : "<{$m[1]}{$m[2]}></{$m[1]}>";
        };
        foreach ($text as $tx) {
            if (!$tx instanceof DOMNode) {
                continue;
            }
            if (!isset($tx->parentNode)) {
                continue;
            }
            if (!$tx->parentNode instanceof DOMElement) {
                continue;
            }
            // the data-swete-translate is a little different than the notranslate attribute
            // the notranslate attribute confers block level status to its owner tag.
            // data-swete-translate simply marks a segment of text as not to be translated
            // (or to be translated) within the flow of the document.  Therefore we don't
            // use a text node whose parent has the data-swete-translate as an anchor
            // to start building a group of text.  But we will allow a tag with this
            // to be included in a group of text (that contains content before and/or after).
            // The SweteTools::encode() method will take care of variablizing the content
            // at translation time.
            if ($tx->parentNode->hasAttribute('data-swete-translate') and $tx->parentNode->getAttribute('data-swete-translate') === '0') {
                continue;
            }
            if (!trim($tx->nodeValue)) {
                continue;
            }
            if (in_array(strtolower($tx->parentNode->tagName), array('comment', 'script', 'style', 'code'))) {
                continue;
            }
            if ($this->isCovered($tx)) {
                continue;
            }
            $group = array();
            $start = $tx;
            if (!isset($tx->parentNode)) {
                continue;
            }
            if ($tx->parentNode->childNodes->length > 0) {
                // Locate this text node among its siblings.
                $pos = -1;
                foreach ($tx->parentNode->childNodes as $idx => $child) {
                    if ($child === $tx) {
                        $pos = $idx;
                        break;
                    }
                }
                $mypos = $pos;
                // Walk backwards to the first sibling of the run: stop at the
                // first node that is neither text nor inline, or that is
                // marked notranslate / data-swete-block.
                for ($i = $pos; $i >= 0; $i--) {
                    $node = $tx->parentNode->childNodes->item($i);
                    if ($node->nodeType != XML_TEXT_NODE and !in_array(strtolower(@$node->tagName), self::$inlineTags) and !($node instanceof DOMElement and $node->hasAttribute('data-swete-inline'))) {
                        break;
                    }
                    if ($node instanceof DOMElement and $node->hasAttribute('notranslate')) {
                        break;
                    }
                    if ($node instanceof DOMElement and $node->hasAttribute('data-swete-block')) {
                        break;
                    }
                    $pos = $i;
                }
                if ($mypos == $pos or $this->isFirstText($tx->parentNode, $mypos, $pos)) {
                    // Collect the run forwards from its start, using the same
                    // stop conditions as above.
                    $startIdx = $pos;
                    for ($i = $startIdx; $i < $tx->parentNode->childNodes->length; $i++) {
                        $node = $tx->parentNode->childNodes->item($i);
                        if (!$node) {
                            break;
                        }
                        if ($node->nodeType != XML_TEXT_NODE and !in_array(strtolower(@$node->tagName), self::$inlineTags) and !($node instanceof DOMElement and $node->hasAttribute('data-swete-inline'))) {
                            break;
                        }
                        if ($node instanceof DOMElement and $node->hasAttribute('notranslate')) {
                            break;
                        }
                        if ($node instanceof DOMElement and $node->hasAttribute('data-swete-block')) {
                            break;
                        }
                        $group[] = $node;
                    }
                }
            } else {
                $group[] = $tx;
            }
            // Serialise the group into a single markup string.
            $combinedText = array();
            foreach ($group as $item) {
                $combinedText[] = preg_replace_callback('#<(\\w+)([^>]*)\\s*/>#s', $expandSelfClosing, $dom->saveXml($item));
            }
            $combinedText = implode('', $combinedText);
            // Remember the surrounding white space so it can be restored
            // around the placeholder.
            $leadingWhiteSpace = '';
            $trailingWhiteSpace = '';
            if (preg_match('#^[\\p{Z}\\s]+#', $combinedText, $m1)) {
                $leadingWhiteSpace = $m1[0];
            }
            if (preg_match('#[\\p{Z}\\s]+$#', $combinedText, $m1)) {
                $trailingWhiteSpace = $m1[0];
            }
            $combinedText = _n($this->replaceStrings($combinedText));
            if (!trim(str_ireplace('&nbsp;', '', $combinedText))) {
                continue;
            }
            // Reuse the index of a string that was already extracted.
            if (isset($stringsIndex[$combinedText])) {
                $index = $stringsIndex[$combinedText];
            } else {
                $index = count($strings);
                $strings[] = $combinedText;
                $stringsIndex[$combinedText] = $index;
            }
            // Flag the text children of the grouped nodes as handled.
            foreach ($group as $gnode) {
                $gchildren = @$xpath->query('./text()', $gnode);
                if (!$gchildren) {
                    continue;
                }
                foreach ($gchildren as $gchild) {
                    $gchild->isCovered = 1;
                }
            }
            // Remove all nodes of the group but the first one ...
            for ($i = 1; $i < count($group); $i++) {
                if (@$group[$i]->parentNode) {
                    $group[$i]->parentNode->removeChild($group[$i]);
                }
            }
            if (!@$group[0]) {
                continue;
            }
            if (!@$group[0]->parentNode) {
                continue;
            }
            // ... and replace the first one with the placeholder text node.
            $textNodeContent = $leadingWhiteSpace . '{{$' . $index . '$}}' . $trailingWhiteSpace;
            $group[0]->parentNode->replaceChild($dom->createTextNode($textNodeContent), $group[0]);
        }

        // --- 5. Meta keywords and description ---------------------------------
        // Now we need to translate the keywords and the description
        foreach ($xpath->query('//meta[@name="keywords" or @name="description"]') as $el) {
            if (!$el->hasAttribute('content')) {
                continue;
            }
            $content = _n($el->getAttribute('content'));
            if (isset($stringsIndex[$content])) {
                $index = $stringsIndex[$content];
            } else {
                $index = count($strings);
                $strings[] = $content;
                $stringsIndex[$content] = $index;
            }
            $el->setAttribute('content', '{{$' . $index . '$}}');
        }
        $this->strings = array_map(array($this, 'cleanString'), $this->strings);
        return $dom->saveHtml();
    }
<?php

// Demonstrates how the same element is serialised by saveXML() and by
// saveHTML(): the first paragraph of the sample page is printed with each.
$markup = <<<'EOD'
<html>
<head>
</head>
<body>
<p>Hi.<br/>there</p>
</body>
</html>
EOD;

$document = new DOMDocument();
$document->loadHTML($markup);

// First <p> element of the parsed document.
$paragraph = $document->getElementsByTagName('p')->item(0);

// XML serialisation first, HTML serialisation second, one per line.
echo $document->saveXML($paragraph) . "\n";
echo $document->saveHTML($paragraph) . "\n";