/**
 * Extracts per-post user details from a raw thread response.
 *
 * The payload is parsed as XML; its <html> child is passed through
 * ampsfix() and loaded as an HTML document. Avatar URLs (query string
 * removed) and user names are collected with two XPath queries and paired
 * purely by their position in each result list.
 *
 * @param string $raw Raw XML payload containing an <html> child element
 *
 * @return array Entries shaped like ['user' => ['avatar' => ..., 'name' => ...]];
 *               a key is absent when the two queries match a different number of nodes
 */
protected function threadParse($raw)
{
    $xml = new SimpleXMLElement($raw);
    $html = $this->ampsfix($xml->html);

    $dom = new DOMDocument();
    // Parser warnings are suppressed on purpose, as in the original code.
    @$dom->loadHTML($html);
    $dom->normalizeDocument();
    $xpath = new DOMXPath($dom);

    // Both queries share the same table-cell prefix and differ only in the leaf.
    $cellPath = '/html/body//div/div/div/table/tr/td/div/table/tr/td/table/tr/td/div';
    $array = [];

    $i = 0;
    foreach ($xpath->evaluate($cellPath . '/img') as $img) {
        // Keep only the part of the URL before any query string.
        $t = explode('?', $img->getAttribute('src'));
        $array[$i]['user']['avatar'] = $t[0];
        $i++;
    }

    $i = 0;
    foreach ($xpath->evaluate($cellPath . '/span/a') as $pp) {
        $array[$i]['user']['name'] = $pp->nodeValue;
        $i++;
    }

    return $array;
}
/**
 * {@inheritdoc}
 *
 * Loads $other as XML and validates it against the schema source held in
 * $this->XSD. Constraint errors are recorded through
 * setXMLConstraintErrors() when loading or validation fails. The libxml
 * error mode and entity-loader state are put back before returning.
 */
protected function stringMatches($other)
{
    $previousErrorMode = libxml_use_internal_errors(true);
    $previousLoaderState = libxml_disable_entity_loader(true);
    libxml_clear_errors();

    $document = new \DOMDocument();
    $document->preserveWhiteSpace = false;
    $document->validateOnParse = true;

    $options = LIBXML_NONET;
    if (defined('LIBXML_COMPACT')) {
        $options |= LIBXML_COMPACT;
    }

    $loaded = @$document->loadXML($other, $options);
    if (!$loaded) {
        libxml_disable_entity_loader($previousLoaderState);
        $this->setXMLConstraintErrors();
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);

        return false;
    }

    $document->normalizeDocument();

    // The entity loader is restored before schema validation runs.
    libxml_disable_entity_loader($previousLoaderState);
    libxml_clear_errors();

    $result = @$document->schemaValidateSource($this->XSD);
    if ($result === false) {
        $this->setXMLConstraintErrors();
    }

    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    return $result;
}
/**
 * Parses the given template content.
 *
 * Calls cleanupParse() on failure or success.
 *
 * @param string $contents the source content
 *
 * @return array two-element array: the collected meta data, then the compiled content
 *
 * @throws RuntimeException when a parse is already in progress, or when the
 *                          content cannot be loaded as XML
 */
protected function parse($contents)
{
    // A set buffer means a previous parse has not been cleaned up yet.
    if (isset($this->buffer)) {
        throw new RuntimeException('PHPSTLCompiler->parse called recursivley');
    }

    try {
        $this->whitespace = self::WHITESPACE_COLLAPSE;
        $this->stash = array();
        $this->meta = array();
        $this->buffer = '';
        $this->footerBuffer = '';
        $this->handlers = array();

        $this->dom = new DOMDocument();
        $this->dom->preserveWhiteSpace = true;

        $this->meta['uri'] = (string) $this->template;
        $this->meta['type'] = 'text/html';

        if (!$this->dom->loadXML($contents)) {
            // Thrown instead of die()'d so the catch below still runs
            // cleanupParse(), as the method contract promises.
            throw new RuntimeException("failed to parse {$this->template}");
        }
        $this->dom->normalizeDocument();

        $this->writeTemplateHeader();
        $this->process($this->dom);
        $this->writeTemplateFooter();

        // Capture the results before cleanupParse() runs.
        $meta = $this->meta;
        $content = $this->unstashPHP(trim($this->buffer));
        $this->cleanupParse();

        return array($meta, $content);
    } catch (Exception $ex) {
        $this->cleanupParse();
        throw $ex;
    }
}
/**
 * Fetches a web page and extracts its readable article.
 *
 * The URL gets an "http://" prefix when it has no http(s) scheme. The
 * response is converted to UTF-8 and run through Readability. Title and
 * author metadata are filled in only when not already set (the author
 * defaults to the URL host). The article is wrapped in a minimal HTML
 * document, loaded into a DOMDocument, passed to handleImages(), and
 * serialised into $this->text.
 *
 * @param string $url Address of the page to read
 */
public function __construct($url)
{
    if (!preg_match('!^https?://!i', $url)) {
        $url = 'http://' . $url;
    }

    $response = Http::Request($url);
    $utf8Html = mb_convert_encoding($response, "UTF-8", "UTF-8,ISO-8859-1,ASCII");

    $readability = new Readability($utf8Html, $url);
    $readability->init();

    if (!isset($this->metadata["title"])) {
        $rawTitle = strip_tags($readability->getTitle()->innerHTML);
        $this->metadata["title"] = CharacterEntities::convert($rawTitle);
    }

    if (!isset($this->metadata["author"])) {
        $urlParts = parse_url($url);
        $this->metadata["author"] = $urlParts["host"];
    }

    $article = $readability->getContent()->innerHTML;

    // Wrap the extracted markup in a document that declares UTF-8; a <body>
    // wrapper is added only when the article does not start with one.
    $documentHead = "<html><head><meta http-equiv='Content-Type' content='text/html; charset=UTF-8'/></head>";
    if (substr($article, 0, 5) == "<body") {
        $article = $documentHead . $article . "</html>";
    } else {
        $article = $documentHead . "<body>" . $article . "</body></html>";
    }

    $doc = new DOMDocument();
    if (!@$doc->loadHTML($article)) {
        die($article);
    }
    $doc->normalizeDocument();

    $this->images = $this->handleImages($doc, $url);
    $this->text = $doc->saveHTML();
}
/**
 * Validates and parses the given file into a SimpleXMLElement.
 *
 * The file is loaded and validated against the bundled strict XLIFF 1.2
 * schema. The schema's reference to http://www.w3.org/2001/xml.xsd is
 * rewritten to a local file:/// URL. When this code runs from a phar,
 * xml.xsd is first copied to a temporary file, which is removed again once
 * validation has run.
 *
 * @param string $file
 *
 * @return SimpleXMLElement
 *
 * @throws \RuntimeException When the file cannot be loaded or is not schema-valid
 */
private function parseFile($file)
{
    $dom = new \DOMDocument();
    $current = libxml_use_internal_errors(true);
    if (!@$dom->load($file, LIBXML_COMPACT)) {
        // Collect the messages first, then hand the caller's error mode back.
        $errors = $this->getXmlErrors();
        libxml_use_internal_errors($current);

        throw new \RuntimeException(implode("\n", $errors));
    }

    $location = str_replace('\\', '/', __DIR__).'/schema/dic/xliff-core/xml.xsd';
    $parts = explode('/', $location);
    $tmpfile = false;
    if (preg_match('#^phar://#i', $location)) {
        $tmpfile = tempnam(sys_get_temp_dir(), 'sf2');
        if ($tmpfile) {
            file_put_contents($tmpfile, file_get_contents($location));
            $parts = explode('/', str_replace('\\', '/', $tmpfile));
        }
    }

    // On Windows the first path segment is the drive and must not be URL-encoded.
    $drive = '\\' === DIRECTORY_SEPARATOR ? array_shift($parts).'/' : '';
    $location = 'file:///'.$drive.implode('/', array_map('rawurlencode', $parts));

    $source = file_get_contents(__DIR__.'/schema/dic/xliff-core/xliff-core-1.2-strict.xsd');
    $source = str_replace('http://www.w3.org/2001/xml.xsd', $location, $source);

    $valid = @$dom->schemaValidateSource($source);

    // The temporary schema copy is only needed while validation runs.
    if ($tmpfile && is_file($tmpfile)) {
        unlink($tmpfile);
    }

    if (!$valid) {
        $errors = $this->getXmlErrors();
        libxml_use_internal_errors($current);

        throw new \RuntimeException(implode("\n", $errors));
    }

    $dom->validateOnParse = true;
    $dom->normalizeDocument();
    libxml_use_internal_errors($current);

    return simplexml_import_dom($dom);
}
/**
 * Builds a DOMDocument from the bundled base SVG template.
 *
 * @return \DOMDocument Normalised document with formatted output enabled
 */
private function getDocument()
{
    $templatePath = __DIR__ . '/../resource/base.svg.xml';

    $svgDocument = new \DOMDocument('1.0', 'UTF-8');
    $svgDocument->load($templatePath);
    $svgDocument->normalizeDocument();
    $svgDocument->formatOutput = true;

    return $svgDocument;
}
/**
 * Loads an XML file into a DOMDocument and optionally validates it.
 *
 * The file is parsed with network access disabled and the entity loader
 * turned off; documents that carry a DOCTYPE are rejected. Validation is
 * done either by calling the given callable with the document and the
 * saved libxml error mode, or by validating against the given XSD file.
 *
 * @param string               $file             An XML file path
 * @param string|callable|null $schemaOrCallable An XSD schema file path, a callable, or null to disable validation
 *
 * @return \DOMDocument
 *
 * @throws \InvalidArgumentException When loading of XML file returns error
 */
public static function loadFile($file, $schemaOrCallable = null)
{
    $previousErrorMode = libxml_use_internal_errors(true);
    $previousLoaderState = libxml_disable_entity_loader(true);
    libxml_clear_errors();

    $dom = new \DOMDocument();
    $dom->validateOnParse = true;

    $options = LIBXML_NONET | (defined('LIBXML_COMPACT') ? LIBXML_COMPACT : 0);
    if (!$dom->loadXML(file_get_contents($file), $options)) {
        libxml_disable_entity_loader($previousLoaderState);

        throw new \InvalidArgumentException(implode("\n", static::getXmlErrors($previousErrorMode)));
    }

    $dom->normalizeDocument();

    libxml_use_internal_errors($previousErrorMode);
    libxml_disable_entity_loader($previousLoaderState);

    foreach ($dom->childNodes as $node) {
        if ($node->nodeType === XML_DOCUMENT_TYPE_NODE) {
            throw new \InvalidArgumentException('Document types are not allowed.');
        }
    }

    // Nothing to validate against: the parsed document is the result.
    if (null === $schemaOrCallable) {
        return $dom;
    }

    $validationErrorMode = libxml_use_internal_errors(true);
    libxml_clear_errors();

    $e = null;
    if (is_callable($schemaOrCallable)) {
        try {
            $valid = call_user_func($schemaOrCallable, $dom, $validationErrorMode);
        } catch (\Exception $e) {
            // Kept so it can be attached as the previous exception below.
            $valid = false;
        }
    } elseif (!is_array($schemaOrCallable) && is_file((string) $schemaOrCallable)) {
        $valid = @$dom->schemaValidate($schemaOrCallable);
    } else {
        libxml_use_internal_errors($validationErrorMode);

        throw new \InvalidArgumentException('The schemaOrCallable argument has to be a valid path to XSD file or callable.');
    }

    if (!$valid) {
        $messages = static::getXmlErrors($validationErrorMode);
        if (empty($messages)) {
            $messages = array(sprintf('The XML file "%s" is not valid.', $file));
        }

        throw new \InvalidArgumentException(implode("\n", $messages), 0, $e);
    }

    libxml_use_internal_errors($validationErrorMode);

    return $dom;
}
/**
 * Scrapes a class documentation page and records the class in $this->classes.
 *
 * Name and summary are read from the first <h2> and <p> inside the element
 * with id "main". Each <article> with an <h4> of the form "name(args)"
 * yields one method entry; the "Static" and "Returns" rows of the article's
 * first table provide the access operator and the return type. Entries
 * already stored in $this->classes[$url] take precedence over scraped ones.
 *
 * @param string $url  Page address, used as key and as base for method links
 * @param string $html Page markup
 *
 * @throws \RuntimeException When the page has no element with id "main"
 */
public function parse($url, $html)
{
    $doc = new \DOMDocument();
    @$doc->loadHTML($html);
    $doc->normalizeDocument();

    $main = $doc->getElementById('main');
    if (null === $main) {
        // Without this guard the next line fails with a call on null.
        throw new \RuntimeException(sprintf('No element with id "main" found in "%s".', $url));
    }

    $classe = array(
        'href' => $url,
        'name' => str_replace(' Class', '', $main->getElementsByTagName('h2')->item(0)->nodeValue),
        'summary' => $main->getElementsByTagName('p')->item(0)->nodeValue,
        'methods' => array(),
    );

    foreach ($main->getElementsByTagName('article') as $article) {
        if (!$article->getElementsByTagName('h4')->length) {
            continue;
        }

        $h4 = $article->getElementsByTagName('h4')->item(0);
        if (!preg_match('/([\\w\\*]+)\\((.*)\\)$/', $h4->nodeValue, $name_args)) {
            continue;
        }

        $args = array();
        foreach (explode(',', $name_args[2]) as $arg) {
            $arg = str_replace('$', '', trim($arg));
            preg_match('/^([^ ]+ )?([^ =]+)( =[^=]*)?$/', $arg, $matches);

            // Group 3 (the default value) is absent from $matches when the
            // argument has no default, and all groups are absent when the
            // pattern does not match, so every offset is checked first.
            $argName = isset($matches[2]) ? $matches[2] : '';
            if (!empty($matches[3])) {
                $args[] = '[' . $argName . ']';
            } elseif ($arg) {
                $args[] = $argName;
            }
        }
        $args = count($args) ? '(' . implode(', ', $args) . ')' : '()';

        $access = '';
        $return = '';
        if ($article->getElementsByTagName('table')->length) {
            $rows = $article->getElementsByTagName('table')->item(0)
                ->getElementsByTagName('tbody')->item(0)
                ->getElementsByTagName('tr');
            foreach ($rows as $tr) {
                $label = $tr->getElementsByTagName('th')->item(0)->nodeValue;
                if ($label === 'Static') {
                    $access = $tr->getElementsByTagName('td')->item(0)->nodeValue === 'Yes' ? '::' : '->';
                }
                if ($label === 'Returns') {
                    $return = $tr->getElementsByTagName('td')->item(0)->nodeValue;
                    break;
                }
            }
        }

        $name = $name_args[1];
        // A second "pre_save" on the same page is stored as "pre_update".
        if (isset($classe['methods'][$name]) && $name === 'pre_save') {
            $name = 'pre_update';
        }

        $typeKey = strtolower($return);
        $classe['methods'][$name] = array(
            'href' => $url . ($access ? '#method_' : '#function_') . $name,
            'name' => $name,
            'args' => $args,
            'access' => $access,
            // Unknown return types map to null instead of an undefined index.
            'return' => isset(static::$types[$typeKey]) ? static::$types[$typeKey] : null,
        );
    }

    if (isset($this->classes[$url]['methods'])) {
        foreach ($this->classes[$url]['methods'] as $name => $method) {
            // A stored method may not exist on the scraped page.
            $scraped = isset($classe['methods'][$name]) ? $classe['methods'][$name] : array();
            $classe['methods'][$name] = array_merge($scraped, $method);
        }
        unset($this->classes[$url]['methods']);
    }

    // First time this URL is seen there is nothing stored to merge with.
    $known = isset($this->classes[$url]) ? $this->classes[$url] : array();
    $this->classes[$url] = array_merge($classe, $known);
    $this->count[] = $url;
}
/**
 * Parses the given file into a SimpleXMLElement.
 * Does NOT validate the file (much faster).
 *
 * The libxml internal-error mode that was active on entry is restored
 * before returning or throwing, instead of being forced to false.
 *
 * @param string $file
 *
 * @return SimpleXMLElement
 *
 * @throws \Exception When the file cannot be loaded
 */
protected function parseFile($file)
{
    $dom = new \DOMDocument();
    $previous = libxml_use_internal_errors(true);

    if (!$dom->load($file, LIBXML_COMPACT)) {
        // Read the messages while they are still buffered, then restore.
        $errors = $this->getXmlErrors();
        libxml_use_internal_errors($previous);

        throw new \Exception(implode("\n", $errors));
    }

    $dom->validateOnParse = true;
    $dom->normalizeDocument();

    // Drop anything buffered during the load so it does not leak to the caller.
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    return simplexml_import_dom($dom);
}
/**
 * Merges adjacent runs with equal properties inside every paragraph.
 *
 * Each paragraph is cloned and the clone's run list is walked. While the
 * following run has the same property node as the kept one (per
 * deepEqual()), its text is appended to the kept run's value node and the
 * run is removed from the clone. The clone then replaces the original
 * paragraph, and the document is normalised so appended text nodes merge.
 */
public function cleanup()
{
    /** @var $paragraph \DOMNode */
    foreach ($this->getParagraphNodeList() as $paragraph) {
        // Work on a deep copy; it is swapped in once all runs are merged.
        $workingCopy = $paragraph->cloneNode(true);
        $runs = $this->getRunNodeList($workingCopy);

        $position = 1;
        $keptRun = $runs->item(0);
        $candidate = $runs->item($position);

        while ($keptRun && $candidate !== null) {
            $sameProperties = $this->deepEqual(
                $this->getPropertyNode($keptRun),
                $this->getPropertyNode($candidate)
            );

            if ($sameProperties === true) {
                $candidateValue = $this->getValueNode($candidate);
                $keptValue = $this->getValueNode($keptRun);

                // Either value node may be missing (original note: LibreOffice docx quick fix).
                if ($candidateValue !== null && $keptValue !== null) {
                    $keptValue->appendChild(
                        $this->document->createTextNode($candidateValue->textContent)
                    );
                }

                $workingCopy->removeChild($candidate);
            } else {
                $keptRun = $candidate;
            }

            // Per the original note, the list still holds removed nodes,
            // so the index always advances.
            $position += 1;
            $candidate = $runs->item($position);
        }

        $paragraph->parentNode->replaceChild($workingCopy, $paragraph);
    }

    // Merge the appended text nodes.
    $this->document->normalizeDocument();
}
/**
 * Builds an HTML page from a decoded description and echoes it.
 *
 * The head gets a Content-Type meta tag, the title from $json["title"] and
 * a link to the bootstrap stylesheet under $this->domain. Every entry of
 * $json["body"] becomes one element: "tag" names it, "attribute" is a list
 * of [name, value] pairs, "text" is optional text content, and "parent"
 * optionally names the id of the element to attach to (default: <body>).
 * Elements without an id attribute get "<tag><count of that tag in body>".
 *
 * @param array $json Decoded page description with "title" and "body" keys
 */
public function renderDOM($json)
{
    $page = new DOMDocument();
    $page->normalizeDocument();
    $page->formatOutput = true;

    $html = $page->createElement('html');
    $head = $page->createElement('head');
    $body = $page->createElement('body');

    $meta = $page->createElement('meta');
    $meta->setAttribute("http-equiv", "Content-Type");
    $meta->setAttribute("content", "text/html; charset=utf-8");
    $head->appendChild($meta);

    $title = $page->createElement('title');
    $title->appendChild($page->createTextNode($json["title"]));
    $head->appendChild($title);

    $stylesheet = $page->createElement('link');
    $stylesheet->setAttribute("rel", "stylesheet");
    $stylesheet->setAttribute("href", $this->domain . "/assets/css/bootstrap/bootstrap.css");
    $head->appendChild($stylesheet);

    $html->appendChild($head);
    $html->appendChild($body);
    $page->appendChild($html);

    $elementCount = count($json["body"]);
    for ($i = 0; $i < $elementCount; $i++) {
        $spec = $json["body"][$i];
        $tag = $spec["tag"];
        $element = $page->createElement($tag);

        $attributeCount = isset($spec["attribute"]) ? count($spec["attribute"]) : 0;
        for ($j = 0; $j < $attributeCount; $j++) {
            $attributeName = $spec["attribute"][$j][0];
            $attributeValue = $spec["attribute"][$j][1];

            $element->setAttribute($attributeName, $attributeValue);
            if ($attributeName == "id") {
                // Register as an ID so getElementById() can resolve it as a parent.
                $element->setIdAttribute($attributeName, true);
            }
        }

        if (!$element->hasAttribute("id")) {
            $generatedId = $tag . $body->getElementsByTagName($tag)->length;
            $element->setAttribute("id", $generatedId);
        }

        if (isset($spec["text"])) {
            $element->appendChild($page->createTextNode($spec["text"]));
        }

        if (isset($spec["parent"])) {
            $page->getElementById($spec["parent"])->appendChild($element);
        } else {
            $body->appendChild($element);
        }
    }

    // Kept from the original: re-appending already attached nodes.
    $html->appendChild($head);
    $html->appendChild($body);
    $page->appendChild($html);

    echo "<!DOCTYPE html>" . html_entity_decode($page->saveHTML());
}
/**
 * Loads markup into a DOMDocument and records whether it has a single root.
 *
 * The instance is registered through jqm_var() under a generated namespace.
 * The markup is converted to HTML entities and parsed. When the input has
 * no "<html" tag and the inspected node has exactly one child, that child
 * (or its first child, when present) is stored as the root path. Any
 * doctype node is removed from the document.
 *
 * @param string $document Markup to load; must be a scalar
 * @param string $tagname  Tag name used to select the initial node list
 *
 * @throws Exception When $document is not a scalar
 */
public function __construct($document, $tagname = '*')
{
    if (!is_scalar($document)) {
        throw new Exception('Not a valid {JQMDoc} object');
    }

    $this->_namespace = microtime(true) . uniqid();
    jqm_var($this->_namespace, $this);

    // Detect whether $document is a full document.
    $hasHTML = stripos($document, '<html') !== false;
    $this->__documentMap = array();

    $DOM = new DOMDocument();
    $DOM->recover = true;
    $DOM->preserveWhiteSpace = true;
    $DOM->substituteEntities = true;
    $DOM->formatOutput = true;
    $DOM->encoding = 'utf-8';
    $DOM->loadHTML(mb_convert_encoding($document, 'HTML-ENTITIES', 'UTF-8'));
    $DOM->normalizeDocument();

    $html = $DOM->getElementsByTagName($tagname);

    // Determine root / pieced map. $root is initialised up front because it
    // is stored below even when no root is detected.
    $hasRoot = false;
    $root = null;
    $first = $html->item(0);
    if ($first !== null && $first->childNodes->length > 0) {
        $html_tmp = $html;
        if ($first->tagName == 'html') {
            $html_tmp = $first->childNodes->item(0);
        }
        if (!$hasHTML and $html_tmp->childNodes->length == 1) {
            $hasRoot = true;
            $onlyChild = $html_tmp->childNodes->item(0);
            if ($onlyChild->firstChild) {
                $root = $onlyChild->firstChild->getNodePath();
            } else {
                $root = $onlyChild->getNodePath();
            }
        }
    }

    $this->__schema['root'] = $hasRoot;
    $this->__schema['rootPath'] = $root;
    $this->__mapLength = false;
    $this->_length = false;
    $this->length = false;

    if ($DOM->doctype) {
        $DOM->removeChild($DOM->doctype);
    }

    $this->_DOM = $DOM;
    $this->_selector = $tagname;
}
/**
 * Serialises the document to an XML string.
 *
 * When the parent holds data, that data is encoded with ZXmlEncoder and
 * returned instead. Otherwise the DOM document is normalised and saved;
 * its formatOutput flag is put back to its previous value afterwards.
 *
 * @param bool $a_format Format result
 * @return string
 */
public function compile($a_format = false)
{
    $data = parent::getData();
    if ($data) {
        return ZXmlEncoder::encode($data);
    }

    $document = $this->m_document;
    $document->preserveWhiteSpace = false;
    $document->recover = false;

    $previousFormat = $document->formatOutput;
    $document->formatOutput = (bool) $a_format;

    $document->normalizeDocument();
    $xml = $document->saveXML();

    $document->formatOutput = $previousFormat;

    return $xml;
}
/**
 * Validates and parses the given file into a SimpleXMLElement.
 *
 * The file is loaded and validated against the bundled strict XLIFF 1.2
 * schema. The libxml internal-error mode that was active on entry is
 * restored before returning or throwing, instead of being forced to false.
 *
 * @param string $file
 *
 * @return SimpleXMLElement
 *
 * @throws \Exception When the file cannot be loaded or is not schema-valid
 */
protected function parseFile($file)
{
    $dom = new \DOMDocument();
    $previous = libxml_use_internal_errors(true);

    if (!$dom->load($file, LIBXML_COMPACT)) {
        // Read the messages while they are still buffered, then restore.
        $errors = $this->getXmlErrors();
        libxml_use_internal_errors($previous);

        throw new \Exception(implode("\n", $errors));
    }

    if (!$dom->schemaValidate(__DIR__ . '/schema/dic/xliff-core/xliff-core-1.2-strict.xsd')) {
        $errors = $this->getXmlErrors();
        libxml_use_internal_errors($previous);

        throw new \Exception(implode("\n", $errors));
    }

    $dom->validateOnParse = true;
    $dom->normalizeDocument();

    // Drop anything buffered during load/validation so it does not leak out.
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    return simplexml_import_dom($dom);
}
/**
 * Serialises the document to an XML string.
 *
 * When the parent holds data, that data is encoded with Zoombi_Xml and
 * returned instead. Otherwise the DOM document is configured from
 * $this->m_flags ('whitespace', 'recover', 'format', 'normalize') and
 * saved; its formatOutput flag is put back to its previous value afterwards.
 *
 * @param bool $a_format Format result
 * @return string
 */
public function compile($a_format = false)
{
    $data = parent::getData();
    if ($data) {
        return Zoombi_Xml::encode($data);
    }

    $document = $this->m_document;
    $flags = $this->m_flags;

    $document->preserveWhiteSpace = $flags['whitespace'];
    $document->recover = $flags['recover'];

    $previousFormat = $document->formatOutput;
    $document->formatOutput = $flags['format'];

    if ($flags['normalize']) {
        $document->normalizeDocument();
    }

    $xml = $document->saveXML();
    $document->formatOutput = $previousFormat;

    return $xml;
}
/**
 * Checks the XML generated for transfer initiator details that carry an
 * ordering-customer OFI identifier against the stored reference document.
 */
public function testGenerateTransferInitiatorDetailsWithOfiIdentifier()
{
    $article = new WebshopArticle("Toaster", 1, 15000);

    $msgDetails = new TransferMsgDetails(
        "http://10.18.70.8:7001/vendorconfirmation",
        "http://10.18.70.8:7001/transactionok?danke.asp",
        "http://10.18.70.8:7001/transactionnok?fehler.asp"
    );
    $msgDetails->TargetWindowOk = 'Mustershop';
    $msgDetails->TargetWindowNok = 'Mustershop';

    $details = new TransferInitiatorDetails(
        'AKLJS231534',
        'topSecret',
        'GAWIATW1XXX',
        'Max Mustermann',
        'AT611904300234573201',
        '1234567890ABCDEFG',
        15000,
        $msgDetails,
        '2007-03-16'
    );
    $details->RemittanceIdentifier = 'AT1234567890XYZ';
    $details->WebshopArticles[] = $article;
    $details->OrderingCustomerOfiIdentifier = 'TESTBANKXXX';

    $actual = $details->GetSimpleXml();

    // Round-trip the reference file through DOMDocument before comparing.
    $expected = new \DOMDocument();
    $expected->loadXML($this->GetEpsData('TransferInitiatorDetailsWithoutSignatureAndOrderingCustomerOfiIdentifier.xml'));
    $expected->formatOutput = true;
    $expected->preserveWhiteSpace = false;
    $expected->normalizeDocument();

    $this->assertEquals($expected->saveXML(), $actual->asXML());
}
/**
 * Installs a language package and writes its install log.
 *
 * After checkRequirements(), the "administration" and "site" sections of
 * the definition are handed to langFiles() when present, and the
 * definition is stored via storeData(). An XML log (id, type, name and the
 * installed files) is written to "<installed languages dir>/<id>.xml".
 *
 * @param DOMDocument $def Package definition; presumably a DOMDocument, it is only used through getElementsByTagName() here
 * @param string      $dir Directory passed on to langFiles()
 *
 * @return array 'msg' and 'msgtype' keys; the message includes $this->error when one is set
 */
protected function installLanguage($def, $dir)
{
    $this->checkRequirements($def);

    $this->definition = new DOMDocument();
    $this->definition->formatOutput = true;
    $this->definition->preserveWhiteSpace = false;
    $this->definition->appendChild($this->definition->createElement('SobiProApp'));

    $installLog = $this->definition->createElement('installLog');
    $fileList = $this->definition->createElement('files');
    $filesLog = array();

    $this->id = $def->getElementsByTagName('tag')->item(0)->nodeValue;

    foreach (array('administration', 'site') as $section) {
        if ($def->getElementsByTagName($section)->length) {
            $this->langFiles($section, $def, $dir, $filesLog);
        }
    }

    $this->storeData('language', $def);

    $dir = SPLoader::dirPath('etc.installed.languages', 'front', false);
    if (!SPFs::exists($dir)) {
        SPFs::mkdir($dir);
    }

    foreach ($filesLog as $installedFile) {
        $fileList->appendChild($this->definition->createElement('file', $installedFile));
    }
    $installLog->appendChild($fileList);

    $languageName = $def->getElementsByTagName('name')->item(0)->nodeValue;

    $root = $this->definition->getElementsByTagName('SobiProApp')->item(0);
    $root->appendChild($this->definition->createElement('id', $this->id));
    $root->appendChild($this->definition->createElement('type', 'language'));
    $root->appendChild($this->definition->createElement('name', $languageName));
    $root->appendChild($installLog);
    $this->definition->appendChild($root);

    $path = "{$dir}/{$this->id}.xml";
    $file = SPFactory::Instance('base.fs.file', $path);
    $this->definition->normalizeDocument();
    $file->content($this->definition->saveXML());
    $file->save();

    $message = Sobi::Txt('LANG_INSTALLED', $languageName);
    if ($this->error) {
        return array('msg' => $message . "\n" . $this->error, 'msgtype' => $this->errorType);
    }

    return array('msg' => $message, 'msgtype' => SPC::SUCCESS_MSG);
}
/**
 * Performs processing on the email content to make CSS styles inline.
 *
 * This wraps the Emogrifier library, but first extracts external and inline
 * CSS definitions: linked stylesheets (media unset, "all" or "screen") that
 * findCSSFile() resolves to an existing file, and <style> blocks (type
 * unset or "text/css").
 *
 * @param string $content
 * @return string
 */
protected function emogrify($content)
{
    require_once 'emogrifier/emogrifier.php';

    // Order here is seemingly important; 'tidy' seems to strip stuff
    // important for detecting encoding, so detection runs first.
    $encoding = mb_detect_encoding($content);
    $content = $this->tidy($content, $encoding);

    // Detection can fail (false). Fall back before the value is used as a
    // source encoding, not after.
    if (!$encoding) {
        $encoding = 'UTF-8';
    }
    $content = mb_convert_encoding($content, 'HTML-ENTITIES', $encoding);

    $emog = new Emogrifier($content);
    $css = array();

    $document = new DOMDocument();
    $document->encoding = $encoding;
    $document->strictErrorChecking = false;

    // Some versions of tidy don't remove duplicate attrs, so parse errors
    // are buffered rather than emitted. The previous mode is restored below.
    $previousErrorMode = libxml_use_internal_errors(true);
    $document->loadHTML($content);
    $document->normalizeDocument();

    $xpath = new DOMXPath($document);

    foreach ($xpath->query("//link[@rel='stylesheet']") as $link) {
        $media = $link->getAttribute('media');
        $file = $this->findCSSFile($link->getAttribute('href'));
        if (file_exists($file)) {
            $contents = trim(file_get_contents($file));
            if ($contents && (!$media || in_array($media, array('all', 'screen')))) {
                $css[] = $contents;
            }
        }
    }

    foreach ($xpath->query('//style') as $style) {
        $type = $style->getAttribute('type');
        // Separate variable so the document content is not overwritten.
        $styleRules = trim($style->textContent);
        if ($styleRules && (!$type || $type == 'text/css')) {
            $css[] = $styleRules;
        }
    }

    $emog->setCSS(implode("\n", $css));
    $content = $emog->emogrify();

    // Restored only after emogrify() has run, so that call still sees the
    // same libxml error mode as before this change.
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    // Remove the doctype that emogrify adds.
    $content = str_replace('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">', '', $content);

    return $content;
}
/**
 * Wraps an HTML fragment in a UTF-8 document skeleton and parses it.
 *
 * libxml errors are buffered and cleared around the parse when
 * libxml_use_internal_errors is available; the previous mode is restored.
 *
 * @param string $sText
 * @param string $sHtmlAttrs = ''
 * @param string $sBodyAttrs = ''
 *
 * @return \DOMDocument|bool
 */
public static function GetDomFromText($sText, $sHtmlAttrs = '', $sBodyAttrs = '')
{
    $bCanBufferErrors = \MailSo\Base\Utils::FunctionExistsAndEnabled('libxml_use_internal_errors');

    $bPreviousState = true;
    if ($bCanBufferErrors) {
        $bPreviousState = \libxml_use_internal_errors(true);
    }

    $oDocument = new \DOMDocument('1.0', 'utf-8');
    $oDocument->encoding = 'UTF-8';
    $oDocument->strictErrorChecking = false;
    $oDocument->formatOutput = false;

    // The XML declaration is assembled from pieces, as in the original.
    $sMarkup = '<' . '?xml version="1.0" encoding="utf-8"?' . '>'
        . '<html ' . $sHtmlAttrs . '>'
        . '<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head>'
        . '<body ' . $sBodyAttrs . '>' . $sText . '</body></html>';

    @$oDocument->loadHTML($sMarkup);
    @$oDocument->normalizeDocument();

    if ($bCanBufferErrors) {
        @\libxml_clear_errors();
        \libxml_use_internal_errors($bPreviousState);
    }

    return $oDocument;
}
/**
 * Validates and parses the given file into a SimpleXMLElement.
 *
 * The file is parsed with network access disabled and the entity loader
 * turned off; documents that carry a DOCTYPE are rejected. The document is
 * then validated against the bundled strict XLIFF 1.2 schema, whose
 * reference to http://www.w3.org/2001/xml.xsd is rewritten to a local
 * file:/// URL. When running from a phar, xml.xsd is copied to a temporary
 * file first; that copy is removed again once validation has run.
 *
 * @param string $file
 *
 * @return SimpleXMLElement
 *
 * @throws \RuntimeException When the file cannot be loaded, has a DOCTYPE, or is not schema-valid
 */
private function parseFile($file)
{
    $internalErrors = libxml_use_internal_errors(true);
    $disableEntities = libxml_disable_entity_loader(true);
    libxml_clear_errors();

    $dom = new \DOMDocument();
    $dom->validateOnParse = true;
    if (!@$dom->loadXML(file_get_contents($file), LIBXML_NONET | (defined('LIBXML_COMPACT') ? LIBXML_COMPACT : 0))) {
        libxml_disable_entity_loader($disableEntities);

        throw new \RuntimeException(implode("\n", $this->getXmlErrors($internalErrors)));
    }

    libxml_disable_entity_loader($disableEntities);

    foreach ($dom->childNodes as $child) {
        if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) {
            libxml_use_internal_errors($internalErrors);

            throw new \RuntimeException('Document types are not allowed.');
        }
    }

    $location = str_replace('\\', '/', __DIR__) . '/schema/dic/xliff-core/xml.xsd';
    $parts = explode('/', $location);
    $tmpfile = false;
    if (0 === stripos($location, 'phar://')) {
        $tmpfile = tempnam(sys_get_temp_dir(), 'sf2');
        if ($tmpfile) {
            copy($location, $tmpfile);
            $parts = explode('/', str_replace('\\', '/', $tmpfile));
        }
    }

    // On Windows the first path segment is the drive and must not be URL-encoded.
    $drive = '\\' === DIRECTORY_SEPARATOR ? array_shift($parts) . '/' : '';
    $location = 'file:///' . $drive . implode('/', array_map('rawurlencode', $parts));

    $source = file_get_contents(__DIR__ . '/schema/dic/xliff-core/xliff-core-1.2-strict.xsd');
    $source = str_replace('http://www.w3.org/2001/xml.xsd', $location, $source);

    $valid = @$dom->schemaValidateSource($source);

    // The temporary schema copy is only needed while validation runs.
    if ($tmpfile && is_file($tmpfile)) {
        unlink($tmpfile);
    }

    if (!$valid) {
        throw new \RuntimeException(implode("\n", $this->getXmlErrors($internalErrors)));
    }

    $dom->normalizeDocument();
    libxml_use_internal_errors($internalErrors);

    return simplexml_import_dom($dom);
}
/**
 * Replaces every link in an HTML fragment with plain text.
 *
 * Each <a> becomes "[url]" when its normalised text equals its normalised
 * href, and "text [url]" otherwise. The wrapper markup added for parsing
 * and the doctype in self::$HTMLDOCTYPE are stripped from the result.
 *
 * @param string $str HTML fragment
 *
 * @return string Fragment with links replaced, right-trimmed
 */
static function addOutLinks($str)
{
    $dom = new DOMDocument();
    $dom->loadHTML("<html><body>{$str}</body></html>");
    $dom->normalizeDocument();

    $xpath = new DOMXPath($dom);
    $anchors = $xpath->query("/html/body//a");

    foreach ($anchors as $anchor) {
        $label = str_replace(array('\\n', '\\r'), '', $anchor->textContent);
        $normalizedLabel = self::normalize($label);
        $normalizedUrl = self::normalize($anchor->getAttribute('href'));

        if ($normalizedLabel == $normalizedUrl) {
            $replacement = new DOMText("[{$normalizedUrl}]");
        } else {
            $replacement = new DOMText("{$label} [{$normalizedUrl}]");
        }

        $anchor->parentNode->replaceChild($replacement, $anchor);
    }

    $wrapperMarkup = array(self::$HTMLDOCTYPE, "<html><body>", "</body></html>");

    return rtrim(str_replace($wrapperMarkup, '', $dom->saveHTML()));
}
/**
 * Builds an Error object from an XML error response.
 *
 * The content is parsed with network access disabled and the entity loader
 * turned off. Every "./entity/body/parameter" node found relative to the
 * error node contributes its "name" attribute, plus one sanitised entry
 * per nested <message>.
 *
 * @param string $content XML error payload
 *
 * @return Error
 *
 * @throws ApiParserException When the content is empty, cannot be parsed,
 *                            or contains more than one error node
 */
public function parseError($content)
{
    if (!$content) {
        throw new ApiParserException('Could not transform this xml to a \\DOMDocument instance.');
    }

    $internalErrors = libxml_use_internal_errors(true);
    $disableEntities = libxml_disable_entity_loader(true);
    libxml_clear_errors();

    $document = new \DOMDocument();
    $document->validateOnParse = true;
    if (!$document->loadXML($content, LIBXML_NONET | (defined('LIBXML_COMPACT') ? LIBXML_COMPACT : 0))) {
        libxml_disable_entity_loader($disableEntities);
        libxml_clear_errors();
        libxml_use_internal_errors($internalErrors);

        throw new ApiParserException('Could not transform this xml to a \\DOMDocument instance.');
    }

    $document->normalizeDocument();

    libxml_use_internal_errors($internalErrors);
    libxml_disable_entity_loader($disableEntities);

    $xpath = new \DOMXpath($document);
    $nodes = $xpath->evaluate('./error');

    // One error node is the normal case; only more than one is rejected.
    // The previous "1 ===" check threw for every well-formed error document.
    if (1 < $nodes->length) {
        throw new ApiParserException('The dom contains more than one error node.');
    }

    $error = new Error();

    $parameters = $xpath->query('./entity/body/parameter', $nodes->item(0));
    foreach ($parameters as $parameter) {
        $name = $parameter->getAttribute('name');
        $error->addEntityBodyParameter($name);

        $messages = $xpath->query('./message', $parameter);
        foreach ($messages as $message) {
            $error->addEntityBodyParameterError($name, $this->sanitizeValue($message->nodeValue));
        }
    }

    return $error;
}
/**
 * Serialises a DOMDocument to normalised, indented XML text.
 *
 * Output formatting is switched on for the given document and stays on.
 *
 * @param DOMDocument $document
 * @return string
 */
protected function domToText(DOMDocument $document)
{
    $document->formatOutput = true;
    $document->normalizeDocument();

    $xmlText = $document->saveXML();

    return $xmlText;
}
/**
 * Loads an XML file into a DOMDocument and runs validate() on it.
 *
 * The libxml internal-error mode that was active on entry is restored
 * before validation runs or an exception is thrown, instead of being
 * forced to false.
 *
 * @param string $path Path to the XML file
 *
 * @return \DOMDocument
 *
 * @throws \InvalidArgumentException When loading of XML file returns error
 */
protected function loadFile($path)
{
    $dom = new \DOMDocument();
    $previous = libxml_use_internal_errors(true);

    if (!$dom->load($path, LIBXML_COMPACT)) {
        // Read the messages while they are still buffered, then restore.
        $errors = $this->getXmlErrors();
        libxml_use_internal_errors($previous);

        throw new \InvalidArgumentException(implode("\n", $errors));
    }

    $dom->validateOnParse = true;
    $dom->normalizeDocument();

    // Drop anything buffered during the load so it does not leak to the caller.
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    $this->validate($dom, $path);

    return $dom;
}
/**
 * Parses a XML file.
 *
 * The file is loaded, passed to validate(), and imported as the
 * dependency-injection SimpleXMLElement subclass. The libxml internal-error
 * mode that was active on entry is restored before validation runs or an
 * exception is thrown, instead of being forced to false.
 *
 * @param string $file Path to a file
 *
 * @return \SimpleXMLElement
 *
 * @throws \InvalidArgumentException When loading of XML file returns error
 */
private function parseFile($file)
{
    $dom = new \DOMDocument();
    $previous = libxml_use_internal_errors(true);

    if (!$dom->load($file, defined('LIBXML_COMPACT') ? LIBXML_COMPACT : 0)) {
        // Read the messages while they are still buffered, then restore.
        $errors = $this->getXmlErrors();
        libxml_use_internal_errors($previous);

        throw new \InvalidArgumentException(implode("\n", $errors));
    }

    $dom->validateOnParse = true;
    $dom->normalizeDocument();

    // Drop anything buffered during the load so it does not leak to the caller.
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    $this->validate($dom, $file);

    return simplexml_import_dom($dom, 'Symfony\\Component\\DependencyInjection\\SimpleXMLElement');
}
/**
 * Creates a DOMDocument instance with the current HTML.
 *
 * The markup returned by getUnifiedHtml() is parsed with libxml errors
 * buffered; they are cleared and the previous error mode is restored
 * before the document is normalised.
 *
 * @return \DOMDocument
 */
private function createXmlDocument()
{
    $document = new \DOMDocument();
    $document->encoding = 'UTF-8';
    $document->strictErrorChecking = false;
    $document->formatOutput = true;

    $previousErrorMode = libxml_use_internal_errors(true);
    $document->loadHTML($this->getUnifiedHtml());
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    $document->normalizeDocument();

    return $document;
}
/**
 * Parses a XML file.
 *
 * The file is parsed with network access disabled and the entity loader
 * turned off; documents that carry a DOCTYPE are rejected. The document is
 * then passed to validate() and imported as the dependency-injection
 * SimpleXMLElement subclass.
 *
 * @param string $file Path to a file
 *
 * @throws \InvalidArgumentException When loading of XML file returns error
 */
private function parseFile($file)
{
    $previousErrorMode = libxml_use_internal_errors(true);
    $previousLoaderState = libxml_disable_entity_loader(true);
    libxml_clear_errors();

    $document = new \DOMDocument();
    $document->validateOnParse = true;

    $options = LIBXML_NONET | (defined('LIBXML_COMPACT') ? LIBXML_COMPACT : 0);
    $loaded = $document->loadXML(file_get_contents($file), $options);
    if (!$loaded) {
        libxml_disable_entity_loader($previousLoaderState);

        throw new \InvalidArgumentException(implode("\n", $this->getXmlErrors($previousErrorMode)));
    }

    $document->normalizeDocument();

    libxml_use_internal_errors($previousErrorMode);
    libxml_disable_entity_loader($previousLoaderState);

    foreach ($document->childNodes as $node) {
        if ($node->nodeType === XML_DOCUMENT_TYPE_NODE) {
            throw new \InvalidArgumentException('Document types are not allowed.');
        }
    }

    $this->validate($document, $file);

    return simplexml_import_dom($document, 'Symfony\\Component\\DependencyInjection\\SimpleXMLElement');
}
/**
 * Parse a XML File.
 *
 * The file is parsed with network access disabled and the entity loader
 * turned off, then validated against the bundled constraint-mapping
 * schema. Documents that carry a DOCTYPE are rejected.
 *
 * @param string $file Path of file
 *
 * @return SimpleXMLElement
 *
 * @throws MappingException
 */
protected function parseFile($file)
{
    $previousErrorMode = libxml_use_internal_errors(true);
    $previousLoaderState = libxml_disable_entity_loader(true);
    libxml_clear_errors();

    $document = new \DOMDocument();
    $document->validateOnParse = true;

    $options = LIBXML_NONET | (defined('LIBXML_COMPACT') ? LIBXML_COMPACT : 0);
    $loaded = $document->loadXML(file_get_contents($file), $options);
    if (!$loaded) {
        libxml_disable_entity_loader($previousLoaderState);

        throw new MappingException(implode("\n", $this->getXmlErrors($previousErrorMode)));
    }

    // The entity loader is restored before the schema file is validated against.
    libxml_disable_entity_loader($previousLoaderState);

    $schemaPath = __DIR__ . '/schema/dic/constraint-mapping/constraint-mapping-1.0.xsd';
    if (!$document->schemaValidate($schemaPath)) {
        throw new MappingException(implode("\n", $this->getXmlErrors($previousErrorMode)));
    }

    $document->normalizeDocument();
    libxml_use_internal_errors($previousErrorMode);

    foreach ($document->childNodes as $node) {
        if ($node->nodeType === XML_DOCUMENT_TYPE_NODE) {
            throw new MappingException('Document types are not allowed.');
        }
    }

    return simplexml_import_dom($document);
}
/**
 * Renders the thumbnail template for an article and returns an XPath over it.
 *
 * The body HTML is post-processed with ads disabled while $wgTitle is
 * temporarily set to the title of $r. The rendered template is loaded into
 * a DOMDocument whose elements use the JSLikeHTMLElement class.
 *
 * @param string $bodyHtml Article body HTML (taken by reference)
 * @param object $r        Object providing getTitle() (taken by reference)
 *
 * @return DOMXPath
 */
public static function getXPath(&$bodyHtml, &$r)
{
    global $wgWikiHowSections, $IP, $wgTitle;

    $lang = MobileWikihow::getSiteLanguage();

    // Munge steps first.
    $opts = array('no-ads' => true);
    require_once "{$IP}/skins/WikiHowSkin.php";

    // Swap in the article's title for post-processing; restored below.
    $savedTitle = $wgTitle;
    $wgTitle = $r->getTitle();

    $vars = array(
        'bodyHtml' => WikihowArticleHTML::postProcess($bodyHtml, $opts),
        'lang' => $lang,
    );

    EasyTemplate::set_path(dirname(__FILE__) . '/');
    $html = EasyTemplate::html('thumb_html.tmpl.php', $vars);

    require_once "{$IP}/extensions/wikihow/mobile/JSLikeHTMLElement.php";

    $doc = new DOMDocument('1.0', 'utf-8');
    $doc->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
    $doc->strictErrorChecking = false;
    $doc->recover = true;
    @$doc->loadHTML($html);
    $doc->normalizeDocument();

    $xpath = new DOMXPath($doc);

    $wgTitle = $savedTitle;

    return $xpath;
}
/**
 * Set the data to use.
 *
 * Character entities are converted and the data is loaded as HTML. Every
 * <img> has its src cleared; when the image can be downloaded (or was
 * already downloaded for the same src) it gets a "recindex" attribute that
 * points at the stored image record. The document is serialised as XML,
 * with <pagebreak/> elements rewritten to <mbp:pagebreak/>.
 *
 * @param string $data Data to put in the file
 */
public function setData($data)
{
    $data = CharacterEntities::convert($data);

    $dom = new DOMDocument();
    $dom->loadHTML($data) or die($data);
    $dom->normalizeDocument();

    // Image handling: maps each source URL to the record index it was stored under.
    $images = array();
    $savedImages = array();

    foreach ($dom->getElementsByTagName('img') as $img) {
        $src = $img->getAttribute("src");
        $img->setAttribute("src", "");

        if (isset($savedImages[$src])) {
            // Same image seen before: reuse its record.
            $img->setAttribute("recindex", $savedImages[$src]);
            continue;
        }

        $image = ImageHandler::DownloadImage($src);
        if ($image !== false) {
            $images[$this->imgCounter] = new FileRecord(new Record($image));
            $img->setAttribute("recindex", $this->imgCounter);
            $savedImages[$src] = $this->imgCounter;
            $this->imgCounter++;
        }
    }
    $this->images = $images;

    $data = $dom->saveXML();
    $data = str_replace("<pagebreak/>", "<mbp:pagebreak/>", $data);
    $data = str_replace("<pagebreak></pagebreak>", "<mbp:pagebreak/>", $data);

    $this->source = $data;
    $this->prc = false;
}