/**
 * Parses an XML/XHTML string into a ttree.
 *
 * The parser is a single-pass, character-driven state machine. Every node
 * added to the tree carries a three-slot array:
 *   0 => tag name ("" for text and comment nodes),
 *   1 => tag parameters (the raw parameter string, or the result of
 *        parseparams() when autoparse is enabled),
 *   2 => text content ("" for element nodes, unless a text-only element was
 *        compacted into its parent on close).
 * All parsed nodes are nested under a root node named "xml" (raw mode) or
 * "xhtml".
 *
 * Comments (<!-- ... -->) and processing instructions (<? ... ?>) are stored
 * verbatim as text nodes; other <! ... > tags are stored as stand-alone nodes
 * whose parameters are not parsed.
 *
 * @param string $data      Markup to parse.
 * @param array  $params    Options keyed by constant:
 *                          C_XML_RAW            - true for plain XML (no XHTML auto-close tag rules, disables $fetchData);
 *                          C_XML_AUTOPARSE      - pass tag parameters through parseparams() (forced on by $fetchData);
 *                          C_XML_LAX            - tolerate mismatched close tags and unclosed auto-close tags;
 *                          C_XML_KEEPCASE       - if set (any value), keep tag names as written instead of upper-casing;
 *                          C_XML_REMOVECOMMENTS - if set (any value), drop comments/PIs unless inside a code tag.
 * @param bool   $fetchData When true (and not raw mode) resets $this->parsedContent and feeds every
 *                          parsed parameter set to refreshData() (presumably to collect ids, classes,
 *                          links, etc. - see refreshData()).
 * @param bool   $silent    Suppress the echoed diagnostics.
 *
 * @return ttree|false The populated tree, or false in strict (non-LAX) mode when a close tag does not
 *                     match the open one or a known auto-close tag is left open. A tag still open at
 *                     end of input only echoes a diagnostic; the tree is still returned.
 */
public function parseXML($data, $params = array(), $fetchData = false, $silent = false) {
    # --- option defaults ---------------------------------------------------
    if (!isset($params[C_XML_RAW])) {
        $params[C_XML_RAW] = false; # XHTML mode unless told otherwise
    }
    # fetching data requires parsed parameters, so $fetchData forces autoparse on
    if (!isset($params[C_XML_AUTOPARSE])) {
        $params[C_XML_AUTOPARSE] = $fetchData;
    } elseif ($fetchData) {
        $params[C_XML_AUTOPARSE] = true;
    }
    if (!isset($params[C_XML_LAX])) {
        $params[C_XML_LAX] = false;
    }
    if ($params[C_XML_RAW]) {
        $fetchData = false; # pointless with XML
    }
    if ($fetchData) {
        $this->parsedContent = array(C_XHTML_IDS => array(), C_XHTML_CLASSES => array(), C_XHTML_DUPLICATES => array(), C_XHTML_LINKS => array(), C_XHTML_SRCS => array(), C_XHTML_NAMES => array());
    }

    # --- parser state ------------------------------------------------------
    $autoparse = $params[C_XML_AUTOPARSE]; # parse tag parameters to an array
    $laxClosure = $params[C_XML_LAX];      # ignore/repair incorrect open/close tags
    $keepcase = isset($params[C_XML_KEEPCASE]);
    $isXHTML = false;                      # last closed tag had element content (false means only raw text)
    $emptyParams = $autoparse ? array() : "";
    $pos = 0;                              # current offset in $data
    $buffer = "";                          # text or tag content accumulated so far
    $data = str_replace("\r", "\n", $data);   # normalise \r to \n
    $data = str_replace("\n\n", "\n", $data); # collapse the doubled \n left by \r\n (single pass only)
    $total = strlen($data);
    $intag = false;                        # currently between "<" and ">"
    $intags = array();                     # stack of open tag names (upper-cased)
    $incode = false;                       # inside a tag listed in $this->xhtmlcode
    $saida = new ttree();                  # output tree
    # root node; everything parsed is nested below it
    $saida->addbranch(array(0 => $params[C_XML_RAW] ? "xml" : "xhtml", 1 => $emptyParams, 2 => ""));
    $branch =& $saida->lastsibling();      # node currently receiving children
    $closure = "";                         # quote char of the literal we are inside, if any
    $line = 1;                             # current line, for diagnostics only

    while ($pos < $total) {
        $car = $data[$pos];
        if ($car == "\n") {
            $line++;
        }

        if ($intag) {
            if ($car == ">" && $closure == "") {
                # end of a tag (a ">" inside a quoted literal does not count)
                $intag = false;

                if ($buffer[0] == "/") {
                    // CLOSING TAG ------------------------------------------------
                    $tagClose = substr($buffer, 1);
                    $nextClose = count($intags) > 0 ? $intags[count($intags) - 1] : '';
                    $thisClose = strtoupper($tagClose);
                    if ($thisClose == $nextClose) {
                        // matches the innermost open tag: step back to its parent
                        array_pop($intags);
                        if (!$isXHTML && count($branch->branchs) == 1 && $branch->branchs[0]->data[2] != "" && $branch->branchs[0]->data[0] == "") {
                            # the element holds a single text node only: compact it into the element itself
                            $branch->data[2] = $branch->branchs[0]->data[2];
                            $branch->branchs = array();
                        }
                        $branch =& $branch->parent;
                        $isXHTML = true;
                    } else {
                        if (!$laxClosure) {
                            // mismatched close tag in strict mode is fatal
                            if (!$silent) {
                                echo "XML:Tag mismatch at {$buffer} expecting {$nextClose} @ line " . $line . " (parent was " . $branch->parent->data[0] . ")";
                            }
                            return false;
                        } else {
                            // LAX: look further down the stack (the top is already known not to match).
                            // If the tag is open somewhere, close everything up to and including it;
                            // if it is not open at all, the stray close tag is ignored.
                            $located = false;
                            for ($tp = count($intags) - 2; $tp > -1; $tp--) {
                                if ($thisClose == $intags[$tp]) {
                                    $located = true;
                                    break;
                                }
                            }
                            if ($located) {
                                $isXHTML = true;
                                $autoClose = count($intags) - $tp; # number of levels to unwind
                                for ($c = 0; $c < $autoClose; $c++) {
                                    array_pop($intags);
                                    $branch =& $branch->parent;
                                }
                            }
                        }
                    }
                    if (isset($this->xhtmlcode[$thisClose])) {
                        $incode = false;
                    }
                } elseif ($buffer[strlen($buffer) - 1] == "/" || ($buffer[strlen($buffer) - 1] == "?" && $buffer[0] == "?")) {
                    // SELF-CLOSING TAG (trailing "/", or "?" on a "?"-prefixed tag) --
                    $buffer = substr($buffer, 0, strlen($buffer) - 1); # strip the closing marker
                    $tag = explode(" ", $buffer);
                    $tag = $tag[0];
                    $utag = strtoupper($tag);
                    $buffer = trim(substr($buffer, strlen($tag) + 1));
                    $data_do_branch = array(0 => $keepcase ? $tag : $utag, 1 => $buffer, 2 => '');
                    if ($autoparse) {
                        $data_do_branch[1] = $this->parseparams($buffer);
                        if ($fetchData && $utag != "PARAM" && !$incode) {
                            $this->refreshData($data_do_branch[1]);
                        }
                    }
                    $branch->addbranch($data_do_branch);
                } else {
                    // OPENING TAG ------------------------------------------------
                    if ($buffer[0] == "!" || $buffer[0] == "?") {
                        // stand-alone declaration tag: stored as a leaf, parameters left unparsed
                        $tag = explode(" ", $buffer);
                        $tag = $tag[0];
                        $utag = strtoupper($tag);
                        $buffer = substr($buffer, strlen($tag) + 1);
                        $data_do_branch = array(0 => $tag, 1 => $buffer, 2 => '');
                        $branch->addbranch($data_do_branch);
                    } else {
                        // split tag name from its parameters
                        $tag = explode(" ", $buffer);
                        $tag = $tag[0];
                        $utag = strtoupper($tag);
                        if (!$params[C_XML_RAW] && isset($this->xhtmlacl[$utag])) {
                            # a tag listed in $this->xhtmlacl should have been written self-closed
                            if ($laxClosure) {
                                // LAX: treat it as if it had been self-closed
                                $buffer = trim(substr($buffer, strlen($tag) + 1));
                                $data_do_branch = array(0 => $keepcase ? $tag : $utag, 1 => $buffer, 2 => '');
                                if ($autoparse) {
                                    $data_do_branch[1] = $this->parseparams($buffer);
                                    if ($fetchData && $utag != "PARAM" && !$incode) {
                                        $this->refreshData($data_do_branch[1]);
                                    }
                                }
                                $branch->addbranch($data_do_branch);
                            } else {
                                if (!$silent) {
                                    echo "XML: Auto-close tag not closed at line {$line}: {$tag}";
                                }
                                return false;
                            }
                        } else {
                            // regular element: push on the stack and descend into it
                            $buffer = trim(substr($buffer, strlen($tag) + 1));
                            array_push($intags, $utag);
                            $data_do_branch = array(0 => $keepcase ? $tag : $utag, 1 => $buffer, 2 => '');
                            if ($autoparse) {
                                $data_do_branch[1] = $this->parseparams($buffer);
                                if ($fetchData && !$incode) {
                                    $this->refreshData($data_do_branch[1]);
                                }
                            }
                            $branch->addbranch($data_do_branch);
                            $branch =& $branch->lastsibling();
                        }
                    }
                    if (isset($this->xhtmlcode[$utag])) {
                        $incode = true;
                    }
                    $isXHTML = false;
                }

                $buffer = "";
            } else {
                // still inside the tag: accumulate, tracking quoted literals
                if ($car != "\n") {
                    if ($closure == "" && $car == "\t") {
                        $car = " "; # tabs outside literals become spaces
                    } elseif ($closure == "" && ($car == "\"" || $car == "'")) {
                        $closure = $car; # literal opens
                    } elseif ($closure == $car) {
                        $closure = ""; # literal closes
                    }
                    $buffer .= $car;
                } else {
                    $buffer .= " "; // newlines inside a tag become spaces (\r was converted to \n earlier)
                }
            }
        } else {
            if ($car == "<") {
                if ($pos == $total - 1 || $data[$pos + 1] == ">" || preg_match("/^([\n\r\t ='\"<]+)\$/", $data[$pos + 1])) {
                    // a "<" at end of input or followed by one of these characters is plain text, not a tag
                    $buffer .= "<";
                } else {
                    if ($buffer != "" && !preg_match("/^([\n\r\t ]+)\$/", $buffer)) {
                        # flush pending text as a text node (whitespace-only text is dropped)
                        $data_do_branch = array(0 => "", 1 => $emptyParams, 2 => preg_replace("/([\t ])+/", " ", $buffer));
                        $branch->addbranch($data_do_branch);
                    }
                    $buffer = "";
                    $isComment = substr($data, $pos, 4) == "<!--";
                    if ($isComment || substr($data, $pos, 2) == "<?") {
                        // comment or processing instruction: consume it verbatim in one jump
                        $terminator = $isComment ? "-->" : "?>";
                        $found = strpos($data, $terminator, $pos);
                        # A missing terminator used to rewind $pos to the start of the input
                        # (false + 3), re-parsing the document endlessly. Treat the construct
                        # as running to the end of the input instead.
                        $end = $found === false ? $total : $found + strlen($terminator);
                        $buffer .= substr($data, $pos, $end - $pos);
                        $pos = $end - 1;
                        if (isset($data[$pos + 1]) && $data[$pos + 1] == "\n") {
                            // keep the newline that directly follows with the comment
                            $buffer .= "\n";
                            $pos++;
                        }
                        # newlines swallowed by the jump never reach the per-character counter
                        $line += substr_count($buffer, "\n");
                        if (!isset($params[C_XML_REMOVECOMMENTS]) || $incode) {
                            # keep comments unless asked to remove them; inside code tags they are always kept
                            $data_do_branch = array(0 => "", 1 => $emptyParams, 2 => $buffer);
                            $branch->addbranch($data_do_branch);
                        }
                        $buffer = "";
                    } else {
                        $intag = true;
                    }
                    $closure = "";
                }
            } else {
                $buffer .= $car;
            }
        }
        $pos++;
    }

    if ($buffer != "" && !preg_match("/^([\n\r\t ]+)\$/", $buffer)) {
        # flush trailing text (whitespace-only text is dropped)
        $data_do_branch = array(0 => "", 1 => $emptyParams, 2 => preg_replace("/([\t ])+/", " ", $buffer));
        $branch->addbranch($data_do_branch);
    }
    if (!$laxClosure && count($intags) > 0 && !$silent) {
        # strict mode only: report the innermost tag left open (the tree is still returned)
        echo "XML: Tag not closed at " . array_pop($intags);
    }
    return $saida;
}