Esempio n. 1
0
 /**
  * Parses an XML/XHTML string into a ttree of nodes.
  *
  * Every node is added as array(0 => tag name, 1 => attributes, 2 => text).
  * Text fragments and comments are stored with an empty tag name. When an
  * element is closed and its only child is a single text fragment, that
  * fragment is folded into the element's own slot 2 and the child is dropped.
  * All parsed nodes hang under one root node named "xml" (raw mode) or "xhtml".
  *
  * Options read from $params:
  *  - C_XML_RAW            (value)    raw XML: the auto-close tag list is not enforced and
  *                                    $fetchData is switched off.
  *  - C_XML_AUTOPARSE      (value)    run each tag's attribute string through parseparams();
  *                                    defaults to $fetchData and is forced on when $fetchData is true.
  *  - C_XML_LAX            (value)    tolerate mismatched close tags and unclosed auto-close tags
  *                                    instead of aborting.
  *  - C_XML_KEEPCASE       (presence) keep tag names as written instead of upper-casing them.
  *  - C_XML_REMOVECOMMENTS (presence) drop comments / processing instructions, except while
  *                                    inside a tag listed in $this->xhtmlcode.
  *
  * @param string $data      Markup to parse.
  * @param array  $params    Options keyed by the C_XML_* constants described above.
  * @param bool   $fetchData When true (and not in raw mode) $this->parsedContent is reset and
  *                          every parsed attribute set is passed to refreshData().
  * @param bool   $silent    When true, diagnostics are not echoed.
  *
  * @return ttree|false The tree holding the parsed nodes, or false when strict mode hits a
  *                     mismatched close tag or an unclosed auto-close tag. Tags still open at
  *                     the end of the input only produce an echoed warning; the tree is returned.
  */
 public function parseXML($data, $params = array(), $fetchData = false, $silent = false)
 {
     // Fill in defaults for the options that are read by value further down.
     if (!isset($params[C_XML_RAW])) {
         $params[C_XML_RAW] = false;
     }
     if (!isset($params[C_XML_AUTOPARSE])) {
         $params[C_XML_AUTOPARSE] = $fetchData;
     } elseif ($fetchData) {
         // refreshData() is fed parsed attributes, so fetching forces attribute parsing on
         $params[C_XML_AUTOPARSE] = true;
     }
     if (!isset($params[C_XML_LAX])) {
         $params[C_XML_LAX] = false;
     }
     if ($params[C_XML_RAW]) {
         // pointless with raw XML
         $fetchData = false;
     }
     if ($fetchData) {
         $this->parsedContent = array(C_XHTML_IDS => array(), C_XHTML_CLASSES => array(), C_XHTML_DUPLICATES => array(), C_XHTML_LINKS => array(), C_XHTML_SRCS => array(), C_XHTML_NAMES => array());
     }

     // will parse the tag parameters to an array
     $autoparse = $params[C_XML_AUTOPARSE];
     // ignore errors in the markup, such as incorrect open/close tags
     $laxClosure = $params[C_XML_LAX];
     $keepcase = isset($params[C_XML_KEEPCASE]);
     // false while the innermost open element has only received raw text so far;
     // this is what allows a text-only element to be compacted when it closes
     $isXHTML = false;
     $emptyParams = $autoparse ? array() : "";
     // byte offset of the character being examined
     $pos = 0;
     // text accumulated for the current tag or text fragment
     $buffer = "";

     // Normalise line endings: \r becomes \n, then each "\n\n" pair becomes a single "\n".
     $data = str_replace("\r", "\n", $data);
     $data = str_replace("\n\n", "\n", $data);
     $total = strlen($data);

     // true while scanning between "<" and ">"
     $intag = false;
     // stack of upper-cased names of the currently open tags
     $intags = array();
     // true while inside a tag listed in $this->xhtmlcode
     // NOTE(review): presumably script/style-like tags - confirm against where xhtmlcode is defined
     $incode = false;

     // output tree; everything parsed is nested under a single root node
     $saida = new ttree();
     $saida->addbranch(array(0 => $params[C_XML_RAW] ? "xml" : "xhtml", 1 => $emptyParams, 2 => ""));
     $branch =& $saida->lastsibling();

     // quote character (" or ') of the attribute literal being scanned, "" when outside one
     $closure = "";
     // current line, used only in diagnostics
     $line = 1;

     while ($pos < $total) {
         $car = $data[$pos];
         if ($car == "\n") {
             $line++;
         }
         if ($intag) {
             if ($car == ">" && $closure == "") {
                 // End of a tag (a ">" inside a quoted attribute value does not count).
                 $intag = false;
                 if ($buffer[0] == "/") {
                     // CLOSING TAG ---------------------------------------------------
                     $tagClose = substr($buffer, 1);
                     $nextClose = count($intags) > 0 ? $intags[count($intags) - 1] : '';
                     $thisClose = strtoupper($tagClose);
                     if ($thisClose == $nextClose) {
                         // matches the innermost open tag: step back to the parent node
                         array_pop($intags);
                         if (!$isXHTML && count($branch->branchs) == 1 && $branch->branchs[0]->data[2] != "" && $branch->branchs[0]->data[0] == "") {
                             // the element held nothing but one text fragment, so compact it
                             $branch->data[2] = $branch->branchs[0]->data[2];
                             $branch->branchs = array();
                         }
                         $branch =& $branch->parent;
                         $isXHTML = true;
                     } elseif (!$laxClosure) {
                         // incorrect close tag, and we are running in strict mode
                         if (!$silent) {
                             echo "XML:Tag mismatch at {$buffer} expecting {$nextClose} @ line " . $line . " (parent was " . $branch->parent->data[0] . ")";
                         }
                         return false;
                     } else {
                         // Lax mode: if this tag is open further up the stack, close everything
                         // down to and including it; otherwise it is a stray close tag and is ignored.
                         // The innermost entry was already compared above, hence count - 2.
                         $located = false;
                         for ($tp = count($intags) - 2; $tp > -1; $tp--) {
                             if ($thisClose == $intags[$tp]) {
                                 $located = true;
                                 break;
                             }
                         }
                         if ($located) {
                             $isXHTML = true;
                             $autoClose = count($intags) - $tp;
                             for ($c = 0; $c < $autoClose; $c++) {
                                 array_pop($intags);
                                 $branch =& $branch->parent;
                             }
                         }
                     }
                     if (isset($this->xhtmlcode[$thisClose])) {
                         $incode = false;
                     }
                 } elseif ($buffer[strlen($buffer) - 1] == "/" || ($buffer[strlen($buffer) - 1] == "?" && $buffer[0] == "?")) {
                     // SELF-CLOSING TAG (<tag ... /> or <? ... ?>) --------------------
                     // strip the trailing "/" or "?"
                     $buffer = substr($buffer, 0, strlen($buffer) - 1);
                     $tag = explode(" ", $buffer);
                     $tag = $tag[0];
                     $utag = strtoupper($tag);
                     $buffer = trim(substr($buffer, strlen($tag) + 1));
                     $data_do_branch = array(0 => $keepcase ? $tag : $utag, 1 => $buffer, 2 => '');
                     if ($autoparse) {
                         $data_do_branch[1] = $this->parseparams($buffer);
                         if ($fetchData && $utag != "PARAM" && !$incode) {
                             $this->refreshData($data_do_branch[1]);
                         }
                     }
                     $branch->addbranch($data_do_branch);
                 } else {
                     // OPENING TAG ---------------------------------------------------
                     if ($buffer[0] == "!" || $buffer[0] == "?") {
                         // stand-alone declaration tag: stored like an auto-close tag, attributes left unparsed
                         $tag = explode(" ", $buffer);
                         $tag = $tag[0];
                         $utag = strtoupper($tag);
                         $buffer = substr($buffer, strlen($tag) + 1);
                         $data_do_branch = array(0 => $tag, 1 => $buffer, 2 => '');
                         $branch->addbranch($data_do_branch);
                     } else {
                         // split the tag name from its attribute string
                         $tag = explode(" ", $buffer);
                         $tag = $tag[0];
                         $utag = strtoupper($tag);
                         if (!$params[C_XML_RAW] && isset($this->xhtmlacl[$utag])) {
                             // this tag is listed as auto-close but was written without the "/"
                             if ($laxClosure) {
                                 // lax mode: treat it as if it had been self-closed
                                 $buffer = trim(substr($buffer, strlen($tag) + 1));
                                 $data_do_branch = array(0 => $keepcase ? $tag : $utag, 1 => $buffer, 2 => '');
                                 if ($autoparse) {
                                     $data_do_branch[1] = $this->parseparams($buffer);
                                     if ($fetchData && $utag != "PARAM" && !$incode) {
                                         $this->refreshData($data_do_branch[1]);
                                     }
                                 }
                                 $branch->addbranch($data_do_branch);
                             } else {
                                 if (!$silent) {
                                     echo "XML: Auto-close tag not closed at line {$line}: {$tag}";
                                 }
                                 return false;
                             }
                         } else {
                             // regular container tag: push it and descend into the new node
                             $buffer = trim(substr($buffer, strlen($tag) + 1));
                             array_push($intags, $utag);
                             $data_do_branch = array(0 => $keepcase ? $tag : $utag, 1 => $buffer, 2 => '');
                             if ($autoparse) {
                                 $data_do_branch[1] = $this->parseparams($buffer);
                                 if ($fetchData && !$incode) {
                                     $this->refreshData($data_do_branch[1]);
                                 }
                             }
                             $branch->addbranch($data_do_branch);
                             $branch =& $branch->lastsibling();
                         }
                     }
                     if (isset($this->xhtmlcode[$utag])) {
                         $incode = true;
                     }
                     $isXHTML = false;
                 }
                 $buffer = "";
             } elseif ($car != "\n") {
                 // Still inside the tag: track quoted literals so ">" inside them is ignored.
                 if ($closure == "" && $car == "\t") {
                     // tabs outside a literal are turned into spaces
                     $car = " ";
                 } elseif ($closure == "" && ($car == "\"" || $car == "'")) {
                     $closure = $car;
                 } elseif ($closure == $car) {
                     $closure = "";
                 }
                 $buffer .= $car;
             } else {
                 // newlines inside a tag become a space (\r was already converted to \n)
                 $buffer .= " ";
             }
         } elseif ($car == "<") {
             if ($pos == $total - 1 || $data[$pos + 1] == ">" || preg_match("/^([\n\r\t ='\"<]+)\$/", $data[$pos + 1])) {
                 // a "<" at the very end, or followed by one of these characters, is plain text
                 $buffer .= "<";
             } else {
                 if ($buffer != "" && !preg_match("/^([\n\r\t ]+)\$/", $buffer)) {
                     // flush pending text (whitespace-only text is not added to the tree)
                     $data_do_branch = array(0 => "", 1 => $emptyParams, 2 => preg_replace("/([\t ])+/", " ", $buffer));
                     $branch->addbranch($data_do_branch);
                 }
                 $buffer = "";
                 $isComment = substr($data, $pos, 4) == "<!--";
                 if ($isComment || substr($data, $pos, 2) == "<?") {
                     // Comments and processing instructions are swallowed whole, unparsed.
                     $terminator = $isComment ? "-->" : "?>";
                     $found = strpos($data, $terminator, $pos);
                     // An unterminated comment runs to the end of the input. Without this
                     // check the false from strpos() would rewind $pos and loop forever.
                     $end = $found === false ? $total : $found + strlen($terminator);
                     $buffer .= substr($data, $pos, $end - $pos);
                     $pos = $end - 1;
                     if (isset($data[$pos + 1]) && $data[$pos + 1] == "\n") {
                         // keep the newline that directly follows the comment with it
                         $buffer .= "\n";
                         $pos++;
                     }
                     // the skipped span never passes the per-character counter above
                     $line += substr_count($buffer, "\n");
                     if (!isset($params[C_XML_REMOVECOMMENTS]) || $incode) {
                         // only add comments if set NOT to remove or we are in code
                         $data_do_branch = array(0 => "", 1 => $emptyParams, 2 => $buffer);
                         $branch->addbranch($data_do_branch);
                     }
                     $buffer = "";
                 } else {
                     $intag = true;
                 }
                 $closure = "";
             }
         } else {
             $buffer .= $car;
         }
         $pos++;
     }

     // Flush any text left after the last tag (whitespace-only text is ignored).
     if ($buffer != "" && !preg_match("/^([\n\r\t ]+)\$/", $buffer)) {
         $data_do_branch = array(0 => "", 1 => $emptyParams, 2 => preg_replace("/([\t ])+/", " ", $buffer));
         $branch->addbranch($data_do_branch);
     }
     // Strict mode only warns about tags still open at the end of the input; the tree is still returned.
     if (!$laxClosure && count($intags) > 0 && !$silent) {
         echo "XML: Tag not closed at " . array_pop($intags);
     }
     return $saida;
 }