/**
 * Parse one tag, character by character, and build the matching event object.
 *
 * Scanning starts at the character FOLLOWING $task->pos_act (the caller is
 * expected to have positioned pos_act on the tag opener) and runs until the
 * closing delimiter $this->tag_end is met.
 *
 * Recognised shapes (shown with "]" as closing delimiter):
 *   [name]                  plain start tag
 *   [/name]                 end tag
 *   [name=value]            value is stored under the option key 'default'
 *   [name key=value flag]   options; a key without value is stored as TRUE
 *   [name key="a \" b"]     quoted value; a backslash escapes the next character
 * Tag names and option keys are lower-cased through JString::strtolower();
 * option values are kept verbatim.
 *
 * Parse errors are pushed onto the task by this method itself.
 *
 * @param mixed  $tag  (by reference) On success receives a ParserEventTag or
 *                     ParserEventTagEnd. On failure receives an object whose
 *                     tag_end property holds the offset just past the last
 *                     character that was examined; a stdClass is created for
 *                     that purpose when $tag is not already an object.
 * @param object $task (by reference) Parser task; reads $task->text and
 *                     advances $task->pos_act.
 *
 * @return mixed TAGPARSER_RET_OK  tag parsed, pos_act is on the character after
 *                                 the closing delimiter;
 *               TAGPARSER_RET_ERR nameless or unterminated tag, caller should
 *                                 skip it.
 */
function ParseTag(&$tag, &$task) {
    $text =& $task->text;
    $pos =& $task->pos_act;
    $pos_start = $pos;

    $tagname = '';
    $nowkey = '';           // option key currently being collected
    $nowval = '';           // option value currently being collected
    $arr = array();         // collected options: key => value | TRUE
    $quot = '';             // active quote character: ' or "
    $isesc = FALSE;         // previous character was a backslash inside quotes
    $isend = FALSE;         // TRUE for an end tag ("/name")
    $mode = BBCODE_PARSE_START;

    $textlen = strlen($text);
    while (++$pos < $textlen) {
        $char = substr($text, $pos, 1);

        if ($mode == BBCODE_PARSE_START) {
            // First character: an optional "/" marks an end tag, anything else
            // already belongs to the name, so fall through to the NAME mode.
            $mode = BBCODE_PARSE_NAME;
            if ($char === '/') {
                $isend = TRUE;
                continue;
            }
        }

        if ($mode == BBCODE_PARSE_NAME) {
            $is_close = ($char == $this->tag_end);

            if (($is_close || $char === '=' || $char === ' ') && $tagname === '') {
                // A delimiter before any name character: nameless tag.
                $err = new ParserErrorContext('parser.err.tag.parsetag.noname');
                $err->GrabContext($task);
                $task->ErrorPush($err);
                // The caller passes NULL; writing a property on NULL is a
                // fatal Error on PHP 8, so create the carrier explicitly.
                if (!is_object($tag)) {
                    $tag = new stdClass();
                }
                $tag->tag_end = $pos + 1;
                return TAGPARSER_RET_ERR;
            }
            if ($is_close) {
                break;
            }
            if ($char === '=') {
                // "[name=value]" shorthand
                $mode = BBCODE_PARSE_EQUAL;
                $nowkey .= 'default';
                continue;
            }
            if ($char === ' ') {
                $mode = BBCODE_PARSE_SPACE;
                continue;
            }
            $tagname .= JString::strtolower($char);
            continue;
        }

        if ($mode == BBCODE_PARSE_SPACE) {
            if ($char === ' ') {
                continue; // eat up spaces
            }
            if ($char == $this->tag_end) {
                break;
            }
            // first character of an option key
            $nowkey .= JString::strtolower($char);
            $mode = BBCODE_PARSE_KEY_OR_END;
            continue;
        }

        if ($mode == BBCODE_PARSE_KEY_OR_END) {
            if ($char === '=') {
                $mode = BBCODE_PARSE_EQUAL;
                continue;
            }
            if ($char == $this->tag_end) {
                $arr[$nowkey] = TRUE; // key without value
                break;
            }
            if ($char === ' ') {
                $arr[$nowkey] = TRUE; // key without value
                $nowkey = '';
                $mode = BBCODE_PARSE_SPACE;
                continue;
            }
            $nowkey .= JString::strtolower($char);
            continue;
        }

        if ($mode == BBCODE_PARSE_EQUAL) {
            // First character after "=" decides between quoted value,
            // unquoted value, or no value at all.
            $quot = '';
            if ($char === '"' || $char === '\'') {
                $quot = $char;
                $mode = BBCODE_PARSE_VALQUOT;
                continue;
            }
            if ($char === ' ') {
                $arr[$nowkey] = TRUE;
                $nowkey = '';
                $mode = BBCODE_PARSE_SPACE;
                continue;
            }
            if ($char == $this->tag_end) {
                $arr[$nowkey] = TRUE;
                break;
            }
            $nowval .= $char;
            $mode = BBCODE_PARSE_VAL;
            continue;
        }

        if ($mode == BBCODE_PARSE_VALQUOT) {
            if ($isesc) {
                // escaped character is taken literally
                $nowval .= $char;
                $isesc = FALSE;
                continue;
            }
            if ($char === '\\') { // ONE backslash
                $isesc = TRUE;
                continue;
            }
            if ($char === $quot) {
                // closing quote ends the value
                $arr[$nowkey] = $nowval;
                $nowkey = $nowval = '';
                $mode = BBCODE_PARSE_SPACE;
                continue;
            }
            $nowval .= $char;
            continue;
        }

        if ($mode == BBCODE_PARSE_VAL) {
            if ($char === ' ') {
                $arr[$nowkey] = $nowval;
                $nowkey = $nowval = '';
                $mode = BBCODE_PARSE_SPACE;
                continue;
            }
            if ($char == $this->tag_end) {
                $arr[$nowkey] = $nowval;
                break;
            }
            $nowval .= $char;
            continue;
        }
    }

    // Every regular exit from the loop is a "break" on the closing delimiter,
    // which leaves $pos inside the text. If $pos ran past the end instead, the
    // text ended before the tag was closed: report it rather than fabricating
    // a tag whose end offset lies outside the text.
    if ($pos >= $textlen) {
        $err = new ParserErrorContext('parser.err.tag.parsetag.endless');
        $err->GrabContext($task);
        $task->ErrorPush($err);
        if (!is_object($tag)) {
            $tag = new stdClass();
        }
        // Same convention as the nameless case: one past the last examined
        // character, which here is the end of the text.
        $tag->tag_end = $textlen;
        return TAGPARSER_RET_ERR;
    }

    // $pos now points at the closing delimiter.
    if ($isend) {
        $tag = new ParserEventTagEnd($pos_start, $pos, $tagname);
    } else {
        $tag = new ParserEventTag($pos_start, $pos, $tagname);
        $tag->setOptions($arr);
    }

    // parser position after tag: this is next char
    $pos++;
    return TAGPARSER_RET_OK;
}
/**
 * Run the tag-based transformation over $task->text, editing it in place.
 *
 * Outline of one pass:
 *  1. $task->interpreter->ParseNext() moves $task->pos_act to the next tag
 *     candidate; the main loop ends as soon as it reports anything other than
 *     TAGPARSER_RET_OK.
 *  2. UnEscape() / ParseTag() / CheckTag() filter out escaped, malformed and
 *     rejected candidates; the latter two are handed to RemoveOrEncode().
 *  3. The plain text between the last handled position and the tag is passed
 *     to Encode() ("linear encoding").
 *  4. A start tag is either replaced at once (TagSingle) or pushed on the stack
 *     $task->st. An end tag is matched against the stack by name; start tags
 *     stacked above the match are resolved through TagSingleLate, then the
 *     matched pair is replaced through TagStandard or TagExtended, or reported
 *     as unsupported.
 *  5. After the loop the remaining text is encoded, start tags still on the
 *     stack are resolved or reported, and PostProcessing() is called.
 *
 * Task fields read here: interpreter, dry, drop_errtag, text, errarr.
 * Task fields reset and maintained here: pos_act, pos_encode_last, st.
 *
 * When $task->dry is set, the Encode() calls and the text replacements that
 * follow a successful Tag* callback are skipped. RemoveOrEncode() and the
 * removal of an unmatched end tag are still executed in that mode.
 *
 * @param object $task (by reference) Parser task, see field list above.
 *
 * @return mixed TAGPARSER_RET_ERR if $task->errarr is non-empty afterwards
 *               (presumably filled through $task->ErrorPush() - confirm),
 *               TAGPARSER_RET_OK otherwise.
 */
function Parse(&$task) {
    // Return value is discarded; presumably a timing/profiling hook - confirm.
    microtime_float();

    // fast access
    $interpreter =& $task->interpreter;
    $skip = $task->dry;
    $remove = $task->drop_errtag;

    // output text is input text: $text aliases $task->text and is edited in place
    $text =& $task->text;

    // Internal state lives on the task (bound by reference) so that the
    // interpreter callbacks can see and move it.
    $pos_act =& $task->pos_act;
    $pos_act = 0;
    $pos_encode_last =& $task->pos_encode_last;
    $pos_encode_last = 0; // start of the text that has not been linearly encoded yet

    // Stack of open start tags; $sti is the number of live entries, so
    // $st[$sti - 1] is the top. Entries at index >= $sti may be stale.
    $st =& $task->st;
    $st = array();
    $sti = 0;

    // scan for candidate of tag
    // NOTE(review): $pos is never modified and $textlen is not refreshed when
    // $text changes, so for non-empty input this condition stays true and the
    // loop ends only through the "break" below. For empty input the loop body
    // is never entered.
    $textlen = strlen($text);
    $pos = 0;
    while ($pos < $textlen) {
        microtime_float();

        // next tag candidate
        if ($interpreter->ParseNext($task) !== TAGPARSER_RET_OK) {
            break; // no further candidate
        }

        // tag start detected - from here on positions are only shifted by offsets
        $tag_start = $pos_act;

        // verify escape, remove escape
        if ($interpreter->UnEscape($task) == TAGPARSER_RET_REPLACED) {
            // Candidate was escaped and has been unescaped: it is not a tag.
            // Its text is covered by a later linear encoding.
            continue;
        }

        // parse the unescaped start-or-end tag
        $tag = NULL;
        if ($interpreter->ParseTag($tag, $task) !== TAGPARSER_RET_OK) {
            // Syntax error. ParseTag is relied upon to have stored the end
            // offset in $tag->tag_end even though it failed.
            // $offset is presumably filled by reference with the resulting
            // length change - confirm against RemoveOrEncode().
            $offset = 0;
            $this->RemoveOrEncode($offset, $task, $tag_start, $tag->tag_end, 'parsetag');
            $pos_act += $offset;
            unset($offset); // opt
            continue; // continue (if needed) after tag
        }
        $tag_end = $tag->tag_end;

        // verify tag validity content
        if ($interpreter->CheckTag($task, $tag) !== TAGPARSER_RET_OK) {
            // Syntactically correct but rejected tag. Kept separate from the
            // "unsupported" removals below. Original author's warning: plain
            // error-encoding here would be wrong because the span would be
            // encoded a second time later on.
            $offset = 0;
            $this->RemoveOrEncode($offset, $task, $tag_start, $tag_end, 'checktag');
            $pos_act += $offset;
            unset($offset); // opt
            continue; // continue (if needed) after tag
        }

        // From here on this is a real tag.
        // Linear encoding of the plain text between the last handled position
        // and the start of this tag.
        $encode_len = $tag_start - $pos_encode_last;
        $textnew = '';
        if (!$skip && $task->interpreter->Encode($textnew, $task, substr($text, $pos_encode_last, $encode_len), 'text') !== TAGPARSER_RET_NOTHING) {
            // Text was replaced: splice it in and shift everything that lies
            // behind the encoded span by the length difference.
            $encode_diff = strlen($textnew) - $encode_len;
            $text = substr($text, 0, $pos_encode_last) . $textnew . substr($text, $tag_start);
            $tag->Offset($encode_diff);
            $tag_start += $encode_diff;
            $tag_end += $encode_diff;
            $pos_act += $encode_diff;
            unset($encode_diff); //opt
        }
        unset($textnew); // opt
        $pos_encode_last = $tag_start;
        unset($encode_len); //opt

        // tag length ($tag_end is the offset of the last tag character, hence +1)
        // NOTE(review): $tag_len is not read anywhere in this function.
        $tag_len = $tag_end - $tag_start + 1;

        // go tag events
        if (is_a($tag, 'ParserEventTagEnd')) {
            // END TAG: look for the nearest start tag with the same name,
            // searching from the top of the stack downwards.
            $i = $sti - 1;
            while ($i >= 0) {
                $temp = $st[$i];
                if ($temp->name == $tag->name) {
                    // $i is the stack index of the matching start tag
                    break;
                }
                $i--;
            }
            unset($temp); // opt

            if ($i == -1) {
                // End tag without start tag: always illegal, no event is fired.
                $err = new ParserErrorContext('parser.err.tag.nostart');
                $err->GrabContext($task, $tag);
                $task->ErrorPush($err);
                unset($err); //opt
                if ($remove) {
                    // cut the tag out and resume at the position where it began
                    $text = substr($text, 0, $tag_start) . substr($text, $tag_end + 1);
                    $pos_act = $tag_start;
                } else {
                    // Leave the tag in the text; $pos_encode_last still points
                    // at its start, so the next linear encoding covers it.
                    $pos_act = $tag_end + 1;
                }
                // option would be to encode it as tagremove
                continue;
            }

            // Matching start tag sits at index $i: resolve every start tag
            // stacked above it (those were never closed).
            while ($sti > $i + 1) {
                // pop top
                --$sti;
                $starttag =& $st[$sti];
                $starttag_len = $starttag->tag_end - $starttag->tag_start + 1;

                // late event: give the interpreter a chance to treat the
                // unclosed tag as a single tag
                $tag_new = NULL;
                if ($interpreter->TagSingleLate($tag_new, $task, $starttag) !== TAGPARSER_RET_NOTHING) {
                    if ($skip) {
                        continue;
                    }
                    // tag replacement
                    $templen = strlen($tag_new) - $starttag_len;
                    $text = substr($text, 0, $starttag->tag_start) . $tag_new . substr($text, $starttag->tag_end + 1);
                    // All tracked positions lie behind the replaced tag, so
                    // they shift by the length difference.
                    // NOTE(review): unlike the else-branch, $pos_encode_last is
                    // not shifted here; every later path that changed the text
                    // overwrites it before it is read again.
                    $tag->Offset($templen);
                    $tag_start += $templen;
                    $tag_end += $templen;
                    $pos_act += $templen;
                    unset($templen); //opt
                    continue;
                } else {
                    // bad tag on stack (open tag only)
                    $err = new ParserErrorContext('parser.err.tag.remain');
                    $err->GrabContext($task, $starttag);
                    $task->ErrorPush($err);
                    unset($err); //opt
                    $offset = 0;
                    $this->RemoveOrEncode($offset, $task, $starttag->tag_start, $starttag->tag_end, 'unsupported');
                    // the tag came from the stack, so all tracked positions are behind it
                    $tag->Offset($offset);
                    $tag_start += $offset;
                    $tag_end += $offset;
                    $pos_encode_last += $offset;
                    $pos_act += $offset;
                    unset($offset); //opt
                    continue; // continue (if needed) after tag
                }
                // NOTE(review): unreachable - both branches above "continue".
                unset($starttag, $starttag_len); //opt
            }
            unset($i); //opt

            // Pop the matching start tag itself. $sti equals its index + 1
            // here, so the unset only clears a stale slot above it.
            unset($st[$sti]); //opt
            $sti--;
            $starttag =& $st[$sti];

            // Try standard replacement, then extended replacement, otherwise
            // report the pair (start and end tag) as unsupported.
            $tag_new = $tag_new_start = $tag_new_end = NULL;
            if ($task->interpreter->TagStandard($tag_new_start, $tag_new_end, $task, $starttag) !== TAGPARSER_RET_NOTHING) {
                if ($skip) {
                    continue;
                }
                // Standard: start and end tag are replaced separately, the
                // text between them is kept as it is.
                $midlen = $tag_start - $starttag->tag_end - 1; // length in between tags
                $text = substr($text, 0, $starttag->tag_start) . $tag_new_start . substr($text, $starttag->tag_end + 1, $midlen) . $tag_new_end . substr($text, $tag_end + 1);
                // resume parsing and linear encoding behind the replaced span
                $totallen = strlen($tag_new_start) + $midlen + strlen($tag_new_end);
                $pos_act = $starttag->tag_start + $totallen;
                $pos_encode_last = $pos_act;
                unset($midlen, $totallen); //opt
            } else {
                // Extended: the interpreter receives the text between the tags
                // and supplies one replacement for the whole span.
                if ($task->interpreter->TagExtended($tag_new, $task, $starttag, substr($text, $starttag->tag_end + 1, $tag_start - $starttag->tag_end - 1)) !== TAGPARSER_RET_NOTHING) {
                    if ($skip) {
                        continue;
                    }
                    $text = substr($text, 0, $starttag->tag_start) . $tag_new . substr($text, $tag_end + 1);
                    $templen = strlen($tag_new);
                    // resume parsing and linear encoding behind the replacement
                    $pos_encode_last = $pos_act = $starttag->tag_start + $templen;
                    unset($templen); //opt
                } else {
                    // UNSUPPORTED TAG
                    $err = new ParserErrorContext('parser.err.tag.unsupported');
                    $err->GrabContext($task, $starttag);
                    $task->ErrorPush($err);
                    unset($err); //opt
                    $offset_start = $offset_end = 0;
                    // End tag first: it lies behind the start tag, so the
                    // start tag's offsets are still valid for the second call.
                    $this->RemoveOrEncode($offset_end, $task, $tag_start, $tag_end, 'unsupported');
                    $this->RemoveOrEncode($offset_start, $task, $starttag->tag_start, $starttag->tag_end, 'unsupported');
                    $tag->Offset($offset_end + $offset_start);
                    $pos_act += $offset_end + $offset_start;
                    $pos_encode_last = $pos_act;
                    unset($offset_end, $offset_start); //opt
                }
            }
            unset($starttag); //opt
        } else {
            // START TAG: either replaced right away or pushed on the stack.
            $tag_new = NULL;
            $kind = $task->interpreter->TagSingle($tag_new, $task, $tag);
            if ($kind !== TAGPARSER_RET_NOTHING) {
                if ($skip) {
                    continue;
                }
                $text = substr($text, 0, $tag_start) . $tag_new . substr($text, $tag_end + 1);
                if ($kind == TAGPARSER_RET_RECURSIVE) {
                    // recursive: the replacement itself is parsed again,
                    // starting at the position where the tag began
                    $pos_act = $tag_start;
                    $pos_encode_last = $pos_act;
                } else {
                    // non-recursive: resume behind the replacement
                    $templen = strlen($tag_new);
                    $pos_act = $tag_start + $templen; // parse continue after
                    $pos_encode_last = $pos_act; // linear encoding continue after
                }
            } else {
                // No single-tag replacement: push the tag and wait for its end tag.
                $st[$sti] = $tag; // not by ref -- store & forget
                unset($tag);
                $sti++;
                $pos_act = $tag_end + 1; // parse continue after
                $pos_encode_last = $pos_act; // linear encoding continue after
            }
        }
    }

    // encode last linear part (everything behind the last handled tag)
    $textnew = '';
    if (!$skip && $task->interpreter->Encode($textnew, $task, substr($text, $pos_encode_last), 'text') !== TAGPARSER_RET_NOTHING) {
        $text = substr($text, 0, $pos_encode_last) . $textnew;
    }
    unset($textnew); //opt

    // Drain the stack; ideally it is already empty. Popping from the top
    // handles the tags from the end of the text backwards, so an edit never
    // moves a tag that is still waiting on the stack.
    while ($sti > 0) {
        // pop top
        --$sti;
        $starttag =& $st[$sti];
        // NOTE(review): $starttag_len is not read in this loop.
        $starttag_len = $starttag->tag_end - $starttag->tag_start + 1;

        // late event
        $tag_new = NULL;
        if ($interpreter->TagSingleLate($tag_new, $task, $starttag) !== TAGPARSER_RET_NOTHING) {
            if ($skip) {
                continue;
            }
            // tag replacement; no tracked positions are left to adjust
            $text = substr($text, 0, $starttag->tag_start) . $tag_new . substr($text, $starttag->tag_end + 1);
        } else {
            // bad tag on stack: start tag that was never closed
            $err = new ParserErrorContext('parser.err.tag.remain');
            $err->GrabContext($task, $starttag);
            $task->ErrorPush($err);
            unset($err); //opt
            $offset = 0;
            $this->RemoveOrEncode($offset, $task, $starttag->tag_start, $starttag->tag_end, 'unsupported');
            // the reported offset is not needed any more
            unset($offset); //opt
        }
        unset($starttag, $starttag_len); //opt
    }

    microtime_float();
    $task->interpreter->PostProcessing($task);

    // any collected error turns the overall result into an error
    if (count($task->errarr)) {
        return TAGPARSER_RET_ERR;
    }
    return TAGPARSER_RET_OK;
}