예제 #1
0
 /**
  * Drives `nextToken` over the segmented input until it is exhausted and
  * collects every token produced along the way.
  *
  * After each token the per-token scratch state (buffer, buffered end tag
  * name, temporary buffer, current token object) is reset, and the state to
  * start the next token from is stored in `_startState`.
  *
  * @throws \InvalidArgumentException When `nextToken` returns false without
  *                                   advancing the read position.
  * @return HTMLToken[] The accumulated tokens; an empty array when the input
  *                     is already at its end on entry.
  */
 public function tokenizer()
 {
     if ($this->_SegmentedString->eos()) {
         return array();
     }

     do {
         $from = $this->_SegmentedString->tell();
         $this->_startPos = $from;
         $produced = $this->nextToken($this->_SegmentedString);
         $this->_state = static::DataState;
         $to = $this->_SegmentedString->tell();

         // No token and no progress: bail out instead of spinning forever.
         if ($produced === false && $to - $from === 0) {
             throw new \InvalidArgumentException('Given invalid string or invalid statement.');
         }

         $enteredState = $this->_startState;
         $kind = $this->_Token->getType();

         // Outside `DataState`, `nextToken` hands back a Character token that
         // also swallowed the following EndTag. Detect that case so the
         // EndTag part can be re-read as its own token.
         $holdsEndTag = $kind === HTMLToken::Character
             && $this->_bufferedEndTagName !== ''
             && in_array(
                 $enteredState,
                 array(static::RAWTEXTState, static::RCDATAState, static::ScriptDataState),
                 true
             );

         if ($holdsEndTag) {
             // Keep only the Character part of the buffer.
             $textLength = strlen($this->_Token->getData());
             $this->_buffer = array_slice($this->_buffer, 0, $textLength);
             $spanEnd = $from + $textLength;
         } else {
             $spanEnd = $to;
         }

         $this->_compactBuffer($from, $spanEnd, $kind);
         $this->_tokens[] = $this->_Token;

         if ($holdsEndTag) {
             // Rewind to the end of the Character data and tokenize the
             // EndTag again from the state this round started in.
             $this->_SegmentedString->seek($spanEnd);
             $this->_state = $enteredState;
         } elseif ($kind === HTMLToken::StartTag) {
             // FIXME: The tokenizer should do this work for us.
             $this->_updateStateFor($this->_Token->getTagName());
         } else {
             $this->_state = static::DataState;
         }

         // Prepare a clean slate for the next token.
         $this->_startState = $this->_state;
         $this->_buffer = array();
         $this->_bufferedEndTagName = '';
         $this->_temporaryBuffer = '';
         $this->_Token = new HTMLToken();
     } while (!$this->_SegmentedString->eos());

     return $this->_tokens;
 }