Esempio n. 1
0
 /**
  * Reads one token from the source, running the scanner from the given state until it finishes.
  *
  * The returned token is an array with the keys:
  *  - 'type':      one of the Morphes_Core_Model_Html_Token constants;
  *  - 'pos', 'line', 'column': where the token starts (for quoted values, just after the opening quote);
  *  - 'end_pos':   where the token text ends (for quoted values, the position of the closing quote);
  *  - 'text':      source between 'pos' and 'end_pos';
  *  - 'full_text': source between the reader position on entry and the reader position on exit.
  *
  * @param int|string $initialState    A Morphes_Core_Model_Html_State constant, or a string holding the name of
  *                                    a raw-text element; a string starts the scanner in INITIAL_RAWTEXT and the
  *                                    lower-cased name is used to recognise the closing tag of that element.
  * @param bool       $allowWhitespace When false, leading whitespace in the INITIAL and INITIAL_VALUE states
  *                                    raises an exception instead of being skipped.
  *
  * @return array The token described above.
  *
  * @throws Exception On unexpected whitespace, an unexpected character, unexpected end of text inside CDATA,
  *                   a comment or a quoted value, or an unsupported state.
  */
 public function read($initialState, $allowWhitespace = true)
 {
     if (is_string($initialState)) {
         $state = Morphes_Core_Model_Html_State::INITIAL_RAWTEXT;
         $rawElement = strtolower($initialState);
     } else {
         $state = $initialState;
         // Keep the variable defined even if a raw-text state is passed in directly; an empty name yields the
         // same comparisons as an undefined one, without the undefined-variable notice.
         $rawElement = '';
     }
     // Loop invariants used to recognise the closing tag of the raw-text element.
     $rawEndTag = '</' . $rawElement;
     $rawEndLength = 2 + mb_strlen($rawElement);
     $startPos = $this->_pos;
     $token = array('pos' => $this->_pos, 'line' => $this->_line, 'column' => $this->_column);
     // Cleared whenever the current character does not belong to the token and must stay unread.
     $readNext = true;
     while ($state != Morphes_Core_Model_Html_State::FINISHED) {
         $ch = $this->_ch !== false ? ord($this->_ch) : false;
         switch ($state) {
             case Morphes_Core_Model_Html_State::INITIAL_TEXT:
                 if ($ch === false) {
                     $token['type'] = Morphes_Core_Model_Html_Token::EOF;
                     $readNext = false;
                     $state = Morphes_Core_Model_Html_State::FINISHED;
                 } elseif ($ch == ord('<')) {
                     // here we assume we have enough characters in read buffer. It is always the case for now,
                     // later it may break if we work with underlying stream, not memory buffer
                     // NOTE(review): _move(n) presumably skips the remaining n characters of the marker - confirm.
                     if (mb_substr($this->_source, $this->_pos, 2) == '</') {
                         $this->_move(1);
                         $token['type'] = Morphes_Core_Model_Html_Token::TAG_END;
                         $state = Morphes_Core_Model_Html_State::FINISHED;
                     } elseif (mb_substr($this->_source, $this->_pos, 8) == '<![CDATA') {
                         $this->_move(7);
                         $token['type'] = Morphes_Core_Model_Html_Token::CDATA;
                         $state = Morphes_Core_Model_Html_State::CDATA;
                     } elseif (mb_substr($this->_source, $this->_pos, 4) == '<!--') {
                         $this->_move(3);
                         $token['type'] = Morphes_Core_Model_Html_Token::COMMENT;
                         $state = Morphes_Core_Model_Html_State::COMMENT;
                     } else {
                         $token['type'] = Morphes_Core_Model_Html_Token::TAG_START;
                         $state = Morphes_Core_Model_Html_State::FINISHED;
                     }
                 } else {
                     $token['type'] = Morphes_Core_Model_Html_Token::TEXT;
                     $state = Morphes_Core_Model_Html_State::TEXT;
                 }
                 break;
             case Morphes_Core_Model_Html_State::INITIAL:
                 if ($ch == ord(' ') || $ch == ord("\r") || $ch == ord("\t") || $ch == ord("\n") || $ch == ord("\f")) {
                     if (!$allowWhitespace) {
                         $token['end_pos'] = $this->_pos;
                         throw new Exception(Mage::helper('morphes_core')->__('HTML read error %s: whitespace not expected%s', Morphes_Core_Model_Html_Token::getPosition($token), $this->getSourceAt($token)));
                     }
                 } else {
                     // The token starts at the first non-whitespace character, not where reading began.
                     $token['pos'] = $this->_pos;
                     $token['line'] = $this->_line;
                     $token['column'] = $this->_column;
                     if ($ch === false) {
                         $token['type'] = Morphes_Core_Model_Html_Token::EOF;
                         $readNext = false;
                         $state = Morphes_Core_Model_Html_State::FINISHED;
                     } elseif ($ch == ord('>')) {
                         $token['type'] = Morphes_Core_Model_Html_Token::TAG_CLOSE;
                         $state = Morphes_Core_Model_Html_State::FINISHED;
                     } elseif ($ch == ord('/') && mb_substr($this->_source, $this->_pos, 2) == '/>') {
                         $this->_move(1);
                         $token['type'] = Morphes_Core_Model_Html_Token::TAG_SELF_CLOSE;
                         $state = Morphes_Core_Model_Html_State::FINISHED;
                     } elseif ($ch == ord('=')) {
                         $token['type'] = Morphes_Core_Model_Html_Token::EQ;
                         $state = Morphes_Core_Model_Html_State::FINISHED;
                     } elseif ($ch == ord('!') || ord('a') <= $ch && $ch <= ord('z') || ord('A') <= $ch && $ch <= ord('Z')) {
                         $state = Morphes_Core_Model_Html_State::NAME;
                         $token['type'] = Morphes_Core_Model_Html_Token::NAME;
                     } else {
                         $token['end_pos'] = $this->_pos;
                         throw new Exception(Mage::helper('morphes_core')->__('HTML read error %s: unexpected character%s', Morphes_Core_Model_Html_Token::getPosition($token), $this->getSourceAt($token)));
                     }
                 }
                 break;
             case Morphes_Core_Model_Html_State::INITIAL_VALUE:
                 if ($ch == ord(' ') || $ch == ord("\r") || $ch == ord("\t") || $ch == ord("\n") || $ch == ord("\f")) {
                     if (!$allowWhitespace) {
                         $token['end_pos'] = $this->_pos;
                         throw new Exception(Mage::helper('morphes_core')->__('HTML read error %s: whitespace not expected%s', Morphes_Core_Model_Html_Token::getPosition($token), $this->getSourceAt($token)));
                     }
                 } else {
                     $token['pos'] = $this->_pos;
                     $token['line'] = $this->_line;
                     $token['column'] = $this->_column;
                     $token['type'] = Morphes_Core_Model_Html_Token::VALUE;
                     if ($ch === false) {
                         $token['type'] = Morphes_Core_Model_Html_Token::EOF;
                         $readNext = false;
                         $state = Morphes_Core_Model_Html_State::FINISHED;
                     } elseif ($ch == ord("'")) {
                         $state = Morphes_Core_Model_Html_State::SINGLE_QUOTED_VALUE;
                         // Start the token text after the opening quote.
                         $token['pos']++;
                         $token['column']++;
                     } elseif ($ch == ord('"')) {
                         $state = Morphes_Core_Model_Html_State::DOUBLE_QUOTED_VALUE;
                         // Start the token text after the opening quote.
                         $token['pos']++;
                         $token['column']++;
                     } elseif (!($ch == ord('=') || $ch == ord('<') || $ch == ord('>') || $ch == ord('`'))) {
                         $state = Morphes_Core_Model_Html_State::UNQUOTED_VALUE;
                     } else {
                         $token['end_pos'] = $this->_pos;
                         throw new Exception(Mage::helper('morphes_core')->__('HTML read error %s: unexpected character%s', Morphes_Core_Model_Html_Token::getPosition($token), $this->getSourceAt($token)));
                     }
                 }
                 break;
             case Morphes_Core_Model_Html_State::INITIAL_RAWTEXT:
                 if ($ch === false) {
                     $token['type'] = Morphes_Core_Model_Html_Token::EOF;
                     $readNext = false;
                     $state = Morphes_Core_Model_Html_State::FINISHED;
                 } else {
                     $token['type'] = Morphes_Core_Model_Html_Token::TEXT;
                     $state = Morphes_Core_Model_Html_State::RAWTEXT;
                     // An immediately following closing tag of the raw-text element yields an empty text token.
                     if ($ch == ord('<') && strtolower(mb_substr($this->_source, $this->_pos, $rawEndLength)) == $rawEndTag) {
                         if (mb_strlen($this->_source) > $this->_pos + $rawEndLength) {
                             // The character right after "</name" decides whether this really is the closing
                             // tag: it must be '>', '/' or whitespace (otherwise it is e.g. "</scripts").
                             $nextCh = ord(mb_substr($this->_source, $this->_pos + $rawEndLength, 1));
                             if ($nextCh == ord('>') || $nextCh == ord('/') || $nextCh == ord(' ') || $nextCh == ord("\r") || $nextCh == ord("\t") || $nextCh == ord("\n") || $nextCh == ord("\f")) {
                                 $readNext = false;
                                 $state = Morphes_Core_Model_Html_State::FINISHED;
                             }
                         }
                     }
                 }
                 break;
             case Morphes_Core_Model_Html_State::CDATA:
                 if ($ch === false) {
                     $token['end_pos'] = $this->_pos;
                     throw new Exception(Mage::helper('morphes_core')->__('HTML read error %s: unexpected end of text%s', Morphes_Core_Model_Html_Token::getPosition($token), $this->getSourceAt($token)));
                 } elseif ($ch == ord(']') && mb_substr($this->_source, $this->_pos, 3) == ']]>') {
                     $this->_move(2);
                     $state = Morphes_Core_Model_Html_State::FINISHED;
                 }
                 break;
             case Morphes_Core_Model_Html_State::COMMENT:
                 if ($ch === false) {
                     $token['end_pos'] = $this->_pos;
                     throw new Exception(Mage::helper('morphes_core')->__('HTML read error %s: unexpected end of text%s', Morphes_Core_Model_Html_Token::getPosition($token), $this->getSourceAt($token)));
                 } elseif ($ch == ord('-') && mb_substr($this->_source, $this->_pos, 3) == '-->') {
                     $this->_move(2);
                     $state = Morphes_Core_Model_Html_State::FINISHED;
                 }
                 break;
             case Morphes_Core_Model_Html_State::TEXT:
                 if ($ch === false || $ch == ord('<')) {
                     $readNext = false;
                     $state = Morphes_Core_Model_Html_State::FINISHED;
                 }
                 break;
             case Morphes_Core_Model_Html_State::RAWTEXT:
                 if ($ch === false) {
                     $readNext = false;
                     $state = Morphes_Core_Model_Html_State::FINISHED;
                 } elseif ($ch == ord('<') && strtolower(mb_substr($this->_source, $this->_pos, $rawEndLength)) == $rawEndTag) {
                     if (mb_strlen($this->_source) > $this->_pos + $rawEndLength) {
                         // Same closing-tag test as in INITIAL_RAWTEXT: inspect the character after "</name".
                         $nextCh = ord(mb_substr($this->_source, $this->_pos + $rawEndLength, 1));
                         if ($nextCh == ord('>') || $nextCh == ord('/') || $nextCh == ord(' ') || $nextCh == ord("\r") || $nextCh == ord("\t") || $nextCh == ord("\n") || $nextCh == ord("\f")) {
                             $readNext = false;
                             $state = Morphes_Core_Model_Html_State::FINISHED;
                         }
                     }
                 }
                 break;
             case Morphes_Core_Model_Html_State::NAME:
                 if ($ch === false || !($ch == ord('!') || ord('a') <= $ch && $ch <= ord('z') || ord('A') <= $ch && $ch <= ord('Z') || $ch == ord('_') || $ch == ord('-') || $ch == ord(':') || ord('0') <= $ch && $ch <= ord('9'))) {
                     $readNext = false;
                     $state = Morphes_Core_Model_Html_State::FINISHED;
                 }
                 break;
             case Morphes_Core_Model_Html_State::UNQUOTED_VALUE:
                 if ($ch === false || $ch == ord('"') || $ch == ord("'") || $ch == ord(' ') || $ch == ord("\r") || $ch == ord("\t") || $ch == ord("\n") || $ch == ord("\f") || $ch == ord('=') || $ch == ord('<') || $ch == ord('>') || $ch == ord('`')) {
                     $readNext = false;
                     $state = Morphes_Core_Model_Html_State::FINISHED;
                 }
                 break;
             case Morphes_Core_Model_Html_State::SINGLE_QUOTED_VALUE:
                 if ($ch === false) {
                     $token['end_pos'] = $this->_pos;
                     throw new Exception(Mage::helper('morphes_core')->__('HTML read error %s: unexpected end of text%s', Morphes_Core_Model_Html_Token::getPosition($token), $this->getSourceAt($token)));
                 } elseif ($ch == ord("'")) {
                     $state = Morphes_Core_Model_Html_State::FINISHED;
                     // Record the end before the closing quote is consumed so 'text' excludes it.
                     $token['end_pos'] = $this->_pos;
                 }
                 break;
             case Morphes_Core_Model_Html_State::DOUBLE_QUOTED_VALUE:
                 if ($ch === false) {
                     $token['end_pos'] = $this->_pos;
                     throw new Exception(Mage::helper('morphes_core')->__('HTML read error %s: unexpected end of text%s', Morphes_Core_Model_Html_Token::getPosition($token), $this->getSourceAt($token)));
                 } elseif ($ch == ord('"')) {
                     $state = Morphes_Core_Model_Html_State::FINISHED;
                     // Record the end before the closing quote is consumed so 'text' excludes it.
                     $token['end_pos'] = $this->_pos;
                 }
                 break;
             default:
                 throw new Exception('Not implemented');
         }
         if ($readNext) {
             $this->_ch = $this->_read();
         }
     }
     if (!isset($token['end_pos'])) {
         $token['end_pos'] = $this->_pos;
     }
     $token['text'] = $token['end_pos'] == $token['pos'] ? '' : mb_substr($this->_source, $token['pos'], $token['end_pos'] - $token['pos']);
     $token['full_text'] = $this->_pos == $startPos ? '' : mb_substr($this->_source, $startPos, $this->_pos - $startPos);
     return $token;
 }
Esempio n. 2
0
 /**
  * Parses a single element: its name, its attributes and, unless it is void or self-closed,
  * its child content and closing tag.
  *
  * The element name is pushed onto $this->_openedElements while the element is being parsed. When the
  * closing tag found belongs to a still-open ancestor instead of this element, the reader is moved back
  * so that the ancestor can consume that closing tag itself.
  *
  * @param mixed $parentContent Passed through unchanged to the _before, _process and _after hooks.
  *
  * @return mixed Whatever _afterParsingElement() returns.
  *
  * @throws Exception When the tag is not terminated by a tag-close or self-close token, when the child
  *                   content is not followed by a closing tag, or when the closing tag names neither
  *                   this element nor one of the currently opened elements.
  */
 public function parseElement($parentContent = null)
 {
     $element = $this->_beforeParsingElement($parentContent);

     // Element name.
     $this->_read(Morphes_Core_Model_Html_State::INITIAL, Morphes_Core_Model_Html_Token::NAME, false);
     $tagName = $this->_token['text'];
     array_push($this->_openedElements, $tagName);
     $isVoid = Morphes_Core_Model_Html_Token::isVoid($tagName);
     $isRawText = Morphes_Core_Model_Html_Token::isRawText($tagName);
     $this->_processElementName($parentContent, $element, $this->_token, $tagName, $isVoid, $isRawText);

     // Attributes: keep consuming tokens for as long as they are names or equals signs.
     for (;;) {
         $readType = $this->_read(Morphes_Core_Model_Html_State::INITIAL);
         if ($readType != Morphes_Core_Model_Html_Token::NAME
             && $this->_token['type'] != Morphes_Core_Model_Html_Token::EQ
         ) {
             break;
         }

         if ($this->_token['type'] == Morphes_Core_Model_Html_Token::EQ) {
             $this->_processAttributeEq($parentContent, $element, $this->_token);
             $this->_read(Morphes_Core_Model_Html_State::INITIAL_VALUE, Morphes_Core_Model_Html_Token::VALUE, true);
             $attrValue = $this->_token['text'];
             $this->_processAttributeValue($parentContent, $element, $this->_token, $attrValue);
         } else {
             $attrName = $this->_token['text'];
             $this->_processAttributeName($parentContent, $element, $this->_token, $attrName);
         }
     }

     // End of the opening tag.
     $closingType = $this->_token['type'];
     if ($closingType == Morphes_Core_Model_Html_Token::TAG_SELF_CLOSE) {
         $this->_processElementClose($parentContent, $element, $this->_token);
         array_pop($this->_openedElements);
     } elseif ($closingType == Morphes_Core_Model_Html_Token::TAG_CLOSE) {
         $this->_processElementClose($parentContent, $element, $this->_token);
         if ($isVoid) {
             // Void elements have neither content nor a closing tag.
             array_pop($this->_openedElements);
         } else {
             // Raw-text elements are read up to their own closing tag; everything else as ordinary text.
             $this->_read($isRawText ? $tagName : Morphes_Core_Model_Html_State::INITIAL_TEXT);
             $this->_afterParsingChildContent(
                 $parentContent,
                 $element,
                 $this->parseContent($this->_beforeParsingChildContent($parentContent, $element))
             );

             if ($this->_token['type'] != Morphes_Core_Model_Html_Token::TAG_END) {
                 $helper = Mage::helper('morphes_core');
                 throw new Exception($helper->__(
                     'HTML parser error %s: %s expected%s',
                     Morphes_Core_Model_Html_Token::getPosition($this->_token),
                     Morphes_Core_Model_Html_Token::getName(Morphes_Core_Model_Html_Token::TAG_END),
                     $this->getReader()->getSourceAt($this->_token)
                 ));
             }

             // Name inside the closing tag.
             $this->_read(Morphes_Core_Model_Html_State::INITIAL, Morphes_Core_Model_Html_Token::NAME, false);
             array_pop($this->_openedElements);

             if ($this->_token['text'] == $tagName) {
                 $this->_read(Morphes_Core_Model_Html_State::INITIAL, Morphes_Core_Model_Html_Token::TAG_CLOSE, false);
                 $this->_processElementEnd($parentContent, $element, $this->_token, $tagName);
             } elseif (in_array($this->_token['text'], $this->_openedElements)) {
                 // The closing tag belongs to an enclosing element: step the reader back over it.
                 $reader = $this->getReader();
                 $reader->move(-(3 + mb_strlen($this->_token['text'])));
                 $this->_processElementEnd($parentContent, $element, $this->_token, $tagName);
             } else {
                 $helper = Mage::helper('morphes_core');
                 throw new Exception($helper->__(
                     'HTML parser error %s: closing tag for %s expected%s',
                     Morphes_Core_Model_Html_Token::getPosition($this->_token),
                     $tagName,
                     $this->getReader()->getSourceAt($this->_token)
                 ));
             }
         }
     } else {
         $helper = Mage::helper('morphes_core');
         throw new Exception($helper->__(
             'HTML parser error %s: %s or %s expected%s',
             Morphes_Core_Model_Html_Token::getPosition($this->_token),
             Morphes_Core_Model_Html_Token::getName(Morphes_Core_Model_Html_Token::TAG_SELF_CLOSE),
             Morphes_Core_Model_Html_Token::getName(Morphes_Core_Model_Html_Token::TAG_CLOSE),
             $this->getReader()->getSourceAt($this->_token)
         ));
     }

     // Prime the next token for the caller.
     $this->_read(Morphes_Core_Model_Html_State::INITIAL_TEXT);

     return $this->_afterParsingElement($parentContent, $element);
 }