Example #1
0
 /**
  * Checks Elements::element() against known and unknown tag names.
  *
  * Every name listed in $this->html5Elements must yield a value greater than
  * zero, while names that are not HTML5 elements must yield false.
  */
 public function testElement()
 {
     foreach ($this->html5Elements as $knownTag) {
         $result = Elements::element($knownTag);
         $this->assertGreaterThan(0, $result);
     }

     // Names that are not part of HTML5.
     foreach (array('foo', 'bar', 'baz') as $unknownTag) {
         $result = Elements::element($unknownTag);
         $this->assertFalse($result);
     }
 }
Example #2
0
 /**
  * Process the start tag.
  *
  * Creates a DOM element for the tag, updates the insert mode and the
  * namespace stack, copies the attributes onto the element and attaches it
  * to the tree (either directly or through the special-case rules engine).
  *
  * @todo - XMLNS namespace handling (we need to parse, even if it's not valid)
  *       - XLink, MathML and SVG namespace handling
  *       - Omission rules: 8.1.2.4 Optional tags
  *
  * @param string $name        Tag name as passed in by the caller.
  * @param array  $attributes  Map of attribute name => attribute value.
  * @param bool   $selfClosing Accepted for interface compatibility; this
  *                            method does not currently act on it.
  *
  * @return mixed The result of Elements::element() for the (possibly
  *               rewritten) tag name: the element mask, which the tokenizer
  *               can then use to set various processing rules.
  */
 public function startTag($name, $attributes = array(), $selfClosing = false)
 {
     $lname = $this->normalizeTagName($name);
     // Make sure we have an html element.
     if (!$this->doc->documentElement && $name !== 'html' && !$this->frag) {
         $this->startTag('html');
     }
     // Set quirks mode if we're at IM_INITIAL with no doctype.
     if ($this->insertMode == static::IM_INITIAL) {
         $this->quirks = true;
         $this->parseError("No DOCTYPE specified.");
     }
     // SPECIAL TAG HANDLING:
     // Spec says do this, and "don't ask." The rewrite only applies to HTML
     // content: inside SVG (and MathML) "image" must be left alone, otherwise
     // an SVG <image> would be handled as the void HTML element "img".
     // NOTE(review): $lname was computed before this rename, so the DOM element
     // keeps the name "image" while the checks below treat it as "img" -
     // confirm whether the created element should be renamed as well.
     $inForeignContent = $this->insertMode == static::IM_IN_SVG || $this->insertMode == static::IM_IN_MATHML;
     if ($name === 'image' && !$inForeignContent) {
         $name = 'img';
     }
     // Autoclose p tags where appropriate.
     if ($this->insertMode >= static::IM_IN_BODY && Elements::isA($name, Elements::AUTOCLOSE_P)) {
         $this->autoclose('p');
     }
     // Set insert mode:
     switch ($name) {
         case 'html':
             $this->insertMode = static::IM_BEFORE_HEAD;
             break;
         case 'head':
             if ($this->insertMode > static::IM_BEFORE_HEAD) {
                 $this->parseError("Unexpected head tag outside of head context.");
             } else {
                 $this->insertMode = static::IM_IN_HEAD;
             }
             break;
         case 'body':
             $this->insertMode = static::IM_IN_BODY;
             break;
         case 'svg':
             $this->insertMode = static::IM_IN_SVG;
             break;
         case 'math':
             $this->insertMode = static::IM_IN_MATHML;
             break;
         case 'noscript':
             if ($this->insertMode == static::IM_IN_HEAD) {
                 $this->insertMode = static::IM_IN_HEAD_NOSCRIPT;
             }
             break;
     }
     // Special case handling for SVG.
     if ($this->insertMode == static::IM_IN_SVG) {
         $lname = Elements::normalizeSvgElement($lname);
     }
     // Number of entries this tag pushes onto $nsStack; they must be popped again later.
     $pushes = 0;
     // When we find a tag that appears inside $nsRoots, we have to switch the default namespace.
     if (isset($this->nsRoots[$lname]) && $this->nsStack[0][''] !== $this->nsRoots[$lname]) {
         array_unshift($this->nsStack, array('' => $this->nsRoots[$lname]) + $this->nsStack[0]);
         $pushes++;
     }
     // Holds the value of a plain "xmlns" attribute, or false when there is none.
     $needsWorkaround = false;
     if (isset($this->options["xmlNamespaces"]) && $this->options["xmlNamespaces"]) {
         // When xmlNamespaces is true and we find an 'xmlns' or 'xmlns:*' attribute, we add a new item to $nsStack.
         foreach ($attributes as $aName => $aVal) {
             if ($aName === 'xmlns') {
                 $needsWorkaround = $aVal;
                 array_unshift($this->nsStack, array('' => $aVal) + $this->nsStack[0]);
                 $pushes++;
             } elseif ((($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : '') === 'xmlns') {
                 array_unshift($this->nsStack, array(substr($aName, $pos + 1) => $aVal) + $this->nsStack[0]);
                 $pushes++;
             }
         }
     }
     if ($this->onlyInline && Elements::isA($lname, Elements::BLOCK_TAG)) {
         $this->autoclose($this->onlyInline);
         $this->onlyInline = null;
     }
     $ele = null;
     try {
         // A colon at position 0 is deliberately treated as "no prefix".
         $prefix = ($pos = strpos($lname, ':')) ? substr($lname, 0, $pos) : '';
         if ($needsWorkaround !== false) {
             // The element is built by parsing a throw-away XML snippet that
             // declares the default namespace. The interpolated values come
             // from the parsed document, so they are escaped: otherwise a
             // quote, "<" or "&" could break the snippet or add attributes.
             $xml = "<{$lname} xmlns=\"" . htmlspecialchars((string) $needsWorkaround, ENT_COMPAT, 'UTF-8') . "\" ";
             if (strlen($prefix) && isset($this->nsStack[0][$prefix])) {
                 $xml .= "xmlns:{$prefix}=\"" . htmlspecialchars((string) $this->nsStack[0][$prefix], ENT_COMPAT, 'UTF-8') . "\"";
             }
             $xml .= "/>";
             $frag = new \DOMDocument('1.0', 'UTF-8');
             // loadXML() reports malformed input through libxml errors instead
             // of exceptions; capture them so a bad tag name ends up in the
             // "invalid" fallback below rather than in importNode(null).
             $previousLibxmlSetting = libxml_use_internal_errors(true);
             $loaded = $frag->loadXML($xml);
             if (!$previousLibxmlSetting) {
                 // The error buffer was only enabled for this call; drop what it collected.
                 libxml_clear_errors();
             }
             libxml_use_internal_errors($previousLibxmlSetting);
             if ($loaded && $frag->documentElement) {
                 $ele = $this->doc->importNode($frag->documentElement, true);
             }
         } else {
             $htmlNsDisabled = isset($this->options[self::OPT_DISABLE_HTML_NS]) && $this->options[self::OPT_DISABLE_HTML_NS];
             if (!isset($this->nsStack[0][$prefix]) || ($prefix === "" && $htmlNsDisabled)) {
                 $ele = $this->doc->createElement($lname);
             } else {
                 $ele = $this->doc->createElementNS($this->nsStack[0][$prefix], $lname);
             }
         }
     } catch (\DOMException $e) {
         $ele = null;
     }
     // Either DOM rejected the name or the workaround snippet could not be parsed.
     if (!($ele instanceof \DOMElement)) {
         $this->parseError("Illegal tag name: <{$lname}>. Replaced with <invalid>.");
         $ele = $this->doc->createElement('invalid');
     }
     if (Elements::isA($lname, Elements::BLOCK_ONLY_INLINE)) {
         $this->onlyInline = $lname;
     }
     // When we add some namespaces, we have to track them. Later, when "endElement" is invoked, we have to remove them.
     // When we are on a void tag, we do not need to care about namespace nesting.
     if ($pushes > 0 && !Elements::isA($name, Elements::VOID_TAG)) {
         // PHP tends to free the memory used by DOM,
         // to avoid spl_object_hash collisions we have to avoid garbage collection of $ele by storing it into $pushes
         // see https://bugs.php.net/bug.php?id=67459
         $this->pushes[spl_object_hash($ele)] = array($pushes, $ele);
         // SEE https://github.com/facebook/hhvm/issues/2962
         if (defined('HHVM_VERSION')) {
             $ele->setAttribute('html5-php-fake-id-attribute', spl_object_hash($ele));
         }
     }
     foreach ($attributes as $aName => $aVal) {
         // xmlns attributes can't be set
         if ($aName === 'xmlns') {
             continue;
         }
         if ($this->insertMode == static::IM_IN_SVG) {
             $aName = Elements::normalizeSvgAttribute($aName);
         } elseif ($this->insertMode == static::IM_IN_MATHML) {
             $aName = Elements::normalizeMathMlAttribute($aName);
         }
         // The DOM setters expect a string value. Passing null (presumably
         // what a valueless attribute arrives as - verify against the
         // tokenizer) is deprecated since PHP 8.1, so normalise up front.
         $aVal = (string) $aVal;
         try {
             $prefix = ($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : false;
             if ($prefix === 'xmlns') {
                 $ele->setAttributeNs(self::NAMESPACE_XMLNS, $aName, $aVal);
             } elseif ($prefix !== false && isset($this->nsStack[0][$prefix])) {
                 $ele->setAttributeNs($this->nsStack[0][$prefix], $aName, $aVal);
             } else {
                 $ele->setAttribute($aName, $aVal);
             }
         } catch (\DOMException $e) {
             $this->parseError("Illegal attribute name for tag {$name}. Ignoring: {$aName}");
             continue;
         }
         // This is necessary on a non-DTD schema, like HTML5.
         if ($aName == 'id') {
             $ele->setIdAttribute('id', true);
         }
     }
     // Some elements have special processing rules. Handle those separately.
     if ($this->rules->hasRules($name) && $this->frag !== $this->current) {
         $this->current = $this->rules->evaluate($ele, $this->current);
     } else {
         $this->current->appendChild($ele);
         // XXX: Need to handle self-closing tags and unary tags.
         if (!Elements::isA($name, Elements::VOID_TAG)) {
             $this->current = $ele;
         }
     }
     // This is sort of a last-ditch attempt to correct for cases where no head/body
     // elements are provided.
     if ($this->insertMode <= static::IM_BEFORE_HEAD && $name != 'head' && $name != 'html') {
         $this->insertMode = static::IM_IN_BODY;
     }
     // When we are on a void tag, we do not need to care about namespace nesting,
     // but we have to remove the namespaces pushed to $nsStack.
     if ($pushes > 0 && Elements::isA($name, Elements::VOID_TAG)) {
         // Remove the namespaces defined by the current node.
         for ($i = 0; $i < $pushes; $i++) {
             array_shift($this->nsStack);
         }
     }
     // Return the element mask, which the tokenizer can then use to set
     // various processing rules.
     return Elements::element($name);
 }