コード例 #1
0
 /**
  * Checks CharacterReference::lookupDecimal() against a small table of
  * decimal code points, given both as integers and as numeric strings
  * (including one with a leading zero).
  */
 public function testLookupDecimal()
 {
     // Each row is: expected character, code point argument.
     $cases = array(
         array('&', 38),
         array('&', '38'),
         array('<', 60),
         array('Σ', 931),
         array('Σ', '0931'),
     );
     foreach ($cases as $case) {
         list($expectedChar, $codePoint) = $case;
         $actualChar = CharacterReference::lookupDecimal($codePoint);
         $this->assertEquals($expectedChar, $actualChar);
     }
 }
コード例 #2
0
ファイル: Tokenizer.php プロジェクト: aWEBoLabs/taxi
 /**
  * Decode a character reference and return the string.
  *
  * The scanner is expected to be sitting on the '&' that opens the
  * reference. Numeric references (&#DIGITS; and &#xHEX;) and named
  * references (&name;) are handled. Whenever the text after the '&' cannot
  * be decoded, a bare '&' is returned so the caller can treat it as
  * literal text; in most of those paths the scanner is rewound first.
  *
  * @param boolean $inAttribute
  *            Set to true if the text is inside of an attribute value.
  *            false otherwise. When true, a reference that lacks its
  *            trailing ';' is unconsumed and a bare '&' is returned.
  *
  * @return string|boolean
  *            false if the current character is not '&'. Otherwise the
  *            decoded entity for a complete reference, a bare '&' when
  *            nothing could be decoded, or '&' followed by the decoded
  *            entity when the trailing ';' is missing outside of an
  *            attribute (a parse error is reported in that case).
  */
 protected function decodeCharacterReference($inAttribute = false)
 {
     // If it fails this, it's definitely not an entity.
     if ($this->scanner->current() !== '&') {
         return false;
     }
     // Next char after &.
     $tok = $this->scanner->next();
     // Remember where the candidate reference starts so the named-entity
     // and attribute paths below can rewind by exactly what was read.
     $start = $this->scanner->position();
     // Nothing follows the & (presumably end of input). This is checked
     // explicitly instead of with "== false" because the character '0' is
     // loosely equal to false and must not be mistaken for "no character".
     if ($tok === false || $tok === null || $tok === '') {
         return '&';
     }
     // These indicate not an entity. We return just
     // the &.
     if (strspn($tok, static::WHITE . "&<") == 1) {
         return '&';
     }
     // Numeric entity
     if ($tok === '#') {
         $tok = $this->scanner->next();
         // Hexidecimal encoding.
         // X[0-9a-fA-F]+;
         // x[0-9a-fA-F]+;
         if ($tok === 'x' || $tok === 'X') {
             // Consume x
             $tok = $this->scanner->next();
             // Convert from hex code to char.
             $hex = $this->scanner->getHex();
             // Only a genuinely empty result means "no hex digits".
             // empty() would also reject the single digit '0', which
             // getHex() has presumably already consumed, so the fixed
             // unconsume(2) below would then rewind to the wrong place.
             if ((string) $hex === '') {
                 $this->parseError("Expected &#xHEX;, got &#x%s", $tok);
                 // We unconsume because we don't know what parser rules might
                 // be in effect for the remaining chars. For example. '&#>'
                 // might result in a specific parsing rule inside of tag
                 // contexts, while not inside of pcdata context.
                 $this->scanner->unconsume(2);
                 return '&';
             }
             $entity = CharacterReference::lookupHex($hex);
         } else {
             // Decimal encoding.
             // [0-9]+;
             // Convert from decimal to char.
             $numeric = $this->scanner->getNumeric();
             if ($numeric === false) {
                 $this->parseError("Expected &#DIGITS;, got &#%s", $tok);
                 $this->scanner->unconsume(2);
                 return '&';
             }
             $entity = CharacterReference::lookupDecimal($numeric);
         }
     } else {
         // String entity.
         // Attempt to consume a string up to a ';'.
         // [a-zA-Z0-9]+;
         $cname = $this->scanner->getAsciiAlpha();
         $entity = CharacterReference::lookupName($cname);
         // When no entity is found provide the name of the unmatched string
         // and continue on as the & is not part of an entity. The & will
         // be converted to &amp; elsewhere.
         // The loose comparison is kept on purpose: any empty lookup result
         // is treated as "no match", exactly as before.
         if ($entity == null) {
             $this->parseError("No match in entity table for '%s'", $cname);
             $this->scanner->unconsume($this->scanner->position() - $start);
             return '&';
         }
     }
     // The scanner has advanced the cursor for us.
     $tok = $this->scanner->current();
     // We have an entity. We're done here.
     if ($tok === ';') {
         // Step past the terminating ';'.
         $this->scanner->next();
         return $entity;
     }
     // If in an attribute, then failing to match ; means unconsume the
     // entire string. Otherwise, failure to match is an error.
     if ($inAttribute) {
         $this->scanner->unconsume($this->scanner->position() - $start);
         return '&';
     }
     $this->parseError("Expected &ENTITY;, got &ENTITY%s (no trailing ;) ", $tok);
     return '&' . $entity;
 }