/**
 * Checks CharacterReference::lookupDecimal() against known code points.
 *
 * Inputs are given both as integers and as numeric strings (including one
 * with a leading zero); each must yield the expected character.
 */
public function testLookupDecimal()
{
    // Each row: expected character, decimal code point passed in.
    $expectations = array(
        array('&', 38),
        array('&', '38'),
        array('<', 60),
        array('Σ', 931),
        array('Σ', '0931'),
    );

    foreach ($expectations as $row) {
        list($character, $codePoint) = $row;
        $this->assertEquals($character, CharacterReference::lookupDecimal($codePoint));
    }
}
/**
 * Decode a character reference and return the string.
 *
 * The scanner is expected to be positioned on the '&' that opens the
 * candidate reference. Numeric (&#DIGITS; / &#xHEX;) and named (&name;)
 * references are recognized. Whenever the input turns out not to be a
 * complete, ';'-terminated reference, a bare '&' is returned so that the
 * caller can treat the ampersand as literal text.
 *
 * @param bool $inAttribute
 *   Set to true if the text is inside of an attribute value, false
 *   otherwise. Inside an attribute a reference that lacks its trailing ';'
 *   is silently left undecoded; outside an attribute the same situation is
 *   additionally reported as a parse error.
 *
 * @return string|false
 *   false if the scanner is not currently on '&'; the result of the
 *   CharacterReference lookup if a reference terminated by ';' was read;
 *   otherwise the string '&'.
 */
protected function decodeCharacterReference($inAttribute = false)
{
    // If it fails this, it's definitely not an entity.
    if ($this->scanner->current() !== '&') {
        return false;
    }

    // Next char after &. $start remembers where the candidate reference
    // begins so that later failures can rewind to this point.
    $tok = $this->scanner->next();
    $start = $this->scanner->position();

    // Presumably next() yields false when the input is exhausted.
    // NOTE(review): the comparison is intentionally left loose; it also
    // matches the string '0', so "&0" is returned as a bare & from here.
    if ($tok == false) {
        return '&';
    }

    // These indicate not an entity. We return just the & and leave the
    // cursor on $tok.
    if (strspn($tok, static::WHITE . "&<") === 1) {
        return '&';
    }

    if ($tok === '#') {
        // Numeric entity.
        $tok = $this->scanner->next();

        if ($tok === 'x' || $tok === 'X') {
            // Hexadecimal encoding.
            // X[0-9a-fA-F]+;
            // x[0-9a-fA-F]+;
            $tok = $this->scanner->next(); // Consume x

            // Convert from hex code to char.
            $hex = $this->scanner->getHex();
            // NOTE(review): empty() is also true for the string '0', so
            // "&#x0;" takes this error path - confirm that is intended.
            if (empty($hex)) {
                $this->parseError("Expected &#xHEX;, got &#x%s", $tok);
                // We unconsume because we don't know what parser rules might
                // be in effect for the remaining chars. For example. '&#>'
                // might result in a specific parsing rule inside of tag
                // contexts, while not inside of pcdata context.
                $this->scanner->unconsume(2);
                return '&';
            }
            $entity = CharacterReference::lookupHex($hex);
        } else {
            // Decimal encoding.
            // [0-9]+;
            // Convert from decimal to char.
            $numeric = $this->scanner->getNumeric();
            if ($numeric === false) {
                $this->parseError("Expected &#DIGITS;, got &#%s", $tok);
                $this->scanner->unconsume(2);
                return '&';
            }
            $entity = CharacterReference::lookupDecimal($numeric);
        }
    } else {
        // Named entity: consume the run of ASCII letters after the & and
        // look it up.
        $cname = $this->scanner->getAsciiAlpha();
        $entity = CharacterReference::lookupName($cname);

        // When no entity is found provide the name of the unmatched string
        // and continue on as the & is not part of an entity. The & will
        // be converted to &amp; elsewhere.
        if ($entity == null) {
            $this->parseError("No match in entity table for '%s'", $cname);
            $this->scanner->unconsume($this->scanner->position() - $start);
            return '&';
        }
    }

    // The scanner has advanced the cursor for us.
    $tok = $this->scanner->current();

    // We have an entity. We're done here.
    if ($tok === ';') {
        $this->scanner->next();
        return $entity;
    }

    // The reference is missing its trailing ';'. Outside of an attribute
    // that is a parse error; it is reported before rewinding so the error is
    // raised at the offending character.
    if (!$inAttribute) {
        $this->parseError("Expected &ENTITY;, got &ENTITY%s (no trailing ;) ", $tok);
    }

    // In either context, give back everything consumed after the & and
    // return only the ampersand. Returning '&' followed by the decoded
    // entity (as was done previously outside attributes) produced output
    // such as "&&" for "&amp" and dropped the original text.
    $this->scanner->unconsume($this->scanner->position() - $start);
    return '&';
}