/**
 *    Returns the page content in its plain-text form.
 *    The conversion is delegated entirely to
 *    SimplePage::normalise(); according to the original
 *    description this behaves like a text mode browser
 *    (whitespace normalised, images reduced to their
 *    alt text).
 *    @return string        Content as plain text.
 *    @access public
 */
function getText() {
    $plain = SimplePage::normalise($this->content);
    return $plain;
}
/**
 *    SimplePage::normalise() is expected to translate HTML
 *    entities into the characters they stand for.
 *    The input must be entity-encoded: passing the already
 *    decoded characters would test nothing (and an unescaped
 *    apostrophe breaks the string literal).
 */
function testHtmlEntityTranslation() {
    $this->assertEqual(
            SimplePage::normalise('&lt;&gt;&quot;&amp;&#039;'),
            '<>"&\'');
}
/**
 *    Visits the given node and all of its children, registering
 *    what it finds along the way: anchors are added to the page
 *    as links, form controls are passed to addWidgetToForm(),
 *    and labels are collected in $this->labels.
 *    @param object $node             Tidy XML node.
 *    @param mixed $form              Form being populated; passed through
 *                                    to addWidgetToForm() and returned.
 *    @param string $enclosing_label  Normalised text of the label element
 *                                    wrapping this node, or '' when the
 *                                    node is not inside a label.
 *    @return mixed                   The $form argument.
 */
private function walkForm($node, $form, $enclosing_label = '') {
    if ($node->name == 'a') {
        $this->page->addLink(
                $this->tags()->createTag($node->name, (array) $node->attribute)
                        ->addContent($this->innerHtml($node)));
    } elseif (in_array($node->name, array('input', 'button', 'textarea', 'select'))) {
        $this->addWidgetToForm($node, $form, $enclosing_label);
    } elseif ($node->name == 'label') {
        $label = $this->tags()->createTag($node->name, (array) $node->attribute);
        // The label's inner HTML is needed both for the tag content and
        // for the text handed to nested widgets, so fetch it only once.
        // NOTE(review): assumes innerHtml() is a pure read of $node - confirm.
        $inner = $this->innerHtml($node);
        $this->labels[] = $label->addContent($inner);
        if ($node->hasChildren()) {
            // Same for every child, so normalise outside the loop.
            $label_text = SimplePage::normalise($inner);
            foreach ($node->child as $child) {
                $this->walkForm($child, $form, $label_text);
            }
        }
    } elseif ($node->hasChildren()) {
        // Any other container: descend without an enclosing label.
        foreach ($node->child as $child) {
            $this->walkForm($child, $form);
        }
    }
    return $form;
}
/**
 *    Returns the plain text of the page, as produced by
 *    SimplePage::normalise() from the raw content. The
 *    result is kept in $this->text and reused on later calls.
 *    @return string        Plain text of page.
 *    @access public
 */
function getText() {
    // Any truthy cached value is reused; a falsy one (e.g. an
    // empty result) is normalised again on every call.
    if ($this->text) {
        return $this->text;
    }
    $this->text = SimplePage::normalise($this->raw);
    return $this->text;
}
/**
 *    Non-breaking spaces must be handled as whitespace by
 *    SimplePage::normalise() for the charset it is given, and
 *    text must stay in that charset rather than being converted.
 *    The &nbsp; / &raquo; inputs are written as entities on
 *    purpose: html_entity_decode() is what produces the
 *    charset-specific bytes under test.
 */
function testUtf8WhitespaceNbsp() {
    // A space followed by a UTF-8 nbsp collapses to a single space.
    $this->assertEqual(
            SimplePage::normalise(' § ' . html_entity_decode('&nbsp;', ENT_COMPAT, 'UTF-8') . '729', 'UTF-8'),
            '§ 729');
    // A lone nbsp is trimmed away completely.
    $this->assertEqual(
            SimplePage::normalise(html_entity_decode('&nbsp;', ENT_COMPAT, 'UTF-8'), 'UTF-8'),
            '');
    // Byte lengths: the two-byte UTF-8 nbsp must end up as one byte.
    $this->assertEqual(
            strlen(SimplePage::normalise(html_entity_decode('a&nbsp;b', ENT_COMPAT, 'UTF-8'), 'UTF-8')),
            3);
    $this->assertEqual(
            strlen(SimplePage::normalise(html_entity_decode('a&nbsp;b', ENT_COMPAT, 'ISO-8859-1'), 'ISO-8859-1')),
            3);
    // NOTE(review): the separator between the two &raquo; entities is
    // reconstructed as a plain space - confirm whether it was &nbsp;.
    $this->assertEqual(SimplePage::normalise('&raquo; &raquo;', 'UTF-8'), '» »');
    $this->assertEqual(SimplePage::normalise('&raquo; &raquo;', 'ISO-8859-1'), utf8_decode('» »'));
    // latin1 strings should not get converted to utf8
    $this->assertEqual(strlen(SimplePage::normalise(utf8_decode('ä'))), 1);
}