Пример #1
0
 /**
  *    Returns the stored page content as plain text, i.e.
  *    the result of passing it through
  *    SimplePage::normalise().
  *    @return string       Content as plain text.
  *    @access public
  */
 function getText()
 {
     $plain = SimplePage::normalise($this->content);

     return $plain;
 }
Пример #2
0
 /**
  *    Asserts that SimplePage::normalise() translates the
  *    entities for <, >, ", & and ' into the literal
  *    characters.
  */
 function testHtmlEntityTranslation()
 {
     $encoded = '&lt;&gt;&quot;&amp;&#039;';
     $decoded = '<>"&\'';

     $this->assertEqual(SimplePage::normalise($encoded), $decoded);
 }
Пример #3
0
 /**
  *  Visits the given node and all children, dispatching on
  *  the node name:
  *   - 'a' nodes are added to the page as links;
  *   - 'input', 'button', 'textarea' and 'select' nodes are
  *     handed to addWidgetToForm() with the enclosing label;
  *   - 'label' nodes are recorded in $this->labels and their
  *     children are walked with the label's normalised inner
  *     HTML as the enclosing label;
  *   - any other node is simply descended into.
  *
  *  @param object $node             Tidy XML node.
  *  @param mixed  $form             Form being built; forwarded to
  *                                  addWidgetToForm() and returned.
  *  @param string $enclosing_label  Text of the label wrapping this
  *                                  node, or '' when there is none.
  *  @return mixed                   The $form argument.
  */
 private function walkForm($node, $form, $enclosing_label = '')
 {
     if ($node->name === 'a') {
         $link = $this->tags()->createTag($node->name, (array) $node->attribute);
         $this->page->addLink($link->addContent($this->innerHtml($node)));
     } elseif (in_array($node->name, array('input', 'button', 'textarea', 'select'), true)) {
         $this->addWidgetToForm($node, $form, $enclosing_label);
     } elseif ($node->name === 'label') {
         $label = $this->tags()->createTag($node->name, (array) $node->attribute);
         // The label's inner HTML is needed both for the recorded tag and
         // for the text handed to the children, so render it only once.
         $inner_html = $this->innerHtml($node);
         $this->labels[] = $label->addContent($inner_html);
         if ($node->hasChildren()) {
             // Loop-invariant: normalise once instead of once per child.
             $label_text = SimplePage::normalise($inner_html);
             foreach ($node->child as $child) {
                 $this->walkForm($child, $form, $label_text);
             }
         }
     } elseif ($node->hasChildren()) {
         // NOTE(review): $enclosing_label is not forwarded here, so a widget
         // nested inside another element within a label is walked with an
         // empty label - confirm whether that is intended.
         foreach ($node->child as $child) {
             $this->walkForm($child, $form);
         }
     }
     return $form;
 }
Пример #4
0
 /**
  *    Accessor for the plain text of the page. The text is
  *    produced from the raw content by SimplePage::normalise()
  *    and kept in $this->text; a stored value that evaluates
  *    to true is returned without normalising again.
  *    @return string        Plain text of page.
  *    @access public
  */
 function getText()
 {
     if ($this->text) {
         return $this->text;
     }

     // Nothing usable stored yet (or the stored text is falsy): build it.
     $this->text = SimplePage::normalise($this->raw);

     return $this->text;
 }
Пример #5
0
 /**
  *    Exercises SimplePage::normalise() with non-breaking
  *    spaces and &raquo; entities under the UTF-8 and
  *    ISO-8859-1 charsets.
  */
 function testUtf8WhitespaceNbsp()
 {
     $utf8   = 'UTF-8';
     $latin1 = 'ISO-8859-1';
     $nbsp   = html_entity_decode('&nbsp;', ENT_COMPAT, $utf8);

     // A UTF-8 nbsp surrounded by spaces leaves a single space; alone it leaves nothing.
     $this->assertEqual(SimplePage::normalise(' § ' . $nbsp . '729', $utf8), '§ 729');
     $this->assertEqual(SimplePage::normalise($nbsp, $utf8), '');

     // In both charsets the result is three bytes long.
     $utf8_pair = html_entity_decode('a&nbsp;b', ENT_COMPAT, $utf8);
     $this->assertEqual(strlen(SimplePage::normalise($utf8_pair, $utf8)), 3);
     $latin1_pair = html_entity_decode('a&nbsp; b', ENT_COMPAT, $latin1);
     $this->assertEqual(strlen(SimplePage::normalise($latin1_pair, $latin1)), 3);

     // Entities are decoded into the requested charset.
     $quotes = '&raquo; &raquo;';
     $this->assertEqual(SimplePage::normalise($quotes, $utf8), '» »');
     $this->assertEqual(SimplePage::normalise($quotes, $latin1), utf8_decode('» »'));

     // latin1 strings should not get converted to utf8
     $this->assertEqual(strlen(SimplePage::normalise(utf8_decode('ä'))), 1);
 }