コード例 #1
0
 /**
  * Extract the text content of the document's <body> element.
  *
  * Every <img> element is first replaced by a <span> whose text is 'img'
  * followed by the concatenated pieces that parent::extract() returns for the
  * image's src attribute, so image sources become part of the returned text.
  * The passed document is modified in place.
  *
  * @param \DOMDocument $document
  * @return string Text content of the <body> element
  * @throws \RuntimeException If the document has no <body> element
  */
 protected function parseBody(\DOMDocument $document)
 {
     // getElementsByTagName() returns a live list: replacing a node while
     // iterating it directly shifts the remaining entries and skips images.
     // Snapshot the list into a plain array before mutating the tree.
     /** @var \DOMElement[] $images */
     $images = iterator_to_array($document->getElementsByTagName('img'), false);
     foreach ($images as $image) {
         $src = 'img' . implode('', parent::extract($image->getAttribute('src')));
         // Use a text node rather than createElement()'s value argument, which
         // treats '&' as the start of an entity reference.
         $span = $document->createElement('span');
         $span->appendChild($document->createTextNode($src));
         $image->parentNode->replaceChild($span, $image);
     }

     // Extract raw text
     /** @var \DOMElement|null $body */
     $body = $document->getElementsByTagName('body')->item(0);
     if (!$body) {
         throw new \RuntimeException('Document has no <body> element');
     }

     return $body->nodeValue;
 }
コード例 #2
0
 /**
  * Checks that SimpleTextExtractor::extract() turns a two-sentence string
  * into the expected flat list of lower-cased words, without punctuation.
  */
 public function testExtract()
 {
     // Arrange
     $subject = new SimpleTextExtractor();
     $expectedWords = [
         'mary',
         'is',
         'very',
         'tall',
         'she',
         'was',
         'in',
         'the',
         '9th',
         'grade',
     ];

     // Act
     $actualWords = $subject->extract('Mary is very tall. She was in the 9th grade.');

     // Assert
     static::assertEquals($expectedWords, $actualWords);
 }