/**
 * A heading inside the extracted body that duplicates the extracted title must be stripped.
 *
 * The fixture exposes "My Title" twice: once in the <title> element (picked up by the
 * title XPath) and once in an <h3> inside the <div> matched by the body XPath. After
 * processing, the serialized body must no longer contain that text, while the
 * extractor must still report it as the title.
 */
public function testRemoveH1FromBody()
{
    // Site configuration: the whole <div> is the body, <title> provides the title.
    $siteConfig = new SiteConfig();
    $siteConfig->body = array('//div');
    $siteConfig->title = array('//title');

    // Repeated filler text placed after the heading inside the body <div>.
    $filler = str_repeat('this is the best part of the show', 10);
    $html = '<html><body><title>My Title</title><div><h3>My Title</h3>' . $filler . '</div></body></html>';
    $url = 'https://lemonde.io/35941909';

    $extractor = new ContentExtractor(self::$contentExtractorConfig);
    $succeeded = $extractor->process($html, $url, $siteConfig);

    $this->assertTrue($succeeded, 'Extraction went well');

    // Serialize the extracted node so the duplicated heading can be searched as plain text.
    $bodyNode = $extractor->getContent();
    $serializedBody = $bodyNode->ownerDocument->saveXML($bodyNode);

    $this->assertNotContains('My Title', $serializedBody);
    $this->assertEquals('My Title', $extractor->getTitle());
}