/**
  * Returns a document that can be persisted based on the resource.
  *
  * The media type of the resource's response decides which type handler
  * extracts the data; anything not listed explicitly is handed to the HTML
  * handler. Parameters such as "; charset=..." and letter case in the
  * Content-Type value are ignored when choosing the handler.
  *
  * @param \VDB\Spider\Resource $resource
  *
  * @return \Simgroep\ConcurrentSpiderBundle\PersistableDocument
  */
 public function getDocumentByResource(Resource $resource)
 {
     // A Content-Type value may carry parameters ("application/pdf; charset=binary")
     // and media types are case-insensitive. Reduce it to the bare lower-case
     // media type so such responses do not silently fall through to HTML.
     $contentTypeParts = explode(';', (string) $resource->getResponse()->getContentType(), 2);
     $mediaType = strtolower(trim($contentTypeParts[0]));

     switch ($mediaType) {
         case 'application/pdf':
         case 'application/octet-stream':
             $data = $this->pdf->getData($resource);
             break;
         case 'application/msword':
         case 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
         case 'application/vnd.openxmlformats-officedocument.wordprocessingml.template':
             // The media type alone does not pick the Word handler: a URI
             // containing ".docx" goes to word2007, everything else to msdoc.
             if (false !== stripos($resource->getUri()->toString(), '.docx')) {
                 $data = $this->word2007->getData($resource);
                 break;
             }
             $data = $this->msdoc->getData($resource);
             break;
         case 'application/rtf':
             $data = $this->rtf->getData($resource);
             break;
         case 'application/vnd.oasis.opendocument.text':
             $data = $this->odt->getData($resource);
             break;
         case 'text/html':
         default:
             // Unknown or missing content types are treated as HTML.
             $data = $this->html->getData($resource);
             break;
     }
     return new PersistableDocument($data);
 }
 /**
  * Checks the array returned by Html::getData() for an HTML page whose head
  * carries, among other meta tags, DCTERMS.available and DCTERMS.modified.
  *
  * The resource, response and URI are mocks with fixed call-count
  * expectations. The date-like expected fields are produced with date()
  * while the expectation is built, i.e. after getData() has returned.
  *
  * @test
  */
 public function persistRetrieveValidDataFromWebPageWithDateAvailableAndModifiedValuesSetInFields()
 {
     $pageUrl = 'http://blabdummy.de/dummydir/somewebpagedummyfile.html';
     $timestampFormat = 'Y-m-d\\TH:i:s\\Z';

     // Response stub: content type and last-modified are each read exactly once.
     $responseMock = $this->getMockBuilder('Guzzle\\Http\\Message\\Response')
         ->disableOriginalConstructor()
         ->setMethods(['getContentType', 'getLastModified'])
         ->getMock();
     $responseMock->expects($this->once())
         ->method('getContentType')
         ->will($this->returnValue('text/html'));
     $responseMock->expects($this->once())
         ->method('getLastModified')
         ->will($this->returnValue('2015-06-18T23:49:41Z'));

     // URI stub: rendered to a string exactly twice.
     $uriMock = $this->getMockBuilder('VDB\\Uri\\Uri')
         ->disableOriginalConstructor()
         ->setMethods(['toString'])
         ->getMock();
     $uriMock->expects($this->exactly(2))
         ->method('toString')
         ->will($this->returnValue($pageUrl));

     // Page fixture, one head element per line for readability.
     $pageMarkup = '<html><head><title>Site dummy 1</title>'
         . '<meta name="description" content="Dummy description" />'
         . '<meta name="keywords" content="keyword1,keyword2" />'
         . '<meta name="author" content="Dummy Author" />'
         . '<meta name="SIM_archief" content="yes" />'
         . '<meta name="SIM.simfaq" content="yes" />'
         . '<meta name="SIM.item_trefwoorden" content="trefwoorden" />'
         . '<meta name="DCTERMS.title" content="Title 2" />'
         . '<meta name="DCTERMS.language" content="pl-PL" />'
         . '<meta name="DCTERMS.type" content="dummytype" />'
         . '<meta name="SIM.simloket_synoniemen" content="synoniemen" />'
         . '<meta name="SIM.spatial" content="spatial" />'
         . '<meta name="DCTERMS.identifier" content="identifierurl" />'
         . '<meta name="SIM.audience" content="audience" />'
         . '<meta name="SIM.subject" content="subject" />'
         . '<meta name="DCTERMS.available" content="2015-06-18T23:49:41Z" />'
         . '<meta name="DCTERMS.modified" content="2015-06-18T23:49:41Z" />'
         . '</head><body><p>This is the text value.</p></body></html>';
     $domCrawler = new Crawler('', 'https://github.com');
     $domCrawler->addContent($pageMarkup);

     // Resource stub wiring the crawler, response and URI together.
     $resourceMock = $this->getMockBuilder('VDB\\Spider\\Resource')
         ->disableOriginalConstructor()
         ->setMethods(['getCrawler', 'getResponse', 'getUri'])
         ->getMock();
     $resourceMock->expects($this->exactly(5))
         ->method('getCrawler')
         ->will($this->returnValue($domCrawler));
     $resourceMock->expects($this->exactly(2))
         ->method('getResponse')
         ->will($this->returnValue($responseMock));
     $resourceMock->expects($this->exactly(2))
         ->method('getUri')
         ->will($this->returnValue($uriMock));

     $htmlType = new Html(null);
     $actualData = $htmlType->getData($resourceMock);

     $expectedData = [
         'id' => sha1($pageUrl),
         'url' => $pageUrl,
         'content' => 'This is the text value.',
         'title' => 'Site dummy 1',
         'tstamp' => date($timestampFormat),
         'type' => ["text/html", "text", "html"],
         'contentLength' => 23,
         'lastModified' => date($timestampFormat),
         'date' => date($timestampFormat),
         'lang' => 'nl-NL',
         'author' => 'Dummy Author',
         'publishedDate' => date($timestampFormat),
         'updatedDate' => date($timestampFormat),
         'strippedContent' => 'This is the text value.',
         'description' => 'Dummy description',
         'keywords' => 'keyword1,keyword2',
     ];

     $this->assertEquals($expectedData, $actualData);
 }