/**
  * Returns a document that can be persisted based on the resource.
  *
  * @param \VDB\Spider\Resource $resource
  *
  * @return \Simgroep\ConcurrentSpiderBundle\PersistableDocument
  */
 public function getDocumentByResource(Resource $resource)
 {
     // getContentType() may hand back the complete Content-Type header value,
     // i.e. including parameters such as "; charset=binary", and media types are
     // case-insensitive. Reduce it to the bare lower-case "type/subtype" so that
     // e.g. "application/pdf; charset=binary" is not silently treated as HTML.
     $header = (string) $resource->getResponse()->getContentType();
     $headerParts = explode(';', $header, 2);
     $contentType = strtolower(trim($headerParts[0]));

     switch ($contentType) {
         case 'application/pdf':
         case 'application/octet-stream':
             $data = $this->pdf->getData($resource);
             break;
         case 'application/msword':
         case 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
         case 'application/vnd.openxmlformats-officedocument.wordprocessingml.template':
             // The content type alone is not trusted for Word files: the URI decides
             // between the Word 2007 (.docx) extractor and the legacy .doc extractor.
             // NOTE(review): this matches ".docx" anywhere in the URI, not only as
             // the file extension - confirm that is intended.
             if (false !== stripos($resource->getUri()->toString(), '.docx')) {
                 $data = $this->word2007->getData($resource);
                 break;
             }
             $data = $this->msdoc->getData($resource);
             break;
         case 'application/rtf':
             $data = $this->rtf->getData($resource);
             break;
         case 'application/vnd.oasis.opendocument.text':
             $data = $this->odt->getData($resource);
             break;
         case 'text/html':
         default:
             // Anything unrecognised (including an empty content type) is handled as HTML.
             $data = $this->html->getData($resource);
             break;
     }

     return new PersistableDocument($data);
 }
 /**
  * @test
  * @expectedException \Simgroep\ConcurrentSpiderBundle\InvalidContentException
  * @expectedExceptionMessage PDF didn't contain enough content (minimal chars is 3)
  */
 public function throwExceptionOnLessThenMinimalContentLength()
 {
     // Parsed PDF document whose extracted text is empty, i.e. below the minimal length.
     $document = $this->getMockBuilder('Smalot\\PdfParser\\Document')
         ->setMethods(['getText'])
         ->getMock();
     $document->expects($this->once())->method('getText')->will($this->returnValue(''));

     // PDF parser that returns the empty document above.
     $pdfType = $this->getMockBuilder('Smalot\\PdfParser\\Parser')
         ->disableOriginalConstructor()
         ->setMethods(['getText', 'parseContent'])
         ->getMock();
     $pdfType->expects($this->once())->method('parseContent')->will($this->returnValue($document));

     // HTTP response whose body must be requested exactly once, as a string (argument true).
     $response = $this->getMockBuilder('Guzzle\\Http\\Message\\Response')
         ->disableOriginalConstructor()
         ->setMethods(['getBody'])
         ->getMock();
     $response->expects($this->once())->method('getBody')->with(true);

     // Crawled resource wrapping that response.
     $resource = $this->getMockBuilder('VDB\\Spider\\Resource')
         ->disableOriginalConstructor()
         ->setMethods(['getResponse'])
         ->getMock();
     $resource->expects($this->once())->method('getResponse')->will($this->returnValue($response));

     $type = new Pdf($pdfType);

     // This call is expected to throw (see the @expectedException annotations on
     // this test). Nothing placed after it would ever run, so the test ends here.
     $type->getData($resource);
 }