/**
  * Returns a document that can be persisted based on the resource.
  *
  * The extractor is selected from the media type reported by the resource's
  * response:
  *  - PDF and generic binary ("application/octet-stream") use the PDF extractor;
  *  - Word media types use the Word 2007 extractor when the URI contains
  *    ".docx" (case-insensitive), and the legacy MS Word extractor otherwise;
  *  - RTF and OpenDocument text use their dedicated extractors;
  *  - everything else, including "text/html", uses the HTML extractor.
  *
  * @param \VDB\Spider\Resource $resource
  *
  * @return \Simgroep\ConcurrentSpiderBundle\PersistableDocument
  */
 public function getDocumentByResource(Resource $resource)
 {
     // The reported content type may carry parameters (e.g. "; charset=utf-8")
     // or use different casing. Media types are case-insensitive, so compare
     // only the lowercased "type/subtype" part; otherwise such responses would
     // silently fall through to the HTML extractor.
     $contentTypeParts = explode(';', (string) $resource->getResponse()->getContentType(), 2);
     $mediaType = strtolower(trim($contentTypeParts[0]));

     switch ($mediaType) {
         case 'application/pdf':
         case 'application/octet-stream':
             $data = $this->pdf->getData($resource);
             break;
         case 'application/msword':
         case 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
         case 'application/vnd.openxmlformats-officedocument.wordprocessingml.template':
             // The media type alone does not decide the extractor here: the
             // URI is inspected for ".docx" to pick between the two Word formats.
             if (false !== stripos($resource->getUri()->toString(), '.docx')) {
                 $data = $this->word2007->getData($resource);
                 break;
             }
             $data = $this->msdoc->getData($resource);
             break;
         case 'application/rtf':
             $data = $this->rtf->getData($resource);
             break;
         case 'application/vnd.oasis.opendocument.text':
             $data = $this->odt->getData($resource);
             break;
         case 'text/html':
         default:
             // Unknown or missing media types are treated as HTML.
             $data = $this->html->getData($resource);
             break;
     }
     return new PersistableDocument($data);
 }
Ejemplo n.º 2
0
 /**
  * Checks that the Odt type extracts the expected set of fields from an
  * OpenDocument text fixture served through mocked response, URI and resource
  * objects.
  *
  * @test
  */
 public function retrieveValidDataFromOdtFile()
 {
     $url = 'http://blabdummy.de/dummydir/test.odt';

     // Response stub: provides the fixture file as body and a fixed Last-Modified value.
     $response = $this->getMockBuilder('Guzzle\\Http\\Message\\Response')->disableOriginalConstructor()->setMethods(['getBody', 'getLastModified'])->getMock();
     $response->expects($this->once())->method('getLastModified')->will($this->returnValue('2015-06-18T23:49:41Z'));
     $response->expects($this->once())->method('getBody')->will($this->returnValue(file_get_contents(__DIR__ . '/../../Mock/Documents/test.odt')));

     // URI stub: the extractor is expected to read the URL string exactly twice.
     $uri = $this->getMockBuilder('VDB\\Uri\\Uri')->disableOriginalConstructor()->setMethods(['toString'])->getMock();
     $uri->expects($this->exactly(2))->method('toString')->will($this->returnValue($url));

     // Resource stub wiring the response and URI together.
     $resource = $this->getMockBuilder('VDB\\Spider\\Resource')->disableOriginalConstructor()->setMethods(['getResponse', 'getUri', 'getBody'])->getMock();
     $resource->expects($this->exactly(2))->method('getResponse')->will($this->returnValue($response));
     $resource->expects($this->exactly(2))->method('getUri')->will($this->returnValue($uri));

     $type = new Odt();
     $data = $type->getData($resource);

     $expectedKeys = ['id', 'url', 'content', 'title', 'tstamp', 'contentLength', 'lastModified', 'date', 'publishedDate', 'updatedDate'];
     $this->assertCount(10, $data);
     foreach ($expectedKeys as $expectedKey) {
         $this->assertArrayHasKey($expectedKey, $data);
     }
     $this->assertEquals('test.odt', $data['title']);
     // Assert on the extracted content itself; passing it as the second
     // argument would only use it as the failure message for a check on $data.
     $this->assertNotEmpty($data['content']);
 }