/**
  * Grabs the content from the crawled page and publishes a job on the queue.
  *
  * The response body length is first checked against the configured maximum
  * resource size. The document is then resolved from the resource, a
  * PRE_PERSIST event is dispatched, and a JSON message holding the document
  * data and the crawl job metadata is published on the queue.
  *
  * @param \VDB\Spider\Resource                      $resource
  * @param \Simgroep\ConcurrentSpiderBundle\CrawlJob $crawlJob
  *
  * @throws \Simgroep\ConcurrentSpiderBundle\InvalidContentException When the body length is greater than
  *         or equal to the maximum resource size, or when the payload cannot be encoded as JSON.
  */
 public function persist(Resource $resource, CrawlJob $crawlJob)
 {
     if (strlen($resource->getResponse()->getBody()) >= $this->maximumResourceSize) {
         throw new InvalidContentException(sprintf('Resource size exceeds limits (%s bytes)', $this->maximumResourceSize));
     }

     $document = $this->documentResolver->getDocumentByResource($resource);
     $persistenceEvent = new PersistenceEvent($document, $resource, $crawlJob->getMetadata());
     $this->eventDispatcher->dispatch(PersistenceEvents::PRE_PERSIST, $persistenceEvent);

     // The document is serialized only after the PRE_PERSIST dispatch, so the
     // payload reflects its state once the listeners have run.
     $payload = json_encode([
         'document' => $document->toArray(),
         'metadata' => $crawlJob->getMetadata(),
     ]);

     // json_encode() returns false on failure (e.g. malformed UTF-8 in the
     // document); refuse to publish a message without a usable body.
     if (false === $payload) {
         throw new InvalidContentException(sprintf('Document could not be encoded as JSON (error code %d)', json_last_error()));
     }

     // NOTE(review): delivery_mode 1 is presumably the AMQP non-persistent mode - confirm this is intended.
     $message = new AMQPMessage($payload, ['delivery_mode' => 1]);
     $this->queue->publish($message);
 }
 /**
  * Checks that a resource whose response reports the Word 2007 content type
  * yields a PersistableDocument carrying the data returned by the Word2007
  * type mock.
  *
  * @test
  */
 public function ifDataIsReturnedFromWord2007Document()
 {
     $expectedData = ['dummyKey' => 'dummyValue'];

     // Response stub that only reports the content type.
     $responseMock = $this->getMockBuilder('Guzzle\\Http\\Message\\Response')
         ->disableOriginalConstructor()
         ->setMethods(['getContentType'])
         ->getMock();
     $responseMock
         ->expects($this->once())
         ->method('getContentType')
         ->will($this->returnValue('application/vnd.openxmlformats-officedocument.wordprocessingml.document'));

     // URI stub with a fixed string representation.
     $uriMock = $this->getMockBuilder('VDB\\Uri\\Uri')
         ->disableOriginalConstructor()
         ->setMethods(['toString'])
         ->getMock();
     $uriMock
         ->expects($this->any())
         ->method('toString')
         ->will($this->returnValue('dummyUri/documment.docx'));

     // Resource stub exposing the response and URI stubs above.
     $resourceMock = $this->getMockBuilder('\\VDB\\Spider\\Resource')
         ->disableOriginalConstructor()
         ->setMethods(['getResponse', 'getUri'])
         ->getMock();
     $resourceMock
         ->expects($this->once())
         ->method('getResponse')
         ->will($this->returnValue($responseMock));
     $resourceMock
         ->expects($this->any())
         ->method('getUri')
         ->will($this->returnValue($uriMock));

     // One mock per resolver type; only the Word2007 one is expected to be asked for data.
     $html = $this->getDocumentResolverMock('Simgroep\\ConcurrentSpiderBundle\\DocumentResolver\\Type\\Html');
     $pdf = $this->getDocumentResolverMock('Simgroep\\ConcurrentSpiderBundle\\DocumentResolver\\Type\\Pdf');
     $msDoc = $this->getDocumentResolverMock('Simgroep\\ConcurrentSpiderBundle\\DocumentResolver\\Type\\MsDoc');
     $word2007 = $this->getDocumentResolverMock('Simgroep\\ConcurrentSpiderBundle\\DocumentResolver\\Type\\Word2007');
     $word2007
         ->expects($this->once())
         ->method('getData')
         ->will($this->returnValue($expectedData));
     $rtf = $this->getDocumentResolverMock('Simgroep\\ConcurrentSpiderBundle\\DocumentResolver\\Type\\Rtf');
     $odt = $this->getDocumentResolverMock('Simgroep\\ConcurrentSpiderBundle\\DocumentResolver\\Type\\Odt');

     $resolver = new DocumentResolver($html, $pdf, $msDoc, $word2007, $rtf, $odt);
     $resolvedDocument = $resolver->getDocumentByResource($resourceMock);

     $this->assertInstanceOf('\\Simgroep\\ConcurrentSpiderBundle\\PersistableDocument', $resolvedDocument);
     $this->assertEquals($expectedData, $resolvedDocument->toArray());
 }