/**
 * Checks that extract(), when given a document of literals, issues a single
 * POST whose URL carries the document's field value and field boost as query
 * parameters and whose body is the content of the extracted file.
 */
public function testExtractsWithDocument()
{
    // the file handed to extract() is this test file itself
    $sourcePath = __FILE__;

    // document whose field value and boost are expected to show up in the URL
    $document = new Apache_Solr_Document();
    $document->field = "literal value";
    $document->setFieldBoost('field', 2);

    // transport double: exactly one POST with the expected arguments
    $transport = $this->getMockHttpTransportInterface();
    $transport
        ->expects($this->once())
        ->method('performPostRequest')
        ->with(
            $this->equalTo("http://localhost:8180/solr/update/extract?wt=json&json.nl=map&resource.name=ServiceTest.php&boost.field=2&literal.field=literal+value"),
            $this->equalTo(file_get_contents($sourcePath)),
            $this->equalTo('application/octet-stream'), // default for extract
            $this->equalTo(false) // expected timeout argument
        )
        ->will($this->returnValue(Apache_Solr_HttpTransport_ResponseTest::get200Response()));

    $service = new Apache_Solr_Service();
    $service->setHttpTransport($transport);

    $service->extract($sourcePath, null, $document);
}
/**
 * Checks that extract(), when called with a URL, performs exactly one GET
 * request for that URL and exactly one POST request to the extract handler
 * carrying the canned 200 response body.
 */
public function testExtractWithUrlDefers()
{
    $remoteUrl = "http://example.com";

    $transport = $this->getMockHttpTransportInterface();

    // the remote resource must be requested exactly once
    $transport
        ->expects($this->once())
        ->method('performGetRequest')
        ->with($this->equalTo($remoteUrl))
        ->will($this->returnValue(Apache_Solr_HttpTransport_ResponseTest::get200Response()));

    // exactly one POST to the extract handler with the expected arguments
    $transport
        ->expects($this->once())
        ->method('performPostRequest')
        ->with(
            $this->equalTo("http://localhost:8180/solr/update/extract?resource.name=http%3A%2F%2Fexample.com&wt=json&json.nl=map"),
            $this->equalTo(Apache_Solr_HttpTransport_ResponseTest::BODY_200),
            $this->equalTo('application/octet-stream'), // default for extract
            $this->equalTo(false) // expected timeout argument
        )
        ->will($this->returnValue(Apache_Solr_HttpTransport_ResponseTest::get200Response()));

    $service = new Apache_Solr_Service();
    $service->setHttpTransport($transport);

    $service->extract($remoteUrl);
}
// Load the document for the current id; ids without a document are skipped.
$d = null;
try {
    $d = new Opus_Document($id);
} catch (Opus_Model_NotFoundException $e) {
    // document with id $id does not exist
    continue;
}

// Documents without any attached file contribute nothing to the statistics.
$files = $d->getFile();
if (count($files) == 0) {
    continue;
}

foreach ($files as $file) {
    $overallNumOfFulltexts++;

    // Ask the Solr server to extract the text only (extractOnly), as plain text.
    $response = null;
    try {
        $response = $solrServer->extract($file->getPath(), array('extractOnly' => 'true', 'extractFormat' => 'text'));
    } catch (Exception $e) {
        echo "error while extracting full text for document # " . $d->getId() . " (file name : " . $file->getPath() . " )\n";
        $numOfNonExtractableFulltexts++;
        continue;
    }

    // A missing or whitespace-only raw response also counts as non-extractable.
    if (is_null($response->getRawResponse()) || strlen(trim($response->getRawResponse())) == 0) {
        echo "non-extractable full text for document # " . $d->getId() . " (file name: " . $file->getPath() . " )\n";
        $numOfNonExtractableFulltexts++;
    }
}
} // closes the enclosing loop that was opened earlier in the script

echo "overall num of full texts: {$overallNumOfFulltexts}\n";

// Guard the division: when no full text was processed at all the counter is 0,
// and dividing by it raises DivisionByZeroError on PHP 8 (a warning before that).
$errorRate = $overallNumOfFulltexts > 0
    ? 100.0 * $numOfNonExtractableFulltexts / $overallNumOfFulltexts
    : 0.0;
echo "num of non extractable full texts: {$numOfNonExtractableFulltexts} ({$errorRate} %)\n";
exit;