Esempio n. 1
0
 /**
  * Send a request to the Tika server for the given file and return the result.
  *
  * The server resource and the request headers are chosen from $type, the
  * file (a local path or a URL starting with "http") is attached as the PUT
  * body, and the HTTP status returned by exec() decides whether a value is
  * returned or an exception is thrown. Results for the 'lang' and 'meta'
  * types are stored in $this->cache, keyed by the SHA-1 of $file.
  *
  * @param string $file Local path or URL of the document; may be empty for 'version'
  * @param string $type One of 'html', 'mime', 'lang', 'meta', 'text' or 'version'
  *
  * @return mixed Response body from exec(), the result of Metadata::make() for 'meta', or null on HTTP 204
  *
  * @throws \Exception If the type is unknown, the file can't be opened or the server reports an error
  */
 protected function request($file, $type)
 {
     // cache key; the cast avoids passing null to sha1() and yields the same hash
     $key = sha1((string) $file);
     // check if is cached
     if (isset($this->cache[$key][$type])) {
         return $this->cache[$key][$type];
     }
     // parameters for cURL request
     $headers = [];
     switch ($type) {
         case 'html':
             $resource = 'tika';
             $headers[] = 'Accept: text/html';
             break;
         case 'mime':
             $name = basename($file);
             $resource = 'detect/stream';
             $headers[] = "Content-Disposition: attachment, filename={$name}";
             break;
         case 'lang':
             $resource = 'language/stream';
             break;
         case 'meta':
             $resource = 'meta';
             $headers[] = 'Accept: application/json';
             break;
         case 'text':
             $resource = 'tika';
             $headers[] = 'Accept: text/plain';
             break;
         case 'version':
             $resource = 'version';
             break;
         default:
             throw new Exception("Unknown type {$type}");
     }
     // cURL base options
     $options = [CURLOPT_PUT => true];
     // stream handed to cURL as the request body (stays null when no file is sent)
     $handle = null;
     if ($file && preg_match('/^http/', $file)) {
         // remote file: size is unknown, so only the stream is set
         $handle = fopen($file, 'r');
         $options[CURLOPT_INFILE] = $handle;
     } elseif ($file && file_exists($file) && is_readable($file)) {
         // local file: stream plus its size
         $handle = fopen($file, 'r');
         $options[CURLOPT_INFILE] = $handle;
         $options[CURLOPT_INFILESIZE] = filesize($file);
     } elseif ($type === 'version') {
         // the version resource needs no body, so the request is not a PUT
         $options = [CURLOPT_PUT => false];
     } else {
         throw new Exception("File {$file} can't be opened");
     }
     // fopen() returns false on failure; report it instead of passing false to cURL
     if ($handle === false) {
         throw new Exception("File {$file} can't be opened");
     }
     // sets headers
     $options[CURLOPT_HTTPHEADER] = $headers;
     // cURL init and options
     $options[CURLOPT_URL] = "http://{$this->host}:{$this->port}" . "/{$resource}";
     // get the response and the HTTP status code
     list($response, $status) = $this->exec($options);
     // the request has finished, release the input stream
     if (is_resource($handle)) {
         fclose($handle);
     }
     switch ($status) {
         // request completed successfully
         case 200:
             if ($type === 'meta') {
                 $response = Metadata::make($response, $file);
             }
             break;
         // request completed successfully but result is empty
         case 204:
             $response = null;
             break;
         // unsupported media type
         case 415:
             throw new Exception('Unsupported media type');
         // unprocessable entity
         case 422:
             throw new Exception('Unprocessable document');
         // server error
         case 500:
             throw new Exception('Error while processing document');
         // unexpected
         default:
             throw new Exception("Unexpected response for /{$resource} ({$status})");
     }
     // cache certain responses
     if (in_array($type, ['lang', 'meta'], true)) {
         $this->cache[$key][$type] = $response;
     }
     return $response;
 }
Esempio n. 2
0
 /**
  * Run the Tika application JAR for the given file and return its output.
  *
  * The command line flag is chosen from $type, the JAR at $this->path is run
  * through the shell with `java -jar`, and the trimmed standard output is
  * returned. For 'meta' the output is passed through Metadata::make().
  * Results for the 'lang' and 'meta' types are stored in $this->cache, keyed
  * by the SHA-1 of $file.
  *
  * @param string $file Local path or URL starting with "http"; may be empty (e.g. for 'version')
  * @param string $type One of 'html', 'mime', 'lang', 'meta', 'text' or 'version'
  *
  * @return mixed Trimmed command output, or the result of Metadata::make() for 'meta'
  *
  * @throws \Exception If the type is unknown or a local file does not exist
  */
 protected function request($file, $type)
 {
     // cache key; the cast avoids passing null to sha1() and yields the same hash
     $key = sha1((string) $file);
     // check if is cached
     if (isset($this->cache[$key][$type])) {
         return $this->cache[$key][$type];
     }
     // command line flags for the requested operation
     $arguments = [];
     switch ($type) {
         case 'html':
             $arguments[] = '--html';
             break;
         case 'mime':
             $arguments[] = '--detect';
             break;
         case 'lang':
             $arguments[] = '--language';
             break;
         case 'meta':
             $arguments[] = '--metadata --json';
             break;
         case 'text':
             $arguments[] = '--text';
             break;
         case 'version':
             $arguments[] = '--version';
             break;
         default:
             throw new Exception("Unknown type {$type}");
     }
     // invalid file: local paths must exist, URLs are passed on unchecked
     if ($file && !preg_match('/^http/', $file) && !file_exists($file)) {
         throw new Exception("File {$file} can't be opened");
     }
     // add last argument, escaped so quotes or shell metacharacters in the
     // name cannot break out of the argument
     if ($file) {
         $arguments[] = escapeshellarg($file);
     }
     // build command
     $command = 'java -jar ' . escapeshellarg($this->path) . ' ' . implode(' ', $arguments);
     // run command and process output; shell_exec() yields null when the
     // command fails or prints nothing, which is treated as empty output
     $response = trim((string) shell_exec($command));
     // metadata response
     if ($type === 'meta') {
         // fix for invalid? json returned only with images
         $response = str_replace(basename($file) . '"}{', '", ', $response);
         $response = Metadata::make($response, $file);
     }
     // cache certain responses
     if (in_array($type, ['lang', 'meta'], true)) {
         $this->cache[$key][$type] = $response;
     }
     return $response;
 }
Esempio n. 3
0
 /**
  * Run the Tika application JAR for the given file and return its output.
  *
  * The command line flag is chosen from $type and the JAR at $this->path is
  * started with proc_open() using $this->java (or "java" when unset). Standard
  * output is captured and trimmed; standard error is appended to
  * /tmp/tika-error.log. For 'meta' the output is passed through
  * Metadata::make(). Results for the 'lang' and 'meta' types are stored in
  * $this->cache, keyed by the SHA-1 of $file.
  *
  * @param string $type One of 'html', 'lang', 'mime', 'meta', 'text' or 'version'
  * @param string $file Local path or URL starting with "http"; optional (e.g. for 'version')
  *
  * @return mixed Trimmed command output, or the result of Metadata::make() for 'meta'
  *
  * @throws \Exception If the type is unknown, the file can't be opened (code 2 for
  *                    remote files), the process can't be started or it exits with
  *                    a positive status
  */
 public function request($type, $file = null)
 {
     // cache key; the cast avoids passing null to sha1() and yields the same hash
     $key = sha1((string) $file);
     // check if is cached
     if (isset($this->cache[$key][$type])) {
         return $this->cache[$key][$type];
     }
     // parameters for command
     $arguments = [];
     switch ($type) {
         case 'html':
             $arguments[] = '--html';
             break;
         case 'lang':
             $arguments[] = '--language';
             break;
         case 'mime':
             $arguments[] = '--detect';
             break;
         case 'meta':
             $arguments[] = '--metadata --json';
             break;
         case 'text':
             $arguments[] = '--text';
             break;
         case 'version':
             $arguments[] = '--version';
             break;
         default:
             throw new Exception("Unknown type {$type}");
     }
     // validate the file before spawning the JVM
     if ($file) {
         if (!preg_match('/^http/', $file)) {
             // invalid local file
             if (!file_exists($file)) {
                 throw new Exception("File {$file} can't be opened");
             }
         } else {
             // remote file: the first status line must contain 200;
             // get_headers() returns false (with a warning) when the request fails
             $remoteHeaders = @get_headers($file);
             if (!$remoteHeaders || !preg_match('/200/', $remoteHeaders[0])) {
                 throw new Exception("File {$file} can't be opened", 2);
             }
         }
         // add last argument, escaped so quotes or shell metacharacters in
         // the name cannot break out of the argument
         $arguments[] = escapeshellarg($file);
     }
     // build command; the java binary is left unquoted as in the original
     // command so any value configured in $this->java keeps working
     $command = ($this->java ?: 'java') . ' -jar ' . escapeshellarg($this->path) . ' ' . implode(' ', $arguments);
     // run command: stdin and stdout are pipes, stderr is appended to a log file
     $descriptors = [['pipe', 'r'], ['pipe', 'w'], ['file', '/tmp/tika-error.log', 'a']];
     $process = proc_open($command, $descriptors, $pipes);
     if (!is_resource($process)) {
         throw new Exception("Error running command {$command}");
     }
     // nothing is written to the command, close its stdin right away
     fclose($pipes[0]);
     $response = trim(stream_get_contents($pipes[1]));
     fclose($pipes[1]);
     $exit = proc_close($process);
     // exception if exit value is not zero
     if ($exit > 0) {
         throw new Exception("Unexpected exit value ({$exit}) for command {$command}");
     }
     // metadata response
     if ($type === 'meta') {
         // fix for invalid? json returned only with images
         $response = str_replace(basename($file) . '"}{', '", ', $response);
         $response = Metadata::make($response, $file);
     }
     // cache certain responses
     if (in_array($type, ['lang', 'meta'], true)) {
         $this->cache[$key][$type] = $response;
     }
     return $response;
 }
Esempio n. 4
0
 /**
  * Send a request to the Tika server for the given file and return the result.
  *
  * The server resource and the request headers are chosen from $type, the
  * file (a local path or a URL starting with "http") is attached as the
  * request body on top of the base options in $this->options, and the HTTP
  * status returned by exec() decides the outcome. A 500 response is retried
  * after a 100 ms pause, up to $this->retries times per file; any other
  * non-success status is delegated to $this->error(). Results for the 'lang'
  * and 'meta' types are stored in $this->cache, keyed by the SHA-1 of $file.
  *
  * @param string $type One of 'html', 'lang', 'mime', 'meta', 'text' or 'version'
  * @param string $file Local path or URL of the document; optional for 'version'
  *
  * @return mixed Response body from exec(), the result of Metadata::make() for 'meta', or null on HTTP 204
  *
  * @throws \Exception If the type is unknown or the file can't be opened
  */
 public function request($type, $file = null)
 {
     // remaining retries per file, shared by the recursive calls made below
     static $retries = [];
     // cache key; the cast avoids passing null to sha1() and yields the same hash
     $key = sha1((string) $file);
     // check if is cached
     if (isset($this->cache[$key][$type])) {
         return $this->cache[$key][$type];
     }
     // first attempt for this file: start with the configured budget
     if (!isset($retries[$key])) {
         $retries[$key] = $this->retries;
     }
     // parameters for cURL request
     $headers = [];
     switch ($type) {
         case 'html':
             $resource = 'tika';
             $headers[] = 'Accept: text/html';
             break;
         case 'lang':
             $resource = 'language/stream';
             break;
         case 'mime':
             $name = basename($file);
             $resource = 'detect/stream';
             $headers[] = "Content-Disposition: attachment, filename={$name}";
             break;
         case 'meta':
             $resource = 'meta';
             $headers[] = 'Accept: application/json';
             break;
         case 'text':
             $resource = 'tika';
             $headers[] = 'Accept: text/plain';
             break;
         case 'version':
             $resource = 'version';
             break;
         default:
             throw new Exception("Unknown type {$type}");
     }
     // base options
     $options = $this->options;
     // stream handed to cURL as the request body (stays null when no file is sent)
     $handle = null;
     if ($file && preg_match('/^http/', $file)) {
         // remote file: size is unknown, so only the stream is set
         $handle = fopen($file, 'r');
         $options[CURLOPT_INFILE] = $handle;
     } elseif ($file && file_exists($file) && is_readable($file)) {
         // local file: stream plus its size
         $handle = fopen($file, 'r');
         $options[CURLOPT_INFILE] = $handle;
         $options[CURLOPT_INFILESIZE] = filesize($file);
     } elseif ($type == 'version') {
         // the version resource needs no body, so the request is not a PUT
         $options[CURLOPT_PUT] = false;
     } else {
         throw new Exception("File {$file} can't be opened");
     }
     // fopen() returns false on failure; report it instead of passing false to cURL
     if ($handle === false) {
         throw new Exception("File {$file} can't be opened");
     }
     // sets headers
     $options[CURLOPT_HTTPHEADER] = $headers;
     // cURL init and options
     $options[CURLOPT_URL] = "http://{$this->host}:{$this->port}" . "/{$resource}";
     // get the response and the HTTP status code
     list($response, $status) = $this->exec($options);
     // the request has finished, release the input stream before any retry
     if (is_resource($handle)) {
         fclose($handle);
     }
     if ($status == 200) {
         // request completed successfully; the next request for this file
         // starts with a fresh retry budget
         unset($retries[$key]);
         if ($type == 'meta') {
             $response = Metadata::make($response, $file);
         }
     } elseif ($status == 204) {
         // request completed successfully but result is empty
         unset($retries[$key]);
         $response = null;
     } elseif ($status == 500 && $retries[$key] > 0) {
         // server error with retries left. The explicit "> 0" check keeps an
         // exhausted counter from going negative, which a bare post-decrement
         // test would treat as "retries left" on a later call
         $retries[$key]--;
         usleep(100000);
         $response = $this->request($type, $file);
     } else {
         // out of retries or any other status: reset the budget and report
         unset($retries[$key]);
         $this->error($status, $resource);
     }
     // cache certain responses
     if (in_array($type, ['lang', 'meta'], true)) {
         $this->cache[$key][$type] = $response;
     }
     return $response;
 }