/**
 * Configure and make a request to the Tika server and return its results.
 *
 * Responses for the 'lang' and 'meta' types are cached per file (keyed by the
 * SHA-1 of the file name) and served from that cache on later calls.
 *
 * @param string $file Local path or http(s) URL of the document; may be empty for the 'version' type
 * @param string $type One of: html, mime, lang, meta, text, version
 *
 * @return mixed Response body, the result of Metadata::make() for 'meta', or null on HTTP 204
 *
 * @throws \Exception If the type is unknown, the file can't be opened or the server answers with an error status
 */
protected function request($file, $type)
{
    $key = sha1((string) $file);

    // check if is cached
    if (isset($this->cache[$key][$type])) {
        return $this->cache[$key][$type];
    }

    // parameters for cURL request
    $headers = [];
    switch ($type) {
        case 'html':
            $resource = 'tika';
            $headers[] = 'Accept: text/html';
            break;

        case 'mime':
            $name = basename($file);
            $resource = 'detect/stream';
            $headers[] = "Content-Disposition: attachment, filename={$name}";
            break;

        case 'lang':
            $resource = 'language/stream';
            break;

        case 'meta':
            $resource = 'meta';
            $headers[] = 'Accept: application/json';
            break;

        case 'text':
            $resource = 'tika';
            $headers[] = 'Accept: text/plain';
            break;

        case 'version':
            $resource = 'version';
            break;

        default:
            throw new Exception("Unknown type {$type}");
    }

    // cURL base options
    $options = [CURLOPT_PUT => true];

    // stream uploaded to the server (stays null for requests without a body)
    $handle = null;
    if ($file && preg_match('/^http/', $file)) {
        // remote file: size is unknown, so only the stream is set
        $handle = fopen($file, 'r');
    } elseif ($file && file_exists($file) && is_readable($file)) {
        $handle = fopen($file, 'r');
        $options[CURLOPT_INFILESIZE] = filesize($file);
    } elseif ($type == 'version') {
        // version request has no body, so it must not be a PUT
        $options = [CURLOPT_PUT => false];
    } else {
        throw new Exception("File {$file} can't be opened");
    }

    // fopen() reports failure by returning false instead of throwing
    if ($handle === false) {
        throw new Exception("File {$file} can't be opened");
    }
    if ($handle !== null) {
        $options[CURLOPT_INFILE] = $handle;
    }

    // sets headers
    $options[CURLOPT_HTTPHEADER] = $headers;

    // cURL init and options
    $options[CURLOPT_URL] = "http://{$this->host}:{$this->port}/{$resource}";

    // get the response and the HTTP status code, always releasing the stream
    try {
        list($response, $status) = $this->exec($options);
    } finally {
        if (is_resource($handle)) {
            fclose($handle);
        }
    }

    switch ($status) {
        // request completed successfully
        case 200:
            if ($type == 'meta') {
                $response = Metadata::make($response, $file);
            }
            break;

        // request completed successfully but result is empty
        case 204:
            $response = null;
            break;

        // unsupported media type
        case 415:
            throw new Exception('Unsupported media type');

        // unprocessable entity
        case 422:
            throw new Exception('Unprocessable document');

        // server error
        case 500:
            throw new Exception('Error while processing document');

        // unexpected
        default:
            throw new Exception("Unexpected response for /{$resource} ({$status})");
    }

    // cache certain responses
    if (in_array($type, ['lang', 'meta'], true)) {
        $this->cache[$key][$type] = $response;
    }

    return $response;
}
/**
 * Run the Tika application jar for the given file and return its output.
 *
 * Responses for the 'lang' and 'meta' types are cached per file (keyed by the
 * SHA-1 of the file name) and served from that cache on later calls.
 *
 * @param string $file Local path or http(s) URL of the document; may be empty for the 'version' type
 * @param string $type One of: html, mime, lang, meta, text, version
 *
 * @return mixed Trimmed command output, or the result of Metadata::make() for 'meta'
 *
 * @throws \Exception If the type is unknown or a local file does not exist
 */
protected function request($file, $type)
{
    $key = sha1((string) $file);

    // check if is cached
    if (isset($this->cache[$key][$type])) {
        return $this->cache[$key][$type];
    }

    // parameters for the command line
    $arguments = [];
    switch ($type) {
        case 'html':
            $arguments[] = '--html';
            break;

        case 'mime':
            $arguments[] = '--detect';
            break;

        case 'lang':
            $arguments[] = '--language';
            break;

        case 'meta':
            $arguments[] = '--metadata --json';
            break;

        case 'text':
            $arguments[] = '--text';
            break;

        case 'version':
            $arguments[] = '--version';
            break;

        default:
            throw new Exception("Unknown type {$type}");
    }

    // invalid file
    if ($file && !preg_match('/^http/', $file) && !file_exists($file)) {
        throw new Exception("File {$file} can't be opened");
    }

    // add last argument; escaped so quotes in the name can't break out of the argument
    if ($file) {
        $arguments[] = escapeshellarg($file);
    }

    // build command
    $command = 'java -jar ' . escapeshellarg($this->path) . ' ' . implode(' ', $arguments);

    // run command and process output (shell_exec() yields null when there is no output)
    $response = trim((string) shell_exec($command));

    // metadata response
    if ($type == 'meta') {
        // fix for invalid? json returned only with images
        $response = str_replace(basename((string) $file) . '"}{', '", ', $response);
        $response = Metadata::make($response, $file);
    }

    // cache certain responses
    if (in_array($type, ['lang', 'meta'], true)) {
        $this->cache[$key][$type] = $response;
    }

    return $response;
}
/**
 * Run the Tika application jar for the given file and return its output.
 *
 * Responses for the 'lang' and 'meta' types are cached per file (keyed by the
 * SHA-1 of the file name) and served from that cache on later calls.
 *
 * @param string      $type One of: html, lang, mime, meta, text, version
 * @param string|null $file Local path or http(s) URL of the document; not needed for 'version'
 *
 * @return mixed Trimmed command output, or the result of Metadata::make() for 'meta'
 *
 * @throws \Exception If the type is unknown, the file can't be opened (code 2 for remote
 *                    files), the command can't be started or it exits with a positive status
 */
public function request($type, $file = null)
{
    $key = sha1((string) $file);

    // check if is cached
    if (isset($this->cache[$key][$type])) {
        return $this->cache[$key][$type];
    }

    // parameters for command
    $arguments = [];
    switch ($type) {
        case 'html':
            $arguments[] = '--html';
            break;

        case 'lang':
            $arguments[] = '--language';
            break;

        case 'mime':
            $arguments[] = '--detect';
            break;

        case 'meta':
            $arguments[] = '--metadata --json';
            break;

        case 'text':
            $arguments[] = '--text';
            break;

        case 'version':
            $arguments[] = '--version';
            break;

        default:
            throw new Exception("Unknown type {$type}");
    }

    $isRemote = $file && preg_match('/^http/', $file);

    if ($file && !$isRemote && !file_exists($file)) {
        // invalid local file
        throw new Exception("File {$file} can't be opened");
    }

    if ($isRemote) {
        // invalid remote file: get_headers() returns false when the request fails
        $remoteHeaders = @get_headers($file);
        if (empty($remoteHeaders[0]) || !preg_match('/200/', $remoteHeaders[0])) {
            throw new Exception("File {$file} can't be opened", 2);
        }
    }

    // add last argument; escaped so quotes in the name can't break out of the argument
    if ($file) {
        $arguments[] = escapeshellarg($file);
    }

    // build command
    $command = ($this->java ?: 'java') . ' -jar ' . escapeshellarg($this->path) . ' ' . implode(' ', $arguments);

    // run command: stdin and stdout are pipes, stderr is appended to a log file
    $descriptors = [['pipe', 'r'], ['pipe', 'w'], ['file', '/tmp/tika-error.log', 'a']];
    $process = proc_open($command, $descriptors, $pipes);

    if (!is_resource($process)) {
        throw new Exception("Error running command {$command}");
    }

    // nothing is written to the command, so close its stdin before reading the output
    fclose($pipes[0]);
    $response = trim((string) stream_get_contents($pipes[1]));
    fclose($pipes[1]);
    $exit = proc_close($process);

    // exception if exit value is not zero
    if ($exit > 0) {
        throw new Exception("Unexpected exit value ({$exit}) for command {$command}");
    }

    // metadata response
    if ($type == 'meta') {
        // fix for invalid? json returned only with images
        $response = str_replace(basename((string) $file) . '"}{', '", ', $response);
        $response = Metadata::make($response, $file);
    }

    // cache certain responses
    if (in_array($type, ['lang', 'meta'], true)) {
        $this->cache[$key][$type] = $response;
    }

    return $response;
}
/**
 * Configure, make a request to the Tika server and return its results.
 *
 * Responses for the 'lang' and 'meta' types are cached per file (keyed by the
 * SHA-1 of the file name) and served from that cache on later calls. A request
 * answered with HTTP 500 is repeated, after a 100 ms pause, up to
 * $this->retries times before the error is reported.
 *
 * @param string      $type One of: html, lang, mime, meta, text, version
 * @param string|null $file Local path or http(s) URL of the document; not needed for 'version'
 *
 * @return mixed Response body, the result of Metadata::make() for 'meta', or null on HTTP 204
 *
 * @throws \Exception If the type is unknown or the file can't be opened
 */
public function request($type, $file = null)
{
    // remaining retries per file, shared by the recursive calls made while retrying
    static $retries = [];

    $key = sha1((string) $file);

    // check if is cached
    if (isset($this->cache[$key][$type])) {
        return $this->cache[$key][$type];
    }

    // first attempt for this file: start with the full retry budget
    if (!isset($retries[$key])) {
        $retries[$key] = $this->retries;
    }

    // parameters for cURL request
    $headers = [];
    switch ($type) {
        case 'html':
            $resource = 'tika';
            $headers[] = 'Accept: text/html';
            break;

        case 'lang':
            $resource = 'language/stream';
            break;

        case 'mime':
            $name = basename((string) $file);
            $resource = 'detect/stream';
            $headers[] = "Content-Disposition: attachment, filename={$name}";
            break;

        case 'meta':
            $resource = 'meta';
            $headers[] = 'Accept: application/json';
            break;

        case 'text':
            $resource = 'tika';
            $headers[] = 'Accept: text/plain';
            break;

        case 'version':
            $resource = 'version';
            break;

        default:
            throw new Exception("Unknown type {$type}");
    }

    // base options
    $options = $this->options;

    // stream uploaded to the server (stays null for requests without a body)
    $handle = null;
    if ($file && preg_match('/^http/', $file)) {
        // remote file: size is unknown, so only the stream is set
        $handle = fopen($file, 'r');
    } elseif ($file && file_exists($file) && is_readable($file)) {
        $handle = fopen($file, 'r');
        $options[CURLOPT_INFILESIZE] = filesize($file);
    } elseif ($type == 'version') {
        // version request has no body, so it must not be a PUT
        $options[CURLOPT_PUT] = false;
    } else {
        throw new Exception("File {$file} can't be opened");
    }

    // fopen() reports failure by returning false instead of throwing
    if ($handle === false) {
        throw new Exception("File {$file} can't be opened");
    }
    if ($handle !== null) {
        $options[CURLOPT_INFILE] = $handle;
    }

    // sets headers
    $options[CURLOPT_HTTPHEADER] = $headers;

    // cURL init and options
    $options[CURLOPT_URL] = "http://{$this->host}:{$this->port}/{$resource}";

    // get the response and the HTTP status code, always releasing the stream
    // (a retry opens the file again, so this handle must not stay open)
    try {
        list($response, $status) = $this->exec($options);
    } finally {
        if (is_resource($handle)) {
            fclose($handle);
        }
    }

    if ($status == 200) {
        // request completed successfully
        unset($retries[$key]);
        if ($type == 'meta') {
            $response = Metadata::make($response, $file);
        }
    } elseif ($status == 204) {
        // request completed successfully but result is empty
        unset($retries[$key]);
        $response = null;
    } elseif ($status == 500 && $retries[$key] > 0) {
        // server error: retry while there is budget left; the explicit "> 0"
        // keeps the counter from going negative, which would be truthy
        $retries[$key]--;
        usleep(100000);
        $response = $this->request($type, $file);
    } else {
        // give up; drop the counter so a later call starts with a fresh budget
        unset($retries[$key]);
        $this->error($status, $resource);
    }

    // cache certain responses
    if (in_array($type, ['lang', 'meta'], true)) {
        $this->cache[$key][$type] = $response;
    }

    return $response;
}