/**
 * Extract the citation text of a stored PDF and submit it to citation parsers.
 *
 * Loads the File record together with its most recently updated TextFile row,
 * runs pdftotext on the PDF, and collects the text starting at the first line
 * that contains the citation stored in the TextFile's JSON `extracted_data`
 * and ending at the next empty line. The collected text is sent to the ParsCit
 * SOAP service and posted to FreeCite; the FreeCite response is echoed and the
 * script is terminated.
 *
 * Returns early, without output, when no id is given, the record or its
 * extracted data cannot be found, or the stored data has no usable citation.
 *
 * @param mixed $fileID Primary key of the File record to process.
 * @return void
 */
public function extract($fileID) {
    if (!isset($fileID)) {
        return;
    }

    // Fetch the file of interest with only its newest TextFile row.
    $file = $this->File->find('first', [
        'conditions' => ['File.id' => $fileID],
        'contain' => ['TextFile' => ['order' => 'TextFile.updated DESC', 'limit' => 1]],
    ]);
    if (empty($file['File']) || empty($file['TextFile'][0]['extracted_data'])) {
        return;
    }

    // The citation is the search needle below; an empty or non-string needle
    // would warn or match every line depending on the PHP version.
    $data = json_decode($file['TextFile'][0]['extracted_data'], true);
    if (!is_array($data) || !isset($data['citation']) || !is_string($data['citation']) || $data['citation'] === '') {
        return;
    }

    // Pick the configured pdftotext binary for the host operating system.
    if (strtoupper(substr(PHP_OS, 0, 3)) === 'WIN') {
        $pdfToTextPath = Configure::read("pdftotextPath.windows");
    } elseif (PHP_OS === "Linux") {
        $pdfToTextPath = Configure::read("pdftotextPath.linux");
    } elseif (PHP_OS === "FreeBSD") {
        $pdfToTextPath = Configure::read("pdftotextPath.freebsd");
    } else {
        $pdfToTextPath = Configure::read("pdftotextPath.mac");
    }

    // Location of the PDF on disk: webroot/files/pdf/<publication_id>/<filename>.
    $fileToExtract = WWW_ROOT . 'files' . DS . 'pdf' . DS . $file['File']['publication_id'] . DS . $file['File']['filename'];

    // Run the extraction. The trailing "-" makes pdftotext write to stdout so
    // exec() captures the text line by line. The path is shell-escaped because
    // it contains a stored filename.
    // NOTE(review): escapeshellarg() can drop non-ASCII bytes when LC_CTYPE is
    // not a UTF-8 locale - confirm the server locale if such filenames exist.
    $lines = [];
    exec($pdfToTextPath . ' -layout -r 300 ' . escapeshellarg($fileToExtract) . ' -', $lines);

    // Collect lines from the first one containing the citation up to (not
    // including) the next empty line.
    $citation = "";
    $started = false;
    foreach ($lines as $line) {
        if (!$started && strpos($line, $data['citation']) !== false) {
            $started = true;
        }
        if (!$started) {
            continue;
        }
        if ($line === "") {
            break;
        }
        $citation .= $line . " ";
    }

    // NOTE(review): the ParsCit result is discarded; kept to preserve the
    // existing request - confirm whether this call is still needed.
    $client = new SoapClient("http://wing.comp.nus.edu.sg/parsCit/wing.nus.wsdl");
    $client->extract_citations($citation);

    // Post the citation to FreeCite. Request headers go in CURLOPT_HTTPHEADER;
    // CURLOPT_HEADER only toggles response headers in the returned output.
    $curl = curl_init();
    curl_setopt_array($curl, [
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_URL => 'http://freecite.library.brown.edu/citations/create',
        CURLOPT_USERAGENT => 'ChalkLab Citation Retriever',
        CURLOPT_POST => 1,
        CURLOPT_POSTFIELDS => ['citation' => $citation],
        CURLOPT_HTTPHEADER => ['Accept: text/xml'],
    ]);
    $result = curl_exec($curl);
    curl_close($curl);

    echo $result;
    die;
}