Example #1
0
 /**
  * Extract function
  * @param $fileID
  */
 public function extract($fileID)
 {
     if (isset($fileID)) {
         $file = $this->File->find('first', ['conditions' => ['File.id' => $fileID], 'contain' => ['TextFile' => ['order' => 'TextFile.updated DESC', 'limit' => 1]]]);
         //get the file of interest
         if (strtoupper(substr(PHP_OS, 0, 3)) === 'WIN') {
             $pdfToTextPath = Configure::read("pdftotextPath.windows");
             //save path to the pdftotext for the server
         } elseif (PHP_OS == "Linux") {
             $pdfToTextPath = Configure::read("pdftotextPath.linux");
         } elseif (PHP_OS == "FreeBSD") {
             $pdfToTextPath = Configure::read("pdftotextPath.freebsd");
         } else {
             $pdfToTextPath = Configure::read("pdftotextPath.mac");
         }
         $fileToExtract = WWW_ROOT . 'files' . DS . 'pdf' . DS . $file['File']['publication_id'] . DS . $file['File']['filename'];
         // find the path to the file name
         exec($pdfToTextPath . ' -layout -r 300  "' . $fileToExtract . '" -', $lines);
         //run the extraction
         $start = false;
         $data = json_decode($file['TextFile'][0]['extracted_data'], true);
         var_dump($data['citation']);
         $citation = "";
         foreach ($lines as $line) {
             if (strpos($line, $data['citation']) !== false) {
                 $start = true;
             }
             if ($start == true) {
                 if ($line !== "") {
                     $citation .= $line . " ";
                 } else {
                     break;
                 }
             }
         }
         var_dump($citation);
         $client = new SoapClient("http://wing.comp.nus.edu.sg/parsCit/wing.nus.wsdl");
         $client->extract_citations($citation);
         $curl = curl_init();
         curl_setopt_array($curl, [CURLOPT_RETURNTRANSFER => 1, CURLOPT_URL => 'http://freecite.library.brown.edu/citations/create', CURLOPT_USERAGENT => 'ChalkLab Citation Retriever', CURLOPT_POST => 1, CURLOPT_POSTFIELDS => ['citation' => $citation], CURLOPT_HEADER => 'Accept: text/xml']);
         $result = curl_exec($curl);
         curl_close($curl);
         echo $result;
         die;
     }
 }