Пример #1
7
/**
 * OCRs the "amount" and "duedate" regions of a stored bill image and writes
 * the recognised values back to the bill row.
 *
 * Steps: look up the bill's image path in billdb.bills, look up the crop
 * rectangle of each field in templatedb.datafields, crop the image, run
 * Tesseract on each crop, then UPDATE bills.totalAmt / bills.billDueDate.
 * The crops are written to images/cropped1.jpg and images/cropped2.jpg.
 *
 * Nothing is written to the bill when the connection fails, a lookup returns
 * no row, the image cannot be loaded or cropped, or the OCR text does not
 * contain an amount plus three date tokens.
 *
 * @param int|string $id       billID of the bill to process (cast to int).
 * @param int|string $template templateID holding the field coordinates (cast to int).
 * @return void
 */
function ocr($id, $template)
{
    require_once 'TesseractOCR/TesseractOCR.php';

    // Both identifiers are concatenated into SQL below; forcing them to
    // integers closes the injection hole without changing valid calls.
    $id = (int) $id;
    $template = (int) $template;

    // NOTE(review): credentials are hard-coded here; they should come from configuration.
    $link = mysql_connect('localhost:3306', 'root', 'ysAb7cEkjvOa');
    if ($link === false) {
        return;
    }

    // Locate and load the scanned bill.
    mysql_select_db('billdb');
    $result = mysql_query("SELECT billFilePath from bills where billID = " . $id);
    $row = $result ? mysql_fetch_array($result, MYSQL_NUM) : false;
    $im = $row ? imagecreatefromjpeg($row[0]) : false;
    if ($im === false) {
        mysql_close($link);
        return;
    }

    // Crop and OCR each field; the key is the dataFieldLabel, the value is
    // the file the cropped region is written to before recognition.
    mysql_select_db('templatedb');
    $cropFiles = array('amount' => 'images/cropped1.jpg', 'duedate' => 'images/cropped2.jpg');
    $texts = array();
    foreach ($cropFiles as $label => $cropPath) {
        $sql = "SELECT coordinateLabelX, coordinateLabelY, coordinateLabelX2, coordinateLabelY2 FROM datafields WHERE templateID = " . $template . " AND dataFieldLabel = '" . $label . "'";
        $result = mysql_query($sql);
        $row = $result ? mysql_fetch_array($result, MYSQL_NUM) : false;
        if (!$row) {
            mysql_close($link);
            return;
        }
        // The table stores two corners; imagecrop() wants origin + size.
        $to_crop_array = array(
            'x' => $row[0],
            'y' => $row[1],
            'width' => $row[2] - $row[0],
            'height' => $row[3] - $row[1],
        );
        $thumb_im = imagecrop($im, $to_crop_array);
        if ($thumb_im === false) {
            mysql_close($link);
            return;
        }
        imagejpeg($thumb_im, $cropPath, 100);

        $tesseract = new TesseractOCR($cropPath);
        // Was the bare word `eng`: an undefined constant, fatal on PHP 8.
        $tesseract->setLanguage('eng');
        $texts[$label] = $tesseract->recognize();
    }

    // First token of the amount text; the due date is read as "day month year".
    $amount = strtok($texts['amount'], " ");
    $day = strtok($texts['duedate'], " ");
    $month = strtok(" ");
    $year = strtok(" ");
    if ($amount === false || $day === false || $month === false || $year === false) {
        // Incomplete OCR output: do not overwrite the bill with garbage.
        mysql_close($link);
        return;
    }

    // Translate an abbreviated month name to its two-digit number; any other
    // token is left unchanged.
    $monthNumbers = array(
        'Jan' => '01', 'Feb' => '02', 'Mar' => '03', 'Apr' => '04',
        'May' => '05', 'Jun' => '06', 'Jul' => '07', 'Aug' => '08',
        'Sep' => '09', 'Oct' => '10', 'Nov' => '11', 'Dec' => '12',
    );
    if (isset($monthNumbers[$month])) {
        $month = $monthNumbers[$month];
    }

    // OCR output is untrusted text: escape it and keep it inside quotes.
    mysql_select_db('billdb');
    $sql = "UPDATE bills SET totalAmt = '" . mysql_real_escape_string($amount, $link)
        . "', billDueDate = '" . mysql_real_escape_string($year . "-" . $month . "-" . $day, $link)
        . "' WHERE billID = " . $id;
    mysql_query($sql);
    mysql_close($link);
}
Пример #2
7
 /**
  * OCRs the "amount" and "duedate" regions of a bill image and stores the
  * recognised values on the bill row.
  *
  * The bill's image path is read from billdb.bills. For a non-zero template
  * the crop rectangle of each field is read from templatedb.datafields and
  * multiplied by the image width/height before cropping; for template 0 the
  * whole image is used. Each region is written to images/detection_result/
  * and passed to Tesseract. The due date is read as three space-separated
  * tokens (day/month in either order, then year) with "," and "." removed.
  * The crop files are left on disk.
  *
  * @param mixed $billID   billID of the bill to process.
  * @param mixed $template templateID holding the field coordinates; 0 means "no template".
  * @return string The bill image path read from the database.
  * @throws \RuntimeException When the template has no coordinates for a field or the crop fails.
  */
 public function ocr($billID, $template)
 {
     $amountImgFileDirectory = "images/detection_result/";
     $dueDateImgFileDirectory = "images/detection_result/";
     $amountImgFileName = "croppedAmt.jpg";
     $dueDateImgFileName = "croppedDueDate.jpg";
     require_once 'TesseractOCR.php';

     // Locate and load the scanned bill.
     $this->billdb->select('billFilePath');
     $this->billdb->where('billID', $billID);
     $query1 = $this->billdb->get('bills');
     $ini_filename = $query1->result()[0]->billFilePath;
     $im = imagecreatefromjpeg($ini_filename);
     list($width, $height) = getimagesize($ini_filename);

     // dataFieldLabel => file the cropped region is written to before OCR.
     $cropTargets = array(
         'amount' => $amountImgFileDirectory . $amountImgFileName,
         'duedate' => $dueDateImgFileDirectory . $dueDateImgFileName,
     );
     $recognized = array();
     foreach ($cropTargets as $fieldLabel => $cropPath) {
         if ($template != 0) {
             // Bound parameters instead of string concatenation.
             $query = $this->templatedb->query(
                 "SELECT coordinateLabelX, coordinateLabelY, coordinateLabelX2, coordinateLabelY2 FROM datafields WHERE templateID = ? AND dataFieldLabel = ?",
                 array($template, $fieldLabel)
             );
             $row = $query->row(0);
             if (empty($row)) {
                 // Without this guard a stale crop file from an earlier run could be OCR'd.
                 throw new \RuntimeException("No '" . $fieldLabel . "' coordinates found for the given template.");
             }
             // The table stores two corners; imagecrop() wants origin + size.
             // Coordinates are scaled up by the image dimensions.
             $to_crop_array = array(
                 'x' => $row->coordinateLabelX * $width,
                 'y' => $row->coordinateLabelY * $height,
                 'width' => ($row->coordinateLabelX2 - $row->coordinateLabelX) * $width,
                 'height' => ($row->coordinateLabelY2 - $row->coordinateLabelY) * $height,
             );
             $thumb_im = imagecrop($im, $to_crop_array);
             if ($thumb_im === false) {
                 throw new \RuntimeException("Could not crop the '" . $fieldLabel . "' region of the bill image.");
             }
         } else {
             $thumb_im = $im;
         }
         imagejpeg($thumb_im, $cropPath, 100);

         $tesseract = new TesseractOCR($cropPath);
         $tesseract->setLanguage('eng');
         $recognized[$fieldLabel] = $tesseract->recognize();
     }

     // Keep only digits and separators, then take the first token.
     $amount = preg_replace("/[^0-9,.]/", "", $recognized['amount']);
     $amount = strtok($amount, " ");

     $day = strtok($recognized['duedate'], " ");
     $month = strtok(" ");
     $year = strtok(" ");
     // str_replace() returns the cleaned string; the results were previously
     // discarded, so "5," was never reduced to "5".
     $day = str_replace(array(",", "."), "", $day);
     $month = str_replace(array(",", "."), "", $month);
     $year = str_replace(array(",", "."), "", $year);

     // "March 5 2015" style: the first token is the month name, so swap.
     if (ctype_alpha($day)) {
         $temp = $day;
         $day = $month;
         $month = $temp;
     }

     // Translate a short or full month name to its two-digit number; any
     // other token is left unchanged.
     $monthNumbers = array(
         'Jan' => '01', 'January' => '01',
         'Feb' => '02', 'February' => '02',
         'Mar' => '03', 'March' => '03',
         'Apr' => '04', 'April' => '04',
         'May' => '05',
         'Jun' => '06', 'June' => '06',
         'Jul' => '07', 'July' => '07',
         'Aug' => '08', 'August' => '08',
         'Sep' => '09', 'September' => '09',
         'Oct' => '10', 'October' => '10',
         'Nov' => '11', 'November' => '11',
         'Dec' => '12', 'December' => '12',
     );
     if (isset($monthNumbers[$month])) {
         $month = $monthNumbers[$month];
     }

     $data = array('totalAmt' => $amount, 'billDueDate' => $year . "-" . $month . "-" . $day);
     $this->billdb->where('billID', $billID);
     $this->billdb->update('bills', $data);
     return $ini_filename;
 }
Пример #3
0
<?php

require_once 'vendor/autoload.php';
// Run OCR over img/image1.jpg through img/image10.jpg, restricting the
// recognised characters to lowercase letters and digits, and print each
// result followed by a carriage return.
$i = 1;
while ($i <= 10) {
    $tesseract = new TesseractOCR("img/image{$i}.jpg");
    $tesseract->setLanguage('eng');
    $tesseract->setWhitelist(range('a', 'z'), range(0, 9));
    echo $tesseract->recognize(), "\r";
    $i++;
}
 /**
  * Runs Tesseract over the given file and returns the recognised text.
  *
  * @param mixed $fileUri  Location of the file to recognise; must exist according to File::exists().
  * @param mixed $language Language passed to Tesseract; when null, no language is set.
  * @return mixed Whatever TesseractOCR::recognize() returns.
  * @throws Exception When File::exists() reports that the file is missing.
  */
 private function parseContent($fileUri, $language)
 {
     $fileIsMissing = !File::exists($fileUri);
     if ($fileIsMissing) {
         $message = 'Document parsing job #' . $this->job->getJobId() . ' received a uri to a file that does not seem to exist.';
         throw new Exception($message);
     }

     $ocr = new TesseractOCR($fileUri);
     $tempDirectory = Config::get('paperwork.tesseractTempDirectory');
     $ocr->setTempDir($tempDirectory);

     // Only set a language when one was supplied.
     if ($language !== null) {
         $ocr->setLanguage($language);
     }

     $recognizedText = $ocr->recognize();
     return $recognizedText;
 }
 /**
  * Recognising german.png with the language set to 'deu' must yield the
  * expected German sentence, umlaut and sharp s included.
  */
 public function testSpecificLanguageRecognition()
 {
     $expectedText = 'grüßen in Deutsch';

     $ocr = new TesseractOCR("{$this->imagesDir}german.png");
     $ocr->setLanguage('deu');
     $actualText = $ocr->recognize();

     $this->assertEquals($expectedText, $actualText);
 }
Пример #6
-1
 /**
  * Runs OCR over assets/img/social/fb_login.png under the public path and
  * echoes the recognised text. Temporary files go to the storage path.
  */
 public function getOCR()
 {
     $libraryFile = base_path() . '/vendor/thiagoalessio/tesseract_ocr/TesseractOCR/TesseractOCR.php';
     require_once $libraryFile;

     $imagePath = public_path() . '/assets/img/social/fb_login.png';
     $ocr = new TesseractOCR($imagePath);
     $ocr->setTempDir(storage_path());
     // Language codes are the same 3-letter codes used by the tesseract training data packages.
     $ocr->setLanguage('eng');

     $text = $ocr->recognize();
     echo $text;
 }
Пример #7
-1
 /**
  * Validate an uploaded file, store it, run OCR on it and redirect with the text.
  *
  * The upload must pass the "required|mimes:jpeg,png,pdf" rule. It is stored
  * under public_path()/uploads/photos with a server-generated name, then
  * passed to Tesseract with the language set to 'eng'.
  *
  * @return mixed Redirect to the "upload-form" route carrying "message" and "ocr"
  *               on success; redirect back carrying "message" when validation
  *               fails, or "error" when the file cannot be stored.
  */
 public function postUpload()
 {
     $file = Input::file('image');

     // Let the validator check that the upload is present and has an accepted type.
     $input = array('image' => $file);
     $rules = array('image' => 'required|mimes:jpeg,png,pdf');
     $validator = Validator::make($input, $rules);
     if ($validator->fails()) {
         // Tell the user the provided file was not of an adequate type.
         return Redirect::back()->with('message', 'Error: The provided file was not an image');
     }

     // The client-supplied file name is untrusted: storing under it in a
     // public directory lets an uploader overwrite other files or pick the
     // extension. Use a generated name plus the extension guessed from the
     // file's detected MIME type instead.
     $extension = $file->guessExtension();
     if ($extension === null) {
         return Redirect::back()->with('error', 'An error occured');
     }
     $destinationPath = 'uploads/photos';
     $image = sha1(uniqid(mt_rand(), true)) . '.' . $extension;

     // Store under public_path() so the file is written to the same
     // location the OCR step reads from, whatever the working directory is.
     $targetDirectory = public_path() . '/' . $destinationPath;
     try {
         $file->move($targetDirectory, $image);
     } catch (\Symfony\Component\HttpFoundation\File\Exception\FileException $e) {
         return Redirect::back()->with('error', 'An error occured');
     }

     require_once base_path() . '/vendor/thiagoalessio/tesseract_ocr/TesseractOCR/TesseractOCR.php';
     $tesseract = new TesseractOCR($targetDirectory . '/' . $image);
     $tesseract->setTempDir(storage_path());
     // Language codes are the same 3-letter codes used by the tesseract training data packages.
     $tesseract->setLanguage('eng');
     $ocr = $tesseract->recognize();

     return Redirect::route('upload-form')->with('message', 'Success: File upload was successful')->with('ocr', $ocr);
 }