Example #1
7
 /**
  * Read the total amount and the due date from a stored bill image via OCR
  * and save both values on the bill record.
  *
  * The image path is looked up in the bills table by $billID. For the
  * 'amount' and 'duedate' data fields of $template the stored region is
  * cropped out of the image, written below images/detection_result/ and
  * passed to Tesseract. When $template is 0 the whole image is used.
  *
  * @param mixed $billID   Value matched against bills.billID.
  * @param mixed $template Value matched against datafields.templateID; 0 disables cropping.
  * @return string The billFilePath of the processed bill.
  */
 public function ocr($billID, $template)
 {
     require_once 'TesseractOCR.php';

     $this->billdb->select('billFilePath');
     $this->billdb->where('billID', $billID);
     $billQuery = $this->billdb->get('bills');
     $ini_filename = $billQuery->result()[0]->billFilePath;

     $im = imagecreatefromjpeg($ini_filename);
     list($width, $height) = getimagesize($ini_filename);

     $amount = $this->ocrTemplateField($im, $width, $height, $template, 'amount', 'images/detection_result/croppedAmt.jpg');
     // Keep only digits and the two separator characters.
     $amount = preg_replace("/[^0-9,.]/", "", $amount);

     $duedate = $this->ocrTemplateField($im, $width, $height, $template, 'duedate', 'images/detection_result/croppedDueDate.jpg');

     $amount = strtok($amount, " ");
     $day = strtok($duedate, " ");
     $month = strtok(" ");
     $year = strtok(" ");

     // str_replace() returns the cleaned string; the result has to be
     // assigned back or the punctuation stays in the date tokens.
     $punctuation = array(",", ".");
     $day = str_replace($punctuation, "", (string) $day);
     $month = str_replace($punctuation, "", (string) $month);
     $year = str_replace($punctuation, "", (string) $year);

     // "Jan 5 2015" style input: the first token is the month name, so swap.
     if (ctype_alpha($day)) {
         $temp = $day;
         $day = $month;
         $month = $temp;
     }

     $monthNumbers = array(
         'Jan' => "01", 'January' => "01",
         'Feb' => "02", 'February' => "02",
         'Mar' => "03", 'March' => "03",
         'Apr' => "04", 'April' => "04",
         'May' => "05",
         'Jun' => "06", 'June' => "06",
         'Jul' => "07", 'July' => "07",
         'Aug' => "08", 'August' => "08",
         'Sep' => "09", 'September' => "09",
         'Oct' => "10", 'October' => "10",
         'Nov' => "11", 'November' => "11",
         'Dec' => "12", 'December' => "12",
     );
     // Unknown tokens (for example an already numeric month) pass through unchanged.
     if (isset($monthNumbers[$month])) {
         $month = $monthNumbers[$month];
     }

     $data = array('totalAmt' => $amount, 'billDueDate' => $year . "-" . $month . "-" . $day);
     $this->billdb->where('billID', $billID);
     $this->billdb->update('bills', $data);

     // The cropped images are left in place after the run.
     return $ini_filename;
 }

 /**
  * Crop the region stored for one template data field, save it as a JPEG
  * and return the raw OCR text of that file.
  *
  * The stored coordinates are multiplied by the image width/height before
  * cropping. When $template is 0 no lookup happens and the whole image is
  * written to $outputPath instead.
  *
  * @param resource|object $im         GD image of the bill.
  * @param int             $width      Image width in pixels.
  * @param int             $height     Image height in pixels.
  * @param mixed           $template   Value matched against datafields.templateID.
  * @param string          $fieldLabel Value matched against datafields.dataFieldLabel.
  * @param string          $outputPath Path the (cropped) JPEG is written to.
  * @return string Text returned by TesseractOCR::recognize().
  */
 private function ocrTemplateField($im, $width, $height, $template, $fieldLabel, $outputPath)
 {
     if ($template != 0) {
         // Bound parameters instead of string concatenation, so $template
         // cannot alter the statement.
         $query = $this->templatedb->query(
             "SELECT coordinateLabelX, coordinateLabelY, coordinateLabelX2, coordinateLabelY2 FROM datafields WHERE templateID = ? AND dataFieldLabel = ?",
             array($template, $fieldLabel)
         );
         $row = $query->row(0);
         // imagecrop() works on whole pixels; truncate explicitly.
         $region = array(
             'x' => (int) ($row->coordinateLabelX * $width),
             'y' => (int) ($row->coordinateLabelY * $height),
             'width' => (int) (($row->coordinateLabelX2 - $row->coordinateLabelX) * $width),
             'height' => (int) (($row->coordinateLabelY2 - $row->coordinateLabelY) * $height),
         );
         $fieldImage = imagecrop($im, $region);
     } else {
         $fieldImage = $im;
     }
     imagejpeg($fieldImage, $outputPath, 100);

     $tesseract = new TesseractOCR($outputPath);
     $tesseract->setLanguage('eng');
     return $tesseract->recognize();
 }
Example #2
7
/**
 * Read the amount and the due date from a bill image via OCR and store both
 * on the bill row.
 *
 * The image path comes from billdb.bills; the crop rectangles for the
 * 'amount' and 'duedate' fields come from templatedb.datafields. Each crop is
 * written to images/cropped1.jpg / images/cropped2.jpg and passed to
 * Tesseract. The due date text is read as "<day> <month> <year>".
 *
 * NOTE(review): this relies on the legacy ext/mysql API (mysql_*), which was
 * removed in PHP 7. Migrate to mysqli or PDO before running on a current PHP.
 *
 * @param int|string $id       Bill id; cast to int before use in SQL.
 * @param int|string $template Template id; cast to int before use in SQL.
 * @return void
 */
function ocr($id, $template)
{
    //connect to mysql and getting the coordinate data
    require_once 'TesseractOCR/TesseractOCR.php';
    // NOTE(review): database credentials are hard-coded here; move them to configuration.
    $link = mysql_connect('localhost:3306', 'root', 'ysAb7cEkjvOa');
    mysql_select_db('billdb');

    // Both ids are placed unquoted into SQL below, so force them to integers.
    $id = (int) $id;
    $template = (int) $template;

    $sql = "SELECT billFilePath from bills where billID = " . $id;
    $result = mysql_query($sql);
    $row = mysql_fetch_array($result, MYSQL_NUM);
    $ini_filename = $row[0];
    $im = imagecreatefromjpeg($ini_filename);

    mysql_select_db('templatedb');
    // Field label => file the cropped region is written to.
    $croppedPaths = array('amount' => 'images/cropped1.jpg', 'duedate' => 'images/cropped2.jpg');
    $recognized = array();
    foreach ($croppedPaths as $fieldLabel => $croppedPath) {
        $sql = "SELECT coordinateLabelX, coordinateLabelY, coordinateLabelX2, coordinateLabelY2 FROM datafields WHERE templateID = " . $template . " AND dataFieldLabel = '" . $fieldLabel . "'";
        $result = mysql_query($sql);
        $row = mysql_fetch_array($result, MYSQL_NUM);
        //cropping the image using the coordinate data
        $to_crop_array = array('x' => $row[0], 'y' => $row[1], 'width' => $row[2] - $row[0], 'height' => $row[3] - $row[1]);
        $thumb_im = imagecrop($im, $to_crop_array);
        imagejpeg($thumb_im, $croppedPath, 100);
        //run OCR on the cropped section
        $tesseract = new TesseractOCR($croppedPath);
        // String literal: the bare word eng was an undefined constant.
        $tesseract->setLanguage('eng');
        $recognized[$fieldLabel] = $tesseract->recognize();
    }

    $amount = strtok($recognized['amount'], " ");
    $day = strtok($recognized['duedate'], " ");
    $month = strtok(" ");
    $year = strtok(" ");

    // Quoted keys: the original case labels (Jan, Feb, ...) were undefined constants.
    $monthNumbers = array(
        'Jan' => "01", 'Feb' => "02", 'Mar' => "03", 'Apr' => "04",
        'May' => "05", 'Jun' => "06", 'Jul' => "07", 'Aug' => "08",
        'Sep' => "09", 'Oct' => "10", 'Nov' => "11", 'Dec' => "12",
    );
    if (isset($monthNumbers[$month])) {
        $month = $monthNumbers[$month];
    }

    mysql_select_db('billdb');
    // OCR output is untrusted text: escape and quote it instead of splicing it into the statement.
    $sql = "UPDATE bills SET totalAmt = '" . mysql_real_escape_string((string) $amount, $link) . "', billDueDate = '" . mysql_real_escape_string($year . "-" . $month . "-" . $day, $link) . "' WHERE billID = " . $id;
    $result = mysql_query($sql);
    mysql_close($link);
}
Example #3
1
 /**
  * Read the captcha image on the current page with OCR and type the result
  * into a form field.
  *
  * @param string $imglocation Selector handed to branch() to locate the captcha image element.
  * @param string $typeinbox   CSS selector of the element that receives the recognized text.
  */
 function captcha($imglocation, $typeinbox)
 {
     // Normalize the page markup before querying it.
     $cleanMarkup = tidy_parse_string($this->getSource())->html()->value;
     $imageUrl = htmlqp($cleanMarkup, 'body')->branch($imglocation)->attr('src');

     // Download the image to a fixed local path for Tesseract.
     $localCopy = '/tmp/mycaptcha.png';
     $imageBytes = file_get_contents($imageUrl);
     file_put_contents($localCopy, $imageBytes);

     $ocr = new TesseractOCR($localCopy);
     $solved = $ocr->recognize();

     $inputBox = $this->driver->findElement(WebDriverBy::CssSelector($typeinbox));
     $inputBox->sendKeys($solved);
 }
/**
 * Run digit-only OCR on a file below BASE_PATH . 'inbox/' and split the
 * recognized text at every space or newline.
 *
 * @param string $fileName File name appended to the inbox directory.
 * @return array|false Result of preg_split() on the recognized text.
 */
function numbersForFileNamed($fileName)
{
    $inboxPath = BASE_PATH . 'inbox/' . $fileName;

    $ocr = new \TesseractOCR($inboxPath);
    // Restrict recognition to the characters 0-9.
    $ocr->setWhitelist(range(0, 9));
    $recognized = $ocr->recognize();

    return preg_split('/[ \\n]/', $recognized);
}
Example #5
0
 /**
  * OCR a file and store the recognized text as "<filename>.txt" in the
  * asset's 'ocr' save directory.
  *
  * @param string $originalFile Path of the file handed to TesseractOCR::recognize().
  * @param mixed  $assetsID     Asset identifier passed to getSaveDir().
  * @param string $filename     Base name of the text file, without the ".txt" suffix.
  * @return array|false FALSE when the text file cannot be written; otherwise
  *                     an array whose 'ocr' key holds one entry with the
  *                     name, path, size, type and errors of the written file.
  */
 public static function createOCRTextFile($originalFile, $assetsID, $filename)
 {
     $text = TesseractOCR::recognize($originalFile);

     // Build the target path once so the write and the later size/type
     // lookups refer to the same file (the original lookups omitted the
     // directory separator).
     $textName = $filename . '.txt';
     $textPath = self::getSaveDir($assetsID, 'ocr') . DIRECTORY_SEPARATOR . $textName;

     if (file_put_contents($textPath, $text) === FALSE) {
         return FALSE;
     }

     $return = array();
     $return['ocr'][] = array(
         'name' => $textName,
         'path' => self::getSaveDir($assetsID, 'ocr', FALSE),
         'size' => filesize($textPath),
         'type' => self::getMimeType($textPath),
         'errors' => '',
     );

     // The description was previously built and then discarded.
     return $return;
 }
Example #6
0
<?php

require_once 'vendor/autoload.php';
// OCR img/image1.jpg through img/image10.jpg, restricted to a-z and 0-9,
// and print each result followed by a carriage return.
$i = 1;
while ($i <= 10) {
    $tesseract = new TesseractOCR("img/image{$i}.jpg");
    $tesseract->setLanguage('eng');
    $tesseract->setWhitelist(range('a', 'z'), range(0, 9));
    echo $tesseract->recognize(), "\r";
    $i++;
}
 /**
  * OCR an image from the public images directory, keep the text in the
  * session under 'trans' and return it.
  *
  * @param string $image_path File name appended to public_path() . '/images/'.
  * @return mixed Whatever TesseractOCR::recognize() yields for the image.
  */
 public function tesseract($image_path)
 {
     // NOTE(review): absolute, machine-specific include path.
     require_once 'D:\\xampp\\htdocs\\ocr\\vendor\\thiagoalessio\\tesseract_ocr\\TesseractOCR\\TesseractOCR.php';

     $source = public_path() . '/images/' . $image_path;
     $ocr = new TesseractOCR($source);
     $recognized = $ocr->recognize();

     Session::put('trans', $recognized);

     return $recognized;
 }
Example #8
-1
 /**
  * OCR an image file and write the recognized text to a file below
  * public/output/.
  *
  * @param object $imageFile Object exposing the filePath and fileName properties read here.
  * @return CR_File The file object the recognized text was written to.
  */
 public function recognizeText($imageFile)
 {
     require_once "TesseractOCR/TesseractOCR.php";
     require_once "Repositories/CR_File.php";

     // Limit recognition to letters, digits and the listed punctuation.
     $ocr = new TesseractOCR($imageFile->filePath);
     $ocr->setWhitelist(range('A', 'Z'), range('a', 'z'), range(0, 9), '_-.,;"#<>()%{}[]= ');
     $recognized = $ocr->recognize();

     // Target: public/output/code_<fileName><suffix>; the suffix comes from
     // LANGUAGES[language] (presumably a file extension - verify).
     $suffix = $this->LANGUAGES[$this->language];
     $output = new CR_File("public/output/code_" . $imageFile->fileName . $suffix);
     $output->write($recognized);

     return $output;
 }
Example #9
-1
 /**
  * Create a Tesseract config file in the mfcs temp directory and return its path.
  *
  * The file is always created. When a whitelist can be generated from
  * $arguments, it is written as a "tessedit_char_whitelist" line.
  *
  * @param mixed $arguments Passed unchanged to TesseractOCR::generateWhitelist().
  * @return string Path of the config file.
  */
 function generateConfigFile($arguments)
 {
     $configFile = mfcs::config('mfcstmp') . '/tesseract-ocr-config-' . rand() . '.conf';

     // Create the file natively: the former exec("touch ...") passed the
     // unescaped path through a shell, which breaks on spaces and other
     // shell metacharacters.
     touch($configFile);

     $whitelist = TesseractOCR::generateWhitelist($arguments);
     if (!empty($whitelist)) {
         // Single call instead of fopen/fwrite/fclose with an unchecked handle.
         file_put_contents($configFile, "tessedit_char_whitelist {$whitelist}");
     }

     return $configFile;
 }
    // Grab the uploaded file
    $file = $request->files->get('upload');
    // Extract some information about the uploaded file
    $info = new SplFileInfo($file->getClientOriginalName());
    // Create a quasi-random filename
    $filename = sprintf('%d.%s', time(), $info->getExtension());
    // Copy the file
    $file->move(__DIR__ . '/../uploads', $filename);
    // Instantiate the Tessearct library
    $tesseract = new TesseractOCR(__DIR__ . '/../uploads/' . $filename);
    // Perform OCR on the uploaded image
    $text = $tesseract->recognize();
    return $app['twig']->render('results.twig', ['text' => $text]);
});
// POST /identify-telephone-number: store the uploaded image, OCR it and
// answer with the phone number found in the recognized text as JSON.
$app->post('/identify-telephone-number', function (Request $request) use($app) {
    $upload = $request->files->get('upload');

    // Only the extension of the client-supplied name is reused; the stored
    // name is the current Unix timestamp.
    $clientName = new SplFileInfo($upload->getClientOriginalName());
    $storedName = sprintf('%d.%s', time(), $clientName->getExtension());

    // Move the upload into the uploads directory under the new name.
    $upload->move(__DIR__ . '/../uploads', $storedName);

    // OCR the stored file.
    $ocr = new TesseractOCR(__DIR__ . '/../uploads/' . $storedName);
    $recognized = $ocr->recognize();

    // Look for a phone number in the text, using 'GB' as the region.
    $payload = ['number' => findPhoneNumber($recognized, 'GB')];

    return $app->json($payload);
});

// Hand control to the application.
$app->run();
 /**
  * OCR the file at $fileUri and return the recognized text.
  *
  * @param string      $fileUri  Location of the file; must exist according to File::exists().
  * @param string|null $language Language passed to Tesseract; skipped when null.
  * @return mixed Result of TesseractOCR::recognize().
  * @throws Exception When the file does not exist.
  */
 private function parseContent($fileUri, $language)
 {
     if (File::exists($fileUri)) {
         $ocr = new TesseractOCR($fileUri);
         $ocr->setTempDir(Config::get('paperwork.tesseractTempDirectory'));

         // Leave Tesseract's language untouched unless one was supplied.
         if ($language !== null) {
             $ocr->setLanguage($language);
         }

         return $ocr->recognize();
     }

     throw new Exception('Document parsing job #' . $this->job->getJobId() . ' received a uri to a file that does not seem to exist.');
 }
 /**
  * Recognizing german.png with the 'deu' language must yield the expected
  * German phrase, including umlaut and sharp s.
  */
 public function testSpecificLanguageRecognition()
 {
     $germanImage = "{$this->imagesDir}german.png";

     $ocr = new TesseractOCR($germanImage);
     $ocr->setLanguage('deu');
     $recognized = $ocr->recognize();

     $this->assertEquals('grüßen in Deutsch', $recognized);
 }
Example #13
-1
 /**
  * OCR an image, using CACHE_PATH as Tesseract's temp directory.
  *
  * @param string $img Image handed to the TesseractOCR constructor.
  * @param string $lng Language code; accepted but currently not applied.
  * @return mixed Result of TesseractOCR::recognize().
  */
 function ocr($img, $lng = 'fre')
 {
     $engine = new \TesseractOCR($img);
     $engine->setTempDir(CACHE_PATH);
     // Language selection is disabled:
     // $engine->setLanguage($lng);
     $text = $engine->recognize();

     return $text;
 }
Example #14
-1
 /**
  * OCR the bundled fb_login.png image in English and echo the recognized text.
  */
 public function getOCR()
 {
     require_once base_path() . '/vendor/thiagoalessio/tesseract_ocr/TesseractOCR/TesseractOCR.php';

     $imagePath = public_path() . '/assets/img/social/fb_login.png';
     $ocr = new TesseractOCR($imagePath);
     $ocr->setTempDir(storage_path());
     // Same 3-letter code as the Tesseract training data packages.
     $ocr->setLanguage('eng');

     $recognized = $ocr->recognize();
     echo $recognized;
 }
Example #15
-1
 /**
  * Handle the OCR upload form: validate the uploaded file, store it and run
  * OCR on the stored copy.
  *
  * @return mixed On validation failure a redirect back carrying an error
  *               'message'; otherwise a redirect to the 'upload-form' route
  *               carrying a success 'message' and the recognized text as 'ocr'.
  */
 public function postUpload()
 {
     // Fetch the upload once and reuse it for validation and storage.
     $file = Input::file('image');

     // The file is required and must be one of the listed mime types.
     $input = array('image' => $file);
     $rules = array('image' => 'required|mimes:jpeg,png,pdf');
     $validator = Validator::make($input, $rules);

     if ($validator->fails()) {
         // Tell the user the provided file was not of an accepted type.
         return Redirect::back()->with('message', 'Error: The provided file was not an image');
     }

     // NOTE(review): the stored name is the client-supplied original name,
     // so two uploads with the same name overwrite each other - confirm
     // this is intended.
     $destinationPath = 'uploads/photos';
     $image = $file->getClientOriginalName();
     $file->move($destinationPath, $image);

     require_once base_path() . '/vendor/thiagoalessio/tesseract_ocr/TesseractOCR/TesseractOCR.php';
     $tesseract = new TesseractOCR(public_path() . '/' . $destinationPath . '/' . $image);
     $tesseract->setTempDir(storage_path());
     // Same 3-letter code as the Tesseract training data packages.
     $tesseract->setLanguage('eng');
     $ocr = $tesseract->recognize();

     // Both outcomes return above or here; the former trailing error
     // redirect could never run and has been removed.
     return Redirect::route('upload-form')->with('message', 'Success: File upload was successful')->with('ocr', $ocr);
 }