/**
 * OCR the "amount" and "duedate" regions of a bill image and store the result.
 *
 * The bill image path is read from `bills.billFilePath` (via $this->billdb).
 * For each field the crop rectangle is read from `datafields` (via
 * $this->templatedb), scaled from relative coordinates to pixels, cropped,
 * written to images/detection_result/ and passed to Tesseract. The parsed
 * amount and due date are then written back to the `bills` row.
 *
 * When $template is 0 the whole image is OCR'd for both fields and no
 * template lookup is performed.
 *
 * The cropped images are intentionally left on disk after the run.
 *
 * @param int|string $billID   Primary key of the bill to process.
 * @param int|string $template Template id whose data fields describe the crop regions.
 *
 * @return string Path of the original bill image (bills.billFilePath).
 *
 * @throws \RuntimeException If the bill, its image, or a template region cannot be loaded.
 */
public function ocr($billID, $template)
{
    require_once 'TesseractOCR.php';

    // Field label in `datafields` => file the cropped region is written to.
    $croppedImages = [
        'amount'  => 'images/detection_result/croppedAmt.jpg',
        'duedate' => 'images/detection_result/croppedDueDate.jpg',
    ];

    $this->billdb->select('billFilePath');
    $this->billdb->where('billID', $billID);
    $bill = $this->billdb->get('bills')->row();
    if (empty($bill)) {
        throw new \RuntimeException('No bill found for billID ' . $billID);
    }

    $ini_filename = $bill->billFilePath;
    $im = imagecreatefromjpeg($ini_filename);
    if ($im === false) {
        throw new \RuntimeException('Unable to load bill image ' . $ini_filename);
    }
    $width = imagesx($im);
    $height = imagesy($im);

    $recognized = [];
    foreach ($croppedImages as $label => $croppedPath) {
        $region = $im;

        if ($template != 0) {
            // Bound parameters instead of string concatenation: $template comes from the caller.
            $query = $this->templatedb->query(
                'SELECT coordinateLabelX, coordinateLabelY, coordinateLabelX2, coordinateLabelY2'
                . ' FROM datafields WHERE templateID = ? AND dataFieldLabel = ?',
                [$template, $label]
            );
            $row = $query->row(0);
            if (empty($row)) {
                throw new \RuntimeException(
                    'Template ' . $template . ' has no "' . $label . '" data field'
                );
            }

            // Coordinates are stored relative to the image size; scale them up to pixels.
            $region = imagecrop($im, [
                'x'      => (int) ($row->coordinateLabelX * $width),
                'y'      => (int) ($row->coordinateLabelY * $height),
                'width'  => (int) (($row->coordinateLabelX2 - $row->coordinateLabelX) * $width),
                'height' => (int) (($row->coordinateLabelY2 - $row->coordinateLabelY) * $height),
            ]);
            if ($region === false) {
                throw new \RuntimeException('Unable to crop the "' . $label . '" region');
            }
        }

        imagejpeg($region, $croppedPath, 100);

        // Run OCR on the cropped section.
        $tesseract = new TesseractOCR($croppedPath);
        $tesseract->setLanguage('eng');
        $recognized[$label] = $tesseract->recognize();
    }

    // Keep only digits and separators from the recognised amount.
    $amount = preg_replace("/[^0-9,.]/", "", $recognized['amount']);
    $amount = strtok($amount, " ");

    // The due date is expected as three space-separated tokens.
    $day = strtok($recognized['duedate'], " ");
    $month = strtok(" ");
    $year = strtok(" ");

    // str_replace() returns the cleaned string; the result has to be assigned
    // or the punctuation (e.g. "March 3, 2015") stays in the stored date.
    $day = str_replace([",", "."], "", (string) $day);
    $month = str_replace([",", "."], "", (string) $month);
    $year = str_replace([",", "."], "", (string) $year);

    // "March 3 2015" style: the first token is the month name, so swap.
    if (ctype_alpha($day)) {
        $temp = $day;
        $day = $month;
        $month = $temp;
    }

    $monthNumbers = [
        'Jan' => '01', 'January'   => '01',
        'Feb' => '02', 'February'  => '02',
        'Mar' => '03', 'March'     => '03',
        'Apr' => '04', 'April'     => '04',
        'May' => '05',
        'Jun' => '06', 'June'      => '06',
        'Jul' => '07', 'July'      => '07',
        'Aug' => '08', 'August'    => '08',
        'Sep' => '09', 'September' => '09',
        'Oct' => '10', 'October'   => '10',
        'Nov' => '11', 'November'  => '11',
        'Dec' => '12', 'December'  => '12',
    ];
    if (isset($monthNumbers[$month])) {
        $month = $monthNumbers[$month];
    }

    $data = [
        'totalAmt'    => $amount,
        'billDueDate' => $year . "-" . $month . "-" . $day,
    ];
    $this->billdb->where('billID', $billID);
    $this->billdb->update('bills', $data);

    return $ini_filename;
}
/**
 * OCR the "amount" and "duedate" regions of a bill image and store the result.
 *
 * Reads the image path from billdb.bills, the crop rectangles (pixel
 * coordinates) from templatedb.datafields, writes the crops to
 * images/cropped1.jpg and images/cropped2.jpg, runs Tesseract on each and
 * updates totalAmt / billDueDate on the bill row.
 *
 * Uses mysqli prepared statements: the legacy mysql_* functions were removed
 * in PHP 7, and both the ids and the OCR output used to be concatenated
 * straight into SQL.
 *
 * On any failure an E_USER_WARNING is raised and the function returns without
 * updating the bill.
 *
 * @param int|string $id       billID of the bill to process.
 * @param int|string $template templateID whose data fields describe the crop regions.
 *
 * @return void
 */
function ocr($id, $template)
{
    require_once 'TesseractOCR/TesseractOCR.php';

    $billId = (int) $id;
    $templateId = (int) $template;

    // NOTE(review): root credentials are hard-coded here; they should come from
    // configuration and be rotated, since they are committed to source control.
    $link = new mysqli('localhost', 'root', 'ysAb7cEkjvOa', 'billdb', 3306);
    if ($link->connect_errno) {
        trigger_error('ocr(): could not connect to MySQL: ' . $link->connect_error, E_USER_WARNING);
        return;
    }

    // Report a failure and release the connection; callers return right after.
    $abort = function ($reason) use ($link) {
        trigger_error('ocr(): ' . $reason, E_USER_WARNING);
        $link->close();
    };

    // Look up the path of the bill image.
    $ini_filename = null;
    $stmt = $link->prepare('SELECT billFilePath FROM bills WHERE billID = ?');
    if ($stmt === false) {
        $abort('could not prepare bill lookup: ' . $link->error);
        return;
    }
    $stmt->bind_param('i', $billId);
    $stmt->execute();
    $stmt->bind_result($ini_filename);
    $stmt->fetch();
    $stmt->close();

    $im = is_string($ini_filename) ? imagecreatefromjpeg($ini_filename) : false;
    if ($im === false) {
        $abort('no readable JPEG image for bill ' . $billId);
        return;
    }

    // Crop and OCR each field using the template's coordinates.
    $link->select_db('templatedb');
    $stmt = $link->prepare(
        'SELECT coordinateLabelX, coordinateLabelY, coordinateLabelX2, coordinateLabelY2'
        . ' FROM datafields WHERE templateID = ? AND dataFieldLabel = ?'
    );
    if ($stmt === false) {
        $abort('could not prepare template lookup: ' . $link->error);
        return;
    }

    // Field label in `datafields` => file the cropped region is written to.
    $croppedFiles = [
        'amount'  => 'images/cropped1.jpg',
        'duedate' => 'images/cropped2.jpg',
    ];
    $recognized = [];
    foreach ($croppedFiles as $label => $croppedPath) {
        $x1 = $y1 = $x2 = $y2 = null;
        $stmt->bind_param('is', $templateId, $label);
        $stmt->execute();
        $stmt->store_result();
        $stmt->bind_result($x1, $y1, $x2, $y2);
        $found = $stmt->fetch();
        // Release the result set so the statement can be executed again.
        $stmt->free_result();
        if ($found !== true) {
            $stmt->close();
            $abort('template ' . $templateId . ' has no "' . $label . '" data field');
            return;
        }

        $thumb_im = imagecrop($im, [
            'x'      => (int) $x1,
            'y'      => (int) $y1,
            'width'  => (int) ($x2 - $x1),
            'height' => (int) ($y2 - $y1),
        ]);
        if ($thumb_im === false) {
            $stmt->close();
            $abort('could not crop the "' . $label . '" region');
            return;
        }
        imagejpeg($thumb_im, $croppedPath, 100);

        // Run OCR on the cropped section. 'eng' must be a string literal: the
        // bare word was an undefined constant.
        $tesseract = new TesseractOCR($croppedPath);
        $tesseract->setLanguage('eng');
        $recognized[$label] = $tesseract->recognize();
    }
    $stmt->close();

    // Keep only digits and separators so stray OCR characters never reach the
    // database (same filter as the sibling ocr() implementation).
    $amount = preg_replace('/[^0-9,.]/', '', (string) $recognized['amount']);
    $amount = (string) strtok($amount, " ");

    // The due date is expected as "<day> <month> <year>".
    $day = strtok($recognized['duedate'], " ");
    $month = strtok(" ");
    $year = strtok(" ");

    // Month names as quoted strings; the bare words (Jan, Feb, ...) were
    // undefined constants. Full month names are accepted as well.
    $monthNumbers = [
        'Jan' => '01', 'January'   => '01',
        'Feb' => '02', 'February'  => '02',
        'Mar' => '03', 'March'     => '03',
        'Apr' => '04', 'April'     => '04',
        'May' => '05',
        'Jun' => '06', 'June'      => '06',
        'Jul' => '07', 'July'      => '07',
        'Aug' => '08', 'August'    => '08',
        'Sep' => '09', 'September' => '09',
        'Oct' => '10', 'October'   => '10',
        'Nov' => '11', 'November'  => '11',
        'Dec' => '12', 'December'  => '12',
    ];
    if (is_string($month) && isset($monthNumbers[$month])) {
        $month = $monthNumbers[$month];
    }
    $dueDate = $year . '-' . $month . '-' . $day;

    // Store the recognised values on the bill.
    $link->select_db('billdb');
    $stmt = $link->prepare('UPDATE bills SET totalAmt = ?, billDueDate = ? WHERE billID = ?');
    if ($stmt === false) {
        $abort('could not prepare bill update: ' . $link->error);
        return;
    }
    $stmt->bind_param('ssi', $amount, $dueDate, $billId);
    if (!$stmt->execute()) {
        trigger_error('ocr(): bill update failed: ' . $stmt->error, E_USER_WARNING);
    }
    $stmt->close();
    $link->close();
}
/**
 * Read the captcha image on the current page with OCR and type the result
 * into a form field.
 *
 * @param string $imglocation Selector handed to branch() to locate the captcha image element.
 * @param string $typeinbox   CSS selector of the element that receives the recognised text.
 */
function captcha($imglocation, $typeinbox)
{
    $localCopy = '/tmp/mycaptcha.png';

    // Clean up the page markup before querying it for the image URL.
    $pageSource = $this->getSource();
    $cleanMarkup = tidy_parse_string($pageSource)->html()->value;
    $imageUrl = htmlqp($cleanMarkup, 'body')->branch($imglocation)->attr('src');

    // Download the captcha image to a local file for Tesseract.
    $imageBytes = file_get_contents($imageUrl);
    file_put_contents($localCopy, $imageBytes);

    $ocr = new TesseractOCR($localCopy);
    $solved = $ocr->recognize();

    $inputBox = $this->driver->findElement(WebDriverBy::CssSelector($typeinbox));
    $inputBox->sendKeys($solved);
}
/**
 * OCR an image from the inbox directory, restricted to digits, and return the
 * recognised text split on spaces and newlines.
 *
 * @param string $fileName File name relative to BASE_PATH . 'inbox/'.
 *
 * @return array Pieces produced by splitting the recognised text.
 */
function numbersForFileNamed($fileName)
{
    $imagePath = BASE_PATH . 'inbox/' . $fileName;

    $ocr = new \TesseractOCR($imagePath);
    // Only the digits 0-9 are allowed in the output.
    $digits = range(0, 9);
    $ocr->setWhitelist($digits);

    $recognized = $ocr->recognize();

    return preg_split('/[ \\n]/', $recognized);
}
/**
 * OCR a file and save the recognised text as "<filename>.txt" in the asset's
 * 'ocr' save directory.
 *
 * @param string $originalFile Path of the file handed to TesseractOCR::recognize().
 * @param mixed  $assetsID     Asset identifier passed to self::getSaveDir().
 * @param string $filename     Base name (without ".txt") of the text file to create.
 *
 * @return array|false FALSE when the text file cannot be written; otherwise
 *                     an array with an 'ocr' list holding one entry that
 *                     describes the created file (name, path, size, type, errors).
 */
public static function createOCRTextFile($originalFile, $assetsID, $filename)
{
    $text = TesseractOCR::recognize($originalFile);

    // Build the target path once so that writing, filesize() and the MIME
    // lookup all refer to the same file. The size/type lookups previously
    // omitted the directory separator used when writing.
    $textName = $filename . '.txt';
    $textPath = self::getSaveDir($assetsID, 'ocr') . DIRECTORY_SEPARATOR . $textName;

    if (file_put_contents($textPath, $text) === FALSE) {
        return FALSE;
    }

    $return = array();
    $return['ocr'][] = array(
        'name'   => $textName,
        'path'   => self::getSaveDir($assetsID, 'ocr', FALSE),
        'size'   => filesize($textPath),
        'type'   => self::getMimeType($textPath),
        'errors' => '',
    );

    // The description was previously built but never handed back to the caller.
    return $return;
}
<?php

require_once 'vendor/autoload.php';

// OCR img/image1.jpg .. img/image10.jpg, allowing only lowercase letters and
// digits in the output, and print each result followed by a carriage return.
foreach (range(1, 10) as $n) {
    $ocr = new TesseractOCR('img/image' . $n . '.jpg');
    $ocr->setLanguage('eng');
    $ocr->setWhitelist(range('a', 'z'), range(0, 9));

    $recognized = $ocr->recognize();
    echo $recognized . "\r";
}
/**
 * OCR an image from the public images directory, remember the text in the
 * session under 'trans', and return it.
 *
 * @param string $image_path Path of the image relative to public_path() . '/images/'.
 *
 * @return mixed Whatever TesseractOCR::recognize() yields for the image.
 */
public function tesseract($image_path)
{
    // NOTE(review): absolute, machine-specific include path.
    require_once 'D:\\xampp\\htdocs\\ocr\\vendor\\thiagoalessio\\tesseract_ocr\\TesseractOCR\\TesseractOCR.php';

    $absoluteImage = public_path() . '/images/' . $image_path;
    $ocr = new TesseractOCR($absoluteImage);
    $recognized = $ocr->recognize();

    Session::put('trans', $recognized);

    return $recognized;
}
/**
 * OCR an image of source code and write the recognised text to
 * public/output/code_<fileName><suffix>, where the suffix is looked up in
 * $this->LANGUAGES by $this->language.
 *
 * @param object $imageFile Object exposing ->filePath (image to read) and ->fileName.
 *
 * @return CR_File The file object the recognised text was written to.
 */
public function recognizeText($imageFile)
{
    require_once "TesseractOCR/TesseractOCR.php";
    require_once "Repositories/CR_File.php";

    // Restrict recognition to letters, digits and common code punctuation.
    $ocr = new TesseractOCR($imageFile->filePath);
    $ocr->setWhitelist(
        range('A', 'Z'),
        range('a', 'z'),
        range(0, 9),
        '_-.,;"#<>()%{}[]= '
    );
    $recognized = $ocr->recognize();

    // Target: public/output/code_filename.ext
    $suffix = $this->LANGUAGES[$this->language];
    $outputFile = new CR_File("public/output/code_" . $imageFile->fileName . $suffix);
    $outputFile->write($recognized);

    return $outputFile;
}
/**
 * Create a Tesseract config file in the mfcs temp directory and return its path.
 *
 * The file is always created. It contains a `tessedit_char_whitelist` line
 * when TesseractOCR::generateWhitelist() yields a non-empty whitelist for
 * $arguments, and is empty otherwise.
 *
 * @param mixed $arguments Whitelist arguments forwarded to TesseractOCR::generateWhitelist().
 *
 * @return string Path of the config file.
 */
function generateConfigFile($arguments)
{
    $configFile = mfcs::config('mfcstmp') . '/tesseract-ocr-config-' . rand() . '.conf';

    $whitelist = TesseractOCR::generateWhitelist($arguments);
    $contents = !empty($whitelist) ? "tessedit_char_whitelist {$whitelist}" : '';

    // One native call creates the file and writes it. This replaces a shell
    // `touch` with an unescaped path plus an unchecked fopen()/fwrite() pair.
    // file_put_contents() raises its own warning when the path is not writable.
    file_put_contents($configFile, $contents);

    return $configFile;
}
// Grab the uploaded file $file = $request->files->get('upload'); // Extract some information about the uploaded file $info = new SplFileInfo($file->getClientOriginalName()); // Create a quasi-random filename $filename = sprintf('%d.%s', time(), $info->getExtension()); // Copy the file $file->move(__DIR__ . '/../uploads', $filename); // Instantiate the Tessearct library $tesseract = new TesseractOCR(__DIR__ . '/../uploads/' . $filename); // Perform OCR on the uploaded image $text = $tesseract->recognize(); return $app['twig']->render('results.twig', ['text' => $text]); }); $app->post('/identify-telephone-number', function (Request $request) use($app) { // Grab the uploaded file $file = $request->files->get('upload'); // Extract some information about the uploaded file $info = new SplFileInfo($file->getClientOriginalName()); // Create a quasi-random filename $filename = sprintf('%d.%s', time(), $info->getExtension()); // Copy the file $file->move(__DIR__ . '/../uploads', $filename); // Instantiate the Tessearct library $tesseract = new TesseractOCR(__DIR__ . '/../uploads/' . $filename); // Perform OCR on the uploaded image $text = $tesseract->recognize(); $number = findPhoneNumber($text, 'GB'); return $app->json(['number' => $number]); }); $app->run();
/**
 * OCR the given file and return the recognised text.
 *
 * @param string      $fileUri  Path of the file to read; must exist.
 * @param string|null $language Tesseract language code, or null to leave the language unset.
 *
 * @return mixed Result of TesseractOCR::recognize().
 *
 * @throws Exception When $fileUri does not exist.
 */
private function parseContent($fileUri, $language)
{
    if (!File::exists($fileUri)) {
        $jobId = $this->job->getJobId();
        throw new Exception('Document parsing job #' . $jobId . ' received a uri to a file that does not seem to exist.');
    }

    $tempDirectory = Config::get('paperwork.tesseractTempDirectory');

    $ocr = new TesseractOCR($fileUri);
    $ocr->setTempDir($tempDirectory);

    // Only override the language when the caller supplied one.
    if ($language !== null) {
        $ocr->setLanguage($language);
    }

    return $ocr->recognize();
}
/**
 * Recognising german.png with the 'deu' language must yield the expected German text.
 */
public function testSpecificLanguageRecognition()
{
    $germanSample = $this->imagesDir . 'german.png';

    $ocr = new TesseractOCR($germanSample);
    $ocr->setLanguage('deu');
    $recognized = $ocr->recognize();

    $this->assertEquals('grüßen in Deutsch', $recognized);
}
/**
 * OCR an image, using CACHE_PATH as Tesseract's temp directory.
 *
 * @param string $img Path of the image to read.
 * @param string $lng Language code. Currently unused: the setLanguage() call below is disabled.
 *
 * @return mixed Result of TesseractOCR::recognize().
 */
function ocr($img, $lng = 'fre')
{
    $engine = new \TesseractOCR($img);
    $engine->setTempDir(CACHE_PATH);
    // $engine->setLanguage($lng);
    $recognized = $engine->recognize();

    return $recognized;
}
/**
 * OCR the bundled fb_login.png asset and echo the recognised text.
 */
public function getOCR()
{
    require_once base_path() . '/vendor/thiagoalessio/tesseract_ocr/TesseractOCR/TesseractOCR.php';

    $sampleImage = public_path() . '/assets/img/social/fb_login.png';

    $ocr = new TesseractOCR($sampleImage);
    $ocr->setTempDir(storage_path());
    // Same 3-letter code as the Tesseract training data packages.
    $ocr->setLanguage('eng');

    $recognized = $ocr->recognize();
    echo $recognized;
}
/**
 * Store an uploaded image and extract its text with OCR.
 *
 * The upload must pass the `required|mimes:jpeg,png,pdf` rule. It is stored
 * under public_path()/uploads/photos using a server-generated file name whose
 * extension is guessed from the file contents. The client-supplied name is
 * not used, because the file lands in a web-accessible directory.
 *
 * @return mixed Redirect back with a 'message' on validation failure, redirect
 *               back with an 'error' when the file cannot be stored, or a
 *               redirect to the 'upload-form' route carrying a success
 *               'message' and the recognised text in 'ocr'.
 */
public function postUpload()
{
    // Validate that an acceptable file was uploaded.
    $file = Input::file('image');
    $input = array('image' => $file);
    $rules = array('image' => 'required|mimes:jpeg,png,pdf');
    $validator = Validator::make($input, $rules);

    if ($validator->fails()) {
        // NOTE(review): the rule also accepts PDFs, so this message is not quite accurate.
        return Redirect::back()->with('message', 'Error: The provided file was not an image');
    }

    $destinationPath = 'uploads/photos';

    // Never trust the client's file name or extension for a file stored under
    // the public directory; derive the extension from the detected MIME type.
    $extension = $file->guessExtension();
    $image = str_replace('.', '', uniqid('ocr_', true)) . '.' . ($extension ?: 'bin');

    try {
        // Move into the same absolute location the OCR step reads from, rather
        // than a path relative to the current working directory.
        $stored = $file->move(public_path() . '/' . $destinationPath, $image);
    } catch (\Symfony\Component\HttpFoundation\File\Exception\FileException $e) {
        return Redirect::back()->with('error', 'An error occured');
    }

    require_once base_path() . '/vendor/thiagoalessio/tesseract_ocr/TesseractOCR/TesseractOCR.php';

    $tesseract = new TesseractOCR($stored->getPathname());
    $tesseract->setTempDir(storage_path());
    // Same 3-letter code as the Tesseract training data packages.
    $tesseract->setLanguage('eng');
    $ocr = $tesseract->recognize();

    return Redirect::route('upload-form')
        ->with('message', 'Success: File upload was successful')
        ->with('ocr', $ocr);
}