$im = imagecreatefrompng(IMAGES_DIRECTORY . $row['filename']); } } $sql = "SELECT count(*) as c FROM ocrtrain\r\n\t\t\t\tWHERE fid = '{$fid}' and vid = '{$vid}' and bid = '{$bid}'"; $cc = $db->GetRow($sql); if ($cc['c'] > 0) { print T_("Found duplicate") . " {$fid} {$vid} {$bid}"; } else { $row['width'] = imagesx($im); $row['height'] = imagesy($im); $image = crop($im, applytransforms($box, $row)); $a1 = kfill_modified($image, 5); $a2 = remove_boundary_noise($a1, 2); $timage = resize_bounding($a2); $bimage = thinzs_np($timage); $t = sector_distance($bimage); $count++; $sql = "INSERT INTO ocrtrain (ocrtid,val,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,fid,vid,bid,kb)\r\n\t\t\t\t\tVALUES (NULL,'{$val}','{$t[0][1]}','{$t[0][2]}','{$t[0][3]}','{$t[0][4]}','{$t[0][5]}','{$t[0][6]}','{$t[0][7]}','{$t[0][8]}','{$t[0][9]}','{$t[0][10]}','{$t[0][11]}','{$t[0][12]}','{$t[1][1]}','{$t[1][2]}','{$t[1][3]}','{$t[1][4]}','{$fid}','{$vid}','{$bid}','{$kb}')"; $db->Execute($sql); } } } print T_("Trained") . ": {$count} " . T_("characters"); //generate kb generate_kb($kb); print T_("Generated KB"); } if (isset($_GET['submit'])) { //run process in background $qid = intval($_GET['qid']); $verifiers = " AND (";
/**
 * Return what character an image most likely shows, based on the knowledge
 * base linked to the given box type and questionnaire
 *
 * The image is cleaned, resized and thinned, 16 features are extracted from
 * it, and every row of ocrkbdata whose kb is joined (through ocrkbboxgroup)
 * to the given btid and qid is scored against those features. The val of
 * the highest scoring row is returned.
 *
 * @param image $image The image of the character
 * @param int $btid The box type this came from
 * @param int $qid The questionnaire this box came from
 *
 * @return char|null The character detected, or null when the query yields no row
 * @author Adam Zammit <*****@*****.**>
 * @since 2010-10-18
 */
function ocr_guess($image, $btid, $qid)
{
    include_once dirname(__FILE__) . '/../config.inc.php';
    include_once dirname(__FILE__) . '/../db.inc.php';

    global $db;

    // Both identifiers are documented as integers; coerce them so that
    // nothing other than a number can end up inside the SQL text below
    $btid = intval($btid);
    $qid = intval($qid);

    //remove speckles
    $a1 = kfill_modified($image, 5);

    //remove boundary lines
    $a2 = remove_boundary_noise($a1, 2);

    //resize the image to suit the OCR functions
    $timage = resize_bounding($a2);

    //thin the image to a skeleton
    $bimage = thinzs_np($timage);

    //extract the 16 features from the image
    $f = sector_distance($bimage);

    // One scoring term per feature: columns m1..m12 / v1..v12 are compared
    // with $f[0][1..12] and columns m13..m16 / v13..v16 with $f[1][1..4].
    // Each term is exp(-(mN - feature)^2 / (2 * vN)) squared; IFNULL turns a
    // NULL quotient into 0, so such a term contributes exp(0)^2 = 1.
    // NOTE(review): mN / vN are presumably the per-feature mean and variance
    // stored by generate_kb() - confirm against that function.
    $terms = array();
    for ($n = 1; $n <= 16; $n++) {
        $value = ($n <= 12) ? $f[0][$n] : $f[1][$n - 12];
        $terms[] = "(pow(exp( - IFNULL((pow((m{$n}-'{$value}'),2)/(2*v{$n})),0)),2))";
    }

    $sql = "SELECT val,\r\n\t\t\t(\r\n\t\t\t"
        . implode(" +\r\n\t\t\t", $terms)
        . "\r\n\t\t\t) as calc\r\n\t\tFROM ocrkbdata\r\n\t\tJOIN ocrkbboxgroup ON (ocrkbdata.kb = ocrkbboxgroup.kb AND ocrkbboxgroup.btid = '{$btid}' AND ocrkbboxgroup.qid = '{$qid}')\r\n\t\tORDER BY calc DESC";

    // Rows are ordered best score first, so the first row is the guess
    $guess = $db->GetRow($sql);

    //DEBUG
    //print $sql . "<br/>";

    // GetRow may hand back an empty or non-array result when nothing matched
    // or the query failed; return null instead of reading a missing index
    return isset($guess['val']) ? $guess['val'] : null;
}
$im = imagecreatefromstring($row['image']); } else { $im = imagecreatefrompng(IMAGES_DIRECTORY . $row['filename']); } $row['width'] = imagesx($im); $row['height'] = imagesy($im); $box['tlx'] += BOX_EDGE; $box['tly'] += BOX_EDGE; $box['brx'] -= BOX_EDGE; $box['bry'] -= BOX_EDGE; $timage = crop($im, applytransforms($box, $row)); $ktimage = kfill_modified($timage, 5); $ttimage = remove_boundary_noise($ktimage, 2); $kttimage = resize_bounding($ttimage); $i = thinzs_np($kttimage); $t = sector_distance($i); $sql = "INSERT INTO ocrtrain (ocrtid,val,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,fid,vid,bid,kb)\r\n VALUES (NULL,'{$val}','{$t[0][1]}','{$t[0][2]}','{$t[0][3]}','{$t[0][4]}','{$t[0][5]}','{$t[0][6]}','{$t[0][7]}','{$t[0][8]}','{$t[0][9]}','{$t[0][10]}','{$t[0][11]}','{$t[0][12]}','{$t[1][1]}','{$t[1][2]}','{$t[1][3]}','{$t[1][4]}','{$fid}','{$vid}','{$bid}','{$kb}')"; $db->Execute($sql); $sql = "DELETE from ocrprocess\r\n\t\tWHERE ocrprocessid = '{$o['ocrprocessid']}'"; $db->Execute($sql); print T_("Trained") . ": {$val} " . T_("to knowledge base") . ": {$kb}"; $db->CompleteTrans(); } if ($completed == 1) { $sql = "SELECT count(*) as c\r\n\t\tFROM ocrprocess\r\n\t\tWHERE kb = '{$kb}'"; $rs = $db->GetRow($sql); if ($rs['c'] == 0) { generate_kb($kb); print T_("Generated KB"); } else { print T_("Did not generate KB as not all records trained");