// OCR of a stored PDF: the file is rendered to one BMP image per page by the
// binary returned by select_ghostscript() (300 dpi, bmp16m device), each page
// image is handed to the binary returned by select_tesseract(), and the
// recognised text of all pages is gathered in $string.
include_once 'data.php';
include_once 'functions.php';

// End the session before the external programs are started.
session_write_close();

database_connect(IL_DATABASE_PATH, 'library');

// Fetch the stored file name for the requested record id. A missing "file"
// parameter is treated as id 0 instead of raising an undefined-index notice.
$file_query = $dbHandle->quote(intval(isset($_GET['file']) ? $_GET['file'] : 0));
$result = $dbHandle->query("SELECT file FROM library WHERE id={$file_query} LIMIT 1");
$file = $result ? $result->fetchColumn() : false;
$dbHandle = null;

// Build the PDF path once so that the existence check and the Ghostscript
// command always refer to the same file. (The check previously omitted the
// separator between IL_PDF_PATH and the subfolder.)
$ocr_pdf_path = '';
if ($file !== false && $file !== null && (string) $file !== '') {
    $ocr_pdf_path = IL_PDF_PATH . DIRECTORY_SEPARATOR . get_subfolder($file) . DIRECTORY_SEPARATOR . $file;
}

if ($ocr_pdf_path !== '' && is_file($ocr_pdf_path)) {
    // Common prefix of every temporary file that belongs to this document.
    $ocr_temp_base = IL_TEMP_PATH . DIRECTORY_SEPARATOR . $file;

    // Render the PDF to numbered page images: <file>.001.bmp, <file>.002.bmp, ...
    exec(select_ghostscript() . ' -dSAFER -dBATCH -dNOPAUSE -sDEVICE=bmp16m -r300 -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -dDOINTERPOLATE -o "' . $ocr_temp_base . '.%03d.bmp" "' . $ocr_pdf_path . '"');

    // Only pick up the page images of this document; a bare "*.bmp" would also
    // OCR and delete unrelated images lying in the temp directory.
    // NOTE(review): assumes stored file names contain no glob metacharacters.
    $file_arr = glob($ocr_temp_base . '.*.bmp');
    if (is_array($file_arr)) {
        set_time_limit(600);

        // Page texts are accumulated in memory. Appending to a "<file>final.txt"
        // scratch file let text from an earlier aborted run leak into this one.
        $string = '';
        $ocr_page_count = count($file_arr);
        for ($i = 0; $i < $ocr_page_count; $i++) {
            // The OCR binary is given an output base name; its result is read
            // from that base name plus ".txt".
            $ocr_page_base = $ocr_temp_base . '.' . $i;
            exec(select_tesseract() . ' "' . $file_arr[$i] . '" "' . $ocr_page_base . '"');
            if (is_file($ocr_page_base . '.txt')) {
                $string .= file_get_contents($ocr_page_base . '.txt');
                unlink($ocr_page_base . '.txt');
                unlink($file_arr[$i]);
            } else {
                // Do not leave the remaining page images behind.
                foreach (array_slice($file_arr, $i) as $ocr_leftover) {
                    unlink($ocr_leftover);
                }
                die('OCR software not functional.');
            }
        }

        // Replace every run of characters outside tab, LF, CR,
        // U+0020-U+D7FF and U+E000-U+FFFD with a single space.
        // preg_replace() returns null on error, hence the cast.
        $string = (string) preg_replace('/[^\\x{0009}\\x{000a}\\x{000d}\\x{0020}-\\x{D7FF}\\x{E000}-\\x{FFFD}]+/u', ' ', $string);
        $string = trim($string);
        if (!empty($string)) {
            // Flatten the text to a single line.
            $order = array("\r\n", "\n", "\r");
            $string = str_replace($order, ' ', $string);
if (is_readable(IL_TEMP_PATH . DIRECTORY_SEPARATOR . 'test.odt') && filesize(IL_TEMP_PATH . DIRECTORY_SEPARATOR . 'test.odt') > 0) {
    // A non-empty test.odt exists in the temp directory: remove it and report success.
    unlink(IL_TEMP_PATH . DIRECTORY_SEPARATOR . 'test.odt');
    die('OK');
} else {
    // Empty response signals failure.
    die;
}
} elseif ($_GET['binary'] === 'ghostscript') {
    // Ghostscript check: render page 1 of the bundled test.pdf to a small PNG.
    // The input is anchored to __DIR__, like the fixtures of the other checks,
    // so the result does not depend on the current working directory.
    exec(select_ghostscript() . ' -sDEVICE=png16m -r15 -dTextAlphaBits=1 -dGraphicsAlphaBits=1 -dFirstPage=1 -dLastPage=1 -o "' . IL_TEMP_PATH . DIRECTORY_SEPARATOR . 'test.png" "' . __DIR__ . DIRECTORY_SEPARATOR . 'test.pdf"');
    if (file_exists(IL_TEMP_PATH . DIRECTORY_SEPARATOR . 'test.png')) {
        unlink(IL_TEMP_PATH . DIRECTORY_SEPARATOR . 'test.png');
        die('OK');
    } else {
        die;
    }
} elseif ($_GET['binary'] === 'tesseract') {
    // Tesseract check: OCR the bundled test.bmp; success means a non-empty
    // test-tesseract.txt was written to the temp directory.
    exec(select_tesseract() . ' "' . __DIR__ . DIRECTORY_SEPARATOR . 'test.bmp" "' . IL_TEMP_PATH . DIRECTORY_SEPARATOR . 'test-tesseract"');
    if (is_readable(IL_TEMP_PATH . DIRECTORY_SEPARATOR . 'test-tesseract.txt') && filesize(IL_TEMP_PATH . DIRECTORY_SEPARATOR . 'test-tesseract.txt') > 0) {
        unlink(IL_TEMP_PATH . DIRECTORY_SEPARATOR . 'test-tesseract.txt');
        die('OK');
    } else {
        die;
    }
} elseif ($_GET['binary'] === 'soffice') {
    // soffice check: convert the bundled test.odt to PDF in the temp directory.
    // On Linux and macOS, HOME is pointed at the temp directory for the
    // duration of the conversion.
    if (PHP_OS == 'Linux' || PHP_OS == 'Darwin') {
        putenv('HOME=' . IL_TEMP_PATH);
    }
    exec(select_soffice() . ' --headless --convert-to pdf --outdir "' . IL_TEMP_PATH . '" "' . __DIR__ . DIRECTORY_SEPARATOR . 'test.odt"');
    if (PHP_OS == 'Linux' || PHP_OS == 'Darwin') {
        // NOTE(review): this sets HOME to the two-character string "" rather
        // than restoring its previous value - confirm that this is intended.
        putenv('HOME=""');
    }
    // Expected location of the converted document.
    $converted_file = IL_TEMP_PATH . DIRECTORY_SEPARATOR . 'test.pdf';