Example #1
0
// Pull in the application's configuration and shared helper functions.
include_once 'data.php';
include_once 'functions.php';

// Write and close the session now, before the long-running external
// commands further down the script.
session_write_close();

// NOTE(review): database_connect() is defined elsewhere; it is assumed to
// populate the global PDO handle $dbHandle that is used below -- confirm.
database_connect(IL_DATABASE_PATH, 'library');

// Requested library record id. A missing "file" parameter is treated as id 0
// (which matches no record) instead of raising an undefined-index notice.
$file_id = isset($_GET['file']) ? intval($_GET['file']) : 0;

// Stored file name of the requested record; stays false when the record does
// not exist or the lookup fails, exactly what fetchColumn() reports for
// "no row".
$file = false;

// Bind the id as an integer parameter rather than interpolating it into SQL.
$result = $dbHandle->prepare('SELECT file FROM library WHERE id=:id LIMIT 1');
if ($result !== false) {
    $result->bindValue(':id', $file_id, PDO::PARAM_INT);
    if ($result->execute()) {
        $file = $result->fetchColumn();
    }
}

// PDO keeps the connection open until every reference to it is gone, and a
// live statement object counts as one. Drop the statement first so that
// clearing the handle really closes the database.
$result = null;
$dbHandle = null;
if (is_file(IL_PDF_PATH . get_subfolder($file) . DIRECTORY_SEPARATOR . $file)) {
    exec(select_ghostscript() . ' -dSAFER -dBATCH -dNOPAUSE -sDEVICE=bmp16m -r300 -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -dDOINTERPOLATE -o "' . IL_TEMP_PATH . DIRECTORY_SEPARATOR . $file . '.%03d.bmp" "' . IL_PDF_PATH . DIRECTORY_SEPARATOR . get_subfolder($file) . DIRECTORY_SEPARATOR . $file . '"');
    $file_arr = glob(IL_TEMP_PATH . DIRECTORY_SEPARATOR . '*.bmp');
    if (is_array($file_arr)) {
        set_time_limit(600);
        for ($i = 0; $i < count($file_arr); $i++) {
            exec(select_tesseract() . ' "' . $file_arr[$i] . '" "' . IL_TEMP_PATH . DIRECTORY_SEPARATOR . $file . '.' . $i . '"');
            if (is_file(IL_TEMP_PATH . DIRECTORY_SEPARATOR . $file . '.' . $i . '.txt')) {
                file_put_contents(IL_TEMP_PATH . DIRECTORY_SEPARATOR . $file . 'final.txt', file_get_contents(IL_TEMP_PATH . DIRECTORY_SEPARATOR . $file . '.' . $i . '.txt'), FILE_APPEND);
                unlink(IL_TEMP_PATH . DIRECTORY_SEPARATOR . $file . '.' . $i . '.txt');
                unlink($file_arr[$i]);
            } else {
                die('OCR software not functional.');
            }
        }
        $string = file_get_contents(IL_TEMP_PATH . DIRECTORY_SEPARATOR . $file . 'final.txt');
        unlink(IL_TEMP_PATH . DIRECTORY_SEPARATOR . $file . 'final.txt');
        $string = preg_replace('/[^\\x{0009}\\x{000a}\\x{000d}\\x{0020}-\\x{D7FF}\\x{E000}-\\x{FFFD}]+/u', ' ', $string);
        $string = trim($string);
        if (!empty($string)) {
            $order = array("\r\n", "\n", "\r");
            $string = str_replace($order, ' ', $string);
    if (is_readable(IL_TEMP_PATH . DIRECTORY_SEPARATOR . 'test.odt') && filesize(IL_TEMP_PATH . DIRECTORY_SEPARATOR . 'test.odt') > 0) {
        unlink(IL_TEMP_PATH . DIRECTORY_SEPARATOR . 'test.odt');
        die('OK');
    } else {
        die;
    }
} elseif ($_GET['binary'] == 'ghostscript') {
    exec(select_ghostscript() . ' -sDEVICE=png16m -r15 -dTextAlphaBits=1 -dGraphicsAlphaBits=1 -dFirstPage=1 -dLastPage=1 -o "' . IL_TEMP_PATH . DIRECTORY_SEPARATOR . 'test.png" test.pdf');
    if (file_exists(IL_TEMP_PATH . DIRECTORY_SEPARATOR . 'test.png')) {
        unlink(IL_TEMP_PATH . DIRECTORY_SEPARATOR . 'test.png');
        die('OK');
    } else {
        die;
    }
} elseif ($_GET['binary'] == 'tesseract') {
    exec(select_tesseract() . ' "' . __DIR__ . DIRECTORY_SEPARATOR . 'test.bmp" "' . IL_TEMP_PATH . DIRECTORY_SEPARATOR . 'test-tesseract"');
    if (is_readable(IL_TEMP_PATH . DIRECTORY_SEPARATOR . 'test-tesseract.txt') && filesize(IL_TEMP_PATH . DIRECTORY_SEPARATOR . 'test-tesseract.txt') > 0) {
        unlink(IL_TEMP_PATH . DIRECTORY_SEPARATOR . 'test-tesseract.txt');
        die('OK');
    } else {
        die;
    }
} elseif ($_GET['binary'] == 'soffice') {
    if (PHP_OS == 'Linux' || PHP_OS == 'Darwin') {
        putenv('HOME=' . IL_TEMP_PATH);
    }
    exec(select_soffice() . ' --headless --convert-to pdf --outdir "' . IL_TEMP_PATH . '" "' . __DIR__ . DIRECTORY_SEPARATOR . 'test.odt"');
    if (PHP_OS == 'Linux' || PHP_OS == 'Darwin') {
        putenv('HOME=""');
    }
    $converted_file = IL_TEMP_PATH . DIRECTORY_SEPARATOR . 'test.pdf';