Example #1
0
                // First we have to figure out how many pages
                // are in the file. This is a rough method:
                // we have gs kick up an error after it opens
                // the file and sees how many pages there are.
                $numpages = `{$gs} -dNODISPLAY "{$filepath}" -c quit`;
                $pos1 = strpos($numpages, 'through');
                $numpages = substr($numpages, $pos1);
                $pos2 = strpos($numpages, ".");
                $numpages = trim(substr($numpages, 8, $pos2 - 8));
                for ($page = 1; $page <= $numpages; $page++) {
                    //gs the page and return as a string
                    $tempstring = `{$gs} -q -dNODISPLAY -dNOBIND -dWRITESYSTEMDICT -dSIMPLE -dFirstPage={$page} -dLastPage={$page} -c save -f ps2ascii.ps "{$filepath}" -c quit`;
                    //strip out all the trash from the string
                    //$tempstring = string_clean($tempstring,$preventIndex,$keepIndex);
                    $filetext = strtolower($tempstring) . "\n" . $filename;
                    doindexfile($db, $filetext, $rfiles->fields['id'], $rindextable->fields['associated_table'], $rfiles->fields['ftableid'], $page);
                }
                $db->Execute("UPDATE files SET indexed=1 WHERE id=" . $rfiles->fields['id']);
                $pdffilecounter++;
            }
        }
    }
    echo 'Indexed file: ' . $rfiles->fields['filename'] . '.<br>';
    $rfiles->MoveNext();
}
// The rest just serves to report some statistics.
if (!$textfilecounter) {
    $textfilecounter = 0;
}
if (!$pdffilecounter) {
    $pdffilecounter = 0;
Example #2
0
/**
 * Indexes the text content of the HTML version of a file.
 *
 * NOTE: indexing is currently switched off -- the function returns false
 * before doing any work (this looks deliberate; confirm before removing the
 * early return). The remaining body is kept so it can be re-enabled.
 *
 * When enabled, the body reads the file identified by $htmlfileid, strips
 * the HTML/PHP tags, lowercases the text and hands it to doindexfile().
 * Nothing is indexed when $htmlfileid is empty.
 *
 * @param mixed $db          database handle, passed on to file_path() and doindexfile()
 * @param mixed $tableinfo   not used by this function
 * @param mixed $indextable  index table to write to; a falsy value aborts with false
 * @param mixed $recordid    id of the record the file belongs to, passed on to doindexfile()
 * @param mixed $fileid      not used by this function
 * @param mixed $htmlfileid  id of the HTML version of the file; resolved through file_path()
 *
 * @return bool|null false while indexing is disabled or when no index table
 *                   is given; otherwise no explicit value is returned
 */
function indexfile($db, $tableinfo, $indextable, $recordid, $fileid, $htmlfileid)
{
    // Indexing is disabled: everything below this line is unreachable.
    return false;

    if (!$indextable) {
        return false;
    }
    // if the html file exists, we'll work with that one
    if ($htmlfileid) {
        // fgetss() was removed in PHP 8.0; read the file in one go and strip
        // the tags with strip_tags() instead.
        $raw = file_get_contents(file_path($db, $htmlfileid));
        // An unreadable file is indexed as empty text, as before.
        $filetext = $raw === false ? '' : strtolower(strip_tags($raw));
        // There is no page number for an HTML file. The original passed an
        // undefined variable here, i.e. null -- keep that value explicitly.
        // NOTE(review): confirm doindexfile() accepts null as page number.
        $pagenr = null;
        doindexfile($db, $filetext, $htmlfileid, $indextable, $recordid, $pagenr);
    }
}