/**
 * Rebuild this object's rows in the searchable_objects table.
 *
 * For a new object every searchable column is indexed. For an existing
 * object only the columns reported as modified are re-indexed (a composite
 * column counts as modified as soon as one of its component columns is),
 * and their previous content is dropped first.
 *
 * When the manager class is ProjectFileRevisions, the "filecontent" column
 * of docx, pdf, odt and fodt files is replaced by the text returned by
 * docx2text(), pdf2text(), odt2text() or fodt2text(), which are fed a
 * temporary copy of the file written under tmp/.
 *
 * @param bool $wasNew True when the object was just created.
 * @return void
 */
function addToSearchableObjects($wasNew = false)
{
    $searchable_columns = $this->getSearchableColumns();
    // Treat a non-array result as "nothing to index" so count()/foreach
    // below never receive a scalar or null.
    if (!is_array($searchable_columns)) {
        $searchable_columns = array();
    }

    if ($wasNew) {
        $columns_to_drop = $searchable_columns;
    } else {
        $columns_to_drop = array();
        foreach ($searchable_columns as $column_name) {
            // A plain column is watched by itself; a composite column is
            // watched through each of its component columns.
            if (isset($this->searchable_composite_columns[$column_name])) {
                $watched_columns = $this->searchable_composite_columns[$column_name];
            } else {
                $watched_columns = array($column_name);
            }
            foreach ($watched_columns as $colName) {
                if ($this->isColumnModified($colName)) {
                    $columns_to_drop[] = $column_name;
                    break;
                }
            }
        }
    }

    if (count($columns_to_drop) == 0) {
        return;
    }
    if (!$wasNew) {
        SearchableObjects::dropContentByObjectColumns($this, $columns_to_drop);
    }

    $object_id = $this->getObjectId();
    $manager_class = get_class($this->manager());
    $is_revision = $manager_class == 'ProjectFileRevisions';
    // File-type ids are looked up lazily: only the "filecontent" column of
    // a file revision needs them.
    $extractors = null;

    foreach ($columns_to_drop as $column_name) {
        $content = $this->getSearchableColumnContent($column_name);

        if ($manager_class == 'ProjectFiles' || ($is_revision && $column_name != "filecontent")) {
            $content = utf8_encode($content);
        } elseif ($is_revision) {
            if ($extractors === null) {
                // Order matters: the first matching type wins.
                $extractors = array(
                    array('extension' => 'docx', 'prefix' => 'doc', 'needs_dom' => true),
                    array('extension' => 'pdf', 'prefix' => 'pdf', 'needs_dom' => false),
                    array('extension' => 'odt', 'prefix' => 'odt', 'needs_dom' => true),
                    array('extension' => 'fodt', 'prefix' => 'fodt', 'needs_dom' => false),
                );
                foreach ($extractors as $index => $extractor) {
                    $extractors[$index]['type_id'] = FileTypes::findOne(array(
                        'id' => true,
                        'conditions' => '`extension` = ' . DB::escape($extractor['extension']),
                    ));
                }
            }

            $file = ProjectFileRevisions::findById($object_id);
            // Without a revision object there is nothing to convert; keep
            // the raw column content instead of failing on a null call.
            if (is_object($file)) {
                try {
                    $file_type_id = $file->getFileTypeId();
                    foreach ($extractors as $extractor) {
                        if ($file_type_id != $extractor['type_id']) {
                            continue;
                        }
                        // docx and odt conversion is only attempted when
                        // the DOM extension is available.
                        if ($extractor['needs_dom'] && !class_exists('DOMDocument')) {
                            break;
                        }
                        $file_path = "tmp/" . $extractor['prefix'] . "_filecontent_" . $object_id . "." . $extractor['extension'];
                        // Best effort: if tmp/ is not writable the raw
                        // column content is indexed unchanged.
                        $file_tmp = @fopen($file_path, 'w');
                        if (!$file_tmp) {
                            break;
                        }

                        // Capture any failure so the temporary file is
                        // always closed and removed before it propagates.
                        $failure = null;
                        try {
                            fwrite($file_tmp, $file->getFileContent());
                            fclose($file_tmp);
                            $file_tmp = null;
                            switch ($extractor['extension']) {
                                case 'docx':
                                    $content = docx2text($file_path);
                                    break;
                                case 'pdf':
                                    $content = pdf2text($file_path);
                                    break;
                                case 'odt':
                                    $content = odt2text($file_path);
                                    break;
                                case 'fodt':
                                    $content = fodt2text($file_path, $object_id);
                                    break;
                            }
                        } catch (Exception $e) {
                            $failure = $e;
                        }
                        if (is_resource($file_tmp)) {
                            fclose($file_tmp);
                        }
                        if (file_exists($file_path)) {
                            unlink($file_path);
                        }
                        if ($failure !== null) {
                            throw $failure;
                        }
                        break;
                    }
                } catch (FileNotInRepositoryError $e) {
                    // The stored file is missing: index nothing for it.
                    $content = "";
                }
            }
        }

        if (trim($content) != '') {
            $searchable_object = new SearchableObject();
            $searchable_object->setRelObjectId($object_id);
            $searchable_object->setColumnName(DB::escape($column_name));
            // Cap the stored text at 65535 bytes (presumably the size limit
            // of the content column -- confirm against the schema), then
            // pass it through utf8_safe() because the cut is byte-wise.
            if (strlen($content) > 65535) {
                $content = utf8_safe(substr($content, 0, 65535));
            }
            $content = DB::escape($content);
            $sql = "\n\t\t\t\t\t\tINSERT INTO " . TABLE_PREFIX . "searchable_objects (rel_object_id, column_name, content)\n\t\t\t\t\t\tVALUES (" . $searchable_object->getRelObjectId() . "," . $searchable_object->getColumnName() . "," . $content . ")\n\t\t\t\t\t\tON DUPLICATE KEY UPDATE content = {$content}";
            DB::execute($sql);
        }
    }
}
Пример #2
0
<?php

// Converts an uploaded document (form field "text_import_file") to plain
// text and echoes it with HTML tags stripped. Supported inputs: plain text,
// PDF, docx and OpenDocument text. Anything else yields an empty response.

// Defined up front so the final echo never reads an undefined variable
// when no file (or an unsupported file) was posted.
$text = '';

// Only handle a single, successfully completed HTTP upload; a failed upload
// has an empty tmp_name that must not reach the readers below.
if (
    isset($_FILES['text_import_file'])
    && $_FILES['text_import_file']['error'] === UPLOAD_ERR_OK
    && is_uploaded_file($_FILES['text_import_file']['tmp_name'])
) {
    $tmp_name = $_FILES['text_import_file']['tmp_name'];
    $name = $_FILES['text_import_file']['name'];
    // NOTE(review): this MIME type is supplied by the client and is not
    // verified against the file's actual content.
    $type = $_FILES['text_import_file']['type'];

    if ($type == 'text/plain') {
        // plain text
        $text = file_get_contents($tmp_name);
        if ($text === false) {
            $text = '';
        }
    } elseif ($type == 'application/pdf') {
        // pdf
        $text = pdf2text($tmp_name);
    } elseif ($type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document') {
        // docx sent with its registered MIME type
        $text = docx2text($tmp_name);
    } elseif ($type == 'application/octet-stream') {
        // docx sent as a generic binary: fall back to the file extension,
        // compared case-insensitively so "REPORT.DOCX" is accepted too.
        $extension = strtolower(pathinfo($name, PATHINFO_EXTENSION));
        if ($extension == 'docx') {
            $text = docx2text($tmp_name);
        }
    } elseif ($type == 'application/vnd.oasis.opendocument.text') {
        // openoffice doc
        $text = odt2text($tmp_name);
    }
}
header('Content-type: text/html; charset=UTF-8');
echo strip_tags($text);
exit;
Пример #3
0
// Reads the text of the document $nama_file according to its type
// $tipe_file, then tokenises, filters and computes term frequencies for it.
// $tipe_file, $nama_file and $bahasa are expected to be set before this
// point; they are not defined in this fragment.
echo 'Tipe File : ' . $tipe_file . '<br>';
switch ($tipe_file) {
    case "txt":
        // The second argument makes file_get_contents() also search the
        // include path.
        $kalimat = file_get_contents($nama_file, true);
        break;
    case "doc":
        $kalimat = parseWord($nama_file);
        break;
    case "pdf":
        $kalimat = pdf2text($nama_file);
        break;
    case "docx":
        $kalimat = docx2text($nama_file);
        break;
    case "odt":
        $kalimat = odt2text($nama_file);
        break;
    default:
        // Unsupported type: continue with empty text rather than an
        // undefined variable.
        $kalimat = '';
        break;
}
echo 'Isi File Text ' . $kalimat;
// ----- tokenising step -----
$kata = tokenising($kalimat);
// --- filtering step ---
$hasil = filtering($kata, $bahasa);
// --- stemming and tf calculation ("id" uses calculate_tf(), any other
// language uses calculate_tf_en()) ---
if ($bahasa == "id") {
    $doc_terms = calculate_tf($hasil);
} else {
    $doc_terms = calculate_tf_en($hasil);
}
// update the tf values in the database
// delete the document's terms that are already in the database