/**
 * (Re)index this object's searchable columns in the searchable_objects table.
 *
 * When $wasNew is true every column returned by getSearchableColumns() is
 * indexed. Otherwise only modified columns are re-indexed (a composite column
 * listed in $this->searchable_composite_columns counts as modified as soon as
 * one of its component columns is modified), and the existing rows for those
 * columns are dropped first.
 *
 * For the "filecontent" column of a ProjectFileRevisions object whose file type
 * is docx, pdf, odt or fodt, the revision content is written to a temporary
 * file under tmp/ and converted to plain text by the matching *2text function.
 *
 * @param boolean $wasNew Index all searchable columns and skip the drop step.
 * @return void
 */
function addToSearchableObjects($wasNew = false) {
    $columns_to_drop = array();
    $searchable_columns = $this->getSearchableColumns();

    // Guard both paths with is_array(): count() on a non-array raises a
    // warning on PHP 7.2+ and a TypeError on PHP 8.
    if (is_array($searchable_columns)) {
        if ($wasNew) {
            $columns_to_drop = $searchable_columns;
        } else {
            foreach ($searchable_columns as $column_name) {
                if (isset($this->searchable_composite_columns[$column_name])) {
                    // Composite column: one modified component is enough.
                    foreach ($this->searchable_composite_columns[$column_name] as $colName) {
                        if ($this->isColumnModified($colName)) {
                            $columns_to_drop[] = $column_name;
                            break;
                        }
                    }
                } elseif ($this->isColumnModified($column_name)) {
                    $columns_to_drop[] = $column_name;
                }
            }
        }
    }
    $searchable_columns = null;

    if (count($columns_to_drop) == 0) {
        return;
    }

    if (!$wasNew) {
        SearchableObjects::dropContentByObjectColumns($this, $columns_to_drop);
    }

    // Loop-invariant: resolve the manager class once.
    $manager_class = get_class($this->manager());

    // Converter table, built on first use so the file type lookups only run
    // when a "filecontent" column is actually processed.
    $converters = null;

    foreach ($columns_to_drop as $column_name) {
        $content = $this->getSearchableColumnContent($column_name);

        // NOTE(review): utf8_encode() is deprecated as of PHP 8.2; left in place
        // because the source encoding of the content is not visible here.
        if ($manager_class == 'ProjectFiles') {
            $content = utf8_encode($content);
        } elseif ($manager_class == 'ProjectFileRevisions') {
            if ($column_name == "filecontent") {
                if ($converters === null) {
                    // Order matters: the first matching file type wins.
                    // 'prefix' is the temp file name prefix ("doc" for docx).
                    $converters = array(
                        array('ext' => 'docx', 'prefix' => 'doc',  'fn' => 'docx2text', 'needs_dom' => true,  'pass_id' => false),
                        array('ext' => 'pdf',  'prefix' => 'pdf',  'fn' => 'pdf2text',  'needs_dom' => false, 'pass_id' => false),
                        array('ext' => 'odt',  'prefix' => 'odt',  'fn' => 'odt2text',  'needs_dom' => true,  'pass_id' => false),
                        array('ext' => 'fodt', 'prefix' => 'fodt', 'fn' => 'fodt2text', 'needs_dom' => false, 'pass_id' => true),
                    );
                    foreach ($converters as $idx => $converter) {
                        $converters[$idx]['type_id'] = FileTypes::findOne(array(
                            'id' => true,
                            'conditions' => '`extension` = ' . DB::escape($converter['ext']),
                        ));
                    }
                }

                $file = ProjectFileRevisions::findById($this->getObjectId());
                try {
                    $type_id = $file->getFileTypeId();
                    foreach ($converters as $converter) {
                        if ($type_id != $converter['type_id']) {
                            continue;
                        }
                        // docx/odt conversion is only attempted when DOMDocument is available.
                        if ($converter['needs_dom'] && !class_exists('DOMDocument')) {
                            break;
                        }

                        $file_path = "tmp/" . $converter['prefix'] . "_filecontent_" . $this->getObjectId() . "." . $converter['ext'];
                        $file_tmp = @fopen($file_path, 'w');
                        if ($file_tmp) {
                            try {
                                fwrite($file_tmp, $file->getFileContent());
                                fclose($file_tmp);
                                $file_tmp = null;

                                $convert = $converter['fn'];
                                if ($converter['pass_id']) {
                                    $content = $convert($file_path, $this->getObjectId());
                                } else {
                                    $content = $convert($file_path);
                                }
                            } catch (Exception $e) {
                                // Do not leak the handle or leave the temp file
                                // behind when reading or converting fails.
                                if ($file_tmp) {
                                    fclose($file_tmp);
                                }
                                @unlink($file_path);
                                throw $e;
                            }
                            unlink($file_path);
                        }
                        break;
                    }
                } catch (FileNotInRepositoryError $e) {
                    // Nothing to index when the file is missing from the repository.
                    $content = "";
                }
            } else {
                $content = utf8_encode($content);
            }
        }

        if (trim($content) != '') {
            $searchable_object = new SearchableObject();
            $searchable_object->setRelObjectId($this->getObjectId());
            $searchable_object->setColumnName(DB::escape($column_name));

            // Cap the content at 65535 bytes; utf8_safe() is applied to the
            // byte-wise cut result.
            if (strlen($content) > 65535) {
                $content = utf8_safe(substr($content, 0, 65535));
            }
            $content = DB::escape($content);

            $sql = "\n\t\t\t\t\t\tINSERT INTO " . TABLE_PREFIX . "searchable_objects (rel_object_id, column_name, content)\n\t\t\t\t\t\tVALUES (" . $searchable_object->getRelObjectId() . "," . $searchable_object->getColumnName() . "," . $content . ")\n\t\t\t\t\t\tON DUPLICATE KEY UPDATE content = {$content}";
            DB::execute($sql);
            $searchable_object = null;
        }
        $content = null;
    }
}
<?php
// Converts an uploaded document (form field "text_import_file") to plain text
// and prints it with HTML tags stripped. Supported: plain text, pdf, docx, odt.

// Always defined, so the echo below never reads an undefined variable when no
// file was posted or the type is unsupported.
$text = '';

if (isset($_FILES['text_import_file'])) {
    $upload = $_FILES['text_import_file'];

    // Only read the temporary file when the upload actually succeeded; on a
    // failed upload tmp_name is empty and the readers below would fail.
    if (isset($upload['error']) && $upload['error'] === UPLOAD_ERR_OK) {
        $tmp_name = $upload['tmp_name'];
        $name = $upload['name'];
        // NOTE(review): 'type' is the MIME type reported by the client and is
        // not verified server-side.
        $type = $upload['type'];

        if ($type == 'text/plain') {
            // plain text
            $text = file_get_contents($tmp_name);
        } elseif ($type == 'application/pdf') {
            // pdf
            $text = pdf2text($tmp_name);
        } elseif ($type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document') {
            // docx sent with its registered MIME type
            $text = docx2text($tmp_name);
        } elseif ($type == 'application/octet-stream') {
            // docx sent as a generic binary: fall back to the file extension
            $extension = strtolower(pathinfo($name, PATHINFO_EXTENSION));
            if ($extension == 'docx') {
                $text = docx2text($tmp_name);
            }
        } elseif ($type == 'application/vnd.oasis.opendocument.text') {
            // openoffice doc
            $text = odt2text($tmp_name);
        }
    }
}

header('Content-type: text/html; charset=UTF-8');
echo strip_tags($text);
exit;
// Extracts the text of the file $nama_file according to its type $tipe_file,
// then tokenises, filters and computes term frequencies for language $bahasa.
echo 'Tipe File : ' . $tipe_file . '<br>';

switch ($tipe_file) {
    case "txt":
        // Second argument enables the include_path search, as in the original call.
        $kalimat = file_get_contents($nama_file, true);
        break;
    case "doc":
        $kalimat = parseWord($nama_file);
        break;
    case "pdf":
        $kalimat = pdf2text($nama_file);
        break;
    case "docx":
        $kalimat = docx2text($nama_file);
        break;
    case "odt":
        $kalimat = odt2text($nama_file);
        break;
    default:
        // Unsupported type: keep $kalimat defined so the steps below do not
        // read an undefined variable.
        $kalimat = '';
        break;
}
echo 'Isi File Text ' . $kalimat;

// ----- tokenising -----
$kata = tokenising($kalimat);

// --- filtering ---
$hasil = filtering($kata, $bahasa);

// --- stemming and term-frequency (tf) calculation ---
if ($bahasa == "id") {
    $doc_terms = calculate_tf($hasil);
} else {
    $doc_terms = calculate_tf_en($hasil);
}

// update the tf values in the database
// delete the document's terms that exist in the database