<?php
// Gather information about the file that is going to be indexed.
// Missing query parameters fall back to an empty string instead of raising
// "undefined index" notices.
$kd_dok = isset($_GET['no']) ? $_GET['no'] : '';
$nama_file = isset($_GET['nama_file']) ? $_GET['nama_file'] : '';
$bahasa = isset($_GET['bahasa']) ? $_GET['bahasa'] : '';
$tabel_tf = "tf_document";

// The file name comes straight from the request: keep only its final path
// component so values such as "../../etc/passwd" cannot escape "files/".
$nama_file = "files/" . basename($nama_file);

// Take the text after the LAST dot (the previous strpos() lookup used the
// first dot, so "my.report.pdf" produced "report.pdf" and matched no case).
// Lower-casing lets "REPORT.PDF" be handled like "report.pdf".
$tipe_file = strtolower(pathinfo($nama_file, PATHINFO_EXTENSION));
echo 'Tipe File : ' . htmlspecialchars($tipe_file, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . '<br>';

// Pick the text extractor that matches the file type.
switch ($tipe_file) {
    case "txt":
        // Second argument (use_include_path) is kept as in the original call.
        $kalimat = file_get_contents($nama_file, true);
        break;
    case "doc":
        $kalimat = parseWord($nama_file);
        break;
    case "pdf":
        $kalimat = pdf2text($nama_file);
        break;
    case "docx":
        $kalimat = docx2text($nama_file);
        break;
    case "odt":
        $kalimat = odt2text($nama_file);
        break;
    default:
        // Unsupported type: index nothing rather than leaving $kalimat undefined.
        $kalimat = '';
        break;
}

// A failed read (e.g. file_get_contents() returning false) is treated as
// empty content so the later stages always receive a string.
if (!is_string($kalimat)) {
    $kalimat = '';
}

// The extracted text is arbitrary document content; escape it before it is
// written into the HTML response. ENT_SUBSTITUTE keeps the output non-empty
// when the text contains invalid UTF-8 sequences.
echo 'Isi File Text ' . htmlspecialchars($kalimat, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

// ----- tokenising step -----
$kata = tokenising($kalimat);

// --- filtering step ---
<?php
/**
 * Extract readable text from a file by splitting on carriage returns and
 * discarding binary slices.
 *
 * The approach relies on NUL (chr(0x00)) and carriage return (chr(0x0d)) to
 * decide where the text is:
 *  - divide the file contents up by chr(0x0d)
 *  - reject any slice that contains a NUL, and any empty slice
 *  - stitch the remaining slices together, each followed by one space
 *  - strip every character outside the allowed set with a regular expression
 *
 * @param string $userDoc Path of the file to read.
 *
 * @return string The cleaned text, or an empty string when the file is
 *                missing, unreadable or empty.
 */
function parseWord($userDoc)
{
    // Guard first so a bad path yields "" quietly instead of handing a failed
    // handle to fread() (a TypeError on PHP 8) behind an "@" suppressor.
    if (!is_file($userDoc) || !is_readable($userDoc)) {
        return "";
    }

    // file_get_contents() reads the bytes as-is and releases the underlying
    // handle itself; the previous fopen() handle was never closed, and
    // fread() with a zero length (empty file) raises a ValueError on PHP 8.
    $contents = file_get_contents($userDoc);
    if ($contents === false || $contents === "") {
        return "";
    }

    $outtext = "";
    foreach (explode(chr(0x0d), $contents) as $thisline) {
        // Slices holding a NUL byte are treated as binary structure, not text.
        if ($thisline === "" || strpos($thisline, chr(0x00)) !== false) {
            continue;
        }
        $outtext .= $thisline . " ";
    }

    // Keep only letters, digits, whitespace and , . - @ / _ ( )
    return preg_replace("/[^a-zA-Z0-9\\s\\,\\.\\-\n\r\t@\\/\\_\\(\\)]/", "", $outtext);
}

// Demo invocation kept from the original script: prints the text extracted
// from "verdic.docx" (prints nothing when that file is absent).
$userDoc = "verdic.docx";
$text = parseWord($userDoc);
echo $text;
<?php include_once $_SERVER['DOCUMENT_ROOT'] . '/inc/current_pg_function.php'; include_once $_SERVER['DOCUMENT_ROOT'] . '/inc/global-settings.php'; include_once $_SERVER['DOCUMENT_ROOT'] . '/inc/pre-function.php'; include_once $_SERVER['DOCUMENT_ROOT'] . '/inc/doc_reader.php'; $raw_data = file_get_contents('data.txt'); $data = unserialize($raw_data); $file = $data['ref'] . '-letter.doc'; $doc = parseWord($file); $firstname = $data['firstname']; $lastname = $data['lastname']; $fullname = $firstname . " " . $lastname; $doc_title = $data['doc_title']; $changeNameErrors = array(); $updates = false; $changed = false; $total_pgs = 0; //pre($_POST); if (isset($_POST['changeNameDate'])) { if (trim($_POST['firstname']) == "") { $changeNameErrors['fname'] = "Please enter your first name."; } if (trim($_POST['lastname']) == "") { $changeNameErrors['lname'] = "Please enter your last name."; } if (empty($changeNameErrors)) { $fname = trim($_POST['firstname']); $lname = trim($_POST['lastname']); if ($fname !== $data['firstname']) { $data['firstname'] = $fname;