/**
 * Extract the text content of a PDF file with PDF2Text.
 *
 * Returns an empty string when the zlib extension is not loaded or when
 * PDF2Text throws while decoding. If the first pass produces an empty
 * string, decoding is retried once with the unicode flag enabled.
 *
 * @access private
 *
 * @param string $filename Path handed to PDF2Text::setFilename().
 *
 * @return string Extracted text, or '' on failure.
 */
protected static function pdf2text($filename)
{
    if (!extension_loaded('zlib')) {
        return '';
    }

    $pdf = new PDF2Text();
    $pdf->setFilename($filename);

    // Both decoding passes are guarded: previously only the first one was,
    // so a failure during the unicode retry escaped to the caller.
    try {
        $pdf->decodePDF();
        $content = $pdf->output();

        if ($content == '') {
            // try with different multibyte setting
            $pdf->setUnicode(true);
            $pdf->decodePDF();
            $content = $pdf->output();
        }
    } catch (Exception $e) {
        return '';
    }

    return $content;
}
/**
 * Handle an AJAX multi-file upload and echo a JSON status document.
 *
 * Requires $_GET['t'] (dies with 'no t' otherwise). When $_GET['files'] is
 * set, every entry of $_FILES gets a database row, is moved into the model's
 * upload location as "<row id>.<extension>", and - for PDFs - has its text
 * extracted and a first-page thumbnail generated with ImageMagick `convert`.
 *
 * JSON output:
 *  - on error:   {"error": "..."}
 *  - on success: {"files": [...], "ftext": "a,b", "ftextAsli": "<html>"}
 *  - no files:   {"success": "...", "formData": $_POST}
 *
 * @return void
 */
public function uploadfiles()
{
    $data = array();
    //$tid = (isset($_GET['tid'])?addslashes($_GET['tid']):die('no ID'));
    $t = isset($_GET['t']) ? addslashes($_GET['t']) : die('no t');
    $dc = new InputFileModel();

    if (isset($_GET['files'])) {
        $error = false;
        $files = array();
        $uploaddir = $dc->upload_location;
        $arrSuc = array();
        $arrSucAsli = array();

        foreach ($_FILES as $file) {
            $f = new InputFileModel();
            $q = "INSERT INTO {$f->table_name} SET file_folder_id = '0',file_author = '" . Account::getMyID() . "'";
            global $db;
            $fid = $db->qid($q);
            $f->getByID($fid);

            if ($fid) {
                $f->file_url = basename($file['name']);

                // end() takes its argument by reference, so it needs a real variable.
                $nameParts = explode(".", $file['name']);
                $ext = end($nameParts);

                // NOTE(review): the extension comes from the client and is not
                // validated against an allow-list here - confirm this is checked elsewhere.
                $f->file_ext = $ext;
                $f->file_filename = $fid . "." . $ext;
                $f->file_date = leap_mysqldate();

                if (move_uploaded_file($file['tmp_name'], $uploaddir . $f->file_filename)) {
                    $files[] = $uploaddir . $file['name'];
                    $f->file_size = filesize($uploaddir . $f->file_filename);

                    if ($f->file_ext == "pdf") {
                        $a = new PDF2Text();
                        $a->setFilename($uploaddir . $f->file_filename);
                        $a->decodePDF();
                        $f->file_isi = preg_replace("/\r|\n/", " ", $a->output());

                        // Render the first page of the PDF as a thumbnail.
                        // Arguments are shell-escaped because the stored file name
                        // contains the client-supplied extension.
                        $strPDF = $uploaddir . $f->file_filename;
                        $thumb = $uploaddir . "thumbs/" . $fid . ".jpg";
                        exec('convert ' . escapeshellarg($strPDF . '[0]') . ' ' . escapeshellarg($thumb));
                    } // if pdf

                    $arrSuc[] = $f->file_filename;
                    // NOTE(review): file_url and file_filename are emitted into HTML/JS
                    // without escaping - confirm how the client inserts this markup.
                    $arrSucAsli[] = "<div id='file_" . $fid . "_{$t}' class='mlt_item'>" . $f->file_url . " <i onclick=\"deleteFromList_{$t}('" . $f->file_filename . "');\" class='glyphicon glyphicon-remove'></i></div>";

                    $f->load = 1;
                    $f->save();

                    if (isset($_SESSION['target_id']['obj'])) {
                        $target = get_class($_SESSION['target_id']['obj']);
                    } else {
                        $target = "inputfile_unknown";
                    }
                    PortalFileLogger::save2log($uploaddir . $f->file_filename, $target, $f->file_url);
                } else {
                    $error = true;
                }
            }
        }

        $data = $error
            ? array('error' => 'There was an error uploading your files')
            : array('files' => $files, 'ftext' => implode(",", $arrSuc), 'ftextAsli' => implode(" ", $arrSucAsli));
    } else {
        $data = array('success' => 'Form was submitted', 'formData' => $_POST);
    }

    echo json_encode($data);
}
/**
 * Run a PDF file through PDF2Text and return the decoded output.
 *
 * @param string $filename Path handed to PDF2Text::setFilename().
 *
 * @return mixed Whatever PDF2Text::output() returns.
 */
private function pdfToText($filename)
{
    // PDF2Text is a very large third-party class treated as a black box;
    // citations live in the class file itself.
    require_once "class.pdf2text.php";

    $extractor = new PDF2Text();
    $extractor->setFilename($filename);
    $extractor->decodePDF();
    $text = $extractor->output();

    return $text;
}
/**
 * Extract the text of the PDF named by $arguments['pdf-file'] and write it
 * next to the source file as "<pdf-file>.txt".
 *
 * @param array $arguments Must contain 'pdf-file'.
 * @param array $options   Presence of 'unicode' switches PDF2Text to unicode mode.
 *
 * @return void
 */
protected function execute(array $arguments = array(), array $options = array())
{
    $pdfPath = $arguments['pdf-file'];
    $this->logLine(sprintf('Extracting text from pdf file: %s', $pdfPath), nbLogger::COMMENT);

    $extractor = new PDF2Text();
    $extractor->setFilename($pdfPath);
    $extractor->setUnicode(isset($options['unicode']));
    $extractor->decodePDF();

    // The text file is written alongside the PDF.
    $targetPath = sprintf('%s.txt', $pdfPath);
    file_put_contents($targetPath, $extractor->output());

    $this->logLine('Text extracted!', nbLogger::COMMENT);
}
/**
 * Extract the text content of a PDF file with PDF2Text.
 *
 * If the first decoding pass yields an empty string, the file is decoded a
 * second time with the unicode flag enabled and that result is returned.
 *
 * @access private
 *
 * @param string $filename Path handed to PDF2Text::setFilename().
 *
 * @return mixed Whatever PDF2Text::output() returns.
 */
protected static function pdf2text($filename)
{
    $reader = new PDF2Text();
    $reader->setFilename($filename);
    $reader->decodePDF();

    $text = $reader->output();
    if ($text != '') {
        return $text;
    }

    // First pass came back empty: retry with the other multibyte setting.
    $reader->setUnicode(true);
    $reader->decodePDF();

    return $reader->output();
}
$id = ""; if (isset($_GET['id'])) { $id = $_GET['id']; } $sql = " SELECT * \n\t\t\t FROM DOCUMENTO D\n\t\t\t INNER JOIN TIPO T ON (D.TIPO_ID = T.TIPO_ID)\n\t\t\t INNER JOIN DEPARTAMENTO DE ON (D.DEPA_ID = DE.DEPA_ID)\n\t\t\t WHERE DOCU_ID = {$id} "; $params = array(); $options = array("Scrollable" => SQLSRV_CURSOR_KEYSET); $resultado = sqlsrv_query($conn, $sql, $params, $options); $fila = sqlsrv_fetch_object($resultado); $total = sqlsrv_num_rows($resultado); //si es pdf if ($fila->TIPO_ID == 1) { require_once "../../lib/pdf2text.php"; $ruta = $fila->DOCU_LINK; $a = new PDF2Text(); $a->setFilename($ruta); $a->decodePDF(); $contentIndexed .= $a->output() . "\n\n"; $contentIndexed = ereg_replace(chr(12), "\r\n{new_page}\r\n", ereg_replace("\n", "\r\n", $contentIndexed)); // Enumeramos los patrones por el número de página que corresponde. $arrContent = explode("\r\n{new_page}\r\n", $contentIndexed); $arrayPaginas = $arrContent; // Lo guardamos porque luego lo usaremos para guardar el archivo o en base de datos. $contentIndexed2 = "{1}"; $xCount = 2; foreach ($arrContent as $page) { $contentIndexed2 .= trim($page) . "\r\n{" . $xCount . "}\r\n"; $xCount++; } $contentIndexed = $contentIndexed2; // Recorremos las líneas del archivo y eliminamos las que tengan básicamente números por no ser interesantes ya que normalmente
/**
 * Handle an AJAX multi-file upload into a document folder and echo a JSON
 * status document.
 *
 * Requires $_GET['tid'] (target folder id) and $_GET['t']; dies otherwise.
 * When $_GET['files'] is set, every entry of $_FILES gets a database row and
 * is moved into the upload location as "<row id>.<extension>". An upload
 * whose size on disk differs from the reported size is treated as corrupt:
 * the file and its row are removed and "err_size" is set in the response.
 * PDFs additionally get their text extracted and a first-page thumbnail.
 *
 * @return void
 */
public function uploadfiles()
{
    $data = array();
    // Initialised up front: it is read after the if/else below, including on
    // the branch where no files were submitted.
    $error_size = 0;

    // NOTE(review): $tid is interpolated into SQL with only addslashes();
    // switch to a parameterised query if the $db layer supports one.
    $tid = isset($_GET['tid']) ? addslashes($_GET['tid']) : die('no ID');
    $t = isset($_GET['t']) ? addslashes($_GET['t']) : die('no t');
    $dc = new DocumentsPortal();

    if (isset($_GET['files'])) {
        $error = false;
        $files = array();
        $uploaddir = $dc->upload_location;

        foreach ($_FILES as $file) {
            $f = new DocumentsPortal();
            $q = "INSERT INTO {$f->table_name} SET file_folder_id = '{$tid}',file_author = '" . Account::getMyID() . "'";
            global $db;
            $fid = $db->qid($q);
            $f->getByID($fid);

            if ($fid) {
                $f->file_url = basename($file['name']);

                // end() takes its argument by reference, so it needs a real variable.
                $nameParts = explode(".", $file['name']);
                $ext = end($nameParts);

                $f->file_ext = $ext;
                $f->file_filename = $fid . "." . $ext;
                $f->file_date = leap_mysqldate();

                // Size reported by the upload, compared with the size on disk below.
                $size_awal = $file['size'];

                if (move_uploaded_file($file['tmp_name'], $uploaddir . $f->file_filename)) {
                    $files[] = $uploaddir . $file['name'];
                    $f->file_size = filesize($uploaddir . $f->file_filename);

                    // Final size on disk.
                    $size_akhir = $f->file_size;

                    // Did the transfer fail part-way through?
                    if ($size_awal != $size_akhir) {
                        // Remove the corrupt file ...
                        unlink($uploaddir . $f->file_filename);
                        // ... and its database row.
                        $f->delete($fid);
                        $error = true;
                        $error_size = 1;
                    } else {
                        if ($f->file_ext == "pdf") {
                            $a = new PDF2Text();
                            $a->setFilename($uploaddir . $f->file_filename);
                            $a->decodePDF();
                            $f->file_isi = preg_replace("/\r|\n/", " ", $a->output());

                            // Render the first page of the PDF as a thumbnail.
                            // Arguments are shell-escaped because the stored file
                            // name contains the client-supplied extension.
                            $strPDF = $uploaddir . $f->file_filename;
                            $thumb = $uploaddir . "thumbs/" . $fid . ".jpg";
                            putenv("PATH=/usr/local/bin:/usr/bin:/bin");
                            putenv("MAGICK_TEMPDIR=/tmp");
                            exec('convert ' . escapeshellarg($strPDF . '[0]') . ' ' . escapeshellarg($thumb));
                        }

                        $f->load = 1;
                        $f->save();

                        // log
                        BLogger::addLog("file_id = {$fid}, filename = " . $f->file_url, "upload_file");
                    } // else: size check
                } else {
                    $error = true;
                    $f->delete($fid);
                }
            }
        }

        $data = $error
            ? array('error' => 'There was an error uploading your files')
            : array('files' => $files);
    } else {
        $data = array('success' => 'Form was submitted', 'formData' => $_POST);
    }

    if ($error_size) {
        $data['err_size'] = 1;
    }

    //$data['fil'] = $_FILES;
    echo json_encode($data);
}
foreach ($entries as $entry) { $sDta .= $entry->getName() . "\\r\\n"; } // getName | getPackedSize | getUnpackedSize @rar_close($rar_file); $sData = '"type":"archive","text":"' . $sDta . ' "'; $sMsg .= "contentsSucces"; } else { $sMsg .= "contentsFail"; } } } else { if ($sExt == "pdf") { include 'class.pdf2text.php'; $oPdf = new PDF2Text(); $oPdf->setFilename($sSFile); $oPdf->decodePDF(); $sCnt = str_replace(array("\n", "\r", "\t"), array("\\n", "\\n", ""), substr($oPdf->output(), 0, PREVIEW_BYTES)); $sData = '"type":"ascii","text":"' . $sCnt . ' "'; $sMsg .= "contentsSucces"; } else { if ($sExt == "doc") { ////////////////////////////// // does not seem to be possible ////////////////////////////// } else { $oHnd = fopen($sSFile, "r"); $sCnt = preg_replace(array("/\n/", "/\r/", "/\t/"), array("\\n", "\\r", "\\t"), addslashes(fread($oHnd, 600))); fclose($oHnd); $sData = '"type":"ascii","text":"' . $sCnt . '"'; $sMsg .= "contentsSucces";
/**
 * Persist the record; on first save also move it to its final URI and,
 * for PDF and plain-text files, store the extracted text.
 *
 * PDF extraction is best-effort: exceptions from PDF2Text are swallowed and
 * the record is saved without extracted info.
 *
 * @param Doctrine_Connection|null $conn Connection forwarded to the parent save().
 */
public function save(Doctrine_Connection $conn = null)
{
    if (!$this->isNew()) {
        parent::save($conn);

        return;
    }

    $this->changeUri();

    if ($this->getMimeType() == 'application/pdf') {
        $extractor = new PDF2Text();
        $extractor->setFilename($this->getFullURI());
        $text = '';

        try {
            $extractor->decodePDF();
            $text = $extractor->output();

            if ($text == '') {
                // Nothing on the first pass: retry with the other multibyte setting.
                $extractor->setUnicode(true);
                $extractor->decodePDF();
                $text = $extractor->output();
            }
        } catch (Exception $e) {
            // Deliberately ignored - extraction failure must not block the save.
        }

        if ($text != '') {
            $this->setExtractedInfo(utf8_encode($text));
        }
    }

    if ($this->getMimeType() == 'text/plain') {
        $plainText = file_get_contents($this->getFullURI());
        $this->setExtractedInfo(utf8_encode($plainText));
    }

    parent::save($conn);
}
/**
 * Read the text content of a PDF stored in the temp folder.
 *
 * @param string $file_name File name relative to self::TEMP_FOLDER.
 * @return string
 */
public static function readPdfFile($file_name)
{
    $pdfPath = self::TEMP_FOLDER . $file_name;

    $reader = new PDF2Text();
    $reader->setFilename($pdfPath);
    $reader->decodePDF();
    $text = $reader->output();

    return $text;
}
// Database connection settings (left blank in this file).
$username = ""; // MySQL username
$password = ""; // MySQL password
$db_name = ""; // Database name
$tbl_name = ""; // Table name

// Client-supplied name of the file uploaded in the "pdf" form field; the
// part before the first dot is kept as $column_name.
$file_name = $_FILES['pdf']['name'];
$filename = explode(".", $file_name);
$column_name = $filename[0];

// Allow up to five minutes of execution time.
ini_set('max_execution_time', 300);
include 'pdf2text.php';
include 'stem_code.php';

// Extract the PDF text.
// NOTE(review): setFilename() receives the client-side file name, not
// $_FILES['pdf']['tmp_name'] - confirm the upload is moved to that path first.
$a = new PDF2Text();
$a->setFilename($file_name);
$a->decodePDF();
$stringput = $a->output();

// Lower-case the text and split it on single spaces.
$stringput = strtolower($stringput);
$allword_count = explode(" ", $stringput);
$wordarray = array();
$wordarrays = array();

// Copy every word into $wordarrays.
foreach ($allword_count as $key => $val) {
    array_push($wordarrays, $val);
}

/* Stemming: reduce each word with PorterStemmer::Stem() and collect the results in $wordarray. */
foreach ($wordarrays as $key => $word) {
    $stem = PorterStemmer::Stem($word);
    array_push($wordarray, $stem);
}

// Stop-word list (contains a few duplicates and document-specific entries).
$stopwords = array('on', 'us', 'xc', 'be', 'by', 'at', 'but', 'e', 'i', 'be', 'by', 'g', 'j', 'and', 'is', 'f', 'are', 'p', 'can', 'each', 'we', 'x', 'in', 'b', 'as', 'c', 'd', 'for', 'also', 'an', 'all', '-', 'a', 'any', 'in', 'the', 'thesis', 'to', 'of', 'dammalapati');
/**
 * Debug handler: extract the text of "<DOCUMENT_ROOT>/68.pdf", normalise it
 * and echo the first 1000 bytes, then terminate the request.
 *
 * Normalisation steps, applied in sequence to the var_export()ed text:
 *  1. fold a fixed set of accented lower-case letters to ASCII;
 *  2. replace every character that is not an ASCII letter, digit, '-', '<'
 *     or '>' with a space, collapse runs of '-', and strip <...> tags.
 *
 * The original body continued after the first `exit` with unreachable
 * experiments (referencing undefined $prueba3 / $sql); those were removed.
 * TODO: the commented-out statements suggested the goal is to store the
 * cleaned text in norma.palabras_clave - implement that in place of echo/exit.
 *
 * @param mixed $datos Event payload; not used by the current body.
 *
 * @return void This function does not return; it calls exit.
 */
function evt__cuadro__procesar($datos)
{
    $a = new PDF2Text();
    $path = $_SERVER['DOCUMENT_ROOT'] . "/68.pdf";
    $a->setFilename($path);
    $a->decodePDF();
    $texto = $a->output();

    $prueba = var_export($texto, true) . PHP_EOL;

    // Step 1: fold accented letters.
    $buscar = array('á', 'é', 'í', 'ó', 'ú', 'ñ', 'ü');
    $remplzr = array('a', 'e', 'i', 'o', 'u', 'n', 'u');
    $prueba2 = str_replace($buscar, $remplzr, $prueba);

    // Step 2: strip everything else. This now runs on $prueba2 (the result of
    // step 1); it previously ran on $prueba and discarded the accent folding.
    // "A-Za-z" replaces "A-z", which also matched [ \ ] ^ _ and `.
    $buscar = array('/[^A-Za-z0-9-<>]/', '/[-]+/', '/<[^>]*>/');
    $remplzr = array(' ', '-', '');
    $prueba2 = preg_replace($buscar, $remplzr, $prueba2);

    echo substr($prueba2, 0, 1000);
    exit;
}
/**
 * Replace-or-upload handler for a single-file input; echoes a JSON status.
 *
 * If $_GET['adafile'] names an existing file in the upload location, that
 * file, its database row and its log entry are deleted first. Then, when
 * $_GET['files'] is set, the first successfully moved entry of $_FILES is
 * stored as "<row id>.<extension>" (PDFs also get text extraction and a
 * first-page thumbnail) and the request ends immediately with a JSON body
 * containing "files", "bool", "isImage" and "filename".
 *
 * Requires $_GET['t'] (dies with 'no t' otherwise).
 *
 * @return void
 */
public function uploadfiles()
{
    // Is there a previously uploaded file to replace?
    // basename() confines the value to the upload directory so a crafted
    // "../" path cannot delete arbitrary files.
    $adafile = isset($_GET['adafile']) ? basename($_GET['adafile']) : '';

    if ($adafile) {
        $if = new InputFileModel();
        $uploadpath = $if->upload_location;

        if (file_exists($uploadpath . $adafile)) {
            // delete old file
            if (unlink($uploadpath . $adafile)) {
                // Escaped before being embedded in the WHERE string, matching
                // the addslashes() convention used for other request values here.
                $adafileSql = addslashes($adafile);
                $arrf = $if->getWhere("file_filename = '{$adafileSql}' LIMIT 0,1");
                if (count($arrf) > 0) {
                    $if->delete($arrf[0]->file_id);
                }

                // delete from log
                PortalFileLogger::deleteFileLog($uploadpath . $adafile);
                /*if(file_exists(_PHOTOPATH.'thumbnail/' . $adafile)) { //delete old thumb file unlink(_PHOTOPATH.'thumbnail/' . $adafile); }*/
            }
        }
    }

    $data = array();
    //$tid = (isset($_GET['tid'])?addslashes($_GET['tid']):die('no ID'));
    $t = isset($_GET['t']) ? addslashes($_GET['t']) : die('no t');

    // Guarded read: 'files' may be absent (handled by the else branch below).
    $data['files'] = isset($_GET['files']) ? $_GET['files'] : null;
    $data['bool'] = 0;
    $dc = new InputFileModel();

    if (isset($_GET['files'])) {
        $error = false;
        $files = array();
        $uploaddir = $dc->upload_location;

        foreach ($_FILES as $file) {
            $f = new InputFileModel();
            $q = "INSERT INTO {$f->table_name} SET file_folder_id = '0',file_author = '" . Account::getMyID() . "'";
            global $db;
            $fid = $db->qid($q);
            $f->getByID($fid);

            if ($fid) {
                $f->file_url = basename($file['name']);

                // end() takes its argument by reference, so it needs a real variable.
                $nameParts = explode(".", $file['name']);
                $ext = end($nameParts);

                $f->file_ext = $ext;
                $f->file_filename = $fid . "." . $ext;
                $f->file_date = leap_mysqldate();

                if (move_uploaded_file($file['tmp_name'], $uploaddir . $f->file_filename)) {
                    $files[] = $uploaddir . $file['name'];
                    $f->file_size = filesize($uploaddir . $f->file_filename);

                    if ($f->file_ext == "pdf") {
                        $a = new PDF2Text();
                        $a->setFilename($uploaddir . $f->file_filename);
                        $a->decodePDF();
                        $f->file_isi = preg_replace("/\r|\n/", " ", $a->output());

                        // Render the first page of the PDF as a thumbnail.
                        // Arguments are shell-escaped because the stored file name
                        // contains the client-supplied extension.
                        $strPDF = $uploaddir . $f->file_filename;
                        $thumb = $uploaddir . "thumbs/" . $fid . ".jpg";
                        exec('convert ' . escapeshellarg($strPDF . '[0]') . ' ' . escapeshellarg($thumb));
                    }

                    $f->load = 1;
                    $data['bool'] = $f->save();
                    $data['isImage'] = Leap\View\InputFile::isImage($f->file_filename);
                    $data['filename'] = $f->file_filename;

                    if (isset($_SESSION['target_id']['obj'])) {
                        $target = get_class($_SESSION['target_id']['obj']);
                    } else {
                        $target = "inputfile_unknown";
                    }
                    PortalFileLogger::save2log($uploaddir . $f->file_filename, $target, $f->file_url);

                    // The request ends after the first stored file.
                    die(json_encode($data));
                } else {
                    $error = true;
                }
            }
        }

        $data = $error
            ? array('error' => 'There was an error uploading your files')
            : array('files' => $files);
    } else {
        $data = array('success' => 'Form was submitted', 'formData' => $_POST);
    }

    echo json_encode($data);
}
/**
 * Index posts stored in $this->unindexedPosts
 *
 * For every queued post this method:
 *  - bumps an attempt counter and flags the post to be skipped once the
 *    counter exceeds $this->maxAttemptsToIndex;
 *  - on a first pass, extracts document content (PDF / plain text) into post
 *    meta, then collects terms from title, slug, content, excerpt, comments,
 *    taxonomies, custom fields and filter-supplied extra metadata;
 *  - on a later pass, resumes from the term array cached in post meta;
 *  - records at most one chunk of terms per run and caches the remainder so
 *    large posts are indexed across several runs.
 *
 * @since 1.0
 */
function index()
{
    global $wp_filesystem, $searchwp;

    $this->check_for_parallel_indexer();

    if (is_array($this->unindexedPosts) && count($this->unindexedPosts)) {
        do_action('searchwp_indexer_pre_chunk', $this->unindexedPosts);

        // all of the IDs to index have not been indexed, proceed with indexing them
        while (($unindexedPost = current($this->unindexedPosts)) !== false) {
            $this->setPost($unindexedPost);

            // log the attempt
            $count = get_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'attempts', true);
            if ($count == false) {
                $count = 0;
            } else {
                $count = intval($count);
            }
            $count++;

            // increment our counter to prevent the indexer getting stuck on a gigantic PDF
            update_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'attempts', $count);
            do_action('searchwp_log', 'Attempt ' . $count . ' at indexing ' . $this->post->ID);

            // if we breached the maximum number of attempts, flag it to skip
            $this->maxAttemptsToIndex = absint(apply_filters('searchwp_max_index_attempts', $this->maxAttemptsToIndex));
            if (intval($count) > $this->maxAttemptsToIndex) {
                do_action('searchwp_log', 'Too many indexing attempts on ' . $this->post->ID . ' (' . $this->maxAttemptsToIndex . ') - skipping');

                // flag it to be skipped
                update_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'skip', true);
            } else {
                // check to see if we're running a second pass on terms
                $termCache = get_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'terms', true);

                if (!is_array($termCache)) {
                    do_action('searchwp_index_post', $this->post);

                    // if it's an attachment, we want the permalink
                    $slug = $this->post->post_type == 'attachment' ? str_replace(get_bloginfo('wpurl'), '', get_permalink($this->post->ID)) : '';

                    // we allow users to override the extracted content from documents, if they have done so this flag is set
                    $skipDocProcessing = get_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'skip_doc_processing', true);
                    $omitDocProcessing = apply_filters('searchwp_omit_document_processing', false);

                    if (!$skipDocProcessing && !$omitDocProcessing) {
                        // if it's a PDF we need to populate our Custom Field with it's content
                        if ($this->post->post_mime_type == 'application/pdf') {
                            // grab the filename of the PDF
                            $filename = get_attached_file($this->post->ID);

                            // allow for external PDF content extraction
                            $pdfContent = apply_filters('searchwp_external_pdf_processing', '', $filename, $this->post->ID);

                            // only try to extract content if the external processing has not provided the PDF content we're looking for
                            if (empty($pdfContent)) {
                                // PdfParser runs only on 5.3+ but SearchWP runs on 5.2+
                                if (version_compare(PHP_VERSION, '5.3', '>=')) {
                                    include_once $searchwp->dir . '/vendor/pdfparser-bootloader.php';
                                }

                                // a wrapper class was conditionally included if we're running PHP 5.3+ so let's try that
                                if (class_exists('SearchWP_PdfParser')) {
                                    // try PdfParser first
                                    $parser = new SearchWP_PdfParser();
                                    $parser = $parser->init();
                                    $pdf = $parser->parseFile($filename);
                                    $text = $pdf->getText();
                                    $pdfContent = trim(str_replace("\n", " ", $text));
                                }

                                // try PDF2Text
                                if (empty($pdfContent)) {
                                    if (!class_exists('PDF2Text')) {
                                        include_once $searchwp->dir . '/includes/class.pdf2text.php';
                                    }
                                    $pdfParser = new PDF2Text();
                                    $pdfParser->setFilename($filename);
                                    $pdfParser->decodePDF();
                                    $pdfContent = $pdfParser->output();
                                    $pdfContent = trim(str_replace("\n", " ", $pdfContent));
                                }

                                // check to see if the first pass produced nothing or concatenated strings
                                // (fewer than 10% spaces is treated as concatenated output)
                                $fullContentLength = strlen($pdfContent);
                                $numberOfSpaces = substr_count($pdfContent, ' ');
                                if (empty($pdfContent) || $numberOfSpaces / $fullContentLength * 100 < 10) {
                                    WP_Filesystem();
                                    $filecontent = $wp_filesystem->exists($filename) ? $wp_filesystem->get_contents($filename) : '';
                                    if (false != strpos($filecontent, 'trailer')) {
                                        if (!class_exists('pdf_readstream')) {
                                            include_once $searchwp->dir . '/includes/class.pdfreadstream.php';
                                        }
                                        $pdfContent = '';
                                        $pdf = new pdf(get_attached_file($this->post->ID));
                                        $pages = $pdf->get_pages();
                                        if (!empty($pages)) {
                                            // foreach instead of each(), which no longer exists in PHP 8
                                            foreach ($pages as $page) {
                                                $pdfContent .= $page->get_text();
                                            }
                                        }
                                    } else {
                                        // empty out the content so wacky concatenations are not indexed
                                        $pdfContent = '';

                                        // flag it for further review
                                        update_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'review', true);
                                        update_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'skip', true);
                                    }
                                }
                            }

                            $pdfContent = trim($pdfContent);
                            if (!empty($pdfContent)) {
                                $pdfContent = sanitize_text_field($pdfContent);
                                delete_post_meta($this->post->ID, SEARCHWP_PREFIX . 'content');
                                update_post_meta($this->post->ID, SEARCHWP_PREFIX . 'content', $pdfContent);
                            }
                        } elseif ($this->post->post_mime_type == 'text/plain') {
                            // if it's plain text, index it's content
                            WP_Filesystem();
                            $filename = get_attached_file($this->post->ID);
                            $textContent = $wp_filesystem->exists($filename) ? $wp_filesystem->get_contents($filename) : '';
                            $textContent = str_replace("\n", " ", $textContent);
                            if (!empty($textContent)) {
                                $textContent = sanitize_text_field($textContent);
                                update_post_meta($this->post->ID, SEARCHWP_PREFIX . 'content', $textContent);
                            }
                        } else {
                            // all other file types
                        }
                    }

                    $postTerms = array();
                    $postTerms['title'] = $this->indexTitle();
                    $postTerms['slug'] = $this->indexSlug(str_replace('/', ' ', $slug));
                    $postTerms['content'] = $this->indexContent();
                    $postTerms['excerpt'] = $this->indexExcerpt();
                    if (apply_filters('searchwp_index_comments', true)) {
                        $postTerms['comments'] = $this->indexComments();
                    }

                    // index taxonomies
                    $taxonomies = get_object_taxonomies($this->post->post_type);
                    if (!empty($taxonomies)) {
                        while (($taxonomy = current($taxonomies)) !== false) {
                            $terms = get_the_terms($this->post->ID, $taxonomy);
                            if (!empty($terms)) {
                                $postTerms['taxonomy'][$taxonomy] = $this->indexTaxonomyTerms($taxonomy, $terms);
                            }
                            next($taxonomies);
                        }
                        reset($taxonomies);
                    }

                    // index custom fields
                    $customFields = apply_filters('searchwp_get_custom_fields', get_post_custom($this->post->ID), $this->post->ID);
                    if (!empty($customFields)) {
                        while (($customFieldValue = current($customFields)) !== false) {
                            $customFieldName = key($customFields);

                            // there are a few useless (when it comes to search) WordPress core custom fields, so let's exclude them by default
                            $omitWpMetadata = apply_filters('searchwp_omit_wp_metadata', array('_edit_lock', '_wp_page_template', '_edit_last', '_wp_old_slug'));
                            $excludedCustomFieldKeys = apply_filters('searchwp_excluded_custom_fields', array('_' . SEARCHWP_PREFIX . 'indexed', '_' . SEARCHWP_PREFIX . 'attempts', '_' . SEARCHWP_PREFIX . 'terms', '_' . SEARCHWP_PREFIX . 'last_index', '_' . SEARCHWP_PREFIX . 'skip', '_' . SEARCHWP_PREFIX . 'skip_doc_processing', '_' . SEARCHWP_PREFIX . 'review'));

                            // merge the two arrays of keys if possible
                            if (is_array($omitWpMetadata) && is_array($excludedCustomFieldKeys)) {
                                $excluded_meta_keys = array_merge($omitWpMetadata, $excludedCustomFieldKeys);
                            } elseif (is_array($omitWpMetadata)) {
                                $excluded_meta_keys = $omitWpMetadata;
                            } else {
                                $excluded_meta_keys = $excludedCustomFieldKeys;
                            }
                            $excluded_meta_keys = is_array($excluded_meta_keys) ? array_unique($excluded_meta_keys) : array();

                            // allow developers to conditionally omit specific custom fields
                            $omit_this_custom_field = apply_filters("searchwp_omit_meta_key", false, $customFieldName, $this->post);
                            $omit_this_custom_field = apply_filters("searchwp_omit_meta_key_{$customFieldName}", $omit_this_custom_field, $this->post);

                            if (!in_array($customFieldName, $excluded_meta_keys) && !$omit_this_custom_field) {
                                // allow devs to swap out their own content
                                // e.g. parsing ACF Relationship fields (that store only post IDs) to actually retrieve that content at runtime
                                $customFieldValue = apply_filters('searchwp_custom_fields', $customFieldValue, $customFieldName, $this->post);
                                $customFieldValue = apply_filters("searchwp_custom_field_{$customFieldName}", $customFieldValue, $this->post);
                                $postTerms['customfield'][$customFieldName] = $this->indexCustomField($customFieldName, $customFieldValue);
                            }
                            next($customFields);
                        }
                        reset($customFields);
                    }

                    // allow developer to store arbitrary information a la Custom Fields (without them actually being Custom Fields)
                    $extraMetadata = apply_filters("searchwp_extra_metadata", false, $this->post);
                    if ($extraMetadata) {
                        if (is_array($extraMetadata)) {
                            foreach ($extraMetadata as $extraMetadataKey => $extraMetadataValue) {
                                // TODO: make sure there are no collisions?
                                // while( isset( $postTerms['customfield'][$extraMetadataKey] ) ) {
                                //     $extraMetadataKey .= '_';
                                // }
                                $postTerms['customfield'][$extraMetadataKey] = $this->indexCustomField($extraMetadataKey, $extraMetadataValue);
                            }
                        }
                    }

                    // we need to break out the terms from all of this content
                    $termCountBreakout = array();
                    if (is_array($postTerms) && count($postTerms)) {
                        foreach ($postTerms as $type => $terms) {
                            switch ($type) {
                                case 'title':
                                case 'slug':
                                case 'content':
                                case 'excerpt':
                                case 'comments':
                                    if (is_array($terms) && count($terms)) {
                                        foreach ($terms as $term) {
                                            $termCountBreakout[$term['term']][$type] = $term['count'];
                                        }
                                    }
                                    break;
                                case 'taxonomy':
                                case 'customfield':
                                    if (is_array($terms) && count($terms)) {
                                        foreach ($terms as $name => $nameTerms) {
                                            if (is_array($nameTerms) && count($nameTerms)) {
                                                foreach ($nameTerms as $nameTerm) {
                                                    $termCountBreakout[$nameTerm['term']][$type][$name] = $nameTerm['count'];
                                                }
                                            }
                                        }
                                    }
                                    break;
                            }
                        }
                    }
                } else {
                    $termCountBreakout = $termCache;

                    // if there was a term cache, this repeated processing doesn't count, so decrement it
                    delete_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'attempts');
                    delete_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'skip');
                }

                // unless the term chunk limit says otherwise, we're going to flag this as being OK to log as indexed
                $flagAsIndexed = true;

                // we now have a multidimensional array of terms with counts per type in $termCountBreakout
                // if the term count is huge, we need to split up this process so as to avoid
                // hitting upper PHP execution time limits (term insertion is heavy), so we'll chunk the array of terms
                $termChunkMax = 500;

                // try to set a better default based on php.ini's memory_limit
                $memoryLimit = ini_get('memory_limit');
                if (preg_match('/^(\\d+)(.)$/', $memoryLimit, $matches)) {
                    if ($matches[2] == 'M') {
                        $termChunkMax = (int) $matches[1] * 15; // 15 terms per MB RAM
                    } else {
                        // memory was set in K...
                        $termChunkMax = 100;
                    }
                }

                $termChunkLimit = apply_filters('searchwp_process_term_limit', $termChunkMax);

                if (count($termCountBreakout) > $termChunkLimit) {
                    // Keys are the terms themselves, so they must be preserved:
                    // without preserve_keys, numeric terms (e.g. "2014") are
                    // stored as integer keys and would be renumbered by array_slice().
                    $acceptableTermCountBreakout = array_slice($termCountBreakout, 0, $termChunkLimit, true);

                    // we haven't pulled all of the terms, so we can't consider this post indexed...
                    $flagAsIndexed = false;

                    // save the term breakout so we don't have to do it again.
                    // The remainder starts exactly where the accepted chunk ends;
                    // starting at $termChunkLimit + 1 silently dropped one term per chunk.
                    $remainingTerms = array_slice($termCountBreakout, $termChunkLimit, null, true);
                    update_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'terms', $remainingTerms);

                    // set the acceptable breakout as the main breakout
                    $termCountBreakout = $acceptableTermCountBreakout;
                }

                $this->recordPostTerms($termCountBreakout);
                unset($termCountBreakout);

                // flag the post as indexed
                if ($flagAsIndexed) {
                    // clean up our stored term array if necessary
                    if ($termCache) {
                        delete_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'terms');
                    }

                    // clean up the attempt counter
                    delete_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'attempts');
                    delete_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'skip');
                    update_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'indexed', true);
                    update_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'last_index', current_time('timestamp'));
                }
            }
            next($this->unindexedPosts);
        }
        reset($this->unindexedPosts);
        do_action('searchwp_indexer_post_chunk');
    }
}
/**
 * @deprecated
 *
 * Fallback to pure PHP library on IT-hosting (#1409).
 *
 * Extracts text with PDF2Text, applies a few English-specific whitespace
 * clean-ups and converts every newline run to a single <br>.
 *
 * @param string $pdf_file Path handed to PDF2Text::setFilename().
 * @return string
 */
protected function _pdf2text($pdf_file)
{
    # TODO: error, PDF2Text echoes to screen - messes up header() call.
    require_once __DIR__ . '/../class.pdf2text.php';

    $pdf = new PDF2Text();
    $pdf->setFilename($pdf_file);
    $pdf->decodePDF();
    $result = $pdf->output();

    // Fix floating 's' 'th' ',' etc - English locale-specific.
    $result = preg_replace('#(\\w)\\ss\\s#', '$1s ', $result);
    // The word character after "th " is captured so that '$1' re-emits it;
    // without the group the replacement deleted that character.
    $result = preg_replace('#\\sth\\s(\\w)#', ' th$1', $result);
    $result = preg_replace('#\\s([,\\.;])#', '$1', $result);

    // Concatenate lines starting with lowercase letters.
    $result = preg_replace('#\\s+([a-z])#', ' $1', $result);
    // NOTE(review): search and replacement look identical here - possibly a
    // double space or non-breaking space lost in transit; confirm against VCS.
    $result = str_replace(' ', ' ', $result);

    // A crude conversion to HTML.
    #$html = preg_replace('#([\w\. ])\n(\r)?\n(\w)#ms', '$1<br>$2', $result);
    $html = str_replace(array("\n\n\n", "\n\n", "\n"), '<br>', $result);

    return $html;
}
/**
 * Run the parent constructor, execute the requested file-browser action and
 * emit the result through $this->returnJSON().
 *
 * The action name is read from $this->sAction (presumably populated by the
 * parent constructor together with $this->sSFile, $this->aFiles,
 * $this->sConnBse and $this->aReturn - verify against the parent class).
 * Supported actions: fileList, duplicate, swfUpload, upload, delete, download,
 * read, rename, addFolder, moveFiles. Each action appends status keys to
 * $this->aReturn['msg'] / ['error'] and may set $this->aReturn['data'].
 *
 * Side effects visible here: "download" streams the file and exits, and
 * "delete" exits early when the POST payload does not have the expected shape.
 */
function __construct()
{
    parent::__construct();

    if ($this->sAction) {
        switch ($this->sAction) {
            case "fileList":
                // retrieve file list
                $sDir = isset($_POST["folder"]) ? $_POST["folder"] : "/";
                $aFiles = array();
                $listing = tntbase_get_path_contents($sDir, true);
                foreach ($listing as $file => $prop) {
                    $aFiles[] = $this->getFileInfo(array("file" => $file, "type" => $prop));
                }
                $this->aReturn['msg'] .= "fileListing";
                $this->aReturn['data'] = $aFiles;
                break;

            case "duplicate":
                // duplicate file: "name.ext" -> "name_copy0.ext", "name_copyN.ext" -> "name_copyN+1.ext"
                $sCRegx = "/(?<=(_copy))([0-9])+(?=(\\.))/";
                $sNRegx = "/(\\.)(?=[A-Za-z0-9]+\$)/";
                preg_match($sCRegx, $this->sSFile, $aMatches);
                if (count($aMatches) > 0) {
                    $sNewFile = preg_replace($sCRegx, intval($aMatches[0]) + 1, $this->sSFile);
                } else {
                    $sNewFile = preg_replace($sNRegx, "_copy0.", $this->sSFile);
                }
                // keep incrementing the copy counter until the name is free
                while (file_exists($sNewFile)) {
                    preg_match($sCRegx, $sNewFile, $aMatches);
                    $sNewFile = preg_replace($sCRegx, intval($aMatches[0]) + 1, $sNewFile);
                }
                if (copy($this->sSFile, $sNewFile)) {
                    $oFNfo = $this->fileInfo($sNewFile);
                    $this->aReturn['data'] = $oFNfo;
                    $this->aReturn['msg'] .= "duplicated#" . $sNewFile;
                } else {
                    $this->aReturn['error'] = "notduplicated#" . $sNewFile;
                }
                break;

            case "swfUpload":
                // swf file upload: parameters arrive via GET, copy them into POST
                if ($this->sAction == "swfUpload") {
                    foreach ($_GET as $k => $v) {
                        $_POST[$k] = $v;
                    }
                }
                // intentional fall-through into the regular upload handling

            case "upload":
                // file upload
                $sElName = $this->sAction == "upload" ? "fileToUpload" : "Filedata";
                if (!empty($_FILES[$sElName]["error"])) {
                    switch ($_FILES[$sElName]["error"]) {
                        case "1":
                            $sErr = "uploadErr1";
                            break;
                        case "2":
                            $sErr = "uploadErr2";
                            break;
                        case "3":
                            $sErr = "uploadErr3";
                            break;
                        case "4":
                            $sErr = "uploadErr4";
                            break;
                        case "6":
                            $sErr = "uploadErr6";
                            break;
                        case "7":
                            $sErr = "uploadErr7";
                            break;
                        case "8":
                            $sErr = "uploadErr8";
                            break;
                        default:
                            $sErr = "uploadErr";
                    }
                    // The error key used to be computed and then dropped; report it.
                    $this->aReturn['error'] = $sErr;
                } elseif (empty($_FILES[$sElName]["tmp_name"]) || $_FILES[$sElName]["tmp_name"] == "none") {
                    $this->aReturn['error'] = "No file was uploaded..";
                } else {
                    $sFolder = isset($_POST["folder"]) ? $_POST["folder"] : "";
                    $this->aReturn['msg'] .= "sFolder_" . $sFolder;
                    $sPath = $sFolder;
                    $sDeny = isset($_POST["deny"]) ? $_POST["deny"] : "";
                    $sAllow = isset($_POST["allow"]) ? $_POST["allow"] : "";
                    $sResize = isset($_POST["resize"]) ? $_POST["resize"] : "";
                    $oFile = $_FILES[$sElName];
                    $sFile = $oFile["name"];
                    // array_pop() takes its argument by reference, so it needs a real variable
                    $aNameParts = preg_split("/\\./", $sFile);
                    $sMime = array_pop($aNameParts);

                    // find a free target name by appending _1, _2, ... to the first name segment
                    // NOTE(review): the existence check runs on the path WITHOUT $this->sConnBse,
                    // while the file is written WITH that prefix - confirm this is intended.
                    $iRpt = 1;
                    $sFileTo = $sPath . $oFile["name"];
                    while (file_exists($sFileTo)) {
                        $aFile = explode(".", $oFile["name"]);
                        $aFile[0] .= "_" . $iRpt++;
                        $sFile = implode(".", $aFile);
                        $sFileTo = $sPath . $sFile;
                    }
                    $sFileTo = $this->sConnBse . $sFileTo;
                    move_uploaded_file($oFile["tmp_name"], $sFileTo);
                    $oFNfo = $this->fileInfo($sFileTo);

                    // an empty allow-list means "everything allowed"; deny-list wins over allow-list
                    $bAllow = $sAllow == "";
                    $aExtParts = explode(".", $sFile);
                    $sFileExt = array_pop($aExtParts);
                    if ($oFNfo) {
                        $this->aReturn['msg'] .= $iRpt === 1 ? 'fileUploaded' : 'fileExistsrenamed';
                        if (in_array($sFileExt, explode("|", $sAllow))) {
                            $bAllow = true;
                        }
                        if (in_array($sFileExt, explode("|", $sDeny))) {
                            $bAllow = false;
                        }
                    } else {
                        $bAllow = false;
                    }

                    if (!$bAllow) {
                        // the file was already moved into place, so remove it again
                        $this->aReturn['error'] = "uploadNotallowed#" . $sFileExt;
                        @unlink($sFileTo);
                    } else {
                        if ($sResize && $sResize != "null" && $sResize != "undefined" && ($sMime == "jpeg" || $sMime == "jpg")) {
                            // $sResize is "width,height"
                            $aResize = explode(",", $sResize);
                            $iToW = isset($aResize[0]) ? (int) $aResize[0] : 0;
                            $iToH = isset($aResize[1]) ? (int) $aResize[1] : 0;
                            $aSize = getimagesize($sFileTo);
                            // skip resizing instead of dividing by zero on unreadable images or bad targets
                            if ($aSize && $aSize[0] > 0 && $aSize[1] > 0 && $iToW > 0 && $iToH > 0) {
                                list($iW, $iH) = $aSize;
                                $fXrs = $iToW / $iW;
                                $fYrs = $iToH / $iH;
                                // crop after resize (a plain "just resize" variant was dead code and is removed)
                                $fRsz = max($fXrs, $fYrs);
                                if ($fXrs < 1 || $fYrs < 1) {
                                    $iNW = intval($iW * $fRsz);
                                    $iNH = intval($iH * $fRsz);
                                    // source rectangle, centred on the image; cast to int explicitly
                                    // because the GD functions expect integer coordinates
                                    $iFrX = $iNW > $iToW ? (int) (($iNW - $iToW) / 2) : 0;
                                    $iFrY = $iNH > $iToH ? (int) (($iNH - $iToH) / 2) : 0;
                                    $iFrW = $iNW > $iToW ? (int) ($iToW * (1 / $fRsz)) : $iW;
                                    $iFrH = $iNH > $iToH ? (int) ($iToH * (1 / $fRsz)) : $iH;
                                    $oImgN = imagecreatetruecolor($iToW, $iToH);
                                    $oImg = imagecreatefromjpeg($sFileTo);
                                    if ($oImgN && $oImg) {
                                        imagecopyresampled($oImgN, $oImg, 0, 0, $iFrX, $iFrY, $iToW, $iToH, $iFrW, $iFrH);
                                        imagejpeg($oImgN, $sFileTo);
                                    }
                                }
                            }
                            // refresh the file info, the size on disk may have changed
                            $oFNfo = $this->fileInfo($sFileTo);
                        }
                        $this->aReturn['data'] = $oFNfo;
                    }
                }
                break;

            case "delete":
                // file delete; the request must consist of exactly three POST fields
                if (count($_POST) != 3 || !isset($_POST["folder"]) || !isset($_POST["file"])) {
                    exit("ku ka");
                }
                if (is_file($this->sSFile)) {
                    if (@unlink($this->sSFile)) {
                        $this->aReturn['msg'] .= "fileDeleted";
                    } else {
                        $this->aReturn['error'] .= "fileNotdeleted";
                    }
                } else {
                    if (@rmdir($this->sSFile)) {
                        $this->aReturn['msg'] .= "folderDeleted";
                    } else {
                        $this->aReturn['error'] .= "folderNotdeleted";
                    }
                }
                break;

            case "download":
                // file force download
                $sZeFile = $this->sConnBse . $this->sSFile;
                if (file_exists($sZeFile)) {
                    ob_start();
                    $sType = "application/octet-stream";
                    $aPathParts = explode("/", $sZeFile);
                    $sDownloadName = array_pop($aPathParts);
                    header("Cache-Control: public, must-revalidate");
                    header("Pragma: hack");
                    // previously the file path was sent as the content type and $sType was unused
                    header("Content-Type: " . $sType);
                    header("Content-Length: " . (string) filesize($sZeFile));
                    header('Content-Disposition: attachment; filename="' . $sDownloadName . '"');
                    header("Content-Transfer-Encoding: binary\n");
                    ob_end_clean();
                    readfile($sZeFile);
                    exit;
                }
                break;

            case "read":
                // read (a preview of) the file contents
                $aExtParts = explode('.', $this->sSFile);
                $sExt = strtolower(array_pop($aExtParts));
                //
                // install extensions and add to php.ini
                // - extension=php_zip.dll
                if ($sExt == "zip") {
                    $sDta = "";
                    if (!function_exists("zip_open")) {
                        // was appended to an undefined local and lost
                        $this->aReturn['error'] .= "php_zip not installed or enabled";
                    } else {
                        $zip = @zip_open(getcwd() . "/" . $this->sSFile);
                        // zip_open() returns an integer error code on failure, which is truthy
                        if (is_resource($zip)) {
                            while ($zip_entry = @zip_read($zip)) {
                                $sDta .= @zip_entry_name($zip_entry) . "\\r\\n";
                            }
                            // zip_entry_filesize | zip_entry_compressedsize | zip_entry_compressionmethod
                            @zip_close($zip);
                            $this->aReturn['data'] = array('type' => 'archive', 'text' => $sDta);
                        }
                    }
                } elseif ($sExt == "rar") {
                    // - extension=php_rar.dll
                    $sDta = "";
                    if (!function_exists("rar_open")) {
                        // was appended to an undefined local and lost
                        $this->aReturn['error'] .= "php_rar not installed or enabled";
                    } else {
                        if ($rar_file = @rar_open(getcwd() . "/" . $this->sSFile)) {
                            $entries = @rar_list($rar_file);
                            if (is_array($entries)) {
                                foreach ($entries as $entry) {
                                    $sDta .= $entry->getName() . "\\r\\n";
                                }
                            }
                            // getName | getPackedSize | getUnpackedSize
                            @rar_close($rar_file);
                            $this->aReturn['data'] = array('type' => 'archive', 'text' => $sDta);
                        }
                    }
                } elseif ($sExt == "pdf") {
                    include 'class.pdf2text.php';
                    $oPdf = new PDF2Text();
                    $oPdf->setFilename($this->sSFile);
                    $oPdf->decodePDF();
                    $sCnt = str_replace(array("\n", "\r", "\t"), array("\\n", "\\n", ""), substr($oPdf->output(), 0, PREVIEW_BYTES));
                    $this->aReturn['data'] = array('type' => 'ascii', 'text' => $sCnt);
                } elseif ($sExt == "doc") {
                    //////////////////////////////
                    // does not seem to be possible
                    //////////////////////////////
                } else {
                    // any other type: return the first 600 bytes with control characters escaped
                    $oHnd = fopen($this->sSFile, "r");
                    if ($oHnd) {
                        $sCnt = preg_replace(array("/\n/", "/\r/", "/\t/"), array("\\n", "\\r", "\\t"), addslashes(fread($oHnd, 600)));
                        fclose($oHnd);
                        $this->aReturn['data'] = array('type' => 'ascii', 'text' => $sCnt);
                    }
                }
                // empty() instead of count(): 'data' may be unset or not an array when nothing was read
                $this->aReturn['msg'] .= !empty($this->aReturn['data']) ? 'contentsSucces' : 'contentsFail';
                break;

            case "rename":
                // rename file
                if (isset($_POST["file"]) && isset($_POST["nfile"])) {
                    $sFile = $_POST["file"];
                    $sNFile = $_POST["nfile"];
                    $aOldParts = preg_split("/\\./", $sFile);
                    $sFileExt = array_pop($aOldParts);
                    $aNewParts = preg_split("/\\./", $sNFile);
                    $sNFileExt = array_pop($aNewParts);
                    $sNSFile = str_replace($sFile, $sNFile, $this->sSFile);
                    if (@filetype($this->sSFile) == "file" && $sFileExt != $sNFileExt) {
                        // files must keep their extension
                        $this->aReturn['error'] .= "filenameNoext";
                    } elseif (!preg_match('=^[^/?*;:{}\\\\]+\\.[^/?*;:{}\\\\]+$=', $sNFile)) {
                        $this->aReturn['error'] .= "filenamInvalid";
                    } elseif ($sFile == $sNFile) {
                        $this->aReturn['msg'] .= "filenameNochange";
                    } elseif ($sNFile == "") {
                        $this->aReturn['error'] .= "filenameNothing";
                    } elseif (file_exists($sNSFile)) {
                        $this->aReturn['error'] .= "filenameExists";
                    } elseif (@rename($this->sSFile, $sNSFile)) {
                        $this->aReturn['msg'] .= "filenameSucces";
                    } else {
                        $this->aReturn['error'] .= "filenameFailed";
                    }
                }
                break;

            case "addFolder":
                // add folder; a numeric suffix is appended until the name is unused
                if (isset($_POST["folder"])) {
                    $sFolderName = isset($_POST["foldername"]) ? $_POST["foldername"] : "new folder";
                    $iRpt = 1;
                    $sFolder = $this->sConnBse . $_POST["folder"] . $sFolderName;
                    while (file_exists($sFolder)) {
                        $sFolder = $this->sConnBse . $_POST["folder"] . $sFolderName . $iRpt++;
                    }
                    if (mkdir($sFolder)) {
                        $this->aReturn['msg'] .= "folderCreated";
                        $oFNfo = $this->fileInfo($sFolder);
                        if ($oFNfo) {
                            $this->aReturn['data'] = $oFNfo;
                        } else {
                            $this->aReturn['error'] .= "folderFailed";
                        }
                    } else {
                        $this->aReturn['error'] .= "folderFailed";
                    }
                }
                break;

            case "moveFiles":
                // move files: $_POST["file"] is a comma separated list of names
                if (isset($_POST["file"]) && isset($_POST["folder"]) && isset($_POST["nfolder"])) {
                    $sNFolder = $_POST["nfolder"];
                    $aFiles = explode(",", $_POST["file"]);
                    $aMoved = array();
                    $aNotMoved = array();
                    // NOTE(review): $this->aFiles presumably holds the resolved paths for the
                    // same names, in the same order as $aFiles - verify against the parent class.
                    $iFiles = count($this->aFiles);
                    for ($i = 0; $i < $iFiles; $i++) {
                        $sFile = $aFiles[$i];
                        $this->sSFile = $this->aFiles[$i];
                        $sNSFile = str_replace($sFile, $sNFolder . "/" . $sFile, $this->sSFile);
                        if (file_exists($sNSFile)) {
                            $this->aReturn['error'] .= "filemoveExists[" . $this->sSFile . " " . $sNSFile . "] ";
                            $aNotMoved[] = $sFile;
                        } elseif (@rename($this->sSFile, $sNSFile)) {
                            $this->aReturn['msg'] .= "filemoveSucces";
                            $aMoved[] = $sFile;
                        } else {
                            $this->aReturn['error'] .= "filemoveFailed";
                            $aNotMoved[] = $sFile;
                        }
                    }
                    $this->aReturn['data'] = array('moved' => $aMoved, 'notmoved' => $aNotMoved, 'newfolder' => $sNFolder);
                }
                break;
        }
        $this->returnJSON($this->aReturn);
    }
}
<?php
// Demo script: extract the text of x.pdf with PDF2Text, print how many words
// it contains followed by the raw text, then print every word separated by spaces.
include 'PDF2Text.php';

$decoder = new PDF2Text();
$decoder->setFilename('x.pdf');
$decoder->decodePDF();
$plainText = $decoder->output();

// count the number of words in the text
$wordTotal = str_word_count($plainText, 0);
echo "<br><br>Hay " . $wordTotal . " palabras en la cadena <br><br>'{$plainText}'";

// collect the individual words in an array
$wordList = str_word_count($plainText, 1);

// print every word, each one followed by a single space
if (count($wordList) > 0) {
    echo implode(" ", $wordList) . " ";
}
// Load the note requested through ?id=..., extract the text of its PDF and keep
// roughly the first 40% of it (by character count) as a preview in $pdfContent.
$note_id = "";
$pdfContent = "";
$note_title = "";
if (isset($_GET['id']) && $_GET['id'] != "") {
    // The id is interpolated into SQL below, so force it to an integer first;
    // the raw request value must never reach the query string.
    $note_id = (int) $_GET['id'];
    $sql = "select mn_note_path,mn_note_title,mn_note_uploaded_by,mn_note_created_date from mn_note where mn_note_id={$note_id} limit 0,1";
    $data = $objDB->select($sql);
    if ($data && !empty($data)) {
        $note_title = $data[0]['mn_note_title'];
        $name = $data[0]['mn_note_path'];
        $filePath = 'notes/pdf/' . $name;
        if (file_exists($filePath)) {
            include 'libs/class.pdf2text.php';
            $pdfObj = new PDF2Text();
            $pdfObj->setFilename($filePath);
            $pdfObj->decodePDF();
            $pdfContent = $pdfObj->output();
            // show only about 40% of the extracted text, followed by a "More" marker
            $total_char = strlen($pdfContent);
            $fourty_percent_char = $total_char * 40 / 100;
            $pdfContent = tokenTruncate($pdfContent, $fourty_percent_char) . "<span>…More</span>";
        }
    } else {
        // unknown note: redirect and stop, otherwise the rest of the page would still run
        header("Location:index.php?p=browesnotes&err=file_not_found");
        exit;
    }
}
if ($pdfContent != "") {
    $table = 'mn_user';
    $key = 'mn_user_id';
    $value = 'mn_user_display_name';
// NOTE(review): this is the tail of a function whose header and enclosing loop
// are outside this view; $objects, $i, $texts and $transformations are set up there.
$currentObject = $objects[$i];
// Check if an object includes data stream.
if (preg_match("#stream(.*)endstream#ismU", $currentObject, $stream)) {
    $stream = ltrim($stream[1]);
    // Check object parameters and look for text data.
    $options = getObjectOptions($currentObject);
    // Skip the object as soon as any of Length1 / Type / Subtype is set.
    if (!(empty($options["Length1"]) && empty($options["Type"]) && empty($options["Subtype"]))) {
        continue;
    }
    // So, we have text data. Decode it.
    $data = getDecodedStream($stream, $options);
    if (strlen($data)) {
        // Streams containing BT...ET sections are passed to getDirtyTexts();
        // all other streams are passed to getCharTransformations().
        if (preg_match_all("#BT(.*)ET#ismU", $data, $textContainers)) {
            $textContainers = @$textContainers[1];
            getDirtyTexts($texts, $textContainers);
        } else {
            getCharTransformations($transformations, $data);
        }
    }
}
}
// Analyze text blocks taking into account character transformations and return results.
return getTextUsingTransformations($texts, $transformations);
}

// Test script: decode the same PDF with the PDF2Text class, then print the
// result of the pdf3text() function for that file.
include dirname(__FILE__) . '/../../kernel/pdf2text.php';
$a = new PDF2Text();
$a->setFilename(dirname(__FILE__) . '/../../../InformDovidka.pdf');
//grab the test file at http://www.newyorklivearts.org/Videographer_RFP.pdf
$a->decodePDF();
// echo $a->output();
echo pdf3text(dirname(__FILE__) . '/../../../InformDovidka.pdf');
/**
 * Extract plain text from PDF
 *
 * Tries up to three extractors in order: SearchWP_PdfParser (PHP 5.3+ only),
 * then PDF2Text, and finally - when the result is empty or fewer than 10% of
 * its characters are spaces (i.e. it looks like concatenated words) - the
 * pdf_readstream based reader, provided the file contains a "trailer" marker.
 *
 * @since 2.5
 * @param $post_id integer The post ID of the PDF in the Media library
 *
 * @return string|false The contents of the PDF, '' when the post is not a
 *                      locally available PDF, or false when parsing failed or
 *                      produced unusable content
 */
function extract_pdf_text( $post_id ) {
    global $wp_filesystem, $searchwp;

    $post_id  = absint( $post_id );
    $pdf_post = get_post( $post_id );

    // make sure the post exists and is a PDF (get_post() returns null for unknown IDs)
    if ( ! $pdf_post || 'application/pdf' !== $pdf_post->post_mime_type ) {
        return '';
    }

    // grab the filename of the PDF
    $filename = get_attached_file( $post_id );

    // make sure the file exists locally
    if ( ! $filename || ! file_exists( $filename ) ) {
        return '';
    }

    $pdfContent = '';

    // PdfParser runs only on 5.3+ but SearchWP runs on 5.2+
    if ( version_compare( PHP_VERSION, '5.3', '>=' ) ) {
        /** @noinspection PhpIncludeInspection */
        include_once( $searchwp->dir . '/vendor/pdfparser-bootloader.php' );

        // a wrapper class was conditionally included if we're running PHP 5.3+ so let's try that
        if ( class_exists( 'SearchWP_PdfParser' ) ) {
            /** @noinspection PhpIncludeInspection */
            include_once( $searchwp->dir . '/vendor/pdfparser/vendor/autoload.php' );

            // try PdfParser first
            $parser = new SearchWP_PdfParser();
            $parser = $parser->init();
            try {
                $pdf        = $parser->parseFile( $filename );
                $pdfContent = $pdf->getText();
            } catch ( Exception $e ) {
                do_action( 'searchwp_log', 'PDF parsing failed: ' . $e->getMessage() );
                return false;
            }
        }
    }

    // try PDF2Text
    if ( empty( $pdfContent ) ) {
        if ( ! class_exists( 'PDF2Text' ) ) {
            /** @noinspection PhpIncludeInspection */
            include_once( $searchwp->dir . '/vendor/class.pdf2text.php' );
        }
        $pdfParser = new PDF2Text();
        $pdfParser->setFilename( $filename );
        $pdfParser->decodePDF();
        $pdfContent = $pdfParser->output();
        $pdfContent = trim( str_replace( "\n", ' ', $pdfContent ) );
    }

    // check to see if the first pass produced nothing or concatenated strings
    // (the empty() test runs first, so the division never sees a zero length)
    $fullContentLength = strlen( $pdfContent );
    $numberOfSpaces    = substr_count( $pdfContent, ' ' );
    if ( empty( $pdfContent ) || ( ( $numberOfSpaces / $fullContentLength ) * 100 < 10 ) ) {
        WP_Filesystem();

        // $wp_filesystem may still be unset if WP_Filesystem() could not initialise it,
        // and method_exists() must not be handed a non-object
        if ( is_object( $wp_filesystem ) && method_exists( $wp_filesystem, 'exists' ) && method_exists( $wp_filesystem, 'get_contents' ) ) {
            $filecontent = $wp_filesystem->exists( $filename ) ? $wp_filesystem->get_contents( $filename ) : '';
        } else {
            $filecontent = '';
        }

        if ( false !== strpos( (string) $filecontent, 'trailer' ) ) {
            if ( ! class_exists( 'pdf_readstream' ) ) {
                /** @noinspection PhpIncludeInspection */
                include_once( $searchwp->dir . '/vendor/class.pdfreadstream.php' );
            }
            $pdfContent = '';
            // read the file that was resolved from $post_id above rather than
            // whatever post the surrounding object currently points at
            $pdf   = new pdf( $filename );
            $pages = $pdf->get_pages();
            if ( ! empty( $pages ) ) {
                // foreach instead of each(), which no longer exists in PHP 8
                foreach ( $pages as $page ) {
                    if ( is_object( $page ) && method_exists( $page, 'get_text' ) ) {
                        $pdfContent .= $page->get_text();
                    }
                }
            }
        } else {
            // empty out the content so wacky concatenations are not indexed
            $pdfContent = false;
        }
    }

    return $pdfContent;
}
/**
 * Decode a PDF file with PDF2Text and return whatever text it produces.
 *
 * @param string $filename Path of the PDF handed to PDF2Text::setFilename().
 * @return mixed The value returned by PDF2Text::output().
 */
function pdf_to_text($filename)
{
    $decoder = new PDF2Text();
    $decoder->setFilename($filename);
    $decoder->decodePDF();

    $extractedText = $decoder->output();

    return $extractedText;
}
<?php
// Minimal usage example: decode hello.pdf with PDF2Text and print the extracted text.
include 'class.pdf2text.php';

$decoder = new PDF2Text();
$decoder->setFilename('hello.pdf');
$decoder->decodePDF();

$extractedText = $decoder->output();
echo $extractedText;