PDF2Text::setFilename PHP代码示例

示例#1

26

显示文件

文件： PdfDocumentExtractor.php 项目： veronico12/lucene-silverstripe-plugin

 /**
  * Extracts the text of a PDF file with PDF2Text.
  *
  * The file is decoded once with the extractor's default settings; when that
  * yields an empty string it is decoded a second time with setUnicode(true).
  * An empty string is returned when the zlib extension is not loaded or when
  * any decoding step throws.
  *
  * @access private
  * @param string $filename Value handed to PDF2Text::setFilename().
  * @return string Extracted text, or '' when nothing could be extracted.
  */
 protected static function pdf2text($filename)
 {
     // NOTE(review): presumably PDF2Text needs zlib to inflate compressed
     // streams - confirm against the PDF2Text class.
     if (!extension_loaded('zlib')) {
         return '';
     }
     $pdf = new PDF2Text();
     $pdf->setFilename($filename);
     try {
         $pdf->decodePDF();
         $content = $pdf->output();
         if ($content == '') {
             // try with different multibyte setting; kept inside the try so a
             // failure on the retry is handled like a failure on the first pass
             $pdf->setUnicode(true);
             $pdf->decodePDF();
             $content = $pdf->output();
         }
     } catch (Exception $e) {
         return '';
     }
     return $content;
 }

示例#2

0

显示文件

文件： UploaderMultiples.php 项目： CapsuleCorpIndonesia/martabak_revolution

 /**
  * Stores every file found in $_FILES and echoes a JSON summary.
  *
  * The `t` query parameter is mandatory (the script dies with 'no t' without
  * it). When the `files` query parameter is present, each upload gets a new
  * database row, is moved into the model's upload_location as
  * "<row id>.<extension>" and, for PDFs, has its text extracted with PDF2Text
  * and a first-page thumbnail rendered by the external `convert` command.
  * Without `files`, the posted form data is echoed back instead.
  *
  * @return void The result is written to the output as JSON.
  */
 public function uploadfiles()
 {
     global $db;
     $t = isset($_GET['t']) ? addslashes($_GET['t']) : die('no t');
     $dc = new InputFileModel();
     if (isset($_GET['files'])) {
         $error = false;
         $files = array();
         $uploaddir = $dc->upload_location;
         $arrSuc = array();
         $arrSucAsli = array();
         foreach ($_FILES as $file) {
             $f = new InputFileModel();
             $q = "INSERT INTO {$f->table_name} SET file_folder_id = '0',file_author = '" . Account::getMyID() . "'";
             $fid = $db->qid($q);
             $f->getByID($fid);
             if ($fid) {
                 $f->file_url = basename($file['name']);
                 // end() takes its argument by reference, so the exploded name
                 // has to live in a variable first.
                 $nameParts = explode(".", $file['name']);
                 $ext = end($nameParts);
                 $f->file_ext = $ext;
                 $f->file_filename = $fid . "." . $ext;
                 $f->file_date = leap_mysqldate();
                 $storedPath = $uploaddir . $f->file_filename;
                 if (move_uploaded_file($file['tmp_name'], $storedPath)) {
                     $files[] = $uploaddir . $file['name'];
                     $f->file_size = filesize($storedPath);
                     if ($f->file_ext == "pdf") {
                         $a = new PDF2Text();
                         $a->setFilename($storedPath);
                         $a->decodePDF();
                         // flatten line breaks so the extracted text is a single line
                         $f->file_isi = preg_replace("/\r|\n/", " ", $a->output());
                         // "[0]" selects the first page of the PDF for the thumbnail;
                         // both paths are shell-escaped rather than interpolated
                         $thumb = $uploaddir . "thumbs/" . $fid . ".jpg";
                         exec("convert " . escapeshellarg($storedPath . "[0]") . " " . escapeshellarg($thumb));
                     }
                     $arrSuc[] = $f->file_filename;
                     // The display name comes from the client, so it is HTML-escaped
                     // before being embedded in the markup returned to the page.
                     // NOTE(review): file_filename inside the onclick handler still
                     // carries the client-supplied extension - confirm it is validated.
                     $safeName = htmlspecialchars($f->file_url, ENT_QUOTES, 'UTF-8');
                     $arrSucAsli[] = "<div id='file_" . $fid . "_{$t}' class='mlt_item'>" . $safeName . " <i onclick=\"deleteFromList_{$t}('" . $f->file_filename . "');\" class='glyphicon glyphicon-remove'></i></div>";
                     $f->load = 1;
                     $f->save();
                     if (isset($_SESSION['target_id']['obj'])) {
                         $target = get_class($_SESSION['target_id']['obj']);
                     } else {
                         $target = "inputfile_unknown";
                     }
                     PortalFileLogger::save2log($storedPath, $target, $f->file_url);
                 } else {
                     $error = true;
                 }
             }
         }
         $data = $error ? array('error' => 'There was an error uploading your files') : array('files' => $files, 'ftext' => implode(",", $arrSuc), 'ftextAsli' => implode(" ", $arrSucAsli));
     } else {
         $data = array('success' => 'Form was submitted', 'formData' => $_POST);
     }
     echo json_encode($data);
 }

示例#3

0

显示文件

文件： class.fileToStr.php 项目： t-web/doeqs_new

 /**
  * Returns the text PDF2Text extracts from the given PDF file.
  *
  * @param string $filename Value handed to PDF2Text::setFilename().
  * @return mixed Whatever PDF2Text::output() returns after decoding.
  */
 private function pdfToText($filename)
 {
     // PDF2Text is a large third-party class treated as a black box here;
     // its source file carries the citations.
     require_once "class.pdf2text.php";

     $converter = new PDF2Text();
     $converter->setFilename($filename);
     $converter->decodePDF();
     $extracted = $converter->output();

     return $extracted;
 }

示例#4

0

显示文件

文件： nbPdfToTextCommand.php 项目： nubee/bee

 /**
  * Extracts the text of the PDF named by the 'pdf-file' argument and writes
  * it next to the PDF as "<pdf-file>.txt".
  *
  * @param array $arguments Must contain the key 'pdf-file'.
  * @param array $options   Unicode decoding is switched on when the key
  *                         'unicode' is set.
  */
 protected function execute(array $arguments = array(), array $options = array())
 {
     $pdfPath = $arguments['pdf-file'];
     $startMessage = sprintf('Extracting text from pdf file: %s', $pdfPath);
     $this->logLine($startMessage, nbLogger::COMMENT);

     $extractor = new PDF2Text();
     $extractor->setFilename($pdfPath);
     $useUnicode = isset($options['unicode']);
     $extractor->setUnicode($useUnicode);
     $extractor->decodePDF();

     $text = $extractor->output();
     $targetPath = sprintf('%s.txt', $pdfPath);
     file_put_contents($targetPath, $text);

     $this->logLine('Text extracted!', nbLogger::COMMENT);
 }

示例#5

0

显示文件

文件： PdfDocumentExtractor.php 项目： helpfulrobot/asecondwill-lucene

 /**
  * Extracts the text of a PDF file with PDF2Text, retrying once with
  * setUnicode(true) when the first pass produces an empty result.
  *
  * @access private
  * @param string $filename Value handed to PDF2Text::setFilename().
  * @return mixed Output of the first pass when non-empty, otherwise the
  *               output of the Unicode retry.
  */
 protected static function pdf2text($filename)
 {
     $extractor = new PDF2Text();
     $extractor->setFilename($filename);
     $extractor->decodePDF();

     $text = $extractor->output();
     if ($text != '') {
         return $text;
     }

     // first pass came back empty: decode again with the other multibyte setting
     $extractor->setUnicode(true);
     $extractor->decodePDF();

     return $extractor->output();
 }

示例#6

0

显示文件

文件： indexado.php 项目： Ocjara22/Indexador-de-documentos

// Document id requested through the query string; stays empty when absent.
$id = "";
if (isset($_GET['id'])) {
    $id = $_GET['id'];
}
// The id comes straight from the request, so it is bound as a query parameter
// (the "?" placeholder) instead of being interpolated into the SQL text.
$sql = " SELECT * \n\t\t\t FROM DOCUMENTO D\n\t\t\t INNER JOIN TIPO T ON (D.TIPO_ID = T.TIPO_ID)\n\t\t\t INNER JOIN DEPARTAMENTO DE ON (D.DEPA_ID = DE.DEPA_ID)\n\t\t\t WHERE DOCU_ID = ? ";
$params = array($id);
// A keyset cursor is requested so sqlsrv_num_rows() can be called on the result.
$options = array("Scrollable" => SQLSRV_CURSOR_KEYSET);
$resultado = sqlsrv_query($conn, $sql, $params, $options);
$fila = sqlsrv_fetch_object($resultado);
$total = sqlsrv_num_rows($resultado);
//si es pdf
if ($fila->TIPO_ID == 1) {
    require_once "../../lib/pdf2text.php";
    $ruta = $fila->DOCU_LINK;
    $a = new PDF2Text();
    $a->setFilename($ruta);
    $a->decodePDF();
    $contentIndexed .= $a->output() . "\n\n";
    $contentIndexed = ereg_replace(chr(12), "\r\n{new_page}\r\n", ereg_replace("\n", "\r\n", $contentIndexed));
    // Enumeramos los patrones por el número de página que corresponde.
    $arrContent = explode("\r\n{new_page}\r\n", $contentIndexed);
    $arrayPaginas = $arrContent;
    // Lo guardamos porque luego lo usaremos para guardar el archivo o en base de datos.
    $contentIndexed2 = "{1}";
    $xCount = 2;
    foreach ($arrContent as $page) {
        $contentIndexed2 .= trim($page) . "\r\n{" . $xCount . "}\r\n";
        $xCount++;
    }
    $contentIndexed = $contentIndexed2;
    // Recorremos las líneas del archivo y eliminamos las que tengan básicamente números por no ser interesantes ya que normalmente

示例#7

0

显示文件

文件： DMWeb.php 项目： CapsuleCorpIndonesia/es-teler-baru-suka

 /**
  * Stores every file found in $_FILES under the folder given by `tid` and
  * echoes a JSON summary.
  *
  * The `tid` and `t` query parameters are mandatory (the script dies without
  * them). When the `files` query parameter is present, each upload gets a new
  * database row and is moved into the model's upload_location as
  * "<row id>.<extension>". An upload whose stored size differs from the size
  * reported in $_FILES is removed again (file and row) and flagged through
  * 'err_size' in the response. PDFs additionally get their text extracted
  * with PDF2Text and a first-page thumbnail rendered by the external
  * `convert` command. Without `files`, the posted form data is echoed back.
  *
  * @return void The result is written to the output as JSON.
  */
 public function uploadfiles()
 {
     global $db;
     $tid = isset($_GET['tid']) ? addslashes($_GET['tid']) : die('no ID');
     $t = isset($_GET['t']) ? addslashes($_GET['t']) : die('no t');
     $dc = new DocumentsPortal();
     // Initialised up front: it is read after the if/else below even when the
     // `files` branch (the only place that changes it) never runs.
     $error_size = 0;
     if (isset($_GET['files'])) {
         $error = false;
         $files = array();
         $uploaddir = $dc->upload_location;
         foreach ($_FILES as $file) {
             $f = new DocumentsPortal();
             $q = "INSERT INTO {$f->table_name} SET file_folder_id = '{$tid}',file_author = '" . Account::getMyID() . "'";
             $fid = $db->qid($q);
             $f->getByID($fid);
             if ($fid) {
                 $f->file_url = basename($file['name']);
                 // end() takes its argument by reference, so the exploded name
                 // has to live in a variable first.
                 $nameParts = explode(".", $file['name']);
                 $ext = end($nameParts);
                 $f->file_ext = $ext;
                 $f->file_filename = $fid . "." . $ext;
                 $f->file_date = leap_mysqldate();
                 // size reported for the upload, compared with the stored file below
                 $size_awal = $file['size'];
                 $storedPath = $uploaddir . $f->file_filename;
                 if (move_uploaded_file($file['tmp_name'], $storedPath)) {
                     $files[] = $uploaddir . $file['name'];
                     $f->file_size = filesize($storedPath);
                     // size of the file as stored on disk
                     $size_akhir = $f->file_size;
                     // a mismatch means the transfer failed part-way through
                     if ($size_awal != $size_akhir) {
                         // remove the corrupt file
                         unlink($storedPath);
                         // remove the corrupt file's database row
                         $f->delete($fid);
                         $error = true;
                         $error_size = 1;
                     } else {
                         if ($f->file_ext == "pdf") {
                             $a = new PDF2Text();
                             $a->setFilename($storedPath);
                             $a->decodePDF();
                             // flatten line breaks so the extracted text is a single line
                             $f->file_isi = preg_replace("/\r|\n/", " ", $a->output());
                             $thumb = $uploaddir . "thumbs/" . $fid . ".jpg";
                             putenv("PATH=/usr/local/bin:/usr/bin:/bin");
                             putenv("MAGICK_TEMPDIR=/tmp");
                             // "[0]" selects the first page of the PDF for the thumbnail;
                             // both paths are shell-escaped rather than interpolated
                             exec("convert " . escapeshellarg($storedPath . "[0]") . " " . escapeshellarg($thumb));
                         }
                         $f->load = 1;
                         $f->save();
                         // log the upload
                         BLogger::addLog("file_id = {$fid}, filename = " . $f->file_url, "upload_file");
                     }
                 } else {
                     $error = true;
                     $f->delete($fid);
                 }
             }
         }
         $data = $error ? array('error' => 'There was an error uploading your files') : array('files' => $files);
     } else {
         $data = array('success' => 'Form was submitted', 'formData' => $_POST);
     }
     if ($error_size) {
         $data['err_size'] = 1;
     }
     echo json_encode($data);
 }

示例#8

0

显示文件

文件： sfbrowser.php 项目： elgodmaster/soccer2

             foreach ($entries as $entry) {
                 $sDta .= $entry->getName() . "\\r\\n";
             }
             // getName | getPackedSize | getUnpackedSize
             @rar_close($rar_file);
             $sData = '"type":"archive","text":"' . $sDta . ' "';
             $sMsg .= "contentsSucces";
         } else {
             $sMsg .= "contentsFail";
         }
     }
 } else {
     if ($sExt == "pdf") {
         include 'class.pdf2text.php';
         $oPdf = new PDF2Text();
         $oPdf->setFilename($sSFile);
         $oPdf->decodePDF();
         $sCnt = str_replace(array("\n", "\r", "\t"), array("\\n", "\\n", ""), substr($oPdf->output(), 0, PREVIEW_BYTES));
         $sData = '"type":"ascii","text":"' . $sCnt . ' "';
         $sMsg .= "contentsSucces";
     } else {
         if ($sExt == "doc") {
             //////////////////////////////
             // does not seem to be possible
             //////////////////////////////
         } else {
             $oHnd = fopen($sSFile, "r");
             $sCnt = preg_replace(array("/\n/", "/\r/", "/\t/"), array("\\n", "\\r", "\\t"), addslashes(fread($oHnd, 600)));
             fclose($oHnd);
             $sData = '"type":"ascii","text":"' . $sCnt . '"';
             $sMsg .= "contentsSucces";

示例#9

0

显示文件

文件： Multimedia.class.php 项目： naturalsciences/Darwin

 /**
  * Saves the record; for new records the URI is changed first and, for PDF
  * and plain-text files, the file's text is stored through setExtractedInfo().
  *
  * PDF extraction is best effort: any exception raised while decoding is
  * ignored and the record is saved without extracted text.
  *
  * @param Doctrine_Connection|null $conn Passed through to parent::save().
  */
 public function save(Doctrine_Connection $conn = null)
 {
     if (!$this->isNew()) {
         parent::save($conn);
         return;
     }

     $this->changeUri();

     if ($this->getMimeType() == 'application/pdf') {
         $extractor = new PDF2Text();
         $extractor->setFilename($this->getFullURI());
         $text = '';
         try {
             $extractor->decodePDF();
             $text = $extractor->output();
             if ($text == '') {
                 // first pass came back empty: retry with the other multibyte setting
                 $extractor->setUnicode(true);
                 $extractor->decodePDF();
                 $text = $extractor->output();
             }
         } catch (Exception $e) {
             // deliberately ignored: extraction failures must not block the save
         }
         if ($text != '') {
             $this->setExtractedInfo(utf8_encode($text));
         }
     }

     if ($this->getMimeType() == 'text/plain') {
         $plainText = file_get_contents($this->getFullURI());
         $this->setExtractedInfo(utf8_encode($plainText));
     }

     parent::save($conn);
 }

示例#10

0

显示文件

文件： ReadFile.php 项目： jessesiu/GigaDBV3

 /**
  * Reads the text content of a PDF stored in the temp folder.
  *
  * @param string $file_name File name appended to self::TEMP_FOLDER.
  * @return string Text returned by PDF2Text::output() after decoding.
  */
 public static function readPdfFile($file_name)
 {
     $pdfPath = self::TEMP_FOLDER . $file_name;

     $reader = new PDF2Text();
     $reader->setFilename($pdfPath);
     $reader->decodePDF();
     $text = $reader->output();

     return $text;
 }

示例#11

0

显示文件

文件： pdf_final.php 项目： httvncoder/TF-IDF--Information_RetrievalO-K-means-

// Database settings, all left blank in this snippet.
$username = "";
// Mysql username
$password = "";
// Mysql password
$db_name = "";
// Database name
$tbl_name = "";
// Table name
// Client-supplied name of the uploaded file; the part before the first "."
// is kept as $column_name.
$file_name = $_FILES['pdf']['name'];
$filename = explode(".", $file_name);
$column_name = $filename[0];
// Raise the execution time limit to 300 seconds.
ini_set('max_execution_time', 300);
include 'pdf2text.php';
include 'stem_code.php';
// NOTE(review): setFilename() is given the client-side file name, not
// $_FILES['pdf']['tmp_name'] - confirm the file is expected at that relative path.
$a = new PDF2Text();
$a->setFilename($file_name);
$a->decodePDF();
// Lower-case the extracted text and split it on single spaces.
$stringput = $a->output();
$stringput = strtolower($stringput);
$allword_count = explode(" ", $stringput);
$wordarray = array();
$wordarrays = array();
// Copy every token into $wordarrays, in order.
foreach ($allword_count as $key => $val) {
    array_push($wordarrays, $val);
}
/*Stemming Code*/
// $wordarray receives the PorterStemmer::Stem() result for each token.
foreach ($wordarrays as $key => $word) {
    $stem = PorterStemmer::Stem($word);
    array_push($wordarray, $stem);
}
// Stop-word list; how it is applied is not visible in this excerpt.
$stopwords = array('on', 'us', 'xc', 'be', 'by', 'at', 'but', 'e', 'i', 'be', 'by', 'g', 'j', 'and', 'is', 'f', 'are', 'p', 'can', 'each', 'we', 'x', 'in', 'b', 'as', 'c', 'd', 'for', 'also', 'an', 'all', '-', 'a', 'any', 'in', 'the', 'thesis', 'to', 'of', 'dammalapati');

示例#12

0

显示文件

文件： ci_ver_normas.php 项目： andreagranados/designa

 /**
  * Debug handler: extracts the text of "<DOCUMENT_ROOT>/68.pdf" with PDF2Text,
  * normalises it and prints the first 1000 bytes, then terminates the script.
  *
  * The original body continued after the first `exit` with statements that
  * could never run (and that referenced the undefined $prueba3 and $sql);
  * that unreachable tail has been removed. The accent folding is now fed into
  * the regex clean-up instead of being computed and thrown away.
  *
  * @param mixed $datos Event payload; not used by this handler.
  */
 function evt__cuadro__procesar($datos)
 {
     $a = new PDF2Text();
     $path = $_SERVER['DOCUMENT_ROOT'] . "/68.pdf";
     $a->setFilename($path);
     $a->decodePDF();
     $texto = $a->output();
     // var_export() yields the text as a quoted PHP string literal
     $prueba = var_export($texto, true) . PHP_EOL;
     // fold accented lower-case letters to their plain counterparts
     $buscar = array('á', 'é', 'í', 'ó', 'ú', 'ñ', 'ü');
     $remplzr = array('a', 'e', 'i', 'o', 'u', 'n', 'u');
     $prueba2 = str_replace($buscar, $remplzr, $prueba);
     // replace characters outside the first class with a space, collapse runs
     // of "-" into one, and drop anything that looks like a <...> tag
     $buscar = array('/[^A-z0-9-<>]/', '/[-]+/', '/<[^>]*>/');
     $remplzr = array(' ', '-', '');
     // operate on the accent-folded text ($prueba2), not the raw export
     $prueba2 = preg_replace($buscar, $remplzr, $prueba2);
     echo substr($prueba2, 0, 1000);
     exit;
 }

示例#13

0

显示文件

文件： Uploader.php 项目： CapsuleCorpIndonesia/apel-enak

 /**
  * Replaces/stores an uploaded file and responds with JSON.
  *
  * When the `adafile` query parameter names an existing file in the model's
  * upload_location, that file, its database row and its log entry are removed
  * first. The `t` query parameter is mandatory (the script dies with 'no t'
  * without it). When the `files` query parameter is present, each upload gets
  * a new database row and is moved into upload_location as
  * "<row id>.<extension>"; PDFs also get their text extracted with PDF2Text
  * and a first-page thumbnail rendered by the external `convert` command.
  * The script answers and terminates right after the first file that was
  * stored successfully. Without `files`, the posted form data is echoed back.
  *
  * @return void The result is written to the output as JSON.
  */
 public function uploadfiles()
 {
     global $db;
     // Name of a previously uploaded file to remove, if any. It comes from the
     // request, so basename() strips any directory part before it is used in
     // a filesystem path.
     $adafile = isset($_GET['adafile']) && is_string($_GET['adafile']) ? basename($_GET['adafile']) : '';
     if ($adafile) {
         $if = new InputFileModel();
         $uploadpath = $if->upload_location;
         if (file_exists($uploadpath . $adafile)) {
             // delete old file
             if (unlink($uploadpath . $adafile)) {
                 // escaped the same way the other request values in this method are
                 $arrf = $if->getWhere("file_filename = '" . addslashes($adafile) . "' LIMIT 0,1");
                 if (count($arrf) > 0) {
                     $if->delete($arrf[0]->file_id);
                 }
                 // delete from log
                 PortalFileLogger::deleteFileLog($uploadpath . $adafile);
             }
         }
     }
     $data = array();
     $t = isset($_GET['t']) ? addslashes($_GET['t']) : die('no t');
     // read defensively: the presence of `files` is only checked further down
     $data['files'] = isset($_GET['files']) ? $_GET['files'] : null;
     $data['bool'] = 0;
     $dc = new InputFileModel();
     if (isset($_GET['files'])) {
         $error = false;
         $files = array();
         $uploaddir = $dc->upload_location;
         foreach ($_FILES as $file) {
             $f = new InputFileModel();
             $q = "INSERT INTO {$f->table_name} SET file_folder_id = '0',file_author = '" . Account::getMyID() . "'";
             $fid = $db->qid($q);
             $f->getByID($fid);
             if ($fid) {
                 $f->file_url = basename($file['name']);
                 // end() takes its argument by reference, so the exploded name
                 // has to live in a variable first.
                 $nameParts = explode(".", $file['name']);
                 $ext = end($nameParts);
                 $f->file_ext = $ext;
                 $f->file_filename = $fid . "." . $ext;
                 $f->file_date = leap_mysqldate();
                 $storedPath = $uploaddir . $f->file_filename;
                 if (move_uploaded_file($file['tmp_name'], $storedPath)) {
                     $files[] = $uploaddir . $file['name'];
                     $f->file_size = filesize($storedPath);
                     if ($f->file_ext == "pdf") {
                         $a = new PDF2Text();
                         $a->setFilename($storedPath);
                         $a->decodePDF();
                         // flatten line breaks so the extracted text is a single line
                         $f->file_isi = preg_replace("/\r|\n/", " ", $a->output());
                         // "[0]" selects the first page of the PDF for the thumbnail;
                         // both paths are shell-escaped rather than interpolated
                         $thumb = $uploaddir . "thumbs/" . $fid . ".jpg";
                         exec("convert " . escapeshellarg($storedPath . "[0]") . " " . escapeshellarg($thumb));
                     }
                     $f->load = 1;
                     $data['bool'] = $f->save();
                     $data['isImage'] = Leap\View\InputFile::isImage($f->file_filename);
                     $data['filename'] = $f->file_filename;
                     if (isset($_SESSION['target_id']['obj'])) {
                         $target = get_class($_SESSION['target_id']['obj']);
                     } else {
                         $target = "inputfile_unknown";
                     }
                     PortalFileLogger::save2log($storedPath, $target, $f->file_url);
                     // respond and stop after the first successfully stored file
                     die(json_encode($data));
                 } else {
                     $error = true;
                 }
             }
         }
         $data = $error ? array('error' => 'There was an error uploading your files') : array('files' => $files);
     } else {
         $data = array('success' => 'Form was submitted', 'formData' => $_POST);
     }
     echo json_encode($data);
 }

示例#14

0

显示文件

文件： class.indexer.php 项目： vossavant/phoenix

 /**
  * Index posts stored in $this->unindexedPosts
  *
  * @since 1.0
  */
 function index()
 {
     global $wp_filesystem, $searchwp;
     $this->check_for_parallel_indexer();
     if (is_array($this->unindexedPosts) && count($this->unindexedPosts)) {
         do_action('searchwp_indexer_pre_chunk', $this->unindexedPosts);
         // all of the IDs to index have not been indexed, proceed with indexing them
         while (($unindexedPost = current($this->unindexedPosts)) !== false) {
             $this->setPost($unindexedPost);
             // log the attempt
             $count = get_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'attempts', true);
             if ($count == false) {
                 $count = 0;
             } else {
                 $count = intval($count);
             }
             $count++;
             // increment our counter to prevent the indexer getting stuck on a gigantic PDF
             update_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'attempts', $count);
             do_action('searchwp_log', 'Attempt ' . $count . ' at indexing ' . $this->post->ID);
             // if we breached the maximum number of attempts, flag it to skip
             $this->maxAttemptsToIndex = absint(apply_filters('searchwp_max_index_attempts', $this->maxAttemptsToIndex));
             if (intval($count) > $this->maxAttemptsToIndex) {
                 do_action('searchwp_log', 'Too many indexing attempts on ' . $this->post->ID . ' (' . $this->maxAttemptsToIndex . ') - skipping');
                 // flag it to be skipped
                 update_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'skip', true);
             } else {
                 // check to see if we're running a second pass on terms
                 $termCache = get_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'terms', true);
                 if (!is_array($termCache)) {
                     do_action('searchwp_index_post', $this->post);
                     // if it's an attachment, we want the permalink
                     $slug = $this->post->post_type == 'attachment' ? str_replace(get_bloginfo('wpurl'), '', get_permalink($this->post->ID)) : '';
                     // we allow users to override the extracted content from documents, if they have done so this flag is set
                     $skipDocProcessing = get_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'skip_doc_processing', true);
                     $omitDocProcessing = apply_filters('searchwp_omit_document_processing', false);
                     if (!$skipDocProcessing && !$omitDocProcessing) {
                         // if it's a PDF we need to populate our Custom Field with it's content
                         if ($this->post->post_mime_type == 'application/pdf') {
                             // grab the filename of the PDF
                             $filename = get_attached_file($this->post->ID);
                             // allow for external PDF content extraction
                             $pdfContent = apply_filters('searchwp_external_pdf_processing', '', $filename, $this->post->ID);
                             // only try to extract content if the external processing has not provided the PDF content we're looking for
                             if (empty($pdfContent)) {
                                 // PdfParser runs only on 5.3+ but SearchWP runs on 5.2+
                                 if (version_compare(PHP_VERSION, '5.3', '>=')) {
                                     include_once $searchwp->dir . '/vendor/pdfparser-bootloader.php';
                                 }
                                 // a wrapper class was conditionally included if we're running PHP 5.3+ so let's try that
                                 if (class_exists('SearchWP_PdfParser')) {
                                     // try PdfParser first
                                     $parser = new SearchWP_PdfParser();
                                     $parser = $parser->init();
                                     $pdf = $parser->parseFile($filename);
                                     $text = $pdf->getText();
                                     $pdfContent = trim(str_replace("\n", " ", $text));
                                 }
                                 // try PDF2Text
                                 if (empty($pdfContent)) {
                                     if (!class_exists('PDF2Text')) {
                                         include_once $searchwp->dir . '/includes/class.pdf2text.php';
                                     }
                                     $pdfParser = new PDF2Text();
                                     $pdfParser->setFilename($filename);
                                     $pdfParser->decodePDF();
                                     $pdfContent = $pdfParser->output();
                                     $pdfContent = trim(str_replace("\n", " ", $pdfContent));
                                 }
                                 // check to see if the first pass produced nothing or concatenated strings
                                 $fullContentLength = strlen($pdfContent);
                                 $numberOfSpaces = substr_count($pdfContent, ' ');
                                 if (empty($pdfContent) || $numberOfSpaces / $fullContentLength * 100 < 10) {
                                     WP_Filesystem();
                                     $filecontent = $wp_filesystem->exists($filename) ? $wp_filesystem->get_contents($filename) : '';
                                     if (false != strpos($filecontent, 'trailer')) {
                                         if (!class_exists('pdf_readstream')) {
                                             include_once $searchwp->dir . '/includes/class.pdfreadstream.php';
                                         }
                                         $pdfContent = '';
                                         $pdf = new pdf(get_attached_file($this->post->ID));
                                         $pages = $pdf->get_pages();
                                         if (!empty($pages)) {
                                             while (list($nr, $page) = each($pages)) {
                                                 $pdfContent .= $page->get_text();
                                             }
                                         }
                                     } else {
                                         // empty out the content so wacky concatenations are not indexed
                                         $pdfContent = '';
                                         // flag it for further review
                                         update_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'review', true);
                                         update_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'skip', true);
                                     }
                                 }
                             }
                             $pdfContent = trim($pdfContent);
                             if (!empty($pdfContent)) {
                                 $pdfContent = sanitize_text_field($pdfContent);
                                 delete_post_meta($this->post->ID, SEARCHWP_PREFIX . 'content');
                                 update_post_meta($this->post->ID, SEARCHWP_PREFIX . 'content', $pdfContent);
                             }
                         } elseif ($this->post->post_mime_type == 'text/plain') {
                             // if it's plain text, index it's content
                             WP_Filesystem();
                             $filename = get_attached_file($this->post->ID);
                             $textContent = $wp_filesystem->exists($filename) ? $wp_filesystem->get_contents($filename) : '';
                             $textContent = str_replace("\n", " ", $textContent);
                             if (!empty($textContent)) {
                                 $textContent = sanitize_text_field($textContent);
                                 update_post_meta($this->post->ID, SEARCHWP_PREFIX . 'content', $textContent);
                             }
                         } else {
                             // all other file types
                         }
                     }
                     $postTerms = array();
                     $postTerms['title'] = $this->indexTitle();
                     $postTerms['slug'] = $this->indexSlug(str_replace('/', ' ', $slug));
                     $postTerms['content'] = $this->indexContent();
                     $postTerms['excerpt'] = $this->indexExcerpt();
                     if (apply_filters('searchwp_index_comments', true)) {
                         $postTerms['comments'] = $this->indexComments();
                     }
                     // index taxonomies
                     $taxonomies = get_object_taxonomies($this->post->post_type);
                     if (!empty($taxonomies)) {
                         while (($taxonomy = current($taxonomies)) !== false) {
                             $terms = get_the_terms($this->post->ID, $taxonomy);
                             if (!empty($terms)) {
                                 $postTerms['taxonomy'][$taxonomy] = $this->indexTaxonomyTerms($taxonomy, $terms);
                             }
                             next($taxonomies);
                         }
                         reset($taxonomies);
                     }
                     // index custom fields
                     $customFields = apply_filters('searchwp_get_custom_fields', get_post_custom($this->post->ID), $this->post->ID);
                     if (!empty($customFields)) {
                         while (($customFieldValue = current($customFields)) !== false) {
                             $customFieldName = key($customFields);
                             // there are a few useless (when it comes to search) WordPress core custom fields, so let's exclude them by default
                             $omitWpMetadata = apply_filters('searchwp_omit_wp_metadata', array('_edit_lock', '_wp_page_template', '_edit_last', '_wp_old_slug'));
                             $excludedCustomFieldKeys = apply_filters('searchwp_excluded_custom_fields', array('_' . SEARCHWP_PREFIX . 'indexed', '_' . SEARCHWP_PREFIX . 'attempts', '_' . SEARCHWP_PREFIX . 'terms', '_' . SEARCHWP_PREFIX . 'last_index', '_' . SEARCHWP_PREFIX . 'skip', '_' . SEARCHWP_PREFIX . 'skip_doc_processing', '_' . SEARCHWP_PREFIX . 'review'));
                             // merge the two arrays of keys if possible
                             if (is_array($omitWpMetadata) && is_array($excludedCustomFieldKeys)) {
                                 $excluded_meta_keys = array_merge($omitWpMetadata, $excludedCustomFieldKeys);
                             } elseif (is_array($omitWpMetadata)) {
                                 $excluded_meta_keys = $omitWpMetadata;
                             } else {
                                 $excluded_meta_keys = $excludedCustomFieldKeys;
                             }
                             $excluded_meta_keys = is_array($excluded_meta_keys) ? array_unique($excluded_meta_keys) : array();
                             // allow developers to conditionally omit specific custom fields
                             $omit_this_custom_field = apply_filters("searchwp_omit_meta_key", false, $customFieldName, $this->post);
                             $omit_this_custom_field = apply_filters("searchwp_omit_meta_key_{$customFieldName}", $omit_this_custom_field, $this->post);
                             if (!in_array($customFieldName, $excluded_meta_keys) && !$omit_this_custom_field) {
                                 // allow devs to swap out their own content
                                 // e.g. parsing ACF Relationship fields (that store only post IDs) to actually retrieve that content at runtime
                                 $customFieldValue = apply_filters('searchwp_custom_fields', $customFieldValue, $customFieldName, $this->post);
                                 $customFieldValue = apply_filters("searchwp_custom_field_{$customFieldName}", $customFieldValue, $this->post);
                                 $postTerms['customfield'][$customFieldName] = $this->indexCustomField($customFieldName, $customFieldValue);
                             }
                             next($customFields);
                         }
                         reset($customFields);
                     }
                     // allow developer to store arbitrary information a la Custom Fields (without them actually being Custom Fields)
                     $extraMetadata = apply_filters("searchwp_extra_metadata", false, $this->post);
                     if ($extraMetadata) {
                         if (is_array($extraMetadata)) {
                             foreach ($extraMetadata as $extraMetadataKey => $extraMetadataValue) {
                                 // TODO: make sure there are no collisions?
                                 // while( isset( $postTerms['customfield'][$extraMetadataKey] ) ) {
                                 //    $extraMetadataKey .= '_';
                                 // }
                                 $postTerms['customfield'][$extraMetadataKey] = $this->indexCustomField($extraMetadataKey, $extraMetadataValue);
                             }
                         }
                     }
                     // we need to break out the terms from all of this content
                     $termCountBreakout = array();
                     if (is_array($postTerms) && count($postTerms)) {
                         foreach ($postTerms as $type => $terms) {
                             switch ($type) {
                                 case 'title':
                                 case 'slug':
                                 case 'content':
                                 case 'excerpt':
                                 case 'comments':
                                     if (is_array($terms) && count($terms)) {
                                         foreach ($terms as $term) {
                                             $termCountBreakout[$term['term']][$type] = $term['count'];
                                         }
                                     }
                                     break;
                                 case 'taxonomy':
                                 case 'customfield':
                                     if (is_array($terms) && count($terms)) {
                                         foreach ($terms as $name => $nameTerms) {
                                             if (is_array($nameTerms) && count($nameTerms)) {
                                                 foreach ($nameTerms as $nameTerm) {
                                                     $termCountBreakout[$nameTerm['term']][$type][$name] = $nameTerm['count'];
                                                 }
                                             }
                                         }
                                     }
                                     break;
                             }
                         }
                     }
                 } else {
                     $termCountBreakout = $termCache;
                     // if there was a term cache, this repeated processing doesn't count, so decrement it
                     delete_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'attempts');
                     delete_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'skip');
                 }
                 // unless the term chunk limit says otherwise, we're going to flag this as being OK to log as indexed
                 $flagAsIndexed = true;
                 // we now have a multidimensional array of terms with counts per type in $termCountBreakout
                 // if the term count is huge, we need to split up this process so as to avoid
                 // hitting upper PHP execution time limits (term insertion is heavy), so we'll chunk the array of terms
                 $termChunkMax = 500;
                 // try to set a better default based on php.ini's memory_limit
                 $memoryLimit = ini_get('memory_limit');
                 if (preg_match('/^(\\d+)(.)$/', $memoryLimit, $matches)) {
                     if ($matches[2] == 'M') {
                         $termChunkMax = (int) $matches[1] * 15;
                         // 15 terms per MB RAM
                     } else {
                         // memory was set in K...
                         $termChunkMax = 100;
                     }
                 }
                 $termChunkLimit = apply_filters('searchwp_process_term_limit', $termChunkMax);
                 if (count($termCountBreakout) > $termChunkLimit) {
                     $acceptableTermCountBreakout = array_slice($termCountBreakout, 0, $termChunkLimit);
                     // if we haven't pulled all of the terms, we can't consider this post indexed...
                     if ($termChunkLimit < count($termCountBreakout) - 1) {
                         $flagAsIndexed = false;
                         // save the term breakout so we don't have to do it again
                         $remainingTerms = array_slice($termCountBreakout, $termChunkLimit + 1);
                         update_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'terms', $remainingTerms);
                     }
                     // set the acceptable breakout as the main breakout
                     $termCountBreakout = $acceptableTermCountBreakout;
                 }
                 $this->recordPostTerms($termCountBreakout);
                 unset($termCountBreakout);
                 // flag the post as indexed
                 if ($flagAsIndexed) {
                     // clean up our stored term array if necessary
                     if ($termCache) {
                         delete_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'terms');
                     }
                     // clean up the attempt counter
                     delete_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'attempts');
                     delete_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'skip');
                     update_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'indexed', true);
                     update_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'last_index', current_time('timestamp'));
                 }
             }
             next($this->unindexedPosts);
         }
         reset($this->unindexedPosts);
         do_action('searchwp_indexer_post_chunk');
     }
 }

示例#15

0

显示文件

文件： Oupodcast_Provider.php 项目： iet-ou/open-media-player-core

 /**
  * Extract the text of a PDF with the pure-PHP PDF2Text class and turn it
  * into crude HTML.
  *
  * Fallback to pure PHP library on IT-hosting (#1409).
  *
  * The raw PDF2Text output is cleaned up with a few English-specific
  * heuristics (re-attaching stray "s", "th" and punctuation), whitespace
  * before lowercase letters is collapsed, and every remaining newline run
  * is replaced by a single <br>.
  *
  * @deprecated
  *
  * @param string $pdf_file Path of the PDF file handed to PDF2Text::setFilename().
  * @return string The extracted text with <br> in place of line breaks.
  */
 protected function _pdf2text($pdf_file)
 {
     # TODO: error, PDF2Text echoes to screen - messes up header() call.
     require_once __DIR__ . '/../class.pdf2text.php';
     $pdf = new PDF2Text();
     $pdf->setFilename($pdf_file);
     $pdf->decodePDF();
     $result = $pdf->output();
     // Fix floating 's' 'th' ',' etc - English locale-specific.
     $result = preg_replace('#(\\w)\\ss\\s#', '$1s ', $result);
     // The character following the floating "th" must be captured, otherwise
     // the "$1" in the replacement is empty and that character is deleted.
     $result = preg_replace('#\\sth\\s(\\w)#', ' th$1', $result);
     $result = preg_replace('#\\s([,\\.;])#', '$1', $result);
     // Concatenate lines starting with lowercase letters.
     $result = preg_replace('#\\s+([a-z])#', ' $1', $result);
     $result = str_replace('  ', ' ', $result);
     // A crude conversion to HTML: longest newline runs are replaced first so
     // that each run yields exactly one <br>.
     $html = str_replace(array("\n\n\n", "\n\n", "\n"), '<br>', $result);
     return $html;
 }

示例#16

0

显示文件

文件： sfbrowser.php 项目： holtzermann17/drupal_planetary

 /**
  * Runs the parent constructor, then executes the file-browser action named
  * by $this->sAction and hands $this->aReturn to returnJSON().
  *
  * Handled actions: fileList, duplicate, swfUpload, upload, delete, download,
  * read, rename, addFolder and moveFiles. Each action reports its outcome by
  * writing to $this->aReturn['msg'] / ['error'] and, where relevant, stores a
  * payload in $this->aReturn['data']. The "download" action streams the file
  * and exits instead of returning JSON.
  *
  * NOTE(review): $this->sAction, $this->sSFile, $this->sConnBse, $this->aFiles
  * and $this->aReturn are presumably initialised by the parent constructor
  * from the request - confirm against the parent class.
  * NOTE(review): request values ("folder", "allow", "deny", "nfile", ...) are
  * used without sanitising and the upload allow/deny lists are supplied by
  * the client; a server-side policy is needed before exposing this to
  * untrusted users.
  */
 function __construct()
 {
     parent::__construct();
     if (!$this->sAction) {
         return;
     }
     switch ($this->sAction) {
         case "fileList":
             // retrieve the listing of the requested folder (defaults to the root)
             $sDir = isset($_POST["folder"]) ? $_POST["folder"] : "/";
             $aFiles = array();
             foreach (tntbase_get_path_contents($sDir, true) as $file => $prop) {
                 $aFiles[] = $this->getFileInfo(array("file" => $file, "type" => $prop));
             }
             $this->aReturn['msg'] .= "fileListing";
             $this->aReturn['data'] = $aFiles;
             break;
         case "duplicate":
             // duplicate file: "name.ext" becomes "name_copy0.ext", "name_copyN.ext" becomes "name_copyN+1.ext"
             $sCRegx = "/(?<=(_copy))([0-9])+(?=(\\.))/";
             $sNRegx = "/(\\.)(?=[A-Za-z0-9]+\$)/";
             if (preg_match($sCRegx, $this->sSFile, $aMatches)) {
                 $sNewFile = preg_replace($sCRegx, (string) (intval($aMatches[0]) + 1), $this->sSFile);
             } else {
                 $sNewFile = preg_replace($sNRegx, "_copy0.", $this->sSFile);
             }
             // keep bumping the counter until the name is free
             $bNameFound = true;
             while (file_exists($sNewFile)) {
                 if (!preg_match($sCRegx, $sNewFile, $aMatches)) {
                     // no "_copyN." counter to bump (e.g. a name without extension):
                     // give up instead of looping forever on an unchanged name
                     $bNameFound = false;
                     break;
                 }
                 $sNewFile = preg_replace($sCRegx, (string) (intval($aMatches[0]) + 1), $sNewFile);
             }
             if ($bNameFound && copy($this->sSFile, $sNewFile)) {
                 $this->aReturn['data'] = $this->fileInfo($sNewFile);
                 $this->aReturn['msg'] .= "duplicated#" . $sNewFile;
             } else {
                 $this->aReturn['error'] = "notduplicated#" . $sNewFile;
             }
             break;
         case "swfUpload":
             // swf file upload: mirror the query-string parameters into $_POST
             foreach ($_GET as $k => $v) {
                 $_POST[$k] = $v;
             }
             // intentional fall-through into the regular upload handling
         case "upload":
             // file upload
             $sElName = $this->sAction == "upload" ? "fileToUpload" : "Filedata";
             if (!empty($_FILES[$sElName]["error"])) {
                 // upload error codes 1-4 and 6-8 have a dedicated message key
                 $iErrCode = (int) $_FILES[$sElName]["error"];
                 $sErr = in_array($iErrCode, array(1, 2, 3, 4, 6, 7, 8), true) ? "uploadErr" . $iErrCode : "uploadErr";
                 // report the key to the client rather than computing and dropping it
                 $this->aReturn['error'] = $sErr;
             } elseif (empty($_FILES[$sElName]["tmp_name"]) || $_FILES[$sElName]["tmp_name"] == "none") {
                 $this->aReturn['error'] = "No file was uploaded..";
             } else {
                 $sFolder = isset($_POST["folder"]) ? $_POST["folder"] : "";
                 $this->aReturn['msg'] .= "sFolder_" . $sFolder;
                 $sPath = $sFolder;
                 $sDeny = isset($_POST["deny"]) ? $_POST["deny"] : "";
                 $sAllow = isset($_POST["allow"]) ? $_POST["allow"] : "";
                 $sResize = isset($_POST["resize"]) ? $_POST["resize"] : "";
                 $oFile = $_FILES[$sElName];
                 $sFile = $oFile["name"];
                 // extension of the name as sent by the client (array_pop needs a real variable)
                 $aNameParts = explode(".", $sFile);
                 $sMime = array_pop($aNameParts);
                 // append _1, _2, ... to the base name until the target name is free
                 $iRpt = 1;
                 $sFileTo = $sPath . $oFile["name"];
                 while (file_exists($sFileTo)) {
                     $aFile = explode(".", $oFile["name"]);
                     $aFile[0] .= "_" . $iRpt++;
                     $sFile = implode(".", $aFile);
                     $sFileTo = $sPath . $sFile;
                 }
                 $sFileTo = $this->sConnBse . $sFileTo;
                 move_uploaded_file($oFile["tmp_name"], $sFileTo);
                 $oFNfo = $this->fileInfo($sFileTo);
                 $aStoredParts = explode(".", $sFile);
                 $sFileExt = array_pop($aStoredParts);
                 // check if file is allowed in this session $$$$$$todo: check SFB_DENY
                 // an empty allow list allows everything; a deny match always wins;
                 // a file whose info could not be read is never allowed
                 $bAllow = false;
                 if ($oFNfo) {
                     $this->aReturn['msg'] .= $iRpt === 1 ? 'fileUploaded' : 'fileExistsrenamed';
                     $bAllow = $sAllow == "" || in_array($sFileExt, explode("|", $sAllow));
                     if (in_array($sFileExt, explode("|", $sDeny))) {
                         $bAllow = false;
                     }
                 }
                 if (!$bAllow) {
                     $this->aReturn['error'] = "uploadNotallowed#" . $sFileExt;
                     @unlink($sFileTo);
                 } else {
                     $bResize = $sResize && $sResize != "null" && $sResize != "undefined" && ($sMime == "jpeg" || $sMime == "jpg");
                     if ($bResize) {
                         // "resize" is expected as "width,height"
                         $aResize = explode(",", $sResize);
                         $iToW = isset($aResize[0]) ? (int) $aResize[0] : 0;
                         $iToH = isset($aResize[1]) ? (int) $aResize[1] : 0;
                         $aSize = getimagesize($sFileTo);
                         // only resize when both the target box and the image size are usable;
                         // this avoids a division by zero on unreadable images or bad input
                         if ($iToW > 0 && $iToH > 0 && $aSize && $aSize[0] > 0 && $aSize[1] > 0) {
                             list($iW, $iH) = $aSize;
                             $fXrs = $iToW / $iW;
                             $fYrs = $iToH / $iH;
                             // crop after resize: scale so the image covers the target box,
                             // then cut the centred overflow
                             if ($fXrs < 1 || $fYrs < 1) {
                                 $fRsz = max($fXrs, $fYrs);
                                 $iNW = intval($iW * $fRsz);
                                 $iNH = intval($iH * $fRsz);
                                 $iFrX = $iNW > $iToW ? ($iNW - $iToW) / 2 : 0;
                                 $iFrY = $iNH > $iToH ? ($iNH - $iToH) / 2 : 0;
                                 $iFrW = $iNW > $iToW ? $iToW * (1 / $fRsz) : $iW;
                                 $iFrH = $iNH > $iToH ? $iToH * (1 / $fRsz) : $iH;
                                 $oImgN = imagecreatetruecolor($iToW, $iToH);
                                 $oImg = imagecreatefromjpeg($sFileTo);
                                 // explicit casts: the GD call takes integers and would truncate anyway
                                 imagecopyresampled($oImgN, $oImg, 0, 0, (int) $iFrX, (int) $iFrY, $iToW, $iToH, (int) $iFrW, (int) $iFrH);
                                 imagejpeg($oImgN, $sFileTo);
                             }
                             // the file may have changed on disk: refresh its info
                             $oFNfo = $this->fileInfo($sFileTo);
                         }
                     }
                     $this->aReturn['data'] = $oFNfo;
                 }
             }
             break;
         case "delete":
             // file delete: only accepted when exactly three POST fields were sent
             if (count($_POST) != 3 || !isset($_POST["folder"]) || !isset($_POST["file"])) {
                 exit("ku ka");
             }
             if (is_file($this->sSFile)) {
                 if (@unlink($this->sSFile)) {
                     $this->aReturn['msg'] .= "fileDeleted";
                 } else {
                     $this->aReturn['error'] .= "fileNotdeleted";
                 }
             } else {
                 if (@rmdir($this->sSFile)) {
                     $this->aReturn['msg'] .= "folderDeleted";
                 } else {
                     $this->aReturn['error'] .= "folderNotdeleted";
                 }
             }
             break;
         case "download":
             // file force download
             $sZeFile = $this->sConnBse . $this->sSFile;
             if (file_exists($sZeFile)) {
                 ob_start();
                 $sType = "application/octet-stream";
                 header("Cache-Control: public, must-revalidate");
                 header("Pragma: hack");
                 // send the MIME type, not the file path
                 header("Content-Type: " . $sType);
                 header("Content-Length: " . (string) filesize($sZeFile));
                 $aPathParts = explode("/", $sZeFile);
                 header('Content-Disposition: attachment; filename="' . array_pop($aPathParts) . '"');
                 header("Content-Transfer-Encoding: binary\n");
                 ob_end_clean();
                 readfile($sZeFile);
                 exit;
             }
             break;
         case "read":
             // read file contents for a preview, depending on the extension
             $aExtParts = explode('.', $this->sSFile);
             $sExt = strtolower(array_pop($aExtParts));
             //
             // install extensions and add to php.ini
             // - extension=php_zip.dll
             if ($sExt == "zip") {
                 $sDta = "";
                 if (!function_exists("zip_open")) {
                     $this->aReturn['error'] .= "php_zip not installed or enabled";
                 } elseif ($zip = @zip_open(getcwd() . "/" . $this->sSFile)) {
                     // list the entry names, one per (escaped) line
                     while ($zip_entry = @zip_read($zip)) {
                         $sDta .= @zip_entry_name($zip_entry) . "\\r\\n";
                     }
                     // zip_entry_filesize | zip_entry_compressedsize | zip_entry_compressionmethod
                     @zip_close($zip);
                     $this->aReturn['data'] = array('type' => 'archive', 'text' => $sDta);
                 }
             } elseif ($sExt == "rar") {
                 // - extension=php_rar.dll
                 $sDta = "";
                 if (!function_exists("rar_open")) {
                     $this->aReturn['error'] .= "php_rar not installed or enabled";
                 } elseif ($rar_file = @rar_open(getcwd() . "/" . $this->sSFile)) {
                     $entries = @rar_list($rar_file);
                     if (is_array($entries)) {
                         foreach ($entries as $entry) {
                             $sDta .= $entry->getName() . "\\r\\n";
                         }
                     }
                     // getName | getPackedSize | getUnpackedSize
                     @rar_close($rar_file);
                     $this->aReturn['data'] = array('type' => 'archive', 'text' => $sDta);
                 }
             } elseif ($sExt == "pdf") {
                 include 'class.pdf2text.php';
                 $oPdf = new PDF2Text();
                 $oPdf->setFilename($this->sSFile);
                 $oPdf->decodePDF();
                 $sCnt = str_replace(array("\n", "\r", "\t"), array("\\n", "\\n", ""), substr($oPdf->output(), 0, PREVIEW_BYTES));
                 $this->aReturn['data'] = array('type' => 'ascii', 'text' => $sCnt);
             } elseif ($sExt == "doc") {
                 //////////////////////////////
                 // does not seem to be possible
                 //////////////////////////////
             } else {
                 // plain preview: first 600 bytes with control characters escaped
                 $oHnd = fopen($this->sSFile, "r");
                 if ($oHnd) {
                     $sCnt = preg_replace(array("/\n/", "/\r/", "/\t/"), array("\\n", "\\r", "\\t"), addslashes(fread($oHnd, 600)));
                     fclose($oHnd);
                     $this->aReturn['data'] = array('type' => 'ascii', 'text' => $sCnt);
                 }
             }
             // empty() also copes with a missing or non-countable 'data' entry
             $this->aReturn['msg'] .= !empty($this->aReturn['data']) ? 'contentsSucces' : 'contentsFail';
             break;
         case "rename":
             // rename file
             if (isset($_POST["file"]) && isset($_POST["nfile"])) {
                 $sFile = $_POST["file"];
                 $sNFile = $_POST["nfile"];
                 $aOldParts = explode(".", $sFile);
                 $sFileExt = array_pop($aOldParts);
                 $aNewParts = explode(".", $sNFile);
                 $sNFileExt = array_pop($aNewParts);
                 $sNSFile = str_replace($sFile, $sNFile, $this->sSFile);
                 if (@filetype($this->sSFile) == "file" && $sFileExt != $sNFileExt) {
                     // the extension of a regular file may not change
                     $this->aReturn['error'] .= "filenameNoext";
                 } elseif (!preg_match('=^[^/?*;:{}\\\\]+\\.[^/?*;:{}\\\\]+$=', $sNFile)) {
                     $this->aReturn['error'] .= "filenamInvalid";
                 } elseif ($sFile == $sNFile) {
                     $this->aReturn['msg'] .= "filenameNochange";
                 } elseif ($sNFile == "") {
                     $this->aReturn['error'] .= "filenameNothing";
                 } elseif (file_exists($sNSFile)) {
                     $this->aReturn['error'] .= "filenameExists";
                 } elseif (@rename($this->sSFile, $sNSFile)) {
                     $this->aReturn['msg'] .= "filenameSucces";
                 } else {
                     $this->aReturn['error'] .= "filenameFailed";
                 }
             }
             break;
         case "addFolder":
             // add folder, numbering the name when it is already taken
             if (isset($_POST["folder"])) {
                 $sFolderName = isset($_POST["foldername"]) ? $_POST["foldername"] : "new folder";
                 $iRpt = 1;
                 $sFolder = $this->sConnBse . $_POST["folder"] . $sFolderName;
                 while (file_exists($sFolder)) {
                     $sFolder = $this->sConnBse . $_POST["folder"] . $sFolderName . $iRpt++;
                 }
                 if (mkdir($sFolder)) {
                     $this->aReturn['msg'] .= "folderCreated";
                     $oFNfo = $this->fileInfo($sFolder);
                     if ($oFNfo) {
                         $this->aReturn['data'] = $oFNfo;
                     } else {
                         $this->aReturn['error'] .= "folderFailed";
                     }
                 } else {
                     $this->aReturn['error'] .= "folderFailed";
                 }
             }
             break;
         case "moveFiles":
             // move files
             if (isset($_POST["file"]) && isset($_POST["folder"]) && isset($_POST["nfolder"])) {
                 $sFolder = $_POST["folder"];
                 $sNFolder = $_POST["nfolder"];
                 $aFiles = explode(",", $_POST["file"]);
                 $aMoved = array();
                 $aNotMoved = array();
                 // NOTE(review): $this->aFiles is presumably the list of resolved paths matching
                 // the names in $_POST["file"] index by index - confirm in the parent class
                 $iFileCount = count($this->aFiles);
                 for ($i = 0; $i < $iFileCount; $i++) {
                     $sFile = $aFiles[$i];
                     $this->sSFile = $this->aFiles[$i];
                     $sNSFile = str_replace($sFile, $sNFolder . "/" . $sFile, $this->sSFile);
                     if (file_exists($sNSFile)) {
                         $this->aReturn['error'] .= "filemoveExists[" . $this->sSFile . " " . $sNSFile . "] ";
                         $aNotMoved[] = $sFile;
                     } elseif (@rename($this->sSFile, $sNSFile)) {
                         $this->aReturn['msg'] .= "filemoveSucces";
                         $aMoved[] = $sFile;
                     } else {
                         $this->aReturn['error'] .= "filemoveFailed";
                         $aNotMoved[] = $sFile;
                     }
                 }
                 $this->aReturn['data'] = array('moved' => $aMoved, 'notmoved' => $aNotMoved, 'newfolder' => $sNFolder);
             }
             break;
     }
     $this->returnJSON($this->aReturn);
 }

示例#17

0

显示文件

文件： leer.php 项目： zzainss/leer-pdf

<?php

include 'PDF2Text.php';

// Decode x.pdf and fetch its plain text.
$pdfReader = new PDF2Text();
$pdfReader->setFilename('x.pdf');
$pdfReader->decodePDF();
$extracted_plaintext = $pdfReader->output();

// Report the number of words found in the text, followed by the text itself.
$wordTotal = str_word_count($extracted_plaintext, 0);
echo "<br><br>Hay " . $wordTotal . " palabras en la cadena <br><br>'{$extracted_plaintext}'";

// Collect the individual words in an array ...
$array_cadena = str_word_count($extracted_plaintext, 1);

// ... and print every word followed by a single space.
echo implode('', array_map(function ($palabra) {
    return $palabra . " ";
}, $array_cadena));

示例#18

0

显示文件

文件： viewnote.php 项目： bhushansonar/choosy_kids

// Look up the note requested through ?id=... and, when its PDF exists on disk,
// build a preview made of roughly the first 40% of the extracted text.
// When no matching row is found the browser is redirected to the notes list.
$note_id = "";
$pdfContent = "";
$note_title = "";
if (isset($_GET['id']) && $_GET['id'] != "") {
    $note_id = $_GET['id'];
    // The id comes straight from the query string: force it to an integer before
    // it is interpolated into the statement so it cannot inject SQL.
    $note_id_sql = (int) $note_id;
    $sql = "select mn_note_path,mn_note_title,mn_note_uploaded_by,mn_note_created_date from mn_note where mn_note_id={$note_id_sql} limit 0,1";
    $data = $objDB->select($sql);
    if (!empty($data)) {
        $note_title = $data[0]['mn_note_title'];
        $name = $data[0]['mn_note_path'];
        $filePath = 'notes/pdf/' . $name;
        if (file_exists($filePath)) {
            include 'libs/class.pdf2text.php';
            $pdfObj = new PDF2Text();
            $pdfObj->setFilename($filePath);
            $pdfObj->decodePDF();
            $pdfContent = $pdfObj->output();
            // keep 40% of the extracted characters, then append the "More" marker
            $total_char = strlen($pdfContent);
            $fourty_percent_char = $total_char * 40 / 100;
            $pdfContent = tokenTruncate($pdfContent, $fourty_percent_char) . "<span>…More</span>";
        }
    } else {
        header("Location:index.php?p=browesnotes&err=file_not_found");
    }
}
if ($pdfContent != "") {
    $table = 'mn_user';
    $key = 'mn_user_id';
    $value = 'mn_user_display_name';

示例#19

0

显示文件

文件： test.php 项目： parxomchik/Agro

        $currentObject = $objects[$i];
        // Check if an object includes data stream.
        if (preg_match("#stream(.*)endstream#ismU", $currentObject, $stream)) {
            $stream = ltrim($stream[1]);
            // Check object parameters and look for text data.
            $options = getObjectOptions($currentObject);
            if (!(empty($options["Length1"]) && empty($options["Type"]) && empty($options["Subtype"]))) {
                continue;
            }
            // So, we have text data. Decode it.
            $data = getDecodedStream($stream, $options);
            if (strlen($data)) {
                if (preg_match_all("#BT(.*)ET#ismU", $data, $textContainers)) {
                    $textContainers = @$textContainers[1];
                    getDirtyTexts($texts, $textContainers);
                } else {
                    getCharTransformations($transformations, $data);
                }
            }
        }
    }
    // Analyze text blocks taking into account character transformations and return results.
    return getTextUsingTransformations($texts, $transformations);
}
// Decode the sample PDF with the PDF2Text class (its output is left unused),
// then print the text that pdf3text() extracts from the same file.
include dirname(__FILE__) . '/../../kernel/pdf2text.php';
//grab the test file at http://www.newyorklivearts.org/Videographer_RFP.pdf
$samplePdf = dirname(__FILE__) . '/../../../InformDovidka.pdf';
$decoder = new PDF2Text();
$decoder->setFilename($samplePdf);
$decoder->decodePDF();
// echo $decoder->output();
$plainText = pdf3text($samplePdf);
echo $plainText;

示例#20

0

显示文件

文件： class.indexer.php 项目： acutedeveloper/carepoint-development

	/**
	 * Extract plain text from a PDF attachment.
	 *
	 * Up to three extractors are tried in order:
	 *  1. SearchWP_PdfParser (only loaded on PHP 5.3+ and only if the wrapper class exists),
	 *  2. PDF2Text, when the first pass produced nothing,
	 *  3. the pdf_readstream based reader, when the text so far is empty or contains
	 *     fewer than 10% spaces (which suggests words were concatenated).
	 *
	 * @since 2.5
	 * @param int $post_id The post ID of the PDF in the Media library
	 *
	 * @return string|false The contents of the PDF. An empty string when the post is
	 *                      missing, is not a PDF, or has no local file. False when
	 *                      PdfParser throws, or when the extracted text looks unusable
	 *                      and the file has no 'trailer' marker for the last-resort reader.
	 */
	function extract_pdf_text( $post_id ) {
		global $wp_filesystem, $searchwp;

		$post_id  = absint( $post_id );
		$pdf_post = get_post( $post_id );

		// make sure the post exists and is a PDF (get_post() yields nothing for unknown IDs)
		if ( empty( $pdf_post ) || 'application/pdf' !== $pdf_post->post_mime_type ) {
			return '';
		}

		// grab the filename of the PDF
		$filename = get_attached_file( $post_id );

		// make sure the file exists locally
		if ( ! $filename || ! file_exists( $filename ) ) {
			return '';
		}

		$pdfContent = '';

		// PdfParser runs only on 5.3+ but SearchWP runs on 5.2+
		if ( version_compare( PHP_VERSION, '5.3', '>=' ) ) {

			/** @noinspection PhpIncludeInspection */
			include_once( $searchwp->dir . '/vendor/pdfparser-bootloader.php' );

			// a wrapper class was conditionally included if we're running PHP 5.3+ so let's try that
			if ( class_exists( 'SearchWP_PdfParser' ) ) {

				/** @noinspection PhpIncludeInspection */
				include_once( $searchwp->dir . '/vendor/pdfparser/vendor/autoload.php' );

				// try PdfParser first
				$parser = new SearchWP_PdfParser();
				$parser = $parser->init();
				try {
					$pdf = $parser->parseFile( $filename );
					$pdfContent = $pdf->getText();
				} catch (Exception $e) {
					// a PdfParser failure is logged and aborts extraction; the other extractors are not tried
					do_action( 'searchwp_log', 'PDF parsing failed: ' . $e->getMessage() );
					return false;
				}
			}
		}

		// try PDF2Text
		if ( empty( $pdfContent ) ) {
			if ( ! class_exists( 'PDF2Text' ) ) {
				/** @noinspection PhpIncludeInspection */
				include_once( $searchwp->dir . '/vendor/class.pdf2text.php' );
			}
			$pdfParser = new PDF2Text();
			$pdfParser->setFilename( $filename );
			$pdfParser->decodePDF();
			$pdfContent = $pdfParser->output();
			$pdfContent = trim( str_replace( "\n", ' ', $pdfContent ) );
		}

		// check to see if the first pass produced nothing or concatenated strings
		// (the empty() test short-circuits, so the division never sees a zero length)
		$fullContentLength = strlen( $pdfContent );
		$numberOfSpaces = substr_count( $pdfContent, ' ' );
		if ( empty( $pdfContent ) || ( ( $numberOfSpaces / $fullContentLength ) * 100 < 10 ) ) {
			WP_Filesystem();

			if ( method_exists( $wp_filesystem, 'exists' ) && method_exists( $wp_filesystem, 'get_contents' ) ) {
				$filecontent = $wp_filesystem->exists( $filename ) ? $wp_filesystem->get_contents( $filename ) : '';
			} else {
				$filecontent = '';
			}

			// strict comparison: strpos() returns 0 (not false) for a match at the very start
			if ( false !== strpos( $filecontent, 'trailer' ) ) {
				if ( ! class_exists( 'pdf_readstream' ) ) {
					/** @noinspection PhpIncludeInspection */
					include_once( $searchwp->dir . '/vendor/class.pdfreadstream.php' );
				}
				$pdfContent = '';
				// read the file that was validated above, i.e. the one belonging to $post_id
				$pdf = new pdf( $filename );
				$pages = $pdf->get_pages();
				if ( ! empty( $pages ) ) {
					// foreach instead of each(): each() was removed in PHP 8.0
					foreach ( $pages as $page ) {
						if ( method_exists( $page, 'get_text' ) ) {
							$pdfContent .= $page->get_text();
						}
					}
				}
			} else {
				// empty out the content so wacky concatenations are not indexed
				$pdfContent = false;
			}
		}

		return $pdfContent;
	}

示例#21

-4

显示文件

文件： pdf.php 项目： kidaa30/Swevers

/**
 * Run PDF2Text over a file and hand back the decoder's output.
 *
 * @param mixed $filename Value passed unchanged to PDF2Text::setFilename()
 *                        (presumably a path to a PDF file).
 * @return mixed Whatever PDF2Text::output() returns after decodePDF() has run.
 */
function pdf_to_text($filename)
{
    $decoder = new PDF2Text();
    $decoder->setFilename($filename);
    $decoder->decodePDF();
    $extracted = $decoder->output();

    return $extracted;
}

示例#22

-4

显示文件

文件： index.php 项目： kingspp/Genie

<?php

// Minimal PDF2Text usage: load the class, point it at hello.pdf, decode it,
// and print the decoder's output.
include 'class.pdf2text.php';

$converter = new PDF2Text();
$converter->setFilename('hello.pdf');
$converter->decodePDF();

$decodedText = $converter->output();
echo $decodedText;

PHP PDF2Text::setFilename示例