PHP PdfParser примеры использования

Язык программирования: PHP

Класс/Тип: PdfParser

Примеров на hotexamples.com: 5

PHP PdfParser — найдено 5 примеров. Это лучшие примеры PHP-кода для PdfParser, взятые из проектов с открытым исходным кодом. Вы можете оценивать каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

parseFile(4)

parse(1)

Пример #1

Показать файл

Файл: class-wp-file-search.php Проект: ellak-monades-aristeias/wp-file-search

 /**
  * Parses each compatible document found.
  *
  * Runs only when the caller's parsing mode matches the stored admin option.
  * For every document returned by get_unparsed_documents(), the parser is
  * picked by MIME type (PDF, DOCX or ODT), non-empty results are saved through
  * save_doc_contents(), and the "last update" option is refreshed afterwards.
  *
  * @param     boolean     $direct_parsing_hook     Proceeds parsing only when this parameter and admin's choice match.
  * @since     1.0.0
  */
 private function parse_documents($direct_parsing_hook)
 {
     $options = get_option(self::OPTIONS_KEY);
     // get_option() yields false when the option was never saved; read the
     // key defensively so a missing setting does not raise a notice.
     $direct_parsing_option = isset($options[self::OPT_DIRECT_PARSING])
         ? $options[self::OPT_DIRECT_PARSING]
         : null;
     if ($direct_parsing_option !== $direct_parsing_hook) {
         return;
     }
     $upload_dir = wp_upload_dir();
     $documents = $this->get_unparsed_documents();
     foreach ($documents as $document) {
         $filepath = $upload_dir['basedir'] . DIRECTORY_SEPARATOR . $document['filename'];
         $content = null;
         switch ($document['mime_type']) {
             case 'application/pdf':
                 $content = PdfParser::parse($filepath);
                 break;
             case 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
                 $content = DocxParser::parse($filepath);
                 break;
             case 'application/vnd.oasis.opendocument.text':
                 $content = OdtParser::parse($filepath);
                 break;
             default:
                 // unsupported MIME type: $content stays null and the document is skipped
                 break;
         }
         if (!$content) {
             continue;
         }
         // add content to postmeta
         $this->save_doc_contents($document['post_id'], $content);
     }
     // update last parsing date
     update_option(self::OPTIONS_LAST_UPDATE_KEY, gmdate('Y-m-d H:i:s'));
 }

Пример #2

Показать файл

Файл: LuceneUtils.php Проект: MexinaD/SuiteCRM

/**
 * Builds a Lucene document for the PDF located at the given path.
 *
 * The text returned by PdfParser::parseFile() goes into the unstored
 * "contents" field; the file's base name goes into the "filename" text field.
 *
 * @param $path
 * @return Zend_Search_Lucene_Document
 */
function createPDFDocument($path)
{
    require_once 'PdfParser.php';

    // Extract the text first, before the Lucene document is created.
    $extractedText = PdfParser::parseFile($path);

    $luceneDocument = new Zend_Search_Lucene_Document();

    $filenameField = Zend_Search_Lucene_Field::Text('filename', basename($path));
    $luceneDocument->addField($filenameField);

    $contentsField = Zend_Search_Lucene_Field::UnStored('contents', $extractedText);
    $luceneDocument->addField($contentsField);

    return $luceneDocument;
}

Пример #3

Показать файл

Файл: process.php Проект: sijie123/NotesAcademy

// Text extraction back-ends used below.
//MS Doc + slsx for catdoc
// http://www.wagner.pp.ru/~vitus/software/catdoc/
// http://stackoverflow.com/questions/5671988/how-to-extract-just-plain-text-from-doc-docx-files-unix
//PDF
// https://gist.github.com/smalot/6183152
// pdftotext
$str = "";
if ($file->getType() == "doc") {
    // escapeshellarg() quotes the whole argument, so a quote character in the
    // filename cannot terminate the quoting. exec()'s return value is only the
    // LAST output line, hence every line is collected through the output array.
    $outputLines = [];
    exec("catdoc " . escapeshellarg($filename), $outputLines);
    $str = implode("\n", $outputLines);
}
if ($file->getType() == "docx") {
    // The sed script strips the XML markup from word/document.xml; it is kept
    // exactly as in the original command.
    $outputLines = [];
    exec("unzip -p " . escapeshellarg($filename) . " word/document.xml | sed -e 's/<\\/w:p>/\n/g; s/<[^>]\\{1,\\}>//g; s/[^[:print:]\n]\\{1,\\}//g'", $outputLines);
    $str = implode("\n", $outputLines);
}
if ($file->getType() == "pdf") {
    include "pdfparser.php";
    $parser = new PdfParser();
    $str = $parser->parseFile($filename);
    // Base64-encoded JPEG rendering of the PDF.
    $im = new imagick($filename);
    $im->setImageFormat('jpg');
    $imdata = base64_encode($im);
}
// Split on whitespace, dropping empty tokens so that leading/trailing
// whitespace (or an empty document) does not produce a bogus "" word.
$textarr1 = preg_split("/\\s+/", $str, -1, PREG_SPLIT_NO_EMPTY);
if ($textarr1 === false) {
    $textarr1 = [];
}
// Word => number of occurrences. Membership must be tested on the KEYS;
// in_array() would search the counts instead of the words.
$textarr = [];
foreach ($textarr1 as $b) {
    if (!isset($textarr[$b])) {
        $textarr[$b] = 1;
    } else {
        $textarr[$b] += 1;
    }
}
$a = $file->originalname;

Пример #4

Показать файл

Файл: CustomController.php Проект: hukumonline/admin

 /**
  * Upload test action: extracts text from the uploaded file and exposes a
  * summary string to the view as "test".
  *
  * On POST the uploaded file ("uploadedFile1" from the registry's "files"
  * entry) is first run through PdfParser and the filtered text is handed to
  * Pandamp_Debug::manager(). The same file is then read as raw bytes: a text
  * length is computed from header bytes 0x21c-0x21f and that many bytes are
  * read after the 0xa00-byte header.
  * NOTE(review): the header layout looks like a legacy binary .doc format;
  * confirm, including the "-1" and "-8" byte adjustments.
  *
  * Each extracted line is checked against the known regulation types; a match
  * restarts the result with the capitalised type name.
  *
  * @return void
  */
 public function docAction()
 {
     $this->_helper->layout->setLayout('layout-dms-uploader');
     $request = $this->getRequest();
     if (!$request->isPost()) {
         return;
     }

     $registry = Zend_Registry::getInstance();
     $files = $registry->get('files');
     $regType = ['peraturan pemerintah', 'peraturan presiden', 'undang-undang', 'peraturan menteri'];
     $uploadPath = $files['uploadedFile1']['tmp_name'];

     $pdf = new PdfParser();
     $string = $pdf->parseFile($uploadPath);
     $outtext = preg_replace("/[^a-zA-Z0-9\\s\\,\\.\\-\n\r\t@\\/\\_\\(\\)]/", "", $string);
     Pandamp_Debug::manager($outtext);

     $fh = fopen($uploadPath, 'r');
     if ($fh === false) {
         return;
     }

     $headers = fread($fh, 0xa00);
     $extracted_plaintext = '';
     // Only decode the length when the header is long enough to contain it;
     // a short read would otherwise index past the end of the string.
     if ($headers !== false && strlen($headers) > 0x21f) {
         $n1 = ord($headers[0x21c]) - 1;
         $n2 = (ord($headers[0x21d]) - 8) * 256;
         $n3 = ord($headers[0x21e]) * 256 * 256;
         $n4 = ord($headers[0x21f]) * 256 * 256 * 256;
         $textLength = $n1 + $n2 + $n3 + $n4;
         // fread() rejects a non-positive length, so skip the read in that case.
         if ($textLength > 0) {
             $extracted_plaintext = (string) fread($fh, $textLength);
         }
     }
     // The upload handle is no longer needed once the text has been read.
     fclose($fh);

     $c = nl2br($extracted_plaintext);
     // Replay the text through an in-memory stream to iterate it line by line.
     $fp = fopen("php://memory", 'r+');
     fputs($fp, $c);
     rewind($fp);

     $txt = '';
     // Compare against false explicitly: a final line consisting of "0" is
     // falsy and would otherwise end the loop early.
     while (($line = fgets($fp)) !== false) {
         $arr = preg_split("/((\r?\n)|(\r\n?))/", $line);
         $haystack = strtolower($arr[0]);
         // A closure replaces create_function(), which compiled PHP source
         // containing the uploaded text and is removed in PHP 8.
         $result = array_filter($regType, function ($e) use ($haystack) {
             return strpos($haystack, $e) !== false;
         });
         if ($result) {
             $matchedTypes = array_values($result);
             $txt = ucwords($matchedTypes[0]) . "\n";
         }
         // The last line may have no terminator, leaving no second segment.
         $txt .= isset($arr[1]) ? $arr[1] : '';
     }
     fclose($fp);
     $this->view->assign('test', $txt);
 }

Пример #5

Показать файл

Файл: checking.php Проект: devarsh13/Plagiarism-Checker

    if ($res === true) {
        @$zip->extractTo('uploads/ffcp/');
        $zip->close();
    }
}
$files = glob("uploads/ffcp/*");
foreach ($files as $file) {
    //echo $file;
    include_once 'PDF2TEXT.php';
    include_once 'DOCTOTEXT.php';
    $a = new SplFileInfo($file);
    $extension = $a->getExtension();
    //echo $extension;
    if ($extension == 'pdf') {
        //echo "asdsad";
        $a = new PdfParser();
        $data = $a->parseFile($file);
        $data = preg_replace('/[^A-Za-z0-9]/', '', $data);
        //echo $data;
        $handle = fopen("one.txt", 'w');
        fwrite($handle, $data);
        $fp->fingerPrint($file);
    } else {
        if ($extension == 'docx') {
            $docobj = new DocxConversion($file);
            $data = $docobj->convertToText();
            $data = preg_replace('/[^A-Za-z0-9]/', '', $data);
            $handle = fopen("one.txt", 'w');
            fwrite($handle, $data);
            $fp->fingerPrint($file);
        } else {