コード例 #1
10
ファイル: Snapshot.php プロジェクト: SimZal/laracrawl
 /**
  * Extract the text of the PDF held in $this->binary and wrap it in a <pre> element.
  *
  * When parsing throws, or yields no text, a short notice containing the payload
  * size in bytes is returned instead. The text is HTML-escaped before being
  * embedded, because it originates from the document and would otherwise be
  * interpreted as markup.
  *
  * @return string HTML fragment of the form <pre>...</pre>
  */
 public function readBinary()
 {
     $parser = new \Smalot\PdfParser\Parser();
     try {
         $result = $parser->parseContent($this->binary);
         $text = $result->getText();
         // Collapse blank lines produced by the extractor.
         $text = str_replace(["\n\n"], "", $text);
     } catch (Exception $exception) {
         // NOTE(review): "Exception" is unqualified; if this file declares a namespace
         // without importing \Exception, this clause will not match - confirm.
         $text = NULL;
     }
     if (empty($text)) {
         $text = 'No readable text. File size: ' . strlen($this->binary) . 'B';
     }
     return '<pre>' . htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . '</pre>';
 }
コード例 #2
6
ファイル: index.php プロジェクト: emraanzaki/pdfparsing
    if (!empty($file)) {
        $moved = move_uploaded_file($file['tmp_name'], dirname(__FILE__) . '/../uploads/' . sha1(time()) . "-" . $file['name']);
        if ($moved) {
            return new Response(json_encode(array('message' => 'Upload Successful!')), '200');
        } else {
            return new Response(json_encode(array('message' => 'File upload error!')), '500');
        }
    }
});
$app->get('pages/{id}', function (Silex\Application $app, $id) {
    // Add a parameter for an ID in the route, and it will be supplied as argument in the function
    if (!array_key_exists($id, $app['files'])) {
        $app->abort(404, 'The PDF file could not be found');
    }
    $file = $app['files'][$id];
    $parser = new \Smalot\PdfParser\Parser();
    $filepath = dirname(__FILE__) . '/../uploads/' . $file;
    $document = $parser->parseFile($filepath);
    $details = $document->getDetails();
    $dir = dirname(__FILE__) . '/../uploads/pages/' . $id;
    if (!file_exists($dir)) {
        $dirCreate = mkdir($dir);
        for ($i = 1; $i <= $details['Pages']; $i++) {
            $fpdi = new FPDI();
            $fpdi->setSourceFile($filepath);
            $tpl = $fpdi->importPage($i);
            $size = $fpdi->getTemplateSize($tpl);
            $orientation = $size['h'] > $size['w'] ? 'P' : 'L';
            $fpdi->AddPage($orientation);
            $fpdi->useTemplate($tpl, null, null, $size['w'], $size['h'], true);
            try {
コード例 #3
2
 /**
  * Concatenate the text of every page of a PDF file.
  *
  * @param string $filename Path of the PDF to read.
  * @return string|false Page texts joined without a separator, or false when
  *                      the path is empty or the file does not exist.
  */
 public static function parse($filename)
 {
     if ($filename && file_exists($filename)) {
         $reader = new \Smalot\PdfParser\Parser();
         $document = $reader->parseFile($filename);
         $collected = "";
         // Append page by page, in the order the parser returns them.
         foreach ($document->getPages() as $singlePage) {
             $collected = $collected . $singlePage->getText();
         }
         return $collected;
     }
     return false;
 }
コード例 #4
1
 /**
  * Read a PDF, store the first 61-character line (spaces removed) in
  * $this->linha, and return the result of parseLinha().
  *
  * @param string $filename Path of the PDF to read.
  * @return mixed Whatever $this->parseLinha() returns.
  */
 function fromFile($filename)
 {
     $leitor = new \Smalot\PdfParser\Parser();
     $documento = $leitor->parseFile($filename);
     $candidatas = explode("\n", $documento->getText());
     foreach ($candidatas as $candidata) {
         if (strlen($candidata) != 61) {
             continue;
         }
         // Only the first line of the expected length is used.
         $this->linha = str_replace(" ", "", $candidata);
         break;
     }
     return $this->parseLinha();
 }
コード例 #5
1
ファイル: Page.php プロジェクト: andrewolobo/mpTracker
 /**
  * Checks that the first page of the sample document yields the expected text fragments.
  */
 public function testGetText()
 {
     // Document with text.
     $samplePath = __DIR__ . '/../../../../../samples/Document1_pdfcreator_nocompressed.pdf';
     $reader = new \Smalot\PdfParser\Parser();
     $allPages = $reader->parseFile($samplePath)->getPages();
     $firstPage = $allPages[0];
     $extracted = $firstPage->getText();
     $this->assert->string($extracted)->hasLengthGreaterThan(150);
     // Each fragment must appear somewhere in the extracted text.
     $expectedFragments = array('Document title', 'Lorem ipsum', 'Calibri', 'Arial', 'Times', 'Courier New', 'Verdana');
     foreach ($expectedFragments as $fragment) {
         $this->assert->string($extracted)->contains($fragment);
     }
 }
コード例 #6
1
ファイル: basvuru_kaydet.php プロジェクト: kaantunc/myk_git
	/**
	 * Read the content of a Word (.doc/.docx) or PDF document.
	 *
	 * The file type is detected from its content with fileinfo. Word documents are
	 * converted to HTML through $this->write() and the generated temp.html is
	 * returned; PDF documents are returned as extracted text.
	 *
	 * @param string $path Path of the document to read.
	 * @return string|false|null Document content; null when the detected MIME type
	 *                           is not one of the accepted types; false when the
	 *                           generated HTML file cannot be read.
	 */
	function readDocument($path){

		$conf =& JFactory::getConfig();
		$tmp_path = $conf->getValue('config.tmp_path');

		$acceptableFiles = array( 'application/msword' => "doc",//doc
				'application/vnd.openxmlformats-officedocument.wordprocessingml.document' => "docx",//docx,
				'application/pdf' => "pdf" //pdf
		);

		// Despite the name this holds the detected MIME type, not a file extension.
		$finfo 		    = finfo_open(FILEINFO_MIME_TYPE);
		$file_extension = finfo_file($finfo, $path);
		finfo_close($finfo);

		// Defined up front so unsupported types return null instead of reading an undefined variable.
		$contents = null;

		if(array_key_exists($file_extension, $acceptableFiles)){

			$type = $acceptableFiles[$file_extension];

			if($type == "doc" || $type == "docx"){

				require_once 'libraries/PHPWord-master/src/PhpWord/Autoloader.php';
				\PhpOffice\PhpWord\Autoloader::register();

				$phpWord = \PhpOffice\PhpWord\IOFactory::load($path);
				// Writes the HTML rendition as "temp.html" into the temp directory.
				$this->write($phpWord, "temp", array('HTML' => 'html'),$tmp_path);

				$contents = file_get_contents($tmp_path.'/temp.html', true);

			}else if($type == "pdf"){
				include 'libraries/pdfparser/vendor/autoload.php';

				$parser = new \Smalot\PdfParser\Parser();
				$pdf    = $parser->parseFile($path);

				$contents = $pdf->getText();

			}
		}
		return $contents;
	}
コード例 #7
0
ファイル: AqbController.php プロジェクト: rowej83/ShipAdmin
 /**
  * Handle the packing-list upload: collect one PO value per PDF page and render the result view.
  *
  * The PO is read from the text after the first ':' on line index 10 of each page.
  *
  * @return mixed The 'parsepdf-output' view on success, the 'parsepdf-input'
  *               view with an error message when validation fails.
  */
 public function parsePostPDF()
 {
     $validator = Validator::make(Input::all(), array('packinglist' => 'required'));
     if ($validator->fails()) {
         //validation fails
         return View::make('parsepdf-input')->with(array('response' => '<p style="color:red;">Please select a packing list pdf to parse.</p>'));
     }
     //validation passes
     $upload = Input::file('packinglist');
     $reader = new \Smalot\PdfParser\Parser();
     $document = $reader->parseFile($upload);
     $sheets = $document->getPages();
     $poNumbers = array();
     foreach ($sheets as $sheet) {
         $lines = explode('<br />', nl2br($sheet->getText()));
         $poField = explode(':', $lines[10]);
         $poNumbers[] = trim($poField[1]);
     }
     $poCount = count($poNumbers);
     $query = $this->joinKohlsParsePO($poNumbers);
     // One PO per line in the rendered output.
     $poMarkup = '';
     foreach ($poNumbers as $poNumber) {
         $poMarkup = $poMarkup . $poNumber . '<br>';
     }
     $data = array(
         'POs' => $poMarkup,
         'totalOfPOs' => $poCount,
         'queryString' => $query,
     );
     return View::make('parsepdf-output', $data);
 }
コード例 #8
0
 /**
  * Return the full text of the PDF referenced by $this->submittedCV.
  *
  * @return string Text extracted by the parser.
  */
 public function extractPdf()
 {
     $reader = new \Smalot\PdfParser\Parser();
     $document = $reader->parseFile($this->submittedCV);
     return $document->getText();
 }
コード例 #9
0
ファイル: PdfType.php プロジェクト: enhavo/enhavo
 /**
  * Extract the text of a PDF file, replacing every '&', '%' and '$' with a space.
  *
  * @param string $sourcefile Path of the PDF to read.
  * @return string Extracted text with the three characters replaced.
  */
 protected function pdfToString($sourcefile)
 {
     $reader = new \Smalot\PdfParser\Parser();
     $document = $reader->parseFile($sourcefile);
     $stripped = array('&', '%', '$');
     return str_replace($stripped, ' ', $document->getText());
 }
コード例 #10
0
ファイル: PdfFile.php プロジェクト: jaberu/PhpOfficeUtils
 /**
  * Return the parsed document, parsing $this->filename on first use.
  *
  * @return mixed The value stored in $this->document.
  * @throws ParseException Wrapping any exception raised by the parser.
  */
 private function getDocument()
 {
     // Already parsed: reuse the stored instance.
     if (!empty($this->document)) {
         return $this->document;
     }
     $reader = new \Smalot\PdfParser\Parser();
     try {
         $this->document = $reader->parseFile($this->filename);
     } catch (\Exception $failure) {
         throw new ParseException($failure);
     }
     return $this->document;
 }
コード例 #11
0
 /**
  * Parse a PDF, extract fingerprints from its text and return both wrapped
  * in a Document.
  *
  * @param string $documentName name given by the user to the file
  * @param string $documentPath temporary path where the document is stored
  * @return Document carrying the name, the extracted text and the fingerprints
  */
 private function parseDocument($documentName, $documentPath)
 {
     $reader = new \Smalot\PdfParser\Parser();
     $content = $reader->parseFile($documentPath)->getText();
     // Fingerprints are computed from the unmodified extracted text.
     $prints = $this->documentService->extractFingerprint($content);
     $result = new Document();
     $result->setName($documentName);
     $result->setContent($content);
     $result->setFingerprints($prints);
     return $result;
 }
コード例 #12
0
 /**
  * Add a PDF file to the tl_search table and hand the row to indexContent().
  *
  * The PDF text is parsed only when no tl_search row with the same url and
  * checksum exists; otherwise the text of the existing row is reused.
  *
  * @param string $strFile      Path passed to \File and to the PDF parser.
  * @param array  $arrParentSet Values of the referencing page; the keys 'pid',
  *                             'language', 'protected' and 'groups' are read.
  * @return false|null false when the file is not a valid PDF, when the page is
  *                    already indexed with this file, or when parsing throws;
  *                    otherwise the method ends without an explicit return value.
  */
 protected static function addToPDFSearchIndex($strFile, $arrParentSet)
 {
     $objFile = new \File($strFile);
     if (!Validator::isValidPDF($objFile)) {
         return false;
     }
     $objDatabase = \Database::getInstance();
     $objModel = $objFile->getModel();
     $arrMeta = \Frontend::getMetaData($objModel->meta, $arrParentSet['language']);
     // Use the file name as title if none is given
     if ($arrMeta['title'] == '') {
         $arrMeta['title'] = specialchars($objFile->basename);
     }
     // Row to be written to tl_search; 'text' is added further down.
     $arrSet = array('pid' => $arrParentSet['pid'], 'tstamp' => time(), 'title' => $arrMeta['title'], 'url' => $objFile->value, 'filesize' => \System::getReadableSize($objFile->size, 2), 'checksum' => $objFile->hash, 'protected' => $arrParentSet['protected'], 'groups' => $arrParentSet['groups'], 'language' => $arrParentSet['language'], 'mime' => $objFile->mime);
     // Look up existing index rows for this exact file (same url and checksum)
     $objIndex = $objDatabase->prepare("SELECT * FROM tl_search WHERE url=? AND checksum=?")->execute($arrSet['url'], $arrSet['checksum']);
     // there are already indexed files containing this file (same checksum and filename)
     if ($objIndex->numRows) {
         // Return if the page with the file is indexed
         if (in_array($arrSet['pid'], $objIndex->fetchEach('pid'))) {
             return false;
         }
         // Reuse the text stored with the existing row instead of parsing again
         $strContent = $objIndex->text;
     } else {
         try {
             // parse only for the first occurrence
             $parser = new \Smalot\PdfParser\Parser();
             $objPDF = $parser->parseFile($strFile);
             $strContent = $objPDF->getText();
         } catch (\Exception $e) {
             // Missing object reference #...
             return false;
         }
     }
     // Put everything together
     $arrSet['text'] = $strContent;
     // Decode entities, collapse runs of spaces and trim the result
     $arrSet['text'] = trim(preg_replace('/ +/', ' ', \String::decodeEntities($arrSet['text'])));
     // Update an existing old entry
     if ($objIndex->pid == $arrSet['pid']) {
         $objDatabase->prepare("UPDATE tl_search %s WHERE id=?")->set($arrSet)->execute($objIndex->id);
         $intInsertId = $objIndex->id;
     } else {
         $objInsertStmt = $objDatabase->prepare("INSERT INTO tl_search %s")->set($arrSet)->execute();
         $intInsertId = $objInsertStmt->insertId;
     }
     static::indexContent($arrSet, $intInsertId);
 }
コード例 #13
0
 /**
  * Tests that the 'printable/pdf/node/{node}' path returns the right content.
  *
  * Flow: create a Basic page, configure the PDF tool, request the PDF version of
  * the node, read the text back from the generated file, store that text as the
  * body of a second node and assert on the rendered second node.
  */
 public function testCustomPageExists()
 {
     global $base_url;
     $node_type_storage = \Drupal::entityManager()->getStorage('node_type');
     // Test /node/add page with only one content type.
     $node_type_storage->load('article')->delete();
     $this->drupalGet('node/add');
     $this->assertResponse(200);
     $this->assertUrl('node/add/page');
     // Create a node.
     $edit = array();
     $edit['title[0][value]'] = $this->randomMachineName(8);
     $bodytext = $this->randomMachineName(16) . 'This is functional test which I am writing for printable module.';
     $edit['body[0][value]'] = $bodytext;
     $this->drupalPostForm('node/add/page', $edit, t('Save'));
     // Check that the Basic page has been created.
     $this->assertRaw(t('!post %title has been created.', array('!post' => 'Basic page', '%title' => $edit['title[0][value]'])), 'Basic page created.');
     // Check that the node exists in the database.
     $node = $this->drupalGetNodeByTitle($edit['title[0][value]']);
     $this->assertTrue($node, 'Node found in database.');
     // Check that the node page itself is reachable.
     $this->drupalGet('node/' . $node->id());
     $this->assertResponse(200);
     // Set the PDF generating tool.
     $this->drupalGet('admin/config/user-interface/printable/pdf');
     $this->drupalPostForm(NULL, array('print_pdf_pdf_tool' => 'mPDF', 'print_pdf_content_disposition' => 1, 'print_pdf_filename' => 'modules/custom/printable/src/Tests/testPDF'), t('Submit'));
     $this->drupalGet('admin/config/user-interface/printable/pdf');
     $this->assertResponse(200);
     // Test whether PDF page is being generated.
     $this->drupalGet('printable/pdf/node/' . $node->id());
     // Read the text back from the file name configured above (with the .pdf suffix).
     $parser = new \Smalot\PdfParser\Parser();
     $pdf = $parser->parseFile('modules/custom/printable/src/Tests/testPDF.pdf');
     $text = $pdf->getText();
     // Store the extracted PDF text as the body of a second node.
     $this->drupalGet('node/add');
     $new_edit = array();
     $new_edit['title[0][value]'] = $this->randomMachineName(8);
     $bodytext = $text;
     $new_edit['body[0][value]'] = $bodytext;
     $this->drupalPostForm('node/add/page', $new_edit, t('Save'));
     $new_node = $this->drupalGetNodeByTitle($new_edit['title[0][value]']);
     $this->drupalGet('node/' . $new_node->id());
     $this->assertResponse(200);
     // Checks the presence of the original body in the second node's page.
     $this->assertRaw($edit['body[0][value]'], 'Body discovered successfully in the printable page');
     // Check if footer is rendering correctly.
     $this->assertRaw($base_url . '/node/' . $node->id(), 'Source Url discovered in the printable page');
 }
コード例 #14
0
ファイル: HelloController.php プロジェクト: KasselR/yii2-kr
 /**
  * Walk the hard-coded import directory recursively, parse every file with the
  * "pdf" extension and print its base name and resolved path.
  *
  * A GridFS handle and each document's details are obtained but not used further
  * here; nothing is stored.
  */
 public function actionImport()
 {
     require 'vendor/autoload.php';
     $mongo = new MongoClient("mongodb://localhost:27017");
     $gridFs = $mongo->selectDB('xpps')->getGridFS();
     $reader = new \Smalot\PdfParser\Parser();
     $root = new RecursiveDirectoryIterator('C:\\tmp\\klett-cotta\\daten\\www.traumaundgewalt.de');
     $walker = new RecursiveIteratorIterator($root);
     foreach ($walker as $entry) {
         // Skip directories and anything that is not a .pdf file.
         if (is_dir($entry) || $entry->getExtension() != "pdf") {
             continue;
         }
         $document = $reader->parseFile($entry);
         $details = $document->getDetails();
         echo basename($entry) . " path: " . realpath($entry) . "<br>";
     }
 }
コード例 #15
0
/**
 * Return the text of the "_FMI_" PDF for the current ISO-8601 week.
 *
 * Links are obtained from crawl_page(); a link qualifies when it contains
 * ".pdf" (case-insensitive) and "_FMI_", and when the two characters at offset 16
 * equal the current week number. If several links qualify, the last one is used.
 *
 * @return string Extracted text, or an empty string when no link qualifies.
 */
function pdfToString()
{
    $links = crawl_page("http://www.betriebsrestaurant-gmbh.de/index.php?id=91");
    // The week number does not change per link, so compute it once.
    $weekNumber = date("W");
    $pdfLink = "";
    foreach ($links as $file) {
        if (strpos(strtolower($file), '.pdf') !== FALSE && strpos($file, '_FMI_') !== FALSE) {
            if ($weekNumber === substr($file, 16, 2)) {
                // current link is MI pdf
                $pdfLink = "http://www.betriebsrestaurant-gmbh.de/" . $file;
            }
        }
    }
    // Without a match there is nothing to download; do not hand an empty path to the parser.
    if ($pdfLink === "") {
        return "";
    }
    // Parse pdf file and build necessary objects.
    $parser = new \Smalot\PdfParser\Parser();
    $pdf = $parser->parseFile($pdfLink);
    return $pdf->getText();
}
コード例 #16
0
ファイル: Parser.php プロジェクト: dollavon/pdfparser
 /**
  * Parses every *.pdf file in samples/bugs and checks that the first page yields a string.
  *
  * Exceptions are tolerated only when the message is the "secured pdf" notice or
  * starts with "TCPDF_PARSER"; every other exception is rethrown.
  */
 public function testParseFile()
 {
     $directory = getcwd() . '/samples/bugs';
     if (is_dir($directory)) {
         $files = scandir($directory);
         $parser = new \Smalot\PdfParser\Parser();
         foreach ($files as $file) {
             if (preg_match('/^.*\\.pdf$/i', $file)) {
                 try {
                     $document = $parser->parseFile($directory . '/' . $file);
                     $pages = $document->getPages();
                     $page = $pages[0];
                     $content = $page->getText();
                     $this->assert->string($content);
                 } catch (\Exception $e) {
                     $message = $e->getMessage();
                     $isSecuredNotice = $message == 'Secured pdf file are currently not supported.';
                     // strpos() returns false when the needle is absent; a loose "!= 0"
                     // treats that like position 0, so compare strictly.
                     $isTcpdfError = strpos($message, 'TCPDF_PARSER') === 0;
                     if (!$isSecuredNotice && !$isTcpdfError) {
                         throw $e;
                     }
                 }
             }
         }
     }
 }
コード例 #17
0
ファイル: teachercrawler.php プロジェクト: demag0gue/Crawler
 /**
  * Parse the downloaded teacher PDF into $this->output.
  *
  * Does nothing when an error is already flagged, when not logged in (the error
  * flag is then set), or when downloadList() fails. Otherwise each text line after
  * the first is split on spaces: tokens longer than three characters, all-lowercase
  * tokens and tokens containing '.' are joined into the key; the remaining
  * non-blank tokens form the value list.
  */
 public function crawl()
 {
     if ($this->hasError()) {
         return;
     }
     if (!$this->isLoggedIn()) {
         $this->setError(true);
         return;
     }
     if (!$this->downloadList()) {
         return;
     }
     $reader = new \Smalot\PdfParser\Parser();
     $document = $reader->parseFile(path . 'saved/teacher/teacher.pdf');
     $rows = explode(PHP_EOL, $document->getText());
     // Drop the first line.
     array_shift($rows);
     $result = array();
     foreach ($rows as $row) {
         $nameTokens = '';
         $shortTokens = array();
         foreach (explode(' ', $row) as $token) {
             $isBlank = ctype_space($token) || $token == '';
             if ($isBlank) {
                 continue;
             }
             $belongsToName = strlen($token) > 3 || ctype_lower($token) || strpos($token, '.') !== false;
             if ($belongsToName) {
                 $nameTokens .= $token . ' ';
             } else {
                 $shortTokens[] = $token;
             }
         }
         $result[rtrim($nameTokens)] = $shortTokens;
     }
     $this->output = $result;
 }
コード例 #18
0
 /**
  * Extract the text of the price-list PDF stored for $pdfid and save it as a
  * text file under public_path()/file/.
  *
  * Only the first matching row is processed: the method returns as soon as one
  * PDF has been handled.
  *
  * @param Request $request      Not used by this method.
  * @param mixed   $pdfid        Value matched against the id column of the pdf table.
  * @param string  $retailername Lower-cased and used as prefix of the output file name.
  * @return string HTML alert fragment reporting success or failure.
  */
 public function StartExtractPdf(Request $request, $pdfid, $retailername)
 {
     // basename() keeps a name containing path separators from escaping the target directory.
     $filename = basename(strtolower($retailername)) . "-pricelist-pdf.txt";
     $target = public_path() . "/file/" . $filename;
     $pdfs = \DB::table('pdf')->where('id', '=', $pdfid)->get();
     foreach ($pdfs as $pdf_url) {
         $parser = new \Smalot\PdfParser\Parser();
         $pdf = $parser->parseFile($pdf_url->pricelist_file);
         $text = $pdf->getText();
         // NOTE(review): '\\r\\n' in single quotes is the literal characters \r\n,
         // not a CR/LF pair - confirm which one is intended.
         $toReplace = array('&', ',', '"', '\\r\\n', 'Price', 'Q uad', 'M icro', 'M aximus', 'D D R3');
         $with = array('-', '_', 'inch', ' ', 'Price ', 'Quad', 'Micro', 'Maximus', 'DDR3');
         $string = str_replace($toReplace, $with, $text);
         // Flatten the text onto a single line.
         $new = trim(preg_replace('/\\n/', ' ', $string));
         // Report success only when the write really succeeded.
         if (file_put_contents($target, $new) === false) {
             break;
         }
         return '<div class="alert alert-success">successfully extract data from pdf</div>';
     }
     return '<div class="alert alert-danger">failed to extract data from pdf</div>';
 }
コード例 #19
0
 /**
  * Parse $this->rawData as PDF and dispatch each non-empty object content to the
  * first handler in self::$knownContentTypes whose regex matches it, then fill
  * gaps, build the labels and reset the temporary properties.
  *
  * @uses processOuterBorders
  * @uses processGridLine
  * @uses processText
  * @uses processHiddenClue
  * @throws \Exception
  */
 private function parseRawData()
 {
     $reader = new \Smalot\PdfParser\Parser();
     $document = $reader->parseContent($this->rawData);
     foreach ($document->getObjects() as $pdfObject) {
         $body = $pdfObject->getContent();
         if ('' !== $body) {
             foreach (self::$knownContentTypes as $handler => $pattern) {
                 if (preg_match($pattern, $body, $captures) !== 1) {
                     continue;
                 }
                 // Only the first matching handler is invoked per object.
                 $this->{$handler}($captures);
                 break;
             }
         }
     }
     $this->fillGaps();
     $this->labels = $this->labelFactory->getFromRaw($this->labelsRaw);
     $this->resetTempProperties();
 }
コード例 #20
0
 /**
  * Return (by reference) the text extracted from the PDF at $this->filename.
  *
  * @return string Extracted text.
  */
 private function &read_pdf()
 {
     $reader = new \Smalot\PdfParser\Parser();
     // A named variable is required because the function returns by reference.
     $contents = $reader->parseFile($this->filename)->getText();
     return $contents;
 }
コード例 #21
0
ファイル: KohlsController.php プロジェクト: rowej83/ShipAdmin
 /**
  * Build one entry per PDF page with the page's PO value and ship terms.
  *
  * The PO is the text after the first ':' on line index 10 of the page; the ship
  * terms are "Ground" or "Not Ground" depending on checkIfGround().
  *
  * @param mixed $file PDF file handed to the parser.
  * @return array List of arrays with the keys 'PO' and 'shipterms'.
  */
 function getArrayOfPOs($file)
 {
     $reader = new \Smalot\PdfParser\Parser();
     $document = $reader->parseFile($file);
     $rows = array();
     foreach ($document->getPages() as $sheet) {
         $markup = nl2br($sheet->getText());
         $lines = explode('<br />', $markup);
         $poField = explode(':', $lines[10]);
         $row = array('PO' => trim($poField[1]));
         $row['shipterms'] = $this->checkIfGround($markup) ? "Ground" : "Not Ground";
         $rows[] = $row;
     }
     return $rows;
 }
コード例 #22
0
ファイル: MacysController.php プロジェクト: rowej83/ShipAdmin
 /**
  * Build one entry per PDF page with the page's PO value and ship terms.
  *
  * Every line of a page is tested with checkIfArrayIndexContainsOrder(); the PO is
  * the text after the first ':' of the last matching line. A page without a
  * matching line gets an empty PO rather than the value of the previous page.
  *
  * @param mixed $file PDF file handed to the parser.
  * @return array List of arrays with the keys 'PO' and 'shipterms'.
  */
 function getArrayOfPOs($file)
 {
     $returnArray = array();
     $parser = new \Smalot\PdfParser\Parser();
     $pdf = $parser->parseFile($file);
     $pages = $pdf->getPages();
     foreach ($pages as $page) {
         $text = nl2br($page->getText());
         $tempPDF = explode('<br />', $text);
         // Reset per page so a match from an earlier page cannot leak into this one.
         $poNumber = '';
         foreach ($tempPDF as $tempPDFArrayIndex) {
             if ($this->checkIfArrayIndexContainsOrder($tempPDFArrayIndex)) {
                 // line contains order #; a later match overrides an earlier one
                 $getPO = explode(':', $tempPDFArrayIndex);
                 $poNumber = isset($getPO[1]) ? trim($getPO[1]) : '';
             }
         }
         $data = array();
         $data['PO'] = $poNumber;
         $data['shipterms'] = $this->checkIfGround($text) ? "Ground" : "Not Ground";
         array_push($returnArray, $data);
     }
     return $returnArray;
 }
コード例 #23
0
ファイル: pdf2text.php プロジェクト: patyalves17/PdfAjax
<?php

// Include Composer autoloader if not already done.
include 'vendor/autoload.php';
// Filename
// Take the PDF path from the first command-line argument, else use the default file name.
if (isset($argv[1])) {
    $filename = $argv[1];
} else {
    $filename = 'Clipping_Eletronico_ ABC_Grande_Sao_Paulo_e_Llitoral.pdf';
}
// Parse pdf file and build necessary objects.
$parser = new \Smalot\PdfParser\Parser();
$pdf = $parser->parseFile($filename);
// Print every metadata entry; array values are joined with ", ".
$details = $pdf->getDetails();
echo "Metadata <br/>";
foreach ($details as $property => $value) {
    $value = is_array($value) ? implode(', ', $value) : $value;
    echo $property, ' => ', $value, "<br/>";
}
// Print the full text, then report whether the search term occurs in it.
echo "\nTexto:<br/>";
$text = $pdf->getText();
echo $text;
echo '<br/>Texto procurado: ';
if (strpos($text, 'Assessoria') === FALSE) {
    echo "Não encontrado";
} else {
    echo 'Encontrado';
}
コード例 #24
0
 /**
  * Checks that the 'Kids' entry of object 3_0 is an ElementArray of size one
  * whose first element is a Page.
  */
 public function testResolveXRef()
 {
     // Document with text.
     $samplePath = __DIR__ . '/../../../../../../samples/Document1_pdfcreator_nocompressed.pdf';
     $reader = new \Smalot\PdfParser\Parser();
     $parsed = $reader->parseFile($samplePath);
     $children = $parsed->getObjectById('3_0')->get('Kids');
     $this->assert->object($children)->isInstanceOf('\\Smalot\\PdfParser\\Element\\ElementArray');
     $this->assert->array($children->getContent())->hasSize(1);
     // reset() needs a variable, so fetch the content into one first.
     $resolved = $children->getContent();
     $firstChild = reset($resolved);
     $this->assert->object($firstChild)->isInstanceOf('\\Smalot\\PdfParser\\Page');
 }
コード例 #25
0
ファイル: readPdf.php プロジェクト: juaning/bibliotek
<?php

include 'vendor/autoload.php';
// Parse the sample PDF once and keep its page list for the loop at the end of the script.
$parser = new \Smalot\PdfParser\Parser();
$pdf = $parser->parseFile('pdf/test2.pdf');
$pages = $pdf->getPages();
/**
 * Print one line per font: its key, name, type and content.
 *
 * @param array $fonts Font objects offering getName(), getType() and getContent().
 */
function testFonts($fonts)
{
    foreach ($fonts as $index => $fontObject) {
        $header = sprintf(
            "Key: %s Font: %s Type: %s Details: ",
            $index,
            $fontObject->getName(),
            $fontObject->getType()
        );
        echo $header;
        echo $fontObject->getContent(), "\n";
    }
}
/**
 * Placeholder: the body is empty and $page is not used.
 */
function testTextRelationToFont($page)
{
}
// Print the unique id of every page; the other inspections below are disabled.
foreach ($pages as $page) {
    // var_dump($page->getDocument());
    echo $page->getUniqueId();
    // echo $page->getText();
    // $fonts = $page->getFonts();
    // var_dump($page->getDetails());
    // var_dump($fonts);
}
コード例 #26
0
ファイル: index2.php プロジェクト: patyalves17/PdfAjax
<?php

header('Content-Type: text/html; charset=UTF-8');
include 'vendor/autoload.php';
// $message carries an error text for the page; $texts collects one entry per PDF page.
$message = '';
$texts = array();
if ($_SERVER['REQUEST_METHOD'] == 'POST') {
    try {
        $content = '';
        if (isset($_POST['inputUrl']) && preg_match('/^https?:\\/\\//', trim($_POST['inputUrl']))) {
            // NOTE(review): this fetches an arbitrary user-supplied URL from the server
            // (SSRF risk) - consider restricting the allowed hosts.
            $content = file_get_contents(trim($_POST['inputUrl']));
        } elseif (
            isset($_FILES['inputFile'])
            && $_FILES['inputFile']['error'] === UPLOAD_ERR_OK
            && is_uploaded_file($_FILES['inputFile']['tmp_name'])
            && $_FILES['inputFile']['type'] == 'application/pdf'
        ) {
            // Only read the temp file when the upload completed and really is an
            // uploaded file; a failed upload leaves tmp_name empty.
            $content = file_get_contents($_FILES['inputFile']['tmp_name']);
        }
        if ($content) {
            $parser = new \Smalot\PdfParser\Parser();
            $pdf = $parser->parseContent($content);
            $pages = $pdf->getPages();
            foreach ($pages as $page) {
                $texts[] = $page->getText();
            }
        } else {
            throw new Exception('Unable to retrieve content. Check if it is really a pdf file.');
        }
    } catch (Exception $e) {
        // Any failure is reported to the page through $message.
        $message = $e->getMessage();
    }
}
?>
<!DOCTYPE html>
<html>
コード例 #27
0
ファイル: index.php プロジェクト: kawashita86/xlsconversion
 } else {
     if (strpos($filename, '.xls') !== false || strpos($filename, '.XLS') !== false) {
         $reader = PHPExcel_IOFactory::load(PROJ_ROOT . '/upload/' . $filename);
         $sheetData = $reader->getActiveSheet()->toArray(null, true, true, false);
     } else {
         if (stripos($filename, '.pdf') !== false) {
             if (Utils::getValue('template') == 'nestle' || Utils::getValue('template') == 'affinity') {
                 $sheetData = Utils::getRemotePDFtoText(PROJ_ROOT . '/upload/' . $filename);
             } else {
                 if (Utils::getValue('template') == 'claber') {
                     $sheetData = Utils::getRemotePDFtoText(PROJ_ROOT . '/upload/' . $filename);
                 } else {
                     if (Utils::getValue('template') == 'monge') {
                         $sheetData = Utils::getRemotePDFtoText(PROJ_ROOT . '/upload/' . $filename);
                     } else {
                         $reader = new \Smalot\PdfParser\Parser();
                         $pdf = $reader->parseFile('upload/' . $filename);
                         $sheetData = $pdf->getPages();
                     }
                 }
             }
         } else {
             if (strpos($filename, '.txt') !== false || strpos($filename, '.TXT') !== false) {
                 $sheetData = file_get_contents('upload/' . $filename);
             }
         }
     }
     if (Utils::getValue('template') && !empty(Utils::getValue('template'))) {
         $className = ucfirst(Utils::getValue('template')) . 'Xml';
         if (class_exists($className)) {
             if (Utils::getValue('template') == 'amazon') {
コード例 #28
-1
ファイル: index.php プロジェクト: kordianbruck/TUM.sexy
/**
 * Return the text of the current week's "_FMI_" PDF, using the APC cache.
 *
 * A cached text for the current week number is returned directly. Otherwise the
 * links from crawl_page() are scanned; the last link that contains ".pdf"
 * (case-insensitive) and "_FMI_" and whose characters 16-17 equal the week number
 * is parsed, and its text is cached for two days.
 *
 * @return string|null Extracted text, or null when no link qualifies.
 */
function pdfToString()
{
    $week = date('W');
    $cacheKey = 'hungertext' . $week;
    //Serve from cache when the current week is already stored
    $cached = apc_fetch($cacheKey);
    if ($cached !== false) {
        return $cached;
    }
    //Otherwise look through all links
    $found = '';
    foreach (crawl_page(URL_PAGE_WITH_LINKS) as $candidate) {
        if (strpos(strtolower($candidate), '.pdf') === FALSE) {
            continue;
        }
        if (strpos($candidate, '_FMI_') === FALSE) {
            continue;
        }
        if ($week === substr($candidate, 16, 2)) {
            $found = URL_MAIN . $candidate;
        }
    }
    //Nothing to parse when no link was found
    if (empty($found)) {
        return;
    }
    $reader = new \Smalot\PdfParser\Parser();
    $contents = $reader->parseFile($found)->getText();
    //Keep it for two days
    apc_store($cacheKey, $contents, 2 * 24 * 3600);
    return $contents;
}
コード例 #29
-1
 /**
  * Execute the console command: parse the PDF at the resolved URL and
  * create or update one Product per parsed product entry.
  *
  * @return mixed
  */
 public function handle()
 {
     $source = $this->parseURLPDF();
     $reader = new \Smalot\PdfParser\Parser();
     $document = $reader->parseFile($source);
     foreach ($document->getPages() as $sheet) {
         $pageParser = new ParsePage($sheet->getArray());
         foreach ($pageParser->parseProducts() as $attributes) {
             // Reuse the product with this code when it exists, otherwise start a new one.
             $record = Product::whereCode($attributes['code'])->first();
             if ($record === null) {
                 $record = new Product();
             }
             $record->fill($attributes);
             $record->save();
         }
     }
 }