/**
 * Extract the text of the PDF stored in $this->binary and return it as an HTML fragment.
 *
 * The binary is parsed with the Smalot PDF parser and every double newline is removed
 * from the extracted text. When parsing throws, or produces no text, a placeholder that
 * reports the size of the binary in bytes is used instead.
 *
 * The text comes from the PDF itself and must be treated as untrusted, so it is
 * HTML-escaped before being wrapped in the <pre> element.
 *
 * @return string A "<pre>...</pre>" fragment containing the escaped text or the placeholder.
 */
public function readBinary()
{
    $parser = new \Smalot\PdfParser\Parser();

    try {
        $text = $parser->parseContent($this->binary)->getText();
        $text = str_replace("\n\n", '', $text);
    } catch (\Exception $exception) {
        // Best effort: an unparsable document falls through to the placeholder below.
        // The class name is fully qualified so the catch also works inside a namespace.
        $text = null;
    }

    // Compare explicitly instead of using empty(), which would also discard the text "0".
    if (!is_string($text) || $text === '') {
        $text = 'No readable text. File size: ' . strlen($this->binary) . 'B';
    }

    return '<pre>' . htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . '</pre>';
}
header('Content-Type: text/html; charset=UTF-8');
include 'vendor/autoload.php';

// Error text for the template; stays empty when nothing went wrong.
$message = '';
// Extracted text, one entry per page of the parsed PDF.
$texts = array();

if ($_SERVER['REQUEST_METHOD'] === 'POST') {
    try {
        $content = '';

        // Only accept a scalar string: a field posted as "inputUrl[]" arrives as an array,
        // and trim() on an array raises a TypeError that the catch below would not handle.
        $url = (isset($_POST['inputUrl']) && is_string($_POST['inputUrl']))
            ? trim($_POST['inputUrl'])
            : '';

        if ($url !== '' && preg_match('/^https?:\\/\\//', $url)) {
            // NOTE(review): fetching a user-supplied URL server-side can reach internal
            // hosts (SSRF). Restrict the allowed hosts if this script is publicly exposed.
            $content = file_get_contents($url);
        } elseif (
            isset($_FILES['inputFile']['type'], $_FILES['inputFile']['tmp_name'], $_FILES['inputFile']['error'])
            && $_FILES['inputFile']['error'] === UPLOAD_ERR_OK
            && $_FILES['inputFile']['type'] === 'application/pdf'
            && is_uploaded_file($_FILES['inputFile']['tmp_name'])
        ) {
            // The MIME type is reported by the client and is only a hint; the upload error
            // code and is_uploaded_file() confirm that a real upload is being read.
            $content = file_get_contents($_FILES['inputFile']['tmp_name']);
        }

        // file_get_contents() returns false on failure; an empty body is equally unusable.
        if ($content === false || $content === '') {
            throw new Exception('Unable to retrieve content. Check if it is really a pdf file.');
        }

        $parser = new \Smalot\PdfParser\Parser();
        $pdf = $parser->parseContent($content);

        foreach ($pdf->getPages() as $page) {
            $texts[] = $page->getText();
        }
    } catch (Exception $e) {
        // Retrieval and parsing failures are both reported through $message.
        $message = $e->getMessage();
    }
}
?> <!DOCTYPE html> <html> <head>
/**
 * Parse $this->rawData as a PDF and route each object's content to its handler.
 *
 * Every PDF object with non-empty content is tested against the regular expressions in
 * self::$knownContentTypes (handler method name => pattern). The first pattern that
 * matches decides the handler, which is invoked with the captured groups; the remaining
 * patterns are not tried for that object. After all objects have been visited, gaps are
 * filled, the labels are built from $this->labelsRaw and the temporary properties are reset.
 *
 * @uses processOuterBorders
 * @uses processGridLine
 * @uses processText
 * @uses processHiddenClue
 * @throws \Exception
 */
private function parseRawData()
{
    $pdfParser = new \Smalot\PdfParser\Parser();
    $document = $pdfParser->parseContent($this->rawData);

    foreach ($document->getObjects() as $pdfObject) {
        $body = $pdfObject->getContent();

        if ('' !== $body) {
            foreach (self::$knownContentTypes as $handler => $pattern) {
                if (1 !== preg_match($pattern, $body, $captures)) {
                    continue;
                }

                // First matching content type wins; stop testing this object.
                $this->$handler($captures);
                break;
            }
        }
    }

    $this->fillGaps();
    $this->labels = $this->labelFactory->getFromRaw($this->labelsRaw);
    $this->resetTempProperties();
}