/**
 * Fetches the document file (if needed), extracts its metadata and text,
 * and stores the results on this object.
 *
 * Steps, in order:
 *  1. Calls download_if_necessary() and resolves the local file path.
 *  2. If the local file does not exist, prints "Not Found: <id>" and returns
 *     without touching any other property.
 *  3. Reads page count and document date via RISPDF2Text::document_pdf_metadata();
 *     an empty date is stored as null.
 *  4. Fills text_pdf through RISPDF2Text::document_text_pdf() only when the
 *     URL's last dot-separated segment (lower-cased) is "pdf"; otherwise
 *     text_pdf is set to an empty string.
 *  5. Runs RISPDF2Text::document_text_ocr() and RISPDF2Text::ris_ocr_clean()
 *     regardless of the extension, and marks the OCR source as
 *     Dokument::$OCR_VON_TESSERACT.
 *  6. Copies the local file to OMNIPAGE_PDF_DIR as "<id>.<extension>".
 *
 * NOTE(review): the extension is whatever follows the last "." in the URL,
 * so a URL with a query string or without any dot yields an unusual
 * extension — confirm that upstream URLs never look like that.
 *
 * @return void
 */
public function download_and_parse()
{
    $this->download_if_necessary();

    $localPath = $this->getLocalPath();
    if (!file_exists($localPath)) {
        echo "Not Found: {$this->id}\n";
        return;
    }

    // Extension = last dot-separated piece of the URL, lower-cased.
    $urlParts  = explode('.', $this->url);
    $extension = mb_strtolower(end($urlParts));

    $pdfInfo = RISPDF2Text::document_pdf_metadata($localPath);

    $this->seiten_anzahl  = $pdfInfo['seiten'];
    $this->datum_dokument = $pdfInfo['datum'];
    // Deliberately loose comparison: any value equal to "" is stored as null.
    if ($this->datum_dokument == '') {
        $this->datum_dokument = null;
    }

    // Embedded text is only extracted for PDFs; other formats get an empty string.
    $this->text_pdf = ($extension === 'pdf')
        ? RISPDF2Text::document_text_pdf($localPath)
        : '';

    // The page count is read back from the property, exactly as stored above.
    $this->text_ocr_raw       = RISPDF2Text::document_text_ocr($localPath, $this->seiten_anzahl);
    $this->text_ocr_corrected = RISPDF2Text::ris_ocr_clean($this->text_ocr_raw);
    $this->ocr_von            = Dokument::$OCR_VON_TESSERACT;

    $targetPath = OMNIPAGE_PDF_DIR . $this->id . '.' . $extension;
    copy($localPath, $targetPath);
}