/**
 * Recalculates the page count and document date of one document or of a batch of documents.
 *
 * Usage: ./yii recalc_documents [Dokument-ID|alle]
 *
 * With "alle", the ids are read from the "dokumente" table (id >= 579866, ascending);
 * otherwise the first argument is cast to an integer and used as the only document id.
 * For every document that can be loaded, download_if_necessary() and geo_extract() are
 * called, then the values returned by RISPDF2Text::document_pdf_metadata() are written to
 * seiten_anzahl / datum_dokument and the record is saved. Progress is echoed to stdout.
 *
 * Terminates the process with the usage line when no argument is given.
 *
 * @param array $args command arguments; $args[0] is a document id or the literal "alle"
 */
public function run($args)
 {
     // Not read in this method; presumably checked by code called below -- TODO confirm.
     // Guarded so a second invocation in the same process does not trigger a redefinition warning.
     if (!defined("VERYFAST")) {
         define("VERYFAST", true);
     }
     if (count($args) === 0) {
         die("./yii recalc_documents [Dokument-ID|alle]\n");
     }
     if ($args[0] === "alle") {
         // NOTE(review): the lower id bound is hard-coded; looks like a resume point of an earlier run -- confirm.
         $sql = Yii::app()->db->createCommand();
         $sql->select("id")->from("dokumente")->where("id >= 579866")->order("id");
         // queryColumn() takes bind parameters, not a column name; the query has no placeholders.
         $data = $sql->queryColumn();
     } else {
         $data = [intval($args[0])];
     }
     $anz = count($data);
     foreach ($data as $nr => $dok_id) {
         echo "{$nr} / {$anz} => {$dok_id}\n";
         /** @var Dokument $dokument */
         $dokument = Dokument::model()->findByPk($dok_id);
         if (!$dokument) {
             // Unknown id: nothing to recalculate.
             continue;
         }
         $dokument->download_if_necessary();
         $dokument->geo_extract();
         $absolute_filename = $dokument->getLocalPath();
         if (!file_exists($absolute_filename)) {
             // Same handling as download_and_parse(): without a local file there is no metadata to read,
             // and the stored values must not be overwritten.
             echo "Not Found: " . $dokument->id . "\n";
             continue;
         }
         $metadata = RISPDF2Text::document_pdf_metadata($absolute_filename);
         $dokument->seiten_anzahl = $metadata["seiten"];
         $dokument->datum_dokument = $metadata["datum"];
         // Loose comparison on purpose (mirrors download_and_parse()): an empty date is stored as NULL.
         if ($dokument->datum_dokument == "") {
             $dokument->datum_dokument = null;
         }
         if (!$dokument->save()) {
             // Do not print the success line below when the record was not written.
             echo "Not Saved: " . $dokument->id . "\n";
             continue;
         }
         echo $dokument->id . " => " . $dokument->seiten_anzahl . " / " . $dokument->datum_dokument . "\n";
     }
 }
 /**
  * Extracts metadata and text from the document's local file and stores the results on this object.
  *
  * Calls download_if_necessary() first. If the local file does not exist afterwards,
  * "Not Found: <id>" is echoed and nothing else happens. Otherwise the method sets
  * seiten_anzahl, datum_dokument (NULL when empty), text_pdf (only filled for a "pdf"
  * extension), text_ocr_raw, text_ocr_corrected and ocr_von, and copies the file to
  * OMNIPAGE_PDF_DIR as "<id>.<extension>". The object is not saved here.
  *
  * @return void
  */
 public function download_and_parse()
 {
     $this->download_if_necessary();
     $absolute_filename = $this->getLocalPath();
     if (!file_exists($absolute_filename)) {
         echo "Not Found: " . $this->id . "\n";
         return;
     }

     // Take the extension from the URL's path component only, so that a query string, a fragment
     // or a dot in the host / a directory name cannot end up in it (and thereby in the copy target).
     $url_path = parse_url($this->url, PHP_URL_PATH);
     $endung = is_string($url_path) ? mb_strtolower(pathinfo($url_path, PATHINFO_EXTENSION)) : "";

     $metadata = RISPDF2Text::document_pdf_metadata($absolute_filename);
     $this->seiten_anzahl = $metadata["seiten"];
     $this->datum_dokument = $metadata["datum"];
     // Loose comparison kept on purpose: any empty-ish date is stored as NULL.
     if ($this->datum_dokument == "") {
         $this->datum_dokument = null;
     }

     // Direct text extraction is only attempted for PDFs; OCR runs for every file type.
     if ($endung === "pdf") {
         $this->text_pdf = RISPDF2Text::document_text_pdf($absolute_filename);
     } else {
         $this->text_pdf = "";
     }
     $this->text_ocr_raw = RISPDF2Text::document_text_ocr($absolute_filename, $this->seiten_anzahl);
     $this->text_ocr_corrected = RISPDF2Text::ris_ocr_clean($this->text_ocr_raw);
     $this->ocr_von = Dokument::$OCR_VON_TESSERACT;

     // NOTE(review): presumably the pickup directory of a separate OmniPage OCR run -- confirm.
     $omnipage_ziel = OMNIPAGE_PDF_DIR . $this->id . "." . $endung;
     if (!copy($absolute_filename, $omnipage_ziel)) {
         echo "Copy failed: " . $this->id . "\n";
     }
 }