Exemplo n.º 1
0
 /**
  * Extract the text to index from a zip archive.
  *
  * Every archive entry whose parent directory is named "X" is read, written
  * to a temporary file, decompressed with readgzfile() and parsed as XML.
  * The CONTENT attribute of each
  * alto/Layout/Page/PrintSpace/TextBlock/TextLine/String node is appended to
  * the result, each value preceded by a single space.
  *
  * @param string $filename Path of the zip archive to read.
  * @return string Concatenated CONTENT values; an empty string when the
  *                archive cannot be opened or holds no matching text.
  */
 function get_text($filename)
 {
     // Always return a defined string, even when nothing is extracted
     $texte_final = "";
     $this->zip = zip_open($filename);
     // zip_open() reports failure with an integer error code, which is
     // truthy: only a real resource may be handed to zip_read()
     if (!is_resource($this->zip)) {
         return $texte_final;
     }
     while ($zip_entry = zip_read($this->zip)) {
         // Only entries stored directly inside a directory named "X" are indexed
         $tab = explode("/", dirname(zip_entry_name($zip_entry)));
         $type_images_doc_num = $tab[count($tab) - 1];
         if ($type_images_doc_num != "X") {
             continue;
         }
         if (!zip_entry_open($this->zip, $zip_entry, "r")) {
             continue;
         }
         $xmlGz = zip_entry_read($zip_entry, zip_entry_filesize($zip_entry));
         zip_entry_close($zip_entry);

         // readgzfile() needs a file on disk and writes to the output
         // stream, hence the temporary file and the output buffer
         $tmpfile = tempnam("/tmp", "ocr");
         if ($tmpfile === false) {
             continue;
         }
         @file_put_contents($tmpfile, $xmlGz);
         ob_start();
         readgzfile($tmpfile);
         $xml = ob_get_clean();
         // The temporary copy is no longer needed once decompressed
         @unlink($tmpfile);

         $xml_dom = new xml_dom($xml, "iso-8859-1");
         $textBlocs = @$xml_dom->get_nodes("alto/Layout/Page/PrintSpace/TextBlock");
         if (!$textBlocs) {
             continue;
         }
         foreach ($textBlocs as $textBloc) {
             $textlines = $xml_dom->get_nodes("TextLine", $textBloc);
             if (!is_array($textlines)) {
                 continue;
             }
             foreach ($textlines as $textline) {
                 $strings = $xml_dom->get_nodes("String", $textline);
                 if (!is_array($strings)) {
                     continue;
                 }
                 foreach ($strings as $string) {
                     $attrs = $xml_dom->get_attributes($string);
                     if (!is_array($attrs)) {
                         continue;
                     }
                     foreach ($attrs as $attr => $value) {
                         if ((string) $attr === 'CONTENT') {
                             $texte_final .= " " . $value;
                         }
                     }
                 }
             }
         }
     }
     // NOTE(review): the archive handle stays open in $this->zip, as before;
     // confirm whether another method owns closing it before adding zip_close()
     return $texte_final;
 }
Exemplo n.º 2
0
 /**
  * Convert one harvested record to unimarc and store it in the
  * entrepot_source_<source_id> table.
  *
  * The record is transformed through oai_record, the value of field 001 is
  * used as its reference, and one row is inserted per header element
  * (rs, ru, el, bl, hl, dt) and per field/subfield. When $this->del_old is
  * set, rows already stored for the reference are deleted first; otherwise
  * the record is stored only if no row exists yet for that reference.
  * $this->n_recu is incremented for every record that has a reference.
  *
  * @param mixed $record Raw record, passed unchanged to oai_record.
  * @return void
  */
 function rec_record($record)
 {
     global $charset, $base_path;
     $rec = new oai_record($record, $charset, $base_path . "/admin/connecteurs/in/oai/xslt", $this->metadata_prefix, $this->xslt_transform, $this->sets_names);
     $rec_uni = $rec->unimarc;
     if ($rec->error) {
         return;
     }
     // A unimarc record is available: parse it before storing
     $rec_uni_dom = new xml_dom($rec_uni, $charset);
     if ($rec_uni_dom->error) {
         return;
     }
     $date_import = $rec->header["DATESTAMP"];
     $fs = $rec_uni_dom->get_nodes("unimarc/notice/f");
     // get_nodes() may not return an array; count() on such a value fails
     if (!is_array($fs)) {
         $fs = array();
     }
     $nb_fields = count($fs);

     // Look for field 001, which carries the record reference
     $ref = "";
     for ($i = 0; $i < $nb_fields; $i++) {
         if ($fs[$i]["ATTRIBS"]["c"] == "001") {
             $ref = $rec_uni_dom->get_datas($fs[$i]);
             break;
         }
     }
     if (!$ref) {
         return;
     }

     $table = "entrepot_source_" . $this->source_id;
     $connector = addslashes($this->get_id());
     $where = " where connector_id='" . $connector . "' and ref='" . addslashes($ref) . "'";

     // Default to "not stored yet" so a failed lookup still leads to an insert
     $ref_exists = 0;
     if ($this->del_old) {
         // Old records are not kept: remove the rows of this reference
         pmb_mysql_query("delete from " . $table . $where);
     } else {
         // Old records are kept: check whether this reference is already stored
         $rref = pmb_mysql_query("select count(*) from " . $table . $where);
         if ($rref) {
             $ref_exists = pmb_mysql_result($rref, 0, 0);
         }
     }

     if ($this->del_old || !$ref_exists) {
         // Header elements of the record
         $n_header = array();
         foreach (array("rs", "ru", "el", "bl", "hl", "dt") as $hc) {
             $n_header[$hc] = $rec_uni_dom->get_value("unimarc/notice/" . $hc);
         }
         // Reserve a record id
         $requete = "insert into external_count (recid) values('" . addslashes($this->get_id() . " " . $this->source_id . " " . $ref) . "')";
         $rid = pmb_mysql_query($requete);
         // Without a record id every following insert would be malformed SQL
         if ($rid) {
             $recid = pmb_mysql_insert_id();
             // Columns shared by every row of this record
             $insert_prefix = "insert into " . $table . " (connector_id,source_id,ref,date_import,ufield,usubfield,field_order,subfield_order,value,i_value,recid) values('" . $connector . "'," . $this->source_id . ",'" . addslashes($ref) . "','" . addslashes((string) $date_import) . "',";
             foreach ($n_header as $hc => $code) {
                 pmb_mysql_query($insert_prefix . "'" . $hc . "','',-1,0,'" . addslashes((string) $code) . "',''," . $recid . ")");
             }
             for ($i = 0; $i < $nb_fields; $i++) {
                 $ufield = $fs[$i]["ATTRIBS"]["c"];
                 $ss = $rec_uni_dom->get_nodes("s", $fs[$i]);
                 // Each cell is (subfield code, subfield order, value)
                 $cells = array();
                 if (is_array($ss)) {
                     $nb_subfields = count($ss);
                     for ($j = 0; $j < $nb_subfields; $j++) {
                         $cells[] = array($ss[$j]["ATTRIBS"]["c"], $j, $rec_uni_dom->get_datas($ss[$j]));
                     }
                 } else {
                     // Field without subfields: use an empty code and order 0
                     // rather than the values left over from the previous field
                     $cells[] = array("", 0, $rec_uni_dom->get_datas($fs[$i]));
                 }
                 foreach ($cells as $cell) {
                     $usubfield = $cell[0];
                     $subfield_order = $cell[1];
                     $value = $cell[2];
                     pmb_mysql_query($insert_prefix . "'" . addslashes((string) $ufield) . "','" . addslashes((string) $usubfield) . "'," . $i . "," . $subfield_order . ",'" . addslashes((string) $value) . "',' " . addslashes(strip_empty_words($value)) . " '," . $recid . ")");
                 }
             }
         }
     }
     $this->n_recu++;
 }
Exemplo n.º 3
0
 /**
  * Send a ListMetadataFormats request and collect the formats it returns.
  *
  * Each returned entry is an array with the keys PREFIX, SCHEMA and
  * NAMESPACE. The list is also stored in $this->metadatas when no
  * identifier was given. Nothing is parsed when the request ends in error
  * or when $callback is set.
  *
  * @param string $identifier        Optional identifier added to the request.
  * @param mixed  $callback          When truthy, the response is not parsed here.
  * @param mixed  $callback_progress Passed through to send_request().
  * @return array List of formats, empty when nothing was parsed.
  */
 function list_metadata_formats($identifier = "", $callback = "", $callback_progress = "")
 {
     $this->clear_error();

     $request = $this->url_base . "?verb=ListMetadataFormats";
     if ($identifier) {
         $request = $request . "&identifier=" . rawurlencode($identifier);
     }
     $this->send_request($request, $callback_progress);

     $formats = array();
     // Failed request, or parsing delegated to a callback: return the empty list
     if ($this->error || $callback) {
         return $formats;
     }

     $position = 0;
     while ($position < count($this->prt->records)) {
         $dom = new xml_dom($this->prt->records[$position], $this->charset);
         $position++;
         // Records that cannot be parsed are skipped
         if ($dom->error) {
             continue;
         }
         $formats[] = array(
             "PREFIX" => $dom->get_value("metadataFormat/metadataPrefix"),
             "SCHEMA" => $dom->get_value("metadataFormat/schema"),
             "NAMESPACE" => $dom->get_value("metadataFormat/metadataNamespace"),
         );
     }

     // The list is only kept on the instance for a request without identifier
     if ($identifier == "") {
         $this->metadatas = $formats;
     }
     return $formats;
 }
Exemplo n.º 4
0
 /**
  * Convert one record array to unimarc through the zotero_atom_json.xsl
  * stylesheet and store it in the entrepot_source_<source_id> table.
  *
  * The record is first serialised as a <record> XML document (content and
  * attachments), transformed with XSLT, then stored: rows already present
  * for the reference (value of field 001) are deleted, and one row is
  * inserted per header element (rs, ru, el, bl, hl, dt) and per
  * field/subfield.
  *
  * @param array $record Record data; the keys read here are 'zapi:key',
  *                      'zapi:version', 'content', 'attachments' and 'url'.
  * @return mixed 0 when the transformation or its parsing fails, otherwise
  *               the reference read from field 001 ("" when there is none).
  */
 public function rec_record($record = array())
 {
     global $dbh, $charset, $base_path;
     $xml = new DOMDocument('1.0', 'utf-8');
     $xml_rec = $xml->createElement('record');
     $xml->appendChild($xml_rec);
     // The keys may be missing (the default argument is an empty array)
     $key = isset($record['zapi:key']) ? $record['zapi:key'] : '';
     $version = isset($record['zapi:version']) ? $record['zapi:version'] : '';
     $xml_rec->setAttribute('key', $key);
     $xml_rec->setAttribute('version', $version);
     if (isset($record['content']) && is_array($record['content']) && count($record['content'])) {
         foreach ($record['content'] as $k => $v) {
             $this->recurse_record($xml, $xml_rec, $k, $v);
         }
     }
     if (isset($record['attachments']) && is_array($record['attachments']) && count($record['attachments'])) {
         $xml_atts = $xml->createElement('attachments');
         $xml_rec->appendChild($xml_atts);
         foreach ($record['attachments'] as $attachment) {
             $xml_att = $xml->createElement('attachment');
             $xml_atts->appendChild($xml_att);
             if (!isset($attachment['content']) || !is_array($attachment['content'])) {
                 continue;
             }
             foreach ($attachment['content'] as $k1 => $v1) {
                 $xml_att->setAttribute('zapi:key', isset($attachment['zapi:key']) ? $attachment['zapi:key'] : '');
                 // NOTE(review): the version and url below come from $record,
                 // not $attachment, and the url is appended once per content
                 // key; kept as found, confirm whether this is intended
                 $xml_att->setAttribute('zapi:version', $version);
                 if (!empty($record['url'])) {
                     // createElement() does not escape its value argument;
                     // a text node keeps URLs containing "&" intact
                     $new_elt = $xml->createElement('url');
                     $new_elt->appendChild($xml->createTextNode($record['url']));
                     $xml_att->appendChild($new_elt);
                 }
                 $this->recurse_record($xml, $xml_att, $k1, $v1);
             }
         }
     }

     $xsl_filename = $base_path . '/admin/connecteurs/in/zotero/xslt/zotero_atom_json.xsl';
     $proc = new XSLTProcessor();
     $xslDoc = new DOMDocument();
     // Without a stylesheet there is nothing to transform
     if (!$xslDoc->load($xsl_filename)) {
         return 0;
     }
     $proc->registerPHPFunctions();
     $proc->importStylesheet($xslDoc);
     $out = $proc->transformToXml($xml);
     if (!$out) {
         return 0;
     }

     // A unimarc record is available: parse it before storing
     $rec_uni_dom = new xml_dom($out, $charset);
     if ($rec_uni_dom->error) {
         return 0;
     }
     $date_import = date('Y-m-d H:i:s');
     $fs = $rec_uni_dom->get_nodes("unimarc/notice/f");
     // get_nodes() may not return an array; count() on such a value fails
     if (!is_array($fs)) {
         $fs = array();
     }
     $nb_fields = count($fs);

     // Look for field 001, which carries the record reference
     $ref = "";
     for ($i = 0; $i < $nb_fields; $i++) {
         if ($fs[$i]["ATTRIBS"]["c"] == "001") {
             $ref = $rec_uni_dom->get_datas($fs[$i]);
             break;
         }
     }
     if (!$ref) {
         return $ref;
     }

     $table = "entrepot_source_" . $this->source_id;
     // Remove the rows previously stored for this reference
     $q = "delete from " . $table . " where ref='" . addslashes($ref) . "'";
     @pmb_mysql_query($q, $dbh);

     // Header elements of the record
     $n_header = array();
     foreach (array("rs", "ru", "el", "bl", "hl", "dt") as $hc) {
         $n_header[$hc] = $rec_uni_dom->get_value("unimarc/notice/" . $hc);
     }
     // Reserve a record id
     $requete = "insert into external_count (recid, source_id) values('" . addslashes($this->get_id() . " " . $this->source_id . " " . $ref) . "', " . $this->source_id . ")";
     $rid = pmb_mysql_query($requete);
     // Without a record id every following insert would be malformed SQL
     if (!$rid) {
         return $ref;
     }
     $recid = pmb_mysql_insert_id();

     // Columns shared by every row of this record
     $insert_prefix = "insert into " . $table . " (connector_id,source_id,ref,date_import,ufield,usubfield,field_order,subfield_order,value,i_value,recid) values('" . addslashes($this->get_id()) . "'," . $this->source_id . ",'" . addslashes($ref) . "','" . addslashes($date_import) . "',";
     foreach ($n_header as $hc => $code) {
         pmb_mysql_query($insert_prefix . "'" . $hc . "','',-1,0,'" . addslashes((string) $code) . "',''," . $recid . ")");
     }
     for ($i = 0; $i < $nb_fields; $i++) {
         $ufield = $fs[$i]["ATTRIBS"]["c"];
         $ss = $rec_uni_dom->get_nodes("s", $fs[$i]);
         // Each cell is (subfield code, subfield order, value)
         $cells = array();
         if (is_array($ss)) {
             $nb_subfields = count($ss);
             for ($j = 0; $j < $nb_subfields; $j++) {
                 $cells[] = array($ss[$j]["ATTRIBS"]["c"], $j, $rec_uni_dom->get_datas($ss[$j]));
             }
         } else {
             // Field without subfields: use an empty code and order 0
             // rather than the values left over from the previous field
             $cells[] = array("", 0, $rec_uni_dom->get_datas($fs[$i]));
         }
         foreach ($cells as $cell) {
             $usubfield = $cell[0];
             $subfield_order = $cell[1];
             $value = $cell[2];
             pmb_mysql_query($insert_prefix . "'" . addslashes((string) $ufield) . "','" . addslashes((string) $usubfield) . "'," . $i . "," . $subfield_order . ",'" . addslashes((string) $value) . "',' " . addslashes(strip_empty_words($value)) . " '," . $recid . ")");
         }
     }
     return $ref;
 }
Exemplo n.º 5
0
 /**
  * Parse a unimarc XML document and store each notice it contains.
  *
  * Every unimarc/notice node is handed to rec_record() together with the
  * parsed document, the source id and the search id. Blank input is ignored.
  *
  * @param string $noticesxml XML document holding the notices.
  * @param mixed  $source_id  Passed through to rec_record().
  * @param mixed  $search_id  Passed through to rec_record().
  * @return void
  */
 function rec_records($noticesxml, $source_id, $search_id)
 {
     global $charset;
     if (!trim($noticesxml)) {
         return;
     }
     $rec_uni_dom = new xml_dom($noticesxml, $charset);
     $notices = $rec_uni_dom->get_nodes("unimarc/notice");
     // get_nodes() may not return an array when no notice is found
     if (!is_array($notices)) {
         return;
     }
     foreach ($notices as $anotice) {
         $this->rec_record($rec_uni_dom, $anotice, $source_id, $search_id);
     }
 }