/**
 * Extracts the text to index from a zip archive of gzip-compressed XML files.
 *
 * Every archive entry whose parent directory is named "X" is read, written to
 * a temporary file, decompressed with readgzfile() and parsed as XML. The
 * CONTENT attribute of each String node found under
 * alto/Layout/Page/PrintSpace/TextBlock/TextLine is appended to the result,
 * each value being preceded by a single space.
 *
 * The archive handle is stored in $this->zip and is intentionally left open:
 * NOTE(review): other methods of the class may rely on it — confirm before
 * adding a zip_close() here.
 *
 * @param string $filename Path of the zip archive to read.
 * @return string Concatenated text; an empty string when the archive cannot
 *                be opened or contains no matching content.
 */
function get_text($filename) {
    $texte_final = "";

    $this->zip = zip_open($filename);
    // zip_open() returns an integer error code on failure, which is truthy:
    // only a real resource may be handed to zip_read().
    if (!is_resource($this->zip)) {
        return $texte_final;
    }

    // zip_read() may also return an error code, so test for a resource here too.
    while (is_resource($zip_entry = zip_read($this->zip))) {
        // Only entries stored directly inside a directory named "X" are indexed.
        $tab = explode("/", dirname(zip_entry_name($zip_entry)));
        $type_images_doc_num = $tab[count($tab) - 1];
        if ($type_images_doc_num !== "X") {
            continue;
        }
        if (!zip_entry_open($this->zip, $zip_entry, "r")) {
            continue;
        }
        $xmlGz = zip_entry_read($zip_entry, zip_entry_filesize($zip_entry));
        zip_entry_close($zip_entry);

        // readgzfile() only works on a path, hence the round trip through a temp file.
        $tmpfile = tempnam("/tmp", "ocr");
        if ($tmpfile === false) {
            continue;
        }
        if (@file_put_contents($tmpfile, $xmlGz) === false) {
            @unlink($tmpfile);
            continue;
        }
        // readgzfile() writes the decompressed data to the output: capture it.
        ob_start();
        readgzfile($tmpfile);
        $xml = ob_get_clean();
        // The temporary file is no longer needed once its content is in memory.
        @unlink($tmpfile);

        $xml_dom = new xml_dom($xml, "iso-8859-1");
        $textBlocs = @$xml_dom->get_nodes("alto/Layout/Page/PrintSpace/TextBlock");
        if (!$textBlocs) {
            continue;
        }
        foreach ($textBlocs as $textBloc) {
            $textlines = $xml_dom->get_nodes("TextLine", $textBloc);
            if (!$textlines) {
                continue;
            }
            foreach ($textlines as $textline) {
                $strings = $xml_dom->get_nodes("String", $textline);
                if (!$strings) {
                    continue;
                }
                foreach ($strings as $string) {
                    $attrs = $xml_dom->get_attributes($string);
                    if (!$attrs) {
                        continue;
                    }
                    foreach ($attrs as $attr => $value) {
                        // Strict comparison: a numeric key must never match 'CONTENT'.
                        if ($attr === 'CONTENT') {
                            $texte_final .= " " . $value;
                        }
                    }
                }
            }
        }
    }

    return $texte_final;
}
/**
 * Converts one harvested record to unimarc and stores it in the
 * entrepot_source_<source_id> table.
 *
 * The record is wrapped in an oai_record (which exposes the unimarc XML), the
 * 001 field is used as the record reference, and one row is inserted per
 * header value (rs, ru, el, bl, hl, dt) and per field/subfield.
 *
 * When $this->del_old is set, existing rows for the reference are deleted and
 * the record is re-inserted; otherwise the record is inserted only if no row
 * exists yet for this connector and reference.
 *
 * $this->n_recu is incremented for every record that carries a reference,
 * whether or not rows were actually inserted.
 *
 * @param mixed $record Raw record handed to the oai_record constructor.
 * @return void
 */
function rec_record($record) {
    global $charset, $base_path;

    $rec = new oai_record($record, $charset, $base_path . "/admin/connecteurs/in/oai/xslt", $this->metadata_prefix, $this->xslt_transform, $this->sets_names);
    $rec_uni = $rec->unimarc;
    if ($rec->error) {
        return;
    }

    // We have a unimarc record: parse it before storing it.
    $rec_uni_dom = new xml_dom($rec_uni, $charset);
    if ($rec_uni_dom->error) {
        return;
    }

    $date_import = $rec->header["DATESTAMP"];
    $fs = $rec_uni_dom->get_nodes("unimarc/notice/f");
    if (!is_array($fs)) {
        // No field at all: nothing to look up, nothing to insert.
        $fs = array();
    }
    $nb_fields = count($fs);

    // Look for the 001 field, which carries the record reference.
    $ref = "";
    for ($i = 0; $i < $nb_fields; $i++) {
        if ($fs[$i]["ATTRIBS"]["c"] == "001") {
            $ref = $rec_uni_dom->get_datas($fs[$i]);
            break;
        }
    }
    if (!$ref) {
        return;
    }

    // Values shared by every query below, escaped once.
    $connector_sql = addslashes($this->get_id());
    $ref_sql = addslashes($ref);
    $date_sql = addslashes($date_import);
    $table = "entrepot_source_" . $this->source_id;
    $insert_into = "insert into " . $table . " (connector_id,source_id,ref,date_import,ufield,usubfield,field_order,subfield_order,value,i_value,recid) values(";

    $ref_exists = 0;
    if ($this->del_old) {
        // Old records are not kept: remove whatever is stored for this reference.
        $requete = "delete from " . $table . " where connector_id='" . $connector_sql . "' and ref='" . $ref_sql . "'";
        pmb_mysql_query($requete);
    } else {
        // Old records are kept: check whether this reference is already stored.
        $requete = "select count(*) from " . $table . " where connector_id='" . $connector_sql . "' and ref='" . $ref_sql . "'";
        $rref = pmb_mysql_query($requete);
        if ($rref) {
            $ref_exists = pmb_mysql_result($rref, 0, 0);
        }
    }

    if ($this->del_old || !$ref_exists) {
        // Header values of the record.
        $n_header = array(
            "rs" => $rec_uni_dom->get_value("unimarc/notice/rs"),
            "ru" => $rec_uni_dom->get_value("unimarc/notice/ru"),
            "el" => $rec_uni_dom->get_value("unimarc/notice/el"),
            "bl" => $rec_uni_dom->get_value("unimarc/notice/bl"),
            "hl" => $rec_uni_dom->get_value("unimarc/notice/hl"),
            "dt" => $rec_uni_dom->get_value("unimarc/notice/dt"),
        );

        // Obtain a record id; without one no valid row can be written, so the
        // inserts are skipped instead of sending malformed SQL.
        $requete = "insert into external_count (recid) values('" . addslashes($this->get_id() . " " . $this->source_id . " " . $ref) . "')";
        $rid = pmb_mysql_query($requete);
        if ($rid) {
            $recid = pmb_mysql_insert_id();

            foreach ($n_header as $hc => $code) {
                $requete = $insert_into . "\n\t\t\t\t\t\t\t'" . $connector_sql . "'," . $this->source_id . ",'" . $ref_sql . "','" . $date_sql . "',\n\t\t\t\t\t\t\t'" . $hc . "','',-1,0,'" . addslashes($code) . "','',{$recid})";
                pmb_mysql_query($requete);
            }

            for ($i = 0; $i < $nb_fields; $i++) {
                $ufield = $fs[$i]["ATTRIBS"]["c"];
                $field_order = $i;
                $ss = $rec_uni_dom->get_nodes("s", $fs[$i]);
                if (is_array($ss)) {
                    $nb_subfields = count($ss);
                    for ($j = 0; $j < $nb_subfields; $j++) {
                        $usubfield = $ss[$j]["ATTRIBS"]["c"];
                        $value = $rec_uni_dom->get_datas($ss[$j]);
                        $subfield_order = $j;
                        $requete = $insert_into . "\n\t\t\t\t\t\t\t\t\t'" . $connector_sql . "'," . $this->source_id . ",'" . $ref_sql . "','" . $date_sql . "',\n\t\t\t\t\t\t\t\t\t'" . addslashes($ufield) . "','" . addslashes($usubfield) . "'," . $field_order . "," . $subfield_order . ",'" . addslashes($value) . "',\n\t\t\t\t\t\t\t\t\t' " . addslashes(strip_empty_words($value)) . " ',{$recid})";
                        pmb_mysql_query($requete);
                    }
                } else {
                    // Field without subfields: store it with an empty subfield
                    // code and order 0 rather than the values left over from
                    // the previous field.
                    $value = $rec_uni_dom->get_datas($fs[$i]);
                    $requete = $insert_into . "\n\t\t\t\t\t\t\t\t'" . $connector_sql . "'," . $this->source_id . ",'" . $ref_sql . "','" . $date_sql . "',\n\t\t\t\t\t\t\t\t'" . addslashes($ufield) . "',''," . $field_order . ",0,'" . addslashes($value) . "',\n\t\t\t\t\t\t\t\t' " . addslashes(strip_empty_words($value)) . " ',{$recid})";
                    pmb_mysql_query($requete);
                }
            }
        }
    }

    $this->n_recu++;
}
/**
 * Issues a ListMetadataFormats request and collects the formats returned.
 *
 * The request URL is built from $this->url_base, with the identifier appended
 * when one is given. Unless an error occurred or a callback was supplied,
 * each entry of $this->prt->records is parsed and turned into an array with
 * the keys PREFIX, SCHEMA and NAMESPACE. When no identifier was given, the
 * collected list is also stored in $this->metadatas.
 *
 * @param string $identifier        Optional record identifier to restrict the request to.
 * @param mixed  $callback          When truthy, no format is collected by this method.
 * @param mixed  $callback_progress Passed through to send_request().
 * @return array List of formats (empty on error or when a callback is given).
 */
function list_metadata_formats($identifier = "", $callback = "", $callback_progress = "") {
    $this->clear_error();

    $query = "?verb=ListMetadataFormats";
    if ($identifier) {
        $query .= "&identifier=" . rawurlencode($identifier);
    }
    $this->send_request($this->url_base . $query, $callback_progress);

    $formats = array();
    if ($this->error || $callback) {
        return $formats;
    }

    for ($n = 0; $n < count($this->prt->records); $n++) {
        $dom = new xml_dom($this->prt->records[$n], $this->charset);
        if ($dom->error) {
            // Unparsable entry: ignore it and move on to the next one.
            continue;
        }
        $formats[] = array(
            "PREFIX" => $dom->get_value("metadataFormat/metadataPrefix"),
            "SCHEMA" => $dom->get_value("metadataFormat/schema"),
            "NAMESPACE" => $dom->get_value("metadataFormat/metadataNamespace"),
        );
    }

    // Only a request made without an identifier refreshes the stored list.
    if ($identifier == "") {
        $this->metadatas = $formats;
    }

    return $formats;
}
/**
 * Converts one record to unimarc through the connector's XSLT and stores it
 * in the entrepot_source_<source_id> table.
 *
 * The record array is first serialised to an XML document (a <record> element
 * with key/version attributes, its content, and its attachments), which is
 * transformed with zotero_atom_json.xsl. The 001 field of the result is used
 * as the record reference; any rows already stored for that reference are
 * deleted, then one row is inserted per header value (rs, ru, el, bl, hl, dt)
 * and per field/subfield.
 *
 * @param array $record Record data; the keys read here are 'zapi:key',
 *                      'zapi:version', 'content', 'attachments' and 'url'.
 * @return mixed 0 when the transformation or parsing fails, an empty string
 *               when no 001 field is found, otherwise the record reference.
 */
public function rec_record($record = array()) {
    global $dbh, $charset, $base_path;

    $rec_key = isset($record['zapi:key']) ? $record['zapi:key'] : '';
    $rec_version = isset($record['zapi:version']) ? $record['zapi:version'] : '';

    $xml = new DOMDocument('1.0', 'utf-8');
    $xml_rec = $xml->createElement('record');
    $xml->appendChild($xml_rec);
    $xml_rec->setAttribute('key', $rec_key);
    $xml_rec->setAttribute('version', $rec_version);

    if (!empty($record['content']) && is_array($record['content'])) {
        foreach ($record['content'] as $k => $v) {
            $this->recurse_record($xml, $xml_rec, $k, $v);
        }
    }

    if (!empty($record['attachments']) && is_array($record['attachments'])) {
        $xml_atts = $xml->createElement('attachments');
        $xml_rec->appendChild($xml_atts);
        foreach ($record['attachments'] as $attachment) {
            $xml_att = $xml->createElement('attachment');
            $xml_atts->appendChild($xml_att);
            if (empty($attachment['content']) || !is_array($attachment['content'])) {
                continue;
            }
            $xml_att->setAttribute('zapi:key', isset($attachment['zapi:key']) ? $attachment['zapi:key'] : '');
            // NOTE(review): the attachment receives the version and url of the
            // parent record, and the url element is added once per content
            // entry; kept as-is because the stylesheet may depend on it —
            // confirm whether the attachment's own values were intended.
            $xml_att->setAttribute('zapi:version', $rec_version);
            foreach ($attachment['content'] as $k1 => $v1) {
                if (!empty($record['url'])) {
                    // A text node is used because createElement() does not
                    // escape '&', which is common in URLs.
                    $new_elt = $xml->createElement('url');
                    $new_elt->appendChild($xml->createTextNode($record['url']));
                    $xml_att->appendChild($new_elt);
                }
                $this->recurse_record($xml, $xml_att, $k1, $v1);
            }
        }
    }

    $ref = 0;

    $xsl_filename = $base_path . '/admin/connecteurs/in/zotero/xslt/zotero_atom_json.xsl';
    $proc = new XSLTProcessor();
    $xslDoc = new DOMDocument();
    if (!$xslDoc->load($xsl_filename)) {
        return $ref;
    }
    $proc->registerPHPFunctions();
    if (!$proc->importStylesheet($xslDoc)) {
        return $ref;
    }
    $out = $proc->transformToXml($xml);
    if (!$out) {
        return $ref;
    }

    // We have a unimarc record: parse it before storing it.
    $rec_uni_dom = new xml_dom($out, $charset);
    if ($rec_uni_dom->error) {
        return $ref;
    }

    $date_import = date('Y-m-d H:i:s');
    $fs = $rec_uni_dom->get_nodes("unimarc/notice/f");
    if (!is_array($fs)) {
        // No field at all: nothing to look up, nothing to insert.
        $fs = array();
    }
    $nb_fields = count($fs);

    // Look for the 001 field, which carries the record reference.
    $ref = "";
    for ($i = 0; $i < $nb_fields; $i++) {
        if ($fs[$i]["ATTRIBS"]["c"] == "001") {
            $ref = $rec_uni_dom->get_datas($fs[$i]);
            break;
        }
    }
    if (!$ref) {
        return $ref;
    }

    // Values shared by every query below, escaped once.
    $connector_sql = addslashes($this->get_id());
    $ref_sql = addslashes($ref);
    $date_sql = addslashes($date_import);
    $table = "entrepot_source_" . $this->source_id;
    $insert_into = "insert into " . $table . " (connector_id,source_id,ref,date_import,ufield,usubfield,field_order,subfield_order,value,i_value,recid) values(";

    // Remove the rows previously stored for this reference.
    $q = "delete from " . $table . " where ref='" . $ref_sql . "'";
    @pmb_mysql_query($q, $dbh);

    // Header values of the record.
    $n_header = array(
        "rs" => $rec_uni_dom->get_value("unimarc/notice/rs"),
        "ru" => $rec_uni_dom->get_value("unimarc/notice/ru"),
        "el" => $rec_uni_dom->get_value("unimarc/notice/el"),
        "bl" => $rec_uni_dom->get_value("unimarc/notice/bl"),
        "hl" => $rec_uni_dom->get_value("unimarc/notice/hl"),
        "dt" => $rec_uni_dom->get_value("unimarc/notice/dt"),
    );

    // Obtain a record id; without one no valid row can be written, so the
    // inserts are skipped instead of sending malformed SQL.
    $requete = "insert into external_count (recid, source_id) values('" . addslashes($this->get_id() . " " . $this->source_id . " " . $ref) . "', " . $this->source_id . ")";
    $rid = pmb_mysql_query($requete);
    if (!$rid) {
        return $ref;
    }
    $recid = pmb_mysql_insert_id();

    foreach ($n_header as $hc => $code) {
        $requete = $insert_into . "\r\n\t\t\t\t\t\t'" . $connector_sql . "'," . $this->source_id . ",'" . $ref_sql . "','" . $date_sql . "',\r\n\t\t\t\t\t\t'" . $hc . "','',-1,0,'" . addslashes($code) . "','',{$recid})";
        pmb_mysql_query($requete);
    }

    for ($i = 0; $i < $nb_fields; $i++) {
        $ufield = $fs[$i]["ATTRIBS"]["c"];
        $field_order = $i;
        $ss = $rec_uni_dom->get_nodes("s", $fs[$i]);
        if (is_array($ss)) {
            $nb_subfields = count($ss);
            for ($j = 0; $j < $nb_subfields; $j++) {
                $usubfield = $ss[$j]["ATTRIBS"]["c"];
                $value = $rec_uni_dom->get_datas($ss[$j]);
                $subfield_order = $j;
                $requete = $insert_into . "\r\n\t\t\t\t\t\t\t\t'" . $connector_sql . "'," . $this->source_id . ",'" . $ref_sql . "','" . $date_sql . "',\r\n\t\t\t\t\t\t\t\t'" . addslashes($ufield) . "','" . addslashes($usubfield) . "'," . $field_order . "," . $subfield_order . ",'" . addslashes($value) . "',\r\n\t\t\t\t\t\t\t\t' " . addslashes(strip_empty_words($value)) . " ',{$recid})";
                pmb_mysql_query($requete);
            }
        } else {
            // Field without subfields: store it with an empty subfield code
            // and order 0 rather than the values left over from the previous
            // field.
            $value = $rec_uni_dom->get_datas($fs[$i]);
            $requete = $insert_into . "\r\n\t\t\t\t\t\t\t'" . $connector_sql . "'," . $this->source_id . ",'" . $ref_sql . "','" . $date_sql . "',\r\n\t\t\t\t\t\t\t'" . addslashes($ufield) . "',''," . $field_order . ",0,'" . addslashes($value) . "',\r\n\t\t\t\t\t\t\t' " . addslashes(strip_empty_words($value)) . " ',{$recid})";
            pmb_mysql_query($requete);
        }
    }

    return $ref;
}
/**
 * Stores every notice found in a unimarc XML document.
 *
 * Nothing is done when the XML string is empty once trimmed. Otherwise the
 * document is parsed and rec_record() is called once per unimarc/notice node,
 * receiving the parsed document, the node, and the two identifiers.
 *
 * @param string $noticesxml XML document containing the notices.
 * @param mixed  $source_id  Passed through to rec_record().
 * @param mixed  $search_id  Passed through to rec_record().
 * @return void
 */
function rec_records($noticesxml, $source_id, $search_id) {
    global $charset, $base_path;

    if (trim($noticesxml)) {
        $dom = new xml_dom($noticesxml, $charset);
        foreach ($dom->get_nodes("unimarc/notice") as $notice_node) {
            $this->rec_record($dom, $notice_node, $source_id, $search_id);
        }
    }
}