/**
 * Fetches every queued item from the remote service and replicates it locally:
 * intrinsic fields, attributes, preferred label, related records (imported
 * recursively) and, for ca_objects, media representations.
 *
 * Records that were already handled during this run (tracked in
 * $this->opa_processed_records) are skipped. Problems are reported with print
 * and processing moves on to the next item; nothing is thrown to the caller.
 *
 * @param array $pa_item_queue List of items to import; each entry is an array with keys 'table', 'id' and 'idno'
 * @param object $po_client Service client; getItem() and getObjectRepresentations() are called on it
 * @param array $pa_config Import configuration; the 'importRelatedFor' key (table => related table => relationship type codes) is read
 * @param array $pa_tables Tables currently being imported further up the recursion (table name => true); used to avoid bouncing back and forth between tables
 * @param string $ps_code Code appended to log messages
 * @return void
 */
private function fetchAndImport($pa_item_queue, $po_client, $pa_config, $pa_tables, $ps_code) {
	if (!is_array($pa_tables)) { $pa_tables = array(); }

	$o_dm = Datamodel::load();
	$t_locale = new ca_locales();
	$t_list = new ca_lists();
	$pn_rep_type_id = $t_list->getItemIDFromList('object_representation_types', 'front');

	foreach ($pa_item_queue as $va_item) {
		$vs_table = $va_item['table'];
		$va_import_relationships_from = isset($pa_config['importRelatedFor'][$vs_table]) ? $pa_config['importRelatedFor'][$vs_table] : null;

		$vn_id = $va_item['id'];
		if (!$vn_id) {
			print "[Notice] SKIP CAUSE NO ID ({$ps_code})\n";
			continue;
		}
		if (isset($this->opa_processed_records[$vs_table . '/' . $vn_id])) { continue; }

		$vs_idno = trim((string) $va_item['idno']);

		try {
			$o_xml = $po_client->getItem($vs_table, $vn_id)->get();
		} catch (Exception $e) {
			print "[ERROR] While trying to get item information: " . $e->getMessage() . "\n";
			continue;
		}
		$o_item = $o_xml->getItem;

		$t_instance = $o_dm->getInstanceByTableName($vs_table, false);
		if (!$t_instance) {
			// Unknown table: nothing can be imported for this item
			print "[ERROR] Could not get instance for table [{$vs_table}] ({$ps_code})\n";
			continue;
		}
		$t_instance_label = $t_instance->getLabelTableInstance();

		$vs_label_fld = $t_instance->getLabelDisplayField();
		$vs_label = (string) $o_item->preferred_labels->en_US->{$vs_label_fld};
		print "[Notice] Processing [{$vs_table}] {$vs_label} [{$vs_idno}] ({$ps_code})\n";

		$t_instance_label->clear();

		// Look for existing record
		$vb_skip = false;
		$vb_update = false;
		if ($vs_idno && $t_instance->load(array('idno' => $vs_idno))) {
			$vb_update = true;
			if (!$t_instance->getPrimaryKey()) {
				// Record is still written below, but relationships and media are skipped
				$vb_skip = true;
				print "[ERROR] Could not load instance for [{$vs_idno}]\n";
			}
			$this->fetchAndImportResetRecord($t_instance, $va_import_relationships_from);
		}

		// create new one
		if (!$vb_update) {
			$t_instance->clear();
		}
		$t_instance->setMode(ACCESS_WRITE);

		// add intrinsics
		// TODO: Need to properly handle foreign-key intrinsics when the item they point to doesn't exist
		// eg. source_id fields, various ca_objects and ca_object_lots intrinsics, etc.
		if ($vs_table == 'ca_list_items') {
			$t_instance->set('list_id', $this->fetchAndImportGetListID((string) $o_item->{'list_code'}));
		}
		foreach ($this->fetchAndImportIntrinsicsFor($vs_table) as $vs_f) {
			$t_instance->set($vs_f, $o_item->{$vs_f});
		}

		if (!$vb_update) {
			$vn_type_id = $t_instance->getTypeIDForCode((string) $o_item->type_id);
			if (!$vn_type_id) {
				print "NO TYPE FOR {$vs_table}/" . $o_item->type_id . "\n";
			}
			$t_instance->set('type_id', $vn_type_id);

			// TODO: add hook onBeforeInsert()
			$t_instance->insert();
			// TODO: add hook onInsert()
			if ($t_instance->numErrors()) {
				print "[ERROR] Could not insert record: " . join('; ', $t_instance->getErrors()) . "\n";
			}
		}

		// add attributes
		$this->fetchAndImportAttributes($t_instance, $o_item, $t_locale, $t_list);

		$t_instance->update();
		if ($t_instance->numErrors()) {
			print "[ERROR] Could not update [1] record: " . join('; ', $t_instance->getErrors()) . "\n";
		}

		// TODO: add hook onBeforeUpdate()
		$t_instance->update();
		// TODO: add hook onUpdate()
		if ($t_instance->numErrors()) {
			print "[ERROR] Could not update [2] record: " . join('; ', $t_instance->getErrors()) . "\n";
		}

		// get label fields (only the en_US preferred label is read from the response)
		$va_label_data = array();
		foreach ($t_instance->getLabelUIFields() as $vs_field) {
			$va_label_data[$vs_field] = $o_item->preferred_labels->en_US->{$vs_field};
		}

		// TODO: add hook onBeforeAddLabel()
		// NOTE(review): locale_id is hard-coded to 1 here - confirm this matches the target installation
		$t_instance->addLabel($va_label_data, 1, null, true);
		// TODO: add hook onAddLabel()
		if ($t_instance->numErrors()) {
			print "ERROR adding label: " . join('; ', $t_instance->getErrors()) . "\n";
		}

		$this->opa_processed_records[$va_item['table'] . '/' . (int) $va_item['id']] = $t_instance->getPrimaryKey();

		if ($vb_skip) { continue; }
		// Relationships and media are only processed for tables configured in importRelatedFor
		if (!is_array($va_import_relationships_from)) { continue; }

		$pa_tables[$vs_table] = true;

		// Are there relationships?
		$vb_imported_self_relations = false;
		foreach ($va_import_relationships_from as $vs_rel_table => $va_table_info) {
			$vb_is_self_relation = ($vs_rel_table == $t_instance->tableName()) && !$vb_imported_self_relations;

			// Don't recurse into a table that is already being imported higher up, unless it's a self-relation
			if (!empty($pa_tables[$vs_rel_table]) && !$vb_is_self_relation) { continue; }

			if ($vs_rel_table == $t_instance->tableName()) { $vb_imported_self_relations = true; }

			$o_related = $o_item->{'related_' . $vs_rel_table};
			if (!$o_related) { continue; }

			$t_rel = $o_dm->getInstanceByTableName($vs_rel_table, false);
			if (!$t_rel) {
				print "[ERROR] Could not get instance for table [{$vs_rel_table}] ({$ps_code})\n";
				continue;
			}
			$vs_pk = $t_rel->primaryKey();

			// When relationship type codes are configured, only those types are imported
			$va_rel_types = is_array($va_table_info) ? array_keys($va_table_info) : array();

			// TODO: add hook onBeforeAddRelationships()
			foreach ($o_related as $o_related_items) {
				foreach ($o_related_items as $o_related_item) {
					$vs_rel_type_code = (string) $o_related_item->relationship_type_code;

					if (sizeof($va_rel_types) && !in_array($vs_rel_type_code, $va_rel_types)) {
						print "[INFO] Skipped relationship for {$vs_label} because type='" . $vs_rel_type_code . "' is excluded\n";
						continue;
					}

					$vn_rel_id = (int) $o_related_item->{$vs_pk};
					$vs_rel_key = $vs_rel_table . "/" . $vn_rel_id;
					$va_queue = array(
						$vs_rel_key => array('table' => $vs_rel_table, 'id' => $vn_rel_id, 'idno' => (string) $o_related_item->idno)
					);

					// TODO: Add from/until support
					$this->fetchAndImport($va_queue, $po_client, $pa_config, $pa_tables, $ps_code);

					if (!isset($this->opa_processed_records[$vs_rel_key])) {
						// Related record could not be imported, so there is nothing to link to
						print "[ERROR] Could not add relationship to {$vs_rel_table} because related record {$vn_rel_id} was not imported ({$ps_code})\n";
						continue;
					}
					$vn_rel_record_id = $this->opa_processed_records[$vs_rel_key];

					if ($vb_is_self_relation) {
						// Self-relations are seen from both ends; only create each one once
						$vn_this_id = $t_instance->getPrimaryKey();
						if (
							!empty($this->opa_processed_self_relations[$vs_rel_table][$vn_rel_record_id][$vn_this_id][$vs_rel_type_code])
							||
							!empty($this->opa_processed_self_relations[$vs_rel_table][$vn_this_id][$vn_rel_record_id][$vs_rel_type_code])
						) {
							continue;
						}
						$this->opa_processed_self_relations[$vs_rel_table][$vn_this_id][$vn_rel_record_id][$vs_rel_type_code] = true;
						$this->opa_processed_self_relations[$vs_rel_table][$vn_rel_record_id][$vn_this_id][$vs_rel_type_code] = true;
					}

					$t_instance->addRelationship($vs_rel_table, $vn_rel_record_id, $vs_rel_type_code);
					if ($t_instance->numErrors()) {
						print "[ERROR] Could not add relationship to {$vs_rel_table} for row_id={$vn_rel_record_id}: " . join('; ', $t_instance->getErrors()) . "\n";
					}
				}
			}
			// TODO: add hook onAddRelationships()
		}

		// Is there media?
		if ($t_instance->tableName() == 'ca_objects') {
			$this->fetchAndImportRepresentations($t_instance, $po_client, (int) $va_item['id'], $pn_rep_type_id, $vs_idno, $ps_code);
		}

		unset($pa_tables[$vs_table]);
	}
}

/**
 * Returns the names of the intrinsic fields copied from the remote item for a table.
 *
 * @param string $ps_table Table name
 * @return array List of intrinsic field names
 */
private function fetchAndImportIntrinsicsFor($ps_table) {
	switch ($ps_table) {
		case 'ca_entities':
			return array('status', 'access', 'lifespan', 'source_id', 'idno');
		case 'ca_object_lots':
			return array('status', 'access', 'idno_stub');
		default:
			// ca_collections, ca_occurrences, ca_objects and everything else
			return array('status', 'access', 'idno');
	}
}

/**
 * Prepares an already-loaded record for re-import: undeletes it if needed, then removes
 * its labels, attributes and the relationships that are about to be re-imported.
 *
 * @param object $t_instance Loaded model instance
 * @param mixed $pa_import_relationships_from Related table => info map; relationships are only cleared when this is an array
 * @return void
 */
private function fetchAndImportResetRecord($t_instance, $pa_import_relationships_from) {
	if ($t_instance->hasField('deleted') && $t_instance->get('deleted') == 1) {
		$t_instance->set('deleted', 0);
	}

	$t_instance->setMode(ACCESS_WRITE);

	// Clear labels
	$t_instance->removeAllLabels();
	if ($t_instance->numErrors()) {
		print "[ERROR] Could not remove labels for updating: " . join("; ", $t_instance->getErrors()) . "\n";
	}

	// Clear attributes
	$t_instance->removeAttributes(null, array('dontCheckMinMax' => true));
	if ($t_instance->numErrors()) {
		print "[ERROR] Could not remove attributes for updating: " . join("; ", $t_instance->getErrors()) . "\n";
	}

	// Clear relationships
	if (is_array($pa_import_relationships_from)) {
		foreach ($pa_import_relationships_from as $vs_rel_table => $va_table_info) {
			$t_instance->removeRelationships($vs_rel_table);
			if ($t_instance->numErrors()) {
				print "[ERROR] Could not remove {$vs_rel_table} relationships for updating: " . join("; ", $t_instance->getErrors()) . "\n";
			}
		}
	}

	$t_instance->update();
	if ($t_instance->numErrors()) {
		print "[ERROR] Could not clear record for updating: " . join("; ", $t_instance->getErrors()) . "\n";
	}
}

/**
 * Returns the primary key of the list with the given code, creating the list
 * (and a label equal to its code) when it does not exist yet.
 *
 * @param string $ps_list_code List code
 * @return mixed Primary key of the list as returned by getPrimaryKey()
 */
private function fetchAndImportGetListID($ps_list_code) {
	$t_list = new ca_lists();
	if (!$t_list->load(array('list_code' => $ps_list_code))) {
		// create list
		$t_list->setMode(ACCESS_WRITE);

		// TODO: should we bother to replicate the is_hierarchical, use_as_vocabulary and default_sort settings via a service?
		// For now just set reasonable values
		$t_list->set('list_code', $ps_list_code);
		$t_list->set('is_hierarchical', 1);
		$t_list->set('use_as_vocabulary', 1);
		$t_list->set('default_sort', 0);
		$t_list->insert();

		if ($t_list->numErrors()) {
			print "[ERROR] Could not insert new list '{$ps_list_code}': " . join('; ', $t_list->getErrors()) . "\n";
		} else {
			// Label the list in the default cataloguing locale; fall back to locale_id 1,
			// which is what the record labels in fetchAndImport() are written with
			if (!($vn_locale_id = ca_locales::getDefaultCataloguingLocaleID())) { $vn_locale_id = 1; }

			$t_list->addLabel(array('name' => $ps_list_code), $vn_locale_id, null, true);
			if ($t_list->numErrors()) {
				print "[ERROR] Could not add label to new list '{$ps_list_code}': " . join('; ', $t_list->getErrors()) . "\n";
			}
		}
	}
	return $t_list->getPrimaryKey();
}

/**
 * Copies attribute values for every element code applicable to the record from
 * the remote item onto the local instance.
 *
 * Element datatypes handled specially: 0 (container), 3 (list) and 15/16 (file/media);
 * everything else is copied as a plain string value.
 *
 * @param object $t_instance Model instance being imported into
 * @param object $o_item Remote item data (iterated and read via property access)
 * @param object $t_locale ca_locales instance used to map locale codes to ids
 * @param object $t_list ca_lists instance used to resolve list item idnos to item ids
 * @return void
 */
private function fetchAndImportAttributes($t_instance, $o_item, $t_locale, $t_list) {
	// TODO: make this configurable
	$va_codes = $t_instance->getApplicableElementCodes();

	foreach ($va_codes as $vs_code) {
		$t_element = $t_instance->_getElementInstance($vs_code);
		$o_attr = $o_item->{'ca_attribute_' . $vs_code};

		switch ($t_element->get('datatype')) {
			case 0:		// container
				$va_elements = $t_element->getElementsInSet();

				foreach ($o_attr as $o_tags) {
					foreach ($o_tags as $vs_locale => $o_values) {
						if (!($vn_locale_id = $t_locale->localeCodeToID($vs_locale))) { $vn_locale_id = null; }

						// Sub-element values accumulate across all values of the same locale
						$va_container_data = array('locale_id' => $vn_locale_id);
						foreach ($o_values as $o_value) {
							foreach ($va_elements as $va_element_info) {
								if ($va_element_info['datatype'] == 0) { continue; }

								$vs_element_code = $va_element_info['element_code'];
								$vs_value = trim((string) $o_value->{$vs_element_code});
								if ($vs_value === '') { continue; }	// only skip empty values; "0" is a legitimate value

								if ($va_element_info['datatype'] == 3) {	// list
									$va_tmp = explode(":", $vs_value);		//<item_id>:<item_idno>
									$va_container_data[$vs_element_code] = $t_list->getItemIDFromList($va_element_info['list_id'], isset($va_tmp[1]) ? $va_tmp[1] : null);
								} else {
									$va_container_data[$vs_element_code] = $vs_value;
								}
							}
							$t_instance->replaceAttribute($va_container_data, $vs_code);
						}
					}
				}
				break;
			case 3:		// list
				foreach ($o_attr as $o_tags) {
					foreach ($o_tags as $vs_locale => $o_values) {
						if (!($vn_locale_id = $t_locale->localeCodeToID($vs_locale))) { $vn_locale_id = null; }

						foreach ($o_values as $o_value) {
							$vs_value = trim((string) $o_value->{$vs_code});
							if ($vs_value === '') { continue; }

							$va_tmp = explode(":", $vs_value);		//<item_id>:<item_idno>

							// TODO: create lists and list items if they don't already exist
							if ($vn_item_id = $t_list->getItemIDFromList($t_element->get('list_id'), isset($va_tmp[1]) ? $va_tmp[1] : null)) {
								$t_instance->replaceAttribute(array($vs_code => $vn_item_id, 'locale_id' => $vn_locale_id), $vs_code);
							}
						}
					}
				}
				break;
			case 15:	// File
			case 16:	// Media
				// Record is saved before and after the media value is set
				$t_instance->update();
				if ($t_instance->numErrors()) {
					print "[ERROR] Could not update record before media: " . join('; ', $t_instance->getErrors()) . "\n";
				}

				// TODO: detect if media has changes and only pull if it has
				$this->fetchAndImportSimpleAttribute($t_instance, $o_attr, $vs_code, $t_locale);

				$t_instance->update();
				if ($t_instance->numErrors()) {
					print "[ERROR] Could not update record after media: " . join('; ', $t_instance->getErrors()) . "\n";
				}
				break;
			default:
				$this->fetchAndImportSimpleAttribute($t_instance, $o_attr, $vs_code, $t_locale);
				break;
		}
	}
}

/**
 * Copies the values of a single-value (non-container, non-list) attribute onto the instance.
 *
 * @param object $t_instance Model instance being imported into
 * @param mixed $o_attr Remote attribute data, iterated as tag => locale => values
 * @param string $ps_code Element code
 * @param object $t_locale ca_locales instance used to map locale codes to ids
 * @return void
 */
private function fetchAndImportSimpleAttribute($t_instance, $o_attr, $ps_code, $t_locale) {
	foreach ($o_attr as $o_tags) {
		foreach ($o_tags as $vs_locale => $o_values) {
			if (!($vn_locale_id = $t_locale->localeCodeToID($vs_locale))) { $vn_locale_id = null; }

			foreach ($o_values as $o_value) {
				$vs_value = trim((string) $o_value->{$ps_code});
				if ($vs_value === '') { continue; }	// only skip empty values; "0" is a legitimate value

				$t_instance->replaceAttribute(array($ps_code => $vs_value, 'locale_id' => $vn_locale_id), $ps_code);
			}
		}
	}
}

/**
 * Synchronises the representations of a local object with those of the remote object:
 * remote media that already exists locally (matched on MD5) only has its rank/primary
 * flag refreshed, new media is imported, and local representations that no longer
 * exist remotely (or are duplicates) are removed.
 *
 * If the remote representation list cannot be fetched nothing is changed locally.
 *
 * @param object $t_instance Local ca_objects instance
 * @param object $po_client Service client; getObjectRepresentations() is called on it
 * @param int $pn_remote_object_id Id of the object on the remote system
 * @param mixed $pn_rep_type_id Representation type id used for newly imported media
 * @param string $ps_idno Identifier of the object, used in log messages
 * @param string $ps_code Code appended to log messages
 * @return void
 */
private function fetchAndImportRepresentations($t_instance, $po_client, $pn_remote_object_id, $pn_rep_type_id, $ps_idno, $ps_code) {
	try {
		$o_rep_xml = $po_client->getObjectRepresentations((int) $pn_remote_object_id, array('large', 'original'))->get();
	} catch (Exception $e) {
		print "[ERROR] While getting object representations: " . $e->getMessage() . "\n";
		// Without the remote list every local representation would look orphaned and be deleted below
		return;
	}
	if (!is_object($o_rep_xml)) {
		print "[ERROR] While getting object representations: no usable response ({$ps_code})\n";
		return;
	}

	$vn_object_id = $t_instance->getPrimaryKey();

	// Index existing local representations by the MD5 of their "original" and "large" versions
	$va_existing_reps = $t_instance->getRepresentations(array('large', 'original'));
	if (!is_array($va_existing_reps)) { $va_existing_reps = array(); }

	$va_existing_md5s = array();
	$va_dupe_reps = array();
	foreach ($va_existing_reps as $va_rep) {
		$vs_original_md5 = $va_rep['info']['original']['MD5'];
		$vs_large_md5 = $va_rep['info']['large']['MD5'];

		if (!empty($va_existing_md5s[$vs_original_md5]) && !empty($va_existing_md5s[$vs_large_md5])) {
			// dupe
			$va_dupe_reps[] = $va_rep['representation_id'];
			continue;
		}
		$va_existing_md5s[$vs_original_md5] = $va_rep['representation_id'];
		$va_existing_md5s[$vs_large_md5] = $va_rep['representation_id'];
	}

	if ($o_rep_xml->getObjectRepresentations) {
		foreach ($o_rep_xml->getObjectRepresentations as $o_reps) {
			foreach ($o_reps as $o_rep) {
				$vs_url = trim((string) $o_rep->urls->large);
				if (!$vs_url) { continue; }

				$vs_remote_original_md5 = (string) $o_rep->info->original->MD5;
				$vs_remote_large_md5 = (string) $o_rep->info->large->MD5;

				if (!empty($va_existing_md5s[$vs_remote_original_md5]) || !empty($va_existing_md5s[$vs_remote_large_md5])) {
					print "[NOTICE] Skipping representation at {$vs_url} because it already exists (MD5={$vs_remote_original_md5}/{$vs_remote_large_md5}) ({$ps_code})\n";

					$vn_kill_rep_id = !empty($va_existing_md5s[$vs_remote_large_md5]) ? $va_existing_md5s[$vs_remote_large_md5] : $va_existing_md5s[$vs_remote_original_md5];

					// Refresh rank/primary on the existing link and take the representation off
					// the list of local representations that will be removed below
					foreach ($va_existing_md5s as $vs_md5 => $vn_rep_id) {
						if ($vn_kill_rep_id != $vn_rep_id) { continue; }

						$t_existing_rep_link = new ca_objects_x_object_representations();
						if ($t_existing_rep_link->load(array('object_id' => $vn_object_id, 'representation_id' => $vn_rep_id))) {
							$t_existing_rep_link->setMode(ACCESS_WRITE);
							$t_existing_rep_link->set('is_primary', (int) $o_rep->is_primary);
							$t_existing_rep_link->set('rank', (int) $o_rep->rank);
							$t_existing_rep_link->update();
							if ($t_existing_rep_link->numErrors()) {
								print_r($t_existing_rep_link->getErrors());
							}
						}
						unset($va_existing_md5s[$vs_md5]);
					}
					continue;
				}

				print "[Notice] Importing for [{$ps_idno}] media from {$vs_url}: primary=" . (string) $o_rep->is_primary . " ({$ps_code})\n";
				print "instance has id=" . $t_instance->getPrimaryKey() . "\n";

				// TODO: add hook onBeforeAddMedia()
				$t_instance->addRepresentation($vs_url, $pn_rep_type_id, 1, (int) $o_rep->status, (int) $o_rep->access, (int) $o_rep->is_primary);
				// TODO: add hook onAddMedia()
				if ($t_instance->numErrors()) {
					print "[ERROR] Could not load object representation: " . join("; ", $t_instance->getErrors()) . " ({$ps_code})\n";
				}
			}
		}
	}

	// Whatever is still in the MD5 index was not matched by any remote representation: remove it
	$va_handled_rep_ids = array();
	foreach ($va_existing_md5s as $vs_md5 => $vn_rep_id) {
		if (!empty($va_handled_rep_ids[$vn_rep_id])) { continue; }

		$t_obj_x_rep = new ca_objects_x_object_representations();
		while ($t_obj_x_rep->load(array('object_id' => $vn_object_id, 'representation_id' => $vn_rep_id))) {
			$t_obj_x_rep->setMode(ACCESS_WRITE);
			$t_obj_x_rep->delete(true);
			if ($t_obj_x_rep->numErrors()) {
				print "[ERROR] Could not load remove object-to-representation link: " . join("; ", $t_obj_x_rep->getErrors()) . " ({$ps_code})\n";
				break;
			}

			// Only delete the representation itself once no link to it is left
			if (!$t_obj_x_rep->load(array('representation_id' => $vn_rep_id))) {
				$t_rep = new ca_object_representations();
				if ($t_rep->load($vn_rep_id)) {
					$t_rep->setMode(ACCESS_WRITE);
					$t_rep->delete(true, array('hard' => true));
					if ($t_rep->numErrors()) {
						print "[ERROR] Could not remove representation: " . join("; ", $t_rep->getErrors()) . "\n";
						break;
					}
				}
			}
		}
		$va_handled_rep_ids[$vn_rep_id] = true;
	}

	foreach ($va_dupe_reps as $vn_dupe_rep_id) {
		$t_rep = new ca_object_representations();
		if ($t_rep->load($vn_dupe_rep_id)) {
			print "[Notice] DELETE DUPE {$vn_dupe_rep_id}\n";
			$t_rep->setMode(ACCESS_WRITE);
			$t_rep->delete(true, array('hard' => true));
			if ($t_rep->numErrors()) {
				print "[ERROR] Could not remove dupe representation: " . join("; ", $t_rep->getErrors()) . "\n";
				break;
			}
		}
	}
}
// The Dutch-language AAT requires a Dutch locale: prefer nl_NL, fall back to nl_BE
if (!($pn_nl_locale_id = $t_locale->loadLocaleByCode('nl_NL'))) {
	$pn_nl_locale_id = $t_locale->loadLocaleByCode('nl_BE');
}
if (!$pn_nl_locale_id) {
	die("ERROR: You can only import the Dutch-language AAT into an installation configured to support the nl_NL (Netherlands) or nl_BE (Vlaams Belgium) locale. Add one of these locales to your system and try again.\n");
}

// create vocabulary list record (if it doesn't exist already)
$t_list = new ca_lists();
if (!$t_list->load(array('list_code' => 'aat_nl'))) {
	$t_list->setMode(ACCESS_WRITE);
	$t_list->set('list_code', 'aat_nl');
	$t_list->set('is_system_list', 0);
	$t_list->set('is_hierarchical', 1);
	$t_list->set('use_as_vocabulary', 1);
	$t_list->insert();

	if ($t_list->numErrors()) {
		print "ERROR: couldn't create ca_list row for AAT: " . join('; ', $t_list->getErrors()) . "\n";
		die;
	}

	// Label the list with the English locale when one has been set up earlier in the script;
	// otherwise use the Dutch locale, which is guaranteed to exist at this point
	$t_list->addLabel(
		array('name' => 'Art & Architecture Thesaurus [Nederlands]'),
		(isset($pn_en_locale_id) && $pn_en_locale_id) ? $pn_en_locale_id : $pn_nl_locale_id,
		null, true
	);
	if ($t_list->numErrors()) {
		print "ERROR: couldn't add label to ca_list row for AAT: " . join('; ', $t_list->getErrors()) . "\n";
	}
}
$vn_list_id = $t_list->getPrimaryKey();

// get list item types (should be defined by base installation profile [base.profile])
// if your installation didn't use a profile inheriting from base.profile then you should make sure
// that a list with code='list_item_types' is defined and the following four item codes are defined.
// If these are not defined then the AAT will still import, but without any distinction between
// terms, facets and guide terms
$vn_list_item_type_concept = $t_list->getItemIDFromList('list_item_types', 'concept');
$vn_list_item_type_facet = $t_list->getItemIDFromList('list_item_types', 'facet');
$vn_list_item_type_guide_term = $t_list->getItemIDFromList('list_item_types', 'guide_term');
$vn_list_item_type_hierarchy_name = $t_list->getItemIDFromList('list_item_types', 'hierarchy_name');
$t_tgn = $t_list->addItem('tgn', true, false, null, null, 'tgn'); $t_tgn->addLabel(array('name_singular' => 'Thesaurus of Geographic Names', 'name_plural' => 'Thesaurus of Geographic Names'), $pn_en_locale_id, null, true); $vn_tgn_id = $t_tgn->getPrimaryKey(); } else { $t_tgn = new ca_list_items($vn_tgn_id); } // Create list for place types (if it doesn't exist already) $t_place_types = new ca_lists(); if (!$t_place_types->load(array('list_code' => 'tgn_place_types'))) { $t_place_types->setMode(ACCESS_WRITE); $t_place_types->set('list_code', 'tgn_place_types'); $t_place_types->set('is_system_list', 1); $t_place_types->set('is_hierarchical', 1); $t_place_types->set('use_as_vocabulary', 1); $t_place_types->insert(); if ($t_place_types->numErrors()) { print "[Error] couldn't create ca_list row for place types: " . join('; ', $t_place_types->getErrors()) . "\n"; die; } $t_place_types->addLabel(array('name' => 'Getty TGN place types'), $pn_en_locale_id, null, true); } $vn_place_type_list_id = $t_place_types->getPrimaryKey(); // load places $o_xml = new XMLReader(); print "[Notice] READING TGN TERMS...\n"; $vn_last_message_length = 0; $vn_term_count = 0; $t_place = new ca_places(); $t_place->setMode(ACCESS_WRITE); $t_place->logChanges(false); // Don't log changes to records during import – takes time and we don't need the logs
/**
 * Builds a list item hierarchy from "indented" source data, where each configured
 * level placeholder corresponds to one depth of the hierarchy and the state of the
 * previous rows (kept in the static $opa_level_values / $opa_level_value_ids)
 * supplies the parents for levels that are empty in the current row.
 *
 * For every level placeholder that yields a value, a list item is looked up or created
 * via DataMigrationUtils::getListItemID() in the configured list (which is created when
 * it does not exist), parented to the nearest item above it.
 *
 * @param array $pa_destination_data Destination data (not read or modified here)
 * @param array $pa_group Group settings (not read here)
 * @param array $pa_item Item settings; the listItemIndentedHierarchyBuilder_levels, _levelTypes and _list settings are read
 * @param array $pa_source_data Source row data used to resolve the level placeholders
 * @param array $pa_options Options read: log, mapping, reader, transaction, event
 * @return mixed Id of the deepest list item handled when the _list setting equals 'returnData'; null otherwise, including on any error
 */
public function refine(&$pa_destination_data, $pa_group, $pa_item, $pa_source_data, $pa_options = null) {
	global $g_ui_locale_id;

	if (!($pn_locale_id = ca_locales::getDefaultCataloguingLocaleID())) { $pn_locale_id = $g_ui_locale_id; }

	$o_log = (isset($pa_options['log']) && is_object($pa_options['log'])) ? $pa_options['log'] : null;

	// This refinery only makes sense when importing into ca_list_items
	$t_mapping = caGetOption('mapping', $pa_options, null);
	if ($t_mapping) {
		$o_dm = Datamodel::load();
		if ($t_mapping->get('table_num') != $o_dm->getTableNum('ca_list_items')) {
			if ($o_log) {
				$o_log->logError(_t("listItemIndentedHierarchyBuilder refinery may only be used in imports to ca_list_items"));
			}
			return null;
		}
	}

	$va_settings = (isset($pa_item['settings']) && is_array($pa_item['settings'])) ? $pa_item['settings'] : array();

	// Get list of fields to insert
	$va_levels = isset($va_settings['listItemIndentedHierarchyBuilder_levels']) ? $va_settings['listItemIndentedHierarchyBuilder_levels'] : null;
	if (!is_array($va_levels)) {
		if ($o_log) {
			$o_log->logError(_t("listItemIndentedHierarchyBuilder requires levels option be set to a list of data source placeholders"));
		}
		return null;
	}
	$va_level_types = isset($va_settings['listItemIndentedHierarchyBuilder_levelTypes']) ? $va_settings['listItemIndentedHierarchyBuilder_levelTypes'] : null;

	// Get list, or create if it doesn't already exist
	$vs_list_code = isset($va_settings['listItemIndentedHierarchyBuilder_list']) ? $va_settings['listItemIndentedHierarchyBuilder_list'] : null;
	if (!$vs_list_code) {
		if ($o_log) {
			$o_log->logError(_t("listItemIndentedHierarchyBuilder requires list option be set"));
		}
		return null;
	}

	$t_list = new ca_lists();
	if (!$t_list->load(array('list_code' => $vs_list_code))) {
		// create list
		$t_list->set('list_code', $vs_list_code);
		$t_list->setMode(ACCESS_WRITE);
		$t_list->insert();
		if ($t_list->numErrors()) {
			if ($o_log) {
				$o_log->logError(_t("listItemIndentedHierarchyBuilder could not create list %1: %2", $vs_list_code, join("; ", $t_list->getErrors())));
			}
			return null;
		}

		$t_list->addLabel(array('name' => caUcFirstUTF8Safe($vs_list_code)), $pn_locale_id, null, true);
		if ($t_list->numErrors()) {
			if ($o_log) {
				$o_log->logError(_t("listItemIndentedHierarchyBuilder could not create list label %1: %2", $vs_list_code, join("; ", $t_list->getErrors())));
			}
			return null;
		}
	}

	// Handle each level
	// State carried over from previous rows; each array is normalised on its own so that
	// array_slice() below always receives an array, even on the very first row
	$va_level_values = listItemIndentedHierarchyBuilderRefinery::$opa_level_values;
	if (!is_array($va_level_values)) { $va_level_values = array(); }
	$va_level_value_ids = listItemIndentedHierarchyBuilderRefinery::$opa_level_value_ids;
	if (!is_array($va_level_value_ids)) { $va_level_value_ids = array(); }

	$vn_max_level = 0;
	$vn_parent_id = null;
	foreach ($va_levels as $vn_i => $vs_level_placeholder) {
		if (!strlen($vs_level_placeholder)) { continue; }

		$vs_level_value = BaseRefinery::parsePlaceholder($vs_level_placeholder, $pa_source_data, $pa_item, 0, array('reader' => caGetOption('reader', $pa_options, null), 'returnAsString' => true));
		if (!$vs_level_value) { continue; }

		// First populated level in this row: parent is the item recorded one level up by an earlier row
		if (!$vn_parent_id && isset(listItemIndentedHierarchyBuilderRefinery::$opa_level_value_ids[$vn_i - 1])) {
			$vn_parent_id = listItemIndentedHierarchyBuilderRefinery::$opa_level_value_ids[$vn_i - 1];
		}

		$vs_type = isset($va_level_types[$vn_i]) ? $va_level_types[$vn_i] : null;

		// The item idno is the level value with runs of non-alphanumeric characters replaced by "_"
		$vn_item_id = DataMigrationUtils::getListItemID(
			$vs_list_code,
			preg_replace("![^A-Za-z0-9_]+!", "_", $vs_level_value),
			$vs_type,
			$pn_locale_id,
			array('is_enabled' => 1, 'parent_id' => $vn_parent_id, 'preferred_labels' => array('name_singular' => $vs_level_value, 'name_plural' => $vs_level_value)),
			array('matchOnIdno' => true, 'log' => $o_log, 'transaction' => caGetOption('transaction', $pa_options, null), 'importEvent' => caGetOption('event', $pa_options, null), 'importEventSource' => 'listItemIndentedHierarchyBuilder')
		);
		if ($vn_item_id) {
			$vn_parent_id = $vn_item_id;
			$va_level_values[$vn_i] = $vs_level_value;
			$va_level_value_ids[$vn_i] = $vn_item_id;
			$vn_max_level = $vn_i;
		}
	}

	// Forget everything deeper than the deepest level seen in this row
	listItemIndentedHierarchyBuilderRefinery::$opa_level_values = array_slice($va_level_values, 0, $vn_max_level + 1);
	listItemIndentedHierarchyBuilderRefinery::$opa_level_value_ids = array_slice($va_level_value_ids, 0, $vn_max_level + 1);

	// NOTE(review): this compares the *list* setting with 'returnData'; it looks like a
	// dedicated setting may have been intended - confirm before relying on the return value
	if ($vs_list_code == 'returnData') {
		return $vn_parent_id;
	}
	return null;
}
/** * * * @return string */ function caLoadULAN($ps_path_to_ulan_data = null, $ps_path_to_ulan_config = null, $pa_options = null) { require_once __CA_LIB_DIR__ . '/core/Db.php'; require_once __CA_LIB_DIR__ . '/core/Configuration.php'; require_once __CA_LIB_DIR__ . '/ca/Utils/DataMigrationUtils.php'; require_once __CA_MODELS_DIR__ . '/ca_locales.php'; require_once __CA_MODELS_DIR__ . '/ca_entities.php'; require_once __CA_MODELS_DIR__ . '/ca_entities_x_entities.php'; require_once __CA_MODELS_DIR__ . '/ca_lists.php'; require_once __CA_MODELS_DIR__ . '/ca_list_items.php'; require_once __CA_MODELS_DIR__ . '/ca_list_items_x_list_items.php'; require_once __CA_MODELS_DIR__ . '/ca_relationship_types.php'; $t = new Timer(); $o_log = new KLogger(__CA_APP_DIR__ . '/log', KLogger::INFO); $va_parent_child_links = array(); $va_item_item_links = array(); $va_ulan_id_to_item_id = array(); $o_log->logInfo("Starting import of Getty ULAN"); define('__CA_DONT_DO_SEARCH_INDEXING__', true); $_ = new Zend_Translate('gettext', __CA_APP_DIR__ . 
'/locale/en_US/messages.mo', 'en_US'); $t_locale = new ca_locales(); $pn_en_locale_id = $t_locale->loadLocaleByCode('en_US'); if (!($o_config = Configuration::load($ps_path_to_ulan_config))) { $o_log->logError("Could not load ULAN import configuration file"); die("ERROR: Could not load ULAN import configuration\n"); } $vs_ulan_import_mode = $o_config->get('ulan_import_target'); $t_list = null; if ($vs_ulan_import_mode == 'ca_entities') { $va_ulan_types = $o_config->getAssoc('ulan_entity_types'); $va_mapping = $o_config->getAssoc('ulan_entity_mapping'); } elseif ($vs_ulan_import_mode == 'ca_list_items') { $va_ulan_types = $o_config->getAssoc('ulan_list_item_types'); if (!($vs_ulan_list_code = $o_config->get('ulan_import_list'))) { $vs_ulan_list_code = 'ULAN'; } // create vocabulary list record (if it doesn't exist already) $t_list = new ca_lists(); if (!$t_list->load(array('list_code' => $vs_ulan_list_code))) { $t_list->setMode(ACCESS_WRITE); $t_list->set('list_code', $vs_ulan_list_code); $t_list->set('is_system_list', 0); $t_list->set('is_hierarchical', 1); $t_list->set('use_as_vocabulary', 1); $t_list->insert(); if ($t_list->numErrors()) { $o_log->logError("Could not create list record for ULAN: " . join('; ', $t_list->getErrors())); die("ERROR: couldn't create ca_list row for ULAN: " . join('; ', $t_list->getErrors()) . "\n"); } $t_list->addLabel(array('name' => 'Union List of Artist Names'), $pn_en_locale_id, null, true); } $vn_list_id = $t_list->getPrimaryKey(); $va_mapping = $o_config->getAssoc('ulan_list_item_mapping'); } else { $o_log->logError("Invalid ULAN import mode {$vs_ulan_import_mode}"); die("ERROR: invalid ULAN import mode {$vs_ulan_import_mode}\n"); } $vn_last_message_length = 0; $vn_term_count = 0; $va_subject = array(); foreach (array('ULAN1.xml', 'ULAN2.xml', 'ULAN3.xml') as $vs_file) { if (!$ps_path_to_ulan_data) { $ps_path_to_ulan_data = "."; } if (!file_exists($ps_path_to_ulan_data . 
"/{$vs_file}")) { $o_log->logError("Could not find ULAN data file {$vs_file}"); print "[ERROR] cannot find ULAN data.\n"; continue; } $o_log->logInfo("Processing ULAN file {$vs_file}"); print "[Notice] Processing ULAN file {$vs_file}\n"; // load $o_xml = new XMLReader(); $o_xml->open($ps_path_to_ulan_data . '/' . $vs_file); while ($o_xml->read()) { switch ($o_xml->name) { # --------------------------- case 'Subject': if ($o_xml->nodeType == XMLReader::END_ELEMENT) { if (in_array($va_subject['subject_id'], array('500000000', '500000001'))) { break; } // skip top-level root $vs_preferred_term = $va_subject['preferred_term']; $pb_is_enabled = false; switch ($va_subject['record_type']) { case 'Person': default: $vn_type_id = $va_ulan_types['Person']; $pb_is_enabled = true; break; case 'Corporate Body': $vn_type_id = $va_ulan_types['Corporate Body']; $pb_is_enabled = true; break; } print str_repeat(chr(8), $vn_last_message_length); $vs_message = "\tIMPORTING #" . ($vn_term_count + 1) . " [" . $va_subject['subject_id'] . "] " . 
$vs_preferred_term; if (($vn_l = 100 - strlen($vs_message)) < 1) { $vn_l = 1; } $vs_message .= str_repeat(' ', $vn_l); $vn_last_message_length = strlen($vs_message); print $vs_message; if ($vs_ulan_import_mode == 'ca_entities') { $va_np_labels = array(); if (is_array($va_subject['non_preferred_terms'])) { for ($vn_i = 0; $vn_i < sizeof($va_subject['non_preferred_terms']); $vn_i++) { $va_np_labels[] = DataMigrationUtils::splitEntityName(trim(htmlentities($va_subject['non_preferred_terms'][$vn_i]))); } } $t_item = DataMigrationUtils::getEntityID(DataMigrationUtils::splitEntityName(trim(htmlentities($vs_preferred_term, ENT_NOQUOTES))), $vn_type_id, $pn_en_locale_id, array('idno' => $va_subject['subject_id']), array('nonPreferredLabels' => $va_np_labels, 'returnInstance' => true)); if (!$t_item) { $o_log->logError("Failed to create entity for ULAN artist {$vs_preferred_term}"); break; } $t_item->setMode(ACCESS_WRITE); $va_ulan_id_to_item_id[$va_subject['subject_id']] = $t_item->getPrimaryKey(); } else { if ($t_item = $t_list->addItem($va_subject['subject_id'], $pb_is_enabled, false, null, $vn_type_id, $va_subject['subject_id'], '', 4, 1)) { $va_ulan_id_to_item_id[$va_subject['subject_id']] = $t_item->getPrimaryKey(); if ($va_subject['preferred_parent_subject_id'] != 500000000) { $va_parent_child_links[$va_subject['subject_id']] = $va_subject['preferred_parent_subject_id']; } // add preferred labels if (!$t_item->addLabel(array('name_singular' => trim(htmlentities($vs_preferred_term, ENT_NOQUOTES)), 'name_plural' => trim(htmlentities($vs_preferred_term, ENT_NOQUOTES)), 'description' => $va_subject['description']), $pn_en_locale_id, null, true)) { $o_log->logError("Could not add preferred label to ULAN term [" . $va_subject['subject_id'] . "] " . $vs_preferred_term . ": " . 
join("; ", $t_item->getErrors())); } // add alternate labels if (is_array($va_subject['non_preferred_terms'])) { for ($vn_i = 0; $vn_i < sizeof($va_subject['non_preferred_terms']); $vn_i++) { $vs_np_label = $va_subject['non_preferred_terms'][$vn_i]; $vs_np_term_type = $va_subject['non_preferred_term_types'][$vn_i]; switch ($vs_np_term_type) { case 'Used For Term': $vn_np_term_type_id = $vn_list_item_label_type_uf; break; case 'Alternate Descriptor': $vn_np_term_type_id = $vn_list_item_label_type_alt; break; default: $vn_np_term_type_id = null; break; } if (!$t_item->addLabel(array('name_singular' => trim(htmlentities($vs_np_label, ENT_NOQUOTES)), 'name_plural' => trim(htmlentities($vs_np_label, ENT_NOQUOTES)), 'description' => ''), $pn_en_locale_id, $vn_np_term_type_id, false)) { $o_log->logError("Could not add non-preferred label to ULAN term [" . $va_subject['subject_id'] . "] " . $vs_np_label); } } } } else { $o_log->logError("Could not import ULAN term [" . $va_subject['subject_id'] . "] " . $vs_preferred_term . ": " . join("; ", $t_list->getErrors())); break; } } // Map content fields foreach ($va_mapping as $vs_dest => $vs_source) { $va_values = array(); switch ($vs_source) { case 'biography': if (!is_array($va_subject['biographies'])) { break; } foreach ($va_subject['biographies'] as $va_bio) { $va_values[] = $va_bio['text']; } break; case 'biography_dates': if (!is_array($va_subject['biographies'])) { break; } foreach ($va_subject['biographies'] as $va_bio) { if ($va_bio['birth_date'] == 1000 || $va_bio['birth_date'] < -5000) { if ($va_bio['death_date'] >= 2050) { break 2; } else { $va_values[] = "before " . $va_bio['death_date']; } } elseif ($va_bio['death_date'] >= 2050) { $va_values[] = "after " . $va_bio['birth_date']; } else { $va_values[] = $va_bio['birth_date'] . " - " . 
$va_bio['death_date']; } } break; case 'sex': if (!is_array($va_subject['biographies'])) { break; } foreach ($va_subject['biographies'] as $va_bio) { $va_values[] = $va_bio['sex']; } break; case 'nationality_name': if (!is_array($va_subject['nationalities'])) { break; } foreach ($va_subject['nationalities'] as $va_nationality) { $va_values[] = $va_nationality['name']; } break; case 'nationality_code': if (!is_array($va_subject['nationalities'])) { break; } foreach ($va_subject['nationalities'] as $va_nationality) { $va_values[] = $va_nationality['code']; } break; case 'role_name': if (!is_array($va_subject['roles'])) { break; } foreach ($va_subject['roles'] as $va_role) { $va_values[] = $va_role['name']; } break; case 'role_code': if (!is_array($va_subject['roles'])) { break; } foreach ($va_subject['roles'] as $va_role) { $va_values[] = $va_role['code']; } break; } if (sizeof($va_values)) { $va_dest = explode('.', $vs_dest); $vs_fld = array_pop($va_dest); if ($t_item->hasField($vs_fld)) { $t_item->set($vs_fld, join("\n", $va_values)); } else { foreach ($va_values as $vs_value) { $t_item->addAttribute(array('locale_id' => $pn_en_locale_id, $vs_fld => $vs_value), $vs_fld); } } $t_item->update(array('dontCheckCircularReferences' => true, 'dontSetHierarchicalIndexing' => true)); if ($t_item->numErrors()) { $o_log->logError("Could not update ULAN list item with content values: " . 
join("; ", $t_item->getErrors())); } } } // record item-item relations if (is_array($va_subject['related_subjects'])) { foreach ($va_subject['related_subjects'] as $vs_rel_subject_id) { $va_item_item_links[$va_subject['subject_id']] = $vs_rel_subject_id; } } $vn_term_count++; } else { $va_subject = array('subject_id' => $o_xml->getAttribute('Subject_ID')); } break; # --------------------------- # --------------------------- case 'Biographies': while ($o_xml->read()) { switch ($o_xml->name) { case 'Preferred_Biography': $va_bio = array(); while ($o_xml->read()) { switch ($o_xml->name) { case 'Biography_Text': switch ($o_xml->nodeType) { case XMLReader::ELEMENT: $o_xml->read(); $va_bio['text'] = $o_xml->value; break; } break; case 'Birth_Date': switch ($o_xml->nodeType) { case XMLReader::ELEMENT: $o_xml->read(); $va_bio['birth_date'] = $o_xml->value; if ($va_bio['birth_date'] < 0) { $va_bio['birth_date'] = abs($va_bio['birth_date']) . " BCE"; } break; } break; case 'Death_Date': switch ($o_xml->nodeType) { case XMLReader::ELEMENT: $o_xml->read(); $va_bio['death_date'] = $o_xml->value; if ($va_bio['death_date'] < 0) { $va_bio['death_date'] = abs($va_bio['death_date']) . 
" BCE"; } break; } break; case 'Sex': switch ($o_xml->nodeType) { case XMLReader::ELEMENT: $o_xml->read(); $va_bio['sex'] = $o_xml->value; break; } break; case 'Preferred_Biography': break 2; } } $va_subject['biographies'][] = $va_bio; break; case 'Biographies': break 2; } } break; # --------------------------- # --------------------------- case 'Nationalities': while ($o_xml->read()) { switch ($o_xml->name) { case 'Preferred_Nationality': $va_nationality = array(); while ($o_xml->read()) { switch ($o_xml->name) { case 'Nationality_Code': switch ($o_xml->nodeType) { case XMLReader::ELEMENT: $o_xml->read(); $va_nationality['code'] = $o_xml->value; $va_nationality['name'] = array_pop(explode('/', $o_xml->value)); break; } break; case 'Preferred_Nationality': break 2; } } $va_subject['nationalities'][] = $va_nationality; break; case 'Nationalities': break 2; } } break; # --------------------------- # --------------------------- case 'Roles': while ($o_xml->read()) { switch ($o_xml->name) { case 'Preferred_Role': $va_role = array(); while ($o_xml->read()) { switch ($o_xml->name) { case 'Role_ID': switch ($o_xml->nodeType) { case XMLReader::ELEMENT: $o_xml->read(); $va_role['code'] = $o_xml->value; $va_role['name'] = array_pop(explode('/', $o_xml->value)); break; } break; case 'Preferred_Role': break 2; } } $va_subject['roles'][] = $va_role; break; case 'Roles': break 2; } } break; # --------------------------- # --------------------------- case 'Record_Type': switch ($o_xml->nodeType) { case XMLReader::ELEMENT: $o_xml->read(); $va_subject['record_type'] = $o_xml->value; break; } break; # --------------------------- # --------------------------- case 'Hierarchy': switch ($o_xml->nodeType) { case XMLReader::ELEMENT: $o_xml->read(); $va_subject['hierarchy'] = $o_xml->value; break; } break; # --------------------------- # --------------------------- case 'Parent_Relationships': $vn_parent_id = $vs_historic_flag = null; while ($o_xml->read()) { switch ($o_xml->name) { case 
'Preferred_Parent': while ($o_xml->read()) { switch ($o_xml->name) { case 'Parent_Subject_ID': switch ($o_xml->nodeType) { case XMLReader::ELEMENT: $o_xml->read(); $vn_parent_id = $o_xml->value; break; } break; case 'Historic_Flag': switch ($o_xml->nodeType) { case XMLReader::ELEMENT: $o_xml->read(); $vs_historic_flag = $o_xml->value; break; } break; case 'Preferred_Parent': $va_subject['preferred_parent_subject_id'] = $vn_parent_id; break 2; } } break; case 'Parent_Relationships': break 2; } } break; # --------------------------- # --------------------------- case 'Preferred_Term': while ($o_xml->read()) { switch ($o_xml->name) { case 'Term_Type': switch ($o_xml->nodeType) { case XMLReader::ELEMENT: $o_xml->read(); $va_subject['preferred_term_type'] = $o_xml->value; break; } break; case 'Term_Text': switch ($o_xml->nodeType) { case XMLReader::ELEMENT: $o_xml->read(); $va_subject['preferred_term'] = $o_xml->value; break; } break; case 'Term_ID': switch ($o_xml->nodeType) { case XMLReader::ELEMENT: $o_xml->read(); $va_subject['preferred_term_id'] = $o_xml->value; break; } break; break; case 'Preferred_Term': break 2; } } break; # --------------------------- # --------------------------- case 'Non-Preferred_Term': while ($o_xml->read()) { switch ($o_xml->name) { case 'Term_Type': switch ($o_xml->nodeType) { case XMLReader::ELEMENT: $o_xml->read(); $va_subject['non_preferred_term_types'][] = $o_xml->value; break; } break; case 'Term_Text': switch ($o_xml->nodeType) { case XMLReader::ELEMENT: $o_xml->read(); $va_subject['non_preferred_terms'][] = $o_xml->value; break; } break; case 'Term_ID': switch ($o_xml->nodeType) { case XMLReader::ELEMENT: $o_xml->read(); $va_subject['non_preferred_term_ids'][] = $o_xml->value; break; } break; case 'Non-Preferred_Term': break 2; } } break; # --------------------------- # --------------------------- case 'VP_Subject_ID': switch ($o_xml->nodeType) { case XMLReader::ELEMENT: $o_xml->read(); $va_subject['related_subjects'][] = 
$o_xml->value; break; } break; # --------------------------- } } $o_xml->close(); } $o_log->logInfo("Begin linking ULAN terms in hierarchy"); print "\n\nLINKING TERMS IN HIERARCHY...\n"; $vn_last_message_length = 0; $t_list = new ca_lists(); $t_item = new ca_list_items(); $t_item->setMode(ACCESS_WRITE); $vn_list_root_id = $t_list->getRootListItemID($vn_list_id); foreach ($va_parent_child_links as $vs_child_id => $vs_parent_id) { print str_repeat(chr(8), $vn_last_message_length); $vs_message = "\tLINKING {$vs_child_id} to parent {$vs_parent_id}"; if (($vn_l = 100 - strlen($vs_message)) < 1) { $vn_l = 1; } $vs_message .= str_repeat(' ', $vn_l); $vn_last_message_length = strlen($vs_message); print $vs_message; if (in_array($vs_parent_id, array('500000000', '500000001'))) { if (!$t_item->load($vn_child_item_id)) { $o_log->logError("Could not load item for {$vs_child_id} (was translated to item_id={$vn_child_item_id})"); continue; } $t_item->set('parent_id', $vn_list_root_id); $t_item->update(array('dontCheckCircularReferences' => true, 'dontSetHierarchicalIndexing' => true)); if ($t_item->numErrors()) { $o_log->logError("Could not set parent_id for {$vs_child_id} to root): " . 
join('; ', $t_item->getErrors())); continue; } $va_ulan_id_to_item_id[$vs_parent_id] = $vn_list_root_id; } if (!($vn_child_item_id = $va_ulan_id_to_item_id[$vs_child_id])) { $o_log->logError("No list item id for child_id {$vs_child_id} (were there previous errors?)"); continue; } if (!($vn_parent_item_id = $va_ulan_id_to_item_id[$vs_parent_id])) { $o_log->logError("No list item id for parent_id {$vs_parent_id} (were there previous errors?)"); continue; } if (!$t_item->load($vn_child_item_id)) { $o_log->logError("Could not load item for {$vs_child_id} (was translated to item_id={$vn_child_item_id})"); continue; } $t_item->set('parent_id', $vn_parent_item_id); $t_item->update(array('dontCheckCircularReferences' => true, 'dontSetHierarchicalIndexing' => true)); if ($t_item->numErrors()) { $o_log->logError("Could not set parent_id for {$vs_child_id} (was translated to item_id={$vn_child_item_id}): " . join('; ', $t_item->getErrors())); } } if ($vn_list_item_relation_type_id_related > 0) { $o_log->logInfo("Begin adding ULAN related term links"); $vn_last_message_length = 0; $t_item = new ca_list_items(); $t_link = new ca_list_items_x_list_items(); $t_link->setMode(ACCESS_WRITE); foreach ($va_item_item_links as $vs_left_id => $vs_right_id) { print str_repeat(chr(8), $vn_last_message_length); $vs_message = "\tLINKING {$vs_left_id} to {$vs_right_id}"; if (($vn_l = 100 - strlen($vs_message)) < 1) { $vn_l = 1; } $vs_message .= str_repeat(' ', $vn_l); $vn_last_message_length = strlen($vs_message); print $vs_message; if (!($vn_left_item_id = $va_ulan_id_to_item_id[$vs_left_id])) { $o_log->logError("No list item id for left_id {$vs_left_id} (were there previous errors?)"); continue; } if (!($vn_right_item_id = $va_ulan_id_to_item_id[$vs_right_id])) { $o_log->logError("No list item id for right_id {$vs_right_id} (were there previous errors?)"); continue; } $t_link->set('term_left_id', $vn_left_item_id); $t_link->set('term_right_id', $vn_right_item_id); $t_link->set('type_id', 
$vn_list_item_relation_type_id_related); $t_link->insert(); if ($t_link->numErrors()) { $o_log->logError("Could not set link between {$vs_left_id} (was translated to item_id={$vn_left_item_id}) and {$vs_right_id} (was translated to item_id={$vn_right_item_id}): " . join('; ', $t_link->getErrors())); } } } else { $o_log->logWarn("Skipped import of term-term relationships because the ca_list_items_x_list_items 'related' relationship type is not defined for your installation"); } $vn_duration = $t->getTime(1); $vs_time = caFormatInterval($vn_duration); $o_log->logInfo("Rebuilding hierarchical indices..."); $t_item->rebuildAllHierarchicalIndexes(); $o_log->logInfo("ULAN import complete. Took {$vs_time} ({$vn_duration})"); print "\n\nIMPORT COMPLETE. Took {$vs_time} ({$vn_duration})\n"; }
/**
 * Imports the Getty Art & Architecture Thesaurus (AAT) from an XML export into
 * the ca_lists vocabulary with list_code "aat", creating that list if needed.
 *
 * The import runs in three passes:
 *   1. Stream the XML file and create one list item (with its preferred and
 *      non-preferred labels) for every <Subject> record except the root
 *      record 300000000.
 *   2. Set parent_id on each created item from the preferred-parent links
 *      collected during pass 1.
 *   3. Insert a ca_list_items_x_list_items row for every related subject
 *      collected during pass 1 (skipped with a warning when the 'related'
 *      relationship type is not defined).
 *
 * Progress and error messages are printed to standard output. The script is
 * terminated via die() when the data file cannot be found or the list record
 * cannot be created.
 *
 * @param string|null $ps_path_to_aat_data Path to the AAT XML file; "./AAT.xml" is used when empty
 * @param array|null  $pa_options          Not read by this function
 *
 * @return void
 */
function caLoadAAT($ps_path_to_aat_data = null, $pa_options = null) {
    if (!$ps_path_to_aat_data) {
        $ps_path_to_aat_data = "./AAT.xml";
    }
    if (!file_exists($ps_path_to_aat_data)) {
        die("ERROR: cannot find AAT data.\n");
    }

    require_once __CA_LIB_DIR__ . '/core/Db.php';
    require_once __CA_MODELS_DIR__ . '/ca_locales.php';
    require_once __CA_MODELS_DIR__ . '/ca_lists.php';
    require_once __CA_MODELS_DIR__ . '/ca_list_items.php';
    require_once __CA_MODELS_DIR__ . '/ca_list_items_x_list_items.php';
    require_once __CA_MODELS_DIR__ . '/ca_relationship_types.php';

    // NOTE(review): $_ is local and never referenced again in this function;
    // presumably the object is constructed for its side effects - confirm before removing.
    $_ = new Zend_Translate('gettext', __CA_APP_DIR__ . '/locale/en_US/messages.mo', 'en_US');

    $t_locale = new ca_locales();
    $pn_en_locale_id = $t_locale->loadLocaleByCode('en_US');

    // create vocabulary list record (if it doesn't exist already)
    $t_list = new ca_lists();
    if (!$t_list->load(array('list_code' => 'aat'))) {
        $t_list->setMode(ACCESS_WRITE);
        $t_list->set('list_code', 'aat');
        $t_list->set('is_system_list', 0);
        $t_list->set('is_hierarchical', 1);
        $t_list->set('use_as_vocabulary', 1);
        $t_list->insert();

        if ($t_list->numErrors()) {
            print "ERROR: couldn't create ca_list row for AAT: " . join('; ', $t_list->getErrors()) . "\n";
            die;
        }

        $t_list->addLabel(array('name' => 'Art & Architecture Thesaurus'), $pn_en_locale_id, null, true);
    }

    // get list item types (should be defined by base installation profile [base.profile])
    // if your installation didn't use a profile inheriting from base.profile then you should make sure
    // that a list with code='list_item_types' is defined and the following four item codes are defined.
    // If these are not defined then the AAT will still import, but without any distinction between
    // terms, facets and guide terms
    $vn_list_item_type_concept = $t_list->getItemIDFromList('list_item_types', 'concept');
    $vn_list_item_type_facet = $t_list->getItemIDFromList('list_item_types', 'facet');
    $vn_list_item_type_guide_term = $t_list->getItemIDFromList('list_item_types', 'guide_term');
    $vn_list_item_type_hierarchy_name = $t_list->getItemIDFromList('list_item_types', 'hierarchy_name');

    // get list item label types (should be defined by base installation profile [base.profile])
    // if your installation didn't use a profile inheriting from base.profile then you should make sure
    // that a list with code='list_item_label_types' is defined and the following two item codes are defined.
    // If these are not defined then the AAT will still import, but non-preferred labels
    // will be added without a label type
    $vn_list_item_label_type_uf = $t_list->getItemIDFromList('list_item_label_types', 'uf');
    $vn_list_item_label_type_alt = $t_list->getItemIDFromList('list_item_label_types', 'alt');

    // get list item-to-item relationship type (should be defined by base installation profile [base.profile])
    // if your installation didn't use a profile inheriting from base.profile then you should make sure
    // that a ca_list_items_x_list_items relationship type with code='related' is defined. Otherwise import of term-to-term
    // relationships will fail.
    $t_rel_types = new ca_relationship_types();
    $vn_list_item_relation_type_id_related = $t_rel_types->getRelationshipTypeID('ca_list_items_x_list_items', 'related');

    // ------------------------------------------------------------------
    // Pass 1: read terms from the XML file and create list items
    // ------------------------------------------------------------------
    $o_xml = new XMLReader();
    $o_xml->open($ps_path_to_aat_data);

    print "READING AAT TERMS...\n";

    $va_parent_child_links = array();   // AAT child subject id => AAT preferred parent subject id
    $va_item_item_links = array();      // AAT subject id => list of related AAT subject ids
    $va_aat_id_to_item_id = array();    // AAT subject id => ca_list_items primary key
    $vn_last_message_length = 0;
    $va_subject = array();              // values accumulated for the <Subject> currently being read
    $vn_term_count = 0;

    while ($o_xml->read()) {
        switch ($o_xml->name) {
            # ---------------------------
            case 'Subject':
                if ($o_xml->nodeType === XMLReader::END_ELEMENT) {
                    // End of a record: everything about the subject has been collected, so import it
                    if ($va_subject['subject_id'] == '300000000') {
                        break; // skip top-level root
                    }

                    $vs_preferred_term = isset($va_subject['preferred_term']) ? $va_subject['preferred_term'] : null;
                    $vs_record_type = isset($va_subject['record_type']) ? $va_subject['record_type'] : null;
                    $vs_description = isset($va_subject['description']) ? $va_subject['description'] : null;
                    $vs_preferred_parent_id = isset($va_subject['preferred_parent_subject_id']) ? $va_subject['preferred_parent_subject_id'] : null;

                    switch ($vs_record_type) {
                        case 'Concept':
                            // Concepts get the 'concept' item type (previously mis-assigned to 'hierarchy_name')
                            $vn_type_id = $vn_list_item_type_concept;
                            $pb_is_enabled = true;
                            break;
                        case 'Facet':
                            $vn_type_id = $vn_list_item_type_facet;
                            $vs_preferred_term = '<' . $vs_preferred_term . '>';
                            $pb_is_enabled = false;
                            break;
                        case 'Guide Term':
                            $vn_type_id = $vn_list_item_type_guide_term;
                            $vs_preferred_term = '<' . $vs_preferred_term . '>';
                            $pb_is_enabled = false;
                            break;
                        case 'Hierarchy Name':
                            $vn_type_id = $vn_list_item_type_hierarchy_name;
                            $pb_is_enabled = false;
                            break;
                        default:
                            $vn_type_id = null;
                            $pb_is_enabled = true;
                            break;
                    }

                    // Overwrite the previous progress message in place: backspace over it,
                    // then print the new one padded to a fixed width
                    print str_repeat(chr(8), $vn_last_message_length);
                    $vs_message = "\tIMPORTING #" . ($vn_term_count + 1) . " [" . $va_subject['subject_id'] . "] " . $vs_preferred_term;
                    if (($vn_l = 100 - strlen($vs_message)) < 1) {
                        $vn_l = 1;
                    }
                    $vs_message .= str_repeat(' ', $vn_l);
                    $vn_last_message_length = strlen($vs_message);
                    print $vs_message;

                    if ($t_item = $t_list->addItem($va_subject['subject_id'], $pb_is_enabled, false, null, $vn_type_id, $va_subject['subject_id'], '', 4, 1)) {
                        $va_aat_id_to_item_id[$va_subject['subject_id']] = $t_item->getPrimaryKey();

                        // Children of the skipped root record are left without a parent link
                        if ($vs_preferred_parent_id != 300000000) {
                            $va_parent_child_links[$va_subject['subject_id']] = $vs_preferred_parent_id;
                        }

                        // add preferred labels
                        $vs_label = trim(htmlentities($vs_preferred_term, ENT_NOQUOTES));
                        if (!$t_item->addLabel(array('name_singular' => $vs_label, 'name_plural' => $vs_label, 'description' => $vs_description), $pn_en_locale_id, null, true)) {
                            print "ERROR: Could not add preferred label to AAT term [" . $va_subject['subject_id'] . "] " . $vs_preferred_term . ": " . join("; ", $t_item->getErrors()) . "\n";
                        }

                        // add alternate labels
                        if (isset($va_subject['non_preferred_terms']) && is_array($va_subject['non_preferred_terms'])) {
                            foreach ($va_subject['non_preferred_terms'] as $vn_i => $vs_np_label) {
                                $vs_np_term_type = isset($va_subject['non_preferred_term_types'][$vn_i]) ? $va_subject['non_preferred_term_types'][$vn_i] : null;

                                switch ($vs_np_term_type) {
                                    case 'Used For Term':
                                        $vn_np_term_type_id = $vn_list_item_label_type_uf;
                                        break;
                                    case 'Alternate Descriptor':
                                        $vn_np_term_type_id = $vn_list_item_label_type_alt;
                                        break;
                                    default:
                                        $vn_np_term_type_id = null;
                                        break;
                                }

                                $vs_np_label_text = trim(htmlentities($vs_np_label, ENT_NOQUOTES));
                                if (!$t_item->addLabel(array('name_singular' => $vs_np_label_text, 'name_plural' => $vs_np_label_text, 'description' => ''), $pn_en_locale_id, $vn_np_term_type_id, false)) {
                                    print "ERROR: Could not add non-preferred label to AAT term [" . $va_subject['subject_id'] . "] " . $vs_np_label . "\n";
                                }
                            }
                        }

                        // record item-item relations; keep every related subject
                        // (a plain assignment here would retain only the last one)
                        if (isset($va_subject['related_subjects']) && is_array($va_subject['related_subjects'])) {
                            foreach ($va_subject['related_subjects'] as $vs_rel_subject_id) {
                                $va_item_item_links[$va_subject['subject_id']][] = $vs_rel_subject_id;
                            }
                        }

                        $vn_term_count++;
                    } else {
                        print "ERROR: Could not import AAT term [" . $va_subject['subject_id'] . "] " . $vs_preferred_term . ": " . join("; ", $t_list->getErrors()) . "\n";
                    }
                } else {
                    // Start of a new record: reset the accumulator
                    $va_subject = array('subject_id' => $o_xml->getAttribute('Subject_ID'));
                }
                break;
            # ---------------------------
            case 'Descriptive_Note':
                // Consume nodes up to the closing tag, picking out the note text
                while ($o_xml->read()) {
                    switch ($o_xml->name) {
                        case 'Note_Text':
                            if ($o_xml->nodeType === XMLReader::ELEMENT) {
                                $o_xml->read(); // advance to the node following the opening tag and take its value
                                $va_subject['description'] = $o_xml->value;
                            }
                            break;
                        case 'Descriptive_Note':
                            break 2; // leaves both this switch and the enclosing while
                    }
                }
                break;
            # ---------------------------
            case 'Record_Type':
                if ($o_xml->nodeType === XMLReader::ELEMENT) {
                    $o_xml->read();
                    $va_subject['record_type'] = $o_xml->value;
                }
                break;
            # ---------------------------
            case 'Facet_Code':
                if ($o_xml->nodeType === XMLReader::ELEMENT) {
                    $o_xml->read();
                    $va_subject['facet_code'] = $o_xml->value;
                }
                break;
            # ---------------------------
            case 'Parent_Relationships':
                $vn_parent_id = $vs_historic_flag = null;
                while ($o_xml->read()) {
                    switch ($o_xml->name) {
                        case 'Preferred_Parent':
                            while ($o_xml->read()) {
                                switch ($o_xml->name) {
                                    case 'Parent_Subject_ID':
                                        if ($o_xml->nodeType === XMLReader::ELEMENT) {
                                            $o_xml->read();
                                            $vn_parent_id = $o_xml->value;
                                        }
                                        break;
                                    case 'Historic_Flag':
                                        // read but not used by the import
                                        if ($o_xml->nodeType === XMLReader::ELEMENT) {
                                            $o_xml->read();
                                            $vs_historic_flag = $o_xml->value;
                                        }
                                        break;
                                    case 'Preferred_Parent':
                                        $va_subject['preferred_parent_subject_id'] = $vn_parent_id;
                                        break 2;
                                }
                            }
                            break;
                        case 'Parent_Relationships':
                            break 2;
                    }
                }
                break;
            # ---------------------------
            case 'Preferred_Term':
                while ($o_xml->read()) {
                    switch ($o_xml->name) {
                        case 'Term_Type':
                            if ($o_xml->nodeType === XMLReader::ELEMENT) {
                                $o_xml->read();
                                $va_subject['preferred_term_type'] = $o_xml->value;
                            }
                            break;
                        case 'Term_Text':
                            if ($o_xml->nodeType === XMLReader::ELEMENT) {
                                $o_xml->read();
                                $va_subject['preferred_term'] = $o_xml->value;
                            }
                            break;
                        case 'Term_ID':
                            if ($o_xml->nodeType === XMLReader::ELEMENT) {
                                $o_xml->read();
                                $va_subject['preferred_term_id'] = $o_xml->value;
                            }
                            break;
                        case 'Preferred_Term':
                            break 2;
                    }
                }
                break;
            # ---------------------------
            case 'Non-Preferred_Term':
                while ($o_xml->read()) {
                    switch ($o_xml->name) {
                        case 'Term_Type':
                            if ($o_xml->nodeType === XMLReader::ELEMENT) {
                                $o_xml->read();
                                $va_subject['non_preferred_term_types'][] = $o_xml->value;
                            }
                            break;
                        case 'Term_Text':
                            if ($o_xml->nodeType === XMLReader::ELEMENT) {
                                $o_xml->read();
                                $va_subject['non_preferred_terms'][] = $o_xml->value;
                            }
                            break;
                        case 'Term_ID':
                            if ($o_xml->nodeType === XMLReader::ELEMENT) {
                                $o_xml->read();
                                $va_subject['non_preferred_term_ids'][] = $o_xml->value;
                            }
                            break;
                        case 'Non-Preferred_Term':
                            break 2;
                    }
                }
                break;
            # ---------------------------
            case 'VP_Subject_ID':
                if ($o_xml->nodeType === XMLReader::ELEMENT) {
                    $o_xml->read();
                    $va_subject['related_subjects'][] = $o_xml->value;
                }
                break;
            # ---------------------------
        }
    }
    $o_xml->close();

    // ------------------------------------------------------------------
    // Pass 2: link terms to their preferred parents
    // ------------------------------------------------------------------
    print "\n\nLINKING TERMS IN HIERARCHY...\n";
    $vn_last_message_length = 0;

    $t_item = new ca_list_items();
    $t_item->setMode(ACCESS_WRITE);

    foreach ($va_parent_child_links as $vs_child_id => $vs_parent_id) {
        print str_repeat(chr(8), $vn_last_message_length);
        $vs_message = "\tLINKING {$vs_child_id} to parent {$vs_parent_id}";
        if (($vn_l = 100 - strlen($vs_message)) < 1) {
            $vn_l = 1;
        }
        $vs_message .= str_repeat(' ', $vn_l);
        $vn_last_message_length = strlen($vs_message);
        print $vs_message;

        $vn_child_item_id = isset($va_aat_id_to_item_id[$vs_child_id]) ? $va_aat_id_to_item_id[$vs_child_id] : null;
        if (!$vn_child_item_id) {
            print "ERROR: no list item id for child_id {$vs_child_id} (were there previous errors?)\n";
            continue;
        }

        $vn_parent_item_id = isset($va_aat_id_to_item_id[$vs_parent_id]) ? $va_aat_id_to_item_id[$vs_parent_id] : null;
        if (!$vn_parent_item_id) {
            // report the parent id that could not be resolved (the child id was printed here before)
            print "ERROR: no list item id for parent_id {$vs_parent_id} (were there previous errors?)\n";
            continue;
        }

        if (!$t_item->load($vn_child_item_id)) {
            print "ERROR: could not load item for {$vs_child_id} (was translated to item_id={$vn_child_item_id})\n";
            continue;
        }

        $t_item->set('parent_id', $vn_parent_item_id);
        $t_item->update();

        if ($t_item->numErrors()) {
            print "ERROR: could not set parent_id for {$vs_child_id} (was translated to item_id={$vn_child_item_id}): " . join('; ', $t_item->getErrors()) . "\n";
        }
    }

    // ------------------------------------------------------------------
    // Pass 3: add term-to-term "related" links
    // ------------------------------------------------------------------
    if ($vn_list_item_relation_type_id_related > 0) {
        print "\n\nADDING RELATED TERM LINKS...\n";
        $vn_last_message_length = 0;

        $t_link = new ca_list_items_x_list_items();
        $t_link->setMode(ACCESS_WRITE);

        foreach ($va_item_item_links as $vs_left_id => $va_right_ids) {
            foreach ($va_right_ids as $vs_right_id) {
                print str_repeat(chr(8), $vn_last_message_length);
                $vs_message = "\tLINKING {$vs_left_id} to {$vs_right_id}";
                if (($vn_l = 100 - strlen($vs_message)) < 1) {
                    $vn_l = 1;
                }
                $vs_message .= str_repeat(' ', $vn_l);
                $vn_last_message_length = strlen($vs_message);
                print $vs_message;

                $vn_left_item_id = isset($va_aat_id_to_item_id[$vs_left_id]) ? $va_aat_id_to_item_id[$vs_left_id] : null;
                if (!$vn_left_item_id) {
                    print "ERROR: no list item id for left_id {$vs_left_id} (were there previous errors?)\n";
                    continue;
                }

                $vn_right_item_id = isset($va_aat_id_to_item_id[$vs_right_id]) ? $va_aat_id_to_item_id[$vs_right_id] : null;
                if (!$vn_right_item_id) {
                    print "ERROR: no list item id for right_id {$vs_right_id} (were there previous errors?)\n";
                    continue;
                }

                $t_link->set('term_left_id', $vn_left_item_id);
                $t_link->set('term_right_id', $vn_right_item_id);
                $t_link->set('type_id', $vn_list_item_relation_type_id_related);
                $t_link->insert();

                if ($t_link->numErrors()) {
                    print "ERROR: could not set link between {$vs_left_id} (was translated to item_id={$vn_left_item_id}) and {$vs_right_id} (was translated to item_id={$vn_right_item_id}): " . join('; ', $t_link->getErrors()) . "\n";
                }
            }
        }
    } else {
        print "WARNING: Skipped import of term-term relationships because the ca_list_items_x_list_items 'related' relationship type is not defined for your installation\n";
    }

    print "\n\nIMPORT COMPLETE.\n";
}