/**
 * Generate mappings for ElasticSearch based upon currently configured search indexing.
 *
 * The configured index is deleted and re-created, then one mapping request is sent for
 * every indexed field of every table in the search indexing configuration (subject-table
 * fields, metadata attributes, related-table fields), followed by one request per table
 * for the created/modified fields. Problems are reported through CLIUtils::addError()
 * and abort the run; success is reported through CLIUtils::addMessage().
 *
 * @param array|null $po_opts Options; currently not read by this method.
 * @return void
 */
public static function generate_elasticSearch_configuration($po_opts = null) {
	require_once __CA_LIB_DIR__ . "/core/Search/SearchBase.php";
	require_once __CA_LIB_DIR__ . "/core/Configuration.php";
	require_once __CA_LIB_DIR__ . "/core/Datamodel.php";
	require_once __CA_LIB_DIR__ . "/core/Zend/Http/Client.php";

	$vo_app_conf = Configuration::load();
	$vo_search_conf = Configuration::load($vo_app_conf->get("search_config"));
	$vo_search_indexing_conf = Configuration::load($vo_search_conf->get("search_indexing_config"));

	$o_db = new Db();
	$o_datamodel = Datamodel::load();

	// the index URL does not change while we run, so build it once
	$vs_index_url = $vo_search_conf->get('search_elasticsearch_base_url') . "/" . $vo_search_conf->get('search_elasticsearch_index_name');

	// delete and create index
	$vo_http_client = new Zend_Http_Client();
	$vo_http_client->setUri($vs_index_url);
	try {
		$vo_http_client->request('DELETE');
		$vo_http_client->request('PUT');
	} catch (Zend_Http_Client_Adapter_Exception $e) {
		CLIUtils::addError(_t('Couldn\'t connect to ElasticSearch. Is the service running?'));
		return;
	}

	$va_tables = $vo_search_indexing_conf->getAssocKeys();
	$vo_search_base = new SearchBase();

	foreach ($va_tables as $vs_table) {
		$vs_mapping_url = $vs_index_url . "/" . $vs_table . "/" . "_mapping";

		// get fields to index for this table
		if (!is_array($va_table_fields = $vo_search_base->getFieldsToIndex($vs_table))) {
			$va_table_fields = array();
		}

		$t_instance = $o_datamodel->getTableInstance($vs_table);
		$vn_table_num = $o_datamodel->getTableNum($vs_table);

		$va_attributes = null;
		$va_opts = array();

		// rewrite "_ca_attribute_<n>" field names to "A<n>" and collect matching metadata elements
		$va_rewritten_fields = array();
		foreach ($va_table_fields as $vs_field_name => $va_field_options) {
			if (!preg_match('!^_ca_attribute_([\\d]*)$!', $vs_field_name, $va_matches)) {
				$va_rewritten_fields[$vs_field_name] = $va_field_options;
				continue;
			}
			$va_rewritten_fields['A' . $va_matches[1]] = $va_field_options;

			// NOTE(review): the captured value is numeric but is compared against element_code
			// here, and the loop over $va_attributes below expects element_code to look like
			// "_ca_attribute_<n>" -- confirm that this lookup is really intended.
			$qr_type_restrictions = $o_db->query('
				SELECT DISTINCT came.*
				FROM ca_metadata_type_restrictions camtr
				INNER JOIN ca_metadata_elements as came ON came.element_id = camtr.element_id
				WHERE camtr.table_num = ? AND came.element_code = ?
			', (int) $vn_table_num, (string) $va_matches[1]);

			while ($qr_type_restrictions->nextRow()) {
				$vn_element_id = $qr_type_restrictions->get('element_id');
				$va_attributes[$vn_element_id] = array(
					'element_id' => $vn_element_id,
					'element_code' => $qr_type_restrictions->get('element_code'),
					'datatype' => $qr_type_restrictions->get('datatype')
				);
			}
		}
		$va_table_fields = $va_rewritten_fields;

		// derive ElasticSearch types for the collected metadata elements
		if (is_array($va_attributes)) {
			foreach ($va_attributes as $vn_element_id => $va_element_info) {
				if (!preg_match("!^_ca_attribute_([\\d]+)\$!", $va_element_info['element_code'], $va_matches)) {
					continue;
				}
				$vs_element_code = $vs_table . ".A" . $va_matches[1];
				$va_element_opts = array();

				switch ($va_element_info['datatype']) {
					case 2:		// daterange: real date field plus a "_text" companion
						$va_element_opts['properties']['type'] = 'date';
						$va_element_opts['properties']["format"] = 'dateOptionalTime';
						$va_element_opts['properties']["ignore_malformed"] = false;
						$va_table_fields[$vs_element_code . '_text'] = array_merge($va_opts, array('properties' => array('type' => 'string')));
						break;
					case 4:		// geocode: geo_point field plus a "_text" companion
						$va_element_opts['properties']['type'] = 'geo_point';
						$va_table_fields[$vs_element_code . '_text'] = array_merge($va_opts, array('properties' => array('type' => 'string')));
						break;
					case 10:	// timecode
					case 12:	// numeric/float
						$va_element_opts['properties']['type'] = 'double';
						break;
					case 11:	// integer
						$va_element_opts['properties']['type'] = 'long';
						break;
					case 1:		// text
					case 3:		// list
					case 5:		// url
					case 6:		// currency
					case 8:		// length
					case 9:		// weight
					case 13:	// LCSH
					case 14:	// geonames
					case 15:	// file
					case 16:	// media
					case 19:	// taxonomy
					case 20:	// information service
					default:
						$va_element_opts['properties']['type'] = 'string';
						break;
				}
				$va_table_fields[$vs_element_code] = array_merge($va_opts, $va_element_opts);
			}
		}

		/* subject table fields */
		foreach ($va_table_fields as $vs_field_name => $va_field_options) {
			$vs_store = in_array("STORE", $va_field_options) ? 'yes' : 'no';
			$va_field_options['properties']["store"] = $vs_store;

			if (!empty($va_field_options["BOOST"])) {
				$va_field_options['properties']["boost"] = floatval($va_field_options["BOOST"]);
			}

			// TODO: maybe do something for fields flagged DONT_TOKENIZE?

			// "intrinsic" fields
			if (!isset($va_field_options['properties']['type']) && $t_instance->hasField($vs_field_name)) {
				switch ($t_instance->getFieldInfo($vs_field_name, "FIELD_TYPE")) {
					case FT_TEXT:
					case FT_MEDIA:
					case FT_FILE:
					case FT_PASSWORD:
					case FT_VARS:
						$va_field_options['properties']['type'] = 'string';
						break;
					case FT_NUMBER:
					case FT_TIME:
					case FT_TIMERANGE:
					case FT_TIMECODE:
						if ($t_instance->getFieldInfo($vs_field_name, "LIST_CODE")) {
							// list-based intrinsics get indexed with both item_id and label text
							$va_field_options['properties']['type'] = 'string';
						} else {
							$va_field_options['properties']['type'] = 'double';
						}
						break;
					case FT_TIMESTAMP:
					case FT_DATETIME:
					case FT_HISTORIC_DATETIME:
					case FT_DATE:
					case FT_HISTORIC_DATE:
					case FT_DATERANGE:
					case FT_HISTORIC_DATERANGE:
						$va_field_options['properties']['type'] = 'date';
						break;
					case FT_BIT:
						$va_field_options['properties']['type'] = 'boolean';
						break;
					default:
						$va_field_options['properties']['type'] = "string";
						break;
				}
			}

			// anything still untyped is indexed as a string
			if (empty($va_field_options['properties']['type'])) {
				$va_field_options['properties']['type'] = "string";
			}

			$va_mapping = array();
			$va_mapping[$vs_table]["properties"][$vs_table . "." . $vs_field_name] = $va_field_options["properties"];

			if (!self::_sendElasticSearchMapping($vs_mapping_url, $va_mapping, "{$vs_table}.{$vs_field_name}")) {
				return;
			}
		}

		/* related tables */
		$va_related_tables = $vo_search_base->getRelatedIndexingTables($vs_table);
		if (!is_array($va_related_tables)) {
			$va_related_tables = array();
		}
		foreach ($va_related_tables as $vs_related_table) {
			// getFieldsToIndex() is already treated as "may not return an array" above
			$va_related_table_fields = $vo_search_base->getFieldsToIndex($vs_table, $vs_related_table);
			if (!is_array($va_related_table_fields)) {
				continue;
			}

			foreach ($va_related_table_fields as $vs_related_table_field => $va_related_table_field_options) {
				$vs_store = in_array("STORE", $va_related_table_field_options) ? 'yes' : 'no';
				$va_related_table_field_options['properties']["store"] = $vs_store;
				$va_related_table_field_options['properties']['type'] = "string";

				// TODO: do something for fields flagged DONT_TOKENIZE?

				$va_mapping = array();
				$va_mapping[$vs_table]["properties"][$vs_related_table . '.' . $vs_related_table_field] = $va_related_table_field_options["properties"];

				if (!self::_sendElasticSearchMapping($vs_mapping_url, $va_mapping, "{$vs_table}/{$vs_related_table}.{$vs_related_table_field}")) {
					return;
				}
			}
		}

		/* created and modified fields */
		$va_mapping = array();
		$va_mapping[$vs_table]["properties"]["created"] = array('type' => 'date', 'format' => 'dateOptionalTime', 'ignore_malformed' => false);
		$va_mapping[$vs_table]["properties"]["modified"] = array('type' => 'date', 'format' => 'dateOptionalTime', 'ignore_malformed' => false);
		$va_mapping[$vs_table]["properties"]["created_user_id"] = array('type' => 'double');
		$va_mapping[$vs_table]["properties"]["modified_user_id"] = array('type' => 'double');

		if (!self::_sendElasticSearchMapping($vs_mapping_url, $va_mapping, "{$vs_table}.created/modified")) {
			return;
		}
	}

	CLIUtils::addMessage(_t('ElasticSearch schema was created successfully!'), array('color' => 'bold_green'));
	CLIUtils::addMessage(_t("Note that all data has been wiped from the index so you must issue a full reindex now, either using caUtils rebuild-search-index or the web-based tool under Manage > Administration > Maintenance."), array('color' => 'red'));
}

/**
 * POST a single mapping document to ElasticSearch and report any failure via CLIUtils.
 *
 * The request is sent exactly once and entirely inside the try block, so transport
 * errors are reported instead of escaping as uncaught exceptions.
 *
 * @param string $ps_mapping_url Full URL of the "_mapping" endpoint to post to
 * @param array $pa_mapping Mapping structure; sent JSON-encoded as the request body
 * @param string $ps_context Human-readable name of what is being mapped, used in error messages
 * @return bool True if ElasticSearch accepted the mapping, false if an error was reported
 */
private static function _sendElasticSearchMapping($ps_mapping_url, array $pa_mapping, $ps_context) {
	$vs_mapping_json = json_encode($pa_mapping);

	try {
		$vo_http_client = new Zend_Http_Client();
		$vo_http_client->setUri($ps_mapping_url);
		$vo_http_response = $vo_http_client->setRawData($vs_mapping_json)->setEncType('text/json')->request('POST');
		$vs_body = $vo_http_response->getBody();
	} catch (Exception $e) {
		// no usable response body exists on this path, so report the exception itself
		CLIUtils::addError(_t("Something went wrong at %1", $ps_context));
		CLIUtils::addError(_t("Error was: %1", $e->getMessage()));
		return false;
	}

	$va_response = json_decode($vs_body, true);

	// older ElasticSearch releases answer with "ok", 1.0 and later only with "acknowledged"
	$vb_accepted = is_array($va_response) && (!empty($va_response["ok"]) || !empty($va_response["acknowledged"]));
	if ($vb_accepted) {
		return true;
	}

	// fall back to the raw body when the response carries no "error" entry
	$vm_error = (is_array($va_response) && isset($va_response["error"])) ? $va_response["error"] : $vs_body;
	if (!is_scalar($vm_error)) {
		$vm_error = json_encode($vm_error);
	}
	CLIUtils::addError(_t("Something went wrong at %1 with message: %2", $ps_context, $vm_error));
	CLIUtils::addError(_t("Mapping sent to ElasticSearch was: %1", $vs_mapping_json));
	return false;
}
/**
 * (Re)write the Solr multicore configuration from the current search indexing configuration.
 *
 * Writes solr.xml with one core per indexed table into the configured Solr home directory.
 * For every table it then creates the core and conf directories if they are missing,
 * copies the solrconfig.xml template and generates schema.xml from its template by
 * replacing the CORE_NAME, <!--FIELDS-->, <!--KEY--> and <!--COPYFIELDS--> placeholders.
 *
 * The script is terminated with die() if a file or directory cannot be read or written.
 *
 * @param bool $pb_invoked_from_command_line If false, the field list cached from the previous
 *        run is merged with the current one before the schema is written; if true, only the
 *        current configuration is used. In both cases the resulting list is written to the cache.
 * @return void
 */
public static function updateSolrConfiguration($pb_invoked_from_command_line = false) {
	/* get search and search indexing configuration */
	$po_app_config = Configuration::load();
	$po_search_config = Configuration::load($po_app_config->get("search_config"));
	$po_search_indexing_config = Configuration::load($po_search_config->get("search_indexing_config"));

	$ps_solr_home_dir = $po_search_config->get('search_solr_home_dir');

	$po_datamodel = Datamodel::load();
	$po_search_base = new SearchBase();

	global $o_db;
	if (!is_object($o_db)) { /* catch command line usage */
		$o_db = new Db();
	}

	$t_element = new ca_metadata_elements();

	/* parse search indexing configuration to see which tables are indexed */
	$va_tables = $po_search_indexing_config->getAssocKeys();

	/* create solr.xml first to support multicore */
	$vs_solr_xml = '<?xml version="1.0" encoding="UTF-8" ?>' . SolrConfiguration::nl();
	$vs_solr_xml .= '<solr persistent="true">' . SolrConfiguration::nl();
	$vs_solr_xml .= SolrConfiguration::tabs(1) . '<cores adminPath="/admin/cores">' . SolrConfiguration::nl();
	foreach ($va_tables as $vs_table) {
		/* cores are named after the table name rather than the table number */
		$vs_solr_xml .= SolrConfiguration::tabs(2) . '<core name="' . $vs_table . '" instanceDir="' . $vs_table . '" />' . SolrConfiguration::nl();
	}
	$vs_solr_xml .= SolrConfiguration::tabs(1) . '</cores>' . SolrConfiguration::nl();
	$vs_solr_xml .= '</solr>' . SolrConfiguration::nl();

	/* try to write configuration file */
	$vr_solr_xml_file = fopen($ps_solr_home_dir . "/solr.xml", 'w+'); // overwrite old one
	if (!is_resource($vr_solr_xml_file)) {
		die("Couldn't write to solr.xml file in Solr home directory. Please check the permissions.\n");
	}
	fwrite($vr_solr_xml_file, $vs_solr_xml);
	fclose($vr_solr_xml_file);

	/* configure the cores */
	foreach ($va_tables as $vs_table) {
		$t_instance = $po_datamodel->getTableInstance($vs_table);
		$vs_pk = $t_instance->primaryKey();

		$vs_core_dir = $ps_solr_home_dir . "/" . $vs_table;
		$vs_conf_dir = $vs_core_dir . "/conf";

		/* create core directory */
		if (!file_exists($vs_core_dir)) {
			if (!mkdir($vs_core_dir, 0777)) { /* TODO: think about permissions */
				die("Couldn't create directory in Solr home. Please check the permissions.\n");
			}
		}

		/* create conf directory */
		if (!file_exists($vs_conf_dir)) {
			if (!mkdir($vs_conf_dir, 0777)) {
				die("Couldn't create directory in core directory. Please check the permissions.\n");
			}
		}

		/* create solrconfig.xml for this core */
		$vr_solrconfig_xml_file = fopen($vs_conf_dir . "/solrconfig.xml", 'w+');
		if (!is_resource($vr_solrconfig_xml_file)) {
			die("Couldn't write to solrconfig.xml file for core {$vs_table}. Please check the permissions.\n");
		}

		/* read template and copy it */
		$va_solrconfig_xml_template = file(__CA_LIB_DIR__ . "/core/Search/Solr/solrplugin_templates/solrconfig.xml");
		if (!is_array($va_solrconfig_xml_template)) {
			die("Couldn't read solrconfig.xml template.");
		}
		foreach ($va_solrconfig_xml_template as $vs_line) {
			fwrite($vr_solrconfig_xml_file, $vs_line);
		}
		fclose($vr_solrconfig_xml_file);

		/* create schema.xml for this core */
		$vr_schema_xml_file = fopen($vs_conf_dir . "/schema.xml", 'w+');
		if (!is_resource($vr_schema_xml_file)) {
			die("Couldn't write to schema.xml file for core {$vs_table}. Please check the permissions.\n");
		}

		/* read template, modify it, add table-specific fields and write to schema.xml configuration for this core */
		$va_schema_xml_template = file(__CA_LIB_DIR__ . "/core/Search/Solr/solrplugin_templates/schema.xml");
		if (!is_array($va_schema_xml_template)) {
			die("Couldn't read schema.xml template.");
		}

		/* Both strings are filled while handling <!--FIELDS--> and written at <!--COPYFIELDS-->.
		 * They are reset for every table so that a template in which the placeholders are
		 * missing or in unexpected order cannot leak values from the previous core. */
		$vs_copyfields = "";
		$vs_subject_table_copyfields = "";

		foreach ($va_schema_xml_template as $vs_line) {
			/* 1st replacement: core name */
			if (strpos($vs_line, "CORE_NAME") !== false) {
				fwrite($vr_schema_xml_file, str_replace("CORE_NAME", $vs_table, $vs_line));
				continue;
			}

			/* 2nd replacement: fields - the big part */
			if (strpos($vs_line, "<!--FIELDS-->") !== false) {
				$vs_field_schema = "";
				$vs_subject_table_copyfields = "";

				/* list of all fields created - is used for copyField directives after field block */
				$va_schema_fields = array();

				/* subject table */
				/* we add the PK - this is used for incremental indexing */
				$vs_field_schema .= SolrConfiguration::tabs(2) . '<field name="' . $vs_table . '.' . $vs_pk . '" type="int" indexed="true" stored="true" />' . SolrConfiguration::nl();
				$vs_field_schema .= SolrConfiguration::tabs(2) . '<field name="' . $vs_pk . '" type="int" indexed="true" stored="true" />' . SolrConfiguration::nl();
				$vs_subject_table_copyfields .= SolrConfiguration::tabs(1) . '<copyField source="' . $vs_table . '.' . $vs_pk . '" dest="' . $vs_pk . '" />' . SolrConfiguration::nl();

				/* get fields-to-index from search indexing configuration */
				if (!is_array($va_table_fields = $po_search_base->getFieldsToIndex($vs_table))) {
					$va_table_fields = array();
				}

				$vn_table_num = $po_datamodel->getTableNum($vs_table);

				/* collect the metadata elements referenced as "_ca_attribute_<n>" */
				$va_attributes = null;
				foreach ($va_table_fields as $vs_field_name => $va_field_options) {
					if (preg_match('!^_ca_attribute_(\\d+)$!', $vs_field_name, $va_matches)) {
						// NOTE(review): the result of load() is not checked here -- confirm what
						// getPrimaryKey()/get() return for an element id that does not exist.
						$t_element->load($va_matches[1]);
						$va_attributes[$t_element->getPrimaryKey()] = array(
							'element_id' => $t_element->get('element_id'),
							'element_code' => $t_element->get('element_code'),
							'datatype' => $t_element->get('datatype')
						);
					}
				}

				if (is_array($va_attributes)) {
					$va_metadata_fields = array();
					foreach ($va_attributes as $vn_element_id => $va_element_info) {
						$va_metadata_fields += SolrConfiguration::getElementType($va_element_info);
					}

					/* set datatype for metadata elements in $va_table_fields array;
					 * unsetting first moves an already present key to the end of the list */
					foreach ($va_metadata_fields as $vs_key => $vm_value) {
						if (array_key_exists($vs_key, $va_table_fields)) {
							unset($va_table_fields[$vs_key]);
						}
						$va_table_fields[$vs_key] = $vm_value;
					}
				}

				/* we now have the current configuration */

				/* since Solr supports live updates only if changes are 'backwards-compatible'
				 * (i.e. no fields are deleted), we have to merge the current configuration with the
				 * cached one, create the new configuration based upon that and cache it.
				 *
				 * Invocation of the command-line script support/utils/createSolrConfiguration.php,
				 * however, creates a completely fresh configuration and caches it.
				 */
				$va_frontend_options = array(
					'lifetime' => null,
					'logging' => false,
					'write_control' => true,
					'automatic_cleaning_factor' => 0,
					'automatic_serialization' => true
				);
				$vs_cache_dir = __CA_APP_DIR__ . '/tmp';
				$va_backend_options = array(
					'cache_dir' => $vs_cache_dir,
					'file_locking' => true,
					'read_control' => false,
					'file_name_prefix' => 'ca_cache',
					'cache_file_perm' => 0777
				);
				$vo_cache = Zend_Cache::factory('Core', 'File', $va_frontend_options, $va_backend_options);

				/* anything other than an array cannot be merged below */
				$va_cache_data = $vo_cache->load('ca_search_indexing_info_' . $vs_table);
				if (!is_array($va_cache_data)) {
					$va_cache_data = array();
				}

				if (!$pb_invoked_from_command_line) {
					$va_table_fields = array_merge($va_cache_data, $va_table_fields);
				}
				$vo_cache->save($va_table_fields, 'ca_search_indexing_info_' . $vs_table);

				foreach ($va_table_fields as $vs_field_name => $va_field_options) {
					$vb_field_is_stored = in_array("STORE", $va_field_options);
					$vb_field_is_tokenized = !in_array("DONT_TOKENIZE", $va_field_options);

					$vs_solr_field_name = $vs_table . '.' . SolrConfiguration::adjustFieldsToIndex($vs_field_name);
					$va_schema_fields[] = $vs_solr_field_name;

					$vm_configured_type = isset($va_field_options['type']) ? $va_field_options['type'] : null;
					if (in_array($vm_configured_type, array('text', 'string'))) {
						$vs_type = $vb_field_is_tokenized ? 'text' : 'string';
					} else {
						if (!isset($va_field_options['type']) && $t_instance->hasField($vs_field_name)) {
							// if the primary key is configured to be indexed in search_indexing.conf, ignore it here
							// (we add it anyway and solr doesn't like duplicate fields!)
							if ($vs_pk == $vs_field_name) {
								continue;
							}

							switch ($t_instance->getFieldInfo($vs_field_name, "FIELD_TYPE")) {
								case FT_TEXT:
								case FT_MEDIA:
								case FT_FILE:
								case FT_PASSWORD:
								case FT_VARS:
									$va_field_options['type'] = 'text';
									break;
								case FT_NUMBER:
								case FT_TIME:
								case FT_TIMERANGE:
								case FT_TIMECODE:
									$va_field_options['type'] = 'float';
									break;
								case FT_TIMESTAMP:
								case FT_DATETIME:
								case FT_HISTORIC_DATETIME:
								case FT_DATE:
								case FT_HISTORIC_DATE:
								case FT_DATERANGE:
								case FT_HISTORIC_DATERANGE:
									$va_field_options['type'] = 'daterange';
									break;
								case FT_BIT:
									$va_field_options['type'] = 'bool';
									break;
								default:
									$va_field_options['type'] = null;
									break;
							}
						}
						/* anything without a usable type is indexed as text */
						$vs_type = !empty($va_field_options['type']) ? $va_field_options['type'] : 'text';
					}

					$vs_field_schema .= SolrConfiguration::tabs(2) . '<field name="' . $vs_solr_field_name . '" type="' . $vs_type;
					$vs_field_schema .= '" indexed="true" ';
					$vs_field_schema .= $vb_field_is_stored ? 'stored="true" ' : 'stored="false" ';
					$vs_field_schema .= '/>' . SolrConfiguration::nl();
				}

				/* related tables */
				$va_related_tables = $po_search_base->getRelatedIndexingTables($vs_table);
				if (!is_array($va_related_tables)) {
					$va_related_tables = array();
				}
				foreach ($va_related_tables as $vs_related_table) {
					/* getFieldsToIndex() is already treated as "may not return an array" above */
					$va_related_table_fields = $po_search_base->getFieldsToIndex($vs_table, $vs_related_table);
					if (!is_array($va_related_table_fields)) {
						continue;
					}

					foreach ($va_related_table_fields as $vs_related_table_field => $va_related_table_field_options) {
						$vb_field_is_stored = in_array("STORE", $va_related_table_field_options);
						$vb_field_is_tokenized = !in_array("DONT_TOKENIZE", $va_related_table_field_options);

						$vs_solr_field_name = $vs_related_table . '.' . $vs_related_table_field;
						$va_schema_fields[] = $vs_solr_field_name;

						$vs_field_schema .= SolrConfiguration::tabs(2) . '<field name="' . $vs_solr_field_name . '" type="';
						$vs_field_schema .= $vb_field_is_tokenized ? 'text' : 'string';
						$vs_field_schema .= '" indexed="true" ';
						$vs_field_schema .= $vb_field_is_stored ? 'stored="true" ' : 'stored="false" ';
						$vs_field_schema .= '/>' . SolrConfiguration::nl();
					}
				}

				/* copyfield directives
				 * we use a single field in each index (called "text") where
				 * all other fields are copied. the text field is the default
				 * search field. it is used if a field name specification is
				 * omitted in a search query.
				 */
				$vs_copyfields = "";
				foreach ($va_schema_fields as $vs_schema_field) {
					$vs_copyfields .= SolrConfiguration::tabs(1) . '<copyField source="' . $vs_schema_field . '" dest="text" />' . SolrConfiguration::nl();
				}

				//
				// Get access points
				//
				if (!is_array($va_access_points = $po_search_base->getAccessPoints($vs_table))) {
					$va_access_points = array();
				}
				foreach ($va_access_points as $vs_access_point => $va_access_point_info) {
					$va_access_point_fields = (isset($va_access_point_info['fields']) && is_array($va_access_point_info['fields'])) ? $va_access_point_info['fields'] : array();
					foreach ($va_access_point_fields as $vn_i => $vs_schema_field) {
						$vs_copyfields .= SolrConfiguration::tabs(1) . '<copyField source="' . $vs_schema_field . '" dest="' . $vs_access_point . '" />' . SolrConfiguration::nl();
					}
					$vs_field_schema .= SolrConfiguration::tabs(2) . '<field name="' . $vs_access_point . '" type="text" indexed="true" stored="true" multiValued="true"/>' . SolrConfiguration::nl();
				}

				/* write field indexing config into file */
				fwrite($vr_schema_xml_file, $vs_field_schema);
				continue;
			}

			/* 3rd replacement: uniquekey */
			if (strpos($vs_line, "<!--KEY-->") !== false) {
				fwrite($vr_schema_xml_file, str_replace("<!--KEY-->", $vs_table . "." . $vs_pk, $vs_line));
				continue;
			}

			/* 4th replacement: copyFields */
			if (strpos($vs_line, "<!--COPYFIELDS-->") !== false) {
				/* $vs_copyfields is empty here only if the template has no <!--FIELDS--> line before this one */
				fwrite($vr_schema_xml_file, $vs_copyfields);
				// add copyField for the subject table fields so that the pk can be addressed in 2 ways:
				// "objects.object_id" or "object.id"
				fwrite($vr_schema_xml_file, $vs_subject_table_copyfields);
				continue;
			}

			/* "normal" line */
			fwrite($vr_schema_xml_file, $vs_line);
		}

		fclose($vr_schema_xml_file);
	}
}