/**
 * Regenerate the Solr configuration from the currently configured search indexing
 * and report the outcome on the command line.
 *
 * The configuration is rebuilt through SolrConfiguration::updateSolrConfiguration()
 * with its "invoked from command line" flag set. Two messages are queued afterwards:
 * a success notice and a reminder that a full reindex is required.
 *
 * @param mixed $po_opts Not read by this method.
 */
public static function generate_solr_configuration($po_opts = null)
{
    require_once __CA_LIB_DIR__ . "/core/Search/Solr/SolrConfiguration.php";

    $vb_from_command_line = true;
    SolrConfiguration::updateSolrConfiguration($vb_from_command_line);

    // @TODO what if something goes wrong!?
    $vs_success_notice = _t('Solr schema was created successfully!');
    CLIUtils::addMessage($vs_success_notice, array('color' => 'bold_green'));

    $vs_reindex_notice = _t("Note that all data has been wiped from the index so you must issue a full reindex now, either using caUtils rebuild-search-index or the web-based tool under Manage > Administration > Maintenance.");
    CLIUtils::addMessage($vs_reindex_notice, array('color' => 'red'));
}
/**
 * Regenerate the Solr configuration and ask Solr to reload every indexed core.
 *
 * After SolrConfiguration::updateSolrConfiguration() has rewritten the configuration
 * files, one request with action=RELOAD is sent to the core admin handler for each
 * table listed in the search indexing configuration (cores are named after tables).
 *
 * The responses are not inspected; any exception raised by the HTTP client
 * propagates to the caller.
 *
 * @return void
 */
private function _refreshSolrConfiguration()
{
    SolrConfiguration::updateSolrConfiguration();

    /* reload all cores */
    $vo_http_client = new Zend_Http_Client();

    /* Join the base URL and the core admin path with exactly one slash. Plain
     * concatenation with an extra "/" produced ".../solr//admin/cores", which
     * strict servlet containers may reject as an ambiguous URI. */
    $vo_http_client->setUri(rtrim($this->ops_search_solr_url, "/") . "/admin/cores");

    $vo_search_indexing_config = Configuration::load($this->opo_search_config->get('search_indexing_config'));
    $va_tables = $vo_search_indexing_config->getAssocKeys();

    /* reload all tables */
    foreach ($va_tables as $vs_table) {
        $vo_http_client->setParameterGet(array('action' => 'RELOAD', 'core' => $vs_table));
        $vo_http_client->request();
    }
}
/**
 * Translate one metadata element into the Solr field definitions it requires.
 *
 * The result always contains an entry '_ca_attribute_<element_id>'. Date ranges and
 * geocodes additionally get a '_ca_attribute_<element_id>_text' companion of type
 * 'text'. For containers the definitions of the elements returned by
 * getElementsInSet() are merged in recursively (the container itself is skipped)
 * and the container's own entry carries no options.
 *
 * @param array $pa_element_info Element record; the keys 'element_id' and 'datatype' are read
 * @return array Field options keyed by field name
 */
private static function getElementType($pa_element_info)
{
    $va_fields = array();
    $va_options = array();

    $vn_id = $pa_element_info['element_id'];
    $vm_datatype = $pa_element_info['datatype'];
    $vs_field_key = '_ca_attribute_' . $vn_id;

    /* text, list, url, currency, length, weight, LCSH, geonames, file, media,
     * taxonomy, information service */
    $va_text_datatypes = array(1, 3, 5, 6, 8, 9, 13, 14, 15, 16, 19, 20);

    /* All comparisons below are deliberately loose (== / non-strict in_array) and
     * are evaluated in a fixed order: container, text-like, daterange, geocode,
     * float, int, fallback. */
    if ($vm_datatype == 0) {
        /* container: collect the definitions of its member elements */
        $t_container = new ca_metadata_elements((int) $vn_id);
        if ($t_container->getPrimaryKey()) {
            foreach ($t_container->getElementsInSet() as $va_member) {
                if ($va_member['element_id'] == $vn_id) {
                    continue;
                }
                $va_fields += SolrConfiguration::getElementType($va_member);
            }
        }
    } elseif (in_array($vm_datatype, $va_text_datatypes)) {
        $va_options['type'] = 'text';
    } elseif ($vm_datatype == 2) {
        /* daterange */
        $va_options['type'] = 'daterange';
        $va_fields[$vs_field_key . '_text'] = array('type' => 'text');
    } elseif ($vm_datatype == 4) {
        /* geocode */
        $va_options['type'] = 'geocode';
        $va_fields[$vs_field_key . '_text'] = array('type' => 'text');
    } elseif ($vm_datatype == 10 || $vm_datatype == 12) {
        /* timecode, numeric/float */
        $va_options['type'] = 'float';
    } elseif ($vm_datatype == 11) {
        /* integer */
        $va_options['type'] = 'int';
    } else {
        $va_options['type'] = 'text';
    }

    $va_fields[$vs_field_key] = $va_options;

    return $va_fields;
}
/**
 * (Re)write the Solr multicore configuration for all tables listed in the search
 * indexing configuration.
 *
 * The following files are written below the directory configured as
 * 'search_solr_home_dir':
 *   - solr.xml, declaring one core per indexed table (cores are named after tables)
 *   - <table>/conf/solrconfig.xml, a verbatim copy of the bundled template
 *   - <table>/conf/schema.xml, the bundled template with the CORE_NAME,
 *     <!--FIELDS-->, <!--KEY--> and <!--COPYFIELDS--> placeholders filled in
 *
 * The list of fields generated for each table is stored in a file cache under
 * __CA_APP_DIR__/tmp. Unless the method is invoked from the command line, the
 * cached list is merged with the current one so that previously generated fields
 * are kept in the schema.
 *
 * Any failure to create a directory, open a file for writing or read a template
 * terminates the script via die().
 *
 * @param bool $pb_invoked_from_command_line If true the cached field list is ignored
 *        and a completely fresh configuration is generated (and cached)
 * @return void
 */
public static function updateSolrConfiguration($pb_invoked_from_command_line = false)
{
    /* get search and search indexing configuration */
    $po_app_config = Configuration::load();
    $po_search_config = Configuration::load($po_app_config->get("search_config"));
    $po_search_indexing_config = Configuration::load($po_search_config->get("search_indexing_config"));

    $ps_solr_home_dir = $po_search_config->get('search_solr_home_dir');

    $po_datamodel = Datamodel::load();
    $po_search_base = new SearchBase();

    global $o_db;
    if (!is_object($o_db)) {
        /* catch command line usage */
        $o_db = new Db();
    }

    /* parse search indexing configuration to see which tables are indexed */
    $va_tables = $po_search_indexing_config->getAssocKeys();

    /* create solr.xml first to support multicore */
    $vs_solr_xml = "";
    $vs_solr_xml .= '<?xml version="1.0" encoding="UTF-8" ?>' . SolrConfiguration::nl();
    $vs_solr_xml .= '<solr persistent="true">' . SolrConfiguration::nl();
    $vs_solr_xml .= SolrConfiguration::tabs(1) . '<cores adminPath="/admin/cores">' . SolrConfiguration::nl();
    foreach ($va_tables as $vs_table) {
        /* I don't like tablenums, so we use the table name to name the cores */
        $vs_solr_xml .= SolrConfiguration::tabs(2) . '<core name="' . $vs_table . '" instanceDir="' . $vs_table . '" />' . SolrConfiguration::nl();
    }
    $vs_solr_xml .= SolrConfiguration::tabs(1) . '</cores>' . SolrConfiguration::nl();
    $vs_solr_xml .= '</solr>' . SolrConfiguration::nl();

    /* try to write configuration file */
    $vr_solr_xml_file = fopen($ps_solr_home_dir . "/solr.xml", 'w+'); // overwrite old one
    if (!is_resource($vr_solr_xml_file)) {
        die("Couldn't write to solr.xml file in Solr home directory. Please check the permissions.\n");
    }
    fprintf($vr_solr_xml_file, "%s", $vs_solr_xml);
    fclose($vr_solr_xml_file);

    /* configure the cores */
    foreach ($va_tables as $vs_table) {
        $t_instance = $po_datamodel->getTableInstance($vs_table);

        $vs_core_dir = $ps_solr_home_dir . "/" . $vs_table;
        $vs_conf_dir = $vs_core_dir . "/conf";

        /* create core directory */
        if (!file_exists($vs_core_dir)) {
            if (!mkdir($vs_core_dir, 0777)) { /* TODO: think about permissions */
                die("Couldn't create directory in Solr home. Please check the permissions.\n");
            }
        }

        /* create conf directory */
        if (!file_exists($vs_conf_dir)) {
            if (!mkdir($vs_conf_dir, 0777)) {
                die("Couldn't create directory in core directory. Please check the permissions.\n");
            }
        }

        /* create solrconfig.xml for this core */
        $vr_solrconfig_xml_file = fopen($vs_conf_dir . "/solrconfig.xml", 'w+');
        if (!is_resource($vr_solrconfig_xml_file)) {
            die("Couldn't write to solrconfig.xml file for core {$vs_table}. Please check the permissions.\n");
        }

        /* read template and copy it */
        $va_solrconfig_xml_template = file(__CA_LIB_DIR__ . "/core/Search/Solr/solrplugin_templates/solrconfig.xml");
        if (!is_array($va_solrconfig_xml_template)) {
            die("Couldn't read solrconfig.xml template.");
        }
        foreach ($va_solrconfig_xml_template as $vs_line) {
            fprintf($vr_solrconfig_xml_file, "%s", $vs_line);
        }
        fclose($vr_solrconfig_xml_file);

        /* create schema.xml for this core */
        $vr_schema_xml_file = fopen($vs_conf_dir . "/schema.xml", 'w+');
        if (!is_resource($vr_schema_xml_file)) {
            die("Couldn't write to schema.xml file for core {$vs_table}. Please check the permissions.\n");
        }

        /* read template, modify it, add table-specific fields and write to schema.xml configuration for this core */
        $va_schema_xml_template = file(__CA_LIB_DIR__ . "/core/Search/Solr/solrplugin_templates/schema.xml");
        if (!is_array($va_schema_xml_template)) {
            /* this is the schema template, so name the right file in the message */
            die("Couldn't read schema.xml template.");
        }

        /* Defaults for the copyField output. They are filled by the FIELDS placeholder;
         * initialising them here keeps the COPYFIELDS placeholder from reading undefined
         * variables if the template lists it first or has no FIELDS placeholder at all. */
        $vs_copyfields = "";
        $vs_subject_table_copyfields = "";

        foreach ($va_schema_xml_template as $vs_line) {
            /* 1st replacement: core name */
            if (strpos($vs_line, "CORE_NAME") !== false) {
                fprintf($vr_schema_xml_file, "%s", str_replace("CORE_NAME", $vs_table, $vs_line));
                continue;
            }

            /* 2nd replacement: fields - the big part */
            if (strpos($vs_line, "<!--FIELDS-->") !== false) {
                $vs_field_schema = "";
                $vs_subject_table_copyfields = "";

                /* the schema is very very hardcoded, so we have to create a design that still fits
                 * when new metadata elements are created or sth like that. for now, we're just considering
                 * the "straightforward" fields
                 */
                $va_schema_fields = array(); /* list of all fields created - is used for copyField directives after field block */

                /* subject table */
                /* we add the PK - this is used for incremental indexing */
                $vs_pk = $t_instance->primaryKey();
                $vs_field_schema .= SolrConfiguration::tabs(2) . '<field name="' . $vs_table . '.' . $vs_pk . '" type="int" indexed="true" stored="true" />' . SolrConfiguration::nl();
                $vs_field_schema .= SolrConfiguration::tabs(2) . '<field name="' . $vs_pk . '" type="int" indexed="true" stored="true" />' . SolrConfiguration::nl();
                $vs_subject_table_copyfields .= SolrConfiguration::tabs(1) . '<copyField source="' . $vs_table . '.' . $vs_pk . '" dest="' . $vs_pk . '" />' . SolrConfiguration::nl();

                /* get fields-to-index from search indexing configuration */
                if (!is_array($va_table_fields = $po_search_base->getFieldsToIndex($vs_table))) {
                    $va_table_fields = array();
                }

                $vn_table_num = $po_datamodel->getTableNum($vs_table);

                /* replace virtual _metadata field with actual attribute fields */
                $va_attributes = null;
                $va_opts = array();
                if (isset($va_table_fields['_metadata'])) {
                    if (!is_array($va_opts = $va_table_fields['_metadata'])) {
                        $va_opts = array();
                    }

                    unset($va_table_fields['_metadata']);

                    $qr_type_restrictions = $o_db->query(' SELECT DISTINCT came.* FROM ca_metadata_type_restrictions camtr INNER JOIN ca_metadata_elements as came ON came.element_id = camtr.element_id WHERE camtr.table_num = ? ', (int) $vn_table_num);

                    $va_attributes = array();
                    while ($qr_type_restrictions->nextRow()) {
                        $vn_element_id = $qr_type_restrictions->get('element_id');
                        $va_attributes[$vn_element_id] = array(
                            'element_id' => $vn_element_id,
                            'element_code' => $qr_type_restrictions->get('element_code'),
                            'datatype' => $qr_type_restrictions->get('datatype')
                        );
                    }
                }

                /* fields configured as ca_attribute_<element_code> are looked up individually */
                if (is_array($va_table_fields)) {
                    foreach ($va_table_fields as $vs_field_name => $va_field_options) {
                        if (preg_match('!^ca_attribute_(.*)$!', $vs_field_name, $va_matches)) {
                            $qr_type_restrictions = $o_db->query(' SELECT DISTINCT came.* FROM ca_metadata_type_restrictions camtr INNER JOIN ca_metadata_elements as came ON came.element_id = camtr.element_id WHERE camtr.table_num = ? AND came.element_code = ? ', (int) $vn_table_num, (string) $va_matches[1]);

                            while ($qr_type_restrictions->nextRow()) {
                                $vn_element_id = $qr_type_restrictions->get('element_id');
                                $va_attributes[$vn_element_id] = array(
                                    'element_id' => $vn_element_id,
                                    'element_code' => $qr_type_restrictions->get('element_code'),
                                    'datatype' => $qr_type_restrictions->get('datatype')
                                );
                            }
                        }
                    }
                }

                if (is_array($va_attributes)) {
                    foreach ($va_attributes as $vn_element_id => $va_element_info) {
                        $vs_element_code = $va_element_info['element_code'];

                        $va_element_opts = array();
                        switch ($va_element_info['datatype']) {
                            case 0: // container
                                /* Retrieve child elements of the container. */
                                $qr_container_elements = $o_db->query(' SELECT * FROM ca_metadata_elements WHERE parent_id = ?', $va_element_info['element_id']);
                                while ($qr_container_elements->nextRow()) {
                                    /* For each child, retrieve its own child elements; those are registered as text fields */
                                    $qr_container_grand_children_elements = $o_db->query(' SELECT * FROM ca_metadata_elements WHERE parent_id = ?', $qr_container_elements->get('element_id'));
                                    while ($qr_container_grand_children_elements->nextRow()) {
                                        $container_element_code = $qr_container_grand_children_elements->get('element_code');
                                        $va_table_fields[$container_element_code] = array_merge($va_opts, array('type' => 'text'));
                                    }
                                }
                                break;
                            case 1:  // text
                            case 3:  // list
                            case 5:  // url
                            case 6:  // currency
                            case 8:  // length
                            case 9:  // weight
                            case 13: // LCSH
                            case 14: // geonames
                            case 15: // file
                            case 16: // media
                            case 19: // taxonomy
                            case 20: // information service
                                $va_element_opts['type'] = 'text';
                                break;
                            case 2: // daterange
                                $va_element_opts['type'] = 'daterange';
                                $va_table_fields[$vs_element_code . '_text'] = array_merge($va_opts, array('type' => 'text'));
                                break;
                            case 4: // geocode
                                $va_element_opts['type'] = 'geocode';
                                $va_table_fields[$vs_element_code . '_text'] = array_merge($va_opts, array('type' => 'text'));
                                break;
                            case 10: // timecode
                            case 12: // numeric/float
                                $va_element_opts['type'] = 'float';
                                break;
                            case 11: // integer
                                $va_element_opts['type'] = 'int';
                                break;
                            default:
                                $va_element_opts['type'] = 'text';
                                break;
                        }

                        $va_table_fields[$vs_element_code] = array_merge($va_opts, $va_element_opts);
                    }
                }

                /* we now have the current configuration */

                /* since Solr supports live updates only if changes are 'backwards-compatible'
                 * (i.e. no fields are deleted), we have to merge the current configuration with the
                 * cached one, create the new configuration based upon that and cache it.
                 *
                 * Invocation of the command-line script support/utils/createSolrConfiguration.php,
                 * however, creates a completely fresh configuration and caches it.
                 */
                $va_frontend_options = array(
                    'lifetime' => null,
                    'logging' => false,
                    'write_control' => true,
                    'automatic_cleaning_factor' => 0,
                    'automatic_serialization' => true
                );
                $vs_cache_dir = __CA_APP_DIR__ . '/tmp';
                $va_backend_options = array(
                    'cache_dir' => $vs_cache_dir,
                    'file_locking' => true,
                    'read_control' => false,
                    'file_name_prefix' => 'ca_cache',
                    'cache_file_perm' => 0777
                );
                $vo_cache = Zend_Cache::factory('Core', 'File', $va_frontend_options, $va_backend_options);

                if (!($va_cache_data = $vo_cache->load('ca_search_indexing_info_' . $vs_table))) {
                    $va_cache_data = array();
                }

                if (!$pb_invoked_from_command_line) {
                    $va_table_fields = array_merge($va_cache_data, $va_table_fields);
                }

                $vo_cache->save($va_table_fields, 'ca_search_indexing_info_' . $vs_table);

                if (is_array($va_table_fields)) {
                    foreach ($va_table_fields as $vs_field_name => $va_field_options) {
                        if (!is_array($va_field_options)) {
                            $va_field_options = array();
                        }

                        /* strict matching: a loose in_array() would also match unrelated
                         * option values such as true or 0 */
                        $vb_field_is_stored = in_array("STORE", $va_field_options, true);
                        $vb_field_is_tokenized = !in_array("DONT_TOKENIZE", $va_field_options, true);

                        $va_schema_fields[] = $vs_table . '.' . $vs_field_name;

                        /* check that a type is configured before reading it; fields coming
                         * straight from the indexing configuration may not have one */
                        if (isset($va_field_options['type']) && in_array($va_field_options['type'], array('text', 'string'), true)) {
                            $vs_type = $vb_field_is_tokenized ? 'text' : 'string';
                        } else {
                            if (!isset($va_field_options['type']) && $t_instance->hasField($vs_field_name)) {
                                /* no explicit type: derive it from the field type of the table field */
                                switch ($t_instance->getFieldInfo($vs_field_name, "FIELD_TYPE")) {
                                    case FT_TEXT:
                                    case FT_MEDIA:
                                    case FT_FILE:
                                    case FT_PASSWORD:
                                    case FT_VARS:
                                        $va_field_options['type'] = 'text';
                                        break;
                                    case FT_NUMBER:
                                    case FT_TIME:
                                    case FT_TIMERANGE:
                                    case FT_TIMECODE:
                                        $va_field_options['type'] = 'float';
                                        break;
                                    case FT_TIMESTAMP:
                                    case FT_DATETIME:
                                    case FT_HISTORIC_DATETIME:
                                    case FT_DATE:
                                    case FT_HISTORIC_DATE:
                                    case FT_DATERANGE:
                                    case FT_HISTORIC_DATERANGE:
                                        $va_field_options['type'] = 'daterange';
                                        break;
                                    case FT_BIT:
                                        $va_field_options['type'] = 'bool';
                                        break;
                                    default:
                                        $va_field_options['type'] = null;
                                        break;
                                }
                            }
                            $vs_type = (isset($va_field_options['type']) && $va_field_options['type']) ? $va_field_options['type'] : 'text';
                        }

                        $vs_field_schema .= SolrConfiguration::tabs(2) . '<field name="' . $vs_table . '.' . $vs_field_name . '" type="' . $vs_type;
                        $vs_field_schema .= '" indexed="true" ';
                        $vs_field_schema .= $vb_field_is_stored ? 'stored="true" ' : 'stored="false" ';
                        $vs_field_schema .= '/>' . SolrConfiguration::nl();
                    }
                }

                /* related tables */
                if (!is_array($va_related_tables = $po_search_base->getRelatedIndexingTables($vs_table))) {
                    $va_related_tables = array();
                }
                foreach ($va_related_tables as $vs_related_table) {
                    if (!is_array($va_related_table_fields = $po_search_base->getFieldsToIndex($vs_table, $vs_related_table))) {
                        $va_related_table_fields = array();
                    }
                    foreach ($va_related_table_fields as $vs_related_table_field => $va_related_table_field_options) {
                        if (!is_array($va_related_table_field_options)) {
                            $va_related_table_field_options = array();
                        }

                        $vb_field_is_stored = in_array("STORE", $va_related_table_field_options, true);
                        $vb_field_is_tokenized = !in_array("DONT_TOKENIZE", $va_related_table_field_options, true);

                        $va_schema_fields[] = $vs_related_table . '.' . $vs_related_table_field;

                        $vs_field_schema .= SolrConfiguration::tabs(2) . '<field name="' . $vs_related_table . '.' . $vs_related_table_field . '" type="';
                        $vs_field_schema .= $vb_field_is_tokenized ? 'text' : 'string';
                        $vs_field_schema .= '" indexed="true" ';
                        $vs_field_schema .= $vb_field_is_stored ? 'stored="true" ' : 'stored="false" ';
                        $vs_field_schema .= '/>' . SolrConfiguration::nl();
                    }
                }

                /* copyfield directives
                 * we use a single field in each index (called "text") where
                 * all other fields are copied. the text field is the default
                 * search field. it is used if a field name specification is
                 * omitted in a search query.
                 */
                $vs_copyfields = "";
                foreach ($va_schema_fields as $vs_schema_field) {
                    $vs_copyfields .= SolrConfiguration::tabs(1) . '<copyField source="' . $vs_schema_field . '" dest="text" />' . SolrConfiguration::nl();
                }

                /* access points: every access point becomes a multi-valued text field
                 * that its member fields are copied into */
                if (!is_array($va_access_points = $po_search_base->getAccessPoints($vs_table))) {
                    $va_access_points = array();
                }
                foreach ($va_access_points as $vs_access_point => $va_access_point_info) {
                    $va_access_point_fields = (isset($va_access_point_info['fields']) && is_array($va_access_point_info['fields']))
                        ? $va_access_point_info['fields']
                        : array();
                    foreach ($va_access_point_fields as $vs_schema_field) {
                        $vs_copyfields .= SolrConfiguration::tabs(1) . '<copyField source="' . $vs_schema_field . '" dest="' . $vs_access_point . '" />' . SolrConfiguration::nl();
                    }
                    $vs_field_schema .= SolrConfiguration::tabs(2) . '<field name="' . $vs_access_point . '" type="text" indexed="true" stored="true" multiValued="true"/>' . SolrConfiguration::nl();
                }

                /* write field indexing config into file */
                fprintf($vr_schema_xml_file, "%s", $vs_field_schema);

                continue;
            }

            /* 3rd replacement: uniquekey */
            if (strpos($vs_line, "<!--KEY-->") !== false) {
                $vs_pk = $t_instance->primaryKey();
                fprintf($vr_schema_xml_file, "%s", str_replace("<!--KEY-->", $vs_table . "." . $vs_pk, $vs_line));
                continue;
            }

            /* 4th replacement: copyFields */
            if (strpos($vs_line, "<!--COPYFIELDS-->") !== false) {
                /* $vs_copyfields *should* be filled by now, otherwise the template has been messed up */
                fprintf($vr_schema_xml_file, "%s", $vs_copyfields);
                // add copyField for the subject table fields so that the pk can be addressed in 2 ways:
                // "objects.object_id" or "object_id"
                fprintf($vr_schema_xml_file, "%s", $vs_subject_table_copyfields);
                continue;
            }

            /* "normal" line */
            fprintf($vr_schema_xml_file, "%s", $vs_line);
        }

        fclose($vr_schema_xml_file);
    }
}