/**
 * Indexes a single item on the specified index.
 *
 * Used as a helper method in indexItems().
 *
 * The item's previous rows are deleted and the new values are written inside
 * one database transaction, which is rolled back if anything fails.
 *
 * @param \Drupal\search_api\IndexInterface $index
 *   The index for which the item is being indexed.
 * @param string $id
 *   The item's ID.
 * @param \Drupal\search_api\Item\ItemInterface $item
 *   The item to index.
 *
 * @throws \Exception
 *   Any encountered database (or other) exceptions are passed on, out of this
 *   method.
 */
protected function indexItem(IndexInterface $index, $id, ItemInterface $item) {
  $fields = $this->getFieldInfo($index);
  $fields_updated = FALSE;
  $field_errors = [];
  $db_info = $this->getIndexDbInfo($index);

  $denormalized_table = $db_info['index_table'];
  $text_table = $denormalized_table . '_text';

  $txn = $this->database->startTransaction('search_api_indexing');

  try {
    // The denormalized table holds a single row per item, so it is enough to
    // remove the old row once, before any field is processed.
    $this->database->delete($denormalized_table)
      ->condition('item_id', $id)
      ->execute();

    $inserts = [];
    $text_inserts = [];
    foreach ($item->getFields() as $name => $field) {
      $denormalized_value = NULL;

      // Sometimes index changes are not triggering the update hooks
      // correctly. Therefore, to avoid DB errors, we re-check the tables
      // here before indexing.
      if (empty($fields[$name]['table']) && !$fields_updated) {
        unset($db_info['field_tables'][$name]);
        $this->fieldsUpdated($index);
        $fields_updated = TRUE;
        $fields = $db_info['field_tables'];
      }
      if (empty($fields[$name]['table']) && empty($field_errors[$name])) {
        // Log an error, but only once per field. Since a superfluous field is
        // not too serious, we just index the rest of the item normally.
        $field_errors[$name] = TRUE;
        $this->getLogger()->warning("Unknown field @field: please check (and re-save) the index's fields settings.", ['@field' => $name]);
        continue;
      }

      $table = $fields[$name]['table'];
      $this->database->delete($table)
        ->condition('item_id', $id)
        ->execute();

      $type = $field->getType();
      $value = [];
      foreach ($field->getValues() as $field_value) {
        $converted_value = $this->convert($field_value, $type, $field->getOriginalType(), $index);

        // Don't add NULL values to the return array. Also, adding an empty
        // array is, of course, a waste of time.
        if (isset($converted_value) && $converted_value !== []) {
          $value = array_merge($value, is_array($converted_value) ? $converted_value : [$converted_value]);
        }
      }

      if (Utility::isTextType($type, ['text', 'tokenized_text'])) {
        // Store the first 30 characters of the string as the denormalized
        // value. Stop as soon as enough text was collected or the tokens run
        // out, so no offset is ever read from a missing token.
        $denormalized_value = '';
        foreach ($value as $token) {
          if (strlen($denormalized_value) >= 30) {
            break;
          }
          $denormalized_value .= $token['value'] . ' ';
        }
        $denormalized_value = Unicode::truncateBytes(trim($denormalized_value), 30);

        $words = [];
        foreach ($value as $token) {
          // Taken from core search to reflect less importance of words later
          // in the text.
          // Focus is a decaying value in terms of the amount of unique words
          // up to this point. From 100 words and more, it decays, to e.g. 0.5
          // at 500 words and 0.3 at 1000 words.
          $focus = min(1, .01 + 3.5 / (2 + count($words) * .015));

          $word = $token['value'];
          if (is_numeric($word)) {
            $word = ltrim($word, '-0');
          }
          elseif (Unicode::strlen($word) < $this->configuration['min_chars']) {
            // Skip words that are too short, except for numbers.
            continue;
          }
          $word = Unicode::strtolower($word);
          $score = $token['score'] * $focus;

          // Store the normalized form of the word, i.e. the same string that
          // is used as the key for merging duplicates, so the indexed word
          // and the de-duplication key cannot diverge.
          if (!isset($words[$word])) {
            $words[$word] = ['value' => $word, 'score' => $score];
          }
          else {
            $words[$word]['score'] += $score;
          }
        }

        if ($words) {
          $field_name = self::getTextFieldName($name);
          // The boost is only needed for (and read for) text fields.
          $boost = $fields[$name]['boost'];
          foreach ($words as $word) {
            $text_inserts[$text_table][] = [
              'item_id' => $id,
              'field_name' => $field_name,
              'word' => $word['value'],
              'score' => (int) round($word['score'] * $boost * self::SCORE_MULTIPLIER),
            ];
          }
        }
      }
      else {
        // Use the string form of each value as an array key to drop
        // duplicates before inserting.
        $unique_values = [];
        foreach ($value as $v) {
          if (isset($v)) {
            $unique_values["$v"] = TRUE;
          }
        }
        $values = array_keys($unique_values);

        if ($values) {
          $denormalized_value = reset($values);
          $insert = $this->database->insert($table)
            ->fields(['item_id', 'value']);
          foreach ($values as $v) {
            $insert->values([
              'item_id' => $id,
              'value' => $v,
            ]);
          }
          $insert->execute();
        }
      }

      // Insert a value in the denormalized table for all fields.
      if (isset($denormalized_value)) {
        $inserts[$denormalized_table][$fields[$name]['column']] = trim($denormalized_value);
      }
    }

    foreach ($inserts as $table => $data) {
      $this->database->insert($table)
        ->fields(array_merge($data, ['item_id' => $id]))
        ->execute();
    }
    foreach ($text_inserts as $table => $data) {
      $query = $this->database->insert($table)
        ->fields(['item_id', 'field_name', 'word', 'score']);
      foreach ($data as $row) {
        $query->values($row);
      }
      $query->execute();
    }
  }
  catch (\Exception $e) {
    $txn->rollback();
    throw $e;
  }
}
/**
 * Writes one item's field values into the index's database tables.
 *
 * Helper for indexItems(). Runs inside a transaction that is rolled back when
 * any exception occurs.
 *
 * @param \Drupal\search_api\IndexInterface $index
 *   The index the item belongs to.
 * @param \Drupal\search_api\Item\ItemInterface $item
 *   The item whose fields should be indexed.
 *
 * @throws \Exception
 *   Any exception raised while indexing is re-thrown after the rollback.
 */
protected function indexItem(IndexInterface $index, ItemInterface $item) {
  $field_tables = $this->getFieldInfo($index);
  $tables_rechecked = FALSE;
  $reported_fields = [];
  $db_info = $this->getIndexDbInfo($index);

  $denormalized_table = $db_info['index_table'];
  $item_id = $item->getId();
  $text_table = NULL;

  $transaction = $this->database->startTransaction('search_api_db_indexing');

  try {
    // Start with a clean slate in the denormalized table.
    $this->database->delete($denormalized_table)
      ->condition('item_id', $item_id)
      ->execute();

    $denormalized_row = [];
    $text_rows = [];

    foreach ($item->getFields() as $field_id => $field) {
      // Update hooks are not always triggered correctly on index changes, so
      // re-check the tables (at most once) when a field has no table yet.
      if (empty($field_tables[$field_id]['table'])) {
        if (!$tables_rechecked) {
          unset($db_info['field_tables'][$field_id]);
          $this->fieldsUpdated($index);
          $tables_rechecked = TRUE;
          $field_tables = $db_info['field_tables'];
        }
      }

      // A field that is still unknown is reported once and then skipped; the
      // remaining fields of the item are indexed as usual.
      if (empty($field_tables[$field_id]['table']) && empty($reported_fields[$field_id])) {
        $reported_fields[$field_id] = TRUE;
        $this->getLogger()->warning("Unknown field @field: please check (and re-save) the index's fields settings.", ['@field' => $field_id]);
        continue;
      }

      $info = $field_tables[$field_id];
      $field_table = $info['table'];
      $field_column = $info['column'];

      $this->database->delete($field_table)
        ->condition('item_id', $item_id)
        ->execute();

      $field_type = $field->getType();

      // Flatten all converted values into a single list, ignoring NULLs and
      // empty arrays.
      $converted = [];
      foreach ($field->getValues() as $raw_value) {
        $result = $this->convert($raw_value, $field_type, $field->getOriginalType(), $index);
        if (!isset($result) || $result === []) {
          continue;
        }
        if (!is_array($result)) {
          $result = [$result];
        }
        $converted = array_merge($converted, $result);
      }

      if (count($converted) === 0) {
        // SQLite can have trouble defaulting columns that are missing from an
        // INSERT statement to NULL, so the NULL is set explicitly.
        $denormalized_row[$field_column] = NULL;
        continue;
      }

      // More than one value means the field can be multi-valued; remember
      // that in the index's DB info.
      if (count($converted) > 1) {
        $db_info['field_tables'][$field_id]['multi-valued'] = TRUE;
      }

      if (!Utility::isTextType($field_type, ['text', 'tokenized_text'])) {
        $denormalized_row[$field_column] = reset($converted);

        // Avoid inserting duplicates (which would cause a database
        // exception). Values are compared by their canonical base form so
        // that duplicates under the table's collation are caught as well;
        // the value itself is still inserted in its original case.
        $distinct = [];
        foreach ($converted as $single_value) {
          $base_form = $this->dbmsCompatibility->preprocessIndexValue("{$single_value}", 'field');
          $distinct[$base_form] = $single_value;
        }

        $insert = $this->database->insert($field_table)
          ->fields(['item_id', 'value']);
        foreach ($distinct as $single_value) {
          $insert->values([
            'item_id' => $item_id,
            'value' => $single_value,
          ]);
        }
        $insert->execute();
        continue;
      }

      // Text field: the first text table encountered is used for all text
      // inserts of this item.
      if ($text_table === NULL) {
        $text_table = $field_table;
      }

      $tokens_by_base_form = [];
      $preview = '';
      foreach ($converted as $token) {
        $word = $token['value'];
        $score = $token['score'];

        // The first 30 characters of the text become the denormalized value.
        if (strlen($preview) < 30) {
          $preview .= $word . ' ';
        }

        // Too short words are skipped, numbers are always kept.
        if (is_numeric($word)) {
          $word = ltrim($word, '-0');
        }
        elseif (Unicode::strlen($word) < $this->configuration['min_chars']) {
          continue;
        }

        // Borrowed from core search: words later in the text count less.
        // The focus decays with the number of unique words seen so far, to
        // e.g. 0.5 at 500 words and 0.3 at 1000 words.
        $focus = min(1, 0.01 + 3.5 / (2 + count($tokens_by_base_form) * 0.015));
        $score = $score * $focus;

        // Each canonical base form of a word is only inserted once; repeated
        // occurrences add to its score.
        $base_form = $this->dbmsCompatibility->preprocessIndexValue($word);
        if (isset($tokens_by_base_form[$base_form])) {
          $tokens_by_base_form[$base_form]['score'] += $score;
        }
        else {
          $tokens_by_base_form[$base_form] = [
            'value' => $word,
            'score' => $score,
          ];
        }
      }

      $denormalized_row[$field_column] = Unicode::truncateBytes(trim($preview), 30);

      if ($tokens_by_base_form) {
        $text_field_name = self::getTextFieldName($field_id);
        $boost = $info['boost'];
        foreach ($tokens_by_base_form as $unique_token) {
          $text_rows[] = [
            'item_id' => $item_id,
            'field_name' => $text_field_name,
            'word' => $unique_token['value'],
            'score' => (int) round($unique_token['score'] * $boost * self::SCORE_MULTIPLIER),
          ];
        }
      }
    }

    $denormalized_fields = array_merge($denormalized_row, ['item_id' => $item_id]);
    $this->database->insert($denormalized_table)
      ->fields($denormalized_fields)
      ->execute();

    if ($text_rows && $text_table !== NULL) {
      $text_insert = $this->database->insert($text_table)
        ->fields(['item_id', 'field_name', 'word', 'score']);
      foreach ($text_rows as $text_row) {
        $text_insert->values($text_row);
      }
      $text_insert->execute();
    }

    // Re-save the index's DB info in case any field was newly detected as
    // multi-valued.
    $this->getKeyValueStore()->set($index->id(), $db_info);
  }
  catch (\Exception $e) {
    $transaction->rollback();
    throw $e;
  }
}
/**
 * Prepares a single item for indexing.
 *
 * Used as a helper method in indexItem()/indexItems().
 *
 * Every field of the item, plus the fields returned by getSpecialFields(),
 * is converted to plain PHP values according to the field's type. A field
 * with exactly one value is stored as that scalar value, a field with several
 * values as a list, and a field without any usable value as NULL.
 *
 * @param \Drupal\search_api\IndexInterface $index
 *   The index for which the item is being prepared.
 * @param \Drupal\search_api\Item\ItemInterface $item
 *   The item to index.
 *
 * @return array
 *   The prepared item: the item ID under the key "objectID", followed by one
 *   entry per field, keyed by the field identifier.
 */
protected function prepareItem(IndexInterface $index, ItemInterface $item) {
  $item_to_index = ['objectID' => $item->getId()];

  $item_fields = $item->getFields();
  $item_fields += $this->getSpecialFields($index, $item);

  /** @var \Drupal\search_api\Item\FieldInterface $field */
  foreach ($item_fields as $field) {
    $type = $field->getType();
    // Always start from an array, so that counting the values below is safe
    // even when the field has no (usable) values at all.
    $values = [];

    foreach ($field->getValues() as $field_value) {
      // Only skip values that carry no information. FALSE, 0 and "0" are
      // legitimate values (e.g. an unchecked boolean or a zero count) and
      // have to reach the type handling below.
      if ($field_value === NULL || $field_value === '' || $field_value === []) {
        continue;
      }

      switch ($type) {
        case 'text':
        case 'string':
        case 'uri':
          // Force a string and cap its length at 10000 characters.
          $field_value .= '';
          if (Unicode::strlen($field_value) > 10000) {
            $field_value = Unicode::substr(trim($field_value), 0, 10000);
          }
          $values[] = $field_value;
          break;

        case 'integer':
        case 'duration':
        case 'decimal':
          // Adding zero converts numeric strings to an int or float.
          $values[] = 0 + $field_value;
          break;

        case 'boolean':
          $values[] = $field_value ? TRUE : FALSE;
          break;

        case 'date':
          // Numeric (or empty) dates are used as numbers directly, anything
          // else is parsed as a date string.
          if (is_numeric($field_value) || !$field_value) {
            $values[] = 0 + $field_value;
            break;
          }
          $values[] = strtotime($field_value);
          break;

        default:
          $values[] = $field_value;
      }
    }

    if (!$values) {
      $prepared = NULL;
    }
    elseif (count($values) === 1) {
      $prepared = reset($values);
    }
    else {
      $prepared = $values;
    }
    $item_to_index[$field->getFieldIdentifier()] = $prepared;
  }

  return $item_to_index;
}
/**
 * Builds the document that represents a single item in the index.
 *
 * Used as a helper method in indexItems().
 *
 * @param \Drupal\search_api\IndexInterface $index
 *   The index for which the item is being indexed.
 * @param \Drupal\search_api\Item\ItemInterface $item
 *   The item to index.
 *
 * @return array
 *   The item ID under the key "objectID", followed by the values of each of
 *   the item's fields, keyed by the field identifier.
 */
protected function indexItem(IndexInterface $index, ItemInterface $item) {
  $document = ['objectID' => $item->getId()];

  /** @var \Drupal\search_api\Item\FieldInterface $field */
  foreach ($item as $field) {
    $identifier = $field->getFieldIdentifier();
    $document[$identifier] = $field->getValues();
  }

  return $document;
}
/**
 * {@inheritdoc}
 */
public function addResultItem(ItemInterface $result_item) {
  // Result items are keyed by their item ID, so adding an item with an ID
  // that is already present replaces the earlier entry.
  $item_id = $result_item->getId();
  $this->resultItems[$item_id] = $result_item;

  return $this;
}