Ejemplo n.º 1
0
  /**
   * Indexes a single item on the specified index.
   *
   * Used as a helper method in indexItems().
   *
   * @param \Drupal\search_api\IndexInterface $index
   *   The index for which the item is being indexed.
   * @param string $id
   *   The item's ID.
   * @param \Drupal\search_api\Item\ItemInterface $item
   *   The item to index.
   *
   * @throws \Exception
   *   Any encountered database (or other) exceptions are passed on, out of this
   *   method.
   */
  protected function indexItem(IndexInterface $index, $id, ItemInterface $item) {
    // Storage information per field; the "table", "column" and "boost" keys
    // are the ones read below.
    $fields = $this->getFieldInfo($index);
    $fields_updated = FALSE;
    $field_errors = array();
    $db_info = $this->getIndexDbInfo($index);

    $denormalized_table = $db_info['index_table'];
    $text_table = $denormalized_table . '_text';
    // Every write below belongs to this transaction, which is rolled back in
    // the catch block if anything throws.
    $txn = $this->database->startTransaction('search_api_indexing');

    try {
      $inserts = array();
      $text_inserts = array();
      // The item's old denormalized row only has to be removed once, not once
      // per field. It is removed lazily, together with the first indexable
      // field, so an item without any indexable field is left untouched.
      $denormalized_row_deleted = FALSE;

      foreach ($item->getFields() as $name => $field) {
        $denormalized_value = NULL;
        // Sometimes index changes are not triggering the update hooks
        // correctly. Therefore, to avoid DB errors, we re-check the tables
        // here before indexing.
        if (empty($fields[$name]['table']) && !$fields_updated) {
          $this->fieldsUpdated($index);
          $fields_updated = TRUE;
          // Re-read the field information instead of reusing the copy loaded
          // before fieldsUpdated() ran; the old copy can never contain the
          // missing field. NOTE(review): assumes fieldsUpdated() persists its
          // changes so that getFieldInfo() sees them - confirm.
          $fields = $this->getFieldInfo($index);
        }
        if (empty($fields[$name]['table'])) {
          // Log an error, but only once per field. Since a superfluous field is
          // not too serious, we just index the rest of the item normally.
          if (empty($field_errors[$name])) {
            $field_errors[$name] = TRUE;
            $this->getLogger()->warning("Unknown field @field: please check (and re-save) the index's fields settings.", array('@field' => $name));
          }
          continue;
        }
        $table = $fields[$name]['table'];
        $boost = $fields[$name]['boost'];

        // Remove whatever was previously indexed for this item and field.
        $this->database->delete($table)
          ->condition('item_id', $id)
          ->execute();
        if (!$denormalized_row_deleted) {
          $this->database->delete($denormalized_table)
            ->condition('item_id', $id)
            ->execute();
          $denormalized_row_deleted = TRUE;
        }

        // Flatten all converted field values into a single list.
        $type = $field->getType();
        $value = array();
        foreach ($field->getValues() as $field_value) {
          $converted_value = $this->convert($field_value, $type, $field->getOriginalType(), $index);

          // Don't add NULL values to the return array. Also, adding an empty
          // array is, of course, a waste of time.
          if (isset($converted_value) && $converted_value !== array()) {
            $value = array_merge($value, is_array($converted_value) ? $converted_value : array($converted_value));
          }
        }

        if (Utility::isTextType($type, array('text', 'tokenized_text'))) {
          // Store the first 30 bytes of the text as the denormalized value.
          // Stop as soon as enough text was collected or the tokens run out,
          // instead of reading past the end of the token list.
          $denormalized_value = '';
          foreach ($value as $token) {
            if (strlen($denormalized_value) >= 30) {
              break;
            }
            $denormalized_value .= $token['value'] . ' ';
          }
          $denormalized_value = Unicode::truncateBytes(trim($denormalized_value), 30);

          // Collect each distinct word once, summing up the scores of
          // repeated occurrences.
          $words = array();
          foreach ($value as $token) {
            // Taken from core search to reflect less importance of words later
            // in the text.
            // Focus is a decaying value in terms of the amount of unique words
            // up to this point. From 100 words and more, it decays, to e.g. 0.5
            // at 500 words and 0.3 at 1000 words.
            $focus = min(1, .01 + 3.5 / (2 + count($words) * .015));

            $word = $token['value'];
            if (is_numeric($word)) {
              $word = ltrim($word, '-0');
            }
            elseif (Unicode::strlen($word) < $this->configuration['min_chars']) {
              // Too short to be indexed (numbers are exempt from this rule).
              continue;
            }
            $word = Unicode::strtolower($word);
            $score = $token['score'] * $focus;
            if (!isset($words[$word])) {
              // Store the normalized word, so the value written to the
              // database matches the key used for de-duplication.
              $token['value'] = $word;
              $token['score'] = $score;
              $words[$word] = $token;
            }
            else {
              $words[$word]['score'] += $score;
            }
          }
          if ($words) {
            $field_name = self::getTextFieldName($name);
            foreach ($words as $word) {
              $text_inserts[$text_table][] = array(
                'item_id' => $id,
                'field_name' => $field_name,
                'word' => $word['value'],
                'score' => (int) round($word['score'] * $boost * self::SCORE_MULTIPLIER),
              );
            }
          }
        }
        else {
          // De-duplicate by using the string form of each value as array key.
          $values = array();
          foreach ($value as $v) {
            if (isset($v)) {
              $values["$v"] = TRUE;
            }
          }
          $values = array_keys($values);

          if ($values) {
            // The first value represents the field in the denormalized table.
            $denormalized_value = reset($values);
            $insert = $this->database->insert($table)
              ->fields(array('item_id', 'value'));
            foreach ($values as $v) {
              $insert->values(array(
                'item_id' => $id,
                'value' => $v,
              ));
            }
            $insert->execute();
          }
        }

        // Insert a value in the denormalized table for all fields.
        if (isset($denormalized_value)) {
          $inserts[$denormalized_table][$fields[$name]['column']] = trim($denormalized_value);
        }
      }

      // Write the collected denormalized row (one column per field).
      foreach ($inserts as $table => $data) {
        $this->database->insert($table)
          ->fields(array_merge($data, array('item_id' => $id)))
          ->execute();
      }
      // Write all collected words in one multi-row insert per text table.
      foreach ($text_inserts as $table => $data) {
        $query = $this->database->insert($table)
          ->fields(array('item_id', 'field_name', 'word', 'score'));
        foreach ($data as $row) {
          $query->values($row);
        }
        $query->execute();
      }
    }
    catch (\Exception $e) {
      // Undo every write made for this item, then pass the exception on.
      $txn->rollback();
      throw $e;
    }
  }
Ejemplo n.º 2
0
 /**
  * Indexes a single item on the specified index.
  *
  * Used as a helper method in indexItems().
  *
  * @param \Drupal\search_api\IndexInterface $index
  *   The index for which the item is being indexed.
  * @param \Drupal\search_api\Item\ItemInterface $item
  *   The item to index.
  *
  * @throws \Exception
  *   Any encountered database (or other) exceptions are passed on, out of this
  *   method.
  */
 protected function indexItem(IndexInterface $index, ItemInterface $item)
 {
     // Per-field storage information; the "table", "column" and "boost" keys
     // are the ones read below.
     $fields = $this->getFieldInfo($index);
     $fields_updated = FALSE;
     $field_errors = array();
     $db_info = $this->getIndexDbInfo($index);
     $denormalized_table = $db_info['index_table'];
     $item_id = $item->getId();
     // All deletes and inserts below run inside this transaction, which is
     // rolled back in the catch block if anything throws.
     $transaction = $this->database->startTransaction('search_api_db_indexing');
     try {
         // Remove the item from the denormalized table.
         $this->database->delete($denormalized_table)->condition('item_id', $item_id)->execute();
         // Column => value map for the single row written to the denormalized
         // table after the loop.
         $denormalized_values = array();
         // Rows collected for the text table; written in one insert after the
         // loop.
         $text_inserts = array();
         foreach ($item->getFields() as $field_id => $field) {
             // Sometimes index changes are not triggering the update hooks
             // correctly. Therefore, to avoid DB errors, we re-check the tables
             // here before indexing.
             if (empty($fields[$field_id]['table']) && !$fields_updated) {
                 unset($db_info['field_tables'][$field_id]);
                 $this->fieldsUpdated($index);
                 $fields_updated = TRUE;
                 // NOTE(review): $db_info was loaded before fieldsUpdated() ran
                 // and just had this field removed, so $fields cannot contain
                 // the field after this assignment. Presumably the info should
                 // be re-read here - confirm.
                 $fields = $db_info['field_tables'];
             }
             if (empty($fields[$field_id]['table']) && empty($field_errors[$field_id])) {
                 // Log an error, but only once per field. Since a superfluous field is
                 // not too serious, we just index the rest of the item normally.
                 $field_errors[$field_id] = TRUE;
                 $this->getLogger()->warning("Unknown field @field: please check (and re-save) the index's fields settings.", array('@field' => $field_id));
                 continue;
             }
             $field_info = $fields[$field_id];
             $table = $field_info['table'];
             $column = $field_info['column'];
             // Remove whatever was previously stored for this item in the
             // field's table.
             $this->database->delete($table)->condition('item_id', $item_id)->execute();
             $type = $field->getType();
             // Flatten all converted field values into a single list.
             $values = array();
             foreach ($field->getValues() as $field_value) {
                 $converted_value = $this->convert($field_value, $type, $field->getOriginalType(), $index);
                 // Don't add NULL values to the return array. Also, adding an empty
                 // array is, of course, a waste of time.
                 if (isset($converted_value) && $converted_value !== array()) {
                     $values = array_merge($values, is_array($converted_value) ? $converted_value : array($converted_value));
                 }
             }
             if (!$values) {
                 // SQLite sometimes has problems letting columns not present in an
                 // INSERT statement default to NULL, so we set NULL values for the
                 // denormalized table explicitly.
                 $denormalized_values[$column] = NULL;
                 continue;
             }
             // If the field contains more than one value, we remember that the field
             // can be multi-valued.
             if (count($values) > 1) {
                 $db_info['field_tables'][$field_id]['multi-valued'] = TRUE;
             }
             if (Utility::isTextType($type, array('text', 'tokenized_text'))) {
                 // Remember the text table the first time we encounter it.
                 if (!isset($text_table)) {
                     $text_table = $table;
                 }
                 // Distinct words of this field, keyed by their base form, with
                 // the scores of repeated occurrences summed up.
                 $unique_tokens = array();
                 $denormalized_value = '';
                 foreach ($values as $token) {
                     $word = $token['value'];
                     $score = $token['score'];
                     // Store the first 30 characters of the string as the denormalized
                     // value.
                     if (strlen($denormalized_value) < 30) {
                         $denormalized_value .= $word . ' ';
                     }
                     // Skip words that are too short, except for numbers.
                     if (is_numeric($word)) {
                         $word = ltrim($word, '-0');
                     } elseif (Unicode::strlen($word) < $this->configuration['min_chars']) {
                         continue;
                     }
                     // Taken from core search to reflect less importance of words later
                     // in the text.
                     // Focus is a decaying value in terms of the amount of unique words
                     // up to this point. From 100 words and more, it decays, to e.g. 0.5
                     // at 500 words and 0.3 at 1000 words.
                     $score *= min(1, 0.01 + 3.5 / (2 + count($unique_tokens) * 0.015));
                     // Only insert each canonical base form of a word once.
                     $word_base_form = $this->dbmsCompatibility->preprocessIndexValue($word);
                     if (!isset($unique_tokens[$word_base_form])) {
                         $unique_tokens[$word_base_form] = array('value' => $word, 'score' => $score);
                     } else {
                         $unique_tokens[$word_base_form]['score'] += $score;
                     }
                 }
                 // The collected prefix is cut to at most 30 bytes.
                 $denormalized_values[$column] = Unicode::truncateBytes(trim($denormalized_value), 30);
                 if ($unique_tokens) {
                     $field_name = self::getTextFieldName($field_id);
                     $boost = $field_info['boost'];
                     // The stored score is the summed token score, scaled by the
                     // field's boost and SCORE_MULTIPLIER, rounded to an integer.
                     foreach ($unique_tokens as $token) {
                         $text_inserts[] = array('item_id' => $item_id, 'field_name' => $field_name, 'word' => $token['value'], 'score' => (int) round($token['score'] * $boost * self::SCORE_MULTIPLIER));
                     }
                 }
             } else {
                 // The first value represents the field in the denormalized
                 // table.
                 $denormalized_values[$column] = reset($values);
                 // Make sure no duplicate values are inserted (which would lead to a
                 // database exception).
                 // Use the canonical base form of the value for the comparison to
                 // avoid not catching different values that are duplicates under the
                 // database table's collation.
                 $case_insensitive_unique_values = array();
                 foreach ($values as $value) {
                     $value_base_form = $this->dbmsCompatibility->preprocessIndexValue("{$value}", 'field');
                     // We still insert the value in its original case.
                     $case_insensitive_unique_values[$value_base_form] = $value;
                 }
                 $values = array_values($case_insensitive_unique_values);
                 $insert = $this->database->insert($table)->fields(array('item_id', 'value'));
                 foreach ($values as $value) {
                     $insert->values(array('item_id' => $item_id, 'value' => $value));
                 }
                 $insert->execute();
             }
         }
         // Write the item's single row in the denormalized table.
         $this->database->insert($denormalized_table)->fields(array_merge($denormalized_values, array('item_id' => $item_id)))->execute();
         // Write all collected words with one multi-row insert.
         if ($text_inserts && isset($text_table)) {
             $query = $this->database->insert($text_table)->fields(array('item_id', 'field_name', 'word', 'score'));
             foreach ($text_inserts as $row) {
                 $query->values($row);
             }
             $query->execute();
         }
         // In case any new fields were detected as multi-valued, we re-save the
         // index's DB info.
         // NOTE(review): this stores the local $db_info copy, including any
         // unset() performed in the loop above - confirm that it cannot
         // overwrite what fieldsUpdated() stored in the meantime.
         $this->getKeyValueStore()->set($index->id(), $db_info);
     } catch (\Exception $e) {
         // Undo every write made for this item, then pass the exception on.
         $transaction->rollback();
         throw $e;
     }
 }
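 /**
  * Prepares a single item for indexing.
  *
  * Used as a helper method in indexItem()/indexItems().
  *
  * @param \Drupal\search_api\IndexInterface $index
  *   The index for which the item is being prepared.
  * @param \Drupal\search_api\Item\ItemInterface $item
  *   The item to index.
  *
  * @return array
  *   The item's field values keyed by field identifier, plus an "objectID"
  *   entry holding the item's ID.
  */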
 protected function prepareItem(IndexInterface $index, ItemInterface $item)
 {
     // The item's ID is stored under the "objectID" key of the record.
     $item_to_index = ['objectID' => $item->getId()];
     $item_fields = $item->getFields();
     // Array union: a field already present on the item takes precedence over
     // a special field with the same key.
     $item_fields += $this->getSpecialFields($index, $item);
     // Types for which a falsy value (0, "0", FALSE) is real data and must not
     // be dropped; otherwise FALSE booleans and zero numbers could never be
     // indexed.
     $falsy_is_data = ['integer', 'duration', 'decimal', 'boolean', 'date'];
     /** @var \Drupal\search_api\Item\FieldInterface $field */
     foreach ($item_fields as $field) {
         $type = $field->getType();
         $keep_falsy = in_array($type, $falsy_is_data, TRUE);
         // Always an array, so count()/reset() below never receive NULL.
         $values = [];
         foreach ($field->getValues() as $field_value) {
             // Skip empty values. For numeric, boolean and date fields only
             // NULL and the empty string count as empty.
             $is_empty = $keep_falsy
                 ? ($field_value === NULL || $field_value === '')
                 : !$field_value;
             if ($is_empty) {
                 continue;
             }
             switch ($type) {
                 case 'text':
                 case 'string':
                 case 'uri':
                     // Force a string, then cap it at 10000 characters.
                     $field_value .= '';
                     if (Unicode::strlen($field_value) > 10000) {
                         $field_value = Unicode::substr(trim($field_value), 0, 10000);
                     }
                     $values[] = $field_value;
                     break;
                 case 'integer':
                 case 'duration':
                 case 'decimal':
                     // Numeric cast that yields an int or a float as appropriate.
                     $values[] = 0 + $field_value;
                     break;
                 case 'boolean':
                     $values[] = $field_value ? TRUE : FALSE;
                     break;
                 case 'date':
                     if (is_numeric($field_value) || !$field_value) {
                         $values[] = 0 + $field_value;
                         break;
                     }
                     // strtotime() returns FALSE for unparsable input; such a
                     // value is dropped instead of being indexed as FALSE.
                     $timestamp = strtotime($field_value);
                     if ($timestamp !== FALSE) {
                         $values[] = $timestamp;
                     }
                     break;
                 default:
                     $values[] = $field_value;
             }
         }
         // A single value is stored as a scalar and a field without any usable
         // value as NULL; only real multi-value fields are stored as a list.
         if (count($values) <= 1) {
             $values = $values ? reset($values) : NULL;
         }
         $item_to_index[$field->getFieldIdentifier()] = $values;
     }
     return $item_to_index;
 }
 /**
  * Indexes a single item on the specified index.
  *
  * Used as a helper method in indexItems().
  *
  * @param \Drupal\search_api\IndexInterface $index
  *   The index for which the item is being indexed.
  * @param \Drupal\search_api\Item\ItemInterface $item
  *   The item to index.
  *
  * @return array
  *   The values of the item's fields keyed by field identifier, plus an
  *   "objectID" entry holding the item's ID.
  */
 protected function indexItem(IndexInterface $index, ItemInterface $item)
 {
     // Start the record with the item's ID under the "objectID" key.
     $record = array('objectID' => $item->getId());
     // Iterating the item yields its fields; each one contributes its raw
     // values under its field identifier.
     /** @var \Drupal\search_api\Item\FieldInterface $item_field */
     foreach ($item as $item_field) {
         $identifier = $item_field->getFieldIdentifier();
         $record[$identifier] = $item_field->getValues();
     }
     return $record;
 }
Ejemplo n.º 5
0
 /**
  * {@inheritdoc}
  */
 public function addResultItem(ItemInterface $result_item)
 {
     // Result items are keyed by their ID, so adding an item whose ID is
     // already present replaces the earlier entry.
     $item_id = $result_item->getId();
     $this->resultItems[$item_id] = $result_item;

     // Return the object itself to allow method chaining.
     return $this;
 }