getKeyValuesByFrequency() public method

Returns an array of key => frequency pairs.
public getKeyValuesByFrequency ( ) : array
return array
Esempio n. 1
0
 /**
  * Get the term frequency of a token within a single document.
  *
  * The raw frequencies come from FreqDist::getKeyValuesByFrequency() and are
  * then weighted according to $mode:
  *  - self::BOOLEAN_MODE:     1 when the token occurs in the document
  *  - self::LOGARITHMIC_MODE: log(frequency + 1)
  *  - self::AUGMENTED_MODE:   0.5 + 0.5 * frequency / highest frequency in the document
  *  - self::FREQUENCY_MODE (and any unrecognised mode): the raw frequency
  *
  * @param DocumentAbstract $document - the document to evaluate
  * @param string $token The token to look for
  * @param int $mode The type of term frequency to use
  * @return int|float The weighted term frequency, 0 when the token is absent
  */
 public function getTermFrequency(DocumentAbstract $document, $token, $mode = 1)
 {
     $freqDist = new FreqDist($document->getDocumentData());
     $keyValuesByWeight = $freqDist->getKeyValuesByFrequency();
     //The token does not exist in the document
     if (!isset($keyValuesByWeight[$token])) {
         return 0;
     }
     switch ($mode) {
         case self::BOOLEAN_MODE:
             //presence was already established by the isset() check above
             return 1;
         case self::LOGARITHMIC_MODE:
             return log($keyValuesByWeight[$token] + 1);
         case self::AUGMENTED_MODE:
             //take the maximum explicitly instead of relying on the sort
             //order of the array or on its internal pointer position;
             //the array is non-empty here because $token was found in it
             $maxFrequency = max($keyValuesByWeight);
             return 0.5 + 0.5 * $keyValuesByWeight[$token] / $maxFrequency;
         case self::FREQUENCY_MODE:
         default:
             return $keyValuesByWeight[$token];
     }
 }
 /**
  * Populates $this->index from every document in the given collection.
  *
  * For each term the index stores an entry holding the summed frequency
  * across documents (self::FREQ) and the list of collection keys of the
  * documents in which the term was found (self::POSTINGS).
  *
  * @param ICollection $collection The documents to index
  */
 protected function buildIndex(ICollection $collection)
 {
     //template for a term that has not been indexed yet; arrays are copied
     //on assignment, so every term gets its own independent entry
     $emptyEntry = array(self::FREQ => 0, self::POSTINGS => array());

     foreach ($collection as $docId => $doc) {
         $distribution = new FreqDist($doc->getDocumentData());
         $termCounts = $distribution->getKeyValuesByFrequency();

         foreach ($termCounts as $term => $count) {
             //first time this term is seen anywhere in the collection
             if (!isset($this->index[$term])) {
                 $this->index[$term] = $emptyEntry;
             }

             //accumulate the collection-wide frequency and record the document
             $this->index[$term][self::FREQ] += $count;
             $this->index[$term][self::POSTINGS][] = $docId;
         }
     }
 }