예제 #1
0
 /**
  * Compute the term frequency of a token within a single document.
  *
  * A frequency distribution is built from the document's data. A token that
  * is not present in that distribution always yields 0, whatever the mode.
  *
  * In the list below, "frequency" means the value that
  * FreqDist::getKeyValuesByFrequency() reports for the token.
  *  - self::BOOLEAN_MODE:     1, because the token is known to be present
  *  - self::LOGARITHMIC_MODE: log(frequency + 1), natural logarithm
  *  - self::AUGMENTED_MODE:   0.5 + 0.5 * frequency / highest frequency in the document
  *  - self::FREQUENCY_MODE:   the frequency itself; also used for any unrecognised mode
  *
  * @param DocumentAbstract $document The document to evaluate
  * @param string $token The token to look for
  * @param int $mode The type of term frequency to use (one of the *_MODE constants)
  * @return int|float
  */
 public function getTermFrequency(DocumentAbstract $document, $token, $mode = 1)
 {
     $freqDist = new FreqDist($document->getDocumentData());
     $frequencies = $freqDist->getKeyValuesByFrequency();

     // The token does not exist in the document
     if (!isset($frequencies[$token])) {
         return 0;
     }

     switch ($mode) {
         case self::BOOLEAN_MODE:
             // Presence was already established by the isset() guard above
             return 1;
         case self::LOGARITHMIC_MODE:
             return log($frequencies[$token] + 1);
         case self::AUGMENTED_MODE:
             // Use max() instead of reading the first element, so the result
             // does not depend on how FreqDist orders the returned array.
             // The array is non-empty here because $token is a key of it.
             $maxFrequency = max($frequencies);
             return 0.5 + 0.5 * $frequencies[$token] / $maxFrequency;
         case self::FREQUENCY_MODE:
         default:
             return $frequencies[$token];
     }
 }
예제 #2
0
 /**
  * Build a document from its tokens, filling in an id and a creation
  * timestamp when the caller does not supply them.
  *
  * The tokens are handed to the parent constructor together with a null
  * second argument. A falsy $id (null, 0, '', ...) is replaced by the
  * next value of the class-wide counter, which is incremented only in
  * that case. A missing $createdOn is replaced by a new DateTime().
  *
  * @param array $tokens
  * @param mixed $id Identifier to use; any falsy value requests an auto-generated one
  * @param DateTime $createdOn Creation time; a new DateTime() is used when omitted
  * @param array $metadata Stored on the instance as given
  */
 public function __construct(array $tokens, $id = null, DateTime $createdOn = null, $metadata = [])
 {
     parent::__construct($tokens, null);

     // Only consume a counter value when no usable id was supplied
     if (!$id) {
         $id = ++self::$counter;
     }
     $this->id = $id;

     // The type hint restricts $createdOn to a DateTime instance or null
     if ($createdOn === null) {
         $createdOn = new DateTime();
     }
     $this->createdOn = $createdOn;

     $this->metadata = $metadata;
 }