/**
 * Get the term frequency of a token within a single document.
 *
 * A frequency distribution is built from the document data and the token is
 * looked up in it. A token that is not present always yields 0, whatever the
 * mode. Otherwise the result depends on $mode:
 *  - BOOLEAN_MODE:     1 (the token is known to be present)
 *  - LOGARITHMIC_MODE: log(frequency + 1)
 *  - AUGMENTED_MODE:   0.5 + 0.5 * frequency / highest frequency in the document
 *  - FREQUENCY_MODE or any unrecognised mode: the frequency as stored in the distribution
 *
 * @param DocumentAbstract $document - the document to evaluate
 * @param string $token The token to look for
 * @param int $mode The type of term frequency to use (one of the *_MODE constants)
 * @return int|float
 */
public function getTermFrequency(DocumentAbstract $document, $token, $mode = 1)
{
    $freqDist = new FreqDist($document->getDocumentData());
    // Presumably a token => frequency map; verify against FreqDist.
    $keyValuesByWeight = $freqDist->getKeyValuesByFrequency();

    // The token does not exist in the document
    if (!isset($keyValuesByWeight[$token])) {
        return 0;
    }

    switch ($mode) {
        case self::BOOLEAN_MODE:
            // Presence was already established by the isset() check above.
            return 1;
        case self::LOGARITHMIC_MODE:
            return log($keyValuesByWeight[$token] + 1);
        case self::AUGMENTED_MODE:
            // Take the maximum explicitly so the result does not depend on
            // the order in which FreqDist returns its entries. The map is
            // non-empty here because $token was found in it.
            $maxFrequency = max($keyValuesByWeight);
            return 0.5 + 0.5 * $keyValuesByWeight[$token] / $maxFrequency;
        case self::FREQUENCY_MODE:
        default:
            return $keyValuesByWeight[$token];
    }
}
/**
 * Build the document from its tokens plus optional identifying details.
 *
 * @param array $tokens The tokens, forwarded to the parent constructor
 * @param mixed $id Identifier to use; any falsy value (null, 0, '', ...) is
 *                  replaced by the next value of the class-level counter
 * @param DateTime $createdOn Creation timestamp; a new DateTime() is used when omitted
 * @param array $metadata Metadata stored on the document as given
 */
public function __construct(array $tokens, $id = null, DateTime $createdOn = null, $metadata = [])
{
    parent::__construct($tokens, null);

    // The shared counter only advances when no usable id was supplied.
    if (!$id) {
        $id = ++self::$counter;
    }
    $this->id = $id;

    if ($createdOn === null) {
        $createdOn = new DateTime();
    }
    $this->createdOn = $createdOn;

    $this->metadata = $metadata;
}