/**
 * Verifies the digest returned by the Hash filter for the input 'foo',
 * first with a filter built without arguments, then with one built for
 * 'sha256'.
 *
 * @return void
 */
public function testFilter() {
	$input = 'foo';

	// Filter constructed with no arguments; the expected value is the
	// 40-character hex digest (it matches SHA-1 of 'foo').
	$defaultFilter = new Hash();
	$defaultDigest = $defaultFilter->apply($input);
	$this->assertEquals('0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33', $defaultDigest);

	// Filter constructed with an explicit 'sha256' argument.
	$sha256Filter = new Hash('sha256');
	$sha256Digest = $sha256Filter->apply($input);
	$this->assertEquals('2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae', $sha256Digest);
}
/**
 * Collects the ids of documents whose shingles overlap too much with the
 * shingles of the other documents.
 *
 * For every document a similarity percentage is computed from the number of
 * shingle occurrences it shares with the rest of the set; documents whose
 * percentage is strictly greater than $this->_params['allowSimilarity'] are
 * reported and excluded from the comparisons made for later documents.
 *
 * @return array Map of document id => true for each document flagged as similar
 */
protected function _getSimilarDocsIds() {
	$flaggedIds = array();
	$docsShingles = $this->_getShigles();

	foreach ($docsShingles as $id => $currentDocShingles) {
		// Occurrence count of every shingle across the documents still in the
		// working set. Must be rebuilt on each pass because flagged documents
		// are removed from $docsShingles below.
		$occurrences = Hash::apply($docsShingles, '{n}.{n}', 'array_count_values');

		// Flipping makes the shingle values the keys (duplicates collapse).
		$currentKeys = array_flip($currentDocShingles);
		$currentCount = count($currentKeys);

		// Number of documents left in the working set, minus one
		// (presumably to leave out the current document).
		$othersCount = count($docsShingles) - 1;

		// Total occurrences of this document's shingles, minus one per
		// distinct shingle of the document.
		$sharedCount = array_sum(array_intersect_key($occurrences, $currentKeys)) - $currentCount;

		// Only score the document when every term is positive; this also
		// keeps the divisions below away from zero.
		if ($sharedCount > 0 && $currentCount > 0 && $othersCount > 0) {
			$percent = round($sharedCount / $currentCount * 100 / $othersCount, 2);
			if ($percent > $this->_params['allowSimilarity']) {
				$flaggedIds[$id] = true;
				// Drop the flagged document from the working set.
				unset($docsShingles[$id]);
			}
		}
	}

	return $flaggedIds;
}
/**
 * Checks that Hash::apply() passes the values extracted by a path to the
 * given callback and returns the callback's result: applying 'array_sum'
 * to '{n}.Article.id' of the article fixture is expected to give 15.
 *
 * @return void
 */
public function testApply() {
	$articles = static::articleData();
	$idPath = '{n}.Article.id';
	$expectedSum = 15;

	$actualSum = Hash::apply($articles, $idPath, 'array_sum');

	$this->assertEquals($expectedSum, $actualSum);
}