/**
 * Verifies that tokens configured as stop words are left out of both the
 * fitted vocabulary and the per-sample token counts.
 *
 * 'dolor' and 'diam' are the stop words here, so neither is expected to
 * receive a vocabulary index or a column in the count vectors.
 */
public function testTransformationWithStopWords()
{
    $documents = [
        'Lorem ipsum dolor sit amet dolor',
        'Mauris placerat ipsum dolor',
        'Mauris diam eros fringilla diam',
    ];

    $ignoredTokens = new StopWords(['dolor', 'diam']);
    $tokenizer = new WhitespaceTokenizer();
    $vectorizer = new TokenCountVectorizer($tokenizer, $ignoredTokens);

    $vectorizer->fit($documents);

    // Expected indices 0..7, in order of first appearance across the samples.
    $expectedVocabulary = [
        'Lorem',
        'ipsum',
        'sit',
        'amet',
        'Mauris',
        'placerat',
        'eros',
        'fringilla',
    ];
    $this->assertSame($expectedVocabulary, $vectorizer->getVocabulary());

    // The assertion below inspects $documents itself, i.e. transform() is
    // expected to replace each sample with its count vector in place.
    $vectorizer->transform($documents);

    // One row per sample; columns follow the vocabulary order above:
    // Lorem, ipsum, sit, amet, Mauris, placerat, eros, fringilla
    $expectedCounts = [
        [1, 1, 1, 1, 0, 0, 0, 0],
        [0, 1, 0, 0, 1, 1, 0, 0],
        [0, 0, 0, 0, 1, 0, 1, 1],
    ];
    $this->assertSame($expectedCounts, $documents);
}