* */

/**
 * Raw text of the book to analyze.
 *
 * @var string|false $book
 */
$book = file_get_contents('data/books/pg74.txt');
if ($book === false) {
    // Stop here rather than handing `false` to the tokenizer.
    throw new \RuntimeException('Unable to read data/books/pg74.txt');
}

/**
 * Create a tokenizer object to parse the book into a set of tokens
 */
$tokenizer = new \TextAnalysis\Tokenizers\GeneralTokenizer();

/**
 * Get the set of tokens generated by the tokenizer and build a
 * frequency distribution from them
 */
$tokens = $tokenizer->tokenize($book);
$freqDist = new \TextAnalysis\Analysis\FreqDist($tokens);

/**
 * Get the top 10 most used words in Tom Sawyer
 *
 * array_slice() is used instead of array_splice(): array_splice() takes its
 * array by reference, and passing a method's return value to it raises
 * "Only variables should be passed by reference". The fourth argument keeps
 * the original keys (presumably the tokens; confirm against FreqDist) so that
 * integer-like keys are not renumbered.
 */
$top10 = array_slice($freqDist->getKeyValuesByFrequency(), 0, 10, true);

/**
 * Use High Charts to visualize the data
 */
$pageBuilder = new BarPageBuilder($top10);
$html = $pageBuilder->getHtmlPage();

// Only announce the page when it was actually written.
if (file_put_contents("pub/pages/example_01_frequency_analysis.html", $html) === false) {
    throw new \RuntimeException('Unable to write pub/pages/example_01_frequency_analysis.html');
}

echo 'go to the directory pub/pages/example_01_frequency_analysis.html and open the file with your web browser' . PHP_EOL;

/**
 * go to the directory in this project and open the file with your web browser
 */
/**
 * Raw text of the two books to analyze, keyed by variable name to the
 * file each one is read from.
 *
 * @var string|false $tomSawyerBook
 * @var string|false $huckFinnBook
 */
$tomSawyerBook = file_get_contents('data/books/pg74.txt');
if ($tomSawyerBook === false) {
    // Stop here rather than handing `false` to the tokenizer.
    throw new \RuntimeException('Unable to read data/books/pg74.txt');
}

$huckFinnBook = file_get_contents('data/books/pg76.txt');
if ($huckFinnBook === false) {
    throw new \RuntimeException('Unable to read data/books/pg76.txt');
}

/**
 * Create a tokenizer object to parse the book into a set of tokens
 */
$tokenizer = new \TextAnalysis\Tokenizers\GeneralTokenizer();

/**
 * Get the set of tokens generated by the tokenizer and
 * create a token document from the tokens
 */
$tomSawyerDocument = new \TextAnalysis\Documents\TokensDocument($tokenizer->tokenize($tomSawyerBook));
$huckFinnDocument = new \TextAnalysis\Documents\TokensDocument($tokenizer->tokenize($huckFinnBook));

/**
 * Create a document collection that can have filters or further analysis done
 */
$docCollection = new \TextAnalysis\Collections\DocumentArrayCollection(
    array($tomSawyerDocument, $huckFinnDocument)
);

/**
 * Apply filters to the document collection:
 * lower case the documents, remove quotes and remove stop words
 */
$filters = array(
    new \TextAnalysis\Filters\LowerCaseFilter(),
    new \TextAnalysis\Filters\QuotesFilter(),
    new \TextAnalysis\Filters\EnglishStopWordsFilter(),
);

/**
 * Applies the filters to all the documents
 */
$docCollection->applyTransformations($filters);

/**