/**
 * Returns the name of the programming language in which the source should have been written.
 *
 * The language is resolved in this order:
 *   1. the configured 'file_extensions' entry for this file's own extension property;
 *   2. the configured 'file_extensions' entry for the lower-cased extension of the path on disk;
 *   3. the result of ProgrammingLanguageDetector::detect() run on the first 20000 bytes of the file.
 *
 * The file's contents are read only when step 3 is reached, and only the leading 20000 bytes are
 * loaded, so files whose extension is already known to the configuration cause no read at all.
 *
 * @return mixed The configured value for the matching extension, or whatever the detector returns.
 * @throws ApplicationModelException_File With code self::ERROR_INVALID_TYPE if the file's type is
 *                                        not a source code, or with code self::ERROR_NOTFOUND_FILE
 *                                        if the file cannot be found.
 */
public function getProgrammingLanguage() {
    if ($this->getType() != self::TYPE_SOURCE) {
        throw new ApplicationModelException_File('This method supports only source code type of files.', self::ERROR_INVALID_TYPE);
    }

    $filePath = $this->getPath();
    if (!is_file($filePath)) {
        throw new ApplicationModelException_File('Cannot access file ' . $filePath . '.', self::ERROR_NOTFOUND_FILE);
    }

    // Cheap lookups first: the extension recorded on the model...
    if (isset($this->application->config['file_extensions'][$this->extension])) {
        return $this->application->config['file_extensions'][$this->extension];
    }

    // ...then the extension of the path on disk (lower-cased before the lookup).
    $systemExtension = strtolower(pathinfo($filePath, PATHINFO_EXTENSION));
    if (isset($this->application->config['file_extensions'][$systemExtension])) {
        return $this->application->config['file_extensions'][$systemExtension];
    }

    // Only now is the content needed. Read at most the first 20000 bytes instead of loading the
    // whole file and truncating it afterwards.
    $sourceText = file_get_contents($filePath, false, null, 0, 20000);
    if ($sourceText === false) {
        // The read failed (file_get_contents has already raised a warning). Fall back to an empty
        // sample so the detector still receives a string.
        $sourceText = '';
    }

    $detector = new ProgrammingLanguageDetector();
    $detector->importKnowledgeBase(file_get_contents(dirname(__FILE__) . '/../lib/ProgrammingLanguageDetector/knowledge_base.dat'));
    return $detector->detect($sourceText);
}
* Copyright (C) 2013 powder96 <https://github.com/powder96>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

/*
 * Rebuilds knowledge_base.dat next to this script: every file matching training_data/<label>/*.*
 * is fed to the detector with <label> (the name of its parent directory) as the training type,
 * and the exported knowledge base is then written to disk.
 */

// Only local callers may run the script: the command line, or a request from a loopback address
// (IPv4 127.0.0.1 or IPv6 ::1). REMOTE_ADDR is not set under the CLI, hence the isset() guard.
$remoteAddress = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '';
$isLocalCaller = PHP_SAPI === 'cli' || in_array($remoteAddress, array('127.0.0.1', '::1'), true);
if (!$isLocalCaller) {
    echo 'This script is accessible only from localhost.';
    exit;
}

error_reporting(E_ALL);

require_once dirname(__FILE__) . '/ProgrammingLanguageDetector.php';

$detector = new ProgrammingLanguageDetector();

$trainingFiles = glob(dirname(__FILE__) . '/training_data/*/*.*');
if ($trainingFiles === false) {
    // glob() reports an error with false; treat it as "nothing to train on".
    $trainingFiles = array();
}

foreach ($trainingFiles as $file) {
    $contents = file_get_contents($file);
    if ($contents === false) {
        // Unreadable entry (for example a directory whose name contains a dot): skip it rather
        // than passing false to the detector as if it were source text.
        continue;
    }

    // The label is the name of the directory that directly contains the file.
    $type = basename(dirname($file));
    $detector->train($contents, $type);
}

if (file_put_contents(dirname(__FILE__) . '/knowledge_base.dat', $detector->exportKnowledgeBase()) === false) {
    // Do not claim success when the knowledge base could not be written.
    echo 'Failed to write the knowledge base file.';
    exit;
}

echo 'Done!';