/**
 * Select an identifying substring while looking only at the middle 50% of the text.
 *
 * The first and last quarter of $inputText are discarded and the remaining
 * middle half is handed to selectSubstringHistogram() together with the
 * histogram supplied by the caller. The histogram itself is passed through
 * unchanged; it is not rebuilt from the trimmed text here.
 *
 * @param $histogram - word histogram handed straight to selectSubstringHistogram()
 * @param $inputText - the full text to sample from
 * @param $substringLength - handed straight to selectSubstringHistogram()
 * @return whatever selectSubstringHistogram() returns for the trimmed text
 */
function selectSubstringModifiedHistogram($histogram, $inputText, $substringLength) {
    //only consider the middle 50% of the text
    $textLength = strlen($inputText);

    //substr() expects integer offsets; cast explicitly instead of relying on implicit float truncation
    $start = (int) ($textLength * 0.25);
    $length = (int) ($textLength * 0.5);
    $inputText = substr($inputText, $start, $length);

    //0 is passed as the fourth argument, as at the other call sites of selectSubstringHistogram()
    return selectSubstringHistogram($histogram, $inputText, $substringLength, 0);
}
<?php
// Scratch script: builds a word histogram for a fixed sentence, scores it,
// selects a substring from it and dumps the intermediate values to the page.
include "includes/common.php";

$string = "this is a test this is a second test"; //1872

// Split the input on single spaces and count how often each word occurs.
$words = explode(" ", $string);
$histogram = array_count_values($words);
echo print_r($histogram, true), "<br>";

// Replace the raw counts with the values produced by scoreHistogram().
$histogram = scoreHistogram($histogram);
echo print_r($histogram, true), "<br>";

// Signature: selectSubstringHistogram($histogram, $inputText, $substringLength, $count)
$substring = selectSubstringHistogram($histogram, $string, 4, 0);
echo "<br>" . $substring . "<br>";

// Build one '("word", value)' tuple per histogram entry.
$sql = array();
foreach ($histogram as $word => $count) {
    $sql[] = "(\"{$word}\", {$count})";
}

// Flat list: every histogram key first, followed by every histogram value.
$blah = array_merge(array_keys($histogram), array_values($histogram));
//echo implode(',', $histogram);
print_r($blah);
//based on sampling method chosen, select the identifiable substring
switch ($scoringMethod) {
    case "modifiedhist":
        $substring = selectSubstringModifiedHistogram(genHistogram($inputText), $inputText, $substringLength);
        break;
    case "multipleRandSamples":
        //no selection is performed for this method here; the substring stays empty
        $substring = "";
        break;
    case "random":
        $substring = selectSubstringRandom($inputText, $substringLength);
        break;
    case "histogram":
    default:
        //plain histogram selection is also the fallback for any unrecognised method name
        $substring = selectSubstringHistogram(genHistogram($inputText), $inputText, $substringLength, 0);
}

//the substring is cut out of $inputText, so it may contain <, > or &; escape it before it goes into the page
echo '"' . htmlspecialchars($substring, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . '"';
?>
    </div>
</div>
<div class="post">
    <h2 class="title">Regular Expression</h2>
    <div class="entry">
        <?php
        $regex = createRegex($substring);
        //escape for display so the browser does not interpret characters of the regex as markup
        echo htmlspecialchars($regex, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "<br><br>";
        ?>
    </div>
</div>
/**
 * Process an individual filepath.
 *
 * Reads the file, selects an identifying substring, builds a snort rule for it,
 * optionally appends the rule to the snort file and records the rule in the
 * `rules` table. Files already processed, empty files and files for which no
 * substring is selected are skipped silently.
 *
 * Type = 1 for individual processed files, 2 for files processed from a folder crawl.
 *
 * @param $type - allows this function to use individual files (1) or files processed from a folder crawl (2)
 * @param $path - the local mounted directory ("/mnt/share")
 * @param $netPath - the actual network directory
 * @param $scoringMethod - scoring technique used (i.e. histogram, random, etc.)
 * @param $substringLength - from the config table
 * @param $snortFile - from the config table
 */
function processFile($type, $path, $netPath, $scoringMethod, $substringLength, $snortFile) {
    if (fileAlreadyProcessed($path)) {
        return;
    }

    //read the whole file in one call; fread($file, filesize($path)) fails on zero-byte files because the length is 0
    $inputText = file_get_contents($path);
    if ($inputText === false) {
        die("processFile(): can't open {$path}");
    }
    if ($inputText === "") {
        return; //an empty file cannot yield a substring, skip it
    }

    switch ($scoringMethod) {
        case "modifiedhist":
        case "multipleRandSamples":
        case "random":
            //the selectSubstringModifiedHistogram()/selectSubstringRandom() calls are disabled in this function,
            //so these methods produce no substring and the file is skipped below
            $substring = "";
            break;
        case "histogram":
        default:
            //plain histogram selection is also the fallback for any unrecognised method name
            $substring = selectSubstringHistogram(genHistogram($inputText), $inputText, $substringLength, 0);
    }

    //loose comparison kept on purpose: it also skips a null/false result from the selector
    if ($substring == "") {
        return; //if no unique substring is found, skip this file
    }

    $sid = getNextsid();
    $rule = createSnortRule($sid, $path, $substring);

    if ($snortFile != "") {
        //if snortFile was passed, write the rule out to the snort file
        writeToFile($snortFile, $rule);
    }

    //writes file to the database
    include "dbconnect.php";

    //the last path element is the file name; the network path (not the local one) is stored as the path
    $parts = explode("/", $path);
    $fileName = mysql_real_escape_string(array_pop($parts));
    $netPath = mysql_real_escape_string($netPath);
    $rule = mysql_real_escape_string($rule);
    $regex = mysql_real_escape_string(createRegex($substring));

    //sid and type are placed in the statement unquoted, so force them to integers
    $sidValue = (int) $sid;
    $typeValue = (int) $type;

    $query = "INSERT INTO rules (file_name, path, rule, regex, count, sid, type) VALUES ('{$fileName}', '{$netPath}', '{$rule}', '{$regex}', 1, {$sidValue}, {$typeValue})";
    mysql_query($query); //NOTE(review): the result is not checked, a failed insert goes unnoticed

    include "dbclose.php";
}