/**
 * Creates new pattern cache files
 *
 * Extracts the section names (the text between "[" and "]") from the given INI content,
 * groups them by pattern hash and pattern length, and yields the grouped patterns as
 * tab-separated lines, one yield per pattern cache subkey.
 *
 * Each yielded value is an array with a single entry: subkey => list of lines, where a line
 * has the form "<hash>\t<length>\t<pattern>[\t<pattern>...]". Subkeys returned by
 * SubKey::getAllPatternCacheSubkeys() for which no patterns were found are yielded with an
 * empty list. If the content contains no matching section at all, a single empty array is
 * yielded.
 *
 * @param string $content content of the INI file
 *
 * @return \Generator
 */
public function createPatterns($content)
{
    // get all relevant patterns from the INI file
    // - containing "*" or "?"
    // - not containing "*" or "?", but not having a comment
    preg_match_all('/(?<=\\[)(?:[^\\r\\n]*[?*][^\\r\\n]*)(?=\\])|(?<=\\[)(?:[^\\r\\n*?]+)(?=\\])(?![^\\[]*Comment=)/m', $content, $matches);

    if (empty($matches[0]) || !is_array($matches[0])) {
        yield [];

        return;
    }

    $quoterHelper = new Quoter();
    $matches      = $matches[0];
    usort($matches, [$this, 'compareBcStrings']);

    // build an array to structure the data. this requires some memory, but we need this step to be able to
    // sort the data in the way we need it (see below).
    $data = [];

    foreach ($matches as $pattern) {
        // the version section is not a browser pattern
        if ('GJK_Browscap_Version' === $pattern) {
            continue;
        }

        $pattern     = strtolower($pattern);
        $patternhash = Pattern::getHashForPattern($pattern, false);
        $tmpLength   = Pattern::getPatternLength($pattern);

        // special handling of default entry
        if ($tmpLength === 0) {
            $patternhash = str_repeat('z', 32);
        }

        if (!isset($data[$patternhash])) {
            $data[$patternhash] = [];
        }

        if (!isset($data[$patternhash][$tmpLength])) {
            $data[$patternhash][$tmpLength] = [];
        }

        $pattern = $quoterHelper->pregQuote($pattern);

        // Check if the pattern contains digits - in this case we replace them with a digit regular expression,
        // so that very similar patterns (e.g. only with different browser version numbers) can be compressed.
        // This helps to speed up the first (and most expensive) part of the pattern search a lot.
        if (strpbrk($pattern, '0123456789') !== false) {
            $compressedPattern = preg_replace('/\\d/', '[\\d]', $pattern);

            // strict comparison: the entries are strings and must match exactly, without type juggling
            if (!in_array($compressedPattern, $data[$patternhash][$tmpLength], true)) {
                $data[$patternhash][$tmpLength][] = $compressedPattern;
            }
        } else {
            $data[$patternhash][$tmpLength][] = $pattern;
        }
    }

    unset($matches);

    // sorting of the data is important to check the patterns later in the correct order, because
    // we need to check the most specific (=longest) patterns first, and the least specific
    // (".*" for "Default Browser") last.
    //
    // sort by pattern start to group them
    ksort($data);

    // and then by pattern length (longest first)
    foreach (array_keys($data) as $key) {
        krsort($data[$key]);
    }

    // write optimized file (grouped by the first character of the hash, generated from the pattern
    // start) with multiple patterns joined by tabs. this is to speed up loading of the data (small
    // array with pattern strings instead of a large array with single patterns) and also enables
    // us to search for multiple patterns in one preg_match call for a fast first search
    // (3-10 faster), followed by a detailed search for each single pattern.
    $contents = [];

    foreach ($data as $patternhash => $tmpEntries) {
        if (empty($tmpEntries)) {
            continue;
        }

        $subkey = SubKey::getPatternCacheSubkey($patternhash);

        if (!isset($contents[$subkey])) {
            $contents[$subkey] = [];
        }

        foreach ($tmpEntries as $tmpLength => $tmpPatterns) {
            if (empty($tmpPatterns)) {
                continue;
            }

            // limit the number of patterns per line to self::COUNT_PATTERN
            $chunks = array_chunk($tmpPatterns, self::COUNT_PATTERN);

            foreach ($chunks as $chunk) {
                $contents[$subkey][] = $patternhash . "\t" . $tmpLength . "\t" . implode("\t", $chunk);
            }
        }
    }

    unset($data);

    $subkeys = SubKey::getAllPatternCacheSubkeys();

    // yield the collected lines for every subkey that has patterns ...
    foreach ($contents as $subkey => $subkeyContents) {
        $subkey = (string) $subkey;

        yield [$subkey => $subkeyContents];

        unset($subkeys[$subkey]);
    }

    // ... and an empty list for every remaining subkey
    foreach (array_keys($subkeys) as $subkey) {
        $subkey = (string) $subkey;

        yield [$subkey => []];
    }
}
/**
 * Checks that SubKey::getAllPatternCacheSubkeys() returns an array with exactly 256 entries.
 */
public function testGetAllPatternCacheSubkeys()
{
    $subkeys = SubKey::getAllPatternCacheSubkeys();

    self::assertInternalType('array', $subkeys);
    self::assertCount(256, $subkeys);
}