With the variants option, all variants from the maximum number of pattern characters down to one
character will be returned. This is required in some cases, when a placeholder is used very
early in the pattern.
Example:
Pattern: "Mozilla/* (Nintendo 3DS; *) Version/*"
User agent: "Mozilla/5.0 (Nintendo 3DS; U; ; en) Version/1.7567.US"
In this case the hash for the pattern is created for "Mozilla/" while the
hash for the user agent is created for "Mozilla/5.0". The variants option
results in an array with hashes for "Mozilla/5.0", "Mozilla/5.", "Mozilla/5",
"Mozilla/" ... "M", so that the pattern hash is included.
/**
 * Yields the candidate pattern groups that have to be matched against the user agent.
 *
 * The search for potential patterns is narrowed down in two ways:
 * - only cache rows whose hash equals one of the hashes generated from the user agent
 *   (or the special fallback key for the default browser) are considered
 * - only rows whose stored length does not exceed the length of the user agent are
 *   yielded (the pattern cannot be longer than the user agent!)
 *
 * Once all candidates have been produced, a single empty string is yielded.
 *
 * @param string $userAgent
 *
 * @return \Generator
 */
public function getPatterns($userAgent)
{
    $hashes          = Pattern::getHashForPattern($userAgent, true);
    $userAgentLength = strlen($userAgent);

    // the special key is appended last, so the default browser is only tried
    // after every hash derived from the user agent itself
    $hashes[] = str_repeat('z', 32);

    foreach ($hashes as $hash) {
        $subkey  = SubKey::getPatternCacheSubkey($hash);
        $cacheId = 'browscap.patterns.' . $subkey;

        if (!$this->cache->hasItem($cacheId, true)) {
            $this->logger->debug('cache key "browscap.patterns.' . $subkey . '" not found');

            continue;
        }

        $success = null;
        $rows    = $this->cache->getItem($cacheId, true, $success);

        if (!$success) {
            $this->logger->debug('cache key "browscap.patterns.' . $subkey . '" not found');

            continue;
        }

        if (!(is_array($rows) && count($rows))) {
            $this->logger->debug('cache key "browscap.patterns.' . $subkey . '" was empty');

            continue;
        }

        // each row has the form "<hash>\t<length>\t<patterns...>"
        $matched = false;

        foreach ($rows as $row) {
            list($rowHash, $rowLength, $rowPatterns) = explode("\t", $row, 3);

            if ($rowHash !== $hash) {
                if ($matched) {
                    // a different hash after at least one matching row: stop scanning this file
                    break;
                }

                continue;
            }

            $matched = true;

            if ($rowLength <= $userAgentLength) {
                yield trim($rowPatterns);
            }
        }
    }

    yield '';
}
/**
 * Creates new pattern cache files
 *
 * Extracts the section names from the given INI content, groups them by the hash of their
 * pattern start and by pattern length, and yields the result one cache subkey at a time.
 *
 * Every yielded value is an array with exactly one entry, `[subkey => lines]`, where each
 * line has the form "<patternhash>\t<length>\t<pattern>\t<pattern>...". If no section name
 * matches at all, a single empty array is yielded and the generator ends. After the subkeys
 * that received patterns, every remaining subkey reported by
 * SubKey::getAllPatternCacheSubkeys() is yielded with an empty list.
 *
 * @param string $content
 *
 * @return \Generator
 */
public function createPatterns($content)
{
    // get all relevant patterns from the INI file
    // - containing "*" or "?"
    // - not containing "*" or "?", but not having a comment
    preg_match_all(
        '/(?<=\\[)(?:[^\\r\\n]*[?*][^\\r\\n]*)(?=\\])|(?<=\\[)(?:[^\\r\\n*?]+)(?=\\])(?![^\\[]*Comment=)/m',
        $content,
        $matches
    );

    if (empty($matches[0]) || !is_array($matches[0])) {
        yield [];

        return;
    }

    $quoterHelper = new Quoter();
    $matches      = $matches[0];
    usort($matches, [$this, 'compareBcStrings']);

    // build an array to structure the data. this requires some memory, but we need this step to be able to
    // sort the data in the way we need it (see below).
    $data = [];

    foreach ($matches as $pattern) {
        if ('GJK_Browscap_Version' === $pattern) {
            continue;
        }

        $pattern     = strtolower($pattern);
        $patternhash = Pattern::getHashForPattern($pattern, false);
        $tmpLength   = Pattern::getPatternLength($pattern);

        // special handling of default entry
        if ($tmpLength === 0) {
            $patternhash = str_repeat('z', 32);
        }

        if (!isset($data[$patternhash])) {
            $data[$patternhash] = [];
        }

        if (!isset($data[$patternhash][$tmpLength])) {
            $data[$patternhash][$tmpLength] = [];
        }

        $pattern = $quoterHelper->pregQuote($pattern);

        // Check if the pattern contains digits - in this case we replace them with a digit regular expression,
        // so that very similar patterns (e.g. only with different browser version numbers) can be compressed.
        // This helps to speed up the first (and most expensive) part of the pattern search a lot.
        if (strpbrk($pattern, '0123456789') !== false) {
            $compressedPattern = preg_replace('/\\d/', '[\\d]', $pattern);

            // strict comparison: all entries are strings, so no type juggling may take place here
            if (!in_array($compressedPattern, $data[$patternhash][$tmpLength], true)) {
                $data[$patternhash][$tmpLength][] = $compressedPattern;
            }
        } else {
            $data[$patternhash][$tmpLength][] = $pattern;
        }
    }

    unset($matches);

    // sorting of the data is important to check the patterns later in the correct order, because
    // we need to check the most specific (=longest) patterns first, and the least specific
    // (".*" for "Default Browser") last.
    //
    // sort by pattern start to group them
    ksort($data);

    // and then by pattern length (longest first)
    foreach (array_keys($data) as $key) {
        krsort($data[$key]);
    }

    // write optimized file (grouped by the first character of the hash, generated from the pattern
    // start) with multiple patterns joined by tabs. this is to speed up loading of the data (small
    // array with pattern strings instead of a large array with single patterns) and also enables
    // us to search for multiple patterns in one preg_match call for a fast first search
    // (3-10 faster), followed by a detailed search for each single pattern.
    $contents = [];

    foreach ($data as $patternhash => $tmpEntries) {
        if (empty($tmpEntries)) {
            continue;
        }

        $subkey = SubKey::getPatternCacheSubkey($patternhash);

        if (!isset($contents[$subkey])) {
            $contents[$subkey] = [];
        }

        foreach ($tmpEntries as $tmpLength => $tmpPatterns) {
            if (empty($tmpPatterns)) {
                continue;
            }

            $chunks = array_chunk($tmpPatterns, self::COUNT_PATTERN);

            foreach ($chunks as $chunk) {
                $contents[$subkey][] = $patternhash . "\t" . $tmpLength . "\t" . implode("\t", $chunk);
            }
        }
    }

    unset($data);

    $subkeys = SubKey::getAllPatternCacheSubkeys();

    // use a dedicated loop variable so that the $content parameter is not overwritten
    foreach ($contents as $subkey => $lines) {
        // array keys that look like integers are converted by PHP, so cast them back
        $subkey = (string) $subkey;

        yield [$subkey => $lines];

        unset($subkeys[$subkey]);
    }

    // subkeys without any pattern are reported with an empty list
    // NOTE(review): presumably so that stale cache entries get overwritten - confirm against the caller
    foreach (array_keys($subkeys) as $subkey) {
        $subkey = (string) $subkey;

        yield [$subkey => []];
    }
}
/**
 * Checks that the variants mode of Pattern::getHashForPattern() returns the expected
 * list of ten hashes, in order, for a lower-cased Chrome user agent.
 *
 * @group pattern
 */
public function testGetPatternStartWithVariants()
{
    $userAgent = strtolower(
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.68 Safari/537.36'
    );

    // list literal: the keys are 0..9 in this order
    $expectedHashes = [
        'aaa556aeec36ac3edfe2f5deea5f1d28',
        '31d050fd7a4ea6c972063ef30d18991a',
        'dbeb1c32b66fd7717de583d999f89ec3',
        '13e6ce11d0a70e2a5a3df41bf11d493e',
        '3a4a9ff7cf86e273442bad1305f3d1fd',
        'b70924c16a59b9cc2de329464b64118e',
        '89364cb625249b3d478bace02699e05d',
        '27c9d5187cd283f8d160ec1ed2b5ac89',
        '6f8f57715090da2632453988d9a1501b',
        'd41d8cd98f00b204e9800998ecf8427e',
    ];

    $actualHashes = Pattern::getHashForPattern($userAgent, true);

    self::assertSame($expectedHashes, $actualHashes);
}