$handler = new StreamHandler(STDERR, Logger::DEBUG); $handler->setFormatter(new LineFormatter("[%datetime%] %channel%.%level_name%: %message%\n")); $log->pushHandler($handler); $script = new DocumentedScript(); $script->setName('What The Field')->setVersion('0.1')->setDescription('')->addParameter(new Parameter('f', 'file', Parameter::VALUE_REQUIRED), 'xml file')->addParameter(new Parameter('c', 'config', Parameter::VALUE_REQUIRED), 'config file')->addParameter(new Parameter('k', 'keys', '3'), 'top n keys to show')->addParameter(new Parameter('s', 'samples', '3'), 'number of value samples pr. key')->addParameter(new Parameter('t', 'truncate', '50'), 'truncate sample values above this length')->setProgram(function ($options, $arguments) use($log) { $feedPath = $options['file']; $configPath = $options['config']; $topN = (int) $options['keys']; $sampleN = (int) $options['samples']; $truncateN = (int) $options['truncate']; $fieldScorers = (require $configPath); $valuesDiscoveries = []; foreach ($fieldScorers as $fieldName => $scoreObj) { $valuesDiscoveries[$fieldName] = new ValueDiscovery($scoreObj); } $feed = new Feed($feedPath, new CollectionDiscovery(), $valuesDiscoveries, $log); $collectionPath = $feed->discoverCollectionXPath(); $mapping = $feed->getAllFieldXPathScores(); echo "COLLECTION\t{$collectionPath}\n"; foreach ($mapping as $name => $scores) { $keys = array_keys(array_filter($scores, function ($item) { return $item > 0; })); $strKeys = implode(" ", array_slice($keys, 0, $topN)); $name = str_pad($name, 10); echo "KEY\t{$name}\t{$strKeys}\n"; if ($sampleN > 0) { // sample mode $c = count($keys); for ($i = 1; $i <= $c; $i++) { if ($i > $topN) {
/**
 * Verifies that field discovery resolves the 'id' field of the fixture feed
 * (located via getFoodiePath()) to the XPath '/items/item/id'.
 *
 * The feed is built with a single ValueDiscovery registered under the key
 * 'id'. Its scorer list is assembled below in the same order the scorers
 * are handed to ValueDiscovery.
 *
 * NOTE(review): the exact meaning of each Score\* class and of the Boost
 * factor is defined outside this method — confirm against the Score
 * namespace before tuning any of the numbers.
 */
public function test()
{
    $fixturePath = $this->getFoodiePath();
    $collectionDiscovery = new CollectionDiscovery();

    // Scorers for the 'id' field, appended one at a time in evaluation order.
    $idScorers = [];
    $idScorers[] = new Score\IsUnique();
    // Factor -1 applied to the URL-validation scorer and to Max(Constant(0), IsDecimal).
    $idScorers[] = new Score\Boost(-1, [
        new Score\MatchFilterValidate(FILTER_VALIDATE_URL),
        new Score\Max([
            new Score\Constant(0),
            new Score\IsDecimal(),
        ]),
    ]);
    // Small negative factors on ancestor count and on whitespace match count.
    $idScorers[] = new Score\Boost(-0.001, [new Score\AncestorCount()]);
    $idScorers[] = new Score\Boost(-0.001, [new Score\MatchCount('/\\s+/S')]);
    // Negative factor when the value matches a run of non-digit characters.
    $idScorers[] = new Score\Boost(-0.01, [new Score\IsMatch('/[^\\d]+/S')]);
    // Positive factor tied to the IsGreaterThan(99999) scorer.
    $idScorers[] = new Score\Boost(0.1, [new Score\IsGreaterThan(99999)]);

    $valueDiscoveries = [
        'id' => new ValueDiscovery([], $idScorers),
    ];

    $feed = new Feed($fixturePath, $collectionDiscovery, $valueDiscoveries, new NullLogger());
    $discoveredXPath = $feed->discoverFieldXPath('id');

    $this->assertEquals('/items/item/id', $discoveredXPath);
}