Example #1
0
// Attach a STDERR stream handler (created with Logger::DEBUG) to $log,
// rendering records with a custom single-line format.
$lineFormat = "[%datetime%] %channel%.%level_name%: %message%\n";
$handler = new StreamHandler(STDERR, Logger::DEBUG);
$formatter = new LineFormatter($lineFormat);
$handler->setFormatter($formatter);
$log->pushHandler($handler);
$script = new DocumentedScript();
$script->setName('What The Field')->setVersion('0.1')->setDescription('')->addParameter(new Parameter('f', 'file', Parameter::VALUE_REQUIRED), 'xml file')->addParameter(new Parameter('c', 'config', Parameter::VALUE_REQUIRED), 'config file')->addParameter(new Parameter('k', 'keys', '3'), 'top n keys to show')->addParameter(new Parameter('s', 'samples', '3'), 'number of value samples pr. key')->addParameter(new Parameter('t', 'truncate', '50'), 'truncate sample values above this length')->setProgram(function ($options, $arguments) use($log) {
    $feedPath = $options['file'];
    $configPath = $options['config'];
    $topN = (int) $options['keys'];
    $sampleN = (int) $options['samples'];
    $truncateN = (int) $options['truncate'];
    $fieldScorers = (require $configPath);
    $valuesDiscoveries = [];
    foreach ($fieldScorers as $fieldName => $scoreObj) {
        $valuesDiscoveries[$fieldName] = new ValueDiscovery($scoreObj);
    }
    $feed = new Feed($feedPath, new CollectionDiscovery(), $valuesDiscoveries, $log);
    $collectionPath = $feed->discoverCollectionXPath();
    $mapping = $feed->getAllFieldXPathScores();
    echo "COLLECTION\t{$collectionPath}\n";
    foreach ($mapping as $name => $scores) {
        $keys = array_keys(array_filter($scores, function ($item) {
            return $item > 0;
        }));
        $strKeys = implode("  ", array_slice($keys, 0, $topN));
        $name = str_pad($name, 10);
        echo "KEY\t{$name}\t{$strKeys}\n";
        if ($sampleN > 0) {
            // sample mode
            $c = count($keys);
            for ($i = 1; $i <= $c; $i++) {
                if ($i > $topN) {
Example #2
0
 /**
  * Builds a Feed for the path returned by getFoodiePath(), configured with a
  * single 'id' ValueDiscovery, and asserts that discoverFieldXPath('id')
  * yields '/items/item/id'.
  */
 public function test()
 {
     $fixturePath = $this->getFoodiePath();
     $collectionDiscovery = new CollectionDiscovery();

     // Scorers passed as the second argument of the 'id' ValueDiscovery.
     // NOTE(review): the first Boost argument looks like a weight applied to
     // the wrapped scorers (negative = penalty) -- confirm against Score\Boost.
     $idScorers = [
         new Score\IsUnique(),
         new Score\Boost(-1, [
             new Score\MatchFilterValidate(FILTER_VALIDATE_URL),
             new Score\Max([
                 new Score\Constant(0),
                 new Score\IsDecimal(),
             ]),
         ]),
         new Score\Boost(-0.001, [
             new Score\AncestorCount(),
         ]),
         new Score\Boost(-0.001, [
             new Score\MatchCount('/\\s+/S'),
         ]),
         new Score\Boost(-0.01, [
             new Score\IsMatch('/[^\\d]+/S'),
         ]),
         new Score\Boost(0.1, [
             new Score\IsGreaterThan(99999),
         ]),
     ];

     $valueDiscoveries = [
         'id' => new ValueDiscovery([], $idScorers),
     ];

     $feed = new Feed($fixturePath, $collectionDiscovery, $valueDiscoveries, new NullLogger());

     $expectedXPath = '/items/item/id';
     $actualXPath = $feed->discoverFieldXPath('id');
     $this->assertEquals($expectedXPath, $actualXPath);
 }