Example #1
0
 /**
  * Applies this operator to the given operand expressions.
  *
  * The operands are first validated against the operator's declared
  * arguments via checkFuzzy(). The resulting expression text is built as
  * a prefix form "(op a)" for one declared argument and an infix form
  * "(a)op(b)" for two; any other arity is reported through grokit_error().
  *
  * The result is flagged constant only when the operator is deterministic
  * and every operand reports is_const().
  *
  * @param array $exprs  Operand expressions; each must provide value(),
  *                      is_const() and be acceptable to absorbMeta().
  * @param mixed $source Source information forwarded to ExpressionInfo.
  *
  * @return ExpressionInfo Info describing the applied operator.
  */
 public function apply(array $exprs, $source)
 {
     $this->checkFuzzy($this->args, $exprs, 'parameter');
     $exprVals = array_map(function ($val) {
         return $val->value();
     }, $exprs);
     // Function-call style fallback; replaced below for the supported arities.
     $value = $this->value() . '(' . implode(', ', $exprVals) . ')';
     switch (count($this->args)) {
         case 1:
             $value = '(' . $this->value() . $exprVals[0] . ')';
             break;
         case 2:
             $value = '(' . $exprVals[0] . ')' . $this->value() . '(' . $exprVals[1] . ')';
             break;
         default:
             // Use the count() function here; calling an undefined $count
             // variable would fail before the error could be reported.
             grokit_error('Got an operator with ' . count($this->args) . ' arguments!');
     }
     $is_const = $this->deterministic;
     foreach ($exprs as $expr) {
         $is_const = $is_const && $expr->is_const();
     }
     $info = new ExpressionInfo($source, $this->resultType, $value, $is_const);
     // Carry the operands' metadata over to the combined expression.
     foreach ($exprs as $expr) {
         $info->absorbMeta($expr);
     }
     return $info;
 }
Example #2
0
/**
 * Generates a GLA class that keeps every tuple sharing the "best" value of
 * the attributes designated as extremes.
 *
 * Template arguments read from $t_args:
 *   - 'extremes': map from input name to the kind of extreme. Accepted
 *     values (case-insensitive) are MIN, MINIMUM, -, < for minima and
 *     MAX, MAXIMUM, +, > for maxima.
 *   - 'inputs':   list of datatypes (or identifiers naming them); only
 *     consulted when $inputs is empty. The inputs are then named
 *     et_val0, et_val1, ...
 *   - 'debug':    when greater than 0, extra system headers are requested.
 *
 * The generated C++ class is echoed to the output; outputs are typed after
 * the inputs, positionally.
 *
 * @param array $t_args  Template arguments as described above.
 * @param array $inputs  Map of input name to datatype (may be empty).
 * @param array $outputs Map of output name to datatype; values are overwritten.
 *
 * @return array Descriptor with kind, name, input, output, result_type and
 *               system_headers entries.
 */
function ExtremeTuples(array $t_args, array $inputs, array $outputs)
{
    $extremes = get_first_key($t_args, ['extremes']);
    $nExt = \count($extremes);
    grokit_assert($nExt > 0, 'No extremes specified for ExtremeTuples GLA.');
    if (\count($inputs) == 0) {
        grokit_assert(array_key_exists('inputs', $t_args), 'No arguments specified for ExtremeTuples GLA.');
        $count = 0;
        foreach ($t_args['inputs'] as $type) {
            if (is_identifier($type)) {
                $type = lookupType(strval($type));
            }
            grokit_assert(is_datatype($type), 'Only datatypes can be specified as inputs to ' . 'the ExtremeTuples GLA');
            // Advance the counter so each input gets its own name; without
            // the increment every input would collapse onto 'et_val0'.
            $name = 'et_val' . $count++;
            $inputs[$name] = $type;
        }
    }
    // Pair outputs with inputs by position: each output takes the type of
    // the corresponding input and remembers which input feeds it.
    $outputMap = [];
    reset($outputs);
    foreach ($inputs as $name => $type) {
        $oKey = key($outputs);
        $outputs[$oKey] = $type;
        $outputMap[$oKey] = $name;
        next($outputs);
    }
    grokit_assert($nExt <= \count($inputs), 'There can not be more extreme values than there are inputs!');
    $mainAtts = [];
    $extraAtts = [];
    $minOpts = ['MIN', 'MINIMUM', '-', '<'];
    $maxOpts = ['MAX', 'MAXIMUM', '+', '>'];
    // Case-insensitive membership test.
    $inArrayCase = function ($needle, $haystack) {
        foreach ($haystack as $item) {
            if (strcasecmp($needle, $item) == 0) {
                return true;
            }
        }
        return false;
    };
    // $minimum[name] is true when the attribute is minimised, false when maximised.
    $minimum = [];
    foreach ($extremes as $name => $val) {
        grokit_assert(array_key_exists($name, $inputs), "ExtremeTuples: Expression with name " . $name . " specified as extreme not found in inputs");
    }
    // Split the inputs into attributes that take part in the comparison
    // (main) and attributes that are merely carried along (extra).
    foreach ($inputs as $name => $type) {
        if (array_key_exists($name, $extremes)) {
            $mainAtts[$name] = $type;
            if ($inArrayCase($extremes[$name], $minOpts)) {
                $minimum[$name] = true;
            } else {
                if ($inArrayCase($extremes[$name], $maxOpts)) {
                    $minimum[$name] = false;
                } else {
                    grokit_error('Unknown extreme type ' . $extremes[$name] . ' specified for ' . $name);
                }
            }
        } else {
            $extraAtts[$name] = $type;
        }
    }
    $debug = get_default($t_args, 'debug', 0);
    $className = generate_name('ExtremeTuples');
    ?>

class <?php 
    echo $className;
    ?>
 {

    struct Tuple {
<?php 
    foreach ($inputs as $name => $type) {
        ?>
        <?php 
        echo $type;
        ?>
 <?php 
        echo $name;
        ?>
;
<?php 
    }
    // foreach input
    ?>

        // Default Constructor, Copy Constructor, and Copy Assignment are all
        // default
        Tuple(void) = default;
        Tuple(const Tuple &) = default;
        Tuple & operator = (const Tuple &) = default;

        Tuple(<?php 
    echo array_template('const {val} & _{key}', ', ', $inputs);
    ?>
) :
            <?php 
    echo array_template('{key}(_{key})', ', ', $inputs);
    ?>

        { }

        // operator > means that this tuple is "better" than the other tuple.
        bool operator > ( const Tuple & other ) const {
<?php 
    foreach ($mainAtts as $name => $type) {
        // $op1 is the "better than" comparison for this attribute, $op2 its opposite.
        $op1 = $minimum[$name] ? '<' : '>';
        $op2 = !$minimum[$name] ? '<' : '>';
        ?>
            if( <?php 
        echo $name;
        ?>
 <?php 
        echo $op1;
        ?>
 other.<?php 
        echo $name;
        ?>
 )
                return true;
            else if( <?php 
        echo $name;
        ?>
 <?php 
        echo $op2;
        ?>
 other.<?php 
        echo $name;
        ?>
 )
                return false;
<?php 
    }
    // foreach main attribute
    ?>

            return false;
        }

        bool operator < ( const Tuple& other ) const {
            return other > *this;
        }

        bool operator <= (const Tuple & other ) const {
            return ! (*this > other );
        }

        bool operator >= (const Tuple & other ) const {
            return !( other > *this );
        }

        bool operator == (const Tuple & other ) const {
            bool ret = true;
<?php 
    foreach ($mainAtts as $name => $type) {
        ?>
            ret &= <?php 
        echo $name;
        ?>
 == other.<?php 
        echo $name;
        ?>
;
<?php 
    }
    // foreach main attribute
    ?>
            return ret;
        }
    }; // struct Tuple

    typedef std::vector<Tuple> TupleVector;
public:
    class Iterator {
    public:
        typedef TupleVector::const_iterator iter_type;

    private:
        iter_type begin;
        iter_type end;

    public:
        Iterator(void) = default;
        Iterator(const Iterator &) = default;
        Iterator( const iter_type & _begin, const iter_type & _end ) : begin(_begin), end(_end)
        { }
        Iterator( const iter_type && _begin, const iter_type && _end ) : begin(_begin), end(_end)
        { }

        bool GetNextResult(<?php 
    echo typed_ref_args($outputs);
    ?>
) {
            if( begin != end ) {
<?php 
    // Copy each output from the input attribute it was paired with.
    foreach ($outputs as $name => $type) {
        ?>
                <?php 
        echo $name;
        ?>
 = begin-><?php 
        echo $outputMap[$name];
        ?>
;
<?php 
    }
    ?>
                begin++;
                return true;
            }
            else {
                return false;
            }
        }

    };

private:

    uintmax_t __count;  // number of tuples covered

    TupleVector tuples;

    // Iterator for multi output type
    Iterator multiIterator;

public:
    // Constructor and destructor
    <?php 
    echo $className;
    ?>
(void) : __count(0), tuples(), multiIterator()
    { }

    ~<?php 
    echo $className;
    ?>
() { }

    void AddItem( <?php 
    echo const_typed_ref_args($inputs);
    ?>
 ) {
        ++__count;
        Tuple t(<?php 
    echo args($inputs);
    ?>
);

        if( tuples.empty() ) {
            tuples.push_back(t);
        }
        else if( t > tuples.front() ) {
            tuples.clear();
            tuples.push_back(t);
        }
        else if( t == tuples.front() ) {
            tuples.push_back(t);
        }
    }

    void AddState( <?php 
    echo $className;
    ?>
 & other ) {
        if( tuples.size() == 0 ) {
            tuples.swap(other.tuples);
        }
        else if( other.tuples.size() == 0 ) {
            // Do nothing
        }
        else if( tuples.front() > other.tuples.front() ) {
            // fast path
        }
        else if( other.tuples.front() > tuples.front() ) {
            tuples.swap(other.tuples);
        }
        else {
            for( Tuple & t : other.tuples ) {
                tuples.push_back(t);
            }
        }
    }

    void Finalize( void ) {
        multiIterator = Iterator(tuples.cbegin(), tuples.cend());
    }

    bool GetNextResult(<?php 
    echo typed_ref_args($outputs);
    ?>
) {
        return multiIterator.GetNextResult(<?php 
    echo args($outputs);
    ?>
);
    }
}; // class <?php 
    echo $className;
    ?>

<?php 
    $system_headers = ['vector', 'algorithm', 'cinttypes'];
    // Debug builds additionally request the stream and string headers.
    if ($debug > 0) {
        $system_headers = array_merge($system_headers, ['iostream', 'sstream', 'string']);
    }
    return array('kind' => 'GLA', 'name' => $className, 'input' => $inputs, 'output' => $outputs, 'result_type' => 'multi', 'system_headers' => $system_headers);
}
Example #3
0
/**
 *  A GLA that determines the distinct values of a dataset.
 *
 *  Optional template arguments read from $t_args:
 *      - 'use.mct' = true
 *          Use mct::closed_hash_set for the set; otherwise
 *          std::unordered_set is used.
 *      - 'init.size' = 65536
 *          Positive integer emitted as the INIT_SIZE constant.
 *      - 'mct.keep.hashes' = false
 *          Boolean forwarded as the last template parameter of the MCT set.
 *      - 'fragment.size' = 100000
 *          Positive integer emitted as the FRAG_SIZE constant.
 *      - 'null.check' = false
 *          Either a boolean applied to every input, or a list of input
 *          names; tuples with a null in a checked input are not inserted.
 *
 *  The generated C++ class is echoed to the output. Outputs are typed
 *  after the inputs, positionally.
 *
 *  @param array $t_args Template arguments as described above.
 *  @param array $input  Map of input name to datatype.
 *  @param array $output Map of output name to datatype; must have as many
 *                       entries as $input.
 *
 *  @return array Descriptor of the generated GLA.
 */
function Distinct(array $t_args, array $input, array $output)
{
    grokit_assert(\count($input) == \count($output), 'Distinct must have the same outputs as inputs.');
    // Pair outputs with inputs by position.
    $outputsToInputs = [];
    $i = 0;
    foreach ($input as $name => $type) {
        $outputsToInputs[array_keys($output)[$i]] = $name;
        array_set_index($output, $i++, $type);
    }
    $useMCT = get_default($t_args, 'use.mct', true);
    $initSize = get_default($t_args, 'init.size', 65536);
    $keepHashes = get_default($t_args, 'mct.keep.hashes', false);
    $fragmentSize = get_default($t_args, 'fragment.size', 100000);
    $nullCheck = get_default($t_args, 'null.check', false);
    grokit_assert(is_bool($useMCT), 'Distinct use.mct argument must be boolean');
    grokit_assert(is_integer($initSize), 'Distinct init.size argument must be an integer');
    grokit_assert($initSize > 0, 'Distinct init.size argument must be positive');
    grokit_assert(is_bool($keepHashes), 'Distinct mct.keep.hashes argument must be boolean');
    grokit_assert(is_integer($fragmentSize), 'Distinct fragment.size argument must be integral');
    grokit_assert($fragmentSize > 0, 'Distinct fragment.size argument must be positive');
    // $nullable[name] tells whether the input must be checked for nulls.
    $nullable = [];
    if (is_bool($nullCheck)) {
        foreach ($input as $name => $type) {
            $nullable[$name] = $nullCheck;
        }
    } else {
        if (is_array($nullCheck)) {
            foreach ($input as $name => $type) {
                $nullable[$name] = false;
            }
            foreach ($nullCheck as $index => $n) {
                grokit_assert(is_string($n), 'Distinct null.check has invalid value at position ' . $index);
                grokit_assert(array_key_exists($n, $nullable), 'Distinct null.check has unknown input ' . $n . ' at position ' . $index);
                $nullable[$n] = true;
            }
        } else {
            grokit_error('Distinct null.check must be boolean or list of inputs to check for nulls');
        }
    }
    $keepHashesText = $keepHashes ? 'true' : 'false';
    $system_headers = ['cinttypes', 'functional', 'vector'];
    if ($useMCT) {
        $system_headers[] = 'mct/hash-set.hpp';
        $definedSet = "mct::closed_hash_set<Key, HashKey, std::equal_to<Key>, std::allocator<Key>, {$keepHashesText}>";
    } else {
        // std::unordered_set is declared in <unordered_set>, not <unordered_map>.
        $system_headers[] = 'unordered_set';
        $definedSet = "std::unordered_set<Key, HashKey, std::equal_to<Key>, std::allocator<Key>>";
    }
    $className = generate_name('Distinct');
    ?>
class <?php 
    echo $className;
    ?>
 {

    public:
    // Value being placed into the set.
    struct Key {
<?php 
    foreach ($input as $name => $type) {
        ?>
        <?php 
        echo $type;
        ?>
 <?php 
        echo $name;
        ?>
;
<?php 
    }
    // for each input
    ?>

        // Construct the value by copying all of the attributes.
        Key(<?php 
    echo const_typed_ref_args($input);
    ?>
) :
<?php 
    // Emit the member initialiser list; every entry but the first is
    // preceded by a comma.
    $first = true;
    foreach ($input as $name => $type) {
        ?>
            <?php 
        echo $first ? ' ' : ',';
        ?>
 <?php 
        echo $name;
        ?>
(<?php 
        echo $name;
        ?>
)
<?php 
        $first = false;
    }
    // for each input
    ?>
        { }

        bool operator==(const Key & o ) const {
            return true <?php 
    echo array_template("&& ({key} == o.{key})", ' ', $input);
    ?>
;
        }

        size_t hash_value() const {
            uint64_t hash = H_b;
<?php 
    foreach ($input as $name => $type) {
        ?>
            hash = CongruentHash(Hash(<?php 
        echo $name;
        ?>
), hash);
<?php 
    }
    // for each input
    ?>
            return size_t(hash);
        }
    };

    // Hashing functor for our value
    struct HashKey {
        size_t operator()(const Key& o) const {
            return o.hash_value();
        }
    };

    using Set = <?php 
    echo $definedSet;
    ?>
;

    // Iterator object used in multi and fragment result types
    class Iterator {
        public:
        using iterator_t = Set::const_iterator;

        private:

        iterator_t start;
        iterator_t end;

        public:

        Iterator() : start(), end() { }

        Iterator( const iterator_t & _start, const iterator_t & _end ) :
            start(_start), end(_end)
        { }

        Iterator( const Iterator & o ) : start(o.start), end(o.end)
        { }

        bool GetNextResult(<?php 
    echo typed_ref_args($output);
    ?>
) {
            if( start != end ) {
<?php 
    // Copy each output from the input attribute it was paired with.
    foreach ($output as $name => $type) {
        ?>
                <?php 
        echo $name;
        ?>
 = start-><?php 
        echo $outputsToInputs[$name];
        ?>
;
<?php 
    }
    // for each output
    ?>
                start++;
                return true;
            } else {
                return false;
            }
        }
    };

    private:

    // Constants
    static constexpr size_t INIT_SIZE = <?php 
    echo $initSize;
    ?>
;
    static constexpr size_t FRAG_SIZE = <?php 
    echo $fragmentSize;
    ?>
;

    // Member variables

    uint64_t count;         // Total # tuples seen

    Set distinct;           // Set of distinct values

    using IteratorList = std::vector<Iterator>;

    Iterator multiIterator;     // Internal iterator for multi result type
    IteratorList fragments;     // Iterator for fragments

    public:

    <?php 
    echo $className;
    ?>
() :
        count(0),
        distinct(INIT_SIZE),
        multiIterator(),
        fragments()
    { }

    ~<?php 
    echo $className;
    ?>
() { }

    void Reset(void) {
        count = 0;
        distinct.clear();
    }

    void AddItem(<?php 
    echo const_typed_ref_args($input);
    ?>
) {
        count++;
<?php 
    // Emit an early return for every input that must be checked for nulls.
    foreach ($nullable as $name => $check) {
        if ($check) {
            ?>
        if( IsNull( <?php 
            echo $name;
            ?>
 ) ) return;
<?php 
        }
        // if checking for nulls
    }
    // foreach input
    ?>

        Key key(<?php 
    echo args($input);
    ?>
);

        distinct.insert(key);
/*
        auto it = distinct.find(key);
        if( it == distinct.end() ) {
            distinct.insert(key);
        }
*/
    }

    void AddState( <?php 
    echo $className;
    ?>
 & other ) {
        for( auto & elem : other.distinct ) {
            distinct.insert(elem);
            /*
            auto it = distinct.find(elem);
            if( it == distinct.end() ) {
                distinct.insert(elem);
            }
            */
        }
        count += other.count;
    }

    // Multi interface
    void Finalize(void) {
        multiIterator = Iterator(distinct.cbegin(), distinct.cend());
    }

    bool GetNextResult(<?php 
    echo typed_ref_args($output);
    ?>
) {
        return multiIterator.GetNextResult(<?php 
    echo args($output);
    ?>
);
    }

    // Fragment interface
    int GetNumFragments(void) {
        fragments.clear();
        int nFrag = 0;

        Iterator::iterator_t prev = distinct.cbegin();
        Iterator::iterator_t end = distinct.cend();
        Iterator::iterator_t next = prev;

        while( next != end ) {
            for( size_t i = 0; next != end && FRAG_SIZE > i; i++ ) {
                next++;
            }
            Iterator nIter(prev, next);
            fragments.push_back(nIter);

            prev = next;
            nFrag++;
        }

        return nFrag;
    }

    Iterator * Finalize(int fragment) {
        return new Iterator(fragments[fragment]);
    }

    bool GetNextResult(Iterator * it, <?php 
    echo typed_ref_args($output);
    ?>
) {
        return it->GetNextResult(<?php 
    echo args($output);
    ?>
);
    }

    // General methods
    uint64_t get_count() const {
        return count;
    }

    uint64_t get_countDistinct() const {
        return distinct.size();
    }

    const Set & get_distinct() const {
        return distinct;
    }
};

typedef <?php 
    echo $className;
    ?>
::Iterator <?php 
    echo $className;
    ?>
_Iterator;
<?php 
    return ['kind' => 'GLA', 'name' => $className, 'input' => $input, 'output' => $output, 'result_type' => ['multi', 'fragment'], 'user_headers' => ['HashFunctions.h'], 'system_headers' => $system_headers, 'properties' => ['resettable']];
}
Example #4
0
/**
 * Builds the expression info for a case statement that has no base
 * expression, i.e. one where every case carries its own boolean test.
 *
 * The first case fixes the result type; every later case (and the default,
 * when present) must be convertible to it. The statement is rendered as a
 * declaration of a generated variable followed by an if / else-if chain,
 * all of which are attached to the returned info as preprocesses.
 *
 * @param mixed      $source  Source information for the case statement.
 * @param array      $cases   Case nodes, each providing a TEST and an EXPR entry.
 * @param mixed|null $default Default expression or null; replaced by its
 *                            converted form when it is used.
 *
 * @return ExpressionInfo Info whose value is the generated variable name.
 */
function parseCaseNoBase(&$source, &$cases, &$default)
{
    // Every test must be convertible to this type.
    $boolType = lookupType('bool');
    // Result type and the location that established it; unknown until the
    // first case has been processed.
    $retType = null;
    $retSource = null;
    // Name of the variable that receives the selected value.
    $value_name = generate_name("case_value");
    $statements = [];
    $info = new ExpressionInfo($source, null, $value_name, true);
    grokit_logic_assert(count($cases) > 0, 'No cases found for case statement at ' . $source);

    foreach ($cases as $case) {
        $test = parseExpression(ast_get($case, NodeKey::TEST));
        $expr = parseExpression(ast_get($case, NodeKey::EXPR));

        // The test has to be usable as a boolean.
        if (!canConvert($test->type(), $boolType)) {
            grokit_error('Case test expression has return type ' . $test->type() . ' which is incompatible with boolean ' . $test->source());
        } else {
            $test = convertExpression($test, $boolType, $retSource);
        }

        // The first case defines the result type, later ones must match it.
        $isFirst = $retType === null;
        if ($isFirst) {
            $retType = $expr->type();
            $retSource = $expr->source();
            $info->setType($retType);
        } elseif (canConvert($expr->type(), $retType)) {
            $expr = convertExpression($expr, $retType, $retSource);
        } else {
            grokit_error('Case return type ' . $expr->type() . ' of expression at ' . $expr->source() . ' incompatible with previous return type ' . $retType . ' defined by expression at ' . $retSource);
        }

        // Fold the metadata of both parts into the overall info.
        $info->absorbMeta($test);
        $info->absorbMeta($expr);

        // Every branch after the first hangs off the previous one.
        $prefix = $isFirst ? '' : 'else ';
        $statements[] = $prefix . 'if( ' . $test->value() . ' ) ' . $value_name . ' = ' . $expr->value() . ';';
    }

    // Optional default branch.
    if ($default !== null) {
        if (!canConvert($default->type(), $retType)) {
            grokit_error('Case return type ' . $default->type() . ' of default at ' . $default->source() . ' incompatible with previous return type ' . $retType . ' defined by expression at ' . $retSource);
        } else {
            $default = convertExpression($default, $retType, $retSource);
        }
        $info->absorbMeta($default);
        $statements[] = 'else ' . $value_name . ' = ' . $default->value() . ';';
    }

    // The variable declaration has to come before the branches.
    array_unshift($statements, $retType . ' ' . $value_name . ';');
    $info->addPreprocesses($statements);

    if ($info->is_const()) {
        $info->makeConstant();
    }
    return $info;
}
Example #5
0
 /**
  * Parses a selection waypoint and generates its source file.
  *
  * For every query in the waypoint's filter section this collects the
  * filter expressions, the optional GF applied to them, constructor
  * arguments, state arguments and synthesized attributes, registers them
  * with the generation info and finally emits "<name>.cc" through
  * SelectionGenerate().
  *
  * Output produced while parsing is buffered and appended to $header to
  * form the headers of the generated file.
  *
  * @param mixed  $ast    AST node of the waypoint.
  * @param string $name   Waypoint name; also used as the file base name.
  * @param string $header Header text placed before the buffered output.
  *
  * @return GenerationInfo Information gathered while processing the waypoint.
  */
 function parseSelectionWP($ast, $name, $header)
 {
     // Push LibraryManager so we can undo this waypoint's definitions.
     ob_start();
     LibraryManager::Push();
     $res = new GenerationInfo();
     /***************   PROCESS AST   ***************/
     $attMap = parseAttributeMap(ast_get($ast, NodeKey::ATT_MAP), $res);
     $qFilters = ast_get($ast, NodeKey::FILTERS);
     // Looked up once; the synthesized attributes of each query are read
     // from this node inside the loop.
     $qSynth = ast_get($ast, NodeKey::SYNTH);
     $queries = [];
     foreach ($qFilters as $query => $qInfo) {
         $filterAST = ast_get($qInfo, NodeKey::ARGS);
         $gfAST = ast_get($qInfo, NodeKey::TYPE);
         // With a GF the filter is a named expression list that is handed
         // to the GF; without one it is a plain expression list.
         $gf = null;
         if ($gfAST !== null) {
             $filter = parseNamedExpressionList($filterAST);
             $gf = parseGF($gfAST);
             $gf = $gf->apply($filter);
         } else {
             $filter = parseExpressionList($filterAST);
         }
         if (ast_has($qInfo, NodeKey::CARGS)) {
             $cargs = parseLiteralList(ast_get($qInfo, NodeKey::CARGS));
         } else {
             $cargs = [];
         }
         $sargs = ast_has($qInfo, NodeKey::SARGS) ? parseStateList(ast_get($qInfo, NodeKey::SARGS), $query) : [];
         $synths = array();
         if (ast_has($qSynth, $query)) {
             $curSynths = ast_get($qSynth, $query);
             foreach ($curSynths as $curSynthAST) {
                 $expr = parseExpression(ast_get($curSynthAST, NodeKey::EXPR));
                 $att = parseAttribute(ast_get($curSynthAST, NodeKey::ATT));
                 if ($att->type() == null) {
                     // Untyped attribute: it takes the type of its expression.
                     AttributeManager::setAttributeType($att->name(), $expr->type());
                     $att = AttributeManager::lookupAttribute($att->name());
                 } else {
                     // Convertibility is decided between types, so pass the
                     // expression's type rather than the expression itself.
                     if (canConvert($expr->type(), $att->type())) {
                         $expr = convertExpression($expr, $att->type());
                     } else {
                         grokit_error('Unable to convert expression for synthesized attribute ' . $att->name() . ' from type ' . $expr->type() . ' to type ' . $att->type() . ' ' . $expr->source());
                     }
                 }
                 $synths[$att->name()] = $expr;
             }
         }
         $info = ['filters' => $filter, 'synths' => $synths, 'gf' => $gf, 'cargs' => $cargs, 'states' => $sargs];
         $queries[$query] = $info;
         $res->addJob($query, $name);
         $res->absorbInfoList($filter);
         $res->absorbInfoList($synths);
         $res->absorbInfoList($cargs);
         $res->absorbStateList($sargs);
         if ($gf !== null) {
             $res->absorbInfo($gf);
         }
     }
     /*************** END PROCESS AST ***************/
     // Get this waypoint's headers
     $myHeaders = $header . PHP_EOL . ob_get_clean();
     // Only one file at the moment
     $filename = $name . '.cc';
     $res->addFile($filename, $name);
     _startFile($filename);
     SelectionGenerate($name, $queries, $attMap);
     _endFile($filename, $myHeaders);
     // Pop LibraryManager again to get rid of this waypoint's declarations
     LibraryManager::Pop();
     return $res;
 }
Example #6
0
 /**
  * Absorbs the information attached to an attribute's type.
  *
  * An attribute without a type is reported through grokit_error(),
  * including a dump of the attribute.
  *
  * @param mixed $attr Attribute providing a type() accessor.
  */
 public function absorbAttr($attr)
 {
     $untyped = $attr->type() == null;
     if ($untyped) {
         $dump = print_r($attr, true);
         grokit_error("Attribute has no type set: " . $dump);
     }
     $this->absorbInfo($attr->type());
 }
Example #7
0
 /**
  * Looks up the GT described by this spec for the given inputs and outputs.
  *
  * Inputs that are already datatypes are used directly; anything else is
  * replaced by the result of its type() accessor. Keys are preserved.
  * An exception raised during the lookup is passed on to grokit_error().
  *
  * @param iterable $inputs  Datatypes or objects exposing type(), by name.
  * @param mixed    $outputs Outputs forwarded to lookupGT().
  * @param array    $sargs   State arguments forwarded to lookupGT().
  *
  * @return mixed Whatever lookupGT() returns.
  */
 public function apply($inputs, $outputs, $sargs = [])
 {
     try {
         $inputTypes = [];
         foreach ($inputs as $key => $item) {
             $inputTypes[$key] = is_datatype($item) ? $item : $item->type();
         }
         return lookupGT($this->name, $this->t_args, $inputTypes, $outputs, $sargs, $this->alias);
     } catch (Exception $failure) {
         $message = 'Failed to lookup GT ' . $this->name . ' from spec ' . $this->source;
         grokit_error($message, $failure);
     }
 }
Example #8
0
/**
 *  GI that generates data in clusters, using a specified distribution for each
 *  cluster.
 *
 *  This GI requires the following template arguments:
 *      - 'n' or 0
 *          The number of tuples to generate. Note: this value is per task.
 *          The total number of tuples generated will be n_tasks * n
 *      - 'centers' or 1
 *          A list of configurations for the centers.
 *
 *  The following template arguments are optional:
 *      - 'outputs'
 *          If the outputs of the GI are not given implicitly, they can be
 *          specified in this template argument. The number of dimensions will
 *          be determined by the number of outputs.
 *
 *          All output types must be numeric real types. The default type for
 *          outputs is DOUBLE.
 *      - 'dist.lib' = 'std'
 *          Which library to use for generating distributions.
 *          Valid options are:
 *              - std
 *              - boost
 *      - 'seed' = null
 *          The seed to be used for the random number generator. This seed will
 *          be used to generate the seed for each task, and different runs with
 *          the same seed will produce the same data.
 *      - 'compute.sets' = 1
 *          The number of sets of tuples to compute at once.
 *
 *  Each center configuration is a functor with the form:
 *      dist_name(args)
 *
 *  The following distributions are supported:
 *      { Uniform Distributions }
 *      - uniform(a = 0, b = 1)
 *
 *      { Normal Distributions }
 *      - normal(mean = 0.0, std_dev = 1.0) [ synonyms: gaussian ]
 *      - inverse_gaussian(mean = 1, shape = 1) [ synonyms: inverse_normal ]
 *
 *      { Bernoulli Distributions }
 *      - binomial(t = 1, p = 0.5)
 *      - negative_binomial(k = 1, p = 0.5)
 *
 *      { Poisson Distributions }
 *      - exponential( lambda = 1 )
 *      - gamma(alpha = 1, beta = 1)    [ synonyms: Gamma ]
 */
function ClusterGen(array $t_args, array $outputs)
{
    $sys_headers = ['array', 'cinttypes'];
    $user_headers = [];
    $libraries = [];
    if (\count($outputs) == 0) {
        grokit_assert(array_key_exists('outputs', $t_args), 'ClusterGen: No outputs specified');
        $count = 0;
        foreach ($t_args['outputs'] as $type) {
            if (is_identifier($type)) {
                $type = lookupType($type);
            }
            grokit_assert(is_datatype($type), 'ClusterGen: Non data-type ' . $type . ' given as output');
            $name = 'output' . $count++;
            $outputs[$name] = $type;
        }
    }
    foreach ($outputs as $name => &$type) {
        if (is_null($type)) {
            $type = lookupType('base::DOUBLE');
        } else {
            grokit_assert($type->is('real'), 'ClusterGen: Non-real datatype ' . $type . ' given as output');
        }
    }
    $myOutputs = [];
    foreach ($outputs as $name => $type) {
        $myOutputs[$name] = $type;
    }
    $tSize = \count($outputs);
    $seed = get_default($t_args, 'seed', null);
    if ($seed !== null) {
        grokit_assert(is_int($seed), 'ClusterGen: Seed must be an integer or null.');
    } else {
        $user_headers[] = 'HashFunctions.h';
    }
    $distLib = get_default($t_args, 'dist.lib', 'std');
    $distNS = '';
    switch ($distLib) {
        case 'std':
            $sys_headers[] = 'random';
            $distNS = 'std';
            break;
        case 'boost':
            $sys_headers[] = 'boost/random.hpp';
            $distNS = 'boost::random';
            $libraries[] = 'boost_random-mt';
            if ($seed === null) {
                // Need random_device
                $sys_headers[] = 'boost/random/random_device.hpp';
                $libraries[] = 'boost_system-mt';
            }
            break;
        default:
            grokit_error('ClusterGen: Unknown RNG library ' . $distLib);
    }
    $distRNG = 'mt19937';
    $RNGtype = $distNS . '::' . $distRNG;
    $nTuples = get_first_key($t_args, ['n', '0']);
    grokit_assert(is_int($nTuples), 'ClusterGen: the number of tuples to be produced must be an integer.');
    $centers = get_first_key($t_args, ['centers', 1]);
    grokit_assert(is_array($centers), 'ClusterGen: centers must be an array of functors');
    $handleDist = function ($name, $args, $oType) use($distNS) {
        $distName = '';
        $distArgs = [];
        switch ($name) {
            case 'gaussian':
            case 'normal':
                $distName = $distNS . '::' . 'normal_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 2, 'ClusterGen: Normal distribution takes at most 2 arguments, ' . \count($args) . ' given');
                $mean = get_default($args, ['mean', 0], 0.0);
                $sigma = get_default($args, ['std_dev', 'sigma', 1], 1.0);
                grokit_assert(is_numeric($mean), 'ClusterGen: mean parameter of binomial distribution must be a real number.');
                grokit_assert(is_numeric($sigma), 'ClusterGen: sigma parameter of binomial distribution must be a real number.');
                $mean = floatval($mean);
                $sigma = floatval($sigma);
                $distArgs = [$mean, $sigma];
                break;
            case 'binomial':
                $distName = $distNS . '::' . 'binomial_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 2, 'ClusterGen: Binomial distribution takes at most 2 arguments, ' . \count($args) . ' given');
                $t = get_default($args, ['t', 0], 1);
                $p = get_default($args, ['p', 1], 0.5);
                grokit_assert(is_int($t), 'ClusterGen: t parameter of binomial distribution must be an integer.');
                grokit_assert(is_numeric($p), 'ClusterGen: p parameter of binomial distribution must be a real number.');
                $p = floatval($p);
                grokit_assert($p >= 0 && $p <= 1, 'ClusterGen: p parameter of binomial distribution must be in the range [0, 1]');
                grokit_assert($t >= 0, 'ClusterGen: t parameter of binomial distribution must be in the range [0, +inf)');
                $distArgs = [$t, $p];
                break;
            case 'negative_binomial':
                $distName = $distNS . '::' . 'negative_binomial_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 2, 'ClusterGen: Negative Binomial distribution takes at most 2 arguments, ' . \count($args) . ' given');
                $k = get_default($args, ['k', 0], 1);
                $p = get_default($args, ['p', 1], 0.5);
                grokit_assert(is_int($k), 'ClusterGen: k parameter of binomial distribution must be an integer.');
                grokit_assert(is_numeric($p), 'ClusterGen: p parameter of binomial distribution must be a real number.');
                $p = floatval($p);
                grokit_assert($p > 0 && $p <= 1, 'ClusterGen: p parameter of negative binomial distribution must be in the range (0, 1]');
                grokit_assert($k > 0, 'ClusterGen: k parameter of negative binomial distribution must be in the range (0, +inf)');
                $distArgs = [$k, $p];
                break;
            case 'inverse_gaussian':
            case 'inverse_normal':
                grokit_assert(\count($args) <= 2, 'ClusterGen: Inverse Gaussian distribution takes at most 2 arguments, ' . \count($args) . ' given');
                $mean = get_default($args, ['mean', 0], 1);
                $shape = get_default($args, ['shape', 1], 1);
                grokit_assert(is_numeric($mean), 'ClusterGen: mean parameter of inverse gaussian distribution must be a real number.');
                grokit_assert(is_numeric($shape), 'ClusterGen: shape parameter of inverse gaussian distribution must be a real number.');
                $mean = floatval($mean);
                $shape = floatval($shape);
                grokit_assert($mean > 0, 'ClusterGen: mean of inverse gaussian distribution must be in range (0, inf)');
                grokit_assert($shape > 0, 'ClusterGen: shape of inverse gaussian distribution must be in range (0, inf)');
                $gen_args = ['output' => $oType, 'ns' => $distNS];
                $distName = strval(lookupResource('datagen::InverseGaussianGen', $gen_args));
                $distArgs = [$mean, $shape];
                break;
            case 'uniform':
                $distName = $distNS . '::' . 'uniform_real_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 2, 'ClusterGen: Uniform distribution takes at most 2 arguments, ' . \count($args) . ' given');
                $a = get_default($args, ['a', 0], 0.0);
                $b = get_default($args, ['b', 1], 1.0);
                grokit_assert(is_numeric($a), 'ClusterGen: `a` parameter of uniform distribution must be a real number.');
                grokit_assert(is_numeric($b), 'ClusterGen: `b` parameter of uniform distribution must be a real number.');
                $a = floatval($a);
                $b = floatval($b);
                grokit_assert($b >= $a, 'ClusterGen: `b` parameter of uniform distribution must be >= the `a` parameter.');
                $distArgs = [$a, $b];
                break;
            case 'exponential':
                $distName = $distNS . '::' . 'exponential_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 1, 'ClusterGen: Exponential distribution takes at most 1 argument.');
                $lambda = get_default($args, ['lambda', 0], 1.0);
                grokit_assert(is_numeric($lambda), 'ClusterGen: `lambda` parameter of exponential distribution must be a real number.');
                $lambda = floatval($lambda);
                grokit_assert($lambda > 0, 'ClusterGen: `lambda` parameter of exponential distribution must be in range (0, +inf).');
                $distArgs = [$lambda];
                break;
            case 'gamma':
            case 'Gamma':
                $distName = $distNS . '::' . 'gamma_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 2, 'ClusterGen: Gamma distribution takes at most 2 arguments.');
                $alpha = get_default($args, ['alpha', 0], 1.0);
                $beta = det_default($args, ['beta', 1], 1.0);
                grokit_assert(is_numeric($alpha), 'ClusterGen: `alpha` parameter of gamma distribution must be a real number.');
                grokit_assert(is_numeric($beta), 'ClusterGen: `beta` parameter of gamma distribution must be a real number.');
                $alpha = floatval($alpha);
                $beta = floatval($beta);
                $distArgs = [$alpha, $beta];
                break;
            default:
                grokit_error('ClusterGen: Unknown distribution ' . $name . ' given for center');
        }
        return [$distName, $distArgs];
    };
    $dists = [];
    $distArgs = [];
    $count = 0;
    $oType = '';
    $nCenters = 1;
    reset($outputs);
    foreach ($centers as $val) {
        $cluster = $val;
        if (is_functor($val)) {
            $cluster = [$val];
        } else {
            if (is_array($val)) {
                $nCenters = lcm($nCenters, \count($val));
            } else {
                grokit_error('ClusterGen: center descriptions must be functors or list of functors');
            }
        }
        $curDist = [];
        $curDistArgs = [];
        $curDistName = 'distribution' . $count++;
        $oType = strval(current($outputs));
        $iCount = 0;
        foreach ($cluster as $functor) {
            grokit_assert(is_functor($functor), 'ClusterGen: center description must be a functor');
            $vName = $curDistName . '_' . $iCount++;
            $ret = $handleDist($functor->name(), $functor->args(), $oType);
            $curDist[$vName] = $ret[0];
            $curDistArgs[$vName] = $ret[1];
        }
        next($outputs);
        $dists[$curDistName] = $curDist;
        $distArgs[$curDistName] = $curDistArgs;
    }
    // Determine the default number of sets to compute at a time.
    // We want to generate either $nTuples or 10,000 tuples, depending on which
    // is less.
    $defaultSetsTarget = min($nTuples, 10000);
    $setsToTarget = intval(ceil($defaultSetsTarget / $nCenters));
    $computeSets = get_default($t_args, 'compute.sets', $setsToTarget);
    grokit_assert(is_int($computeSets) && $computeSets > 0, 'ClusterGen: compute.sets must be a positive integer, ' . $computeSets . ' given');
    $className = generate_name('ClusterGen');
    // For some BIZZARE reason, the $outputs array was getting modified while
    // traversing over the $dists array. Making a deep copy of the outputs and
    // then reassigning it seems to fix the issue.
    $outputs = $myOutputs;
    ?>

class <?php 
    echo $className;
    ?>
 {

    // The number of tuples to produce per task
    static constexpr size_t N = <?php 
    echo $nTuples;
    ?>
;
    static constexpr size_t CacheSize = <?php 
    echo $computeSets * $nCenters;
    ?>
;

    // Typedefs
    typedef std::tuple<<?php 
    echo array_template('{val}', ', ', $outputs);
    ?>
> Tuple;
    typedef std::array<Tuple, CacheSize> TupleArray;
    typedef TupleArray::const_iterator TupleIterator;
    typedef <?php 
    echo $RNGtype;
    ?>
 RandGen;

    // Number of tuples produced.
    uintmax_t count;

    // Cache a number of outputs for efficiency
    TupleArray cache;
    TupleIterator cacheIt;

    // Random number generator
    RandGen rng;

    // Distributions
<?php 
    // This is the section causing issues.
    foreach ($dists as $name => $list) {
        foreach ($list as $vName => $type) {
            ?>
    <?php 
            echo $type;
            ?>
 <?php 
            echo $vName;
            ?>
;
<?php 
        }
        // foreach distribution
    }
    // foreach cluster set
    ?>

    // Helper function to generate tuples.
    void GenerateTuples(void) {
<?php 
    $tIndex = 0;
    foreach ($dists as $name => $list) {
        $lCenters = \count($list);
        // $nCenters has been defined to be the LCM of the number of centers in
        // any column, so $lCenter is guaranteed to divide evenly into
        // CacheSize
        ?>
        for( size_t index = 0; CacheSize > index; index += <?php 
        echo $lCenters;
        ?>
 ) {
<?php 
        $index = 0;
        foreach ($list as $vName => $type) {
            ?>
            std::get<<?php 
            echo $tIndex;
            ?>
>(cache[index + <?php 
            echo $index;
            ?>
]) = <?php 
            echo $vName;
            ?>
(rng);
<?php 
            $index++;
        }
        // foreach value in tuple
        ?>
        }
<?php 
        $tIndex++;
    }
    // foreach distribution
    ?>
        cacheIt = cache.cbegin();
    }

public:
    // Constructor
    <?php 
    echo $className;
    ?>
( GIStreamProxy & _stream ) :
        cache()
        , cacheIt()
        , count(0)
        , rng()
<?php 
    foreach ($dists as $name => $list) {
        foreach ($list as $vName => $type) {
            ?>
        , <?php 
            echo $vName;
            ?>
(<?php 
            echo implode(', ', $distArgs[$name][$vName]);
            ?>
)
<?php 
        }
        // foreach distribution
    }
    // foreach cluster set
    ?>

    {
<?php 
    if (is_null($seed)) {
        ?>
        <?php 
        echo $distNS;
        ?>
::random_device rd;
<?php 
    }
    // if seed is null
    ?>
        RandGen::result_type seed = <?php 
    echo is_null($seed) ? 'rd()' : "CongruentHash({$seed}, _stream.get_id() )";
    ?>
;
        rng.seed(seed);

        cacheIt = cache.cend();
    }

    // Destructor
    ~<?php 
    echo $className;
    ?>
(void) { }

    bool ProduceTuple(<?php 
    echo typed_ref_args($outputs);
    ?>
) {
        if( N > count ) {
            if( cacheIt == cache.cend() ) {
                GenerateTuples();
            }
<?php 
    $tIndex = 0;
    foreach ($outputs as $name => $type) {
        ?>
            <?php 
        echo $name;
        ?>
 = std::get<<?php 
        echo $tIndex;
        ?>
>(*cacheIt);
<?php 
        $tIndex++;
    }
    // foreach output
    ?>

            ++cacheIt;
            ++count;

            return true;
        }
        else {
            return false;
        }
    }
};

<?php 
    return array('kind' => 'GI', 'name' => $className, 'output' => $outputs, 'system_headers' => $sys_headers, 'user_headers' => $user_headers, 'libraries' => $libraries);
}
Example #9
0
/**
 * Generates a C++ bitset type backed by a fixed-width unsigned integer.
 *
 * The class itself is emitted directly to the output. The free helper
 * functions (Hash, FromString, ToString, ToJson, FromJson) are captured with
 * output buffering and handed back in the 'global_content' entry instead.
 *
 * @param array $t_args Template arguments. Must contain 'values': a non-empty
 *                      array mapping bit index (0 through 63) to accessor name.
 *                      Every name must start with a letter or underscore and
 *                      contain only letters, digits and underscores.
 *
 * @return array Type description with the keys 'kind', 'name',
 *               'binary_operators', 'system_headers', 'global_content',
 *               'complex', 'methods', 'constructors', 'functions',
 *               'describe_json' and 'extras'.
 */
function BITSET(array $t_args)
{
    grokit_assert(array_key_exists('values', $t_args), 'No values specified for bitset!');
    $values = $t_args['values'];
    // max()/min() below are not defined for an empty list, so reject it here
    // with a readable message instead of failing inside them.
    grokit_assert(is_array($values) && \count($values) > 0, 'Bitset must be given at least one value.');
    $indicies = array_keys($values);
    $maxIndex = \max($indicies);
    $minIndex = \min($indicies);
    grokit_assert($maxIndex < 64, 'Highest index of bitset must be less than 64');
    grokit_assert($minIndex >= 0, 'Indicies of bitset must be >= 0');
    $mask = 0;
    foreach ($values as $index => $name) {
        // The reduction is seeded with the first-character check, so a name
        // starting with a digit is rejected even though digits are allowed
        // in later positions.
        $firstChar = substr($name, 0, 1);
        $arr = str_split($name);
        $valid = array_reduce($arr, function ($res, $item) {
            $res = $res && (ctype_alnum($item) || $item == '_');
            return $res;
        }, ctype_alpha($firstChar) || $firstChar == '_');
        grokit_assert($valid, "Invalid name ({$name}) given for index ({$index}) in bitset.");
        $mask = $mask | 1 << $index;
    }
    // Smallest of 8/16/32/64 bits that can hold bit number $maxIndex. Done
    // with integer arithmetic so the width (and everything derived from it)
    // stays an int and is not subject to floating point rounding.
    $nBits = 8;
    while ($nBits <= $maxIndex && $nBits < 64) {
        $nBits *= 2;
    }
    // Number of hex digits used to print the mask.
    $nHex = $nBits / 4;
    $storageType = "uint{$nBits}_t";
    switch ($nBits) {
        case 8:
            $methodIntType = 'base::BYTE';
            break;
        case 16:
            $methodIntType = 'base::SMALLINT';
            break;
        case 32:
            $methodIntType = 'base::INT';
            break;
        case 64:
            $methodIntType = 'base::BIGINT';
            break;
        default:
            grokit_error('BITSET requires invalid number of bits (' . $nBits . ')');
    }
    $className = generate_name('BITSET');
    $methods = [];
    $constructors = [];
    $functions = [];
    $globalContents = "";
    ?>

class <?php echo $className; ?> {
public:
    typedef <?php echo $storageType; ?> StorageType;

private:
    StorageType bits;

    static constexpr StorageType _MASK_ = 0x<?php echo sprintf("%0{$nHex}X", $mask); ?>;

public:

    <?php echo $className; ?>(void);
<?php $constructors[] = [[$methodIntType], true]; ?>
    <?php echo $className; ?>(const StorageType _bits);

    <?php echo $className; ?> & operator =( const StorageType _bits );

    /***** Comparison Opeators *****/
    bool operator ==( const <?php echo $className; ?> & o ) const;
    bool operator !=( const <?php echo $className; ?> & o ) const;
    bool operator <( const <?php echo $className; ?> & o ) const;
    bool operator >( const <?php echo $className; ?> & o ) const;
    bool operator <=( const <?php echo $className; ?> & o ) const;
    bool operator >=( const <?php echo $className; ?> & o ) const;

    /***** Conversion *****/
    void ToJson( Json::Value & dest ) const;
    void FromJson( const Json::Value & src );

    /***** Accessors *****/
<?php $methods[] = ['Bits', [], $methodIntType, true]; ?>
    StorageType Bits(void) const;

<?php $methods[] = ['IsSet', ['base::BYTE'], 'base::bool', true]; ?>
    // Whether or not a bit is set by index
    bool IsSet(unsigned char index) const;

    // Accessors for each value
<?php
    foreach ($values as $index => $name) {
        $methods[] = [$name, [], 'base::bool', true];
        ?>
    bool <?php echo $name; ?>(void) const;
<?php
    }
    // for each value
    ?>
};

inline
<?php echo $className; ?> :: <?php echo $className; ?>( void ) : bits(0) { }

inline
<?php echo $className; ?> :: <?php echo $className; ?>( const StorageType _bits ) : bits(_bits) { }

inline
<?php echo $className; ?> & <?php echo $className; ?> :: operator = (const StorageType _bits) {
    bits = _bits;
    return *this;
}

inline
bool <?php echo $className; ?> :: operator == (const <?php echo $className; ?> & o ) const {
    return bits == o.bits;
}

inline
bool <?php echo $className; ?> :: operator != (const <?php echo $className; ?> & o ) const {
    return bits != o.bits;
}

inline
bool <?php echo $className; ?> :: operator < (const <?php echo $className; ?> & o ) const {
    return (bits == (bits & o.bits)) && (bits != o.bits);
}

inline
bool <?php echo $className; ?> :: operator > (const <?php echo $className; ?> & o ) const {
    return (bits == (bits | o.bits)) && (bits != o.bits);
}

inline
bool <?php echo $className; ?> :: operator <= (const <?php echo $className; ?> & o ) const {
    return bits == (bits & o.bits);
}

inline
bool <?php echo $className; ?> :: operator >= (const <?php echo $className; ?> & o ) const {
    return bits == (bits | o.bits);
}

inline
auto <?php echo $className; ?> :: Bits( void ) const -> StorageType {
    return bits;
}

inline
bool <?php echo $className; ?>::IsSet(unsigned char index) const {
    StorageType mask = ((StorageType) 1) << index; //>
    return bits & mask;
}

inline
void <?php echo $className; ?> :: ToJson( Json::Value & dest ) const {
    dest = (Json::Int64) bits;
}

inline
void <?php echo $className; ?> :: FromJson( const Json::Value & src ) {
    bits = (StorageType) src.asInt64();
}

<?php
    // The per-value accessors are defined outside the class body, so they are
    // marked inline like every other member definition above.
    foreach ($values as $index => $name) {
        ?>
inline
bool <?php echo $className; ?>::<?php echo $name; ?>(void) const {
    return bits & 0x<?php echo sprintf("%X", 1 << $index); ?>;
}

<?php
    }
    // for each value
    ?>

<?php
    // Everything emitted from here until ob_get_clean() is captured into
    // $globalContents rather than written to the output.
    ob_start();
    ?>

<?php $functions[] = ['Hash', ['@type'], 'base::BIGINT', true, true]; ?>
template<>
inline
uint64_t Hash(const @type & thing) {
    return thing.Bits();
}

inline
void FromString( @type & c, const char * str ) {
    c = atol(str);
}

inline
int ToString( const @type & c, char * buffer ) {
<?php
    // printf length modifier matching the storage width.
    // NOTE(review): Bits() is unsigned but the conversion emitted below is the
    // signed `d`; left unchanged because FromString parses with atol - confirm
    // that the signed text form is intended.
    $format = $nBits < 16 ? 'hh' : ($nBits < 32 ? 'h' : ($nBits < 64 ? '' : 'l'));
    ?>
    sprintf(buffer, "%<?php echo $format; ?>d", c.Bits());
    return strlen(buffer) + 1;
}

inline
void ToJson( const @type & src, Json::Value & dest ) {
    src.ToJson(dest);
}

inline
void FromJson( const Json::Value & src, @type & dest ) {
    dest.FromJson(src);
}

<?php
    $globalContents .= ob_get_clean();
    ?>

<?php
    return ['kind' => 'TYPE', 'name' => $className, 'binary_operators' => ['==', '!=', '>', '<', '>=', '<='], 'system_headers' => ['cinttypes'], 'global_content' => $globalContents, 'complex' => false, 'methods' => $methods, 'constructors' => $constructors, 'functions' => $functions, 'describe_json' => DescribeJson('integer'), 'extras' => ['size.bytes' => $nBits / 8]];
}
Example #10
0
 /**
  * Resolves a function by name and argument list, consulting the function
  * cache first and falling back to a registered concrete function or to
  * generation.
  *
  * @param string $name          Function name; gets the 'base' namespace
  *                              prepended when it is not namespaced and
  *                              $fuzzy is true.
  * @param array  $args          Argument list used for the signature hash
  *                              and the lookup.
  * @param array  $targs         Template arguments; when non-empty the
  *                              registered concrete functions are skipped.
  * @param bool   $fuzzy         Whether un-namespaced names default to the
  *                              base namespace.
  * @param bool   $allowGenerate Whether generateFunction() may be used when
  *                              no concrete function applies.
  *
  * @return mixed The function info, also stored in the cache under both the
  *               requested hash and the info's own hash.
  */
 public function lookupFunction($name, array $args, array $targs = [], $fuzzy = true, $allowGenerate = true)
 {
     // Operators are looked up with $fuzzy = false and keep their bare name.
     $useBaseNamespace = !self::IsNamespaced($name) && $fuzzy;
     if ($useBaseNamespace) {
         $name = self::JoinNamespace('base', $name);
     }

     // Fast path: this exact signature has been resolved before.
     $signatureHash = self::HashFunctionSignature($name, $args, $targs);
     if ($this->isFunctionDefined($signatureHash)) {
         return $this->getDefinedFunction($signatureHash);
     }

     // Not cached: resolve it. Concrete registrations are only considered
     // for calls without template arguments.
     $isTemplated = count($targs) > 0;
     $resolved = null;
     if (!$isTemplated && $this->functionRegistered($name, $args)) {
         $resolved = $this->lookupConcreteFunction($name, $args, $signatureHash);
     } elseif ($allowGenerate) {
         $resolved = $this->generateFunction($name, $args, $targs, $signatureHash);
     } else {
         $fArgs = implode(', ', $args);
         grokit_error("Unable to lookup function {$name}({$fArgs})");
     }

     // Remember the result under the hash that was asked for ...
     if (!$this->isFunctionDefined($signatureHash)) {
         $this->functionCache[$signatureHash] = $resolved;
     }
     // ... and under the hash the resolved function reports for itself,
     // which may differ.
     $reportedHash = $resolved->hash();
     if (!$this->isFunctionDefined($reportedHash)) {
         $this->functionCache[$reportedHash] = $resolved;
     }

     return $resolved;
 }
Example #11
0
/**
 * Emits the C++ "produce results" work function for a GIST waypoint.
 *
 * The generated function is named GISTProduceResultsWorkFunc_<wpName>. For
 * each entry of $queries it emits a branch that extracts the GIST state,
 * pulls results out of it according to the state's result type ('single',
 * 'multi', 'fragment' or 'state'), appends them to the output columns and
 * finally swaps the columns into the output chunk.
 *
 * All output is written directly (inline template text and echo); nothing
 * is returned.
 *
 * @param mixed $wpName  Value appended to the emitted function name.
 * @param array $queries Iterated as query => info, where info provides the
 *                       entries 'gist' and 'output'.
 * @param mixed $attMap  Not referenced anywhere in this function body.
 */
function GISTGenerate_ProduceResults($wpName, $queries, $attMap)
{
    ?>

extern "C"
int GISTProduceResultsWorkFunc_<?php 
    echo $wpName;
    ?>
(WorkDescription &workDescription, ExecEngineData &result) {
    GISTProduceResultsWD myWork;
    myWork.swap(workDescription);

    // Inputs
    QueryExit& whichOne = myWork.get_whichOne();
    GLAState& gist = myWork.get_gist();

    int fragmentNo = myWork.get_fragmentNo();

    // Outputs
    Chunk output;

<?php 
    cgDeclareQueryIDs($queries);
    ?>

    QueryIDSet queriesToRun = whichOne.query;

    // Start columns for outputs
<?php 
    // One call per query, passing that query's output attribute list.
    foreach ($queries as $query => $info) {
        $output = $info['output'];
        cgConstructColumns($output);
    }
    // foreach query
    ?>

    // Output bitstring
    MMappedStorage myStore;
    Column bitmapOut(myStore);
    BStringIterator myOutBStringIter (bitmapOut, queriesToRun);

    PROFILING2_START;
    int64_t numTuples = 0;

#ifdef PER_QUERY_PROFILE
<?php 
    // One per-query tuple counter, only compiled under PER_QUERY_PROFILE.
    foreach ($queries as $query => $info) {
        ?>
    int64_t numTuples_<?php 
        echo queryName($query);
        ?>
 = 0;
<?php 
    }
    // foreach query
    ?>
#endif // PER_QUERY_PROFILE

<?php 
    // Main body: one `if( whichOne.query == ... )` branch per query.
    foreach ($queries as $query => $info) {
        $gist = $info['gist'];
        $output = $info['output'];
        $resType = $gist->result_type();
        // NOTE(review): presumably selects the first of these kinds that the
        // GIST supports - confirm against get_first_value().
        $resType = get_first_value($resType, ['fragment', 'multi', 'single', 'state']);
        ?>
    if( whichOne.query == <?php 
        echo queryName($query);
        ?>
 ) {

        // Extract the GIST state
        GLAPtr gistPtr;
        gistPtr.swap(gist);

        FATALIF(gistPtr.get_glaType() != <?php 
        echo $gist->cHash();
        ?>
,
            "GIST producing results is of incorrect type for query <?php 
        echo queryName($query);
        ?>
");

        <?php 
        echo $gist;
        ?>
* state_<?php 
        echo queryName($query);
        ?>
 = (<?php 
        echo $gist;
        ?>
*) gistPtr.get_glaPtr();

<?php 
        // Every case below leaves one C++ block open (a plain `{` or the
        // body of a `while`); it is closed further down by the brace marked
        // "Matches block for column stuff".
        switch ($resType) {
            case 'single':
                ?>
        {
            state_<?php 
                echo queryName($query);
                ?>
->GetResult(<?php 
                echo implode(', ', $output);
                ?>
);
<?php 
                break;
            case 'multi':
                ?>
        state_<?php 
                echo queryName($query);
                ?>
->Finalize();
        while (state_<?php 
                echo queryName($query);
                ?>
->GetNextResult(<?php 
                echo implode(', ', $output);
                ?>
)) {
<?php 
                break;
            case 'fragment':
                // The iterator declared here is deleted after the loop, see
                // the `$resType == 'fragment'` check below.
                ?>
        <?php 
                echo $gist;
                ?>
::Iterator* iterator
            = state_<?php 
                echo queryName($query);
                ?>
->Finalize( fragmentNo );
        while( state_<?php 
                echo queryName($query);
                ?>
->GetNextResult( iterator, <?php 
                echo implode(', ', $output);
                ?>
) ) {
<?php 
                break;
            case 'state':
                // Only the first output attribute is used in this mode.
                reset($output);
                $att = current($output);
                // Output attribute
                ?>
        {
<?php 
                if ($gist->finalize_as_state()) {
                    ?>
            state_<?php 
                    echo queryName($query);
                    ?>
->FinalizeState();
<?php 
                }
                ?>
             <?php 
                echo $att;
                ?>
 = <?php 
                echo $att->type();
                ?>
( state_<?php 
                echo queryName($query);
                ?>
 );
<?php 
                break;
            default:
                grokit_error("Do not know how to deal with output type of GLA {$gist}::cGla [{$resType}]");
        }
        // matches switch
        ?>

            numTuples++;
#ifdef PER_QUERY_PROFILE
            numTuples_<?php 
        echo queryName($query);
        ?>
++;
#endif // PER_QUERY_PROFILE

            // Advance the columns
            myOutBStringIter.Insert(<?php 
        echo queryName($query);
        ?>
);
            myOutBStringIter.Advance();

<?php 
        // Append the current value of every output attribute to its column.
        foreach ($output as $att) {
            ?>
            <?php 
            echo $att;
            ?>
_Column_Out.Insert(<?php 
            echo $att;
            ?>
);
            <?php 
            echo $att;
            ?>
_Column_Out.Advance();
<?php 
        }
        ?>
        } // Matches block for column stuff.
<?php 
        // Clean-up for the iterator declared in the 'fragment' case above.
        if ($resType == 'fragment') {
            ?>
        // Delete the iterator;
        delete iterator;
<?php 
        }
        ?>
    } // Matches whichOne
<?php 
    }
    // matches foreach query
    ?>

    myOutBStringIter.Done();
    output.SwapBitmap(myOutBStringIter);

    // Write columns
<?php 
    // Second pass over the queries: finish each output column and swap it
    // into the output chunk at the attribute's slot.
    foreach ($queries as $query => $info) {
        // $gist is assigned but not used in this loop.
        $gist = $info['gist'];
        $output = $info['output'];
        ?>
    if( whichOne.query == <?php 
        echo queryName($query);
        ?>
 ) {
<?php 
        foreach ($output as $att) {
            ?>
        Column col_<?php 
            echo $att;
            ?>
;
        <?php 
            echo $att;
            ?>
_Column_Out.Done(col_<?php 
            echo $att;
            ?>
);
        output.SwapColumn(col_<?php 
            echo $att;
            ?>
, <?php 
            echo $att->slot();
            ?>
);
<?php 
        }
        // foreach output att
        ?>
    } // Matches whichOne
<?php 
    }
    // foreach query
    ?>

    ChunkContainer tempResult(output);
    tempResult.swap(result);

    return WP_FINALIZE; // ProduceResults
}

<?php 
}
Example #12
0
/**
 * Generates a C++ factor (dictionary-encoded categorical) type.
 *
 * The class definition is emitted directly to the output. The free helper
 * functions (FromString, ToString, Hash, ToJson, FromJson, IsNull) are
 * captured with output buffering and handed back in 'global_content'.
 *
 * @param array $t_args Template arguments:
 *                      - 'dictionary' / 'dict' / position 0: dictionary name.
 *                      - 'bytes' / position 1: requested storage size in
 *                        bytes (default 2). It is raised automatically when
 *                        the dictionary's current cardinality needs more;
 *                        only 1, 2, 4 and 8 are supported.
 *
 * @return array Type description with the keys 'kind', 'name', 'dictionary',
 *               'system_headers', 'user_headers', 'properties', 'extras',
 *               'binary_operators', 'global_content', 'complex', 'methods',
 *               'constructors', 'functions' and 'describe_json'.
 */
function FACTOR(array $t_args)
{
    $rawDict = get_first_key($t_args, ['dictionary', 'dict', 0]);
    // Double the quotes so that we escape them in SQLite, and add backslashes
    // to them so that we escape them in C++.
    $dict = addcslashes(\grokit\doubleChars($rawDict, '"'), '"\\');
    $cardinality = \grokit\dictionarySize($rawDict);
    $storageBytes = get_first_key_default($t_args, ['bytes', 1], 2);
    // Smallest supported width (1, 2, 4 or 8 bytes) able to hold the current
    // dictionary. The emitted class reserves the largest storage value as
    // InvalidID, so a width of b bytes offers 256^b - 1 usable IDs (assuming
    // IDs are assigned densely starting at 0 - TODO confirm). Doubling keeps
    // the result on a supported width; a plain byte count such as 3 would be
    // rejected by the switch below.
    $cardBytes = 1;
    while ($cardBytes < 8 && $cardinality >= pow(256, $cardBytes)) {
        $cardBytes *= 2;
    }
    // An explicitly requested size is honoured unless it is too small.
    if ($cardBytes > $storageBytes) {
        $storageBytes = $cardBytes;
    }
    switch ($storageBytes) {
        case 1:
            $storageType = 'uint8_t';
            break;
        case 2:
            $storageType = 'uint16_t';
            break;
        case 4:
            $storageType = 'uint32_t';
            break;
        case 8:
            $storageType = 'uint64_t';
            break;
        default:
            grokit_error('Unsupported # of bytes (' . $storageBytes . ') given for FACTOR, only 1, 2, 4, and 8 supported.');
    }
    $className = generate_name('FACTOR_' . ensure_identifier($dict));
    $stringType = lookupType('base::STRING');
    $globalContent = '';
    $methods = [];
    $constructors = [];
    $functions = [];
    ?>

class <?php echo $className; ?> {
public:
    typedef <?php echo $storageType; ?> StorageType;

    static const        char *          DictionaryName      __attribute__((weak));
    static const        StorageType     InvalidID           __attribute__((weak));
    static const        StorageType     MaxID               __attribute__((weak));
    static const        Dictionary &    globalDictionary    __attribute__((weak));

public:
    /* ----- Members ----- */

    // The ID of this Factor;
    StorageType myID;

    /* ----- Constructors / Destructors ----- */

    // Default constructor
    <?php echo $className; ?>( void );

    // Constructor from null (same as default)
    <?php echo $className; ?>( const GrokitNull & );

    // Constructor from C strings / string literals
<?php $constructors[] = [['base::STRING_LITERAL'], true]; ?>
    <?php echo $className; ?>( const char * );

    // Constructor from Grokit STRING type.
<?php $constructors[] = [['base::STRING'], true]; ?>
    <?php echo $className; ?>( const <?php echo $stringType; ?> & );

    // Constructor from storage type
    <?php echo $className; ?>( const StorageType );

    // Copy constructor and copy assignment
    // These can both be default
    <?php echo $className; ?>( const <?php echo $className; ?> & ) = default;
    <?php echo $className; ?> & operator =( const <?php echo $className; ?> & ) = default;

    // Destructor
    ~<?php echo $className; ?>() { }

    /* ----- Methods ----- */

    // Standard FromString method
    void FromString( const char * );

    // FromString method used when building the dictionaries.
    void FromString( const char *, Dictionary & );

    // Looks up the factor in the global dictionary and returns the string
<?php $methods[] = ['ToString', [], 'base::STRING_LITERAL', true]; ?>
    const char * ToString( void ) const;

    // Returns the ID of the Factor.
    StorageType GetID( void ) const;

    // Returns whether or not the Factor is valid.
<?php $methods[] = ['Valid', [], 'base::bool', true]; ?>
    bool Valid( void ) const;
<?php $methods[] = ['Invalid', [], 'base::bool', true]; ?>
    bool Invalid( void ) const;

    // Translate the content
    void Translate( const Dictionary::TranslationTable& );

    void toJson( Json::Value & dest ) const;
    void fromJson( const Json::Value & src );

    /* ----- Operators ----- */

    // The dictionary keeps track of what the sorted order of the strings is.
    // These methods are based on the lexicographical ordering of the strings
    // the factors represent
    bool operator ==( const <?php echo $className; ?> & ) const;
    bool operator !=( const <?php echo $className; ?> & ) const;
    bool operator <( const <?php echo $className; ?> & ) const;
    bool operator <=( const <?php echo $className; ?> & ) const;
    bool operator >( const <?php echo $className; ?> & ) const;
    bool operator >=( const <?php echo $className; ?> & ) const;

    // Implicit conversion to storage type
    operator StorageType () const;
};

// Static member initialization
const <?php echo $className; ?>::StorageType <?php echo $className; ?>::InvalidID = std::numeric_limits<StorageType>::max();
const <?php echo $className; ?>::StorageType <?php echo $className; ?>::MaxID = <?php echo $className; ?>::InvalidID - 1;
const char * <?php echo $className; ?>::DictionaryName = "<?php echo $dict; ?>";
const Dictionary & <?php echo $className; ?>::globalDictionary = Dictionary::GetDictionary(<?php echo $className; ?>::DictionaryName);

/* ----- Constructors ----- */

// Default constructor
inline
<?php echo $className; ?> :: <?php echo $className; ?>( void ):
    myID(InvalidID)
{}

inline
<?php echo $className; ?> :: <?php echo $className; ?>( const GrokitNull & nullval ):
    myID(InvalidID)
{ }

// Constructor from C strings / string literals
inline
<?php echo $className; ?> :: <?php echo $className; ?>( const char * str ) {
    FromString(str);
}

// Constructor from Grokit STRING type
inline
<?php echo $className; ?> :: <?php echo $className; ?>( const <?php echo $stringType; ?> & str ) {
    FromString(str.ToString());
}

// Constructor from storage type
inline
<?php echo $className; ?> :: <?php echo $className; ?>( const <?php echo $storageType; ?> id ):
    myID(id)
{ }

/* ----- Methods ----- */

inline
auto <?php echo $className; ?> :: GetID(void) const -> StorageType {
    return myID;
}

// Standard FromString method
inline
void <?php echo $className; ?> :: FromString( const char * str ) {
    // Global dictionary will return InvalidID if not found
    myID = globalDictionary.Lookup(str, InvalidID );
}

// FromString method used when building the dictionaries
inline
void <?php echo $className; ?> :: FromString( const char * str, Dictionary & localDict ) {
    // First check if we are in the local dictionary
    myID = localDict.Lookup(str, InvalidID );
    if( myID != InvalidID )
        return;

    // Next check if we are in the global dictionary
    myID = globalDictionary.Lookup(str, InvalidID );
    if( myID != InvalidID )
        return;

    // Add a new entry to the local dictionary.
    // The dictionary should throw an error if the new ID is greater than
    // MaxID.
    myID = localDict.Insert( str, MaxID );
}

// Looks up the factor in the global dictionary and returns the string
inline
const char * <?php echo $className; ?> :: ToString( void ) const {
    return globalDictionary.Dereference(myID);
}

// Determine whether or not the factor is valid
inline
bool <?php echo $className; ?> :: Valid( void ) const {
    return myID != InvalidID;
}

inline
bool <?php echo $className; ?> :: Invalid(void) const {
    return myID == InvalidID;
}

// Translate the content
inline
void <?php echo $className; ?> :: Translate( const Dictionary::TranslationTable & tbl ) {
    auto it = tbl.find(myID);
    if( it != tbl.end() ) {
        myID = it->second;
    }
}


inline
void <?php echo $className; ?> :: toJson( Json::Value & dest ) const {
    dest = (Json::Int64) myID;
}

inline
void <?php echo $className; ?> :: fromJson( const Json::Value & src ) {
    myID = (StorageType) src.asInt64();
}

/* ----- Operators ----- */

inline
bool <?php echo $className; ?> :: operator ==( const <?php echo $className; ?> & o ) const {
    return myID == o.myID;
}

inline
bool <?php echo $className; ?> :: operator !=( const <?php echo $className; ?> & o ) const {
    return myID != o.myID;
}

inline
bool <?php echo $className; ?> :: operator <( const <?php echo $className; ?> & o ) const {
    return Valid() && o.Valid() && globalDictionary.Compare(myID, o.myID) < 0;
}

inline
bool <?php echo $className; ?> :: operator <=( const <?php echo $className; ?> & o ) const {
    return Valid() && o.Valid() && globalDictionary.Compare(myID, o.myID) <= 0;
}

inline
bool <?php echo $className; ?> :: operator >( const <?php echo $className; ?> & o ) const {
    return Valid() && o.Valid() && globalDictionary.Compare(myID, o.myID) > 0;
}

inline
bool <?php echo $className; ?> :: operator >=( const <?php echo $className; ?> & o ) const {
    return Valid() && o.Valid() && globalDictionary.Compare(myID, o.myID) >= 0;
}

// Implicit conversion to storage type
inline
<?php echo $className; ?> :: operator StorageType() const {
    return myID;
}

<?php
    // Everything emitted from here until ob_get_clean() is captured into
    // $globalContent rather than written to the output.
    ob_start();
    // Global functions
    ?>

inline
void FromString( @type & f, const char * str ) {
    f.FromString(str);
}

inline
void FromString( @type & f, const char * str, Dictionary & localDict ) {
    f.FromString(str, localDict);
}

inline
int ToString( const @type & f, char * buffer ) {
    const char * str = f.ToString();
    strcpy(buffer, str);
    return strlen(buffer) + 1;
}

<?php $functions[] = ['Hash', ['@type'], 'base::BIGINT', true, true]; ?>
template<>
inline
uint64_t Hash( const @type & x ) {
    return x.GetID();
}

inline
void ToJson( const @type & src, Json::Value & dest ) {
    src.toJson(dest);
}

inline
void FromJson( const Json::Value & src, @type & dest ) {
    dest.fromJson(src);
}

<?php
    // Return type spelled 'base::bool', the same spelling used for the
    // Valid()/Invalid() method descriptors above.
    $functions[] = ['IsNull', ['@type'], 'base::bool', true, true];
    ?>
inline
bool IsNull( const @type f ) {
    return f.Invalid();
}

<?php
    $globalContent .= ob_get_clean();
    ?>

<?php
    // Function to get the dictionary at runtime. When invoked it emits C++
    // that fills <var>["levels"] from the type's global dictionary.
    $describeInfoJson = function ($var, $myType) {
        ?>
    <?php echo $var; ?>["levels"] = Json::Value(Json::arrayValue);
    for( auto it = <?php echo $myType; ?>::globalDictionary.cbegin(); it != <?php echo $myType; ?>::globalDictionary.cend(); it++ ) {
        <?php echo $var; ?>["levels"][it->first] = it->second;
    }
<?php
    };
    return ['kind' => 'TYPE', 'name' => $className, 'dictionary' => $dict, 'system_headers' => ['limits', 'cstring', 'cinttypes'], 'user_headers' => ['Dictionary.h', 'DictionaryManager.h', 'ColumnIteratorDict.h'], 'properties' => ['categorical'], 'extras' => ['cardinality' => $cardinality, 'size.bytes' => $storageBytes], 'binary_operators' => ['==', '!=', '<', '>', '<=', '>='], 'global_content' => $globalContent, 'complex' => 'ColumnIteratorDict< @type >', 'methods' => $methods, 'constructors' => $constructors, 'functions' => $functions, 'describe_json' => DescribeJson('factor', $describeInfoJson)];
}
Example #13
0
 /**
  * Selects the overload of method $name that best fits the given arguments.
  *
  * Every overload registered under $name is rated through compatibility();
  * overloads with a negative rating are discarded. A single overload rated 0
  * (an exact match) is preferred; failing that, a lone remaining overload is
  * used. No viable overload, or several without a unique exact match, is
  * reported through grokit_error().
  *
  * @param string $name Name of the method to look up.
  * @param array  $args Call arguments; they are joined into a string for messages.
  * @return mixed The chosen entry from $this->methods[$name].
  */
 public function lookupMethod($name, array $args)
 {
     grokit_assert(array_key_exists($name, $this->methods), 'No method registered with the name ' . $this->value() . '->' . $name);
     $overloads =& $this->methods[$name];
     // Viable overloads grouped by rating. All of them are kept so that an
     // ambiguous lookup can list every possibility in its error message.
     $byRating = [];
     $viableCount = 0;
     $callStr = $this->value() . '->' . $name . '(' . implode(', ', $args) . ')';
     foreach ($overloads as $overload) {
         $score = $overload->compatibility($args);
         if (!($score >= 0)) {
             // Incompatible overload.
             continue;
         }
         if (!array_key_exists($score, $byRating)) {
             $byRating[$score] = [];
         }
         $byRating[$score][] = $overload;
         $viableCount += 1;
     }
     $hasExact = array_key_exists(0, $byRating);
     grokit_logic_assert(!$hasExact || count($byRating[0]) == 1, 'Got more than one exact match for method ' . $callStr);
     // A unique exact match always wins.
     if ($hasExact && count($byRating[0]) == 1) {
         return $byRating[0][0];
     }
     // Without an exact match, a lone viable overload is still unambiguous.
     if ($viableCount == 1) {
         $onlyGroup = array_pop($byRating);
         return array_pop($onlyGroup);
     }
     // NOTE(review): grokit_error() is presumed not to return -- confirm.
     if ($viableCount == 0) {
         grokit_error('Failed to lookup method ' . $callStr . ', no possible matches.');
     } else {
         // Several overloads fit equally well: list all of them in the error.
         $listing = [];
         foreach ($byRating as $group) {
             foreach ($group as $overload) {
                 $listing[] = $overload;
             }
         }
         grokit_error('Failed to lookup method ' . $callStr . ', multiple possible' . ' matches:' . PHP_EOL . implode(PHP_EOL, $listing));
     }
 }
Example #14
0
/**
 * Generates the C++ source of a GLA that returns its input tuples in sorted
 * order, keeping at most `limit` of them (Top-K), and returns the GLA's
 * description.
 *
 * Template arguments read from $t_args:
 *  - 'input':  name => type map, used only when $inputs is empty. Identifiers
 *              are resolved with lookupType().
 *  - 'order':  attribute name => direction. Accepted directions are listed in
 *              $ascOpts / $descOpts below and are matched with in_array_icase().
 *  - 'rank':   optional output attribute that receives the 1-based position of
 *              each tuple in the sorted result.
 *  - 'limit':  maximum number of tuples kept; missing or 0 selects 2^32.
 *  - 'debug':  verbosity of the diagnostics compiled into the generated code.
 *
 * Outputs other than the rank attribute are paired with the inputs by
 * position and take over the input types.
 *
 * @param array $t_args  Template arguments (see above).
 * @param array $inputs  Input attribute name => type.
 * @param array $outputs Output attribute name => type (null when undetermined).
 * @return array GLA description ('kind', 'name', 'input', 'output', ...).
 */
function OrderBy(array $t_args, array $inputs, array $outputs)
{
    if (\count($inputs) == 0) {
        grokit_assert(array_key_exists('input', $t_args), 'No inputs given for OrderBy');
        $inputs = $t_args['input'];
        // Resolve identifiers directly in $inputs. Iterating $t_args['input']
        // by reference would only update that array and leave the copy held
        // in $inputs (used by everything below) unresolved.
        foreach ($inputs as $name => $type) {
            if (is_identifier($type)) {
                $type = lookupType(strval($type));
                $inputs[$name] = $type;
            }
            grokit_assert(is_datatype($type), 'Invalid type given for input ' . $name);
        }
    }
    grokit_assert(array_key_exists('order', $t_args), 'No ordering attributes given for OrderBy');
    $ordering = $t_args['order'];
    $ascOpts = ['ASC', 'ASCENDING', '+', '>'];
    $descOpts = ['DESC', 'DESCENDING', 'DES', 'DSC', '-', '<'];
    // Attribute name => true for ascending, false for descending.
    $ascending = [];
    foreach ($ordering as $name => $order) {
        grokit_assert(array_key_exists($name, $inputs), 'Ordering attribute ' . $name . ' not present in input');
        if (in_array_icase($order, $ascOpts)) {
            $ascending[$name] = true;
        } else {
            if (in_array_icase($order, $descOpts)) {
                $ascending[$name] = false;
            } else {
                grokit_error("Unknown ordering " . $order . " given for attribute " . $name);
            }
        }
    }
    $rankAtt = get_default($t_args, 'rank', null);
    grokit_assert(is_null($rankAtt) || is_attribute($rankAtt), 'Rank argument should be null or an attribute');
    grokit_assert(is_null($rankAtt) || array_key_exists($rankAtt->name(), $outputs), 'Rank attribute does not exist in outputs');
    if (!is_null($rankAtt) && is_null($outputs[$rankAtt->name()])) {
        $outputs[$rankAtt->name()] = lookupType('base::BIGINT');
    }
    // Every output except the rank attribute is a copy of an input.
    $outputPassthroughAtts = [];
    foreach ($outputs as $name => $type) {
        if (is_null($rankAtt) || $rankAtt->name() != $name) {
            $outputPassthroughAtts[$name] = $type;
        }
    }
    // Pair pass-through outputs with inputs by position.
    $outToIn = [];
    $nInputs = \count($inputs);
    reset($inputs);
    reset($outputPassthroughAtts);
    for ($i = 0; $i < $nInputs; $i++) {
        $outName = key($outputPassthroughAtts);
        $inName = key($inputs);
        $outToIn[$outName] = $inName;
        // Unify types
        $outputs[$outName] = $inputs[$inName];
        $outputPassthroughAtts[$outName] = $inputs[$inName];
        next($inputs);
        next($outputPassthroughAtts);
    }
    // Split the inputs into sort keys and payload attributes.
    $orderAtts = [];
    $extraAtts = [];
    foreach ($inputs as $name => $type) {
        if (array_key_exists($name, $ordering)) {
            $orderAtts[$name] = $type;
        } else {
            $extraAtts[$name] = $type;
        }
    }
    // Give 2^32 as the default, which should be effectively infinite
    $limitDefault = pow(2, 32);
    $limit = get_default($t_args, 'limit', $limitDefault);
    $limit = $limit == 0 ? $limitDefault : $limit;
    grokit_assert($limit > 0, 'The OrderBy limit must be a positive integer');
    $className = generate_name('OrderBy');
    $debug = get_default($t_args, 'debug', 0);
    ?>

class <?php 
    echo $className;
    ?>
 {
    struct Tuple {
<?php 
    foreach ($inputs as $name => $type) {
        ?>
        <?php 
        echo $type;
        ?>
 <?php 
        echo $name;
        ?>
;
<?php 
    }
    ?>

        Tuple( void ) = default;

        Tuple( const Tuple & other ) = default;

        Tuple( <?php 
    echo array_template('const {val} & _{key}', ', ', $inputs);
    ?>
):
            <?php 
    echo array_template('{key}(_{key})', ', ', $inputs);
    ?>

        { }

        Tuple & operator = (const Tuple & other ) = default;

        bool operator > ( const Tuple & other ) const {
<?php 
    // "a > b" in the generated code means "a is output before b": for an
    // ascending key the smaller value wins, for a descending key the larger.
    foreach ($orderAtts as $name => $type) {
        $op1 = $ascending[$name] ? '<' : '>';
        $op2 = !$ascending[$name] ? '<' : '>';
        ?>
            if( <?php 
        echo $name;
        ?>
 <?php 
        echo $op1;
        ?>
 other.<?php 
        echo $name;
        ?>
 )
                return true;
            else if( <?php 
        echo $name;
        ?>
 <?php 
        echo $op2;
        ?>
 other.<?php 
        echo $name;
        ?>
 )
                return false;
<?php 
    }
    ?>

            return false;
        }

        bool operator < ( const Tuple& other ) const {
            return other > *this;
        }

        bool operator <= (const Tuple & other ) const {
            return ! (*this > other );
        }

        bool operator >= (const Tuple & other ) const {
            return !( other > *this );
        }

<?php 
    // toString() is only generated for debug builds; every use of it below
    // is guarded by a check on $debug as well.
    if ($debug > 0) {
        ?>
        std::string toString(void) const {
            std::ostringstream ss;

            ss << "( "; // >
<?php 
        $first = true;
        foreach ($inputs as $name => $type) {
            if ($first) {
                $first = false;
            } else {
                echo '            ss << ", ";' . PHP_EOL;
            }
            ?>
            ss << <?php 
            echo $name;
            ?>
; // >
<?php 
        }
        // foreach input
        ?>
            ss << " )"; // >

            return ss.str();
        }
<?php 
    }
    // debug > 0
    ?>

    }; // struct Tuple

    typedef std::vector<Tuple> TupleVector;
public:

    class Iterator {
    public:
        typedef TupleVector::const_iterator iter_type;

    private:
        iter_type begin;
        iter_type curr;
        iter_type end;

    public:
        Iterator(void) = default;
        Iterator( const iter_type & _begin, const iter_type & _end ) : begin(_begin), curr(_begin), end(_end)
        { }

        bool GetNextResult(<?php 
    echo typed_ref_args($outputs);
    ?>
) {
            if( curr != end ) {
<?php 
    foreach ($outputPassthroughAtts as $name => $type) {
        ?>
                <?php 
        echo $name;
        ?>
 = curr-><?php 
        echo $outToIn[$name];
        ?>
;
<?php 
    }
    if (!is_null($rankAtt)) {
        ?>
                <?php 
        echo $rankAtt;
        ?>
 = (curr - begin) + 1;
<?php 
    }
    // if we need to output the rank
    ?>
                curr++;
                return true;
            }
            else {
                return false;
            }
        }

    };

private:

    uintmax_t __count;  // number of tuples covered

    // K, as in Top-K
    static constexpr size_t K = <?php 
    echo $limit;
    ?>
;

    TupleVector tuples;

    // Iterator for multi output type
    Iterator multiIterator;

    typedef std::greater<Tuple> TupleCompare;

    // Function to force sorting so that GetNext gets the tuples in order.
    void Sort(void) {
        TupleCompare comp;
        // If tuples doesn't contain at least K elements, it was never made into
        // a heap in the first place, so sort it normally.
        if( tuples.size() >= K ) {
            std::sort_heap(tuples.begin(), tuples.end(), comp);
        } else {
            std::sort(tuples.begin(), tuples.end(), comp);
        }
    }

    // Internal function to add a tuple to the heap
    void AddTupleInternal(Tuple & t ) {
<?php 
    if ($debug >= 1) {
        ?>
        {
            std::ostringstream ss;
            ss << "T ACK: " << t.toString() << std::endl; // >
            std::cerr << ss.str(); // >
        }
<?php 
    }
    ?>
        TupleCompare comp;
        if( tuples.size() >= K ) {
<?php 
    if ($debug >= 1) {
        ?>
            {
                std::ostringstream ss;
                ss << "T REP: " << tuples.front().toString() << std::endl; // >
                std::cerr << ss.str(); // >
            }
<?php 
    }
    ?>
            std::pop_heap(tuples.begin(), tuples.end(), comp);
            tuples.pop_back();
            tuples.push_back(t);
            std::push_heap(tuples.begin(), tuples.end(), comp);
        } else {
            tuples.push_back(t);
            if( tuples.size() == K ) {
                std::make_heap(tuples.begin(), tuples.end(), comp);
            }
        }
    }

public:

    <?php 
    echo $className;
    ?>
() : __count(0), tuples(), multiIterator()
    { }

    ~<?php 
    echo $className;
    ?>
() { }

    void AddItem(<?php 
    echo const_typed_ref_args($inputs);
    ?>
) {
        __count++;
        Tuple t(<?php 
    echo args($inputs);
    ?>
);
<?php 
    if ($debug >= 2) {
        ?>
        {
            std::ostringstream ss;
            ss << "T NEW: " << t.toString() << std::endl; // >
            std::cerr << ss.str(); // >
        }
<?php 
    }
    ?>
        if( tuples.size() == K && !(t > tuples.front()) )
            return;

        AddTupleInternal(t);
    }

    void AddState( <?php 
    echo $className;
    ?>
 & other ) {
        __count += other.__count;
        for( Tuple & el : other.tuples ) {
            if( tuples.size() < K /*>*/ || el > tuples.front() ) {
                AddTupleInternal(el);
            }
        }
    }

    void Finalize() {
        Sort();
        Iterator::iter_type begin = tuples.cbegin();
        Iterator::iter_type end = tuples.cend();
        multiIterator = Iterator(begin, end);

<?php 
    if ($debug >= 1) {
        ?>
        std::ostringstream ss;
        ss << "[ "; //>
        bool first = true;
        for( auto el : tuples ) {
            if( first )
                first = false;
            else
                ss << ", "; //>>

            ss << el.toString(); //>>
        }
        ss << " ]" << std::endl; // >
        std::cerr << ss.str(); //>>
<?php 
    }
    ?>
    }

    bool GetNextResult( <?php 
    echo typed_ref_args($outputs);
    ?>
 ) {
        return multiIterator.GetNextResult(<?php 
    echo args($outputs);
    ?>
);
    }
};

<?php 
    // <functional> provides std::greater, used above as the heap comparator.
    $system_headers = ['vector', 'algorithm', 'functional', 'cinttypes'];
    if ($debug > 0) {
        $system_headers = array_merge($system_headers, ['iostream', 'sstream', 'string']);
    }
    return array('kind' => 'GLA', 'name' => $className, 'input' => $inputs, 'output' => $outputs, 'result_type' => 'multi', 'system_headers' => $system_headers);
}
Example #15
0
    /**
     * Emits C++ source that assigns the PHP value $js to the Json::Value
     * expression named by $var.
     *
     * null, strings and numbers are written inline. Arrays and objects are
     * delegated to printArray() / printObject(). Any other type is reported
     * through grokit_error().
     *
     * @param mixed  $js  Value to translate.
     * @param string $var C++ expression that receives the value.
     */
    private static function printJson($js, $var)
    {
        if (is_null($js)) {
            ?>
    <?php 
            echo $var;
            ?>
 = Json::Value(Json::nullValue);
<?php 
            return;
        }
        if (is_string($js)) {
            // NOTE(review): the string is written verbatim between double
            // quotes; presumably callers never pass text that needs C++
            // escaping -- confirm.
            ?>
    <?php 
            echo $var;
            ?>
 = "<?php 
            echo $js;
            ?>
";
<?php 
            return;
        }
        if (is_numeric($js)) {
            // Strings were handled above, so this is reached for other
            // numeric values only.
            ?>
    <?php 
            echo $var;
            ?>
 = <?php 
            echo $js;
            ?>
;
<?php 
            return;
        }
        if (is_array($js)) {
            // An array whose keys are all non-negative integers is emitted as
            // a JSON array; any other key turns it into a JSON object.
            $isList = true;
            foreach (array_keys($js) as $key) {
                if (!is_int($key) || $key < 0) {
                    $isList = false;
                    break;
                }
            }
            if ($isList) {
                self::printArray($js, $var);
            } else {
                self::printObject($js, $var);
            }
            return;
        }
        if (is_object($js)) {
            self::printObject($js, $var);
            return;
        }
        grokit_error("Cannot translate value of type " . gettype($js) . " to JSON");
    }
Example #16
0
/**
 *  A GLA that estimates the cardinality of a dataset using a bloom filter of
 *  a configurable size.
 *
 *  Note: This filter has very high performance, so long as all of the states
 *  fit into cache, preferably L1 or L2, but L3 is also fine. Once the states
 *  are large enough that all of them cannot fit inside L3 cache at the same
 *  time, performance takes a nose dive (4x loss minimum).
 *
 *  Template arguments read from $t_args:
 *   - 'exponent':   integer in (0,64); the filter has 2^exponent bits.
 *                   Defaults to 16.
 *   - 'null.check': either a boolean applied to every input, or a list of
 *                   input names. Tuples with a null in a checked input are
 *                   not added to the filter. Defaults to false.
 *   - 'debug':      when > 0 the generated GetResult prints its intermediate
 *                   values to std::cout.
 *
 *  @param array $t_args Template arguments (see above).
 *  @param array $input  Input attribute name => type.
 *  @param array $output Exactly one output name => type (null selects
 *                       BASE::BIGINT); the type must be numeric.
 *  @return array GLA description ('kind', 'name', 'input', 'output', ...).
 */
function BloomFilter(array $t_args, array $input, array $output)
{
    grokit_assert(\count($output) == 1, 'BloomFilter produces only 1 value, ' . \count($output) . ' outputs given.');
    $outputName = array_keys($output)[0];
    $outputType = array_get_index($output, 0);
    if (is_null($outputType)) {
        $outputType = lookupType('BASE::BIGINT');
    }
    $output[$outputName] = $outputType;
    grokit_assert($outputType->is('numeric'), 'BloomFilter output must be numeric!');
    $exp = get_first_key_default($t_args, ['exponent'], 16);
    grokit_assert(is_integer($exp), 'BloomFilter exponent must be an integer.');
    grokit_assert($exp > 0 && $exp < 64, 'BloomFilter exponent must be in range (0,64), ' . $exp . ' given.');
    $nullCheck = get_default($t_args, 'null.check', false);
    // Input name => whether a null in that input makes AddItem skip the tuple.
    $nullable = [];
    if (is_bool($nullCheck)) {
        foreach ($input as $name => $type) {
            $nullable[$name] = $nullCheck;
        }
    } else {
        if (is_array($nullCheck)) {
            foreach ($input as $name => $type) {
                $nullable[$name] = false;
            }
            foreach ($nullCheck as $index => $n) {
                grokit_assert(is_string($n), 'BloomFilter null.check has invalid value at position ' . $index);
                grokit_assert(array_key_exists($n, $nullable), 'BloomFilter null.check has unknown input ' . $n . ' at position ' . $index);
                $nullable[$n] = true;
            }
        } else {
            grokit_error('BloomFilter null.check must be boolean or list of inputs to check for nulls');
        }
    }
    $debug = get_default($t_args, 'debug', 0);
    $bits = pow(2, $exp);
    // Cast to int so the value is echoed as a plain integer literal; a float
    // would be printed in scientific notation once it gets large enough.
    $bytes = (int) ceil($bits / 8.0);
    // Calculate the number of bits set for every possible value of a byte
    $nBits = [];
    for ($i = 0; $i < 256; $i++) {
        $n = $i;
        $b = 0;
        while ($n > 0) {
            // Clears the lowest set bit, so the loop runs once per set bit.
            $n &= $n - 1;
            $b++;
        }
        $nBits[$i] = $b;
    }
    $className = generate_name('BloomFilter');
    ?>
class <?php 
    echo $className;
    ?>
 {
    static constexpr size_t BITS = <?php 
    echo $bits;
    ?>
;
    static constexpr size_t BYTES = <?php 
    echo $bytes;
    ?>
;
    static constexpr size_t MASK = BITS - 1;
    static constexpr std::array<unsigned char, 256> BITS_SET = { <?php 
    echo implode(', ', $nBits);
    ?>
 };
    static constexpr std::array<unsigned char, 8> BIT_MASKS = {
        0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80
    };

    size_t count;

    std::array<unsigned char, BYTES> set;
    //unsigned char set[BYTES];
    //std::bitset<BITS> set;

public:
    <?php 
    echo $className;
    ?>
() : count(0), set() {
        for( size_t i = 0; i < BYTES; i++ ) { //>
            set[i] = 0;
        }
    }

    ~<?php 
    echo $className;
    ?>
() { }

    void AddItem( <?php 
    echo const_typed_ref_args($input);
    ?>
 ) {
        count++;
<?php 
    foreach ($nullable as $name => $check) {
        if ($check) {
            ?>
        if( IsNull( <?php 
            echo $name;
            ?>
 ) ) return;
<?php 
        }
        // if checking for nulls
    }
    // foreach input
    ?>
        size_t hashVal = H_b;
<?php 
    foreach ($input as $name => $type) {
        ?>
        hashVal = CongruentHash(Hash(<?php 
        echo $name;
        ?>
), hashVal);
<?php 
    }
    // foreach input
    ?>
        hashVal = hashVal & MASK;
        const size_t bucket = hashVal >> 3;
        const size_t bucket_index = hashVal & 0x07;
        const unsigned char mask = BIT_MASKS[bucket_index];
        set[bucket] |= mask;
    }

    void AddState( <?php 
    echo $className;
    ?>
 & o ) {
        count += o.count;
        for( size_t i = 0; i < BYTES; i++ ) { //>
            set[i] |= o.set[i];
        }
    }

    void GetResult( <?php 
    echo $outputType;
    ?>
 & <?php 
    echo $outputName;
    ?>
 ) {
        size_t nBitsSet = 0;
        constexpr long double bits = static_cast<long double>(BITS);
        for( size_t i = 0; i < BYTES; i++ ) { //>
            nBitsSet += BITS_SET[set[i]];
        }
        long double bitsSet = static_cast<long double>(nBitsSet);

        // Declared outside the branches so that the optional debug output
        // below can read it on either path.
        long double cardinality;
        if( nBitsSet == BITS ) {
            // All Bits set, just give the number of tuples seen as an estimate.
            cardinality = static_cast<long double>(count);
            <?php 
    echo $outputName;
    ?>
 = count;
        } else {
            cardinality = - bits * std::log(1 - (bitsSet / bits));
            <?php 
    echo $outputName;
    ?>
 = cardinality;
        }

<?php 
    if ($debug > 0) {
        ?>
        std::cout << "BloomFilter:"
            << " bitsSet(" << bitsSet << ")"
            << " bits(" << bits << ")"
            << " cardinality(" << cardinality << ")"
            << " output(" << <?php 
        echo $outputName;
        ?>
 << ")"
            << std::endl;; //>
<?php 
    }
    // if debugging enabled
    ?>
    }
};

// Storage for static members
constexpr std::array<unsigned char, 256> <?php 
    echo $className;
    ?>
::BITS_SET;
constexpr std::array<unsigned char, 8> <?php 
    echo $className;
    ?>
::BIT_MASKS;
<?php 
    $system_headers = ['cmath', 'array'];
    if ($debug > 0) {
        $system_headers[] = 'iostream';
    }
    return ['kind' => 'GLA', 'name' => $className, 'input' => $input, 'output' => $output, 'result_type' => 'single', 'user_headers' => ['HashFunctions.h'], 'system_headers' => $system_headers];
}
Example #17
0
/**
 * Generates the C++ source of a categorical type backed by a fixed,
 * compile-time table of ID <-> name pairs, and returns the type description.
 *
 * The levels come from one of two template arguments:
 *  - 'dict': a ready-made ID => name map, used as is.
 *  - otherwise the list fetched with get_first_key($t_args, ['values', 0]),
 *    numbered consecutively starting at 'start.at' (default 0).
 *
 * The storage type is the smallest of uint8_t / uint16_t / uint32_t /
 * uint64_t that can hold every ID in use and still leave its maximum value
 * free, because the generated InvalidID is numeric_limits<StorageType>::max().
 *
 * @param array $t_args Template arguments (see above).
 * @return array Type description ('kind', 'name', 'methods', 'functions', ...).
 */
function CATEGORY(array $t_args)
{
    // In both branches $maxID ends up one past the largest ID in use.
    if (array_key_exists('dict', $t_args)) {
        $values = $t_args['dict'];
        $maxID = 0;
        foreach ($values as $id => $val) {
            // Tracking the largest ID itself would let an ID of 255, 65535
            // or 2^32 - 1 share its value with InvalidID.
            $maxID = \max($id + 1, $maxID);
        }
    } else {
        $old_vals = get_first_key($t_args, ['values', 0]);
        $startAt = get_first_key_default($t_args, ['start.at'], 0);
        $values = [];
        $maxID = $startAt;
        foreach ($old_vals as $ind => $val) {
            $values[$maxID++] = $val;
        }
    }
    $cardinality = \count($values);
    // Bits needed to represent the values 0..$maxID, i.e. every ID in use
    // plus one extra value for the invalid ID.
    $storageTypeBits = ceil(log($maxID + 1, 2));
    if ($storageTypeBits > 64) {
        // This should never happen. PHP would explode processing 2^64 values.
        grokit_error("Unable to store {$cardinality} values within 64 bits.");
    } else {
        if ($storageTypeBits > 32) {
            $storageType = 'uint64_t';
            $storageBytes = 8;
        } else {
            if ($storageTypeBits > 16) {
                $storageType = 'uint32_t';
                $storageBytes = 4;
            } else {
                if ($storageTypeBits > 8) {
                    $storageType = 'uint16_t';
                    $storageBytes = 2;
                } else {
                    $storageType = 'uint8_t';
                    $storageBytes = 1;
                }
            }
        }
    }
    $className = generate_name('CATEGORY');
    $stringType = lookupType('base::STRING');
    // Collected while the class is emitted and returned in the description.
    $methods = [];
    $constructors = [];
    $functions = [];
    ?>

class <?php 
    echo $className;
    ?>
 {
public:
    typedef <?php 
    echo $storageType;
    ?>
 StorageType;
    typedef std::unordered_map<StorageType, std::string> IDToNameMap;
    typedef std::unordered_map<std::string, StorageType> NameToIDMap;

    static const StorageType InvalidID __attribute__((weak));

private:
    static const IDToNameMap idToName __attribute__((weak));
    static const NameToIDMap nameToID __attribute__((weak));

    // The ID of this categorical variable
    StorageType myID;

public:

    /* ----- Constructors / Destructor ----- */
    <?php 
    echo $className;
    ?>
( void );

<?php 
    $constructors[] = [['base::STRING_LITERAL'], true];
    ?>
    <?php 
    echo $className;
    ?>
( const char * );
<?php 
    $constructors[] = [['base::STRING'], true];
    ?>
    <?php 
    echo $className;
    ?>
( const <?php 
    echo $stringType;
    ?>
 & );
    <?php 
    echo $className;
    ?>
( const <?php 
    echo $storageType;
    ?>
 );
    <?php 
    echo $className;
    ?>
( const <?php 
    echo $className;
    ?>
 & );
<?php 
    $constructors[] = [['BASE::NULL'], true];
    ?>
    <?php 
    echo $className;
    ?>
( const GrokitNull & );

    <?php 
    echo $className;
    ?>
 & operator =( const <?php 
    echo $className;
    ?>
 & ) = default;

    ~<?php 
    echo $className;
    ?>
(void) {}

    /* ----- Methods ----- */
    void FromString( const char * );

<?php 
    $methods[] = ['ToString', [], 'base::STRING_LITERAL', true];
    ?>
    const char * ToString( void ) const;

    StorageType GetID( void ) const;
    void SetID( StorageType id );

    // Determines whether or not the category is valid.
<?php 
    $methods[] = ['Invalid', [], 'base::bool', true];
    ?>
    bool Invalid(void) const;
<?php 
    $methods[] = ['Valid', [], 'base::bool', true];
    ?>
    bool Valid(void) const;

    /* ----- Operators ----- */
    bool operator ==( const <?php 
    echo $className;
    ?>
 & ) const;
    bool operator !=( const <?php 
    echo $className;
    ?>
 & ) const;
    bool operator <( const <?php 
    echo $className;
    ?>
 & ) const;
    bool operator <=( const <?php 
    echo $className;
    ?>
 & ) const;
    bool operator >( const <?php 
    echo $className;
    ?>
 & ) const;
    bool operator >=( const <?php 
    echo $className;
    ?>
 & ) const;

    // Implicit conversion to storage type
    operator <?php 
    echo $storageType;
    ?>
() const;

    // To/From Json
    void toJson( Json::Value & dest ) const;
    void fromJson( const Json::Value & src );
};

/* ----- Constructors ----- */

inline
<?php 
    echo $className;
    ?>
 :: <?php 
    echo $className;
    ?>
( void ) :
    myID(InvalidID)
{ }

inline
<?php 
    echo $className;
    ?>
 :: <?php 
    echo $className;
    ?>
( const char * str ) {
    FromString(str);
}

inline
<?php 
    echo $className;
    ?>
 :: <?php 
    echo $className;
    ?>
( const <?php 
    echo $stringType;
    ?>
 & str ) {
    FromString(str.ToString());
}

inline
<?php 
    echo $className;
    ?>
 :: <?php 
    echo $className;
    ?>
( const <?php 
    echo $storageType;
    ?>
 val ) :
    myID(val)
{ }

inline
<?php 
    echo $className;
    ?>
 :: <?php 
    echo $className;
    ?>
( const <?php 
    echo $className;
    ?>
 & other ) : myID(other.myID)
{ }

inline
<?php 
    echo $className;
    ?>
 :: <?php 
    echo $className;
    ?>
( const GrokitNull & nullval ) : myID(InvalidID)
{ }

/* ----- Methods ----- */

inline
void <?php 
    echo $className;
    ?>
 :: FromString( const char * str ) {
    auto it = nameToID.find(str);
    if( it != nameToID.end() ) {
        myID = it->second;
    }
    else {
        myID = InvalidID;
    }
}

inline
const char * <?php 
    echo $className;
    ?>
 :: ToString( void ) const {
    auto it = idToName.find(myID);
    if( it != idToName.end() ) {
        return it->second.c_str();
    }
    else {
        return "NULL";
    }
}

inline
auto <?php 
    echo $className;
    ?>
 :: GetID( void ) const -> StorageType {
    return myID;
}

inline
void <?php 
    echo $className;
    ?>
 :: SetID( StorageType id ) {
    myID = id;
}

inline
bool <?php 
    echo $className;
    ?>
 :: Valid(void) const {
    return idToName.count(myID) > 0;
}

inline
bool <?php 
    echo $className;
    ?>
 :: Invalid(void) const {
    return ! Valid();
}


/* ----- Operators ----- */
inline
bool <?php 
    echo $className;
    ?>
 :: operator ==( const <?php 
    echo $className;
    ?>
 & other ) const {
    return myID == other.myID;
}

inline
bool <?php 
    echo $className;
    ?>
 :: operator !=( const <?php 
    echo $className;
    ?>
 & other ) const {
    return myID != other.myID;
}

inline
bool <?php 
    echo $className;
    ?>
 :: operator <( const <?php 
    echo $className;
    ?>
 & other ) const {
    return myID < other.myID;
}

inline
bool <?php 
    echo $className;
    ?>
 :: operator >( const <?php 
    echo $className;
    ?>
 & other ) const {
    return myID > other.myID;
}

inline
bool <?php 
    echo $className;
    ?>
 :: operator <=( const <?php 
    echo $className;
    ?>
 & other ) const {
    return myID <= other.myID;
}

inline
bool <?php 
    echo $className;
    ?>
 :: operator >=( const <?php 
    echo $className;
    ?>
 & other ) const {
    return myID >= other.myID;
}

// To/From Json
inline
void <?php 
    echo $className;
    ?>
 :: toJson( Json::Value & dest ) const {
    dest = (Json::Int64) myID;
}

inline
void <?php 
    echo $className;
    ?>
 :: fromJson( const Json::Value & src ) {
    myID = (StorageType) src.asInt64();
}

inline
<?php 
    echo $className;
    ?>
 :: operator <?php 
    echo $storageType;
    ?>
 () const {
    return myID;
}

<?php 
    // Everything emitted until ob_get_clean() is captured into
    // $globalContents instead of being written out here.
    ob_start();
    $functions[] = ['Hash', ['@type'], 'base::BIGINT', true, true];
    ?>
template<>
inline
uint64_t Hash(const @type & thing) {
    return thing.GetID();
}

inline
void FromString( @type & c, const char * str ) {
    c.FromString(str);
}

inline
int ToString( const @type & c, char * buffer ) {
    const char * str = c.ToString();
    strcpy( buffer, str);
    int len = strlen(buffer);
    return len + 1;
}

inline
void ToJson( const @type & src, Json::Value & dest ) {
    src.toJson(dest);
}

inline
void FromJson( const Json::Value & src, @type & dest ) {
    dest.fromJson(src);
}

<?php 
    $functions[] = ['IsNull', ['@type'], 'BASE::BOOL', true, true];
    ?>
inline
bool IsNull( const @type c ) {
    return c.Invalid();
}
<?php 
    $globalContents = ob_get_clean();
    ?>

// Initialize static values
const <?php 
    echo $className;
    ?>
::IDToNameMap <?php 
    echo $className;
    ?>
 :: idToName = { <?php 
    echo array_template('{{key},"{val}"}', ',', $values);
    ?>
 };
const <?php 
    echo $className;
    ?>
::NameToIDMap <?php 
    echo $className;
    ?>
 :: nameToID = { <?php 
    echo array_template('{"{val}",{key}}', ',', $values);
    ?>
 };
const <?php 
    echo $className;
    ?>
::StorageType <?php 
    echo $className;
    ?>
 :: InvalidID = std::numeric_limits<<?php 
    echo $className;
    ?>
::StorageType>::max();

<?php 
    return ['kind' => 'TYPE', 'name' => $className, 'properties' => ['categorical'], 'extras' => ['cardinality' => $cardinality, 'size.bytes' => $storageBytes], 'binary_operators' => ['==', '!=', '<', '>', '<=', '>='], 'system_headers' => ['cinttypes', 'unordered_map', 'string', 'cstring', 'limits'], 'global_content' => $globalContents, 'complex' => false, 'methods' => $methods, 'constructors' => $constructors, 'functions' => $functions, 'describe_json' => DescribeJson('factor', DescribeJsonStatic(['levels' => $values]))];
}
Example #18
0
/**
 * Emits the C++ "Finalize (Chunk)" work function for a GLA waypoint.
 *
 * The generated function is named GLAFinalizeWorkFunc_<wpName>. For the query
 * selected by the work description it fetches the GLA state, extracts the
 * results according to the GLA's result type ('single', 'multi', 'fragment'
 * or 'state'), packs them into an output chunk and records profiling counters.
 *
 * Everything outside the PHP tags is emitted verbatim as C++ source, so the
 * template text must not be reformatted casually.
 *
 * @param mixed $wpName  Waypoint name; echoed into the generated function name
 *                       and into the profiling counter labels.
 * @param mixed $queries Iterable mapping a query identifier to an info array.
 *                       The keys read here are 'gla', 'output' and 'retState'.
 * @param mixed $attMap  Not read by this function.
 *
 * @return void The generated code is written to the output buffer.
 */
function GLAGenerate_Finalize($wpName, $queries, $attMap)
{
    ?>
#ifndef PER_QUERY_PROFILE
#define PER_QUERY_PROFILE
#endif

//+{"kind":"WPF", "name":"Finalize (Chunk)", "action":"start"}
extern "C"
int GLAFinalizeWorkFunc_<?php 
    echo $wpName;
    ?>
(WorkDescription &workDescription, ExecEngineData &result) {
    GLAFinalizeWD myWork;
    myWork.swap (workDescription);
    QueryExit whichOne = myWork.get_whichQueryExit();
    GLAState& glaState = myWork.get_glaState();
<?php 
    cgDeclareQueryIDs($queries);
    ?>

    // Set up the output chunk
    Chunk output;

    QueryIDSet queriesToRun = whichOne.query;
<?php 
    // Extract the state for the query: one typed state pointer per query,
    // filled in only for the query this work item was issued for.
    foreach ($queries as $query => $info) {
        $gla = $info['gla'];
        ?>
    // Do query <?php 
        echo queryName($query);
        ?>
:
    <?php 
        echo $gla;
        ?>
 * state_<?php 
        echo queryName($query);
        ?>
 = NULL;
    if( whichOne.query == <?php 
        echo queryName($query);
        ?>
 ) {
        // Look for the state of query <?php 
        echo queryName($query);
        ?>
.
        GLAPtr state;
        state.swap(glaState);
        FATALIF( state.get_glaType() != <?php 
        echo $gla->cHash();
        ?>
,
            "Got GLA of unexpected type");
        state_<?php 
        echo queryName($query);
        ?>
 = (<?php 
        echo $gla;
        ?>
 *) state.get_glaPtr();
    }
<?php 
    }
    // foreach query
    ?>

    // Start columns for all possible outputs.
<?php 
    foreach ($queries as $query => $info) {
        $output = $info['output'];
        cgConstructColumns($output);
    }
    // foreach query
    ?>

    // This is the output bitstring
    MMappedStorage myStore;
    Column bitmapOut( myStore );
    BStringIterator myOutBStringIter( bitmapOut, queriesToRun );

    PROFILING2_START;
    int64_t numTuples = 0;

#ifdef PER_QUERY_PROFILE
<?php 
    // One tuple counter per query for the per-query profile.
    foreach ($queries as $query => $info) {
        ?>
    int64_t numTuples_<?php 
        echo queryName($query);
        ?>
 = 0;
<?php 
    }
    // foreach query
    ?>
#endif // PER_QUERY_PROFILE

    // Extract results
<?php 
    foreach ($queries as $query => $info) {
        $gla = $info['gla'];
        $output = $info['output'];
        // If this is true, we return the GLA as a const state.
        // Otherwise, we pack the results into a chunk.
        $retState = $info['retState'];
        $stateName = 'state_' . queryName($query);
        ?>
    if( whichOne.query == <?php 
        echo queryName($query);
        ?>
 ) {
<?php 
        if ($retState) {
            ?>
        FATAL( "Called normal finalize for query that was supposed to be returned as a const state" );
<?php 
        } else {
            // Keep the full list of declared result types around so that the
            // fallback error below can still print it; $resType itself is
            // narrowed to the single value returned by get_first_value().
            $resTypes = $gla->result_type();
            $resType = get_first_value($resTypes, ['fragment', 'multi', 'single', 'state']);
            if ($resType == 'single') {
                // Exactly one result tuple.
                ?>
        <?php 
                echo $stateName;
                ?>
->GetResult(<?php 
                echo implode(', ', $output);
                ?>
);
        numTuples++;
#ifdef PER_QUERY_PROFILE
        numTuples_<?php 
                echo queryName($query);
                ?>
++;
#endif // PER_QUERY_PROFILE
        myOutBStringIter.Insert(<?php 
                echo queryName($query);
                ?>
);
        myOutBStringIter.Advance();
<?php 
                cgInsertAttributesList($output, '_Column_Out', 2);
            } elseif ($resType == 'multi') {
                // Results are pulled one at a time until GetNextResult fails.
                ?>
        <?php 
                echo $stateName;
                ?>
->Finalize();
        while( <?php 
                echo $stateName;
                ?>
->GetNextResult(<?php 
                echo implode(', ', $output);
                ?>
) ) {
            numTuples++;
#ifdef PER_QUERY_PROFILE
            numTuples_<?php 
                echo queryName($query);
                ?>
++;
#endif // PER_QUERY_PROFILE
            myOutBStringIter.Insert(<?php 
                echo queryName($query);
                ?>
);
            myOutBStringIter.Advance();
<?php 
                cgInsertAttributesList($output, '_Column_Out', 3);
                ?>
        }
<?php 
            } elseif ($resType == 'fragment') {
                // Like 'multi', but iterating over the fragment number taken
                // from the work description via a GLA-specific iterator.
                ?>
        int fragment = myWork.get_fragmentNo();
        <?php 
                echo $gla;
                ?>
_Iterator * iterator = <?php 
                echo $stateName;
                ?>
->Finalize(fragment);
        while( <?php 
                echo $stateName;
                ?>
->GetNextResult(iterator, <?php 
                echo implode(', ', $output);
                ?>
) ) {
            numTuples++;
#ifdef PER_QUERY_PROFILE
            numTuples_<?php 
                echo queryName($query);
                ?>
++;
#endif // PER_QUERY_PROFILE
            myOutBStringIter.Insert(<?php 
                echo queryName($query);
                ?>
);
            myOutBStringIter.Advance();
<?php 
                cgInsertAttributesList($output, '_Column_Out', 3);
                ?>
        }
        delete iterator;
<?php 
            } elseif ($resType == 'state') {
                // The state itself is the result; it is wrapped in the type
                // of the first output attribute.
                reset($output);
                $att = current($output);
                // Output attribute
                if ($gla->finalize_as_state()) {
                    ?>
        <?php 
                    echo $stateName;
                    ?>
->FinalizeState();
<?php 
                }
                // if GLA finalized as state
                ?>
        <?php 
                echo $att;
                ?>
 = <?php 
                echo $att->type();
                ?>
( <?php 
                echo $stateName;
                ?>
 );
        numTuples++;
#ifdef PER_QUERY_PROFILE
        numTuples_<?php 
                echo queryName($query);
                ?>
++;
#endif // PER_QUERY_PROFILE
        myOutBStringIter.Insert(<?php 
                echo queryName($query);
                ?>
);
        myOutBStringIter.Advance();
<?php 
                cgInsertAttributesList($output, '_Column_Out', 2);
            } else {
                // Report the declared result types, not the narrowed scalar:
                // passing a non-array to implode() is a TypeError on PHP 8.
                grokit_error('GLA ' . $gla . ' has no known result type: [' . implode(',', (array) $resTypes) . ']');
            }
            // switch GLA result type
        }
        // else GLA produces into a chunk
        // NOTE: the bitmap/column finalization below is emitted for every
        // query, including those flagged retState.
        ?>
        myOutBStringIter.Done();
        output.SwapBitmap(myOutBStringIter);

        // Write columns
<?php 
        foreach ($output as $att) {
            ?>
        Column col_<?php 
            echo $att;
            ?>
;
        <?php 
            echo $att;
            ?>
_Column_Out.Done(col_<?php 
            echo $att;
            ?>
);
        output.SwapColumn( col_<?php 
            echo $att;
            ?>
, <?php 
            echo $att->slot();
            ?>
 );
<?php 
        }
        // foreach output attribute
        ?>
    }
<?php 
    }
    // foreach query
    ?>

    PROFILING2_END;

    PCounterList counterList;
    PCounter totalCnt("tpo", numTuples, "<?php 
    echo $wpName;
    ?>
");
    counterList.Append(totalCnt);

#ifdef PER_QUERY_PROFILE
<?php 
    // One profiling counter per query, labelled with the query name.
    foreach ($queries as $query => $info) {
        ?>
    {
        PCounter qCount("tpo <?php 
        echo queryName($query);
        ?>
", numTuples_<?php 
        echo queryName($query);
        ?>
, "<?php 
        echo $wpName;
        ?>
");
        counterList.Append(qCount);
    }
<?php 
    }
    // foreach query
    ?>
#endif // PER_QUERY_PROFILE

    PROFILING2_SET(counterList, "<?php 
    echo $wpName;
    ?>
");

    ChunkContainer tempResult(output);
    tempResult.swap(result);
    return WP_FINALIZE;
}
//+{"kind":"WPF", "name":"Finalize (Chunk)", "action":"end"}
<?php 
}
Example #19
0
/**
 * Generates a C++ CSV reader class (a "GI") and returns its description.
 *
 * The class is written to the output buffer; everything outside the PHP tags
 * is emitted verbatim as C++ source.
 *
 * Template arguments read from $t_args:
 *  - 'output'            list of types, used only when $output is empty;
 *                        the attributes are then named val_1, val_2, ...
 *  - 'debug'             verbosity (default 0); >= 1 wraps tokenizing in a
 *                        try/catch, >= 2 logs nullability to STDERR
 *  - 'simple'            if true, split on the delimiter only (no quoting or
 *                        escaping, no boost tokenizer)
 *  - 'trim.cr'           if true, strip a trailing '\r' from every line
 *  - 'sep' / 'separator' single-character column delimiter, or 'tab'
 *  - 'quote'             single-character quote (not allowed with 'simple')
 *  - 'escape'            single-character escape (not allowed with 'simple')
 *  - 'skip'              number of header lines to skip (int >= 0)
 *  - 'n'                 maximum number of lines to read (int, default -1;
 *                        presumably "no limit" once stored in a size_t,
 *                        confirm against callers)
 *  - 'nullable'          true, a null string, or an array describing which
 *                        attributes are nullable and their null strings
 *
 * @param array $t_args Template arguments, see above.
 * @param array $output Output attributes (name => type); may be empty.
 *
 * @return array Description with the keys 'name', 'kind', 'output',
 *               'system_headers' and 'user_headers'.
 */
function CSVReader(array $t_args, array $output)
{
    $my_output = [];
    // Handle case where outputs are given as template arguments
    // and not implied.
    if (\count($output) == 0) {
        grokit_assert(array_key_exists('output', $t_args), 'Did not receive any description of my output!');
        $output_list = $t_args['output'];
        grokit_assert(is_array($output_list), 'Expected list of types for template argument "output"');
        $i = 1;
        // Iterate the validated list; attribute names are generated, any
        // keys of the list are ignored.
        foreach ($output_list as $out_type) {
            grokit_assert(is_datatype($out_type) || is_identifier($out_type), 'Expected only types in the "output" list');
            if (is_identifier($out_type)) {
                $out_type = lookupType($out_type->value());
            }
            $name = 'val_' . $i;
            $my_output[$name] = $out_type;
            $i += 1;
        }
    } else {
        foreach ($output as $key => $out) {
            $my_output[$key] = $out;
        }
    }
    $debug = get_default($t_args, 'debug', 0);
    $simple = get_default($t_args, 'simple', false);
    $trimCR = get_default($t_args, 'trim.cr', false);
    // Handle separator
    $separator = ',';
    if (array_key_exists('sep', $t_args) || array_key_exists('separator', $t_args)) {
        $sep = get_first_key($t_args, ['sep', 'separator']);
        grokit_assert(is_string($sep), "Got " . gettype($sep) . " instead of string for separator.");
        if (strtolower($sep) === 'tab') {
            // Two characters (backslash, t): a C++ escape sequence, not a tab.
            $sep = '\\t';
        }
        grokit_assert($sep != "\n", 'CSV column delimiter cannot be new line');
        // Scream if separator is longer than one character
        grokit_assert(\strlen($sep) == 1 || $sep == '\\t', 'Expected string of length 1 for separator, got string <' . $sep . '> instead');
        // The separator ends up inside a C++ character literal, so a quote or
        // backslash must be escaped just like the quote and escape characters
        // below. The tab shorthand is already a valid escape sequence.
        $separator = $sep === '\\t' ? $sep : addcslashes($sep, '\\\'');
    }
    // Handle quote character
    $quotechar = '"';
    if (array_key_exists('quote', $t_args) && !is_null($t_args['quote'])) {
        grokit_assert(!$simple, 'Quote option not available for simple CSVReader');
        $quote = $t_args['quote'];
        grokit_assert(is_string($quote), "Got " . gettype($quote) . " instead of string for quote.");
        // Scream if quote is longer than one character
        grokit_assert(\strlen($quote) == 1, 'Expected string of length 1 for quote character, got string <' . $quote . '> instead');
        $quotechar = $quote;
    }
    $quotechar = addcslashes($quotechar, '\\\'');
    // Handle escape character
    $escapeChar = '\\';
    if (array_key_exists('escape', $t_args) && !is_null($t_args['escape'])) {
        grokit_assert(!$simple, 'Escape option not available for simple CSVReader');
        $escape = $t_args['escape'];
        grokit_assert(is_string($escape), 'Got ' . gettype($escape) . ' instead of string for escape character.');
        grokit_assert(\strlen($escape) == 1, 'Expected string of length 1 for escape character, got string <' . $escape . '> instead');
        $escapeChar = $escape;
    }
    $escapeChar = addcslashes($escapeChar, '\\\'');
    // Handle header lines
    $headerLines = 0;
    if (array_key_exists('skip', $t_args)) {
        $headerLines = $t_args['skip'];
        grokit_assert(is_int($headerLines), 'Got ' . gettype($headerLines) . ' instead of int for number of lines to skip.');
        grokit_assert($headerLines >= 0, 'Cannot skip a negative number of lines.');
    }
    // Maximum number of lines to read
    $maxLines = get_default($t_args, 'n', -1);
    grokit_assert(is_int($maxLines), 'Got ' . gettype($maxLines) . ' instead of int for template argument "n"');
    // Nullability: $nullable[name] says whether the attribute may be null,
    // $nullStr[name] holds the string that represents null for it.
    $nullArg = get_first_key_default($t_args, ['nullable'], false);
    $nullable = [];
    $nullStr = [];
    foreach ($my_output as $name => $type) {
        $nullable[$name] = false;
    }
    if ($nullArg === true) {
        // Everything is nullable with the default null string.
        foreach ($my_output as $name => $type) {
            $nullable[$name] = true;
            $nullStr[$name] = 'NULL';
        }
    } elseif (is_array($nullArg)) {
        foreach ($nullArg as $n => $v) {
            // If nullable value is an associative mapping, the value is either true/false
            // or the value of the null string
            if (is_string($n)) {
                grokit_assert(is_string($v) || is_bool($v), 'CSVReader: nullable associative mapping must have string or boolean values');
                grokit_assert(array_key_exists($n, $nullable), 'CSVReader: cannot make unknown attribute ' . $n . ' nullable');
                if (is_bool($v)) {
                    $nullable[$n] = $v;
                    $nullStr[$n] = 'NULL';
                } else {
                    $nullable[$n] = true;
                    $nullStr[$n] = $v;
                }
            } elseif (is_array($v)) {
                // Positional entry of the form ['attr' => <attribute>, 'null' => <string>]
                grokit_assert(array_key_exists('attr', $v), 'CSVReader: Name of nullable attribute not specified');
                $attrName = $v['attr']->name();
                $nullable[$attrName] = true;
                $nullStr[$attrName] = array_key_exists('null', $v) ? $v['null'] : 'NULL';
            } else {
                // Otherwise, it's just nullable
                $attrName = $v->name();
                grokit_assert(array_key_exists($attrName, $nullable), 'CSVReader: cannot make unknown attribute ' . $v . ' nullable');
                $nullable[$attrName] = true;
                $nullStr[$attrName] = 'NULL';
            }
        }
    } elseif (is_string($nullArg)) {
        // Everything is nullable, using the given null string.
        foreach ($my_output as $name => $type) {
            $nullable[$name] = true;
            $nullStr[$name] = $nullArg;
        }
    } elseif ($nullArg !== false) {
        grokit_error('Template argument "nullable" must be boolean, string or array, ' . gettype($nullArg) . ' given');
    }
    // Come up with a name for ourselves
    $className = generate_name('CSVReader');
    if ($debug >= 2) {
        foreach ($my_output as $name => $type) {
            fwrite(STDERR, "CSVReader: {$name} is nullable: " . ($nullable[$name] ? 'true' : 'false') . PHP_EOL);
        }
    }
    ?>

class <?php 
    echo $className;
    ?>
 {
    std::istream& my_stream;
    std::string fileName;

    // Template parameters
    static constexpr size_t MAX_LINES = <?php 
    echo $maxLines;
    ?>
;
    static constexpr size_t HEADER_LINES = <?php 
    echo $headerLines;
    ?>
;
    static constexpr char DELIMITER = '<?php 
    echo $separator;
    ?>
';
<?php 
    if (!$simple) {
        ?>
    static constexpr char QUOTE_CHAR = '<?php 
        echo $quotechar;
        ?>
';
    static constexpr char ESCAPE_CHAR = '<?php 
        echo $escapeChar;
        ?>
';

    typedef boost::escaped_list_separator<char> separator;
    typedef boost::tokenizer< separator > Tokenizer;
    separator my_separator;
    Tokenizer my_tokenizer;
<?php 
    }
    ?>

    // Prevent having to allocate this every time.
    std::string line;
    std::vector<std::string> tokens;

    size_t count;

<?php 
    \grokit\declareDictionaries($my_output);
    ?>

public:

    <?php 
    echo $className;
    ?>
 ( GIStreamProxy& _stream ) :
        my_stream(_stream.get_stream())
        , fileName(_stream.get_file_name())
<?php 
    if (!$simple) {
        ?>
        , my_separator(ESCAPE_CHAR, DELIMITER, QUOTE_CHAR)
        , my_tokenizer(std::string(""))
<?php 
    }
    ?>
        , count(0)
    {
<?php 
    if ($headerLines > 0) {
        ?>
        for( size_t i = 0; i < HEADER_LINES; ++i ) {
            FATALIF( !getline( my_stream, line ), "CSV Reader reached end of file before finishing header.\n" );
        }
<?php 
    }
    // If headerLines > 0
    ?>
    }

// >

    bool ProduceTuple( <?php 
    echo typed_ref_args($my_output);
    ?>
 ) {
        if (count < MAX_LINES) { //>
            count++;
        } else {
            return false;
        }

        if( getline( my_stream, line ) ) {
<?php 
    if ($trimCR) {
        // The emptiness check matters: back() on an empty std::string is
        // undefined behaviour, and blank lines do occur in input files.
        ?>
            if( !line.empty() && line.back() == '\r' ) {
                line.pop_back();
            }
<?php 
    }
    // if trimCR
    if (!$simple) {
        if ($debug >= 1) {
            ?>
            try {
<?php 
        }
        // if debug >= 1
        ?>
            my_tokenizer.assign( line, my_separator );
<?php 
        if ($debug >= 1) {
            ?>
            } catch(...) {
                FATAL("CSVReader for file %s failed on line: %s", fileName.c_str(), line.c_str());
            }
<?php 
        }
        // if debug >= 1
        ?>
            Tokenizer::iterator it = my_tokenizer.begin();

<?php 
        foreach ($my_output as $name => $type) {
            if ($nullable[$name]) {
                // nullable
                // NOTE(review): this call is not echoed, while the simple
                // reader below echoes the same helper. One of the two is
                // presumably wrong; confirm against fromStringNullable.
                ?>
            <?php 
                \grokit\fromStringNullable($name, $type, 'it->c_str()', true, $nullStr[$name]);
                ?>

<?php 
            } else {
                // not nullable
                ?>
            <?php 
                echo \grokit\fromStringDict($name, $type, 'it->c_str()');
                ?>
;
<?php 
            }
            // end nullable check
            ?>
            ++it;
<?php 
        }
        // foreach output
    } else {
        // Simple reader: turn every delimiter into a NUL so that each field
        // becomes its own C string inside the line buffer.
        ?>
            for( char & c : line ) {
                if( c == DELIMITER )
                    c = '\0';
            }

            const char * ptr = line.c_str();
<?php 
        $first = true;
        foreach ($my_output as $name => $type) {
            if ($first) {
                $first = false;
            } else {
                ?>
            while( *(ptr++) != '\0' )
                ; // Advance past next delimiter
<?php 
            }
            // not first output
            if ($nullable[$name]) {
                ?>
            <?php 
                echo \grokit\fromStringNullable($name, $type, 'ptr', true, $nullStr[$name]);
            } else {
                // not nullable
                ?>
            <?php 
                echo \grokit\fromStringDict($name, $type, 'ptr');
                ?>
;
<?php 
            }
            // if nullable
        }
        // foreach output
    }
    // if simple reader
    ?>

            return true;
        }
        else {
            return false;
        }
    }

<?php 
    \grokit\declareDictionaryGetters($my_output);
    ?>
};

<?php 
    $sys_headers = ['vector', 'string', 'iostream', 'cstdint'];
    if (!$simple) {
        $sys_headers[] = 'boost/tokenizer.hpp';
    }
    return ['name' => $className, 'kind' => 'GI', 'output' => $my_output, 'system_headers' => $sys_headers, 'user_headers' => ['GIStreamInfo.h', 'Dictionary.h', 'DictionaryManager.h']];
}