/**
 * Applies this operator to the given operand expressions.
 *
 * Unary operators are rendered as "(<op><operand>)" and binary operators as
 * "(<lhs>)<op>(<rhs>)". Any other arity is reported through grokit_error().
 *
 * @param array $exprs  Operand expressions; each must provide value(),
 *                      is_const() and be accepted by ExpressionInfo::absorbMeta().
 * @param mixed $source Source information forwarded to the resulting ExpressionInfo.
 *
 * @return ExpressionInfo The expression describing the applied operator.
 */
public function apply(array $exprs, $source)
{
    $this->checkFuzzy($this->args, $exprs, 'parameter');

    $exprVals = array_map(function ($val) {
        return $val->value();
    }, $exprs);

    // Function-call style rendering; it is only kept for an unsupported arity,
    // and only if grokit_error() returns instead of aborting.
    $value = $this->value() . '(' . implode(', ', $exprVals) . ')';

    $nArgs = count($this->args);
    switch ($nArgs) {
        case 1:
            $value = '(' . $this->value() . $exprVals[0] . ')';
            break;
        case 2:
            $value = '(' . $exprVals[0] . ')' . $this->value() . '(' . $exprVals[1] . ')';
            break;
        default:
            // The original called "$count(...)" (a variable function on an
            // undefined variable), which crashed before the message was built.
            grokit_error('Got an operator with ' . $nArgs . ' arguments!');
    }

    // The result is constant only if the operator is deterministic and every
    // operand is constant.
    $is_const = $this->deterministic;
    foreach ($exprs as $expr) {
        $is_const = $is_const && $expr->is_const();
    }

    $info = new ExpressionInfo($source, $this->resultType, $value, $is_const);
    foreach ($exprs as $expr) {
        $info->absorbMeta($expr);
    }

    return $info;
}
/**
 * GLA that keeps every tuple sharing the "best" combination of extreme values.
 *
 * Template arguments read by this function:
 *  - 'extremes': map of input name => extreme kind. Accepted kinds
 *    (case-insensitive) are MIN, MINIMUM, -, < and MAX, MAXIMUM, +, >.
 *  - 'inputs':   list of datatypes, used only when no inputs are passed in;
 *    the generated inputs are named et_val0, et_val1, ...
 *  - 'debug':    optional integer; a value > 0 adds extra system headers.
 *
 * Attributes are compared in the order they appear in the inputs; the first
 * attribute that differs decides which tuple is better.
 *
 * @param array $t_args  Template arguments (see above).
 * @param array $inputs  Input name => type.
 * @param array $outputs Output name => type; types are overwritten positionally
 *                       with the input types.
 *
 * @return array GLA description (kind, name, input, output, result_type, system_headers).
 */
function ExtremeTuples(array $t_args, array $inputs, array $outputs)
{
    $extremes = get_first_key($t_args, ['extremes']);
    $nExt = \count($extremes);
    grokit_assert($nExt > 0, 'No extremes specified for ExtremeTuples GLA.');

    if (\count($inputs) == 0) {
        grokit_assert(array_key_exists('inputs', $t_args), 'No arguments specified for ExtremeTuples GLA.');

        $count = 0;
        foreach ($t_args['inputs'] as $type) {
            if (is_identifier($type)) {
                $type = lookupType(strval($type));
            }

            grokit_assert(is_datatype($type), 'Only datatypes can be specified as inputs to ' . 'the ExtremeTuples GLA');

            // The counter must advance, otherwise every input would be named
            // "et_val0" and overwrite the previous one.
            $name = 'et_val' . $count++;
            $inputs[$name] = $type;
        }
    }

    // Pair inputs and outputs positionally: each output takes the type of the
    // input at the same position, and $outputMap remembers which input feeds it.
    $outputMap = [];
    reset($outputs);
    foreach ($inputs as $name => $type) {
        $oKey = key($outputs);
        $outputs[$oKey] = $type;
        $outputMap[$oKey] = $name;
        next($outputs);
    }

    grokit_assert($nExt <= \count($inputs), 'There can not be more extreme values than there are inputs!');

    $mainAtts = [];
    $minOpts = ['MIN', 'MINIMUM', '-', '<'];
    $maxOpts = ['MAX', 'MAXIMUM', '+', '>'];

    // Case-insensitive membership test.
    $inArrayCase = function ($needle, $haystack) {
        foreach ($haystack as $item) {
            if (strcasecmp($needle, $item) == 0) {
                return true;
            }
        }

        return false;
    };

    // $minimum[name] is true when the attribute is minimized, false when maximized.
    $minimum = [];
    foreach ($extremes as $name => $val) {
        grokit_assert(array_key_exists($name, $inputs), "ExtremeTuples: Expression with name " . $name . " specified as extreme not found in inputs");
    }

    foreach ($inputs as $name => $type) {
        if (!array_key_exists($name, $extremes)) {
            // Not an extreme: carried along in the tuple but never compared.
            continue;
        }

        $mainAtts[$name] = $type;
        if ($inArrayCase($extremes[$name], $minOpts)) {
            $minimum[$name] = true;
        } elseif ($inArrayCase($extremes[$name], $maxOpts)) {
            $minimum[$name] = false;
        } else {
            grokit_error('Unknown extreme type ' . $extremes[$name] . ' specified for ' . $name);
        }
    }

    $debug = get_default($t_args, 'debug', 0);
    $className = generate_name('ExtremeTuples');
?>

class <?php echo $className; ?> {

    struct Tuple {
<?php foreach ($inputs as $name => $type) { ?>
        <?php echo $type; ?> <?php echo $name; ?> ;
<?php } // foreach input ?>

        // Default Constructor, Copy Constructor, and Copy Assignment are all
        // default
        Tuple(void) = default;
        Tuple(const Tuple &) = default;
        Tuple & operator = (const Tuple &) = default;

        Tuple(<?php echo array_template('const {val} & _{key}', ', ', $inputs); ?> ) :
            <?php echo array_template('{key}(_{key})', ', ', $inputs); ?>

        { }

        // operator > means that this tuple is "better" than the other tuple.
        bool operator > ( const Tuple & other ) const {
<?php
    foreach ($mainAtts as $name => $type) {
        $op1 = $minimum[$name] ? '<' : '>';
        $op2 = !$minimum[$name] ? '<' : '>';
?>
            if( <?php echo $name; ?> <?php echo $op1; ?> other.<?php echo $name; ?> )
                return true;
            else if( <?php echo $name; ?> <?php echo $op2; ?> other.<?php echo $name; ?> )
                return false;
<?php } // foreach main attribute ?>

            return false;
        }

        bool operator < ( const Tuple& other ) const {
            return other > *this;
        }

        bool operator <= (const Tuple & other ) const {
            return ! (*this > other );
        }

        bool operator >= (const Tuple & other ) const {
            return !( other > *this );
        }

        bool operator == (const Tuple & other ) const {
            bool ret = true;
<?php foreach ($mainAtts as $name => $type) { ?>
            ret &= <?php echo $name; ?> == other.<?php echo $name; ?> ;
<?php } // foreach main attribute ?>
            return ret;
        }
    }; // struct Tuple

    typedef std::vector<Tuple> TupleVector;

public:

    class Iterator {
    public:
        typedef TupleVector::const_iterator iter_type;

    private:
        iter_type begin;
        iter_type end;

    public:
        Iterator(void) = default;
        Iterator(const Iterator &) = default;
        Iterator( const iter_type & _begin, const iter_type & _end ) :
            begin(_begin), end(_end)
        { }
        Iterator( const iter_type && _begin, const iter_type && _end ) :
            begin(_begin), end(_end)
        { }

        bool GetNextResult(<?php echo typed_ref_args($outputs); ?> ) {
            if( begin != end ) {
<?php foreach ($outputs as $name => $type) { ?>
                <?php echo $name; ?> = begin-><?php echo $outputMap[$name]; ?> ;
<?php } ?>
                begin++;
                return true;
            }
            else {
                return false;
            }
        }
    };

private:

    uintmax_t __count; // number of tuples covered

    TupleVector tuples;

    // Iterator for multi output type
    Iterator multiIterator;

public:

    // Constructor and destructor
    <?php echo $className; ?> (void) : __count(0), tuples(), multiIterator()
    { }

    ~<?php echo $className; ?> () { }

    void AddItem( <?php echo const_typed_ref_args($inputs); ?> ) {
        ++__count;
        Tuple t(<?php echo args($inputs); ?> );

        if( tuples.empty() ) {
            tuples.push_back(t);
        }
        else if( t > tuples.front() ) {
            tuples.clear();
            tuples.push_back(t);
        }
        else if( t == tuples.front() ) {
            tuples.push_back(t);
        }
    }

    void AddState( <?php echo $className; ?> & other ) {
        if( tuples.size() == 0 ) {
            tuples.swap(other.tuples);
        }
        else if( other.tuples.size() == 0 ) {
            // Do nothing
        }
        else if( tuples.front() > other.tuples.front() ) {
            // fast path
        }
        else if( other.tuples.front() > tuples.front() ) {
            tuples.swap(other.tuples);
        }
        else {
            for( Tuple & t : other.tuples ) {
                tuples.push_back(t);
            }
        }
    }

    void Finalize( void ) {
        multiIterator = Iterator(tuples.cbegin(), tuples.cend());
    }

    bool GetNextResult(<?php echo typed_ref_args($outputs); ?> ) {
        return multiIterator.GetNextResult(<?php echo args($outputs); ?> );
    }
}; // class <?php echo $className; ?>


<?php
    $system_headers = ['vector', 'algorithm', 'cinttypes'];
    if ($debug > 0) {
        $system_headers = array_merge($system_headers, ['iostream', 'sstream', 'string']);
    }

    return array(
        'kind' => 'GLA',
        'name' => $className,
        'input' => $inputs,
        'output' => $outputs,
        'result_type' => 'multi',
        'system_headers' => $system_headers,
    );
}
/**
 * A GLA that determines the distinct values of a dataset.
 *
 * Optional template arguments read by this function:
 *  - 'use.mct'         (bool, default true): use mct::closed_hash_set instead
 *                      of std::unordered_set.
 *  - 'init.size'       (positive int, default 65536): initial size of the set.
 *  - 'mct.keep.hashes' (bool, default false): forwarded as the last template
 *                      parameter of mct::closed_hash_set.
 *  - 'fragment.size'   (positive int, default 100000): maximum number of
 *                      elements per fragment.
 *  - 'null.check'      (bool or list of input names, default false): inputs
 *                      for which tuples with a null value are skipped.
 *
 * @param array $t_args Template arguments (see above).
 * @param array $input  Input name => type.
 * @param array $output Output name => type; must have as many entries as
 *                      $input. Types are overwritten positionally.
 *
 * @return array GLA description.
 */
function Distinct(array $t_args, array $input, array $output)
{
    grokit_assert(\count($input) == \count($output), 'Distinct must have the same outputs as inputs.');

    // Pair outputs with inputs by position and copy the input types over.
    $outputsToInputs = [];
    $i = 0;
    foreach ($input as $name => $type) {
        $outputsToInputs[array_keys($output)[$i]] = $name;
        array_set_index($output, $i++, $type);
    }

    $useMCT = get_default($t_args, 'use.mct', true);
    $initSize = get_default($t_args, 'init.size', 65536);
    $keepHashes = get_default($t_args, 'mct.keep.hashes', false);
    $fragmentSize = get_default($t_args, 'fragment.size', 100000);
    $nullCheck = get_default($t_args, 'null.check', false);

    grokit_assert(is_bool($useMCT), 'Distinct use.mct argument must be boolean');
    grokit_assert(is_integer($initSize), 'Distinct init.size argument must be an integer');
    grokit_assert($initSize > 0, 'Distinct init.size argument must be positive');
    grokit_assert(is_bool($keepHashes), 'Distinct mct.keep.hashes argument must be boolean');
    grokit_assert(is_integer($fragmentSize), 'Distinct fragment.size argument must be integral');
    grokit_assert($fragmentSize > 0, 'Distinct fragment.size argument must be positive');

    // $nullable[name] tells whether tuples with a null in that input are dropped.
    $nullable = [];
    if (is_bool($nullCheck)) {
        foreach ($input as $name => $type) {
            $nullable[$name] = $nullCheck;
        }
    } elseif (is_array($nullCheck)) {
        foreach ($input as $name => $type) {
            $nullable[$name] = false;
        }

        foreach ($nullCheck as $index => $n) {
            grokit_assert(is_string($n), 'Distinct null.check has invalid value at position ' . $index);
            grokit_assert(array_key_exists($n, $nullable), 'Distinct null.check has unknown input ' . $n . ' at position ' . $index);
            $nullable[$n] = true;
        }
    } else {
        grokit_error('Distinct null.check must be boolean or list of inputs to check for nulls');
    }

    $keepHashesText = $keepHashes ? 'true' : 'false';

    $system_headers = ['cinttypes', 'functional', 'vector'];
    if ($useMCT) {
        $system_headers[] = 'mct/hash-set.hpp';
        $definedSet = "mct::closed_hash_set<Key, HashKey, std::equal_to<Key>, std::allocator<Key>, {$keepHashesText}>";
    } else {
        // std::unordered_set is declared in <unordered_set>, not <unordered_map>.
        $system_headers[] = 'unordered_set';
        $definedSet = "std::unordered_set<Key, HashKey, std::equal_to<Key>, std::allocator<Key>>";
    }

    $className = generate_name('Distinct');
?>

class <?php echo $className; ?> {

public:
    // Value being placed into the set.
    struct Key {
<?php foreach ($input as $name => $type) { ?>
        <?php echo $type; ?> <?php echo $name; ?> ;
<?php } // for each input ?>

        // Construct the value by copying all of the attributes.
        Key(<?php echo const_typed_ref_args($input); ?> ) :
<?php
    $first = true;
    foreach ($input as $name => $type) {
?>
            <?php echo $first ? ' ' : ','; ?> <?php echo $name; ?> (<?php echo $name; ?> )
<?php
        $first = false;
    } // for each input
?>
        { }

        bool operator==(const Key & o ) const {
            return true <?php echo array_template("&& ({key} == o.{key})", ' ', $input); ?> ;
        }

        size_t hash_value() const {
            uint64_t hash = H_b;
<?php foreach ($input as $name => $type) { ?>
            hash = CongruentHash(Hash(<?php echo $name; ?> ), hash);
<?php } // for each input ?>
            return size_t(hash);
        }
    };

    // Hashing functor for our value
    struct HashKey {
        size_t operator()(const Key& o) const {
            return o.hash_value();
        }
    };

    using Set = <?php echo $definedSet; ?> ;

    // Iterator object used in multi and fragment result types
    class Iterator {
    public:
        using iterator_t = Set::const_iterator;

    private:
        iterator_t start;
        iterator_t end;

    public:
        Iterator() : start(), end() { }

        Iterator( const iterator_t & _start, const iterator_t & _end ) :
            start(_start), end(_end)
        { }

        Iterator( const Iterator & o ) :
            start(o.start), end(o.end)
        { }

        bool GetNextResult(<?php echo typed_ref_args($output); ?> ) {
            if( start != end ) {
<?php foreach ($output as $name => $type) { ?>
                <?php echo $name; ?> = start-><?php echo $outputsToInputs[$name]; ?> ;
<?php } // for each output ?>
                start++;
                return true;
            } else {
                return false;
            }
        }
    };

private:

    // Constants
    static constexpr size_t INIT_SIZE = <?php echo $initSize; ?> ;
    static constexpr size_t FRAG_SIZE = <?php echo $fragmentSize; ?> ;

    // Member variables

    uint64_t count;         // Total # tuples seen

    Set distinct;           // Set of distinct values

    using IteratorList = std::vector<Iterator>;

    Iterator multiIterator; // Internal iterator for multi result type
    IteratorList fragments; // Iterator for fragments

public:

    <?php echo $className; ?> () :
        count(0),
        distinct(INIT_SIZE),
        multiIterator(),
        fragments()
    { }

    ~<?php echo $className; ?> () { }

    void Reset(void) {
        count = 0;
        distinct.clear();
    }

    void AddItem(<?php echo const_typed_ref_args($input); ?> ) {
        count++;
<?php
    foreach ($nullable as $name => $check) {
        if ($check) {
?>
        if( IsNull( <?php echo $name; ?> ) ) return;
<?php
        } // if checking for nulls
    } // foreach input
?>

        Key key(<?php echo args($input); ?> );

        distinct.insert(key);
        /*
        auto it = distinct.find(key);
        if( it == distinct.end() ) {
            distinct.insert(key);
        }
        */
    }

    void AddState( <?php echo $className; ?> & other ) {
        for( auto & elem : other.distinct ) {
            distinct.insert(elem);
            /*
            auto it = distinct.find(elem);
            if( it == distinct.end() ) {
                distinct.insert(elem);
            }
            */
        }
        count += other.count;
    }

    // Multi interface
    void Finalize(void) {
        multiIterator = Iterator(distinct.cbegin(), distinct.cend());
    }

    bool GetNextResult(<?php echo typed_ref_args($output); ?> ) {
        return multiIterator.GetNextResult(<?php echo args($output); ?> );
    }

    // Fragment interface
    int GetNumFragments(void) {
        fragments.clear();
        int nFrag = 0;

        Iterator::iterator_t prev = distinct.cbegin();
        Iterator::iterator_t end = distinct.cend();
        Iterator::iterator_t next = prev;

        while( next != end ) {
            for( size_t i = 0; next != end && FRAG_SIZE > i; i++ ) {
                next++;
            }
            Iterator nIter(prev, next);
            fragments.push_back(nIter);

            prev = next;
            nFrag++;
        }

        return nFrag;
    }

    Iterator * Finalize(int fragment) {
        return new Iterator(fragments[fragment]);
    }

    bool GetNextResult(Iterator * it, <?php echo typed_ref_args($output); ?> ) {
        return it->GetNextResult(<?php echo args($output); ?> );
    }

    // General methods
    uint64_t get_count() const {
        return count;
    }

    uint64_t get_countDistinct() const {
        return distinct.size();
    }

    const Set & get_distinct() const {
        return distinct;
    }
};

typedef <?php echo $className; ?>::Iterator <?php echo $className; ?>_Iterator;

<?php
    return [
        'kind' => 'GLA',
        'name' => $className,
        'input' => $input,
        'output' => $output,
        'result_type' => ['multi', 'fragment'],
        'user_headers' => ['HashFunctions.h'],
        'system_headers' => $system_headers,
        'properties' => ['resettable'],
    ];
}
/**
 * Parses a CASE statement that has no base expression.
 *
 * Each case becomes an "if"/"else if" preprocess line assigning to a freshly
 * generated result variable; the optional default becomes the final "else".
 * The type of the first case expression defines the result type and every
 * later expression (and the default) is converted to it.
 *
 * @param mixed      $source  Source information for the case statement.
 * @param array      $cases   AST nodes providing NodeKey::TEST and NodeKey::EXPR.
 * @param mixed|null $default Already-parsed default expression, or null. It is
 *                            replaced by its converted form (passed by reference).
 *
 * @return ExpressionInfo Expression whose value is the generated variable name.
 */
function parseCaseNoBase(&$source, &$cases, &$default)
{
    // Every test has to be convertible to boolean.
    $boolType = lookupType('bool');

    // Result type and where it was defined; both unknown until the first case.
    $retType = null;
    $retSource = null;

    // Name of the variable that will receive the value of the case.
    $resultVar = generate_name("case_value");

    $lines = [];
    $info = new ExpressionInfo($source, null, $resultVar, true);

    grokit_logic_assert(count($cases) > 0, 'No cases found for case statement at ' . $source);

    // Handle cases
    foreach ($cases as $case) {
        $test = parseExpression(ast_get($case, NodeKey::TEST));
        $expr = parseExpression(ast_get($case, NodeKey::EXPR));

        // The test must be compatible with boolean.
        if (canConvert($test->type(), $boolType)) {
            $test = convertExpression($test, $boolType, $retSource);
        } else {
            grokit_error('Case test expression has return type ' . $test->type() . ' which is incompatible with boolean ' . $test->source());
        }

        // The first typed expression fixes the result type; later ones must
        // be convertible to it.
        $isFirst = ($retType === null);
        if ($isFirst) {
            $retType = $expr->type();
            $retSource = $expr->source();
            $info->setType($retType);
        } elseif (canConvert($expr->type(), $retType)) {
            $expr = convertExpression($expr, $retType, $retSource);
        } else {
            grokit_error('Case return type ' . $expr->type() . ' of expression at ' . $expr->source() . ' incompatible with previous return type ' . $retType . ' defined by expression at ' . $retSource);
        }

        // Fold the metadata of both sub-expressions into the result.
        $info->absorbMeta($test);
        $info->absorbMeta($expr);

        $prefix = $isFirst ? '' : 'else ';
        $lines[] = $prefix . 'if( ' . $test->value() . ' ) ' . $resultVar . ' = ' . $expr->value() . ';';
    }

    // Handle default
    if ($default !== null) {
        if (canConvert($default->type(), $retType)) {
            $default = convertExpression($default, $retType, $retSource);
        } else {
            grokit_error('Case return type ' . $default->type() . ' of default at ' . $default->source() . ' incompatible with previous return type ' . $retType . ' defined by expression at ' . $retSource);
        }

        $info->absorbMeta($default);
        $lines[] = 'else ' . $resultVar . ' = ' . $default->value() . ';';
    }

    // The declaration of the result variable goes before all branches.
    array_unshift($lines, $retType . ' ' . $resultVar . ';');

    // Register everything as preprocesses of the expression.
    $info->addPreprocesses($lines);

    if ($info->is_const()) {
        $info->makeConstant();
    }

    return $info;
}
/**
 * Parses a Selection waypoint and generates its source file.
 *
 * For every query the filters, optional generalized filter (GF), constant
 * arguments, state arguments and synthesized attributes are parsed and
 * recorded, then the waypoint code is generated into "<name>.cc".
 *
 * @param mixed  $ast    AST of the waypoint (ATT_MAP, FILTERS and SYNTH nodes are read).
 * @param string $name   Waypoint name; also used as the base name of the generated file.
 * @param string $header Text placed before the output captured while parsing.
 *
 * @return GenerationInfo Information gathered while processing the waypoint.
 */
function parseSelectionWP($ast, $name, $header)
{
    // Capture everything emitted while parsing; it becomes part of the headers.
    ob_start();

    // Push LibraryManager so we can undo this waypoint's definitions.
    LibraryManager::Push();

    $res = new GenerationInfo();

    /***************   PROCESS AST   ***************/

    $attMap = parseAttributeMap(ast_get($ast, NodeKey::ATT_MAP), $res);
    $qFilters = ast_get($ast, NodeKey::FILTERS);
    // The SYNTH node does not depend on the query, so fetch it once.
    $qSynth = ast_get($ast, NodeKey::SYNTH);

    $queries = [];

    foreach ($qFilters as $query => $qInfo) {
        $filterAST = ast_get($qInfo, NodeKey::ARGS);
        $gfAST = ast_get($qInfo, NodeKey::TYPE);

        // A generalized filter takes named expressions, a plain filter takes
        // an ordinary expression list.
        if ($gfAST !== null) {
            $filter = parseNamedExpressionList($filterAST);
            $gf = parseGF($gfAST);
            $gf = $gf->apply($filter);
        } else {
            $filter = parseExpressionList($filterAST);
            $gf = null;
        }

        $cargs = ast_has($qInfo, NodeKey::CARGS)
            ? parseLiteralList(ast_get($qInfo, NodeKey::CARGS))
            : [];

        $sargs = ast_has($qInfo, NodeKey::SARGS)
            ? parseStateList(ast_get($qInfo, NodeKey::SARGS), $query)
            : [];

        $synths = [];
        if (ast_has($qSynth, $query)) {
            foreach (ast_get($qSynth, $query) as $curSynthAST) {
                $expr = parseExpression(ast_get($curSynthAST, NodeKey::EXPR));
                $att = parseAttribute(ast_get($curSynthAST, NodeKey::ATT));

                if ($att->type() == null) {
                    // Untyped attribute: it takes the type of its expression.
                    AttributeManager::setAttributeType($att->name(), $expr->type());
                    $att = AttributeManager::lookupAttribute($att->name());
                } elseif (canConvert($expr->type(), $att->type())) {
                    // canConvert() is given types, as in the other call sites;
                    // the original passed the expression object itself.
                    $expr = convertExpression($expr, $att->type());
                } else {
                    grokit_error('Unable to convert expression for synthesized attribute ' . $att->name() . ' from type ' . $expr->type() . ' to type ' . $att->type() . ' ' . $expr->source());
                }

                $synths[$att->name()] = $expr;
            }
        }

        $queries[$query] = [
            'filters' => $filter,
            'synths' => $synths,
            'gf' => $gf,
            'cargs' => $cargs,
            'states' => $sargs,
        ];

        $res->addJob($query, $name);
        $res->absorbInfoList($filter);
        $res->absorbInfoList($synths);
        $res->absorbInfoList($cargs);
        $res->absorbStateList($sargs);
        if ($gf !== null) {
            $res->absorbInfo($gf);
        }
    }

    /*************** END PROCESS AST ***************/

    // Get this waypoint's headers
    $myHeaders = $header . PHP_EOL . ob_get_clean();

    // Only one file at the moment
    $filename = $name . '.cc';
    $res->addFile($filename, $name);

    _startFile($filename);
    SelectionGenerate($name, $queries, $attMap);
    _endFile($filename, $myHeaders);

    // Pop LibraryManager again to get rid of this waypoint's declarations
    LibraryManager::Pop();

    return $res;
}
/**
 * Absorbs the information carried by an attribute's type.
 *
 * An attribute without a type is reported through grokit_error().
 *
 * @param mixed $attr Attribute providing a type() accessor.
 */
public function absorbAttr($attr)
{
    $isUntyped = $attr->type() == null;
    if ($isUntyped) {
        $dump = print_r($attr, true);
        grokit_error("Attribute has no type set: " . $dump);
    }

    $this->absorbInfo($attr->type());
}
/**
 * Looks up the GT described by this spec for the given inputs and outputs.
 *
 * Inputs that are already datatypes are used as they are; any other input
 * is replaced by the result of its type() method.
 *
 * @param iterable $inputs  Input name => datatype or typed expression.
 * @param mixed    $outputs Outputs forwarded to lookupGT().
 * @param array    $sargs   State arguments forwarded to lookupGT().
 *
 * @return mixed The result of lookupGT(); a failed lookup is reported
 *               through grokit_error().
 */
public function apply($inputs, $outputs, $sargs = [])
{
    try {
        $inputTypes = [];
        foreach ($inputs as $key => $item) {
            $inputTypes[$key] = is_datatype($item) ? $item : $item->type();
        }

        return lookupGT($this->name, $this->t_args, $inputTypes, $outputs, $sargs, $this->alias);
    } catch (Exception $e) {
        $message = 'Failed to lookup GT ' . $this->name . ' from spec ' . $this->source;
        grokit_error($message, $e);
    }
}
/**
 * GI that generates data in clusters, using a specified distribution for each
 * cluster.
 *
 * This GI requires the following template arguments:
 *  - 'n' or 0
 *      The number of tuples to generate. Note: this value is per task.
 *      The total number of tuples generated will be n_tasks * n
 *  - 'centers' or 1
 *      A list of configuration for the centers. Each entry describes one
 *      output column and is either a functor or a list of functors.
 *
 * The following template arguments are optional:
 *  - 'outputs'
 *      If the outputs of the GI are not given implicitly, they can be
 *      specified in this template argument. The number of dimensions will
 *      be determined by the number of outputs.
 *
 *      All output types must be numeric real types. The default type for
 *      outputs is DOUBLE.
 *  - 'dist.lib' = 'std'
 *      Which library to use for generating distributions.
 *      Valid options are:
 *          - std
 *          - boost
 *  - 'seed' = null
 *      The seed to be used for the random number generator. This seed will
 *      be used to generate the seed for each task, and different runs with
 *      the same seed will produce the same data.
 *  - 'compute.sets' = 1
 *      The number of sets of tuples to compute at once. When it is not given,
 *      the value is derived from min(n, 10000) and the number of centers.
 *
 * Each center configuration is a functor with the form:
 *      dist_name(args)
 *
 * The following distributions are supported:
 *  { Uniform Distributions }
 *  - uniform(a = 0, b = 1)
 *
 *  { Normal Distributions }
 *  - normal(mean = 0.0, std_dev = 1.0) [ synonyms: gaussian ]
 *  - inverse_gaussian(mean = 1, shape = 1) [ synonyms: inverse_normal ]
 *
 *  { Bernoulli Distributions }
 *  - binomial(t = 1, p = 0.5)
 *  - negative_binomial(k = 1, p = 0.5)
 *
 *  { Poisson Distributions }
 *  - exponential( lambda = 1 )
 *  - gamma(alpha = 1, beta = 1) [ synonyms: Gamma ]
 *
 * @param array $t_args  Template arguments (see above).
 * @param array $outputs Output name => type (a null type defaults to base::DOUBLE).
 *
 * @return array GI description (kind, name, output, system_headers, user_headers, libraries).
 */
function ClusterGen(array $t_args, array $outputs)
{
    // <tuple> is needed for the std::tuple used by the generated class.
    $sys_headers = ['array', 'cinttypes', 'tuple'];
    $user_headers = [];
    $libraries = [];

    if (\count($outputs) == 0) {
        grokit_assert(array_key_exists('outputs', $t_args), 'ClusterGen: No outputs specified');

        $count = 0;
        foreach ($t_args['outputs'] as $type) {
            if (is_identifier($type)) {
                $type = lookupType($type);
            }

            grokit_assert(is_datatype($type), 'ClusterGen: Non data-type ' . $type . ' given as output');

            $name = 'output' . $count++;
            $outputs[$name] = $type;
        }
    }

    foreach ($outputs as $name => &$type) {
        if (is_null($type)) {
            $type = lookupType('base::DOUBLE');
        } else {
            grokit_assert($type->is('real'), 'ClusterGen: Non-real datatype ' . $type . ' given as output');
        }
    }
    // Break the reference left behind by the by-reference loop. Without this,
    // every later assignment to $type silently overwrote the last output.
    unset($type);

    $tSize = \count($outputs);

    $seed = get_default($t_args, 'seed', null);
    if ($seed !== null) {
        grokit_assert(is_int($seed), 'ClusterGen: Seed must be an integer or null.');

        // A given seed is mixed with the stream id through CongruentHash(),
        // so this is the case that requires the hash functions header.
        $user_headers[] = 'HashFunctions.h';
    }

    $distLib = get_default($t_args, 'dist.lib', 'std');
    $distNS = '';
    switch ($distLib) {
        case 'std':
            $sys_headers[] = 'random';
            $distNS = 'std';
            break;
        case 'boost':
            $sys_headers[] = 'boost/random.hpp';
            $distNS = 'boost::random';
            $libraries[] = 'boost_random-mt';
            if ($seed === null) {
                // Need random_device
                $sys_headers[] = 'boost/random/random_device.hpp';
                $libraries[] = 'boost_system-mt';
            }
            break;
        default:
            grokit_error('ClusterGen: Unknown RNG library ' . $distLib);
    }

    $distRNG = 'mt19937';
    $RNGtype = $distNS . '::' . $distRNG;

    $nTuples = get_first_key($t_args, ['n', '0']);
    grokit_assert(is_int($nTuples), 'ClusterGen: the number of tuples to be produced must be an integer.');

    $centers = get_first_key($t_args, ['centers', 1]);
    grokit_assert(is_array($centers), 'ClusterGen: centers must be an array of functors');

    // Every entry of 'centers' fills one column of the generated tuple, so
    // there can not be more entries than outputs.
    grokit_assert(\count($centers) <= $tSize, 'ClusterGen: ' . \count($centers) . ' center configurations given for ' . $tSize . ' outputs');

    // Translates a distribution functor into [C++ distribution type, constructor arguments].
    $handleDist = function ($name, $args, $oType) use ($distNS) {
        $distName = '';
        $distArgs = [];

        switch ($name) {
            case 'gaussian':
            case 'normal':
                $distName = $distNS . '::' . 'normal_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 2, 'ClusterGen: Normal distribution takes at most 2 arguments, ' . \count($args) . ' given');

                $mean = get_default($args, ['mean', 0], 0.0);
                $sigma = get_default($args, ['std_dev', 'sigma', 1], 1.0);

                grokit_assert(is_numeric($mean), 'ClusterGen: mean parameter of normal distribution must be a real number.');
                grokit_assert(is_numeric($sigma), 'ClusterGen: sigma parameter of normal distribution must be a real number.');

                $mean = floatval($mean);
                $sigma = floatval($sigma);

                $distArgs = [$mean, $sigma];
                break;

            case 'binomial':
                $distName = $distNS . '::' . 'binomial_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 2, 'ClusterGen: Binomial distribution takes at most 2 arguments, ' . \count($args) . ' given');

                $t = get_default($args, ['t', 0], 1);
                $p = get_default($args, ['p', 1], 0.5);

                grokit_assert(is_int($t), 'ClusterGen: t parameter of binomial distribution must be an integer.');
                grokit_assert(is_numeric($p), 'ClusterGen: p parameter of binomial distribution must be a real number.');

                $p = floatval($p);

                grokit_assert($p >= 0 && $p <= 1, 'ClusterGen: p parameter of binomial distribution must be in the range [0, 1]');
                grokit_assert($t >= 0, 'ClusterGen: t parameter of binomial distribution must be in the range [0, +inf)');

                $distArgs = [$t, $p];
                break;

            case 'negative_binomial':
                $distName = $distNS . '::' . 'negative_binomial_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 2, 'ClusterGen: Negative Binomial distribution takes at most 2 arguments, ' . \count($args) . ' given');

                $k = get_default($args, ['k', 0], 1);
                $p = get_default($args, ['p', 1], 0.5);

                grokit_assert(is_int($k), 'ClusterGen: k parameter of negative binomial distribution must be an integer.');
                grokit_assert(is_numeric($p), 'ClusterGen: p parameter of negative binomial distribution must be a real number.');

                $p = floatval($p);

                grokit_assert($p > 0 && $p <= 1, 'ClusterGen: p parameter of negative binomial distribution must be in the range (0, 1]');
                grokit_assert($k > 0, 'ClusterGen: k parameter of negative binomial distribution must be in the range (0, +inf)');

                $distArgs = [$k, $p];
                break;

            case 'inverse_gaussian':
            case 'inverse_normal':
                grokit_assert(\count($args) <= 2, 'ClusterGen: Inverse Gaussian distribution takes at most 2 arguments, ' . \count($args) . ' given');

                $mean = get_default($args, ['mean', 0], 1);
                $shape = get_default($args, ['shape', 1], 1);

                grokit_assert(is_numeric($mean), 'ClusterGen: mean parameter of inverse gaussian distribution must be a real number.');
                grokit_assert(is_numeric($shape), 'ClusterGen: shape parameter of inverse gaussian distribution must be a real number.');

                $mean = floatval($mean);
                $shape = floatval($shape);

                grokit_assert($mean > 0, 'ClusterGen: mean of inverse gaussian distribution must be in range (0, inf)');
                grokit_assert($shape > 0, 'ClusterGen: shape of inverse gaussian distribution must be in range (0, inf)');

                // This distribution is provided by a project resource rather
                // than by the distribution library itself.
                $gen_args = ['output' => $oType, 'ns' => $distNS];
                $distName = strval(lookupResource('datagen::InverseGaussianGen', $gen_args));
                $distArgs = [$mean, $shape];
                break;

            case 'uniform':
                $distName = $distNS . '::' . 'uniform_real_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 2, 'ClusterGen: Uniform distribution takes at most 2 arguments, ' . \count($args) . ' given');

                $a = get_default($args, ['a', 0], 0.0);
                $b = get_default($args, ['b', 1], 1.0);

                grokit_assert(is_numeric($a), 'ClusterGen: `a` parameter of uniform distribution must be a real number.');
                grokit_assert(is_numeric($b), 'ClusterGen: `b` parameter of uniform distribution must be a real number.');

                $a = floatval($a);
                $b = floatval($b);

                grokit_assert($b >= $a, 'ClusterGen: `b` parameter of uniform distribution must be >= the `a` parameter.');

                $distArgs = [$a, $b];
                break;

            case 'exponential':
                $distName = $distNS . '::' . 'exponential_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 1, 'ClusterGen: Exponential distribution takes at most 1 argument.');

                $lambda = get_default($args, ['lambda', 0], 1.0);

                grokit_assert(is_numeric($lambda), 'ClusterGen: `lambda` parameter of exponential distribution must be a real number.');

                $lambda = floatval($lambda);

                grokit_assert($lambda > 0, 'ClusterGen: `lambda` parameter of exponential distribution must be in range (0, +inf).');

                $distArgs = [$lambda];
                break;

            case 'gamma':
            case 'Gamma':
                $distName = $distNS . '::' . 'gamma_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 2, 'ClusterGen: Gamma distribution takes at most 2 arguments.');

                $alpha = get_default($args, ['alpha', 0], 1.0);
                // The original called the non-existent "det_default" here.
                $beta = get_default($args, ['beta', 1], 1.0);

                grokit_assert(is_numeric($alpha), 'ClusterGen: `alpha` parameter of gamma distribution must be a real number.');
                grokit_assert(is_numeric($beta), 'ClusterGen: `beta` parameter of gamma distribution must be a real number.');

                $alpha = floatval($alpha);
                $beta = floatval($beta);

                $distArgs = [$alpha, $beta];
                break;

            default:
                grokit_error('ClusterGen: Unknown distribution ' . $name . ' given for center');
        }

        return [$distName, $distArgs];
    };

    // $dists[column][member] is the C++ distribution type and
    // $distArgs[column][member] its constructor arguments.
    $dists = [];
    $distArgs = [];
    $count = 0;
    $oType = '';
    // Least common multiple of the number of centers over all columns.
    $nCenters = 1;

    reset($outputs);
    foreach ($centers as $val) {
        $cluster = $val;
        if (is_functor($val)) {
            $cluster = [$val];
        } elseif (is_array($val)) {
            $nCenters = lcm($nCenters, \count($val));
        } else {
            grokit_error('ClusterGen: center descriptions must be functors or list of functors');
        }

        $curDist = [];
        $curDistArgs = [];
        $curDistName = 'distribution' . $count++;
        // Centers are matched with outputs by position.
        $oType = strval(current($outputs));

        $iCount = 0;
        foreach ($cluster as $functor) {
            grokit_assert(is_functor($functor), 'ClusterGen: center description must be a functor');

            $vName = $curDistName . '_' . $iCount++;
            $ret = $handleDist($functor->name(), $functor->args(), $oType);

            $curDist[$vName] = $ret[0];
            $curDistArgs[$vName] = $ret[1];
        }

        next($outputs);

        $dists[$curDistName] = $curDist;
        $distArgs[$curDistName] = $curDistArgs;
    }

    // Determine the default number of sets to compute at a time.
    // We want to generate either $nTuples or 10,000 tuples, depending on which
    // is less.
    $defaultSetsTarget = min($nTuples, 10000);
    $setsToTarget = intval(ceil($defaultSetsTarget / $nCenters));

    $computeSets = get_default($t_args, 'compute.sets', $setsToTarget);
    grokit_assert(is_int($computeSets) && $computeSets > 0, 'ClusterGen: compute.sets must be a positive integer, ' . $computeSets . ' given');

    $className = generate_name('ClusterGen');
?>

class <?php echo $className; ?> {

    // The number of tuples to produce per task
    static constexpr size_t N = <?php echo $nTuples; ?> ;
    static constexpr size_t CacheSize = <?php echo $computeSets * $nCenters; ?> ;

    // Typedefs
    typedef std::tuple<<?php echo array_template('{val}', ', ', $outputs); ?> > Tuple;
    typedef std::array<Tuple, CacheSize> TupleArray;
    typedef TupleArray::const_iterator TupleIterator;
    typedef <?php echo $RNGtype; ?> RandGen;

    // Number of tuples produced.
    uintmax_t count;

    // Cache a number of outputs for efficiency
    TupleArray cache;
    TupleIterator cacheIt;

    // Random number generator
    RandGen rng;

    // Distributions
<?php
    foreach ($dists as $name => $list) {
        foreach ($list as $vName => $distType) {
?>
    <?php echo $distType; ?> <?php echo $vName; ?> ;
<?php
        } // foreach distribution
    } // foreach cluster set
?>

    // Helper function to generate tuples.
    void GenerateTuples(void) {
<?php
    $tIndex = 0;
    foreach ($dists as $name => $list) {
        $lCenters = \count($list);
        // $nCenters has been defined to be the LCM of the number of centers in
        // any column, so $lCenters is guaranteed to divide evenly into
        // CacheSize
?>
        for( size_t index = 0; CacheSize > index; index += <?php echo $lCenters; ?> ) {
<?php
        $index = 0;
        foreach ($list as $vName => $distType) {
?>
            std::get<<?php echo $tIndex; ?> >(cache[index + <?php echo $index; ?> ]) = <?php echo $vName; ?> (rng);
<?php
            $index++;
        } // foreach value in tuple
?>
        }
<?php
        $tIndex++;
    } // foreach distribution
?>
        cacheIt = cache.cbegin();
    }

public:
    // Constructor
    <?php echo $className; ?> ( GIStreamProxy & _stream ) :
        cache()
        , cacheIt()
        , count(0)
        , rng()
<?php
    foreach ($dists as $name => $list) {
        foreach ($list as $vName => $distType) {
?>
        , <?php echo $vName; ?> (<?php echo implode(', ', $distArgs[$name][$vName]); ?> )
<?php
        } // foreach distribution
    } // foreach cluster set
?>
    {
<?php if (is_null($seed)) { ?>
        <?php echo $distNS; ?> ::random_device rd;
<?php } // if seed is null ?>
        RandGen::result_type seed = <?php echo is_null($seed) ? 'rd()' : "CongruentHash({$seed}, _stream.get_id() )"; ?> ;
        rng.seed(seed);

        cacheIt = cache.cend();
    }

    // Destructor
    ~<?php echo $className; ?> (void) { }

    bool ProduceTuple(<?php echo typed_ref_args($outputs); ?> ) {
        if( N > count ) {
            if( cacheIt == cache.cend() ) {
                GenerateTuples();
            }
<?php
    $tIndex = 0;
    foreach ($outputs as $name => $outType) {
?>
            <?php echo $name; ?> = std::get<<?php echo $tIndex; ?> >(*cacheIt);
<?php
        $tIndex++;
    } // foreach output
?>

            ++cacheIt;
            ++count;

            return true;
        }
        else {
            return false;
        }
    }
};

<?php
    return array(
        'kind' => 'GI',
        'name' => $className,
        'output' => $outputs,
        'system_headers' => $sys_headers,
        'user_headers' => $user_headers,
        'libraries' => $libraries,
    );
}
/**
 * Generates a C++ type that wraps a fixed-width unsigned integer whose bits
 * are individually named.
 *
 * Template arguments:
 *   - values: non-empty map of bit index (0 to 63) => name. Every name becomes
 *     an accessor method on the generated class, so it must be a valid C++
 *     identifier.
 *
 * The storage width is the smallest of 8, 16, 32 or 64 bits able to hold the
 * highest index.
 *
 * @param array $t_args template arguments, see above
 * @return array type description (kind 'TYPE') for the generated class
 */
function BITSET(array $t_args)
{
    grokit_assert(array_key_exists('values', $t_args), 'No values specified for bitset!');
    $values = $t_args['values'];
    // max()/min() below cannot handle an empty list, so reject it up front.
    grokit_assert(is_array($values) && \count($values) > 0, 'No values specified for bitset!');

    $indicies = array_keys($values);
    $maxIndex = \max($indicies);
    $minIndex = \min($indicies);

    grokit_assert($maxIndex < 64, 'Highest index of bitset must be less than 64');
    grokit_assert($minIndex >= 0, 'Indicies of bitset must be >= 0');

    $mask = 0;
    foreach ($values as $index => $name) {
        // ASCII-only identifier check; independent of the current locale.
        $valid = preg_match('/\A[A-Za-z_][A-Za-z0-9_]*\z/', strval($name)) === 1;
        grokit_assert($valid, "Invalid name ({$name}) given for index ({$index}) in bitset.");

        $mask = $mask | 1 << $index;
    }

    // Pick the narrowest supported width that contains the highest index.
    // Integer arithmetic only, so $nBits (and everything derived from it)
    // stays an int.
    $widthTypes = [
        8 => 'base::BYTE',
        16 => 'base::SMALLINT',
        32 => 'base::INT',
        64 => 'base::BIGINT',
    ];
    $nBits = null;
    $methodIntType = null;
    foreach ($widthTypes as $width => $typeName) {
        if ($maxIndex < $width) {
            $nBits = $width;
            $methodIntType = $typeName;
            break;
        }
    }
    if (is_null($nBits)) {
        // Unreachable while the "< 64" assertion above holds.
        grokit_error('BITSET requires invalid number of bits (' . ($maxIndex + 1) . ')');
    }

    $nHex = intdiv($nBits, 4);
    $storageType = "uint{$nBits}_t";

    $className = generate_name('BITSET');

    $methods = [];
    $constructors = [];
    $functions = [];

    $globalContents = "";
?>

class <?php echo $className; ?> {
public:
    typedef <?php echo $storageType; ?> StorageType;

private:
    StorageType bits;

    static constexpr StorageType _MASK_ = 0x<?php echo sprintf("%0{$nHex}X", $mask); ?>;

public:

    <?php echo $className; ?>(void);
<?php $constructors[] = [[$methodIntType], true]; ?>
    <?php echo $className; ?>(const StorageType _bits);

    <?php echo $className; ?> & operator =( const StorageType _bits );

    /***** Comparison Opeators *****/
    bool operator ==( const <?php echo $className; ?> & o ) const;
    bool operator !=( const <?php echo $className; ?> & o ) const;
    bool operator <( const <?php echo $className; ?> & o ) const;
    bool operator >( const <?php echo $className; ?> & o ) const;
    bool operator <=( const <?php echo $className; ?> & o ) const;
    bool operator >=( const <?php echo $className; ?> & o ) const;

    /***** Conversion *****/
    void ToJson( Json::Value & dest ) const;
    void FromJson( const Json::Value & src );

    /***** Accessors *****/
<?php $methods[] = ['Bits', [], $methodIntType, true]; ?>
    StorageType Bits(void) const;

<?php $methods[] = ['IsSet', ['base::BYTE'], 'base::bool', true]; ?>
    // Whether or not a bit is set by index
    bool IsSet(unsigned char index) const;

    // Accessors for each value
<?php
    foreach ($values as $index => $name) {
        $methods[] = [$name, [], 'base::bool', true];
?>
    bool <?php echo $name; ?>(void) const;
<?php
    } // for each value
?>
};

inline
<?php echo $className; ?> :: <?php echo $className; ?> ( void ) : bits(0)
{ }

inline
<?php echo $className; ?> :: <?php echo $className; ?> ( const StorageType _bits ) : bits(_bits)
{ }

inline
<?php echo $className; ?> & <?php echo $className; ?> :: operator = (const StorageType _bits) {
    bits = _bits;
    return *this;
}

inline
bool <?php echo $className; ?> :: operator == (const <?php echo $className; ?> & o ) const {
    return bits == o.bits;
}

inline
bool <?php echo $className; ?> :: operator != (const <?php echo $className; ?> & o ) const {
    return bits != o.bits;
}

// this < o: the bits of this are a proper subset of the bits of o
inline
bool <?php echo $className; ?> :: operator < (const <?php echo $className; ?> & o ) const {
    return (bits == (bits & o.bits)) && (bits != o.bits);
}

// this > o: the bits of this are a proper superset of the bits of o
inline
bool <?php echo $className; ?> :: operator > (const <?php echo $className; ?> & o ) const {
    return (bits == (bits | o.bits)) && (bits != o.bits);
}

inline
bool <?php echo $className; ?> :: operator <= (const <?php echo $className; ?> & o ) const {
    return bits == (bits & o.bits);
}

inline
bool <?php echo $className; ?> :: operator >= (const <?php echo $className; ?> & o ) const {
    return bits == (bits | o.bits);
}

inline
auto <?php echo $className; ?> :: Bits( void ) const -> StorageType {
    return bits;
}

inline
bool <?php echo $className; ?> :: IsSet(unsigned char index) const {
    StorageType mask = ((StorageType) 1) << index; //>
    return bits & mask;
}

inline
void <?php echo $className; ?> :: ToJson( Json::Value & dest ) const {
    dest = (Json::Int64) bits;
}

inline
void <?php echo $className; ?> :: FromJson( const Json::Value & src ) {
    bits = (StorageType) src.asInt64();
}

<?php
    // Out-of-class definitions must be inline like the rest of the members,
    // otherwise including the generated code in several translation units
    // produces duplicate symbols.
    foreach ($values as $index => $name) {
?>
inline
bool <?php echo $className; ?> :: <?php echo $name; ?>(void) const {
    return bits & 0x<?php echo sprintf("%X", 1 << $index); ?>;
}

<?php
    } // for each value
?>

<?php ob_start(); ?>

<?php $functions[] = ['Hash', ['@type'], 'base::BIGINT', true, true]; ?>
template<>
inline
uint64_t Hash(const @type & thing) {
    return thing.Bits();
}

inline
void FromString( @type & c, const char * str ) {
    c = atol(str);
}

inline
int ToString( const @type & c, char * buffer ) {
<?php
    // printf length modifier matching the storage width
    $format = $nBits < 16 ? 'hh' : ($nBits < 32 ? 'h' : ($nBits < 64 ? '' : 'l'));
?>
    sprintf(buffer, "%<?php echo $format; ?>d", c.Bits());
    return strlen(buffer) + 1;
}

inline
void ToJson( const @type & src, Json::Value & dest ) {
    src.ToJson(dest);
}

inline
void FromJson( const Json::Value & src, @type & dest ) {
    dest.FromJson(src);
}

<?php $globalContents .= ob_get_clean(); ?>

<?php
    return [
        'kind' => 'TYPE',
        'name' => $className,
        'binary_operators' => ['==', '!=', '>', '<', '>=', '<='],
        'system_headers' => ['cinttypes'],
        'global_content' => $globalContents,
        'complex' => false,
        'methods' => $methods,
        'constructors' => $constructors,
        'functions' => $functions,
        'describe_json' => DescribeJson('integer'),
        'extras' => ['size.bytes' => intdiv($nBits, 8)],
    ];
}
/**
 * Resolves a function call to its description, consulting the function cache
 * first and falling back to the registered concrete functions or to a
 * generator.
 *
 * @param string $name          function name; un-namespaced names are placed
 *                              in the 'base' namespace when $fuzzy is true
 * @param array  $args          argument types of the call
 * @param array  $targs         template arguments of the call
 * @param bool   $fuzzy         whether to apply the 'base' namespace default
 *                              (callers pass false mostly for operators)
 * @param bool   $allowGenerate whether a generator may be invoked when no
 *                              registered concrete function is used
 * @return mixed the function description, cached under the call's hash and
 *               under the hash the description itself reports
 */
public function lookupFunction($name, array $args, array $targs = [], $fuzzy = true, $allowGenerate = true)
{
    // Default the namespace for fuzzy lookups of bare names.
    if (!self::IsNamespaced($name) && $fuzzy) {
        $name = self::JoinNamespace('base', $name);
    }

    $hash = self::HashFunctionSignature($name, $args, $targs);

    // Fast path: this exact signature has been resolved before.
    if ($this->isFunctionDefined($hash)) {
        return $this->getDefinedFunction($hash);
    }

    // Not cached (as far as we know), so resolve it now. Registered concrete
    // functions are only considered for calls without template arguments.
    $info = null;
    $isTemplated = !empty($targs);

    if (!$isTemplated && $this->functionRegistered($name, $args)) {
        $info = $this->lookupConcreteFunction($name, $args, $hash);
    } elseif ($allowGenerate) {
        $info = $this->generateFunction($name, $args, $targs, $hash);
    } else {
        $argList = implode(', ', $args);
        grokit_error("Unable to lookup function {$name}({$argList})");
    }

    // Remember the result under the hash we computed ...
    if (!$this->isFunctionDefined($hash)) {
        $this->functionCache[$hash] = $info;
    }

    // ... and under the hash reported by the result, in case it differs.
    if (!$this->isFunctionDefined($info->hash())) {
        $this->functionCache[$info->hash()] = $info;
    }

    return $info;
}
/**
 * Emits the C++ "produce results" work function for a GIST waypoint.
 *
 * For every query the generated function extracts the GIST state, pulls the
 * results out of it according to the GIST's result type ('single', 'multi',
 * 'fragment' or 'state'), appends them to the output columns and finally
 * hands the finished chunk back through `result`.
 *
 * @param string $wpName  waypoint name, used as suffix of the C++ function name
 * @param array  $queries map of query => info; each info provides 'gist' and
 *                        'output' (the list of output attributes)
 * @param mixed  $attMap  not used by this generator
 */
function GISTGenerate_ProduceResults($wpName, $queries, $attMap)
{
?>

extern "C"
int GISTProduceResultsWorkFunc_<?php echo $wpName; ?> (WorkDescription &workDescription, ExecEngineData &result) {
    GISTProduceResultsWD myWork;
    myWork.swap(workDescription);

    // Inputs
    QueryExit& whichOne = myWork.get_whichOne();
    GLAState& gist = myWork.get_gist();
    int fragmentNo = myWork.get_fragmentNo();

    // Outputs
    Chunk output;

<?php cgDeclareQueryIDs($queries); ?>

    QueryIDSet queriesToRun = whichOne.query;

    // Start columns for outputs
<?php
    foreach ($queries as $query => $info) {
        $output = $info['output'];
        cgConstructColumns($output);
    } // foreach query
?>

    // Output bitstring
    MMappedStorage myStore;
    Column bitmapOut(myStore);
    BStringIterator myOutBStringIter (bitmapOut, queriesToRun);

    PROFILING2_START;
    int64_t numTuples = 0;
#ifdef PER_QUERY_PROFILE
<?php foreach ($queries as $query => $info) { ?>
    int64_t numTuples_<?php echo queryName($query); ?> = 0;
<?php } // foreach query ?>
#endif // PER_QUERY_PROFILE

<?php
    foreach ($queries as $query => $info) {
        $gist = $info['gist'];
        $output = $info['output'];
        $qName = queryName($query);

        $resType = $gist->result_type();
        $resType = get_first_value($resType, ['fragment', 'multi', 'single', 'state']);
?>
    if( whichOne.query == <?php echo $qName; ?> ) {
        // Extract the GIST state
        GLAPtr gistPtr;
        gistPtr.swap(gist);

        FATALIF(gistPtr.get_glaType() != <?php echo $gist->cHash(); ?>,
            "GIST producing results is of incorrect type for query <?php echo $qName; ?>");

        <?php echo $gist; ?> * state_<?php echo $qName; ?> = (<?php echo $gist; ?> *) gistPtr.get_glaPtr();

<?php
        // Every case opens exactly one C++ block; it is closed after the
        // column bookkeeping below.
        switch ($resType) {
            case 'single':
?>
        {
            state_<?php echo $qName; ?>->GetResult(<?php echo implode(', ', $output); ?>);
<?php
                break;
            case 'multi':
?>
        state_<?php echo $qName; ?>->Finalize();
        while (state_<?php echo $qName; ?>->GetNextResult(<?php echo implode(', ', $output); ?>)) {
<?php
                break;
            case 'fragment':
?>
        <?php echo $gist; ?>::Iterator* iterator = state_<?php echo $qName; ?>->Finalize( fragmentNo );
        while( state_<?php echo $qName; ?>->GetNextResult( iterator, <?php echo implode(', ', $output); ?> ) ) {
<?php
                break;
            case 'state':
                reset($output);
                $att = current($output); // Output attribute
?>
        {
<?php
                if ($gist->finalize_as_state()) {
?>
            state_<?php echo $qName; ?>->FinalizeState();
<?php
                }
?>
            <?php echo $att; ?> = <?php echo $att->type(); ?>( state_<?php echo $qName; ?> );
<?php
                break;
            default:
                grokit_error("Do not know how to deal with output type of GLA {$gist}::cGla [{$resType}]");
        } // matches switch
?>
            numTuples++;
#ifdef PER_QUERY_PROFILE
            numTuples_<?php echo $qName; ?>++;
#endif // PER_QUERY_PROFILE

            // Advance the columns
            myOutBStringIter.Insert(<?php echo $qName; ?>);
            myOutBStringIter.Advance();

<?php
        // The column iterator name is the attribute name directly followed by
        // "_Column_Out"; no whitespace may separate the two parts.
        foreach ($output as $att) {
?>
            <?php echo $att; ?>_Column_Out.Insert(<?php echo $att; ?>);
            <?php echo $att; ?>_Column_Out.Advance();
<?php
        } // foreach output att
?>
        } // Matches block for column stuff.
<?php
        if ($resType == 'fragment') {
?>
        // Delete the iterator;
        delete iterator;
<?php
        }
?>
    } // Matches whichOne
<?php
    } // matches foreach query
?>

    myOutBStringIter.Done();
    output.SwapBitmap(myOutBStringIter);

    // Write columns
<?php
    foreach ($queries as $query => $info) {
        $output = $info['output'];
?>
    if( whichOne.query == <?php echo queryName($query); ?> ) {
<?php
        foreach ($output as $att) {
?>
        Column col_<?php echo $att; ?>;
        <?php echo $att; ?>_Column_Out.Done(col_<?php echo $att; ?>);
        output.SwapColumn(col_<?php echo $att; ?>, <?php echo $att->slot(); ?>);
<?php
        } // foreach output att
?>
    } // Matches whichOne
<?php
    } // foreach query
?>

    ChunkContainer tempResult(output);
    tempResult.swap(result);

    return WP_FINALIZE; // ProduceResults
}

<?php
}
/**
 * Generates a C++ categorical type that stores an ID into a named dictionary
 * of strings.
 *
 * Template arguments:
 *   - dictionary | dict | 0: name of the dictionary backing the factor
 *   - bytes | 1: requested storage size in bytes (default 2); only 1, 2, 4
 *     and 8 are supported
 *
 * The storage size is raised automatically when the dictionary is too large
 * for the requested size. The largest value of the storage type is reserved
 * as InvalidID, so it is not available as a regular ID.
 *
 * @param array $t_args template arguments, see above
 * @return array type description (kind 'TYPE') for the generated class
 */
function FACTOR(array $t_args)
{
    $rawDict = get_first_key($t_args, ['dictionary', 'dict', 0]);
    // Double the quotes so that we escape them in SQLite, and add backslashes
    // to them so that we escape them in C++.
    $dict = addcslashes(\grokit\doubleChars($rawDict, '"'), '"\\');

    $cardinality = \grokit\dictionarySize($rawDict);
    $requestedBytes = get_first_key_default($t_args, ['bytes', 1], 2);

    // Smallest supported width whose ID space, minus the reserved InvalidID,
    // still covers the dictionary: we need $cardinality <= 256^n - 1.
    // Only supported widths are considered, so a large dictionary never ends
    // up with an unsupported 3-, 5-, 6- or 7-byte storage type.
    // NOTE(review): assumes $cardinality is the number of dictionary entries
    // -- confirm against \grokit\dictionarySize.
    $cardBytes = 8;
    foreach ([1, 2, 4] as $candidate) {
        if ($cardinality < pow(256, $candidate)) {
            $cardBytes = $candidate;
            break;
        }
    }
    $storageBytes = $cardBytes > $requestedBytes ? $cardBytes : $requestedBytes;

    switch ($storageBytes) {
        case 1:
            $storageType = 'uint8_t';
            break;
        case 2:
            $storageType = 'uint16_t';
            break;
        case 4:
            $storageType = 'uint32_t';
            break;
        case 8:
            $storageType = 'uint64_t';
            break;
        default:
            grokit_error('Unsupported # of bytes (' . $storageBytes . ') given for FACTOR, only 1, 2, 4, and 8 supported.');
    }

    $className = generate_name('FACTOR_' . ensure_identifier($dict));

    $stringType = lookupType('base::STRING');

    $globalContent = '';

    $methods = [];
    $constructors = [];
    $functions = [];
?>

class <?php echo $className; ?> {
public:
    typedef <?php echo $storageType; ?> StorageType;

    static const char * DictionaryName __attribute__((weak));

    static const StorageType InvalidID __attribute__((weak));

    static const StorageType MaxID __attribute__((weak));

    static const Dictionary & globalDictionary __attribute__((weak));

public:
    /* ----- Members ----- */

    // The ID of this Factor;
    StorageType myID;

    /* ----- Constructors / Destructors ----- */

    // Default constructor
    <?php echo $className; ?>( void );

    // Constructor from null (same as default)
    <?php echo $className; ?>( const GrokitNull & );

    // Constructor from C strings / string literals
<?php $constructors[] = [['base::STRING_LITERAL'], true]; ?>
    <?php echo $className; ?>( const char * );

    // Constructor from Grokit STRING type.
<?php $constructors[] = [['base::STRING'], true]; ?>
    <?php echo $className; ?>( const <?php echo $stringType; ?> & );

    // Constructor from storage type
    <?php echo $className; ?>( const StorageType );

    // Copy constructor and copy assignment
    // These can both be default
    <?php echo $className; ?>( const <?php echo $className; ?> & ) = default;
    <?php echo $className; ?> & operator =( const <?php echo $className; ?> & ) = default;

    // Destructor
    ~<?php echo $className; ?>() { }

    /* ----- Methods ----- */

    // Standard FromString method
    void FromString( const char * );

    // FromString method used when building the dictionaries.
    void FromString( const char *, Dictionary & );

    // Looks up the factor in the global dictionary and returns the string
<?php $methods[] = ['ToString', [], 'base::STRING_LITERAL', true]; ?>
    const char * ToString( void ) const;

    // Returns the ID of the Factor.
    StorageType GetID( void ) const;

    // Returns whether or not the Factor is valid.
<?php $methods[] = ['Valid', [], 'base::bool', true]; ?>
    bool Valid( void ) const;
<?php $methods[] = ['Invalid', [], 'base::bool', true]; ?>
    bool Invalid( void ) const;

    // Translate the content
    void Translate( const Dictionary::TranslationTable& );

    void toJson( Json::Value & dest ) const;
    void fromJson( const Json::Value & src );

    /* ----- Operators ----- */

    // The dictionary keeps track of what the sorted order of the strings is.
    // These methods are based on the lexicographical ordering of the strings
    // the factors represent
    bool operator ==( const <?php echo $className; ?> & ) const;
    bool operator !=( const <?php echo $className; ?> & ) const;
    bool operator <( const <?php echo $className; ?> & ) const;
    bool operator <=( const <?php echo $className; ?> & ) const;
    bool operator >( const <?php echo $className; ?> & ) const;
    bool operator >=( const <?php echo $className; ?> & ) const;

    // Implicit conversion to storage type
    operator StorageType () const;
};

// Static member initialization
const <?php echo $className; ?>::StorageType <?php echo $className; ?>::InvalidID = std::numeric_limits<StorageType>::max();
const <?php echo $className; ?>::StorageType <?php echo $className; ?>::MaxID = <?php echo $className; ?>::InvalidID - 1;
// The literal must hold exactly the dictionary name, without padding.
const char * <?php echo $className; ?>::DictionaryName = "<?php echo $dict; ?>";
const Dictionary & <?php echo $className; ?>::globalDictionary = Dictionary::GetDictionary(<?php echo $className; ?>::DictionaryName);

/* ----- Constructors ----- */

// Default constructor
inline
<?php echo $className; ?> :: <?php echo $className; ?>( void ):
    myID(InvalidID)
{}

inline
<?php echo $className; ?> :: <?php echo $className; ?>( const GrokitNull & nullval ):
    myID(InvalidID)
{ }

// Constructor from C strings / string literals
inline
<?php echo $className; ?> :: <?php echo $className; ?>( const char * str ) {
    FromString(str);
}

// Constructor from Grokit STRING type
inline
<?php echo $className; ?> :: <?php echo $className; ?>( const <?php echo $stringType; ?> & str ) {
    FromString(str.ToString());
}

// Constructor from storage type
inline
<?php echo $className; ?> :: <?php echo $className; ?>( const <?php echo $storageType; ?> id ):
    myID(id)
{ }

/* ----- Methods ----- */

inline
auto <?php echo $className; ?> :: GetID(void) const -> StorageType {
    return myID;
}

// Standard FromString method
inline
void <?php echo $className; ?> :: FromString( const char * str ) {
    // Global dictionary will return InvalidID if not found
    myID = globalDictionary.Lookup(str, InvalidID );
}

// FromString method used when building the dictionaries
inline
void <?php echo $className; ?> :: FromString( const char * str, Dictionary & localDict ) {
    // First check if we are in the local dictionary
    myID = localDict.Lookup(str, InvalidID );
    if( myID != InvalidID )
        return;

    // Next check if we are in the global dictionary
    myID = globalDictionary.Lookup(str, InvalidID );
    if( myID != InvalidID )
        return;

    // Add a new entry to the local dictionary.
    // The dictionary should throw an error if the new ID is greater than
    // MaxID.
    myID = localDict.Insert( str, MaxID );
}

// Looks up the factor in the global dictionary and returns the string
inline
const char * <?php echo $className; ?> :: ToString( void ) const {
    return globalDictionary.Dereference(myID);
}

// Determine whether or not the factor is valid
inline
bool <?php echo $className; ?> :: Valid( void ) const {
    return myID != InvalidID;
}

inline
bool <?php echo $className; ?> :: Invalid(void) const {
    return myID == InvalidID;
}

// Translate the content
inline
void <?php echo $className; ?> :: Translate( const Dictionary::TranslationTable & tbl ) {
    auto it = tbl.find(myID);
    if( it != tbl.end() ) {
        myID = it->second;
    }
}

inline
void <?php echo $className; ?> :: toJson( Json::Value & dest ) const {
    dest = (Json::Int64) myID;
}

inline
void <?php echo $className; ?> :: fromJson( const Json::Value & src ) {
    myID = (StorageType) src.asInt64();
}

/* ----- Operators ----- */

inline
bool <?php echo $className; ?> :: operator ==( const <?php echo $className; ?> & o ) const {
    return myID == o.myID;
}

inline
bool <?php echo $className; ?> :: operator !=( const <?php echo $className; ?> & o ) const {
    return myID != o.myID;
}

// The ordering operators are false whenever either side is invalid.
inline
bool <?php echo $className; ?> :: operator <( const <?php echo $className; ?> & o ) const {
    return Valid() && o.Valid() && globalDictionary.Compare(myID, o.myID) < 0;
}

inline
bool <?php echo $className; ?> :: operator <=( const <?php echo $className; ?> & o ) const {
    return Valid() && o.Valid() && globalDictionary.Compare(myID, o.myID) <= 0;
}

inline
bool <?php echo $className; ?> :: operator >( const <?php echo $className; ?> & o ) const {
    return Valid() && o.Valid() && globalDictionary.Compare(myID, o.myID) > 0;
}

inline
bool <?php echo $className; ?> :: operator >=( const <?php echo $className; ?> & o ) const {
    return Valid() && o.Valid() && globalDictionary.Compare(myID, o.myID) >= 0;
}

// Implicit conversion to storage type
inline
<?php echo $className; ?> :: operator StorageType() const {
    return myID;
}

<?php ob_start(); // Global functions ?>

inline
void FromString( @type & f, const char * str ) {
    f.FromString(str);
}

inline
void FromString( @type & f, const char * str, Dictionary & localDict ) {
    f.FromString(str, localDict);
}

inline
int ToString( const @type & f, char * buffer ) {
    const char * str = f.ToString();
    strcpy(buffer, str);
    return strlen(buffer) + 1;
}

<?php $functions[] = ['Hash', ['@type'], 'base::BIGINT', true, true]; ?>
template<>
inline
uint64_t Hash( const @type & x ) {
    return x.GetID();
}

inline
void ToJson( const @type & src, Json::Value & dest ) {
    src.toJson(dest);
}

inline
void FromJson( const Json::Value & src, @type & dest ) {
    dest.fromJson(src);
}

<?php $functions[] = ['IsNull', ['@type'], 'BASE::BOOL', true, true]; ?>
inline
bool IsNull( const @type f ) {
    return f.Invalid();
}

<?php $globalContent .= ob_get_clean(); ?>

<?php
    // Function to get the dictionary at runtime: emits C++ that copies every
    // entry of the global dictionary into $var["levels"].
    $describeInfoJson = function ($var, $myType) {
?>
    <?php echo $var; ?>["levels"] = Json::Value(Json::arrayValue);
    for( auto it = <?php echo $myType; ?>::globalDictionary.cbegin(); it != <?php echo $myType; ?>::globalDictionary.cend(); it++ ) {
        <?php echo $var; ?>["levels"][it->first] = it->second;
    }
<?php
    };

    return [
        'kind' => 'TYPE',
        'name' => $className,
        'dictionary' => $dict,
        'system_headers' => ['limits', 'cstring', 'cinttypes'],
        'user_headers' => ['Dictionary.h', 'DictionaryManager.h', 'ColumnIteratorDict.h'],
        'properties' => ['categorical'],
        'extras' => ['cardinality' => $cardinality, 'size.bytes' => $storageBytes],
        'binary_operators' => ['==', '!=', '<', '>', '<=', '>='],
        'global_content' => $globalContent,
        'complex' => 'ColumnIteratorDict< @type >',
        'methods' => $methods,
        'constructors' => $constructors,
        'functions' => $functions,
        'describe_json' => DescribeJson('factor', $describeInfoJson),
    ];
}
/**
 * Selects the registered method called $name that best fits the given
 * argument types.
 *
 * Every candidate reports a compatibility rating for the arguments; negative
 * ratings are discarded and a rating of 0 is an exact match. An exact match
 * wins. Otherwise the lookup only succeeds when exactly one candidate is
 * compatible; zero or several compatible candidates are reported as errors.
 *
 * @param string $name method name
 * @param array  $args argument types of the call
 * @return mixed the selected candidate
 */
public function lookupMethod($name, array $args)
{
    grokit_assert(
        array_key_exists($name, $this->methods),
        'No method registered with the name ' . $this->value() . '->' . $name
    );

    // Human readable form of the call, used in diagnostics.
    $fStr = $this->value() . '->' . $name . '(' . implode(', ', $args) . ')';

    // Compatible candidates grouped by rating. All of them are kept so that
    // ambiguous lookups can list every possibility.
    $byRating = [];
    $total = 0;
    foreach ($this->methods[$name] as $candidate) {
        $score = $candidate->compatibility($args);
        if ($score < 0) {
            continue;
        }

        if (!array_key_exists($score, $byRating)) {
            $byRating[$score] = [];
        }
        $byRating[$score][] = $candidate;
        $total++;
    }

    $hasExact = array_key_exists(0, $byRating);
    grokit_logic_assert(
        !$hasExact || count($byRating[0]) == 1,
        'Got more than one exact match for method ' . $fStr
    );

    if ($hasExact && count($byRating[0]) == 1) {
        // An exact match always wins.
        $selected = $byRating[0][0];
    } elseif ($total == 1) {
        // No exact match, but only a single compatible candidate.
        $onlyGroup = array_pop($byRating);
        $selected = array_pop($onlyGroup);
    } elseif ($total == 0) {
        grokit_error('Failed to lookup method ' . $fStr . ', no possible matches.');
    } else {
        // Ambiguous: list every compatible candidate in the error message.
        $descriptions = [];
        foreach ($byRating as $group) {
            foreach ($group as $selected) {
                $descriptions[] = $selected;
            }
        }
        $matchStr = implode(PHP_EOL, $descriptions);

        grokit_error('Failed to lookup method ' . $fStr . ', multiple possible' . ' matches:' . PHP_EOL . $matchStr);
    }

    return $selected;
}
/**
 * Generates a GLA that keeps the top K input tuples according to a
 * multi-attribute ordering and returns them in order.
 *
 * Template arguments:
 *   - input: map of name => type; only used when $inputs is empty
 *   - order: map of input name => direction. Ascending is one of ASC,
 *     ASCENDING, +, > and descending one of DESC, DESCENDING, DES, DSC, -, <
 *     (case-insensitive)
 *   - rank:  optional output attribute receiving the 1-based position of the
 *     tuple in the result
 *   - limit: K; 0 or missing means 2^32
 *   - debug: debug level; values above 0 add tracing to the generated code
 *
 * All outputs other than the rank attribute are matched positionally with the
 * inputs and take over their types.
 *
 * @param array $t_args  template arguments, see above
 * @param array $inputs  map of input name => type
 * @param array $outputs map of output name => type (null when unknown)
 * @return array GLA description with result type 'multi'
 */
function OrderBy(array $t_args, array $inputs, array $outputs)
{
    if (\count($inputs) == 0) {
        grokit_assert(array_key_exists('input', $t_args), 'No inputs given for OrderBy');

        // Store the resolved types in $inputs itself. Iterating the template
        // argument by reference would only update $t_args and leave $inputs
        // holding the unresolved identifiers.
        foreach ($t_args['input'] as $name => $type) {
            if (is_identifier($type)) {
                $type = lookupType(strval($type));
            }

            grokit_assert(is_datatype($type), 'Invalid type given for input ' . $name);
            $inputs[$name] = $type;
        }
    }

    grokit_assert(array_key_exists('order', $t_args), 'No ordering attributes given for OrderBy');
    $ordering = $t_args['order'];

    $ascOpts = ['ASC', 'ASCENDING', '+', '>'];
    $descOpts = ['DESC', 'DESCENDING', 'DES', 'DSC', '-', '<'];

    // Map of ordering attribute => true for ascending, false for descending
    $ascending = [];
    foreach ($ordering as $name => $order) {
        grokit_assert(array_key_exists($name, $inputs), 'Ordering attribute ' . $name . ' not present in input');

        if (in_array_icase($order, $ascOpts)) {
            $ascending[$name] = true;
        } elseif (in_array_icase($order, $descOpts)) {
            $ascending[$name] = false;
        } else {
            grokit_error("Unknown ordering " . $order . " given for attribute " . $name);
        }
    }

    $rankAtt = get_default($t_args, 'rank', null);
    grokit_assert(is_null($rankAtt) || is_attribute($rankAtt), 'Rank argument should be null or an attribute');
    grokit_assert(is_null($rankAtt) || array_key_exists($rankAtt->name(), $outputs), 'Rank attribute does not exist in outputs');
    if (!is_null($rankAtt) && is_null($outputs[$rankAtt->name()])) {
        $outputs[$rankAtt->name()] = lookupType('base::BIGINT');
    }

    // Every output except the rank is a copy of an input.
    $outputPassthroughAtts = [];
    foreach ($outputs as $name => $type) {
        if (is_null($rankAtt) || $rankAtt->name() != $name) {
            $outputPassthroughAtts[$name] = $type;
        }
    }

    $nInputs = \count($inputs);
    // The positional pairing below needs exactly one output per input;
    // anything else would generate invalid code.
    grokit_assert(
        \count($outputPassthroughAtts) == $nInputs,
        'OrderBy: expected ' . $nInputs . ' pass-through outputs to match the inputs, ' .
        \count($outputPassthroughAtts) . ' given'
    );

    $outToIn = [];
    reset($inputs);
    reset($outputPassthroughAtts);
    for ($i = 0; $i < $nInputs; $i++) {
        $outName = key($outputPassthroughAtts);
        $inName = key($inputs);

        $outToIn[$outName] = $inName;

        // Unify types
        $outputs[$outName] = $inputs[$inName];
        $outputPassthroughAtts[$outName] = $inputs[$inName];

        next($inputs);
        next($outputPassthroughAtts);
    }

    // Only the ordering attributes take part in the tuple comparison.
    $orderAtts = [];
    foreach ($inputs as $name => $type) {
        if (array_key_exists($name, $ordering)) {
            $orderAtts[$name] = $type;
        }
    }

    // Give 2^32 as the default, which should be effectively infinite
    $limitDefault = pow(2, 32);
    $limit = get_default($t_args, 'limit', $limitDefault);
    $limit = $limit == 0 ? $limitDefault : $limit;
    grokit_assert($limit > 0, 'The OrderBy limit must be a positive integer');

    $className = generate_name('OrderBy');

    $debug = get_default($t_args, 'debug', 0);
?>

class <?php echo $className; ?> {
    struct Tuple {
<?php foreach ($inputs as $name => $type) { ?>
        <?php echo $type; ?> <?php echo $name; ?>;
<?php } ?>

        Tuple( void ) = default;

        Tuple( const Tuple & other ) = default;

        Tuple( <?php echo array_template('const {val} & _{key}', ', ', $inputs); ?> ):
            <?php echo array_template('{key}(_{key})', ', ', $inputs); ?>

        { }

        Tuple & operator = (const Tuple & other ) = default;

        // "Greater" means "comes earlier in the requested ordering".
        bool operator > ( const Tuple & other ) const {
<?php
    foreach ($orderAtts as $name => $type) {
        $op1 = $ascending[$name] ? '<' : '>';
        $op2 = !$ascending[$name] ? '<' : '>';
?>
            if( <?php echo $name; ?> <?php echo $op1; ?> other.<?php echo $name; ?> )
                return true;
            else if( <?php echo $name; ?> <?php echo $op2; ?> other.<?php echo $name; ?> )
                return false;
<?php
    }
?>

            return false;
        }

        bool operator < ( const Tuple& other ) const {
            return other > *this;
        }

        bool operator <= (const Tuple & other ) const {
            return ! (*this > other );
        }

        bool operator >= (const Tuple & other ) const {
            return !( other > *this );
        }

<?php if ($debug > 0) { ?>
        std::string toString(void) const {
            std::ostringstream ss;

            ss << "( "; // >
<?php
        $first = true;
        foreach ($inputs as $name => $type) {
            if ($first) {
                $first = false;
            } else {
                echo '            ss << ", ";' . PHP_EOL;
            }
?>
            ss << <?php echo $name; ?>; // >
<?php
        } // foreach input
?>
            ss << " )"; // >

            return ss.str();
        }
<?php } // debug > 0 ?>
    }; // struct Tuple

    typedef std::vector<Tuple> TupleVector;

public:
    class Iterator {
    public:
        typedef TupleVector::const_iterator iter_type;

    private:
        iter_type begin;
        iter_type curr;
        iter_type end;

    public:
        Iterator(void) = default;

        Iterator( const iter_type & _begin, const iter_type & _end ) :
            begin(_begin), curr(_begin), end(_end)
        { }

        bool GetNextResult(<?php echo typed_ref_args($outputs); ?>) {
            if( curr != end ) {
<?php foreach ($outputPassthroughAtts as $name => $type) { ?>
                <?php echo $name; ?> = curr-><?php echo $outToIn[$name]; ?>;
<?php
    }
    if (!is_null($rankAtt)) {
?>
                <?php echo $rankAtt; ?> = (curr - begin) + 1;
<?php
    } // if we need to output the rank
?>
                curr++;
                return true;
            }
            else {
                return false;
            }
        }
    };

private:

    uintmax_t __count; // number of tuples covered

    // K, as in Top-K
    static constexpr size_t K = <?php echo $limit; ?>;

    TupleVector tuples;

    // Iterator for multi output type
    Iterator multiIterator;

    typedef std::greater<Tuple> TupleCompare;

    // Function to force sorting so that GetNext gets the tuples in order.
    void Sort(void) {
        TupleCompare comp;
        // If tuples doesn't contain at least K elements, it was never made into
        // a heap in the first place, so sort it normally.
        if( tuples.size() >= K ) {
            std::sort_heap(tuples.begin(), tuples.end(), comp);
        } else {
            std::sort(tuples.begin(), tuples.end(), comp);
        }
    }

    // Internal function to add a tuple to the heap
    void AddTupleInternal(Tuple & t ) {
<?php if ($debug >= 1) { ?>
        {
            std::ostringstream ss;
            ss << "T ACK: " << t.toString() << std::endl; // >
            std::cerr << ss.str(); // >
        }
<?php } ?>
        TupleCompare comp;
        if( tuples.size() >= K ) {
<?php if ($debug >= 1) { ?>
            {
                std::ostringstream ss;
                ss << "T REP: " << tuples.front().toString() << std::endl; // >
                std::cerr << ss.str(); // >
            }
<?php } ?>
            std::pop_heap(tuples.begin(), tuples.end(), comp);
            tuples.pop_back();
            tuples.push_back(t);
            std::push_heap(tuples.begin(), tuples.end(), comp);
        } else {
            tuples.push_back(t);
            if( tuples.size() == K ) {
                std::make_heap(tuples.begin(), tuples.end(), comp);
            }
        }
    }

public:

    <?php echo $className; ?>() :
        __count(0),
        tuples(),
        multiIterator()
    { }

    ~<?php echo $className; ?>() { }

    void AddItem(<?php echo const_typed_ref_args($inputs); ?>) {
        __count++;
        Tuple t(<?php echo args($inputs); ?>);
<?php if ($debug >= 2) { ?>
        {
            std::ostringstream ss;
            ss << "T NEW: " << t.toString() << std::endl; // >
            std::cerr << ss.str(); // >
        }
<?php } ?>
        // Once K tuples are held, only a tuple better than the worst kept one
        // (the heap front) needs to be added.
        if( tuples.size() == K && !(t > tuples.front()) )
            return;

        AddTupleInternal(t);
    }

    void AddState( <?php echo $className; ?> & other ) {
        __count += other.__count;
        for( Tuple & el : other.tuples ) {
            if( tuples.size() < K /*>*/ || el > tuples.front() ) {
                AddTupleInternal(el);
            }
        }
    }

    void Finalize() {
        Sort();
        Iterator::iter_type begin = tuples.cbegin();
        Iterator::iter_type end = tuples.cend();
        multiIterator = Iterator(begin, end);

<?php if ($debug >= 1) { ?>
        std::ostringstream ss;
        ss << "[ "; //>
        bool first = true;
        for( auto el : tuples ) {
            if( first )
                first = false;
            else
                ss << ", "; //>>

            ss << el.toString(); //>>
        }
        ss << " ]" << std::endl; // >
        std::cerr << ss.str(); //>>
<?php } ?>
    }

    bool GetNextResult( <?php echo typed_ref_args($outputs); ?> ) {
        return multiIterator.GetNextResult(<?php echo args($outputs); ?>);
    }
};

<?php
    $system_headers = ['vector', 'algorithm', 'cinttypes'];
    if ($debug > 0) {
        $system_headers = array_merge($system_headers, ['iostream', 'sstream', 'string']);
    }

    return [
        'kind' => 'GLA',
        'name' => $className,
        'input' => $inputs,
        'output' => $outputs,
        'result_type' => 'multi',
        'system_headers' => $system_headers,
    ];
}
/**
 * Emits C++ code that assigns the JSON equivalent of a PHP value to a C++
 * Json::Value expression.
 *
 * Translation rules, checked in this order:
 *  - null            : Json::Value(Json::nullValue)
 *  - string          : a C++ string literal (escaped)
 *  - numeric         : the value printed verbatim
 *  - array           : printArray() when every key is a non-negative integer
 *                      (this includes the empty array), printObject() otherwise
 *  - object          : printObject()
 *  - anything else   : reported through grokit_error()
 *
 * @param mixed  $js  The PHP value to translate.
 * @param string $var The C++ expression that receives the value.
 */
private static function printJson($js, $var)
{
    if (is_null($js)) {
        ?>
    <?php echo $var; ?> = Json::Value(Json::nullValue);
<?php
    } elseif (is_string($js)) {
        // The value lands inside a C++ string literal, so quotes, backslashes
        // and control characters must be escaped or the generated code will
        // not compile (or will silently contain a different string).
        $escaped = addcslashes($js, "\0..\37\"\\");
        ?>
    <?php echo $var; ?> = "<?php echo $escaped; ?>";
<?php
    } elseif (is_numeric($js)) {
        ?>
    <?php echo $var; ?> = <?php echo $js; ?>;
<?php
    } elseif (is_array($js)) {
        // If it's an associative array (i.e. contains non-natural-number
        // indices) treat it as an object instead.
        $naturalIndices = true;
        foreach ($js as $ind => $val) {
            if (!is_int($ind) || $ind < 0) {
                $naturalIndices = false;
                break; // one offending key is enough to decide
            }
        }

        if ($naturalIndices) {
            self::printArray($js, $var);
        } else {
            self::printObject($js, $var);
        }
    } elseif (is_object($js)) {
        self::printObject($js, $var);
    } else {
        grokit_error("Cannot translate value of type " . gettype($js) . " to JSON");
    }
}
/**
 * A GLA that estimates the cardinality of a dataset using a bloom filter of
 * a configurable size.
 *
 * Note: This filter has very high performance, so long as all of the states
 * fit into cache, preferably L1 or L2, but L3 is also fine. Once the states
 * are large enough that all of them cannot fit inside L3 cache at the same
 * time, performance takes a nose dive (4x loss minimum).
 *
 * Template arguments read from $t_args:
 *  - 'exponent'   : integer in (0, 64); the filter has 2^exponent bits
 *                   (default 16).
 *  - 'null.check' : either a boolean applied to every input, or a list of
 *                   input names; a checked input that is null causes the
 *                   tuple to be skipped (default false).
 *  - 'debug'      : when greater than 0, the generated GetResult() prints its
 *                   intermediate values to std::cout (default 0).
 *
 * @param array $t_args Template arguments (see above).
 * @param array $input  Input name => type map; every input is hashed.
 * @param array $output Exactly one output; if its type is null it defaults
 *                      to BASE::BIGINT. The type must be numeric.
 *
 * @return array GLA description (kind, name, input, output, result type and
 *               the headers required by the generated class).
 */
function BloomFilter(array $t_args, array $input, array $output)
{
    grokit_assert(\count($output) == 1, 'BloomFilter produces only 1 value, ' . \count($output) . ' outputs given.');

    $outputName = array_keys($output)[0];
    $outputType = array_get_index($output, 0);
    if (is_null($outputType)) {
        $outputType = lookupType('BASE::BIGINT');
    }
    $output[$outputName] = $outputType;

    grokit_assert($outputType->is('numeric'), 'BloomFilter output must be numeric!');

    $exp = get_first_key_default($t_args, ['exponent'], 16);
    grokit_assert(is_integer($exp), 'BloomFilter exponent must be an integer.');
    grokit_assert($exp > 0 && $exp < 64, 'BloomFilter exponent must be in range (0,64), ' . $exp . ' given.');

    // Work out, per input, whether a null value should cause the tuple to be
    // skipped.
    $nullCheck = get_default($t_args, 'null.check', false);
    $nullable = [];
    if (is_bool($nullCheck)) {
        foreach ($input as $name => $type) {
            $nullable[$name] = $nullCheck;
        }
    } elseif (is_array($nullCheck)) {
        foreach ($input as $name => $type) {
            $nullable[$name] = false;
        }

        foreach ($nullCheck as $index => $n) {
            grokit_assert(is_string($n), 'BloomFilter null.check has invalid value at position ' . $index);
            grokit_assert(array_key_exists($n, $nullable), 'BloomFilter null.check has unknown input ' . $n . ' at position ' . $index);
            $nullable[$n] = true;
        }
    } else {
        grokit_error('BloomFilter null.check must be boolean or list of inputs to check for nulls');
    }

    $debug = get_default($t_args, 'debug', 0);

    $bits = pow(2, $exp);
    $bytes = ceil($bits / 8.0);

    // Calculate the number of bits set for every possible value of a byte
    $nBits = [];
    for ($i = 0; $i < 256; $i++) {
        $n = $i;
        $b = 0;
        while ($n > 0) {
            $n &= $n - 1; // clears the lowest set bit
            $b++;
        }
        $nBits[$i] = $b;
    }

    $className = generate_name('BloomFilter');
    ?>

class <?php echo $className; ?> {
    static constexpr size_t BITS = <?php echo $bits; ?>;
    static constexpr size_t BYTES = <?php echo $bytes; ?>;
    static constexpr size_t MASK = BITS - 1;
    static constexpr std::array<unsigned char, 256> BITS_SET = { <?php echo implode(', ', $nBits); ?> };
    static constexpr std::array<unsigned char, 8> BIT_MASKS = {
        0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80
    };

    size_t count;

    std::array<unsigned char, BYTES> set;
    //unsigned char set[BYTES];
    //std::bitset<BITS> set;

public:
    <?php echo $className; ?>() : count(0), set() {
        for( size_t i = 0; i < BYTES; i++ ) { //>
            set[i] = 0;
        }
    }

    ~<?php echo $className; ?>() { }

    void AddItem( <?php echo const_typed_ref_args($input); ?> ) {
        count++;
<?php
    foreach ($nullable as $name => $check) {
        if ($check) {
            ?>
        if( IsNull( <?php echo $name; ?> ) ) return;
<?php
        } // if checking for nulls
    } // foreach input
    ?>
        size_t hashVal = H_b;
<?php
    foreach ($input as $name => $type) {
        ?>
        hashVal = CongruentHash(Hash(<?php echo $name; ?>), hashVal);
<?php
    } // foreach input
    ?>
        hashVal = hashVal & MASK;
        const size_t bucket = hashVal >> 3;
        const size_t bucket_index = hashVal & 0x07;
        const unsigned char mask = BIT_MASKS[bucket_index];
        set[bucket] |= mask;
    }

    void AddState( <?php echo $className; ?> & o ) {
        count += o.count;
        for( size_t i = 0; i < BYTES; i++ ) { //>
            set[i] |= o.set[i];
        }
    }

    void GetResult( <?php echo $outputType; ?> & <?php echo $outputName; ?> ) {
        size_t nBitsSet = 0;
        constexpr long double bits = static_cast<long double>(BITS);
        for( size_t i = 0; i < BYTES; i++ ) { //>
            nBitsSet += BITS_SET[set[i]];
        }
        long double bitsSet = static_cast<long double>(nBitsSet);

        // Declared before the branches so that it is still in scope for the
        // optional debug output at the end of this method.
        long double cardinality = 0;

        if( nBitsSet == BITS ) {
            // All Bits set, just give the cardinality as an estimate.
            cardinality = static_cast<long double>(count);
            <?php echo $outputName; ?> = count;
        } else {
            cardinality = - bits * std::log(1 - (bitsSet / bits));
            <?php echo $outputName; ?> = cardinality;
        }
<?php
    if ($debug > 0) {
        ?>
        std::cout << "BloomFilter:"
            << " bitsSet(" << bitsSet << ")"
            << " bits(" << bits << ")"
            << " cardinality(" << cardinality << ")"
            << " output(" << <?php echo $outputName; ?> << ")"
            << std::endl; //>
<?php
    } // if debugging enabled
    ?>
    }
};

// Storage for static members
constexpr std::array<unsigned char, 256> <?php echo $className; ?>::BITS_SET;
constexpr std::array<unsigned char, 8> <?php echo $className; ?>::BIT_MASKS;

<?php
    $system_headers = ['cmath', 'array'];
    if ($debug > 0) {
        $system_headers[] = 'iostream';
    }

    return [
        'kind' => 'GLA',
        'name' => $className,
        'input' => $input,
        'output' => $output,
        'result_type' => 'single',
        'user_headers' => ['HashFunctions.h'],
        'system_headers' => $system_headers,
    ];
}
/**
 * Generates a categorical (factor) C++ type that stores one of a fixed set of
 * string values as a small unsigned integer ID.
 *
 * The levels come from one of two template arguments:
 *  - 'dict'   : an explicit ID => name mapping, used as given.
 *  - 'values' (or positional argument 0): a list of names that are numbered
 *               consecutively starting from 'start.at' (default 0).
 *
 * The generated class reserves the maximum value of its storage type as
 * InvalidID, so the storage type is chosen as the smallest of uint8_t,
 * uint16_t, uint32_t and uint64_t that can hold every valid ID plus that
 * reserved value.
 *
 * @param array $t_args Template arguments (see above).
 *
 * @return array TYPE description, including the generated global helper
 *               functions, methods, constructors and JSON description.
 */
function CATEGORY(array $t_args)
{
    // In both branches $maxID ends up as ONE PAST the largest valid ID, i.e.
    // the number of distinct ID values that must be representable before the
    // reserved invalid ID is accounted for.
    if (array_key_exists('dict', $t_args)) {
        $values = $t_args['dict'];
        $maxID = 0;
        foreach ($values as $id => $val) {
            // Using $id + 1 keeps this consistent with the 'values' branch.
            // Tracking the largest ID itself would let a dictionary whose top
            // ID is e.g. 255 pick uint8_t, whose maximum (InvalidID) would
            // then collide with that valid ID.
            $maxID = \max($id + 1, $maxID);
        }
    } else {
        $old_vals = get_first_key($t_args, ['values', 0]);
        $startAt = get_first_key_default($t_args, ['start.at'], 0);
        $values = [];
        $maxID = $startAt;
        foreach ($old_vals as $ind => $val) {
            $values[$maxID++] = $val;
        }
    }

    $cardinality = \count($values);

    // Add 1 to the cardinality for the invalid id
    $storageTypeBits = ceil(log($maxID + 1, 2));
    if ($storageTypeBits > 64) {
        // This should never happen. PHP would explode processing 2^64 values.
        grokit_error("Unable to store {$cardinality} values within 64 bits.");
    } elseif ($storageTypeBits > 32) {
        $storageType = 'uint64_t';
        $storageBytes = 8;
    } elseif ($storageTypeBits > 16) {
        $storageType = 'uint32_t';
        $storageBytes = 4;
    } elseif ($storageTypeBits > 8) {
        $storageType = 'uint16_t';
        $storageBytes = 2;
    } else {
        $storageType = 'uint8_t';
        $storageBytes = 1;
    }

    $className = generate_name('CATEGORY');
    $stringType = lookupType('base::STRING');

    // Descriptions of the generated API, filled in next to the corresponding
    // C++ declarations below and returned to the caller.
    $methods = [];
    $constructors = [];
    $functions = [];
    ?>

class <?php echo $className; ?> {
public:
    typedef <?php echo $storageType; ?> StorageType;
    typedef std::unordered_map<StorageType, std::string> IDToNameMap;
    typedef std::unordered_map<std::string, StorageType> NameToIDMap;

    static const StorageType InvalidID __attribute__((weak));

private:
    static const IDToNameMap idToName __attribute__((weak));
    static const NameToIDMap nameToID __attribute__((weak));

    // The ID of this categorical variable
    StorageType myID;

public:

    /* ----- Constructors / Destructor ----- */
    <?php echo $className; ?>( void );

<?php $constructors[] = [['base::STRING_LITERAL'], true]; ?>
    <?php echo $className; ?>( const char * );
<?php $constructors[] = [['base::STRING'], true]; ?>
    <?php echo $className; ?>( const <?php echo $stringType; ?> & );
    <?php echo $className; ?>( const <?php echo $storageType; ?> );
    <?php echo $className; ?>( const <?php echo $className; ?> & );
<?php $constructors[] = [['BASE::NULL'], true]; ?>
    <?php echo $className; ?>( const GrokitNull & );

    <?php echo $className; ?> & operator =( const <?php echo $className; ?> & ) = default;

    ~<?php echo $className; ?>(void) {}

    /* ----- Methods ----- */
    void FromString( const char * );

<?php $methods[] = ['ToString', [], 'base::STRING_LITERAL', true]; ?>
    const char * ToString( void ) const;

    StorageType GetID( void ) const;
    void SetID( StorageType id );

    // Determines whether or not the category is valid.
<?php $methods[] = ['Invalid', [], 'base::bool', true]; ?>
    bool Invalid(void) const;
<?php $methods[] = ['Valid', [], 'base::bool', true]; ?>
    bool Valid(void) const;

    /* ----- Operators ----- */
    bool operator ==( const <?php echo $className; ?> & ) const;
    bool operator !=( const <?php echo $className; ?> & ) const;
    bool operator <( const <?php echo $className; ?> & ) const;
    bool operator <=( const <?php echo $className; ?> & ) const;
    bool operator >( const <?php echo $className; ?> & ) const;
    bool operator >=( const <?php echo $className; ?> & ) const;

    // Implicit conversion to storage type
    operator <?php echo $storageType; ?>() const;

    // To/From Json
    void toJson( Json::Value & dest ) const;
    void fromJson( const Json::Value & src );
};

/* ----- Constructors ----- */

inline
<?php echo $className; ?> :: <?php echo $className; ?>( void ) : myID(InvalidID)
{ }

inline
<?php echo $className; ?> :: <?php echo $className; ?>( const char * str ) {
    FromString(str);
}

inline
<?php echo $className; ?> :: <?php echo $className; ?>( const <?php echo $stringType; ?> & str ) {
    FromString(str.ToString());
}

inline
<?php echo $className; ?> :: <?php echo $className; ?>( const <?php echo $storageType; ?> val ) : myID(val)
{ }

inline
<?php echo $className; ?> :: <?php echo $className; ?>( const <?php echo $className; ?> & other ) : myID(other.myID)
{ }

inline
<?php echo $className; ?> :: <?php echo $className; ?>( const GrokitNull & nullval ) : myID(InvalidID)
{ }

/* ----- Methods ----- */

inline
void <?php echo $className; ?> :: FromString( const char * str ) {
    auto it = nameToID.find(str);
    if( it != nameToID.end() ) {
        myID = it->second;
    } else {
        myID = InvalidID;
    }
}

inline
const char * <?php echo $className; ?> :: ToString( void ) const {
    auto it = idToName.find(myID);
    if( it != idToName.end() ) {
        return it->second.c_str();
    } else {
        return "NULL";
    }
}

inline
auto <?php echo $className; ?> :: GetID( void ) const -> StorageType {
    return myID;
}

inline
void <?php echo $className; ?> :: SetID( StorageType id ) {
    myID = id;
}

inline
bool <?php echo $className; ?> :: Valid(void) const {
    return idToName.count(myID) > 0;
}

inline
bool <?php echo $className; ?> :: Invalid(void) const {
    return ! Valid();
}

/* ----- Operators ----- */

inline
bool <?php echo $className; ?> :: operator ==( const <?php echo $className; ?> & other ) const {
    return myID == other.myID;
}

inline
bool <?php echo $className; ?> :: operator !=( const <?php echo $className; ?> & other ) const {
    return myID != other.myID;
}

inline
bool <?php echo $className; ?> :: operator <( const <?php echo $className; ?> & other ) const {
    return myID < other.myID;
}

inline
bool <?php echo $className; ?> :: operator >( const <?php echo $className; ?> & other ) const {
    return myID > other.myID;
}

inline
bool <?php echo $className; ?> :: operator <=( const <?php echo $className; ?> & other ) const {
    return myID <= other.myID;
}

inline
bool <?php echo $className; ?> :: operator >=( const <?php echo $className; ?> & other ) const {
    return myID >= other.myID;
}

// To/From Json
inline
void <?php echo $className; ?> :: toJson( Json::Value & dest ) const {
    dest = (Json::Int64) myID;
}

inline
void <?php echo $className; ?> :: fromJson( const Json::Value & src ) {
    myID = (StorageType) src.asInt64();
}

inline
<?php echo $className; ?> :: operator <?php echo $storageType; ?>() const {
    return myID;
}

<?php
    // Everything printed between ob_start() and ob_get_clean() is captured
    // and returned as the type's 'global_content' rather than being emitted
    // here. It uses the literal placeholder @type for the type name.
    ob_start();
    $functions[] = ['Hash', ['@type'], 'base::BIGINT', true, true];
    ?>
template<>
inline
uint64_t Hash(const @type & thing) {
    return thing.GetID();
}

inline
void FromString( @type & c, const char * str ) {
    c.FromString(str);
}

inline
int ToString( const @type & c, char * buffer ) {
    const char * str = c.ToString();
    strcpy( buffer, str);
    int len = strlen(buffer);
    return len + 1;
}

inline
void ToJson( const @type & src, Json::Value & dest ) {
    src.toJson(dest);
}

inline
void FromJson( const Json::Value & src, @type & dest ) {
    dest.fromJson(src);
}

<?php $functions[] = ['IsNull', ['@type'], 'BASE::BOOL', true, true]; ?>
inline
bool IsNull( const @type c ) {
    return c.Invalid();
}
<?php
    $globalContents = ob_get_clean();
    ?>

// Initialize static values
const <?php echo $className; ?>::IDToNameMap <?php echo $className; ?> :: idToName = { <?php echo array_template('{{key},"{val}"}', ',', $values); ?> };
const <?php echo $className; ?>::NameToIDMap <?php echo $className; ?> :: nameToID = { <?php echo array_template('{"{val}",{key}}', ',', $values); ?> };
const <?php echo $className; ?>::StorageType <?php echo $className; ?> :: InvalidID = std::numeric_limits<<?php echo $className; ?>::StorageType>::max();

<?php
    return [
        'kind' => 'TYPE',
        'name' => $className,
        'properties' => ['categorical'],
        'extras' => ['cardinality' => $cardinality, 'size.bytes' => $storageBytes],
        'binary_operators' => ['==', '!=', '<', '>', '<=', '>='],
        'system_headers' => ['cinttypes', 'unordered_map', 'string', 'cstring', 'limits'],
        'global_content' => $globalContents,
        'complex' => false,
        'methods' => $methods,
        'constructors' => $constructors,
        'functions' => $functions,
        'describe_json' => DescribeJson('factor', DescribeJsonStatic(['levels' => $values])),
    ];
}
/**
 * Emits the C++ "Finalize (Chunk)" work function for a GLA waypoint.
 *
 * The generated function GLAFinalizeWorkFunc_<wpName> takes the GLA state of
 * the query selected by the work description, extracts its results into an
 * output chunk according to the GLA's result type, records tuple counters
 * and returns WP_FINALIZE.
 *
 * Result types handled per query:
 *  - single   : one GetResult() call producing one tuple.
 *  - multi    : Finalize() followed by GetNextResult() until it returns false.
 *  - fragment : Finalize(fragment) yields an iterator which is drained with
 *               GetNextResult(iterator, ...) and then deleted.
 *  - state    : the state pointer itself is wrapped into the single output
 *               attribute (after FinalizeState() if the GLA requests it).
 * A query flagged 'retState' must not reach this function; the generated
 * code raises FATAL for it.
 *
 * @param string $wpName  Waypoint name, used in the function name and in the
 *                        profiling counters.
 * @param array  $queries Query => info map; each info provides 'gla',
 *                        'output' and 'retState'.
 * @param mixed  $attMap  Not used by this generator.
 */
function GLAGenerate_Finalize($wpName, $queries, $attMap)
{
    ?>

#ifndef PER_QUERY_PROFILE
#define PER_QUERY_PROFILE
#endif

//+{"kind":"WPF", "name":"Finalize (Chunk)", "action":"start"}
extern "C"
int GLAFinalizeWorkFunc_<?php echo $wpName; ?> (WorkDescription &workDescription, ExecEngineData &result) {
    GLAFinalizeWD myWork;
    myWork.swap (workDescription);
    QueryExit whichOne = myWork.get_whichQueryExit();
    GLAState& glaState = myWork.get_glaState();

<?php cgDeclareQueryIDs($queries); ?>

    // Set up the output chunk
    Chunk output;

    QueryIDSet queriesToRun = whichOne.query;
<?php
    // Extract the state for the query
    foreach ($queries as $query => $info) {
        $gla = $info['gla'];
        ?>
    // Do query <?php echo queryName($query); ?>:
    <?php echo $gla; ?> * state_<?php echo queryName($query); ?> = NULL;
    if( whichOne.query == <?php echo queryName($query); ?> ) {
        // Look for the state of query <?php echo queryName($query); ?>.
        GLAPtr state;
        state.swap(glaState);
        FATALIF( state.get_glaType() != <?php echo $gla->cHash(); ?>,
            "Got GLA of unexpected type");
        state_<?php echo queryName($query); ?> = (<?php echo $gla; ?> *) state.get_glaPtr();
    }
<?php
    } // foreach query
    ?>

    // Start columns for all possible outputs.
<?php
    foreach ($queries as $query => $info) {
        $output = $info['output'];
        cgConstructColumns($output);
    } // foreach query
    ?>

    // This is the output bitstring
    MMappedStorage myStore;
    Column bitmapOut( myStore );
    BStringIterator myOutBStringIter( bitmapOut, queriesToRun );

    PROFILING2_START;
    int64_t numTuples = 0;

#ifdef PER_QUERY_PROFILE
<?php
    foreach ($queries as $query => $info) {
        ?>
    int64_t numTuples_<?php echo queryName($query); ?> = 0;
<?php
    } // foreach query
    ?>
#endif // PER_QUERY_PROFILE

    // Extract results
<?php
    foreach ($queries as $query => $info) {
        $gla = $info['gla'];
        $output = $info['output'];
        // If this is true, we return the GLA as a const state.
        // Otherwise, we pack the results into a chunk.
        $retState = $info['retState'];
        $stateName = 'state_' . queryName($query);
        ?>
    if( whichOne.query == <?php echo queryName($query); ?> ) {
<?php
        if ($retState) {
            ?>
        FATAL( "Called normal finalize for query that was supposed to be returned as a const state" );
<?php
        } else {
            // Keep the full list of advertised result types around: it is
            // needed for the error message if none of them is supported.
            $resTypes = $gla->result_type();
            $resType = get_first_value($resTypes, ['fragment', 'multi', 'single', 'state']);

            if ($resType == 'single') {
                ?>
        <?php echo $stateName; ?>->GetResult(<?php echo implode(', ', $output); ?>);
        numTuples++;
#ifdef PER_QUERY_PROFILE
        numTuples_<?php echo queryName($query); ?>++;
#endif // PER_QUERY_PROFILE
        myOutBStringIter.Insert(<?php echo queryName($query); ?>);
        myOutBStringIter.Advance();

<?php
                cgInsertAttributesList($output, '_Column_Out', 2);
            } elseif ($resType == 'multi') {
                ?>
        <?php echo $stateName; ?>->Finalize();
        while( <?php echo $stateName; ?>->GetNextResult(<?php echo implode(', ', $output); ?>) ) {
            numTuples++;
#ifdef PER_QUERY_PROFILE
            numTuples_<?php echo queryName($query); ?>++;
#endif // PER_QUERY_PROFILE
            myOutBStringIter.Insert(<?php echo queryName($query); ?>);
            myOutBStringIter.Advance();

<?php
                cgInsertAttributesList($output, '_Column_Out', 3);
                ?>
        }
<?php
            } elseif ($resType == 'fragment') {
                ?>
        int fragment = myWork.get_fragmentNo();
        <?php echo $gla; ?>_Iterator * iterator = <?php echo $stateName; ?>->Finalize(fragment);
        while( <?php echo $stateName; ?>->GetNextResult(iterator, <?php echo implode(', ', $output); ?>) ) {
            numTuples++;
#ifdef PER_QUERY_PROFILE
            numTuples_<?php echo queryName($query); ?>++;
#endif // PER_QUERY_PROFILE
            myOutBStringIter.Insert(<?php echo queryName($query); ?>);
            myOutBStringIter.Advance();

<?php
                cgInsertAttributesList($output, '_Column_Out', 3);
                ?>
        }

        delete iterator;
<?php
            } elseif ($resType == 'state') {
                reset($output);
                $att = current($output); // Output attribute

                if ($gla->finalize_as_state()) {
                    ?>
        <?php echo $stateName; ?>->FinalizeState();
<?php
                } // if GLA finalized as state
                ?>
        <?php echo $att; ?> = <?php echo $att->type(); ?>( <?php echo $stateName; ?> );
        numTuples++;
#ifdef PER_QUERY_PROFILE
        numTuples_<?php echo queryName($query); ?>++;
#endif // PER_QUERY_PROFILE
        myOutBStringIter.Insert(<?php echo queryName($query); ?>);
        myOutBStringIter.Advance();

<?php
                cgInsertAttributesList($output, '_Column_Out', 2);
            } else {
                // Report the types the GLA advertised, not the (failed)
                // selection; the cast copes with a scalar result_type().
                grokit_error('GLA ' . $gla . ' has no known result type: [' . implode(',', (array) $resTypes) . ']');
            } // switch GLA result type
        } // else GLA produces into a chunk
        ?>
        myOutBStringIter.Done();
        output.SwapBitmap(myOutBStringIter);

        // Write columns
<?php
        foreach ($output as $att) {
            ?>
        Column col_<?php echo $att; ?>;
        <?php echo $att; ?>_Column_Out.Done(col_<?php echo $att; ?>);
        output.SwapColumn( col_<?php echo $att; ?>, <?php echo $att->slot(); ?> );
<?php
        } // foreach output attribute
        ?>
    }
<?php
    } // foreach query
    ?>

    PROFILING2_END;

    PCounterList counterList;
    PCounter totalCnt("tpo", numTuples, "<?php echo $wpName; ?>");
    counterList.Append(totalCnt);

#ifdef PER_QUERY_PROFILE
<?php
    foreach ($queries as $query => $info) {
        ?>
    {
        PCounter qCount("tpo <?php echo queryName($query); ?>", numTuples_<?php echo queryName($query); ?>, "<?php echo $wpName; ?>");
        counterList.Append(qCount);
    }
<?php
    } // foreach query
    ?>
#endif // PER_QUERY_PROFILE

    PROFILING2_SET(counterList, "<?php echo $wpName; ?>");

    ChunkContainer tempResult(output);
    tempResult.swap(result);

    return WP_FINALIZE;
}
//+{"kind":"WPF", "name":"Finalize (Chunk)", "action":"end"}
<?php
}
/**
 * Generates a GI (input reader) class that parses delimiter-separated text
 * into tuples.
 *
 * Template arguments read from $t_args:
 *  - 'output'            : list of output types, used only when $output is
 *                          empty; attributes are then named val_1, val_2, ...
 *  - 'sep' / 'separator' : single-character column delimiter, or the word
 *                          "tab" (default ',').
 *  - 'quote'             : single-character quote (default '"'); not allowed
 *                          together with 'simple'.
 *  - 'escape'            : single-character escape (default '\'); not allowed
 *                          together with 'simple'.
 *  - 'skip'              : number of header lines to skip (default 0).
 *  - 'n'                 : maximum number of lines to read (default -1).
 *  - 'nullable'          : true (every attribute, null string "NULL"), a
 *                          string (every attribute, that null string), or an
 *                          array describing individual attributes.
 *  - 'simple'            : when true, split on the delimiter only instead of
 *                          using the boost tokenizer (default false).
 *  - 'trim.cr'           : when true, strip a trailing '\r' from each line
 *                          (default false).
 *  - 'debug'             : debug level (default 0).
 *
 * @param array $t_args Template arguments (see above).
 * @param array $output Output name => type map; may be empty if the outputs
 *                      are supplied through $t_args['output'].
 *
 * @return array GI description (name, kind, output and required headers).
 */
function CSVReader(array $t_args, array $output)
{
    $my_output = [];

    // Handle case where outputs are given as template arguments
    // and not implied.
    if (\count($output) == 0) {
        grokit_assert(array_key_exists('output', $t_args), 'Did not receive any description of my output!');
        $output_list = $t_args['output'];
        grokit_assert(is_array($output_list), 'Expected list of types for template argument "output"');

        $i = 1;
        foreach ($output_list as $out_type) {
            grokit_assert(is_datatype($out_type) || is_identifier($out_type), 'Expected only types in the "output" list');
            if (is_identifier($out_type)) {
                $out_type = lookupType($out_type->value());
            }

            $name = 'val_' . $i;
            $my_output[$name] = $out_type;
            $i += 1;
        }
    } else {
        foreach ($output as $key => $out) {
            $my_output[$key] = $out;
        }
    }

    $debug = get_default($t_args, 'debug', 0);
    $simple = get_default($t_args, 'simple', false);
    $trimCR = get_default($t_args, 'trim.cr', false);

    // Handle separator
    $separator = ',';
    if (array_key_exists('sep', $t_args) || array_key_exists('separator', $t_args)) {
        $sep = get_first_key($t_args, ['sep', 'separator']);
        grokit_assert(is_string($sep), "Got " . gettype($sep) . " instead of string for separator.");
        if (strtolower($sep) === 'tab') {
            $sep = '\\t';
        }
        grokit_assert($sep != "\n", 'CSV column delimiter cannot be new line');
        // Scream if separator is longer than one character
        grokit_assert(\strlen($sep) == 1 || $sep == '\\t', 'Expected string of length 1 for separator, got string <' . $sep . '> instead');

        // The separator ends up inside a C++ character literal, so escape it
        // the same way the quote and escape characters are escaped below.
        // The tab form is already a valid C++ escape sequence.
        $separator = $sep == '\\t' ? $sep : addcslashes($sep, '\\\'');
    }

    // Handle quote character
    $quotechar = '"';
    if (array_key_exists('quote', $t_args) && !is_null($t_args['quote'])) {
        grokit_assert(!$simple, 'Quote option not available for simple CSVReader');
        $quote = $t_args['quote'];
        grokit_assert(is_string($quote), "Got " . gettype($quote) . " instead of string for quote.");
        // Scream if quote is longer than one character
        grokit_assert(\strlen($quote) == 1, 'Expected string of length 1 for quote character, got string <' . $quote . '> instead');
        $quotechar = $quote;
    }
    $quotechar = addcslashes($quotechar, '\\\'');

    // Handle escape character
    $escapeChar = '\\';
    if (array_key_exists('escape', $t_args) && !is_null($t_args['escape'])) {
        grokit_assert(!$simple, 'Escape option not available for simple CSVReader');
        $escape = $t_args['escape'];
        grokit_assert(is_string($escape), 'Got ' . gettype($escape) . ' instead of string for escape character.');
        grokit_assert(\strlen($escape) == 1, 'Expected string of length 1 for escape character, got string <' . $escape . '> instead');
        $escapeChar = $escape;
    }
    $escapeChar = addcslashes($escapeChar, '\\\'');

    // Handle header lines
    $headerLines = 0;
    if (array_key_exists('skip', $t_args)) {
        $headerLines = $t_args['skip'];
        grokit_assert(is_int($headerLines), 'Got ' . gettype($headerLines) . ' instead of int for number of lines to skip.');
        grokit_assert($headerLines >= 0, 'Cannot skip a negative number of lines.');
    }

    // Maximum number of lines to read
    $maxLines = get_default($t_args, 'n', -1);
    grokit_assert(is_int($maxLines), 'Got ' . gettype($maxLines) . ' instead of int for template argument "n"');

    // Per-attribute nullability and the string that denotes null.
    $nullArg = get_first_key_default($t_args, ['nullable'], false);
    $nullable = [];
    $nullStr = [];
    foreach ($my_output as $name => $type) {
        $nullable[$name] = false;
    }

    if ($nullArg === true) {
        foreach ($my_output as $name => $type) {
            $nullable[$name] = true;
            $nullStr[$name] = 'NULL';
        }
    } elseif (is_array($nullArg)) {
        foreach ($nullArg as $n => $v) {
            // If nullable value is an associative mapping, the value is either true/false
            // or the value of the null string
            if (is_string($n)) {
                grokit_assert(is_string($v) || is_bool($v), 'CSVReader: nullable associative mapping must have string or boolean values');
                grokit_assert(array_key_exists($n, $nullable), 'CSVReader: cannot make unknown attribute ' . $n . ' nullable');
                if (is_bool($v)) {
                    $nullable[$n] = $v;
                    $nullStr[$n] = 'NULL';
                } else {
                    $nullable[$n] = true;
                    $nullStr[$n] = $v;
                }
            } elseif (is_array($v)) {
                grokit_assert(array_key_exists('attr', $v), 'CSVReader: Name of nullable attribute not specified');
                $attrName = $v['attr']->name();
                $nullable[$attrName] = true;
                $nullStr[$attrName] = array_key_exists('null', $v) ? $v['null'] : 'NULL';
            } else {
                // Otherwise, it's just nullable
                $attrName = $v->name();
                grokit_assert(array_key_exists($attrName, $nullable), 'CSVReader: cannot make unknown attribute ' . $v . ' nullable');
                $nullable[$attrName] = true;
                $nullStr[$attrName] = 'NULL';
            }
        }
    } elseif (is_string($nullArg)) {
        foreach ($my_output as $name => $type) {
            $nullable[$name] = true;
            $nullStr[$name] = $nullArg;
        }
    } elseif ($nullArg !== false) {
        // gettype() is the PHP built-in for this; there is no typeof().
        grokit_error('Template argument "nullable" must be boolean or array, ' . gettype($nullArg) . ' given');
    }

    // Come up with a name for ourselves
    $className = generate_name('CSVReader');

    if ($debug >= 2) {
        foreach ($my_output as $name => $type) {
            fwrite(STDERR, "CSVReader: {$name} is nullable: " . ($nullable[$name] ? 'true' : 'false') . PHP_EOL);
        }
    }
    ?>

class <?php echo $className; ?> {
    std::istream& my_stream;
    std::string fileName;

    // Template parameters
    static constexpr size_t MAX_LINES = <?php echo $maxLines; ?>;
    static constexpr size_t HEADER_LINES = <?php echo $headerLines; ?>;
    static constexpr char DELIMITER = '<?php echo $separator; ?>';
<?php
    if (!$simple) {
        ?>
    static constexpr char QUOTE_CHAR = '<?php echo $quotechar; ?>';
    static constexpr char ESCAPE_CHAR = '<?php echo $escapeChar; ?>';

    typedef boost::escaped_list_separator<char> separator;
    typedef boost::tokenizer< separator > Tokenizer;

    separator my_separator;
    Tokenizer my_tokenizer;
<?php
    }
    ?>

    // Prevent having to allocate this every time.
    std::string line;
    std::vector<std::string> tokens;

    size_t count;

<?php \grokit\declareDictionaries($my_output); ?>

public:

    <?php echo $className; ?> ( GIStreamProxy& _stream ) :
        my_stream(_stream.get_stream())
        , fileName(_stream.get_file_name())
<?php
    if (!$simple) {
        ?>
        , my_separator(ESCAPE_CHAR, DELIMITER, QUOTE_CHAR)
        , my_tokenizer(std::string(""))
<?php
    }
    ?>
        , count(0)
    {
<?php
    if ($headerLines > 0) {
        ?>
        for( size_t i = 0; i < HEADER_LINES; ++i ) {
            FATALIF( !getline( my_stream, line ), "CSV Reader reached end of file before finishing header.\n" );
        }
<?php
    } // if there are header lines to skip
    ?>
    }

    bool ProduceTuple( <?php echo typed_ref_args($my_output); ?> ) {
        if (count < MAX_LINES) { //>
            count++;
        } else {
            return false;
        }

        if( getline( my_stream, line ) ) {
<?php
    if ($trimCR) {
        // back() on an empty string is undefined behaviour, so an empty line
        // has to be ruled out before peeking at the last character.
        ?>
            if( !line.empty() && line.back() == '\r' ) {
                line.pop_back();
            }
<?php
    } // if trimCR

    if (!$simple) {
        if ($debug >= 1) {
            ?>
            try {
<?php
        } // if debug >= 1
        ?>
            my_tokenizer.assign( line, my_separator );
<?php
        if ($debug >= 1) {
            ?>
            } catch(...) {
                FATAL("CSVReader for file %s failed on line: %s", fileName.c_str(), line.c_str());
            }
<?php
        } // if debug >= 1
        ?>
            Tokenizer::iterator it = my_tokenizer.begin();

<?php
        foreach ($my_output as $name => $type) {
            if ($nullable[$name]) { // nullable
                // NOTE(review): this call is not echoed, while the simple
                // reader below echoes the same helper; confirm whether
                // fromStringNullable prints or returns its code.
                ?>
            <?php \grokit\fromStringNullable($name, $type, 'it->c_str()', true, $nullStr[$name]); ?>

<?php
            } else { // not nullable
                ?>
            <?php echo \grokit\fromStringDict($name, $type, 'it->c_str()'); ?>;
<?php
            } // end nullable check
            ?>
            ++it;
<?php
        } // foreach output
    } else {
        ?>
            for( char & c : line ) {
                if( c == DELIMITER )
                    c = '\0';
            }

            const char * ptr = line.c_str();
<?php
        $first = true;
        foreach ($my_output as $name => $type) {
            if ($first) {
                $first = false;
            } else {
                ?>
            while( *(ptr++) != '\0' )
                ; // Advance past next delimiter
<?php
            } // not first output

            if ($nullable[$name]) {
                ?>
            <?php echo \grokit\fromStringNullable($name, $type, 'ptr', true, $nullStr[$name]); ?>

<?php
            } else { // not nullable
                ?>
            <?php echo \grokit\fromStringDict($name, $type, 'ptr'); ?>;
<?php
            } // if nullable
        } // foreach output
    } // if simple reader
    ?>

            return true;
        } else {
            return false;
        }
    }

<?php \grokit\declareDictionaryGetters($my_output); ?>

};

<?php
    $sys_headers = ['vector', 'string', 'iostream', 'cstdint'];
    if (!$simple) {
        $sys_headers[] = 'boost/tokenizer.hpp';
    }

    return [
        'name' => $className,
        'kind' => 'GI',
        'output' => $my_output,
        'system_headers' => $sys_headers,
        'user_headers' => ['GIStreamInfo.h', 'Dictionary.h', 'DictionaryManager.h'],
    ];
}