/**
 * Emits a C++ helper function that builds a BASE::FixedArrayView from a
 * reference to an array value by passing `array.data()` to the view.
 *
 * @param array $args  Input types; exactly one is accepted and it must
 *                     report itself as an 'array' type.
 * @param array $targs Template arguments (not read by this generator).
 *
 * @return array Descriptor of the generated FUNCTION.
 */
function CreateView($args, $targs)
{
    $nArgs = \count($args);
    grokit_assert($nArgs == 1, 'CreateView supports exactly 1 input');

    $arrayType = $args[0];
    grokit_assert($arrayType->is('array'), 'CreateView cannot create view on non-array type');

    // The view is parameterised with the element type and size reported by
    // the array type itself.
    $viewArgs = [
        'type' => $arrayType->get('type'),
        'size' => $arrayType->get('size'),
    ];
    $viewType = lookupType('BASE::FixedArrayView', $viewArgs);

    $funcname = generate_name('CreateView_');
?>

<?= $viewType ?> <?= $funcname ?>( const <?= $arrayType ?> &array ) {
    return <?= $viewType ?>(array.data());
}

<?php
    $descriptor = [
        'kind' => 'FUNCTION',
        'name' => $funcname,
        'input' => $args,
        'result' => $viewType,
        'deterministic' => false,
    ];

    return $descriptor;
}
/**
 * Returns the entry stored under $name in the static attribute map.
 *
 * When the name is not present, it is first dumped to STDERR (via print_r,
 * so non-scalar values are still readable) and then the grokit assertion
 * fails with an "unknown attribute" message.
 */
public static function lookupAttribute($name)
{
    $known = array_key_exists($name, self::$att_map);

    if (!$known) {
        fwrite(STDERR, 'lookupAttribute called with name: ' . print_r($name, true));
    }

    grokit_assert($known, 'Attempting to lookup unknown attribute ' . $name);

    return self::$att_map[$name];
}
/**
 * Generates a filter (GF) class that wraps a PatternMatcherOnig instance and
 * accepts a tuple when its single input matches the configured pattern.
 *
 * The pattern is written verbatim between double quotes in the generated
 * C++ source. NOTE(review): no escaping is applied here, so the caller
 * presumably supplies a pattern that is already a valid C++ string-literal
 * body -- confirm before passing raw backslashes or quotes.
 *
 * @param array $t_args Template arguments; 'pattern' is required.
 * @param array $inputs Exactly one input (name => type); the type name must
 *                      be BASE::STRING_LITERAL.
 *
 * @return array Descriptor of the generated GF.
 */
function PatternMatcherOnig($t_args, $inputs)
{
    grokit_assert(\count($inputs) == 1, 'PatternMatcherOnig GF only supports 1 input!');

    $pattern = get_first_key($t_args, ['pattern']);

    $inName = array_keys($inputs)[0];
    $inType = array_get_index($inputs, 0);
    $inTypeString = $inType->name();

    $validTypes = ['BASE::STRING_LITERAL'];
    grokit_assert(in_array($inTypeString, $validTypes), 'Unsupported input type ' . $inTypeString);

    $className = generate_name('PatternMatcherOnigGF');
?>

class <?= $className ?> {
    PatternMatcherOnig matcher;

public:
    // The closing quote must follow the pattern directly: any whitespace in
    // between would become part of the regular expression.
    <?= $className ?>() :
        matcher("<?= $pattern ?>")
    { }

    bool Filter( const <?= $inType ?> & <?= $inName ?> ) {
        return matcher.Match(<?= $inName ?>);
    }
};

<?php
    return [
        'kind' => 'GF',
        'name' => $className,
        'input' => $inputs,
        'user_headers' => ['PatternMatcherOnig.h'],
    ];
}
/**
 * Returns the state registered for the given query inside the given
 * waypoint.
 *
 * Both lookups are guarded by grokit assertions: first that the waypoint
 * has any states at all, then that it has one for this query.
 */
public static function getState($wp, $query)
{
    $hasWaypoint = array_key_exists($wp, self::$states);
    grokit_assert($hasWaypoint, 'No states available for waypoint ' . $wp);

    $hasQuery = array_key_exists($query, self::$states[$wp]);
    grokit_assert($hasQuery, 'No state available for query ' . $query . ' in waypoint ' . $wp);

    return self::$states[$wp][$query];
}
/**
 * Parses a CASE AST node.
 *
 * The default branch is mandatory and is parsed first. Depending on whether
 * the node carries a base expression, the work is then delegated to
 * parseCaseBase() or parseCaseNoBase().
 *
 * @param mixed $ast AST node; must be of type NodeType::CASE_NODE.
 *
 * @return mixed Whatever parseCaseBase() / parseCaseNoBase() return.
 */
function parseCase($ast)
{
    assert_ast_type($ast, NodeType::CASE_NODE);

    $nodeData = ast_node_data($ast);
    $source = ast_node_source($ast);

    $baseNode = ast_get($nodeData, NodeKey::BASE);
    $caseList = ast_get($nodeData, NodeKey::CASES);
    $defaultNode = ast_get($nodeData, NodeKey::DEFAULT_CASE);

    // TODO: Allow case to handle no default if we add in null values.
    grokit_assert(
        $defaultNode !== null,
        'Case statements with no default currently unsupported ' . ast_node_source($ast)
    );
    grokit_logic_assert(
        is_array($caseList),
        'Cases attribute of CASE statement was not an array! ' . ast_node_source($ast)
    );

    $default = parseExpression($defaultNode);

    // No base expression: delegate to the base-less variant.
    if ($baseNode === null) {
        return parseCaseNoBase($source, $caseList, $default);
    }

    $base = parseExpression($baseNode);

    return parseCaseBase($source, $base, $caseList, $default);
}
/**
 * Generates a C++ function that tests whether its single input is a member
 * of a fixed set of values given at template-instantiation time.
 *
 * The values are supplied as strings, embedded in the generated source as a
 * `const char*` table and converted with FromString() into a hash set when
 * the generated singleton is constructed.
 *
 * Template arguments:
 *  - 'values'  (required) array of strings making up the set.
 *  - 'use.mct' (optional, default false) use mct::closed_hash_set instead of
 *              std::unordered_set.
 *
 * @param array $args  Input types; exactly one.
 * @param array $targs Template arguments (see above).
 *
 * @return array Descriptor of the generated FUNCTION.
 */
function Contains($args, $targs)
{
    grokit_assert(\count($args) == 1, 'Contains supports exactly 1 input, ' . \count($args) . ' given');
    grokit_assert(array_key_exists('values', $targs), 'Contains() requires a "values" template argument');

    $inputName = 'contains_input';
    $inputType = $args[0];
    $boolType = lookupType('base::bool');

    $typename = generate_name('_ContainsType');
    $funcname = generate_name('Contains');

    $sys_headers = ['cstddef'];

    $use_mct = get_default($targs, 'use.mct', false);
    if ($use_mct) {
        $sys_headers[] = 'mct/closed-hash-set.hpp';
        $setType = 'mct::closed_hash_set<' . $inputType . ', KeyHash>';
    } else {
        $sys_headers[] = 'unordered_set';
        $setType = 'std::unordered_set<' . $inputType . ', KeyHash>';
    }

    $values = $targs['values'];
    grokit_assert(is_array($values), 'Contains(): values argument must be an array of strings');

    // Turn every value into a C++ string literal, escaping the characters
    // that would otherwise terminate or corrupt the literal.
    $quotedValues = [];
    $escapeChars = "\"'\n\r\t\\";
    foreach ($values as $index => $val) {
        grokit_assert(is_string($val), "Contains(): Value at index {$index} is not a string");
        $quotedValues[] = '"' . addcslashes($val, $escapeChars) . '"';
    }

    $nVals = \count($quotedValues);
?>

class <?= $typename ?> {
public:
    struct KeyHash {
        std::size_t operator () (const <?= $inputType ?> & val) const {
            return static_cast<std::size_t>(Hash(val));
        }
    };

    using Set = <?= $setType ?>;

    // Singleton
    static const <?= $typename ?> instance;

private:
    static const char* str_values[<?= $nVals ?>];

    Set values;

    <?= $typename ?>(): values() {
        <?= $inputType ?> temp;
        for( auto str : str_values ) {
            FromString(temp, str);
            values.insert(temp);
        }
    }

public:
    bool exists(const <?= $inputType ?> & <?= $inputName ?>) const {
        return values.count(<?= $inputName ?>) > 0;
    }
};

const <?= $typename ?> <?= $typename ?>::instance;
const char* <?= $typename ?>::str_values[<?= $nVals ?>] = { <?= implode(", ", $quotedValues) ?> };

<?= $boolType ?> <?= $funcname ?>(const <?= $inputType ?> & <?= $inputName ?>) {
    return <?= $typename ?>::instance.exists(<?= $inputName ?>);
}

<?php
    return [
        'kind' => 'FUNCTION',
        'name' => $funcname,
        'input' => $args,
        'result' => $boolType,
        // Previously misspelled as 'determinstic'.
        'deterministic' => true,
        'system_headers' => $sys_headers,
    ];
}
/**
 * A GLA that counts the number of distinct elements by keeping track of the
 * distinct elements.
 *
 * Unless an exact count of the distinct is absolutely needed, consider using
 * an approximation of the distinct, such as a Bloom Filter.
 *
 * The generated class simply wraps a BASE::DISTINCT GLA and reports its
 * get_countDistinct() value.
 *
 * Template arguments (all optional):
 *  - 'use.mct'         bool, default true.
 *  - 'mct.keep.hashes' bool, default false.
 *  - 'init.size'       positive integer, default 65536.
 *  - 'null.check'      default false; forwarded to BASE::DISTINCT unchecked.
 *
 * @param array $t_args Template arguments (see above).
 * @param array $input  Inputs (name => type), forwarded to BASE::DISTINCT.
 * @param array $output Exactly one output (name => type or null); a null
 *                      type defaults to BASE::BIGINT and must be numeric.
 *
 * @return array Descriptor of the generated GLA.
 */
function CountDistinct(array $t_args, array $input, array $output)
{
    // A space is required before "given", otherwise the message reads "3given".
    grokit_assert(
        \count($output) == 1,
        'CountDistinct should have only 1 output, ' . \count($output) . ' given'
    );

    $outputName = array_keys($output)[0];
    $outputType = array_get_index($output, 0);
    if (is_null($outputType)) {
        $outputType = lookupType('BASE::BIGINT');
    }
    $output[$outputName] = $outputType;

    grokit_assert($outputType->is('numeric'), 'CountDistinct output must be numeric!');

    $useMCT = get_default($t_args, 'use.mct', true);
    $keepHashes = get_default($t_args, 'mct.keep.hashes', false);
    $initSize = get_default($t_args, 'init.size', 65536);
    $nullCheck = get_default($t_args, 'null.check', false);

    grokit_assert(is_bool($useMCT), 'CountDistinct use.mct argument must be boolean');
    grokit_assert(is_integer($initSize), 'Distinct init.size argument must be an integer');
    grokit_assert($initSize > 0, 'Distinct init.size argument must be positive');
    grokit_assert(is_bool($keepHashes), 'CountDistinct mct.keep.hashes argument must be boolean');

    $distTmpArgs = [
        'use.mct' => $useMCT,
        'init.size' => $initSize,
        'mct.keep.hashes' => $keepHashes,
        'null.check' => $nullCheck,
    ];

    // The inner DISTINCT GLA is given the inputs as its outputs as well.
    $gla = lookupGLA('BASE::DISTINCT', $distTmpArgs, $input, $input);

    $className = generate_name('CountDistinct');
?>

class <?= $className ?> {
    using Distinct = <?= $gla->value() ?>;

    Distinct distinctGLA;

public:
    <?= $className ?>(void) : distinctGLA() { }
    ~<?= $className ?>(void) { }

    void AddItem(<?= const_typed_ref_args($input) ?>) {
        distinctGLA.AddItem(<?= args($input) ?>);
    }

    void AddState(<?= $className ?> & o) {
        distinctGLA.AddState(o.distinctGLA);
    }

    void GetResult(<?= $outputType ?> & <?= $outputName ?>) {
        <?= $outputName ?> = distinctGLA.get_countDistinct();
    }
};

<?php
    return [
        'kind' => 'GLA',
        'name' => $className,
        'input' => $input,
        'output' => $output,
        'result_type' => 'single',
    ];
}
/**
 * Generates a GLA that keeps every tuple tied for "best" according to a set
 * of extreme attributes (each minimised or maximised).
 *
 * Extreme attributes are compared in the order in which they appear in the
 * inputs; the first one that differs decides. Tuples that are equal on all
 * extreme attributes are all retained and returned.
 *
 * Template arguments:
 *  - 'extremes' (required) map of input name => one of
 *               MIN, MINIMUM, -, <  /  MAX, MAXIMUM, +, >  (case-insensitive).
 *  - 'inputs'   (required only when $inputs is empty) list of datatypes or
 *               type identifiers; they are named et_val0, et_val1, ...
 *  - 'debug'    (optional, default 0) values > 0 add stream headers.
 *
 * @param array $t_args  Template arguments (see above).
 * @param array $inputs  Inputs (name => type); may be empty.
 * @param array $outputs Outputs; the i-th output receives the type of, and
 *                       is filled from, the i-th input.
 *
 * @return array Descriptor of the generated GLA.
 */
function ExtremeTuples(array $t_args, array $inputs, array $outputs)
{
    $extremes = get_first_key($t_args, ['extremes']);
    $nExt = \count($extremes);
    grokit_assert($nExt > 0, 'No extremes specified for ExtremeTuples GLA.');

    if (\count($inputs) == 0) {
        grokit_assert(
            array_key_exists('inputs', $t_args),
            'No arguments specified for ExtremeTuples GLA.'
        );

        $count = 0;
        foreach ($t_args['inputs'] as $type) {
            if (is_identifier($type)) {
                $type = lookupType(strval($type));
            }

            grokit_assert(
                is_datatype($type),
                'Only datatypes can be specified as inputs to ' . 'the ExtremeTuples GLA'
            );

            $name = 'et_val' . $count;
            $inputs[$name] = $type;

            // Advance the counter so every generated input gets its own name;
            // without this all inputs collapsed onto 'et_val0'.
            $count++;
        }
    }

    // Pair outputs with inputs positionally: each output takes the type of
    // the matching input and remembers which input it is read from.
    $outputMap = [];
    reset($outputs);
    foreach ($inputs as $name => $type) {
        $oKey = key($outputs);
        $outputs[$oKey] = $type;
        $outputMap[$oKey] = $name;
        next($outputs);
    }

    grokit_assert(
        $nExt <= \count($inputs),
        'There can not be more extreme values than there are inputs!'
    );

    $mainAtts = [];
    $extraAtts = [];

    $minOpts = ['MIN', 'MINIMUM', '-', '<'];
    $maxOpts = ['MAX', 'MAXIMUM', '+', '>'];

    // Case-insensitive in_array().
    $inArrayCase = function ($needle, $haystack) {
        foreach ($haystack as $item) {
            if (strcasecmp($needle, $item) == 0) {
                return true;
            }
        }

        return false;
    };

    // $minimum[name] is true when the attribute is minimised, false when maximised.
    $minimum = [];

    foreach (array_keys($extremes) as $name) {
        grokit_assert(
            array_key_exists($name, $inputs),
            "ExtremeTuples: Expression with name " . $name . " specified as extreme not found in inputs"
        );
    }

    foreach ($inputs as $name => $type) {
        if (!array_key_exists($name, $extremes)) {
            $extraAtts[$name] = $type;
            continue;
        }

        $mainAtts[$name] = $type;

        if ($inArrayCase($extremes[$name], $minOpts)) {
            $minimum[$name] = true;
        } elseif ($inArrayCase($extremes[$name], $maxOpts)) {
            $minimum[$name] = false;
        } else {
            grokit_error('Unknown extreme type ' . $extremes[$name] . ' specified for ' . $name);
        }
    }

    $debug = get_default($t_args, 'debug', 0);

    $className = generate_name('ExtremeTuples');
?>

class <?= $className ?> {

    struct Tuple {
<?php foreach ($inputs as $name => $type) { ?>
        <?= $type ?> <?= $name ?>;
<?php } // foreach input ?>

        // Default Constructor, Copy Constructor, and Copy Assignment are all
        // default
        Tuple(void) = default;
        Tuple(const Tuple &) = default;
        Tuple & operator = (const Tuple &) = default;

        Tuple(<?= array_template('const {val} & _{key}', ', ', $inputs) ?>) :
            <?= array_template('{key}(_{key})', ', ', $inputs) ?> 
        { }

        // operator > means that this tuple is "better" than the other tuple.
        bool operator > ( const Tuple & other ) const {
<?php
    foreach ($mainAtts as $name => $type) {
        $op1 = $minimum[$name] ? '<' : '>';
        $op2 = !$minimum[$name] ? '<' : '>';
?>
            if( <?= $name ?> <?= $op1 ?> other.<?= $name ?> )
                return true;
            else if( <?= $name ?> <?= $op2 ?> other.<?= $name ?> )
                return false;
<?php } // foreach main attribute ?>

            return false;
        }

        bool operator < ( const Tuple& other ) const {
            return other > *this;
        }

        bool operator <= (const Tuple & other ) const {
            return ! (*this > other );
        }

        bool operator >= (const Tuple & other ) const {
            return !( other > *this );
        }

        bool operator == (const Tuple & other ) const {
            bool ret = true;
<?php foreach ($mainAtts as $name => $type) { ?>
            ret &= <?= $name ?> == other.<?= $name ?>;
<?php } // foreach main attribute ?>
            return ret;
        }
    }; // struct Tuple

    typedef std::vector<Tuple> TupleVector;

public:

    class Iterator {
    public:
        typedef TupleVector::const_iterator iter_type;

    private:
        iter_type begin;
        iter_type end;

    public:
        Iterator(void) = default;
        Iterator(const Iterator &) = default;
        Iterator( const iter_type & _begin, const iter_type & _end ) :
            begin(_begin), end(_end)
        { }
        Iterator( const iter_type && _begin, const iter_type && _end ) :
            begin(_begin), end(_end)
        { }

        bool GetNextResult(<?= typed_ref_args($outputs) ?>) {
            if( begin != end ) {
<?php foreach ($outputs as $name => $type) { ?>
                <?= $name ?> = begin-><?= $outputMap[$name] ?>;
<?php } ?>
                begin++;
                return true;
            }
            else {
                return false;
            }
        }
    };

private:

    uintmax_t __count;  // number of tuples covered

    TupleVector tuples;

    // Iterator for multi output type
    Iterator multiIterator;

public:

    // Constructor and destructor
    <?= $className ?>(void) : __count(0), tuples(), multiIterator()
    { }

    ~<?= $className ?>() { }

    void AddItem( <?= const_typed_ref_args($inputs) ?> ) {
        ++__count;
        Tuple t(<?= args($inputs) ?>);

        if( tuples.empty() ) {
            tuples.push_back(t);
        }
        else if( t > tuples.front() ) {
            tuples.clear();
            tuples.push_back(t);
        }
        else if( t == tuples.front() ) {
            tuples.push_back(t);
        }
    }

    void AddState( <?= $className ?> & other ) {
        if( tuples.size() == 0 ) {
            tuples.swap(other.tuples);
        }
        else if( other.tuples.size() == 0 ) {
            // Do nothing
        }
        else if( tuples.front() > other.tuples.front() ) {
            // fast path
        }
        else if( other.tuples.front() > tuples.front() ) {
            tuples.swap(other.tuples);
        }
        else {
            for( Tuple & t : other.tuples ) {
                tuples.push_back(t);
            }
        }
    }

    void Finalize( void ) {
        multiIterator = Iterator(tuples.cbegin(), tuples.cend());
    }

    bool GetNextResult(<?= typed_ref_args($outputs) ?>) {
        return multiIterator.GetNextResult(<?= args($outputs) ?>);
    }
};

<?php
    $system_headers = ['vector', 'algorithm', 'cinttypes'];
    if ($debug > 0) {
        $system_headers = array_merge($system_headers, ['iostream', 'sstream', 'string']);
    }

    return [
        'kind' => 'GLA',
        'name' => $className,
        'input' => $inputs,
        'output' => $outputs,
        'result_type' => 'multi',
        'system_headers' => $system_headers,
    ];
}
/**
 * A GLA that estimates the cardinality of a dataset using a bloom filter of
 * a configurable size.
 *
 * Note: This filter has very high performance, so long as all of the states
 * fit into cache, preferably L1 or L2, but L3 is also fine. Once the states
 * are large enough that all of them cannot fit inside L3 cache at the same
 * time, performance takes a nose dive (4x loss minimum).
 *
 * Template arguments (all optional):
 *  - 'exponent'   integer in (0, 64), default 16; the filter has 2^exponent bits.
 *  - 'null.check' either a boolean applied to every input, or a list of
 *                 input names; tuples with a null in a checked input are
 *                 counted but not hashed into the filter.
 *  - 'debug'      default 0; values > 0 make GetResult print its
 *                 intermediate values to std::cout.
 *
 * @param array $t_args Template arguments (see above).
 * @param array $input  Inputs (name => type); all are hashed together.
 * @param array $output Exactly one output (name => type or null); a null
 *                      type defaults to BASE::BIGINT and must be numeric.
 *
 * @return array Descriptor of the generated GLA.
 */
function BloomFilter(array $t_args, array $input, array $output)
{
    grokit_assert(
        \count($output) == 1,
        'BloomFilter produces only 1 value, ' . \count($output) . ' outputs given.'
    );

    $outputName = array_keys($output)[0];
    $outputType = array_get_index($output, 0);
    if (is_null($outputType)) {
        $outputType = lookupType('BASE::BIGINT');
    }
    $output[$outputName] = $outputType;

    grokit_assert($outputType->is('numeric'), 'BloomFilter output must be numeric!');

    $exp = get_first_key_default($t_args, ['exponent'], 16);
    grokit_assert(is_integer($exp), 'BloomFilter exponent must be an integer.');
    grokit_assert(
        $exp > 0 && $exp < 64,
        'BloomFilter exponent must be in range (0,64), ' . $exp . ' given.'
    );

    // Normalise null.check into a map of input name => whether to check it.
    $nullCheck = get_default($t_args, 'null.check', false);
    $nullable = [];
    if (is_bool($nullCheck)) {
        foreach ($input as $name => $type) {
            $nullable[$name] = $nullCheck;
        }
    } elseif (is_array($nullCheck)) {
        foreach ($input as $name => $type) {
            $nullable[$name] = false;
        }

        foreach ($nullCheck as $index => $n) {
            grokit_assert(
                is_string($n),
                'BloomFilter null.check has invalid value at position ' . $index
            );
            grokit_assert(
                array_key_exists($n, $nullable),
                'BloomFilter null.check has unknown input ' . $n . ' at position ' . $index
            );

            $nullable[$n] = true;
        }
    } else {
        grokit_error('BloomFilter null.check must be boolean or list of inputs to check for nulls');
    }

    $debug = get_default($t_args, 'debug', 0);

    $bits = pow(2, $exp);
    $bytes = ceil($bits / 8.0);

    // Calculate the number of bits set for every possible value of a byte
    $nBits = [];
    for ($i = 0; $i < 256; $i++) {
        $n = $i;
        $b = 0;
        while ($n > 0) {
            // Clears the lowest set bit; one iteration per set bit.
            $n &= $n - 1;
            $b++;
        }
        $nBits[$i] = $b;
    }

    $className = generate_name('BloomFilter');
?>

class <?= $className ?> {
    static constexpr size_t BITS = <?= $bits ?>;
    static constexpr size_t BYTES = <?= $bytes ?>;
    static constexpr size_t MASK = BITS - 1;
    static constexpr std::array<unsigned char, 256> BITS_SET = { <?= implode(', ', $nBits) ?> };
    static constexpr std::array<unsigned char, 8> BIT_MASKS = {
        0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80
    };

    size_t count;

    std::array<unsigned char, BYTES> set;
    //unsigned char set[BYTES];
    //std::bitset<BITS> set;

public:
    <?= $className ?>() : count(0), set() {
        for( size_t i = 0; i < BYTES; i++ ) {
            set[i] = 0;
        }
    }

    ~<?= $className ?>() { }

    void AddItem( <?= const_typed_ref_args($input) ?> ) {
        count++;
<?php
    foreach ($nullable as $name => $check) {
        if ($check) {
?>
        if( IsNull( <?= $name ?> ) ) return;
<?php
        } // if checking for nulls
    } // foreach input
?>
        size_t hashVal = H_b;
<?php foreach ($input as $name => $type) { ?>
        hashVal = CongruentHash(Hash(<?= $name ?>), hashVal);
<?php } // foreach input ?>
        hashVal = hashVal & MASK;
        const size_t bucket = hashVal >> 3;
        const size_t bucket_index = hashVal & 0x07;
        const unsigned char mask = BIT_MASKS[bucket_index];
        set[bucket] |= mask;
    }

    void AddState( <?= $className ?> & o ) {
        count += o.count;
        for( size_t i = 0; i < BYTES; i++ ) {
            set[i] |= o.set[i];
        }
    }

    void GetResult( <?= $outputType ?> & <?= $outputName ?> ) {
        size_t nBitsSet = 0;
        constexpr long double bits = static_cast<long double>(BITS);
        for( size_t i = 0; i < BYTES; i++ ) {
            nBitsSet += BITS_SET[set[i]];
        }
        long double bitsSet = static_cast<long double>(nBitsSet);

        // Declared outside the branches so the optional debug output below
        // can refer to it in both cases.
        long double cardinality = 0;

        if( nBitsSet == BITS ) {
            // All Bits set, just give the cardinality as an estimate.
            cardinality = static_cast<long double>(count);
            <?= $outputName ?> = count;
        } else {
            cardinality = - bits * std::log(1 - (bitsSet / bits));
            <?= $outputName ?> = cardinality;
        }

<?php if ($debug > 0) { ?>
        std::cout << "BloomFilter:"
            << " bitsSet(" << bitsSet << ")"
            << " bits(" << bits << ")"
            << " cardinality(" << cardinality << ")"
            << " output(" << <?= $outputName ?> << ")"
            << std::endl;
<?php } // if debugging enabled ?>
    }
};

// Storage for static members
constexpr std::array<unsigned char, 256> <?= $className ?>::BITS_SET;
constexpr std::array<unsigned char, 8> <?= $className ?>::BIT_MASKS;

<?php
    $system_headers = ['cmath', 'array'];
    if ($debug > 0) {
        $system_headers[] = 'iostream';
    }

    return [
        'kind' => 'GLA',
        'name' => $className,
        'input' => $input,
        'output' => $output,
        'result_type' => 'single',
        'user_headers' => ['HashFunctions.h'],
        'system_headers' => $system_headers,
    ];
}
/**
 * A GLA that estimates the cardinality of a dataset using the HyperLogLog
 * algorithm, with a configurable number of bins.
 *
 * Template arguments (all optional):
 *  - 'bins.exponent'   integer in [4, 24], default 4; 2^exponent one-byte
 *                      registers are used.
 *  - 'use.builtin.ctz' default true; selects __builtin_ctzl instead of the
 *                      generated popcount-based ctz().
 *  - 'debug'           default 0; values > 0 add <iostream> to the headers.
 *
 * NOTE(review): the generated AddItem calls the ctz function on
 * `hashVal >> INDEX_BITS`, which can be 0. GCC documents __builtin_ctz* as
 * undefined for a zero argument -- consider guarding that case.
 *
 * @param array $t_args Template arguments (see above).
 * @param array $input  Inputs (name => type); all are hashed together.
 * @param array $output Exactly one output (name => type or null); a null
 *                      type defaults to BASE::BIGINT and must be numeric.
 *
 * @return array Descriptor of the generated GLA.
 */
function HyperLogLog(array $t_args, array $input, array $output)
{
    $debug = get_default($t_args, 'debug', 0);

    grokit_assert(
        \count($output) == 1,
        'HyperLogLog produces only 1 value, ' . \count($output) . ' outputs given.'
    );

    $outputName = array_keys($output)[0];
    $outputType = array_get_index($output, 0);
    if (is_null($outputType)) {
        $outputType = lookupType('BASE::BIGINT');
    }
    $output[$outputName] = $outputType;

    // Message previously named BloomFilter (copy-paste slip).
    grokit_assert($outputType->is('numeric'), 'HyperLogLog output must be numeric!');

    $exp = get_first_key_default($t_args, ['bins.exponent'], 4);
    grokit_assert(is_integer($exp), 'HyperLogLog bins.exponent must be an integer');
    // Set limit of 2^24 bins, because states past 16MB start to get silly.
    // The bound is inclusive, matching the documented range [4, 24].
    grokit_assert($exp >= 4 && $exp <= 24, 'HyperLogLog bins.exponent must be in range [4, 24]');

    $useBuiltinCtz = get_default($t_args, 'use.builtin.ctz', true);
    $ctzFunc = $useBuiltinCtz ? '__builtin_ctzl' : 'ctz';

    $bins = pow(2, $exp);

    // Determine the value of alpha based on $exp
    switch ($exp) {
        case 4:
            $alpha = 0.673;
            break;
        case 5:
            $alpha = 0.697;
            break;
        case 6:
            $alpha = 0.709;
            break;
        default:
            $alpha = 0.7213000000000001 / (1 + 1.079 / $bins);
    }

    $className = generate_name('HyperLogLog');
?>

class <?= $className ?> {
    // Number of bins for registers
    static constexpr const size_t NUM_BINS = <?= $bins ?>;
    // Number of bits used to index into registers, log2(NUM_BINS)
    static constexpr const size_t INDEX_BITS = <?= $exp ?>;
    // Mask used to obtain register index from hash value
    static constexpr const size_t INDEX_MASK = NUM_BINS - 1;

    // Alpha coefficient used to correct cardinality estimate. Based on NUM_BINS.
    static constexpr const long double ALPHA = <?= $alpha ?>;

    // Value of cardinality estimate after which we must apply the
    // large range correction
    static constexpr const long double LARGE_BREAKPOINT = (1.0 / 30.0) * <?= pow(2, 32) ?>;

    // Constants for population count
    static constexpr const uint64_t m1  = 0x5555555555555555;
    static constexpr const uint64_t m2  = 0x3333333333333333;
    static constexpr const uint64_t m4  = 0x0f0f0f0f0f0f0f0f;
    static constexpr const uint64_t h01 = 0x0101010101010101;

    // The registers
    std::array<unsigned char, NUM_BINS> registers;

    // A count used to remember how many tuples were processed, mostly for debugging.
    size_t count;

public:

    // count is zero-initialised here because AddItem increments it.
    <?= $className ?>(void) : registers(), count(0) {
        for( auto & elem : registers ) {
            elem = 0;
        }
    }

    ~<?= $className ?>() { }

    int popcount(uint64_t x) {
        // Put count of each 2 bits into those 2 bits
        x -= (x >> 1) & m1;
        // Put count of each 4 bits into those 4 bits
        x = (x & m2) + ((x >> 2) & m2);
        // Put count of each 8 bits into those 8 bits
        x  = (x + (x >> 4)) & m4;
        // Returns left 8 bits of x + (x << 8) + (x << 16) + ...
        return (x * h01) >> 56;
    }

    int ctz(int64_t x) {
        return popcount((x & -x) - 1);
    }

    void AddItem( <?= const_typed_ref_args($input) ?> ) {
        count++;

        uint64_t hashVal = H_b;
<?php foreach ($input as $name => $type) { ?>
        hashVal = CongruentHash(Hash(<?= $name ?>), hashVal);
<?php } // for each input ?>

        const size_t registerIndex = hashVal & INDEX_MASK;
        uint64_t value = hashVal >> INDEX_BITS;
        unsigned char nZeros = <?= $ctzFunc ?>(value);

        unsigned char & registerValue = registers[registerIndex];
        registerValue = registerValue > nZeros ? registerValue : nZeros;
    }

    void AddState( <?= $className ?> & other ) {
        for( size_t i = 0; NUM_BINS > i; i++ ) {
            unsigned char & rVal = registers[i];
            unsigned char & oVal = other.registers[i];
            rVal = rVal > oVal ? rVal : oVal;
        }
    }

    void GetResult( <?= $outputType ?> & <?= $outputName ?> ) {
        // Compute harmonic sum of registers and correct by alpha
        long double cardEst = 0;
        size_t nZeroRegisters = 0;
        for( auto elem : registers ) {
            long double power = - static_cast<long double>(elem);
            cardEst += std::pow(2.0, power);

            if( elem == 0 )
                nZeroRegisters++;
        }
        const long double nBins = static_cast<long double>(NUM_BINS);
        const long double zeroBins = static_cast<long double>(nZeroRegisters);

        cardEst = 1 / cardEst;
        cardEst *= ALPHA * nBins * nBins;

        long double cardinality = cardEst;

        if( (cardEst < 2.5 * NUM_BINS) ) {
            // Possible small range correction
            if( nZeroRegisters > 0 ) {
                // Small range correction
                cardinality = nBins * std::log(nBins / zeroBins);
            }
        }
        // TODO: Figure out if the large range correction is needed for 64-bit
        // hashes.

        <?= $outputName ?> = cardinality;
    }
};

<?php
    $system_headers = ['cmath', 'array', 'cinttypes'];
    if ($debug > 0) {
        $system_headers[] = 'iostream';
    }

    return [
        'kind' => 'GLA',
        'name' => $className,
        'input' => $input,
        'output' => $output,
        'result_type' => 'single',
        'user_headers' => ['HashFunctions.h'],
        'system_headers' => $system_headers,
    ];
}
/**
 * Generates a C++ bitset type with a named boolean accessor per bit.
 *
 * The storage is the smallest of uint8_t/uint16_t/uint32_t/uint64_t that can
 * hold the highest index. The generated ordering operators compare by
 * bit-pattern containment (see the operator definitions), not numerically.
 *
 * Template arguments:
 *  - 'values' (required) non-empty map of bit index (0..63) => accessor
 *             name; names must be valid identifiers ([A-Za-z_][A-Za-z0-9_]*).
 *
 * @param array $t_args Template arguments (see above).
 *
 * @return array Descriptor of the generated TYPE.
 */
function BITSET(array $t_args)
{
    grokit_assert(array_key_exists('values', $t_args), 'No values specified for bitset!');
    $values = $t_args['values'];

    // max()/min() below fail on an empty array, so reject it up front.
    grokit_assert(
        is_array($values) && \count($values) > 0,
        'BITSET requires a non-empty array of values'
    );

    $indicies = array_keys($values);
    $maxIndex = \max($indicies);
    $minIndex = \min($indicies);

    grokit_assert($maxIndex < 64, 'Highest index of bitset must be less than 64');
    grokit_assert($minIndex >= 0, 'Indicies of bitset must be >= 0');

    // Validate every accessor name and accumulate the mask of defined bits.
    $mask = 0;
    foreach ($values as $index => $name) {
        $firstChar = substr($name, 0, 1);
        $arr = str_split($name);
        $valid = array_reduce($arr, function ($res, $item) {
            $res = $res && (ctype_alnum($item) || $item == '_');
            return $res;
        }, ctype_alpha($firstChar) || $firstChar == '_');

        grokit_assert($valid, "Invalid name ({$name}) given for index ({$index}) in bitset.");

        $mask = $mask | 1 << $index;
    }

    // Round the required bit count up to a power of two, at least 8.
    $nBits = floor(pow(2, ceil(log($maxIndex + 1, 2))));
    $nBits = \max(8, $nBits);
    $nHex = $nBits / 4;

    $storageType = "uint{$nBits}_t";
    switch ($nBits) {
        case 8:
            $methodIntType = 'base::BYTE';
            break;
        case 16:
            $methodIntType = 'base::SMALLINT';
            break;
        case 32:
            $methodIntType = 'base::INT';
            break;
        case 64:
            $methodIntType = 'base::BIGINT';
            break;
        default:
            grokit_error('BITSET requires invalid number of bits (' . $nBits . ')');
    }

    $className = generate_name('BITSET');

    $methods = [];
    $constructors = [];
    $functions = [];

    $globalContents = "";
?>

class <?= $className ?> {
public:
    typedef <?= $storageType ?> StorageType;

private:
    StorageType bits;

    static constexpr StorageType _MASK_ = 0x<?= sprintf("%0{$nHex}X", $mask) ?>;

public:

    <?= $className ?>(void);
<?php $constructors[] = [[$methodIntType], true]; ?>
    <?= $className ?>(const StorageType _bits);

    <?= $className ?> & operator =( const StorageType _bits );

    /***** Comparison Opeators *****/
    bool operator ==( const <?= $className ?> & o ) const;
    bool operator !=( const <?= $className ?> & o ) const;
    bool operator <( const <?= $className ?> & o ) const;
    bool operator >( const <?= $className ?> & o ) const;
    bool operator <=( const <?= $className ?> & o ) const;
    bool operator >=( const <?= $className ?> & o ) const;

    /***** Conversion *****/
    void ToJson( Json::Value & dest ) const;
    void FromJson( const Json::Value & src );

    /***** Accessors *****/
<?php $methods[] = ['Bits', [], $methodIntType, true]; ?>
    StorageType Bits(void) const;

<?php $methods[] = ['IsSet', ['base::BYTE'], 'base::bool', true]; ?>
    // Whether or not a bit is set by index
    bool IsSet(unsigned char index) const;

    // Accessors for each value
<?php
    foreach ($values as $index => $name) {
        $methods[] = [$name, [], 'base::bool', true];
?>
    bool <?= $name ?>(void) const;
<?php } // for each value ?>
};

inline
<?= $className ?> :: <?= $className ?>( void ) : bits(0) { }

inline
<?= $className ?> :: <?= $className ?>( const StorageType _bits ) : bits(_bits) { }

inline
<?= $className ?> & <?= $className ?> :: operator = (const StorageType _bits) {
    bits = _bits;
    return *this;
}

inline
bool <?= $className ?> :: operator == (const <?= $className ?> & o ) const {
    return bits == o.bits;
}

inline
bool <?= $className ?> :: operator != (const <?= $className ?> & o ) const {
    return bits != o.bits;
}

inline
bool <?= $className ?> :: operator < (const <?= $className ?> & o ) const {
    return (bits == (bits & o.bits)) && (bits != o.bits);
}

inline
bool <?= $className ?> :: operator > (const <?= $className ?> & o ) const {
    return (bits == (bits | o.bits)) && (bits != o.bits);
}

inline
bool <?= $className ?> :: operator <= (const <?= $className ?> & o ) const {
    return bits == (bits & o.bits);
}

inline
bool <?= $className ?> :: operator >= (const <?= $className ?> & o ) const {
    return bits == (bits | o.bits);
}

inline
auto <?= $className ?> :: Bits( void ) const -> StorageType {
    return bits;
}

inline
bool <?= $className ?> :: IsSet(unsigned char index) const {
    StorageType mask = ((StorageType) 1) << index;
    return bits & mask;
}

inline
void <?= $className ?> :: ToJson( Json::Value & dest ) const {
    dest = (Json::Int64) bits;
}

inline
void <?= $className ?> :: FromJson( const Json::Value & src ) {
    bits = (StorageType) src.asInt64();
}

<?php foreach ($values as $index => $name) { ?>
// Marked inline like every other out-of-class member definition, so the
// generated code can be included from more than one translation unit.
inline
bool <?= $className ?> :: <?= $name ?>(void) const {
    return bits & 0x<?= sprintf("%X", 1 << $index) ?>;
}

<?php } // for each value ?>

<?php ob_start(); ?>

<?php $functions[] = ['Hash', ['@type'], 'base::BIGINT', true, true]; ?>
template<>
inline
uint64_t Hash(const @type & thing) {
    return thing.Bits();
}

inline
void FromString( @type & c, const char * str ) {
    c = atol(str);
}

inline
int ToString( const @type & c, char * buffer ) {
<?php
    // printf length modifier matching the storage width.
    $format = $nBits < 16 ? 'hh' : ($nBits < 32 ? 'h' : ($nBits < 64 ? '' : 'l'));
?>
    sprintf(buffer, "%<?= $format ?>d", c.Bits());
    return strlen(buffer) + 1;
}

inline
void ToJson( const @type & src, Json::Value & dest ) {
    src.ToJson(dest);
}

inline
void FromJson( const Json::Value & src, @type & dest ) {
    dest.FromJson(src);
}

<?php $globalContents .= ob_get_clean(); ?>

<?php
    return [
        'kind' => 'TYPE',
        'name' => $className,
        'binary_operators' => ['==', '!=', '>', '<', '>=', '<='],
        // cstdio/cstdlib/cstring cover sprintf, atol and strlen used above.
        'system_headers' => ['cinttypes', 'cstdio', 'cstdlib', 'cstring'],
        'global_content' => $globalContents,
        'complex' => false,
        'methods' => $methods,
        'constructors' => $constructors,
        'functions' => $functions,
        'describe_json' => DescribeJson('integer'),
        'extras' => ['size.bytes' => $nBits / 8],
    ];
}
/**
 * Generates a C++ GLA class that splits its input across a fixed number of
 * segments, each backed by its own instance of an inner GLA.
 *
 * The first input attribute is the key: it is hashed to choose the segment
 * (and, when more than one pass is requested, the pass) an item belongs to.
 * The remaining inputs are forwarded to the inner GLA. Local per-segment
 * states are merged into the shared constant state at every chunk boundary.
 *
 * Template arguments read from $t_args:
 *  - 'gla' | 'GLA' | 0       : the inner GLA (required, must satisfy is_gla()).
 *  - 'passes'                : positive integer, default 1. More than one pass
 *                              makes the generated GLA iterable.
 *  - 'segments'              : forwarded to BASE::SegmenterState, default 64.
 *  - 'inner.prefer.fragment' : when true the inner GLA's 'fragment' result
 *                              type is preferred over 'multi'; default false.
 *
 * @param array $t_args       Template arguments (see above).
 * @param array $input        Named input types; the first entry is the key.
 * @param array $output       Named output types, handed to the inner GLA.
 * @param array $given_states States forwarded to the inner GLA's apply().
 *
 * @return array GLA description consumed by the code generator.
 */
function Segmenter(array $t_args, array $input, array $output, array $given_states)
{
    $resType = ['fragment', 'multi'];

    $system_headers = ['array', 'vector', 'memory', 'cinttypes', 'unordered_map'];
    $user_headers = ['HashFunctions.h'];
    $lib_headers = [];

    $preferFragment = get_default($t_args, 'inner.prefer.fragment', false);
    $wantedRes = $preferFragment ? ['fragment', 'multi'] : ['multi', 'fragment'];

    $nInputs = \count($input);
    grokit_assert($nInputs > 1, 'Segmenter: Not enough inputs specified!');

    // The first input is the segmentation key; everything else belongs to
    // the inner GLA.
    $keyName = array_keys($input)[0];
    $keyType = array_get_index($input, 0);
    $innerInputs = array_slice($input, 1, $nInputs - 1, true);

    $gla = get_first_key($t_args, ['gla', 'GLA', 0]);
    grokit_assert(is_gla($gla), 'Segmenter: [gla] argument must be a valid GLA');
    $gla = $gla->apply($innerInputs, $output, $given_states);

    $n_passes = get_default($t_args, 'passes', 1);
    grokit_assert(is_int($n_passes), 'Segmenter: [passes] argument must be an integer');
    grokit_assert($n_passes > 0, 'Segmenter: [passes] argument must be > 0');

    $libraries = $gla->libraries();

    $innerRes = get_first_value($gla->result_type(), $wantedRes);

    // Use the inputs/outputs as reported by the applied inner GLA.
    $innerInputs = $gla->input();
    $innerOutput = $gla->output();

    $input = array_merge([$keyName => $keyType], $innerInputs);
    $output = $innerOutput;

    $segments = get_default($t_args, 'segments', 64);

    $constState = lookupResource('BASE::SegmenterState', ['gla' => $gla, 'passes' => $n_passes, 'segments' => $segments]);

    $className = generate_name('Segmenter_');

    // $savedArgs: constructor arguments stored as members.
    // $cArgs: constructor parameters of the generated class.
    // $innerCArgs: expressions passed to every inner GLA constructor.
    $savedArgs = [];
    $cArgs = [];
    $innerCArgs = [];
    if ($gla->configurable()) {
        $savedArgs['json_init'] = 'Json::Value';
        $cArgs['json_init'] = 'Json::Value';
        $innerCArgs[] = 'json_init';
    }

    $cArgs['const_state'] = $constState;

    if ($gla->has_state()) {
        $innerCArgs[] = 'constState.inner_cstate';
    }

    $cstStr = \count($innerCArgs) > 0 ? '(' . implode(',', $innerCArgs) . ')' : '';

    grokit_assert(!$gla->iterable(), 'Segmenter does not support iterable GLAs');

    $iterable = $n_passes > 1;
?>
class <?php echo $className; ?> {
private:
    using ConstantState = <?php echo $constState; ?> ;
    using SplitState = ConstantState::SplitState;

    static constexpr const size_t NUM_STATES = SplitState::NUM_STATES;

    using InnerGLA = <?php echo $gla; ?> ;
    using InnerGLAPtr = std::unique_ptr<InnerGLA>;
    using GLA_Array = std::array<InnerGLAPtr, NUM_STATES>;

public:
    using size_type = std::size_t;

<?php if ($innerRes == 'fragment') { ?>
    class Iterator {
    private:
        InnerGLA * gla;
        int fragmentNum;

        InnerGLA::Iterator * innerIter;

    public:
        Iterator( InnerGLA * _gla, int _fragmentNum, int _innerFrag ) :
            gla(_gla), fragmentNum(_fragmentNum), innerIter(nullptr)
        {
            innerIter = gla->Finalize(_innerFrag);
        }

        ~Iterator(void) {
            if( innerIter != nullptr ) {
                delete innerIter;
                innerIter = nullptr;
            }
        }

        bool GetNextResult( <?php echo typed_ref_args($gla->output()); ?> ) {
            return innerIter->GetNextResult(<?php echo args($gla->output()); ?> );
        }

        int FragmentNumber() {
            return fragmentNum;
        }
    };
<?php } else { // if inner result type is fragment ?>
    class Iterator {
    private:
        InnerGLA * gla;
        int fragmentNum;

    public:
        Iterator( InnerGLA * _gla, int fragNo ) : gla(_gla), fragmentNum(fragNo) {
            gla->Finalize();
        }

        ~Iterator(void) { }

        bool GetNextResult( <?php echo typed_ref_args($gla->output()); ?> ) {
            return gla->GetNextResult(<?php echo args($gla->output()); ?> );
        }

        int FragmentNumber() {
            return fragmentNum;
        }
    };
<?php } // if inner result type is multi ?>

private:
    const ConstantState & constState;

    GLA_Array localState;

    // Iteration state for multi result type
    int numFrags;
    int multiFragNo;
    Iterator * multiIter;

<?php if ($innerRes == 'fragment') { ?>
    using frag_info = std::pair<int, int>;
    using frag_map_t = std::unordered_map<int, frag_info>;
    frag_map_t fragMap;
<?php } ?>

<?php foreach ($savedArgs as $name => $type) { ?>
    const <?php echo $type; ?> <?php echo $name; ?> ;
<?php } // foreach saved arg ?>

public:

    // Constructor
    <?php echo $className; ?> ( <?php echo const_typed_ref_args($cArgs); ?> ) :
        constState(const_state)
        , localState()
        , numFrags(0)
        , multiFragNo(0)
        , multiIter(nullptr)
<?php if ($innerRes == 'fragment') { ?>
        , fragMap()
<?php } foreach ($savedArgs as $name => $type) { ?>
        , <?php echo $name; ?> (<?php echo $name; ?> )
<?php } // foreach constructor arg to save ?>
    {
        for( auto & elem : localState ) {
            elem.reset(new InnerGLA<?php echo $cstStr; ?> );
        }
    }

    void AddItem( <?php echo const_typed_ref_args($input); ?> ) {
        uint64_t hashVal = CongruentHash(Hash(<?php echo $keyName; ?> ), H_b + 1);
        uint64_t passNum = (hashVal / NUM_STATES) % ConstantState::N_PASSES;
        uint64_t segNum = hashVal % NUM_STATES;

<?php if ($n_passes > 1) { ?>
        if( passNum != constState.pass ) {
            return;
        }
<?php } // more than 1 pass ?>
        localState[segNum]->AddItem(<?php echo args($innerInputs); ?> );
    }

    void ChunkBoundary(void) {
        // Merge local states into the global state

        SplitState & globalStates = constState.segments;

        int theseAreOk[NUM_STATES];
        for( int i = 0; NUM_STATES > i; i++ ) {
            theseAreOk[i] = 1;
        }

        int segsLeft = NUM_STATES;

        while( segsLeft > 0 ) {
            InnerGLA * checkedOut = nullptr;
            int whichOne = globalStates.CheckOutOne( theseAreOk, checkedOut );

            if( checkedOut == NULL ) {
                checkedOut = new InnerGLA<?php echo $cstStr; ?> ;
            }

            checkedOut->AddState( *(localState[whichOne]) );

            globalStates.CheckIn( whichOne, checkedOut );

            theseAreOk[whichOne] = 0;
            segsLeft--;
        }

        // Re-initialize the local states
        for( auto & elem : localState ) {
<?php if ($gla->is('resettable')) { ?>
            elem->Reset();
<?php } else { // if resettable ?>
            elem.reset(new InnerGLA<?php echo $cstStr; ?> );
<?php } // if not resettable ?>
        }
    }

    void AddState( <?php echo $className; ?> & o ) {
        // Do nothing
    }

    void Finalize() {
        SplitState & globalStates = constState.segments;

        if( multiIter != nullptr)
            delete multiIter;

        multiFragNo = 0;

        // The multi interface walks fragments 0..numFrags-1, so the fragment
        // count (and the fragment map, when present) must be populated here;
        // otherwise GetNextResult() sees numFrags == 0 and yields nothing.
        GetNumFragments();

<?php if ($innerRes == 'fragment') { ?>
        frag_info fInfo = fragMap[multiFragNo];
        multiIter = new Iterator(globalStates.Peek(fInfo.first), multiFragNo, fInfo.second);
<?php } else { ?>
        multiIter = new Iterator(globalStates.Peek(multiFragNo), multiFragNo);
<?php } ?>
    }

    bool GetNextResult(<?php echo typed_ref_args($output); ?> ) {
        bool gotResult = false;
        SplitState & globalStates = constState.segments;

        while( (multiFragNo < numFrags && multiIter != nullptr) && !gotResult ) {
            gotResult = multiIter->GetNextResult(<?php echo args($output); ?> );

            if( !gotResult ) {
                multiFragNo++;
                delete multiIter;

                if( numFrags > multiFragNo ) {
<?php if ($innerRes == 'fragment') { ?>
                    frag_info fInfo = fragMap[multiFragNo];
                    multiIter = new Iterator(globalStates.Peek(fInfo.first), multiFragNo, fInfo.second);
<?php } else { ?>
                    multiIter = new Iterator(globalStates.Peek(multiFragNo), multiFragNo);
<?php } ?>
                } else {
                    multiIter = nullptr;
                }
            }
        }

        return gotResult;
    }

    int GetNumFragments(void) {
<?php if ($innerRes == 'fragment') { ?>
        SplitState & globalStates = constState.segments;
        numFrags = 0;

        for (int i = 0; i < NUM_STATES; i++) {
            int curFrags = globalStates.Peek(i)->GetNumFragments();

            for (int curFrag = 0; curFrag < curFrags; curFrag++) {
                fragMap[numFrags] = frag_info(i, curFrag);

                numFrags++;
            }
        }
<?php } else { ?>
        numFrags = NUM_STATES;
<?php } ?>
        return numFrags;
    }

    Iterator * Finalize( int fragment ) {
        SplitState & globalStates = constState.segments;

<?php if ($innerRes == 'fragment') { ?>
        frag_info info = fragMap[fragment];
        return new Iterator(globalStates.Peek(info.first), fragment, info.second);
<?php } else { ?>
        return new Iterator(globalStates.Peek(fragment), fragment);
<?php } ?>
    }

    bool GetNextResult( Iterator * it, <?php echo typed_ref_args($output); ?> ) {
        bool ret = it->GetNextResult(<?php echo args($output); ?> );

        return ret;
    }

<?php if ($iterable) { ?>
    bool ShouldIterate( ConstantState & modible ) {
        modible.pass++;

        return modible.pass < ConstantState::N_PASSES;
    }

    void PostFinalize() {
        constState.segments.Reset();
    }
<?php } // iterable ?>

<?php if ($gla->is('finite container')) { ?>
    size_type size() {
        SplitState & globalStates = constState.segments;
        size_type s = 0;
        for( int i = 0; NUM_STATES > i; i++ ) {
            InnerGLA * ptr = globalStates.Peek(i);
            s += ptr->size();
        }

        return s;
    }

    size_type size(int frag) {
        SplitState & globalStates = constState.segments;
        return globalStates.Peek(frag)->size();
    }
<?php } // if the gla is a container ?>
};

typedef <?php echo $className; ?> ::Iterator <?php echo $className; ?>_Iterator;

<?php
    return [
        'kind' => 'GLA',
        'name' => $className,
        'system_headers' => $system_headers,
        'user_headers' => $user_headers,
        'lib_headers' => $lib_headers,
        'libraries' => $libraries,
        'input' => $input,
        'output' => $output,
        'result_type' => $resType,
        'generated_state' => $constState,
        'required_states' => $gla->req_states(),
        'chunk_boundary' => true,
        'configurable' => $gla->configurable(),
        'iterable' => $iterable,
        'post_finalize' => $iterable,
        'intermediates' => true,
    ];
}
/**
 * Generates a C++ GLA class that feeds the same input stream to several inner
 * GLAs and combines their outputs into a single result tuple.
 *
 * Template arguments read from $t_args:
 *  - 'input'    : named input types, used only when $inputs is empty. Entries
 *                 that are identifiers are resolved through lookupType().
 *  - 'glas' | 0 : map of name => ['gla' => GLA, 'inputs' => [attribute names],
 *                 'outputs' => [attribute names]].
 *
 * Each inner GLA is applied to the listed subset of inputs/outputs. An output
 * attribute may be claimed by one inner GLA only. Inner GLAs may produce
 * their result as 'multi', 'single' or 'state'; a 'state' GLA must have
 * exactly one output, whose type becomes base::STATE of that GLA.
 *
 * @param array $t_args  Template arguments (see above).
 * @param array $inputs  Named input types; may be empty (see 'input').
 * @param array $outputs Named output types; the types are overwritten with
 *                       those reported by the inner GLAs.
 *
 * @return array GLA description consumed by the code generator.
 */
function Multiplexer(array $t_args, array $inputs, array $outputs)
{
    $className = generate_name('Multiplexer');

    if (\count($inputs) == 0) {
        grokit_assert(array_key_exists('input', $t_args), 'No inputs specified for Multiplexer');

        $inputs = $t_args['input'];

        // Resolve identifiers in place. The loop must walk $inputs itself
        // (the array that is used afterwards), not a differently named key
        // of $t_args.
        foreach ($inputs as $name => &$type) {
            if (is_identifier($type)) {
                $type = lookupType(strval($type));
            }

            grokit_assert(is_datatype($type), 'Only types may be specified as inputs to Multiplexer.');
        }
        // Break the reference: $type is reused as a plain loop variable
        // further down and would otherwise overwrite the last input.
        unset($type);

        $inputs = ensure_valid_names($inputs, 'multi_input');
    }

    $glas = get_first_key($t_args, ['glas', 0]);

    grokit_assert(\count($glas) > 0, 'No GLAs specified for Multiplexer.');

    $myGLAs = [];
    $glaInputs = [];
    $glaOutputs = [];
    $glaRez = [];
    $resultType = 'multi';

    $usedOutputs = [];

    $libraries = [];

    $glaGenStates = [];
    $glaReqStates = [];

    $configurable = false;
    $constArgs = [];
    $genStates = [];
    $reqStates = [];

    // NOTE(review): $iterable, $reqStates and the per-GLA state arrays are
    // collected below but are not part of the returned description.
    $iterable = null;

    foreach ($glas as $name => $glaInfo) {
        grokit_assert(is_array($glaInfo), 'Template argument \'glas\' must be an array');

        grokit_assert(array_key_exists('gla', $glaInfo), 'No GLA given for glas[' . $name . ']');
        grokit_assert(array_key_exists('inputs', $glaInfo), 'No inputs given for glas[' . $name . ']');
        grokit_assert(array_key_exists('outputs', $glaInfo), 'No outputs given for glas[' . $name . ']');

        $gla = $glaInfo['gla'];
        $glaInAtts = $glaInfo['inputs'];
        $glaOutAtts = $glaInfo['outputs'];

        grokit_assert(is_gla($gla), 'Non-GLA given for glas[' . $name . '][gla]');
        grokit_assert(is_array($glaInAtts), 'Non-array given for inputs for gla ' . $name);
        grokit_assert(is_array($glaOutAtts), 'Non-array given for outputs for gla ' . $name);

        $glaInAtts = array_map('strval', $glaInAtts);
        $glaOutAtts = array_map('strval', $glaOutAtts);

        $glaName = "innerGLA_" . $name;

        $glaInputs[$glaName] = [];
        $glaOutputs[$glaName] = [];

        foreach ($glaInAtts as $att) {
            grokit_assert(array_key_exists($att, $inputs), 'Input ' . $att . ' for GLA ' . $name . ' not found in inputs');
            $glaInputs[$glaName][$att] = $inputs[$att];
        }

        foreach ($glaOutAtts as $att) {
            grokit_assert(array_key_exists($att, $outputs), 'Output ' . $att . ' for GLA ' . $name . ' not found in outputs');
            // Strict comparison: attribute names are strings and must not be
            // treated as equal just because they look like equal numbers.
            grokit_assert(!in_array($att, $usedOutputs, true), 'Output ' . $att . ' used by multiple GLAs');
            $usedOutputs[] = $att;
            $glaOutputs[$glaName][$att] = $outputs[$att];
        }

        //fwrite(STDERR, "Inputs for GLA " . $glaName . ": " . print_r($glaInputs[$glaName], true) . PHP_EOL );
        //fwrite(STDERR, "Outputs for GLA " . $glaName . ": " . print_r($glaOutputs[$glaName], true) . PHP_EOL );

        $gla = $gla->apply($glaInputs[$glaName], $glaOutputs[$glaName]);
        $myGLAs[$glaName] = $gla;

        $glaRez[$glaName] = get_first_value($gla->result_type(), ['multi', 'single', 'state']);

        $libraries = array_merge($libraries, $gla->libraries());

        if ($glaRez[$glaName] == 'state') {
            grokit_assert(\count($glaOutputs[$glaName]) == 1, "GLA {$glaName} is produced as state, and thus must have exactly 1 output.");
            $stateType = lookupType('base::STATE', ['type' => $gla]);
            $glaOutputs[$glaName] = array_combine(array_keys($glaOutputs[$glaName]), [$stateType]);
        } else {
            grokit_assert(\count($glaOutputs[$glaName]) == \count($gla->output()), 'GLA ' . $glaName . ' produces different number of outputs than expected');
            $glaOutputs[$glaName] = array_combine(array_keys($glaOutputs[$glaName]), $gla->output());
        }

        // Set types for our output
        foreach ($glaOutputs[$glaName] as $attName => $type) {
            $outputs[$attName] = $type;
        }

        if (is_null($iterable)) {
            $iterable = $gla->iterable();
        } else {
            grokit_assert($iterable == $gla->iterable(), 'Multiplexer does not support mixing iterable and non-iterable GLAs');
        }

        $glaReqStates[$glaName] = $gla->req_states();
        foreach ($gla->req_states() as $rstate) {
            $reqStates[] = $rstate;
        }

        $glaGenStates[$glaName] = $gla->state();

        // TODO: Support constant states
        grokit_assert(!$gla->has_state(), 'Multiplexer currently does not support constant states.');
    }

    $libraries = array_unique($libraries);

    $extra = ['glas' => $myGLAs];
?>

class <?php echo $className; ?> {
<?php foreach ($myGLAs as $name => $type) { ?>
    <?php echo $type; ?> <?php echo $name; ?> ;
<?php } // foreach inner gla ?>

    class Iterator {
        bool _gotResultsOnce;
        bool _valid;

<?php foreach ($myGLAs as $name => $type) { ?>
        <?php echo $type; ?> * it_<?php echo $name; ?> ;
<?php } // foreach inner gla ?>

    public:
        Iterator(void) : _gotResultsOnce(false), _valid(false),
            <?php echo array_template('it_{key}(nullptr)', ', ', $myGLAs); ?>

        { }

        Iterator(<?php echo typed_ref_args($myGLAs); ?> ) : _gotResultsOnce(false), _valid(true),
            <?php echo array_template('it_{key}(&{key})', ', ', $myGLAs); ?>

        {
<?php foreach ($myGLAs as $name => $type) {
        if ($glaRez[$name] == 'multi') { ?>
            <?php echo $name; ?> .Finalize();
<?php } // if inner GLA is multi
    } // foreach inner gla ?>
        }

        Iterator( const Iterator & other) = default;

        ~Iterator() { }

        bool GetNextResult( <?php echo typed_ref_args($outputs); ?> ) {
            FATALIF(!_valid, "Tried to get results from an invalid iterator.");

            bool ret = !_gotResultsOnce;
            _gotResultsOnce = true;

<?php foreach ($myGLAs as $name => $type) {
        if ($glaRez[$name] == 'multi') { ?>
            ret |= it_<?php echo $name; ?> ->GetNextResult(<?php echo args($glaOutputs[$name]); ?> );
<?php } // if inner GLA is multi
    } // foreach inner gla ?>

            if( ret ) {
<?php foreach ($myGLAs as $name => $type) {
        if ($glaRez[$name] == 'single') { ?>
                it_<?php echo $name; ?> ->GetResult(<?php echo args($glaOutputs[$name]); ?> );
<?php } elseif ($glaRez[$name] == 'state') {
            $stateVar = array_keys($glaOutputs[$name])[0];
            $stateType = $glaOutputs[$name][$stateVar]; ?>
                <?php echo $stateVar; ?> = <?php echo $stateType; ?> (it_<?php echo $name; ?> );
<?php } // if inner GLA is state
    } // foreach inner gla ?>
            }

            return ret;
        }
    };

    Iterator multiIterator;

public:
    <?php echo $className; ?> () { }
    ~<?php echo $className; ?> () { }

    void AddItem(<?php echo const_typed_ref_args($inputs); ?> ) {
        // Call AddItem individually on each GLA.
<?php foreach ($myGLAs as $gName => $gType) { ?>
        <?php echo $gName; ?> .AddItem(<?php echo args($glaInputs[$gName]); ?> );
<?php } // foreach inner gla ?>
    }

    void AddState( <?php echo $className; ?> & other ) {
        // Call AddState individually on each GLA.
<?php foreach ($myGLAs as $gName => $gType) { ?>
        <?php echo $gName; ?> .AddState(other.<?php echo $gName; ?> );
<?php } // foreach inner gla ?>
    }

    void Finalize() {
        multiIterator = Iterator(<?php echo args($myGLAs); ?> );
    }

    bool GetNextResult(<?php echo typed_ref_args($outputs); ?> ) {
        return multiIterator.GetNextResult(<?php echo args($outputs); ?> );
    }

    void GetResult(<?php echo typed_ref_args($outputs); ?> ) {
        Finalize();
        GetNextResult(<?php echo args($outputs); ?> );
    }

<?php foreach (array_keys($myGLAs) as $index => $name) { ?>
    const <?php echo $myGLAs[$name]; ?> & GetGLA<?php echo $index; ?> () const {
        return <?php echo $name; ?> ;
    }
<?php } ?>
};

<?php
    return [
        'kind' => 'GLA',
        'name' => $className,
        'input' => $inputs,
        'output' => $outputs,
        'result_type' => $resultType,
        'libraries' => $libraries,
        'configurable' => $configurable,
        'extra' => $extra,
    ];
}
/**
 * Builds the info object for a generated function.
 *
 * When the 'global' entry of $args is truthy, $value is reduced to the last
 * component returned by LibraryManager::SplitNamespace() before it is handed
 * to the parent constructor. The generator result must define 'input' and
 * 'result'; 'deterministic' is copied only when present.
 *
 * @param mixed $hash  Passed through to the parent constructor.
 * @param mixed $name  Generator name; also used in assertion messages.
 * @param mixed $value Function name, possibly namespaced.
 * @param array $args  Generator result, passed by reference.
 * @param array $targs Passed through to the parent constructor.
 */
public function __construct($hash, $name, $value, array &$args, array $targs)
{
    if (get_default($args, 'global', false)) {
        $pieces = LibraryManager::SplitNamespace($value);
        $lastIndex = \count($pieces) - 1;
        $value = $pieces[$lastIndex];
    }

    parent::__construct(InfoKind::T_FUNC, $hash, $name, $value, $args, $targs);

    // Shared prefix of both "malformed generator result" messages.
    $malformed = 'Malformed return value from function generator ' . $name;

    $hasInput = array_key_exists('input', $args);
    grokit_assert($hasInput, $malformed . ': No input defined.');
    $this->args = $args['input'];

    $hasResult = array_key_exists('result', $args);
    grokit_assert($hasResult, $malformed . ': No result type defined.');
    $this->resultType = $args['result'];

    if (!array_key_exists('deterministic', $args)) {
        return;
    }

    $this->deterministic = $args['deterministic'];
}
/**
 * Generates a C++ filter (GF) class whose Filter() result is drawn from a
 * std::bernoulli_distribution.
 *
 * Template arguments read from $t_args:
 *  - 'p' | 0 : probability handed to the distribution; a number in [0, 1],
 *              default 0.5.
 *  - 'rng'   : name of the random engine inside the chosen namespace,
 *              default 'mt19937_64'.
 *
 * @param array $t_args Template arguments (see above).
 * @param array $inputs Named input types; they only shape Filter()'s signature.
 *
 * @return array GF description consumed by the code generator.
 */
function BernoulliSample($t_args, $inputs)
{
    $probability = get_first_key_default($t_args, ['p', 0], 0.5);

    $isNumber = is_int($probability) || is_float($probability);
    grokit_assert($isNumber, "BernoulliSample: p must be a number in the range [0, 1]");

    $inUnitRange = !($probability < 0 || $probability > 1);
    grokit_assert($inUnitRange, "BernoulliSample: p must be in the range [0, 1]");

    $engine = get_first_key_default($t_args, ['rng'], 'mt19937_64');

    // Everything below assumes the engine comes from the C++ standard library.
    $namespace = 'std';
    $systemHeaders = ['random'];
    $libraries = [];

    $stateResource = lookupResource(
        'base::BernoulliSampleState',
        ['sys_headers' => $systemHeaders, 'libs' => $libraries, 'namespace' => $namespace]
    );

    $className = generate_name('BernoulliSample');
?>

class <?= $className ?> {
public:
    using state_type = <?= $stateResource ?> ;
    using rng_type = <?= $namespace ?> ::<?= $engine ?> ;
    using dist_type = <?= $namespace ?> ::bernoulli_distribution;

private:
    rng_type rng;
    dist_type bernoulli;

public:
    const constexpr static double P = <?= $probability ?> ;

    <?= $className ?> (state_type & _state):
        rng(_state()),
        bernoulli(P)
    { }

    bool Filter( <?= const_typed_ref_args($inputs) ?> ) {
        return bernoulli(rng);
    }
};

<?php
    $description = [];
    $description['kind'] = 'GF';
    $description['name'] = $className;
    $description['input'] = $inputs;
    $description['system_headers'] = $systemHeaders;
    $description['libraries'] = $libraries;
    $description['generated_state'] = $stateResource;

    return $description;
}
/**
 * Looks up (and instantiates if necessary) the data type called $name.
 *
 * A namespaced name is delegated to the inner namespace named by its first
 * component. Otherwise the type is produced by calling the generator function
 * found under this library's 'Type' PHP namespace. Lookups without template
 * arguments are served from, and stored in, the type cache.
 *
 * @param string     $name   Type name, optionally namespaced.
 * @param array|null $t_args Template arguments, or null for a basic type.
 *
 * @return DataType The resolved type.
 */
public function lookupType($name, &$t_args)
{
    if (self::IsNamespaced($name)) {
        // Look up the type in the inner namespace
        $parts = self::SplitNamespace($name);
        $ns = $parts[0];
        $base = $parts[1];

        // NOTE(review): getInnerNS() is called as a free function here;
        // confirm it is not meant to be a method of this class.
        $inner =& getInnerNS($ns);

        return $inner->lookupType($base, $t_args);
    }

    // See if we have the type in the cache.
    // For now, we will only do this for basic types
    $cacheable = $t_args === null;
    if ($cacheable && array_key_exists($name, $this->typeCache)) {
        return $this->typeCache[$name];
    }

    $fullName = self::JoinNamespace($this->fullName(), $name);

    // It wasn't in the cache, so see if the function exists
    // to instantiate it.
    $php_ns = self::JoinNamespacePHP($this->phpName(), 'Type');
    $cpp_ns = self::JoinNamespace($this->fullName(), 'Type');
    $func_name = self::JoinNamespacePHP($php_ns, $name);

    grokit_assert(function_exists($func_name), 'Unable to instantiate type ' . $fullName . ': No corresponding ' . 'function ' . $func_name . ' found');

    $args = [];
    if ($t_args !== null) {
        $args[] = $t_args;
    }

    // NOTE(review): verify the argument order against CallGenerator's
    // signature in this class.
    $info = self::CallGenerator($cpp_ns, $func_name, $args);

    // Create a TypeInfo for the type.
    $tName = self::JoinNamespace($cpp_ns, $info['name']);
    $dt = new DataType($fullName, $tName, $info);

    // Store the type in the cache
    // Only for basic types right now: a templated instantiation must not be
    // returned later for a lookup that carries no template arguments.
    if ($cacheable) {
        $this->typeCache[$name] = $dt;
    }

    return $dt;
}
/**
 * A GLA that determines the distinct values of a dataset.
 *
 * Every input tuple is stored as a key in a hash set; the outputs are the
 * stored tuples, so there must be exactly as many outputs as inputs and each
 * output takes the type of the input at the same position.
 *
 * Template arguments read from $t_args:
 *  - 'use.mct'         : bool, default true. Use mct::closed_hash_set instead
 *                        of std::unordered_set.
 *  - 'init.size'       : positive integer, default 65536. Passed to the set's
 *                        constructor.
 *  - 'mct.keep.hashes' : bool, default false. Last template parameter of the
 *                        MCT set.
 *  - 'fragment.size'   : positive integer, default 100000. Maximum number of
 *                        values per fragment.
 *  - 'null.check'      : bool, or list of input names. Tuples for which a
 *                        checked input IsNull() are not inserted.
 *
 * @param array $t_args Template arguments (see above).
 * @param array $input  Named input types.
 * @param array $output Named output types.
 *
 * @return array GLA description consumed by the code generator.
 */
function Distinct(array $t_args, array $input, array $output)
{
    grokit_assert(\count($input) == \count($output), 'Distinct must have the same outputs as inputs.');

    // Pair outputs with inputs by position and copy the input types over.
    $outputsToInputs = [];
    $i = 0;
    foreach ($input as $name => $type) {
        $outputsToInputs[array_keys($output)[$i]] = $name;
        array_set_index($output, $i++, $type);
    }

    $useMCT = get_default($t_args, 'use.mct', true);
    $initSize = get_default($t_args, 'init.size', 65536);
    $keepHashes = get_default($t_args, 'mct.keep.hashes', false);
    $fragmentSize = get_default($t_args, 'fragment.size', 100000);
    $nullCheck = get_default($t_args, 'null.check', false);

    grokit_assert(is_bool($useMCT), 'Distinct use.mct argument must be boolean');
    grokit_assert(is_integer($initSize), 'Distinct init.size argument must be an integer');
    grokit_assert($initSize > 0, 'Distinct init.size argument must be positive');
    grokit_assert(is_bool($keepHashes), 'Distinct mct.keep.hashes argument must be boolean');
    grokit_assert(is_integer($fragmentSize), 'Distinct fragment.size argument must be integral');
    grokit_assert($fragmentSize > 0, 'Distinct fragment.size argument must be positive');

    // Map of input name => whether that input is checked for null.
    $nullable = [];
    if (is_bool($nullCheck)) {
        foreach ($input as $name => $type) {
            $nullable[$name] = $nullCheck;
        }
    } elseif (is_array($nullCheck)) {
        foreach ($input as $name => $type) {
            $nullable[$name] = false;
        }

        foreach ($nullCheck as $index => $n) {
            grokit_assert(is_string($n), 'Distinct null.check has invalid value at position ' . $index);
            grokit_assert(array_key_exists($n, $nullable), 'Distinct null.check has unknown input ' . $n . ' at position ' . $index);

            $nullable[$n] = true;
        }
    } else {
        grokit_error('Distinct null.check must be boolean or list of inputs to check for nulls');
    }

    $keepHashesText = $keepHashes ? 'true' : 'false';

    $system_headers = ['cinttypes', 'functional', 'vector'];

    if ($useMCT) {
        $system_headers[] = 'mct/hash-set.hpp';
        $definedSet = "mct::closed_hash_set<Key, HashKey, std::equal_to<Key>, std::allocator<Key>, {$keepHashesText}>";
    } else {
        // std::unordered_set is declared in <unordered_set>, not <unordered_map>.
        $system_headers[] = 'unordered_set';
        $definedSet = "std::unordered_set<Key, HashKey, std::equal_to<Key>, std::allocator<Key>>";
    }

    $className = generate_name('Distinct');
?>

class <?php echo $className; ?> {

public:
    // Value being placed into the set.
    struct Key {
<?php foreach ($input as $name => $type) { ?>
        <?php echo $type; ?> <?php echo $name; ?> ;
<?php } // for each input ?>

        // Construct the value by copying all of the attributes.
        Key(<?php echo const_typed_ref_args($input); ?> ) :
<?php
    $first = true;
    foreach ($input as $name => $type) { ?>
            <?php echo $first ? ' ' : ','; ?> <?php echo $name; ?> (<?php echo $name; ?> )
<?php
        $first = false;
    } // for each input ?>
        { }

        bool operator==(const Key & o ) const {
            return true <?php echo array_template("&& ({key} == o.{key})", ' ', $input); ?> ;
        }

        size_t hash_value() const {
            uint64_t hash = H_b;
<?php foreach ($input as $name => $type) { ?>
            hash = CongruentHash(Hash(<?php echo $name; ?> ), hash);
<?php } // for each input ?>
            return size_t(hash);
        }
    };

    // Hashing functor for our value
    struct HashKey {
        size_t operator()(const Key& o) const {
            return o.hash_value();
        }
    };

    using Set = <?php echo $definedSet; ?> ;

    // Iterator object used in multi and fragment result types
    class Iterator {
        public:
        using iterator_t = Set::const_iterator;

        private:

        iterator_t start;
        iterator_t end;

        public:

        Iterator() : start(), end() { }

        Iterator( const iterator_t & _start, const iterator_t & _end ) :
            start(_start), end(_end)
        { }

        Iterator( const Iterator & o ) : start(o.start), end(o.end)
        { }

        bool GetNextResult(<?php echo typed_ref_args($output); ?> ) {
            if( start != end ) {
<?php foreach ($output as $name => $type) { ?>
                <?php echo $name; ?> = start-><?php echo $outputsToInputs[$name]; ?> ;
<?php } // for each output ?>
                start++;
                return true;
            } else {
                return false;
            }
        }
    };

private:

    // Constants
    static constexpr size_t INIT_SIZE = <?php echo $initSize; ?> ;
    static constexpr size_t FRAG_SIZE = <?php echo $fragmentSize; ?> ;

    // Member variables

    uint64_t count;         // Total # tuples seen

    Set distinct;           // Set of distinct values

    using IteratorList = std::vector<Iterator>;

    Iterator multiIterator; // Internal iterator for multi result type
    IteratorList fragments; // Iterator for fragments

public:

    <?php echo $className; ?> () :
        count(0),
        distinct(INIT_SIZE),
        multiIterator(),
        fragments()
    { }

    ~<?php echo $className; ?> () { }

    void Reset(void) {
        count = 0;
        distinct.clear();
    }

    void AddItem(<?php echo const_typed_ref_args($input); ?> ) {
        count++;
<?php foreach ($nullable as $name => $check) {
        if ($check) { ?>
        if( IsNull( <?php echo $name; ?> ) ) return;
<?php } // if checking for nulls
    } // foreach input ?>

        Key key(<?php echo args($input); ?> );

        distinct.insert(key);
        /*
        auto it = distinct.find(key);
        if( it == distinct.end() ) {
            distinct.insert(key);
        }
        */
    }

    void AddState( <?php echo $className; ?> & other ) {
        for( auto & elem : other.distinct ) {
            distinct.insert(elem);
            /*
            auto it = distinct.find(elem);
            if( it == distinct.end() ) {
                distinct.insert(elem);
            }
            */
        }
        count += other.count;
    }

    // Multi interface
    void Finalize(void) {
        multiIterator = Iterator(distinct.cbegin(), distinct.cend());
    }

    bool GetNextResult(<?php echo typed_ref_args($output); ?> ) {
        return multiIterator.GetNextResult(<?php echo args($output); ?> );
    }

    // Fragment interface
    int GetNumFragments(void) {
        fragments.clear();
        int nFrag = 0;

        Iterator::iterator_t prev = distinct.cbegin();
        Iterator::iterator_t end = distinct.cend();
        Iterator::iterator_t next = prev;

        while( next != end ) {
            for( size_t i = 0; next != end && FRAG_SIZE > i; i++ ) {
                next++;
            }
            Iterator nIter(prev, next);
            fragments.push_back(nIter);

            prev = next;
            nFrag++;
        }

        return nFrag;
    }

    Iterator * Finalize(int fragment) {
        return new Iterator(fragments[fragment]);
    }

    bool GetNextResult(Iterator * it, <?php echo typed_ref_args($output); ?> ) {
        return it->GetNextResult(<?php echo args($output); ?> );
    }

    // General methods
    uint64_t get_count() const {
        return count;
    }

    uint64_t get_countDistinct() const {
        return distinct.size();
    }

    const Set & get_distinct() const {
        return distinct;
    }
};

typedef <?php echo $className; ?> ::Iterator <?php echo $className; ?>_Iterator;

<?php
    return [
        'kind' => 'GLA',
        'name' => $className,
        'input' => $input,
        'output' => $output,
        'result_type' => ['multi', 'fragment'],
        'user_headers' => ['HashFunctions.h'],
        'system_headers' => $system_headers,
        'properties' => ['resettable'],
    ];
}
/**
 * Generates a C++ GLA class that assigns a component identifier to every node
 * of a graph given as (source, destination) edges, using a union-find map.
 *
 * The two inputs are hashed with Hash() and the hashes are used as node
 * identifiers. The two outputs are (node, component), both typed with
 * lookupType('int'); the caller's output names are kept.
 *
 * @param array $t_args  Template arguments (not read by this generator).
 * @param array $inputs  Exactly two input types: edge source and destination.
 * @param array $outputs Exactly two named outputs: node and component.
 *
 * @return array GLA description consumed by the code generator.
 */
function ConnectedComponents(array $t_args, array $inputs, array $outputs)
{
    // Class name is randomly generated
    $className = generate_name("CCompGLA");

    // Processing of inputs.
    grokit_assert(count($inputs) == 2, 'Connected Components: 2 inputs expected');
    $inputs_ = array_combine(['src', 'dst'], $inputs);

    // Setting output type
    // array_combine() below needs exactly as many output names as values.
    grokit_assert(count($outputs) == 2, 'Connected Components: 2 outputs expected');
    $outType = lookupType('int');
    $outputs_ = ['node' => $outType, 'component' => $outType];
    $outputs = array_combine(array_keys($outputs), $outputs_);

    // <cassert> is required by the assert() in the generated AddState().
    $sys_headers = ["vector", "cassert", "mct/hash-map.hpp"];
    $user_headers = [];
    $lib_headers = [];
?>

using namespace std;

class <?php echo $className; ?> ;

class <?php echo $className; ?> {

    class UnionFindMap{
    private:
        mct::closed_hash_map<uint64_t, uint64_t>* parent;
        mct::closed_hash_map<uint64_t, uint64_t> sz;

        const uint64_t NON_EXISTING_ID = -1;

    public:
        // allocates the (initially empty) parent map
        UnionFindMap(){
            parent = new mct::closed_hash_map<uint64_t, uint64_t>();
        }

        uint64_t Find(uint64_t i){
            if ((*parent).find(i) == (*parent).end()){
                return NON_EXISTING_ID;
            }
            // use path compression here
            while (i != (*parent)[i]){
                (*parent)[i] = (*parent)[(*parent)[i]];
                i = (*parent)[i];
            }
            return i;
        }

        // merge the smaller tree into the larger one;
        // nodes that are not known yet are added on the fly
        void Union(uint64_t i, uint64_t j){
            uint64_t ip = Find(i);
            uint64_t jp = Find(j);

            if (ip != NON_EXISTING_ID && jp != NON_EXISTING_ID){// both exists
                if (ip != jp){
                    if (sz[ip] < sz[jp]){
                        (*parent)[ip] = jp; sz[jp] += sz[ip];
                    }else{
                        (*parent)[jp] = ip; sz[ip] += sz[jp];
                    }
                }
            }else if(ip == NON_EXISTING_ID && jp == NON_EXISTING_ID){// both new
                (*parent)[i] = i; sz[i] = 2;
                (*parent)[j] = i;
            }else if (jp == NON_EXISTING_ID){ // i exists
                (*parent)[j] = ip; sz[ip] ++;
            }else{
                (*parent)[i] = jp; sz[jp] ++;
            }
        }

        mct::closed_hash_map<uint64_t, uint64_t>* GetUF(){
            return parent;
        }

        bool IsEmpty(){
            return (*parent).empty();
        }

        uint64_t GetSize(){
            return (uint64_t) (*parent).size();
        }

        void SetData(mct::closed_hash_map<uint64_t, uint64_t>* other_data){
            parent = other_data;
        }

        // make every node point directly at the root of its tree
        void FinalizeRoot(){
            for(mct::closed_hash_map<uint64_t, uint64_t>::iterator it = (*parent).begin(); it != (*parent).end(); ++ it){
                it->second = Find(it->first);
            }
        }

        void Clear(){
            (*parent).clear();
            sz.clear();
        }

        ~UnionFindMap(){
            delete parent;
        }
    };

private:
    // union-find map data structure, which contains nodeID->compID information
    UnionFindMap primary_uf;
    mct::closed_hash_map<uint64_t, uint64_t>::iterator output_iterator, output_iterator_end;
    bool localFinalized = false;

public:
    <?php echo $className; ?> () {}

    void AddItem(<?php echo const_typed_ref_args($inputs_); ?> ) {
        uint64_t src_ = Hash(src);
        uint64_t dst_ = Hash(dst);

        primary_uf.Union(src_, dst_);
    }

    void AddState(<?php echo $className; ?> &other) {
        FinalizeLocalState();
        other.FinalizeLocalState();

        mct::closed_hash_map<uint64_t, uint64_t>* this_state_data = primary_uf.GetUF();
        mct::closed_hash_map<uint64_t, uint64_t>* other_state_data = other.primary_uf.GetUF();

        // always iterate over the smaller map: swap the two maps if needed
        if (primary_uf.GetSize() < other.primary_uf.GetSize()){
            mct::closed_hash_map<uint64_t, uint64_t>* tmp = this_state_data;
            this_state_data = other_state_data;
            other_state_data = tmp;

            primary_uf.SetData(this_state_data);
            other.primary_uf.SetData(other_state_data);
        }
        assert(primary_uf.GetSize() >= other.primary_uf.GetSize());

        UnionFindMap secondary_uf;
        //go over the other state, and maintain a secondary table
        for(auto const& entry:(*other_state_data)){
            if ((*this_state_data).count(entry.first) == 1){// key exists in this state
                uint64_t this_comp_id = (*this_state_data)[entry.first];
                if (this_comp_id != entry.second) // merge needed
                    secondary_uf.Union(this_comp_id, entry.second);
            }else{
                (*this_state_data)[entry.first] = entry.second;
            }
        }

        // check if side table empty
        if (secondary_uf.IsEmpty()){
            return;
        }

        // apply the side table
        secondary_uf.FinalizeRoot();
        mct::closed_hash_map<uint64_t, uint64_t>* secondary_state_data = secondary_uf.GetUF();
        for (auto& p:(*this_state_data)){
            if ((*secondary_state_data).find(p.second) != (*secondary_state_data).end()){
                p.second = (*secondary_state_data)[p.second];
            }
        }
    }

    void FinalizeLocalState(){
        if (!localFinalized){
            primary_uf.FinalizeRoot();
            localFinalized = true;
        }
    }

    void Finalize(){
        output_iterator = primary_uf.GetUF()->begin();
        output_iterator_end = primary_uf.GetUF()->end();
    }

    bool GetNextResult(<?php echo typed_ref_args($outputs_); ?> ) {
        if (output_iterator != output_iterator_end){
            node = output_iterator->first;
            component = output_iterator->second;

            ++ output_iterator;
            return true;
        }else{
            return false;
        }
    }
};

<?php
    return [
        'kind' => 'GLA',
        'name' => $className,
        'system_headers' => $sys_headers,
        'user_headers' => $user_headers,
        'lib_headers' => $lib_headers,
        'input' => $inputs,
        'output' => $outputs,
        'result_type' => 'multi',
    ];
}
/**
 * Generates the C++ wrapper type used to pass a pointer to a GLA state
 * between operators.
 *
 * The generated class stores the pointer together with the hash reported by
 * the GLA's cHash(); GetObject() aborts through FATALIF when the stored hash
 * does not match.
 *
 * @param array $t_args Template arguments; the GLA is read from 'type' or '0'.
 *
 * @return array TYPE description consumed by the code generator.
 */
function STATE(array $t_args)
{
    $glaType = get_first_key($t_args, ['type', '0']);
    grokit_assert(is_gla($glaType), 'Template argument to STATE must be a valid GLA.');
    $glaType = $glaType->lookup();

    $className = generate_name('STATE_');

    // Member functions and free functions announced to the type system.
    $methods = [
        ['IsNull', [], 'BASE::BOOL', true],
    ];
    $functions = [
        ['IsNull', ['@type'], 'BASE::BOOL', true, true],
    ];
?>

/** Type definition for generic GLA states. This type is only used to
    trannsport states withing the same memory space between
    operators. The object the state points to MUST be treated like a
    const.

    Note: this type cannot be read from the disk or written to the
    output. A different mechanism will be used for that.

    The type in the object must be a hash of the name of the class
    used to encode the object. Any function that assumes a certain
    type must explicitly verify the correctness of the type.

    The object can be manipulated like a basic datatype. STATE objects
    do not know how to deallocate the memory they use. Other
    mechanisms have to be used to ensure correct deallocation
    (acknowledgements of data packets that contain this as members).
**/

class <?= $className ?> {
public:
    typedef <?= $glaType ?> * pointer_type;
    typedef uint64_t hash_type;

private:
    pointer_type object;
    hash_type type;

public:

    <?= $className ?> ():
        object(nullptr),
        type(0)
    {}

    <?= $className ?> (pointer_type _object):
        object(_object),
        type(<?= $glaType->cHash() ?> )
    {}

    pointer_type GetObject() const {
        FATALIF(type != <?= $glaType->cHash() ?> , "STATE contains incorrect type!");
        return object;
    }

    bool IsNull() const {
        return object == nullptr;
    }

    /** no destructor. object should not be deallocated here */
};

<?php ob_start(); ?>

inline
bool IsNull( const @type & d ) {
    return d.IsNull();
}

<?php
    // Free functions go into the global section; '@type' is substituted by
    // the caller.
    $globalCode = '' . ob_get_clean();

    return [
        'kind' => 'TYPE',
        'name' => $className,
        'complex' => false,
        'extras' => ['type' => $glaType],
        'properties' => ['__state__'],
        'global_content' => $globalCode,
        'methods' => $methods,
        'functions' => $functions,
    ];
}
/**
 * Runs a generator function, captures everything it prints, and appends the
 * captured code (wrapped in the generator's namespaces) to $buffer.
 *
 * Headers named by the generator's result ('system_headers', 'user_headers',
 * 'lib_headers') are registered on this object, and any 'global_content' is
 * appended after the namespace block with '@type' replaced by the final name.
 *
 * @param string   $name   Namespace-qualified generator name. All namespace
 *                         components are upper-cased; the last component is kept.
 * @param array    $args   Arguments forwarded to the generator.
 * @param callable $func   The generator. A string must name an existing function.
 * @param string   $buffer Output accumulator; generated code is appended to it.
 * @return mixed The generator's result unchanged when is_typeinfo() accepts it
 *               (nothing is appended to $buffer in that case); otherwise the
 *               result array with 'name' set to the namespace-qualified name.
 */
public function CallGenerator($name, &$args, $func, &$buffer)
{
    // Normalise the name: upper-case namespace parts, keep the final component.
    $name_parts = explode(self::NS_SEP, $name);
    $ns_parts = array_map('strtoupper', explode(self::NS_SEP, $name, -1));
    $name_end = $name_parts[\count($name_parts) - 1];
    $ns = \implode(self::NS_SEP, $ns_parts);
    $name = self::JoinNamespace($ns, $name_end);

    if (is_string($func)) {
        grokit_assert(
            function_exists($func),
            'Unable to generate ' . $name . ', no function named ' . $func . ' exists'
        );
    }
    grokit_logic_assert(
        is_callable($func),
        'Unable to generate ' . $name . ', value passed as function is not callable!'
    );

    // One C++ namespace is opened (and later closed) per namespace component.
    $preamble = '';
    $postamble = '';
    foreach ($ns_parts as $part) {
        $preamble .= 'namespace ' . $part . '{' . PHP_EOL;
        $postamble .= PHP_EOL . '} // end namespace ' . $part . PHP_EOL;
    }

    // Capture the generator's output. The buffer is closed in `finally` so a
    // generator that throws cannot leave an orphan output buffer behind that
    // would silently swallow everything printed afterwards.
    ob_start();
    try {
        $res = call_user_func_array($func, $args);
    } finally {
        $cont = ob_get_clean();
    }

    // An already-built info object is passed through; its output is discarded.
    if (is_typeinfo($res)) {
        return $res;
    }

    // Ensure the return value was correct and create the info object
    grokit_assert(
        is_array($res),
        'Malformed return value from generator for ' . $name . ', got ' . gettype($res) . ' instead of an array'
    );
    grokit_assert(
        array_key_exists('kind', $res),
        'Malformed return value from generator for ' . $name . ', no kind present.'
    );

    // If the generator chose its own name, qualify it with the namespace.
    $tName = array_key_exists('name', $res)
        ? self::JoinNamespace($ns, $res['name'])
        : $name;
    $res['name'] = $tName;

    // System headers are registered as <h>, user headers as "h".
    $headerDelimiters = [
        'system_headers' => ['<', '>'],
        'user_headers' => ['"', '"'],
    ];
    foreach ($headerDelimiters as $key => $delims) {
        if (!array_key_exists($key, $res)) {
            continue;
        }
        foreach ($res[$key] as $h) {
            $this->addHeader($delims[0] . $h . $delims[1]);
        }
    }

    if (array_key_exists('lib_headers', $res)) {
        foreach ($res['lib_headers'] as $h) {
            if (\count(explode('\\', $h)) == 1) {
                // No library specified, assume current
                $h = implode('\\', $ns_parts) . '\\' . $h;
            }
            $this->addLibHeader($h);
        }
    }

    $buffer .= $preamble;
    $buffer .= $cont;
    $buffer .= $postamble;

    // Global content lives outside the namespace block.
    if (array_key_exists('global_content', $res)) {
        $gCont = str_replace('@type', $tName, $res['global_content']);
        $buffer .= PHP_EOL . $gCont . PHP_EOL;
    }

    return $res;
}
/**
 * Generates a read-only, fixed-size typed view over externally managed memory.
 *
 * The generated C++ struct stores only a pointer to the first element, which
 * avoids copying data when a value is extracted from a column.
 *
 * @param array $t_args Template arguments:
 *                      - 'type': element type, either an argument list for
 *                        lookupType() or an object providing lookup();
 *                      - 'size': positive integer number of elements.
 * @return array Type description ('kind' => 'TYPE').
 */
function FixedArrayView(array $t_args)
{
    grokit_assert(array_key_exists('type', $t_args), 'FixedArrayView: No type given.');
    grokit_assert(array_key_exists('size', $t_args), 'FixedArrayView: No size given');

    $size = $t_args['size'];
    $type = $t_args['type'];
    $type = is_array($type)
        ? call_user_func_array('lookupType', $type)
        : $type->lookup();

    grokit_assert(is_datatype($type), 'arrayView: [type] argument must be a valid datatype.');
    grokit_assert($type->isFixedSize(), 'FixedArray: variable-sized types not supported');
    grokit_assert(is_int($size), 'FixedArrayView: [size] argument must be an integer');
    grokit_assert($size > 0, 'FixedArrayView: [size] arugment must be a positive number.');

    $className = generate_name('FixedArrayView_' . $size . '_');
?>

struct <?php echo $className; ?> {
    using value_type             = <?php echo $type; ?> ;
    using size_type              = std::size_t;
    using difference_type        = std::ptrdiff_t;

    using reference              = value_type &;
    using const_reference        = const value_type &;
    using pointer                = value_type *;
    using const_pointer          = const value_type *;
    using iterator               = value_type *;
    using const_iterator         = const value_type *;
    using reverse_iterator       = std::reverse_iterator<iterator>;
    using const_reverse_iterator = std::reverse_iterator<const_iterator>;

    static constexpr const size_type SIZE = <?php echo $size; ?> ;

    const_pointer __elems_;

    <?php echo $className; ?> (): __elems_(nullptr) { }

    // Constructor from externally managed memory
    <?php echo $className; ?> (const_pointer ptr): __elems_(ptr) { }

    // Default copy and move constructors/assignment
    <?php echo $className; ?> (const <?php echo $className; ?> &other) = default;
    <?php echo $className; ?> & operator=(const <?php echo $className; ?> &other) = default;
    <?php echo $className; ?> (<?php echo $className; ?> &&other) = default;
    <?php echo $className; ?> & operator=(<?php echo $className; ?> &&other) = default;

    /***** Element Access *****/

    const_reference at( size_type pos ) const {
        if( size() <= pos ) {
            std::ostringstream ss;
            ss << "Element access out of range:"
                << " size=" << size()
                << " index=" << pos;

             throw std::out_of_range(ss.str());
        }
        return __elems_[pos];
    }

    const_reference operator[]( size_type pos ) const {
        return __elems_[pos];
    }

    const_reference front() const {
        return __elems_[0];
    }

    const_reference back() const {
        return __elems_[SIZE-1];
    }

    const_pointer data() const noexcept {
        return __elems_;
    }

    /***** Iterators *****/

    const_iterator cbegin() const noexcept {
        return __elems_;
    }

    const_iterator begin() const noexcept {
        return cbegin();
    }

    const_iterator cend() const noexcept {
        return __elems_ + size();
    }

    const_iterator end() const noexcept {
        return cend();
    }

    const_reverse_iterator crbegin() const noexcept {
        return const_reverse_iterator(cend());
    }

    const_reverse_iterator rbegin() const noexcept {
        return crbegin();
    }

    const_reverse_iterator crend() const noexcept {
        return const_reverse_iterator(cbegin());
    }

    const_reverse_iterator rend() const noexcept {
        return crend();
    }

    /***** Capacity *****/

    bool empty() const noexcept {
        return SIZE == 0;
    }

    size_type size() const noexcept {
        return SIZE;
    }

    size_type max_size() const noexcept {
        return SIZE;
    }

    /***** Operations *****/

    void swap( <?php echo $className; ?> & other ) noexcept {
        std::swap( __elems_, other.__elems_ );
    }

    /***** EXTENTIONS *****/

    void from_memory( const_pointer mem ) {
        __elems_ = mem;
    }
};

<?php
    // Methods of the generated struct that are exposed to the type system,
    // listed in the order they appear in the struct.
    $constructors = [];
    $methods = [
        ['at', ['base::BIGINT'], $type->value(), true],
        ['front', [], $type->value(), true],
        ['back', [], $type->value(), true],
        ['empty', [], 'base::bool', true],
        ['size', [], 'base::BIGINT', true],
    ];
    $functions = [
        ['Hash', ['@type'], 'BASE::BIGINT', true, true],
    ];

    // Everything printed from here on is captured as global content; '@type'
    // is a placeholder for the final type name.
    ob_start();
?>

inline
bool operator == ( const @type & lhs, const @type & rhs ) {
    // Fast-track for views referring to the same memory
    if (lhs.__elems_ == rhs.__elems_)
        return true;

    for( @type::size_type i = 0; i < @type::SIZE; i++ ) {
        if( lhs[i] != rhs[i] ) return false;
    }
    return true;
}

inline
bool operator != ( const @type & lhs, const @type & rhs ) {
    // Fast-track for views referring to the same memory
    if (lhs.__elems_ == rhs.__elems_)
        return false;

    for( @type::size_type i = 0; i < @type::SIZE; i++ ) {
        if( lhs[i] != rhs[i] ) return true;
    }
    return false;
}

inline
bool operator < ( const @type & lhs, const @type & rhs ) {
    return std::lexicographical_compare(lhs.cbegin(), lhs.cend(), rhs.cbegin(), rhs.cend());
}

inline
bool operator > ( const @type & lhs, const @type & rhs ) {
    return rhs < lhs;
}

inline
bool operator <= ( const @type & lhs, const @type & rhs ) {
    return !(lhs > rhs);
}

inline
bool operator >=( const @type & lhs, const @type & rhs ) {
    return !(lhs < rhs);
}

// ostream operator for easier debugging.
template<class CharT, class Traits = std::char_traits<CharT>>
std::basic_ostream<CharT, Traits>& operator << ( std::basic_ostream<CharT, Traits> & os, const @type s ) {
    std::ostringstream ss;
    bool first = true;
    ss << "[";

    for( const auto & elem : s ) {
        if( first ) {
            first = false;
        } else {
            ss << ", ";
        }

        ss << elem;
    }

    ss << "]";

    os << ss.str();
    return os;
}

template<>
inline
std::size_t SizeFromBuffer<@type>(const char *buffer) {
    return @type::SIZE * sizeof(@type::value_type);
}

template<>
inline
std::size_t SerializedSize(const @type& from) {
    return @type::SIZE * sizeof(@type::value_type);
}

template<>
inline
std::size_t Serialize(char *buffer, const @type &from) {
    @type::pointer ptr = reinterpret_cast<@type::pointer>(buffer);
    std::copy(from.cbegin(), from.cend(), ptr);
    return SerializedSize(from);
}

template<>
inline
std::size_t Deserialize(const char *buffer, @type &dest) {
    @type::const_pointer ptr = reinterpret_cast<@type::const_pointer>(buffer);
    dest.from_memory(ptr);
    return SizeFromBuffer<@type>(buffer);
}

inline
void ToJson( const @type & src, Json::Value & dest ) {
    dest = Json::Value(Json::arrayValue);
    for( @type::const_reference elem : src ) {
        Json::Value tmp;
        ToJson( elem, tmp );
        dest.append(tmp);
    }
}

inline
int ToString( const @type & x, char * buffer ) {
<?php if ($size > 0): ?>
    char * start = buffer;
    char * current = start;

    for( const auto & val : x ) {
        current += ToString( val, current );

        // Replace null with space
        *(current-1) = ' ';
    }

    // Replace final comma with null
    *(current-1) = '\0';

    return current - start;
<?php else: ?>
    buffer[0] = '\0';

    return 1;
<?php endif; ?>
}

template<>
inline
uint64_t Hash( const @type & val ) {
    uint64_t hashVal = H_b;
    for( @type::const_reference elem : val ) {
        hashVal = CongruentHash(Hash(elem), hashVal);
    }
    return hashVal;
}

namespace std {

#ifdef _HAS_STD_HASH

// C++11 STL-compliant hash struct specialization
template <>
class hash<@type> {
public:
    size_t operator () (const @type& key) const {
        return Hash(key);
    }
};
#endif // _HAS_STD_HASH

// std::swap specializations
inline
void swap( @type& lhs, @type& rhs ) {
    lhs.swap(rhs);
}

}

<?php
    $globalContent = ob_get_clean();

    // JSON description callback: prints the size entry, then delegates the
    // element type's description to that type's own JSON describer.
    $describeInner = function ($var, $myType) use ($type) {
        $describer = $type->describer('json');
?>
    <?php echo $var; ?> ["size"] = Json::Int64(<?php echo $myType; ?> ::SIZE);
<?php
        $innerVar = $var . '["inner_type"]';
        $describer($innerVar, $type);
    };

    $systemHeaders = [
        'iterator', 'algorithm', 'stdexcept', 'utility', 'cstdint',
        'cstddef', 'iostream', 'sstream', 'cstring',
    ];
    $userHeaders = ['Config.h'];

    // Total byte size of the viewed elements.
    $sizeBytes = $size * $type->get('size.bytes');
    $extras = [
        'size' => $size,
        'type' => $type,
        'size.bytes' => $sizeBytes,
    ];

    return [
        'kind' => 'TYPE',
        'name' => $className,
        'system_headers' => $systemHeaders,
        'user_headers' => $userHeaders,
        'constructors' => $constructors,
        'methods' => $methods,
        'functions' => $functions,
        'binary_operators' => ['==', '!=', '<', '>', '<=', '>='],
        'global_content' => $globalContent,
        'complex' => "ColumnIterator<@type, 0, {$sizeBytes}>",
        'properties' => ['container', 'sequence', 'array-view'],
        'extras' => $extras,
        'describe_json' => DescribeJson('array', $describeInner),
    ];
}
/**
 * Emits the C++ work functions of a Selection waypoint.
 *
 * Two extern "C" functions are printed:
 *  - SelectionPreProcessWorkFunc_<wpName>: builds, per query, the constant
 *    state of its generalized filter (GF), if the GF has one;
 *  - SelectionProcessChunkWorkFunc_<wpName>: walks the tuples of a chunk,
 *    evaluates each query's filter (plain expressions or GF), computes the
 *    synthesized attributes and writes the resulting bitmap/columns back.
 *
 * @param string $wpName  Waypoint name; used as function-name suffix and as
 *                        the profiling-counter group.
 * @param array  $queries Map of query id => description with the keys
 *                        'gf', 'states', 'cargs', 'filters' and 'synths'.
 * @param mixed  $attMap  Attribute map forwarded to the cg* column helpers.
 * @return void
 */
function SelectionGenerate($wpName, $queries, $attMap)
{
?>

// module specific headers to allow separate compilation
#include "GLAData.h"
#include "Errors.h"

//+{"kind":"WPF", "name":"Pre-Processing", "action":"start"}
extern "C"
int SelectionPreProcessWorkFunc_<?php echo $wpName; ?> (WorkDescription& workDescription, ExecEngineData& result) {
    SelectionPreProcessWD myWork;
    myWork.swap(workDescription);
    QueryExitContainer& queries = myWork.get_whichQueryExits();
    QueryToGLASContMap & requiredStates = myWork.get_requiredStates();

    QueryToGLAStateMap constStates;
<?php
    cgDeclareQueryIDs($queries);

    // Emit the JSON configuration objects for configurable GF states up front,
    // so they are in scope when the states are constructed below.
    foreach ($queries as $query => $info) {
        $gf = $info['gf'];
        if (!is_null($gf) && $gf->has_state()) {
            $state = $gf->state();
            if ($state->configurable()) {
                $carg = $info['cargs'];
                echo ' // JSON Configuration for query ' . queryName($query) . PHP_EOL;
                $carg->init();
                echo PHP_EOL;
            } // if gf const state is configurable
        } // if gf has state
    } // foreach query
?>

    FOREACH_TWL(iter, queries) {
<?php
    foreach ($queries as $query => $val) {
?>
        if( iter.query == <?php echo queryName($query); ?> ) {
<?php
        if ($val['gf'] !== null) {
            // This is a generalized filter
            $gf = $val['gf'];
            $givenStates = $val['states'];

            if ($gf->has_state()) {
                // Constructor arguments of the constant state, in order:
                // optional JSON configuration, then every required state.
                $cstArgs = [];
                $state = $gf->state();

                // If the state is configurable, give it the JSON carg.
                // The configuration belongs to the query description ($val),
                // not to the query id ($query).
                if ($state->configurable()) {
                    $carg = $val['cargs'];
                    $cstArgs[] = $carg->name();
                } // if gf state is configurable

                if (\count($givenStates) > 0) {
?>
            FATALIF(!requiredStates.IsThere(<?php echo queryName($query); ?> ), "No required states received for query that declared required states");
            GLAStateContainer& givenStates = requiredStates.Find(<?php echo queryName($query); ?> );
            givenStates.MoveToStart();
            GLAPtr reqTemp;
<?php
                    foreach ($givenStates as $gs) {
                        $cstArgs[] = $gs->name();
?>
            // Extract state from waypoint[<?php echo $gs->waypoint(); ?> ]
            <?php echo $gs->type(); ?> * <?php echo $gs->name(); ?> = nullptr;
            reqTemp.swap(givenStates.Current());
            FATALIF( reqTemp.get_glaType() != <?php echo $gs->type()->cHash(); ?> , "Got different type than expected for required state of type <?php echo $gs->type(); ?> ");
            <?php echo $gs->name(); ?> = (<?php echo $gs->type(); ?> *) reqTemp.get_glaPtr();
            reqTemp.swap(givenStates.Current());
            givenStates.Advance();
<?php
                    } // foreach given state
                } // if we have given states

                // An empty argument list must not be printed as "()".
                $cstStr = \count($cstArgs) > 0 ? '(' . implode(', ', $cstArgs) . ')' : '';
?>
            <?php echo $state; ?> * temp = new <?php echo $state; echo $cstStr; ?> ;
            GLAPtr newPtr( <?php echo $state->cHash(); ?> , (void *) temp );
            QueryID qryID = <?php echo queryName($query); ?> ;
            constStates.Insert(qryID, newPtr);
<?php
            } // if gf has state
        } // if( $val['gf'] !== null )
?>
        } // if <?php echo queryName($query); ?> is current query
<?php
    } // foreach query
?>
    } END_FOREACH;

    SelectionPreProcessRez myRez( constStates );
    myRez.swap(result);

    return WP_PREPROCESSING; // for PreProcess
}
//+{"kind":"WPF", "name":"Pre-Processing", "action":"end"}

//+{"kind":"WPF", "name":"Process Chunk", "action":"start"}
extern "C"
int SelectionProcessChunkWorkFunc_<?php echo $wpName; ?> (WorkDescription &workDescription, ExecEngineData &result) {
    // go to the work description and get the input chunk
    SelectionProcessChunkWD myWork;
    myWork.swap (workDescription);
    Chunk &input = myWork.get_chunkToProcess ();

    QueryToGLAStateMap& constStates = myWork.get_constStates();

    PROFILING2_START;

    QueryIDSet queriesToRun = QueryExitsToQueries(myWork.get_whichQueryExits ());
<?php
    cgDeclareQueryIDs($queries);
    cgAccessColumns($attMap, 'input', $wpName);

    // Declare the constants needed by the filters and synth expressions.
    foreach ($queries as $query => $val) {
?>
    // Constants for query <?php echo queryName($query); ?> :
<?php
        cgDeclareConstants($val['filters']);
        cgDeclareConstants($val['synths']);
    } // foreach query
?>

    // prepare bitstring iterator
    Column inBitCol;
    BStringIterator queries;
    input.SwapBitmap (queries);

    // creating storage for synthesized attributes
<?php
    foreach ($queries as $query => $val) {
        foreach ($val['synths'] as $att => $syn) {
?>
    MMappedStorage <?php echo attStorage($att); ?> ;
    Column <?php echo attCol($att); ?> (<?php echo attStorage($att); ?> );
    <?php echo attIteratorType($att); ?> colI_<?php echo $att; ?> (<?php echo attCol($att); ?> );
    <?php echo attType($att); ?> <?php echo $att; ?> ;
<?php
        } // foreach synthesized attribute
    } // foreach query

    foreach ($queries as $query => $val) {
        $givenStates = $val['states'];
        $gf = $val['gf'];
        $cargs = $val['cargs'];

        grokit_assert($gf !== null || count($givenStates) == 0, 'We were given states for query ' . $query . ' when we have no GF!');

        if (!is_null($gf) && $gf->has_state()) {
            $state = $gf->state();
            $stateName = 'cst_state_' . queryName($query);
            $constMod = $state->mutable() ? '' : 'const ';
            // PHP drops a newline that directly follows a closing tag, so the
            // line break ending the C++ comment below is echoed explicitly.
?>
    // Extracting constant state for query <?php echo queryName($query) . PHP_EOL; ?>
    FATALIF(!constStates.IsThere(<?php echo queryName($query); ?> ), "No constant state found for query <?php echo queryName($query); ?> .");
    <?php echo $constMod; echo $state; ?> * <?php echo $stateName; ?> = nullptr;
    {
        GLAState& curState = constStates.Find(<?php echo queryName($query); ?> );
        GLAPtr tmp;
        tmp.swap(curState);
        FATALIF( tmp.get_glaType() != <?php echo $state->cHash(); ?> , "Got different type than expected for constant state of type <?php echo $state; ?> ");
        <?php echo $stateName; ?> = (<?php echo $constMod; echo $state; ?> *) tmp.get_glaPtr();
        tmp.swap(curState);
    }
<?php
        } // if gf requires constant state

        if ($gf !== null) {
            // Constructor arguments of the GF: optional JSON initialiser,
            // then the dereferenced constant state.
            $ctrArgs = [];
            if ($gf->configurable()) {
                echo ' // JSON initialiser for query ' . queryName($query) . PHP_EOL;
                $cargs->init();
                echo PHP_EOL;
                $ctrArgs[] = $cargs->name();
            }

            if ($gf->has_state()) {
                $ctrArgs[] = '*' . $stateName;
            }

            $ctrStr = \count($ctrArgs) > 0 ? '(' . implode(', ', $ctrArgs) . ')' : '';
            // The variable must be named exactly "<query>_state": that is the
            // name the per-tuple Filter() call further down refers to.
?>
    // Construct GF for query <?php echo queryName($query) . PHP_EOL; ?>
    <?php echo $gf->value(); ?> <?php echo queryName($query); ?>_state<?php echo $ctrStr; ?> ;
<?php
        } // if we have a GF
    } // foreach query
?>

    MMappedStorage bitStore;
    Column outBitCol(bitStore);
    BStringIterator outQueries (outBitCol, queriesToRun);

#ifdef PER_QUERY_PROFILE
<?php foreach ($queries as $query => $val) { ?>
    int64_t numTuples_<?php echo queryName($query); ?> = 0;
<?php } // foreach query ?>
#endif // PER_QUERY_PROFILE

    int64_t numTuples = 0;
    while (!queries.AtEndOfColumn ()) {
        ++numTuples;
        QueryIDSet qry;
        qry = queries.GetCurrent();
        qry.Intersect(queriesToRun);
        queries.Advance();

        //selection code for all the predicates
<?php
    cgAccessAttributes($attMap);

    foreach ($queries as $query => $val) {
        $gf = $val['gf'];
        $filters = $val['filters'];
        $synths = $val['synths'];

        $stateName = queryName($query) . '_state';

        // Parenthesise every filter expression so they combine safely.
        $filterVals = array_map(function ($expr) {
            return '(' . $expr . ')';
        }, $filters);

        if ($gf === null) {
            // Simple set of expressions.
            $selExpr = \count($filterVals) > 0 ? implode(' && ', $filterVals) : 'true';
        } else {
            // We have a GF
            $selExpr = "{$stateName}.Filter(" . implode(', ', $filterVals) . ")";
        }
?>
        // do <?php echo queryName($query); ?> :
<?php   foreach ($synths as $att => $syn) { ?>
        <?php echo attType($att); ?> <?php echo $att; ?> ;
<?php   } // foreach synthesized attribute ?>
        if( qry.Overlaps(<?php echo queryName($query); ?> ) ) {
#ifdef PER_QUERY_PROFILE
            ++numTuples_<?php echo queryName($query); ?> ;
#endif // PER_QUERY_PROFILE
<?php   cgDeclarePreprocessing($filters, 2); ?>
            if( <?php echo $selExpr; ?> ) {
                // compute synthesized
<?php
        cgDeclarePreprocessing($synths, 3);
        foreach ($synths as $att => $expr) {
?>
                <?php echo $att; ?> = <?php echo $expr; ?> ;
<?php   } // foreach synthesized attribute ?>
            } else {
                qry.Difference(<?php echo queryName($query); ?> );
            }
        }
<?php   foreach ($synths as $att => $syn) { ?>
        colI_<?php echo $att; ?> .Insert(<?php echo $att; ?> );
        colI_<?php echo $att; ?> .Advance();
<?php
        } // foreach synthesized attribute
    } // foreach query
?>
        outQueries.Insert(qry);
        outQueries.Advance();

<?php cgAdvanceAttributes($attMap); ?>
    } // while we still have tuples remaining

    // finally, if there were any results, put the data back in the chunk
<?php
    cgPutbackColumns($attMap, 'input', $wpName);

    foreach ($queries as $query => $val) {
        $synths = $val['synths'];
?>
    if (<?php echo queryName($query); ?> .Overlaps(queriesToRun)) {
<?php   foreach ($synths as $att => $expr) { ?>
        colI_<?php echo $att; ?> .Done(<?php echo attCol($att); ?> );
        input.SwapColumn(<?php echo attCol($att); ?> , <?php echo attSlot($att); ?> );
<?php   } // foreach synthesized attribute ?>
    } // If <?php echo queryName($query); ?> overlaps queriesToRun
<?php
    } // foreach query
?>

    // put in the output bitmap
    outQueries.Done ();
    input.SwapBitmap (outQueries);

    // Finish performance counters
    PROFILING2_END;

    PCounterList counterList;
    PCounter totalCnt("tpi", numTuples, "<?php echo $wpName; ?> ");
    counterList.Append(totalCnt);
    PCounter tplOutCnt("tpo", numTuples, "<?php echo $wpName; ?> ");
    counterList.Append(tplOutCnt);

#ifdef PER_QUERY_PROFILE
<?php foreach ($queries as $query => $val) { ?>
    if( <?php echo queryName($query); ?> .Overlaps(queriesToRun)) {
        PCounter cnt("<?php echo queryName($query); ?> ", numTuples_<?php echo queryName($query); ?> , "<?php echo $wpName; ?> ");
        counterList.Append(cnt);
    }
<?php } // foreach query ?>
#endif // PER_QUERY_PROFILE

    PROFILING2_SET(counterList, "<?php echo $wpName; ?> ");

    ChunkContainer tempResult (input);
    tempResult.swap (result);

    return WP_PROCESS_CHUNK; // For Process Chunk
}
//+{"kind":"WPF", "name":"Process Chunk", "action":"end"}

<?php
}
/**
 * Generator for the Max GLA: keeps the maximum seen so far for every input.
 *
 * The i-th output receives the type of the i-th input and holds the maximum
 * (std::max) of that input over all items added.
 *
 * @param array $t_args Template arguments (not used).
 * @param array $input  Input name => type.
 * @param array $output Output name => type; types are overwritten with the
 *                      types of the positionally matching inputs.
 * @return array GLA description ('kind' => 'GLA', 'result_type' => 'single').
 */
function Max(array $t_args, array $input, array $output)
{
    grokit_assert(\count($output) >= 1, 'Max GLA produces at least one output!');
    grokit_assert(\count($output) == \count($input), 'Max GLA should have the same number of inputs and outputs');

    $inputNames = array_keys($input);
    $outputNames = array_keys($output);

    // Outputs should be the same type as the inputs
    foreach ($outputNames as $position => $ignored) {
        array_set_index($output, $position, array_get_index($input, $position));
    }

    $name = generate_name('Max_');
?>

class <?php echo $name; ?> {
    uintmax_t count;

<?php foreach ($output as $member => $memberType): ?>
    <?php echo $memberType; ?> _<?php echo $member; ?> ;
<?php endforeach; ?>

public:
    <?php echo $name; ?> () :
<?php foreach ($output as $member => $memberType): ?>
        _<?php echo $member; ?> (),
<?php endforeach; ?>
        count(0)
    { }

    void AddItem( <?php echo const_typed_ref_args($input); ?> ) {
        if( count > 0 ) {
<?php foreach ($outputNames as $position => $outName): ?>
            _<?php echo $outName; ?> = std::max(_<?php echo $outName; ?> , <?php echo $inputNames[$position]; ?> );
<?php endforeach; ?>
        } else {
<?php foreach ($outputNames as $position => $outName): ?>
            _<?php echo $outName; ?> = <?php echo $inputNames[$position]; ?> ;
<?php endforeach; ?>
        }

        count++;
    }

    void AddState( <?php echo $name; ?> & o ) {
        if (count > 0 && o.count > 0) {
<?php foreach ($outputNames as $outName): ?>
            _<?php echo $outName; ?> = std::max(_<?php echo $outName; ?> , o._<?php echo $outName; ?> );
<?php endforeach; ?>
        } else if(o.count > 0) { // count == 0
<?php foreach ($outputNames as $outName): ?>
            _<?php echo $outName; ?> = o._<?php echo $outName; ?> ;
<?php endforeach; ?>
        }
        // Otherwise, count > 0 && o.count == 0, so just keep our values

        count += o.count;
    }

    void GetResult(<?php echo typed_ref_args($output); ?> ) {
<?php foreach ($output as $member => $memberType): ?>
        <?php echo $member; ?> = _<?php echo $member; ?> ;
<?php endforeach; ?>
    }
};

<?php
    return [
        'kind' => 'GLA',
        'name' => $name,
        'input' => $input,
        'output' => $output,
        'result_type' => 'single',
        'system_headers' => ['algorithm', 'cstdint'],
    ];
}
/**
 * Merges the libraries required by another info object into this one.
 *
 * @param object $info Object providing libraries(); anything that is not an
 *                     object is reported through grokit_assert().
 * @return void
 */
public function absorbInfo($info)
{
    // Build the diagnostic dump only on the failure path: rendering
    // print_r() of a valid object on every call is wasted work.
    if (!is_object($info)) {
        grokit_assert(false, "Called absorbInfo on non-object: " . print_r($info, true));
    }

    $this->addLibs($info->libraries());
}
/**
 * Generator for the GroupBy GLA: one inner GLA instance per distinct key.
 *
 * Inputs named in the 'group' template argument form the key; all remaining
 * inputs are fed to the inner ('aggregate') GLA of the matching group. The
 * generated C++ class keeps the groups in a hash map and exposes the multi,
 * state and (optionally) fragment result interfaces.
 *
 * Template arguments read from $t_args:
 *  - 'group'           (required) map of input name => output name;
 *  - 'aggregate'       (required) the inner GLA;
 *  - 'input'           input types, used only when $inputs is empty;
 *  - 'debug'           (default 0) values > 0 add fprintf diagnostics;
 *  - 'init.size'       (default 1024) initial size passed to the map;
 *  - 'use.mct'         (default true) mct::closed_hash_map vs std::unordered_map;
 *  - 'mct.keep.hashes' (default false) boolean template flag of the mct map;
 *  - 'use.fragments'   (default true) also generate the fragment interface;
 *  - 'fragment.size'   (default 2000000) groups per fragment.
 *
 * @param array $t_args  Template arguments, see above.
 * @param array $inputs  Input name => type.
 * @param array $outputs Output name => type; types are filled in here.
 * @param array $states  Required states, forwarded to the inner GLA.
 * @return array GLA description ('kind' => 'GLA').
 */
function GroupBy(array $t_args, array $inputs, array $outputs, array $states)
{
    // Ensure we have valid inputs.
    if (\count($inputs) == 0) {
        // No inputs given, try to get them from template arguments.
        grokit_assert(array_key_exists('input', $t_args), 'No inputs given for GroupBy');
        $inputs = $t_args['input'];

        if (!is_array($inputs)) {
            $inputs = [$inputs];
        }

        foreach ($inputs as $name => &$type) {
            if (is_identifier($type)) {
                $type = lookupType(strval($type));
            }

            grokit_assert(is_datatype($type), 'Invalid type given for input ' . $name);
        }
        // Break the reference: $type is reused as a by-value loop variable
        // below, and a live reference would overwrite the last input's type.
        unset($type);
    }

    grokit_assert(array_key_exists('group', $t_args), 'No groups specified for GroupBy');
    $gbyAttMap = $t_args['group'];

    grokit_assert(is_array($gbyAttMap), 'Invalid value given for groups, expected an expression name or list of expression names');

    $gbyAttMap = array_map('strval', $gbyAttMap);
    $gbyAttNames = array_keys($gbyAttMap);

    foreach ($gbyAttMap as $in => $out) {
        grokit_assert(array_key_exists($in, $inputs), 'Group ' . $in . ' not present in input');
        grokit_assert(array_key_exists($out, $outputs), 'Output Attribute ' . $out . ' for group ' . $in . ' not found in outputs');
    }

    grokit_assert(array_key_exists('aggregate', $t_args), 'No aggregate specified for GroupBy');

    $innerGLA = $t_args['aggregate'];
    grokit_assert(is_gla($innerGLA), 'Non-GLA specified as aggregate for GroupBy');

    $debug = get_default($t_args, 'debug', 0);

    $init_size = get_default($t_args, 'init.size', 1024);
    $use_mct = get_default($t_args, 'use.mct', true);
    $keepHashes = get_default($t_args, 'mct.keep.hashes', false);
    grokit_assert(is_bool($keepHashes), 'GroupBy mct.keep.hashes argument must be boolean');

    // determine the result type
    $use_fragments = get_default($t_args, 'use.fragments', true);
    $resType = $use_fragments ? ['fragment', 'multi'] : ['multi'];
    $fragSize = get_default($t_args, 'fragment.size', 2000000);

    // Always support state
    $resType[] = 'state';

    // Class name randomly generated
    $className = generate_name("GroupBy");

    // instantiate the inner GLA. input/output is derived from the main input/output
    $gbyAtts = [];
    $glaInputAtts = [];
    $glaOutputAtts = [];

    foreach ($inputs as $name => $type) {
        if (in_array($name, $gbyAttNames)) {
            // Grouping attribute: part of the key, and its output has the same type.
            $gbyAtts[$name] = $type;
            $outputs[$gbyAttMap[$name]] = $type;
        } else {
            $glaInputAtts[$name] = $type;
        }
    }

    foreach ($outputs as $name => $type) {
        if (!in_array($name, $gbyAttMap)) {
            $glaOutputAtts[$name] = $type;
        }
    }

    $innerGLA = $innerGLA->apply($glaInputAtts, $glaOutputAtts, $states);
    $libraries = $innerGLA->libraries();

    $innerRes = get_first_value($innerGLA->result_type(), ['multi', 'single', 'state']);

    if ($innerRes == 'state') {
        // If the result type is state, the only output is a state object
        // containing the GLA.
        $outputName = array_keys($glaOutputAtts)[0];
        $innerOutputs = [$outputName => lookupType('base::STATE', ['type' => $innerGLA])];
    } else {
        $innerOutputs = $innerGLA->output();

        grokit_assert(
            \count($innerOutputs) == \count($glaOutputAtts),
            'Expected ' . \count($glaOutputAtts) . ' outputs fromm Inner GLA, got ' . \count($innerOutputs)
        );
    }

    $constState = lookupResource('GroupByState', ['gla' => $innerGLA, 'groups' => $gbyAtts, 'debug' => $debug]);

    // constructor argumetns are inherited from inner GLA
    $configurable = $innerGLA->configurable();
    $reqStates = $innerGLA->req_states();

    // We need to specially create the constructor string because apparently
    // declaring Type Name(); is a function declaration instead of a variable
    // declaration for some reason.
    $constructorParts = [];
    if ($configurable) {
        $constructorParts[] = 'jsonInit';
    }
    if ($innerGLA->has_state()) {
        $constructorParts[] = 'innerState';
    }

    $constructorString = \count($constructorParts) > 0 ? '(' . implode(', ', $constructorParts) . ')' : '';

    // add the outputs we got from the gla
    foreach ($innerOutputs as $name => $type) {
        grokit_assert(array_key_exists($name, $outputs), 'Inner GLA\'s outputs refer to unknown attribute ' . $name);
        grokit_assert($type !== null, 'GroupBy Inner GLA left output ' . $name . ' with no type');
        $outputs[$name] = $type;
    }

    $iterable = $innerGLA->iterable();

    // need to keep track of system includes needed
    $extraHeaders = [];

    $allocatorText = "std::allocator<std::pair<const Key, {$innerGLA}> >";
    if ($use_mct) {
        $keepHashesText = $keepHashes ? 'true' : 'false';
        $extraHeaders[] = "mct/hash-map.hpp";
        $map = "mct::closed_hash_map<Key, {$innerGLA}, HashKey, std::equal_to<Key>, {$allocatorText}, {$keepHashesText}>";
    } else {
        $extraHeaders[] = "unordered_map";
        $map = "std::unordered_map<Key, {$innerGLA}, HashKey, std::equal_to<Key>, {$allocatorText}>";
    }

    if ($debug > 0) {
        $extraHeaders[] = 'cstdio';
    }
?>

class <?php echo $className; ?> {
public:
    using ConstantState = <?php echo $constState; ?> ;
<?php if ($innerGLA->has_state()) { ?>
    using InnerState = ConstantState::InnerState;
<?php } // if gla has state ?>
    using Key = ConstantState::Key;
    using HashKey = ConstantState::HashKey;
    using InnerGLA = <?php echo $innerGLA; ?> ;

    typedef <?php echo $map; ?> MapType;
    static const size_t INIT_SIZE = <?php echo $init_size; ?> ;

public:
    class Iterator {
        MapType::iterator it; // current value
        MapType::iterator end; // last value in the fragment

    public:
        Iterator() { }

        Iterator(MapType::iterator _it, MapType::iterator _end):
            it(_it), end(_end)
        {
            if( it != end ) {
<?php
    // NOTE(review): for 'state' results only the first group is finalized
    // here; GetNextResult below does not call FinalizeState() when it
    // advances. Confirm this is intended.
    switch ($innerRes) {
        case 'multi':
?>
                it->second.Finalize();
<?php
            break;
        case 'state':
            if ($innerGLA->finalize_as_state()) {
?>
                it->second.FinalizeState();
<?php
            } // if we need to finalize as a state
            break;
    } // end switch inner restype
?>
            }
        }

        bool GetNextResult( <?php echo typed_ref_args($outputs); ?> ) {
            bool gotResult = false;
            while( it != end && !gotResult ) {
                <?php echo $innerGLA; ?> & gla = it->second;
<?php foreach ($gbyAttMap as $in => $out) { ?>
                <?php echo $out; ?> = it->first.<?php echo $in; ?> ;
<?php } // foreach grouping attribute ?>
<?php
    switch ($innerRes) {
        case 'multi':
?>
                gotResult = gla.GetNextResult( <?php echo args($innerOutputs); ?> );
                if( !gotResult ) {
                    ++it;
                    if( it != end ) {
                        it->second.Finalize();
                    }
                }
<?php
            break;
        case 'single':
?>
                gotResult = true;
                gla.GetResult(<?php echo args($innerOutputs); ?> );
                ++it;
<?php
            break;
        case 'state':
            reset($innerOutputs);
            // Assuming that $innerOutputs contains a single value that is
            // the state type.
            $oName = key($innerOutputs);
            $oType = current($innerOutputs);
?>
                gotResult = true;
                <?php echo $oName; ?> = <?php echo $oType; ?> ( &gla );
                ++it;
<?php
    } // switch inner result type
?>
            }

            return gotResult;
        }
    };

private:
    const ConstantState & constState;

<?php if ($configurable) { ?>
    const Json::Value jsonInit;
<?php } // if configurable ?>

    size_t count;

    MapType groupByMap;

    std::vector<MapType::iterator> theIterators; // the iterators, only 2 elements if multi, many if fragment
    Iterator multiIterator;

public:
    <?php echo $className; ?> (<?php if ($configurable) { ?> const Json::Value & _jsonInit, <?php } ?> const ConstantState & _constState ) :
        constState(_constState)
<?php if ($configurable) { ?>
        , jsonInit(_jsonInit)
<?php } // if configurable ?>
        , count(0)
        , groupByMap( INIT_SIZE )
        , theIterators()
        , multiIterator()
    { }

    ~<?php echo $className; ?> () {}

    void Reset(void) {
        count = 0;
        groupByMap.clear();
        theIterators.clear();
    }

    void AddItem(<?php echo array_template('const {val} & {key}', ', ', $inputs); ?> ) {
        count++;
        // check if _key is already in the map; if yes, add _value; else, add a new
        // entry (_key, _value)
        Key key(<?php echo array_template('{key}', ', ', $gbyAtts); ?> );

        MapType::iterator it = groupByMap.find(key);
        if (it == groupByMap.end()) { // group does not exist
            // create an empty GLA and insert
            // better to not add the item here so we do not have
            // to transport a large state

<?php if ($innerGLA->has_state()) { ?>
            const InnerState & innerState = constState.getConstState(key);
<?php } // if gla has state ?>
            InnerGLA gla<?php echo $constructorString; ?> ;
            auto ret = groupByMap.insert(MapType::value_type(key, gla));
            it = ret.first; // reposition
        }
        it->second.AddItem(<?php echo array_template('{key}', ', ', $glaInputAtts); ?> );
    }

    void AddState(<?php echo $className; ?> & other) {
        count += other.count;
        // scan other hash and insert or update content in this one
        for (MapType::iterator it = other.groupByMap.begin(); it != other.groupByMap.end(); ++it) {
            const Key& okey = it->first;
            <?php echo $innerGLA; ?> & ogla = it->second;

            MapType::iterator itt = groupByMap.find(okey);
            if (itt != groupByMap.end()) { // found the group
                <?php echo $innerGLA; ?> & gla = itt->second;
                gla.AddState(ogla);
            } else {
                // add the other group to this hash
                groupByMap.insert(MapType::value_type(okey, ogla));
            }
        }
    }

<?php if ($iterable) { ?>
    bool ShouldIterate(ConstantState& modibleState) {
<?php   if ($debug > 0) { ?>
        fprintf(stderr, "<?php echo $className; ?> : ==== ShouldIterate ====\n");
<?php   } // if debugging enabled ?>
        bool shouldIterate = false;
        for( MapType::iterator it = groupByMap.begin(); it != groupByMap.end(); ++it ) {
            const Key & key = it->first;
            InnerGLA & gla = it->second;
<?php   if ($innerGLA->has_state()) { ?>
            InnerState & innerState = modibleState.getModibleState(key);
<?php   } // if gla has state ?>
            bool glaRet = gla.ShouldIterate(innerState);
            shouldIterate = shouldIterate || glaRet;
<?php   if ($debug > 0) { ?>
            fprintf(stderr, "<?php echo $className; ?> : Key(%s) shouldIterate(%s)\n",
                key.to_string().c_str(),
                glaRet ? "true" : "false");
<?php   } // if debugging enabled ?>
        }

        return shouldIterate;
    }
<?php } // if iterable ?>

<?php if (in_array('fragment', $resType)) { ?>
    int GetNumFragments(void){
        int size = groupByMap.size();
        int sizeFrag = <?php echo $fragSize; ?> ;
        // setup the fragment boundaries
        // scan via iterator and count
        int frag=0;
        int pos=0;
        MapType::iterator it = groupByMap.begin();
        theIterators.clear();
        theIterators.push_back( it );
        // special case when size < num_fragments
        // >
        if (sizeFrag == 0){
            it = groupByMap.end();
            theIterators.push_back( it );
            return 1; // one fragment
        }

        while(it!=groupByMap.end()){
            while(it!=groupByMap.end() && pos<( frag + 1 )*sizeFrag){
                //>
                ++it;
                pos++;
            }
            theIterators.push_back( it );
            frag++;
        }

<?php   if ($debug > 0) { ?>
        fprintf(stderr, "<?php echo $className; ?> : fragments(%d)\n", frag);
<?php   } ?>

        return frag;
    }

    Iterator* Finalize(int fragment){
        // Call finalize on all inner GLAs in this fragment.
        MapType::iterator iter = theIterators[fragment];
        MapType::iterator iterEnd = theIterators[fragment+1];

        Iterator* rez
            = new Iterator(theIterators[fragment], theIterators[fragment+1] );
        return rez;
    }

    bool GetNextResult(Iterator* it, <?php echo array_template('{val} & {key}', ', ', $outputs); ?> ) {
        return it->GetNextResult(<?php echo args($outputs); ?> );
    }
<?php } // if using fragment interface ?>

    void Finalize() {
        multiIterator = Iterator( groupByMap.begin(), groupByMap.end() );

<?php if ($debug >= 1) { ?>
        fprintf(stderr, "<?php echo $className; ?> : groups(%lu) tuples(%lu)\n", groupByMap.size(), count);
<?php } ?>
    }

    bool GetNextResult(<?php echo array_template('{val} & {key}', ', ', $outputs); ?> ) {
        return multiIterator.GetNextResult( <?php echo args($outputs); ?> );
    }

    std::size_t size() const {
        return groupByMap.size();
    }

    const MapType& GetMap() const {
        return groupByMap;
    }

    bool Contains(<?php echo const_typed_ref_args($gbyAtts); ?> ) const {
        Key key(<?php echo args($gbyAtts); ?> );
        return groupByMap.count(key) > 0;
    }

    const InnerGLA& Get(<?php echo const_typed_ref_args($gbyAtts); ?> ) const {
        Key key(<?php echo args($gbyAtts); ?> );
        return groupByMap.at(key);
    }

    bool Contains(Key key) const {
        return groupByMap.count(key) > 0;
    }

    const InnerGLA& Get(Key key) const {
        return groupByMap.at(key);
    }
};

<?php if (in_array('fragment', $resType)) { ?>
typedef <?php echo $className; ?> ::Iterator <?php echo $className; ?>_Iterator;
<?php } ?>

<?php
    $sys_headers = array_merge(['iomanip', 'iostream', 'cstring'], $extraHeaders);

    return [
        'kind' => 'GLA',
        'name' => $className,
        'system_headers' => $sys_headers,
        'user_headers' => ['HashFunctions.h'],
        'input' => $inputs,
        'output' => $outputs,
        'result_type' => $resType,
        'configurable' => $configurable,
        'generated_state' => $constState,
        'required_states' => $reqStates,
        'iterable' => $iterable,
        'properties' => ['resettable', 'finite container'],
        'libraries' => $libraries,
        'extra' => ['inner_gla' => $innerGLA],
    ];
}
/**
 * A fixed array containing a given type.
 *
 * This is very similar to the STL array datatype, except that the size is not
 * allowed to be 0.
 *
 * Emits a C++ aggregate struct holding `size` elements of the element type,
 * plus free functions (comparison operators, stream output, JSON and string
 * conversion, Hash, and std:: helpers) that are collected into the type's
 * global content.
 *
 * Template arguments:
 *  - 'type': the element type. Either an array of arguments forwarded to
 *            lookupType(), or an object exposing lookup(). The resolved type
 *            must be a fixed-size datatype.
 *  - 'size': the number of elements; must be a positive integer.
 *
 * @param array $t_args the template arguments described above
 * @return array the type description (kind 'TYPE') for the generated struct
 */
function FixedArray(array $t_args)
{
    $constructors = [];
    $methods = [];
    $functions = [];
    $globalContent = '';

    grokit_assert(array_key_exists('type', $t_args), 'FixedArray: No type given for elements');
    grokit_assert(array_key_exists('size', $t_args), 'FixedArray: No size given');

    $type = $t_args['type'];
    $size = $t_args['size'];

    if (is_array($type)) {
        // Perform type lookup
        $type = call_user_func_array('lookupType', $type);
    } else {
        $type = $type->lookup();
    }

    grokit_assert(is_datatype($type), 'FixedArray: [type] argument must be a valid datatype');
    grokit_assert($type->isFixedSize(), 'FixedArray: variable-sized types not supported');
    grokit_assert(is_int($size), 'FixedArray: [size] argument must be an integer');
    grokit_assert($size > 0, 'FixedArray: [size] arugment must be a positive number.');

    $className = generate_name('FixedArray_' . $size . '_');
?>

struct <?php echo $className; ?> {
    using value_type             = <?php echo $type; ?> ;
    using size_type              = std::size_t;
    using difference_type        = std::ptrdiff_t;
    using reference              = value_type &;
    using const_reference        = const value_type &;
    using pointer                = value_type *;
    using const_pointer          = const value_type *;
    using iterator               = value_type *;
    using const_iterator         = const value_type *;
    using reverse_iterator       = std::reverse_iterator<iterator>;
    using const_reverse_iterator = std::reverse_iterator<const_iterator>;

    static constexpr const size_type SIZE = <?php echo $size; ?> ;

    value_type __elems_[SIZE > 0 ? SIZE : 1];

    // No explicit contruct/copy/destroy for aggregate type
<?php $constructors[] = [[], true]; ?>

    /***** Element Access *****/

<?php $methods[] = ['at', ['base::BIGINT'], $type->value(), true]; ?>
    reference at( size_type pos ) {
        if( size() <= pos ) {
            std::ostringstream ss;
            ss << "Element access out of range:"
               << " size=" << size()
               << " index=" << pos;

            throw std::out_of_range(ss.str());
        }
        return __elems_[pos];
    }

    const_reference at( size_type pos ) const {
        if( size() <= pos ) {
            std::ostringstream ss;
            ss << "Element access out of range:"
               << " size=" << size()
               << " index=" << pos;

            throw std::out_of_range(ss.str());
        }
        return __elems_[pos];
    }

    reference operator[]( size_type pos ) {
        return __elems_[pos];
    }

    constexpr const_reference operator[]( size_type pos ) const {
        return __elems_[pos];
    }

<?php $methods[] = ['front', [], $type->value(), true]; ?>
    reference front() {
        return __elems_[0];
    }

    constexpr const_reference front() const {
        return __elems_[0];
    }

<?php $methods[] = ['back', [], $type->value(), true]; ?>
    reference back() {
        return __elems_[SIZE-1];
    }

    constexpr const_reference back() const {
        return __elems_[SIZE-1];
    }

    pointer data() noexcept {
        return __elems_;
    }

    const_pointer data() const noexcept {
        return __elems_;
    }

    /***** Iterators *****/

    iterator begin() noexcept {
        return __elems_;
    }

    const_iterator cbegin() const noexcept {
        return __elems_;
    }

    const_iterator begin() const noexcept {
        return cbegin();
    }

    iterator end() noexcept {
        return __elems_ + size();
    }

    const_iterator cend() const noexcept {
        return __elems_ + size();
    }

    const_iterator end() const noexcept {
        return cend();
    }

    reverse_iterator rbegin() noexcept {
        return reverse_iterator(end());
    }

    const_reverse_iterator crbegin() const noexcept {
        return const_reverse_iterator(cend());
    }

    const_reverse_iterator rbegin() const noexcept {
        return crbegin();
    }

    reverse_iterator rend() noexcept {
        return reverse_iterator(begin());
    }

    const_reverse_iterator crend() const noexcept {
        return const_reverse_iterator(cbegin());
    }

    const_reverse_iterator rend() const noexcept {
        return crend();
    }

    /***** Capacity *****/

<?php $methods[] = ['empty', [], 'base::bool', true]; ?>
    constexpr bool empty() const noexcept {
        return SIZE == 0;
    }

<?php $methods[] = ['size', [], 'base::BIGINT', true]; ?>
    constexpr size_type size() const noexcept {
        return SIZE;
    }

    constexpr size_type max_size() const noexcept {
        return SIZE;
    }

    /***** Operations *****/

    void fill( const value_type & value ) {
        std::fill_n(begin(), SIZE, value);
    }

    void swap( <?php echo $className; ?> & other ) noexcept(noexcept(std::swap(std::declval<value_type&>(), std::declval<value_type&>()))) {
        std::swap( __elems_, other.__elems_ );
    }

    /***** EXTENTIONS *****/

    void from_memory( const_pointer mem ) {
        std::copy(mem, mem+SIZE, __elems_);
    }
};

<?php ob_start(); ?>

inline
bool operator == ( const @type & lhs, const @type & rhs ) {
    for( @type::size_type i = 0; i < @type::SIZE; i++ ) { //>
        if( lhs[i] != rhs[i] ) return false;
    }
    return true;
}

inline
bool operator != ( const @type & lhs, const @type & rhs ) {
    for( @type::size_type i = 0; i < @type::SIZE; i++ ) { //>
        if( lhs[i] != rhs[i] ) return true;
    }
    return false;
}

inline
bool operator < ( const @type & lhs, const @type & rhs ) { //>
    return std::lexicographical_compare(lhs.cbegin(), lhs.cend(), rhs.cbegin(), rhs.cend());
}

inline
bool operator > ( const @type & lhs, const @type & rhs ) {
    return rhs < lhs; //>
}

inline
bool operator <= ( const @type & lhs, const @type & rhs ) { //>
    return !(lhs > rhs);
}

inline
bool operator >=( const @type & lhs, const @type & rhs ) {
    return !(lhs < rhs); //>
}

// ostream operator for easier debugging.
// The array is taken by const reference so printing does not copy it.
template<class CharT, class Traits = std::char_traits<CharT>>
std::basic_ostream<CharT, Traits>& operator << ( std::basic_ostream<CharT, Traits> & os, const @type & s ) {
    std::ostringstream ss;
    bool first = true;
    ss << "[";

    for( const auto & elem : s ) {
        if( first ) {
            first = false;
        } else {
            ss << ", ";
        }

        ss << elem;
    }

    ss << "]";

    os << ss.str();
    return os;
}

inline
void ToJson( const @type & src, Json::Value & dest ) {
    dest = Json::Value(Json::arrayValue);
    for( @type::const_reference elem : src ) {
        Json::Value tmp;
        ToJson( elem, tmp );
        dest.append(tmp);
    }
}

inline
void FromJson( const Json::Value & src, @type & dest ) {
    FATALIF(!src.isArray(), "Attempted to read array from non-array JSON");
    FATALIF(!(src.size() == @type::SIZE), "Invalid number of elements in JSON for Array");

    for( Json::ArrayIndex i = 0; i < @type::SIZE; i++ ) { //>
        FromJson( src[i], dest[i] );
    }
}

inline
int ToString( const @type & x, char * buffer ) {
<?php if ($size > 0) { ?>
    char * start = buffer;
    char * current = start;

    for( const auto & val : x ) {
        current += ToString( val, current );

        // Replace null with space
        *(current-1) = ' ';
    }

    // Replace final space with null
    *(current-1) = '\0';

    return current - start;
<?php } else { // if size > 0 ?>
    buffer[0] = '\0';

    return 1;
<?php } // if size == 0 ?>
}

inline
void FromString( @type & x, const char * buffer ) {
    char * current = NULL;
    char * saveptr = NULL;
    const char * delim = " ";
    // strtok_r modifies its input, so tokenize a private copy of the buffer.
    char * copy = strdup(buffer);

    current = strtok_r(copy, delim, &saveptr);

    for( auto & val : x ) {
        FATALIF(current == NULL, "Not enough elements in string representation of array");
        // Parse the token into the element. (This previously called ToString,
        // which wrote the element's old value over the token instead.)
        FromString(val, current);
        current = strtok_r(NULL, delim, &saveptr);
    }

    free((void *) copy);
}

<?php $functions[] = ['Hash', ['@type'], 'BASE::BIGINT', true, true]; ?>
template<>
inline
uint64_t Hash( const @type & val ) {
    uint64_t hashVal = H_b;
    for( @type::const_reference elem : val ) {
        hashVal = CongruentHash(Hash(elem), hashVal);
    }
    return hashVal;
}

namespace std {

#ifdef _HAS_STD_HASH

// C++11 STL-compliant hash struct specialization
template <>
class hash<@type> {
public:
    size_t operator () (const @type& key) const {
        return Hash(key);
    }
};
#endif // _HAS_STD_HASH

// std::get specializations

template< size_t I >
constexpr @type::reference get( @type& a ) {
    static_assert(I < @type::SIZE, "Index out of bounds for std::get(@type)");
    return a.__elems_[I];
}

template< size_t I >
constexpr @type::value_type&& get( @type&& a ) {
    static_assert(I < @type::SIZE, "Index out of bounds for std::get(@type)");
    return std::move(a.__elems_[I]);
}

template< size_t I >
constexpr @type::const_reference get( const @type& a ) {
    static_assert(I < @type::SIZE, "Index out of bounds for std::get(@type)");
    return a.__elems_[I];
}

// std::swap specializations

inline
void swap( @type& lhs, @type& rhs ) {
    lhs.swap(rhs);
}

// std::tuple_size

template<>
class tuple_size< @type > : public integral_constant<size_t, @type::SIZE>
{ };

// std::tuple_element

template<size_t I>
struct tuple_element< I, @type > {
    using type = @type::value_type;
};

}

<?php $globalContent .= ob_get_clean(); ?>

<?php
    // Describer callback: emits the array size and then delegates to the
    // element type's own JSON describer for the "inner_type" entry.
    $innerDesc = function ($var, $myType) use($type) {
        $describer = $type->describer('json');
?>
    <?php echo $var; ?> ["size"] = Json::Int64(<?php echo $myType; ?> ::SIZE);
<?php
        $innerVar = "{$var}[\"inner_type\"]";
        $describer($innerVar, $type);
    };

    $sys_headers = ['iterator', 'algorithm', 'stdexcept', 'utility', 'cinttypes', 'cstddef', 'iostream', 'sstream', 'cstring', 'cstdlib'];
    $user_headers = ['Config.h'];

    $extras = ['size' => $size, 'type' => $type];
    if ($type->has('size.bytes')) {
        $extras['size.bytes'] = $size * $type->get('size.bytes');
    }

    return [
        'kind' => 'TYPE',
        'name' => $className,
        'system_headers' => $sys_headers,
        'user_headers' => $user_headers,
        'constructors' => $constructors,
        'methods' => $methods,
        'functions' => $functions,
        'binary_operators' => ['==', '!=', '<', '>', '<=', '>='],
        'global_content' => $globalContent,
        'complex' => false,
        'properties' => ['container', 'sequence', 'array'],
        'extras' => $extras,
        'describe_json' => DescribeJson('array', $innerDesc),
    ];
}
/**
 * GI that generates data in clusters, using a specified distribution for each
 * cluster.
 *
 * This GI requires the following template arguments:
 *  - 'n' or 0
 *      The number of tuples to generate. Note: this value is per task.
 *      The total number of tuples generated will be n_tasks * n
 *  - 'centers' or 1
 *      A list of configuration for the centers.
 *
 * The following template arguments are optional:
 *  - 'outputs'
 *      If the outputs of the GI are not given implicitly, they can be
 *      specified in this template argument. The number of dimensions will
 *      be determined by the number of outputs.
 *
 *      All output types must be numeric real types. The default type for
 *      outputs is DOUBLE.
 *  - 'dist.lib' = 'std'
 *      Which library to use for generating distributions.
 *      Valid options are:
 *          - std
 *          - boost
 *  - 'seed' = null
 *      The seed to be used for the random number generator. This seed will
 *      be used to generate the seed for each task, and different runs with
 *      the same seed will produce the same data.
 *  - 'compute.sets' = 1
 *      The number of sets of tuples to compute at once.
 *
 * Each center configuration is a functor with the form:
 *      dist_name(args)
 *
 * The following distributions are supported:
 *  { Uniform Distributions }
 *  - uniform(a = 0, b = 1)
 *
 *  { Normal Distributions }
 *  - normal(mean = 0.0, std_dev = 1.0) [ synonyms: gaussian ]
 *  - inverse_gaussian(mean = 1, shape = 1) [ synonyms: inverse_normal ]
 *
 *  { Bernoulli Distributions }
 *  - binomial(t = 1, p = 0.5)
 *  - negative_binomial(k = 1, p = 0.5)
 *
 *  { Poisson Distributions }
 *  - exponential( lambda = 1 )
 *  - gamma(alpha = 1, beta = 1) [ synonyms: Gamma ]
 *
 * @param array $t_args  the template arguments described above
 * @param array $outputs map of output name to type; a null type is replaced
 *                       by base::DOUBLE
 * @return array the GI description (kind 'GI') for the generated class
 */
function ClusterGen(array $t_args, array $outputs)
{
    // <tuple> is listed explicitly because the generated class uses std::tuple.
    $sys_headers = ['array', 'cinttypes', 'tuple'];
    $user_headers = [];
    $libraries = [];

    if (\count($outputs) == 0) {
        grokit_assert(array_key_exists('outputs', $t_args), 'ClusterGen: No outputs specified');

        $count = 0;
        foreach ($t_args['outputs'] as $type) {
            if (is_identifier($type)) {
                $type = lookupType($type);
            }

            grokit_assert(is_datatype($type), 'ClusterGen: Non data-type ' . $type . ' given as output');

            $name = 'output' . $count++;
            $outputs[$name] = $type;
        }
    }

    foreach ($outputs as $name => &$type) {
        if (is_null($type)) {
            $type = lookupType('base::DOUBLE');
        } else {
            grokit_assert($type->is('real'), 'ClusterGen: Non-real datatype ' . $type . ' given as output');
        }
    }
    // Break the by-reference binding left behind by the loop above. Without
    // this, any later use of $type as a loop variable writes through to the
    // last element of $outputs and silently corrupts the output types.
    unset($type);

    $seed = get_default($t_args, 'seed', null);
    if ($seed !== null) {
        grokit_assert(is_int($seed), 'ClusterGen: Seed must be an integer or null.');
        // CongruentHash is only used by the generated code when an explicit
        // seed is supplied (it is mixed with the stream id).
        $user_headers[] = 'HashFunctions.h';
    }

    $distLib = get_default($t_args, 'dist.lib', 'std');
    $distNS = '';
    switch ($distLib) {
        case 'std':
            $sys_headers[] = 'random';
            $distNS = 'std';
            break;
        case 'boost':
            $sys_headers[] = 'boost/random.hpp';
            $distNS = 'boost::random';
            $libraries[] = 'boost_random-mt';
            if ($seed === null) {
                // Need random_device
                $sys_headers[] = 'boost/random/random_device.hpp';
                $libraries[] = 'boost_system-mt';
            }
            break;
        default:
            grokit_error('ClusterGen: Unknown RNG library ' . $distLib);
    }

    $distRNG = 'mt19937';
    $RNGtype = $distNS . '::' . $distRNG;

    $nTuples = get_first_key($t_args, ['n', '0']);
    grokit_assert(is_int($nTuples), 'ClusterGen: the number of tuples to be produced must be an integer.');

    $centers = get_first_key($t_args, ['centers', 1]);
    grokit_assert(is_array($centers), 'ClusterGen: centers must be an array of functors');

    // Translates one center functor (name + arguments) into a pair of
    // [C++ distribution type name, list of constructor arguments].
    $handleDist = function ($name, $args, $oType) use($distNS) {
        $distName = '';
        $distArgs = [];

        switch ($name) {
            case 'gaussian':
            case 'normal':
                $distName = $distNS . '::' . 'normal_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 2, 'ClusterGen: Normal distribution takes at most 2 arguments, ' . \count($args) . ' given');

                $mean = get_default($args, ['mean', 0], 0.0);
                $sigma = get_default($args, ['std_dev', 'sigma', 1], 1.0);

                grokit_assert(is_numeric($mean), 'ClusterGen: mean parameter of normal distribution must be a real number.');
                grokit_assert(is_numeric($sigma), 'ClusterGen: sigma parameter of normal distribution must be a real number.');

                $mean = floatval($mean);
                $sigma = floatval($sigma);

                $distArgs = [$mean, $sigma];
                break;

            case 'binomial':
                $distName = $distNS . '::' . 'binomial_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 2, 'ClusterGen: Binomial distribution takes at most 2 arguments, ' . \count($args) . ' given');

                $t = get_default($args, ['t', 0], 1);
                $p = get_default($args, ['p', 1], 0.5);

                grokit_assert(is_int($t), 'ClusterGen: t parameter of binomial distribution must be an integer.');
                grokit_assert(is_numeric($p), 'ClusterGen: p parameter of binomial distribution must be a real number.');

                $p = floatval($p);

                grokit_assert($p >= 0 && $p <= 1, 'ClusterGen: p parameter of binomial distribution must be in the range [0, 1]');
                grokit_assert($t >= 0, 'ClusterGen: t parameter of binomial distribution must be in the range [0, +inf)');

                $distArgs = [$t, $p];
                break;

            case 'negative_binomial':
                $distName = $distNS . '::' . 'negative_binomial_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 2, 'ClusterGen: Negative Binomial distribution takes at most 2 arguments, ' . \count($args) . ' given');

                $k = get_default($args, ['k', 0], 1);
                $p = get_default($args, ['p', 1], 0.5);

                grokit_assert(is_int($k), 'ClusterGen: k parameter of negative binomial distribution must be an integer.');
                grokit_assert(is_numeric($p), 'ClusterGen: p parameter of negative binomial distribution must be a real number.');

                $p = floatval($p);

                grokit_assert($p > 0 && $p <= 1, 'ClusterGen: p parameter of negative binomial distribution must be in the range (0, 1]');
                grokit_assert($k > 0, 'ClusterGen: k parameter of negative binomial distribution must be in the range (0, +inf)');

                $distArgs = [$k, $p];
                break;

            case 'inverse_gaussian':
            case 'inverse_normal':
                grokit_assert(\count($args) <= 2, 'ClusterGen: Inverse Gaussian distribution takes at most 2 arguments, ' . \count($args) . ' given');

                $mean = get_default($args, ['mean', 0], 1);
                $shape = get_default($args, ['shape', 1], 1);

                grokit_assert(is_numeric($mean), 'ClusterGen: mean parameter of inverse gaussian distribution must be a real number.');
                grokit_assert(is_numeric($shape), 'ClusterGen: shape parameter of inverse gaussian distribution must be a real number.');

                $mean = floatval($mean);
                $shape = floatval($shape);

                grokit_assert($mean > 0, 'ClusterGen: mean of inverse gaussian distribution must be in range (0, inf)');
                grokit_assert($shape > 0, 'ClusterGen: shape of inverse gaussian distribution must be in range (0, inf)');

                $gen_args = ['output' => $oType, 'ns' => $distNS];
                $distName = strval(lookupResource('datagen::InverseGaussianGen', $gen_args));
                $distArgs = [$mean, $shape];
                break;

            case 'uniform':
                $distName = $distNS . '::' . 'uniform_real_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 2, 'ClusterGen: Uniform distribution takes at most 2 arguments, ' . \count($args) . ' given');

                $a = get_default($args, ['a', 0], 0.0);
                $b = get_default($args, ['b', 1], 1.0);

                grokit_assert(is_numeric($a), 'ClusterGen: `a` parameter of uniform distribution must be a real number.');
                grokit_assert(is_numeric($b), 'ClusterGen: `b` parameter of uniform distribution must be a real number.');

                $a = floatval($a);
                $b = floatval($b);

                grokit_assert($b >= $a, 'ClusterGen: `b` parameter of uniform distribution must be >= the `a` parameter.');

                $distArgs = [$a, $b];
                break;

            case 'exponential':
                $distName = $distNS . '::' . 'exponential_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 1, 'ClusterGen: Exponential distribution takes at most 1 argument.');

                $lambda = get_default($args, ['lambda', 0], 1.0);

                grokit_assert(is_numeric($lambda), 'ClusterGen: `lambda` parameter of exponential distribution must be a real number.');

                $lambda = floatval($lambda);

                grokit_assert($lambda > 0, 'ClusterGen: `lambda` parameter of exponential distribution must be in range (0, +inf).');

                $distArgs = [$lambda];
                break;

            case 'gamma':
            case 'Gamma':
                $distName = $distNS . '::' . 'gamma_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 2, 'ClusterGen: Gamma distribution takes at most 2 arguments.');

                $alpha = get_default($args, ['alpha', 0], 1.0);
                // Was a call to the non-existent det_default().
                $beta = get_default($args, ['beta', 1], 1.0);

                grokit_assert(is_numeric($alpha), 'ClusterGen: `alpha` parameter of gamma distribution must be a real number.');
                grokit_assert(is_numeric($beta), 'ClusterGen: `beta` parameter of gamma distribution must be a real number.');

                $alpha = floatval($alpha);
                $beta = floatval($beta);

                $distArgs = [$alpha, $beta];
                break;

            default:
                grokit_error('ClusterGen: Unknown distribution ' . $name . ' given for center');
        }

        return [$distName, $distArgs];
    };

    // $dists:    column name => [member name => C++ distribution type]
    // $distArgs: column name => [member name => constructor arguments]
    $dists = [];
    $distArgs = [];
    $count = 0;
    $oType = '';
    // Least common multiple of the number of centers across all columns.
    $nCenters = 1;

    // The i-th entry of $centers is paired with the i-th output by walking the
    // internal array pointer of $outputs in step with this loop.
    reset($outputs);
    foreach ($centers as $val) {
        $cluster = $val;
        if (is_functor($val)) {
            $cluster = [$val];
        } else {
            if (is_array($val)) {
                $nCenters = lcm($nCenters, \count($val));
            } else {
                grokit_error('ClusterGen: center descriptions must be functors or list of functors');
            }
        }

        $curDist = [];
        $curDistArgs = [];
        $curDistName = 'distribution' . $count++;

        $oType = strval(current($outputs));

        $iCount = 0;
        foreach ($cluster as $functor) {
            grokit_assert(is_functor($functor), 'ClusterGen: center description must be a functor');

            $vName = $curDistName . '_' . $iCount++;
            $ret = $handleDist($functor->name(), $functor->args(), $oType);

            $curDist[$vName] = $ret[0];
            $curDistArgs[$vName] = $ret[1];
        }

        next($outputs);

        $dists[$curDistName] = $curDist;
        $distArgs[$curDistName] = $curDistArgs;
    }

    // Determine the default number of sets to compute at a time.
    // We want to generate either $nTuples or 10,000 tuples, depending on which
    // is less. The default is clamped to at least 1 so that n = 0 still yields
    // a valid (non-empty) cache size.
    $defaultSetsTarget = min($nTuples, 10000);
    $setsToTarget = max(1, intval(ceil($defaultSetsTarget / $nCenters)));

    $computeSets = get_default($t_args, 'compute.sets', $setsToTarget);

    grokit_assert(is_int($computeSets) && $computeSets > 0, 'ClusterGen: compute.sets must be a positive integer, ' . $computeSets . ' given');

    $className = generate_name('ClusterGen');
?>

class <?php echo $className; ?> {

    // The number of tuples to produce per task
    static constexpr size_t N = <?php echo $nTuples; ?> ;
    static constexpr size_t CacheSize = <?php echo $computeSets * $nCenters; ?> ;

    // Typedefs
    typedef std::tuple<<?php echo array_template('{val}', ', ', $outputs); ?> > Tuple;
    typedef std::array<Tuple, CacheSize> TupleArray;
    typedef TupleArray::const_iterator TupleIterator;
    typedef <?php echo $RNGtype; ?> RandGen;

    // Number of tuples produced.
    uintmax_t count;

    // Cache a number of outputs for efficiency
    TupleArray cache;
    TupleIterator cacheIt;

    // Random number generator
    RandGen rng;

    // Distributions
<?php
    foreach ($dists as $name => $list) {
        foreach ($list as $vName => $distType) {
?>
    <?php echo $distType; ?> <?php echo $vName; ?> ;
<?php
        } // foreach distribution
    } // foreach cluster set
?>

    // Helper function to generate tuples.
    void GenerateTuples(void) {
<?php
    $tIndex = 0;
    foreach ($dists as $name => $list) {
        $lCenters = \count($list);
        // $nCenters has been defined to be the LCM of the number of centers in
        // any column, so $lCenters is guaranteed to divide evenly into
        // CacheSize
?>
        for( size_t index = 0; CacheSize > index; index += <?php echo $lCenters; ?> ) {
<?php
        $index = 0;
        foreach ($list as $vName => $distType) {
?>
            std::get<<?php echo $tIndex; ?> >(cache[index + <?php echo $index; ?> ]) = <?php echo $vName; ?> (rng);
<?php
            $index++;
        } // foreach value in tuple
?>
        }
<?php
        $tIndex++;
    } // foreach distribution
?>
        cacheIt = cache.cbegin();
    }

public:
    // Constructor
    // Members are initialized in their declaration order.
    <?php echo $className; ?> ( GIStreamProxy & _stream ) :
        count(0)
        , cache()
        , cacheIt()
        , rng()
<?php
    foreach ($dists as $name => $list) {
        foreach ($list as $vName => $distType) {
?>
        , <?php echo $vName; ?> (<?php echo implode(', ', $distArgs[$name][$vName]); ?> )
<?php
        } // foreach distribution
    } // foreach cluster set
?>
    {
<?php if (is_null($seed)) { ?>
        <?php echo $distNS; ?> ::random_device rd;
<?php } // if seed is null ?>
        RandGen::result_type seed = <?php echo is_null($seed) ? 'rd()' : "CongruentHash({$seed}, _stream.get_id() )"; ?> ;
        rng.seed(seed);

        cacheIt = cache.cend();
    }

    // Destructor
    ~<?php echo $className; ?> (void) { }

    bool ProduceTuple(<?php echo typed_ref_args($outputs); ?> ) {
        if( N > count ) {
            if( cacheIt == cache.cend() ) {
                GenerateTuples();
            }
<?php
    $tIndex = 0;
    foreach ($outputs as $name => $outType) {
?>
            <?php echo $name; ?> = std::get<<?php echo $tIndex; ?> >(*cacheIt);
<?php
        $tIndex++;
    } // foreach output
?>

            ++cacheIt;
            ++count;

            return true;
        }
        else {
            return false;
        }
    }
};

<?php
    return [
        'kind' => 'GI',
        'name' => $className,
        'output' => $outputs,
        'system_headers' => $sys_headers,
        'user_headers' => $user_headers,
        'libraries' => $libraries,
    ];
}
/**
 * Emits the C++ work functions for a Print waypoint.
 *
 * Two extern "C" functions are generated:
 *  - PrintWorkFunc_<wpName>: walks the input chunk and, for every tuple that
 *    belongs to a query, writes that query's expressions to the query's file
 *    (as a JSON array row for type 'json', or a delimited line for type 'csv').
 *  - PrintFinalizeWorkFunc_<wpName>: for 'json' queries, overwrites the
 *    trailing comma and appends the "types" description that closes the
 *    document.
 *
 * @param mixed $wpName  waypoint name; spliced into the generated identifiers
 * @param array $queries map of query id to its description; this function
 *                       reads the 'type', 'expressions' and (for non-json
 *                       queries) 'separator' entries
 * @param mixed $attMap  attribute map handed through to the cg* helpers
 */
function PrintGenerate($wpName, $queries, $attMap)
{
?>
// module specifsic headers to allow separate compilation
#include <iostream>
#include <string.h>
#include "Profiling.h"

//+{"kind":"WPF", "name":"Process Chunk", "action":"start"}
extern "C"
int PrintWorkFunc_<?php echo $wpName; ?> (WorkDescription &workDescription, ExecEngineData &result) {

    // get the work description
    PrintWorkDescription myWork;
    myWork.swap (workDescription);
    Chunk &input = myWork.get_chunkToPrint ();
    QueryToFileMap& streams = myWork.get_streams();
    QueryToCounters& counters = myWork.get_counters();

    QueryIDSet queriesToRun = QueryExitsToQueries(myWork.get_whichQueryExits ());

    // prepare bitstring iterator
    Column inBitCol;
    BStringIterator queries;
    input.SwapBitmap (queries);

<?php
    cgDeclareQueryIDs($queries);
    cgAccessColumns($attMap, 'input', $wpName);
    cgConstantInit($queries);
?>

    // for each query, define a stream variable
<?php
    // The JSON scratch variables are shared by all json queries, so they must
    // be declared only once or the generated C++ would redeclare them.
    $jsonVarsDefined = false;
    foreach ($queries as $query => $val) {
        $type = $val["type"];
        if ($type == 'json' && !$jsonVarsDefined) {
            $jsonVarsDefined = true;
            // Need some extra variables
?>
    Json::Value json;
    Json::Value jsonRow;
    Json::FastWriter jsonWriter;
    std::string jsonString;
<?php
        } // if type is json
?>
    PrintFileObj& pfo_<?php echo queryName($query); ?> = streams.Find(<?php echo queryName($query); ?> );
    DistributedCounter* counter_<?php echo queryName($query); ?> = counters.Find(<?php echo queryName($query); ?> );
    FILE* file_<?php echo queryName($query); ?> = pfo_<?php echo queryName($query); ?> .get_file();
    const char * DELIM_<?php echo queryName($query); ?> = "<?php echo $type == 'json' ? ',' : $val["separator"]; ?> ";
#ifdef PER_QUERY_PROFILE
    size_t n_tuples_<?php echo queryName($query); ?> = 0;
#endif // PER_QUERY_PROFILE
<?php
    } // foreach query
?>

    // PRINTING
    constexpr const size_t BUFFER_LENGTH = 10 * 1024 * 1024; // 10 MB
    char buffer[BUFFER_LENGTH]; // ALIN, CHANGE THIS TO A DEFINED CONSTANT

    PROFILING2_START;
    size_t n_tuples = 0;
    while (!queries.AtEndOfColumn ()){
        ++n_tuples;
        QueryIDSet qry;
        qry = queries.GetCurrent();
        qry.Intersect(queriesToRun);
        queries.Advance();

<?php
    cgAccessAttributes($attMap);
    foreach ($queries as $query => $val) {
        // Use this query's own output type. Previously $type was left over
        // from the declaration loop above, i.e. always the last query's type.
        $type = $val["type"];
?>
        // execute <?php echo queryName($query); ?> code
        if (qry.Overlaps(<?php echo queryName($query); ?> ) && counter_<?php echo queryName($query); ?> ->Decrement(1)>=0){
<?php cgPreprocess($val); ?>

#ifdef PER_QUERY_PROFILE
            ++n_tuples_<?php echo queryName($query); ?> ;
#endif // PER_QUERY_PROFILE
            int curr=0; // the position where we write the next attribute

<?php
        if ($type == 'json') {
?>
            jsonRow = Json::Value(Json::arrayValue);
<?php
            foreach ($val["expressions"] as $exp) {
?>
            json = Json::Value(Json::nullValue);
            ToJson(<?php echo $exp->value(); ?> , json);
            jsonRow.append(json);
<?php
            } // for each expression
?>
            jsonString = jsonWriter.write(jsonRow);
            fprintf(file_<?php echo queryName($query); ?> , "%s,", jsonString.c_str());
<?php
        } elseif ($type == 'csv') {
            foreach ($val["expressions"] as $exp) {
?>
            curr += ToString(<?php echo $exp->value(); ?> ,buffer+curr);
            curr += sprintf(buffer + (curr-1), "%s", DELIM_<?php echo queryName($query); ?> ) - 1;
<?php
            } // for each expression
?>

            // Replace the last comma with a newline
            buffer[curr-1]='\n';

            // Null terminate the string
            buffer[curr]='\0';

            // Now we print the buffer
            fprintf(file_<?php echo queryName($query); ?> , "%s", buffer);
<?php
        } // if output file is json / csv
?>
        }
<?php
    } // for each query
    cgAdvanceAttributes($attMap);
?>
    }

<?php cgPutbackColumns($attMap, 'input', $wpName); ?>

    PROFILING2_END;

    PCounterList counterList;
    PCounter totalCnt("tpi", n_tuples, "<?php echo $wpName; ?> ");
    counterList.Append(totalCnt);

#ifdef PER_QUERY_PROFILE
    // add query counters to list
<?php foreach ($queries as $query => $val) { ?>
    {
        PCounter cnt("<?php echo queryName($query); ?> ", n_tuples_<?php echo queryName($query); ?> , "<?php echo $wpName; ?> ");
        counterList.Append(cnt);
    }
<?php } ?>
#endif // PER_QUERY_PROFILE

    PROFILING2_SET(counterList, "<?php echo $wpName; ?> ");

    // just return some arbitrary value... don't worry about reconstructing the chunk
    return WP_PROCESS_CHUNK;
}
//+{"kind":"WPF", "name":"Process Chunk", "action":"end"}

//+{"kind":"WPF", "name":"Finalize", "action":"start"}
extern "C"
int PrintFinalizeWorkFunc_<?php echo $wpName; ?> (WorkDescription &workDescription, ExecEngineData &result) {
    PrintFinalizeWorkDescription myWork;
    myWork.swap( workDescription );
    QueryToFileMap& streams = myWork.get_streams();

    QueryIDSet queriesToRun = QueryExitsToQueries(myWork.get_whichQueryExits());
<?php cgDeclareQueryIDs($queries); ?>

    // For each query, define a stream variable
<?php
    $jsonVarsDefined = false;
    foreach ($queries as $query => $val) {
        $type = $val['type'];
        if ($type == 'json' && !$jsonVarsDefined) {
            $jsonVarsDefined = true;
?>
    Json::Value json;
    Json::FastWriter jsonWriter;
    std::string jsonString;
<?php
        } // if we need to define extra json vars
?>
    PrintFileObj& pfo_<?php echo queryName($query); ?> = streams.Find(<?php echo queryName($query); ?> );
    FILE* file_<?php echo queryName($query); ?> = pfo_<?php echo queryName($query); ?> .get_file();
<?php
    } // for each query
?>

<?php
    foreach ($queries as $query => $val) {
        $type = $val['type'];
        if ($type == 'json') {
?>
    // Set up the types array
    json = Json::Value(Json::arrayValue);
<?php
            foreach ($val['expressions'] as $exp) {
                $describer = $exp->type()->describer('json');
                grokit_assert(is_callable($describer), 'Invalid JSON describer for type ' . $exp->type());
?>
    {
        Json::Value tmp;
<?php $describer('tmp'); ?>
        json.append(tmp);
    }
<?php
            } // for each expression
?>
    jsonString = jsonWriter.write(json);
    // Last character is a newline, remove it
    jsonString.erase(jsonString.size() - 1, 1);

    // End the content section and write out the types
    fseek(file_<?php echo queryName($query); ?> , -1, SEEK_CUR); // overwrite the last comma
    fprintf(file_<?php echo queryName($query); ?> , " ], \"types\": %s }", jsonString.c_str());
<?php
        } // if type is json
    } // for each query
?>

    return WP_FINALIZE;
}
//+{"kind":"WPF", "name":"Finalize", "action":"end"}

<?php
}
/**
 * Emits the C++ source of a "SplitState" resource class and returns its descriptor.
 *
 * The generated class owns a fixed-size array of state pointers that callers
 * check out and check back in; a mutex, a condition variable and a per-slot
 * flag array coordinate the check-out/check-in calls.
 *
 * @param array $t_args Template arguments. Must contain:
 *                      - 'type': the state type; it is echoed into the C++
 *                        source and queried with is('resettable') to choose
 *                        how Reset() is generated.
 *                      - 'size': the number of states, echoed as NUM_STATES.
 *
 * @return array Descriptor with the keys 'kind' ('RESOURCE'), 'name' (the
 *               generated class name), 'system_headers' and 'user_headers'.
 */
function SplitState(array $t_args)
{
    grokit_assert(array_key_exists('type', $t_args), 'SplitState: No type given');
    grokit_assert(array_key_exists('size', $t_args), 'SplitState: No size given');

    $type = $t_args['type'];
    $size = $t_args['size'];

    $className = generate_name('SplitState_');
?>

class <?= $className ?> {
public:
    using StateType = <?= $type ?>;

    static constexpr size_t NUM_STATES = <?= $size ?>;

private:
    using UniqueLock = std::unique_lock<std::mutex>;

    using StateArray = std::array<StateType *, NUM_STATES>;
    using BoolArray = std::array<bool, NUM_STATES>;

    // Array of states
    StateArray stateArray;

    // Mutex to protect states
    std::mutex myMutex;

    // Condition variable to wake up threads blocked on acquiring a state.
    std::condition_variable signalVar;

    // Keeps track of which states are available to be checked out.
    BoolArray writeLocked;

    // Random number generator
    std::mt19937_64 rng;

public:

    // Constructor
    <?= $className ?>( ) :
        stateArray(),
        myMutex(),
        signalVar(),
        writeLocked(),
        rng()
    {
        stateArray.fill(nullptr);
        writeLocked.fill(false);

        std::random_device rd;

        // 64-bits of seed
        uint32_t seed_vals[2];
        seed_vals[0] = rd();
        seed_vals[1] = rd();

        std::seed_seq seed(seed_vals, seed_vals + 2);

        rng.seed(seed);
    }

    // Destructor
    ~<?= $className ?>() {
        for( auto elem : stateArray ) {
            if( elem != nullptr ) {
                delete elem;
            }
        }
    }

    // Methods

    int CheckOutOne( int *theseAreOK, StateType *& checkMeOut ) {
        // first, figure out all of the OK segments
        int numWanted = 0;
        int goodOnes[NUM_STATES];
        for (int i = 0; i < NUM_STATES; i++) { //>
            if (theseAreOK[i] == 1) {
                goodOnes[numWanted] = i;
                numWanted++;
            }
        }

        {
            UniqueLock lock(myMutex); // Acquire lock

            // now, try them one-at-a-time, in random order
            while (1) {

                // try each of the desired hash table segments, in random order
                for (int i = 0; i < numWanted; i++) { //>

                    // randomly pick one of the guys in the list
                    std::uniform_int_distribution<int> dist(i, numWanted-1);
                    int whichIndex = dist(rng);

                    // move him into the current slot
                    int whichToChoose = goodOnes[whichIndex];
                    goodOnes[whichIndex] = goodOnes[i];
                    goodOnes[i] = whichToChoose;

                    // try him
                    if (!writeLocked[whichToChoose]) {

                        // he is open, so write lock him
                        writeLocked[whichToChoose] = true;

                        // and return him
                        checkMeOut = stateArray[whichToChoose];
                        stateArray[whichToChoose] = nullptr;
                        return whichToChoose;
                    }
                }

                // if we got here, then every one that we want is write locked. So
                // we will go to sleep until one of them is unlocked, at which point
                // we will wake up and try again...
                signalVar.wait(lock);
            }
        }
    }

    void CheckIn( int whichEntry, StateType *& checkMeIn ) {
        // just note that no one is writing this one, then signal all potential writers
        {
            UniqueLock lock(myMutex);
            writeLocked[whichEntry] = false;

            stateArray[whichEntry] = checkMeIn;
            checkMeIn = nullptr;
        }

        signalVar.notify_all();
    }

    StateType * Peek( int whichEntry ) {
        return stateArray[ whichEntry ];
    }

    void Delete( int whichEntry ) {
        if( stateArray[whichEntry] != nullptr ) {
            delete stateArray[whichEntry];
            stateArray[whichEntry] = nullptr;
        }
    }

    void Reset() {
        for(size_t i = 0; i < NUM_STATES; i++) {
<?php if ($type->is('resettable')): // reuse the state objects in place ?>
            if( stateArray[i] != nullptr ) {
                stateArray[i]->Reset();
            }
<?php else: // not resettable: drop the state object instead ?>
            Delete(i);
<?php endif; ?>
        }
    }
};

<?php
    return [
        'kind' => 'RESOURCE',
        'name' => $className,
        'system_headers' => ['mutex', 'condition_variable', 'array', 'random'],
        'user_headers' => [],
    ];
}
/**
 * Emits the C++ source of an "Average" GLA and returns its descriptor.
 *
 * The generated class keeps a tuple counter plus one running sum per input.
 * GetResult() divides each sum by the counter; when no tuple was aggregated
 * it hands back the untouched sum of the matching input instead.
 *
 * @param array $t_args Template arguments; only the optional 'debug' entry is
 *                      read (default 0). A value above 0 adds the 'iostream'
 *                      and 'sstream' system headers to the descriptor.
 * @param array $input  Input name => type object. Types answering
 *                      is('numeric') are accumulated as `long double`
 *                      initialised to 0.0; any other type is accumulated in
 *                      its own type with an empty initialiser.
 * @param array $output Output name => type, paired positionally with $input.
 *                      A null type is filled in: base::DOUBLE for numeric
 *                      inputs, the input's own type otherwise.
 *
 * @return array Descriptor with the keys 'kind' ('GLA'), 'name',
 *               'system_headers', 'input', 'output' and 'result_type'
 *               ('single').
 */
function Average(array $t_args, array $input, array $output)
{
    $className = generate_name('Average');

    grokit_assert(\count($input) === \count($output), 'Average must have the same number of inputs and outputs');

    $outToIn = [];
    $internalTypes = [];
    $internalInit = [];

    // Walk $output in lock-step with $input through its internal pointer so
    // the i-th output is paired with the i-th input.
    reset($output);
    foreach ($input as $name => $type) {
        $outKey = key($output);
        $outToIn[$outKey] = $name;

        if ($type->is('numeric')) {
            $internalTypes[$name] = 'long double';
            $internalInit[$name] = '0.0';
        } else {
            $internalTypes[$name] = $type;
            $internalInit[$name] = '';
        }

        // Outputs without an explicit type get one derived from the input.
        if (is_null(current($output))) {
            if ($type->is('numeric')) {
                $output[$outKey] = lookupType('base::DOUBLE');
            } else {
                $output[$outKey] = $type;
            }
        }

        next($output);
    }

    $countType = 'uint64_t';

    $debug = get_default($t_args, 'debug', 0);
?>

class <?php echo $className; ?> {
private:
    <?php echo $countType; ?> count; // keeps the number of tuples aggregated

<?php foreach ($internalTypes as $name => $type) { ?>
    <?php echo $type; ?> sum_<?php echo $name; ?>;
<?php } // foreach internal value ?>

public:
    <?php echo $className; ?>() :
        count(0)
<?php foreach ($internalInit as $name => $init) { ?>
        , sum_<?php echo $name; ?>(<?php echo $init; ?>)
<?php } // foreach internal initializer ?>
    {}

    void AddItem(<?php echo const_typed_ref_args($input); ?>) {
        count++;

<?php foreach ($input as $name => $type) { ?>
        sum_<?php echo $name; ?> += <?php echo $name; ?>;
<?php } // foreach input ?>
    }

    void AddState(<?php echo $className; ?>& o) {
        count += o.count;
<?php foreach ($input as $name => $type) { ?>
        sum_<?php echo $name; ?> += o.sum_<?php echo $name; ?>;
<?php } // foreach input ?>
    }

    // we only support one tuple as output
    void GetResult(<?php echo typed_ref_args($output); ?>) const {
        if( count > 0 ) {
<?php foreach ($output as $name => $type) {
    $inName = $outToIn[$name]; ?>
            <?php echo $name; ?> = (sum_<?php echo $inName; ?>) / count;
<?php } // foreach output ?>
        } else {
<?php foreach ($output as $name => $type) {
    // Resolve the matching input for every output here as well; relying on
    // the $inName left over from the loop above would make each output read
    // the sum of the last input.
    $inName = $outToIn[$name]; ?>
            <?php echo $name; ?> = sum_<?php echo $inName; ?>;
<?php } // foreach output ?>
        }
    }
};

<?php
    $sys_headers = ['cinttypes'];
    if ($debug > 0) {
        $sys_headers[] = 'iostream';
        $sys_headers[] = 'sstream';
    }

    return [
        'kind' => 'GLA',
        'name' => $className,
        'system_headers' => $sys_headers,
        'input' => $input,
        'output' => $output,
        'result_type' => 'single',
    ];
}