/**
 * Emits a C++ function that folds all of its arguments into one hash value.
 *
 * The generated function starts from H_b, combines every argument (in the
 * order given by $args) using CongruentHash, and returns the accumulated value
 * cast to the type looked up for 'base::BIGINT'.
 *
 * @param array $args   Map of argument name => argument type for the
 *                      generated function.
 * @param array $t_args Template arguments; not read by this generator.
 *
 * @return array Description of the generated function: kind, generated name,
 *               result type, required headers and the deterministic flag.
 */
function Hash($args, array $t_args = [])
{
    $generatedName = generate_name('Hash_');
    $resultType = lookupType('base::BIGINT');
?>
<?= $resultType ?> <?= $generatedName ?>(<?= const_typed_ref_args($args) ?>) {
    uint64_t hashVal = H_b;
<?php foreach ($args as $argName => $_argType) { ?>
    hashVal = CongruentHash(<?= $argName ?>, hashVal);
<?php } // foreach argument ?>
    return static_cast<<?= $resultType ?> >(hashVal);
}

<?php
    $description = [
        'kind' => 'FUNCTION',
        'name' => $generatedName,
        'result' => $resultType,
        'system_headers' => ['cinttypes'],
        'user_headers' => ['HashFunctions.h'],
        'deterministic' => true,
    ];

    return $description;
}
/**
 * GLA generator computing connected components with a union-find structure.
 *
 * Every input tuple is an edge (src, dst). Both endpoints are hashed to 64-bit
 * ids and united in a hash-map based union-find. Partial states are merged in
 * AddState by flattening both maps to node -> root form and reconciling
 * conflicting roots through a secondary union-find. The result is emitted as
 * (node, component) pairs.
 *
 * @param array $t_args  Template arguments; not read by this generator.
 * @param array $inputs  Exactly two inputs: the edge source and destination.
 * @param array $outputs Exactly two outputs; their names are kept and both are
 *                       typed with the type looked up for 'int'.
 *
 * @return array GLA description (kind, name, headers, input, output,
 *               result type).
 */
function ConnectedComponents(array $t_args, array $inputs, array $outputs)
{
    // Class name is randomly generated
    $className = generate_name("CCompGLA");

    // Processing of inputs.
    grokit_assert(count($inputs) == 2, 'Connected Components: 2 inputs expected');
    $inputs_ = array_combine(['src', 'dst'], $inputs);

    // Setting output type. array_combine() below needs exactly two names.
    grokit_assert(count($outputs) == 2, 'Connected Components: 2 outputs expected');
    $outType = lookupType('int');
    $outputs_ = ['node' => $outType, 'component' => $outType];
    $outputs = array_combine(array_keys($outputs), $outputs_);

    // cassert is needed for assert(), cinttypes for uint64_t.
    $sys_headers = ["vector", "cassert", "cinttypes", "mct/hash-map.hpp"];
    $user_headers = [];
    $lib_headers = [];
?>

using namespace std;

class <?= $className ?>;

class <?= $className ?> {
    // Union-find over 64-bit ids, stored in hash maps.
    class UnionFindMap {
    private:
        // node id -> parent id. Held by pointer so that two instances can
        // exchange their maps cheaply (see SetData).
        mct::closed_hash_map<uint64_t, uint64_t>* parent;
        // root id -> number of nodes in its tree (used for union by size).
        mct::closed_hash_map<uint64_t, uint64_t> sz;
        // Sentinel returned by Find for ids that were never inserted.
        const uint64_t NON_EXISTING_ID = -1;

    public:
        // Allocates the (initially empty) parent map.
        UnionFindMap() {
            parent = new mct::closed_hash_map<uint64_t, uint64_t>();
        }

        // Returns the root of i, or NON_EXISTING_ID when i is unknown.
        uint64_t Find(uint64_t i) {
            if ((*parent).find(i) == (*parent).end()) {
                return NON_EXISTING_ID;
            }
            // Shorten the path while walking up: every visited node is
            // re-pointed to its grandparent.
            while (i != (*parent)[i]) {
                (*parent)[i] = (*parent)[(*parent)[i]];
                i = (*parent)[i];
            }
            return i;
        }

        // Unites the sets containing i and j, inserting ids that are new.
        // When both exist, the smaller tree is attached below the larger one.
        void Union(uint64_t i, uint64_t j) {
            uint64_t ip = Find(i);
            uint64_t jp = Find(j);
            if (ip != NON_EXISTING_ID && jp != NON_EXISTING_ID) { // both exist
                if (ip != jp) {
                    if (sz[ip] < sz[jp]) {
                        (*parent)[ip] = jp;
                        sz[jp] += sz[ip];
                    } else {
                        (*parent)[jp] = ip;
                        sz[ip] += sz[jp];
                    }
                }
            } else if (ip == NON_EXISTING_ID && jp == NON_EXISTING_ID) { // both new
                (*parent)[i] = i;
                (*parent)[j] = i;
                // A self-edge (i == j) introduces one node, not two.
                sz[i] = (i == j) ? 1 : 2;
            } else if (jp == NON_EXISTING_ID) { // i exists, j is new
                (*parent)[j] = ip;
                sz[ip]++;
            } else { // j exists, i is new
                (*parent)[i] = jp;
                sz[jp]++;
            }
        }

        mct::closed_hash_map<uint64_t, uint64_t>* GetUF() {
            return parent;
        }

        bool IsEmpty() {
            return (*parent).empty();
        }

        uint64_t GetSize() {
            return (uint64_t) (*parent).size();
        }

        // Replaces the parent map pointer. The previous map is NOT freed here;
        // the caller is expected to hand it to another instance.
        void SetData(mct::closed_hash_map<uint64_t, uint64_t>* other_data) {
            parent = other_data;
        }

        // Flattens the structure so that every node maps directly to its root.
        void FinalizeRoot() {
            for (mct::closed_hash_map<uint64_t, uint64_t>::iterator it = (*parent).begin();
                 it != (*parent).end(); ++it) {
                it->second = Find(it->first);
            }
        }

        void Clear() {
            (*parent).clear();
            sz.clear();
        }

        ~UnionFindMap() {
            delete parent;
        }
    };

private:
    // union-find map data structure, which contains nodeID->compID information
    UnionFindMap primary_uf;
    mct::closed_hash_map<uint64_t, uint64_t>::iterator output_iterator, output_iterator_end;
    // Whether primary_uf has already been flattened by FinalizeLocalState.
    bool localFinalized = false;

public:
    <?= $className ?>() {}

    void AddItem(<?= const_typed_ref_args($inputs_) ?>) {
        uint64_t src_ = Hash(src);
        uint64_t dst_ = Hash(dst);
        primary_uf.Union(src_, dst_);
    }

    void AddState(<?= $className ?> &other) {
        FinalizeLocalState();
        other.FinalizeLocalState();

        mct::closed_hash_map<uint64_t, uint64_t>* this_state_data = primary_uf.GetUF();
        mct::closed_hash_map<uint64_t, uint64_t>* other_state_data = other.primary_uf.GetUF();

        // Always iterate over the smaller map: exchange the two maps when this
        // state is the smaller one. Only the parent maps are exchanged, not
        // sz; no further Union is issued on primary_uf in this method.
        if (primary_uf.GetSize() < other.primary_uf.GetSize()) {
            mct::closed_hash_map<uint64_t, uint64_t>* tmp = this_state_data;
            this_state_data = other_state_data;
            other_state_data = tmp;

            primary_uf.SetData(this_state_data);
            other.primary_uf.SetData(other_state_data);
        }
        assert(primary_uf.GetSize() >= other.primary_uf.GetSize());

        // Go over the other state and record, in a secondary union-find, every
        // pair of component ids that turn out to be the same component.
        UnionFindMap secondary_uf;
        for (auto const& entry : (*other_state_data)) {
            if ((*this_state_data).count(entry.first) == 1) { // key exists in this state
                uint64_t this_comp_id = (*this_state_data)[entry.first];
                if (this_comp_id != entry.second) { // merge needed
                    secondary_uf.Union(this_comp_id, entry.second);
                }
            } else {
                (*this_state_data)[entry.first] = entry.second;
            }
        }

        // Nothing to reconcile when the side table is empty.
        if (secondary_uf.IsEmpty()) {
            return;
        }

        // Apply the side table: rewrite every component id that was merged.
        secondary_uf.FinalizeRoot();
        mct::closed_hash_map<uint64_t, uint64_t>* secondary_state_data = secondary_uf.GetUF();
        for (auto& p : (*this_state_data)) {
            if ((*secondary_state_data).find(p.second) != (*secondary_state_data).end()) {
                p.second = (*secondary_state_data)[p.second];
            }
        }
    }

    // Flattens primary_uf once; later calls are no-ops.
    void FinalizeLocalState() {
        if (!localFinalized) {
            primary_uf.FinalizeRoot();
            localFinalized = true;
        }
    }

    void Finalize() {
        output_iterator = primary_uf.GetUF()->begin();
        output_iterator_end = primary_uf.GetUF()->end();
    }

    bool GetNextResult(<?= typed_ref_args($outputs_) ?>) {
        if (output_iterator != output_iterator_end) {
            node = output_iterator->first;
            component = output_iterator->second;
            ++output_iterator;
            return true;
        } else {
            return false;
        }
    }
};

<?php
    return [
        'kind' => 'GLA',
        'name' => $className,
        'system_headers' => $sys_headers,
        'user_headers' => $user_headers,
        'lib_headers' => $lib_headers,
        'input' => $inputs,
        'output' => $outputs,
        'result_type' => 'multi',
    ];
}
/**
 * A GLA that estimates the cardinality of a dataset using the HyperLogLog
 * algorithm, with a configurable number of bins.
 *
 * Template arguments read from $t_args:
 *  - 'debug'           (default 0): when > 0, 'iostream' is added to the
 *                      requested system headers.
 *  - 'bins.exponent'   (default 4): integer in [4, 24); the number of
 *                      registers is 2^exponent.
 *  - 'use.builtin.ctz' (default true): use the compiler builtin to count
 *                      trailing zeros instead of the generated ctz() helper.
 *
 * @param array $t_args Template arguments (see above).
 * @param array $input  Input name => type map; all inputs are hashed together.
 * @param array $output Exactly one output; a missing type defaults to the type
 *                      looked up for 'BASE::BIGINT'. Must be numeric.
 *
 * @return array GLA description (kind, name, input, output, result type,
 *               headers).
 */
function HyperLogLog(array $t_args, array $input, array $output)
{
    $debug = get_default($t_args, 'debug', 0);

    grokit_assert(\count($output) == 1, 'HyperLogLog produces only 1 value, ' . \count($output) . ' outputs given.');
    $outputName = array_keys($output)[0];
    $outputType = array_get_index($output, 0);
    if (is_null($outputType)) {
        $outputType = lookupType('BASE::BIGINT');
    }
    $output[$outputName] = $outputType;
    grokit_assert($outputType->is('numeric'), 'HyperLogLog output must be numeric!');

    $exp = get_first_key_default($t_args, ['bins.exponent'], 4);
    grokit_assert(is_integer($exp), 'HyperLogLog bins.exponent must be an integer');
    // The upper bound is exclusive, so the largest state is 2^23 registers.
    grokit_assert($exp >= 4 && $exp < 24, 'HyperLogLog bins.exponent must be in range [4, 24)');

    $useBuiltinCtz = get_default($t_args, 'use.builtin.ctz', true);
    // The "ll" variant takes unsigned long long, which is at least 64 bits on
    // every platform; the "l" variant is only 32 bits wide on LLP64 targets.
    $ctzFunc = $useBuiltinCtz ? '__builtin_ctzll' : 'ctz';

    $bins = pow(2, $exp);

    // Determine the value of alpha based on $exp
    switch ($exp) {
        case 4:
            $alpha = 0.673;
            break;
        case 5:
            $alpha = 0.697;
            break;
        case 6:
            $alpha = 0.709;
            break;
        default:
            $alpha = 0.7213000000000001 / (1 + 1.079 / $bins);
    }

    $className = generate_name('HyperLogLog');
?>

class <?= $className ?> {
    // Number of bins for registers
    static constexpr const size_t NUM_BINS = <?= $bins ?>;

    // Number of bits used to index into registers, log2(NUM_BINS)
    static constexpr const size_t INDEX_BITS = <?= $exp ?>;

    // Number of hash bits left after removing the register index
    static constexpr const size_t VALUE_BITS = 64 - INDEX_BITS;

    // Mask used to obtain register index from hash value
    static constexpr const size_t INDEX_MASK = NUM_BINS - 1;

    // Alpha coefficient used to correct cardinality estimate. Based on NUM_BINS.
    static constexpr const long double ALPHA = <?= $alpha ?>;

    // Value of cardinality estimate after which we must apply the
    // large range correction
    static constexpr const long double LARGE_BREAKPOINT = (1.0 / 30.0) * <?= pow(2, 32) ?>;

    // Constants for population count
    static constexpr const uint64_t m1 = 0x5555555555555555;
    static constexpr const uint64_t m2 = 0x3333333333333333;
    static constexpr const uint64_t m4 = 0x0f0f0f0f0f0f0f0f;
    static constexpr const uint64_t h01 = 0x0101010101010101;

    // The registers. Each one holds the largest rank seen for its bin, where
    // rank = (number of trailing zero bits) + 1; 0 means "bin never hit".
    std::array<unsigned char, NUM_BINS> registers;

    // A count used to remember how many tuples were processed, mostly for debugging.
    size_t count;

public:
    <?= $className ?>(void) : registers(), count(0) {
        for( auto & elem : registers ) {
            elem = 0;
        }
    }

    ~<?= $className ?>() { }

    int popcount(uint64_t x) {
        // Put count of each 2 bits into those 2 bits
        x -= (x >> 1) & m1;
        // Put count of each 4 bits into those 4 bits
        x = (x & m2) + ((x >> 2) & m2);
        // Put count of each 8 bits into those 8 bits
        x = (x + (x >> 4)) & m4;
        // Returns left 8 bits of x + (x << 8) + (x << 16) + ...
        return (x * h01) >> 56;
    }

    // Portable count of trailing zero bits: (x & -x) isolates the lowest set
    // bit, subtracting one turns the zeros below it into ones.
    int ctz(int64_t x) {
        return popcount((x & -x) - 1);
    }

    void AddItem(<?= const_typed_ref_args($input) ?>) {
        count++;

        uint64_t hashVal = H_b;
<?php foreach ($input as $name => $type) { ?>
        hashVal = CongruentHash(Hash(<?= $name ?>), hashVal);
<?php } // for each input ?>

        const size_t registerIndex = hashVal & INDEX_MASK;
        const uint64_t value = hashVal >> INDEX_BITS;

        // HyperLogLog stores the position of the first set bit, i.e. the
        // number of trailing zeros plus one, so that an occupied register is
        // never 0. The compiler builtin is undefined for an argument of 0, so
        // that case is handled explicitly: all VALUE_BITS bits are zero.
        const unsigned char rank = (value == 0)
            ? static_cast<unsigned char>(VALUE_BITS + 1)
            : static_cast<unsigned char>(<?= $ctzFunc ?>(value) + 1);

        unsigned char & registerValue = registers[registerIndex];
        registerValue = registerValue > rank ? registerValue : rank;
    }

    void AddState(<?= $className ?> & other) {
        // Merging two sketches keeps the larger rank of every register.
        for( size_t i = 0; NUM_BINS > i; i++ ) {
            unsigned char & rVal = registers[i];
            unsigned char & oVal = other.registers[i];
            rVal = rVal > oVal ? rVal : oVal;
        }
        count += other.count;
    }

    void GetResult(<?= $outputType ?> & <?= $outputName ?>) {
        // Compute harmonic sum of registers and correct by alpha
        long double cardEst = 0;
        size_t nZeroRegisters = 0;
        for( auto elem : registers ) {
            long double power = - static_cast<long double>(elem);
            cardEst += std::pow(2.0, power);

            if( elem == 0 )
                nZeroRegisters++;
        }

        const long double nBins = static_cast<long double>(NUM_BINS);
        const long double zeroBins = static_cast<long double>(nZeroRegisters);

        cardEst = 1 / cardEst;
        cardEst *= ALPHA * nBins * nBins;

        long double cardinality = cardEst;

        if( cardEst < 2.5 * NUM_BINS ) {
            // Possible small range correction
            if( nZeroRegisters > 0 ) {
                // Small range correction
                cardinality = nBins * std::log(nBins / zeroBins);
            }
        }
        // TODO: Figure out if the large range correction is needed for 64-bit
        // hashes.

        <?= $outputName ?> = cardinality;
    }
};

<?php
    $system_headers = ['cmath', 'array', 'cinttypes'];
    if ($debug > 0) {
        $system_headers[] = 'iostream';
    }

    return [
        'kind' => 'GLA',
        'name' => $className,
        'input' => $input,
        'output' => $output,
        'result_type' => 'single',
        'user_headers' => ['HashFunctions.h'],
        'system_headers' => $system_headers,
    ];
}
/**
 * Iterable GLA generator computing connected components by label propagation.
 *
 * Iteration 0 only determines the largest node id seen. ShouldIterate then
 * sizes the shared component vector and labels every node with its own id.
 * In later iterations, every edge whose endpoints carry different labels
 * copies the larger label onto the other endpoint. Iteration stops when a
 * pass changes nothing or after kIterations passes.
 *
 * @param mixed $t_args  Template arguments; not read by this generator.
 * @param array $inputs  Exactly two inputs: the two endpoints of an edge.
 * @param array $outputs Exactly two outputs; their names are kept, the first
 *                       is typed like the first input and the second with the
 *                       type looked up for 'int'.
 *
 * @return array GLA description, including the generated constant state.
 */
function CCompLP($t_args, $inputs, $outputs)
{
    // Class name is randomly generated.
    $className = generate_name('CComp');

    // Initialization of argument names. array_combine() needs matching counts.
    grokit_assert(\count($inputs) == 2, 'CCompLP: 2 inputs expected');
    $inputs_ = array_combine(['s', 't'], $inputs);
    $vertex = $inputs_['s'];

    // Construction of outputs.
    grokit_assert(\count($outputs) == 2, 'CCompLP: 2 outputs expected');
    $outputs_ = ['node' => $vertex, 'component' => lookupType('int')];
    $outputs = array_combine(array_keys($outputs), $outputs_);

    $sys_headers = ['armadillo', 'algorithm'];
    $user_headers = [];
    $lib_headers = [];
    $libraries = ['armadillo'];
    $properties = [];
    $extra = [];
    $result_type = ['multi'];
?>

using namespace arma;
using namespace std;

class <?= $className ?>;

<?php
    $constantState = lookupResource('graph::CCompLP_Constant_State', ['className' => $className]);
?>

class <?= $className ?> {
public:
    // The constant state for this GLA.
    using ConstantState = <?= $constantState ?>;

    // The number of iterations to perform, not counting the initial set-up.
    static const constexpr int kIterations = 30;

    // The work is split into chunks of this size before being partitioned.
    static const constexpr int kBlock = 32;

    // The maximum number of fragments to use.
    static const constexpr int kMaxFragments = 64;

private:
    // Component label of every node, indexed by node id. Shared by all
    // instances of this class.
    static arma::rowvec node_component;

    // The typical constant state for an iterable GLA.
    const ConstantState& constant_state;

    // The number of unique nodes seen.
    long num_nodes;

    // Next node id to emit from GetNextResult.
    long output_iterator;

    // The current iteration.
    int iteration;

    // Number of label changes made in this iteration; a non-zero value means
    // another iteration is needed.
    long connections;

public:
    // Members are initialised in declaration order.
    <?= $className ?>(const <?= $constantState ?>& state)
        : constant_state(state),
          num_nodes(state.num_nodes),
          output_iterator(0),
          iteration(state.iteration),
          connections(0) {
    }

    // Iteration 0: track the largest node id. Later iterations: propagate the
    // larger of the two labels across the edge.
    void AddItem(<?= const_typed_ref_args($inputs_) ?>) {
        if (iteration == 0) {
            num_nodes = max((long) max(s, t), num_nodes);
            return;
        } else {
            long s_id = node_component(s), t_id = node_component(t);
            if (s_id != t_id) {
                ++connections;
                if (s_id > t_id)
                    node_component(t) = s_id;
                else
                    node_component(s) = t_id;
            }
        }
    }

    // Combines the per-state counters; the labels themselves live in the
    // shared static vector and need no merging.
    void AddState(<?= $className ?> &other) {
        if (iteration == 0)
            num_nodes = max(num_nodes, other.num_nodes);
        else
            connections += other.connections;
    }

    // Advances the iteration counter and decides whether another pass over
    // the data is required.
    bool ShouldIterate(ConstantState& state) {
        state.iteration = ++iteration;

        if (iteration == 1) { // allocate memory
            // num_nodes is incremented because IDs are 0-based.
            state.num_nodes = ++num_nodes;
            // Allocating space can't be parallelized.
            node_component.set_size(num_nodes);
            for (long i = 0; i < num_nodes; ++i)
                node_component(i) = i;

            return true;
        } else {
            return connections > 0 && iteration < kIterations + 1;
        }
    }

    // Finalize does nothing
    void Finalize() {}

    bool GetNextResult(<?= typed_ref_args($outputs_) ?>) {
        // Produce no output while ShouldIterate would still request a pass.
        if (connections > 0 && iteration < kIterations + 1)
            return false;

        if (output_iterator < num_nodes) {
            node = output_iterator++;
            component = node_component(node);
            return true;
        } else {
            return false;
        }
    }
};

// Initialize the static member types.
arma::rowvec <?= $className ?>::node_component;

<?php
    return [
        'kind' => 'GLA',
        'name' => $className,
        'system_headers' => $sys_headers,
        'user_headers' => $user_headers,
        'lib_headers' => $lib_headers,
        'libraries' => $libraries,
        'properties' => $properties,
        'extra' => $extra,
        'iterable' => true,
        'input' => $inputs,
        'output' => $outputs,
        'result_type' => $result_type,
        'generated_state' => $constantState,
    ];
}
/**
 * A GLA that determines the distinct values of a dataset.
 *
 * Every input tuple is stored as a Key in a hash set; the set's contents are
 * returned through the multi and fragment result interfaces.
 *
 * Template arguments read from $t_args:
 *  - 'use.mct'         (default true): use mct::closed_hash_set instead of
 *                      std::unordered_set.
 *  - 'init.size'       (default 65536): positive integer passed to the set's
 *                      constructor.
 *  - 'mct.keep.hashes' (default false): last template argument of the MCT set.
 *  - 'fragment.size'   (default 100000): positive integer, maximum number of
 *                      values per fragment.
 *  - 'null.check'      (default false): either a boolean applied to every
 *                      input, or a list of input names; tuples for which a
 *                      checked input IsNull are skipped.
 *
 * @param array $t_args Template arguments (see above).
 * @param array $input  Input name => type map.
 * @param array $output Outputs; must have as many entries as $input. The i-th
 *                      output receives the type of the i-th input.
 *
 * @return array GLA description (kind, name, input, output, result types,
 *               headers, properties).
 */
function Distinct(array $t_args, array $input, array $output)
{
    grokit_assert(\count($input) == \count($output), 'Distinct must have the same outputs as inputs.');

    // Pair outputs with inputs positionally and copy the input types over.
    $outputNames = array_keys($output);
    $outputsToInputs = [];
    $i = 0;
    foreach ($input as $name => $type) {
        $outputsToInputs[$outputNames[$i]] = $name;
        array_set_index($output, $i++, $type);
    }

    $useMCT = get_default($t_args, 'use.mct', true);
    $initSize = get_default($t_args, 'init.size', 65536);
    $keepHashes = get_default($t_args, 'mct.keep.hashes', false);
    $fragmentSize = get_default($t_args, 'fragment.size', 100000);
    $nullCheck = get_default($t_args, 'null.check', false);

    grokit_assert(is_bool($useMCT), 'Distinct use.mct argument must be boolean');
    grokit_assert(is_integer($initSize), 'Distinct init.size argument must be an integer');
    grokit_assert($initSize > 0, 'Distinct init.size argument must be positive');
    grokit_assert(is_bool($keepHashes), 'Distinct mct.keep.hashes argument must be boolean');
    grokit_assert(is_integer($fragmentSize), 'Distinct fragment.size argument must be integral');
    grokit_assert($fragmentSize > 0, 'Distinct fragment.size argument must be positive');

    // Input name => whether tuples with a null in that input are skipped.
    $nullable = [];
    if (is_bool($nullCheck)) {
        foreach ($input as $name => $type) {
            $nullable[$name] = $nullCheck;
        }
    } elseif (is_array($nullCheck)) {
        foreach ($input as $name => $type) {
            $nullable[$name] = false;
        }

        foreach ($nullCheck as $index => $n) {
            grokit_assert(is_string($n), 'Distinct null.check has invalid value at position ' . $index);
            grokit_assert(array_key_exists($n, $nullable), 'Distinct null.check has unknown input ' . $n . ' at position ' . $index);

            $nullable[$n] = true;
        }
    } else {
        grokit_error('Distinct null.check must be boolean or list of inputs to check for nulls');
    }

    $keepHashesText = $keepHashes ? 'true' : 'false';

    $system_headers = ['cinttypes', 'functional', 'vector'];
    if ($useMCT) {
        $system_headers[] = 'mct/hash-set.hpp';
        $definedSet = "mct::closed_hash_set<Key, HashKey, std::equal_to<Key>, std::allocator<Key>, {$keepHashesText}>";
    } else {
        // std::unordered_set is declared in <unordered_set>.
        $system_headers[] = 'unordered_set';
        $definedSet = "std::unordered_set<Key, HashKey, std::equal_to<Key>, std::allocator<Key>>";
    }

    $className = generate_name('Distinct');
?>

class <?= $className ?> {
public:
    // Value being placed into the set.
    struct Key {
<?php foreach ($input as $name => $type) { ?>
        <?= $type ?> <?= $name ?>;
<?php } // for each input ?>

        // Construct the value by copying all of the attributes.
        Key(<?= const_typed_ref_args($input) ?>) :
<?php
    $first = true;
    foreach ($input as $name => $type) {
?>
            <?= $first ? ' ' : ',' ?> <?= $name ?>(<?= $name ?>)
<?php
        $first = false;
    } // for each input
?>
        { }

        bool operator==(const Key & o) const {
            return true <?= array_template("&& ({key} == o.{key})", ' ', $input) ?>;
        }

        // Chains the hashes of all attributes, starting from H_b.
        size_t hash_value() const {
            uint64_t hash = H_b;
<?php foreach ($input as $name => $type) { ?>
            hash = CongruentHash(Hash(<?= $name ?>), hash);
<?php } // for each input ?>
            return size_t(hash);
        }
    };

    // Hashing functor for our value
    struct HashKey {
        size_t operator()(const Key& o) const {
            return o.hash_value();
        }
    };

    using Set = <?= $definedSet ?>;

    // Iterator object used in multi and fragment result types
    class Iterator {
    public:
        using iterator_t = Set::const_iterator;

    private:
        iterator_t start;
        iterator_t end;

    public:
        Iterator() : start(), end() { }

        Iterator(const iterator_t & _start, const iterator_t & _end) :
            start(_start), end(_end)
        { }

        Iterator(const Iterator & o) : start(o.start), end(o.end)
        { }

        // Copies the current value into the outputs and advances; returns
        // false once the range is exhausted.
        bool GetNextResult(<?= typed_ref_args($output) ?>) {
            if( start != end ) {
<?php foreach ($output as $name => $type) { ?>
                <?= $name ?> = start-><?= $outputsToInputs[$name] ?>;
<?php } // for each output ?>
                start++;
                return true;
            } else {
                return false;
            }
        }
    };

private:
    // Constants
    static constexpr size_t INIT_SIZE = <?= $initSize ?>;
    static constexpr size_t FRAG_SIZE = <?= $fragmentSize ?>;

    // Member variables
    uint64_t count;         // Total # tuples seen

    Set distinct;           // Set of distinct values

    using IteratorList = std::vector<Iterator>;

    Iterator multiIterator; // Internal iterator for multi result type
    IteratorList fragments; // Iterator for fragments

public:
    <?= $className ?>() :
        count(0),
        distinct(INIT_SIZE),
        multiIterator(),
        fragments()
    { }

    ~<?= $className ?>() { }

    void Reset(void) {
        count = 0;
        distinct.clear();
    }

    void AddItem(<?= const_typed_ref_args($input) ?>) {
        count++;
<?php
    foreach ($nullable as $name => $check) {
        if ($check) {
?>
        if( IsNull( <?= $name ?> ) ) return;
<?php
        } // if checking for nulls
    } // foreach input
?>

        Key key(<?= args($input) ?>);

        distinct.insert(key);
    }

    void AddState(<?= $className ?> & other) {
        for( auto & elem : other.distinct ) {
            distinct.insert(elem);
        }
        count += other.count;
    }

    // Multi interface
    void Finalize(void) {
        multiIterator = Iterator(distinct.cbegin(), distinct.cend());
    }

    bool GetNextResult(<?= typed_ref_args($output) ?>) {
        return multiIterator.GetNextResult(<?= args($output) ?>);
    }

    // Fragment interface: splits the set into consecutive ranges of at most
    // FRAG_SIZE values and returns how many ranges were created.
    int GetNumFragments(void) {
        fragments.clear();
        int nFrag = 0;

        Iterator::iterator_t prev = distinct.cbegin();
        Iterator::iterator_t end = distinct.cend();
        Iterator::iterator_t next = prev;

        while( next != end ) {
            for( size_t i = 0; next != end && FRAG_SIZE > i; i++ ) {
                next++;
            }
            Iterator nIter(prev, next);
            fragments.push_back(nIter);

            prev = next;
            nFrag++;
        }

        return nFrag;
    }

    // Returns a heap-allocated copy of the requested fragment's iterator.
    Iterator * Finalize(int fragment) {
        return new Iterator(fragments[fragment]);
    }

    bool GetNextResult(Iterator * it, <?= typed_ref_args($output) ?>) {
        return it->GetNextResult(<?= args($output) ?>);
    }

    // General methods
    uint64_t get_count() const {
        return count;
    }

    uint64_t get_countDistinct() const {
        return distinct.size();
    }

    const Set & get_distinct() const {
        return distinct;
    }
};

typedef <?= $className ?>::Iterator <?= $className ?>_Iterator;

<?php
    return [
        'kind' => 'GLA',
        'name' => $className,
        'input' => $input,
        'output' => $output,
        'result_type' => ['multi', 'fragment'],
        'user_headers' => ['HashFunctions.h'],
        'system_headers' => $system_headers,
        'properties' => ['resettable'],
    ];
}
/**
 * GLA generator that builds a histogram of chunk sizes.
 *
 * The generated class counts tuples until ChunkBoundary() is called, then
 * increments the histogram entry for that tuple count and starts over. The
 * result is one (tuple count, number of chunks with that count) pair per
 * histogram entry.
 *
 * @param array $t_args Template arguments; not read by this generator.
 * @param array $input  Input name => type map; the values are never inspected
 *                      by the generated code.
 * @param array $output Exactly two outputs; both are typed with the type
 *                      looked up for 'BASE::BIGINT'.
 *
 * @return array GLA description (kind, name, headers, input, output, result
 *               type, chunk boundary flag).
 */
function ChunkCount(array $t_args, array $input, array $output)
{
    $name = generate_name('ChunkCount');

    grokit_assert(\count($output) == 2, 'ChunkCount only supports 2 outputs');

    $outputNames = array_keys($output);
    $numberName = $outputNames[0];
    $countName = $outputNames[1];

    $numberType = lookupType('BASE::BIGINT');
    $countType = lookupType('BASE::BIGINT');

    $output[$numberName] = $numberType;
    $output[$countName] = $countType;
?>

class <?= $name ?> {
    using map_type = std::unordered_map<<?= $numberType ?>, <?= $countType ?> >;

    // Tuples per chunk -> number of chunks that had exactly that many tuples.
    map_type chunkCount;

    // Tuples seen since the last chunk boundary.
    <?= $numberType ?> curCount;

    // Cursor over chunkCount used while producing results.
    map_type::const_iterator multiIter;

public:
    <?= $name ?>() : chunkCount(), curCount(0) { }

    void AddItem(<?= const_typed_ref_args($input) ?>) {
        curCount++;
    }

    // Adds the other state's histogram to this one, entry by entry.
    void AddState(const <?= $name ?> & other) {
        for (auto elem : other.chunkCount) {
            chunkCount[elem.first] += elem.second;
        }
    }

    // Records the finished chunk and resets the running tuple count.
    void ChunkBoundary() {
        chunkCount[curCount] += 1;
        curCount = 0;
    }

    void Finalize() {
        multiIter = chunkCount.cbegin();
    }

    bool GetNextResult(<?= typed_ref_args($output) ?>) {
        if (multiIter == chunkCount.cend()) {
            return false;
        }

        <?= $numberName ?> = multiIter->first;
        <?= $countName ?> = multiIter->second;
        multiIter++;
        return true;
    }
};

<?php
    $description = [
        'kind' => 'GLA',
        'name' => $name,
        'system_headers' => ['unordered_map'],
        'input' => $input,
        'output' => $output,
        'result_type' => ['multi'],
        'chunk_boundary' => true,
    ];

    return $description;
}
/**
 * GT generator that joins incoming tuples against a pre-built mapping state.
 *
 * For every input tuple the generated class computes Mapping::ChainHash over
 * the inputs, looks up the matching range in the constant state's map, and
 * then emits one output tuple per match.
 *
 * @param mixed $t_args  Template arguments; not read by this generator.
 * @param array $inputs  Input name => type map; all inputs form the lookup key.
 * @param array $outputs Outputs; their names are kept and their types are
 *                       replaced by the mapping state's 'vals', positionally.
 * @param array $states  Exactly one state: the mapping to join against.
 *
 * @return array GT description, including the generated constant state.
 */
function Join($t_args, $inputs, $outputs, $states)
{
    // Class name randomly generated
    $className = generate_name('Join');

    // Processing of states; array_combine() needs exactly one state.
    grokit_assert(\count($states) == 1, 'Join: exactly 1 state expected, ' . \count($states) . ' given');
    $states_ = array_combine(['mapping'], $states);

    // Processing of outputs: one output per value stored in the mapping.
    $values = $states_['mapping']->get('vals');
    grokit_assert(
        \count($values) == \count($outputs),
        'Join: expected ' . \count($values) . ' outputs, ' . \count($outputs) . ' given'
    );
    $outputs = array_combine(array_keys($outputs), $values);

    $sys_headers = ['map', 'tuple'];
    $user_headers = [];
    $lib_headers = [];
    $libraries = [];
?>

class <?= $className ?>;

<?php
    $constantState = lookupResource(
        "Join_Constant_State",
        ['className' => $className, 'states' => $states]
    );
?>

class <?= $className ?> {
private:
    using ConstantState = <?= $constantState ?>;
    using Mapping = ConstantState::Mapping;
    using Iter = Mapping::Map::const_iterator;

    // The constant state holding the map that is joined against.
    const <?= $constantState ?>& constant_state;

    // The range of matches for the most recently processed tuple.
    Iter it, end;

public:
    // Both iterators are value-initialised so that GetNextResult reports
    // "no result" when it is called before any tuple was processed.
    <?= $className ?>(const <?= $constantState ?>& state)
        : constant_state(state),
          it(),
          end() {
    }

    // Looks up all entries whose key equals the chained hash of the inputs.
    void ProcessTuple(<?= const_typed_ref_args($inputs) ?>) {
        auto key = Mapping::ChainHash(<?= args($inputs) ?>);
        auto pair = constant_state.map.equal_range(key);
        it = pair.first;
        end = pair.second;
    }

    // Emits the next match of the current tuple, if any.
    bool GetNextResult(<?= typed_ref_args($outputs) ?>) {
        if (it == end)
            return false;
<?php foreach (array_keys($outputs) as $index => $name) { ?>
        <?= $name ?> = std::get<<?= $index ?> >(it->second);
<?php } ?>
        ++it;
        return true;
    }
};

<?php
    return [
        'kind' => 'GT',
        'name' => $className,
        'system_headers' => $sys_headers,
        'user_headers' => $user_headers,
        'lib_headers' => $lib_headers,
        'libraries' => $libraries,
        'input' => $inputs,
        'output' => $outputs,
        'result_type' => 'multi',
        'generated_state' => $constantState,
    ];
}
/**
 * GLA generator that groups tuples by a set of attributes and feeds the
 * remaining attributes of each group into its own instance of an inner GLA.
 *
 * Template arguments read from $t_args:
 *  - 'input'           used as the input list when $inputs is empty; a single
 *                      value is wrapped in a list, identifiers are resolved
 *                      with lookupType().
 *  - 'group'           (required) map of grouping input name => output name.
 *  - 'aggregate'       (required) the inner GLA.
 *  - 'debug'           (default 0) when > 0, diagnostics are printed to stderr.
 *  - 'init.size'       (default 1024) passed to the map's constructor.
 *  - 'use.mct'         (default true) use mct::closed_hash_map instead of
 *                      std::unordered_map.
 *  - 'mct.keep.hashes' (default false) last template argument of the MCT map.
 *  - 'use.fragments'   (default true) also offer the fragment result type.
 *  - 'fragment.size'   (default 2000000) groups per fragment; 0 produces a
 *                      single fragment.
 *
 * @param array $t_args  Template arguments (see above).
 * @param array $inputs  Input name => type map; may be empty when 'input' is
 *                       given in $t_args.
 * @param array $outputs Output name => type map; grouping outputs take the
 *                       type of their input, the others are typed by the
 *                       inner GLA.
 * @param array $states  States passed on to the inner GLA.
 *
 * @return array GLA description, including the generated constant state and
 *               the instantiated inner GLA under 'extra'.
 */
function GroupBy(array $t_args, array $inputs, array $outputs, array $states)
{
    // Ensure we have valid inputs.
    if (\count($inputs) == 0) {
        // No inputs given, try to get them from template arguments.
        grokit_assert(array_key_exists('input', $t_args), 'No inputs given for GroupBy');
        $inputs = $t_args['input'];

        if (!is_array($inputs)) {
            $inputs = [$inputs];
        }

        foreach ($inputs as $name => &$type) {
            if (is_identifier($type)) {
                $type = lookupType(strval($type));
            }

            grokit_assert(is_datatype($type), 'Invalid type given for input ' . $name);
        }
        // Break the reference: $type is reused as a by-value loop variable
        // below, which would otherwise overwrite the last element of $inputs.
        unset($type);
    }

    grokit_assert(array_key_exists('group', $t_args), 'No groups specified for GroupBy');
    $gbyAttMap = $t_args['group'];

    grokit_assert(is_array($gbyAttMap), 'Invalid value given for groups, expected an expression name or list of expression names');

    $gbyAttMap = array_map('strval', $gbyAttMap);
    $gbyAttNames = array_keys($gbyAttMap);

    foreach ($gbyAttMap as $in => $out) {
        grokit_assert(array_key_exists($in, $inputs), 'Group ' . $in . ' not present in input');
        grokit_assert(array_key_exists($out, $outputs), 'Output Attribute ' . $out . ' for group ' . $in . ' not found in outputs');
    }

    grokit_assert(array_key_exists('aggregate', $t_args), 'No aggregate specified for GroupBy');
    $innerGLA = $t_args['aggregate'];
    grokit_assert(is_gla($innerGLA), 'Non-GLA specified as aggregate for GroupBy');

    $debug = get_default($t_args, 'debug', 0);
    $init_size = get_default($t_args, 'init.size', 1024);
    $use_mct = get_default($t_args, 'use.mct', true);
    $keepHashes = get_default($t_args, 'mct.keep.hashes', false);
    grokit_assert(is_bool($keepHashes), 'GroupBy mct.keep.hashes argument must be boolean');

    // determine the result type
    $use_fragments = get_default($t_args, 'use.fragments', true);
    $resType = $use_fragments ? ['fragment', 'multi'] : ['multi'];

    $fragSize = get_default($t_args, 'fragment.size', 2000000);
    // A negative size would make the generated fragment loop spin forever.
    grokit_assert(is_integer($fragSize) && $fragSize >= 0, 'GroupBy fragment.size argument must be a non-negative integer');

    // Always support state
    $resType[] = 'state';

    // Class name randomly generated
    $className = generate_name("GroupBy");

    // instantiate the inner GLA. input/output is derived from the main input/output
    $gbyAtts = [];
    $glaInputAtts = [];
    $glaOutputAtts = [];

    foreach ($inputs as $name => $type) {
        if (in_array($name, $gbyAttNames, true)) {
            $gbyAtts[$name] = $type;
            $outputs[$gbyAttMap[$name]] = $type;
        } else {
            $glaInputAtts[$name] = $type;
        }
    }

    foreach ($outputs as $name => $type) {
        // $gbyAttMap holds strings only, so compare as string.
        if (!in_array((string) $name, $gbyAttMap, true)) {
            $glaOutputAtts[$name] = $type;
        }
    }

    $innerGLA = $innerGLA->apply($glaInputAtts, $glaOutputAtts, $states);
    $libraries = $innerGLA->libraries();

    $innerRes = get_first_value($innerGLA->result_type(), ['multi', 'single', 'state']);

    if ($innerRes == 'state') {
        // If the result type is state, the only output is a state object
        // containing the GLA.
        $outputName = array_keys($glaOutputAtts)[0];
        $innerOutputs = [$outputName => lookupType('base::STATE', ['type' => $innerGLA])];
    } else {
        $innerOutputs = $innerGLA->output();

        grokit_assert(
            \count($innerOutputs) == \count($glaOutputAtts),
            'Expected ' . \count($glaOutputAtts) . ' outputs from Inner GLA, got ' . \count($innerOutputs)
        );
    }

    $constState = lookupResource('GroupByState', ['gla' => $innerGLA, 'groups' => $gbyAtts, 'debug' => $debug]);

    // constructor arguments are inherited from inner GLA
    $configurable = $innerGLA->configurable();
    $reqStates = $innerGLA->req_states();

    // We need to specially create the constructor string because declaring
    // "Type Name();" is parsed by C++ as a function declaration instead of a
    // variable declaration.
    $constructorParts = [];
    if ($configurable) {
        $constructorParts[] = 'jsonInit';
    }
    if ($innerGLA->has_state()) {
        $constructorParts[] = 'innerState';
    }

    $constructorString = \count($constructorParts) > 0
        ? '(' . implode(', ', $constructorParts) . ')'
        : '';

    // add the outputs we got from the gla
    foreach ($innerOutputs as $name => $type) {
        grokit_assert(array_key_exists($name, $outputs), 'Inner GLA\'s outputs refer to unknown attribute ' . $name);
        grokit_assert($type !== null, 'GroupBy Inner GLA left output ' . $name . ' with no type');
        $outputs[$name] = $type;
    }

    $iterable = $innerGLA->iterable();

    // need to keep track of system includes needed
    $extraHeaders = [];

    $allocatorText = "std::allocator<std::pair<const Key, {$innerGLA}> >";
    if ($use_mct) {
        $keepHashesText = $keepHashes ? 'true' : 'false';
        $extraHeaders[] = "mct/hash-map.hpp";
        $map = "mct::closed_hash_map<Key, {$innerGLA}, HashKey, std::equal_to<Key>, {$allocatorText}, {$keepHashesText}>";
    } else {
        $extraHeaders[] = "unordered_map";
        $map = "std::unordered_map<Key, {$innerGLA}, HashKey, std::equal_to<Key>, {$allocatorText}>";
    }

    if ($debug > 0) {
        $extraHeaders[] = 'cstdio';
    }
?>

class <?= $className ?> {
public:
    using ConstantState = <?= $constState ?>;
<?php if ($innerGLA->has_state()) { ?>
    using InnerState = ConstantState::InnerState;
<?php } // if gla has state ?>
    using Key = ConstantState::Key;
    using HashKey = ConstantState::HashKey;
    using InnerGLA = <?= $innerGLA ?>;

    typedef <?= $map ?> MapType;
    static const size_t INIT_SIZE = <?= $init_size ?>;

public:
    // Walks a range of groups and produces the output tuples of each group's
    // inner GLA.
    class Iterator {
        MapType::iterator it;  // current value
        MapType::iterator end; // last value in the fragment

        // Finalizes the inner GLA of the group `it` currently points to, so
        // that its results can be read. Must run once for every group that is
        // reached, not only the first one.
        void PrepareCurrent() {
            if( it != end ) {
<?php if ($innerRes == 'multi') { ?>
                it->second.Finalize();
<?php } elseif ($innerRes == 'state' && $innerGLA->finalize_as_state()) { ?>
                it->second.FinalizeState();
<?php } // end inner restype ?>
            }
        }

    public:
        Iterator() : it(), end() { }

        Iterator(MapType::iterator _it, MapType::iterator _end) : it(_it), end(_end)
        {
            PrepareCurrent();
        }

        bool GetNextResult(<?= typed_ref_args($outputs) ?>) {
            bool gotResult = false;
            while( it != end && !gotResult ) {
                <?= $innerGLA ?> & gla = it->second;
<?php foreach ($gbyAttMap as $in => $out) { ?>
                <?= $out ?> = it->first.<?= $in ?>;
<?php } // foreach grouping attribute ?>
<?php if ($innerRes == 'multi') { ?>
                gotResult = gla.GetNextResult(<?= args($innerOutputs) ?>);
                if( !gotResult ) {
                    // This group is exhausted; move on to the next one.
                    ++it;
                    PrepareCurrent();
                }
<?php } elseif ($innerRes == 'single') { ?>
                gotResult = true;
                gla.GetResult(<?= args($innerOutputs) ?>);
                ++it;
<?php
    } elseif ($innerRes == 'state') {
        reset($innerOutputs);
        // Assuming that $innerOutputs contains a single value that is
        // the state type.
        $oName = key($innerOutputs);
        $oType = current($innerOutputs);
?>
                gotResult = true;
                <?= $oName ?> = <?= $oType ?>( &gla );
                ++it;
                PrepareCurrent();
<?php } // inner result type ?>
            }

            return gotResult;
        }
    };

private:
    const ConstantState & constState;

<?php if ($configurable) { ?>
    const Json::Value jsonInit;
<?php } // if configurable ?>

    // Number of tuples seen.
    size_t count;

    // Group key -> inner GLA of that group.
    MapType groupByMap;

    // Fragment boundaries: fragment i covers [theIterators[i], theIterators[i+1]).
    std::vector<MapType::iterator> theIterators;
    Iterator multiIterator;

public:
    <?= $className ?>(<?php if ($configurable) { ?>const Json::Value & _jsonInit, <?php } ?>const ConstantState & _constState) :
        constState(_constState)
<?php if ($configurable) { ?>
        , jsonInit(_jsonInit)
<?php } // if configurable ?>
        , count(0)
        , groupByMap( INIT_SIZE )
        , theIterators()
        , multiIterator()
    { }

    ~<?= $className ?>() {}

    void Reset(void) {
        count = 0;
        groupByMap.clear();
        theIterators.clear();
    }

    void AddItem(<?= array_template('const {val} & {key}', ', ', $inputs) ?>) {
        count++;
        // Find the tuple's group, creating it with a fresh inner GLA when it
        // does not exist yet, then pass the non-grouping attributes on.
        Key key(<?= array_template('{key}', ', ', $gbyAtts) ?>);

        MapType::iterator it = groupByMap.find(key);
        if (it == groupByMap.end()) { // group does not exist
            // create an empty GLA and insert
            // better to not add the item here so we do not have
            // to transport a large state

<?php if ($innerGLA->has_state()) { ?>
            const InnerState & innerState = constState.getConstState(key);
<?php } // if gla has state ?>
            InnerGLA gla<?= $constructorString ?>;
            auto ret = groupByMap.insert(MapType::value_type(key, gla));
            it = ret.first; // reposition
        }
        it->second.AddItem(<?= array_template('{key}', ', ', $glaInputAtts) ?>);
    }

    void AddState(<?= $className ?> & other) {
        count += other.count;
        // scan other hash and insert or update content in this one
        for (MapType::iterator it = other.groupByMap.begin(); it != other.groupByMap.end(); ++it) {
            const Key& okey = it->first;
            <?= $innerGLA ?> & ogla = it->second;

            MapType::iterator itt = groupByMap.find(okey);
            if (itt != groupByMap.end()) { // found the group
                <?= $innerGLA ?> & gla = itt->second;
                gla.AddState(ogla);
            } else {
                // add the other group to this hash
                groupByMap.insert(MapType::value_type(okey, ogla));
            }
        }
    }

<?php if ($iterable) { ?>
    // Asks every group's inner GLA whether it needs another iteration; every
    // GLA is asked even after one of them has answered yes.
    bool ShouldIterate(ConstantState& modibleState) {
<?php if ($debug > 0) { ?>
        fprintf(stderr, "<?= $className ?>: ==== ShouldIterate ====\n");
<?php } // if debugging enabled ?>
        bool shouldIterate = false;
        for( MapType::iterator it = groupByMap.begin(); it != groupByMap.end(); ++it ) {
            const Key & key = it->first;
            InnerGLA & gla = it->second;
<?php if ($innerGLA->has_state()) { ?>
            InnerState & innerState = modibleState.getModibleState(key);
<?php } // if gla has state ?>
            bool glaRet = gla.ShouldIterate(innerState);
            shouldIterate = shouldIterate || glaRet;
<?php if ($debug > 0) { ?>
            fprintf(stderr, "<?= $className ?>: Key(%s) shouldIterate(%s)\n",
                key.to_string().c_str(),
                glaRet ? "true" : "false");
<?php } // if debugging enabled ?>
        }

        return shouldIterate;
    }
<?php } // if iterable ?>

<?php if (in_array('fragment', $resType, true)) { ?>
    // Splits the map into consecutive ranges of at most sizeFrag groups and
    // returns the number of ranges.
    int GetNumFragments(void) {
        int sizeFrag = <?= $fragSize ?>;
        int frag = 0;
        int pos = 0;

        MapType::iterator it = groupByMap.begin();
        theIterators.clear();
        theIterators.push_back( it );

        // A fragment size of zero means "do not split": one fragment covers
        // the whole map.
        if (sizeFrag == 0) {
            it = groupByMap.end();
            theIterators.push_back( it );
            return 1; // one fragment
        }

        while (it != groupByMap.end()) {
            while (it != groupByMap.end() && pos < (frag + 1) * sizeFrag) {
                ++it;
                pos++;
            }
            theIterators.push_back( it );
            frag++;
        }

<?php if ($debug > 0) { ?>
        fprintf(stderr, "<?= $className ?>: fragments(%d)\n", frag);
<?php } ?>

        return frag;
    }

    // Returns a heap-allocated iterator over the requested fragment.
    Iterator* Finalize(int fragment) {
        Iterator* rez = new Iterator(theIterators[fragment], theIterators[fragment + 1]);
        return rez;
    }

    bool GetNextResult(Iterator* it, <?= array_template('{val} & {key}', ', ', $outputs) ?>) {
        return it->GetNextResult(<?= args($outputs) ?>);
    }
<?php } // if using fragment interface ?>

    void Finalize() {
        multiIterator = Iterator( groupByMap.begin(), groupByMap.end() );

<?php if ($debug >= 1) { ?>
        fprintf(stderr, "<?= $className ?>: groups(%zu) tuples(%zu)\n", groupByMap.size(), count);
<?php } ?>
    }

    bool GetNextResult(<?= array_template('{val} & {key}', ', ', $outputs) ?>) {
        return multiIterator.GetNextResult(<?= args($outputs) ?>);
    }

    std::size_t size() const {
        return groupByMap.size();
    }

    const MapType& GetMap() const {
        return groupByMap;
    }

    bool Contains(<?= const_typed_ref_args($gbyAtts) ?>) const {
        Key key(<?= args($gbyAtts) ?>);
        return groupByMap.count(key) > 0;
    }

    const InnerGLA& Get(<?= const_typed_ref_args($gbyAtts) ?>) const {
        Key key(<?= args($gbyAtts) ?>);
        return groupByMap.at(key);
    }

    bool Contains(Key key) const {
        return groupByMap.count(key) > 0;
    }

    const InnerGLA& Get(Key key) const {
        return groupByMap.at(key);
    }
};

<?php if (in_array('fragment', $resType, true)) { ?>
typedef <?= $className ?>::Iterator <?= $className ?>_Iterator;
<?php } ?>

<?php
    // vector is needed for the fragment boundary list.
    $sys_headers = array_merge(['iomanip', 'iostream', 'cstring', 'vector'], $extraHeaders);

    return [
        'kind' => 'GLA',
        'name' => $className,
        'system_headers' => $sys_headers,
        'user_headers' => ['HashFunctions.h'],
        'input' => $inputs,
        'output' => $outputs,
        'result_type' => $resType,
        'configurable' => $configurable,
        'generated_state' => $constState,
        'required_states' => $reqStates,
        'iterable' => $iterable,
        'properties' => ['resettable', 'finite container'],
        'libraries' => $libraries,
        'extra' => ['inner_gla' => $innerGLA],
    ];
}
/**
 * Generates a GF (filter) whose Filter() method returns the outcome of a
 * Bernoulli trial, so each tuple passes independently with probability `p`.
 *
 * Template arguments read from $t_args:
 *   - 'p' (or positional 0): number in [0, 1]; defaults to 0.5.
 *   - 'rng': name of the random engine type, looked up in namespace std;
 *            defaults to 'mt19937_64'.
 *
 * @param array $t_args template arguments
 * @param array $inputs input attributes; accepted by Filter() but not read
 * @return array description of the generated GF
 */
function BernoulliSample($t_args, $inputs)
{
    $probability = get_first_key_default($t_args, ['p', 0], 0.5);
    $isNumber = is_float($probability) || is_int($probability);
    grokit_assert($isNumber, "BernoulliSample: p must be a number in the range [0, 1]");
    grokit_assert(
        $probability >= 0 && $probability <= 1,
        "BernoulliSample: p must be in the range [0, 1]"
    );

    $engine = get_first_key_default($t_args, ['rng'], 'mt19937_64');

    // Everything below assumes the engine and distribution come from the
    // C++ standard library.
    $headers = ['random'];
    $libraries = [];
    $namespace = 'std';

    $stateType = lookupResource('base::BernoulliSampleState', [
        'sys_headers' => $headers,
        'libs' => $libraries,
        'namespace' => $namespace,
    ]);

    $className = generate_name('BernoulliSample');
?>

class <?php echo $className; ?> {
public:
    using state_type = <?php echo $stateType; ?>;
    using rng_type = <?php echo $namespace; ?>::<?php echo $engine; ?>;
    using dist_type = <?php echo $namespace; ?>::bernoulli_distribution;

private:
    rng_type rng;
    dist_type bernoulli;

public:
    // Probability with which a tuple passes the filter.
    const constexpr static double P = <?php echo $probability; ?>;

    // The engine is seeded with the value produced by invoking the state.
    <?php echo $className; ?>(state_type & _state):
        rng(_state()),
        bernoulli(P)
    { }

    bool Filter( <?php echo const_typed_ref_args($inputs); ?> ) {
        return bernoulli(rng);
    }
};

<?php
    return [
        'kind' => 'GF',
        'name' => $className,
        'input' => $inputs,
        'system_headers' => $headers,
        'libraries' => $libraries,
        'generated_state' => $stateType,
    ];
}
/**
 * Generates an iterable, fragment-producing GLA that computes PageRank over
 * an edge list (source vertex `s`, target vertex `t`).
 *
 * Pass structure of the generated class, keyed by the `iteration` counter:
 *   - iteration 0: AddItem only tracks the largest vertex id seen.
 *   - iteration 1: AddItem counts the out-edges of every source vertex.
 *   - iteration >= 2: AddItem accumulates weight[s] * pagerank[s] into
 *     oldPagerank[t]; Finalize turns the accumulated sums into new ranks.
 * Results are only emitted once `iteration` reaches nIterations + 1.
 *
 * @param array $t_args  template arguments (unused)
 * @param array $inputs  exactly two inputs: source and target vertex
 * @param array $outputs exactly two outputs: node and rank
 * @return array description of the generated GLA
 */
function page_rank_vd($t_args, $inputs, $outputs)
{
    $className = generate_name('PageRank');

    // array_combine() below needs exactly two entries on each side.
    grokit_assert(\count($inputs) == 2, 'PageRank: 2 inputs expected');
    grokit_assert(\count($outputs) == 2, 'PageRank: 2 outputs expected');

    $inputs_ = array_combine(['s', 't'], $inputs);
    $vertex = $inputs_['s'];

    $outputs_ = ['node' => $vertex, 'rank' => lookupType('double')];
    $outputs = array_combine(array_keys($outputs), $outputs_);

    // <iostream> is needed because the generated class reports progress
    // through cout.
    $sys_headers = ['vector', 'algorithm', 'iostream'];
    $user_headers = [];
    $lib_headers = [];
    $libraries = [];
    $properties = [];
    $extra = [];
    $result_type = ['fragment'];
?>

using namespace std;

class <?php echo $className; ?>;

<?php
    $constantState = lookupResource(
        'pagerankVD::page_rank_vd_Constant_State',
        ['className' => $className]
    );
?>

class <?php echo $className; ?> {
 public:
  using ConstantState = <?php echo $constantState; ?>;
  // Inclusive [first, last] range of vertex ids covered by one fragment.
  using Iterator = std::pair<int, int>;

  static const constexpr float kDamping = 0.85;
  static const constexpr int nIterations = 5;
  static const constexpr int kBlock = 32;
  static const constexpr int kMaxFragments = 64;

 private:
  // Shared by every instance of this GLA; indexed by vertex id.
  static std::vector<double> weight;
  static std::vector<double> pagerank;
  static std::vector<double> oldPagerank;

  const ConstantState& constant_state;
  long nVertices;
  int iteration;
  int num_fragments;

 public:
  <?php echo $className; ?>(const <?php echo $constantState; ?> & state)
      : constant_state(state),
        nVertices(state.nVertices),
        iteration(state.iteration),
        num_fragments(0) {
  }

  void AddItem(<?php echo const_typed_ref_args($inputs_); ?>) {
    if (iteration == 0) {
      nVertices = max((long) max(s, t), nVertices);
      return;
    } else if (iteration == 1) {
      weight[s]++;
    } else {
      oldPagerank[t] += weight[s] * pagerank[s];
    }
  }

  void AddState(<?php echo $className; ?> &other) {
    if (iteration == 0)
      nVertices = max(nVertices, other.nVertices);
  }

  // Most computation that happens at the end of each iteration is
  // parallelized by performing it inside Finalize.
  bool ShouldIterate(ConstantState& state) {
    state.iteration = ++iteration;
    cout << "Finished Iteration: " << iteration << endl;
    if (iteration == 1) {
      // nVertices held the largest id so far; +1 turns it into a count.
      state.nVertices = ++nVertices;
      cout << "num_nodes: " << nVertices << endl;
      // The vectors are indexed directly by vertex id afterwards, so they
      // must actually contain nVertices elements. reserve() only changes the
      // capacity and would leave every later access out of bounds.
      oldPagerank.assign(nVertices, 0.0);
      pagerank.assign(nVertices, 1.0);
      weight.assign(nVertices, 0.0);
      return true;
    } else {
      return iteration < nIterations + 1;
    }
  }

  int GetNumFragments() {
    long size = (nVertices - 1) / kBlock + 1;  // nVertices / kBlock rounded up.
    num_fragments = (iteration == 0) ? 0 : min(size, (long) kMaxFragments);
    cout << "Returning " << num_fragments << " fragments" << endl;
    return num_fragments;
  }

  Iterator* Finalize(long fragment) {
    int count = nVertices;
    // Fragment boundaries are aligned to multiples of kBlock; the last
    // fragment absorbs whatever remains.
    int first = fragment * (count / kBlock) / num_fragments * kBlock;
    int final = (fragment == num_fragments - 1)
              ? count - 1
              : (fragment + 1) * (count / kBlock) / num_fragments * kBlock - 1;
    if (iteration == 2) {
      // Out-degrees are complete: convert them to per-edge weights and
      // reset the ranks.
      for (int i = first; i <= final; i++) {
        pagerank[i] = 1;
        weight[i] = 1 / weight[i];
        oldPagerank[i] = 0;
      }
    } else {
      for (int i = first; i <= final; i++) {
        pagerank[i] = (1 - kDamping) + kDamping * oldPagerank[i];
        oldPagerank[i] = 0;
      }
    }
    return new Iterator(first, final);
  }

  bool GetNextResult(Iterator* it, <?php echo typed_ref_args($outputs_); ?>) {
    // Intermediate iterations produce no output.
    if (iteration < nIterations + 1)
      return false;
    if (it->first > it->second)
      return false;
    node = it->first;
    rank = pagerank[it->first];
    it->first++;
    return true;
  }
};

// Initialize the static member types.
vector<double> <?php echo $className; ?>::weight;
vector<double> <?php echo $className; ?>::pagerank;
vector<double> <?php echo $className; ?>::oldPagerank;

typedef <?php echo $className; ?>::Iterator <?php echo $className; ?>_Iterator;

<?php
    return [
        'kind' => 'GLA',
        'name' => $className,
        'system_headers' => $sys_headers,
        'user_headers' => $user_headers,
        'lib_headers' => $lib_headers,
        'libraries' => $libraries,
        'properties' => $properties,
        'extra' => $extra,
        'iterable' => true,
        'intermediates' => true,
        'input' => $inputs,
        'output' => $outputs,
        'result_type' => $result_type,
        'generated_state' => $constantState,
    ];
}
/**
 * A GLA that counts the number of distinct elements by keeping track of the
 * distinct elements.
 *
 * Unless an exact count of the distinct is absolutely needed, consider using
 * an approximation of the distinct, such as a Bloom Filter.
 *
 * The generated class wraps the BASE::DISTINCT GLA and forwards AddItem and
 * AddState to it; the result is whatever its get_countDistinct() returns.
 *
 * Template arguments read from $t_args (all optional):
 *   - 'use.mct'         bool, default true
 *   - 'mct.keep.hashes' bool, default false
 *   - 'init.size'       positive integer, default 65536
 *   - 'null.check'      default false; passed through unvalidated
 *
 * @param array $t_args template arguments
 * @param array $input  input attributes (name => type)
 * @param array $output exactly one output; a null type defaults to BASE::BIGINT
 * @return array description of the generated GLA
 */
function CountDistinct(array $t_args, array $input, array $output)
{
    grokit_assert(
        \count($output) == 1,
        'CountDistinct should have only 1 output, ' . \count($output) . ' given'
    );

    $outputName = array_keys($output)[0];
    $outputType = array_get_index($output, 0);
    if (is_null($outputType)) {
        $outputType = lookupType('BASE::BIGINT');
    }
    $output[$outputName] = $outputType;

    grokit_assert($outputType->is('numeric'), 'CountDistinct output must be numeric!');

    $useMCT = get_default($t_args, 'use.mct', true);
    $keepHashes = get_default($t_args, 'mct.keep.hashes', false);
    $initSize = get_default($t_args, 'init.size', 65536);
    $nullCheck = get_default($t_args, 'null.check', false);

    grokit_assert(is_bool($useMCT), 'CountDistinct use.mct argument must be boolean');
    grokit_assert(is_integer($initSize), 'Distinct init.size argument must be an integer');
    grokit_assert($initSize > 0, 'Distinct init.size argument must be positive');
    grokit_assert(is_bool($keepHashes), 'CountDistinct mct.keep.hashes argument must be boolean');

    // The inner Distinct GLA receives the same tuning knobs and uses the
    // inputs as both its inputs and its outputs.
    $distTmpArgs = [
        'use.mct' => $useMCT,
        'init.size' => $initSize,
        'mct.keep.hashes' => $keepHashes,
        'null.check' => $nullCheck,
    ];
    $gla = lookupGLA('BASE::DISTINCT', $distTmpArgs, $input, $input);

    $className = generate_name('CountDistinct');
?>

class <?php echo $className; ?> {
    using Distinct = <?php echo $gla->value(); ?>;

    Distinct distinctGLA;

public:
    <?php echo $className; ?>(void):
        distinctGLA()
    { }

    ~<?php echo $className; ?>(void) { }

    void AddItem(<?php echo const_typed_ref_args($input); ?>) {
        distinctGLA.AddItem(<?php echo args($input); ?>);
    }

    void AddState(<?php echo $className; ?> & o) {
        distinctGLA.AddState(o.distinctGLA);
    }

    void GetResult(<?php echo $outputType; ?> & <?php echo $outputName; ?>) {
        <?php echo $outputName; ?> = distinctGLA.get_countDistinct();
    }
};

<?php
    return [
        'kind' => 'GLA',
        'name' => $className,
        'input' => $input,
        'output' => $output,
        'result_type' => 'single',
    ];
}
/**
 * Generates a GLA that collects every input tuple into a std::vector of
 * std::tuple and exposes the collected list.
 *
 * Output types are taken from the inputs (position by position); only the
 * output names supplied by the caller are kept.
 *
 * @param array $t_args  template arguments (unused)
 * @param array $inputs  input attributes (name => type)
 * @param array $outputs output attributes; must have as many entries as $inputs
 * @return array description of the generated GLA
 */
function Gather(array $t_args, array $inputs, array $outputs)
{
    // Class name randomly generated
    $className = generate_name("Gather");

    $outputs = array_combine(array_keys($outputs), $inputs);

    // <tuple> is required: the generated class stores std::tuple values and
    // uses std::get.
    $sys_headers = ['vector', 'tuple'];
    $user_headers = [];
    $lib_headers = [];
    $libraries = [];
    $properties = ['list'];
    $extra = [];
    $result_type = 'single';
?>

class <?php echo $className; ?>;

class <?php echo $className; ?> {
 public:
  using Tuple = std::tuple<<?php echo typed($inputs); ?> >;
  using Vector = std::vector<Tuple>;

 private:
  // The container that gathers the input.
  Vector items;

  // The tuple to be filled with the current item.
  Tuple item;

  // Used for iterating over the container during GetNextResult.
  int return_counter;

 public:
  // return_counter is initialised here so it never holds an indeterminate
  // value before Finalize() runs.
  <?php echo $className; ?>() : items(), item(), return_counter(0) {}

  void AddItem(<?php echo const_typed_ref_args($inputs); ?>) {
<?php foreach (array_keys($inputs) as $index => $name) { ?>
    std::get<<?php echo $index; ?> >(item) = <?php echo $name; ?>;
<?php } ?>
    items.push_back(item);
  }

  void AddState(<?php echo $className; ?> & other) {
    items.reserve(items.size() + other.items.size());
    items.insert(items.end(), other.items.begin(), other.items.end());
  }

  void Finalize() {
    return_counter = 0;
  }

  // Copies the tuple stored at position `index` into the output arguments.
  // No bounds check is performed.
  void GetResult(<?php echo typed_ref_args($outputs); ?>, int index = 0) const {
<?php foreach (array_keys($outputs) as $index => $name) { ?>
    <?php echo $name; ?> = std::get<<?php echo $index; ?> >(items[index]);
<?php } ?>
  }

  int GetCount() const {
    return items.size();
  }

  const Vector& GetList() const {
    return items;
  }
};

<?php
    return [
        'kind' => 'GLA',
        'name' => $className,
        'system_headers' => $sys_headers,
        'user_headers' => $user_headers,
        'lib_headers' => $lib_headers,
        'libraries' => $libraries,
        'properties' => $properties,
        'extra' => $extra,
        'iterable' => false,
        'input' => $inputs,
        'output' => $outputs,
        'result_type' => $result_type,
    ];
}
/**
 * Generates a deterministic boolean function that tests its single string
 * input against a fixed pattern using PatternMatcherOnig.
 *
 * The pattern is compiled once, as a static member of a generated holder
 * class, and shared by every call of the generated function.
 *
 * The pattern text is pasted verbatim between double quotes in the generated
 * C++, so it must already be a valid C++ string-literal body (quotes and
 * backslashes escaped by the caller).
 *
 * NOTE(review): `match` is a reserved keyword from PHP 8.0 onwards, so this
 * function name only parses on older PHP versions. The name is kept because
 * callers refer to it.
 *
 * @param array $inputs exactly one input of type BASE::STRING or
 *                      BASE::STRING_LITERAL
 * @param array $args   template arguments; the pattern is read from key
 *                      'pattern' or positional 0
 * @return array description of the generated function
 */
function Match($inputs, $args)
{
    // Processing of template arguments.
    $pattern = get_first_key($args, ['pattern', 0], 'Match: no pattern given.');
    grokit_assert(is_string($pattern), 'Match: pattern should be a string.');

    // Processing of inputs.
    $count = \count($inputs);
    grokit_assert($count == 1, "Match supports exactly 1 input. {$count} given");
    $inputs_ = array_combine(['input'], $inputs);
    $input = $inputs_['input']->name();
    grokit_assert(
        in_array($input, ['BASE::STRING_LITERAL', 'BASE::STRING']),
        "Match: input should be a string (literal). {$input} given."
    );

    $result = lookupType('BASE::BOOL');

    $className = generate_name('Pattern');
    $functName = generate_name('Matcher');

    $userHeaders = ['PatternMatcherOnig.h'];
?>

class <?php echo $className; ?> {
 public:
  static PatternMatcherOnig pattern;
};

<?php // The closing quote must directly follow the PHP tag: any whitespace
      // in between would become part of the regular expression. ?>
PatternMatcherOnig <?php echo $className; ?>::pattern("<?php echo $pattern; ?>");

<?php echo $result; ?> <?php echo $functName; ?>(<?php echo const_typed_ref_args($inputs_); ?>) {
<?php if ($input == 'BASE::STRING_LITERAL') { ?>
  return <?php echo $className; ?>::pattern.Match(input);
<?php } else { ?>
  return <?php echo $className; ?>::pattern.Match(input.ToString());
<?php } ?>
}

<?php
    return [
        'kind' => 'FUNCTION',
        'name' => $functName,
        'input' => $inputs,
        'result' => $result,
        'deterministic' => true,
        'user_headers' => $userHeaders,
    ];
}
/**
 * Generates a multi-result GLA that returns its input tuples in sorted
 * order, optionally limited to the best K tuples (Top-K) and optionally
 * annotated with a 1-based rank.
 *
 * Template arguments read from $t_args:
 *   - 'order' (required): map attribute name => direction. Ascending is one
 *     of ASC, ASCENDING, +, >; descending is one of DESC, DESCENDING, DES,
 *     DSC, -, < (case-insensitive).
 *   - 'input': map name => type, used only when $inputs is empty.
 *   - 'rank': null or an attribute present in $outputs that receives the
 *     rank; a null type for it defaults to base::BIGINT.
 *   - 'limit': maximum number of tuples kept; 0 or absent means 2^32.
 *   - 'debug': verbosity of diagnostics written to std::cerr (default 0).
 *
 * Ordering attributes are compared in the order in which they appear in the
 * inputs, not in the order in which they are listed in 'order'.
 *
 * @param array $t_args  template arguments
 * @param array $inputs  input attributes (name => type); may be empty
 * @param array $outputs output attributes; the non-rank outputs are paired
 *                       with the inputs by position and take their types
 * @return array description of the generated GLA
 */
function OrderBy(array $t_args, array $inputs, array $outputs)
{
    if (\count($inputs) == 0) {
        grokit_assert(array_key_exists('input', $t_args), 'No inputs given for OrderBy');

        // Build $inputs from the resolved types. Iterating by value and
        // assigning explicitly guarantees the looked-up types end up in
        // $inputs and leaves no reference variable behind for later loops.
        $inputs = [];
        foreach ($t_args['input'] as $name => $type) {
            if (is_identifier($type)) {
                $type = lookupType(strval($type));
            }

            grokit_assert(is_datatype($type), 'Invalid type given for input ' . $name);
            $inputs[$name] = $type;
        }
    }

    grokit_assert(array_key_exists('order', $t_args), 'No ordering attributes given for OrderBy');
    $ordering = $t_args['order'];

    $ascOpts = ['ASC', 'ASCENDING', '+', '>'];
    $descOpts = ['DESC', 'DESCENDING', 'DES', 'DSC', '-', '<'];

    // attribute name => true for ascending, false for descending
    $ascending = [];
    foreach ($ordering as $name => $order) {
        grokit_assert(array_key_exists($name, $inputs), 'Ordering attribute ' . $name . ' not present in input');

        if (in_array_icase($order, $ascOpts)) {
            $ascending[$name] = true;
        } else {
            if (in_array_icase($order, $descOpts)) {
                $ascending[$name] = false;
            } else {
                grokit_error("Unknown ordering " . $order . " given for attribute " . $name);
            }
        }
    }

    $rankAtt = get_default($t_args, 'rank', null);
    grokit_assert(is_null($rankAtt) || is_attribute($rankAtt), 'Rank argument should be null or an attribute');
    grokit_assert(
        is_null($rankAtt) || array_key_exists($rankAtt->name(), $outputs),
        'Rank attribute does not exist in outputs'
    );

    if (!is_null($rankAtt) && is_null($outputs[$rankAtt->name()])) {
        $outputs[$rankAtt->name()] = lookupType('base::BIGINT');
    }

    // Every output except the rank is a copy of an input.
    $outputPassthroughAtts = [];
    foreach ($outputs as $name => $type) {
        if (is_null($rankAtt) || $rankAtt->name() != $name) {
            $outputPassthroughAtts[$name] = $type;
        }
    }

    // Pair pass-through outputs with inputs by position.
    $outToIn = [];
    $nInputs = \count($inputs);
    reset($inputs);
    reset($outputPassthroughAtts);
    for ($i = 0; $i < $nInputs; $i++) {
        $outName = key($outputPassthroughAtts);
        $inName = key($inputs);

        $outToIn[$outName] = $inName;

        // Unify types
        $outputs[$outName] = $inputs[$inName];
        $outputPassthroughAtts[$outName] = $inputs[$inName];

        next($inputs);
        next($outputPassthroughAtts);
    }

    $orderAtts = [];
    $extraAtts = [];
    foreach ($inputs as $name => $type) {
        if (array_key_exists($name, $ordering)) {
            $orderAtts[$name] = $type;
        } else {
            $extraAtts[$name] = $type;
        }
    }

    // Give 2^32 as the default, which should be effectively infinite
    $limitDefault = pow(2, 32);
    $limit = get_default($t_args, 'limit', $limitDefault);
    $limit = $limit == 0 ? $limitDefault : $limit;

    grokit_assert($limit > 0, 'The OrderBy limit must be a positive integer');

    $className = generate_name('OrderBy');

    $debug = get_default($t_args, 'debug', 0);
?>

class <?php echo $className; ?> {
    struct Tuple {
<?php foreach ($inputs as $name => $type) { ?>
        <?php echo $type; ?> <?php echo $name; ?>;
<?php } ?>

        Tuple( void ) = default;

        Tuple( const Tuple & other ) = default;

        Tuple( <?php echo array_template('const {val} & _{key}', ', ', $inputs); ?> ):
            <?php echo array_template('{key}(_{key})', ', ', $inputs); ?> { }

        Tuple & operator = (const Tuple & other ) = default;

        // "Greater" means "comes earlier in the requested ordering".
        bool operator > ( const Tuple & other ) const {
<?php foreach ($orderAtts as $name => $type) {
        $op1 = $ascending[$name] ? '<' : '>';
        $op2 = !$ascending[$name] ? '<' : '>'; ?>
            if( <?php echo $name; ?> <?php echo $op1; ?> other.<?php echo $name; ?> )
                return true;
            else if( <?php echo $name; ?> <?php echo $op2; ?> other.<?php echo $name; ?> )
                return false;
<?php } ?>

            return false;
        }

        bool operator < ( const Tuple& other ) const {
            return other > *this;
        }

        bool operator <= (const Tuple & other ) const {
            return ! (*this > other );
        }

        bool operator >= (const Tuple & other ) const {
            return !( other > *this );
        }

<?php if ($debug > 0) { ?>
        std::string toString(void) const {
            std::ostringstream ss;

            ss << "( "; // >
<?php
        $first = true;
        foreach ($inputs as $name => $type) {
            if ($first) {
                $first = false;
            } else {
                echo '            ss << ", ";' . PHP_EOL;
            }
?>
            ss << <?php echo $name; ?>; // >
<?php   } // foreach input ?>
            ss << " )"; // >

            return ss.str();
        }
<?php } // debug > 0 ?>
    }; // struct Tuple

    typedef std::vector<Tuple> TupleVector;

public:
    class Iterator {
    public:
        typedef TupleVector::const_iterator iter_type;

    private:
        iter_type begin;
        iter_type curr;
        iter_type end;

    public:
        Iterator(void) = default;

        Iterator( const iter_type & _begin, const iter_type & _end ) :
            begin(_begin), curr(_begin), end(_end)
        { }

        bool GetNextResult(<?php echo typed_ref_args($outputs); ?>) {
            if( curr != end ) {
<?php foreach ($outputPassthroughAtts as $name => $type) { ?>
                <?php echo $name; ?> = curr-><?php echo $outToIn[$name]; ?>;
<?php }
      if (!is_null($rankAtt)) { ?>
                <?php echo $rankAtt; ?> = (curr - begin) + 1;
<?php } // if we need to output the rank ?>
                curr++;
                return true;
            }
            else {
                return false;
            }
        }
    };

private:
    uintmax_t __count; // number of tuples covered

    // K, as in Top-K
    static constexpr size_t K = <?php echo $limit; ?>;

    TupleVector tuples;

    // Iterator for multi output type
    Iterator multiIterator;

    typedef std::greater<Tuple> TupleCompare;

    // Function to force sorting so that GetNext gets the tuples in order.
    void Sort(void) {
        TupleCompare comp;
        // If tuples doesn't contain at least K elements, it was never made
        // into a heap in the first place, so sort it normally.
        if( tuples.size() >= K ) {
            std::sort_heap(tuples.begin(), tuples.end(), comp);
        } else {
            std::sort(tuples.begin(), tuples.end(), comp);
        }
    }

    // Internal function to add a tuple to the heap
    void AddTupleInternal(Tuple & t ) {
<?php if ($debug >= 1) { ?>
        {
            std::ostringstream ss;
            ss << "T ACK: " << t.toString() << std::endl; // >
            std::cerr << ss.str(); // >
        }
<?php } ?>
        TupleCompare comp;
        if( tuples.size() >= K ) {
<?php if ($debug >= 1) { ?>
            {
                std::ostringstream ss;
                ss << "T REP: " << tuples.front().toString() << std::endl; // >
                std::cerr << ss.str(); // >
            }
<?php } ?>
            // Replace the worst tuple currently kept (the heap's front).
            std::pop_heap(tuples.begin(), tuples.end(), comp);
            tuples.pop_back();
            tuples.push_back(t);
            std::push_heap(tuples.begin(), tuples.end(), comp);
        } else {
            tuples.push_back(t);
            // The vector only becomes a heap once it holds exactly K tuples.
            if( tuples.size() == K ) {
                std::make_heap(tuples.begin(), tuples.end(), comp);
            }
        }
    }

public:

    <?php echo $className; ?>() :
        __count(0),
        tuples(),
        multiIterator()
    { }

    ~<?php echo $className; ?>() { }

    void AddItem(<?php echo const_typed_ref_args($inputs); ?>) {
        __count++;
        Tuple t(<?php echo args($inputs); ?>);
<?php if ($debug >= 2) { ?>
        {
            std::ostringstream ss;
            ss << "T NEW: " << t.toString() << std::endl; // >
            std::cerr << ss.str(); // >
        }
<?php } ?>
        // Once K tuples are kept, anything not better than the worst one
        // can be dropped immediately.
        if( tuples.size() == K && !(t > tuples.front()) )
            return;

        AddTupleInternal(t);
    }

    void AddState( <?php echo $className; ?> & other ) {
        __count += other.__count;
        for( Tuple & el : other.tuples ) {
            if( tuples.size() < K /*>*/ || el > tuples.front() ) {
                AddTupleInternal(el);
            }
        }
    }

    void Finalize() {
        Sort();
        Iterator::iter_type begin = tuples.cbegin();
        Iterator::iter_type end = tuples.cend();
        multiIterator = Iterator(begin, end);

<?php if ($debug >= 1) { ?>
        std::ostringstream ss;
        ss << "[ "; //>
        bool first = true;
        for( auto el : tuples ) {
            if( first )
                first = false;
            else
                ss << ", "; //>>

            ss << el.toString(); //>>
        }
        ss << " ]" << std::endl; // >
        std::cerr << ss.str(); //>>
<?php } ?>
    }

    bool GetNextResult( <?php echo typed_ref_args($outputs); ?> ) {
        return multiIterator.GetNextResult(<?php echo args($outputs); ?>);
    }
};

<?php
    // <functional> provides std::greater, used as the heap/sort comparator.
    $system_headers = ['vector', 'algorithm', 'functional', 'cinttypes'];
    if ($debug > 0) {
        $system_headers = array_merge($system_headers, ['iostream', 'sstream', 'string']);
    }

    return [
        'kind' => 'GLA',
        'name' => $className,
        'input' => $inputs,
        'output' => $outputs,
        'result_type' => 'multi',
        'system_headers' => $system_headers,
    ];
}
/**
 * Generates a single-result GLA that keeps, for every input position, the
 * largest value seen (as determined by std::max).
 *
 * Each output takes the type of the input at the same position. A tuple
 * counter distinguishes "nothing seen yet" from real values, so the first
 * tuple is copied rather than compared.
 *
 * @param array $t_args template arguments (unused)
 * @param array $input  input attributes (name => type)
 * @param array $output output attributes; same number of entries as $input
 * @return array description of the generated GLA
 */
function Max(array $t_args, array $input, array $output)
{
    grokit_assert(\count($output) >= 1, 'Max GLA produces at least one output!');
    grokit_assert(
        \count($output) == \count($input),
        'Max GLA should have the same number of inputs and outputs'
    );

    // output name => name of the input at the same position
    $sourceOf = array_combine(array_keys($output), array_keys($input));

    // Outputs should be the same type as the inputs
    $position = 0;
    while ($position < \count($sourceOf)) {
        array_set_index($output, $position, array_get_index($input, $position));
        $position++;
    }

    $name = generate_name('Max_');
?>

class <?php echo $name; ?> {
    uintmax_t count;

<?php foreach ($output as $member => $memberType) { ?>
    <?php echo $memberType; ?> _<?php echo $member; ?>;
<?php } // one member per output ?>

public:
    <?php echo $name; ?>() :
<?php foreach ($output as $member => $memberType) { ?>
        _<?php echo $member; ?>(),
<?php } // one initializer per output ?>
        count(0)
    { }

    void AddItem( <?php echo const_typed_ref_args($input); ?> ) {
        if( count > 0 ) {
<?php foreach ($sourceOf as $outName => $inName) { ?>
            _<?php echo $outName; ?> = std::max(_<?php echo $outName; ?>, <?php echo $inName; ?>);
<?php } // compare against the running maximum ?>
        } else {
<?php foreach ($sourceOf as $outName => $inName) { ?>
            _<?php echo $outName; ?> = <?php echo $inName; ?>;
<?php } // the first tuple seeds the maximum ?>
        }

        count++;
    }

    void AddState( <?php echo $name; ?> & o ) {
        if (count > 0 && o.count > 0) {
<?php foreach ($sourceOf as $outName => $inName) { ?>
            _<?php echo $outName; ?> = std::max(_<?php echo $outName; ?>, o._<?php echo $outName; ?>);
<?php } // both sides hold values ?>
        } else if(o.count > 0) { // count == 0
<?php foreach ($sourceOf as $outName => $inName) { ?>
            _<?php echo $outName; ?> = o._<?php echo $outName; ?>;
<?php } // only the other side holds values ?>
        }
        // Otherwise, count > 0 && o.count == 0, so just keep our values

        count += o.count;
    }

    void GetResult(<?php echo typed_ref_args($output); ?>) {
<?php foreach ($output as $member => $memberType) { ?>
        <?php echo $member; ?> = _<?php echo $member; ?>;
<?php } // copy every maximum out ?>
    }
};

<?php
    return [
        'kind' => 'GLA',
        'name' => $name,
        'input' => $input,
        'output' => $output,
        'result_type' => 'single',
        'system_headers' => ['algorithm', 'cstdint'],
    ];
}
/**
 * Generates a GLA that partitions its input by the hash of the first input
 * attribute (the key) into a fixed number of segments, each handled by its
 * own instance of an inner GLA.
 *
 * Worker-local inner GLAs are merged into the shared per-segment states held
 * by the constant state at every chunk boundary. With more than one pass,
 * each pass only processes the keys whose hash maps to that pass, and the
 * generated GLA is iterable.
 *
 * Template arguments read from $t_args:
 *   - 'gla' / 'GLA' / positional 0 (required): the inner GLA; it must not be
 *     iterable itself.
 *   - 'passes': positive integer, default 1.
 *   - 'segments': forwarded to BASE::SegmenterState, default 64.
 *   - 'inner.prefer.fragment': prefer the inner GLA's fragment interface
 *     over its multi interface, default false.
 *
 * @param array $t_args       template arguments
 * @param array $input        key attribute followed by the inner GLA's inputs
 * @param array $output       outputs, forwarded to the inner GLA
 * @param array $given_states states forwarded to the inner GLA
 * @return array description of the generated GLA
 */
function Segmenter(array $t_args, array $input, array $output, array $given_states)
{
    $resType = ['fragment', 'multi'];
    $system_headers = ['array', 'vector', 'memory', 'cinttypes', 'unordered_map'];
    $user_headers = ['HashFunctions.h'];
    $lib_headers = [];

    $preferFragment = get_default($t_args, 'inner.prefer.fragment', false);
    $wantedRes = $preferFragment ? ['fragment', 'multi'] : ['multi', 'fragment'];

    $nInputs = \count($input);
    grokit_assert($nInputs > 1, 'Segmenter: Not enough inputs specified!');

    // The first input is the segmentation key; the rest go to the inner GLA.
    $keyName = array_keys($input)[0];
    $keyType = array_get_index($input, 0);

    $innerInputs = array_slice($input, 1, $nInputs - 1, true);

    $gla = get_first_key($t_args, ['gla', 'GLA', 0]);
    grokit_assert(is_gla($gla), 'Segmenter: [gla] argument must be a valid GLA');
    $gla = $gla->apply($innerInputs, $output, $given_states);

    $n_passes = get_default($t_args, 'passes', 1);
    grokit_assert(is_int($n_passes), 'Segmenter: [passes] argument must be an integer');
    grokit_assert($n_passes > 0, 'Segmenter: [passes] argument must be > 0');

    $libraries = $gla->libraries();

    $innerRes = get_first_value($gla->result_type(), $wantedRes);
    $innerInputs = $gla->input();
    $innerOutput = $gla->output();
    $input = array_merge([$keyName => $keyType], $innerInputs);
    $output = $innerOutput;

    $segments = get_default($t_args, 'segments', 64);

    $constState = lookupResource('BASE::SegmenterState', [
        'gla' => $gla,
        'passes' => $n_passes,
        'segments' => $segments,
    ]);
    $className = generate_name('Segmenter_');

    // $savedArgs: constructor arguments kept as members.
    // $cArgs: constructor parameters of the generated class.
    // $innerCArgs: expressions passed to the inner GLA's constructor.
    $savedArgs = [];
    $cArgs = [];
    $innerCArgs = [];
    if ($gla->configurable()) {
        $savedArgs['json_init'] = 'Json::Value';
        $cArgs['json_init'] = 'Json::Value';
        $innerCArgs[] = 'json_init';
    }

    $cArgs['const_state'] = $constState;

    if ($gla->has_state()) {
        $innerCArgs[] = 'constState.inner_cstate';
    }

    $cstStr = \count($innerCArgs) > 0 ? '(' . implode(',', $innerCArgs) . ')' : '';

    grokit_assert(!$gla->iterable(), 'Segmenter does not support iterable GLAs');

    $iterable = $n_passes > 1;
?>

class <?php echo $className; ?> {
private:
    using ConstantState = <?php echo $constState; ?>;
    using SplitState = ConstantState::SplitState;

    static constexpr const size_t NUM_STATES = SplitState::NUM_STATES;

    using InnerGLA = <?php echo $gla; ?>;
    using InnerGLAPtr = std::unique_ptr<InnerGLA>;
    using GLA_Array = std::array<InnerGLAPtr, NUM_STATES>;

public:
    using size_type = std::size_t;

<?php if ($innerRes == 'fragment') { ?>
    // Wraps one fragment of one inner GLA.
    class Iterator {
    private:
        InnerGLA * gla;
        int fragmentNum;

        InnerGLA::Iterator * innerIter;

    public:
        Iterator( InnerGLA * _gla, int _fragmentNum, int _innerFrag ) :
            gla(_gla), fragmentNum(_fragmentNum),
            innerIter(nullptr)
        {
            innerIter = gla->Finalize(_innerFrag);
        }

        ~Iterator(void) {
            if( innerIter != nullptr ) {
                delete innerIter;
                innerIter = nullptr;
            }
        }

        bool GetNextResult( <?php echo typed_ref_args($gla->output()); ?> ) {
            return innerIter->GetNextResult(<?php echo args($gla->output()); ?>);
        }

        int FragmentNumber() {
            return fragmentNum;
        }
    };
<?php } else { // if inner result type is fragment ?>
    // Wraps a whole inner GLA; one segment is one fragment.
    class Iterator {
    private:
        InnerGLA * gla;
        int fragmentNum;

    public:
        Iterator( InnerGLA * _gla, int fragNo ) : gla(_gla), fragmentNum(fragNo) {
            gla->Finalize();
        }

        ~Iterator(void) { }

        bool GetNextResult( <?php echo typed_ref_args($gla->output()); ?> ) {
            return gla->GetNextResult(<?php echo args($gla->output()); ?>);
        }

        int FragmentNumber() {
            return fragmentNum;
        }
    };
<?php } // if inner result type is multi ?>

private:
    const ConstantState & constState;
    GLA_Array localState;

    // Iteration state for multi result type
    int numFrags;
    int multiFragNo;
    Iterator * multiIter;

<?php if ($innerRes == 'fragment') { ?>
    // Maps a global fragment number to (segment, fragment inside segment).
    using frag_info = std::pair<int, int>;
    using frag_map_t = std::unordered_map<int, frag_info>;
    frag_map_t fragMap;
<?php } ?>

<?php foreach ($savedArgs as $name => $type) { ?>
    const <?php echo $type; ?> <?php echo $name; ?>;
<?php } // foreach saved arg ?>

public:

    // Constructor
    <?php echo $className; ?>( <?php echo const_typed_ref_args($cArgs); ?> ) :
        constState(const_state)
        , localState()
        , numFrags(0)
        , multiFragNo(0)
        , multiIter(nullptr)
<?php if ($innerRes == 'fragment') { ?>
        , fragMap()
<?php }
      foreach ($savedArgs as $name => $type) { ?>
        , <?php echo $name; ?>(<?php echo $name; ?>)
<?php } // foreach constructor arg to save ?>
    {
        for( auto & elem : localState ) {
            elem.reset(new InnerGLA<?php echo $cstStr; ?>);
        }
    }

    void AddItem( <?php echo const_typed_ref_args($input); ?> ) {
        uint64_t hashVal = CongruentHash(Hash(<?php echo $keyName; ?>), H_b + 1);
        uint64_t segNum = hashVal % NUM_STATES;

<?php if ($n_passes > 1) { ?>
        // Only keys belonging to the current pass are processed.
        uint64_t passNum = (hashVal / NUM_STATES) % ConstantState::N_PASSES;
        if( passNum != constState.pass ) {
            return;
        }
<?php } // more than 1 pass ?>

        localState[segNum]->AddItem(<?php echo args($innerInputs); ?>);
    }

    void ChunkBoundary(void) {
        // Merge local states into the global state

        SplitState & globalStates = constState.segments;

        // Segments that still have to be merged are flagged with 1.
        int theseAreOk[NUM_STATES];
        for( int i = 0; NUM_STATES > i; i++ ) {
            theseAreOk[i] = 1;
        }

        int segsLeft = NUM_STATES;

        while( segsLeft > 0 ) {
            InnerGLA * checkedOut = nullptr;
            int whichOne = globalStates.CheckOutOne( theseAreOk, checkedOut );

            // Nothing stored for this segment yet: start from a fresh state.
            if( checkedOut == NULL ) {
                checkedOut = new InnerGLA<?php echo $cstStr; ?>;
            }

            checkedOut->AddState( *(localState[whichOne]) );

            globalStates.CheckIn( whichOne, checkedOut );

            theseAreOk[whichOne] = 0;
            segsLeft--;
        }

        // Re-initialize the local states
        for( auto & elem : localState ) {
<?php if ($gla->is('resettable')) { ?>
            elem->Reset();
<?php } else { // if resettable ?>
            elem.reset(new InnerGLA<?php echo $cstStr; ?>);
<?php } // if not resettable ?>
        }
    }

    void AddState( <?php echo $className; ?> & o ) {
        // Do nothing
    }

    // Prepares the multi-result interface.
    void Finalize() {
        SplitState & globalStates = constState.segments;

        if( multiIter != nullptr ) {
            delete multiIter;
            multiIter = nullptr;
        }

        // GetNextResult() below walks fragments 0 .. numFrags-1, so the
        // fragment count (and, for fragment-based inner GLAs, the fragment
        // map) has to be computed here; nothing else does it on this path.
        GetNumFragments();

        multiFragNo = 0;
        if( numFrags <= 0 ) {
            return;
        }

<?php if ($innerRes == 'fragment') { ?>
        frag_info fInfo = fragMap[multiFragNo];
        multiIter = new Iterator(globalStates.Peek(fInfo.first), multiFragNo,
            fInfo.second);
<?php } else { ?>
        multiIter = new Iterator(globalStates.Peek(multiFragNo), multiFragNo);
<?php } ?>
    }

    bool GetNextResult(<?php echo typed_ref_args($output); ?>) {
        bool gotResult = false;
        SplitState & globalStates = constState.segments;

        while( (multiFragNo < numFrags && multiIter != nullptr) && !gotResult ) {
            gotResult = multiIter->GetNextResult(<?php echo args($output); ?>);

            if( !gotResult ) {
                // Current fragment exhausted: move on to the next one.
                multiFragNo++;
                delete multiIter;

                if( numFrags > multiFragNo ) {
<?php if ($innerRes == 'fragment') { ?>
                    frag_info fInfo = fragMap[multiFragNo];
                    multiIter = new Iterator(globalStates.Peek(fInfo.first), multiFragNo,
                        fInfo.second);
<?php } else { ?>
                    multiIter = new Iterator(globalStates.Peek(multiFragNo), multiFragNo);
<?php } ?>
                } else {
                    multiIter = nullptr;
                }
            }
        }

        return gotResult;
    }

    int GetNumFragments(void) {
<?php if ($innerRes == 'fragment') { ?>
        SplitState & globalStates = constState.segments;
        numFrags = 0;

        for (int i = 0; i < NUM_STATES; i++) {
            int curFrags = globalStates.Peek(i)->GetNumFragments();

            for (int curFrag = 0; curFrag < curFrags; curFrag++) {
                fragMap[numFrags] = frag_info(i, curFrag);

                numFrags++;
            }
        }
<?php } else { ?>
        numFrags = NUM_STATES;
<?php } ?>
        return numFrags;
    }

    Iterator * Finalize( int fragment ) {
        SplitState & globalStates = constState.segments;

<?php if ($innerRes == 'fragment') { ?>
        frag_info info = fragMap[fragment];
        return new Iterator(globalStates.Peek(info.first), fragment, info.second);
<?php } else { ?>
        return new Iterator(globalStates.Peek(fragment), fragment);
<?php } ?>
    }

    bool GetNextResult( Iterator * it, <?php echo typed_ref_args($output); ?> ) {
        bool ret = it->GetNextResult(<?php echo args($output); ?>);

        return ret;
    }

<?php if ($iterable) { ?>
    bool ShouldIterate( ConstantState & modible ) {
        modible.pass++;

        return modible.pass < ConstantState::N_PASSES;
    }

    void PostFinalize() {
        constState.segments.Reset();
    }
<?php } // iterable ?>

<?php if ($gla->is('finite container')) { ?>
    // Total number of elements over all segments.
    size_type size() {
        SplitState & globalStates = constState.segments;
        size_type s = 0;
        for( int i = 0; NUM_STATES > i; i++ ) {
            InnerGLA * ptr = globalStates.Peek(i);
            s += ptr->size();
        }

        return s;
    }

    // Number of elements in segment `frag`.
    size_type size(int frag) {
        SplitState & globalStates = constState.segments;
        return globalStates.Peek(frag)->size();
    }
<?php } // if the gla is a container ?>
};

typedef <?php echo $className; ?>::Iterator <?php echo $className; ?>_Iterator;

<?php
    return [
        'kind' => 'GLA',
        'name' => $className,
        'system_headers' => $system_headers,
        'user_headers' => $user_headers,
        'lib_headers' => $lib_headers,
        'libraries' => $libraries,
        'input' => $input,
        'output' => $output,
        'result_type' => $resType,
        'generated_state' => $constState,
        'required_states' => $gla->req_states(),
        'chunk_boundary' => true,
        'configurable' => $gla->configurable(),
        'iterable' => $iterable,
        'post_finalize' => $iterable,
        'intermediates' => true,
    ];
}
/**
 * Generates a GLA that keeps every tuple whose "extreme" attributes are the
 * best seen so far (minimum or maximum per attribute).
 *
 * Template arguments:
 *  - 'extremes': map of input name => direction. Minimum is selected by
 *                'MIN', 'MINIMUM', '-' or '<'; maximum by 'MAX', 'MAXIMUM',
 *                '+' or '>' (case-insensitive).
 *  - 'inputs':   list of datatypes; only consulted when no inputs are passed.
 *                The synthesized inputs are named et_val0, et_val1, ...
 *  - 'debug':    optional; a value above 0 adds stream headers to the result.
 *
 * Outputs are paired with inputs by position and take the input's type.
 * Attributes are compared in the order they appear in the inputs.
 *
 * @param array $t_args  template arguments, see above
 * @param array $inputs  input name => type
 * @param array $outputs output name => type (types are overwritten)
 * @return array GLA description ('kind', 'name', 'input', 'output', ...)
 */
function ExtremeTuples(array $t_args, array $inputs, array $outputs)
{
    $extremes = get_first_key($t_args, ['extremes']);
    $nExt = \count($extremes);
    grokit_assert($nExt > 0, 'No extremes specified for ExtremeTuples GLA.');

    if (\count($inputs) == 0) {
        grokit_assert(array_key_exists('inputs', $t_args), 'No arguments specified for ExtremeTuples GLA.');

        $count = 0;
        foreach ($t_args['inputs'] as $type) {
            if (is_identifier($type)) {
                $type = lookupType(strval($type));
            }
            grokit_assert(
                is_datatype($type),
                'Only datatypes can be specified as inputs to the ExtremeTuples GLA'
            );

            // Every synthesized input needs a distinct name; without the
            // increment each type would overwrite the single 'et_val0' entry.
            $inputs['et_val' . $count] = $type;
            $count++;
        }
    }

    // Pair outputs with inputs positionally: the output takes the input's type
    // and remembers which tuple member feeds it.
    $outputMap = [];
    reset($outputs);
    foreach ($inputs as $name => $type) {
        $oKey = key($outputs);
        $outputs[$oKey] = $type;
        $outputMap[$oKey] = $name;
        next($outputs);
    }

    grokit_assert($nExt <= \count($inputs), 'There can not be more extreme values than there are inputs!');

    $minOpts = ['MIN', 'MINIMUM', '-', '<'];
    $maxOpts = ['MAX', 'MAXIMUM', '+', '>'];

    // Case-insensitive membership test.
    $inArrayCase = function ($needle, $haystack) {
        foreach ($haystack as $item) {
            if (strcasecmp($needle, $item) == 0) {
                return true;
            }
        }
        return false;
    };

    foreach (array_keys($extremes) as $name) {
        grokit_assert(
            array_key_exists($name, $inputs),
            "ExtremeTuples: Expression with name " . $name . " specified as extreme not found in inputs"
        );
    }

    // $mainAtts: attributes that take part in the comparison.
    // $minimum:  attribute name => true when smaller is better.
    $mainAtts = [];
    $minimum = [];
    foreach ($inputs as $name => $type) {
        if (!array_key_exists($name, $extremes)) {
            continue;
        }

        $mainAtts[$name] = $type;
        if ($inArrayCase($extremes[$name], $minOpts)) {
            $minimum[$name] = true;
        } elseif ($inArrayCase($extremes[$name], $maxOpts)) {
            $minimum[$name] = false;
        } else {
            grokit_error('Unknown extreme type ' . $extremes[$name] . ' specified for ' . $name);
        }
    }

    $debug = get_default($t_args, 'debug', 0);

    $className = generate_name('ExtremeTuples');
?>

class <?= $className ?> {

    struct Tuple {
<?php foreach ($inputs as $name => $type) { ?>
        <?= $type ?> <?= $name ?>;
<?php } // foreach input ?>

        // The default constructor, copy constructor and copy assignment are
        // all compiler-generated.
        Tuple(void) = default;
        Tuple(const Tuple &) = default;
        Tuple & operator = (const Tuple &) = default;

        Tuple(<?= array_template('const {val} & _{key}', ', ', $inputs) ?> ) :
            <?= array_template('{key}(_{key})', ', ', $inputs) ?> { }

        // operator > means that this tuple is "better" than the other tuple.
        bool operator > ( const Tuple & other ) const {
<?php foreach ($mainAtts as $name => $type) {
    $op1 = $minimum[$name] ? '<' : '>';
    $op2 = !$minimum[$name] ? '<' : '>'; ?>
            if( <?= $name ?> <?= $op1 ?> other.<?= $name ?> )
                return true;
            else if( <?= $name ?> <?= $op2 ?> other.<?= $name ?> )
                return false;
<?php } // foreach main attribute ?>
            return false;
        }

        bool operator < ( const Tuple& other ) const {
            return other > *this;
        }

        bool operator <= (const Tuple & other ) const {
            return ! (*this > other );
        }

        bool operator >= (const Tuple & other ) const {
            return !( other > *this );
        }

        // Equality only looks at the attributes that define the extreme.
        bool operator == (const Tuple & other ) const {
            bool ret = true;
<?php foreach ($mainAtts as $name => $type) { ?>
            ret &= <?= $name ?> == other.<?= $name ?>;
<?php } // foreach main attribute ?>
            return ret;
        }
    }; // struct Tuple

    typedef std::vector<Tuple> TupleVector;

public:

    class Iterator {
    public:
        typedef TupleVector::const_iterator iter_type;

    private:
        iter_type begin;
        iter_type end;

    public:
        Iterator(void) = default;
        Iterator(const Iterator &) = default;
        Iterator( const iter_type & _begin, const iter_type & _end ) : begin(_begin), end(_end) { }
        Iterator( const iter_type && _begin, const iter_type && _end ) : begin(_begin), end(_end) { }

        bool GetNextResult(<?= typed_ref_args($outputs) ?> ) {
            if( begin != end ) {
<?php foreach ($outputs as $name => $type) { ?>
                <?= $name ?> = begin-><?= $outputMap[$name] ?>;
<?php } // foreach output ?>
                begin++;
                return true;
            } else {
                return false;
            }
        }
    };

private:

    uintmax_t __count; // number of tuples covered

    // All tuples currently tied for the best value.
    TupleVector tuples;

    // Iterator for multi output type
    Iterator multiIterator;

public:

    // Constructor and destructor
    <?= $className ?> (void) : __count(0), tuples(), multiIterator() { }

    ~<?= $className ?> () { }

    void AddItem( <?= const_typed_ref_args($inputs) ?> ) {
        ++__count;
        Tuple t(<?= args($inputs) ?> );

        if( tuples.empty() ) {
            tuples.push_back(t);
        } else if( t > tuples.front() ) {
            // Strictly better: everything kept so far is obsolete.
            tuples.clear();
            tuples.push_back(t);
        } else if( t == tuples.front() ) {
            tuples.push_back(t);
        }
    }

    void AddState( <?= $className ?> & other ) {
        // The merged state covers the tuples seen by both states.
        __count += other.__count;

        if( tuples.size() == 0 ) {
            tuples.swap(other.tuples);
        } else if( other.tuples.size() == 0 ) {
            // Do nothing
        } else if( tuples.front() > other.tuples.front() ) {
            // fast path: our tuples already win
        } else if( other.tuples.front() > tuples.front() ) {
            tuples.swap(other.tuples);
        } else {
            // Tie: keep the tuples of both states.
            for( Tuple & t : other.tuples ) {
                tuples.push_back(t);
            }
        }
    }

    void Finalize( void ) {
        multiIterator = Iterator(tuples.cbegin(), tuples.cend());
    }

    bool GetNextResult(<?= typed_ref_args($outputs) ?> ) {
        return multiIterator.GetNextResult(<?= args($outputs) ?> );
    }
}; // class <?= $className ?>

<?php
    $system_headers = ['vector', 'algorithm', 'cinttypes'];
    if ($debug > 0) {
        $system_headers = array_merge($system_headers, ['iostream', 'sstream', 'string']);
    }

    return [
        'kind' => 'GLA',
        'name' => $className,
        'input' => $inputs,
        'output' => $outputs,
        'result_type' => 'multi',
        'system_headers' => $system_headers,
    ];
}
/**
 * Generates a GLA that feeds the same input stream to several inner GLAs and
 * exposes their results side by side.
 *
 * Template arguments:
 *  - 'input': input name => type (or type identifier); only consulted when no
 *             inputs are passed in.
 *  - 'glas' (or positional argument 0): map of name => description, each with
 *             'gla'     the inner GLA,
 *             'inputs'  names of the Multiplexer inputs passed to it,
 *             'outputs' names of the Multiplexer outputs it fills.
 *
 * Each output may be claimed by one inner GLA only. Inner GLAs must all agree
 * on being iterable or not, and must not have constant states.
 *
 * @param array $t_args  template arguments, see above
 * @param array $inputs  input name => type
 * @param array $outputs output name => type (types are set from inner GLAs)
 * @return array GLA description ('kind', 'name', 'input', 'output', ...)
 */
function Multiplexer(array $t_args, array $inputs, array $outputs)
{
    $className = generate_name('Multiplexer');

    if (\count($inputs) == 0) {
        grokit_assert(array_key_exists('input', $t_args), 'No inputs specified for Multiplexer');
        $inputs = $t_args['input'];

        // Resolve identifiers on the array that is actually used as the input
        // list. Writing back by key avoids a by-reference loop, whose
        // leftover reference would be overwritten by later `as $type` loops.
        foreach ($inputs as $name => $type) {
            if (is_identifier($type)) {
                $type = lookupType(strval($type));
            }
            grokit_assert(is_datatype($type), 'Only types may be specified as inputs to Multiplexer.');
            $inputs[$name] = $type;
        }

        $inputs = ensure_valid_names($inputs, 'multi_input');
    }

    $glas = get_first_key($t_args, ['glas', 0]);
    grokit_assert(\count($glas) > 0, 'No GLAs specified for Multiplexer.');

    $myGLAs = [];        // member name => applied inner GLA
    $glaInputs = [];     // member name => (input name => type)
    $glaOutputs = [];    // member name => (output name => type)
    $glaRez = [];        // member name => 'multi' | 'single' | 'state'
    $resultType = 'multi';
    $usedOutputs = [];
    $libraries = [];
    $glaGenStates = [];
    $glaReqStates = [];
    $configurable = false;
    $reqStates = [];
    $iterable = null;

    foreach ($glas as $name => $glaInfo) {
        grokit_assert(is_array($glaInfo), 'Template argument \'glas\' must be an array');
        grokit_assert(array_key_exists('gla', $glaInfo), 'No GLA given for glas[' . $name . ']');
        grokit_assert(array_key_exists('inputs', $glaInfo), 'No inputs given for glas[' . $name . ']');
        grokit_assert(array_key_exists('outputs', $glaInfo), 'No outputs given for glas[' . $name . ']');

        $gla = $glaInfo['gla'];
        $glaInAtts = $glaInfo['inputs'];
        $glaOutAtts = $glaInfo['outputs'];

        grokit_assert(is_gla($gla), 'Non-GLA given for glas[' . $name . '][gla]');
        grokit_assert(is_array($glaInAtts), 'Non-array given for inputs for gla ' . $name);
        grokit_assert(is_array($glaOutAtts), 'Non-array given for outputs for gla ' . $name);

        $glaInAtts = array_map('strval', $glaInAtts);
        $glaOutAtts = array_map('strval', $glaOutAtts);

        $glaName = "innerGLA_" . $name;
        $glaInputs[$glaName] = [];
        $glaOutputs[$glaName] = [];

        foreach ($glaInAtts as $att) {
            grokit_assert(
                array_key_exists($att, $inputs),
                'Input ' . $att . ' for GLA ' . $name . ' not found in inputs'
            );
            $glaInputs[$glaName][$att] = $inputs[$att];
        }

        foreach ($glaOutAtts as $att) {
            grokit_assert(
                array_key_exists($att, $outputs),
                'Output ' . $att . ' for GLA ' . $name . ' not found in outputs'
            );
            grokit_assert(!in_array($att, $usedOutputs, true), 'Output ' . $att . ' used by multiple GLAs');
            $usedOutputs[] = $att;
            $glaOutputs[$glaName][$att] = $outputs[$att];
        }

        $gla = $gla->apply($glaInputs[$glaName], $glaOutputs[$glaName]);
        $myGLAs[$glaName] = $gla;

        $glaRez[$glaName] = get_first_value($gla->result_type(), ['multi', 'single', 'state']);

        $libraries = array_merge($libraries, $gla->libraries());

        if ($glaRez[$glaName] == 'state') {
            grokit_assert(
                \count($glaOutputs[$glaName]) == 1,
                "GLA {$glaName} is produced as state, and thus must have exactly 1 output."
            );
            $stateType = lookupType('base::STATE', ['type' => $gla]);
            $glaOutputs[$glaName] = array_combine(array_keys($glaOutputs[$glaName]), [$stateType]);
        } else {
            grokit_assert(
                \count($glaOutputs[$glaName]) == \count($gla->output()),
                'GLA ' . $glaName . ' produces different number of outputs than expected'
            );
            $glaOutputs[$glaName] = array_combine(array_keys($glaOutputs[$glaName]), $gla->output());
        }

        // Set types for our output
        foreach ($glaOutputs[$glaName] as $attName => $type) {
            $outputs[$attName] = $type;
        }

        if (is_null($iterable)) {
            $iterable = $gla->iterable();
        } else {
            grokit_assert(
                $iterable == $gla->iterable(),
                'Multiplexer does not support mixing iterable and non-iterable GLAs'
            );
        }

        $glaReqStates[$glaName] = $gla->req_states();
        foreach ($gla->req_states() as $rstate) {
            $reqStates[] = $rstate;
        }

        $glaGenStates[$glaName] = $gla->state();

        // TODO: Support constant states
        grokit_assert(!$gla->has_state(), 'Multiplexer currently does not support constant states.');
    }

    $libraries = array_unique($libraries);

    $extra = ['glas' => $myGLAs];
?>

class <?= $className ?> {
<?php foreach ($myGLAs as $name => $type) { ?>
    <?= $type ?> <?= $name ?>;
<?php } // foreach inner gla ?>

    class Iterator {
        bool _gotResultsOnce;
        bool _valid;

<?php foreach ($myGLAs as $name => $type) { ?>
        <?= $type ?> * it_<?= $name ?>;
<?php } // foreach inner gla ?>

    public:
        Iterator(void) : _gotResultsOnce(false), _valid(false),
            <?= array_template('it_{key}(nullptr)', ', ', $myGLAs) ?> { }

        Iterator(<?= typed_ref_args($myGLAs) ?> ) : _gotResultsOnce(false), _valid(true),
            <?= array_template('it_{key}(&{key})', ', ', $myGLAs) ?> {
<?php foreach ($myGLAs as $name => $type) {
    if ($glaRez[$name] == 'multi') { ?>
            <?= $name ?>.Finalize();
<?php } // if inner GLA is multi
} // foreach inner gla ?>
        }

        Iterator( const Iterator & other) = default;

        ~Iterator() { }

        bool GetNextResult( <?= typed_ref_args($outputs) ?> ) {
            FATALIF(!_valid, "Tried to get results from an invalid iterator.");

            // The first call always yields a row, so 'single' and 'state'
            // results are produced even when no 'multi' GLA has output.
            bool ret = !_gotResultsOnce;
            _gotResultsOnce = true;

<?php foreach ($myGLAs as $name => $type) {
    if ($glaRez[$name] == 'multi') { ?>
            ret |= it_<?= $name ?>->GetNextResult(<?= args($glaOutputs[$name]) ?> );
<?php } // if inner GLA is multi
} // foreach inner gla ?>

            if( ret ) {
<?php foreach ($myGLAs as $name => $type) {
    if ($glaRez[$name] == 'single') { ?>
                it_<?= $name ?>->GetResult(<?= args($glaOutputs[$name]) ?> );
<?php } elseif ($glaRez[$name] == 'state') {
        $stateVar = array_keys($glaOutputs[$name])[0];
        $stateType = $glaOutputs[$name][$stateVar]; ?>
                <?= $stateVar ?> = <?= $stateType ?> (it_<?= $name ?> );
<?php } // if inner GLA is single / state
} // foreach inner gla ?>
            }

            return ret;
        }
    };

    Iterator multiIterator;

public:
    <?= $className ?> () { }

    ~<?= $className ?> () { }

    void AddItem(<?= const_typed_ref_args($inputs) ?> ) {
        // Call AddItem individually on each GLA.
<?php foreach ($myGLAs as $gName => $gType) { ?>
        <?= $gName ?>.AddItem(<?= args($glaInputs[$gName]) ?> );
<?php } // foreach inner gla ?>
    }

    void AddState( <?= $className ?> & other ) {
        // Call AddState individually on each GLA.
<?php foreach ($myGLAs as $gName => $gType) { ?>
        <?= $gName ?>.AddState(other.<?= $gName ?> );
<?php } // foreach inner gla ?>
    }

    void Finalize() {
        multiIterator = Iterator(<?= args($myGLAs) ?> );
    }

    bool GetNextResult(<?= typed_ref_args($outputs) ?> ) {
        return multiIterator.GetNextResult(<?= args($outputs) ?> );
    }

    void GetResult(<?= typed_ref_args($outputs) ?> ) {
        Finalize();
        GetNextResult(<?= args($outputs) ?> );
    }

<?php foreach (array_keys($myGLAs) as $index => $name) { ?>
    const <?= $myGLAs[$name] ?> & GetGLA<?= $index ?> () const {
        return <?= $name ?>;
    }
<?php } // foreach inner gla accessor ?>
};

<?php
    return [
        'kind' => 'GLA',
        'name' => $className,
        'input' => $inputs,
        'output' => $outputs,
        'result_type' => $resultType,
        'libraries' => $libraries,
        'configurable' => $configurable,
        'extra' => $extra,
    ];
}
/**
 * A GLA that estimates the cardinality of a dataset using a bloom filter of
 * a configurable size.
 *
 * Note: This filter has very high performance, so long as all of the states
 * fit into cache, preferably L1 or L2, but L3 is also fine. Once the states
 * are large enough that all of them cannot fit inside L3 cache at the same
 * time, performance takes a nose dive (4x loss minimum).
 *
 * Template arguments:
 *  - 'exponent':   integer in (0, 64); the filter has 2^exponent bits.
 *                  Defaults to 16.
 *  - 'null.check': boolean (applies to every input) or a list of input names;
 *                  an item with a null in a checked input is counted but not
 *                  hashed into the filter. Defaults to false.
 *  - 'debug':      optional; a value above 0 makes GetResult print its
 *                  intermediate values to std::cout.
 *
 * @param array $t_args template arguments, see above
 * @param array $input  input name => type
 * @param array $output exactly one output; a null type becomes BIGINT
 * @return array GLA description ('kind', 'name', 'input', 'output', ...)
 */
function BloomFilter(array $t_args, array $input, array $output)
{
    grokit_assert(
        \count($output) == 1,
        'BloomFilter produces only 1 value, ' . \count($output) . ' outputs given.'
    );

    $outputName = array_keys($output)[0];
    $outputType = array_get_index($output, 0);
    if (is_null($outputType)) {
        $outputType = lookupType('base::BIGINT');
    }
    $output[$outputName] = $outputType;

    grokit_assert($outputType->is('numeric'), 'BloomFilter output must be numeric!');

    $exp = get_first_key_default($t_args, ['exponent'], 16);
    grokit_assert(is_int($exp), 'BloomFilter exponent must be an integer.');
    grokit_assert(
        $exp > 0 && $exp < 64,
        'BloomFilter exponent must be in range (0,64), ' . $exp . ' given.'
    );

    // $nullable: input name => whether AddItem checks that input for null.
    $nullCheck = get_default($t_args, 'null.check', false);
    $nullable = [];
    if (is_bool($nullCheck)) {
        foreach ($input as $name => $type) {
            $nullable[$name] = $nullCheck;
        }
    } elseif (is_array($nullCheck)) {
        foreach ($input as $name => $type) {
            $nullable[$name] = false;
        }

        foreach ($nullCheck as $index => $n) {
            grokit_assert(is_string($n), 'BloomFilter null.check has invalid value at position ' . $index);
            grokit_assert(
                array_key_exists($n, $nullable),
                'BloomFilter null.check has unknown input ' . $n . ' at position ' . $index
            );
            $nullable[$n] = true;
        }
    } else {
        grokit_error('BloomFilter null.check must be boolean or list of inputs to check for nulls');
    }

    $debug = get_default($t_args, 'debug', 0);

    // Calculate the number of bits set for every possible value of a byte
    $nBits = [];
    for ($i = 0; $i < 256; $i++) {
        $n = $i;
        $b = 0;
        while ($n > 0) {
            $n &= $n - 1; // clears the lowest set bit
            $b++;
        }
        $nBits[$i] = $b;
    }

    $className = generate_name('BloomFilter');
?>

class <?= $className ?> {
    // The sizes are computed by the C++ compiler from the exponent so that
    // they stay exact integers; echoing a PHP float would switch to
    // scientific notation for large exponents.
    static constexpr size_t BITS = static_cast<size_t>(1) << <?= $exp ?>;
    static constexpr size_t BYTES = (BITS + 7) / 8;
    static constexpr size_t MASK = BITS - 1;
    static constexpr std::array<unsigned char, 256> BITS_SET = {
        <?= implode(', ', $nBits) ?>

    };
    static constexpr std::array<unsigned char, 8> BIT_MASKS = {
        0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80
    };

    // Number of items passed to AddItem.
    size_t count;

    std::array<unsigned char, BYTES> set;

public:
    <?= $className ?> () : count(0), set() {
        for( size_t i = 0; i < BYTES; i++ ) { //>
            set[i] = 0;
        }
    }

    ~<?= $className ?> () { }

    void AddItem( <?= const_typed_ref_args($input) ?> ) {
        count++;
<?php foreach ($nullable as $name => $check) {
    if ($check) { ?>
        if( IsNull( <?= $name ?> ) ) return;
<?php } // if checking for nulls
} // foreach input ?>

        size_t hashVal = H_b;
<?php foreach ($input as $name => $type) { ?>
        hashVal = CongruentHash(Hash(<?= $name ?> ), hashVal);
<?php } // foreach input ?>
        hashVal = hashVal & MASK;

        // The upper bits select the byte, the low three bits the bit in it.
        const size_t bucket = hashVal >> 3;
        const size_t bucket_index = hashVal & 0x07;
        const unsigned char mask = BIT_MASKS[bucket_index];
        set[bucket] |= mask;
    }

    void AddState( <?= $className ?> & o ) {
        count += o.count;
        for( size_t i = 0; i < BYTES; i++ ) { //>
            set[i] |= o.set[i];
        }
    }

    void GetResult( <?= $outputType ?> & <?= $outputName ?> ) {
        size_t nBitsSet = 0;
        constexpr long double bits = static_cast<long double>(BITS);
        for( size_t i = 0; i < BYTES; i++ ) { //>
            nBitsSet += BITS_SET[set[i]];
        }
        long double bitsSet = static_cast<long double>(nBitsSet);

        // Declared outside the branches so the debug output below can use it.
        long double cardinality;
        if( nBitsSet == BITS ) {
            // All Bits set, just give the cardinality as an estimate.
            cardinality = static_cast<long double>(count);
            <?= $outputName ?> = count;
        } else {
            cardinality = - bits * std::log(1 - (bitsSet / bits));
            <?= $outputName ?> = cardinality;
        }

<?php if ($debug > 0) { ?>
        std::cout << "BloomFilter:"
            << " bitsSet(" << bitsSet << ")"
            << " bits(" << bits << ")"
            << " cardinality(" << cardinality << ")"
            << " output(" << <?= $outputName ?> << ")"
            << std::endl; //>
<?php } // if debugging enabled ?>
    }
};

// Storage for static members
constexpr std::array<unsigned char, 256> <?= $className ?>::BITS_SET;
constexpr std::array<unsigned char, 8> <?= $className ?>::BIT_MASKS;

<?php
    $system_headers = ['cmath', 'array'];
    if ($debug > 0) {
        $system_headers[] = 'iostream';
    }

    return [
        'kind' => 'GLA',
        'name' => $className,
        'input' => $input,
        'output' => $output,
        'result_type' => 'single',
        'user_headers' => ['HashFunctions.h'],
        'system_headers' => $system_headers,
    ];
}
/**
 * Generates a GLA that computes the average of each input.
 *
 * Inputs and outputs are paired by position and must be equal in number.
 * Numeric inputs are summed in a `long double`; other types are summed in
 * their own type. An output whose type is null becomes DOUBLE for numeric
 * inputs and the input's type otherwise.
 *
 * Template arguments:
 *  - 'debug': optional; a value above 0 adds stream headers to the result.
 *
 * @param array $t_args template arguments, see above
 * @param array $input  input name => type
 * @param array $output output name => type (null types are filled in)
 * @return array GLA description ('kind', 'name', 'input', 'output', ...)
 */
function Average(array $t_args, array $input, array $output)
{
    $className = generate_name('Average');

    grokit_assert(
        \count($input) == \count($output),
        'Average must have the same number of inputs and outputs'
    );

    $outToIn = [];        // output name => input name
    $internalTypes = [];  // input name => C++ type of its running sum
    $internalInit = [];   // input name => initializer of its running sum

    reset($output);
    foreach ($input as $name => $type) {
        $outKey = key($output);
        $outToIn[$outKey] = $name;

        if ($type->is('numeric')) {
            $internalTypes[$name] = 'long double';
            $internalInit[$name] = '0.0';
        } else {
            $internalTypes[$name] = $type;
            $internalInit[$name] = '';
        }

        if (is_null(current($output))) {
            if ($type->is('numeric')) {
                $output[$outKey] = lookupType('base::DOUBLE');
            } else {
                $output[$outKey] = $type;
            }
        }

        next($output);
    }

    $countType = 'uint64_t';

    $debug = get_default($t_args, 'debug', 0);
?>

class <?= $className ?> {
private:
    <?= $countType ?> count; // keeps the number of tuples aggregated

<?php foreach ($internalTypes as $name => $type) { ?>
    <?= $type ?> sum_<?= $name ?>;
<?php } // foreach internal value ?>

public:
    <?= $className ?> () :
        count(0)
<?php foreach ($internalInit as $name => $init) { ?>
        , sum_<?= $name ?>(<?= $init ?>)
<?php } // foreach internal initializer ?>
    {}

    void AddItem(<?= const_typed_ref_args($input) ?> ) {
        count++;
<?php foreach ($input as $name => $type) { ?>
        sum_<?= $name ?> += <?= $name ?>;
<?php } // foreach input ?>
    }

    void AddState(<?= $className ?> & o){
        count += o.count;
<?php foreach ($input as $name => $type) { ?>
        sum_<?= $name ?> += o.sum_<?= $name ?>;
<?php } // foreach input ?>
    }

    // we only support one tuple as output
    void GetResult(<?= typed_ref_args($output) ?> ) const {
        if( count > 0 ) {
<?php foreach ($output as $name => $type) { ?>
            <?= $name ?> = (sum_<?= $outToIn[$name] ?>) / count;
<?php } // foreach output ?>
        } else {
            // Nothing was aggregated: report each output's own (initial) sum.
            // The input is looked up per output so that outputs do not all
            // read the sum of the last input.
<?php foreach ($output as $name => $type) { ?>
            <?= $name ?> = sum_<?= $outToIn[$name] ?>;
<?php } // foreach output ?>
        }
    }
};

<?php
    $sys_headers = ['cinttypes'];
    if ($debug > 0) {
        $sys_headers[] = 'iostream';
        $sys_headers[] = 'sstream';
    }

    return [
        'kind' => 'GLA',
        'name' => $className,
        'system_headers' => $sys_headers,
        'input' => $input,
        'output' => $output,
        'result_type' => 'single',
    ];
}
/**
 * Generates an iterable, fragmented GLA that labels the connected components
 * of a graph with a union-find structure held in static armadillo vectors.
 *
 * The first pass (iteration 0) only determines the largest node ID. The
 * vectors are allocated in ShouldIterate, and later passes union the two
 * endpoints of every edge. Iteration continues while a pass still merged
 * components and the iteration limit has not been reached.
 *
 * @param array $t_args  template arguments (not used)
 * @param array $inputs  exactly two inputs: the endpoints of an edge
 * @param array $outputs exactly two outputs: the node and its component
 * @return array GLA description ('kind', 'name', 'input', 'output', ...)
 */
function CCompUnionFindLPFrag($t_args, $inputs, $outputs)
{
    // Class name is randomly generated.
    $className = generate_name('CCompUnionFindLPFrag');

    // array_combine below needs exactly two entries on each side.
    grokit_assert(\count($inputs) == 2, 'CCompUnionFindLPFrag: 2 inputs expected');
    grokit_assert(\count($outputs) == 2, 'CCompUnionFindLPFrag: 2 outputs expected');

    // Initialization of argument names.
    $inputs_ = array_combine(['s', 't'], $inputs);
    $vertex = $inputs_['s'];

    // Construction of outputs.
    $outputs_ = ['node' => $vertex, 'component' => lookupType('base::BIGINT')];
    $outputs = array_combine(array_keys($outputs), $outputs_);

    // cstdio is listed because the generated code calls printf.
    $sys_headers = ['armadillo', 'algorithm', 'cstdio'];
    $user_headers = [];
    $lib_headers = [];
    $libraries = ['armadillo'];
    $properties = [];
    $extra = [];
    $result_type = ['fragment'];
?>

using namespace arma;
using namespace std;

class <?= $className ?>;

<?php
    // Looked up after the forward declaration above has been emitted.
    $constantState = lookupResource(
        'graph::CCompUnionFindLPFrag_Constant_State',
        ['className' => $className]
    );
?>

class <?= $className ?> {
public:
    // The constant state for this GLA.
    using ConstantState = <?= $constantState ?>;

    // The current and final indices of the result for the given fragment.
    using Iterator = std::pair<uint64_t, uint64_t>;

    // The number of iterations to perform, not counting the initial set-up.
    static const constexpr int kIterations = 10;

    // The work is split into chunks of this size before being partitioned.
    static const constexpr int kBlock = 32;

    // The maximum number of fragments to use.
    static const constexpr int kMaxFragments = 64;

private:
    // For every node, its parent in the union-find forest.
    static arma::rowvec node_component;

    // For every root, the number of nodes in its component.
    static arma::rowvec component_size;

    // The typical constant state for an iterable GLA.
    const ConstantState& constant_state;

    // The number of unique nodes seen.
    uint64_t num_nodes;

    // The current iteration.
    int iteration;

    // The number of fragments for the result.
    int num_fragments;

    // The number of unions that merged two components during this pass;
    // used to decide whether another iteration is needed.
    uint64_t connections;

public:
    <?= $className ?> (const <?= $constantState ?> & state)
        : constant_state(state),
          num_nodes(state.num_nodes),
          iteration(state.iteration),
          num_fragments(0),
          connections(0) {
    }

    uint64_t Find(uint64_t node_id){
        // use path compression here
        while (node_id != node_component(node_id)){
            node_component(node_id) = node_component(node_component(node_id));
            node_id = node_component(node_id);
        }
        return node_id;
    }

    void Union(uint64_t pid, uint64_t qid){
        // find their root
        pid = Find(pid);
        qid = Find(qid);
        if (pid == qid)
            return;

        ++ connections;
        // Attach the smaller component below the root of the larger one.
        uint64_t psz = component_size(pid), qsz = component_size(qid);
        if (psz > qsz){
            node_component(qid) = pid;
            component_size(pid) += qsz;
        }else{
            node_component(pid) = qid;
            component_size(qid) += psz;
        }
    }

    // The first pass records the largest node ID; later passes union the
    // endpoints of the edge.
    void AddItem(<?= const_typed_ref_args($inputs_) ?> ) {
        if (iteration == 0) {
            num_nodes = max((uint64_t) max(s, t), num_nodes);
            return;
        } else{
            Union((uint64_t) s, (uint64_t) t);
        }
    }

    // Merges the largest node ID (first pass) or the number of unions
    // performed (later passes).
    void AddState(<?= $className ?> &other) {
        if (iteration == 0)
            num_nodes = max(num_nodes, other.num_nodes);
        else
            connections += other.connections;
    }

    // Advances the iteration counter; allocates the shared vectors after the
    // first pass.
    bool ShouldIterate(ConstantState& state) {
        state.iteration = ++iteration;
        printf("Entering ShouldIterate. connections: %lu, iteration: %d\n", connections, iteration);
        if (iteration == 1) { // allocate memory
            // num_nodes is incremented because IDs are 0-based.
            state.num_nodes = ++num_nodes;
            // Allocating space can't be parallelized.
            node_component.set_size(num_nodes);
            for (uint64_t i = 0; i < num_nodes; ++ i){
                node_component(i) = i;
            }
            component_size.set_size(num_nodes);
            component_size.fill(1);
            return true;
        } else {
            return connections > 0 && iteration < kIterations + 1;
        }
    }

    int GetNumFragments() {
        uint64_t size = (num_nodes - 1) / kBlock + 1; // num_nodes / kBlock rounded up.
        num_fragments = (iteration == 0) ? 0 : min(size, (uint64_t) kMaxFragments);
        printf("num_nodes: %lu, size: %lu, num_fragments: %d\n", num_nodes, size, num_fragments);
        return num_fragments;
    }

    // Computes the inclusive range of node IDs covered by the given fragment.
    Iterator* Finalize(int fragment) {
        uint64_t count = num_nodes;
        // The ordering of operations is important. Don't change it.
        uint64_t first = fragment * (count / kBlock) / num_fragments * kBlock;
        uint64_t final = (fragment == num_fragments - 1)
                       ? count - 1
                       : (fragment + 1) * (count / kBlock) / num_fragments * kBlock - 1;
        // fragment is an int, so it is printed with %d.
        printf("fragment: %d\tcount: %lu\tfirst: %lu\tfinal: %lu\n", fragment, count, first, final);
        return new Iterator(first, final);
    }

    bool GetNextResult(Iterator* it, <?= typed_ref_args($outputs_) ?> ) {
        // Same condition as ShouldIterate: no results are produced while
        // another iteration is still going to happen.
        if (connections > 0 && iteration < kIterations + 1){
            printf("I need more iterations, because connections: %lu, iteration: %d\n", connections, iteration);
            return false;
        }

        if (it->first > it->second)
            return false;

        node = it->first++;
        component = Find(node);
        return true;
    }
};

// Initialize the static member types.
arma::rowvec <?= $className ?>::node_component;
arma::rowvec <?= $className ?>::component_size;

typedef <?= $className ?>::Iterator <?= $className ?>_Iterator;

<?php
    return [
        'kind' => 'GLA',
        'name' => $className,
        'system_headers' => $sys_headers,
        'user_headers' => $user_headers,
        'lib_headers' => $lib_headers,
        'libraries' => $libraries,
        'properties' => $properties,
        'extra' => $extra,
        'iterable' => true,
        'input' => $inputs,
        'output' => $outputs,
        'result_type' => $result_type,
        'generated_state' => $constantState,
    ];
}
/**
 * Generates a GLA that collects its input into an unordered multimap.
 *
 * The first `$t_args['split']` inputs are the key attributes, which are
 * chained into a single hash; the remaining inputs are stored as the value
 * tuple. The GLA is produced as a state.
 *
 * @param array $t_args  template arguments; 'split' is the number of keys
 * @param array $inputs  input name => type
 * @param array $outputs passed through unchanged
 * @return array GLA description ('kind', 'name', 'input', 'output', ...)
 */
function Multi_Hash(array $t_args, array $inputs, array $outputs)
{
    // Class name randomly generated
    $className = generate_name('Hash');

    // Leading inputs form the key, the rest form the stored value.
    $keyCount = $t_args['split'];
    $keyAttrs = array_slice($inputs, 0, $keyCount);
    $valueAttrs = array_slice($inputs, $keyCount);
?>

class <?= $className ?>;

class <?= $className ?> {
public:
    // The standard hashing for Grokit is used, which returns a uint64_t.
    using Key = uint64_t;

    // The value type for the map that contains the various values passed through.
    using Tuple = std::tuple<<?= typed($valueAttrs) ?> >;

    // The map used, which must be a multimap.
    using Map = std::unordered_multimap<uint64_t, Tuple>;

private:
    // The container that gathers the input.
    Map map;

public:
    <?= $className ?> () {}

    void AddItem(<?= const_typed_ref_args($inputs) ?> ) {
        auto key = ChainHash(<?= args($keyAttrs) ?> );
        auto val = std::make_tuple(<?= args($valueAttrs) ?> );
        map.insert(std::make_pair(key, val));
    }

    void AddState(<?= $className ?> & other) {
        map.insert(other.map.begin(), other.map.end());
    }

    const Map& GetMap() const {
        return map;
    }

    static uint64_t ChainHash(<?= const_typed_ref_args($keyAttrs) ?> ) {
        uint64_t offset = 1;
<?php foreach (array_keys($keyAttrs) as $attrName) { ?>
        offset = CongruentHash(Hash(<?= $attrName ?> ), offset);
<?php } // foreach key attribute ?>
        return offset;
    }
};

<?php
    return [
        'kind' => 'GLA',
        'name' => $className,
        'system_headers' => ['map', 'tuple', 'unordered_map'],
        'user_headers' => [],
        'lib_headers' => ['HashFct.h'],
        'libraries' => [],
        'properties' => [],
        'extra' => ['keys' => $keyAttrs, 'vals' => $valueAttrs],
        'iterable' => false,
        'input' => $inputs,
        'output' => $outputs,
        'result_type' => ['state'],
    ];
}