/**
 * Scans $code and emits it piece by piece through $this->text_out().
 *
 * The scanner is driven by $this->state, which persists between calls:
 *  - S_NONE:     no token is open; the next character decides what starts.
 *  - S_COMMENT1: opened by two dashes, consumed up to and including the next
 *                newline (or to the end of $code when there is none).
 *  - S_COMMENT2: opened by slash-star, consumed up to and including the
 *                closing star-slash (or to the end of $code).
 *  - S_KEYWORD:  opened by a character accepted by is_identifier_open() and
 *                extended while is_identifier() accepts the next character.
 *                If $this->keywords->found() rejects the collected word, the
 *                state, open_state and close_state are all reset to S_NONE.
 *  - S_STRING:   opened by a single quote, consumed up to and including the
 *                next single quote (or to the end of $code).
 *
 * Before every text_out() call, $this->open_state holds the state that was
 * just opened and $this->close_state holds the state that was just closed
 * (S_NONE when nothing was closed).
 *
 * NOTE(review): this method never sets $this->state back to S_NONE after a
 * token closes; presumably text_out() does that based on close_state — confirm.
 *
 * String offsets use the bracket syntax: the curly-brace form ($code{$i}) was
 * deprecated in PHP 7.4 and is a parse error since PHP 8.0.
 *
 * @param string $code Text to scan. Taken by reference but never modified here.
 * @return void
 */
function highlight(&$code)
{
    $l = strlen($code);
    $out = '';
    $i = 0;

    while ($i < $l) {
        if ($this->state == S_NONE) {
            // Look at the current character and one character of lookahead.
            $ch = $code[$i];
            $next_ch = ($i + 1 < $l) ? $code[$i + 1] : '';

            if ($ch === '-' && $next_ch === '-') {
                $this->state = S_COMMENT1;
                $out = $ch . $next_ch;
                $i += 2;
            } elseif ($ch === '/' && $next_ch === '*') {
                $this->state = S_COMMENT2;
                $out = $ch . $next_ch;
                $i += 2;
            } elseif (is_identifier_open($ch)) {
                $this->state = S_KEYWORD;
                $out = $ch;
                $i++;
            } elseif ($ch === '\'') {
                $this->state = S_STRING;
                $out = $ch;
                $i++;
            } else {
                // Plain character: emitted on its own; $i advances at the loop end.
                $out = $ch;
            }

            $this->open_state = $this->state;
            $this->close_state = S_NONE;
        }

        if ($this->state != S_NONE) {
            switch ($this->state) {
                case S_COMMENT1:
                    $j = strpos($code, "\n", $i);
                    if ($j === false) {
                        // Unterminated: take the rest; the state stays open.
                        $j = $l - 1;
                    } else {
                        $this->close_state = $this->state;
                    }
                    $out .= substr($code, $i, $j - $i + 1);
                    $i = $j;
                    break;

                case S_COMMENT2:
                    $j = strpos($code, '*/', $i);
                    if ($j === false) {
                        // Unterminated: take the rest; the state stays open.
                        $j = $l - 1;
                    } else {
                        $this->close_state = $this->state;
                    }
                    // +2 so that both characters of the terminator are included.
                    $out .= substr($code, $i, $j + 1 - $i + 1);
                    $i = $j + 1;
                    break;

                case S_KEYWORD:
                    $j = $i;
                    while ($j < $l && is_identifier($code[$j])) {
                        $j++;
                    }
                    // A word always ends where the identifier characters end.
                    $this->close_state = $this->state;
                    $out .= substr($code, $i, $j - $i);
                    // Step back one: the loop-end increment lands on the first
                    // character after the word.
                    $i = $j - 1;
                    if (!$this->keywords->found($out)) {
                        // Not a known keyword: emit it as plain text.
                        $this->state = S_NONE;
                        $this->open_state = S_NONE;
                        $this->close_state = S_NONE;
                    }
                    break;

                case S_STRING:
                    $j = $i;
                    while ($j < $l) {
                        if ($code[$j] === '\'') {
                            $this->close_state = $this->state;
                            break;
                        }
                        $j++;
                    }
                    $out .= substr($code, $i, $j - $i + 1);
                    $i = $j;
                    break;
            }
        }

        $this->text_out($out);
        $i++;
    }
}
/**
 * Checks whether a value is a '/'-separated path whose every segment is
 * accepted by is_identifier().
 *
 * @param mixed $str         Candidate value; anything that is not a string is rejected.
 * @param int   $slash_count Exact number of '/' separators required, or -1
 *                           (the default) to accept any number.
 * @return bool True when $str is a string, has the required number of
 *              separators (if constrained) and all segments pass is_identifier().
 */
function is_identifier_path($str, $slash_count = -1)
{
    if (!is_string($str)) {
        return false;
    }

    $segments = explode('/', $str);
    $segmentTotal = count($segments);

    // N separators always produce N + 1 segments.
    $countIsConstrained = ($slash_count !== -1);
    if ($countIsConstrained && $segmentTotal !== $slash_count + 1) {
        return false;
    }

    for ($idx = 0; $idx < $segmentTotal; $idx++) {
        if (!is_identifier($segments[$idx])) {
            return false;
        }
    }

    return true;
}
/**
 * GI that generates data in clusters, using a specified distribution for each
 * cluster.
 *
 * This GI requires the following template arguments:
 *  - 'n' or 0
 *      The number of tuples to generate. Note: this value is per task.
 *      The total number of tuples generated will be n_tasks * n
 *  - 'centers' or 1
 *      A list of configuration for the centers.
 *
 * The following template arguments are optional:
 *  - 'outputs'
 *      If the outputs of the GI are not given implicitly, they can be
 *      specified in this template argument. The number of dimensions will
 *      be determined by the number of outputs.
 *
 *      All output types must be numeric real types. The default type for
 *      outputs is DOUBLE.
 *  - 'dist.lib' = 'std'
 *      Which library to use for generating distributions.
 *      Valid options are:
 *          - std
 *          - boost
 *  - 'seed' = null
 *      The seed to be used for the random number generator. This seed will
 *      be used to generate the seed for each task, and different runs with
 *      the same seed will produce the same data.
 *  - 'compute.sets' = 1
 *      The number of sets of tuples to compute at once.
 *      (As implemented, the default is ceil(min(n, 10000) / nCenters),
 *      where nCenters is the LCM of the sizes of all center lists.)
 *
 * Each center configuration is a functor with the form:
 *      dist_name(args)
 * or a list of such functors.
 *
 * The following distributions are supported:
 *  { Uniform Distributions }
 *      - uniform(a = 0, b = 1)
 *
 *  { Normal Distributions }
 *      - normal(mean = 0.0, std_dev = 1.0) [ synonyms: gaussian ]
 *      - inverse_gaussian(mean = 1, shape = 1) [ synonyms: inverse_normal ]
 *
 *  { Bernoulli Distributions }
 *      - binomial(t = 1, p = 0.5)
 *      - negative_binomial(k = 1, p = 0.5)
 *
 *  { Poisson Distributions }
 *      - exponential( lambda = 1 )
 *      - gamma(alpha = 1, beta = 1) [ synonyms: Gamma ]
 *
 * @param array $t_args  Template arguments, as described above.
 * @param array $outputs Map of output name => datatype (null entries default
 *                       to base::DOUBLE).
 * @return array Descriptor with the keys 'kind', 'name', 'output',
 *               'system_headers', 'user_headers' and 'libraries'. The C++
 *               class itself is written to the output stream.
 */
function ClusterGen(array $t_args, array $outputs)
{
    $sys_headers = ['array', 'cinttypes'];
    $user_headers = [];
    $libraries = [];

    // No implicit outputs: derive them from the 'outputs' template argument.
    if (\count($outputs) == 0) {
        grokit_assert(array_key_exists('outputs', $t_args), 'ClusterGen: No outputs specified');
        $count = 0;
        foreach ($t_args['outputs'] as $type) {
            if (is_identifier($type)) {
                $type = lookupType($type);
            }
            grokit_assert(is_datatype($type), 'ClusterGen: Non data-type ' . $type . ' given as output');
            $name = 'output' . $count++;
            $outputs[$name] = $type;
        }
    }

    foreach ($outputs as $name => &$type) {
        if (is_null($type)) {
            $type = lookupType('base::DOUBLE');
        } else {
            grokit_assert($type->is('real'), 'ClusterGen: Non-real datatype ' . $type . ' given as output');
        }
    }
    // Break the reference. Without this, $type stays bound to the last element
    // of $outputs and every later `foreach (... as $type)` silently overwrites
    // that element. That was the cause of $outputs "getting modified" while
    // traversing $dists, which the former $myOutputs copy worked around.
    unset($type);

    $seed = get_default($t_args, 'seed', null);
    if ($seed !== null) {
        grokit_assert(is_int($seed), 'ClusterGen: Seed must be an integer or null.');
    } else {
        // NOTE(review): the generated code calls CongruentHash() only when a
        // seed IS given, yet this header is added only when it is not. If
        // CongruentHash is declared in HashFunctions.h this condition looks
        // inverted — confirm before changing.
        $user_headers[] = 'HashFunctions.h';
    }

    $distLib = get_default($t_args, 'dist.lib', 'std');
    $distNS = '';
    switch ($distLib) {
        case 'std':
            $sys_headers[] = 'random';
            $distNS = 'std';
            break;
        case 'boost':
            $sys_headers[] = 'boost/random.hpp';
            $distNS = 'boost::random';
            $libraries[] = 'boost_random-mt';
            if ($seed === null) {
                // Need random_device
                $sys_headers[] = 'boost/random/random_device.hpp';
                $libraries[] = 'boost_system-mt';
            }
            break;
        default:
            grokit_error('ClusterGen: Unknown RNG library ' . $distLib);
    }

    $distRNG = 'mt19937';
    $RNGtype = $distNS . '::' . $distRNG;

    $nTuples = get_first_key($t_args, ['n', '0']);
    grokit_assert(is_int($nTuples), 'ClusterGen: the number of tuples to be produced must be an integer.');

    $centers = get_first_key($t_args, ['centers', 1]);
    grokit_assert(is_array($centers), 'ClusterGen: centers must be an array of functors');

    // Translates one center functor into [C++ distribution type, constructor args].
    $handleDist = function ($name, $args, $oType) use ($distNS) {
        $distName = '';
        $distArgs = [];

        switch ($name) {
            case 'gaussian':
            case 'normal':
                $distName = $distNS . '::' . 'normal_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 2, 'ClusterGen: Normal distribution takes at most 2 arguments, ' . \count($args) . ' given');
                $mean = get_default($args, ['mean', 0], 0.0);
                $sigma = get_default($args, ['std_dev', 'sigma', 1], 1.0);
                grokit_assert(is_numeric($mean), 'ClusterGen: mean parameter of normal distribution must be a real number.');
                grokit_assert(is_numeric($sigma), 'ClusterGen: sigma parameter of normal distribution must be a real number.');
                $mean = floatval($mean);
                $sigma = floatval($sigma);
                $distArgs = [$mean, $sigma];
                break;

            case 'binomial':
                $distName = $distNS . '::' . 'binomial_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 2, 'ClusterGen: Binomial distribution takes at most 2 arguments, ' . \count($args) . ' given');
                $t = get_default($args, ['t', 0], 1);
                $p = get_default($args, ['p', 1], 0.5);
                grokit_assert(is_int($t), 'ClusterGen: t parameter of binomial distribution must be an integer.');
                grokit_assert(is_numeric($p), 'ClusterGen: p parameter of binomial distribution must be a real number.');
                $p = floatval($p);
                grokit_assert($p >= 0 && $p <= 1, 'ClusterGen: p parameter of binomial distribution must be in the range [0, 1]');
                grokit_assert($t >= 0, 'ClusterGen: t parameter of binomial distribution must be in the range [0, +inf)');
                $distArgs = [$t, $p];
                break;

            case 'negative_binomial':
                $distName = $distNS . '::' . 'negative_binomial_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 2, 'ClusterGen: Negative Binomial distribution takes at most 2 arguments, ' . \count($args) . ' given');
                $k = get_default($args, ['k', 0], 1);
                $p = get_default($args, ['p', 1], 0.5);
                grokit_assert(is_int($k), 'ClusterGen: k parameter of negative binomial distribution must be an integer.');
                grokit_assert(is_numeric($p), 'ClusterGen: p parameter of negative binomial distribution must be a real number.');
                $p = floatval($p);
                grokit_assert($p > 0 && $p <= 1, 'ClusterGen: p parameter of negative binomial distribution must be in the range (0, 1]');
                grokit_assert($k > 0, 'ClusterGen: k parameter of negative binomial distribution must be in the range (0, +inf)');
                $distArgs = [$k, $p];
                break;

            case 'inverse_gaussian':
            case 'inverse_normal':
                grokit_assert(\count($args) <= 2, 'ClusterGen: Inverse Gaussian distribution takes at most 2 arguments, ' . \count($args) . ' given');
                $mean = get_default($args, ['mean', 0], 1);
                $shape = get_default($args, ['shape', 1], 1);
                grokit_assert(is_numeric($mean), 'ClusterGen: mean parameter of inverse gaussian distribution must be a real number.');
                grokit_assert(is_numeric($shape), 'ClusterGen: shape parameter of inverse gaussian distribution must be a real number.');
                $mean = floatval($mean);
                $shape = floatval($shape);
                grokit_assert($mean > 0, 'ClusterGen: mean of inverse gaussian distribution must be in range (0, inf)');
                grokit_assert($shape > 0, 'ClusterGen: shape of inverse gaussian distribution must be in range (0, inf)');
                // This distribution is provided by a project resource rather
                // than by the chosen RNG library namespace.
                $gen_args = ['output' => $oType, 'ns' => $distNS];
                $distName = strval(lookupResource('datagen::InverseGaussianGen', $gen_args));
                $distArgs = [$mean, $shape];
                break;

            case 'uniform':
                $distName = $distNS . '::' . 'uniform_real_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 2, 'ClusterGen: Uniform distribution takes at most 2 arguments, ' . \count($args) . ' given');
                $a = get_default($args, ['a', 0], 0.0);
                $b = get_default($args, ['b', 1], 1.0);
                grokit_assert(is_numeric($a), 'ClusterGen: `a` parameter of uniform distribution must be a real number.');
                grokit_assert(is_numeric($b), 'ClusterGen: `b` parameter of uniform distribution must be a real number.');
                $a = floatval($a);
                $b = floatval($b);
                grokit_assert($b >= $a, 'ClusterGen: `b` parameter of uniform distribution must be >= the `a` parameter.');
                $distArgs = [$a, $b];
                break;

            case 'exponential':
                $distName = $distNS . '::' . 'exponential_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 1, 'ClusterGen: Exponential distribution takes at most 1 argument.');
                $lambda = get_default($args, ['lambda', 0], 1.0);
                grokit_assert(is_numeric($lambda), 'ClusterGen: `lambda` parameter of exponential distribution must be a real number.');
                $lambda = floatval($lambda);
                grokit_assert($lambda > 0, 'ClusterGen: `lambda` parameter of exponential distribution must be in range (0, +inf).');
                $distArgs = [$lambda];
                break;

            case 'gamma':
            case 'Gamma':
                $distName = $distNS . '::' . 'gamma_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 2, 'ClusterGen: Gamma distribution takes at most 2 arguments.');
                $alpha = get_default($args, ['alpha', 0], 1.0);
                // Was `det_default`, an undefined function.
                $beta = get_default($args, ['beta', 1], 1.0);
                grokit_assert(is_numeric($alpha), 'ClusterGen: `alpha` parameter of gamma distribution must be a real number.');
                grokit_assert(is_numeric($beta), 'ClusterGen: `beta` parameter of gamma distribution must be a real number.');
                $alpha = floatval($alpha);
                $beta = floatval($beta);
                $distArgs = [$alpha, $beta];
                break;

            default:
                grokit_error('ClusterGen: Unknown distribution ' . $name . ' given for center');
        }

        return [$distName, $distArgs];
    };

    // One entry per center description; each holds one distribution per center
    // in that description. The i-th description is paired with the i-th output
    // by walking the internal array pointer of $outputs.
    $dists = [];
    $distArgs = [];
    $count = 0;
    $oType = '';
    $nCenters = 1;
    reset($outputs);
    foreach ($centers as $val) {
        $cluster = $val;
        if (is_functor($val)) {
            $cluster = [$val];
        } elseif (is_array($val)) {
            $nCenters = lcm($nCenters, \count($val));
        } else {
            grokit_error('ClusterGen: center descriptions must be functors or list of functors');
        }

        $curDist = [];
        $curDistArgs = [];
        $curDistName = 'distribution' . $count++;
        $oType = strval(current($outputs));
        $iCount = 0;
        foreach ($cluster as $functor) {
            grokit_assert(is_functor($functor), 'ClusterGen: center description must be a functor');
            $vName = $curDistName . '_' . $iCount++;
            $ret = $handleDist($functor->name(), $functor->args(), $oType);
            $curDist[$vName] = $ret[0];
            $curDistArgs[$vName] = $ret[1];
        }
        next($outputs);

        $dists[$curDistName] = $curDist;
        $distArgs[$curDistName] = $curDistArgs;
    }

    // Determine the default number of sets to compute at a time.
    // We want to generate either $nTuples or 10,000 tuples, depending on which
    // is less.
    $defaultSetsTarget = min($nTuples, 10000);
    $setsToTarget = intval(ceil($defaultSetsTarget / $nCenters));
    $computeSets = get_default($t_args, 'compute.sets', $setsToTarget);
    grokit_assert(is_int($computeSets) && $computeSets > 0, 'ClusterGen: compute.sets must be a positive integer, ' . $computeSets . ' given');

    $className = generate_name('ClusterGen');
?>
class <?php echo $className; ?> {
    // The number of tuples to produce per task
    static constexpr size_t N = <?php echo $nTuples; ?> ;
    static constexpr size_t CacheSize = <?php echo $computeSets * $nCenters; ?> ;

    // Typedefs
    typedef std::tuple<<?php echo array_template('{val}', ', ', $outputs); ?> > Tuple;
    typedef std::array<Tuple, CacheSize> TupleArray;
    typedef TupleArray::const_iterator TupleIterator;
    typedef <?php echo $RNGtype; ?> RandGen;

    // Number of tuples produced.
    uintmax_t count;

    // Cache a number of outputs for efficiency
    TupleArray cache;
    TupleIterator cacheIt;

    // Random number generator
    RandGen rng;

    // Distributions
<?php
    foreach ($dists as $name => $list) {
        foreach ($list as $vName => $distType) {
?>
    <?php echo $distType; ?> <?php echo $vName; ?> ;
<?php
        } // foreach distribution
    } // foreach cluster set
?>

    // Helper function to generate tuples.
    void GenerateTuples(void) {
<?php
    $tIndex = 0;
    foreach ($dists as $name => $list) {
        $lCenters = \count($list);
        // $nCenters has been defined to be the LCM of the number of centers in
        // any column, so $lCenters is guaranteed to divide evenly into
        // CacheSize
?>
        for( size_t index = 0; CacheSize > index; index += <?php echo $lCenters; ?> ) {
<?php
        $index = 0;
        foreach ($list as $vName => $distType) {
?>
            std::get<<?php echo $tIndex; ?> >(cache[index + <?php echo $index; ?> ]) = <?php echo $vName; ?> (rng);
<?php
            $index++;
        } // foreach value in tuple
?>
        }
<?php
        $tIndex++;
    } // foreach distribution
?>
        cacheIt = cache.cbegin();
    }

public:
    // Constructor
    <?php echo $className; ?> ( GIStreamProxy & _stream ) :
        cache()
        , cacheIt()
        , count(0)
        , rng()
<?php
    foreach ($dists as $name => $list) {
        foreach ($list as $vName => $distType) {
?>
        , <?php echo $vName; ?> (<?php echo implode(', ', $distArgs[$name][$vName]); ?> )
<?php
        } // foreach distribution
    } // foreach cluster set
?>
    {
<?php if (is_null($seed)) { ?>
        <?php echo $distNS; ?> ::random_device rd;
<?php } // if seed is null ?>
        RandGen::result_type seed = <?php echo is_null($seed) ? 'rd()' : "CongruentHash({$seed}, _stream.get_id() )"; ?> ;
        rng.seed(seed);
        cacheIt = cache.cend();
    }

    // Destructor
    ~<?php echo $className; ?> (void) { }

    bool ProduceTuple(<?php echo typed_ref_args($outputs); ?> ) {
        if( N > count ) {
            if( cacheIt == cache.cend() ) {
                GenerateTuples();
            }
<?php
    $tIndex = 0;
    foreach ($outputs as $name => $outType) {
?>
            <?php echo $name; ?> = std::get<<?php echo $tIndex; ?> >(*cacheIt);
<?php
        $tIndex++;
    } // foreach output
?>
            ++cacheIt;
            ++count;
            return true;
        } else {
            return false;
        }
    }
};
<?php
    return array(
        'kind' => 'GI',
        'name' => $className,
        'output' => $outputs,
        'system_headers' => $sys_headers,
        'user_headers' => $user_headers,
        'libraries' => $libraries,
    );
}
/**
 * GLA that keeps every tuple tied for the "most extreme" value of a set of
 * key attributes.
 *
 * Template arguments:
 *  - 'extremes' (required): map of input name => direction. The direction is
 *    matched case-insensitively against MIN, MINIMUM, -, < (minimise) or
 *    MAX, MAXIMUM, +, > (maximise).
 *  - 'inputs' (used only when $inputs is empty): list of datatypes, which are
 *    given the generated names et_val0, et_val1, ...
 *  - 'debug' = 0: when positive, extra system headers are requested.
 *
 * Outputs are paired with inputs positionally and take on the input types.
 *
 * @param array $t_args  Template arguments, as described above.
 * @param array $inputs  Map of input name => datatype.
 * @param array $outputs Map of output name => datatype.
 * @return array Descriptor with the keys 'kind', 'name', 'input', 'output',
 *               'result_type' and 'system_headers'. The C++ class itself is
 *               written to the output stream.
 */
function ExtremeTuples(array $t_args, array $inputs, array $outputs)
{
    $extremes = get_first_key($t_args, ['extremes']);
    $nExt = \count($extremes);
    grokit_assert($nExt > 0, 'No extremes specified for ExtremeTuples GLA.');

    if (\count($inputs) == 0) {
        grokit_assert(array_key_exists('inputs', $t_args), 'No arguments specified for ExtremeTuples GLA.');
        $count = 0;
        foreach ($t_args['inputs'] as $type) {
            if (is_identifier($type)) {
                $type = lookupType(strval($type));
            }
            grokit_assert(is_datatype($type), 'Only datatypes can be specified as inputs to ' . 'the ExtremeTuples GLA');
            // The counter must advance; otherwise every input is named
            // 'et_val0' and each one overwrites the previous entry.
            $name = 'et_val' . $count++;
            $inputs[$name] = $type;
        }
    }

    // Pair the i-th output with the i-th input by walking the internal array
    // pointer of $outputs alongside the iteration over $inputs.
    $outputMap = [];
    reset($outputs);
    foreach ($inputs as $name => $type) {
        $oKey = key($outputs);
        $outputs[$oKey] = $type;
        $outputMap[$oKey] = $name;
        next($outputs);
    }

    grokit_assert($nExt <= \count($inputs), 'There can not be more extreme values than there are inputs!');

    $minOpts = ['MIN', 'MINIMUM', '-', '<'];
    $maxOpts = ['MAX', 'MAXIMUM', '+', '>'];

    // Case-insensitive membership test.
    $inArrayCase = function ($needle, $haystack) {
        foreach ($haystack as $item) {
            if (strcasecmp($needle, $item) == 0) {
                return true;
            }
        }
        return false;
    };

    foreach ($extremes as $name => $val) {
        grokit_assert(array_key_exists($name, $inputs), "ExtremeTuples: Expression with name " . $name . " specified as extreme not found in inputs");
    }

    // $mainAtts: attributes that take part in the comparison, in input order.
    // $minimum:  attribute name => true to minimise, false to maximise.
    $mainAtts = [];
    $minimum = [];
    foreach ($inputs as $name => $type) {
        if (!array_key_exists($name, $extremes)) {
            continue;
        }

        $mainAtts[$name] = $type;
        if ($inArrayCase($extremes[$name], $minOpts)) {
            $minimum[$name] = true;
        } elseif ($inArrayCase($extremes[$name], $maxOpts)) {
            $minimum[$name] = false;
        } else {
            grokit_error('Unknown extreme type ' . $extremes[$name] . ' specified for ' . $name);
        }
    }

    $debug = get_default($t_args, 'debug', 0);
    $className = generate_name('ExtremeTuples');
?>
class <?php echo $className; ?> {

    struct Tuple {
<?php foreach ($inputs as $name => $type) { ?>
        <?php echo $type; ?> <?php echo $name; ?> ;
<?php } // foreach input ?>

        // Default Constructor, Copy Constructor, and Copy Assignment are all
        // default
        Tuple(void) = default;
        Tuple(const Tuple &) = default;
        Tuple & operator = (const Tuple &) = default;

        Tuple(<?php echo array_template('const {val} & _{key}', ', ', $inputs); ?> ) :
            <?php echo array_template('{key}(_{key})', ', ', $inputs); ?>
 
        { }

        // operator > means that this tuple is "better" than the other tuple.
        bool operator > ( const Tuple & other ) const {
<?php
    foreach ($mainAtts as $name => $type) {
        $op1 = $minimum[$name] ? '<' : '>';
        $op2 = !$minimum[$name] ? '<' : '>';
?>
            if( <?php echo $name; ?> <?php echo $op1; ?> other.<?php echo $name; ?> )
                return true;
            else if( <?php echo $name; ?> <?php echo $op2; ?> other.<?php echo $name; ?> )
                return false;
<?php } // foreach main attribute ?>
            return false;
        }

        bool operator < ( const Tuple& other ) const {
            return other > *this;
        }

        bool operator <= (const Tuple & other ) const {
            return ! (*this > other );
        }

        bool operator >= (const Tuple & other ) const {
            return !( other > *this );
        }

        bool operator == (const Tuple & other ) const {
            bool ret = true;
<?php foreach ($mainAtts as $name => $type) { ?>
            ret &= <?php echo $name; ?> == other.<?php echo $name; ?> ;
<?php } // foreach main attribute ?>
            return ret;
        }
    }; // struct Tuple

    typedef std::vector<Tuple> TupleVector;

public:
    class Iterator {
    public:
        typedef TupleVector::const_iterator iter_type;

    private:
        iter_type begin;
        iter_type end;

    public:
        Iterator(void) = default;
        Iterator(const Iterator &) = default;
        Iterator( const iter_type & _begin, const iter_type & _end ) : begin(_begin), end(_end)
        { }
        Iterator( const iter_type && _begin, const iter_type && _end ) : begin(_begin), end(_end)
        { }

        bool GetNextResult(<?php echo typed_ref_args($outputs); ?> ) {
            if( begin != end ) {
<?php foreach ($outputs as $name => $type) { ?>
                <?php echo $name; ?> = begin-><?php echo $outputMap[$name]; ?> ;
<?php } ?>
                begin++;
                return true;
            } else {
                return false;
            }
        }
    };

private:
    uintmax_t __count; // number of tuples covered

    TupleVector tuples;

    // Iterator for multi output type
    Iterator multiIterator;

public:
    // Constructor and destructor
    <?php echo $className; ?> (void) : __count(0), tuples(), multiIterator()
    { }

    ~<?php echo $className; ?> () { }

    void AddItem( <?php echo const_typed_ref_args($inputs); ?> ) {
        ++__count;
        Tuple t(<?php echo args($inputs); ?> );

        if( tuples.empty() ) {
            tuples.push_back(t);
        } else if( t > tuples.front() ) {
            tuples.clear();
            tuples.push_back(t);
        } else if( t == tuples.front() ) {
            tuples.push_back(t);
        }
    }

    void AddState( <?php echo $className; ?> & other ) {
        if( tuples.size() == 0 ) {
            tuples.swap(other.tuples);
        } else if( other.tuples.size() == 0 ) {
            // Do nothing
        } else if( tuples.front() > other.tuples.front() ) {
            // fast path
        } else if( other.tuples.front() > tuples.front() ) {
            tuples.swap(other.tuples);
        } else {
            for( Tuple & t : other.tuples ) {
                tuples.push_back(t);
            }
        }
    }

    void Finalize( void ) {
        multiIterator = Iterator(tuples.cbegin(), tuples.cend());
    }

    bool GetNextResult(<?php echo typed_ref_args($outputs); ?> ) {
        return multiIterator.GetNextResult(<?php echo args($outputs); ?> );
    }
}; // class <?php echo $className; ?>
 
<?php
    $system_headers = ['vector', 'algorithm', 'cinttypes'];
    if ($debug > 0) {
        $system_headers = array_merge($system_headers, ['iostream', 'sstream', 'string']);
    }

    return array(
        'kind' => 'GLA',
        'name' => $className,
        'input' => $inputs,
        'output' => $outputs,
        'result_type' => 'multi',
        'system_headers' => $system_headers,
    );
}
/**
 * GLA that partitions its input by a set of grouping attributes and runs an
 * independent instance of an inner GLA for each group.
 *
 * Template arguments:
 *  - 'group' (required): map of input name => output name for the grouping
 *    attributes.
 *  - 'aggregate' (required): the inner GLA applied to each group.
 *  - 'input' (used only when $inputs is empty): datatype or list of datatypes.
 *  - 'debug' = 0, 'init.size' = 1024, 'use.mct' = true,
 *    'mct.keep.hashes' = false, 'use.fragments' = true,
 *    'fragment.size' = 2000000.
 *
 * @param array $t_args  Template arguments, as described above.
 * @param array $inputs  Map of input name => datatype.
 * @param array $outputs Map of output name => datatype.
 * @param array $states  States forwarded to the inner GLA's apply().
 * @return array GLA descriptor ('kind', 'name', headers, 'input', 'output',
 *               'result_type', ...). The C++ class itself is written to the
 *               output stream.
 */
function GroupBy(array $t_args, array $inputs, array $outputs, array $states)
{
    // Ensure we have valid inputs.
    if (\count($inputs) == 0) {
        // No inputs given, try to get them from template arguments.
        grokit_assert(array_key_exists('input', $t_args), 'No inputs given for GroupBy');
        $inputs = $t_args['input'];
        if (!is_array($inputs)) {
            $inputs = [$inputs];
        }
        foreach ($inputs as $name => &$type) {
            if (is_identifier($type)) {
                $type = lookupType(strval($type));
            }
            grokit_assert(is_datatype($type), 'Invalid type given for input ' . $name);
        }
        // Break the reference. Otherwise $type stays bound to the last element
        // of $inputs and the later by-value loops overwrite that element.
        unset($type);
    }

    grokit_assert(array_key_exists('group', $t_args), 'No groups specified for GroupBy');
    $gbyAttMap = $t_args['group'];
    grokit_assert(is_array($gbyAttMap), 'Invalid value given for groups, expected an expression name or list of expression names');
    $gbyAttMap = array_map('strval', $gbyAttMap);

    foreach ($gbyAttMap as $in => $out) {
        grokit_assert(array_key_exists($in, $inputs), 'Group ' . $in . ' not present in input');
        grokit_assert(array_key_exists($out, $outputs), 'Output Attribute ' . $out . ' for group ' . $in . ' not found in outputs');
    }

    grokit_assert(array_key_exists('aggregate', $t_args), 'No aggregate specified for GroupBy');
    $innerGLA = $t_args['aggregate'];
    grokit_assert(is_gla($innerGLA), 'Non-GLA specified as aggregate for GroupBy');

    $debug = get_default($t_args, 'debug', 0);
    $init_size = get_default($t_args, 'init.size', 1024);
    $use_mct = get_default($t_args, 'use.mct', true);
    $keepHashes = get_default($t_args, 'mct.keep.hashes', false);
    grokit_assert(is_bool($keepHashes), 'GroupBy mct.keep.hashes argument must be boolean');

    // determine the result type
    $use_fragments = get_default($t_args, 'use.fragments', true);
    $resType = $use_fragments ? ['fragment', 'multi'] : ['multi'];
    $fragSize = get_default($t_args, 'fragment.size', 2000000);

    // Always support state
    $resType[] = 'state';

    // Class name randomly generated
    $className = generate_name("GroupBy");

    // instantiate the inner GLA. input/output is derived from the main input/output
    $gbyAtts = [];
    $glaInputAtts = [];
    $glaOutputAtts = [];

    foreach ($inputs as $name => $type) {
        // Key lookup instead of a loose in_array() over array_keys(): exact,
        // and immune to PHP's loose string/number comparison rules.
        if (array_key_exists($name, $gbyAttMap)) {
            $gbyAtts[$name] = $type;
            $outputs[$gbyAttMap[$name]] = $type;
        } else {
            $glaInputAtts[$name] = $type;
        }
    }

    foreach ($outputs as $name => $type) {
        // The values of $gbyAttMap are strings (see array_map above), so
        // compare the key as a string and strictly.
        if (!in_array(strval($name), $gbyAttMap, true)) {
            $glaOutputAtts[$name] = $type;
        }
    }

    $innerGLA = $innerGLA->apply($glaInputAtts, $glaOutputAtts, $states);
    $libraries = $innerGLA->libraries();

    $innerRes = get_first_value($innerGLA->result_type(), ['multi', 'single', 'state']);

    if ($innerRes == 'state') {
        // If the result type is state, the only output is a state object
        // containing the GLA.
        $outputName = array_keys($glaOutputAtts)[0];
        $innerOutputs = [$outputName => lookupType('base::STATE', ['type' => $innerGLA])];
    } else {
        $innerOutputs = $innerGLA->output();
        grokit_assert(\count($innerOutputs) == \count($glaOutputAtts), 'Expected ' . \count($glaOutputAtts) . ' outputs from Inner GLA, got ' . \count($innerOutputs));
    }

    $constState = lookupResource('GroupByState', ['gla' => $innerGLA, 'groups' => $gbyAtts, 'debug' => $debug]);

    // constructor arguments are inherited from inner GLA
    $configurable = $innerGLA->configurable();
    $reqStates = $innerGLA->req_states();

    // We need to specially create the constructor string because
    // declaring `Type Name();` in C++ is a function declaration instead of a
    // variable declaration.
    $constructorParts = [];
    if ($configurable) {
        $constructorParts[] = 'jsonInit';
    }
    if ($innerGLA->has_state()) {
        $constructorParts[] = 'innerState';
    }
    $constructorString = \count($constructorParts) > 0 ? '(' . implode(', ', $constructorParts) . ')' : '';

    // add the outputs we got from the gla
    foreach ($innerOutputs as $name => $type) {
        grokit_assert(array_key_exists($name, $outputs), 'Inner GLA\'s outputs refer to unknown attribute ' . $name);
        grokit_assert($type !== null, 'GroupBy Inner GLA left output ' . $name . ' with no type');
        $outputs[$name] = $type;
    }

    $iterable = $innerGLA->iterable();

    // need to keep track of system includes needed
    $extraHeaders = array();

    $allocatorText = "std::allocator<std::pair<const Key, {$innerGLA}> >";
    if ($use_mct) {
        $keepHashesText = $keepHashes ? 'true' : 'false';
        $extraHeaders[] = "mct/hash-map.hpp";
        $map = "mct::closed_hash_map<Key, {$innerGLA}, HashKey, std::equal_to<Key>, {$allocatorText}, {$keepHashesText}>";
    } else {
        $extraHeaders[] = "unordered_map";
        $map = "std::unordered_map<Key, {$innerGLA}, HashKey, std::equal_to<Key>, {$allocatorText}>";
    }

    if ($debug > 0) {
        $extraHeaders[] = 'cstdio';
    }

    // NOTE(review): the generated ShouldIterate() passes `innerState` to the
    // inner GLA unconditionally, but that variable is only declared when the
    // inner GLA has state — confirm iterable GLAs always have state.
?>
class <?php echo $className; ?> {
public:
    using ConstantState = <?php echo $constState; ?> ;
<?php if ($innerGLA->has_state()) { ?>
    using InnerState = ConstantState::InnerState;
<?php } // if gla has state ?>
    using Key = ConstantState::Key;
    using HashKey = ConstantState::HashKey;
    using InnerGLA = <?php echo $innerGLA; ?> ;

    typedef <?php echo $map; ?> MapType;
    static const size_t INIT_SIZE = <?php echo $init_size; ?> ;

public:
    class Iterator {
        MapType::iterator it; // current value
        MapType::iterator end; // last value in the fragment

    public:
        Iterator() { }

        Iterator(MapType::iterator _it, MapType::iterator _end): it(_it), end(_end) {
            if( it != end ) {
<?php
    switch ($innerRes) {
        case 'multi':
?>
                it->second.Finalize();
<?php
            break;
        case 'state':
            if ($innerGLA->finalize_as_state()) {
?>
                it->second.FinalizeState();
<?php
            } // if we need to finalize as a state
            break;
    } // end switch inner restype
?>
            }
        }

        bool GetNextResult( <?php echo typed_ref_args($outputs); ?> ) {
            bool gotResult = false;
            while( it != end && !gotResult ) {
                <?php echo $innerGLA; ?> & gla = it->second;
<?php foreach ($gbyAttMap as $in => $out) { ?>
                <?php echo $out; ?> = it->first.<?php echo $in; ?> ;
<?php } // foreach grouping attribute ?>

<?php
    switch ($innerRes) {
        case 'multi':
?>
                gotResult = gla.GetNextResult( <?php echo args($innerOutputs); ?> );
                if( !gotResult ) {
                    ++it;
                    if( it != end ) {
                        it->second.Finalize();
                    }
                }
<?php
            break;
        case 'single':
?>
                gotResult = true;
                gla.GetResult(<?php echo args($innerOutputs); ?> );
                ++it;
<?php
            break;
        case 'state':
            reset($innerOutputs);
            // Assuming that $innerOutputs contains a single value that is
            // the state type.
            $oName = key($innerOutputs);
            $oType = current($innerOutputs);
?>
                gotResult = true;
                <?php echo $oName; ?> = <?php echo $oType; ?> ( &gla );
                ++it;
<?php
    } // switch inner result type
?>
            }

            return gotResult;
        }
    };

private:
    const ConstantState & constState;

<?php if ($configurable) { ?>
    const Json::Value jsonInit;
<?php } // if configurable ?>

    size_t count;

    MapType groupByMap;

    std::vector<MapType::iterator> theIterators; // the iterators, only 2 elements if multi, many if fragment
    Iterator multiIterator;

public:

    <?php echo $className; ?> (<?php if ($configurable) { ?> const Json::Value & _jsonInit, <?php } ?> const ConstantState & _constState ) :
        constState(_constState)
<?php if ($configurable) { ?>
        , jsonInit(_jsonInit)
<?php } // if configurable ?>
        , count(0)
        , groupByMap( INIT_SIZE )
        , theIterators()
        , multiIterator()
    { }

    ~<?php echo $className; ?> () {}

    void Reset(void) {
        count = 0;
        groupByMap.clear();
        theIterators.clear();
    }

    void AddItem(<?php echo array_template('const {val} & {key}', ', ', $inputs); ?> ) {
        count++;
        // check if _key is already in the map; if yes, add _value; else, add a new
        // entry (_key, _value)
        Key key(<?php echo array_template('{key}', ', ', $gbyAtts); ?> );

        MapType::iterator it = groupByMap.find(key);
        if (it == groupByMap.end()) { // group does not exist
            // create an empty GLA and insert
            // better to not add the item here so we do not have
            // to transport a large state
<?php if ($innerGLA->has_state()) { ?>
            const InnerState & innerState = constState.getConstState(key);
<?php } // if gla has state ?>
            InnerGLA gla<?php echo $constructorString; ?> ;
            auto ret = groupByMap.insert(MapType::value_type(key, gla));
            it = ret.first; // reposition
        }
        it->second.AddItem(<?php echo array_template('{key}', ', ', $glaInputAtts); ?> );
    }

    void AddState(<?php echo $className; ?> & other) {
        count += other.count;
        // scan other hash and insert or update content in this one
        for (MapType::iterator it = other.groupByMap.begin(); it != other.groupByMap.end(); ++it) {
            const Key& okey = it->first;
            <?php echo $innerGLA; ?> & ogla = it->second;

            MapType::iterator itt = groupByMap.find(okey);
            if (itt != groupByMap.end()) { // found the group
                <?php echo $innerGLA; ?> & gla = itt->second;
                gla.AddState(ogla);
            } else {
                // add the other group to this hash
                groupByMap.insert(MapType::value_type(okey, ogla));
            }
        }
    }

<?php if ($iterable) { ?>
    bool ShouldIterate(ConstantState& modibleState) {
<?php if ($debug > 0) { ?>
        fprintf(stderr, "<?php echo $className; ?> : ==== ShouldIterate ====\n");
<?php } // if debugging enabled ?>
        bool shouldIterate = false;
        for( MapType::iterator it = groupByMap.begin(); it != groupByMap.end(); ++it ) {
            const Key & key = it->first;
            InnerGLA & gla = it->second;
<?php if ($innerGLA->has_state()) { ?>
            InnerState & innerState = modibleState.getModibleState(key);
<?php } // if gla has state ?>
            bool glaRet = gla.ShouldIterate(innerState);
            shouldIterate = shouldIterate || glaRet;
<?php if ($debug > 0) { ?>
            fprintf(stderr, "<?php echo $className; ?> : Key(%s) shouldIterate(%s)\n",
                key.to_string().c_str(),
                glaRet ? "true" : "false");
<?php } // if debugging enabled ?>
        }

        return shouldIterate;
    }
<?php } // if iterable ?>

<?php if (in_array('fragment', $resType)) { ?>
    int GetNumFragments(void){
        int size = groupByMap.size();
        int sizeFrag = <?php echo $fragSize; ?> ;
        // setup the fragment boundaries
        // scan via iterator and count
        int frag=0;
        int pos=0;
        MapType::iterator it = groupByMap.begin();
        theIterators.clear();
        theIterators.push_back( it );

        // special case when size < num_fragments
        // >
        if (sizeFrag == 0){
            it = groupByMap.end();
            theIterators.push_back( it );
            return 1; // one fragment
        }

        while(it!=groupByMap.end()){
            while(it!=groupByMap.end() && pos<( frag + 1 )*sizeFrag){
                //>
                ++it;
                pos++;
            }
            theIterators.push_back( it );
            frag++;
        }

<?php if ($debug > 0) { ?>
        fprintf(stderr, "<?php echo $className; ?> : fragments(%d)\n", frag);
<?php } ?>

        return frag;
    }

    Iterator* Finalize(int fragment){
        // Call finalize on all inner GLAs in this fragment.
        MapType::iterator iter = theIterators[fragment];
        MapType::iterator iterEnd = theIterators[fragment+1];

        Iterator* rez = new Iterator(theIterators[fragment], theIterators[fragment+1] );
        return rez;
    }

    bool GetNextResult(Iterator* it, <?php echo array_template('{val} & {key}', ', ', $outputs); ?> ) {
        return it->GetNextResult(<?php echo args($outputs); ?> );
    }
<?php } // if using fragment interface ?>

    void Finalize() {
        multiIterator = Iterator( groupByMap.begin(), groupByMap.end() );

<?php if ($debug >= 1) { ?>
        fprintf(stderr, "<?php echo $className; ?> : groups(%lu) tuples(%lu)\n", groupByMap.size(), count);
<?php } ?>
    }

    bool GetNextResult(<?php echo array_template('{val} & {key}', ', ', $outputs); ?> ) {
        return multiIterator.GetNextResult( <?php echo args($outputs); ?> );
    }

    std::size_t size() const {
        return groupByMap.size();
    }

    const MapType& GetMap() const {
        return groupByMap;
    }

    bool Contains(<?php echo const_typed_ref_args($gbyAtts); ?> ) const {
        Key key(<?php echo args($gbyAtts); ?> );
        return groupByMap.count(key) > 0;
    }

    const InnerGLA& Get(<?php echo const_typed_ref_args($gbyAtts); ?> ) const {
        Key key(<?php echo args($gbyAtts); ?> );
        return groupByMap.at(key);
    }

    bool Contains(Key key) const {
        return groupByMap.count(key) > 0;
    }

    const InnerGLA& Get(Key key) const {
        return groupByMap.at(key);
    }
};

<?php if (in_array('fragment', $resType)) { ?>
typedef <?php echo $className; ?> ::Iterator <?php echo $className; ?>_Iterator;
<?php } ?>

<?php
    $sys_headers = array_merge(['iomanip', 'iostream', 'cstring'], $extraHeaders);

    return array(
        'kind' => 'GLA',
        'name' => $className,
        'system_headers' => $sys_headers,
        'user_headers' => array('HashFunctions.h'),
        'input' => $inputs,
        'output' => $outputs,
        'result_type' => $resType,
        'configurable' => $configurable,
        'generated_state' => $constState,
        'required_states' => $reqStates,
        'iterable' => $iterable,
        'properties' => ['resettable', 'finite container'],
        'libraries' => $libraries,
        'extra' => ['inner_gla' => $innerGLA],
    );
}
/**
 * Computes a SHA-256 digest (hex string) of an arbitrarily nested value.
 *
 * Arrays are hashed recursively after being sorted by key, so two arrays with
 * the same key/value pairs hash identically regardless of insertion order.
 * Every non-array value contributes a type tag followed by a payload:
 *  - GLA / GF / GT / GIST / GI / datatype / functor / identifier values
 *    contribute their own `hash()` result,
 *  - attributes contribute their string conversion,
 *  - int / float / string / bool values contribute `gettype()` and the value,
 *  - null contributes only the tag 'null'.
 * Any other value is reported through grokit_logic_error().
 *
 * @param mixed $val Value to hash.
 * @return string Hex digest produced by hash_final().
 */
function hashComplex($val)
{
    $ctx = hash_init('sha256');

    if (is_array($val)) {
        // Sort by key so that the digest does not depend on insertion order.
        ksort($val);

        hash_update($ctx, '[');
        foreach ($val as $key => $element) {
            hash_update($ctx, $key);
            hash_update($ctx, '=>');
            hash_update($ctx, hashComplex($element));
        }
        hash_update($ctx, ']');

        return hash_final($ctx);
    }

    // Tag for values that expose their own hash() method. The predicates are
    // tested in a fixed order and the first match wins.
    $objectTag = null;
    if (is_gla($val)) {
        $objectTag = 'gla';
    } elseif (is_gf($val)) {
        $objectTag = 'gf';
    } elseif (is_gt($val)) {
        $objectTag = 'gt';
    } elseif (is_gist($val)) {
        $objectTag = 'gist';
    } elseif (is_gi($val)) {
        $objectTag = 'gi';
    } elseif (is_datatype($val)) {
        $objectTag = 'datatype';
    } elseif (is_functor($val)) {
        $objectTag = 'functor';
    } elseif (is_identifier($val)) {
        $objectTag = 'identifier';
    }

    if ($objectTag !== null) {
        hash_update($ctx, $objectTag);
        hash_update($ctx, $val->hash());
    } elseif (is_attribute($val)) {
        hash_update($ctx, 'attribute');
        hash_update($ctx, strval($val));
    } elseif (is_int($val) || is_float($val) || is_string($val) || is_bool($val)) {
        hash_update($ctx, gettype($val));
        hash_update($ctx, $val);
    } elseif (is_null($val)) {
        hash_update($ctx, 'null');
    } else {
        $unknownType = is_object($val) ? get_class($val) : gettype($val);
        grokit_logic_error('Unable to hash unknown type ' . $unknownType);
    }

    return hash_final($ctx);
}
/**
 * Template for a GLA that forwards every input tuple to a set of inner GLAs
 * and merges their results into a single output tuple stream.
 *
 * Template arguments read by this function:
 *  - 'input'      Used only when $inputs is empty: map of input name => type
 *                 (identifiers are resolved through lookupType()).
 *  - 'glas' or 0  Non-empty array; each entry must be an array with the keys
 *                 'gla', 'inputs' and 'outputs'. 'inputs' / 'outputs' list
 *                 the names (from $inputs / $outputs) wired to that GLA.
 *
 * Each inner GLA's first matching result type among 'multi', 'single' and
 * 'state' decides how its results are read in the generated iterator. An
 * output attribute may be claimed by at most one inner GLA.
 *
 * @param array $t_args  Template arguments (see above).
 * @param array $inputs  Map of input name => type; may be empty.
 * @param array $outputs Map of output name => type; types are overwritten
 *                       with the types reported by the inner GLAs.
 * @return array Description of the generated GLA.
 */
function Multiplexer(array $t_args, array $inputs, array $outputs)
{
    $className = generate_name('Multiplexer');

    if (\count($inputs) == 0) {
        grokit_assert(array_key_exists('input', $t_args), 'No inputs specified for Multiplexer');
        $inputs = $t_args['input'];

        // Resolve identifiers in place on $inputs itself. Iterating over
        // $t_args (and over the misspelled key 'inputs') would discard the
        // resolved types.
        foreach ($inputs as $name => &$type) {
            if (is_identifier($type)) {
                $type = lookupType(strval($type));
            }
            grokit_assert(is_datatype($type), 'Only types may be specified as inputs to Multiplexer.');
        }
        unset($type); // drop the dangling reference to the last element

        $inputs = ensure_valid_names($inputs, 'multi_input');
    }

    $glas = get_first_key($t_args, ['glas', 0]);
    grokit_assert(\count($glas) > 0, 'No GLAs specified for Multiplexer.');

    $myGLAs = [];        // member name => instantiated inner GLA
    $glaInputs = [];     // member name => (input name => type)
    $glaOutputs = [];    // member name => (output name => type)
    $glaRez = [];        // member name => 'multi' | 'single' | 'state'
    $resultType = 'multi';
    $usedOutputs = [];
    $libraries = [];
    $glaGenStates = [];
    $glaReqStates = [];
    $configurable = false;
    $reqStates = [];
    $iterable = null;

    foreach ($glas as $name => $glaInfo) {
        grokit_assert(is_array($glaInfo), 'Template argument \'glas\' must be an array');
        grokit_assert(array_key_exists('gla', $glaInfo), 'No GLA given for glas[' . $name . ']');
        grokit_assert(array_key_exists('inputs', $glaInfo), 'No inputs given for glas[' . $name . ']');
        grokit_assert(array_key_exists('outputs', $glaInfo), 'No outputs given for glas[' . $name . ']');

        $gla = $glaInfo['gla'];
        $glaInAtts = $glaInfo['inputs'];
        $glaOutAtts = $glaInfo['outputs'];

        grokit_assert(is_gla($gla), 'Non-GLA given for glas[' . $name . '][gla]');
        grokit_assert(is_array($glaInAtts), 'Non-array given for inputs for gla ' . $name);
        grokit_assert(is_array($glaOutAtts), 'Non-array given for outputs for gla ' . $name);

        $glaInAtts = array_map('strval', $glaInAtts);
        $glaOutAtts = array_map('strval', $glaOutAtts);

        $glaName = "innerGLA_" . $name;
        $glaInputs[$glaName] = [];
        $glaOutputs[$glaName] = [];

        foreach ($glaInAtts as $att) {
            grokit_assert(array_key_exists($att, $inputs), 'Input ' . $att . ' for GLA ' . $name . ' not found in inputs');
            $glaInputs[$glaName][$att] = $inputs[$att];
        }

        foreach ($glaOutAtts as $att) {
            grokit_assert(array_key_exists($att, $outputs), 'Output ' . $att . ' for GLA ' . $name . ' not found in outputs');
            grokit_assert(!in_array($att, $usedOutputs), 'Output ' . $att . ' used by multiple GLAs');
            $usedOutputs[] = $att;
            $glaOutputs[$glaName][$att] = $outputs[$att];
        }

        $gla = $gla->apply($glaInputs[$glaName], $glaOutputs[$glaName]);
        $myGLAs[$glaName] = $gla;

        $glaRez[$glaName] = get_first_value($gla->result_type(), ['multi', 'single', 'state']);
        $libraries = array_merge($libraries, $gla->libraries());

        if ($glaRez[$glaName] == 'state') {
            grokit_assert(\count($glaOutputs[$glaName]) == 1, "GLA {$glaName} is produced as state, and thus must have exactly 1 output.");
            $stateType = lookupType('base::STATE', ['type' => $gla]);
            $glaOutputs[$glaName] = array_combine(array_keys($glaOutputs[$glaName]), [$stateType]);
        } else {
            grokit_assert(\count($glaOutputs[$glaName]) == \count($gla->output()), 'GLA ' . $glaName . ' produces different number of outputs than expected');
            $glaOutputs[$glaName] = array_combine(array_keys($glaOutputs[$glaName]), $gla->output());
        }

        // Set types for our output
        foreach ($glaOutputs[$glaName] as $attName => $type) {
            $outputs[$attName] = $type;
        }

        if (is_null($iterable)) {
            $iterable = $gla->iterable();
        } else {
            grokit_assert($iterable == $gla->iterable(), 'Multiplexer does not support mixing iterable and non-iterable GLAs');
        }

        $glaReqStates[$glaName] = $gla->req_states();
        foreach ($gla->req_states() as $rstate) {
            $reqStates[] = $rstate;
        }

        $glaGenStates[$glaName] = $gla->state();

        // TODO: Support constant states
        grokit_assert(!$gla->has_state(), 'Multiplexer currently does not support constant states.');
    }

    $libraries = array_unique($libraries);
    $extra = ['glas' => $myGLAs];
?>

class <?php echo $className; ?> {
<?php foreach ($myGLAs as $name => $type) { ?>
    <?php echo $type; ?> <?php echo $name; ?>;
<?php } // foreach inner gla ?>

    class Iterator {
        bool _gotResultsOnce;
        bool _valid;

<?php foreach ($myGLAs as $name => $type) { ?>
        <?php echo $type; ?> * it_<?php echo $name; ?>;
<?php } // foreach inner gla ?>

    public:
        Iterator(void) : _gotResultsOnce(false), _valid(false),
            <?php echo array_template('it_{key}(nullptr)', ', ', $myGLAs); ?>

        { }

        Iterator(<?php echo typed_ref_args($myGLAs); ?> ) : _gotResultsOnce(false), _valid(true),
            <?php echo array_template('it_{key}(&{key})', ', ', $myGLAs); ?>

        {
<?php
    foreach ($myGLAs as $name => $type) {
        if ($glaRez[$name] == 'multi') {
?>
            <?php echo $name; ?>.Finalize();
<?php
        } // if inner GLA is multi
    } // foreach inner gla
?>
        }

        Iterator( const Iterator & other) = default;

        ~Iterator() { }

        bool GetNextResult( <?php echo typed_ref_args($outputs); ?> ) {
            FATALIF(!_valid, "Tried to get results from an invalid iterator.");

            bool ret = !_gotResultsOnce;
            _gotResultsOnce = true;

<?php
    foreach ($myGLAs as $name => $type) {
        if ($glaRez[$name] == 'multi') {
?>
            ret |= it_<?php echo $name; ?>->GetNextResult(<?php echo args($glaOutputs[$name]); ?>);
<?php
        } // if inner GLA is multi
    } // foreach inner gla
?>

            if( ret ) {
<?php
    foreach ($myGLAs as $name => $type) {
        if ($glaRez[$name] == 'single') {
?>
                it_<?php echo $name; ?>->GetResult(<?php echo args($glaOutputs[$name]); ?>);
<?php
        } elseif ($glaRez[$name] == 'state') {
            $stateVar = array_keys($glaOutputs[$name])[0];
            $stateType = $glaOutputs[$name][$stateVar];
?>
                <?php echo $stateVar; ?> = <?php echo $stateType; ?>(it_<?php echo $name; ?>);
<?php
        } // if inner GLA is single / state
    } // foreach inner gla
?>
            }

            return ret;
        }
    };

    Iterator multiIterator;

public:
    <?php echo $className; ?>() { }
    ~<?php echo $className; ?>() { }

    void AddItem(<?php echo const_typed_ref_args($inputs); ?> ) {
        // Call AddItem individually on each GLA.
<?php foreach ($myGLAs as $gName => $gType) { ?>
        <?php echo $gName; ?>.AddItem(<?php echo args($glaInputs[$gName]); ?>);
<?php } // foreach inner gla ?>
    }

    void AddState( <?php echo $className; ?> & other ) {
        // Call AddState individually on each GLA.
<?php foreach ($myGLAs as $gName => $gType) { ?>
        <?php echo $gName; ?>.AddState(other.<?php echo $gName; ?>);
<?php } // foreach inner gla ?>
    }

    void Finalize() {
        multiIterator = Iterator(<?php echo args($myGLAs); ?>);
    }

    bool GetNextResult(<?php echo typed_ref_args($outputs); ?> ) {
        return multiIterator.GetNextResult(<?php echo args($outputs); ?>);
    }

    void GetResult(<?php echo typed_ref_args($outputs); ?> ) {
        Finalize();
        GetNextResult(<?php echo args($outputs); ?>);
    }

<?php foreach (array_keys($myGLAs) as $index => $name) { ?>
    const <?php echo $myGLAs[$name]; ?> & GetGLA<?php echo $index; ?>() const {
        return <?php echo $name; ?>;
    }
<?php } ?>
};

<?php
    return [
        'kind' => 'GLA',
        'name' => $className,
        'input' => $inputs,
        'output' => $outputs,
        'result_type' => $resultType,
        'libraries' => $libraries,
        'configurable' => $configurable,
        'extra' => $extra,
    ];
}
/**
 * Builds the target from a token.
 *
 * Accepted token shapes:
 *  - array: element 0 is the target name; element 1, when it is an array,
 *    becomes the params. An empty array leaves the object untouched.
 *  - string: either "name" or "name?query"; the query part is decoded with
 *    parse_str() into the params. An empty string leaves the object untouched.
 *
 * For a non-empty target name:
 *  - if it contains '-', target_file is the name with every '-' replaced by
 *    '/', and the name is split at the first '-' into a module part and the
 *    target path; the module is recorded only when is_identifier() accepts it;
 *  - otherwise target_file and target_path both equal the name.
 * The target path is then split at the first '/' into controller and action;
 * the action is null when the path contains no '/'.
 *
 * @param array|string $target_token Token describing the target.
 * @throws developer_error When the token is neither an array nor a string.
 */
public function __construct($target_token)
{
    if (is_array($target_token)) {
        if ($target_token === []) {
            return;
        }
        $target_name = $target_token[0];
        if (isset($target_token[1]) && is_array($target_token[1])) {
            $this->params = $target_token[1];
        }
    } elseif (is_string($target_token)) {
        if ($target_token === '') {
            return;
        }
        $params = [];
        if (strpos($target_token, '?') !== false) {
            list($target_name, $params_str) = explode('?', $target_token, 2);
            parse_str($params_str, $params);
        } else {
            $target_name = $target_token;
        }
        $this->params = $params;
    } else {
        throw new developer_error('bad target_token for target');
    }

    $this->target_name = $target_name;
    if ($target_name === '') {
        return;
    }

    if (strpos($target_name, '-') !== false) {
        $this->target_file = str_replace('-', '/', $target_name);
        list($module_name, $target_path) = explode('-', $target_name, 2);
        if (is_identifier($module_name)) {
            $this->has_module = true;
            $this->module_name = $module_name;
        }
    } else {
        $this->target_file = $target_name;
        $target_path = $target_name;
    }
    $this->target_path = $target_path;

    // A path without '/' yields a single element; pad to two so the action
    // becomes null without reading an undefined offset.
    list($controller_name, $action_name) = array_pad(explode('/', $target_path, 2), 2, null);
    $this->controller_name = $controller_name;
    $this->action_name = $action_name;
    $this->target_pair = [$this->controller_name, $this->action_name];
}
/**
 * Template for a GLA that keeps the top-K input tuples according to a
 * user-specified ordering and emits them in sorted order.
 *
 * Template arguments read by this function:
 *  - 'input'  Used only when $inputs is empty: map of input name => type
 *             (identifiers are resolved through lookupType()).
 *  - 'order'  Required. Map of input name => direction. Accepted directions
 *             (case-insensitive): ASC, ASCENDING, +, > and DESC, DESCENDING,
 *             DES, DSC, -, <.
 *  - 'rank'   Optional attribute (must exist in $outputs) that receives the
 *             1-based position of each emitted tuple; typed base::BIGINT if
 *             it has no type yet.
 *  - 'limit'  Optional K; missing or 0 means 2^32.
 *  - 'debug'  Optional debug level (default 0); > 0 adds tracing code.
 *
 * Every output other than the rank attribute is matched positionally with an
 * input and takes that input's type.
 *
 * NOTE(review): comparison priority in the generated operator> follows the
 * order of $inputs, not the order of entries in 'order' - confirm intended.
 *
 * @param array $t_args  Template arguments (see above).
 * @param array $inputs  Map of input name => type; may be empty.
 * @param array $outputs Map of output name => type (types may be null).
 * @return array Description of the generated GLA.
 */
function OrderBy(array $t_args, array $inputs, array $outputs)
{
    if (\count($inputs) == 0) {
        grokit_assert(array_key_exists('input', $t_args), 'No inputs given for OrderBy');
        $inputs = $t_args['input'];

        // Resolve identifiers on $inputs itself; iterating $t_args['input'] by
        // reference would leave $inputs holding the unresolved identifiers.
        foreach ($inputs as $name => &$type) {
            if (is_identifier($type)) {
                $type = lookupType(strval($type));
            }
            grokit_assert(is_datatype($type), 'Invalid type given for input ' . $name);
        }
        unset($type); // drop the dangling reference to the last element
    }

    grokit_assert(array_key_exists('order', $t_args), 'No ordering attributes given for OrderBy');
    $ordering = $t_args['order'];

    $ascOpts = ['ASC', 'ASCENDING', '+', '>'];
    $descOpts = ['DESC', 'DESCENDING', 'DES', 'DSC', '-', '<'];

    $ascending = [];
    foreach ($ordering as $name => $order) {
        grokit_assert(array_key_exists($name, $inputs), 'Ordering attribute ' . $name . ' not present in input');

        if (in_array_icase($order, $ascOpts)) {
            $ascending[$name] = true;
        } elseif (in_array_icase($order, $descOpts)) {
            $ascending[$name] = false;
        } else {
            grokit_error("Unknown ordering " . $order . " given for attribute " . $name);
        }
    }

    $rankAtt = get_default($t_args, 'rank', null);
    grokit_assert(is_null($rankAtt) || is_attribute($rankAtt), 'Rank argument should be null or an attribute');
    grokit_assert(is_null($rankAtt) || array_key_exists($rankAtt->name(), $outputs), 'Rank attribute does not exist in outputs');
    if (!is_null($rankAtt) && is_null($outputs[$rankAtt->name()])) {
        $outputs[$rankAtt->name()] = lookupType('base::BIGINT');
    }

    // Outputs that simply pass an input value through (everything but rank).
    $outputPassthroughAtts = [];
    foreach ($outputs as $name => $type) {
        if (is_null($rankAtt) || $rankAtt->name() != $name) {
            $outputPassthroughAtts[$name] = $type;
        }
    }

    // The mapping below is positional, so both sides must have the same size;
    // otherwise an output with an empty name (or an unmapped output) would
    // silently end up in the generated code.
    $nInputs = \count($inputs);
    grokit_assert(
        \count($outputPassthroughAtts) == $nInputs,
        'OrderBy expected ' . $nInputs . ' pass-through outputs, ' . \count($outputPassthroughAtts) . ' given'
    );

    $outToIn = [];
    $inNames = array_keys($inputs);
    $outNames = array_keys($outputPassthroughAtts);
    for ($i = 0; $i < $nInputs; $i++) {
        $outName = $outNames[$i];
        $inName = $inNames[$i];

        $outToIn[$outName] = $inName;

        // Unify types
        $outputs[$outName] = $inputs[$inName];
        $outputPassthroughAtts[$outName] = $inputs[$inName];
    }

    $orderAtts = [];
    $extraAtts = [];
    foreach ($inputs as $name => $type) {
        if (array_key_exists($name, $ordering)) {
            $orderAtts[$name] = $type;
        } else {
            $extraAtts[$name] = $type;
        }
    }

    // Give 2^32 as the default, which should be effectively infinite
    $limitDefault = pow(2, 32);
    $limit = get_default($t_args, 'limit', $limitDefault);
    $limit = $limit == 0 ? $limitDefault : $limit;
    grokit_assert($limit > 0, 'The OrderBy limit must be a positive integer');

    $className = generate_name('OrderBy');
    $debug = get_default($t_args, 'debug', 0);
?>

class <?php echo $className; ?> {
    struct Tuple {
<?php foreach ($inputs as $name => $type) { ?>
        <?php echo $type; ?> <?php echo $name; ?>;
<?php } ?>

        Tuple( void ) = default;

        Tuple( const Tuple & other ) = default;

        Tuple( <?php echo array_template('const {val} & _{key}', ', ', $inputs); ?> ):
            <?php echo array_template('{key}(_{key})', ', ', $inputs); ?>

        { }

        Tuple & operator = (const Tuple & other ) = default;

        bool operator > ( const Tuple & other ) const {
<?php
    foreach ($orderAtts as $name => $type) {
        $op1 = $ascending[$name] ? '<' : '>';
        $op2 = !$ascending[$name] ? '<' : '>';
?>
            if( <?php echo $name; ?> <?php echo $op1; ?> other.<?php echo $name; ?> )
                return true;
            else if( <?php echo $name; ?> <?php echo $op2; ?> other.<?php echo $name; ?> )
                return false;
<?php } ?>

            return false;
        }

        bool operator < ( const Tuple& other ) const {
            return other > *this;
        }

        bool operator <= (const Tuple & other ) const {
            return ! (*this > other );
        }

        bool operator >= (const Tuple & other ) const {
            return !( other > *this );
        }

<?php if ($debug > 0) { ?>
        std::string toString(void) const {
            std::ostringstream ss;

            ss << "( "; // >
<?php
    $first = true;
    foreach ($inputs as $name => $type) {
        if ($first) {
            $first = false;
        } else {
            echo '            ss << ", ";' . PHP_EOL;
        }
?>
            ss << <?php echo $name; ?>; // >
<?php } // foreach input ?>
            ss << " )"; // >

            return ss.str();
        }
<?php } // debug > 0 ?>
    }; // struct Tuple

    typedef std::vector<Tuple> TupleVector;

public:
    class Iterator {
    public:
        typedef TupleVector::const_iterator iter_type;

    private:
        iter_type begin;
        iter_type curr;
        iter_type end;

    public:
        Iterator(void) = default;

        Iterator( const iter_type & _begin, const iter_type & _end ) :
            begin(_begin), curr(_begin), end(_end)
        { }

        bool GetNextResult(<?php echo typed_ref_args($outputs); ?> ) {
            if( curr != end ) {
<?php foreach ($outputPassthroughAtts as $name => $type) { ?>
                <?php echo $name; ?> = curr-><?php echo $outToIn[$name]; ?>;
<?php } ?>
<?php if (!is_null($rankAtt)) { ?>
                <?php echo $rankAtt; ?> = (curr - begin) + 1;
<?php } // if we need to output the rank ?>
                curr++;
                return true;
            } else {
                return false;
            }
        }
    };

private:
    uintmax_t __count; // number of tuples covered

    // K, as in Top-K
    static constexpr size_t K = <?php echo $limit; ?>;

    TupleVector tuples;

    // Iterator for multi output type
    Iterator multiIterator;

    typedef std::greater<Tuple> TupleCompare;

    // Function to force sorting so that GetNext gets the tuples in order.
    void Sort(void) {
        TupleCompare comp;
        // If tuples doesn't contain at least K elements, it was never made into
        // a heap in the first place, so sort it normally.
        if( tuples.size() >= K ) {
            std::sort_heap(tuples.begin(), tuples.end(), comp);
        } else {
            std::sort(tuples.begin(), tuples.end(), comp);
        }
    }

    // Internal function to add a tuple to the heap
    void AddTupleInternal(Tuple & t ) {
<?php if ($debug >= 1) { ?>
        {
            std::ostringstream ss;
            ss << "T ACK: " << t.toString() << std::endl; // >
            std::cerr << ss.str(); // >
        }
<?php } ?>
        TupleCompare comp;
        if( tuples.size() >= K ) {
<?php if ($debug >= 1) { ?>
            {
                std::ostringstream ss;
                ss << "T REP: " << tuples.front().toString() << std::endl; // >
                std::cerr << ss.str(); // >
            }
<?php } ?>
            std::pop_heap(tuples.begin(), tuples.end(), comp);
            tuples.pop_back();
            tuples.push_back(t);
            std::push_heap(tuples.begin(), tuples.end(), comp);
        } else {
            tuples.push_back(t);
            if( tuples.size() == K ) {
                std::make_heap(tuples.begin(), tuples.end(), comp);
            }
        }
    }

public:
    <?php echo $className; ?>() : __count(0), tuples(), multiIterator()
    { }

    ~<?php echo $className; ?>() { }

    void AddItem(<?php echo const_typed_ref_args($inputs); ?> ) {
        __count++;
        Tuple t(<?php echo args($inputs); ?>);
<?php if ($debug >= 2) { ?>
        {
            std::ostringstream ss;
            ss << "T NEW: " << t.toString() << std::endl; // >
            std::cerr << ss.str(); // >
        }
<?php } ?>
        if( tuples.size() == K && !(t > tuples.front()) )
            return;

        AddTupleInternal(t);
    }

    void AddState( <?php echo $className; ?> & other ) {
        __count += other.__count;
        for( Tuple & el : other.tuples ) {
            if( tuples.size() < K /*>*/ || el > tuples.front() ) {
                AddTupleInternal(el);
            }
        }
    }

    void Finalize() {
        Sort();
        Iterator::iter_type begin = tuples.cbegin();
        Iterator::iter_type end = tuples.cend();
        multiIterator = Iterator(begin, end);

<?php if ($debug >= 1) { ?>
        std::ostringstream ss;
        ss << "[ "; //>
        bool first = true;
        for( auto el : tuples ) {
            if( first )
                first = false;
            else
                ss << ", "; //>>

            ss << el.toString(); //>>
        }
        ss << " ]" << std::endl; // >
        std::cerr << ss.str(); //>>
<?php } ?>
    }

    bool GetNextResult( <?php echo typed_ref_args($outputs); ?> ) {
        return multiIterator.GetNextResult(<?php echo args($outputs); ?>);
    }
};

<?php
    // 'functional' provides std::greater, used for TupleCompare above.
    $system_headers = ['vector', 'algorithm', 'functional', 'cinttypes'];
    if ($debug > 0) {
        $system_headers = array_merge($system_headers, ['iostream', 'sstream', 'string']);
    }

    return [
        'kind' => 'GLA',
        'name' => $className,
        'input' => $inputs,
        'output' => $outputs,
        'result_type' => 'multi',
        'system_headers' => $system_headers,
    ];
}
/**
 * Template for a GI that parses delimiter-separated text, one tuple per line.
 *
 * Template arguments read by this function:
 *  - 'output'            Used only when $output is empty: list of types (or
 *                        identifiers naming types). Columns are then named
 *                        val_1, val_2, ...
 *  - 'debug'             Debug level, default 0.
 *  - 'simple'            Default false. When true the generated reader splits
 *                        on the delimiter only (no quote / escape handling)
 *                        and the 'quote' / 'escape' options are rejected.
 *  - 'trim.cr'           Default false. When true a trailing '\r' is removed
 *                        from every line.
 *  - 'sep' / 'separator' Single-character string, or 'tab' (any case).
 *                        Default ','.
 *  - 'quote'             Single-character string, default '"'.
 *  - 'escape'            Single-character string, default '\'.
 *  - 'skip'              Non-negative int: number of header lines to skip.
 *  - 'n'                 Int: maximum number of lines to read, default -1.
 *  - 'nullable'          true (all columns, null string 'NULL'), a string
 *                        (all columns, that null string), false (none), or an
 *                        array describing individual columns.
 *
 * @param array $t_args Template arguments (see above).
 * @param array $output Map of output name => type; may be empty.
 * @return array Description of the generated GI.
 */
function CSVReader(array $t_args, array $output)
{
    $my_output = [];

    // Handle case where outputs are given as template arguments
    // and not implied.
    if (\count($output) == 0) {
        grokit_assert(array_key_exists('output', $t_args), 'Did not receive any description of my output!');
        $output_list = $t_args['output'];
        grokit_assert(is_array($output_list), 'Expected list of types for template argument "output"');

        $i = 1;
        foreach ($output_list as $name => $out_type) {
            grokit_assert(is_datatype($out_type) || is_identifier($out_type), 'Expected only types in the "output" list');

            if (is_identifier($out_type)) {
                $out_type = lookupType($out_type->value());
            }

            $name = 'val_' . $i;
            $my_output[$name] = $out_type;

            $i += 1;
        }
    } else {
        foreach ($output as $key => $out) {
            $name = $key;
            $my_output[$name] = $out;
        }
    }

    $debug = get_default($t_args, 'debug', 0);
    $simple = get_default($t_args, 'simple', false);
    $trimCR = get_default($t_args, 'trim.cr', false);

    // Handle separator
    $separator = ',';
    if (array_key_exists('sep', $t_args) || array_key_exists('separator', $t_args)) {
        $sep = get_first_key($t_args, ['sep', 'separator']);
        grokit_assert(is_string($sep), "Got " . gettype($sep) . " instead of string for separator.");

        if (strtolower($sep) === 'tab') {
            // Emitted verbatim inside a C++ char literal, hence the escape.
            $sep = '\\t';
        }

        grokit_assert($sep != "\n", 'CSV column delimiter cannot be new line');

        // Scream if separator is longer than one character
        grokit_assert(\strlen($sep) == 1 || $sep == '\\t', 'Expected string of length 1 for separator, got string <' . $sep . '> instead');

        $separator = $sep;
    }

    // Handle quote character
    $quotechar = '"';
    if (array_key_exists('quote', $t_args) && !is_null($t_args['quote'])) {
        grokit_assert(!$simple, 'Quote option not available for simple CSVReader');
        $quote = $t_args['quote'];
        grokit_assert(is_string($quote), "Got " . gettype($quote) . " instead of string for quote.");

        // Scream if quote is longer than one character
        grokit_assert(\strlen($quote) == 1, 'Expected string of length 1 for quote character, got string <' . $quote . '> instead');

        $quotechar = $quote;
    }
    $quotechar = addcslashes($quotechar, '\\\'');

    // Handle escape character
    $escapeChar = '\\';
    if (array_key_exists('escape', $t_args) && !is_null($t_args['escape'])) {
        grokit_assert(!$simple, 'Escape option not available for simple CSVReader');
        $escape = $t_args['escape'];
        grokit_assert(is_string($escape), 'Got ' . gettype($escape) . ' instead of string for escape character.');
        grokit_assert(\strlen($escape) == 1, 'Expected string of length 1 for escape character, got string <' . $escape . '> instead');

        $escapeChar = $escape;
    }
    $escapeChar = addcslashes($escapeChar, '\\\'');

    // Handle header lines
    $headerLines = 0;
    if (array_key_exists('skip', $t_args)) {
        $headerLines = $t_args['skip'];
        grokit_assert(is_int($headerLines), 'Got ' . gettype($headerLines) . ' instead of int for number of lines to skip.');
        grokit_assert($headerLines >= 0, 'Cannot skip a negative number of lines.');
    }

    // Maximum number of lines to read
    $maxLines = get_default($t_args, 'n', -1);
    grokit_assert(is_int($maxLines), 'Got ' . gettype($maxLines) . ' instead of int for template argument "n"');

    $nullArg = get_first_key_default($t_args, ['nullable'], false);

    $nullable = [];
    $nullStr = [];
    foreach ($my_output as $name => $type) {
        $nullable[$name] = false;
    }

    if ($nullArg === true) {
        foreach ($my_output as $name => $type) {
            $nullable[$name] = true;
            $nullStr[$name] = 'NULL';
        }
    } elseif (is_array($nullArg)) {
        foreach ($nullArg as $n => $v) {
            if (is_string($n)) {
                // If nullable value is an associative mapping, the value is either true/false
                // or the value of the null string
                grokit_assert(is_string($v) || is_bool($v), 'CSVReader: nullable associative mapping must have string or boolean values');
                grokit_assert(array_key_exists($n, $nullable), 'CSVReader: cannot make unknown attribute ' . $n . ' nullable');

                if (is_bool($v)) {
                    $nullable[$n] = $v;
                    $nullStr[$n] = 'NULL';
                } else {
                    $nullable[$n] = true;
                    $nullStr[$n] = $v;
                }
            } elseif (is_array($v)) {
                grokit_assert(array_key_exists('attr', $v), 'CSVReader: Name of nullable attribute not specified');
                $attrName = $v['attr']->name();
                $nullable[$attrName] = true;
                $nullStr[$attrName] = array_key_exists('null', $v) ? $v['null'] : 'NULL';
            } else {
                // Otherwise, it's just nullable
                $attrName = $v->name();
                grokit_assert(array_key_exists($attrName, $nullable), 'CSVReader: cannot make unknown attribute ' . $v . ' nullable');
                $nullable[$attrName] = true;
                $nullStr[$attrName] = 'NULL';
            }
        }
    } elseif ($nullArg === false) {
        // Nothing
    } elseif (is_string($nullArg)) {
        foreach ($my_output as $name => $type) {
            $nullable[$name] = true;
            $nullStr[$name] = $nullArg;
        }
    } else {
        grokit_error('Template argument "nullable" must be boolean or array, ' . gettype($nullArg) . ' given');
    }

    // Come up with a name for ourselves
    $className = generate_name('CSVReader');

    if ($debug >= 2) {
        foreach ($my_output as $name => $type) {
            fwrite(STDERR, "CSVReader: {$name} is nullable: " . ($nullable[$name] ? 'true' : 'false') . PHP_EOL);
        }
    }
?>

class <?php echo $className; ?> {
    std::istream& my_stream;
    std::string fileName;

    // Template parameters
    static constexpr size_t MAX_LINES = <?php echo $maxLines; ?>;
    static constexpr size_t HEADER_LINES = <?php echo $headerLines; ?>;
    static constexpr char DELIMITER = '<?php echo $separator; ?>';
<?php if (!$simple) { ?>
    static constexpr char QUOTE_CHAR = '<?php echo $quotechar; ?>';
    static constexpr char ESCAPE_CHAR = '<?php echo $escapeChar; ?>';

    typedef boost::escaped_list_separator<char> separator;
    typedef boost::tokenizer< separator > Tokenizer;
    separator my_separator;
    Tokenizer my_tokenizer;
<?php } ?>

    // Prevent having to allocate this every time.
    std::string line;
    std::vector<std::string> tokens;

    size_t count;

    <?php \grokit\declareDictionaries($my_output); ?>

public:

    <?php echo $className; ?> ( GIStreamProxy& _stream ) :
        my_stream(_stream.get_stream())
        , fileName(_stream.get_file_name())
<?php if (!$simple) { ?>
        , my_separator(ESCAPE_CHAR, DELIMITER, QUOTE_CHAR)
        , my_tokenizer(std::string(""))
<?php } ?>
        , count(0)
    {
<?php if ($headerLines > 0) { ?>
        for( size_t i = 0; i < HEADER_LINES; ++i ) {
            FATALIF( !getline( my_stream, line ), "CSV Reader reached end of file before finishing header.\n" );
        }
<?php } // If headerLines > 0 ?>
    }
    // >

    bool ProduceTuple( <?php echo typed_ref_args($my_output); ?> ) {
        if (count < MAX_LINES) { //>
            count++;
        } else {
            return false;
        }

        if( getline( my_stream, line ) ) {
<?php if ($trimCR) { ?>
            // Guard against empty lines: back() on an empty string is undefined.
            if( !line.empty() && line.back() == '\r' ) {
                line.pop_back();
            }
<?php } // if trimCR ?>
<?php
    if (!$simple) {
        if ($debug >= 1) {
?>
            try {
<?php
        } // if debug >= 1
?>
            my_tokenizer.assign( line, my_separator );
<?php
        if ($debug >= 1) {
?>
            } catch(...) {
                FATAL("CSVReader for file %s failed on line: %s", fileName.c_str(), line.c_str());
            }
<?php
        } // if debug >= 1
?>
            Tokenizer::iterator it = my_tokenizer.begin();

<?php
        foreach ($my_output as $name => $type) {
            if ($nullable[$name]) { // nullable
                // NOTE(review): this call is not echoed, unlike the one in
                // the simple branch below - confirm which form
                // fromStringNullable() expects.
?>
            <?php \grokit\fromStringNullable($name, $type, 'it->c_str()', true, $nullStr[$name]); ?>

<?php
            } else { // not nullable
?>
            <?php echo \grokit\fromStringDict($name, $type, 'it->c_str()'); ?>;
<?php
            } // end nullable check
?>
            ++it;
<?php
        } // foreach output
    } else {
?>
            for( char & c : line ) {
                if( c == DELIMITER )
                    c = '\0';
            }

            const char * ptr = line.c_str();
<?php
        $first = true;
        foreach ($my_output as $name => $type) {
            if ($first) {
                $first = false;
            } else {
?>
            while( *(ptr++) != '\0' )
                ; // Advance past next delimiter
<?php
            } // not first output
            if ($nullable[$name]) {
?>
            <?php echo \grokit\fromStringNullable($name, $type, 'ptr', true, $nullStr[$name]); ?>

<?php
            } else { // not nullable
?>
            <?php echo \grokit\fromStringDict($name, $type, 'ptr'); ?>;
<?php
            } // if nullable
        } // foreach output
    } // if simple reader
?>

            return true;
        } else {
            return false;
        }
    }

    <?php \grokit\declareDictionaryGetters($my_output); ?>

};

<?php
    $sys_headers = ['vector', 'string', 'iostream', 'cstdint'];
    if (!$simple) {
        $sys_headers[] = 'boost/tokenizer.hpp';
    }

    return [
        'name' => $className,
        'kind' => 'GI',
        'output' => $my_output,
        'system_headers' => $sys_headers,
        'user_headers' => ['GIStreamInfo.h', 'Dictionary.h', 'DictionaryManager.h'],
    ];
}