Beispiel #1
0
function CCompUnionFindLPFrag($t_args, $inputs, $outputs)
{
    // Class name is randomly generated.
    $className = generate_name('CCompUnionFindLPFrag');
    // Initializiation of argument names.
    $inputs_ = array_combine(['s', 't'], $inputs);
    $vertex = $inputs_['s'];
    // Construction of outputs.
    $outputs_ = ['node' => $vertex, 'component' => lookupType('base::BIGINT')];
    $outputs = array_combine(array_keys($outputs), $outputs_);
    $sys_headers = ['armadillo', 'algorithm'];
    $user_headers = [];
    $lib_headers = [];
    $libraries = ['armadillo'];
    $properties = [];
    $extra = [];
    $result_type = ['fragment'];
    ?>

using namespace arma;
using namespace std;

class <?php 
    echo $className;
    ?>
;

<?php 
    $constantState = lookupResource('graph::CCompUnionFindLPFrag_Constant_State', ['className' => $className]);
    ?>

class <?php 
    echo $className;
    ?>
 {
 public:
  // The constant state for this GLA.
  using ConstantState = <?php 
    echo $constantState;
    ?>
;
   // The current and final indices of the result for the given fragment.
  using Iterator = std::pair<uint64_t, uint64_t>;

  // The number of iterations to perform, not counting the initial set-up.
  static const constexpr int kIterations = 10;

  // The work is split into chunks of this size before being partitioned.
  static const constexpr int kBlock = 32;

  // The maximum number of fragments to use.
  static const constexpr int kMaxFragments = 64;

 private:
  
  // Node Component
  static arma::rowvec node_component;
  // keep track of component size
  static arma::rowvec component_size;

  // The typical constant state for an iterable GLA.
  const ConstantState& constant_state;

  // The number of unique nodes seen.
  uint64_t num_nodes;

  // The current iteration.
  int iteration;
  
  // The number of fragmetns for the result.
  int num_fragments;

  // check if need more iterations
  uint64_t connections;

 public:
  <?php 
    echo $className;
    ?>
(const <?php 
    echo $constantState;
    ?>
& state)
      : constant_state(state),
        num_nodes(state.num_nodes),
        iteration(state.iteration),connections(0) {
  }

  uint64_t Find(uint64_t node_id){
    // use path compression here
    while (node_id != node_component(node_id)){
        node_component(node_id) = node_component(node_component(node_id));
        node_id = node_component(node_id);
    }
    return node_id;
  }
  
  void Union(uint64_t pid, uint64_t qid){
    // find their root
    pid = Find(pid);
    qid = Find(qid);
    if (pid == qid)
      return;

    ++ connections;
    uint64_t psz = component_size(pid), qsz = component_size(qid);
    if (psz > qsz){
      node_component(qid) = pid;
      component_size(pid) += qsz; 
    }else{
      node_component(pid) = qid;
      component_size(qid) += psz; 
    }
  }

  // Basic dynamic array allocation.
  void AddItem(<?php 
    echo const_typed_ref_args($inputs_);
    ?>
) {
    if (iteration == 0) {
      num_nodes = max((uint64_t) max(s, t), num_nodes);
      return;
    } else{
      Union((uint64_t) s, (uint64_t) t);
    }
  }

  // Hashes are merged.
  void AddState(<?php 
    echo $className;
    ?>
 &other) {
    if (iteration == 0)
      num_nodes = max(num_nodes, other.num_nodes);
    else
      connections += other.connections;
  }

  // Most computation that happens at the end of each iteration is parallelized
  // by performed it inside Finalize.
  bool ShouldIterate(ConstantState& state) {
    state.iteration = ++iteration;
    printf("Entering ShouldIterate. connections: %lu, iteration: %d\n", connections, iteration);
    if (iteration == 1) {// allocate memory
      // num_nodes is incremented because IDs are 0-based.
      state.num_nodes = ++num_nodes;
      // Allocating space can't be parallelized.
      node_component.set_size(num_nodes);
      for (uint64_t i = 0; i < num_nodes; ++ i){
        node_component(i) = i;
      }
      component_size.set_size(num_nodes);
      component_size.fill(1);
      return true;
    } else {
      return connections > 0 && iteration < kIterations + 1;
    }
  }

   int GetNumFragments() {
    uint64_t size = (num_nodes - 1) / kBlock + 1;  // num_nodes / kBlock rounded up.
    num_fragments = (iteration == 0) ? 0 : min(size, (uint64_t) kMaxFragments);
    printf("num_nodes: %lu, size: %lu, num_fragments: %d\n", num_nodes, size, num_fragments);
    return num_fragments;
  }

  // Finalize does nothing
  Iterator* Finalize(int fragment) {
    uint64_t count = num_nodes;
    // The ordering of operations is important. Don't change it.
    uint64_t first = fragment * (count / kBlock) / num_fragments * kBlock;
    uint64_t final = (fragment == num_fragments - 1)
              ? count - 1
              : (fragment + 1) * (count / kBlock) / num_fragments * kBlock - 1;
    
    printf("fragment: %lu\tcount: %lu\tfirst: %lu\tfinal: %lu\n", fragment, count, first, final);
    return new Iterator(first, final);
  }

  bool GetNextResult(Iterator* it, <?php 
    echo typed_ref_args($outputs_);
    ?>
) {
    // should iterate is true
    if (connections > 0 && iteration < kIterations + 1){
      printf("I need more iterations, because connections: %lu, iteration: %d\n", connections, iteration);
      return false;
    }
    if (it->first > it->second)
      return false;
    
    node = it->first++;
    component = Find(node);
    return true;
  }
    
};

// Initialize the static member types.
arma::rowvec <?php 
    echo $className;
    ?>
::node_component;
arma::rowvec <?php 
    echo $className;
    ?>
::component_size;
typedef <?php 
    echo $className;
    ?>
::Iterator <?php 
    echo $className;
    ?>
_Iterator;

<?php 
    return ['kind' => 'GLA', 'name' => $className, 'system_headers' => $sys_headers, 'user_headers' => $user_headers, 'lib_headers' => $lib_headers, 'libraries' => $libraries, 'properties' => $properties, 'extra' => $extra, 'iterable' => true, 'input' => $inputs, 'output' => $outputs, 'result_type' => $result_type, 'generated_state' => $constantState];
}
Beispiel #2
0
/**
 *  GI that generates data in clusters, using a specified distribution for each
 *  cluster.
 *
 *  This GI requires the following template arguments:
 *      - 'n' or 0
 *          The number of tuples to generate. Note: this value is per task.
 *          The total number of tuples generated will be n_tasks * n
 *      - 'centers' or 1
 *          A list of configuration for the centers.
 *
 *  The following template arguments are optional:
 *      - 'outputs'
 *          If the outputs of the GI are not given implicitly, they can be
 *          specified in this template argument. The number of dimensions will
 *          be determined by the number of outputs.
 *
 *          All output types must be numeric real types. The default type for
 *          outputs is DOUBLE.
 *      - 'dist.lib' = 'std'
 *          Which library to use for generating distributions.
 *          Valid options are:
 *              - std
 *              - boost
 *      - 'seed' = null
 *          The seed to be used for the random number generator. This seed will
 *          be used to generate the seed for each task, and different runs with
 *          the same seed will produce the same data.
 *      - 'compute.sets' = 1
 *          The number of sets of tuples to compute at once.
 *
 *  Each center configuration is a functor with the form:
 *      dist_name(args)
 *
 *  The following distributions are supported:
 *      { Uniform Distributions }
 *      - uniform(a = 0, b = 1)
 *
 *      { Normal Distributions }
 *      - normal(mean = 0.0, std_dev = 1.0) [ synonyms: gaussian ]
 *      - inverse_gaussian(mean = 1, shape = 1) [ synonyms: inverse_normal ]
 *
 *      { Bernoulli Distributions }
 *      - binomial(t = 1, p = 0.5)
 *      - negative_binomial(k = 1, p = 0.5)
 *
 *      { Poisson Distributions }
 *      - exponential( lambda = 1 )
 *      - gamma(alpha = 1, beta = 1)    [ synonyms: Gamma ]
 */
function ClusterGen(array $t_args, array $outputs)
{
    $sys_headers = ['array', 'cinttypes'];
    $user_headers = [];
    $libraries = [];
    if (\count($outputs) == 0) {
        grokit_assert(array_key_exists('outputs', $t_args), 'ClusterGen: No outputs specified');
        $count = 0;
        foreach ($t_args['outputs'] as $type) {
            if (is_identifier($type)) {
                $type = lookupType($type);
            }
            grokit_assert(is_datatype($type), 'ClusterGen: Non data-type ' . $type . ' given as output');
            $name = 'output' . $count++;
            $outputs[$name] = $type;
        }
    }
    foreach ($outputs as $name => &$type) {
        if (is_null($type)) {
            $type = lookupType('base::DOUBLE');
        } else {
            grokit_assert($type->is('real'), 'ClusterGen: Non-real datatype ' . $type . ' given as output');
        }
    }
    $myOutputs = [];
    foreach ($outputs as $name => $type) {
        $myOutputs[$name] = $type;
    }
    $tSize = \count($outputs);
    $seed = get_default($t_args, 'seed', null);
    if ($seed !== null) {
        grokit_assert(is_int($seed), 'ClusterGen: Seed must be an integer or null.');
    } else {
        $user_headers[] = 'HashFunctions.h';
    }
    $distLib = get_default($t_args, 'dist.lib', 'std');
    $distNS = '';
    switch ($distLib) {
        case 'std':
            $sys_headers[] = 'random';
            $distNS = 'std';
            break;
        case 'boost':
            $sys_headers[] = 'boost/random.hpp';
            $distNS = 'boost::random';
            $libraries[] = 'boost_random-mt';
            if ($seed === null) {
                // Need random_device
                $sys_headers[] = 'boost/random/random_device.hpp';
                $libraries[] = 'boost_system-mt';
            }
            break;
        default:
            grokit_error('ClusterGen: Unknown RNG library ' . $distLib);
    }
    $distRNG = 'mt19937';
    $RNGtype = $distNS . '::' . $distRNG;
    $nTuples = get_first_key($t_args, ['n', '0']);
    grokit_assert(is_int($nTuples), 'ClusterGen: the number of tuples to be produced must be an integer.');
    $centers = get_first_key($t_args, ['centers', 1]);
    grokit_assert(is_array($centers), 'ClusterGen: centers must be an array of functors');
    $handleDist = function ($name, $args, $oType) use($distNS) {
        $distName = '';
        $distArgs = [];
        switch ($name) {
            case 'gaussian':
            case 'normal':
                $distName = $distNS . '::' . 'normal_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 2, 'ClusterGen: Normal distribution takes at most 2 arguments, ' . \count($args) . ' given');
                $mean = get_default($args, ['mean', 0], 0.0);
                $sigma = get_default($args, ['std_dev', 'sigma', 1], 1.0);
                grokit_assert(is_numeric($mean), 'ClusterGen: mean parameter of binomial distribution must be a real number.');
                grokit_assert(is_numeric($sigma), 'ClusterGen: sigma parameter of binomial distribution must be a real number.');
                $mean = floatval($mean);
                $sigma = floatval($sigma);
                $distArgs = [$mean, $sigma];
                break;
            case 'binomial':
                $distName = $distNS . '::' . 'binomial_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 2, 'ClusterGen: Binomial distribution takes at most 2 arguments, ' . \count($args) . ' given');
                $t = get_default($args, ['t', 0], 1);
                $p = get_default($args, ['p', 1], 0.5);
                grokit_assert(is_int($t), 'ClusterGen: t parameter of binomial distribution must be an integer.');
                grokit_assert(is_numeric($p), 'ClusterGen: p parameter of binomial distribution must be a real number.');
                $p = floatval($p);
                grokit_assert($p >= 0 && $p <= 1, 'ClusterGen: p parameter of binomial distribution must be in the range [0, 1]');
                grokit_assert($t >= 0, 'ClusterGen: t parameter of binomial distribution must be in the range [0, +inf)');
                $distArgs = [$t, $p];
                break;
            case 'negative_binomial':
                $distName = $distNS . '::' . 'negative_binomial_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 2, 'ClusterGen: Negative Binomial distribution takes at most 2 arguments, ' . \count($args) . ' given');
                $k = get_default($args, ['k', 0], 1);
                $p = get_default($args, ['p', 1], 0.5);
                grokit_assert(is_int($k), 'ClusterGen: k parameter of binomial distribution must be an integer.');
                grokit_assert(is_numeric($p), 'ClusterGen: p parameter of binomial distribution must be a real number.');
                $p = floatval($p);
                grokit_assert($p > 0 && $p <= 1, 'ClusterGen: p parameter of negative binomial distribution must be in the range (0, 1]');
                grokit_assert($k > 0, 'ClusterGen: k parameter of negative binomial distribution must be in the range (0, +inf)');
                $distArgs = [$k, $p];
                break;
            case 'inverse_gaussian':
            case 'inverse_normal':
                grokit_assert(\count($args) <= 2, 'ClusterGen: Inverse Gaussian distribution takes at most 2 arguments, ' . \count($args) . ' given');
                $mean = get_default($args, ['mean', 0], 1);
                $shape = get_default($args, ['shape', 1], 1);
                grokit_assert(is_numeric($mean), 'ClusterGen: mean parameter of inverse gaussian distribution must be a real number.');
                grokit_assert(is_numeric($shape), 'ClusterGen: shape parameter of inverse gaussian distribution must be a real number.');
                $mean = floatval($mean);
                $shape = floatval($shape);
                grokit_assert($mean > 0, 'ClusterGen: mean of inverse gaussian distribution must be in range (0, inf)');
                grokit_assert($shape > 0, 'ClusterGen: shape of inverse gaussian distribution must be in range (0, inf)');
                $gen_args = ['output' => $oType, 'ns' => $distNS];
                $distName = strval(lookupResource('datagen::InverseGaussianGen', $gen_args));
                $distArgs = [$mean, $shape];
                break;
            case 'uniform':
                $distName = $distNS . '::' . 'uniform_real_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 2, 'ClusterGen: Uniform distribution takes at most 2 arguments, ' . \count($args) . ' given');
                $a = get_default($args, ['a', 0], 0.0);
                $b = get_default($args, ['b', 1], 1.0);
                grokit_assert(is_numeric($a), 'ClusterGen: `a` parameter of uniform distribution must be a real number.');
                grokit_assert(is_numeric($b), 'ClusterGen: `b` parameter of uniform distribution must be a real number.');
                $a = floatval($a);
                $b = floatval($b);
                grokit_assert($b >= $a, 'ClusterGen: `b` parameter of uniform distribution must be >= the `a` parameter.');
                $distArgs = [$a, $b];
                break;
            case 'exponential':
                $distName = $distNS . '::' . 'exponential_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 1, 'ClusterGen: Exponential distribution takes at most 1 argument.');
                $lambda = get_default($args, ['lambda', 0], 1.0);
                grokit_assert(is_numeric($lambda), 'ClusterGen: `lambda` parameter of exponential distribution must be a real number.');
                $lambda = floatval($lambda);
                grokit_assert($lambda > 0, 'ClusterGen: `lambda` parameter of exponential distribution must be in range (0, +inf).');
                $distArgs = [$lambda];
                break;
            case 'gamma':
            case 'Gamma':
                $distName = $distNS . '::' . 'gamma_distribution<' . $oType . '>';
                grokit_assert(\count($args) <= 2, 'ClusterGen: Gamma distribution takes at most 2 arguments.');
                $alpha = get_default($args, ['alpha', 0], 1.0);
                $beta = det_default($args, ['beta', 1], 1.0);
                grokit_assert(is_numeric($alpha), 'ClusterGen: `alpha` parameter of gamma distribution must be a real number.');
                grokit_assert(is_numeric($beta), 'ClusterGen: `beta` parameter of gamma distribution must be a real number.');
                $alpha = floatval($alpha);
                $beta = floatval($beta);
                $distArgs = [$alpha, $beta];
                break;
            default:
                grokit_error('ClusterGen: Unknown distribution ' . $name . ' given for center');
        }
        return [$distName, $distArgs];
    };
    $dists = [];
    $distArgs = [];
    $count = 0;
    $oType = '';
    $nCenters = 1;
    reset($outputs);
    foreach ($centers as $val) {
        $cluster = $val;
        if (is_functor($val)) {
            $cluster = [$val];
        } else {
            if (is_array($val)) {
                $nCenters = lcm($nCenters, \count($val));
            } else {
                grokit_error('ClusterGen: center descriptions must be functors or list of functors');
            }
        }
        $curDist = [];
        $curDistArgs = [];
        $curDistName = 'distribution' . $count++;
        $oType = strval(current($outputs));
        $iCount = 0;
        foreach ($cluster as $functor) {
            grokit_assert(is_functor($functor), 'ClusterGen: center description must be a functor');
            $vName = $curDistName . '_' . $iCount++;
            $ret = $handleDist($functor->name(), $functor->args(), $oType);
            $curDist[$vName] = $ret[0];
            $curDistArgs[$vName] = $ret[1];
        }
        next($outputs);
        $dists[$curDistName] = $curDist;
        $distArgs[$curDistName] = $curDistArgs;
    }
    // Determine the default number of sets to compute at a time.
    // We want to generate either $nTuples or 10,000 tuples, depending on which
    // is less.
    $defaultSetsTarget = min($nTuples, 10000);
    $setsToTarget = intval(ceil($defaultSetsTarget / $nCenters));
    $computeSets = get_default($t_args, 'compute.sets', $setsToTarget);
    grokit_assert(is_int($computeSets) && $computeSets > 0, 'ClusterGen: compute.sets must be a positive integer, ' . $computeSets . ' given');
    $className = generate_name('ClusterGen');
    // For some BIZZARE reason, the $outputs array was getting modified while
    // traversing over the $dists array. Making a deep copy of the outputs and
    // then reassigning it seems to fix the issue.
    $outputs = $myOutputs;
    ?>

class <?php 
    echo $className;
    ?>
 {

    // The number of tuples to produce per task
    static constexpr size_t N = <?php 
    echo $nTuples;
    ?>
;
    static constexpr size_t CacheSize = <?php 
    echo $computeSets * $nCenters;
    ?>
;

    // Typedefs
    typedef std::tuple<<?php 
    echo array_template('{val}', ', ', $outputs);
    ?>
> Tuple;
    typedef std::array<Tuple, CacheSize> TupleArray;
    typedef TupleArray::const_iterator TupleIterator;
    typedef <?php 
    echo $RNGtype;
    ?>
 RandGen;

    // Number of tuples produced.
    uintmax_t count;

    // Cache a number of outputs for efficiency
    TupleArray cache;
    TupleIterator cacheIt;

    // Random number generator
    RandGen rng;

    // Distributions
<?php 
    // This is the section causing issues.
    foreach ($dists as $name => $list) {
        foreach ($list as $vName => $type) {
            ?>
    <?php 
            echo $type;
            ?>
 <?php 
            echo $vName;
            ?>
;
<?php 
        }
        // foreach distribution
    }
    // foreach cluster set
    ?>

    // Helper function to generate tuples.
    void GenerateTuples(void) {
<?php 
    $tIndex = 0;
    foreach ($dists as $name => $list) {
        $lCenters = \count($list);
        // $nCenters has been defined to be the LCM of the number of centers in
        // any column, so $lCenter is guaranteed to divide evenly into
        // CacheSize
        ?>
        for( size_t index = 0; CacheSize > index; index += <?php 
        echo $lCenters;
        ?>
 ) {
<?php 
        $index = 0;
        foreach ($list as $vName => $type) {
            ?>
            std::get<<?php 
            echo $tIndex;
            ?>
>(cache[index + <?php 
            echo $index;
            ?>
]) = <?php 
            echo $vName;
            ?>
(rng);
<?php 
            $index++;
        }
        // foreach value in tuple
        ?>
        }
<?php 
        $tIndex++;
    }
    // foreach distribution
    ?>
        cacheIt = cache.cbegin();
    }

public:
    // Constructor
    <?php 
    echo $className;
    ?>
( GIStreamProxy & _stream ) :
        cache()
        , cacheIt()
        , count(0)
        , rng()
<?php 
    foreach ($dists as $name => $list) {
        foreach ($list as $vName => $type) {
            ?>
        , <?php 
            echo $vName;
            ?>
(<?php 
            echo implode(', ', $distArgs[$name][$vName]);
            ?>
)
<?php 
        }
        // foreach distribution
    }
    // foreach cluster set
    ?>

    {
<?php 
    if (is_null($seed)) {
        ?>
        <?php 
        echo $distNS;
        ?>
::random_device rd;
<?php 
    }
    // if seed is null
    ?>
        RandGen::result_type seed = <?php 
    echo is_null($seed) ? 'rd()' : "CongruentHash({$seed}, _stream.get_id() )";
    ?>
;
        rng.seed(seed);

        cacheIt = cache.cend();
    }

    // Destructor
    ~<?php 
    echo $className;
    ?>
(void) { }

    bool ProduceTuple(<?php 
    echo typed_ref_args($outputs);
    ?>
) {
        if( N > count ) {
            if( cacheIt == cache.cend() ) {
                GenerateTuples();
            }
<?php 
    $tIndex = 0;
    foreach ($outputs as $name => $type) {
        ?>
            <?php 
        echo $name;
        ?>
 = std::get<<?php 
        echo $tIndex;
        ?>
>(*cacheIt);
<?php 
        $tIndex++;
    }
    // foreach output
    ?>

            ++cacheIt;
            ++count;

            return true;
        }
        else {
            return false;
        }
    }
};

<?php 
    return array('kind' => 'GI', 'name' => $className, 'output' => $outputs, 'system_headers' => $sys_headers, 'user_headers' => $user_headers, 'libraries' => $libraries);
}
Beispiel #3
0
function Join($t_args, $inputs, $outputs, $states)
{
    // Class name randomly generated
    $className = generate_name('Join');
    // Processing of states;
    $states_ = array_combine(['mapping'], $states);
    // Processing of outputs.
    $values = $states_['mapping']->get('vals');
    $outputs = array_combine(array_keys($outputs), $values);
    $sys_headers = ['map', 'tuple'];
    $user_headers = [];
    $lib_headers = [];
    $libraries = [];
    $properties = ['list'];
    $extra = [];
    $result_type = ['multi'];
    ?>

class <?php 
    echo $className;
    ?>
;

<?php 
    $constantState = lookupResource("Join_Constant_State", ['className' => $className, 'states' => $states]);
    ?>

class <?php 
    echo $className;
    ?>
 {
 private:
  using ConstantState = <?php 
    echo $constantState;
    ?>
;
  using Mapping = ConstantState::Mapping;
  using Iter = Mapping::Map::const_iterator;

  // The GroupBy containing the hash.
  const <?php 
    echo $constantState;
    ?>
& constant_state;

  // The iterator used for the multi return.
  Iter it, end;

 public:
  <?php 
    echo $className;
    ?>
(const <?php 
    echo $constantState;
    ?>
& state)
      : constant_state(state) {
  }

  void ProcessTuple(<?php 
    echo const_typed_ref_args($inputs);
    ?>
) {
    auto key = Mapping::ChainHash(<?php 
    echo args($inputs);
    ?>
);
    auto pair = constant_state.map.equal_range(key);
    it = pair.first;
    end = pair.second;
  }

  bool GetNextResult(<?php 
    echo typed_ref_args($outputs);
    ?>
) {
    if (it == end)
      return false;
<?php 
    foreach (array_keys($outputs) as $index => $name) {
        ?>
    <?php 
        echo $name;
        ?>
 = std::get<<?php 
        echo $index;
        ?>
>(it->second);
<?php 
    }
    ?>
    ++it;
    return true;
  }
};

<?php 
    return ['kind' => 'GT', 'name' => $className, 'system_headers' => $sys_headers, 'user_headers' => $user_headers, 'lib_headers' => $lib_headers, 'libraries' => $libraries, 'input' => $inputs, 'output' => $outputs, 'result_type' => 'multi', 'generated_state' => $constantState];
}
Beispiel #4
0
function GroupBy(array $t_args, array $inputs, array $outputs, array $states)
{
    // Ensure we have valid inputs.
    if (\count($inputs) == 0) {
        // No inputs given, try to get them from template arguments.
        grokit_assert(array_key_exists('input', $t_args), 'No inputs given for GroupBy');
        $inputs = $t_args['input'];
        if (!is_array($inputs)) {
            $inputs = [$inputs];
        }
        foreach ($inputs as $name => &$type) {
            if (is_identifier($type)) {
                $type = lookupType(strval($type));
            }
            grokit_assert(is_datatype($type), 'Invalid type given for input ' . $name);
        }
    }
    grokit_assert(array_key_exists('group', $t_args), 'No groups specified for GroupBy');
    $gbyAttMap = $t_args['group'];
    grokit_assert(is_array($gbyAttMap), 'Invalid value given for groups, expected an expression name or list of expression names');
    $gbyAttMap = array_map('strval', $gbyAttMap);
    $gbyAttNames = array_keys($gbyAttMap);
    foreach ($gbyAttMap as $in => $out) {
        grokit_assert(array_key_exists($in, $inputs), 'Group ' . $in . ' not present in input');
        grokit_assert(array_key_exists($out, $outputs), 'Output Attribute ' . $out . ' for group ' . $in . ' not found in outputs');
    }
    $numGByAtts = \count($gbyAttNames);
    grokit_assert(array_key_exists('aggregate', $t_args), 'No aggregate specified for GroupBy');
    $innerGLA = $t_args['aggregate'];
    grokit_assert(is_gla($innerGLA), 'Non-GLA specified as aggregate for GroupBy');
    $debug = get_default($t_args, 'debug', 0);
    $init_size = get_default($t_args, 'init.size', 1024);
    $use_mct = get_default($t_args, 'use.mct', true);
    $keepHashes = get_default($t_args, 'mct.keep.hashes', false);
    grokit_assert(is_bool($keepHashes), 'GroupBy mct.keep.hashes argument must be boolean');
    // determine the result type
    $use_fragments = get_default($t_args, 'use.fragments', true);
    $resType = $use_fragments ? ['fragment', 'multi'] : ['multi'];
    $fragSize = get_default($t_args, 'fragment.size', 2000000);
    // Always support state
    $resType[] = 'state';
    // Class name randomly generated
    $className = generate_name("GroupBy");
    // instantiate the inner GLA. input/output is derived from the main input/output
    $gbyAtts = [];
    $gbyAttsOut = [];
    $glaInputAtts = [];
    $glaOutputAtts = [];
    foreach ($inputs as $name => $type) {
        if (in_array($name, $gbyAttNames)) {
            $gbyAtts[$name] = $type;
            $gbyAttsOut[$gbyAttMap[$name]] = $type;
            $outputs[$gbyAttMap[$name]] = $type;
        } else {
            $glaInputAtts[$name] = $type;
        }
    }
    foreach ($outputs as $name => $type) {
        if (!in_array($name, $gbyAttMap)) {
            $glaOutputAtts[$name] = $type;
        }
    }
    $innerGLA = $innerGLA->apply($glaInputAtts, $glaOutputAtts, $states);
    $libraries = $innerGLA->libraries();
    $innerRes = get_first_value($innerGLA->result_type(), ['multi', 'single', 'state']);
    if ($innerRes == 'state') {
        // If the result type is state, the only output is a state object
        // containing the GLA.
        $outputName = array_keys($glaOutputAtts)[0];
        $innerOutputs = [$outputName => lookupType('base::STATE', ['type' => $innerGLA])];
    } else {
        $innerOutputs = $innerGLA->output();
        grokit_assert(\count($innerOutputs) == \count($glaOutputAtts), 'Expected ' . \count($glaOutputAtts) . ' outputs fromm Inner GLA, got ' . \count($innerOutputs));
    }
    $constState = lookupResource('GroupByState', ['gla' => $innerGLA, 'groups' => $gbyAtts, 'debug' => $debug]);
    // constructor argumetns are inherited from inner GLA
    $configurable = $innerGLA->configurable();
    $reqStates = $innerGLA->req_states();
    // We need to specially create the constructor string because apparently
    // declaring Type Name(); is a function declaration instead of a variable
    // declaration for some reason.
    $constructorParts = [];
    if ($configurable) {
        $constructorParts[] = 'jsonInit';
    }
    if ($innerGLA->has_state()) {
        $constructorParts[] = 'innerState';
    }
    $constructorString = \count($constructorParts) > 0 ? '(' . implode(', ', $constructorParts) . ')' : '';
    // add the outputs we got from the gla
    foreach ($innerOutputs as $name => $type) {
        grokit_assert(array_key_exists($name, $outputs), 'Inner GLA\'s outputs refer to unknown attribute ' . $name);
        grokit_assert($type !== null, 'GroupBy Inner GLA left output ' . $name . ' with no type');
        $outputs[$name] = $type;
    }
    $iterable = $innerGLA->iterable();
    // need to keep track of system includes needed
    $extraHeaders = array();
    $allocatorText = "std::allocator<std::pair<const Key, {$innerGLA}> >";
    if ($use_mct) {
        $keepHashesText = $keepHashes ? 'true' : 'false';
        $extraHeaders[] = "mct/hash-map.hpp";
        $map = "mct::closed_hash_map<Key, {$innerGLA}, HashKey, std::equal_to<Key>, {$allocatorText}, {$keepHashesText}>";
        $mapType = 'mct::closed_hash_map';
    } else {
        $extraHeaders[] = "unordered_map";
        $map = "std::unordered_map<Key, {$innerGLA}, HashKey, std::equal_to<Key>, {$allocatorText}>";
        $mapType = 'std::unordered_map';
    }
    if ($debug > 0) {
        $extraHeaders[] = 'cstdio';
    }
    ?>


class <?php 
    echo $className;
    ?>
{
public:
    using ConstantState = <?php 
    echo $constState;
    ?>
;
<?php 
    if ($innerGLA->has_state()) {
        ?>
    using InnerState = ConstantState::InnerState;
<?php 
    }
    // if gla has state
    ?>
    using Key = ConstantState::Key;
    using HashKey = ConstantState::HashKey;
    using InnerGLA = <?php 
    echo $innerGLA;
    ?>
;

    typedef <?php 
    echo $map;
    ?>
 MapType;
    static const size_t INIT_SIZE = <?php 
    echo $init_size;
    ?>
;

public:
    class Iterator {
        MapType::iterator it; // current value
        MapType::iterator end; // last value in the fragment

    public:
        Iterator() { }

        Iterator(MapType::iterator _it, MapType::iterator _end):
            it(_it), end(_end)
        {
            if( it != end ) {

<?php 
    switch ($innerRes) {
        case 'multi':
            ?>
            it->second.Finalize();
<?php 
            break;
        case 'state':
            if ($innerGLA->finalize_as_state()) {
                ?>
            it->second.FinalizeState();
<?php 
            }
            // if we need to finalize as a state
            break;
    }
    // end switch inner restype
    ?>
            }
        }

        bool GetNextResult( <?php 
    echo typed_ref_args($outputs);
    ?>
 ) {
            bool gotResult = false;
            while( it != end && !gotResult ) {
                <?php 
    echo $innerGLA;
    ?>
 & gla = it->second;
<?php 
    foreach ($gbyAttMap as $in => $out) {
        ?>
                <?php 
        echo $out;
        ?>
 = it->first.<?php 
        echo $in;
        ?>
;
<?php 
    }
    // foreach grouping attribute
    ?>

<?php 
    switch ($innerRes) {
        case 'multi':
            ?>
                gotResult = gla.GetNextResult( <?php 
            echo args($innerOutputs);
            ?>
);
                if( !gotResult ) {
                    ++it;
                    if( it != end ) {
                        it->second.Finalize();
                    }
                }
<?php 
            break;
        case 'single':
            ?>
                gotResult = true;
                gla.GetResult(<?php 
            echo args($innerOutputs);
            ?>
);
                ++it;
<?php 
            break;
        case 'state':
            reset($innerOutputs);
            // Assuming that $innerOutputs contains a single value that is
            // the state type.
            $oName = key($innerOutputs);
            $oType = current($innerOutputs);
            ?>
                gotResult = true;
                <?php 
            echo $oName;
            ?>
 = <?php 
            echo $oType;
            ?>
( &gla );
                ++it;
<?php 
    }
    // switch inner result type
    ?>
            }

            return gotResult;
        }
    };

private:
    const ConstantState & constState;

<?php 
    if ($configurable) {
        ?>
    const Json::Value jsonInit;
<?php 
    }
    // if configurable
    ?>

    size_t count;

    MapType groupByMap;

    std::vector<MapType::iterator> theIterators;  // the iterators, only 2 elements if multi, many if fragment
    Iterator multiIterator;

public:

    <?php 
    echo $className;
    ?>
(<?php 
    if ($configurable) {
        ?>
const Json::Value & _jsonInit, <?php 
    }
    ?>
const ConstantState & _constState ) :
        constState(_constState)
<?php 
    if ($configurable) {
        ?>
        , jsonInit(_jsonInit)
<?php 
    }
    // if configurable
    ?>
        , count(0)
        , groupByMap( INIT_SIZE )
        , theIterators()
        , multiIterator()
    { }

    ~<?php 
    echo $className;
    ?>
() {}

    void Reset(void) {
        count = 0;
        groupByMap.clear();
        theIterators.clear();
    }

    void AddItem(<?php 
    echo array_template('const {val} & {key}', ', ', $inputs);
    ?>
) {
        count++;
        // check if _key is already in the map; if yes, add _value; else, add a new
        // entry (_key, _value)
        Key key(<?php 
    echo array_template('{key}', ', ', $gbyAtts);
    ?>
);

        MapType::iterator it = groupByMap.find(key);
        if (it == groupByMap.end()) { // group does not exist
            // create an empty GLA and insert
            // better to not add the item here so we do not have
            // to transport a large state

<?php 
    if ($innerGLA->has_state()) {
        ?>
            const InnerState & innerState = constState.getConstState(key);
<?php 
    }
    // if gla has state
    ?>
            InnerGLA gla<?php 
    echo $constructorString;
    ?>
;
            auto ret = groupByMap.insert(MapType::value_type(key, gla));
            it = ret.first; // reposition
        }
        it->second.AddItem(<?php 
    echo array_template('{key}', ', ', $glaInputAtts);
    ?>
);
    }

    void AddState(<?php 
    echo $className;
    ?>
& other) {
        count += other.count;
        // scan other hash and insert or update content in this one
        for (MapType::iterator it = other.groupByMap.begin(); it != other.groupByMap.end();
                ++it) {
            const Key& okey = it->first;
            <?php 
    echo $innerGLA;
    ?>
& ogla = it->second;

            MapType::iterator itt = groupByMap.find(okey);
            if (itt != groupByMap.end()) { // found the group
                <?php 
    echo $innerGLA;
    ?>
& gla = itt->second;
                gla.AddState(ogla);
            } else {
                // add the other group to this hash
                groupByMap.insert(MapType::value_type(okey, ogla));
            }
        }
    }

<?php 
    if ($iterable) {
        ?>
    bool ShouldIterate(ConstantState& modibleState) {
<?php 
        if ($debug > 0) {
            ?>
        fprintf(stderr, "<?php 
            echo $className;
            ?>
: ==== ShouldIterate ====\n");
<?php 
        }
        // if debugging enabled
        ?>
        bool shouldIterate = false;
        for( MapType::iterator it = groupByMap.begin(); it != groupByMap.end(); ++it ) {
            const Key & key = it->first;
            InnerGLA & gla = it->second;
<?php 
        if ($innerGLA->has_state()) {
            ?>
            InnerState & innerState = modibleState.getModibleState(key);
<?php 
        }
        // if gla has state
        ?>
            bool glaRet = gla.ShouldIterate(innerState);
            shouldIterate = shouldIterate || glaRet;
<?php 
        if ($debug > 0) {
            ?>
            fprintf(stderr, "<?php 
            echo $className;
            ?>
: Key(%s) shouldIterate(%s)\n",
                key.to_string().c_str(),
                glaRet ? "true" : "false");
<?php 
        }
        // if debugging enabled
        ?>
        }

        return shouldIterate;
    }
<?php 
    }
    // if iterable
    ?>

<?php 
    if (in_array('fragment', $resType)) {
        ?>

    int GetNumFragments(void){
        int size = groupByMap.size();
        int sizeFrag = <?php 
        echo $fragSize;
        ?>
;
        // setup the fragment boundaries
        // scan via iterator and count
        int frag=0;
        int pos=0;
        MapType::iterator it = groupByMap.begin();
        theIterators.clear();
        theIterators.push_back( it );
        // special case when size < num_fragments
        // >
        if (sizeFrag == 0){
            it = groupByMap.end();
            theIterators.push_back( it );
            return 1; // one fragment
        }

        while(it!=groupByMap.end()){
            while(it!=groupByMap.end() && pos<( frag + 1 )*sizeFrag){
//>
                ++it;
                pos++;
            }
            theIterators.push_back( it );
            frag++;
        }

<?php 
        if ($debug > 0) {
            ?>
        fprintf(stderr, "<?php 
            echo $className;
            ?>
: fragments(%d)\n", frag);
<?php 
        }
        ?>

        return frag;

    }

    Iterator* Finalize(int fragment){
        // Call finalize on all inner GLAs in this fragment.
        MapType::iterator iter = theIterators[fragment];
        MapType::iterator iterEnd = theIterators[fragment+1];

        Iterator* rez
            = new Iterator(theIterators[fragment], theIterators[fragment+1] );
        return rez;
    }

    bool GetNextResult(Iterator* it,  <?php 
        echo array_template('{val} & {key}', ', ', $outputs);
        ?>
) {
        return it->GetNextResult(<?php 
        echo args($outputs);
        ?>
);
    }
<?php 
    }
    // if using fragment interface
    ?>

    void Finalize() {
        multiIterator = Iterator( groupByMap.begin(), groupByMap.end() );

<?php 
    if ($debug >= 1) {
        ?>
        fprintf(stderr, "<?php 
        echo $className;
        ?>
: groups(%lu) tuples(%lu)\n", groupByMap.size(), count);
<?php 
    }
    ?>
    }

    bool GetNextResult(<?php 
    echo array_template('{val} & {key}', ', ', $outputs);
    ?>
) {
        return multiIterator.GetNextResult( <?php 
    echo args($outputs);
    ?>
 );
    }

    std::size_t size() const {
        return groupByMap.size();
    }

    const MapType& GetMap() const {
      return groupByMap;
    }

    bool Contains(<?php 
    echo const_typed_ref_args($gbyAtts);
    ?>
) const {
      Key key(<?php 
    echo args($gbyAtts);
    ?>
);
      return groupByMap.count(key) > 0;
    }

    const InnerGLA& Get(<?php 
    echo const_typed_ref_args($gbyAtts);
    ?>
) const {
      Key key(<?php 
    echo args($gbyAtts);
    ?>
);
      return groupByMap.at(key);
    }

    bool Contains(Key key) const {
      return groupByMap.count(key) > 0;
    }

    const InnerGLA& Get(Key key) const {
      return groupByMap.at(key);
    }
};

<?php 
    if (in_array('fragment', $resType)) {
        ?>
typedef <?php 
        echo $className;
        ?>
::Iterator <?php 
        echo $className;
        ?>
_Iterator;
<?php 
    }
    ?>

<?php 
    $sys_headers = array_merge(['iomanip', 'iostream', 'cstring'], $extraHeaders);
    return array('kind' => 'GLA', 'name' => $className, 'system_headers' => $sys_headers, 'user_headers' => array('HashFunctions.h'), 'input' => $inputs, 'output' => $outputs, 'result_type' => $resType, 'configurable' => $configurable, 'generated_state' => $constState, 'required_states' => $reqStates, 'iterable' => $iterable, 'properties' => ['resettable', 'finite container'], 'libraries' => $libraries, 'extra' => ['inner_gla' => $innerGLA]);
}
Beispiel #5
0
function CCompLP($t_args, $inputs, $outputs)
{
    // Class name is randomly generated.
    $className = generate_name('CComp');
    // Initializiation of argument names.
    $inputs_ = array_combine(['s', 't'], $inputs);
    $vertex = $inputs_['s'];
    // Construction of outputs.
    $outputs_ = ['node' => $vertex, 'component' => lookupType('int')];
    $outputs = array_combine(array_keys($outputs), $outputs_);
    $sys_headers = ['armadillo', 'algorithm'];
    $user_headers = [];
    $lib_headers = [];
    $libraries = ['armadillo'];
    $properties = [];
    $extra = [];
    $result_type = ['multi'];
    ?>

using namespace arma;
using namespace std;

class <?php 
    echo $className;
    ?>
;

<?php 
    $constantState = lookupResource('graph::CCompLP_Constant_State', ['className' => $className]);
    ?>

class <?php 
    echo $className;
    ?>
 {
 public:
  // The constant state for this GLA.
  using ConstantState = <?php 
    echo $constantState;
    ?>
;

  // The number of iterations to perform, not counting the initial set-up.
  static const constexpr int kIterations = 30;

  // The work is split into chunks of this size before being partitioned.
  static const constexpr int kBlock = 32;

  // The maximum number of fragments to use.
  static const constexpr int kMaxFragments = 64;

 private:
  
  // Node Component
  static arma::rowvec node_component;

  // The typical constant state for an iterable GLA.
  const ConstantState& constant_state;

  // The number of unique nodes seen.
  long num_nodes;

  // 
  long output_iterator;

  // The current iteration.
  int iteration;
  
  // check if need more iterations
  long connections;

 public:
  <?php 
    echo $className;
    ?>
(const <?php 
    echo $constantState;
    ?>
& state)
      : constant_state(state),
        num_nodes(state.num_nodes),
        iteration(state.iteration),output_iterator(0),connections(0) {
  }

  // Basic dynamic array allocation.
  void AddItem(<?php 
    echo const_typed_ref_args($inputs_);
    ?>
) {
    if (iteration == 0) {
      num_nodes = max((long) max(s, t), num_nodes);
      return;
    } /*else if (iteration == 1){
      long max_known = (long) max(s, t);
      node_component(s) = max_known;
      node_component(t) = max_known;
      ++ connections;
    }*/else{
      long s_id = node_component(s), t_id = node_component(t);
      if (s_id != t_id){
        ++ connections;
        if (s_id > t_id)
          node_component(t) = s_id;
        else
          node_component(s) = t_id;
      }
    }
  }

  // Hashes are merged.
  void AddState(<?php 
    echo $className;
    ?>
 &other) {
    if (iteration == 0)
      num_nodes = max(num_nodes, other.num_nodes);
    else
      connections += other.connections;
  }

  // Most computation that happens at the end of each iteration is parallelized
  // by performed it inside Finalize.
  bool ShouldIterate(ConstantState& state) {
    state.iteration = ++iteration;

    if (iteration == 1) {// allocate memory
      // num_nodes is incremented because IDs are 0-based.
      state.num_nodes = ++num_nodes;
      // Allocating space can't be parallelized.
      node_component.set_size(num_nodes);
      for (long i = 0; i < num_nodes; ++ i)
        node_component(i) = i;
        
      return true;
    } else {
      return connections > 0 && iteration < kIterations + 1;
    }
  }
  // Finalize does nothing
  void Finalize() {}

  bool GetNextResult(<?php 
    echo typed_ref_args($outputs_);
    ?>
) {
    // should iterate is true
    if (connections > 0 && iteration < kIterations + 1)
      return false;
    
    if(output_iterator < num_nodes){
      node = output_iterator++;
      component = node_component(node);
      return true;
    }else{
      return false;
    }
  }
    
};

// Initialize the static member types.
arma::rowvec <?php 
    echo $className;
    ?>
::node_component;

<?php 
    return ['kind' => 'GLA', 'name' => $className, 'system_headers' => $sys_headers, 'user_headers' => $user_headers, 'lib_headers' => $lib_headers, 'libraries' => $libraries, 'properties' => $properties, 'extra' => $extra, 'iterable' => true, 'input' => $inputs, 'output' => $outputs, 'result_type' => $result_type, 'generated_state' => $constantState];
}
Beispiel #6
0
function Segmenter(array $t_args, array $input, array $output, array $given_states)
{
    $resType = ['fragment', 'multi'];
    $system_headers = ['array', 'vector', 'memory', 'cinttypes', 'unordered_map'];
    $user_headers = ['HashFunctions.h'];
    $lib_headers = [];
    $preferFragment = get_default($t_args, 'inner.prefer.fragment', false);
    $wantedRes = $preferFragment ? ['fragment', 'multi'] : ['multi', 'fragment'];
    $nInputs = \count($input);
    grokit_assert($nInputs > 1, 'Segmenter: Not enough inputs specified!');
    $keyName = array_keys($input)[0];
    $keyType = array_get_index($input, 0);
    $innerInputs = array_slice($input, 1, $nInputs - 1, true);
    $gla = get_first_key($t_args, ['gla', 'GLA', 0]);
    grokit_assert(is_gla($gla), 'Segmenter: [gla] argument must be a valid GLA');
    $gla = $gla->apply($innerInputs, $output, $given_states);
    $n_passes = get_default($t_args, 'passes', 1);
    grokit_assert(is_int($n_passes), 'Segmenter: [passes] argument must be an integer');
    grokit_assert($n_passes > 0, 'Segmenter: [passes] argument must be > 0');
    $libraries = $gla->libraries();
    $innerRes = get_first_value($gla->result_type(), $wantedRes);
    $innerInputs = $gla->input();
    $innerOutput = $gla->output();
    $input = array_merge([$keyName => $keyType], $innerInputs);
    $output = $innerOutput;
    $segments = get_default($t_args, 'segments', 64);
    $constState = lookupResource('BASE::SegmenterState', ['gla' => $gla, 'passes' => $n_passes, 'segments' => $segments]);
    $className = generate_name('Segmenter_');
    $savedArgs = [];
    $cArgs = [];
    $innerCArgs = [];
    if ($gla->configurable()) {
        $savedArgs['json_init'] = 'Json::Value';
        $cArgs['json_init'] = 'Json::Value';
        $innerCArgs[] = 'json_init';
    }
    $cArgs['const_state'] = $constState;
    if ($gla->has_state()) {
        $innerCArgs[] = 'constState.inner_cstate';
    }
    $cstStr = \count($innerCArgs) > 0 ? '(' . implode(',', $innerCArgs) . ')' : '';
    grokit_assert(!$gla->iterable(), 'Segementer does not support iterable GLAs');
    $iterable = $n_passes > 1;
    ?>

class <?php 
    echo $className;
    ?>
 {
private:

    using ConstantState = <?php 
    echo $constState;
    ?>
;
    using SplitState = ConstantState::SplitState;

    static constexpr const size_t NUM_STATES = SplitState::NUM_STATES;

    using InnerGLA = <?php 
    echo $gla;
    ?>
;
    using InnerGLAPtr = std::unique_ptr<InnerGLA>;
    using GLA_Array = std::array<InnerGLAPtr, NUM_STATES>;

public:
    using size_type = std::size_t;

<?php 
    if ($innerRes == 'fragment') {
        ?>
    class Iterator {
    private:
        InnerGLA * gla;
        int fragmentNum;

        InnerGLA::Iterator * innerIter;

    public:
        Iterator( InnerGLA * _gla, int _fragmentNum, int _innerFrag ) :
            gla(_gla), fragmentNum(_fragmentNum),
            innerIter(nullptr)
        {
            innerIter = gla->Finalize(_innerFrag);
        }

        ~Iterator(void) {
            if( innerIter != nullptr ) {
                delete innerIter;
                innerIter = nullptr;
            }
        }

        bool GetNextResult( <?php 
        echo typed_ref_args($gla->output());
        ?>
 ) {
            return innerIter->GetNextResult(<?php 
        echo args($gla->output());
        ?>
);
        }

        int FragmentNumber() {
            return fragmentNum;
        }
    };
<?php 
    } else {
        // if inner result type is fragment
        ?>
    class Iterator {
    private:
        InnerGLA * gla;
        int fragmentNum;

    public:
        Iterator( InnerGLA * _gla, int fragNo ) : gla(_gla), fragmentNum(fragNo) {
            gla->Finalize();
        }

        ~Iterator(void) { }

        bool GetNextResult( <?php 
        echo typed_ref_args($gla->output());
        ?>
 ) {
            return gla->GetNextResult(<?php 
        echo args($gla->output());
        ?>
);
        }

        int FragmentNumber() {
            return fragmentNum;
        }
    };
<?php 
    }
    // if inner result type is multi
    ?>

private:

    const ConstantState & constState;
    GLA_Array localState;

    // Iteration state for multi result type
    int numFrags;
    int multiFragNo;
    Iterator * multiIter;

<?php 
    if ($innerRes == 'fragment') {
        ?>
    using frag_info = std::pair<int, int>;
    using frag_map_t = std::unordered_map<int, frag_info>;
    frag_map_t fragMap;
<?php 
    }
    ?>


<?php 
    foreach ($savedArgs as $name => $type) {
        ?>
    const <?php 
        echo $type;
        ?>
 <?php 
        echo $name;
        ?>
;
<?php 
    }
    // foreach saved arg
    ?>

public:

    // Constructor
    <?php 
    echo $className;
    ?>
( <?php 
    echo const_typed_ref_args($cArgs);
    ?>
 ) :
        constState(const_state)
        , localState()
        , numFrags(0)
        , multiFragNo(0)
        , multiIter(nullptr)
<?php 
    if ($innerRes == 'fragment') {
        ?>
        , fragMap()
<?php 
    }
    foreach ($savedArgs as $name => $type) {
        ?>
        , <?php 
        echo $name;
        ?>
(<?php 
        echo $name;
        ?>
)
<?php 
    }
    // foreach constructor arg to save
    ?>
    {
        for( auto & elem : localState ) {
            elem.reset(new InnerGLA<?php 
    echo $cstStr;
    ?>
);
        }
    }

    void AddItem( <?php 
    echo const_typed_ref_args($input);
    ?>
 ) {
        uint64_t hashVal = CongruentHash(Hash(<?php 
    echo $keyName;
    ?>
), H_b + 1);
        uint64_t passNum = (hashVal / NUM_STATES) % ConstantState::N_PASSES;
        uint64_t segNum = hashVal % NUM_STATES;

<?php 
    if ($n_passes > 1) {
        ?>
        if( passNum != constState.pass ) {
            return;
        }
<?php 
    }
    // more than 1 pass
    ?>
        localState[segNum]->AddItem(<?php 
    echo args($innerInputs);
    ?>
);
    }

    void ChunkBoundary(void) {
        // Merge local states into the global state

        SplitState & globalStates = constState.segments;

        int theseAreOk[NUM_STATES];
        for( int i = 0; NUM_STATES > i; i++ ) {
            theseAreOk[i] = 1;
        }

        int segsLeft = NUM_STATES;

        while( segsLeft > 0 ) {
            InnerGLA * checkedOut = nullptr;
            int whichOne = globalStates.CheckOutOne( theseAreOk, checkedOut );

            if( checkedOut == NULL ) {
                checkedOut = new InnerGLA<?php 
    echo $cstStr;
    ?>
;
            }

            checkedOut->AddState( *(localState[whichOne]) );

            globalStates.CheckIn( whichOne, checkedOut );

            theseAreOk[whichOne] = 0;
            segsLeft--;
        }

        // Re-initialize the local states
        for( auto & elem : localState ) {
<?php 
    if ($gla->is('resettable')) {
        ?>
            elem->Reset();
<?php 
    } else {
        // if resettable
        ?>
            elem.reset(new InnerGLA<?php 
        echo $cstStr;
        ?>
);
<?php 
    }
    // if not resettable
    ?>
        }
    }

    void AddState( <?php 
    echo $className;
    ?>
 & o ) {
        // Do nothing
    }

    void Finalize() {
        SplitState & globalStates = constState.segments;

        if( multiIter != nullptr)
            delete multiIter;

        multiFragNo = 0;
<?php 
    if ($innerRes == 'fragment') {
        ?>
        frag_info fInfo = fragMap[multiFragNo];
        multiIter = new Iterator(globalStates.Peek(fInfo.first), multiFragNo,
            fInfo.second);
<?php 
    } else {
        ?>
        multiIter = new Iterator(globalStates.Peek(multiFragNo), multiFragNo);
<?php 
    }
    ?>
    }

    bool GetNextResult(<?php 
    echo typed_ref_args($output);
    ?>
) {
        bool gotResult = false;
        SplitState & globalStates = constState.segments;

        while( (multiFragNo < numFrags && multiIter != nullptr) && !gotResult ) {
            gotResult = multiIter->GetNextResult(<?php 
    echo args($output);
    ?>
);

            if( !gotResult ) {
                multiFragNo++;
                delete multiIter;

                if( numFrags > multiFragNo ) {
<?php 
    if ($innerRes == 'fragment') {
        ?>
                    frag_info fInfo = fragMap[multiFragNo];
                    multiIter = new Iterator(globalStates.Peek(fInfo.first), multiFragNo,
                        fInfo.second);
<?php 
    } else {
        ?>
                    multiIter = new Iterator(globalStates.Peek(multiFragNo), multiFragNo);
<?php 
    }
    ?>
                } else {
                    multiIter = nullptr;
                }
            }
        }

        return gotResult;
    }

    int GetNumFragments(void) {
<?php 
    if ($innerRes == 'fragment') {
        ?>
        SplitState & globalStates = constState.segments;
        numFrags = 0;

        for (int i = 0; i < NUM_STATES; i++) {
            int curFrags = globalStates.Peek(i)->GetNumFragments();

            for (int curFrag = 0; curFrag < curFrags; curFrag++) {
                fragMap[numFrags] = frag_info(i, curFrag);

                numFrags++;
            }
        }
<?php 
    } else {
        ?>
        numFrags = NUM_STATES;
<?php 
    }
    ?>
        return numFrags;
    }

    Iterator * Finalize( int fragment ) {
        SplitState & globalStates = constState.segments;

<?php 
    if ($innerRes == 'fragment') {
        ?>
        frag_info info = fragMap[fragment];
        return new Iterator(globalStates.Peek(info.first), fragment, info.second);
<?php 
    } else {
        ?>
        return new Iterator(globalStates.Peek(fragment), fragment);
<?php 
    }
    ?>
    }

    bool GetNextResult( Iterator * it, <?php 
    echo typed_ref_args($output);
    ?>
 ) {
        bool ret = it->GetNextResult(<?php 
    echo args($output);
    ?>
);

        return ret;
    }

<?php 
    if ($iterable) {
        ?>
    bool ShouldIterate( ConstantState & modible ) {
        modible.pass++;

        return modible.pass < ConstantState::N_PASSES;
    }

    void PostFinalize() {
        constState.segments.Reset();
    }
<?php 
    }
    // iterable
    ?>

<?php 
    if ($gla->is('finite container')) {
        ?>
    size_type size() {
        SplitState & globalStates = constState.segments;
        size_type s = 0;
        for( int i = 0; NUM_STATES > i; i++ ) {
            InnerGLA * ptr = globalStates.Peek(i);
            s += ptr->size();
        }

        return s;
    }

    size_type size(int frag) {
        SplitState & globalStates = constState.segments;
        return globalStates.Peek(frag)->size();
    }
<?php 
    }
    // if the gla is a container
    ?>
};

typedef <?php 
    echo $className;
    ?>
::Iterator <?php 
    echo $className;
    ?>
_Iterator;

<?php 
    return ['kind' => 'GLA', 'name' => $className, 'system_headers' => $system_headers, 'user_headers' => $user_headers, 'lib_headers' => $lib_headers, 'libraries' => $libraries, 'input' => $input, 'output' => $output, 'result_type' => $resType, 'generated_state' => $constState, 'required_states' => $gla->req_states(), 'chunk_boundary' => true, 'configurable' => $gla->configurable(), 'iterable' => $iterable, 'post_finalize' => $iterable, 'intermediates' => true];
}
Beispiel #7
0
function BernoulliSample($t_args, $inputs)
{
    $p = get_first_key_default($t_args, ['p', 0], 0.5);
    grokit_assert(is_float($p) || is_integer($p), "BernoulliSample: p must be a number in the range [0, 1]");
    grokit_assert($p >= 0 && $p <= 1, "BernoulliSample: p must be in the range [0, 1]");
    $rng = get_first_key_default($t_args, ['rng'], 'mt19937_64');
    $sys_headers = ['random'];
    // assuming std
    $libs = [];
    // assuming std
    $ns = 'std';
    // assuming standard library
    $cState = lookupResource('base::BernoulliSampleState', ['sys_headers' => $sys_headers, 'libs' => $libs, 'namespace' => $ns]);
    $name = generate_name('BernoulliSample');
    ?>

class <?php 
    echo $name;
    ?>
 {
public:
	using state_type = <?php 
    echo $cState;
    ?>
;
	using rng_type = <?php 
    echo $ns;
    ?>
::<?php 
    echo $rng;
    ?>
;
	using dist_type = <?php 
    echo $ns;
    ?>
::bernoulli_distribution;

private:

	rng_type rng;
	dist_type bernoulli;

public:
	const constexpr static double P = <?php 
    echo $p;
    ?>
;

	<?php 
    echo $name;
    ?>
(state_type & _state):
		rng(_state()),
		bernoulli(P)
	{ }

	bool Filter( <?php 
    echo const_typed_ref_args($inputs);
    ?>
 ) {
		return bernoulli(rng);
	}
};

<?php 
    return ['kind' => 'GF', 'name' => $name, 'input' => $inputs, 'system_headers' => $sys_headers, 'libraries' => $libs, 'generated_state' => $cState];
}
function page_rank_vd($t_args, $inputs, $outputs)
{
    $className = generate_name('PageRank');
    $inputs_ = array_combine(['s', 't'], $inputs);
    $vertex = $inputs_['s'];
    $outputs_ = ['node' => $vertex, 'rank' => lookupType('double')];
    $outputs = array_combine(array_keys($outputs), $outputs_);
    $sys_headers = ['vector', 'algorithm'];
    $user_headers = [];
    $lib_headers = [];
    $libraries = [];
    $properties = [];
    $extra = [];
    $result_type = ['fragment'];
    ?>

using namespace std;

class <?php 
    echo $className;
    ?>
;

<?php 
    $constantState = lookupResource('pagerankVD::page_rank_vd_Constant_State', ['className' => $className]);
    ?>

class <?php 
    echo $className;
    ?>
 {
 public:
  using ConstantState = <?php 
    echo $constantState;
    ?>
;
  using Iterator = std::pair<int, int>;
  static const constexpr float kDamping = 0.85;
  static const constexpr int nIterations = 5;
  static const constexpr int kBlock = 32;
  static const constexpr int kMaxFragments = 64;

 private:

  static std::vector<double> weight;
  static std::vector<double> pagerank;
  static std::vector<double> oldPagerank;

  const ConstantState& constant_state;
  long nVertices;
  int iteration;
  int num_fragments;

 public:
  <?php 
    echo $className;
    ?>
(const <?php 
    echo $constantState;
    ?>
& state)
      : constant_state(state),
        nVertices(state.nVertices),
        iteration(state.iteration) {
  }

  void AddItem(<?php 
    echo const_typed_ref_args($inputs_);
    ?>
) {
    if (iteration == 0) {
      nVertices = max((long) max(s, t), nVertices);
      return;
    } else if (iteration == 1) {
      weight[s]++;
    } else {
      oldPagerank[t] += weight[s] * pagerank[s];
    }
  }

  void AddState(<?php 
    echo $className;
    ?>
 &other) {
    if (iteration == 0)
      nVertices = max(nVertices, other.nVertices);
  }

  // Most computation that happens at the end of each iteration is parallelized
  // by performed it inside Finalize.
  bool ShouldIterate(ConstantState& state) {
    state.iteration = ++iteration;
    cout << "Finished Iteration: " << iteration << endl;
    if (iteration == 1) {
      state.nVertices = ++nVertices;
      cout << "num_nodes: " << nVertices << endl;
      oldPagerank.reserve(nVertices);
      pagerank.reserve(nVertices);
      weight.reserve(nVertices);
      std::fill (pagerank.begin(),pagerank.end(),1);
      return true;
    } else {
      return iteration < nIterations + 1;
    }
  }

  int GetNumFragments() {
    long size = (nVertices - 1) / kBlock + 1;  // nVertices / kBlock rounded up.
    num_fragments = (iteration == 0) ? 0 : min(size, (long) kMaxFragments);
    cout << "Returning " << num_fragments << " fragments" << endl;
    return num_fragments;
  }

  Iterator* Finalize(long fragment) {
    int count = nVertices;
    int first = fragment * (count / kBlock) / num_fragments * kBlock;
    int final = (fragment == num_fragments - 1) ? count - 1 : (fragment + 1) * (count / kBlock) / num_fragments * kBlock - 1;
    if  (iteration == 2) {
      for(int i = first; i <= final; i++){
        pagerank[i] = 1;
        weight[i] = 1/weight[i];
        oldPagerank[i] = 0;
      }
    } else {
      for(int i = first; i <= final; i++){
        pagerank[i] = (1 - kDamping) + kDamping * oldPagerank[i];
        oldPagerank[i] = 0;
      }
    }
    return new Iterator(first, final);
  }

  bool GetNextResult(Iterator* it, <?php 
    echo typed_ref_args($outputs_);
    ?>
) {
    if (iteration < nIterations + 1)
      return false;
    if (it->first > it->second)
      return false;
    node = it->first;
    rank = pagerank[it->first];
    it->first++;
    return true;
  }
};

// Initialize the static member types.
vector<double> <?php 
    echo $className;
    ?>
::weight;
vector<double> <?php 
    echo $className;
    ?>
::pagerank;
vector<double> <?php 
    echo $className;
    ?>
::oldPagerank;

typedef <?php 
    echo $className;
    ?>
::Iterator <?php 
    echo $className;
    ?>
_Iterator;

<?php 
    return ['kind' => 'GLA', 'name' => $className, 'system_headers' => $sys_headers, 'user_headers' => $user_headers, 'lib_headers' => $lib_headers, 'libraries' => $libraries, 'properties' => $properties, 'extra' => $extra, 'iterable' => true, 'intermediates' => true, 'input' => $inputs, 'output' => $outputs, 'result_type' => $result_type, 'generated_state' => $constantState];
}