/**
 * Emits the C++ source of the two work functions of a selection waypoint:
 * `SelectionPreProcessWorkFunc_<wpName>` and `SelectionProcessChunkWorkFunc_<wpName>`.
 *
 * The function prints directly to the output stream; it returns nothing.
 *
 * @param string $wpName  Waypoint name; used as the suffix of the emitted
 *                        function names and as the label of the profiling counters.
 * @param array  $queries Map of query id => description. Each description is
 *                        read through the keys 'gf' (generalized filter or null),
 *                        'states' (required states handed to the filter),
 *                        'cargs' (JSON constructor arguments), 'filters'
 *                        (filter expressions) and 'synths' (attribute => expression).
 * @param array  $attMap  Attribute map forwarded to the cgAccessColumns /
 *                        cgAccessAttributes / cgAdvanceAttributes / cgPutbackColumns helpers.
 *
 * Template note: PHP drops a newline that directly follows `?>`. Wherever an
 * emitted C++ `//` comment would end with a PHP tag, the tag therefore echoes
 * PHP_EOL itself so the next C++ statement does not land on the comment line.
 *
 * NOTE(review): the emitted pre-processing code calls both `reqTemp.Swap(...)`
 * and `reqTemp.swap(...)`. The casing mismatch is kept as found because the
 * GLAPtr interface is not visible from this file; confirm which one exists.
 */
function SelectionGenerate($wpName, $queries, $attMap) {
?>

// module specific headers to allow separate compilation
#include "GLAData.h"
#include "Errors.h"

//+{"kind":"WPF", "name":"Pre-Processing", "action":"start"}
extern "C"
int SelectionPreProcessWorkFunc_<?php echo $wpName; ?> (WorkDescription& workDescription, ExecEngineData& result) {
    SelectionPreProcessWD myWork;
    myWork.swap(workDescription);
    QueryExitContainer& queries = myWork.get_whichQueryExits();
    QueryToGLASContMap & requiredStates = myWork.get_requiredStates();

    QueryToGLAStateMap constStates;

<?php cgDeclareQueryIDs($queries); ?>

<?php
    // Emit the JSON configuration objects needed by configurable filter states.
    foreach ($queries as $query => $info) {
        $gf = $info['gf'];
        if (!is_null($gf) && $gf->has_state()) {
            $state = $gf->state();
            if ($state->configurable()) {
                $carg = $info['cargs'];
                echo ' // JSON Configuration for query ' . queryName($query) . PHP_EOL;
                $carg->init();
                echo PHP_EOL;
            } // if gf const state is configurable
        } // if gf has state
    } // foreach query
?>

    FOREACH_TWL(iter, queries) {
<?php foreach ($queries as $query => $val) { ?>
        if( iter.query == <?php echo queryName($query); ?> ) {
<?php
        if ($val['gf'] !== null) {
            // This is a generalized filter
            $gf = $val['gf'];
            $given_states = $val['states'];

            if ($gf->has_state()) {
                $cstArgs = [];
                $state = $gf->state();

                // If the state is configurable, give it the JSON carg.
                // The carg lives in the per-query description ($val), not in the key ($query).
                if ($state->configurable()) {
                    $carg = $val['cargs'];
                    $cstArgs[] = $carg->name();
                } // if gf state is configurable

                if (\count($given_states) > 0) {
?>
            FATALIF(!requiredStates.IsThere(<?php echo queryName($query); ?>),
                "No required states received for query that declared required states");
            GLAStateContainer& givenStates = requiredStates.Find(<?php echo queryName($query); ?>);
            givenStates.MoveToStart();
            GLAPtr reqTemp;
<?php
                    // Iterate the states read above ($given_states); every one of
                    // them becomes a constructor argument of the constant state.
                    foreach ($given_states as $gs) {
                        $cstArgs[] = $gs->name();
?>
            // Extract state from waypoint[<?php echo $gs->waypoint(); ?>]
            <?php echo $gs->type(); ?> * <?php echo $gs->name(); ?> = nullptr;
            reqTemp.Swap(givenStates.Current());
            FATALIF( reqTemp.get_glaType() != <?php echo $gs->type()->cHash(); ?>,
                "Got different type than expected for required state of type <?php echo $gs->type(); ?>");
            <?php echo $gs->name(); ?> = (<?php echo $gs->type(); ?> *) reqTemp.get_glaPtr();
            reqTemp.swap(givenStates.Current());
            givenStates.Advance();
<?php
                    } // foreach given state
                } // if we have given states

                $cstStr = \count($cstArgs) > 0 ? '(' . implode(', ', $cstArgs) . ')' : '';
?>
            <?php echo $state; ?> * temp = new <?php echo $state; echo $cstStr; ?>;
            GLAPtr newPtr( <?php echo $state->cHash(); ?>, (void *) temp );
            QueryID qryID = <?php echo queryName($query); ?>;
            constStates.Insert(qryID, newPtr);
<?php
            } // if gf has state
        } // if( $val['gf'] !== null )
?>
        } // if <?php echo queryName($query); ?> is current query
<?php } // foreach query ?>
    } END_FOREACH;

    SelectionPreProcessRez myRez( constStates );
    myRez.swap(result);

    return WP_PREPROCESSING; // for PreProcess
}
//+{"kind":"WPF", "name":"Pre-Processing", "action":"end"}

//+{"kind":"WPF", "name":"Process Chunk", "action":"start"}
extern "C"
int SelectionProcessChunkWorkFunc_<?php echo $wpName; ?> (WorkDescription &workDescription, ExecEngineData &result) {
    // go to the work description and get the input chunk
    SelectionProcessChunkWD myWork;
    myWork.swap (workDescription);
    Chunk &input = myWork.get_chunkToProcess ();
    QueryToGLAStateMap& constStates = myWork.get_constStates();

    PROFILING2_START;

    QueryIDSet queriesToRun = QueryExitsToQueries(myWork.get_whichQueryExits ());
<?php
    cgDeclareQueryIDs($queries);
    cgAccessColumns($attMap, 'input', $wpName);

    // Declare the constants needed by the filters and synth expressions.
    foreach ($queries as $query => $val) {
?>
    // Constants for query <?php echo queryName($query); ?>:
<?php
        $filters = $val['filters'];
        $synths = $val['synths'];
        cgDeclareConstants($filters);
        cgDeclareConstants($synths);
    } // foreach query
?>

    // prepare bitstring iterator
    Column inBitCol;
    BStringIterator queries;
    input.SwapBitmap (queries);

    // creating storage for syhthesized attributes
<?php
    foreach ($queries as $query => $val) {
        $synList = $val['synths'];
        foreach ($synList as $att => $syn) {
?>
    MMappedStorage <?php echo attStorage($att); ?>;
    Column <?php echo attCol($att); ?>(<?php echo attStorage($att); ?>);
    <?php echo attIteratorType($att); ?> colI_<?php echo $att; ?>(<?php echo attCol($att); ?>);
    <?php echo attType($att); ?> <?php echo $att; ?>;
<?php
        } // foreach synthesized attribute
    } // foreach query
?>

<?php
    foreach ($queries as $query => $val) {
        $givenStates = $val['states'];
        $gf = $val['gf'];
        $cargs = $val['cargs'];

        grokit_assert($gf !== null || \count($givenStates) === 0,
            'We were given states for query ' . $query . ' when we have no GF!');

        if (!is_null($gf) && $gf->has_state()) {
            $state = $gf->state();
            $stateName = 'cst_state_' . queryName($query);
            $constMod = $state->mutable() ? '' : 'const ';
?>
    // Extracting constant state for query <?php echo queryName($query), PHP_EOL; ?>
    FATALIF(!constStates.IsThere(<?php echo queryName($query); ?>), "No constant state found for query <?php echo queryName($query); ?>.");
    <?php echo $constMod; echo $state; ?> * <?php echo $stateName; ?> = nullptr;
    {
        GLAState& curState = constStates.Find(<?php echo queryName($query); ?>);
        GLAPtr tmp;
        tmp.swap(curState);
        FATALIF( tmp.get_glaType() != <?php echo $state->cHash(); ?>,
            "Got different type than expected for constant state of type <?php echo $state; ?>");
        <?php echo $stateName; ?> = (<?php echo $constMod; echo $state; ?> *) tmp.get_glaPtr();
        tmp.swap(curState);
    }
<?php
        } // if gf requires constant state

        if ($gf !== null) {
            $ctrArgs = [];
            if ($gf->configurable()) {
                echo ' // JSON initialiser for query ' . queryName($query) . PHP_EOL;
                $cargs->init();
                echo PHP_EOL;
                $ctrArgs[] = $cargs->name();
            }

            if ($gf->has_state()) {
                // $stateName was set in the has_state() branch just above.
                $ctrArgs[] = '*' . $stateName;
            }

            $ctrStr = \count($ctrArgs) > 0 ? '(' . implode(', ', $ctrArgs) . ')' : '';
?>
    // Construct GF for query <?php echo queryName($query), PHP_EOL; ?>
    <?php echo $gf->value(); ?> <?php echo queryName($query); ?>_state<?php echo $ctrStr; ?>;
<?php
        } // if we have a GF
    } // foreach query
?>

    MMappedStorage bitStore;
    Column outBitCol(bitStore);
    BStringIterator outQueries (outBitCol, queriesToRun);

#ifdef PER_QUERY_PROFILE
<?php foreach ($queries as $query => $val) { ?>
    int64_t numTuples_<?php echo queryName($query); ?> = 0;
<?php } // foreach query ?>
#endif // PER_QUERY_PROFILE

    int64_t numTuples = 0;
    while (!queries.AtEndOfColumn ()) {
        ++numTuples;
        QueryIDSet qry;
        qry = queries.GetCurrent();
        qry.Intersect(queriesToRun);
        queries.Advance();

        //selection code for all the predicates
<?php
    cgAccessAttributes($attMap);

    foreach ($queries as $query => $val) {
        $gf = $val['gf'];
        $filters = $val['filters'];
        $synths = $val['synths'];

        // Name of the GF object constructed before the tuple loop.
        $stateName = queryName($query) . '_state';

        $filterVals = array_map(static function ($expr) {
            return '(' . $expr . ')';
        }, $filters);

        if ($gf === null) {
            // Simple set of expressions.
            $selExpr = \count($filterVals) > 0 ? implode(' && ', $filterVals) : 'true';
        } else {
            // We have a GF
            $selExpr = "{$stateName}.Filter(" . implode(', ', $filterVals) . ")";
        }
?>
        // do <?php echo queryName($query); ?>:
<?php foreach ($synths as $att => $syn) { ?>
        <?php echo attType($att); ?> <?php echo $att; ?>;
<?php } // foreach synthesized attribute ?>
        if( qry.Overlaps(<?php echo queryName($query); ?>) ) {
#ifdef PER_QUERY_PROFILE
            ++numTuples_<?php echo queryName($query); ?>;
#endif // PER_QUERY_PROFILE
<?php cgDeclarePreprocessing($filters, 2); ?>
            if( <?php echo $selExpr; ?> ) {
                // compute synthesized
<?php
        cgDeclarePreprocessing($synths, 3);
        foreach ($synths as $att => $expr) {
?>
                <?php echo $att; ?> = <?php echo $expr; ?>;
<?php } // foreach synthesized attribute ?>
            } else {
                qry.Difference(<?php echo queryName($query); ?>);
            }
        }
<?php foreach ($synths as $att => $syn) { ?>
        colI_<?php echo $att; ?>.Insert(<?php echo $att; ?>);
        colI_<?php echo $att; ?>.Advance();
<?php
        } // foreach synthesized attribute
    } // foreach query
?>
        outQueries.Insert(qry);
        outQueries.Advance();

<?php cgAdvanceAttributes($attMap); ?>
    } // while we still have tuples remaining

    // finally, if there were any results, put the data back in the chunk
<?php
    cgPutbackColumns($attMap, 'input', $wpName);

    foreach ($queries as $query => $val) {
        $synths = $val['synths'];
?>
    if (<?php echo queryName($query); ?>.Overlaps(queriesToRun)) {
<?php foreach ($synths as $att => $expr) { ?>
        colI_<?php echo $att; ?>.Done(<?php echo attCol($att); ?>);
        input.SwapColumn(<?php echo attCol($att); ?>, <?php echo attSlot($att); ?>);
<?php } // foreach synthesized attribute ?>
    } // If <?php echo queryName($query); ?> overlaps queriesToRun
<?php } // foreach query ?>

    // put in the output bitmap
    outQueries.Done ();
    input.SwapBitmap (outQueries);

    // Finish performance counters
    PROFILING2_END;

    PCounterList counterList;
    PCounter totalCnt("tpi", numTuples, "<?php echo $wpName; ?>");
    counterList.Append(totalCnt);
    PCounter tplOutCnt("tpo", numTuples, "<?php echo $wpName; ?>");
    counterList.Append(tplOutCnt);

#ifdef PER_QUERY_PROFILE
<?php foreach ($queries as $query => $val) { ?>
    if( <?php echo queryName($query); ?>.Overlaps(queriesToRun)) {
        PCounter cnt("<?php echo queryName($query); ?>", numTuples_<?php echo queryName($query); ?>, "<?php echo $wpName; ?>");
        counterList.Append(cnt);
    }
<?php } // foreach query ?>
#endif // PER_QUERY_PROFILE

    PROFILING2_SET(counterList, "<?php echo $wpName; ?>");

    ChunkContainer tempResult (input);
    tempResult.swap (result);

    return WP_PROCESS_CHUNK; // For Process Chunk
}
//+{"kind":"WPF", "name":"Process Chunk", "action":"end"}

<?php
}
/**
 * Emits the C++ source of `JoinLHSWorkFunc_<wpName>`, the work function that
 * probes the central hash table with the tuples of a left-hand-side chunk.
 *
 * The function prints directly to the output stream; it returns nothing.
 *
 * Side effect: `$jDesc->hash_RHS_attr` is replaced by the same attributes
 * re-keyed by slot and sorted by that key, because the emitted code extracts
 * the hashed attributes from the table in slot order.
 *
 * @param string $wpName Waypoint name; used as the suffix of the emitted
 *                       function name and as the label of profiling output.
 * @param object $jDesc  Join description. Properties read here:
 *                       hash_RHS_attr, LHS_keys, attribute_queries_LHS,
 *                       attribute_queries_LHS_copy, attribute_queries_RHS,
 *                       attribute_queries_RHS_copy, exists_target,
 *                       not_exists_target and queries_attribute_comparison
 *                       (items expose `qClass` and `att_pairs`, whose entries
 *                       expose `lhs` and `rhs`).
 *
 * Template note: generated identifiers are built by gluing an echoed name to a
 * literal suffix (`<att>_Column_Out`, `<att>RHS`, `<attData>_Out`), so no
 * whitespace may sit between the closing tag and the suffix.
 */
function JoinLHS($wpName, $jDesc) {
    // Order the hashed RHS attributes by slot.
    $rhsAttOrder = [];
    foreach ($jDesc->hash_RHS_attr as $attr) {
        $att = lookupAttribute($attr);
        $rhsAttOrder[$att->slot()] = $attr;
    }
    ksort($rhsAttOrder);
    $jDesc->hash_RHS_attr = $rhsAttOrder;
?>

//+{"kind":"WPF", "name":"LHS Lookup", "action":"start"}
extern "C"
int JoinLHSWorkFunc_<?php echo $wpName; ?> (WorkDescription &workDescription, ExecEngineData &result) {

    double start_time = global_clock.GetTime();
    PROFILING2_START;

    // this is the area where all of the intermediate, serialized records are stored
    SerializedSegmentArray serializedSegments [NUM_SEGS];

    // this is the area where all of the records are serialized to;
    // 10K bytes are initially used for this
    char *serializeHere = (char *) malloc (10000);
    FATALIF(serializeHere == NULL, "Unable to allocate the serialization buffer");

    // this is the output chunk
    Chunk output;

    // go to the work description and get the input chunk
    JoinLHSWorkDescription myWork;
    myWork.swap (workDescription);
    Chunk &input = myWork.get_chunkToProcess ();

    // get the waypoint ID from the chunk
    int wayPointID = myWork.get_wayPointID ();

    QueryIDSet queriesToRun = QueryExitsToQueries(myWork.get_whichQueryExits ());

<?php cgAccessColumns($jDesc->attribute_queries_LHS, 'input', $wpName); ?>

    BStringIterator myInBStringIter;
    input.SwapBitmap (myInBStringIter);

    // start the iterators for the output columns for LHS; used only if stillShallow = 0
<?php foreach ($jDesc->attribute_queries_LHS_copy as $att => $queries) { ?>
    <?php echo attIteratorType($att); ?> <?php echo $att; ?>_Column_Out;
<?php } /*foreach*/ ?>

    // these manage the output columns that come from the RHS (now stored in the hash table)
<?php cgConstructColumns(array_keys($jDesc->attribute_queries_RHS_copy)); ?>

    // this is the ouput bitstring
    MMappedStorage myStore;
    Column bitmapOut (myStore);
    BStringIterator myOutBStringIter (bitmapOut, queriesToRun);

    // now we extract all of the hash table segments... after this, myEntries will hold them all
    HashTableView myView;
    myWork.get_centralHashTable ().EnterReader (myView);
    HashTableSegment myEntries[NUM_SEGS];
    myView.ExtractAllSegments (myEntries);

    // this tells us that we are "still shallow"---not making a deep copy of the LHS atts to the output
    int stillShallow = 1;

    // the bitstring that will be exracted from the hash table
    QueryIDSet *bitstringRHS = 0;

    QueryIDSet existsTarget(<?php echo $jDesc->exists_target; ?>, true);
    QueryIDSet notExistsTarget(<?php echo $jDesc->not_exists_target; ?>, true);

    // these are all of the attribute values that come from the hash table...
    // for each att we need a pointer as well as a dummy value that the pointer will be set to by default
<?php foreach ($jDesc->attribute_queries_RHS as $att => $queries) { ?>
    QueryIDSet <?php echo attQrys($att); ?>_RHS(<?php echo $queries; ?>, true);
    <?php echo attType($att); ?> <?php echo $att; ?>RHSShadow;
    <?php echo attType($att); ?> *<?php echo $att; ?>RHS = NULL;
    <?php echo attType($att); ?> <?php echo $att; ?>RHSobj;
<?php } /*foreach*/ ?>

    // now actually try to match up all of the tuples!
    int totalNum = 0;
    while (!myInBStringIter.AtEndOfColumn ()) {
        // TBD, probably this is not working TBD

        // counts how many matches for this query
        int numHits = 0;

        // extract values of attributes from streams

        // now go through the LHS input atts one at a time and extract if it is needed by an active query

        // see which queries match up
        QueryIDSet curBits = myInBStringIter.GetCurrent ();
        curBits.Intersect (queriesToRun);

        QueryIDSet exists; // keeps track of the queries for which a match is found
        QueryIDSet oldBitstringLHS; // last value of bistringLHS

        // if the input query is not empty
        if (!curBits.IsEmpty ()) {

            totalNum++;

            // compute the hash for LHS
            HT_INDEX_TYPE hashValue = HASH_INIT;
<?php foreach ($jDesc->LHS_keys as $att) { ?>
            hashValue = CongruentHash(Hash(<?php echo $att; ?>_Column.GetCurrent()), hashValue);
<?php } /*foreach*/ ?>

            // figure out which of the hash buckets it goes into
            unsigned int index = WHICH_SEGMENT (hashValue);

            // now, go to that index and extract matching tuples!
            HT_INDEX_TYPE curSlot = WHICH_SLOT (hashValue);
            hashValue = curSlot;

            // this loops through all of the possible RHS hits
            while (1) {

                // this is the bitstring that will go in the output
                QueryIDSet bitstringLHS;

                // for safety (in case we look at a bitstring that spans multiple
                // entries that is not done being written by a concurrent writer)
                // empty out the inital bitstring
                ((QueryIDSet *) serializeHere)->Empty ();

                // give safe "shadow" values to all of the RHS attributes
<?php foreach ($jDesc->hash_RHS_attr as $att) { ?>
                <?php echo $att; ?>RHS = &<?php echo $att; ?>RHSShadow;
<?php } /*foreach*/ ?>

                // here we go through and extract the atts one at a time from the hash
                // table. Note that the atts must be extracted IN ORDER. That is, the
                // bitstring is first, followed by the att mapped to the lowerest column
                // position, followed by the att mapped to the next lowest, and so on.

                // The Extract function pulls an attribute out of the hash table...
                int lenSoFar = 0, dummy, done;
                int lastLen = myEntries[index].Extract (serializeHere, curSlot, hashValue, wayPointID, BITMAP, dummy, done);

                // if we cannot find a bitstring, there was no tuple here, and we are done
                if (lastLen == 0) {
                    break;
                }

                // remember the bitstring
                bitstringRHS = (QueryIDSet *) serializeHere;
                lenSoFar += lastLen;

                // next look for other hashed attributes
<?php foreach ($jDesc->hash_RHS_attr as $att) { ?>
                lastLen = myEntries[index].Extract (serializeHere + lenSoFar, curSlot, hashValue, wayPointID, <?php echo attSlot($att); ?>, dummy, done);

                // see if we got attribute
                if (lastLen > 0) {
                    Deserialize(serializeHere + lenSoFar, <?php echo $att; ?>RHSobj);
                    //<?php echo attOptimizedDeserialize($att, $att . "RHSobj", "serializeHere", "lenSoFar"); ?>;
                    <?php echo $att; ?>RHS = &<?php echo $att; ?>RHSobj;
                    lenSoFar += lastLen;
                } else {
                    FATALIF(<?php echo attQrys($att); ?>_RHS.Overlaps(*bitstringRHS), "Did not find attribute <?php echo $att; ?> in active RHS tuple");
                }
<?php } /*foreach*/ ?>

                // see if we have any query matches
                bitstringRHS->Intersect (curBits);
                QueryIDSet qBits;
                //printf("TPLLLLL: cust_acctbal = %f orders_custkey = %d cust_custkey = %d\n", *customer_c_acctbalRHS, orders_o_custkey_Column.GetCurrent(), *customer_c_custkeyRHS);

<?php foreach ($jDesc->queries_attribute_comparison as $qClass) { ?>
                // See if any query in query class is eligible for this comparision
                qBits = QueryIDSet(<?php echo $qClass->qClass; ?>, true);
                qBits.Intersect(*bitstringRHS);
                if ( !qBits.IsEmpty () &&
<?php foreach ($qClass->att_pairs as $pair) { ?>
                     *<?php echo $pair->rhs; ?>RHS == <?php echo $pair->lhs; ?>_Column.GetCurrent() &&
<?php } /*foreach pair*/ ?>
                     1 )
                {
                    bitstringLHS.Union (qBits);
                }
<?php } /*foreach query class*/ ?>

                // if any of them hit...
                if (!bitstringLHS.IsEmpty ()) {

                    exists.Union(bitstringLHS);

                    numHits++;

                    // see if we need to move from shallow to deep
                    if (numHits == 2 && stillShallow) {
<?php foreach ($jDesc->attribute_queries_LHS_copy as $att => $qrys) { ?>
                        <?php echo attData($att); ?>_Out.CreateDeepCopy (<?php echo attData($att); ?>);
                        <?php echo attData($att); ?>_Out.Insert (<?php echo attData($att); ?>.GetCurrent());
                        <?php echo attData($att); ?>_Out.Advance();
<?php } /*foreach*/ ?>
                        stillShallow = 0;
                    }

                    // now, add all of the outputs over... first deal with the LHS input atts
                    // that get copied into output atts
                    if (!stillShallow) {
<?php foreach ($jDesc->attribute_queries_LHS_copy as $att => $qrys) { ?>
                        <?php echo attData($att); ?>_Out.Insert (<?php echo attData($att); ?>.GetCurrent());
                        <?php echo attData($att); ?>_Out.Advance();
<?php } /*foreach*/ ?>
                    }

                    // now, deal with the output atts that come from the hash table
<?php foreach ($jDesc->attribute_queries_RHS_copy as $att => $qrys) { ?>
                    <?php echo attData($att); ?>_Out.Insert (*<?php echo $att; ?>RHS);
                    <?php echo attData($att); ?>_Out.Advance();
<?php } /*foreach*/ ?>

                    // finally, set the bitmap. We are one element behind
                    if (!oldBitstringLHS.IsEmpty()){
                        myOutBStringIter.Insert (oldBitstringLHS);
                        myOutBStringIter.Advance ();
                    }
                    oldBitstringLHS=bitstringLHS;
                } // empty bistring
            }
        }

        // compute the true exist queries
        QueryIDSet tmp = existsTarget;
        tmp.Intersect(exists);
        tmp.Intersect(curBits); // not needed but I'm paranoid

        // compute the true not exits queries
        QueryIDSet tmp2 = curBits;
        tmp2.Intersect(notExistsTarget);
        tmp2.Difference(exists);

        // now put everything in bitstringLHS
        oldBitstringLHS.Union(tmp);
        oldBitstringLHS.Union(tmp2);
        if (!oldBitstringLHS.IsEmpty()){
            myOutBStringIter.Insert (oldBitstringLHS);
            myOutBStringIter.Advance ();
        }

        // at this point, we are done trying to join this tuple... any join results have been
        // written to the output columns. Note that we don't have to advance in the output data
        // columns; if we are shallow, we don't touch the output columns. If we are not shallow,
        // if there were no results, we have nothing to write. HOWEVER, if we are shallow and
        // we did not get a match, we need to add an empty btstring
        if (stillShallow && numHits == 0) {
<?php foreach ($jDesc->attribute_queries_RHS_copy as $att => $qrys) { ?>
            <?php echo attType($att); ?> tmp_<?php echo attData($att); ?>;
            <?php echo attData($att); ?>_Out.Insert (tmp_<?php echo attData($att); ?>);
            <?php echo attData($att); ?>_Out.Advance();
<?php } /*foreach*/ ?>

            if (oldBitstringLHS.IsEmpty()){ // no not exist and no join match
                myOutBStringIter.Insert (oldBitstringLHS);
                myOutBStringIter.Advance ();
            }
        }

        // lastly, we need to advance in the INPUT tuples
<?php foreach ($jDesc->attribute_queries_LHS as $att => $qrys) { ?>
        <?php echo attData($att); ?>.Advance();
<?php } /*foreach*/ ?>

        // advance the input bitstring
        myInBStringIter.Advance ();
    }

    // DONE! So construct the output tuple

    // if we are still shallow, put the original data into the output
    if (stillShallow) {
<?php foreach ($jDesc->attribute_queries_LHS_copy as $att => $qrys) { ?>
        Column col_<?php echo $att; ?>;
        <?php echo attData($att); ?>.Done(col_<?php echo $att; ?>);
        output.SwapColumn (col_<?php echo $att; ?>, <?php echo attSlot($att); ?>);
<?php } /*foreach*/ ?>
    } else {
<?php foreach ($jDesc->attribute_queries_LHS_copy as $att => $qrys) { ?>
        Column col_<?php echo $att; ?>;
        <?php echo attData($att); ?>_Out.Done(col_<?php echo $att; ?>);
        output.SwapColumn (col_<?php echo $att; ?>, <?php echo attSlot($att); ?>);
<?php } /*foreach*/ ?>
    }

    {
<?php foreach ($jDesc->attribute_queries_RHS_copy as $att => $qrys) { ?>
        Column col_<?php echo $att; ?>;
        <?php echo attData($att); ?>_Out.Done(col_<?php echo $att; ?>);
        output.SwapColumn (col_<?php echo $att; ?>, <?php echo attSlot($att); ?>);
<?php } /*foreach*/ ?>
    }

    // put in the output bitmap
    myOutBStringIter.Done ();
    output.SwapBitmap (myOutBStringIter);

    // and give back the result
    ChunkContainer tempResult (output);
    tempResult.swap (result);

    PROFILING2_END;

    PROFILING(start_time, "<?php echo $wpName; ?>", "LHS_lookup", "%d", totalNum);
    PROFILING(0.0, "HashTable", "fillrate", "%2.4f", HashTableSegment::globalFillRate*100.0);

    // Finish performance counters
    // Use the Set functionality in case we add additional counters later.
    PCounterList counterList;
    PCounter totalCnt("tpi lhs", totalNum, "<?php echo $wpName; ?>");
    counterList.Append(totalCnt);

    PROFILING2_SET(counterList, "<?php echo $wpName; ?>");

    int64_t hFillRate = int64_t(HashTableSegment::globalFillRate * 1000);
    PROFILING2_INSTANT("hfr", hFillRate, "global");

    free (serializeHere);

    return 1;
}
//+{"kind":"WPF", "name":"LHS Lookup", "action":"end"}

<?php
}
/**
 * Emits, for every attribute named by a key of `$att_map`, a C++ const
 * reference bound to the current value of that attribute's column iterator.
 *
 * Only the keys of the map are used; the values are ignored. Output is
 * printed directly and nothing is returned.
 *
 * @param array $att_map Map whose keys are attribute names.
 */
function cgAccessAttributes($att_map) {
    echo '    // extract values of attributes from streams', PHP_EOL;

    foreach ($att_map as $attribute => $ignoredQueries) {
        // Comma-separated echo keeps the pieces in the same emission order as
        // the template form: literal, type, literal, name, literal, iterator.
        echo '    const ', attType($attribute), ' & ', $attribute,
            ' = ', attData($attribute), '.GetCurrent();', PHP_EOL;
    }
}