/**
 * Emits the C++ work function `JoinLHSWorkFunc_<wpName>`.
 *
 * The emitted function walks the tuples of the LHS input chunk, hashes the LHS
 * key columns, pulls candidate RHS tuples out of the central hash table and
 * writes the matching tuples (LHS copies, RHS values, per-tuple query bitmap)
 * into a fresh output chunk.
 *
 * Side effect on the argument: `$jDesc->hash_RHS_attr` is replaced by the same
 * attribute names re-keyed by their slot and sorted by ascending slot, because
 * the emitted code extracts hashed attributes from the table in slot order.
 *
 * Template layout rule used throughout: a newline that directly follows a
 * closing PHP tag is not emitted, so every generated C++ comment line ends in
 * literal text rather than in a PHP tag, and identifier suffixes are written
 * flush against the tag that produces their prefix.
 *
 * NOTE(review): the emitted code serialises into a fixed 10000-byte malloc'd
 * buffer and no bounds check is visible here; confirm Extract() cannot overrun.
 *
 * @param mixed  $wpName Waypoint name; echoed into the C++ function name and
 *                       into the profiling labels.
 * @param object $jDesc  Join description. Properties read here: hash_RHS_attr,
 *                       LHS_keys, attribute_queries_LHS, attribute_queries_LHS_copy,
 *                       attribute_queries_RHS, attribute_queries_RHS_copy,
 *                       exists_target, not_exists_target and
 *                       queries_attribute_comparison (items expose qClass and
 *                       att_pairs, whose items expose rhs and lhs).
 * @return void The C++ source is written to the output stream.
 */
function JoinLHS($wpName, $jDesc) {
    // Order the hashed RHS attributes by slot; the extraction loop below relies on it.
    $rhsAttOrder = [];
    foreach ($jDesc->hash_RHS_attr as $attr) {
        $att = lookupAttribute($attr);
        $rhsAttOrder[$att->slot()] = $attr;
    }
    ksort($rhsAttOrder);
    $jDesc->hash_RHS_attr = $rhsAttOrder;
?>
//+{"kind":"WPF", "name":"LHS Lookup", "action":"start"}
extern "C"
int JoinLHSWorkFunc_<?php echo $wpName; ?> (WorkDescription &workDescription, ExecEngineData &result) {

    double start_time = global_clock.GetTime();
    PROFILING2_START;

    // this is the area where all of the intermediate, serialized records are stored
    SerializedSegmentArray serializedSegments [NUM_SEGS];

    // this is the area where all of the records are serialized to;
    // 10K bytes are initially used for this
    char *serializeHere = (char *) malloc (10000);

    // this is the output chunk
    Chunk output;

    // go to the work description and get the input chunk
    JoinLHSWorkDescription myWork;
    myWork.swap (workDescription);
    Chunk &input = myWork.get_chunkToProcess ();

    // get the waypoint ID from the work description
    int wayPointID = myWork.get_wayPointID ();

    QueryIDSet queriesToRun = QueryExitsToQueries(myWork.get_whichQueryExits ());

<?php cgAccessColumns($jDesc->attribute_queries_LHS, 'input', $wpName); ?>

    BStringIterator myInBStringIter;
    input.SwapBitmap (myInBStringIter);

    // start the iterators for the output columns for LHS; used only if stillShallow = 0
<?php foreach ($jDesc->attribute_queries_LHS_copy as $att => $queries) { ?>
    <?php echo attIteratorType($att); ?> <?php echo $att; ?>_Column_Out;
<?php } /*foreach*/ ?>

    // these manage the output columns that come from the RHS (now stored in the hash table)
<?php cgConstructColumns(array_keys($jDesc->attribute_queries_RHS_copy)); ?>

    // this is the output bitstring
    MMappedStorage myStore;
    Column bitmapOut (myStore);
    BStringIterator myOutBStringIter (bitmapOut, queriesToRun);

    // now we extract all of the hash table segments... after this, myEntries will hold them all
    HashTableView myView;
    myWork.get_centralHashTable ().EnterReader (myView);
    HashTableSegment myEntries[NUM_SEGS];
    myView.ExtractAllSegments (myEntries);

    // this tells us that we are "still shallow"---not making a deep copy of the LHS atts to the output
    int stillShallow = 1;

    // the bitstring that will be extracted from the hash table
    QueryIDSet *bitstringRHS = 0;

    QueryIDSet existsTarget(<?php echo $jDesc->exists_target; ?>, true);
    QueryIDSet notExistsTarget(<?php echo $jDesc->not_exists_target; ?>, true);

    // these are all of the attribute values that come from the hash table...
    // for each att we need a pointer as well as a dummy value that the pointer will be set to by default
<?php foreach ($jDesc->attribute_queries_RHS as $att => $queries) { ?>
    QueryIDSet <?php echo attQrys($att); ?>_RHS(<?php echo $queries; ?>, true);
    <?php echo attType($att); ?> <?php echo $att; ?>RHSShadow;
    <?php echo attType($att); ?> *<?php echo $att; ?>RHS = NULL;
    <?php echo attType($att); ?> <?php echo $att; ?>RHSobj;
<?php } /*foreach*/ ?>

    // now actually try to match up all of the tuples!
    int totalNum = 0;
    while (!myInBStringIter.AtEndOfColumn ()) { // TBD, probably this is not working TBD

        // counts how many matches for this query
        int numHits = 0;

        // see which queries match up
        QueryIDSet curBits = myInBStringIter.GetCurrent ();
        curBits.Intersect (queriesToRun);

        QueryIDSet exists; // keeps track of the queries for which a match is found
        QueryIDSet oldBitstringLHS; // last value of bitstringLHS

        // if the input query is not empty
        if (!curBits.IsEmpty ()) {

            totalNum++;

            // compute the hash for LHS
            HT_INDEX_TYPE hashValue = HASH_INIT;
<?php foreach ($jDesc->LHS_keys as $att) { ?>
            hashValue = CongruentHash(Hash(<?php echo $att; ?>_Column.GetCurrent()), hashValue);
<?php } /*foreach*/ ?>

            // figure out which of the hash buckets it goes into
            unsigned int index = WHICH_SEGMENT (hashValue);

            // now, go to that index and extract matching tuples!
            HT_INDEX_TYPE curSlot = WHICH_SLOT (hashValue);
            hashValue = curSlot;

            // this loops through all of the possible RHS hits
            while (1) {

                // this is the bitstring that will go in the output
                QueryIDSet bitstringLHS;

                // for safety (in case we look at a bitstring that spans multiple
                // entries that is not done being written by a concurrent writer)
                // empty out the initial bitstring
                ((QueryIDSet *) serializeHere)->Empty ();

                // give safe "shadow" values to all of the RHS attributes
<?php foreach ($jDesc->hash_RHS_attr as $att) { ?>
                <?php echo $att; ?>RHS = &<?php echo $att; ?>RHSShadow;
<?php } /*foreach*/ ?>

                // here we go through and extract the atts one at a time from the hash
                // table.  Note that the atts must be extracted IN ORDER.  That is, the
                // bitstring is first, followed by the att mapped to the lowest column
                // position, followed by the att mapped to the next lowest, and so on.

                // The Extract function pulls an attribute out of the hash table...
                int lenSoFar = 0, dummy, done;
                int lastLen = myEntries[index].Extract (serializeHere, curSlot, hashValue, wayPointID, BITMAP, dummy, done);

                // if we cannot find a bitstring, there was no tuple here, and we are done
                if (lastLen == 0) {
                    break;
                }

                // remember the bitstring
                bitstringRHS = (QueryIDSet *) serializeHere;
                lenSoFar += lastLen;

                // next look for other hashed attributes
<?php foreach ($jDesc->hash_RHS_attr as $att) { ?>
                lastLen = myEntries[index].Extract (serializeHere + lenSoFar, curSlot, hashValue, wayPointID, <?php echo attSlot($att); ?>, dummy, done);

                // see if we got attribute
                if (lastLen > 0) {
                    Deserialize(serializeHere + lenSoFar, <?php echo $att; ?>RHSobj);
                    //<?php echo attOptimizedDeserialize($att, $att . "RHSobj", "serializeHere", "lenSoFar"); ?>;
                    <?php echo $att; ?>RHS = &<?php echo $att; ?>RHSobj;
                    lenSoFar += lastLen;
                } else {
                    FATALIF(<?php echo attQrys($att); ?>_RHS.Overlaps(*bitstringRHS), "Did not find attribute <?php echo $att; ?> in active RHS tuple");
                }
<?php } /*foreach*/ ?>

                // see if we have any query matches
                bitstringRHS->Intersect (curBits);
                QueryIDSet qBits;

<?php foreach ($jDesc->queries_attribute_comparison as $qClass) { ?>
                // See if any query in query class is eligible for this comparison
                qBits = QueryIDSet(<?php echo $qClass->qClass; ?>, true);
                qBits.Intersect(*bitstringRHS);
                if (
                    !qBits.IsEmpty () &&
<?php foreach ($qClass->att_pairs as $pair) { ?>
                    *<?php echo $pair->rhs; ?>RHS == <?php echo $pair->lhs; ?>_Column.GetCurrent() &&
<?php } /*foreach pair*/ ?>
                    1 )
                {
                    bitstringLHS.Union (qBits);
                }
<?php } /*foreach query class*/ ?>

                // if any of them hit...
                if (!bitstringLHS.IsEmpty ()) {

                    exists.Union(bitstringLHS);

                    numHits++;

                    // see if we need to move from shallow to deep
                    if (numHits == 2 && stillShallow) {
<?php foreach ($jDesc->attribute_queries_LHS_copy as $att => $qrys) { ?>
                        <?php echo attData($att); ?>_Out.CreateDeepCopy (<?php echo attData($att); ?>);
                        <?php echo attData($att); ?>_Out.Insert (<?php echo attData($att); ?>.GetCurrent());
                        <?php echo attData($att); ?>_Out.Advance();
<?php } /*foreach*/ ?>
                        stillShallow = 0;
                    }

                    // now, add all of the outputs over... first deal with the LHS input atts
                    // that get copied into output atts
                    if (!stillShallow) {
<?php foreach ($jDesc->attribute_queries_LHS_copy as $att => $qrys) { ?>
                        <?php echo attData($att); ?>_Out.Insert (<?php echo attData($att); ?>.GetCurrent());
                        <?php echo attData($att); ?>_Out.Advance();
<?php } /*foreach*/ ?>
                    }

                    // now, deal with the output atts that come from the hash table
<?php foreach ($jDesc->attribute_queries_RHS_copy as $att => $qrys) { ?>
                    <?php echo attData($att); ?>_Out.Insert (*<?php echo $att; ?>RHS);
                    <?php echo attData($att); ?>_Out.Advance();
<?php } /*foreach*/ ?>

                    // finally, set the bitmap. We are one element behind
                    if (!oldBitstringLHS.IsEmpty()){
                        myOutBStringIter.Insert (oldBitstringLHS);
                        myOutBStringIter.Advance ();
                    }
                    oldBitstringLHS=bitstringLHS;
                } // empty bitstring
            }
        }

        // compute the true exist queries
        QueryIDSet tmp = existsTarget;
        tmp.Intersect(exists);
        tmp.Intersect(curBits); // not needed but I'm paranoid

        // compute the true not exists queries
        QueryIDSet tmp2 = curBits;
        tmp2.Intersect(notExistsTarget);
        tmp2.Difference(exists);

        // now put everything in bitstringLHS
        oldBitstringLHS.Union(tmp);
        oldBitstringLHS.Union(tmp2);

        if (!oldBitstringLHS.IsEmpty()){
            myOutBStringIter.Insert (oldBitstringLHS);
            myOutBStringIter.Advance ();
        }

        // at this point, we are done trying to join this tuple... any join results have been
        // written to the output columns.  Note that we don't have to advance in the output data
        // columns; if we are shallow, we don't touch the output columns.  If we are not shallow,
        // if there were no results, we have nothing to write.  HOWEVER, if we are shallow and
        // we did not get a match, we need to add an empty bitstring
        if (stillShallow && numHits == 0) {
<?php foreach ($jDesc->attribute_queries_RHS_copy as $att => $qrys) { ?>
            <?php echo attType($att); ?> tmp_<?php echo attData($att); ?>;
            <?php echo attData($att); ?>_Out.Insert (tmp_<?php echo attData($att); ?>);
            <?php echo attData($att); ?>_Out.Advance();
<?php } /*foreach*/ ?>

            if (oldBitstringLHS.IsEmpty()){ // no not exist and no join match
                myOutBStringIter.Insert (oldBitstringLHS);
                myOutBStringIter.Advance ();
            }
        }

        // lastly, we need to advance in the INPUT tuples
<?php foreach ($jDesc->attribute_queries_LHS as $att => $qrys) { ?>
        <?php echo attData($att); ?>.Advance();
<?php } /*foreach*/ ?>

        // advance the input bitstring
        myInBStringIter.Advance ();
    }

    // DONE!  So construct the output tuple

    // if we are still shallow, put the original data into the output
    if (stillShallow) {
<?php foreach ($jDesc->attribute_queries_LHS_copy as $att => $qrys) { ?>
        Column col_<?php echo $att; ?>;
        <?php echo attData($att); ?>.Done(col_<?php echo $att; ?>);
        output.SwapColumn (col_<?php echo $att; ?>, <?php echo attSlot($att); ?>);
<?php } /*foreach*/ ?>
    } else {
<?php foreach ($jDesc->attribute_queries_LHS_copy as $att => $qrys) { ?>
        Column col_<?php echo $att; ?>;
        <?php echo attData($att); ?>_Out.Done(col_<?php echo $att; ?>);
        output.SwapColumn (col_<?php echo $att; ?>, <?php echo attSlot($att); ?>);
<?php } /*foreach*/ ?>
    }

    {
<?php foreach ($jDesc->attribute_queries_RHS_copy as $att => $qrys) { ?>
        Column col_<?php echo $att; ?>;
        <?php echo attData($att); ?>_Out.Done(col_<?php echo $att; ?>);
        output.SwapColumn (col_<?php echo $att; ?>, <?php echo attSlot($att); ?>);
<?php } /*foreach*/ ?>
    }

    // put in the output bitmap
    myOutBStringIter.Done ();
    output.SwapBitmap (myOutBStringIter);

    // and give back the result
    ChunkContainer tempResult (output);
    tempResult.swap (result);

    PROFILING2_END;

    PROFILING(start_time, "<?php echo $wpName; ?>", "LHS_lookup", "%d", totalNum);
    PROFILING(0.0, "HashTable", "fillrate", "%2.4f", HashTableSegment::globalFillRate*100.0);

    // Finish performance counters
    // Use the Set functionality in case we add additional counters later.
    PCounterList counterList;
    PCounter totalCnt("tpi lhs", totalNum, "<?php echo $wpName; ?>");
    counterList.Append(totalCnt);
    PROFILING2_SET(counterList, "<?php echo $wpName; ?>");

    int64_t hFillRate = int64_t(HashTableSegment::globalFillRate * 1000);
    PROFILING2_INSTANT("hfr", hFillRate, "global");

    free (serializeHere);

    return 1;
}
//+{"kind":"WPF", "name":"LHS Lookup", "action":"end"}
<?php
}
/**
 * Emits the C++ that pulls one column out of a chunk and opens a fragment
 * iterator over it.
 *
 * The emitted code declares a Column for the attribute, swaps it out of
 * `$chunk` only when the attribute's query set overlaps `queriesToRun`
 * (aborting with FATAL if the swapped column is not valid), and then declares
 * the attribute's iterator over the range `$start`..`$end`.
 *
 * The emitted code expects a `queriesToRun` variable and the attribute's query
 * set (named by attQrys) to be in scope at the point of insertion.
 *
 * @param mixed $att    Attribute name; passed to the att* naming helpers.
 * @param mixed $chunk  C++ expression that evaluates to the chunk to read from.
 * @param mixed $start  C++ expression for the first fragment of the range.
 * @param mixed $end    C++ expression for the last fragment of the range.
 * @param mixed $wpName Waypoint name, used only in the FATAL message.
 * @return void The C++ source is written to the output stream.
 */
function cgExtractColumnFragment($att, $chunk, $start, $end, $wpName) {
?>
    // extracting <?php echo $att; ?>:
    Column <?php echo attCol($att); ?>;

    if (<?php echo attQrys($att); ?>.Overlaps(queriesToRun)){
        <?php echo $chunk; ?>.SwapColumn(<?php echo attCol($att); ?>, <?php echo attSlot($att); ?>);
        if (! <?php echo attCol($att); ?>.IsValid()){
            FATAL("Error: Column <?php echo $att; ?> not found in <?php echo $wpName; ?>\n");
        }
    }
    <?php echo attIteratorType($att); ?> <?php echo attData($att); ?> (<?php echo attCol($att); ?>/*, 8192*/, <?php echo $start; ?>, <?php echo $end; ?>);
<?php
}
/**
 * Emits the two C++ work functions of a selection waypoint:
 * `SelectionPreProcessWorkFunc_<wpName>` and `SelectionProcessChunkWorkFunc_<wpName>`.
 *
 * Pre-processing builds, for every query whose generalized filter (GF) has a
 * state, a constant state object (constructed from the optional JSON
 * configuration and the required states received from other waypoints) and
 * stores it in `constStates`.
 *
 * Chunk processing evaluates each query's filter expressions (or its GF's
 * Filter call) on every tuple, clears the query's bit when the predicate
 * fails, computes the synthesized attributes, and puts columns and bitmap back
 * into the input chunk.
 *
 * Template layout rule used throughout: a newline that directly follows a
 * closing PHP tag is not emitted, so generated C++ comment lines and
 * preprocessor lines never end in a PHP tag.
 *
 * @param mixed $wpName  Waypoint name; echoed into function names and counter labels.
 * @param array $queries Map of query id to an array with the keys read here:
 *                       'gf' (GF object or null), 'states' (required states),
 *                       'cargs' (JSON constructor arguments), 'filters'
 *                       (expressions) and 'synths' (attribute => expression).
 * @param mixed $attMap  Attribute map, passed through to the cg* column helpers.
 * @return void The C++ source is written to the output stream.
 */
function SelectionGenerate($wpName, $queries, $attMap) {
?>

// module specific headers to allow separate compilation
#include "GLAData.h"
#include "Errors.h"

//+{"kind":"WPF", "name":"Pre-Processing", "action":"start"}
extern "C"
int SelectionPreProcessWorkFunc_<?php echo $wpName; ?> (WorkDescription& workDescription, ExecEngineData& result) {
    SelectionPreProcessWD myWork;
    myWork.swap(workDescription);
    QueryExitContainer& queries = myWork.get_whichQueryExits();
    QueryToGLASContMap & requiredStates = myWork.get_requiredStates();

    QueryToGLAStateMap constStates;
<?php
    cgDeclareQueryIDs($queries);
?>

<?php
    // Emit the JSON configuration objects for every configurable GF state.
    foreach ($queries as $query => $info) {
        $gf = $info['gf'];
        if (!is_null($gf) && $gf->has_state()) {
            $state = $gf->state();
            if ($state->configurable()) {
                $carg = $info['cargs'];
                echo ' // JSON Configuration for query ' . queryName($query) . PHP_EOL;
                $carg->init();
                echo PHP_EOL;
            } // if gf const state is configurable
        } // if gf has state
    } // foreach query
?>

    FOREACH_TWL(iter, queries) {
<?php foreach ($queries as $query => $val) { ?>
        if( iter.query == <?php echo queryName($query); ?> ) {
<?php
        if ($val['gf'] !== null) {
            // This is a generalized filter
            $gf = $val['gf'];
            $given_states = $val['states'];

            if ($gf->has_state()) {
                $cstArgs = [];
                $state = $gf->state();

                // If the state is configurable, give it the JSON carg.
                // The per-query data lives in $val; $query is only the array key.
                if ($state->configurable()) {
                    $carg = $val['cargs'];
                    $cstArgs[] = $carg->name();
                } // if gf state is configurable

                if (\count($given_states) > 0) {
?>
            FATALIF(!requiredStates.IsThere(<?php echo queryName($query); ?>),
                "No required states received for query that declared required states");
            GLAStateContainer& givenStates = requiredStates.Find(<?php echo queryName($query); ?>);
            givenStates.MoveToStart();
            GLAPtr reqTemp;
<?php
                    // NOTE(review): the emitted code calls reqTemp.Swap(...) and then
                    // reqTemp.swap(...); confirm that GLAPtr provides both spellings.
                    foreach ($given_states as $gs) {
                        $cstArgs[] = $gs->name();
?>
            // Extract state from waypoint[<?php echo $gs->waypoint(); ?>]
            <?php echo $gs->type(); ?> * <?php echo $gs->name(); ?> = nullptr;
            reqTemp.Swap(givenStates.Current());
            FATALIF( reqTemp.get_glaType() != <?php echo $gs->type()->cHash(); ?>,
                "Got different type than expected for required state of type <?php echo $gs->type(); ?>");
            <?php echo $gs->name(); ?> = (<?php echo $gs->type(); ?> *) reqTemp.get_glaPtr();
            reqTemp.swap(givenStates.Current());
            givenStates.Advance();
<?php
                    } // foreach given state
                } // if we have given states

                $cstStr = \count($cstArgs) > 0 ? '(' . implode(', ', $cstArgs) . ')' : '';
?>
            <?php echo $state; ?> * temp = new <?php echo $state; echo $cstStr; ?>;
            GLAPtr newPtr( <?php echo $state->cHash(); ?>, (void *) temp );
            QueryID qryID = <?php echo queryName($query); ?>;
            constStates.Insert(qryID, newPtr);
<?php
            } // if gf has state
        } // if( $val['gf'] !== null )
?>
        } // if <?php echo queryName($query); ?> is current query
<?php } /* foreach query */ ?>
    } END_FOREACH;

    SelectionPreProcessRez myRez( constStates );
    myRez.swap(result);

    return WP_PREPROCESSING; // for PreProcess
}
//+{"kind":"WPF", "name":"Pre-Processing", "action":"end"}

//+{"kind":"WPF", "name":"Process Chunk", "action":"start"}
extern "C"
int SelectionProcessChunkWorkFunc_<?php echo $wpName; ?> (WorkDescription &workDescription, ExecEngineData &result) {
    // go to the work description and get the input chunk
    SelectionProcessChunkWD myWork;
    myWork.swap (workDescription);
    Chunk &input = myWork.get_chunkToProcess ();

    QueryToGLAStateMap& constStates = myWork.get_constStates();

    PROFILING2_START;

    QueryIDSet queriesToRun = QueryExitsToQueries(myWork.get_whichQueryExits ());
<?php
    cgDeclareQueryIDs($queries);
    cgAccessColumns($attMap, 'input', $wpName);

    // Declare the constants needed by the filters and synth expressions.
    foreach ($queries as $query => $val) {
?>
    // Constants for query <?php echo queryName($query); ?>:
<?php
        $filters = $val['filters'];
        $synths = $val['synths'];
        cgDeclareConstants($filters);
        cgDeclareConstants($synths);
    } // foreach query
?>

    // prepare bitstring iterator
    Column inBitCol;
    BStringIterator queries;
    input.SwapBitmap (queries);

    // creating storage for synthesized attributes
<?php
    foreach ($queries as $query => $val) {
        $synList = $val['synths'];
        foreach ($synList as $att => $syn) {
?>
    MMappedStorage <?php echo attStorage($att); ?>;
    Column <?php echo attCol($att); ?>(<?php echo attStorage($att); ?>);
    <?php echo attIteratorType($att); ?> colI_<?php echo $att; ?>(<?php echo attCol($att); ?>);
    <?php echo attType($att); ?> <?php echo $att; ?>;
<?php
        } // foreach synthesized attribute
    } // foreach query
?>

<?php
    foreach ($queries as $query => $val) {
        $givenStates = $val['states'];
        $gf = $val['gf'];
        $cargs = $val['cargs'];

        grokit_assert($gf !== null || count($givenStates) == 0,
            'We were given states for query ' . $query . ' when we have no GF!');

        if (!is_null($gf) && $gf->has_state()) {
            $state = $gf->state();
            $stateName = 'cst_state_' . queryName($query);
            $constMod = $state->mutable() ? '' : 'const ';
?>
    // Query <?php echo queryName($query); ?>: extract its constant state
    FATALIF(!constStates.IsThere(<?php echo queryName($query); ?>),
        "No constant state found for query <?php echo queryName($query); ?>.");
    <?php echo $constMod; echo $state; ?> * <?php echo $stateName; ?> = nullptr;
    {
        GLAState& curState = constStates.Find(<?php echo queryName($query); ?>);
        GLAPtr tmp;
        tmp.swap(curState);

        FATALIF( tmp.get_glaType() != <?php echo $state->cHash(); ?>,
            "Got different type than expected for constant state of type <?php echo $state; ?>");
        <?php echo $stateName; ?> = (<?php echo $constMod; echo $state; ?> *) tmp.get_glaPtr();
        tmp.swap(curState);
    }
<?php
        } // if gf requires constant state

        if ($gf !== null) {
            $ctrArgs = [];
            if ($gf->configurable()) {
                echo ' // JSON initialiser for query ' . queryName($query) . PHP_EOL;
                $cargs->init();
                echo PHP_EOL;
                $ctrArgs[] = $cargs->name();
            }

            if ($gf->has_state()) {
                $ctrArgs[] = '*' . $stateName;
            }

            $ctrStr = \count($ctrArgs) > 0 ? '(' . implode(', ', $ctrArgs) . ')' : '';
?>
    // Query <?php echo queryName($query); ?>: construct its GF
    <?php echo $gf->value(); ?> <?php echo queryName($query); ?>_state<?php echo $ctrStr; ?>;
<?php
        } // if we have a GF
    } // foreach query
?>

    MMappedStorage bitStore;
    Column outBitCol(bitStore);
    BStringIterator outQueries (outBitCol, queriesToRun);

#ifdef PER_QUERY_PROFILE
<?php foreach ($queries as $query => $val) { ?>
    int64_t numTuples_<?php echo queryName($query); ?> = 0;
<?php } /* foreach query */ ?>
#endif // PER_QUERY_PROFILE

    int64_t numTuples = 0;
    while (!queries.AtEndOfColumn ()) {
        ++numTuples;
        QueryIDSet qry;
        qry = queries.GetCurrent();
        qry.Intersect(queriesToRun);
        queries.Advance();

        //selection code for all the predicates
<?php
    cgAccessAttributes($attMap);

    foreach ($queries as $query => $val) {
        $gf = $val['gf'];
        $filters = $val['filters'];
        $synths = $val['synths'];

        // Must match the name of the GF object declared before the tuple loop.
        $stateName = queryName($query) . '_state';

        $filterVals = array_map(static function ($expr) {
            return '(' . $expr . ')';
        }, $filters);

        if ($gf === null) {
            // Simple set of expressions.
            $selExpr = \count($filterVals) > 0 ? implode(' && ', $filterVals) : 'true';
        } else {
            // We have a GF
            $selExpr = "{$stateName}.Filter(" . implode(', ', $filterVals) . ")";
        }
?>
        // do <?php echo queryName($query); ?>:
<?php foreach ($synths as $att => $syn) { ?>
        <?php echo attType($att); ?> <?php echo $att; ?>;
<?php } /* foreach synthesized attribute */ ?>
        if( qry.Overlaps(<?php echo queryName($query); ?>) ) {
#ifdef PER_QUERY_PROFILE
            ++numTuples_<?php echo queryName($query); ?>;
#endif // PER_QUERY_PROFILE
<?php cgDeclarePreprocessing($filters, 2); ?>
            if( <?php echo $selExpr; ?> ) {
                // compute synthesized
<?php
        cgDeclarePreprocessing($synths, 3);
        foreach ($synths as $att => $expr) {
?>
                <?php echo $att; ?> = <?php echo $expr; ?>;
<?php
        } // foreach synthesized attribute
?>
            } else {
                qry.Difference(<?php echo queryName($query); ?>);
            }
        }
<?php foreach ($synths as $att => $syn) { ?>
        colI_<?php echo $att; ?>.Insert(<?php echo $att; ?>);
        colI_<?php echo $att; ?>.Advance();
<?php
        } // foreach synthesized attribute
    } // foreach query
?>
        outQueries.Insert(qry);
        outQueries.Advance();

<?php cgAdvanceAttributes($attMap); ?>
    } // while we still have tuples remaining

    // finally, if there were any results, put the data back in the chunk
<?php
    cgPutbackColumns($attMap, 'input', $wpName);

    foreach ($queries as $query => $val) {
        $synths = $val['synths'];
?>
    if (<?php echo queryName($query); ?>.Overlaps(queriesToRun)) {
<?php foreach ($synths as $att => $expr) { ?>
        colI_<?php echo $att; ?>.Done(<?php echo attCol($att); ?>);
        input.SwapColumn(<?php echo attCol($att); ?>, <?php echo attSlot($att); ?>);
<?php } /* foreach synthesized attribute */ ?>
    } // If <?php echo queryName($query); ?> overlaps queriesToRun
<?php
    } // foreach query
?>

    // put in the output bitmap
    outQueries.Done ();
    input.SwapBitmap (outQueries);

    // Finish performance counters
    PROFILING2_END;

    PCounterList counterList;
    PCounter totalCnt("tpi", numTuples, "<?php echo $wpName; ?>");
    counterList.Append(totalCnt);
    PCounter tplOutCnt("tpo", numTuples, "<?php echo $wpName; ?>");
    counterList.Append(tplOutCnt);

#ifdef PER_QUERY_PROFILE
<?php foreach ($queries as $query => $val) { ?>
    if( <?php echo queryName($query); ?>.Overlaps(queriesToRun)) {
        PCounter cnt("<?php echo queryName($query); ?>", numTuples_<?php echo queryName($query); ?>, "<?php echo $wpName; ?>");
        counterList.Append(cnt);
    }
<?php } /* foreach query */ ?>
#endif // PER_QUERY_PROFILE

    PROFILING2_SET(counterList, "<?php echo $wpName; ?>");

    ChunkContainer tempResult (input);
    tempResult.swap (result);

    return WP_PROCESS_CHUNK; // For Process Chunk
}
//+{"kind":"WPF", "name":"Process Chunk", "action":"end"}
<?php
}
function JoinMerge($wpName, $jDesc) { ?> #define USE_PQ #ifdef USE_PQ //#include <algorithm> #include <vector> #else #include <vector> #include<set> #endif #include "WorkDescription.h" #include "ExecEngineData.h" #include "Column.h" #include "ColumnIterator.cc" #include "MMappedStorage.h" #include "BString.h" #include "BStringIterator.h" #include "HashTableMacros.h" #include <string.h> using namespace std; #define MAX_HASH 0xffffffffffff #define MAX_NUM_CHUNKS 128 // maximum number of chunks we can have for linear scan solution #ifndef USE_PQ struct HashWrapper { HashWrapper() : hash(MAX_HASH), chunkNo(0) {} __uint64_t hash:48; __uint64_t chunkNo:16; }; #endif <?php // Macro to find minimum value in a list // this is the loop that finds the best entry using linear scan // # minIndex$1 is set at the best position // in put assumed in hashes$1 // Arguments: // $side is "LHS" or "RHS" $FIND_MIN = function ($side) { ?> best<?php echo $side; ?> = MAX_HASH; for (int i=0; i < num<?php echo $side; ?> Chunks; i++) { HT_INDEX_TYPE hash = hashes<?php echo $side; ?> [i]; // current value minIndex<?php echo $side; ?> = (best<?php echo $side; ?> > hash) ? i : minIndex<?php echo $side; ?> ; best<?php echo $side; ?> = (best<?php echo $side; ?> > hash) ? 
hash: best<?php echo $side; ?> ; } // remove this after debugging FATALIF(best<?php echo $side; ?> != MAX_HASH && WHICH_SEGMENT(best<?php echo $side; ?> ) !=0, "Hash out of range %llu", (unsigned long long) best<?php echo $side; ?> ); <?php }; ?> /* void check_correctness(int start, int end, BStringIterator& biter, ColumnIterator<__uint64_t>& hiter) { assert(start == end); int s = 0; int e = 262143; if (start > 0) { s = start * 262143 + (start-1); e = s + 262143 + 1; } int tuple = 0; while (!biter.AtEndOfColumn()) { if (!(s < hiter.GetCurrent() && hiter.GetCurrent() <= e)) printf("\n ------- (%d,%d)(%d,%d) currVal = %d", start,end,s,e,hiter.GetCurrent()); assert(s <= hiter.GetCurrent() && hiter.GetCurrent() <= e); hiter.Advance(); biter.Advance(); tuple++; } printf("\n num tuples = %d", tuple); } */ #ifndef USE_PQ // make heap comparator struct compare_key { bool operator()( const HashWrapper lhs, const HashWrapper rhs ) { return (rhs.hash < lhs.hash); } }; struct compare_key_rev { bool operator()( const HashWrapper lhs, const HashWrapper rhs ) { return (lhs.hash < rhs.hash); } }; #endif <?php // More macros // Macro to Advance() columns // $side is LHS or RHS // $chunkNum is the chunk number // $sde is Lhs or Rhs $ADVANCE_CALL = function ($side, $chunkNum, $sde) use($jDesc) { $list = "attribute_queries_" . $side; foreach ($jDesc->{$list} as $att => $queries) { ?> col<?php echo $side; ?> IterVec_<?php echo $att; ?> [<?php echo $chunkNum; ?> ].Advance(); <?php } ?> myInBStringIter<?php echo $sde; ?> Vec[<?php echo $chunkNum; ?> ].Advance(); col<?php echo $side; ?> IterVecHash[<?php echo $chunkNum; ?> ].Advance(); <?php }; $ADVANCE_SEARCH_CALL = function ($col, $index, $side, $sde) use($jDesc, $FIND_MIN) { $list = "attribute_queries_" . 
$side; ?> //while (!myInBStringIter$4Vec[$2].AtEndOfColumn()) if (!myInBStringIter<?php echo $sde; ?> Vec[<?php echo $index; ?> ].AtEndOfColumn()) { //while (!<?php echo $col; ?> [<?php echo $index; ?> ].AtUnwrittenByte()) // Now find the first tuple for which query is active //Bitstring curBits(myInBStringIterLhsVec[<?php echo $index; ?> ].GetCurrent ()); //curBits.Intersect (queriesToRun); // If tuple has some active query, fill the heap //if (!curBits.IsEmpty()) totalguys<?php echo $side; ?> ++; #ifdef USE_PQ // set new value in place of old value > // ALIN: Please fix the line below, I don't know what the $1 is supposed to be ' hashes<?php echo $side; ?> [minIndex<?php echo $side; ?> ] = <?php echo $col; ?> [minIndex<?php echo $side; ?> ].GetCurrent(); #else HashWrapper w; w.hash = <?php echo $col; ?> [<?php echo $index; ?> ].GetCurrent(); // assume first column of each chunk is hash w.chunkNo = <?php echo $index; ?> ; minHeap<?php echo $side; ?> .insert (w); #endif /* break; // while loop } else { // If this tuple don't have active query, advance all the columns of this chunk ' // including Bitstring column <?php foreach ($jDesc->{$list} as $att) { ?> col<?php echo $side; ?> IterVec_<?php echo $att; ?> [<?php echo $index; ?> ].Advance(); <?php } /* foreach */ ?> myInBStringIter<?php echo $sde; ?> Vec[<?php echo $index; ?> ].Advance(); col<?php echo $side; ?> IterVecHash[<?php echo $index; ?> ].Advance(); } */ } #ifdef USE_PQ <?php $FIND_MIN($side); ?> #endif <?php }; /* ADVANCE_SEARCH_CALL */ ?> /* This takes two sorted list of chunks, lhs list and rhs list, and do the sort merge join. It maintaines a min heap of hash values on top of each list and virtually now we have just 2 lists to do sort merge. When some value matches from LHS and RHS heap, we need to checkpoint all the iterators of all the columns of all the RHS chunks including storing the heap, so that we can restore them all if another consecutive LHS value matches. 
*/ //+{"kind":"WPF", "name":"Merge", "action":"start"} extern "C" int JoinMergeWorkFunc_<?php echo $wpName; ?> _writer (WorkDescription &workDescription, ExecEngineData &result) { int totalguysLHS = 0; int totalguysRHS = 0; int total = 0; int totalhash = 0; double start_time = global_clock.GetTime(); // get the input LHS and RHS chunk container from work descripton JoinMergeWorkDescription myWork; myWork.swap (workDescription); ContainerOfChunks &inputLHSList = myWork.get_chunksLHS (); ContainerOfChunks &inputRHSList = myWork.get_chunksRHS (); int start = myWork.get_start(); int end = myWork.get_end(); // get the number of chunks in each list to create vectors of this length int numLHSChunks = inputLHSList.Length(); int numRHSChunks = inputRHSList.Length(); // get the waypoint identifier unsigned int wayPointID = myWork.get_wayPointID ().GetID(); vector<ColumnIterator<__uint64_t> > colLHSIterVecHash; vector<ColumnIterator<__uint64_t> > colRHSIterVecHash; colLHSIterVecHash.resize(numLHSChunks); colRHSIterVecHash.resize(numRHSChunks); // get the input bitmap out of the LHS input chunks vector<BStringIterator> myInBStringIterLhsVec; myInBStringIterLhsVec.resize(numLHSChunks); // set the vector size assert(inputLHSList.Length()); inputLHSList.MoveToStart(); int i = 0; while (inputLHSList.RightLength()) { BStringIterator myInBStringIter; inputLHSList.Current().SwapBitmap (myInBStringIter); myInBStringIterLhsVec[i].swap (myInBStringIter); myInBStringIterLhsVec[i].SetFragmentRange(start, end); // get the hash now Column col_hash; inputLHSList.Current().SwapHash(col_hash); assert(col_hash.IsValid()); ColumnIterator<__uint64_t> iter (col_hash, start, end); colLHSIterVecHash[i].swap(iter); //check_correctness(start, end, myInBStringIterLhsVec[i], colLHSIterVecHash[i]); inputLHSList.Advance (); i++; } // get the input bitmap out of the RHS input chunks vector<BStringIterator> myInBStringIterRhsVec; myInBStringIterRhsVec.resize(numRHSChunks); // set the vector size 
assert(inputRHSList.Length()); inputRHSList.MoveToStart(); i = 0; while (inputRHSList.RightLength()) { BStringIterator myInBStringIter; inputRHSList.Current().SwapBitmap(myInBStringIter); myInBStringIterRhsVec[i].swap (myInBStringIter); myInBStringIterRhsVec[i].SetFragmentRange(start, end); //fprintf(stderr, "\nRHS %d", myInBStringIterRhsVec[i].GetNumTuples()); // get the hash now Column col_hash; inputRHSList.Current().SwapHash(col_hash); assert(col_hash.IsValid()); ColumnIterator<__uint64_t> iter (col_hash, start, end); colRHSIterVecHash[i].swap(iter); //check_correctness(start, end, myInBStringIterRhsVec[i], colRHSIterVecHash[i]); inputRHSList.Advance (); i++; } // get all of the queries that are active here QueryIDSet queriesToRun = QueryExitsToQueries(myWork.get_whichQueryExits ()); // this is the output chunk Chunk output; // create output iterators <?php foreach ($jDesc->attribute_queries_LHS_copy as $att => $queries) { ?> MMappedStorage store_<?php echo $att; ?> ; Column col_<?php echo $att; ?> (store_<?php echo $att; ?> ); <?php echo attIteratorType($att); ?> colLHSOutIter_<?php echo $att; ?> (col_<?php echo $att; ?> ); <?php } /*foreach*/ ?> <?php foreach ($jDesc->attribute_queries_RHS_copy as $att => $queries) { ?> MMappedStorage store_<?php echo $att; ?> ; Column col_<?php echo $att; ?> (store_<?php echo $att; ?> ); <?php echo attIteratorType($att); ?> colRHSOutIter_<?php echo $att; ?> (col_<?php echo $att; ?> ); <?php } /*foreach*/ ?> // Create output BitString MMappedStorage myStore; Column bitmapOut (myStore); BStringIterator myOutBStringIter (bitmapOut, queriesToRun); // Build input iterators vectors first // Define only those attributes iterator which are required by some query // vector contains same type of column in all chunks, each vector index represents // a chunk <?php foreach ($jDesc->attribute_queries_LHS as $att => $queries) { ?> vector< <?php echo attIteratorType($att); ?> > colLHSIterVec_<?php echo $att; ?> ; colLHSIterVec_<?php echo 
$att; ?> .resize(numLHSChunks); <?php } /* foreach */ ?> <?php foreach ($jDesc->attribute_queries_RHS as $att => $queries) { ?> vector< <?php echo attIteratorType($att); ?> > colRHSIterVec_<?php echo $att; ?> ; colRHSIterVec_<?php echo $att; ?> .resize(numRHSChunks); <?php } /* foreach */ ?> // Extract columns now. // This extracts columns if there is any query for it in this WP i = 0; <?php foreach ($jDesc->attribute_queries_LHS as $att => $queries) { ?> { i = 0; inputLHSList.MoveToStart(); while (inputLHSList.RightLength()) { // WP node queries intersect Translator provided queries QueryIDSet <?php echo attQrys($att); ?> (<?php echo $queries; ?> , true); <?php cgExtractColumnFragment($att, "inputLHSList.Current()", "start", "end", $wpName); ?> if (<?php echo attQrys($att); ?> .Overlaps(queriesToRun)){ colLHSIterVec_<?php echo $att; ?> [i].swap(<?php echo attData($att); ?> ); } inputLHSList.Advance (); i++; } } <?php } /* foreach */ ?> <?php foreach ($jDesc->attribute_queries_RHS as $att => $queries) { ?> { i = 0; inputRHSList.MoveToStart(); while (inputRHSList.RightLength()) { // WP node queries intersect Translator provided queries QueryIDSet <?php echo attQrys($att); ?> (<?php echo $queries; ?> , true); <?php cgExtractColumnFragment($att, "inputRHSList.Current()", "start", "end", $wpName); ?> if (<?php echo attQrys($att); ?> .Overlaps(queriesToRun)){ colRHSIterVec_<?php echo $att; ?> [i].swap(<?php echo attData($att); ?> ); } inputRHSList.Advance (); i++; } } <?php } /* foreach */ ?> // Here we start the sort merge join // Create priority queue for hash column values #ifdef USE_PQ HT_INDEX_TYPE hashesLHS[MAX_NUM_CHUNKS]; HT_INDEX_TYPE hashesRHS[MAX_NUM_CHUNKS]; FATALIF(numLHSChunks >= MAX_NUM_CHUNKS, "Too many chunks for LHS in Merge"); FATALIF(numRHSChunks >= MAX_NUM_CHUNKS, "Too many chunks for RHS in Merge"); HT_INDEX_TYPE bestLHS = MAX_HASH; HT_INDEX_TYPE bestRHS = MAX_HASH; int minIndexLHS = -1; // this indicates the chunk that is the best at this point // 
this invariant is maintained throught the code int minIndexRHS = -1; #else multiset<HashWrapper, compare_key_rev> minHeapLHS; multiset<HashWrapper, compare_key_rev> minHeapRHS; #endif // Fill the first value of each LHS chunk in LHS heap for (i = 0; i < numLHSChunks; i++) { #ifdef USE_PQ minIndexLHS = i; #endif <?php $ADVANCE_SEARCH_CALL("colLHSIterVecHash", "i", "LHS", "Lhs"); ?> } // Fill the first value of each RHS chunk in RHS heap for (i = 0; i < numRHSChunks; i++) { #ifdef USE_PQ minIndexRHS = i; #endif <?php $ADVANCE_SEARCH_CALL("colRHSIterVecHash", "i", "RHS", "Rhs"); ?> } // Now pick one of each heap and keep comparing until one of the heap is exhausted #ifdef USE_PQ /* while (hashesLHS[minIndexLHS] != MAX_HASH && hashesRHS[minIndexRHS] != MAX_HASH) { HT_INDEX_TYPE wl = hashesLHS[minIndexLHS]; HT_INDEX_TYPE wr = hashesRHS[minIndexRHS]; -- INEFFICIENT */ while (bestLHS != MAX_HASH && bestRHS != MAX_HASH) { HT_INDEX_TYPE wl = bestLHS; HT_INDEX_TYPE wr = bestRHS; #else while (!minHeapLHS.empty() && !minHeapRHS.empty()) { HashWrapper wlT = *(minHeapLHS.begin()); HashWrapper wrT = *(minHeapRHS.begin()); HT_INDEX_TYPE wl = wlT.hash; HT_INDEX_TYPE wr = wrT.hash; int minIndexLHS = wlT.chunkNo; int minIndexRHS = wrT.chunkNo; #endif if (wl < wr) { // printf("\n Hash val not found is = %ld, segment = %ld, slot = %ld", wl, WHICH_SEGMENT (wl), WHICH_SLOT (wl)); fflush(stdout); assert(0); // erase the minimum element #ifdef USE_PQ hashesLHS[minIndexLHS] = MAX_HASH; #else assert (!minHeapLHS.empty()); minHeapLHS.erase(minHeapLHS.begin()); #endif // here advance all columns of LHS of chunk number wl.chunkNo <?php $ADVANCE_CALL("LHS", "minIndexLHS", "Lhs"); $ADVANCE_SEARCH_CALL("colLHSIterVecHash", "minIndexLHS", "LHS", "Lhs"); ?> } else if (wl > wr) { // erase the minimum element #ifdef USE_PQ hashesRHS[minIndexRHS] = MAX_HASH; #else assert (!minHeapRHS.empty()); minHeapRHS.erase(minHeapRHS.begin()); #endif // here advance all columns of RHS of chunk number wr.chunkNo 
<?php $ADVANCE_CALL("RHS", "minIndexRHS", "Rhs"); $ADVANCE_SEARCH_CALL("colRHSIterVecHash", "minIndexRHS", "RHS", "Rhs"); ?> } else { // (wl == wr) HT_INDEX_TYPE matchingHash = wl; // Save checkpoint for all rhs columns before incrementing both chunks for (int chk = 0; chk < numRHSChunks; chk++) { <?php foreach ($jDesc->attribute_queries_RHS as $att => $queries) { ?> colRHSIterVec_<?php echo $att; ?> [chk].CheckpointSave(); <?php } /* foreach */ ?> myInBStringIterRhsVec[chk].CheckpointSave(); colRHSIterVecHash[chk].CheckpointSave(); } // Also save the state of the heap #ifdef USE_PQ HT_INDEX_TYPE hashesRHS_copy[MAX_NUM_CHUNKS]; memcpy(hashesRHS_copy, hashesRHS, numRHSChunks*sizeof(HT_INDEX_TYPE)); int minIndexRHS_copy = minIndexRHS; #else multiset<HashWrapper, compare_key_rev> minHeapRHSCheckpoint(minHeapRHS); #endif #ifdef USE_PQ while (bestLHS != MAX_HASH) #else while (!minHeapLHS.empty()) // break from this if 2 consecutive LHS mismatches #endif { #ifdef USE_PQ // HT_INDEX_TYPE wl1 = hashesLHS[minIndexLHS]; HT_INDEX_TYPE wl1 = bestLHS; #else HashWrapper wl1T = *(minHeapLHS.begin()); HT_INDEX_TYPE wl1 = wl1T.hash; int minIndexLHS = wl1T.chunkNo; #endif if (wl1 != matchingHash) { // next LHS dont match to previous LHS hash, first time always match break; } else { // restore everything // restore the RHS Column iterators to original value for (int chk = 0; chk < numRHSChunks; chk++) { <?php foreach ($jDesc->attribute_queries_RHS as $att => $queries) { ?> colRHSIterVec_<?php echo $att; ?> [chk].CheckpointRestore(); <?php } /* foreach */ ?> myInBStringIterRhsVec[chk].CheckpointRestore(); colRHSIterVecHash[chk].CheckpointRestore(); } // restore the original heap state #ifdef USE_PQ memcpy(hashesRHS, hashesRHS_copy, numRHSChunks*sizeof(HT_INDEX_TYPE)); minIndexRHS = minIndexRHS_copy; bestRHS = hashesRHS[minIndexRHS_copy]; #else minHeapRHS.clear(); minHeapRHS = minHeapRHSCheckpoint; #endif } #ifdef USE_PQ // while (hashesRHS[minIndexRHS] != MAX_HASH) { while (bestRHS != 
MAX_HASH) { #else while (!minHeapRHS.empty()) { #endif #ifdef USE_PQ // HT_INDEX_TYPE wr1 = hashesRHS[minIndexRHS]; HT_INDEX_TYPE wr1 = bestRHS; #else HashWrapper wr1T = *(minHeapRHS.begin()); HT_INDEX_TYPE wr1 = wr1T.hash; int minIndexRHS = wr1T.chunkNo; #endif if (wl1 == wr1) { // first one will obviously match as it matched before // Merge all columns here for wl1.chunkNo and wr1.chunkNo after matching attributes // Make sure both of their bitstrings intersect Bitstring rez = queriesToRun; rez.Intersect (myInBStringIterRhsVec[minIndexRHS].GetCurrent ()); rez.Intersect (myInBStringIterLhsVec[minIndexLHS].GetCurrent ()); // This contains union of all translater queries Bitstring uni = 0; Bitstring qBits; totalhash++; // Do the actual comparision bool anyOneMatch = false; <?php foreach ($jDesc->queries_attribute_comparison as $qClass) { ?> // See if any query in query class is eligible for this comparision qBits = QueryIDSet(<?php echo $qClass->qClass; ?> , true); qBits.Intersect(rez); if ( /* !qBits.IsEmpty () && */ <?php foreach ($qClass->att_pairs as $pair) { ?> colLHSIterVec_<?php echo $pair->lhs; ?> [minIndexLHS].GetCurrent() == colRHSIterVec_<?php echo $pair->rhs; ?> [minIndexRHS].GetCurrent() && <?php } /*foreach pair*/ ?> 1 ) { anyOneMatch = true; uni.Union (qBits); } <?php } /* foreach qClass */ ?> if (anyOneMatch) { // fill the output iterators <?php foreach ($jDesc->attribute_queries_LHS_copy as $att => $queries) { ?> //if (rez.Overlaps(<?php echo $queries; ?> )) { Should we check translator queries? colLHSOutIter_<?php echo $att; ?> .Insert(colLHSIterVec_<?php echo $att; ?> [minIndexLHS].GetCurrent()); colLHSOutIter_<?php echo $att; ?> .Advance(); //} <?php } /* foreach */ ?> <?php foreach ($jDesc->attribute_queries_RHS_copy as $att => $queries) { ?> //if (rez.Overlaps(<?php echo $queries; ?> )) { Should we check translator queries? 
colRHSOutIter_<?php echo $att; ?> .Insert(colRHSIterVec_<?php echo $att; ?> [minIndexRHS].GetCurrent()); colRHSOutIter_<?php echo $att; ?> .Advance(); //} <?php } /* foreach */ ?> myOutBStringIter.Insert (uni); total++; myOutBStringIter.Advance (); } else { //Bitstring b(0, true); //myOutBStringIter.Insert (b); //myOutBStringIter.Advance (); } // erase the minimum element #ifdef USE_PQ hashesRHS[minIndexRHS] = MAX_HASH; #else minHeapRHS.erase(minHeapRHS.begin()); #endif // here advance all columns of RHS of chunk number wr1.chunkNo <?php $ADVANCE_CALL("RHS", "minIndexRHS", "Rhs"); $ADVANCE_SEARCH_CALL("colRHSIterVecHash", "minIndexRHS", "RHS", "Rhs"); ?> } else { break; } } // erase the minimum element #ifdef USE_PQ hashesLHS[minIndexLHS] = MAX_HASH; #else minHeapLHS.erase(minHeapLHS.begin()); #endif // here advance all columns of LHS of chunk number wl1.chunkNo <?php $ADVANCE_CALL("LHS", "minIndexLHS", "Lhs"); $ADVANCE_SEARCH_CALL("colLHSIterVecHash", "minIndexLHS", "LHS", "Lhs"); ?> } } } // fill the output iterators <?php foreach ($jDesc->attribute_queries_LHS_copy as $att => $queries) { ?> Column collhs_<?php echo $att; ?> ; colLHSOutIter_<?php echo $att; ?> .Done(collhs_<?php echo $att; ?> ); output.SwapColumn (collhs_<?php echo $att; ?> , <?php echo attSlot($att); ?> ); <?php } /* foreach */ foreach ($jDesc->attribute_queries_RHS_copy as $att => $queries) { ?> Column colrhs_<?php echo $att; ?> ; colRHSOutIter_<?php echo $att; ?> .Done(colrhs_<?php echo $att; ?> ); output.SwapColumn (colrhs_<?php echo $att; ?> , <?php echo attSlot($att); ?> ); <?php } /* foreach */ ?> //myOutBStringIter.Done (bitmapOut); myOutBStringIter.Done (); // printf("\nTuples %d %d %d %d %d", myOutBStringIter.GetNumTuples(), total, totalguysLHS, totalguysRHS, totalhash); // fflush(stdout); PROFILING(start_time, "<?php echo $wpName; ?> ", "Merge", "%d\t%d", totalhash, total); //output.SwapBitmap (bitmapOut); output.SwapBitmap (myOutBStringIter); /* MMappedStorage st; Column co (st); 
Bitstring patt(0xf, true); BStringIterator iter(co, patt, 0); output.SwapBitmap(iter); */ // and give back the result ChunkContainer tempResult (output); tempResult.swap (result); return 0; // have to return something } //+{"kind":"WPF", "name":"Merge", "action":"end"} <?php }