/**
 * Emits the C++ work function `JoinLHSWorkFunc_<wpName>` ("LHS Lookup") for a
 * join waypoint.
 *
 * Side effect on the descriptor: before anything is emitted,
 * `$jDesc->hash_RHS_attr` is re-keyed by each attribute's slot (as reported by
 * lookupAttribute()), sorted by that key, and written back to `$jDesc`. The
 * generated code extracts the hashed attributes in exactly this order.
 *
 * Outline of the generated C++ function:
 *   1. Takes the input chunk and bitmap from the work description and
 *      declares the output column iterators.
 *   2. Extracts all hash table segments through a reader view.
 *   3. For every input tuple whose bitstring intersects the queries to run,
 *      hashes the LHS keys, then repeatedly calls Extract() on the selected
 *      segment: first for the bitmap, then for each hashed RHS attribute.
 *   4. For every query class, compares the RHS values with the LHS columns
 *      and, on a match, writes the RHS values (and, once no longer
 *      "shallow", the LHS values) to the output columns.
 *   5. Folds the exists / not-exists targets into the output bitstring.
 *   6. Swaps the produced columns and bitmap into the output chunk, records
 *      profiling data, frees the scratch buffer and returns 1.
 *
 * Descriptor fields read: hash_RHS_attr, attribute_queries_LHS,
 * attribute_queries_LHS_copy, attribute_queries_RHS,
 * attribute_queries_RHS_copy, exists_target, not_exists_target, LHS_keys and
 * queries_attribute_comparison (each entry exposing qClass and att_pairs with
 * lhs / rhs members).
 *
 * Identifier suffixes (`_Column`, `_Column_Out`, `_Out`, `_RHS`, `RHS`,
 * `RHSShadow`, `RHSobj`) are written directly after the echoed name, because
 * any blank in between would split the generated C++ identifier in two.
 *
 * NOTE(review): the profiling labels are emitted as `"<wpName> "` with a
 * trailing blank inside the string literal. That is kept as-is so the labels
 * stay identical to those emitted by JoinRHS; confirm whether the blank is
 * intended.
 *
 * @param string $wpName Waypoint name; spliced into the generated function
 *                       name and the profiling labels.
 * @param object $jDesc  Join descriptor (fields listed above); modified as
 *                       described.
 * @return void The generated C++ is echoed.
 */
function JoinLHS($wpName, $jDesc) {
    // Order the hashed RHS attributes by slot: the generated code has to pull
    // them out of the hash table in that order.
    $rhsAttOrder = [];
    foreach ($jDesc->hash_RHS_attr as $attr) {
        $att = lookupAttribute($attr);
        $rhsAttOrder[$att->slot()] = $attr;
    }
    ksort($rhsAttOrder);
    $jDesc->hash_RHS_attr = $rhsAttOrder;
?>
//+{"kind":"WPF", "name":"LHS Lookup", "action":"start"}
extern "C"
int JoinLHSWorkFunc_<?= $wpName ?> (WorkDescription &workDescription, ExecEngineData &result) {

    double start_time = global_clock.GetTime();
    PROFILING2_START;

    // this is the area where all of the intermediate, serialized records are stored
    SerializedSegmentArray serializedSegments [NUM_SEGS];

    // this is the area where all of the records are serialized to;
    // 10K bytes are initially used for this
    char *serializeHere = (char *) malloc (10000);

    // this is the output chunk
    Chunk output;

    // go to the work description and get the input chunk
    JoinLHSWorkDescription myWork;
    myWork.swap (workDescription);
    Chunk &input = myWork.get_chunkToProcess ();

    // get the waypoint ID from the chunk
    int wayPointID = myWork.get_wayPointID ();

    QueryIDSet queriesToRun = QueryExitsToQueries(myWork.get_whichQueryExits ());

<?php cgAccessColumns($jDesc->attribute_queries_LHS, 'input', $wpName); ?>

    BStringIterator myInBStringIter;
    input.SwapBitmap (myInBStringIter);

    // start the iterators for the output columns for LHS; used only if stillShallow = 0
<?php foreach ($jDesc->attribute_queries_LHS_copy as $att => $queries) { ?>
    <?= attIteratorType($att) ?> <?= $att ?>_Column_Out;
<?php } /*foreach*/ ?>

    // these manage the output columns that come from the RHS (now stored in the hash table)
<?php cgConstructColumns(array_keys($jDesc->attribute_queries_RHS_copy)); ?>

    // this is the ouput bitstring
    MMappedStorage myStore;
    Column bitmapOut (myStore);
    BStringIterator myOutBStringIter (bitmapOut, queriesToRun);

    // now we extract all of the hash table segments... after this, myEntries will hold them all
    HashTableView myView;
    myWork.get_centralHashTable ().EnterReader (myView);
    HashTableSegment myEntries[NUM_SEGS];
    myView.ExtractAllSegments (myEntries);

    // this tells us that we are "still shallow"---not making a deep copy of the LHS atts to the output
    int stillShallow = 1;

    // the bitstring that will be exracted from the hash table
    QueryIDSet *bitstringRHS = 0;

    QueryIDSet existsTarget(<?= $jDesc->exists_target ?>, true);
    QueryIDSet notExistsTarget(<?= $jDesc->not_exists_target ?>, true);

    // these are all of the attribute values that come from the hash table...
    // for each att we need a pointer as well as a dummy value that the pointer will be set to by default
<?php foreach ($jDesc->attribute_queries_RHS as $att => $queries) { ?>
    QueryIDSet <?= attQrys($att) ?>_RHS(<?= $queries ?>, true);
    <?= attType($att) ?> <?= $att ?>RHSShadow;
    <?= attType($att) ?> *<?= $att ?>RHS = NULL;
    <?= attType($att) ?> <?= $att ?>RHSobj;
<?php } /*foreach*/ ?>

    // now actually try to match up all of the tuples!
    int totalNum = 0;
    while (!myInBStringIter.AtEndOfColumn ()) { // TBD, probably this is not working TBD

        // counts how many matches for this query
        int numHits = 0;

        // extract values of attributes from streams

        // now go through the LHS input atts one at a time and extract if it is needed by an active query

        // see which queries match up
        QueryIDSet curBits = myInBStringIter.GetCurrent ();
        curBits.Intersect (queriesToRun);

        QueryIDSet exists; // keeps track of the queries for which a match is found
        QueryIDSet oldBitstringLHS; // last value of bistringLHS

        // if the input query is not empty
        if (!curBits.IsEmpty ()) {

            totalNum++;

            // compute the hash for LHS
            HT_INDEX_TYPE hashValue = HASH_INIT;
<?php foreach ($jDesc->LHS_keys as $att) { ?>
            hashValue = CongruentHash(Hash(<?= $att ?>_Column.GetCurrent()), hashValue);
<?php } /*foreach*/ ?>

            // figure out which of the hash buckets it goes into
            unsigned int index = WHICH_SEGMENT (hashValue);

            // now, go to that index and extract matching tuples!
            HT_INDEX_TYPE curSlot = WHICH_SLOT (hashValue);
            hashValue = curSlot;

            // this loops through all of the possible RHS hits
            while (1) {

                // this is the bitstring that will go in the output
                QueryIDSet bitstringLHS;

                // for safety (in case we look at a bitstring that spans multiple
                // entries that is not done being written by a concurrent writer)
                // empty out the inital bitstring
                ((QueryIDSet *) serializeHere)->Empty ();

                // give safe "shadow" values to all of the RHS attributes
<?php foreach ($jDesc->hash_RHS_attr as $att) { ?>
                <?= $att ?>RHS = &<?= $att ?>RHSShadow;
<?php } /*foreach*/ ?>

                // here we go through and extract the atts one at a time from the hash
                // table. Note that the atts must be extracted IN ORDER. That is, the
                // bitstring is first, followed by the att mapped to the lowerest column
                // position, followed by the att mapped to the next lowest, and so on.

                // The Extract function pulls an attribute out of the hash table...
                int lenSoFar = 0, dummy, done;
                int lastLen = myEntries[index].Extract (serializeHere, curSlot, hashValue, wayPointID, BITMAP, dummy, done);

                // if we cannot find a bitstring, there was no tuple here, and we are done
                if (lastLen == 0) {
                    break;
                }

                // remember the bitstring
                bitstringRHS = (QueryIDSet *) serializeHere;
                lenSoFar += lastLen;

                // next look for other hashed attributes
<?php foreach ($jDesc->hash_RHS_attr as $att) { ?>
                lastLen = myEntries[index].Extract (serializeHere + lenSoFar, curSlot, hashValue, wayPointID, <?= attSlot($att) ?>, dummy, done);

                // see if we got attribute
                if (lastLen > 0) {
                    Deserialize(serializeHere + lenSoFar, <?= $att ?>RHSobj);
                    //<?= attOptimizedDeserialize($att, $att . "RHSobj", "serializeHere", "lenSoFar") ?>;
                    <?= $att ?>RHS = &<?= $att ?>RHSobj;
                    lenSoFar += lastLen;
                } else {
                    FATALIF(<?= attQrys($att) ?>_RHS.Overlaps(*bitstringRHS), "Did not find attribute <?= $att ?> in active RHS tuple");
                }
<?php } /*foreach*/ ?>

                // see if we have any query matches
                bitstringRHS->Intersect (curBits);
                QueryIDSet qBits;

                //printf("TPLLLLL: cust_acctbal = %f orders_custkey = %d cust_custkey = %d\n", *customer_c_acctbalRHS, orders_o_custkey_Column.GetCurrent(), *customer_c_custkeyRHS);

<?php foreach ($jDesc->queries_attribute_comparison as $qClass) { ?>
                // See if any query in query class is eligible for this comparision
                qBits = QueryIDSet(<?= $qClass->qClass ?>, true);
                qBits.Intersect(*bitstringRHS);
                if (
                    !qBits.IsEmpty () &&
<?php     foreach ($qClass->att_pairs as $pair) { ?>
                    *<?= $pair->rhs ?>RHS == <?= $pair->lhs ?>_Column.GetCurrent() &&
<?php     } /*foreach pair*/ ?>
                    1 )
                {
                    bitstringLHS.Union (qBits);
                }
<?php } /*foreach query class*/ ?>

                // if any of them hit...
                if (!bitstringLHS.IsEmpty ()) {

                    exists.Union(bitstringLHS);

                    numHits++;

                    // see if we need to move from shallow to deep
                    if (numHits == 2 && stillShallow) {
<?php foreach ($jDesc->attribute_queries_LHS_copy as $att => $qrys) { ?>
                        <?= attData($att) ?>_Out.CreateDeepCopy (<?= attData($att) ?>);
                        <?= attData($att) ?>_Out.Insert (<?= attData($att) ?>.GetCurrent());
                        <?= attData($att) ?>_Out.Advance();
<?php } /*foreach*/ ?>
                        stillShallow = 0;
                    }

                    // now, add all of the outputs over... first deal with the LHS input atts
                    // that get copied into output atts
                    if (!stillShallow) {
<?php foreach ($jDesc->attribute_queries_LHS_copy as $att => $qrys) { ?>
                        <?= attData($att) ?>_Out.Insert (<?= attData($att) ?>.GetCurrent());
                        <?= attData($att) ?>_Out.Advance();
<?php } /*foreach*/ ?>
                    }

                    // now, deal with the output atts that come from the hash table
<?php foreach ($jDesc->attribute_queries_RHS_copy as $att => $qrys) { ?>
                    <?= attData($att) ?>_Out.Insert (*<?= $att ?>RHS);
                    <?= attData($att) ?>_Out.Advance();
<?php } /*foreach*/ ?>

                    // finally, set the bitmap. We are one element behind
                    if (!oldBitstringLHS.IsEmpty()){
                        myOutBStringIter.Insert (oldBitstringLHS);
                        myOutBStringIter.Advance ();
                    }
                    oldBitstringLHS=bitstringLHS;
                } // empty bistring
            }
        }

        // compute the true exist queries
        QueryIDSet tmp = existsTarget;
        tmp.Intersect(exists);
        tmp.Intersect(curBits); // not needed but I'm paranoid

        // compute the true not exits queries
        QueryIDSet tmp2 = curBits;
        tmp2.Intersect(notExistsTarget);
        tmp2.Difference(exists);

        // now put everything in bitstringLHS
        oldBitstringLHS.Union(tmp);
        oldBitstringLHS.Union(tmp2);

        if (!oldBitstringLHS.IsEmpty()){
            myOutBStringIter.Insert (oldBitstringLHS);
            myOutBStringIter.Advance ();
        }

        // at this point, we are done trying to join this tuple... any join results have been
        // written to the output columns. Note that we don't have to advance in the output data
        // columns; if we are shallow, we don't touch the output columns. If we are not shallow,
        // if there were no results, we have nothing to write. HOWEVER, if we are shallow and
        // we did not get a match, we need to add an empty btstring
        if (stillShallow && numHits == 0) {
<?php foreach ($jDesc->attribute_queries_RHS_copy as $att => $qrys) { ?>
            <?= attType($att) ?> tmp_<?= attData($att) ?>;
            <?= attData($att) ?>_Out.Insert (tmp_<?= attData($att) ?>);
            <?= attData($att) ?>_Out.Advance();
<?php } /*foreach*/ ?>

            if (oldBitstringLHS.IsEmpty()){ // no not exist and no join match
                myOutBStringIter.Insert (oldBitstringLHS);
                myOutBStringIter.Advance ();
            }
        }

        // lastly, we need to advance in the INPUT tuples
<?php foreach ($jDesc->attribute_queries_LHS as $att => $qrys) { ?>
        <?= attData($att) ?>.Advance();
<?php } /*foreach*/ ?>

        // advance the input bitstring
        myInBStringIter.Advance ();
    }

    // DONE! So construct the output tuple

    // if we are still shallow, put the original data into the output
    if (stillShallow) {
<?php foreach ($jDesc->attribute_queries_LHS_copy as $att => $qrys) { ?>
        Column col_<?= $att ?>;
        <?= attData($att) ?>.Done(col_<?= $att ?>);
        output.SwapColumn (col_<?= $att ?>, <?= attSlot($att) ?>);
<?php } /*foreach*/ ?>
    } else {
<?php foreach ($jDesc->attribute_queries_LHS_copy as $att => $qrys) { ?>
        Column col_<?= $att ?>;
        <?= attData($att) ?>_Out.Done(col_<?= $att ?>);
        output.SwapColumn (col_<?= $att ?>, <?= attSlot($att) ?>);
<?php } /*foreach*/ ?>
    }

    {
<?php foreach ($jDesc->attribute_queries_RHS_copy as $att => $qrys) { ?>
        Column col_<?= $att ?>;
        <?= attData($att) ?>_Out.Done(col_<?= $att ?>);
        output.SwapColumn (col_<?= $att ?>, <?= attSlot($att) ?>);
<?php } /*foreach*/ ?>
    }

    // put in the output bitmap
    myOutBStringIter.Done ();
    output.SwapBitmap (myOutBStringIter);

    // and give back the result
    ChunkContainer tempResult (output);
    tempResult.swap (result);

    PROFILING2_END;

    PROFILING(start_time, "<?= $wpName ?> ", "LHS_lookup", "%d", totalNum);
    PROFILING(0.0, "HashTable", "fillrate", "%2.4f", HashTableSegment::globalFillRate*100.0);

    // Finish performance counters
    // Use the Set functionality in case we add additional counters later.
    PCounterList counterList;
    PCounter totalCnt("tpi lhs", totalNum, "<?= $wpName ?> ");
    counterList.Append(totalCnt);

    PROFILING2_SET(counterList, "<?= $wpName ?> ");

    int64_t hFillRate = int64_t(HashTableSegment::globalFillRate * 1000);
    PROFILING2_INSTANT("hfr", hFillRate, "global");

    free (serializeHere);
    return 1;
}
//+{"kind":"WPF", "name":"LHS Lookup", "action":"end"}
<?php
}
/**
 * Resolves an ATTRIBUTE AST node to the attribute it names.
 *
 * The node is first checked with assert_ast_type() against
 * NodeType::ATTRIBUTE; the name stored under NodeKey::NAME in the node's data
 * is then handed to lookupAttribute().
 *
 * @param mixed $ast AST node expected to be of type NodeType::ATTRIBUTE.
 * @return mixed Whatever lookupAttribute() returns for the node's name.
 */
function parseAttribute($ast) {
    assert_ast_type($ast, NodeType::ATTRIBUTE);

    // Pull the attribute name out of the node's data section.
    $attributeName = ast_get(ast_node_data($ast), NodeKey::NAME);

    // The Attribute Manager lookup does the actual resolution.
    return lookupAttribute($attributeName);
}
/**
 * Emits the C++ declarations used to build one output column per attribute.
 *
 * Each entry of $atts is converted with strval() and resolved through
 * lookupAttribute(). With <name> being the attribute's string form followed
 * by $postfix, the emitted C++ declares:
 *   - <name>_Column_store  an MMappedStorage
 *   - <name>_destroyer     a Column::Destroyer; it is assigned a lambda that
 *                          calls Destroy() on every value of the column only
 *                          when the attribute type's destroy() is truthy
 *   - <name>_Column_Ocol   a Column built from the store and the destroyer
 *   - <name>_Column_Out    an iterator (type()->iterator()) over that column
 *   - <name>               a value of the attribute's type
 *
 * The suffixes are written directly after the echoed name: a blank between
 * the two would split the generated C++ identifier.
 *
 * @param iterable $atts    Attribute names.
 * @param string   $postfix Text appended to each attribute name when forming
 *                          the generated identifiers.
 * @return void The generated C++ is echoed.
 */
function cgConstructColumns($atts, $postfix = "") {
    foreach ($atts as $attr) {
        $att = lookupAttribute(strval($attr));
        $attName = $att . $postfix;
        $attType = $att->type();
?>
    MMappedStorage <?= $attName ?>_Column_store;
    Column::Destroyer <?= $attName ?>_destroyer;
<?php   if ($attType->destroy()) { ?>
    <?= $attName ?>_destroyer = [] (Column& c) {
        <?= $attType->iterator() ?> iter(c);
        while( !iter.AtUnwrittenByte() ) {
            <?= $attType ?>& val = const_cast< <?= $attType ?>& >( iter.GetCurrent() );
            val.Destroy();
            iter.Advance();
        }
        iter.Done(c);
    };
<?php   } // if attribute must be destroyed ?>
    Column <?= $attName ?>_Column_Ocol(<?= $attName ?>_Column_store, <?= $attName ?>_destroyer);
    <?= $attType->iterator() ?> <?= $attName ?>_Column_Out(<?= $attName ?>_Column_Ocol);
    <?= $attType ?> <?= $attName ?>; // Container for value to be written
<?php
    } // foreach attribute
}
/**
 * Generates the source file for a cluster waypoint.
 *
 * The attribute named by the AST payload is looked up and must be of a type
 * for which is('clusterable') holds. Output produced while the AST is being
 * processed is captured in an output buffer and appended to $header; the
 * result is passed to _endFile() as the file's headers. The file
 * "<name>.cc" is registered on the returned GenerationInfo and its content is
 * produced by ClusterGenerate() between _startFile() and _endFile().
 * LibraryManager::Push() / Pop() bracket the whole operation.
 *
 * @param mixed  $ast    AST of the waypoint; NodeKey::PAYLOAD names the
 *                       attribute to cluster on.
 * @param string $name   Waypoint name, also the base name of the file.
 * @param string $header Header text placed before the captured output.
 * @return GenerationInfo Describes the generated file.
 */
function parseClusterWP($ast, $name, $header) {
    ob_start();
    LibraryManager::Push();

    $generated = new GenerationInfo();

    /*************** PROCESS AST ***************/

    $clusterAttr = lookupAttribute(ast_get($ast, NodeKey::PAYLOAD));
    grokit_assert(
        $clusterAttr->type()->is('clusterable'),
        'Attempting to cluster on unclusterable attribute ' . $clusterAttr->name()
    );

    /*************** END PROCESS AST ***************/

    // Whatever was echoed so far becomes part of this file's headers.
    $fileHeaders = $header . PHP_EOL . ob_get_clean();

    $sourceFile = "{$name}.cc";
    $generated->addFile($sourceFile, $name);

    _startFile($sourceFile);
    ClusterGenerate($name, $clusterAttr);
    _endFile($sourceFile, $fileHeaders);

    LibraryManager::Pop();

    return $generated;
}
/**
 * Emits the C++ work function `JoinRHSWorkFunc_<wpName>` ("RHS Hash") for a
 * join waypoint.
 *
 * Outline of the generated C++ function:
 *   1. Takes the input chunk and its bitmap from the work description.
 *   2. For every tuple whose bitstring intersects the queries to run, and for
 *      every query class it overlaps: hashes the class's RHS keys, starts a
 *      new serialized record holding the bitstring, then appends each
 *      attribute whose queries overlap that bitstring. Attributes are
 *      appended in ascending slot order.
 *   3. Checks out each of the NUM_SEGS hash table segments in turn, inserts
 *      the serialized records for it, collects the samples of segments whose
 *      Insert() reported true, and checks the segment back in.
 *   4. Puts the input columns back, records profiling data, swaps a
 *      JoinHashResult built from the samples into `result` and returns 0.
 *
 * Descriptor fields read: attribute_queries_RHS and query_classes_hash (each
 * entry exposing att_queries, att_list, qClass and rhs_keys).
 *
 * @param string $wpName Waypoint name; spliced into the generated function
 *                       name and the profiling labels.
 * @param object $jDesc  Join descriptor (fields listed above).
 * @return void The generated C++ is echoed.
 */
function JoinRHS($wpName, $jDesc) {
?>
//+{"kind":"WPF", "name":"RHS Hash", "action":"start"}
extern "C"
int JoinRHSWorkFunc_<?= $wpName ?> (WorkDescription &workDescription, ExecEngineData &result) {

    double start_time = global_clock.GetTime();
    PROFILING2_START;

    // this is the area where all of the intermediate, serialized records are stored
    SerializedSegmentArray serializedSegments [NUM_SEGS];

    // this is the area where all of the records are serialized to;
    // 10K bytes are initially used for this
    char *serializeHere = (char *) malloc (10000);
    int storageSize = 10000;

    // go to the work description and get the input chunk
    JoinRHSWorkDescription myWork;
    myWork.swap (workDescription);
    Chunk &input = myWork.get_chunkToProcess ();

    // get the waypoint identifier
    unsigned int wayPointID = myWork.get_wayPointID ();

    QueryIDSet queriesToRun = QueryExitsToQueries(myWork.get_whichQueryExits ());

<?php cgAccessColumns($jDesc->attribute_queries_RHS, 'input', $wpName); ?>

    // prepare bitstring iterator
    Column inBitCol;
    BStringIterator queries;
    input.SwapBitmap (queries);

    int totalNum = 0; // counter for the tuples processed

    // now actually hash all of the tuples!
    while (!queries.AtEndOfColumn ()){
        QueryIDSet qry;
        qry = queries.GetCurrent();
        qry.Intersect(queriesToRun);
        queries.Advance();

        // extract values of attributes from streams
<?php cgAccessAttributes($jDesc->attribute_queries_RHS); ?>

        if (qry.IsEmpty()){
<?php cgAdvanceAttributes($jDesc->attribute_queries_RHS); ?>
            continue;
        }

        totalNum++;

<?php
    foreach ($jDesc->query_classes_hash as $qClass):
        // Map every attribute of the class to its slot and order the
        // attributes by ascending slot.
        $slotByAtt = [];
        foreach ($qClass->att_queries as $attName => $ignoredQueries) {
            $slotByAtt[$attName] = lookupAttribute($attName)->slot();
        }
        asort($slotByAtt);

        // The empty line after the "Dealing with join attributes" comment
        // below is required: PHP drops the newline that directly follows a
        // closing tag, and the C++ line comment must end before the `if`.
?>
        // Dealing with join attributes <?= implode(",", $qClass->att_list) ?>

        if (qry.Overlaps(QueryIDSet(<?= $qClass->qClass ?>, true))) {

            HT_INDEX_TYPE hashValue = HASH_INIT;
<?php   foreach ($qClass->rhs_keys as $keyAtt): ?>
            hashValue = CongruentHash(Hash(<?= $keyAtt ?>), hashValue);
<?php   endforeach; /*foreach attribute*/ ?>

            // figure out which of the hash buckets it goes into
            unsigned int index = WHICH_SEGMENT (hashValue);

            // and serialize the record! Begin with the bitstring.
            // TBD TBD SS: check if Bitstring takes value that way !
            Bitstring myInBString(<?= $qClass->qClass ?>, true);
            myInBString.Intersect(qry);

            int bytesUsed = sizeof(Bitstring);

            // Make sure we have the storage...
            if (bytesUsed > storageSize) {
                storageSize = bytesUsed;
                free (serializeHere);
                serializeHere = (char *) malloc (storageSize);
            }

            // do the serialization...
            void *location = (void*)&myInBString;

            // remember the serialized value
            serializedSegments[index].StartNew (WHICH_SLOT (hashValue), wayPointID, 1, location, bytesUsed);

            // now, go thru all of the attributes that are used
<?php
        foreach ($slotByAtt as $attName => $attSlot):
            $attQueries = $qClass->att_queries->{$attName};
?>
            if (myInBString.Overlaps(QueryIDSet(<?= $attQueries ?>, true))){
                //bytesUsed = <?= attSerializedSize($attName, $attName) ?>;
                bytesUsed = SerializedSize(<?= $attName ?>);
                if (bytesUsed > storageSize) {
                    storageSize = bytesUsed;
                    free (serializeHere);
                    serializeHere = (char *) malloc (storageSize);
                }

                // and record the serialized value
                //location = <?= attOptimizedSerialize($attName, $attName, "serializeHere") ?>;
                Serialize(serializeHere, <?= $attName ?>);
                serializedSegments[index].Append (<?= $attSlot ?>,(void *) serializeHere, bytesUsed);
            }
<?php   endforeach; /*foreach attribute*/ ?>
        }
<?php
    endforeach; /*foreach query class*/

    /* Is this correct. Should it be inside the loop for the class? */
    cgAdvanceAttributes($jDesc->attribute_queries_RHS);
?>
    }

    // now we are done serializing the chunk
    free (serializeHere);

    // so actually do the hashing... first set up the list of the guys we want to hash
    int theseAreOK [NUM_SEGS];
    for (int i = 0; i < NUM_SEGS; i++) {
        theseAreOK[i] = 1;
    }

    // this is the set of sample collisions taken from the over-full segments
    HashSegmentSample mySamples;

    // now go through and, one-at-a-time, add the data to each table segment
    for (int i = 0; i < NUM_SEGS; i++) {

        // first get a segment to add data to
        HashTableSegment checkedOutCopy;
        int whichOne = myWork.get_centralHashTable ().CheckOutOne (theseAreOK, checkedOutCopy);
        theseAreOK[whichOne] = 0;

        // now add the data
        HashSegmentSample mySample;
        if (checkedOutCopy.Insert (serializedSegments[whichOne], mySample)) {

            // if we are in here, it means that the segment was over-full, so note that we will
            // need to empty it out... we record all of the samples
            mySamples.MoveToFinish ();
            mySample.MoveToStart ();
            mySamples.SwapRights (mySample);
        }

        // and then put the segment back in the hash table
        myWork.get_centralHashTable ().CheckIn (whichOne);
    }

<?php cgPutbackColumns($jDesc->attribute_queries_RHS, 'input', $wpName); ?>

    PROFILING2_END;

    PROFILING(start_time, "<?= $wpName ?> ", "RHS_hash", "%d", totalNum);
    PROFILING(0.0, "HashTable", "fillrate", "%2.4f", HashTableSegment::globalFillRate*100.0);

    // Finish performance counters
    // Use the Set functionality in case we add additional counters later.
    PCounterList counterList;
    PCounter totalCnt("RHS", totalNum, "<?= $wpName ?> ");
    counterList.Append(totalCnt);
    PCounter globalCnt("jRHS", totalNum, "global");
    counterList.Append(globalCnt);

    PROFILING2_SET(counterList, "<?= $wpName ?> ");

    int64_t hFillRate = int64_t(HashTableSegment::globalFillRate * 1000);
    PROFILING2_INSTANT("hfr", hFillRate, "global");

    // now we are finally done!
    JoinHashResult myResult (mySamples);
    myResult.swap (result);

    return 0;
} // JoinRHSWorkFunc_<?= $wpName ?> function
//+{"kind":"WPF", "name":"RHS Hash", "action":"end"}
<?php
}