public class HashPartition extends Object implements HashJoinMemoryCalculator.PartitionStat
Represents an active partition for the Hash-Join operator ("active" means the partition is currently receiving data, or its data is being probed, as opposed to fully spilled partitions). After all the build/inner data has been read for this partition, if all of its data is in memory, a hash table and a helper are created, and this data is probed later. If all of this partition's build/inner data was spilled, the partition begins to work as an outer partition (see the flag "processingOuter"), reusing some of the fields (e.g., currentBatch, currHVVector, writer, spillFile, partitionBatchesCount) for the outer.
Modifier and Type | Field and Description |
---|---|
static String |
HASH_VALUE_COLUMN_NAME |
static TypeProtos.MajorType |
HVtype |
Constructor and Description |
---|
HashPartition(FragmentContext context,
BufferAllocator allocator,
ChainedHashTable baseHashTable,
RecordBatch buildBatch,
RecordBatch probeBatch,
boolean semiJoin,
int recordsPerBatch,
SpillSet spillSet,
int partNum,
int cycleNum,
int numPartitions) |
Modifier and Type | Method and Description |
---|---|
void |
allocateNewCurrentBatchAndHV()
Allocate a new current Vector Container and current HV vector
|
void |
appendBatch(VectorAccessible batch)
Append the incoming batch (actually only the vectors of that batch) into the tmp list
|
void |
appendInnerRow(VectorContainer buildContainer,
int ind,
int hashCode,
HashJoinMemoryCalculator.BuildSidePartitioning calc)
Spills if needed
|
void |
appendOuterRow(int hashCode,
int recordsProcessed)
Outer always spills when batch is full
|
void |
buildContainersHashTableAndHelper()
Creates the hash table and join helper for this partition.
|
void |
cleanup(boolean deleteFile)
Free all in-memory allocated structures.
|
void |
close() |
void |
closeWriter()
Close the writer without deleting the spill file
|
void |
completeAnInnerBatch(boolean toInitialize,
boolean needsSpill) |
void |
completeAnOuterBatch(boolean toInitialize) |
int |
getBuildHashCode(int ind) |
ArrayList<VectorContainer> |
getContainers() |
List<HashJoinMemoryCalculator.BatchStat> |
getInMemoryBatches() |
long |
getInMemorySize() |
int |
getNextIndex(int compositeIndex) |
com.carrotsearch.hppc.IntArrayList |
getNextUnmatchedIndex() |
int |
getNumInMemoryBatches() |
long |
getNumInMemoryRecords() |
int |
getPartitionBatchesCount() |
int |
getPartitionNum() |
int |
getProbeHashCode(int ind) |
String |
getSpillFile() |
org.apache.commons.lang3.tuple.Pair<Integer,Boolean> |
getStartIndex(int probeIndex) |
void |
getStats(HashTableStats newStats) |
boolean |
isSpilled() |
String |
makeDebugString()
Creates a debugging string containing information about memory usage.
|
org.apache.commons.lang3.tuple.Pair<VectorContainer,Integer> |
nextBatch() |
int |
probeForKey(int recordsProcessed,
int hashCode) |
boolean |
setRecordMatched(int compositeIndex) |
void |
spillThisPartition() |
void |
updateBatches() |
void |
updateProbeRecordsPerBatch(int newRecordsPerBatch)
Configure a different temporary batch size when spilling probe batches.
|
public static final String HASH_VALUE_COLUMN_NAME
public static final TypeProtos.MajorType HVtype
public HashPartition(FragmentContext context, BufferAllocator allocator, ChainedHashTable baseHashTable, RecordBatch buildBatch, RecordBatch probeBatch, boolean semiJoin, int recordsPerBatch, SpillSet spillSet, int partNum, int cycleNum, int numPartitions)
public void updateProbeRecordsPerBatch(int newRecordsPerBatch)
newRecordsPerBatch
- The new temporary batch size to use.
public void allocateNewCurrentBatchAndHV()
public void appendInnerRow(VectorContainer buildContainer, int ind, int hashCode, HashJoinMemoryCalculator.BuildSidePartitioning calc)
public void appendOuterRow(int hashCode, int recordsProcessed)
public void completeAnOuterBatch(boolean toInitialize)
public void completeAnInnerBatch(boolean toInitialize, boolean needsSpill)
public void appendBatch(VectorAccessible batch)
public void spillThisPartition()
public int probeForKey(int recordsProcessed, int hashCode) throws SchemaChangeException
SchemaChangeException
public org.apache.commons.lang3.tuple.Pair<Integer,Boolean> getStartIndex(int probeIndex)
public int getNextIndex(int compositeIndex)
public boolean setRecordMatched(int compositeIndex)
public com.carrotsearch.hppc.IntArrayList getNextUnmatchedIndex()
public int getBuildHashCode(int ind) throws SchemaChangeException
SchemaChangeException
public int getProbeHashCode(int ind) throws SchemaChangeException
SchemaChangeException
public ArrayList<VectorContainer> getContainers()
public void updateBatches() throws SchemaChangeException
SchemaChangeException
public org.apache.commons.lang3.tuple.Pair<VectorContainer,Integer> nextBatch()
public List<HashJoinMemoryCalculator.BatchStat> getInMemoryBatches()
getInMemoryBatches
in interface HashJoinMemoryCalculator.PartitionStat
public int getNumInMemoryBatches()
getNumInMemoryBatches
in interface HashJoinMemoryCalculator.PartitionStat
public boolean isSpilled()
isSpilled
in interface HashJoinMemoryCalculator.PartitionStat
public long getNumInMemoryRecords()
getNumInMemoryRecords
in interface HashJoinMemoryCalculator.PartitionStat
public long getInMemorySize()
getInMemorySize
in interface HashJoinMemoryCalculator.PartitionStat
public String getSpillFile()
public int getPartitionBatchesCount()
public int getPartitionNum()
public void closeWriter()
public void buildContainersHashTableAndHelper() throws SchemaChangeException
SchemaChangeException
public void getStats(HashTableStats newStats)
public void cleanup(boolean deleteFile)
deleteFile
- whether to delete the spill file or not
public void close()
public String makeDebugString()
Copyright © 1970 The Apache Software Foundation. All rights reserved.