public abstract class AbstractParquetGroupScan extends AbstractGroupScanWithMetadata<ParquetMetadataProvider>
Modifier and Type | Class and Description |
---|---|
protected static class |
AbstractParquetGroupScan.RowGroupScanFilterer<B extends AbstractParquetGroupScan.RowGroupScanFilterer<B>>
This class is responsible for filtering different metadata levels including row group level.
|
AbstractGroupScanWithMetadata.GroupScanWithMetadataFilterer<B extends AbstractGroupScanWithMetadata.GroupScanWithMetadataFilterer<B>>
Modifier and Type | Field and Description |
---|---|
protected List<ReadEntryWithPath> |
entries |
protected org.apache.drill.shaded.guava.com.google.common.collect.ListMultimap<Integer,RowGroupInfo> |
mappings |
protected ParquetReaderConfig |
readerConfig |
protected org.apache.drill.shaded.guava.com.google.common.collect.Multimap<org.apache.hadoop.fs.Path,RowGroupMetadata> |
rowGroups |
columns, files, fileSet, filter, limit, matchAllMetadata, metadataProvider, nonInterestingColumnsMetadata, partitionColumns, partitions, segments, tableMetadata, usedMetastore
INIT_ALLOCATION, initialAllocation, MAX_ALLOCATION, maxAllocation, userName
ALL_COLUMNS
Modifier | Constructor and Description |
---|---|
protected |
AbstractParquetGroupScan(AbstractParquetGroupScan that) |
protected |
AbstractParquetGroupScan(String userName,
List<SchemaPath> columns,
List<ReadEntryWithPath> entries,
ParquetReaderConfig readerConfig,
LogicalExpression filter) |
Modifier and Type | Method and Description |
---|---|
void |
applyAssignments(List<CoordinationProtos.DrillbitEndpoint> incomingEndpoints) |
AbstractGroupScanWithMetadata<?> |
applyFilter(LogicalExpression filterExpr,
UdfUtilities udfUtilities,
FunctionImplementationRegistry functionImplementationRegistry,
OptionManager optionManager)
Applies specified filter
filterExpr to current group scan and produces filtering at:
table level:
if filter matches all the data or prunes all the data, sets corresponding value to
AbstractGroupScanWithMetadata.isMatchAllMetadata() and returns null
segment level:
if filter matches all the data or prunes all the data, sets corresponding value to
AbstractGroupScanWithMetadata.isMatchAllMetadata() and returns null
if segment metadata was pruned, prunes underlying metadata
partition level:
if filter matches all the data or prunes all the data, sets corresponding value to
AbstractGroupScanWithMetadata.isMatchAllMetadata() and returns null
if partition metadata was pruned, prunes underlying metadata
file level:
if filter matches all the data or prunes all the data, sets corresponding value to
AbstractGroupScanWithMetadata.isMatchAllMetadata() and returns null
|
GroupScan |
applyLimit(int maxRecords)
By default, returns null to indicate that row-count-based pruning is not supported.
|
boolean |
canPushdownProjects(List<SchemaPath> columns)
GroupScan should check the list of columns, and see if it could support all the columns in the list.
|
protected abstract AbstractParquetGroupScan |
cloneWithFileSelection(Collection<org.apache.hadoop.fs.Path> filePaths) |
protected abstract ParquetMetadataProviderBuilder<?> |
defaultTableMetadataProviderBuilder(MetadataProviderManager source)
Returns
TableMetadataProviderBuilder instance which may provide metadata
without using Drill Metastore. |
protected abstract Collection<CoordinationProtos.DrillbitEndpoint> |
getDrillbits() |
List<ReadEntryWithPath> |
getEntries() |
Collection<org.apache.hadoop.fs.Path> |
getFiles()
This method is excluded from serialization in this group scan
since the actual files list to scan in this class is handled by
entries field. |
protected abstract AbstractParquetGroupScan.RowGroupScanFilterer<? extends AbstractParquetGroupScan.RowGroupScanFilterer<?>> |
getFilterer()
Returns holder for metadata values which provides API to filter metadata
and build new group scan instance using filtered metadata.
|
int |
getMaxParallelizationWidth() |
List<EndpointAffinity> |
getOperatorAffinity()
Calculates the affinity each endpoint has for this scan,
by adding up the affinity each endpoint has for each rowGroup.
|
protected List<RowGroupReadEntry> |
getReadEntries(int minorFragmentId) |
ParquetReaderConfig |
getReaderConfig() |
ParquetReaderConfig |
getReaderConfigForSerialization() |
protected org.apache.drill.shaded.guava.com.google.common.collect.Multimap<org.apache.hadoop.fs.Path,RowGroupMetadata> |
getRowGroupsMetadata() |
void |
modifyFileSelection(FileSelection selection) |
protected static <T extends BaseMetadata & LocationProvider> org.apache.drill.shaded.guava.com.google.common.collect.Multimap<org.apache.hadoop.fs.Path,T> |
pruneForPartitions(org.apache.drill.shaded.guava.com.google.common.collect.Multimap<org.apache.hadoop.fs.Path,T> metadataToPrune,
List<PartitionMetadata> filteredPartitionMetadata)
Removes metadata which does not belong to any of partitions in metadata list.
|
protected org.apache.drill.shaded.guava.com.google.common.collect.Multimap<org.apache.hadoop.fs.Path,RowGroupMetadata> |
pruneRowGroupsForFiles(Map<org.apache.hadoop.fs.Path,FileMetadata> filteredFileMetadata) |
boolean |
supportsFilterPushDown()
Checks whether this group scan supports filter push down.
|
checkMetadataConsistency, getColumns, getColumnValueCount, getDigest, getFileSet, getFilesMetadata, getFilter, getFilterPredicate, getFilterPredicate, getFilterString, getLimit, getMetadataProvider, getNextOrEmpty, getNonInterestingColumnsMetadata, getPartitionColumns, getPartitionsMetadata, getPartitionValue, getPartitionValues, getScanStats, getSchema, getSegmentsMetadata, getTableMetadata, getTypeForColumn, hasFiles, init, isAllDataPruned, isGroupScanFullyMatchesFilter, isImplicitOrPartCol, isMatchAllMetadata, limitMetadata, pruneForPartitions, setFilter, setFilterForRuntime, supportsFileImplicitColumns, supportsLimitPushdown, tableMetadataProviderBuilder, usedMetastore
clone, supportsPartitionFilterPushdown
accept, clone, enforceWidth, getAnalyzeInfoProvider, getDistributionAffinity, getInitialAllocation, getMaxAllocation, getMinParallelizationWidth, getOperatorType, getScanStats, getScanStats, getSelectionRoot, isDistributed, isExecutable, iterator
accept, getCost, getOperatorId, getSVMode, getUserName, isBufferedOperator, setCost, setMaxAllocation, setOperatorId
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
clone, enforceWidth, getAnalyzeInfoProvider, getMinParallelizationWidth, getScanStats, getScanStats, getSelectionRoot, getSpecificScan, isDistributed
accept, getCost, getInitialAllocation, getMaxAllocation, getNewWithChildren, getOperatorId, getOperatorType, getSVMode, getUserName, isBufferedOperator, isExecutable, setCost, setMaxAllocation, setOperatorId
accept
forEach, iterator, spliterator
getDistributionAffinity
protected List<ReadEntryWithPath> entries
protected org.apache.drill.shaded.guava.com.google.common.collect.Multimap<org.apache.hadoop.fs.Path,RowGroupMetadata> rowGroups
protected org.apache.drill.shaded.guava.com.google.common.collect.ListMultimap<Integer,RowGroupInfo> mappings
protected ParquetReaderConfig readerConfig
protected AbstractParquetGroupScan(String userName, List<SchemaPath> columns, List<ReadEntryWithPath> entries, ParquetReaderConfig readerConfig, LogicalExpression filter)
protected AbstractParquetGroupScan(AbstractParquetGroupScan that)
public List<ReadEntryWithPath> getEntries()
public ParquetReaderConfig getReaderConfigForSerialization()
public ParquetReaderConfig getReaderConfig()
public Collection<org.apache.hadoop.fs.Path> getFiles()
entries
field.
getFiles
in interface GroupScan
getFiles
in class AbstractGroupScanWithMetadata<ParquetMetadataProvider>
public boolean canPushdownProjects(List<SchemaPath> columns)
GroupScan
canPushdownProjects
in interface GroupScan
canPushdownProjects
in class AbstractGroupScan
public boolean supportsFilterPushDown()
GroupScan
supportsFilterPushDown
in interface GroupScan
supportsFilterPushDown
in class AbstractGroupScan
true
if this group scan supports filter push down,
false
otherwise
public List<EndpointAffinity> getOperatorAffinity()
getOperatorAffinity
in interface HasAffinity
getOperatorAffinity
in class AbstractGroupScan
public void applyAssignments(List<CoordinationProtos.DrillbitEndpoint> incomingEndpoints)
public int getMaxParallelizationWidth()
protected List<RowGroupReadEntry> getReadEntries(int minorFragmentId)
public AbstractGroupScanWithMetadata<?> applyFilter(LogicalExpression filterExpr, UdfUtilities udfUtilities, FunctionImplementationRegistry functionImplementationRegistry, OptionManager optionManager)
filterExpr
to current group scan and produces filtering at:
AbstractGroupScanWithMetadata.isMatchAllMetadata()
and returns null
AbstractGroupScanWithMetadata.isMatchAllMetadata()
and returns null
AbstractGroupScanWithMetadata.isMatchAllMetadata()
and returns null
AbstractGroupScanWithMetadata.isMatchAllMetadata()
and returns null
AbstractGroupScanWithMetadata.isMatchAllMetadata()
and returns null
applyFilter
in interface GroupScan
applyFilter
in class AbstractGroupScanWithMetadata<ParquetMetadataProvider>
filterExpr
- filter expression to build
udfUtilities
- udf utilities
functionImplementationRegistry
- context to find drill function holder
optionManager
- option manager
protected org.apache.drill.shaded.guava.com.google.common.collect.Multimap<org.apache.hadoop.fs.Path,RowGroupMetadata> pruneRowGroupsForFiles(Map<org.apache.hadoop.fs.Path,FileMetadata> filteredFileMetadata)
public GroupScan applyLimit(int maxRecords)
AbstractGroupScan
applyLimit
in interface GroupScan
applyLimit
in class AbstractGroupScanWithMetadata<ParquetMetadataProvider>
maxRecords
- the number of rows requested from group scan.
public void modifyFileSelection(FileSelection selection)
modifyFileSelection
in interface FileGroupScan
modifyFileSelection
in class AbstractGroupScanWithMetadata<ParquetMetadataProvider>
protected org.apache.drill.shaded.guava.com.google.common.collect.Multimap<org.apache.hadoop.fs.Path,RowGroupMetadata> getRowGroupsMetadata()
protected static <T extends BaseMetadata & LocationProvider> org.apache.drill.shaded.guava.com.google.common.collect.Multimap<org.apache.hadoop.fs.Path,T> pruneForPartitions(org.apache.drill.shaded.guava.com.google.common.collect.Multimap<org.apache.hadoop.fs.Path,T> metadataToPrune, List<PartitionMetadata> filteredPartitionMetadata)
T
- type of metadata to filter
metadataToPrune
- list of metadata which should be pruned
filteredPartitionMetadata
- list of partition metadata which was pruned
protected abstract Collection<CoordinationProtos.DrillbitEndpoint> getDrillbits()
protected abstract AbstractParquetGroupScan cloneWithFileSelection(Collection<org.apache.hadoop.fs.Path> filePaths) throws IOException
IOException
protected abstract ParquetMetadataProviderBuilder<?> defaultTableMetadataProviderBuilder(MetadataProviderManager source)
AbstractGroupScanWithMetadata
TableMetadataProviderBuilder
instance which may provide metadata
without using Drill Metastore.
defaultTableMetadataProviderBuilder
in class AbstractGroupScanWithMetadata<ParquetMetadataProvider>
source
- metadata provider manager
TableMetadataProviderBuilder
instance
protected abstract AbstractParquetGroupScan.RowGroupScanFilterer<? extends AbstractParquetGroupScan.RowGroupScanFilterer<?>> getFilterer()
AbstractGroupScanWithMetadata
getFilterer
in class AbstractGroupScanWithMetadata<ParquetMetadataProvider>
Copyright © 1970 The Apache Software Foundation. All rights reserved.