-
Notifications
You must be signed in to change notification settings - Fork 46
/
Copy pathexample.yml
33 lines (31 loc) · 1.15 KB
/
example.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# Directory in which the HDFS NameNode stores its fsimage files.
# Reference (dfs.namenode.name.dir):
# https://hadoop.apache.org/docs/r2.7.3/hadoop-project-dist/hadoop-hdfs/hdfs-default.xml#dfs.namenode.name.dir
fsImagePath: 'src/test/resources'
# When true, the file size distribution is skipped for group based stats.
skipFileDistributionForGroupStats: true
# When true, the file size distribution is skipped for user based stats.
# Left at false here: the per-user distribution is good for figuring out
# who has too many small files.
skipFileDistributionForUserStats: false
# Paths for which per path stats are computed.
# Regex matching is supported for direct child directories.
paths:
  - '/tmp'
  - '/datalake/a.*'
  - '/user/m.*'
# When true, the file size distribution is skipped for path based stats.
skipFileDistributionForPathStats: true
# Path sets group several paths under one identifier.
# Each key nested below `pathSets` is an identifier mapped to its list of paths.
# The entries must stay indented beneath `pathSets`: written at column 0 they
# parse as unrelated top-level keys and `pathSets` itself becomes null.
pathSets:
  # NOTE(review): the identifier says "Asset1" but the list contains
  # '/datalake/asset3' - confirm which of the two is intended.
  userMmAndFooAndAsset1:
    - '/datalake/asset3'
    - '/user/mm'
    - '/user/foo'
  datalakeAsset1and2:
    - '/datalake/asset1'
    - '/datalake/asset2'
# When true, the file size distribution is skipped for path set based stats.
skipFileDistributionForPathSetStats: true
# Buckets used for the file size distribution.
# Values support the IEC units KiB, MiB, GiB, TiB and PiB.
fileSizeDistributionBuckets:
  - '0'
  - '1MiB'
  - '32MiB'
  - '64MiB'
  - '128MiB'
  - '1GiB'
  - '10GiB'