# meltano.yml
# Project-level settings.
version: 1
# Names the environment entry (see `environments`) used by default.
default_environment: dev
send_anonymous_usage_stats: false
project_id: 33080f0b-2d0f-4fba-b7f1-727bbf080f91

# CLI options; `log_level` is a child of `cli`, not a top-level key.
cli:
  log_level: debug
# Plugin definitions, grouped by plugin type: extractors, loaders, mappers.
plugins:
  extractors:
    # CSV extractor installed from the MeltanoLabs repository.
    - name: tap-csv
      variant: meltanolabs
      pip_url: git+https://github.com/MeltanoLabs/tap-csv.git
    # Second CSV extractor that reuses the `tap-csv` definition; it gets its
    # own config in the `dev` environment.
    - name: tap-csv--multi
      inherit_from: tap-csv
    # Custom plugin definition: namespace, executable, capabilities and
    # settings are all declared inline.
    - name: tap-smoke-test
      namespace: tap_smoke_test
      pip_url: git+https://github.com/meltano/tap-smoke-test.git
      executable: tap-smoke-test
      capabilities:
        - discover
        - catalog
      settings:
        - name: schema_inference_record_count
          kind: integer
        - name: streams
          kind: array
    # Script-backed extractors: no `pip_url`, the executable is a local script.
    - name: people
      namespace: people
      executable: scripts/people.sh
    - name: nested
      namespace: nested
      executable: scripts/nested.sh

  loaders:
    - name: target-sqlite
      variant: meltanolabs
      pip_url: git+https://github.com/MeltanoLabs/target-sqlite.git
      config:
        batch_size: 200
        # NOTE(review): presumably $MELTANO_EXTRACTOR_NAMESPACE expands to the
        # namespace of the extractor in the current run, giving one database
        # file per extractor - confirm against the Meltano docs.
        database: output/$MELTANO_EXTRACTOR_NAMESPACE.db

  mappers:
    - name: meltano-map-transformer
      namespace: meltano_map_transformer
      # Editable install of the project in the current directory.
      pip_url: -e .
      executable: meltano-map-transform
      settings:
        - name: stream_maps
          kind: object
      # Named mapping configurations. Each entry's `config` holds a
      # `stream_maps` object keyed by stream name (or a quoted glob).
      mappings:
        - name: hash_email
          config:
            stream_maps:
              customers:
                id: id
                email: null  # drop the PII field from RECORD and SCHEMA messages
                email_domain: email.split('@')[-1]
                email_hash: md5(config['hash_seed'] + email)
                __else__: null
            stream_map_config:
              # Quoted so the digit-leading value is always read as a string.
              hash_seed: "01AWZh7A6DzGm6iJZZ2T"

        - name: whitelist
          config:
            stream_maps:
              # Whitelist `id` and `description` fields, drop all others
              animals:
                id: id
                description: description
                __else__: __NULL__

        # Put some fields into a JSON object
        - name: json_field
          config:
            stream_maps:
              # JSON-ify some fields
              animals:
                id: id
                data: "json.dumps({'description': description, 'verified': verified, 'views': views, 'created_at': created_at})"
                __else__: null

        # No per-stream maps; only the flattening options are set.
        - name: flatten
          config:
            stream_maps: {}
            flattening_enabled: true
            flattening_max_depth: 1

        # Quoted because the expression starts with `[`, which YAML would
        # otherwise parse as a flow sequence.
        - name: comprehension
          config:
            stream_maps:
              users:
                id: id
                fields: "[f for f in fields if f['key'] != 'age']"

        - name: fake
          config:
            stream_maps:
              customers:
                id: id
                first_name: first_name
                cc: fake.credit_card_number()  # add a new field with a fake credit card number
                __else__: __NULL__
            faker_config:
              locale: [en_US]
              seed: 123456

        # Test glob patterns in stream names
        - name: glob
          config:
            stream_maps:
              "*":
                id: id
                first_name: first_name
                cc: fake.credit_card_number()  # add a new field with a fake credit card number
                email: null  # drop the PII field from RECORD and SCHEMA messages
                email_domain: email.split('@')[-1]
                email_hash: md5(config['hash_seed'] + email)
                __else__: null
            stream_map_config:
              # Quoted so the digit-leading value is always read as a string.
              hash_seed: "01AWZh7A6DzGm6iJZZ2T"
            faker_config:
              locale: [en_US]
              seed: 123456

        # Load records into a single field, applied to all streams
        - name: single_field
          config:
            stream_maps:
              "*":
                id: id
                data: record  # `record` is a special keyword that refers to the entire record
                __else__: null

        # Alias a property across all streams
        - name: alias_property_all_streams
          config:
            stream_maps:
              "*":
                name: first_name
                first_name: __NULL__
# Per-environment configuration. `dev` supplies the input fixtures for the
# extractors declared under `plugins`.
environments:
  - name: dev
    config:
      plugins:
        extractors:
          # Single-file CSV input.
          - name: tap-csv
            config:
              files:
                - entity: customers
                  path: fixtures/customers.csv
                  keys: [id]
          # Two CSV inputs: customers and employees.
          - name: tap-csv--multi
            config:
              files:
                - entity: customers
                  path: fixtures/customers.csv
                  keys: [id]
                - entity: employees
                  path: fixtures/employees.csv
                  keys: [id]
          # Values for the two settings declared on tap-smoke-test.
          - name: tap-smoke-test
            config:
              schema_inference_record_count: 5
              streams:
                - stream_name: animals
                  input_filename: fixtures/animals-data.jsonl