-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* setup trino * temp: update branch to watch * Add trino to prod * Add trino * test * fix * fix: destroy trino * fix: restore trino * fix * fix: increase writers by 10 * update log * some changes for trino * fixes * use postgres * further improvements * More fixes * redeploy trino * fix * more fix * go big * make weekly less concurrent * restore flux branch
- Loading branch information
Showing
14 changed files
with
402 additions
and
33 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
# Manual work | ||
|
||
```sql | ||
-- External table in the "source" catalog that exposes the Parquet export of | ||
-- artifacts_by_project_v1 stored under the GCS prefix below. | ||
CREATE TABLE "source"."default"."artifacts_by_project_v1_source" ( | ||
    artifact_id VARCHAR, | ||
    artifact_source_id VARCHAR, | ||
    artifact_source VARCHAR, | ||
    artifact_namespace VARCHAR, | ||
    artifact_name VARCHAR, | ||
    project_id VARCHAR, | ||
    project_source VARCHAR, | ||
    project_namespace VARCHAR, | ||
    project_name VARCHAR | ||
) | ||
WITH ( | ||
    external_location = 'gs://oso-dataset-transfer-bucket/trino/20240930/artifacts_by_project_v1/', | ||
    format = 'PARQUET' | ||
); | ||
``` | ||
|
||
```sql | ||
-- External table in the "source" catalog that exposes the Parquet export of | ||
-- projects_by_collection_v1 stored under the GCS prefix below. | ||
CREATE TABLE "source"."default"."projects_by_collection_v1_source" ( | ||
    project_id VARCHAR, | ||
    project_source VARCHAR, | ||
    project_namespace VARCHAR, | ||
    project_name VARCHAR, | ||
    collection_id VARCHAR, | ||
    collection_source VARCHAR, | ||
    collection_namespace VARCHAR, | ||
    collection_name VARCHAR | ||
) | ||
WITH ( | ||
    external_location = 'gs://oso-dataset-transfer-bucket/trino/20240930/projects_by_collection_v1/', | ||
    format = 'PARQUET' | ||
); | ||
``` | ||
|
||
```sql | ||
-- External table in the "source" catalog that exposes the Parquet export of | ||
-- timeseries_events_by_artifact_v0 stored under the GCS prefix below. | ||
CREATE TABLE "source"."default"."timeseries_events_by_artifact_v0_source" ( | ||
    time TIMESTAMP, | ||
    to_artifact_id VARCHAR, | ||
    from_artifact_id VARCHAR, | ||
    event_type VARCHAR, | ||
    event_source_id VARCHAR, | ||
    event_source VARCHAR, | ||
    amount DOUBLE | ||
) | ||
WITH ( | ||
    external_location = 'gs://oso-dataset-transfer-bucket/trino/20240930/timeseries_events_by_artifact_v0/', | ||
    format = 'PARQUET' | ||
); | ||
``` | ||
|
||
```sql | ||
-- Destination table in the "metrics" catalog for the events time series. | ||
-- Partitioned by the day of "time" and by event_type. | ||
-- The statement is terminated with ';' so it can be pasted into a session | ||
-- together with the other statements in this document. | ||
CREATE TABLE "metrics"."default"."timeseries_events_by_artifact_v0" ( | ||
    time TIMESTAMP, | ||
    to_artifact_id VARCHAR, | ||
    from_artifact_id VARCHAR, | ||
    event_type VARCHAR, | ||
    event_source_id VARCHAR, | ||
    event_source VARCHAR, | ||
    amount DOUBLE | ||
) | ||
WITH (partitioning = ARRAY['day(time)', 'event_type']); | ||
``` | ||
|
||
```sql | ||
-- Destination table in the "metrics" catalog for the project-to-collection | ||
-- mapping. The statement is terminated with ';' so it can be pasted into a | ||
-- session together with the other statements in this document. | ||
CREATE TABLE "metrics"."default"."projects_by_collection_v1" ( | ||
    project_id VARCHAR, | ||
    project_source VARCHAR, | ||
    project_namespace VARCHAR, | ||
    project_name VARCHAR, | ||
    collection_id VARCHAR, | ||
    collection_source VARCHAR, | ||
    collection_namespace VARCHAR, | ||
    collection_name VARCHAR | ||
); | ||
``` | ||
|
||
```sql | ||
-- Destination table in the "metrics" catalog for the artifact-to-project | ||
-- mapping. The statement is terminated with ';' so it can be pasted into a | ||
-- session together with the other statements in this document. | ||
CREATE TABLE "metrics"."default"."artifacts_by_project_v1" ( | ||
    artifact_id VARCHAR, | ||
    artifact_source_id VARCHAR, | ||
    artifact_source VARCHAR, | ||
    artifact_namespace VARCHAR, | ||
    artifact_name VARCHAR, | ||
    project_id VARCHAR, | ||
    project_source VARCHAR, | ||
    project_namespace VARCHAR, | ||
    project_name VARCHAR | ||
); | ||
``` | ||
|
||
Write the data from the Parquet-backed source tables into the Iceberg tables: | ||
|
||
```sql | ||
-- Copy the events from the Parquet-backed source table into the metrics table. | ||
-- Columns are listed explicitly on both sides so the copy does not depend on | ||
-- the two tables declaring their columns in the same order. | ||
INSERT INTO "metrics"."default"."timeseries_events_by_artifact_v0" ( | ||
    "time", | ||
    to_artifact_id, | ||
    from_artifact_id, | ||
    event_type, | ||
    event_source_id, | ||
    event_source, | ||
    amount | ||
) | ||
SELECT | ||
    "time", | ||
    to_artifact_id, | ||
    from_artifact_id, | ||
    event_type, | ||
    event_source_id, | ||
    event_source, | ||
    amount | ||
FROM "source"."default"."timeseries_events_by_artifact_v0_source"; | ||
``` | ||
|
||
```sql | ||
-- Copy the project-to-collection rows from the Parquet-backed source table | ||
-- into the metrics table. Columns are listed explicitly on both sides so the | ||
-- copy does not depend on the two tables sharing the same column order. | ||
INSERT INTO "metrics"."default"."projects_by_collection_v1" ( | ||
    project_id, | ||
    project_source, | ||
    project_namespace, | ||
    project_name, | ||
    collection_id, | ||
    collection_source, | ||
    collection_namespace, | ||
    collection_name | ||
) | ||
SELECT | ||
    project_id, | ||
    project_source, | ||
    project_namespace, | ||
    project_name, | ||
    collection_id, | ||
    collection_source, | ||
    collection_namespace, | ||
    collection_name | ||
FROM "source"."default"."projects_by_collection_v1_source"; | ||
``` | ||
|
||
```sql | ||
-- Copy the artifact-to-project rows from the Parquet-backed source table | ||
-- into the metrics table. Columns are listed explicitly on both sides so the | ||
-- copy does not depend on the two tables sharing the same column order. | ||
INSERT INTO "metrics"."default"."artifacts_by_project_v1" ( | ||
    artifact_id, | ||
    artifact_source_id, | ||
    artifact_source, | ||
    artifact_namespace, | ||
    artifact_name, | ||
    project_id, | ||
    project_source, | ||
    project_namespace, | ||
    project_name | ||
) | ||
SELECT | ||
    artifact_id, | ||
    artifact_source_id, | ||
    artifact_source, | ||
    artifact_namespace, | ||
    artifact_name, | ||
    project_id, | ||
    project_source, | ||
    project_namespace, | ||
    project_name | ||
FROM "source"."default"."artifacts_by_project_v1_source"; | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Base kustomization for Trino: its only resource is trino.yaml, and the | ||
# namespace field places the rendered resources in base-trino. | ||
apiVersion: kustomize.config.k8s.io/v1beta1 | ||
kind: Kustomization | ||
namespace: base-trino | ||
resources: | ||
- trino.yaml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
# Namespace that the base Trino HelmRepository and HelmRelease are created in. | ||
# NOTE(review): the labels are presumably consumed by Flux tenancy rules and | ||
# by the kube-secrets-init mutating webhook; confirm against the cluster setup. | ||
apiVersion: v1 | ||
kind: Namespace | ||
metadata: | ||
name: base-trino | ||
labels: | ||
toolkit.fluxcd.io/tenant: apps | ||
ops.opensource.observer/environment: base | ||
kube-secrets-init.doit-intl.com/enable-mutation: "true" | ||
--- | ||
# Flux HelmRepository for the upstream Trino chart index at the URL below, | ||
# reconciled on a 5 minute interval. Referenced by name from the HelmRelease. | ||
apiVersion: source.toolkit.fluxcd.io/v1 | ||
kind: HelmRepository | ||
metadata: | ||
name: trino | ||
namespace: base-trino | ||
spec: | ||
interval: 5m | ||
url: https://trinodb.github.io/charts | ||
--- | ||
# Flux HelmRelease that installs the trino chart, pinned to version 0.30.0, | ||
# from the HelmRepository named trino. It is reconciled every 50m, and a | ||
# failed install is retried up to 3 times. | ||
apiVersion: helm.toolkit.fluxcd.io/v2 | ||
kind: HelmRelease | ||
metadata: | ||
name: trino | ||
namespace: base-trino | ||
spec: | ||
chart: | ||
spec: | ||
chart: trino | ||
version: "0.30.0" | ||
sourceRef: | ||
kind: HelmRepository | ||
name: trino | ||
interval: 50m | ||
install: | ||
remediation: | ||
retries: 3 | ||
# Chart values start here. The chart creates a service account named | ||
# base-trino. | ||
values: | ||
serviceAccount: | ||
create: true | ||
name: base-trino | ||
# Coordinator sizing and placement. | ||
# NOTE(review): maxHeapSize 25G is the same size as the 25600Mi (25Gi) memory | ||
# request, so the request leaves no room for non-heap JVM memory. Confirm this | ||
# is intended. | ||
# The toleration and nodeSelector both reference pool_type=trino-coordinator; | ||
# presumably a dedicated tainted node pool. Verify against the cluster config. | ||
coordinator: | ||
jvm: | ||
maxHeapSize: "25G" | ||
resources: | ||
requests: | ||
cpu: 2000m | ||
memory: 25600Mi | ||
tolerations: | ||
- key: pool_type | ||
operator: Equal | ||
value: trino-coordinator | ||
effect: NoSchedule | ||
nodeSelector: | ||
pool_type: trino-coordinator | ||
|
||
# Worker sizing and placement. | ||
# query.maxMemoryPerNode is set to 15GB; presumably this maps to Trino's | ||
# per-node query memory limit. Verify against the chart documentation. | ||
# NOTE(review): maxHeapSize 40G is the same size as the 40960Mi (40Gi) memory | ||
# request, so the request leaves no room for non-heap JVM memory. Confirm this | ||
# is intended. | ||
# The toleration and nodeSelector both reference pool_type=trino-worker; | ||
# presumably a dedicated tainted node pool. Verify against the cluster config. | ||
worker: | ||
config: | ||
query: | ||
maxMemoryPerNode: 15GB | ||
jvm: | ||
maxHeapSize: "40G" | ||
resources: | ||
requests: | ||
cpu: 2000m | ||
memory: 40960Mi | ||
tolerations: | ||
- key: pool_type | ||
operator: Equal | ||
value: trino-worker | ||
effect: NoSchedule | ||
nodeSelector: | ||
pool_type: trino-worker | ||
|
||
# Cluster-wide server settings and worker autoscaling. | ||
# query.maxMemory is set to 500GB. workers: 2 is presumably the starting and | ||
# minimum worker count once autoscaling is on; verify in the chart templates. | ||
# Autoscaling targets 70% CPU utilization with at most 100 replicas. | ||
# Scale-down waits for a 300s stabilization window and may then remove up to | ||
# 100% of pods per 15s period. | ||
# Scale-up has no stabilization window. Per 15s period it may add either 100% | ||
# of current pods or 4 pods, and selectPolicy Max picks the larger of the two. | ||
server: | ||
config: | ||
query: | ||
maxMemory: "500GB" | ||
workers: 2 | ||
autoscaling: | ||
enabled: true | ||
maxReplicas: 100 | ||
targetCPUUtilizationPercentage: 70 | ||
behavior: | ||
scaleDown: | ||
stabilizationWindowSeconds: 300 | ||
policies: | ||
- type: Percent | ||
value: 100 | ||
periodSeconds: 15 | ||
scaleUp: | ||
stabilizationWindowSeconds: 0 | ||
policies: | ||
- type: Percent | ||
value: 100 | ||
periodSeconds: 15 | ||
- type: Pods | ||
value: 4 | ||
periodSeconds: 15 | ||
selectPolicy: Max | ||
# Trino catalog definitions; each entry is the text of a catalog properties file. | ||
# metrics: Iceberg connector using a Hive metastore catalog, with native GCS | ||
# file system support enabled for project opensource-observer. | ||
# source: Hive connector against the same metastore URI and GCS project. | ||
# Both catalogs point at a hard-coded metastore address (10.145.192.27:9083). | ||
# NOTE(review): the gcs.use-access-token lines and the bigquery catalog are | ||
# commented out; confirm whether they are still needed or can be removed. | ||
catalogs: | ||
metrics: | | ||
connector.name=iceberg | ||
iceberg.catalog.type=hive_metastore | ||
hive.metastore.uri=thrift://10.145.192.27:9083 | ||
hive.metastore-cache-ttl=0s | ||
hive.metastore-refresh-interval=5s | ||
hive.metastore-timeout=10s | ||
iceberg.use-file-size-from-metadata=false | ||
fs.native-gcs.enabled=true | ||
gcs.project-id=opensource-observer | ||
iceberg.max-partitions-per-writer=1000 | ||
# gcs.use-access-token=true | ||
source: | | ||
connector.name=hive | ||
hive.metastore.uri=thrift://10.145.192.27:9083 | ||
fs.native-gcs.enabled=true | ||
gcs.project-id=opensource-observer | ||
# gcs.use-access-token=true | ||
# bigquery: | | ||
# connector.name=bigquery | ||
# bigquery.project-id=opensource-observer |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,3 +4,4 @@ resources: | |
- ./dagster | ||
- ./cloudsql-proxy | ||
- ./redis | ||
- ./trino |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Production patch for the Trino HelmRelease: names the release | ||
# production-trino and sets the service account name to production-trino. | ||
apiVersion: helm.toolkit.fluxcd.io/v2 | ||
kind: HelmRelease | ||
metadata: | ||
name: production-trino | ||
spec: | ||
values: | ||
serviceAccount: | ||
name: production-trino |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Production overlay: builds on ../../base/trino, sets the namespace to | ||
# production-trino, and applies custom-helm-values.yaml to resources of kind | ||
# HelmRelease. allowNameChange lets that patch change the release name. | ||
apiVersion: kustomize.config.k8s.io/v1beta1 | ||
kind: Kustomization | ||
resources: | ||
- ../../base/trino | ||
namespace: production-trino | ||
patches: | ||
- path: ./custom-helm-values.yaml | ||
target: | ||
kind: HelmRelease | ||
options: | ||
allowNameChange: true |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.