Skip to content

Commit dc2d6fb

Browse files
Merge pull request #2 from ritual-net/dev
Major release 1.0.0
2 parents 6f69975 + fbf0b72 commit dc2d6fb

29 files changed

+546
-272
lines changed

.github/workflows/pr.yaml

+6
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,12 @@ jobs:
1818
with:
1919
extra_args: --all-files --show-diff-on-failure
2020

21+
- name: Run Format (AWS)
22+
run: cd procure/aws && terraform fmt -check
23+
24+
- name: Run Format (GCP)
25+
run: cd procure/gcp && terraform fmt -check
26+
2127
- name: Setup TFLint
2228
uses: terraform-linters/setup-tflint@v3
2329
with:

CHANGELOG.md

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Changelog
2+
3+
All notable changes to this project will be documented in this file.
4+
5+
- ##### The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
6+
- ##### This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7+
8+
## [1.0.0] - UNRELEASED
9+
10+
### Added
11+
- Support for multi-region, multi-zone deployments in GCP.
12+
- Support for multi-zone deployments in AWS. Since multi-region deployments require
13+
separate provider blocks, we don't allow multiple regions to avoid increased repo complexity.
14+
- Support for GPUs on GCP (via the Terraform `accelerator` block) and AWS. Includes a driver installation script
15+
and a GPU-specific `docker-compose.yaml` file to expose GPUs to the node container for diagnostics.
16+
- Terraform formatter in pipeline and README.
17+
18+
### Changed
19+
- Format of node specification in `.tfvars`. Nodes are now specified via a map (see `variables.tf`) where keys correspond to node IDs.
20+
- Format of router specification in `.tfvars`. Router is now specified via a map (see `variables.tf`).
21+
- Naming conventions for configuration `.json` files. The only requirements now are one file per deployed node, with each file name (without the `.json` extension) matching a node ID (a key of `nodes` from `variables.tf`).
22+
23+
### Fixed
24+
- All created resources are now parametrized by cluster name, so no conflicts arise from successive deployments within the same project.
25+
- Omissions in Makefile.
26+
27+
## [0.1.0] - 2024-01-18
28+
29+
### Added
30+
- Initial release of Infernet Deploy.

README.md

+17-5
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,14 @@ Deploy a cluster of heterogeneous [Infernet](https://github.com/ritual-net/infern
77
1. [Install Terraform](https://developer.hashicorp.com/terraform/install)
88
2. **Configure nodes**: A node configuration file **for each** node being deployed.
99
- See [example configuration](configs/0.json.example).
10-
- They must be named `0.json`, `1.json`, etc...
11-
- Misnamed files are ignored.
10+
- They must have **unique** names
11+
- A straightforward approach is to name them `0.json`, `1.json`, etc.
1212
- They must be placed under the top-level `configs/` directory.
13+
- The number and names of the `.json` files must match the number and names of the *keys* in the `nodes` variable in `terraform.tfvars`.
14+
- See [terraform.tfvars.example](./procure/aws/terraform.tfvars.example).
15+
- Each key should correspond to the name of a `.json` file, *excluding* the `.json` extension.
1316
- Each node *strictly* requires its own configuration `.json` file, even if those are identical.
14-
- Number of `.json` files must match the `node_count` variable in `terraform.tfvars`.
15-
- Extra files are ignored.
16-
- For instructions on configuring nodes, refer to the [Infernet Node](https://github.com/ritual-net/infernet-node).
17+
- For instructions on configuring individual nodes, refer to the [Infernet Node](https://github.com/ritual-net/infernet-node) repository.
1718

1819
#### Infernet Router:
1920
The Infernet Router REST server is configured automatically by Terraform. However, if you plan to use it, you need to understand its implications:
@@ -106,6 +107,17 @@ tflint --init
106107
tflint --recursive
107108
```
108109

110+
### Using Terraform Format
111+
```bash
112+
# Format AWS files
113+
cd procure/aws
114+
terraform fmt
115+
116+
# Format GCP files (run from the repository root)
117+
cd procure/gcp
118+
terraform fmt
119+
```
120+
109121
## License
110122

111123
[BSD 3-clause Clear](./LICENSE)

configs/0.json.example

+21-5
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,24 @@
66
"chain": {
77
"enabled": true,
88
"rpc_url": "http://127.0.0.1:8545",
9-
"coordinator_address": "0x...",
109
"trail_head_blocks": 4,
1110
"wallet": {
1211
"max_gas_limit": 100000,
13-
"private_key": "12345s"
12+
"private_key": "12345s",
13+
"payment_address": "0x...",
14+
"allowed_sim_errors": []
15+
},
16+
"snapshot_sync": {
17+
"sleep": 1.5,
18+
"batch_size": 200
1419
}
1520
},
1621
"docker": {
1722
"username": "username",
1823
"password": "password"
1924
},
2025
"redis": {
21-
"host": "localhost",
26+
"host": "redis",
2227
"port": 6379
2328
},
2429
"forward_stats": true,
@@ -40,7 +45,12 @@
4045
"KEY1": "VALUE1",
4146
"KEY2": "VALUE2"
4247
},
43-
"gpu": true
48+
"gpu": false,
49+
"accepted_payments": {
50+
"0x0000000000000000000000000000000000000000": 1000000000000000000,
51+
"0x59F2f1fCfE2474fD5F0b9BA1E73ca90b143Eb8d0": 1000000000000000000
52+
},
53+
"generates_proofs": true
4454
},
4555
{
4656
"id": "container-2",
@@ -58,7 +68,13 @@
5868
"env": {
5969
"KEY3": "VALUE3",
6070
"KEY4": "VALUE4"
61-
}
71+
},
72+
"gpu": true,
73+
"accepted_payments": {
74+
"0x0000000000000000000000000000000000000000": 1000000000000000000,
75+
"0x59F2f1fCfE2474fD5F0b9BA1E73ca90b143Eb8d0": 1000000000000000000
76+
},
77+
"generates_proofs": false
6278
}
6379
]
6480
}

deploy/docker-compose-gpu.yaml

+62
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
version: '3'
2+
3+
services:
4+
node:
5+
image: ritualnetwork/infernet-node:latest-gpu
6+
ports:
7+
- "0.0.0.0:4000:4000"
8+
volumes:
9+
- ./config.json:/app/config.json
10+
- node-logs:/logs
11+
- /var/run/docker.sock:/var/run/docker.sock
12+
tty: true
13+
networks:
14+
- network
15+
deploy:
16+
resources:
17+
reservations:
18+
devices:
19+
- driver: nvidia
20+
count: all
21+
capabilities: [gpu]
22+
depends_on:
23+
- redis
24+
restart:
25+
on-failure
26+
extra_hosts:
27+
- "host.docker.internal:host-gateway"
28+
stop_grace_period: 1m
29+
30+
redis:
31+
image: redis:latest
32+
expose:
33+
- "6379"
34+
networks:
35+
- network
36+
volumes:
37+
- ./redis.conf:/usr/local/etc/redis/redis.conf
38+
- redis-data:/data
39+
restart:
40+
on-failure
41+
42+
fluentbit:
43+
image: fluent/fluent-bit:latest
44+
expose:
45+
- "24224"
46+
environment:
47+
- FLUENTBIT_CONFIG_PATH=/fluent-bit/etc/fluent-bit.conf
48+
volumes:
49+
- ./fluent-bit.conf:/fluent-bit/etc/fluent-bit.conf
50+
- /var/log:/var/log:ro
51+
networks:
52+
- network
53+
restart:
54+
on-failure
55+
56+
networks:
57+
network:
58+
59+
60+
volumes:
61+
node-logs:
62+
redis-data:

deploy/docker-compose.yaml

+9-10
Original file line numberDiff line numberDiff line change
@@ -2,29 +2,28 @@ version: '3'
22

33
services:
44
node:
5-
image: ritualnetwork/infernet-node:0.1.0
5+
image: ritualnetwork/infernet-node:latest
66
ports:
77
- "0.0.0.0:4000:4000"
88
volumes:
9-
- type: bind
10-
source: ./config.json
11-
target: /app/config.json
9+
- ./config.json:/app/config.json
1210
- node-logs:/logs
1311
- /var/run/docker.sock:/var/run/docker.sock
12+
tty: true
1413
networks:
1514
- network
16-
restart:
17-
on-failure
1815
depends_on:
1916
- redis
17+
restart:
18+
on-failure
2019
extra_hosts:
2120
- "host.docker.internal:host-gateway"
2221
stop_grace_period: 1m
2322

2423
redis:
2524
image: redis:latest
26-
ports:
27-
- "6379:6379"
25+
expose:
26+
- "6379"
2827
networks:
2928
- network
3029
volumes:
@@ -35,8 +34,8 @@ services:
3534

3635
fluentbit:
3736
image: fluent/fluent-bit:latest
38-
ports:
39-
- "24224:24224"
37+
expose:
38+
- "24224"
4039
environment:
4140
- FLUENTBIT_CONFIG_PATH=/fluent-bit/etc/fluent-bit.conf
4241
volumes:

procure/Makefile

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
SHELL := /bin/bash
2+
13
# Define the Terraform command
24
TERRAFORM_CMD := terraform
35

@@ -21,6 +23,9 @@ init: check-provider
2123

2224
# Define the plan target
2325
plan: check-provider
26+
@echo "Preparing deployment files..."
27+
@chmod +x prepare_files.sh
28+
@./prepare_files.sh
2429
@echo "Generating Terraform plan..."
2530
$(TERRAFORM_CMD) -chdir=$(provider) plan
2631

@@ -33,7 +38,6 @@ apply: check-provider
3338
@$(TERRAFORM_CMD) -chdir=$(provider) apply
3439

3540
# Define the destroy target
36-
destroy:
37-
@[[ "$(provider)" == "aws" || "$(provider)" == "gcp" ]] || (echo "Usage: 'make destroy provider={aws, gcp}'" && exit 1)
41+
destroy: check-provider
3842
@echo "Destroying Terraform resources..."
3943
@$(TERRAFORM_CMD) -chdir=$(provider) destroy

procure/aws/main.tf

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,5 +16,5 @@ terraform {
1616
provider "aws" {
1717
access_key = var.access_key_id
1818
secret_key = var.secret_access_key
19-
region = var.region
19+
region = var.region
2020
}

procure/aws/metadata.tf

+9-7
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,24 @@
11
# Config files as secrets
22
resource "aws_ssm_parameter" "config_file" {
3-
count = var.node_count
3+
for_each = var.nodes
44

5-
name = "config_${count.index}"
5+
name = "${each.key}.json"
66
type = "SecureString"
7-
value = filebase64("${path.module}/../../configs/${count.index}.json")
7+
value = filebase64("${path.module}/../../configs/${each.key}.json")
88
}
99

1010
# Deployment files
1111
resource "aws_ssm_parameter" "deploy_tar" {
12-
name = "deploy_tar"
12+
for_each = var.nodes
13+
14+
name = "deploy-tar-${each.key}"
1315
type = "SecureString"
14-
value = filebase64("${path.module}/../deploy.tar.gz")
16+
value = each.value.has_gpu ? filebase64("${path.module}/../deploy-gpu.tar.gz") : filebase64("${path.module}/../deploy.tar.gz")
1517
}
1618

1719
# Node IPs
1820
resource "aws_ssm_parameter" "node_ips" {
19-
name = "node_ips"
21+
name = "node-ips-${var.name}"
2022
type = "String"
21-
value = join("\n", [for ip in aws_eip.static_ip[*].public_ip : "${ip}:4000"])
23+
value = join("\n", [for key, _ in aws_instance.nodes : "${aws_eip.static_ip[key].public_ip}:4000"])
2224
}

0 commit comments

Comments
 (0)