Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
adhorn committed Dec 2, 2021
0 parents commit c792ac6
Show file tree
Hide file tree
Showing 19 changed files with 21,597 additions and 0 deletions.
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
*.js
!jest.config.js
*.d.ts
node_modules

# CDK asset staging directory
.cdk.staging
cdk.out
6 changes: 6 additions & 0 deletions .npmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
*.ts
!*.d.ts

# CDK asset staging directory
.cdk.staging
cdk.out
21 changes: 21 additions & 0 deletions LICENCE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2021 Adrian Hornsby

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
46 changes: 46 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Chaos Engineering with AWS Fault Injection Simulator (FIS)


![Issues](https://img.shields.io/github/issues/adhorn/aws-fis-templates-cdk)
[![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg)](https://github.com/adhorn/aws-fis-templates-cdk/graphs/commit-activity)
[![Twitter](https://img.shields.io/twitter/url/https/github.com/adhorn/aws-fis-experiment-templates?style=social)](https://twitter.com/intent/tweet?text=Wow:&url=https%3A%2F%2Fgithub.com%2Fadhorn%2Faws-fis-templates-cdk)


# Collection of [FIS Experiment Templates](https://docs.aws.amazon.com/fis/latest/userguide/experiment-templates.html)

These templates let you perform fault injection experiments on resources (applications, network, and infrastructure) in the [AWS Cloud](https://aws.amazon.com).

## Prerequisites:

- [What is AWS Fault Injection Simulator?](https://docs.aws.amazon.com/fis/latest/userguide/what-is.html)
- [Experiment templates for AWS FIS](https://docs.aws.amazon.com/fis/latest/userguide/experiment-templates.html)
- [How AWS Fault Injection Simulator works with IAM](https://docs.aws.amazon.com/fis/latest/userguide/security_iam_service-with-iam.html)


## Deploy via CDK:

```bash
npm install
cdk deploy
```

## Cancel experiment using CloudWatch alarm
All templates share the same synthetic CloudWatch alarm to get you started with the `stop-condition`. To test canceling an experiment, force that alarm into the `ALARM` state:

```bash
aws cloudwatch set-alarm-state --alarm-name "NetworkInAbnormal" --state-value "ALARM" --state-reason "testing FIS"
```

Once familiar with the `stop-condition`, update the CloudWatch alarms with more appropriate ones.


### Other useful CDK commands:

* `npm run build` compile typescript to js
* `npm run watch` watch for changes and compile
* `npm run test` perform the jest unit tests
* `cdk deploy` deploy this stack to your default AWS account/region
* `cdk diff` compare deployed stack with current state
* `cdk synth` emits the synthesized CloudFormation template

The `cdk.json` file tells the CDK Toolkit how to execute your app.
21 changes: 21 additions & 0 deletions bin/aws-fis-cdk-templates.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/usr/bin/env node
import 'source-map-support/register';
import * as cdk from '@aws-cdk/core';
import { FIS } from '../lib/parent-stack';

// CDK entry point: builds the app and instantiates the FIS parent stack in it.
const app = new cdk.App();

const stackProps = {
/* If you don't specify 'env', this stack will be environment-agnostic.
* Account/Region-dependent features and context lookups will not work,
* but a single synthesized template can be deployed anywhere. */

/* Uncomment the next line to specialize this stack for the AWS Account
* and Region that are implied by the current CLI configuration. */
// env: { account: process.env.CDK_DEFAULT_ACCOUNT, region: process.env.CDK_DEFAULT_REGION },

/* Uncomment the next line if you know exactly what Account and Region you
* want to deploy the stack to. */
// env: { account: '123456789012', region: 'us-east-1' },

/* For more information, see https://docs.aws.amazon.com/cdk/latest/guide/environments.html */
};

new FIS(app, 'FISPa', stackProps);
14 changes: 14 additions & 0 deletions cdk.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"app": "npx ts-node --prefer-ts-exts bin/aws-fis-cdk-templates.ts",
"context": {
"@aws-cdk/core:enableStackNameDuplicates": "true",
"aws-cdk:enableDiffNoFail": "true",
"@aws-cdk/core:stackRelativeExports": "true",
"@aws-cdk/aws-ecr-assets:dockerIgnoreSupport": true,
"@aws-cdk/aws-secretsmanager:parseOwnedSecretName": true,
"@aws-cdk/aws-kms:defaultKeyPolicies": true,
"@aws-cdk/aws-s3:grantWriteWithoutAcl": true,
"@aws-cdk/aws-ecs-patterns:removeDefaultDesiredCount": true,
"@aws-cdk/aws-rds:lowercaseDbIdentifier": true
}
}
225 changes: 225 additions & 0 deletions documents/ssma-nacl-faults.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
---
description: Modify Subnets to deny traffic in a particular AZ. Rollback on Cancel or Failure.
schemaVersion: '0.3'
assumeRole: "{{ AutomationAssumeRole }}"
parameters:
Region:
type: String
description: "The Region to impact"
default: "us-east-1"
AvailabilityZone:
type: String
description: "(Required) The Availability Zone to impact"
default: "us-east-1a"
VPCId:
type: String
description: "The ID of the VPC where the subnets reside"
default: "vpc-01316e63b948d889d"
Duration:
type: String
description: "The duration - in ISO-8601 format - of the attack. (Required)"
default: "PT1M"
AutomationAssumeRole:
type: String
description: "(Optional) The ARN of the role that allows Automation to perform
the actions on your behalf."
mainSteps:
- name: InjectFaultNacl
onFailure: 'step:rollback'
onCancel: 'step:rollback'
action: aws:executeScript
inputs:
Runtime: python3.6
Handler: inject_fault
InputPayload:
region:
- "{{ Region }}"
az_name:
- "{{ AvailabilityZone }}"
vpc_id:
- "{{ VPCId }}"
Script: |-
import logging
import boto3
import time
def create_chaos_nacl(ec2_client, vpc_id):
    """Create the "chaos" network ACL in a VPC and give it deny rules.

    Args:
        ec2_client: EC2 client exposing ``create_network_acl``,
            ``create_tags`` and ``create_network_acl_entry``.
        vpc_id: ID of the VPC in which the network ACL is created.

    Returns:
        The ``NetworkAclId`` of the newly created network ACL.
    """
    logging.getLogger(__name__).info('Create a Chaos Network ACL')

    creation = ec2_client.create_network_acl(VpcId=vpc_id)
    chaos_nacl_id = creation['NetworkAcl']['NetworkAclId']

    # Fixed pause between creation and tagging.
    # NOTE(review): presumably gives the new ACL time to become visible - confirm.
    time.sleep(5)

    # Name tag so the ACL is easy to recognise as the chaos one.
    ec2_client.create_tags(
        Resources=[chaos_nacl_id],
        Tags=[{'Key': 'Name', 'Value': 'chaos-nacl'}],
    )

    # One deny rule per direction: egress first (rule 100), then ingress (rule 101).
    for is_egress, rule_number in ((True, 100), (False, 101)):
        ec2_client.create_network_acl_entry(
            CidrBlock='0.0.0.0/0',
            Egress=is_egress,
            PortRange={'From': 0, 'To': 65535},
            NetworkAclId=chaos_nacl_id,
            Protocol='-1',
            RuleAction='deny',
            RuleNumber=rule_number,
        )

    return chaos_nacl_id
def get_subnets_to_chaos(ec2_client, vpc_id, az_name):
    """List the IDs of the subnets of a VPC located in one Availability Zone.

    Args:
        ec2_client: EC2 client exposing ``describe_subnets``.
        vpc_id: ID of the VPC used for the ``vpc-id`` filter.
        az_name: Availability Zone name used for the ``availability-zone`` filter.

    Returns:
        A list with the ``SubnetId`` of every subnet in the response.
    """
    logging.getLogger(__name__).info('Getting the list of subnets to fail')

    az_filter = {'Name': 'availability-zone', 'Values': [az_name]}
    vpc_filter = {'Name': 'vpc-id', 'Values': [vpc_id]}
    described = ec2_client.describe_subnets(Filters=[az_filter, vpc_filter])

    subnet_ids = []
    for subnet in described['Subnets']:
        subnet_ids.append(subnet['SubnetId'])
    return subnet_ids
def get_nacls_to_chaos(ec2_client, subnets_to_chaos):
    """Find the current network ACL association of every targeted subnet.

    The returned pairs are what the caller saves so the original
    associations can be restored later.

    Args:
        ec2_client: EC2 client exposing ``describe_network_acls``.
        subnets_to_chaos: list of subnet IDs whose associations are wanted.

    Returns:
        A list of ``(NetworkAclAssociationId, NetworkAclId)`` tuples, one per
        association whose ``SubnetId`` is in ``subnets_to_chaos``. Empty when
        no subnets were supplied.
    """
    logger = logging.getLogger(__name__)
    logger.info('Getting the list of NACLs to blackhole')

    # With no subnets there is nothing to match; skip the API call instead of
    # sending a filter whose value list is empty.
    if not subnets_to_chaos:
        logger.warning('No subnets to fail; no NACL associations found')
        return []

    # Find network acl associations mapped to the subnets_to_chaos
    acls_response = ec2_client.describe_network_acls(
        Filters=[
            {
                'Name': 'association.subnet-id',
                'Values': subnets_to_chaos
            }
        ]
    )

    # Set lookup keeps the membership test cheap for long subnet lists.
    targeted_subnets = set(subnets_to_chaos)

    # SAVE THEM so it can revert. Each association is checked individually,
    # so associations of non-targeted subnets in the response are dropped.
    return [
        (association['NetworkAclAssociationId'], association['NetworkAclId'])
        for nacl in acls_response['NetworkAcls']
        for association in nacl['Associations']
        if association['SubnetId'] in targeted_subnets
    ]
def apply_chaos_config(ec2_client, nacl_ids, chaos_nacl_id):
    """Point every given association at the chaos network ACL.

    Args:
        ec2_client: EC2 client exposing ``replace_network_acl_association``.
        nacl_ids: iterable of ``(association_id, original_nacl_id)`` pairs.
        chaos_nacl_id: ID of the network ACL to associate instead.

    Returns:
        A list of dicts, one per replaced association, holding the
        ``NewAssociationId`` from the response and the original network ACL
        ID under ``Nacl_Id``.
    """
    logging.getLogger(__name__).info(
        'Saving original config & applying new chaos config')

    rollback_entries = []
    for association_id, original_nacl_id in nacl_ids:
        swapped = ec2_client.replace_network_acl_association(
            AssociationId=association_id,
            NetworkAclId=chaos_nacl_id,
        )
        # Replacing yields a new association ID; keep it next to the ACL that
        # was associated before the swap.
        entry = {
            "NewAssociationId": swapped['NewAssociationId'],
            "Nacl_Id": original_nacl_id,
        }
        rollback_entries.append(entry)
    return rollback_entries
def inject_fault(events, context):
    """Handler of the fault-injection step.

    Creates the chaos network ACL, finds the subnets of the VPC in the given
    Availability Zone, and re-associates them with the chaos ACL.

    Args:
        events: payload dict; the first item of the ``region``, ``az_name``
            and ``vpc_id`` lists is used.
        context: unused.

    Returns:
        A dict with ``saved_configuration`` (the data returned by
        ``apply_chaos_config``) and ``chaos_nacl_id``.
    """
    region, az_name, vpc_id = (
        events['region'][0],
        events['az_name'][0],
        events['vpc_id'][0],
    )

    logging.getLogger(__name__).info(
        'Setting up ec2 client for region %s ', region)
    ec2_client = boto3.client('ec2', region_name=region)

    chaos_nacl_id = create_chaos_nacl(ec2_client, vpc_id)
    target_subnets = get_subnets_to_chaos(ec2_client, vpc_id, az_name)
    current_associations = get_nacls_to_chaos(ec2_client, target_subnets)

    # Blackhole networking to EC2 instances in failed AZ
    saved_configuration = apply_chaos_config(
        ec2_client, current_associations, chaos_nacl_id)

    result = {}
    result['saved_configuration'] = saved_configuration
    result['chaos_nacl_id'] = chaos_nacl_id
    return result
outputs:
- Name: Rollback
Selector: $.Payload.saved_configuration
Type: MapList
- Name: ChaosNacl
Selector: $.Payload.chaos_nacl_id
Type: String
- name: sleep
action: aws:sleep
onFailure: 'step:rollback'
onCancel: 'step:rollback'
inputs:
Duration: "{{ Duration }}"
- name: rollback
action: aws:executeScript
inputs:
Runtime: python3.6
Handler: rollback
InputPayload:
region:
- "{{ Region }}"
rollback:
- "{{ InjectFaultNacl.Rollback }}"
chaos_nacl:
- "{{ InjectFaultNacl.ChaosNacl }}"
Script: |-
import logging
import boto3
def rollback(events, context):
    """Handler of the rollback step.

    Restores each saved network ACL association, then deletes the chaos
    network ACL.

    Args:
        events: payload dict with ``rollback`` (iterable of dicts holding
            ``NewAssociationId`` and ``Nacl_Id``), ``region`` and
            ``chaos_nacl`` (the first list item of the latter two is used).
        context: unused.
    """
    saved_associations = events['rollback']
    target_region = events['region'][0]

    log = logging.getLogger(__name__)
    log.info('Rolling back Network ACL to original configuration')
    ec2 = boto3.client('ec2', region_name=target_region)

    # Rollback the initial association
    for saved in saved_associations:
        ec2.replace_network_acl_association(
            AssociationId=saved["NewAssociationId"],
            NetworkAclId=saved["Nacl_Id"],
        )

    # delete the Chaos NACL once the associations have been replaced
    log.info('Deleting the Chaos NACL')
    ec2.delete_network_acl(NetworkAclId=events['chaos_nacl'][0])
7 changes: 7 additions & 0 deletions jest.config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
module.exports = {
roots: ['<rootDir>/test'],
testMatch: ['**/*.test.ts'],
transform: {
'^.+\\.tsx?$': 'ts-jest'
}
};
Loading

0 comments on commit c792ac6

Please sign in to comment.