Commit

Initial migration to private DDN and creation of AML demo (#118)
* Initial migration to private DDN

This commit moves all of the telco data and metadata into an isolated directory.
After working with the shared layout for a while, I think the value of shared directories for shared metadata is low,
so it makes more sense to keep each demo's files in its own location (see the layout sketch after this message).

As part of this, I've removed all region config for connectors, since we'll push demos to a single region.
The plan is one project per demo rather than one project per region.

* Adds AML data and metadata

* Adds large files to LFS

* More required changes for the AML demo
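
For orientation, the paths touched in this diff suggest a per-demo layout along these lines (only files that appear in this commit are listed; the plugins/ entry is inferred from the caching volume mount in compose.yaml):

.data/
  aml/
    compose.yaml
    mongodb/      accounts.bson, sanctioned.bson, sanctioned_list.bson, transactions.bson, mongoimport.sh
    postgres/     1-import-pg.sql, generate.py, customers.csv, sars.csv, SAML-D.csv
  starter/
    compose.yaml
  plugins/        caching-config.js (inferred)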
typhonius authored Feb 23, 2025
1 parent 52c3f90 commit 7f53765
Showing 115 changed files with 10,105 additions and 295 deletions.
60 changes: 60 additions & 0 deletions .data/aml/compose.yaml
@@ -0,0 +1,60 @@
name: ${CONTAINER_PREFIX}
services:
# Databases
postgres:
image: postgres
container_name: ${CONTAINER_PREFIX}_postgres
restart: unless-stopped
volumes:
- postgres-data:/var/lib/postgresql/data
- ./postgres/:/docker-entrypoint-initdb.d/
environment:
PGUSER: postgres
POSTGRES_USER: postgres
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
ports:
- "5432:5432"
healthcheck:
test: "pg_isready -q -h postgres"
interval: 10s
timeout: 5s
retries: 30

mongodb:
image: mongo
container_name: ${CONTAINER_PREFIX}_mongodb
restart: unless-stopped
ports:
- "27017:27017"
command: mongod --auth
volumes:
- mongodb-data:/data/db
- ./mongodb:/docker-entrypoint-initdb.d/
environment:
- MONGO_INITDB_DATABASE=aml
- MONGO_INITDB_ROOT_USERNAME=root
- MONGO_INITDB_ROOT_PASSWORD=${MONGO_PASSWORD}

redis:
image: redis:latest
restart: unless-stopped
ports:
- 6379:6379

caching:
build:
context: https://github.com/hasura/engine-plugin-caching.git#f59acfe4004c869181de83be78d0f5d48459ef3c
restart: unless-stopped
ports:
- 8787:8787
environment:
CACHING_PLUGIN_SECRET: ${CACHING_PLUGIN_SECRET}
CACHING_PLUGIN_REDIS_URL: ${CACHING_PLUGIN_REDIS_URL}
extra_hosts:
- local.hasura.dev=host-gateway
volumes:
- ../plugins/caching-config.js:/app/src/config.js

volumes:
postgres-data:
mongodb-data:
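
A rough usage sketch for this compose file: it reads its settings from the environment, so an .env along the lines below (placeholder values) would bring the AML stack up from .data/aml/. The Redis URL is an assumption; point it at whichever endpoint the caching plugin should use.

cat > .env <<'EOF'
CONTAINER_PREFIX=aml
POSTGRES_PASSWORD=changeme
MONGO_PASSWORD=changeme
CACHING_PLUGIN_SECRET=changeme
CACHING_PLUGIN_REDIS_URL=redis://redis:6379
EOF

# --wait blocks until the containers are running and any healthchecks (pg_isready here) pass.
docker compose up -d --wait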
3 changes: 3 additions & 0 deletions .data/aml/mongodb/accounts.bson
Git LFS file not shown
15 changes: 15 additions & 0 deletions .data/aml/mongodb/mongoimport.sh
@@ -0,0 +1,15 @@
#!/bin/bash
set -e

echo "Starting MongoDB restore..."

DB_NAME="aml"
INITDB_DIR="/docker-entrypoint-initdb.d"

for bson_file in "$INITDB_DIR"/*.bson; do
collection_name=$(basename "$bson_file" .bson)
echo "Restoring collection: $collection_name"
mongorestore --db="$DB_NAME" --collection="$collection_name" "$bson_file"
done

echo "MongoDB restore completed successfully."
3 changes: 3 additions & 0 deletions .data/aml/mongodb/sanctioned.bson
Git LFS file not shown
3 changes: 3 additions & 0 deletions .data/aml/mongodb/sanctioned_list.bson
Git LFS file not shown
3 changes: 3 additions & 0 deletions .data/aml/mongodb/transactions.bson
Git LFS file not shown
51 changes: 51 additions & 0 deletions .data/aml/postgres/1-import-pg.sql
@@ -0,0 +1,51 @@
CREATE DATABASE aml;

\c aml;

CREATE TABLE IF NOT EXISTS saml_d (
time TIME,
date DATE,
sender_account BIGINT,
receiver_account BIGINT,
amount DECIMAL(15,2),
payment_currency VARCHAR(20),
received_currency VARCHAR(20),
sender_bank_location VARCHAR(50),
receiver_bank_location VARCHAR(50),
payment_type VARCHAR(50),
is_laundering BOOLEAN,
laundering_type VARCHAR(50)
);

CREATE TABLE IF NOT EXISTS customers (
customer_id BIGSERIAL PRIMARY KEY,
name VARCHAR(255) NOT NULL,
account BIGINT UNIQUE NOT NULL,
dob DATE NOT NULL,
nationality VARCHAR(100),
risk_level VARCHAR(10) CHECK (risk_level IN ('low', 'medium', 'high')),
pep_status BOOLEAN DEFAULT FALSE,
blacklisted BOOLEAN DEFAULT FALSE
);

CREATE TABLE IF NOT EXISTS sars (
sar_id BIGSERIAL PRIMARY KEY,
customer_id BIGINT REFERENCES customers(customer_id) ON DELETE CASCADE,
transaction_id BIGINT NOT NULL,
reason TEXT NOT NULL,
status VARCHAR(10) CHECK (status IN ('pending', 'filed', 'dismissed')) DEFAULT 'pending',
filed_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX idx_customers_nationality ON customers(nationality);
CREATE INDEX idx_sars_customer_id ON sars(customer_id);
CREATE INDEX idx_sars_transaction_id ON sars(transaction_id);
CREATE INDEX idx_saml_d_sender_account ON saml_d(sender_account);
CREATE INDEX idx_saml_d_receiver_account ON saml_d(receiver_account);
CREATE INDEX idx_saml_d_date ON saml_d(date);
CREATE INDEX idx_saml_d_payment_type ON saml_d(payment_type);
CREATE INDEX idx_saml_d_is_laundering ON saml_d(is_laundering);

COPY customers FROM '/docker-entrypoint-initdb.d/customers.csv' DELIMITER ',' CSV HEADER;
COPY sars FROM '/docker-entrypoint-initdb.d/sars.csv' DELIMITER ',' CSV HEADER;
COPY saml_d FROM '/docker-entrypoint-initdb.d/SAML-D.csv' DELIMITER ',' CSV HEADER;
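
Once the COPY statements have loaded the CSVs, a quick sanity query against this schema could look like the following sketch (the container name again assumes CONTAINER_PREFIX=aml):

# Count SARs per customer risk level to confirm the join columns line up.
docker exec aml_postgres psql -U postgres -d aml -c "
  SELECT c.risk_level, COUNT(*) AS sar_count
  FROM sars s
  JOIN customers c ON c.customer_id = s.customer_id
  GROUP BY c.risk_level
  ORDER BY sar_count DESC;"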
3 changes: 3 additions & 0 deletions .data/aml/postgres/SAML-D.csv
Git LFS file not shown
3 changes: 3 additions & 0 deletions .data/aml/postgres/customers.csv
Git LFS file not shown
160 changes: 160 additions & 0 deletions .data/aml/postgres/generate.py
@@ -0,0 +1,160 @@
import csv
import random
import datetime
from faker import Faker

# Initialize Faker for random data generation
fake = Faker()

# Number of records to generate
NUM_CUSTOMERS = 100000
NUM_SARS = 50000

# Risk levels and statuses
RISK_LEVELS = ["low", "medium", "high"]
SAR_STATUSES = ["pending", "filed", "dismissed"]

# Generate customers data
def generate_customers():
customers = []
for customer_id in range(1, NUM_CUSTOMERS + 1):
name = fake.name()
account = str(random.randint(100000000, 9999999999))
dob = fake.date_of_birth(minimum_age=18, maximum_age=80).strftime("%Y-%m-%d")
nationality = fake.country()
risk_level = random.choice(RISK_LEVELS)
pep_status = random.choice([True, False])
blacklisted = random.choice([True, False]) if risk_level == "high" else False
customers.append([customer_id, name, account, dob, nationality, risk_level, pep_status, blacklisted])
return customers

# Generate SARs data
def generate_sars(customers):

aml_reasons = [
"rapid large deposits",
"multiple small cash deposits",
"frequent wire transfers",
"high volume of transactions",
"unexplained wealth increase",
"rapid movement of funds",
"transactions just below reporting threshold",
"frequent ATM withdrawals",
"multiple high-value purchases",
"inconsistent deposit amounts",
"repeated failed transactions",
"high-risk merchant activity",
"large round-dollar transactions",
"unusual payment frequency",
"excessive refund requests",
"transactions with sanctioned countries",
"cross-border high-risk jurisdiction",
"frequent offshore transactions",
"use of multiple foreign accounts",
"payments to unknown international entities",
"transfers to high-risk tax havens",
"funds originating from conflict zones",
"frequent cross-border remittances",
"sudden shift in transaction locations",
"transactions from high-risk jurisdictions",
"shell company detected",
"business lacks operational activity",
"high volume of cash transactions",
"transactions do not match business profile",
"unregistered business receiving payments",
"high-risk industry involvement",
"front company suspected",
"misuse of business accounts",
"fake invoices detected",
"unusual supplier payments",
"business operating with no clear income source",
"charity receiving unexplained large donations",
"structuring detected",
"layering through multiple accounts",
"use of third-party intermediaries",
"funds rapidly transferred across multiple accounts",
"complex transaction chain with no clear purpose",
"transaction smurfing detected",
"rapid movement between personal and business accounts",
"multiple deposits from unknown sources",
"withdrawal pattern inconsistent with income",
"mismatched identity details",
"account holder uses multiple aliases",
"multiple accounts linked to single individual",
"synthetic identity detected",
"high-risk account takeover attempt",
"fraudulent ID used for onboarding",
"accounts opened with false information",
"loan applications using stolen credentials",
"identity inconsistencies across accounts",
"duplicate account credentials detected",
"large cash deposit without declared source",
"frequent deposits at multiple locations",
"cash transactions inconsistent with known income",
"sudden increase in cash deposits",
"high cash activity without justification",
"cash deposits followed by immediate withdrawals",
"large-volume cash business with no payroll activity",
"real estate purchase with no clear source of funds",
"luxury purchases inconsistent with income",
"high-value asset flipping detected",
"multiple property transactions in short period",
"unregistered ownership changes",
"cash purchase of high-value assets",
"credit card laundering suspected",
"excessive prepaid card loading",
"use of multiple credit cards in short time",
"digital wallet with frequent high-value deposits",
"unexplained gift card transactions",
"cryptocurrency exchange activity without declared source of funds",
"funds linked to darknet marketplaces",
"use of privacy coins",
"anonymized transactions detected",
"multiple IP addresses accessing account",
"high volume of online gambling transactions",
"sudden increase in e-commerce refunds",
"use of VPN or TOR network in transactions",
"payments to social media-based traders",
"funds received from unrelated third parties",
"transactions between linked individuals flagged",
"repeated transactions with same unknown entity",
"funds moved through multiple third-party accounts",
"proxy account suspected",
"multiple account holders accessing single device",
"third-party involvement without clear business reason",
"abnormal behavior detected in transaction monitoring",
"PEP transaction flagged",
"unusual financial product usage",
"misuse of government stimulus funds",
"emergency loans fraud detected",
"fake employment claims linked to account",
"transaction pattern linked to known fraud cases",
"large transactions immediately reversed"
]
sars = []
for sar_id in range(1, NUM_SARS + 1):
customer = random.choice(customers)
customer_id = customer[0]
transaction_id = random.randint(1000000000, 9999999999)
reason = random.choice(aml_reasons)
status = random.choice(SAR_STATUSES)
filed_date = fake.date_time_this_year().strftime("%Y-%m-%d %H:%M:%S")
sars.append([sar_id, customer_id, transaction_id, reason, status, filed_date])
return sars

# Save to CSV
def save_to_csv(filename, data, headers):
with open(filename, mode='w', newline='', encoding='utf-8') as file:
writer = csv.writer(file)
writer.writerow(headers)
writer.writerows(data)
print(f"✅ Generated {filename}")

# Generate data
customers_data = generate_customers()
sars_data = generate_sars(customers_data)

# Save data to CSV files
save_to_csv("customers.csv", customers_data, ["customer_id", "name", "account", "dob", "nationality", "risk_level", "pep_status", "blacklisted"])
save_to_csv("sars.csv", sars_data, ["sar_id", "customer_id", "transaction_id", "reason", "status", "filed_date"])
3 changes: 3 additions & 0 deletions .data/aml/postgres/sars.csv
Git LFS file not shown
38 changes: 1 addition & 37 deletions .data/starter/compose.yaml
@@ -7,7 +7,7 @@ services:
restart: unless-stopped
volumes:
- postgres-data:/var/lib/postgresql/data
- ../build/postgres/:/docker-entrypoint-initdb.d/
- ./postgres/:/docker-entrypoint-initdb.d/
environment:
PGUSER: postgres
POSTGRES_USER: postgres
@@ -20,40 +20,6 @@ services:
timeout: 5s
retries: 30

pgvector:
image: ankane/pgvector:latest
container_name: ${CONTAINER_PREFIX}_pgvector
restart: unless-stopped
ports:
- "5433:5432"
volumes:
- pgvector-data:/var/lib/postgresql/data
- ../common/pgvector/:/docker-entrypoint-initdb.d/
environment:
PGUSER: postgres
POSTGRES_USER: postgres
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
healthcheck:
test: "pg_isready -q -h postgres"
interval: 10s
timeout: 5s
retries: 30

mongodb:
image: mongo
container_name: ${CONTAINER_PREFIX}_mongodb
restart: unless-stopped
ports:
- "27017:27017"
command: mongod --auth
volumes:
- mongodb-data:/data/db
- ../build/mongodb:/docker-entrypoint-initdb.d/
environment:
- MONGO_INITDB_DATABASE=holotel
- MONGO_INITDB_ROOT_USERNAME=root
- MONGO_INITDB_ROOT_PASSWORD=${MONGO_PASSWORD}

redis:
image: redis:latest
restart: unless-stopped
@@ -76,5 +42,3 @@ services:

volumes:
postgres-data:
pgvector-data:
mongodb-data:
