-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial migration to private DDN and creation of AML demo (#118)
* Initial migration to private DDN I've decided in this commit to move all of the telco data and metadata into an isolated directory. After working with this for a while, I think the value of shared directories for shared metadata is low, so it makes more sense to keep each in an individual location. Also within this, I've taken out all region config for connectors since we'll push to a single region for demos. My view on this is that we'll have one project for each demo rather than one in each region. * Adds AML data and metadata * Adds large files to LFS * More required changes for the AML demo
- Loading branch information
Showing
115 changed files
with
10,105 additions
and
295 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# Compose project for the demo's local backing services.
# CONTAINER_PREFIX, POSTGRES_PASSWORD, MONGO_PASSWORD, CACHING_PLUGIN_SECRET and
# CACHING_PLUGIN_REDIS_URL are substituted from the environment.
# NOTE(review): the postgres, mongo and redis images are not pinned to a
# version tag — consider pinning for reproducible demos.
name: ${CONTAINER_PREFIX}
services:
  # Databases
  postgres:
    image: postgres
    container_name: ${CONTAINER_PREFIX}_postgres
    restart: unless-stopped
    volumes:
      # Named volume keeps the data directory across container restarts.
      - postgres-data:/var/lib/postgresql/data
      # Init SQL and the CSV files it loads are mounted into the init directory.
      - ./postgres/:/docker-entrypoint-initdb.d/
    environment:
      PGUSER: postgres
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
    ports:
      - "5432:5432"
    healthcheck:
      # Quiet readiness probe against the "postgres" host name.
      test: "pg_isready -q -h postgres"
      interval: 10s
      timeout: 5s
      retries: 30

  mongodb:
    image: mongo
    container_name: ${CONTAINER_PREFIX}_mongodb
    restart: unless-stopped
    ports:
      - "27017:27017"
    # Start the server with access control enabled.
    command: mongod --auth
    volumes:
      - mongodb-data:/data/db
      # Restore script and BSON dumps are mounted into the init directory.
      - ./mongodb:/docker-entrypoint-initdb.d/
    environment:
      - MONGO_INITDB_DATABASE=aml
      - MONGO_INITDB_ROOT_USERNAME=root
      - MONGO_INITDB_ROOT_PASSWORD=${MONGO_PASSWORD}

  redis:
    image: redis:latest
    restart: unless-stopped
    ports:
      - 6379:6379

  caching:
    build:
      # Built straight from the plugin repository at a fixed commit.
      context: https://github.com/hasura/engine-plugin-caching.git#f59acfe4004c869181de83be78d0f5d48459ef3c
    restart: unless-stopped
    ports:
      - 8787:8787
    environment:
      CACHING_PLUGIN_SECRET: ${CACHING_PLUGIN_SECRET}
      CACHING_PLUGIN_REDIS_URL: ${CACHING_PLUGIN_REDIS_URL}
    extra_hosts:
      # Makes local.hasura.dev resolve to the Docker host from inside the container.
      - local.hasura.dev=host-gateway
    volumes:
      # Overrides the plugin's bundled config with the project's own.
      - ../plugins/caching-config.js:/app/src/config.js

# Named volumes referenced by the postgres and mongodb services above.
volumes:
  postgres-data:
  mongodb-data:
Git LFS file not shown
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
#!/bin/bash
# Restores every *.bson dump found in the init directory into the "aml"
# database. Each file becomes one collection, named after the file without
# its .bson extension.
set -e

echo "Starting MongoDB restore..."

DB_NAME="aml"
INITDB_DIR="/docker-entrypoint-initdb.d"

for bson_file in "$INITDB_DIR"/*.bson; do
    # When nothing matches, bash leaves the pattern unexpanded and the loop
    # runs once with the literal "*.bson" path. Stop with a clear message
    # instead of handing that non-existent path to mongorestore.
    if [ ! -e "$bson_file" ]; then
        echo "Error: no .bson files found in $INITDB_DIR" >&2
        exit 1
    fi

    collection_name=$(basename "$bson_file" .bson)
    echo "Restoring collection: $collection_name"
    mongorestore --db="$DB_NAME" --collection="$collection_name" "$bson_file"
done

echo "MongoDB restore completed successfully."
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
-- Bootstrap script for the AML demo database (run through psql: it uses \c).
-- Creates the schema, bulk-loads the CSV fixtures from the init directory,
-- then builds the secondary indexes and re-syncs the serial sequences.
CREATE DATABASE aml;

\c aml;

-- Raw transaction records, loaded from SAML-D.csv.
CREATE TABLE IF NOT EXISTS saml_d (
    time TIME,
    date DATE,
    sender_account BIGINT,
    receiver_account BIGINT,
    amount DECIMAL(15,2),
    payment_currency VARCHAR(20),
    received_currency VARCHAR(20),
    sender_bank_location VARCHAR(50),
    receiver_bank_location VARCHAR(50),
    payment_type VARCHAR(50),
    is_laundering BOOLEAN,
    laundering_type VARCHAR(50)
);

-- Customer master data; one row per account.
CREATE TABLE IF NOT EXISTS customers (
    customer_id BIGSERIAL PRIMARY KEY,
    name VARCHAR(255) NOT NULL,
    account BIGINT UNIQUE NOT NULL,
    dob DATE NOT NULL,
    nationality VARCHAR(100),
    risk_level VARCHAR(10) CHECK (risk_level IN ('low', 'medium', 'high')),
    pep_status BOOLEAN DEFAULT FALSE,
    blacklisted BOOLEAN DEFAULT FALSE
);

-- Suspicious activity reports; removed together with their customer.
CREATE TABLE IF NOT EXISTS sars (
    sar_id BIGSERIAL PRIMARY KEY,
    customer_id BIGINT REFERENCES customers(customer_id) ON DELETE CASCADE,
    transaction_id BIGINT NOT NULL,
    reason TEXT NOT NULL,
    status VARCHAR(10) CHECK (status IN ('pending', 'filed', 'dismissed')) DEFAULT 'pending',
    filed_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Bulk load. Explicit column lists make the CSV-to-column mapping visible
-- instead of relying on table column order. customers must be loaded before
-- sars because of the foreign key.
COPY customers (
    customer_id, name, account, dob, nationality,
    risk_level, pep_status, blacklisted
)
FROM '/docker-entrypoint-initdb.d/customers.csv'
WITH (FORMAT csv, HEADER true, DELIMITER ',');

COPY sars (
    sar_id, customer_id, transaction_id, reason, status, filed_date
)
FROM '/docker-entrypoint-initdb.d/sars.csv'
WITH (FORMAT csv, HEADER true, DELIMITER ',');

COPY saml_d (
    time, date, sender_account, receiver_account, amount,
    payment_currency, received_currency,
    sender_bank_location, receiver_bank_location,
    payment_type, is_laundering, laundering_type
)
FROM '/docker-entrypoint-initdb.d/SAML-D.csv'
WITH (FORMAT csv, HEADER true, DELIMITER ',');

-- COPY inserted explicit key values, which does not advance the BIGSERIAL
-- sequences. Move each sequence past the highest loaded id so that later
-- INSERTs relying on the default do not collide with existing rows.
SELECT setval(
    pg_get_serial_sequence('customers', 'customer_id'),
    COALESCE(MAX(customer_id), 0) + 1,
    false
)
FROM customers;

SELECT setval(
    pg_get_serial_sequence('sars', 'sar_id'),
    COALESCE(MAX(sar_id), 0) + 1,
    false
)
FROM sars;

-- Secondary indexes are created after the load so each is built once over the
-- full data set rather than maintained row by row during COPY.
CREATE INDEX IF NOT EXISTS idx_customers_nationality ON customers(nationality);
CREATE INDEX IF NOT EXISTS idx_sars_customer_id ON sars(customer_id);
CREATE INDEX IF NOT EXISTS idx_sars_transaction_id ON sars(transaction_id);
CREATE INDEX IF NOT EXISTS idx_saml_d_sender_account ON saml_d(sender_account);
CREATE INDEX IF NOT EXISTS idx_saml_d_receiver_account ON saml_d(receiver_account);
CREATE INDEX IF NOT EXISTS idx_saml_d_date ON saml_d(date);
CREATE INDEX IF NOT EXISTS idx_saml_d_payment_type ON saml_d(payment_type);
CREATE INDEX IF NOT EXISTS idx_saml_d_is_laundering ON saml_d(is_laundering);
Git LFS file not shown
Git LFS file not shown
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
import csv | ||
import random | ||
import datetime | ||
from faker import Faker | ||
|
||
# Initialize Faker for random data generation
# (shared by generate_customers and generate_sars below).
fake = Faker()

# Number of records to generate
NUM_CUSTOMERS = 100000  # rows written to customers.csv
NUM_SARS = 50000  # rows written to sars.csv

# Risk levels and statuses
# Values sampled for customers.risk_level and sars.status respectively.
RISK_LEVELS = ["low", "medium", "high"]
SAR_STATUSES = ["pending", "filed", "dismissed"]
|
||
# Generate customers data | ||
def generate_customers():
    """Build NUM_CUSTOMERS synthetic customer rows.

    Returns:
        A list of rows, each
        ``[customer_id, name, account, dob, nationality, risk_level,
        pep_status, blacklisted]``. ``customer_id`` runs from 1 to
        NUM_CUSTOMERS, ``account`` is a numeric string that is unique across
        the returned rows, and ``dob`` is formatted as YYYY-MM-DD.
    """
    customers = []
    # Account numbers already handed out. Independent random draws at this
    # volume have a substantial chance (roughly 40%) of producing at least one
    # duplicate, which a uniqueness constraint on the account column rejects.
    used_accounts = set()
    for customer_id in range(1, NUM_CUSTOMERS + 1):
        name = fake.name()
        account = random.randint(100000000, 9999999999)
        while account in used_accounts:
            account = random.randint(100000000, 9999999999)
        used_accounts.add(account)
        dob = fake.date_of_birth(minimum_age=18, maximum_age=80).strftime("%Y-%m-%d")
        nationality = fake.country()
        risk_level = random.choice(RISK_LEVELS)
        pep_status = random.choice([True, False])
        # Only high-risk customers can be blacklisted.
        blacklisted = random.choice([True, False]) if risk_level == "high" else False
        customers.append([customer_id, name, str(account), dob, nationality, risk_level, pep_status, blacklisted])
    return customers
|
||
# Generate SARs data | ||
def generate_sars(customers):
    """Build NUM_SARS synthetic suspicious-activity-report rows.

    Args:
        customers: Customer rows whose first element is the customer id;
            each report picks one of them at random.

    Returns:
        A list of rows, each
        ``[sar_id, customer_id, transaction_id, reason, status, filed_date]``.
        ``sar_id`` runs from 1 to NUM_SARS and ``filed_date`` is formatted as
        YYYY-MM-DD HH:MM:SS.
    """
    aml_reasons = [
        "rapid large deposits",
        "multiple small cash deposits",
        "frequent wire transfers",
        "high volume of transactions",
        "unexplained wealth increase",
        "rapid movement of funds",
        "transactions just below reporting threshold",
        "frequent ATM withdrawals",
        "multiple high-value purchases",
        "inconsistent deposit amounts",
        "repeated failed transactions",
        "high-risk merchant activity",
        "large round-dollar transactions",
        "unusual payment frequency",
        "excessive refund requests",
        "transactions with sanctioned countries",
        "cross-border high-risk jurisdiction",
        "frequent offshore transactions",
        "use of multiple foreign accounts",
        "payments to unknown international entities",
        "transfers to high-risk tax havens",
        "funds originating from conflict zones",
        "frequent cross-border remittances",
        "sudden shift in transaction locations",
        "transactions from high-risk jurisdictions",
        "shell company detected",
        "business lacks operational activity",
        "high volume of cash transactions",
        "transactions do not match business profile",
        "unregistered business receiving payments",
        "high-risk industry involvement",
        "front company suspected",
        "misuse of business accounts",
        "fake invoices detected",
        "unusual supplier payments",
        "business operating with no clear income source",
        "charity receiving unexplained large donations",
        "structuring detected",
        "layering through multiple accounts",
        "use of third-party intermediaries",
        "funds rapidly transferred across multiple accounts",
        "complex transaction chain with no clear purpose",
        "transaction smurfing detected",
        "rapid movement between personal and business accounts",
        "multiple deposits from unknown sources",
        "withdrawal pattern inconsistent with income",
        "mismatched identity details",
        "account holder uses multiple aliases",
        "multiple accounts linked to single individual",
        "synthetic identity detected",
        "high-risk account takeover attempt",
        "fraudulent ID used for onboarding",
        "accounts opened with false information",
        "loan applications using stolen credentials",
        "identity inconsistencies across accounts",
        "duplicate account credentials detected",
        "large cash deposit without declared source",
        "frequent deposits at multiple locations",
        "cash transactions inconsistent with known income",
        "sudden increase in cash deposits",
        "high cash activity without justification",
        "cash deposits followed by immediate withdrawals",
        "large-volume cash business with no payroll activity",
        "real estate purchase with no clear source of funds",
        "luxury purchases inconsistent with income",
        "high-value asset flipping detected",
        "multiple property transactions in short period",
        "unregistered ownership changes",
        "cash purchase of high-value assets",
        "credit card laundering suspected",
        "excessive prepaid card loading",
        "use of multiple credit cards in short time",
        "digital wallet with frequent high-value deposits",
        "unexplained gift card transactions",
        "cryptocurrency exchange activity without declared source of funds",
        "funds linked to darknet marketplaces",
        "use of privacy coins",
        "anonymized transactions detected",
        "multiple IP addresses accessing account",
        "high volume of online gambling transactions",
        "sudden increase in e-commerce refunds",
        "use of VPN or TOR network in transactions",
        "payments to social media-based traders",
        "funds received from unrelated third parties",
        "transactions between linked individuals flagged",
        "repeated transactions with same unknown entity",
        "funds moved through multiple third-party accounts",
        "proxy account suspected",
        "multiple account holders accessing single device",
        "third-party involvement without clear business reason",
        "abnormal behavior detected in transaction monitoring",
        "PEP transaction flagged",
        "unusual financial product usage",
        "misuse of government stimulus funds",
        "emergency loans fraud detected",
        "fake employment claims linked to account",
        "transaction pattern linked to known fraud cases",
        "large transactions immediately reversed",
    ]
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    # List elements are evaluated left to right, so the random draws happen in
    # the order: customer, transaction id, reason, status, filed date.
    return [
        [
            report_number,
            random.choice(customers)[0],
            random.randint(1000000000, 9999999999),
            random.choice(aml_reasons),
            random.choice(SAR_STATUSES),
            fake.date_time_this_year().strftime(timestamp_format),
        ]
        for report_number in range(1, NUM_SARS + 1)
    ]
|
||
# Save to CSV | ||
def save_to_csv(filename, data, headers):
    """Write a header row followed by the data rows to a UTF-8 CSV file.

    Args:
        filename: Path of the file to create or overwrite.
        data: Iterable of rows, each an iterable of cell values.
        headers: Column names written as the first row.
    """
    # newline='' lets the csv module control line endings itself.
    with open(filename, mode='w', newline='', encoding='utf-8') as handle:
        csv.writer(handle).writerows([headers, *data])
    print(f"✅ Generated {filename}")
|
||
# Generate data | ||
# Customers are generated first because every SAR samples its customer id
# from them.
customers_data = generate_customers()
sars_data = generate_sars(customers_data)

# Write each data set to its CSV file together with its header row.
for filename, rows, columns in (
    (
        "customers.csv",
        customers_data,
        ["customer_id", "name", "account", "dob", "nationality", "risk_level", "pep_status", "blacklisted"],
    ),
    (
        "sars.csv",
        sars_data,
        ["sar_id", "customer_id", "transaction_id", "reason", "status", "filed_date"],
    ),
):
    save_to_csv(filename, rows, columns)
Git LFS file not shown
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.