
Commit

Initial commit
Signed-off-by: Luke Hinds <[email protected]>
lukehinds committed Feb 22, 2024
0 parents commit b747d3c
Showing 7 changed files with 203 additions and 0 deletions.
33 changes: 33 additions & 0 deletions .github/workflows/train_and_inference.yml
@@ -0,0 +1,33 @@
name: Train and Inference

on:
  push:
    branches:
      - main

jobs:
  train_and_inference:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v2

      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: 3.8

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Generate Dataset
        run: python generate_dataset.py

      - name: Train Model
        run: python train_model.py

      - name: Run Inference
        run: python run_inference.py
30 changes: 30 additions & 0 deletions .gitignore
@@ -0,0 +1,30 @@
# Python
__pycache__/
*.py[cod]
*$py.class

# PyTorch
*.pth

# macOS
.DS_Store

# Virtual Environment
venv/
env/

# Jupyter Notebook
.ipynb_checkpoints

# Visual Studio Code
.vscode/

# Sublime Text
*.sublime-workspace

# IntelliJ
.idea/

# Training / inference files
*.pth
*.npz
38 changes: 38 additions & 0 deletions README.md
@@ -0,0 +1,38 @@
# Simple PyTorch Model Pipeline

This is a simple PyTorch model pipeline that can be used to train and evaluate a model. The pipeline is built around a small feedforward neural network, but can easily be adapted to other models, as sketched below.

This is for experimentation around model provenance and integrity.

A simple GitHub Actions workflow is used to run the pipeline.
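
Adapting the pipeline to another architecture only means swapping out the `SimpleNN` class in `train_model.py`. A minimal sketch (the two-layer `DeeperNN` below is an illustrative example, not part of this repository), assuming the same 20-feature input and a single sigmoid output:

```python
import torch.nn as nn


class DeeperNN(nn.Module):
    """Hypothetical drop-in replacement for SimpleNN with one hidden layer."""

    def __init__(self, input_dim, hidden_dim=64):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid(),  # keep a sigmoid output so the rest of the pipeline is unchanged
        )

    def forward(self, x):
        return self.net(x)
```

Everything else (dataset generation, the training loop, and `run_inference.py`) can stay as it is, since the input and output shapes are unchanged.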

## Usage

Set up a virtualenv and install the requirements:

```bash
virtualenv -p python3 venv

source venv/bin/activate

pip install -r requirements.txt
```

Generate the synthetic dataset:

```bash
python generate_dataset.py
```

Train the model:

```bash
python train_model.py
```

Inference:

```bash
python run_inference.py
```

16 changes: 16 additions & 0 deletions generate_dataset.py
@@ -0,0 +1,16 @@
import numpy as np
from sklearn.datasets import make_classification

# Generate a synthetic dataset
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, weights=[0.9, 0.1], random_state=42)

# Save the dataset to a file
np.savez('dataset.npz', X=X, y=y)

# Print information about the generated dataset
print("Synthetic dataset generated successfully!")
print("Number of samples:", X.shape[0])
print("Number of features:", X.shape[1])
print("Class distribution:")
print("Class 0:", np.sum(y == 0))
print("Class 1:", np.sum(y == 1))
14 changes: 14 additions & 0 deletions requirements.txt
@@ -0,0 +1,14 @@
filelock==3.13.1
fsspec==2024.2.0
Jinja2==3.1.3
joblib==1.3.2
MarkupSafe==2.1.5
mpmath==1.3.0
networkx==3.2.1
numpy==1.26.4
scikit-learn==1.4.1.post1
scipy==1.12.0
sympy==1.12
threadpoolctl==3.3.0
torch==2.2.0
typing_extensions==4.9.0
20 changes: 20 additions & 0 deletions run_inference.py
@@ -0,0 +1,20 @@
import torch
import numpy as np
from train_model import SimpleNN  # SimpleNN is defined in train_model.py; its training code only runs under __main__

# Load the saved model parameters
model = SimpleNN(input_dim=20) # Assuming 20 features
model.load_state_dict(torch.load('model.pth'))
model.eval() # Set the model to evaluation mode

# Define a function to make predictions
def predict(input_data):
    input_tensor = torch.tensor(input_data, dtype=torch.float32)
    with torch.no_grad():
        output = model(input_tensor)
    return output.numpy().squeeze()

# Example usage:
new_data = np.random.randn(10, 20) # Example: 10 samples with 20 features each
predictions = predict(new_data)
print("Predictions:", predictions)
52 changes: 52 additions & 0 deletions train_model.py
@@ -0,0 +1,52 @@
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Define a simple feedforward neural network using PyTorch
class SimpleNN(nn.Module):
    def __init__(self, input_dim):
        super(SimpleNN, self).__init__()
        self.fc = nn.Linear(input_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.fc(x)
        x = self.sigmoid(x)
        return x

# Run training only when executed directly, so that importing SimpleNN
# (e.g. from run_inference.py) does not retrain the model
if __name__ == "__main__":
    # Load the dataset
    dataset = np.load('dataset.npz')
    X_train, y_train = dataset['X'], dataset['y']

    # Reshape the target array to be a column vector
    y_train = y_train.reshape(-1, 1)

    # Initialize the model
    model = SimpleNN(input_dim=X_train.shape[1])

    # Define loss function and optimizer (the model already applies a sigmoid,
    # so plain BCELoss is used rather than BCEWithLogitsLoss)
    criterion = nn.BCELoss()
    optimizer = optim.SGD(model.parameters(), lr=0.1)

    # Convert data to PyTorch tensors
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float32)

    # Training loop
    epochs = 100
    for epoch in range(epochs):
        # Forward pass
        outputs = model(X_train_tensor)
        loss = criterion(outputs, y_train_tensor)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")

    # Save the trained model parameters
    torch.save(model.state_dict(), 'model.pth')
