diff --git a/.github/workflows/train_and_inference.yml b/.github/workflows/train_and_inference.yml
new file mode 100644
index 0000000..b20dbf9
--- /dev/null
+++ b/.github/workflows/train_and_inference.yml
@@ -0,0 +1,33 @@
+name: Train and Inference
+
+on:
+  push:
+    branches:
+      - main
+
+jobs:
+  train_and_inference:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'  # quoted so YAML keeps "3.10"; pinned numpy/scipy/scikit-learn need Python >= 3.9
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+
+      - name: Generate Dataset
+        run: python generate_dataset.py
+
+      - name: Train Model
+        run: python train_model.py
+
+      - name: Run Inference
+        run: python run_inference.py
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9b511e8
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,30 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+
+# PyTorch
+*.pth
+
+# macOS
+.DS_Store
+
+# Virtual Environment
+venv/
+env/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# Visual Studio Code
+.vscode/
+
+# Sublime Text
+*.sublime-workspace
+
+# IntelliJ
+.idea/
+
+# Training / inference files
+*.pth
+*.npz
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..9015b62
--- /dev/null
+++ b/README.md
@@ -0,0 +1,38 @@
+# Simple PyTorch Model Pipeline
+
+This is a simple PyTorch model pipeline that can be used to train and evaluate a model. The pipeline is designed to be used with a simple feedforward neural network, but can be easily adapted to other models.
+
+It is intended for experimenting with model provenance and integrity.
+
+A simple GitHub Actions workflow is used to run the pipeline.
+
+## Usage
+
+Set up a virtualenv and install the requirements:
+
+```bash
+virtualenv -p python3 venv
+
+source venv/bin/activate
+
+pip install -r requirements.txt
+```
+
+Generate the training set:
+
+```bash
+python generate_dataset.py
+```
+
+Train the model:
+
+```bash
+python train_model.py
+```
+
+Inference:
+
+```bash
+python run_inference.py
+```
+
diff --git a/generate_dataset.py b/generate_dataset.py
new file mode 100644
index 0000000..c2dda3d
--- /dev/null
+++ b/generate_dataset.py
@@ -0,0 +1,16 @@
+import numpy as np
+from sklearn.datasets import make_classification
+
+# Generate a synthetic dataset
+X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, weights=[0.9, 0.1], random_state=42)
+
+# Save the dataset to a file
+np.savez('dataset.npz', X=X, y=y)
+
+# Print information about the generated dataset
+print("Synthetic dataset generated successfully!")
+print("Number of samples:", X.shape[0])
+print("Number of features:", X.shape[1])
+print("Class distribution:")
+print("Class 0:", np.sum(y == 0))
+print("Class 1:", np.sum(y == 1))
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..6b6b8ae
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,14 @@
+filelock==3.13.1
+fsspec==2024.2.0
+Jinja2==3.1.3
+joblib==1.3.2
+MarkupSafe==2.1.5
+mpmath==1.3.0
+networkx==3.2.1
+numpy==1.26.4
+scikit-learn==1.4.1.post1
+scipy==1.12.0
+sympy==1.12
+threadpoolctl==3.3.0
+torch==2.2.0
+typing_extensions==4.9.0
diff --git a/run_inference.py b/run_inference.py
new file mode 100644
index 0000000..d960e23
--- /dev/null
+++ b/run_inference.py
@@ -0,0 +1,28 @@
+import torch
+import numpy as np
+from train_model import SimpleNN  # SimpleNN is defined in train_model.py
+
+# Load the saved model parameters.
+# weights_only=True restricts the unpickler to tensors and plain containers
+# instead of arbitrary pickled objects, which is all a state dict needs.
+model = SimpleNN(input_dim=20)  # Must match n_features used in generate_dataset.py
+model.load_state_dict(torch.load('model.pth', weights_only=True))
+model.eval()  # Set the model to evaluation mode
+
+# Define a function to make predictions
+def predict(input_data):
+    """Return the model's sigmoid outputs for input_data as a NumPy array.
+
+    input_data is converted to a float32 tensor and passed through the
+    module-level model with gradients disabled; size-1 dimensions are
+    squeezed out of the result.
+    """
+    input_tensor = torch.tensor(input_data, dtype=torch.float32)
+    with torch.no_grad():
+        output = model(input_tensor)
+    return output.numpy().squeeze()
+
+# Example usage:
+new_data = np.random.randn(10, 20)  # Example: 10 samples with 20 features each
+predictions = predict(new_data)
+print("Predictions:", predictions)
diff --git a/train_model.py b/train_model.py
new file mode 100644
index 0000000..51d4d3d
--- /dev/null
+++ b/train_model.py
@@ -0,0 +1,66 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import numpy as np
+
+
+# Define a simple feedforward neural network using PyTorch
+class SimpleNN(nn.Module):
+    """Single linear layer followed by a sigmoid; forward() returns values in (0, 1)."""
+
+    def __init__(self, input_dim):
+        super(SimpleNN, self).__init__()
+        self.fc = nn.Linear(input_dim, 1)
+        self.sigmoid = nn.Sigmoid()
+
+    def forward(self, x):
+        x = self.fc(x)
+        x = self.sigmoid(x)
+        return x
+
+
+def main():
+    """Train SimpleNN on dataset.npz and write its state dict to model.pth."""
+    # Load the dataset
+    dataset = np.load('dataset.npz')
+    X_train, y_train = dataset['X'], dataset['y']
+
+    # Reshape the target array to be a column vector
+    y_train = y_train.reshape(-1, 1)
+
+    # Initialize the model
+    model = SimpleNN(input_dim=X_train.shape[1])
+
+    # Define loss function and optimizer.
+    # forward() already applies a sigmoid, so the loss must take probabilities;
+    # BCEWithLogitsLoss would apply a second sigmoid to the outputs.
+    criterion = nn.BCELoss()
+    optimizer = optim.SGD(model.parameters(), lr=0.1)
+
+    # Convert data to PyTorch tensors
+    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
+    y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
+
+    # Training loop
+    epochs = 100
+    for epoch in range(epochs):
+        # Forward pass
+        outputs = model(X_train_tensor)
+        loss = criterion(outputs, y_train_tensor)
+
+        # Backward pass and optimization
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+
+        if epoch % 10 == 0:
+            print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")
+
+    # Save the trained model parameters
+    torch.save(model.state_dict(), 'model.pth')
+
+
+# Only train when run as a script: run_inference.py imports SimpleNN from this
+# module and must not retrain and overwrite model.pth as an import side effect.
+if __name__ == "__main__":
+    main()