
Commit 26cd840

Ci quality workflows (#1423)
* Add inference tests
* Clean up
* Rename test graph file
* Add readme for tests
* Separate server fixture
* test file name change
* Assert images are generated
* Clean up comments
* Add __init__.py so tests can run with command line `pytest`
* Fix command line args for pytest
* Loop all samplers/schedulers in test_inference.py
* Ci quality workflows compare (#1)
* Add image comparison tests
* Comparison tests do not pass with empty metadata
* Ensure tests are run in correct order
* Save image files with test name
* Update tests readme
* Reduce step counts in tests to ~halve runtime
* Ci quality workflows build (#2)
* Add build test github workflow
1 parent b92bf81 commit 26cd840

10 files changed, +728 -0 lines changed

.github/workflows/test-build.yml

+31
name: Build package

#
# This workflow is a test of the python package build.
# Install Python dependencies across different Python versions.
#

on:
  push:
    paths:
      - "requirements.txt"
      - ".github/workflows/test-build.yml"

jobs:
  build:
    name: Build Test
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.8", "3.9", "3.10", "3.11"]
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

pytest.ini

+5
[pytest]
markers =
    inference: mark as inference test (deselect with '-m "not inference"')
testpaths = tests
addopts = -s
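
The `inference` marker registered here is what the deselect hint refers to; a test module would opt into it roughly as below (a minimal sketch with a hypothetical test name, not a file from this commit):

```python
import pytest

# Apply the marker registered in pytest.ini to every test in this module,
# so `pytest -m "not inference"` deselects them.
pytestmark = pytest.mark.inference

def test_example_inference():  # hypothetical test name
    assert True
```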

tests/README.md

+29
# Automated Testing

## Running tests locally

Additional requirements for running tests:
```
pip install pytest
pip install websocket-client==1.6.1
pip install opencv-python==4.6.0.66
pip install scikit-image==0.21.0
```
Run inference tests:
```
pytest tests/inference
```

## Quality regression test
Compares the images in two directories to ensure they are the same.

1) Run an inference test to save a directory of "ground truth" images:
```
pytest tests/inference --output_dir tests/inference/baseline
```
2) Make code edits.

3) Run the inference and quality comparison tests:
```
pytest
```

tests/__init__.py

Whitespace-only changes.

tests/compare/conftest.py

+41
import os
import pytest

# Command line arguments for pytest
def pytest_addoption(parser):
    parser.addoption('--baseline_dir', action="store", default='tests/inference/baseline', help='Directory for ground-truth images')
    parser.addoption('--test_dir', action="store", default='tests/inference/samples', help='Directory for images to test')
    parser.addoption('--metrics_file', action="store", default='tests/metrics.md', help='Output file for metrics')
    parser.addoption('--img_output_dir', action="store", default='tests/compare/samples', help='Output directory for diff metric images')

# This initializes args at the beginning of the test session
@pytest.fixture(scope="session", autouse=True)
def args_pytest(pytestconfig):
    args = {}
    args['baseline_dir'] = pytestconfig.getoption('baseline_dir')
    args['test_dir'] = pytestconfig.getoption('test_dir')
    args['metrics_file'] = pytestconfig.getoption('metrics_file')
    args['img_output_dir'] = pytestconfig.getoption('img_output_dir')

    # Initialize metrics file
    with open(args['metrics_file'], 'a') as f:
        # if file is empty, write header
        if os.stat(args['metrics_file']).st_size == 0:
            f.write("| date | run | file | status | value | \n")
            f.write("| --- | --- | --- | --- | --- | \n")

    return args


def gather_file_basenames(directory: str):
    files = []
    for file in os.listdir(directory):
        if file.endswith(".png"):
            files.append(file)
    return files

# Creates the list of baseline file names to use as a fixture
def pytest_generate_tests(metafunc):
    if "baseline_fname" in metafunc.fixturenames:
        baseline_fnames = gather_file_basenames(metafunc.config.getoption("baseline_dir"))
        metafunc.parametrize("baseline_fname", baseline_fnames)
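
To show how these hooks fit together, here is a minimal hypothetical consumer: `pytest_generate_tests` fans out one test per `.png` in `--baseline_dir`, and the session-scoped `args_pytest` fixture supplies the directory paths (sketch only; the real consumer is `tests/compare/test_quality.py` below):

```python
import os

# Hypothetical test: runs once per baseline image thanks to the
# parametrization performed in pytest_generate_tests above.
def test_baseline_image_exists(args_pytest, baseline_fname):
    baseline_path = os.path.join(args_pytest['baseline_dir'], baseline_fname)
    assert os.path.isfile(baseline_path)
```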

tests/compare/test_quality.py

+195
import datetime
import numpy as np
import os
from PIL import Image
import pytest
from pytest import fixture
from typing import Tuple, List

from cv2 import imread, cvtColor, COLOR_BGR2RGB
from skimage.metrics import structural_similarity as ssim


"""
This test suite compares images in 2 directories by file name
The directories are specified by the command line arguments --baseline_dir and --test_dir

"""
# ssim: Structural Similarity Index
# Returns a tuple of (ssim, diff_image)
def ssim_score(img0: np.ndarray, img1: np.ndarray) -> Tuple[float, np.ndarray]:
    score, diff = ssim(img0, img1, channel_axis=-1, full=True)
    # rescale the difference image to 0-255 range
    diff = (diff * 255).astype("uint8")
    return score, diff

# Metrics must return a tuple of (score, diff_image)
METRICS = {"ssim": ssim_score}
METRICS_PASS_THRESHOLD = {"ssim": 0.95}


class TestCompareImageMetrics:
    @fixture(scope="class")
    def test_file_names(self, args_pytest):
        test_dir = args_pytest['test_dir']
        fnames = self.gather_file_basenames(test_dir)
        yield fnames
        del fnames

    @fixture(scope="class", autouse=True)
    def teardown(self, args_pytest):
        yield
        # Runs after all tests are complete
        # Aggregate output files into a grid of images
        baseline_dir = args_pytest['baseline_dir']
        test_dir = args_pytest['test_dir']
        img_output_dir = args_pytest['img_output_dir']
        metrics_file = args_pytest['metrics_file']

        grid_dir = os.path.join(img_output_dir, "grid")
        os.makedirs(grid_dir, exist_ok=True)

        for metric_dir in METRICS.keys():
            metric_path = os.path.join(img_output_dir, metric_dir)
            for file in os.listdir(metric_path):
                if file.endswith(".png"):
                    score = self.lookup_score_from_fname(file, metrics_file)
                    image_file_list = []
                    image_file_list.append([
                        os.path.join(baseline_dir, file),
                        os.path.join(test_dir, file),
                        os.path.join(metric_path, file)
                    ])
                    # Create grid
                    image_list = [[Image.open(file) for file in files] for files in image_file_list]
                    grid = self.image_grid(image_list)
                    grid.save(os.path.join(grid_dir, f"{metric_dir}_{score:.3f}_{file}"))

    # Tests run for each baseline file name
    @fixture()
    def fname(self, baseline_fname):
        yield baseline_fname
        del baseline_fname

    def test_directories_not_empty(self, args_pytest):
        baseline_dir = args_pytest['baseline_dir']
        test_dir = args_pytest['test_dir']
        assert len(os.listdir(baseline_dir)) != 0, f"Baseline directory {baseline_dir} is empty"
        assert len(os.listdir(test_dir)) != 0, f"Test directory {test_dir} is empty"

    def test_dir_has_all_matching_metadata(self, fname, test_file_names, args_pytest):
        # Check that all files in baseline_dir have a file in test_dir with matching metadata
        baseline_file_path = os.path.join(args_pytest['baseline_dir'], fname)
        file_paths = [os.path.join(args_pytest['test_dir'], f) for f in test_file_names]
        file_match = self.find_file_match(baseline_file_path, file_paths)
        assert file_match is not None, f"Could not find a file in {args_pytest['test_dir']} with matching metadata to {baseline_file_path}"

    # For a baseline image file, finds the corresponding file name in test_dir and
    # compares the images using the metrics in METRICS
    @pytest.mark.parametrize("metric", METRICS.keys())
    def test_pipeline_compare(
        self,
        args_pytest,
        fname,
        test_file_names,
        metric,
    ):
        baseline_dir = args_pytest['baseline_dir']
        test_dir = args_pytest['test_dir']
        metrics_output_file = args_pytest['metrics_file']
        img_output_dir = args_pytest['img_output_dir']

        baseline_file_path = os.path.join(baseline_dir, fname)

        # Find file match
        file_paths = [os.path.join(test_dir, f) for f in test_file_names]
        test_file = self.find_file_match(baseline_file_path, file_paths)

        # Run metrics
        sample_baseline = self.read_img(baseline_file_path)
        sample_secondary = self.read_img(test_file)

        score, metric_img = METRICS[metric](sample_baseline, sample_secondary)
        metric_status = score > METRICS_PASS_THRESHOLD[metric]

        # Save metric values
        with open(metrics_output_file, 'a') as f:
            run_info = os.path.splitext(fname)[0]
            metric_status_str = "PASS ✅" if metric_status else "FAIL ❌"
            date_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            f.write(f"| {date_str} | {run_info} | {metric} | {metric_status_str} | {score} | \n")

        # Save metric image
        metric_img_dir = os.path.join(img_output_dir, metric)
        os.makedirs(metric_img_dir, exist_ok=True)
        output_filename = f'{fname}'
        Image.fromarray(metric_img).save(os.path.join(metric_img_dir, output_filename))

        assert score > METRICS_PASS_THRESHOLD[metric]

    def read_img(self, filename: str) -> np.ndarray:
        cvImg = imread(filename)
        cvImg = cvtColor(cvImg, COLOR_BGR2RGB)
        return cvImg

    def image_grid(self, img_list: list[list[Image.Image]]):
        # imgs is a 2D list of images
        # Assumes the input images are a rectangular grid of equal sized images
        rows = len(img_list)
        cols = len(img_list[0])

        w, h = img_list[0][0].size
        grid = Image.new('RGB', size=(cols*w, rows*h))

        for i, row in enumerate(img_list):
            for j, img in enumerate(row):
                grid.paste(img, box=(j*w, i*h))
        return grid

    def lookup_score_from_fname(self,
                                fname: str,
                                metrics_output_file: str
                                ) -> float:
        fname_basestr = os.path.splitext(fname)[0]
        with open(metrics_output_file, 'r') as f:
            for line in f:
                if fname_basestr in line:
                    score = float(line.split('|')[5])
                    return score
        raise ValueError(f"Could not find score for {fname} in {metrics_output_file}")

    def gather_file_basenames(self, directory: str):
        files = []
        for file in os.listdir(directory):
            if file.endswith(".png"):
                files.append(file)
        return files

    def read_file_prompt(self, fname: str) -> str:
        # Read prompt from image file metadata
        img = Image.open(fname)
        img.load()
        return img.info['prompt']

    def find_file_match(self, baseline_file: str, file_paths: List[str]):
        # Find a file in file_paths with matching metadata to baseline_file
        baseline_prompt = self.read_file_prompt(baseline_file)

        # Do not match empty prompts
        if baseline_prompt is None or baseline_prompt == "":
            return None

        # Find file match
        # Reorder test_file_names so that the file with matching name is first
        # This is an optimization because matching file names are more likely
        # to have matching metadata if they were generated with the same script
        basename = os.path.basename(baseline_file)
        file_path_basenames = [os.path.basename(f) for f in file_paths]
        if basename in file_path_basenames:
            match_index = file_path_basenames.index(basename)
            file_paths.insert(0, file_paths.pop(match_index))

        for f in file_paths:
            test_file_prompt = self.read_file_prompt(f)
            if baseline_prompt == test_file_prompt:
                return f
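
The `METRICS` dictionary is the extension point for this suite: every entry must return a `(score, diff_image)` tuple and have a matching entry in `METRICS_PASS_THRESHOLD`. A second metric could be wired in along these lines (an illustrative sketch based on an inverted mean absolute error, not part of the commit):

```python
import numpy as np

# Hypothetical extra metric: 1.0 means identical images, lower means more different.
def mae_score(img0: np.ndarray, img1: np.ndarray):
    # Use a signed dtype so uint8 subtraction does not wrap around.
    diff = np.abs(img0.astype(np.int16) - img1.astype(np.int16))
    score = 1.0 - diff.mean() / 255.0
    diff_image = (255 - diff).astype("uint8")  # bright where the images agree
    return score, diff_image

# METRICS["mae"] = mae_score
# METRICS_PASS_THRESHOLD["mae"] = 0.98
```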

tests/conftest.py

+36
import os
import pytest

# Command line arguments for pytest
def pytest_addoption(parser):
    parser.addoption('--output_dir', action="store", default='tests/inference/samples', help='Output directory for generated images')
    parser.addoption("--listen", type=str, default="127.0.0.1", metavar="IP", nargs="?", const="0.0.0.0", help="Specify the IP address to listen on (default: 127.0.0.1). If --listen is provided without an argument, it defaults to 0.0.0.0. (listens on all)")
    parser.addoption("--port", type=int, default=8188, help="Set the listen port.")

# This initializes args at the beginning of the test session
@pytest.fixture(scope="session", autouse=True)
def args_pytest(pytestconfig):
    args = {}
    args['output_dir'] = pytestconfig.getoption('output_dir')
    args['listen'] = pytestconfig.getoption('listen')
    args['port'] = pytestconfig.getoption('port')

    os.makedirs(args['output_dir'], exist_ok=True)

    return args

def pytest_collection_modifyitems(items):
    # Modifies items so tests run in the correct order

    LAST_TESTS = ['test_quality']

    # Move the last items to the end
    last_items = []
    for test_name in LAST_TESTS:
        for item in items.copy():
            print(item.module.__name__, item)
            if item.module.__name__ == test_name:
                last_items.append(item)
                items.remove(item)

    items.extend(last_items)
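
For reference, a test elsewhere in the suite would consume the session-scoped `args_pytest` fixture roughly as follows, building a server address from `--listen`/`--port` and relying on the output directory created above (a hedged sketch; the actual inference tests are not part of this diff):

```python
import os

# Hypothetical consumer of the args_pytest fixture defined above.
def test_args_are_wired_up(args_pytest):
    # The fixture creates the output directory before any test runs.
    assert os.path.isdir(args_pytest['output_dir'])
    # Inference tests would target the server at e.g. "127.0.0.1:8188".
    server_address = f"{args_pytest['listen']}:{args_pytest['port']}"
    assert ":" in server_address
```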

tests/inference/__init__.py

Whitespace-only changes.
