Skip to content

Commit e4a3452

Browse files
committed
update trt example
1 parent afba506 commit e4a3452

File tree

1 file changed

+39
-14
lines changed

1 file changed

+39
-14
lines changed

quantization/image_classification/trt/resnet50/e2e_tensorrt_resnet_example.py

+39-14
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
import os
22
import onnx
3-
import glob
43
import scipy.io
54
import numpy as np
65
import logging
76
from PIL import Image
87
import onnx
98
import onnxruntime
10-
from onnxruntime.quantization import CalibrationDataReader, create_calibrator, write_calibration_table
9+
import time
1110

11+
from onnxruntime.quantization import CalibrationDataReader, create_calibrator, write_calibration_table
1212

1313
class ImageNetDataReader(CalibrationDataReader):
1414
def __init__(self,
@@ -126,10 +126,10 @@ def preprocess_imagenet(self, images_folder, height, width, start_index=0, size_
126126
return: list of matrices characterizing multiple images
127127
'''
128128
def preprocess_images(input, channels=3, height=224, width=224):
129-
image = input.resize((width, height), Image.ANTIALIAS)
129+
image = input.resize((width, height), Image.Resampling.LANCZOS)
130130
input_data = np.asarray(image).astype(np.float32)
131131
if len(input_data.shape) != 2:
132-
input_data = input_data.transpose([2, 0, 1])
132+
input_data = input_data.transpose([2, 0, 1])[:3]
133133
else:
134134
input_data = np.stack([input_data] * 3)
135135
mean = np.array([0.079, 0.05, 0]) + 0.406
@@ -217,6 +217,7 @@ def __init__(self,
217217
self.data_reader = data_reader
218218
self.providers = providers
219219
self.prediction_result_list = []
220+
self.inference_latency_list = []
220221
self.synset_id = synset_id
221222

222223
def get_result(self):
@@ -233,7 +234,12 @@ def predict(self):
233234
inputs = self.data_reader.get_next()
234235
if not inputs:
235236
break
237+
238+
start_ns = time.perf_counter_ns()
236239
output = session.run(None, inputs)
240+
end_ns = time.perf_counter_ns()
241+
self.inference_latency_list.append(end_ns - start_ns)
242+
237243
inference_outputs_list.append(output)
238244
self.prediction_result_list = inference_outputs_list
239245

@@ -254,6 +260,9 @@ def evaluate(self, prediction_results):
254260
i = i + batch_size
255261
print("top 1: ", self.top_k_accuracy(self.synset_id, y_prediction, k=1))
256262
print("top 5: ", self.top_k_accuracy(self.synset_id, y_prediction, k=5))
263+
if self.inference_latency_list:
264+
print("average latency:", sum(self.inference_latency_list) / len(self.inference_latency_list) / 1e6, " ms")
265+
257266

258267

259268
def convert_model_batch_to_dynamic(model_path):
@@ -303,7 +312,7 @@ def get_dataset_size(dataset_path, calibration_dataset_size):
303312
4. Extract development kit to 'ILSVRC2012/devkit'. Two files in the development kit are used, 'ILSVRC2012_validation_ground_truth.txt' and 'meta.mat'.
304313
5. Download 'synset_words.txt' from https://github.com/HoldenCaulfieldRye/caffe/blob/master/data/ilsvrc12/synset_words.txt into 'ILSVRC2012/'.
305314
306-
Please download Resnet50 model from ONNX model zoo https://github.com/onnx/models/blob/master/vision/classification/resnet/model/resnet50-v2-7.tar.gz
315+
Please download Resnet50 model from ONNX model zoo https://github.com/onnx/models/raw/refs/heads/main/validated/vision/classification/resnet/model/resnet50-v2-7.onnx
307316
Untar the model into the workspace
308317
'''
309318

@@ -317,13 +326,6 @@ def get_dataset_size(dataset_path, calibration_dataset_size):
317326
# INT8 calibration setting
318327
calibration_table_generation_enable = True # Enable/Disable INT8 calibration
319328

320-
# TensorRT EP INT8 settings
321-
os.environ["ORT_TENSORRT_FP16_ENABLE"] = "1" # Enable FP16 precision
322-
os.environ["ORT_TENSORRT_INT8_ENABLE"] = "1" # Enable INT8 precision
323-
os.environ["ORT_TENSORRT_INT8_CALIBRATION_TABLE_NAME"] = "calibration.flatbuffers" # Calibration table name
324-
os.environ["ORT_TENSORRT_ENGINE_CACHE_ENABLE"] = "1" # Enable engine caching
325-
execution_provider = ["TensorrtExecutionProvider"]
326-
327329
# Convert static batch to dynamic batch
328330
[new_model_path, input_name] = convert_model_batch_to_dynamic(model_path)
329331

@@ -343,7 +345,7 @@ def get_dataset_size(dataset_path, calibration_dataset_size):
343345
model_path=augmented_model_path,
344346
input_name=input_name)
345347
calibrator.collect_data(data_reader)
346-
write_calibration_table(calibrator.compute_range())
348+
write_calibration_table(calibrator.compute_data())
347349

348350
# Run prediction in Tensorrt EP
349351
data_reader = ImageNetDataReader(ilsvrc2012_dataset_path,
@@ -355,7 +357,30 @@ def get_dataset_size(dataset_path, calibration_dataset_size):
355357
input_name=input_name)
356358
synset_id = data_reader.get_synset_id(ilsvrc2012_dataset_path, calibration_dataset_size,
357359
prediction_dataset_size) # Generate synset id
358-
evaluator = ImageClassificationEvaluator(new_model_path, synset_id, data_reader, providers=execution_provider)
360+
361+
# providers = ["CUDAExecutionProvider"]
362+
# -----H100-----
363+
# top 1: 0.7419183673469387
364+
# top 5: 0.9174897959183673
365+
# average latency: 5.676715467755102 ms
366+
367+
# providers = [('TensorrtExecutionProvider', {"trt_fp16_enable": True})]
368+
# -----H100-----
369+
# top 1: 0.7421020408163266
370+
# top 5: 0.917530612244898
371+
# average latency: 27.816876598367347 ms
372+
373+
providers = [('TensorrtExecutionProvider', {
374+
"trt_fp16_enable": True,
375+
"trt_int8_enable": True,
376+
"trt_int8_calibration_table_name": "calibration.flatbuffers",
377+
"trt_engine_cache_enable": True})]
378+
# -----H100-----
379+
# top 1: 0.7101020408163266
380+
# top 5: 0.898061224489796
381+
# average latency: 2.2716067718367348 ms
382+
383+
evaluator = ImageClassificationEvaluator(new_model_path, synset_id, data_reader, providers=providers)
359384
evaluator.predict()
360385
result = evaluator.get_result()
361386
evaluator.evaluate(result)

0 commit comments

Comments (0)