1
1
import os
2
2
import onnx
3
- import glob
4
3
import scipy .io
5
4
import numpy as np
6
5
import logging
7
6
from PIL import Image
8
7
import onnx
9
8
import onnxruntime
10
- from onnxruntime . quantization import CalibrationDataReader , create_calibrator , write_calibration_table
9
+ import time
11
10
11
+ from onnxruntime .quantization import CalibrationDataReader , create_calibrator , write_calibration_table
12
12
13
13
class ImageNetDataReader (CalibrationDataReader ):
14
14
def __init__ (self ,
@@ -126,10 +126,10 @@ def preprocess_imagenet(self, images_folder, height, width, start_index=0, size_
126
126
return: list of matrices characterizing multiple images
127
127
'''
128
128
def preprocess_images (input , channels = 3 , height = 224 , width = 224 ):
129
- image = input .resize ((width , height ), Image .ANTIALIAS )
129
+ image = input .resize ((width , height ), Image .Resampling . LANCZOS )
130
130
input_data = np .asarray (image ).astype (np .float32 )
131
131
if len (input_data .shape ) != 2 :
132
- input_data = input_data .transpose ([2 , 0 , 1 ])
132
+ input_data = input_data .transpose ([2 , 0 , 1 ])[: 3 ]
133
133
else :
134
134
input_data = np .stack ([input_data ] * 3 )
135
135
mean = np .array ([0.079 , 0.05 , 0 ]) + 0.406
@@ -217,6 +217,7 @@ def __init__(self,
217
217
self .data_reader = data_reader
218
218
self .providers = providers
219
219
self .prediction_result_list = []
220
+ self .inference_latency_list = []
220
221
self .synset_id = synset_id
221
222
222
223
def get_result (self ):
@@ -233,7 +234,12 @@ def predict(self):
233
234
inputs = self .data_reader .get_next ()
234
235
if not inputs :
235
236
break
237
+
238
+ start_ns = time .perf_counter_ns ()
236
239
output = session .run (None , inputs )
240
+ end_ns = time .perf_counter_ns ()
241
+ self .inference_latency_list .append (end_ns - start_ns )
242
+
237
243
inference_outputs_list .append (output )
238
244
self .prediction_result_list = inference_outputs_list
239
245
@@ -254,6 +260,9 @@ def evaluate(self, prediction_results):
254
260
i = i + batch_size
255
261
print ("top 1: " , self .top_k_accuracy (self .synset_id , y_prediction , k = 1 ))
256
262
print ("top 5: " , self .top_k_accuracy (self .synset_id , y_prediction , k = 5 ))
263
+ if self .inference_latency_list :
264
+ print ("average latency:" , sum (self .inference_latency_list ) / len (self .inference_latency_list ) / 1e6 , " ms" )
265
+
257
266
258
267
259
268
def convert_model_batch_to_dynamic (model_path ):
@@ -303,7 +312,7 @@ def get_dataset_size(dataset_path, calibration_dataset_size):
303
312
4. Extract development kit to 'ILSVRC2012/devkit'. Two files in the development kit are used, 'ILSVRC2012_validation_ground_truth.txt' and 'meta.mat'.
304
313
5. Download 'synset_words.txt' from https://github.com/HoldenCaulfieldRye/caffe/blob/master/data/ilsvrc12/synset_words.txt into 'ILSVRC2012/'.
305
314
306
- Please download Resnet50 model from ONNX model zoo https://github.com/onnx/models/blob/master/vision/classification/resnet/model/resnet50-v2-7.tar.gz
315
+ Please download Resnet50 model from ONNX model zoo https://github.com/onnx/models/raw/refs/heads/main/validated/vision/classification/resnet/model/resnet50-v2-7.onnx
307
316
Untar the model into the workspace
308
317
'''
309
318
@@ -317,13 +326,6 @@ def get_dataset_size(dataset_path, calibration_dataset_size):
317
326
# INT8 calibration setting
318
327
calibration_table_generation_enable = True # Enable/Disable INT8 calibration
319
328
320
- # TensorRT EP INT8 settings
321
- os .environ ["ORT_TENSORRT_FP16_ENABLE" ] = "1" # Enable FP16 precision
322
- os .environ ["ORT_TENSORRT_INT8_ENABLE" ] = "1" # Enable INT8 precision
323
- os .environ ["ORT_TENSORRT_INT8_CALIBRATION_TABLE_NAME" ] = "calibration.flatbuffers" # Calibration table name
324
- os .environ ["ORT_TENSORRT_ENGINE_CACHE_ENABLE" ] = "1" # Enable engine caching
325
- execution_provider = ["TensorrtExecutionProvider" ]
326
-
327
329
# Convert static batch to dynamic batch
328
330
[new_model_path , input_name ] = convert_model_batch_to_dynamic (model_path )
329
331
@@ -343,7 +345,7 @@ def get_dataset_size(dataset_path, calibration_dataset_size):
343
345
model_path = augmented_model_path ,
344
346
input_name = input_name )
345
347
calibrator .collect_data (data_reader )
346
- write_calibration_table (calibrator .compute_range ())
348
+ write_calibration_table (calibrator .compute_data ())
347
349
348
350
# Run prediction in TensorRT EP
349
351
data_reader = ImageNetDataReader (ilsvrc2012_dataset_path ,
@@ -355,7 +357,30 @@ def get_dataset_size(dataset_path, calibration_dataset_size):
355
357
input_name = input_name )
356
358
synset_id = data_reader .get_synset_id (ilsvrc2012_dataset_path , calibration_dataset_size ,
357
359
prediction_dataset_size ) # Generate synset id
358
- evaluator = ImageClassificationEvaluator (new_model_path , synset_id , data_reader , providers = execution_provider )
360
+
361
+ # providers = ["CUDAExecutionProvider"]
362
+ # -----H100-----
363
+ # top 1: 0.7419183673469387
364
+ # top 5: 0.9174897959183673
365
+ # average latency: 5.676715467755102 ms
366
+
367
+ # providers = [('TensorrtExecutionProvider', {"trt_fp16_enable": True})]
368
+ # -----H100-----
369
+ # top 1: 0.7421020408163266
370
+ # top 5: 0.917530612244898
371
+ # average latency: 27.816876598367347 ms
372
+
373
+ providers = [('TensorrtExecutionProvider' , {
374
+ "trt_fp16_enable" : True ,
375
+ "trt_int8_enable" : True ,
376
+ "trt_int8_calibration_table_name" : "calibration.flatbuffers" ,
377
+ "trt_engine_cache_enable" : True })]
378
+ # -----H100-----
379
+ # top 1: 0.7101020408163266
380
+ # top 5: 0.898061224489796
381
+ # average latency: 2.2716067718367348 ms
382
+
383
+ evaluator = ImageClassificationEvaluator (new_model_path , synset_id , data_reader , providers = providers )
359
384
evaluator .predict ()
360
385
result = evaluator .get_result ()
361
386
evaluator .evaluate (result )
0 commit comments