fix: triton configuration refactoring #91

Open · wants to merge 2 commits into main

94 changes: 41 additions & 53 deletions src/transformer_deploy/triton/configuration.py
@@ -16,14 +16,15 @@
Generate Nvidia Triton server configuration files.
"""


import os
import shutil
from abc import ABC
from enum import Enum
from pathlib import Path
from typing import List, Optional

import tritonclient.grpc.model_config_pb2 as model_config
from google.protobuf import text_format
from transformers import PretrainedConfig, PreTrainedTokenizer


@@ -37,7 +38,6 @@ class EngineType(Enum):


class Configuration(ABC):

engine_type: Optional[EngineType]
python_code: Optional[str]

@@ -65,28 +65,39 @@ def __init__(
self.nb_instance = nb_instance
self.tensor_input_names = tensor_input_names
self.working_dir: Path = Path(working_directory)
self.device_kind = "KIND_GPU" if device == "cuda" else "KIND_CPU"
self.device_kind = (
model_config.ModelInstanceGroup.Kind.KIND_GPU
if device == "cuda"
else model_config.ModelInstanceGroup.Kind.KIND_CPU
)

@property
def python_folder_name(self) -> str:
raise Exception("to implement")

def _get_tokens(self) -> str:
def _get_tokens(self) -> List[model_config.ModelInput]:
"""
Generate the input tensor configurations.
:return: list of input tensor protobuf messages
"""
result: List[model_config.ModelInput] = list()
for input_name in self.tensor_input_names:
result.append(
model_config.ModelInput(name=input_name, data_type=model_config.DataType.TYPE_INT32, dims=[-1, -1])
)
return result

def _get_tokens_output(self) -> List[model_config.ModelOutput]:
"""
Generate the output tensor configurations.
:return: list of output tensor protobuf messages
"""
result: List[model_config.ModelOutput] = list()
for input_name in self.tensor_input_names:
text = f"""
{{
name: "{input_name}"
data_type: TYPE_INT32
dims: [-1, -1]
}}
""".strip()
result.append(text)
return ",\n".join(result)
result.append(
model_config.ModelOutput(name=input_name, data_type=model_config.DataType.TYPE_INT32, dims=[-1, -1])
)
return result

@property
def model_name(self) -> str:
@@ -110,56 +121,33 @@ def inference_platform(self) -> str:
else:
raise Exception(f"unknown model type: {self.engine_type}")

def _instance_group(self) -> str:
"""
Generate instance configuration.
:return: instance configuration
"""
return f"""
instance_group [
{{
count: {self.nb_instance}
kind: {self.device_kind}
}}
]
""".strip()

@staticmethod
def _get_header(name: str, platform: Optional[str] = None, backend: Optional[str] = None):
def _get_model_base(name: str, platform: Optional[str] = None, backend: Optional[str] = None) -> model_config.ModelConfig:
assert platform is not None or backend is not None
text = f"""
name: "{name}"
max_batch_size: 0
""".strip()
config = model_config.ModelConfig(name=name, max_batch_size=0)
if platform is not None:
text += f'\nplatform: "{platform}"'
config.platform = platform
if backend is not None:
text += f'\nbackend: "{backend}"'
return text
config.backend = backend
return config

def get_model_conf(self) -> str:
"""
Generate model configuration.
:return: model configuration
"""
return f"""
name: "{self.model_folder_name}"
max_batch_size: 0
platform: "{self.inference_platform}"
default_model_filename: "model.bin"

input [
{self._get_tokens()}
]

output {{
name: "output"
data_type: TYPE_FP32
dims: {str(self.dim_output)}
}}

{self._instance_group()}
""".strip()
config = model_config.ModelConfig(
name=self.model_folder_name,
max_batch_size=0,
platform=self.inference_platform,
default_model_filename="model.bin",
input=self._get_tokens(),
output=[
model_config.ModelOutput(name="output", data_type=model_config.DataType.TYPE_FP32, dims=self.dim_output)
],
instance_group=[model_config.ModelInstanceGroup(count=self.nb_instance, kind=self.device_kind)],
)
return text_format.MessageToString(config)

def create_configs(
self, tokenizer: PreTrainedTokenizer, config: PretrainedConfig, model_path: str, engine_type: EngineType
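The refactor above replaces hand-assembled pbtxt strings with typed protobuf messages from tritonclient, rendered through google.protobuf.text_format. A minimal standalone sketch of the pattern, assuming tritonclient is installed; the model name, tensor names, and dims below are illustrative, not taken from this PR:

import tritonclient.grpc.model_config_pb2 as model_config
from google.protobuf import text_format

# Build the config as a typed message instead of formatting a pbtxt template.
config = model_config.ModelConfig(
    name="sts_model",  # hypothetical model folder name
    max_batch_size=0,
    platform="onnxruntime_onnx",
    default_model_filename="model.bin",
    input=[
        model_config.ModelInput(
            name="input_ids", data_type=model_config.DataType.TYPE_INT32, dims=[-1, -1]
        )
    ],
    output=[
        model_config.ModelOutput(
            name="output", data_type=model_config.DataType.TYPE_FP32, dims=[-1, 2]
        )
    ],
    instance_group=[
        model_config.ModelInstanceGroup(
            count=1, kind=model_config.ModelInstanceGroup.Kind.KIND_GPU
        )
    ],
)
# Serialize to the protobuf text format Triton reads from config.pbtxt.
print(text_format.MessageToString(config))

A misspelled field or enum name now fails at construction time instead of silently producing an invalid config.pbtxt.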
40 changes: 12 additions & 28 deletions src/transformer_deploy/triton/configuration_decoder.py
@@ -18,6 +18,8 @@
import inspect
from pathlib import Path

import tritonclient.grpc.model_config_pb2 as model_config
from google.protobuf import text_format
from transformers import PretrainedConfig, PreTrainedTokenizer

from transformer_deploy.triton.configuration import Configuration, EngineType
@@ -38,34 +40,16 @@ def get_generation_conf(self) -> str:
Generate sequence configuration.
:return: sequence configuration
"""
return f"""
{self._get_header(name=self.python_folder_name, backend="python")}

input [
{{
name: "TEXT"
data_type: TYPE_STRING
dims: [ -1 ]
}}
]

output [
{{
name: "output"
data_type: TYPE_STRING
dims: [ -1 ]
}}
]

{self._instance_group()}

parameters: {{
key: "FORCE_CPU_ONLY_INPUT_TENSORS"
value: {{
string_value:"no"
}}
}}
""".strip()
config = self._get_model_base(name=self.python_folder_name, backend="python")
config.input.append(
model_config.ModelInput(name="TEXT", data_type=model_config.DataType.TYPE_STRING, dims=[-1])
)
config.output.append(
model_config.ModelOutput(name="output", data_type=model_config.DataType.TYPE_STRING, dims=[-1])
)
config.instance_group.append(model_config.ModelInstanceGroup(count=self.nb_instance, kind=self.device_kind))
config.parameters["FORCE_CPU_ONLY_INPUT_TENSORS"].string_value = "no"
return text_format.MessageToString(config)

def create_configs(
self, tokenizer: PreTrainedTokenizer, config: PretrainedConfig, model_path: str, engine_type: EngineType
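One detail worth flagging in the parameters handling: ModelConfig.parameters is a protobuf map field, so indexing a missing key creates the entry in place; no explicit ModelParameter construction is needed. A hedged sketch of the idiom plus a round-trip check (the model name here is illustrative):

import tritonclient.grpc.model_config_pb2 as model_config
from google.protobuf import text_format

config = model_config.ModelConfig(name="transformer_generate", backend="python")
# Map-field access auto-creates the ModelParameter entry.
config.parameters["FORCE_CPU_ONLY_INPUT_TENSORS"].string_value = "no"

rendered = text_format.MessageToString(config)
# Parsing the rendered text back verifies it is a well-formed ModelConfig.
text_format.Parse(rendered, model_config.ModelConfig())
print(rendered)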
107 changes: 36 additions & 71 deletions src/transformer_deploy/triton/configuration_encoder.py
@@ -18,6 +18,8 @@
import inspect
from pathlib import Path

import tritonclient.grpc.model_config_pb2 as model_config
from google.protobuf import text_format
from transformers import PretrainedConfig, PreTrainedTokenizer

from transformer_deploy.triton.configuration import Configuration, EngineType
@@ -38,85 +40,48 @@ def get_tokenize_conf(self) -> str:
Generate tokenization step configuration.
:return: tokenization step configuration
"""
return f"""
{self._get_header(name=self.python_folder_name, backend="python")}

input [
{{
name: "TEXT"
data_type: TYPE_STRING
dims: [ -1 ]
}}
]

output [
{self._get_tokens()}
]

{self._instance_group()}
""".strip()
config = self._get_model_base(name=self.python_folder_name, backend="python")
config.input.append(
model_config.ModelInput(name="TEXT", data_type=model_config.DataType.TYPE_STRING, dims=[-1])
)
config.output.extend(self._get_tokens_output())
config.instance_group.append(model_config.ModelInstanceGroup(count=self.nb_instance, kind=self.device_kind))
return text_format.MessageToString(config)

def get_inference_conf(self) -> str:
"""
Generate inference step configuration.
:return: inference step configuration
"""
output_map_blocks = list()
mapping_keys = {}
for input_name in self.tensor_input_names:
output_map_text = f"""
{{
key: "{input_name}"
value: "{input_name}"
}}
""".strip()
output_map_blocks.append(output_map_text)

mapping_keys = ",\n".join(output_map_blocks)

return f"""
{self._get_header(name=self.inference_folder_name, platform="ensemble")}

input [
{{
name: "TEXT"
data_type: TYPE_STRING
dims: [ -1 ]
}}
]

output {{
name: "output"
data_type: TYPE_FP32
dims: {str(self.dim_output)}
}}

ensemble_scheduling {{
step [
{{
model_name: "{self.python_folder_name}"
model_version: -1
input_map {{
key: "TEXT"
value: "TEXT"
}}
output_map [
{mapping_keys}
]
}},
{{
model_name: "{self.model_folder_name}"
model_version: -1
input_map [
{mapping_keys}
mapping_keys[input_name] = input_name

config = self._get_model_base(name=self.inference_folder_name, platform="ensemble")
config.input.append(
model_config.ModelInput(name="TEXT", data_type=model_config.DataType.TYPE_STRING, dims=[-1])
)
config.output.append(
model_config.ModelOutput(name="output", data_type=model_config.DataType.TYPE_FP32, dims=self.dim_output)
)
config.ensemble_scheduling.step.extend(
[
model_config.ModelEnsembling.Step(
model_name=self.python_folder_name,
model_version=-1,
input_map={"TEXT": "TEXT"},
output_map=mapping_keys,
),
model_config.ModelEnsembling.Step(
model_name=self.model_folder_name,
model_version=-1,
input_map=mapping_keys,
output_map={"output": "output"},
),
]
output_map {{
key: "output"
value: "output"
}}
}}
]
}}
""".strip()
)

return text_format.MessageToString(config)

def create_configs(
self, tokenizer: PreTrainedTokenizer, config: PretrainedConfig, model_path: str, engine_type: EngineType
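The ensemble above chains two steps: the Python tokenizer model feeds the exported inference model, so the tokenizer's output_map keys must match the model step's input_map keys, and both steps pin model_version to -1 (latest). A minimal sketch of the step messages, with a hypothetical single tensor name and hypothetical folder names:

import tritonclient.grpc.model_config_pb2 as model_config

# Hypothetical tensor produced by the tokenizer and consumed by the model.
mapping = {"input_ids": "input_ids"}

steps = [
    model_config.ModelEnsembling.Step(
        model_name="transformer_tokenize",  # hypothetical python backend folder
        model_version=-1,
        input_map={"TEXT": "TEXT"},
        output_map=mapping,
    ),
    model_config.ModelEnsembling.Step(
        model_name="transformer_model",  # hypothetical inference model folder
        model_version=-1,
        input_map=mapping,
        output_map={"output": "output"},
    ),
]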
40 changes: 12 additions & 28 deletions src/transformer_deploy/triton/configuration_token_classifier.py
@@ -18,6 +18,8 @@
import inspect
from pathlib import Path

import tritonclient.grpc.model_config_pb2 as model_config
from google.protobuf import text_format
from transformers import PretrainedConfig, PreTrainedTokenizer

from transformer_deploy.triton.configuration import Configuration, EngineType
@@ -38,35 +40,17 @@ def get_inference_conf(self) -> str:
Generate inference step configuration.
:return: inference step configuration
"""
return f"""
{self._get_header(name=self.python_folder_name, backend="python")}

input [
{{
name: "TEXT"
data_type: TYPE_STRING
dims: [ -1 ]
}}
]

output [
{{
name: "output"
data_type: TYPE_STRING
dims: [ -1 ]
}}
]

{self._instance_group()}


parameters: {{
key: "FORCE_CPU_ONLY_INPUT_TENSORS"
value: {{
string_value:"no"
}}
}}
""".strip()
config = self._get_model_base(name=self.python_folder_name, backend="python")
config.input.append(
model_config.ModelInput(name="TEXT", data_type=model_config.DataType.TYPE_STRING, dims=[-1])
)
config.output.append(
model_config.ModelOutput(name="output", data_type=model_config.DataType.TYPE_STRING, dims=[-1])
)
config.instance_group.append(model_config.ModelInstanceGroup(count=self.nb_instance, kind=self.device_kind))
config.parameters["FORCE_CPU_ONLY_INPUT_TENSORS"].string_value = "no"
return text_format.MessageToString(config)

def create_configs(
self,
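For reference, the token classifier config above should render to something along these lines; exact field order follows protobuf field numbers and may differ, and the model name and instance count are illustrative:

name: "token_classifier"
backend: "python"
max_batch_size: 0
input {
  name: "TEXT"
  data_type: TYPE_STRING
  dims: -1
}
output {
  name: "output"
  data_type: TYPE_STRING
  dims: -1
}
instance_group {
  count: 1
  kind: KIND_GPU
}
parameters {
  key: "FORCE_CPU_ONLY_INPUT_TENSORS"
  value {
    string_value: "no"
  }
}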