Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Auto assign GPUs for NIM deployments when a cached optimized profile is specified #142

Merged
merged 2 commits into from
Sep 16, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 54 additions & 22 deletions internal/controller/platform/standalone/nimservice.go
Original file line number Diff line number Diff line change
Expand Up @@ -459,50 +459,82 @@ func (r *NIMServiceReconciler) getNIMCacheProfile(ctx context.Context, nimServic
return nil, nil
}

// getTensorParallelismByProfile returns the value of tensor parallelism parameter in the given NIM profile
func (r *NIMServiceReconciler) getTensorParallelismByProfile(ctx context.Context, profile *appsv1alpha1.NIMProfile) (string, error) {
// List of possible keys for tensor parallelism
possibleKeys := []string{"tensorParallelism", "tp"}
shivamerla marked this conversation as resolved.
Show resolved Hide resolved

tensorParallelism := ""
if tp, exists := profile.Config["tp"]; exists {
tensorParallelism = tp

// Iterate through possible keys and return the first valid value
for _, key := range possibleKeys {
if value, exists := profile.Config[key]; exists {
tensorParallelism = value
break
}
}

return tensorParallelism, nil
}

// assignGPUResources automatically assigns GPU resources to the NIMService based on the provided profile,
// but retains any user-specified GPU resources if they are explicitly provided.
//
// This function retrieves the tensor parallelism (TP) value from the provided profile config to determine
// the number of GPUs to be allocated. If the TP value is defined and no GPU resources have been
// explicitly provided by the user, the function allocates GPUs according to the TP value.
// If the TP value is not present, the function defaults to allocating 1 GPU.
func (r *NIMServiceReconciler) assignGPUResources(ctx context.Context, nimService *appsv1alpha1.NIMService, profile *appsv1alpha1.NIMProfile, deploymentParams *rendertypes.DeploymentParams) error {
logger := log.FromContext(ctx)

// Assign GPU resources
// TODO: Make the resource name configurable
const gpuResourceName = corev1.ResourceName("nvidia.com/gpu")

// Check if the user has already provided a GPU resource quantity in the requests or limits
if deploymentParams.Resources != nil {
if _, gpuRequested := deploymentParams.Resources.Requests[gpuResourceName]; gpuRequested {
logger.V(2).Info("User has provided GPU resource requests, skipping auto-assignment", "gpuResource", gpuResourceName)
return nil
}
if _, gpuLimit := deploymentParams.Resources.Limits[gpuResourceName]; gpuLimit {
logger.V(2).Info("User has provided GPU resource limits, skipping auto-assignment", "gpuResource", gpuResourceName)
return nil
}
}

// If no user-provided GPU resource is found, proceed with auto-assignment
// Get tensorParallelism from the profile
tensorParallelism, err := r.getTensorParallelismByProfile(ctx, profile)
if err != nil {
logger.Error(err, "Failed to retrieve tensorParallelism")
return err
}

// Check if tensorParallelism is defined in the profile, and automatically assign GPU resources.
// Note: This will override any manual GPU assignments made by the user.
// The number of GPUs for an optimized profile is fixed.
// Allocating more GPUs than required may result in underutilization,
// while allocating fewer GPUs will likely cause failures.
// Initialize the Resources field if not already initialized
if deploymentParams.Resources == nil {
deploymentParams.Resources = &corev1.ResourceRequirements{
Requests: corev1.ResourceList{},
Limits: corev1.ResourceList{},
}
}

// Assign GPU resources based on tensorParallelism, or default to 1 GPU if tensorParallelism is not available
gpuQuantity := apiResource.MustParse("1") // Default to 1 GPU

if tensorParallelism != "" {
shivamerla marked this conversation as resolved.
Show resolved Hide resolved
gpuQuantity, err := apiResource.ParseQuantity(tensorParallelism)
gpuQuantity, err = apiResource.ParseQuantity(tensorParallelism)
if err != nil {
return fmt.Errorf("failed to parse tensorParallelism: %w", err)
}

// Ensure that the Resources field is initialized
if deploymentParams.Resources == nil {
deploymentParams.Resources = &corev1.ResourceRequirements{
Requests: corev1.ResourceList{},
Limits: corev1.ResourceList{},
}
}

// TODO: Make the resource name configurable
const gpuResourceName = corev1.ResourceName("nvidia.com/gpu")

deploymentParams.Resources.Requests[corev1.ResourceName(gpuResourceName)] = gpuQuantity
deploymentParams.Resources.Limits[corev1.ResourceName(gpuResourceName)] = gpuQuantity
logger.V(2).Info("Auto-assigning GPU resources based on tensorParallelism", "tensorParallelism", tensorParallelism, "gpuQuantity", gpuQuantity.String())
} else {
logger.V(2).Info("tensorParallelism not found, assigning 1 GPU by default", "Profile", profile.Name)
}

shivamerla marked this conversation as resolved.
Show resolved Hide resolved
// Assign the GPU quantity for both requests and limits
deploymentParams.Resources.Requests[gpuResourceName] = gpuQuantity
deploymentParams.Resources.Limits[gpuResourceName] = gpuQuantity

return nil
}
34 changes: 31 additions & 3 deletions internal/controller/platform/standalone/nimservice_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -607,35 +607,63 @@ var _ = Describe("NIMServiceReconciler for a standalone platform", func() {
})

Describe("assignGPUResources", func() {
// Verifies that auto-assignment is a no-op when the user already set an
// explicit GPU quantity, even though the profile carries tp=4.
It("should retain user-provided GPU resources and not override them", func() {
	// Profile requests 4-way tensor parallelism.
	profile := &appsv1alpha1.NIMProfile{
		Name:   "test-profile",
		Config: map[string]string{"tp": "4"},
	}

	// Initialize deployment params with user-provided GPU resources
	// (8 GPUs in both requests and limits).
	deploymentParams := &rendertypes.DeploymentParams{
		Resources: &corev1.ResourceRequirements{
			Requests: corev1.ResourceList{
				corev1.ResourceName("nvidia.com/gpu"): apiResource.MustParse("8"),
			},
			Limits: corev1.ResourceList{
				corev1.ResourceName("nvidia.com/gpu"): apiResource.MustParse("8"),
			},
		},
	}

	Expect(reconciler.assignGPUResources(context.TODO(), nimService, profile, deploymentParams)).To(Succeed())

	// Ensure the user-provided GPU resources (8) are retained and not overridden
	// by the profile's tp value (4).
	Expect(deploymentParams.Resources.Requests).To(HaveKeyWithValue(corev1.ResourceName("nvidia.com/gpu"), apiResource.MustParse("8")))
	Expect(deploymentParams.Resources.Limits).To(HaveKeyWithValue(corev1.ResourceName("nvidia.com/gpu"), apiResource.MustParse("8")))
})

// Verifies that tp from the profile config drives the assigned GPU count
// when the user did not specify any GPU resources.
It("should assign GPU resources when tensor parallelism is provided", func() {
	profile := &appsv1alpha1.NIMProfile{
		Name:   "test-profile",
		Config: map[string]string{"tp": "4"},
	}
	// Initialize deployment params with no user-provided GPU resources
	deploymentParams := &rendertypes.DeploymentParams{}

	Expect(reconciler.assignGPUResources(context.TODO(), nimService, profile, deploymentParams)).To(Succeed())
	// tp=4 must be reflected in both requests and limits.
	Expect(deploymentParams.Resources.Requests).To(HaveKeyWithValue(corev1.ResourceName("nvidia.com/gpu"), apiResource.MustParse("4")))
	Expect(deploymentParams.Resources.Limits).To(HaveKeyWithValue(corev1.ResourceName("nvidia.com/gpu"), apiResource.MustParse("4")))
})

// Verifies the default path: with no tp key in the profile and no
// user-provided resources, exactly 1 GPU is assigned.
It("should assign 1 GPU resource if tensor parallelism is not provided", func() {
	profile := &appsv1alpha1.NIMProfile{
		Name:   "test-profile",
		Config: map[string]string{},
	}
	// Initialize deployment params with no user-provided GPU resources
	deploymentParams := &rendertypes.DeploymentParams{}

	Expect(reconciler.assignGPUResources(context.TODO(), nimService, profile, deploymentParams)).To(Succeed())
	// Resources must have been initialized with a default of exactly 1 GPU
	// in both requests and limits.
	Expect(deploymentParams.Resources.Requests).To(HaveKeyWithValue(corev1.ResourceName("nvidia.com/gpu"), apiResource.MustParse("1")))
	Expect(deploymentParams.Resources.Limits).To(HaveKeyWithValue(corev1.ResourceName("nvidia.com/gpu"), apiResource.MustParse("1")))
})

It("should return an error if tensor parallelism cannot be parsed", func() {
profile := &appsv1alpha1.NIMProfile{
Name: "test-profile",
Config: map[string]string{"tp": "invalid"},
}
// Initialize deployment params with no user-provided GPU resources
deploymentParams := &rendertypes.DeploymentParams{}

err := reconciler.assignGPUResources(context.TODO(), nimService, profile, deploymentParams)
Expand Down