- Ajouter 1 node GPU
- Déployer l'appli, puis la scaler à 0 réplica
kubectl apply --namespace gpu-operator -f app-1/deployment-1.yaml
Open https://chocol-ai-tine.devfest-toulouse.opsrel.io/
kubectl apply --namespace gpu-operator -f app-1/deployment-2.yaml
# helm repo add nvidia https://helm.ngc.nvidia.com/nvidia
# helm repo update
helm upgrade --install gpu-operator nvidia/gpu-operator --namespace gpu-operator --create-namespace --wait --version=24.6.2 -f manifests/gpu-operator-values.yaml
kubectl apply --namespace gpu-operator -f app-2/deployment-2.yaml
Open https://pech-ai-bou.devfest-toulouse.opsrel.io/
# helm
mig:
  strategy: mixed
Scale des déploiements à 0 pour permettre la reconfiguration du driver
kubectl scale deployment pech-ai-bou --namespace gpu-operator --replicas 0
kubectl scale deployment chocol-ai-tine --namespace gpu-operator --replicas 0
kubectl label node -l "node.k8s.ovh/type=gpu" "nvidia.com/mig.config=all-3g.40gb" --overwrite
# deployment
resources:
  limits:
    # nvidia.com/gpu: 1
    nvidia.com/mig-3g.40gb: 1
kubectl apply --namespace gpu-operator -f app-1/deployment-3.yaml
kubectl apply --namespace gpu-operator -f app-2/deployment-3.yaml
# helm
devicePlugin:
  config:
    create: true
    default: "mps4"
    name: "mps-parted-config"
    data:
      mps4: |-
        version: v1
        sharing:
          mps:
            resources:
              - name: nvidia.com/gpu
                replicas: 4