Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Test][HA] Ray Autoscaler enabled + Ray Serve autoscaling-enabled #2485

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 0 additions & 26 deletions ray-operator/test/e2e/locustfile.py

This file was deleted.

172 changes: 107 additions & 65 deletions ray-operator/test/e2e/rayservice_ha_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,83 +5,125 @@ import (

. "github.com/onsi/gomega"

corev1ac "k8s.io/client-go/applyconfigurations/core/v1"

rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
"github.com/ray-project/kuberay/ray-operator/controllers/ray/common"
rayv1ac "github.com/ray-project/kuberay/ray-operator/pkg/client/applyconfiguration/ray/v1"
. "github.com/ray-project/kuberay/ray-operator/test/support"
)

func TestRayService(t *testing.T) {
func TestStaticRayService(t *testing.T) {
rayserviceYamlFile := "testdata/rayservice.static.yaml"
locustYamlFile := "testdata/locust-cluster.const-rate.yaml"

test := With(t)
g := NewWithT(t)

// Create a namespace
namespace := test.NewTestNamespace()
test.StreamKubeRayOperatorLogs()

// Scripts for creating and terminating detached actors to trigger autoscaling
scriptsAC := newConfigMap(namespace.Name, "scripts", files(test, "locustfile.py", "locust_runner.py"))
scripts, err := test.Client().Core().CoreV1().ConfigMaps(namespace.Name).Apply(test.Ctx(), scriptsAC, TestApplyOptions)
// Create a ConfigMap with Locust runner script
configMapAC := newConfigMap(namespace.Name, "locust-runner-script", files(test, "locust_runner.py"))
configMap, err := test.Client().Core().CoreV1().ConfigMaps(namespace.Name).Apply(test.Ctx(), configMapAC, TestApplyOptions)
g.Expect(err).NotTo(HaveOccurred())
test.T().Logf("Created ConfigMap %s/%s successfully", configMap.Namespace, configMap.Name)

// Create the RayService for testing
KubectlApplyYAML(test, rayserviceYamlFile, namespace.Name)
rayService, err := GetRayService(test, namespace.Name, "test-rayservice")
g.Expect(err).NotTo(HaveOccurred())
test.T().Logf("Created ConfigMap %s/%s successfully", scripts.Namespace, scripts.Name)

test.T().Run("Static RayService", func(_ *testing.T) {
rayServiceAC := rayv1ac.RayService("static-raysvc", namespace.Name).
WithSpec(rayv1ac.RayServiceSpec().
WithServeConfigV2(`
proxy_location: EveryNode
applications:
- name: no_ops
route_prefix: /
import_path: microbenchmarks.no_ops:app_builder
args:
num_forwards: 0
runtime_env:
working_dir: https://github.com/ray-project/serve_workloads/archive/a2e2405f3117f1b4134b6924b5f44c4ff0710c00.zip
deployments:
- name: NoOp
num_replicas: 2
max_replicas_per_node: 1
ray_actor_options:
num_cpus: 1
`).
WithRayClusterSpec(newRayClusterSpec()))

rayService, err := test.Client().Ray().RayV1().RayServices(namespace.Name).Apply(test.Ctx(), rayServiceAC, TestApplyOptions)
g.Expect(err).NotTo(HaveOccurred())
test.T().Logf("Created RayService %s/%s successfully", rayService.Namespace, rayService.Name)

test.T().Logf("Waiting for RayService %s/%s to running", rayService.Namespace, rayService.Name)
g.Eventually(RayService(test, rayService.Namespace, rayService.Name), TestTimeoutMedium).
Should(WithTransform(RayServiceStatus, Equal(rayv1.Running)))

locustClusterAC := rayv1ac.RayCluster("locust-cluster", namespace.Name).
WithSpec(rayv1ac.RayClusterSpec().
WithRayVersion(GetRayVersion()).
WithHeadGroupSpec(rayv1ac.HeadGroupSpec().
WithRayStartParams(map[string]string{"dashboard-host": "0.0.0.0"}).
WithTemplate(apply(headPodTemplateApplyConfiguration(), mountConfigMap[corev1ac.PodTemplateSpecApplyConfiguration](scripts, "/home/ray/test_scripts")))))
locustCluster, err := test.Client().Ray().RayV1().RayClusters(namespace.Name).Apply(test.Ctx(), locustClusterAC, TestApplyOptions)
g.Expect(err).NotTo(HaveOccurred())
test.T().Logf("Created Locust RayCluster %s/%s successfully", locustCluster.Namespace, locustCluster.Name)

// Wait for RayCluster to become ready and verify the number of available worker replicas.
g.Eventually(RayCluster(test, locustCluster.Namespace, locustCluster.Name), TestTimeoutMedium).
Should(WithTransform(RayClusterState, Equal(rayv1.Ready)))
g.Expect(GetRayCluster(test, locustCluster.Namespace, locustCluster.Name)).To(WithTransform(RayClusterDesiredWorkerReplicas, Equal(int32(0))))

headPod, err := GetHeadPod(test, locustCluster)
g.Expect(err).NotTo(HaveOccurred())
test.T().Logf("Found head pod %s/%s", headPod.Namespace, headPod.Name)

// Install Locust in the head Pod
ExecPodCmd(test, headPod, common.RayHeadContainer, []string{"pip", "install", "locust"})

// Run Locust test
ExecPodCmd(test, headPod, common.RayHeadContainer, []string{
"python", "/home/ray/test_scripts/locust_runner.py", "-f", "/home/ray/test_scripts/locustfile.py", "--host", "http://static-raysvc-serve-svc:8000",
})
test.T().Logf("Created RayService %s/%s successfully", rayService.Namespace, rayService.Name)

test.T().Logf("Waiting for RayService %s/%s to running", rayService.Namespace, rayService.Name)
g.Eventually(RayService(test, rayService.Namespace, rayService.Name), TestTimeoutMedium).
Should(WithTransform(RayServiceStatus, Equal(rayv1.Running)))

// Create Locust RayCluster
KubectlApplyYAML(test, locustYamlFile, namespace.Name)
locustCluster, err := GetRayCluster(test, namespace.Name, "locust-cluster")
g.Expect(err).NotTo(HaveOccurred())
test.T().Logf("Created Locust RayCluster %s/%s successfully", locustCluster.Namespace, locustCluster.Name)

g.Eventually(RayCluster(test, locustCluster.Namespace, locustCluster.Name), TestTimeoutMedium).
Should(WithTransform(RayClusterState, Equal(rayv1.Ready)))
g.Expect(GetRayCluster(test, locustCluster.Namespace, locustCluster.Name)).To(WithTransform(RayClusterDesiredWorkerReplicas, Equal(int32(0))))

headPod, err := GetHeadPod(test, locustCluster)
g.Expect(err).NotTo(HaveOccurred())
test.T().Logf("Found head pod %s/%s", headPod.Namespace, headPod.Name)

// Install Locust in the head Pod
ExecPodCmd(test, headPod, common.RayHeadContainer, []string{"pip", "install", "locust"})

// Run Locust test
ExecPodCmd(test, headPod, common.RayHeadContainer, []string{
"python", "/locust-runner/locust_runner.py", "-f", "/locustfile/locustfile.py", "--host", "http://test-rayservice-serve-svc:8000",
})
}

func TestAutoscalingRayService(t *testing.T) {
rayserviceYamlFile := "testdata/rayservice.autoscaling.yaml"
locustYamlFile := "testdata/locust-cluster.burst.yaml"
numberOfPodsWhenSteady := 1

test := With(t)
g := NewWithT(t)

// Create a namespace
namespace := test.NewTestNamespace()
test.StreamKubeRayOperatorLogs()

// Create a ConfigMap with Locust runner script
configMapAC := newConfigMap(namespace.Name, "locust-runner-script", files(test, "locust_runner.py"))
configMap, err := test.Client().Core().CoreV1().ConfigMaps(namespace.Name).Apply(test.Ctx(), configMapAC, TestApplyOptions)
g.Expect(err).NotTo(HaveOccurred())
test.T().Logf("Created ConfigMap %s/%s successfully", configMap.Namespace, configMap.Name)

// Create the RayService for testing
KubectlApplyYAML(test, rayserviceYamlFile, namespace.Name)
rayService, err := GetRayService(test, namespace.Name, "test-rayservice")
g.Expect(err).NotTo(HaveOccurred())
test.T().Logf("Created RayService %s/%s successfully", rayService.Namespace, rayService.Name)

test.T().Logf("Waiting for RayService %s/%s to running", rayService.Namespace, rayService.Name)
g.Eventually(RayService(test, rayService.Namespace, rayService.Name), TestTimeoutMedium).
Should(WithTransform(RayServiceStatus, Equal(rayv1.Running)))

// Get the underlying RayCluster of the RayService
rayService, err = GetRayService(test, namespace.Name, rayService.Name)
g.Expect(err).NotTo(HaveOccurred())
rayServiceUnderlyingRayCluster, err := GetRayCluster(test, namespace.Name, rayService.Status.ActiveServiceStatus.RayClusterName)
g.Expect(err).NotTo(HaveOccurred())

// Check the number of worker pods is correct when RayService is steady
g.Eventually(WorkerPods(test, rayServiceUnderlyingRayCluster), TestTimeoutShort).Should(HaveLen(numberOfPodsWhenSteady))

// Create Locust RayCluster
KubectlApplyYAML(test, locustYamlFile, namespace.Name)
locustCluster, err := GetRayCluster(test, namespace.Name, "locust-cluster")
g.Expect(err).NotTo(HaveOccurred())
test.T().Logf("Created Locust RayCluster %s/%s successfully", locustCluster.Namespace, locustCluster.Name)

g.Eventually(RayCluster(test, locustCluster.Namespace, locustCluster.Name), TestTimeoutMedium).
Should(WithTransform(RayClusterState, Equal(rayv1.Ready)))
g.Expect(GetRayCluster(test, locustCluster.Namespace, locustCluster.Name)).To(WithTransform(RayClusterDesiredWorkerReplicas, Equal(int32(0))))

headPod, err := GetHeadPod(test, locustCluster)
g.Expect(err).NotTo(HaveOccurred())
test.T().Logf("Found head pod %s/%s", headPod.Namespace, headPod.Name)

// Install Locust in the head Pod
ExecPodCmd(test, headPod, common.RayHeadContainer, []string{"pip", "install", "locust"})

// Run Locust test
ExecPodCmd(test, headPod, common.RayHeadContainer, []string{
MortalHappiness marked this conversation as resolved.
Show resolved Hide resolved
"python", "/locust-runner/locust_runner.py", "-f", "/locustfile/locustfile.py", "--host", "http://test-rayservice-serve-svc:8000",
})

// Check the number of worker pods is more when RayService right after the burst
pods, err := GetWorkerPods(test, rayServiceUnderlyingRayCluster)
g.Expect(err).NotTo(HaveOccurred())
g.Expect(len(pods)).Should(BeNumerically(">", numberOfPodsWhenSteady))

// Check the number of worker pods is correct when RayService is steady
g.Eventually(WorkerPods(test, rayServiceUnderlyingRayCluster), TestTimeoutLong).Should(HaveLen(numberOfPodsWhenSteady))
}
71 changes: 71 additions & 0 deletions ray-operator/test/e2e/testdata/locust-cluster.burst.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Head-only RayCluster used purely as a Locust load generator for the
# autoscaling RayService e2e test. Two ConfigMap volumes provide the Locust
# test shape (locustfile-config, defined below) and the runner script
# (locust-runner-script, created by the Go test).
apiVersion: ray.io/v1
kind: RayCluster
metadata:
  name: locust-cluster
spec:
  rayVersion: '2.9.0'
  headGroupSpec:
    rayStartParams: {}
    template:
      spec:
        containers:
          - name: ray-head
            image: rayproject/ray:2.9.0
            resources:
              requests:
                cpu: 300m
                memory: 1G
              limits:
                cpu: 500m
                memory: 2G
            ports:
              - containerPort: 6379
                name: gcs-server
              - containerPort: 8265
                name: dashboard
              - containerPort: 10001
                name: client
            volumeMounts:
              # Locust test shape; read via -f /locustfile/locustfile.py.
              - mountPath: /locustfile
                name: locustfile-volume
              # Runner script; invoked as /locust-runner/locust_runner.py.
              - mountPath: /locust-runner
                name: locust-runner-volume
        volumes:
          - name: locustfile-volume
            configMap:
              name: locustfile-config
          - name: locust-runner-volume
            configMap:
              name: locust-runner-script
---
# Locust test shape: 10 users for the first 30s (steady), then a burst to 120
# users until 60s of run time (LoadTestShape stage durations are cumulative).
apiVersion: v1
kind: ConfigMap
metadata:
  name: locustfile-config
data:
  locustfile.py: |
    from locust import FastHttpUser, task, constant, LoadTestShape
    import os

    class ConstantUser(FastHttpUser):
        wait_time = constant(1)
        network_timeout = None
        connection_timeout = None

        @task
        def hello_world(self):
            self.client.post("/")

    class StagesShape(LoadTestShape):
        stages = [
            {"duration": 30, "users": 10, "spawn_rate": 10},
            {"duration": 60, "users": 120, "spawn_rate": 10},
        ]

        def tick(self):
            run_time = self.get_run_time()
            for stage in self.stages:
                if run_time < stage["duration"]:
                    tick_data = (stage["users"], stage["spawn_rate"])
                    return tick_data
            return None
70 changes: 70 additions & 0 deletions ray-operator/test/e2e/testdata/locust-cluster.const-rate.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Head-only RayCluster used purely as a Locust load generator for the static
# RayService e2e test. Two ConfigMap volumes provide the Locust test shape
# (locustfile-config, defined below) and the runner script
# (locust-runner-script, created by the Go test).
apiVersion: ray.io/v1
kind: RayCluster
metadata:
  name: locust-cluster
spec:
  rayVersion: '2.9.0'
  headGroupSpec:
    rayStartParams: {}
    template:
      spec:
        containers:
          - name: ray-head
            image: rayproject/ray:2.9.0
            resources:
              requests:
                cpu: 300m
                memory: 1G
              limits:
                cpu: 500m
                memory: 2G
            ports:
              - containerPort: 6379
                name: gcs-server
              - containerPort: 8265
                name: dashboard
              - containerPort: 10001
                name: client
            volumeMounts:
              # Locust test shape; read via -f /locustfile/locustfile.py.
              - mountPath: /locustfile
                name: locustfile-volume
              # Runner script; invoked as /locust-runner/locust_runner.py.
              - mountPath: /locust-runner
                name: locust-runner-volume
        volumes:
          - name: locustfile-volume
            configMap:
              name: locustfile-config
          - name: locust-runner-volume
            configMap:
              name: locust-runner-script
---
# Locust test shape: a single constant-rate stage of 10 users for 60 seconds.
apiVersion: v1
kind: ConfigMap
metadata:
  name: locustfile-config
data:
  locustfile.py: |
    from locust import FastHttpUser, task, constant, LoadTestShape
    import os

    class ConstantUser(FastHttpUser):
        wait_time = constant(1)
        network_timeout = None
        connection_timeout = None

        @task
        def hello_world(self):
            self.client.post("/")

    class StagesShape(LoadTestShape):
        stages = [
            {"duration": 60, "users": 10, "spawn_rate": 10},
        ]

        def tick(self):
            run_time = self.get_run_time()
            for stage in self.stages:
                if run_time < stage["duration"]:
                    tick_data = (stage["users"], stage["spawn_rate"])
                    return tick_data
            return None
Loading
Loading