Skip to content

Commit 34b8c1c

Browse files
authored
Simulate scheduler with debuggable scheduler container (#370)
* move simulator entrypoint to simulator/cmd/simulator * fix config position * fix Dockerfile * can restart pod from server * fix docker-compose * tweak * tweak * update extender.md * delete StartScheduler * tweak * delete unused func * add --master option * rename WriteConfig to UpdateSchedulerConfig, and write func description RestartScheduler * tweak errors * refactor restartContainer * delete unnecessary file copy * scheduler config use default config * extender example * tweak * tweak * add comment to init-container * add error log for restartContainer * tweak * fix extender document * %w -> %v * fix can't find simulator-scheduler case * add comment * return error when failed init * update how-it-works.md * change log text * bug fix
1 parent 18fe137 commit 34b8c1c

File tree

15 files changed

+381
-254
lines changed

15 files changed

+381
-254
lines changed

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -74,4 +74,4 @@ docker_down:
7474

7575
.PHONY: docker_down_local
7676
docker_down_local:
77-
docker compose -f compose.yml -f compose.local.yml down
77+
docker compose -f compose.yml -f compose.local.yml down --volumes

compose.local.yml

+30-3
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,25 @@
1+
# Note for maintainers: When making a change in this file,
2+
# you likely need to make the same change in simulator/docs/sample/plugin-extender/docker-compose.yaml.
3+
version: "3.7"
14
services:
5+
# This container copies the data,
6+
# so any changes made to the configuration files within the Pod will not affect the original files.
7+
init-container:
8+
image: busybox
9+
volumes:
10+
- conf:/config
11+
- ${PWD}/simulator/cmd/scheduler:/host-config:ro
12+
command: sh -c "cp -rf /host-config/* /config/"
213
simulator-scheduler:
314
image: simulator-scheduler
415
container_name: simulator-scheduler
516
environment:
6-
- KUBE_APISERVER_URL=http://simulator-cluster:3131
7-
- KUBECONFIG=/root/.kube/kubeconfig.yaml
17+
- KUBECONFIG=/config/kubeconfig.yaml
818
volumes:
9-
- ./simulator/docs/sample/debuggable-scheduler/kubeconfig.yaml:/root/.kube/kubeconfig.yaml
19+
- conf:/config
20+
depends_on:
21+
- init-container
22+
- simulator-cluster
1023
restart: always
1124
tty: true
1225
networks:
@@ -15,6 +28,14 @@ services:
1528
image: simulator-server
1629
volumes:
1730
- ./simulator/kubeconfig.yaml:/kubeconfig.yaml
31+
- /var/run/docker.sock:/var/run/docker.sock
32+
- conf:/config
33+
ports:
34+
- "1212:1212"
35+
restart: always
36+
tty: true
37+
networks:
38+
- simulator-internal-network
1839
depends_on:
1940
fake-source-cluster:
2041
condition: "service_healthy"
@@ -39,3 +60,9 @@ services:
3960
- simulator-internal-network
4061
profiles:
4162
- externalImportEnabled
63+
networks:
64+
simulator-internal-network:
65+
driver: bridge
66+
volumes:
67+
simulator-etcd-data:
68+
conf:

simulator/cmd/scheduler/Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,4 @@ FROM alpine:3.14.0
2424
COPY --from=build-env /go/src/simulator/bin/scheduler /scheduler
2525
RUN chmod a+x /scheduler
2626

27-
CMD ["/scheduler"]
27+
CMD ["/scheduler", "--config", "/config/scheduler.yaml", "--master", "http://simulator-cluster:3131"]
+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
apiVersion: v1
2+
kind: Config
3+
4+
clusters:
5+
- cluster:
6+
server: http://simulator-cluster:3131
7+
name: simulator
8+
9+
contexts:
10+
- context:
11+
cluster: simulator
12+
name: simulator
13+
14+
current-context: simulator
+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
kind: KubeSchedulerConfiguration
2+
apiVersion: kubescheduler.config.k8s.io/v1

simulator/cmd/simulator/simulator.go

+4-6
Original file line numberDiff line numberDiff line change
@@ -98,12 +98,6 @@ func startSimulator() error {
9898
if err != nil {
9999
return xerrors.Errorf("create di container: %w", err)
100100
}
101-
if !cfg.ExternalSchedulerEnabled {
102-
if err := dic.SchedulerService().StartScheduler(cfg.InitialSchedulerCfg); err != nil {
103-
return xerrors.Errorf("start scheduler: %w", err)
104-
}
105-
defer dic.SchedulerService().ShutdownScheduler()
106-
}
107101

108102
// If ExternalImportEnabled is enabled, the simulator import resources
109103
// from the target cluster that indicated by the `KUBECONFIG`.
@@ -116,6 +110,10 @@ func startSimulator() error {
116110
}
117111
}
118112

113+
if !cfg.ExternalSchedulerEnabled {
114+
dic.SchedulerService().SetSchedulerConfig(cfg.InitialSchedulerCfg)
115+
}
116+
119117
if cfg.ResourceSyncEnabled {
120118
// Start the resource syncer to sync resources from the target cluster.
121119
if err = dic.ResourceSyncer().Run(ctx); err != nil {

simulator/config/config.go

+8-3
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ var ErrEmptyConfig = errors.New("config is required, but empty")
2525
// configYaml represents the value from the config file.
2626
var configYaml = &v1alpha1.SimulatorConfiguration{}
2727

28+
// defaultSchedulerCfgPath is where we have the scheduler config in the container by default.
29+
const defaultSchedulerCfgPath = "/config/scheduler.yaml"
30+
2831
// Config is configuration for simulator.
2932
type Config struct {
3033
Port int
@@ -91,7 +94,7 @@ func NewConfig() (*Config, error) {
9194
}
9295
}
9396

94-
initialschedulerCfg, err := getSchedulerCfg()
97+
initialschedulerCfg, err := GetSchedulerCfg()
9598
if err != nil {
9699
return nil, xerrors.Errorf("get SchedulerCfg: %w", err)
97100
}
@@ -225,23 +228,25 @@ func parseStringListEnv(e string) []string {
225228
return list
226229
}
227230

228-
// getSchedulerCfg reads KUBE_SCHEDULER_CONFIG_PATH which means initial kube-scheduler configuration
231+
// GetSchedulerCfg reads KUBE_SCHEDULER_CONFIG_PATH which means initial kube-scheduler configuration
229232
// (or, when that is empty, kubeSchedulerConfigPath from the config file)
230233
// and converts it into *configv1.KubeSchedulerConfiguration.
231234
// KUBE_SCHEDULER_CONFIG_PATH is not required.
232235
// If KUBE_SCHEDULER_CONFIG_PATH is not set, the default configuration of kube-scheduler will be used.
233-
func getSchedulerCfg() (*configv1.KubeSchedulerConfiguration, error) {
236+
func GetSchedulerCfg() (*configv1.KubeSchedulerConfiguration, error) {
234237
kubeSchedulerConfigPath := os.Getenv("KUBE_SCHEDULER_CONFIG_PATH")
235238
if kubeSchedulerConfigPath == "" {
236239
kubeSchedulerConfigPath = configYaml.KubeSchedulerConfigPath
237240
if kubeSchedulerConfigPath == "" {
241+
config.SetKubeSchedulerCfgPath(defaultSchedulerCfgPath)
238242
dsc, err := config.DefaultSchedulerConfig()
239243
if err != nil {
240244
return nil, xerrors.Errorf("create default scheduler config: %w", err)
241245
}
242246
return dsc, nil
243247
}
244248
}
249+
config.SetKubeSchedulerCfgPath(kubeSchedulerConfigPath)
245250
data, err := os.ReadFile(kubeSchedulerConfigPath)
246251
if err != nil {
247252
return nil, xerrors.Errorf("read scheduler config file: %w", err)

simulator/docs/extender.md

+23-20
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,12 @@ The simulator stores the results of each Extender in the annotation of a pod.
1111
Note: This feature is not available in [external scheduler](./external-scheduler.md).
1212

1313
## How to use
14+
In this example, we describe how you can run an extender with the simulator, using [k8s-scheduler-extender-example](https://github.com/everpeace/k8s-scheduler-extender-example).
1415

15-
You need to configure your extender in KubeSchedulerConfig.
16-
(via [the simulator config](./simulator-server-config.md) or WebUI)
16+
+ Build the k8s-scheduler-extender-example image: clone the [k8s-scheduler-extender-example](https://github.com/everpeace/k8s-scheduler-extender-example) repository and follow the step `1 build a docker image` in its README.
1717

18-
(No required special configuration is for the simulator to use this feature.)
19-
20-
For example, if you run the server on `http://localhost:8080/scheduler/`,
21-
the configuration will look like this.
18+
+ Set up your extender in KubeSchedulerConfiguration either through [`kubeSchedulerConfigPath`](./simulator-server-config.md) or the Web UI.
19+
For example, if you are running the server on `http://kube-scheduler-simulator-extender-1:80/scheduler`, your configuration might look like the following:
2220

2321
```yaml
2422
apiVersion: kubescheduler.config.k8s.io/v1
@@ -28,7 +26,7 @@ leaderElection:
2826
profiles:
2927
- schedulerName: default-scheduler
3028
extenders:
31-
- urlPrefix: "http://localhost:8080/scheduler/"
29+
- urlPrefix: "http://kube-scheduler-simulator-extender-1:80/scheduler"
3230
filterVerb: "predicates/always_true"
3331
prioritizeVerb: "priorities/zero_score"
3432
preemptVerb: "preemption"
@@ -38,26 +36,31 @@ extenders:
3836
nodeCacheCapable: false
3937
```
4038
41-
After the above settings are made, when the simulator is started and the pod is scheduled,
42-
you will see each Pod gets many results on the annotation like this:
39+
+ Run Simulator:
40+
We have an example [`docker-compose.yaml`](./sample/plugin-extender/docker-compose.yaml); you can overwrite the [`compose.local.yml`](../../compose.local.yml) file with it, but make sure to update the extender's image name there.
41+
42+
To run the simulator, use the following commands:
43+
```sh
44+
$ make docker_build docker_up_local
45+
```
46+
47+
+ Create a Pod and examine your Extender's Results:
48+
The simulator started with the above steps should have your extender(s) enabled. You can create Pod(s) in the simulator and see the result.
49+
The results show up in the Pod's `scheduler-simulator/extender-xxx` annotations, like the following:
4350

4451
```yaml
4552
kind: Pod
4653
apiVersion: v1
4754
metadata:
4855
name: pod-2rsvz
4956
...
50-
annotations:
51-
scheduler-simulator/bind-result: '{"DefaultBinder":"success"}'
52-
scheduler-simulator/extender-bind-result: '{}'
53-
scheduler-simulator/extender-filter-result: >-
54-
{"http://localhost:8080/scheduler/":{"Nodes":{"metadata":{},"items":[{"metadata":{"name":"node-sc9ns","generateName":"node-","uid":"4b008c90-971e-4816-a0f4-dc1a3b6e856e","resourceVersion":"208","creationTimestamp":"2023-03-03T16:03:50Z","managedFields":[{"manager":"simulator","operation":"Update","apiVersion":"v1","time":"2023-03-03T16:03:50Z","fieldsType":"FieldsV1","fieldsV1":{"f:metadata":{"f:generateName":{}}}}]},"spec":{},"status":{"capacity":{"cpu":"4","memory":"32Gi","pods":"110"},"allocatable":{"cpu":"4","memory":"32Gi","pods":"110"},"phase":"Running","conditions":[{"type":"Ready","status":"True","lastHeartbeatTime":null,"lastTransitionTime":null}],"daemonEndpoints":{"kubeletEndpoint":{"Port":0}},"nodeInfo":{"machineID":"","systemUUID":"","bootID":"","kernelVersion":"","osImage":"","containerRuntimeVersion":"","kubeletVersion":"","kubeProxyVersion":"","operatingSystem":"","architecture":""}}},{"metadata":{"name":"node-pwzdq","generateName":"node-","uid":"b24f918d-94ae-4c35-9e2c-2376998dbede","resourceVersion":"209","creationTimestamp":"2023-03-03T16:03:53Z","managedFields":[{"manager":"simulator","operation":"Update","apiVersion":"v1","time":"2023-03-03T16:03:53Z","fieldsType":"FieldsV1","fieldsV1":{"f:metadata":{"f:generateName":{}}}}]},"spec":{},"status":{"capacity":{"cpu":"4","memory":"32Gi","pods":"110"},"allocatable":{"cpu":"4","memory":"32Gi","pods":"110"},"phase":"Running","conditions":[{"type":"Ready","status":"True","lastHeartbeatTime":null,"lastTransitionTime":null}],"daemonEndpoints":{"kubeletEndpoint":{"Port":0}},"nodeInfo":{"machineID":"","systemUUID":"","bootID":"","kernelVersion":"","osImage":"","containerRuntimeVersion":"","kubeletVersion":"","kubeProxyVersion":"","operatingSystem":"","architecture":""}}}]},"NodeNames":null,"FailedNodes":{},"FailedAndUnresolvableNodes":null,"Error":""}}
55-
scheduler-simulator/extender-preempt-result: '{}'
56-
scheduler-simulator/extender-prioritize-result: >-
57-
{"http://localhost:8080/scheduler/":[{"Host":"node-sc9ns","Score":0},{"Host":"node-pwzdq","Score":0}]}
58-
scheduler-simulator/score-result: >-
59-
{"node-282x7":{"ImageLocality":"0","InterPodAffinity":"0","NodeAffinity":"0","NodeNumber":"0","NodeResourcesBalancedAllocation":"52","NodeResourcesFit":"47","PodTopologySpread":"0","TaintToleration":"0","VolumeBinding":"0"},"node-gp9t4":{"ImageLocality":"0","InterPodAffinity":"0","NodeAffinity":"0","NodeNumber":"0","NodeResourcesBalancedAllocation":"76","NodeResourcesFit":"73","PodTopologySpread":"0","TaintToleration":"0","VolumeBinding":"0"}}
60-
...
57+
annotations:
58+
scheduler-simulator/extender-bind-result: '{}'
59+
scheduler-simulator/extender-filter-result: '{"http://kube-scheduler-simulator-extender-1:80/scheduler":{"Nodes":{"metadata":{},"items":[{"metadata":{"name":"node-tzjll","generateName":"node-","uid":"a3e39211-2200-4dee-99a8-a27b2ac528b3","resourceVersion":"223","creationTimestamp":"2024-09-25T12:24:50Z","annotations":{"node.alpha.kubernetes.io/ttl":"0"},"managedFields":[{"manager":"kube-controller-manager","operation":"Update","apiVersion":"v1","time":"2024-09-25T12:24:50Z","fieldsType":"FieldsV1","fieldsV1":{"f:metadata":{"f:annotations":{".":{},"f:node.alpha.kubernetes.io/ttl":{}}}}},{"manager":"simulator","operation":"Update","apiVersion":"v1","time":"2024-09-25T12:24:50Z","fieldsType":"FieldsV1","fieldsV1":{"f:metadata":{"f:generateName":{}}}}]},"spec":{},"status":{"capacity":{"cpu":"4","memory":"32Gi","pods":"110"},"allocatable":{"cpu":"4","memory":"32Gi","pods":"110"},"phase":"Running","conditions":[{"type":"Ready","status":"True","lastHeartbeatTime":null,"lastTransitionTime":null}],"daemonEndpoints":{"kubeletEndpoint":{"Port":0}},"nodeInfo":{"machineID":"","systemUUID":"","bootID":"","kernelVersion":"","osImage":"","containerRuntimeVersion":"","kubeletVersion":"","kubeProxyVersion":"","operatingSystem":"","architecture":""}}}]},"NodeNames":null,"FailedNodes":{},"FailedAndUnresolvableNodes":null,"Error":""}}'
60+
scheduler-simulator/extender-preempt-result: '{}'
61+
scheduler-simulator/extender-prioritize-result: '{}'
62+
....
6163
```
6264

65+
You can also view the annotation results from the web UI. Simply select the Pod you created and scheduled, then check the Resource Definition section to see the annotations.
6366

simulator/docs/how-it-works.md

+2-4
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,9 @@ This page describes how the simulator works.
44

55
### 0. starts the simulator.
66

7-
The simulator server works with the following:
8-
- scheduler
9-
- [HTTP server](api.md)
7+
The simulator server works with the [HTTP server](api.md).
108

11-
In advance, the simulator needs to launch etcd, controller-manager and kube-apiserver outside.
9+
Before the simulator starts, the debuggable-scheduler, etcd, controller-manager and kube-apiserver need to be launched outside of it.
1210
We recommend using [KWOK](https://github.com/kubernetes-sigs/kwok), see [compose.yml](../../compose.yml) to know how we wire things up.
1311

1412
### 1. users request creating resource.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
version: "3.7"
2+
services:
3+
extender:
4+
image: test/test:test # TODO(user): replace it with your extender's image
5+
ports:
6+
- "80:80"
7+
networks:
8+
- simulator-internal-network
9+
init-container:
10+
image: busybox
11+
volumes:
12+
- conf:/config
13+
- ${PWD}/simulator/cmd/scheduler:/host-config:ro
14+
command: sh -c "cp -rf /host-config/* /config/"
15+
simulator-scheduler:
16+
image: simulator-scheduler
17+
container_name: simulator-scheduler
18+
environment:
19+
- KUBECONFIG=/config/kubeconfig.yaml
20+
volumes:
21+
- conf:/config
22+
depends_on:
23+
- init-container
24+
- simulator-cluster
25+
restart: always
26+
tty: true
27+
networks:
28+
- simulator-internal-network
29+
simulator-server:
30+
image: simulator-server
31+
container_name: simulator-server
32+
environment:
33+
- PORT=1212
34+
- KUBE_SCHEDULER_SIMULATOR_ETCD_URL=http://simulator-cluster:2379
35+
- KUBE_APISERVER_URL=http://simulator-cluster:3131
36+
volumes:
37+
- ./simulator/config.yaml:/config.yaml
38+
- ./simulator/kubeconfig.yaml:/kubeconfig.yaml
39+
- /var/run/docker.sock:/var/run/docker.sock
40+
- conf:/config
41+
ports:
42+
- "1212:1212"
43+
restart: always
44+
tty: true
45+
networks:
46+
- simulator-internal-network
47+
depends_on:
48+
fake-source-cluster:
49+
condition: "service_healthy"
50+
required: false
51+
simulator-frontend:
52+
image: simulator-frontend
53+
restart: always
54+
container_name: simulator-frontend
55+
environment:
56+
- HOST=0.0.0.0
57+
- BASE_URL=http://${SIMULATOR_EXTERNAL_IP:-localhost}:1212
58+
- KUBE_API_SERVER_URL=http://${SIMULATOR_EXTERNAL_IP:-localhost}:3131
59+
ports:
60+
- "3000:3000"
61+
tty: true
62+
simulator-cluster:
63+
image: registry.k8s.io/kwok/cluster:v0.6.0-k8s.v1.30.2
64+
container_name: simulator-cluster
65+
restart: always
66+
ports:
67+
- "3131:3131"
68+
volumes:
69+
- simulator-etcd-data:/var/lib/etcd
70+
- ./kwok.yaml:/root/.kwok/kwok.yaml
71+
environment:
72+
- KWOK_KUBE_APISERVER_PORT=3131
73+
networks:
74+
- simulator-internal-network
75+
fake-source-cluster:
76+
image: registry.k8s.io/kwok/cluster:v0.6.0-k8s.v1.30.2
77+
container_name: fake-source-cluster
78+
restart: always
79+
healthcheck:
80+
test: "kwokctl kubectl get nodes"
81+
start_period: 60s
82+
start_interval: 1s
83+
interval: 600s
84+
ports:
85+
- "3132:3132"
86+
environment:
87+
- KWOK_KUBE_APISERVER_PORT=3132
88+
networks:
89+
- simulator-internal-network
90+
profiles:
91+
- externalImportEnabled
92+
networks:
93+
simulator-internal-network:
94+
driver: bridge
95+
volumes:
96+
simulator-etcd-data:
97+
conf:

0 commit comments

Comments
 (0)