Skip to content

Commit 301be4e

Browse files
author
Kubernetes Submit Queue
authored
Merge pull request kubernetes#29272 from mksalawa/pending
Automatic merge from submit-queue.
Modify the predicate() interface to return all failed predicates.
As stated in the comments below, this is the first step toward showing the user all predicates that failed for a given node when scheduling of a given pod failed on every node.
Ref kubernetes#20064.
This change is [Reviewable](https://reviewable.kubernetes.io/reviews/kubernetes/kubernetes/29272).
2 parents da53a24 + 2749ec7 commit 301be4e

File tree

10 files changed

+526
-367
lines changed

10 files changed

+526
-367
lines changed

pkg/controller/daemon/daemoncontroller.go

+6-12
Original file line numberDiff line numberDiff line change
@@ -693,7 +693,7 @@ func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *api.Node, ds *exte
693693
if pod.Status.Phase == api.PodSucceeded || pod.Status.Phase == api.PodFailed {
694694
continue
695695
}
696-
// ignore pods that belong to the daemonset when taking into account wheter
696+
// ignore pods that belong to the daemonset when taking into account whether
697697
// a daemonset should bind to a node.
698698
if pds := dsc.getPodDaemonSet(pod); pds != nil && ds.Name == pds.Name {
699699
continue
@@ -703,18 +703,12 @@ func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *api.Node, ds *exte
703703

704704
nodeInfo := schedulercache.NewNodeInfo(pods...)
705705
nodeInfo.SetNode(node)
706-
fit, err := predicates.GeneralPredicates(newPod, nil, nodeInfo)
706+
fit, reasons, err := predicates.GeneralPredicates(newPod, nil, nodeInfo)
707707
if err != nil {
708-
if re, ok := err.(*predicates.PredicateFailureError); ok {
709-
message := re.Error()
710-
glog.V(2).Infof("Predicate failed on Pod: %s, for reason: %v", newPod.Name, message)
711-
}
712-
if re, ok := err.(*predicates.InsufficientResourceError); ok {
713-
message := re.Error()
714-
glog.V(2).Infof("Predicate failed on Pod: %s, for reason: %v", newPod.Name, message)
715-
}
716-
message := fmt.Sprintf("GeneralPredicates failed due to %v.", err)
717-
glog.Warningf("Predicate failed on Pod %s - %s", newPod.Name, message)
708+
glog.Warningf("GeneralPredicates failed on pod %s due to unexpected error: %v", newPod.Name, err)
709+
}
710+
for _, r := range reasons {
711+
glog.V(2).Infof("GeneralPredicates failed on pod %s for reason: %v", newPod.Name, r.GetReason())
718712
}
719713
return fit
720714
}

pkg/kubelet/kubelet.go

+29-12
Original file line numberDiff line numberDiff line change
@@ -2059,23 +2059,40 @@ func (kl *Kubelet) canAdmitPod(pods []*api.Pod, pod *api.Pod) (bool, string, str
20592059
}
20602060
nodeInfo := schedulercache.NewNodeInfo(pods...)
20612061
nodeInfo.SetNode(node)
2062-
fit, err := predicates.GeneralPredicates(pod, nil, nodeInfo)
2062+
fit, reasons, err := predicates.GeneralPredicates(pod, nil, nodeInfo)
2063+
if err != nil {
2064+
message := fmt.Sprintf("GeneralPredicates failed due to %v, which is unexpected.", err)
2065+
glog.Warningf("Failed to admit pod %v - %s", format.Pod(pod), message)
2066+
return fit, "UnexpectedError", message
2067+
}
20632068
if !fit {
2064-
if re, ok := err.(*predicates.PredicateFailureError); ok {
2065-
reason := re.PredicateName
2066-
message := re.Error()
2069+
var reason string
2070+
var message string
2071+
if len(reasons) == 0 {
2072+
message = fmt.Sprint("GeneralPredicates failed due to unknown reason, which is unexpected.")
2073+
glog.Warningf("Failed to admit pod %v - %s", format.Pod(pod), message)
2074+
return fit, "UnknownReason", message
2075+
}
2076+
// If there are failed predicates, we only return the first one as a reason.
2077+
r := reasons[0]
2078+
switch re := r.(type) {
2079+
case *predicates.PredicateFailureError:
2080+
reason = re.PredicateName
2081+
message = re.Error()
20672082
glog.V(2).Infof("Predicate failed on Pod: %v, for reason: %v", format.Pod(pod), message)
2068-
return fit, reason, message
2069-
}
2070-
if re, ok := err.(*predicates.InsufficientResourceError); ok {
2071-
reason := fmt.Sprintf("OutOf%s", re.ResourceName)
2083+
case *predicates.InsufficientResourceError:
2084+
reason = fmt.Sprintf("OutOf%s", re.ResourceName)
20722085
message := re.Error()
20732086
glog.V(2).Infof("Predicate failed on Pod: %v, for reason: %v", format.Pod(pod), message)
2074-
return fit, reason, message
2087+
case *predicates.FailureReason:
2088+
reason = re.GetReason()
2089+
message = fmt.Sprintf("Failure: %s", re.GetReason())
2090+
glog.V(2).Infof("Predicate failed on Pod: %v, for reason: %v", format.Pod(pod), message)
2091+
default:
2092+
reason = "UnexpectedPredicateFailureType"
2093+
message := fmt.Sprintf("GeneralPredicates failed due to %v, which is unexpected.", r)
2094+
glog.Warningf("Failed to admit pod %v - %s", format.Pod(pod), message)
20752095
}
2076-
reason := "UnexpectedPredicateFailureType"
2077-
message := fmt.Sprintf("GeneralPredicates failed due to %v, which is unexpected.", err)
2078-
glog.Warningf("Failed to admit pod %v - %s", format.Pod(pod), message)
20792096
return fit, reason, message
20802097
}
20812098
// TODO: When disk space scheduling is implemented (#11976), remove the out-of-disk check here and

plugin/pkg/scheduler/algorithm/predicates/error.go

+26-10
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,9 @@ limitations under the License.
1616

1717
package predicates
1818

19-
import "fmt"
20-
21-
const (
22-
podCountResourceName string = "PodCount"
23-
cpuResourceName string = "CPU"
24-
memoryResourceName string = "Memory"
25-
nvidiaGpuResourceName string = "NvidiaGpu"
19+
import (
20+
"fmt"
21+
"k8s.io/kubernetes/pkg/api"
2622
)
2723

2824
var (
@@ -49,13 +45,13 @@ var (
4945
// hit and caused the unfitting failure.
5046
type InsufficientResourceError struct {
5147
// ResourceName is the name of the resource that is insufficient.
52-
ResourceName string
48+
ResourceName api.ResourceName
5349
requested int64
5450
used int64
5551
capacity int64
5652
}
5753

58-
func newInsufficientResourceError(resourceName string, requested, used, capacity int64) *InsufficientResourceError {
54+
func NewInsufficientResourceError(resourceName api.ResourceName, requested, used, capacity int64) *InsufficientResourceError {
5955
return &InsufficientResourceError{
6056
ResourceName: resourceName,
6157
requested: requested,
@@ -69,14 +65,34 @@ func (e *InsufficientResourceError) Error() string {
6965
e.ResourceName, e.requested, e.used, e.capacity)
7066
}
7167

68+
func (e *InsufficientResourceError) GetReason() string {
69+
return fmt.Sprintf("Insufficient %v", e.ResourceName)
70+
}
71+
7272
type PredicateFailureError struct {
7373
PredicateName string
7474
}
7575

7676
func newPredicateFailureError(predicateName string) *PredicateFailureError {
77-
return &PredicateFailureError{predicateName}
77+
return &PredicateFailureError{PredicateName: predicateName}
7878
}
7979

8080
func (e *PredicateFailureError) Error() string {
8181
return fmt.Sprintf("Predicate %s failed", e.PredicateName)
8282
}
83+
84+
func (e *PredicateFailureError) GetReason() string {
85+
return e.PredicateName
86+
}
87+
88+
type FailureReason struct {
89+
reason string
90+
}
91+
92+
func NewFailureReason(msg string) *FailureReason {
93+
return &FailureReason{reason: msg}
94+
}
95+
96+
func (e *FailureReason) GetReason() string {
97+
return e.reason
98+
}

0 commit comments

Comments
 (0)