Skip to content

Commit 940c08c

Browse files
committed
Changing how kubelet-clone stop is detected
1 parent 165711b commit 940c08c

1 file changed

Lines changed: 14 additions & 16 deletions

File tree

test/extended/two_node/tnf_kubelet_disruption.go

Lines changed: 14 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -17,11 +17,10 @@ import (
1717
)
1818

1919
const (
20-
kubeletDisruptionTimeout = 10 * time.Minute // Timeout for kubelet disruption scenarios
21-
kubeletRestoreTimeout = 5 * time.Minute // Time to wait for kubelet service restore
22-
kubeletPollInterval = 10 * time.Second // Poll interval for kubelet status checks
23-
kubeletGracePeriod = 30 * time.Second // Grace period for kubelet to start/stop
24-
pacemakerMonitorDetectPeriod = 15 * time.Second // Time to wait for Pacemaker to detect kubelet state changes
20+
kubeletDisruptionTimeout = 10 * time.Minute // Timeout for kubelet disruption scenarios
21+
kubeletRestoreTimeout = 5 * time.Minute // Time to wait for kubelet service restore
22+
kubeletPollInterval = 10 * time.Second // Poll interval for kubelet status checks
23+
kubeletGracePeriod = 30 * time.Second // Grace period for kubelet to start/stop
2524
)
2625

2726
var _ = g.Describe("[sig-etcd][apigroup:config.openshift.io][OCPFeatureGate:DualReplica][Suite:openshift/two-node][Serial][Slow][Disruptive] Two Node with Fencing cluster", func() {
@@ -196,17 +195,16 @@ var _ = g.Describe("[sig-etcd][apigroup:config.openshift.io][OCPFeatureGate:Dual
196195
err = utils.StopKubeletService(oc, targetNode.Name)
197196
o.Expect(err).To(o.BeNil(), fmt.Sprintf("Expected to stop kubelet service on node %s without errors", targetNode.Name))
198197

199-
g.By("Waiting for Pacemaker to detect kubelet stopped")
200-
time.Sleep(pacemakerMonitorDetectPeriod)
201-
202-
g.By("Verifying Pacemaker monitor detected kubelet as inactive")
203-
journalCmd := "sudo journalctl --no-pager --since '60 seconds ago' | grep 'Result of monitor operation for kubelet' | grep -i 'not running' || true"
204-
logs, logErr := exutil.DebugNodeRetryWithOptionsAndChroot(
205-
oc, targetNode.Name, "default", "bash", "-c", journalCmd)
206-
207-
o.Expect(logErr).To(o.BeNil(), "Should retrieve journal logs")
208-
o.Expect(logs).ToNot(o.BeEmpty(), "Pacemaker should have detected kubelet as 'not running (inactive)'")
209-
framework.Logf("Pacemaker monitor detection: %s", logs)
198+
g.By("Verifying Pacemaker detected kubelet as stopped via pcs status")
199+
o.Eventually(func() bool {
200+
stopped, err := utils.IsResourceStopped(oc, survivingNode.Name, "kubelet-clone")
201+
if err != nil {
202+
framework.Logf("Error checking kubelet-clone status: %v", err)
203+
return false
204+
}
205+
framework.Logf("kubelet-clone stopped on %s: %v", targetNode.Name, stopped)
206+
return stopped
207+
}, kubeletRestoreTimeout, kubeletPollInterval).Should(o.BeTrue(), "Pacemaker should detect kubelet-clone as stopped")
210208

211209
g.By("Verifying Pacemaker restarted kubelet-clone service")
212210
o.Eventually(func() bool {

0 commit comments

Comments (0)