diff --git a/assets/optional/node-exporter/00-namespace.yaml b/assets/optional/node-exporter/00-namespace.yaml new file mode 100644 index 0000000000..17f727565a --- /dev/null +++ b/assets/optional/node-exporter/00-namespace.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: openshift-monitoring + labels: + name: openshift-monitoring + pod-security.kubernetes.io/enforce: privileged + pod-security.kubernetes.io/audit: privileged + pod-security.kubernetes.io/warn: privileged diff --git a/assets/optional/node-exporter/01-cluster-role-binding.yaml b/assets/optional/node-exporter/01-cluster-role-binding.yaml new file mode 100644 index 0000000000..b6790fa9b4 --- /dev/null +++ b/assets/optional/node-exporter/01-cluster-role-binding.yaml @@ -0,0 +1,18 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/managed-by: cluster-monitoring-operator + app.kubernetes.io/name: node-exporter + app.kubernetes.io/part-of: openshift-monitoring + app.kubernetes.io/version: 1.11.1 + name: node-exporter +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: node-exporter +subjects: +- kind: ServiceAccount + name: node-exporter + namespace: openshift-monitoring diff --git a/assets/optional/node-exporter/01-cluster-role.yaml b/assets/optional/node-exporter/01-cluster-role.yaml new file mode 100644 index 0000000000..50d7a5e755 --- /dev/null +++ b/assets/optional/node-exporter/01-cluster-role.yaml @@ -0,0 +1,31 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/managed-by: cluster-monitoring-operator + app.kubernetes.io/name: node-exporter + app.kubernetes.io/part-of: openshift-monitoring + app.kubernetes.io/version: 1.11.1 + name: node-exporter +rules: +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - 
authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create +- apiGroups: + - security.openshift.io + resourceNames: + - node-exporter + resources: + - securitycontextconstraints + verbs: + - use diff --git a/assets/optional/node-exporter/01-security-context-constraints.yaml b/assets/optional/node-exporter/01-security-context-constraints.yaml new file mode 100644 index 0000000000..1caaf72fcd --- /dev/null +++ b/assets/optional/node-exporter/01-security-context-constraints.yaml @@ -0,0 +1,22 @@ +allowHostDirVolumePlugin: true +allowHostNetwork: true +allowHostPID: true +allowHostPorts: true +allowPrivilegedContainer: true +apiVersion: security.openshift.io/v1 +kind: SecurityContextConstraints +metadata: + annotations: + kubernetes.io/description: node-exporter scc is used for the Prometheus node exporter + labels: + app.kubernetes.io/managed-by: cluster-monitoring-operator + app.kubernetes.io/part-of: openshift-monitoring + name: node-exporter +readOnlyRootFilesystem: false +runAsUser: + type: RunAsAny +seLinuxContext: + type: RunAsAny +seccompProfiles: +- runtime/default +users: [] diff --git a/assets/optional/node-exporter/01-service-account.yaml b/assets/optional/node-exporter/01-service-account.yaml new file mode 100644 index 0000000000..c3d1dc95c9 --- /dev/null +++ b/assets/optional/node-exporter/01-service-account.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +automountServiceAccountToken: false +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/managed-by: cluster-monitoring-operator + app.kubernetes.io/name: node-exporter + app.kubernetes.io/part-of: openshift-monitoring + app.kubernetes.io/version: 1.11.1 + name: node-exporter + namespace: openshift-monitoring diff --git a/assets/optional/node-exporter/02-accelerators-collector-configmap.yaml b/assets/optional/node-exporter/02-accelerators-collector-configmap.yaml new file mode 100644 index 0000000000..9ddda923f2 --- /dev/null +++ 
b/assets/optional/node-exporter/02-accelerators-collector-configmap.yaml @@ -0,0 +1,141 @@ +apiVersion: v1 +data: + config.yaml: |- + - "models": + - "modelName": "NVIDIA A800 PCIe 80GB" + "pciID": "0x20f5" + - "modelName": "NVIDIA A800 40GB PCIe active cooled" + "pciID": "0x20f6" + - "modelName": "NVIDIA AX800" + "pciID": "0x20fd" + - "modelName": "NVIDIA A100 PCIe 40GB" + "pciID": "0x20f1" + - "modelName": "NVIDIA A100 PCIe 80GB" + "pciID": "0x20b5" + - "modelName": "NVIDIA A40" + "pciID": "0x2235" + - "modelName": "NVIDIA A30" + "pciID": "0x20b7" + - "modelName": "NVIDIA A10" + "pciID": "0x2236" + - "modelName": "NVIDIA A16" + "pciID": "0x25b6" + - "modelName": "H800 NVL" + "pciID": "0x2322" + - "modelName": "NVIDIA H100 NVL" + "pciID": "0x2321" + - "modelName": "NVIDIA H100 PCIe 80GB" + "pciID": "0x2331" + - "modelName": "NVIDIA L40" + "pciID": "0x26b5" + - "modelName": "NVIDIA L40S" + "pciID": "0x26b9" + - "modelName": "NVIDIA L20 liquid cooled" + "pciID": "0x26bA" + - "modelName": "NVIDIA L4" + "pciID": "0x27b8" + - "modelName": "NVIDIA L2" + "pciID": "0x27b6" + - "modelName": "NVIDIA RTX 6000 Ada" + "pciID": "0x26b1" + - "modelName": "NVIDIA RTX 5880 Ada" + "pciID": "0x26b3" + - "modelName": "NVIDIA RTX 5000 Ada" + "pciID": "0x2231" + - "modelName": "NVIDIA RTX A6000" + "pciID": "0x2230" + - "modelName": "NVIDIA RTX A5500" + "pciID": "0x2233" + - "modelName": "NVIDIA RTX 8000 passive" + "pciID": "0x1e30" + - "modelName": "NVIDIA RTX A2000" + "pciID": "0x2531" + - "modelName": "NVIDIA A100 SXM4 40GB" + "pciID": "0x20b0" + - "modelName": "NVIDIA H800 NVL" + "pciID": "0x233a" + - "modelName": "NVIDIA H200 NVL" + "pciID": "0x233b" + - "modelName": "NVIDIA A100 SXM4 80GB" + "pciID": "0x20b2" + - "modelName": "NVIDIA A100 SXM 64GB" + "pciID": "0x20b3" + - "modelName": "NVIDIA A800 SXM4 40GB" + "pciID": "0x20bd" + - "modelName": "NVIDIA A800 SXM4 80GB" + "pciID": "0x20f3" + - "modelName": "NVIDIA RTX A1000" + "pciID": "0x25b0" + - "modelName": "Blackwell RTX PRO 
6000" + "pciID": "0x2bb5" + - "modelName": "Blackwell GB100" + "pciID": "0x2941" + - "modelName": "NVIDIA H200" + "pciID": "0x2335" + "vendorID": "0x10de" + "vendorName": "NVIDIA" + - "models": + - "modelName": "AMD MI210" + "pciID": "0x740f" + - "modelName": "AMD MI250" + "pciID": "0x740c" + - "modelName": "AMD MI250X" + "pciID": "0x7408" + - "modelName": "AMD MI300" + "pciID": "0x74a0" + - "modelName": "AMD MI300X" + "pciID": "0x74a1" + - "modelName": "AMD MI325X" + "pciID": "0x74a5" + - "modelName": "AMD MI308X" + "pciID": "0x7aa2" + - "modelName": "AMD MI300X VF" + "pciID": "0x74b5" + - "modelName": "AMD MI210 VF" + "pciID": "0x7410" + "vendorID": "0x1002" + "vendorName": "AMD" + - "models": + - "modelName": "Gaudi 1" + "pciID": "0x1000" + - "modelName": "Gaudi 2" + "pciID": "0x1020" + "vendorID": "0x1da3" + "vendorName": "GAUDI" + - "models": + - "modelName": "Intel Data Center GPU Max 1550" + "pciID": "0x0bd5" + - "modelName": "Intel Data Center GPU Max 1100" + "pciID": "0x0bda" + - "modelName": "Intel Data Center GPU Flex 170" + "pciID": "0x56c0" + - "modelName": "Intel Data Center GPU Flex 140" + "pciID": "0x56c1" + - "modelName": "Intel IPU Data Path" + "pciID": "0x1452" + "vendorID": "0x8086" + "vendorName": "Intel" + - "models": + - "modelName": "Qualcomm AI 100" + "pciID": "0xa100" + - "modelName": "Qualcomm AI 80" + "pciID": "0xa080" + "vendorID": "0x17cb" + "vendorName": "Qualcomm" + - "models": + - "modelName": "Marvell OCTEON 10 CN10XXX" + "pciID": "0xb900" + "vendorID": "0x177d" + "vendorName": "Marvell" + - "models": + - "modelName": "BlueField-3 integrated ConnectX-7" + "pciID": "0xa2dc" + "vendorID": "0x15b3" + "vendorName": "Mellanox" +kind: ConfigMap +metadata: + labels: + app.kubernetes.io/managed-by: cluster-monitoring-operator + app.kubernetes.io/part-of: openshift-monitoring + name: node-exporter-accelerators-collector-config + namespace: openshift-monitoring diff --git a/assets/optional/node-exporter/02-kube-rbac-proxy-secret.yaml 
b/assets/optional/node-exporter/02-kube-rbac-proxy-secret.yaml new file mode 100644 index 0000000000..e02d0bb40b --- /dev/null +++ b/assets/optional/node-exporter/02-kube-rbac-proxy-secret.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +data: {} +kind: Secret +metadata: + labels: + app.kubernetes.io/managed-by: cluster-monitoring-operator + app.kubernetes.io/part-of: openshift-monitoring + name: node-exporter-kube-rbac-proxy-config + namespace: openshift-monitoring +stringData: + config.yaml: |- + "authorization": + "static": + - "path": "/metrics" + "resourceRequest": false + "verb": "get" +type: Opaque diff --git a/assets/optional/node-exporter/03-daemonset.yaml b/assets/optional/node-exporter/03-daemonset.yaml new file mode 100644 index 0000000000..0b0d5cfd05 --- /dev/null +++ b/assets/optional/node-exporter/03-daemonset.yaml @@ -0,0 +1,199 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/managed-by: cluster-monitoring-operator + app.kubernetes.io/name: node-exporter + app.kubernetes.io/part-of: openshift-monitoring + app.kubernetes.io/version: 1.11.1 + name: node-exporter + namespace: openshift-monitoring +spec: + selector: + matchLabels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: node-exporter + app.kubernetes.io/part-of: openshift-monitoring + template: + metadata: + annotations: + cluster-autoscaler.kubernetes.io/enable-ds-eviction: "false" + kubectl.kubernetes.io/default-container: node-exporter + openshift.io/required-scc: node-exporter + target.workload.openshift.io/management: '{"effect": "PreferredDuringScheduling"}' + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/managed-by: cluster-monitoring-operator + app.kubernetes.io/name: node-exporter + app.kubernetes.io/part-of: openshift-monitoring + app.kubernetes.io/version: 1.11.1 + spec: + automountServiceAccountToken: true + containers: + - args: + - --web.listen-address=127.0.0.1:9101 + - 
--path.sysfs=/host/sys + - --path.rootfs=/host/root + - --path.procfs=/host/root/proc + - --path.udev.data=/host/root/run/udev/data + - --no-collector.wifi + - --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|run/k3s/containerd/.+|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/) + - --collector.netclass.ignored-devices=^.*$ + - --collector.netdev.device-exclude=^.*$ + - --collector.cpu.info + - --collector.textfile.directory=/var/node_exporter/textfile + - --no-collector.btrfs + command: + - /bin/sh + - -c + - | + export GOMAXPROCS=4 + # We don't take CPU affinity into account as the container doesn't have integer CPU requests. + # In case of error, fallback to the default value. + NUM_CPUS=$(grep -c '^processor' "/proc/cpuinfo" 2>/dev/null || echo "0") + if [ "$NUM_CPUS" -lt "$GOMAXPROCS" ]; then + export GOMAXPROCS="$NUM_CPUS" + fi + echo "ts=$(date --iso-8601=seconds) num_cpus=$NUM_CPUS gomaxprocs=$GOMAXPROCS" + exec /bin/node_exporter "$0" "$@" + env: + - name: DBUS_SYSTEM_BUS_ADDRESS + value: unix:path=/host/root/var/run/dbus/system_bus_socket + image: "quay.io/openshift/node-exporter" + name: node-exporter + resources: + requests: + cpu: 8m + memory: 32Mi + securityContext: {} + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /host/sys + mountPropagation: HostToContainer + name: sys + readOnly: true + - mountPath: /host/root + mountPropagation: HostToContainer + name: root + readOnly: true + - mountPath: /var/node_exporter/textfile + name: node-exporter-textfile + readOnly: true + - mountPath: /var/node_exporter/accelerators_collector_config + name: node-exporter-accelerators-collector-config + readOnly: true + workingDir: /var/node_exporter/textfile + - args: + - --secure-listen-address=0.0.0.0:9100 + - 
--tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 + - --upstream=http://127.0.0.1:9101/ + - --tls-cert-file=/etc/tls/private/tls.crt + - --tls-private-key-file=/etc/tls/private/tls.key + - --client-ca-file=/etc/tls/client-ca/ca.crt + - --config-file=/etc/kube-rbac-policy/config.yaml + env: + - name: IP + valueFrom: + fieldRef: + fieldPath: status.podIP + image: "quay.io/openshift/kube-rbac-proxy" + name: kube-rbac-proxy + ports: + - containerPort: 9100 + hostPort: 9100 + name: https + resources: + requests: + cpu: 1m + memory: 15Mi + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsGroup: 65532 + runAsNonRoot: true + runAsUser: 65532 + seccompProfile: + type: RuntimeDefault + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /etc/tls/private + name: node-exporter-tls + readOnly: true + - mountPath: /etc/kube-rbac-policy + name: node-exporter-kube-rbac-proxy-config + readOnly: true + - mountPath: /etc/tls/client-ca/ca.crt + name: admin-kubeconfig-signer-ca + readOnly: true + hostNetwork: true + hostPID: true + initContainers: + - command: + - /bin/sh + - -c + - '[[ ! 
-d /node_exporter/collectors/init ]] || find /node_exporter/collectors/init -perm /111 -type f -exec {} \;' + env: + - name: TMPDIR + value: /tmp + image: "quay.io/openshift/node-exporter" + name: init-textfile + resources: + requests: + cpu: 1m + memory: 1Mi + securityContext: + privileged: true + runAsUser: 0 + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /var/node_exporter/textfile + name: node-exporter-textfile + readOnly: false + - mountPath: /var/log/wtmp + name: node-exporter-wtmp + readOnly: true + workingDir: /var/node_exporter/textfile + nodeSelector: + kubernetes.io/os: linux + priorityClassName: system-cluster-critical + securityContext: {} + serviceAccountName: node-exporter + tolerations: + - operator: Exists + volumes: + - hostPath: + path: /sys + name: sys + - hostPath: + path: / + name: root + - emptyDir: {} + name: node-exporter-textfile + - name: node-exporter-tls + secret: + secretName: node-exporter-tls + - hostPath: + path: /var/log/wtmp + type: File + name: node-exporter-wtmp + - name: node-exporter-kube-rbac-proxy-config + secret: + secretName: node-exporter-kube-rbac-proxy-config + - configMap: + items: + - key: config.yaml + path: config.yaml + name: node-exporter-accelerators-collector-config + name: node-exporter-accelerators-collector-config + - hostPath: + path: /var/lib/microshift/certs/admin-kubeconfig-signer/ca.crt + type: File + name: admin-kubeconfig-signer-ca + updateStrategy: + rollingUpdate: + maxUnavailable: 10% + type: RollingUpdate diff --git a/assets/optional/node-exporter/04-service.yaml b/assets/optional/node-exporter/04-service.yaml new file mode 100644 index 0000000000..37b420ccdb --- /dev/null +++ b/assets/optional/node-exporter/04-service.yaml @@ -0,0 +1,24 @@ +apiVersion: v1 +kind: Service +metadata: + annotations: + openshift.io/description: Expose the `/metrics` endpoint on port 9100. This port is for internal use, and no other usage is guaranteed. 
+ service.beta.openshift.io/serving-cert-secret-name: node-exporter-tls + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/managed-by: cluster-monitoring-operator + app.kubernetes.io/name: node-exporter + app.kubernetes.io/part-of: openshift-monitoring + app.kubernetes.io/version: 1.11.1 + name: node-exporter + namespace: openshift-monitoring +spec: + clusterIP: None + ports: + - name: https + port: 9100 + targetPort: https + selector: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: node-exporter + app.kubernetes.io/part-of: openshift-monitoring diff --git a/assets/optional/node-exporter/kustomization.aarch64.yaml b/assets/optional/node-exporter/kustomization.aarch64.yaml new file mode 100644 index 0000000000..7686f7f2ba --- /dev/null +++ b/assets/optional/node-exporter/kustomization.aarch64.yaml @@ -0,0 +1,7 @@ +images: + - name: quay.io/openshift/kube-rbac-proxy + newName: quay.io/openshift-release-dev/ocp-v5.0-art-dev + digest: sha256:8a74d54a45421f51cfc1d50b7fca04e177c8601cec4cf5ecfdac250e36904819 + - name: quay.io/openshift/node-exporter + newName: quay.io/openshift-release-dev/ocp-v5.0-art-dev + digest: sha256:b908cd047a093dd789377c350e13832eb8dee38964b07e2f67a8c3cf1c9a5fc9 diff --git a/assets/optional/node-exporter/kustomization.x86_64.yaml b/assets/optional/node-exporter/kustomization.x86_64.yaml new file mode 100644 index 0000000000..c3c1e34433 --- /dev/null +++ b/assets/optional/node-exporter/kustomization.x86_64.yaml @@ -0,0 +1,7 @@ +images: + - name: quay.io/openshift/kube-rbac-proxy + newName: quay.io/openshift-release-dev/ocp-v5.0-art-dev + digest: sha256:b23eabd4a8578c71398ccde56be77ded55c7cbea36e592f3800347c33ca47c55 + - name: quay.io/openshift/node-exporter + newName: quay.io/openshift-release-dev/ocp-v5.0-art-dev + digest: sha256:c39c86b8b8b39ff6db9818e1a4f7dcde73d26ee427bded9c772bfe477ec020f9 diff --git a/assets/optional/node-exporter/kustomization.yaml b/assets/optional/node-exporter/kustomization.yaml new 
file mode 100644 index 0000000000..20b41b0f31 --- /dev/null +++ b/assets/optional/node-exporter/kustomization.yaml @@ -0,0 +1,12 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - 00-namespace.yaml + - 01-service-account.yaml + - 01-cluster-role.yaml + - 01-cluster-role-binding.yaml + - 01-security-context-constraints.yaml + - 02-kube-rbac-proxy-secret.yaml + - 02-accelerators-collector-configmap.yaml + - 03-daemonset.yaml + - 04-service.yaml diff --git a/assets/optional/node-exporter/release-node-exporter-aarch64.json b/assets/optional/node-exporter/release-node-exporter-aarch64.json new file mode 100644 index 0000000000..a1e3d0ae0f --- /dev/null +++ b/assets/optional/node-exporter/release-node-exporter-aarch64.json @@ -0,0 +1,8 @@ +{ + "release": { + "base": "5.0.0-0.nightly-arm64-2026-06-19-154904" + }, + "images": { + "node_exporter": "quay.io/openshift-release-dev/ocp-v5.0-art-dev@sha256:b908cd047a093dd789377c350e13832eb8dee38964b07e2f67a8c3cf1c9a5fc9" + } +} diff --git a/assets/optional/node-exporter/release-node-exporter-x86_64.json b/assets/optional/node-exporter/release-node-exporter-x86_64.json new file mode 100644 index 0000000000..399aa1c470 --- /dev/null +++ b/assets/optional/node-exporter/release-node-exporter-x86_64.json @@ -0,0 +1,8 @@ +{ + "release": { + "base": "5.0.0-0.nightly-2026-06-19-155631" + }, + "images": { + "node_exporter": "quay.io/openshift-release-dev/ocp-v5.0-art-dev@sha256:c39c86b8b8b39ff6db9818e1a4f7dcde73d26ee427bded9c772bfe477ec020f9" + } +} diff --git a/packaging/rpm/microshift.spec b/packaging/rpm/microshift.spec index 6362e4f552..8a59cd3144 100644 --- a/packaging/rpm/microshift.spec +++ b/packaging/rpm/microshift.spec @@ -261,6 +261,25 @@ The microshift-cert-manager-release-info package provides release information fi release. These files contain the list of container image references used by Cert Manager and can be used to embed those images into osbuilder blueprints or bootc containerfiles. 
+%package metrics-node-exporter +Summary: Prometheus node-exporter for MicroShift +ExclusiveArch: x86_64 aarch64 +Requires: microshift = %{version} + +%description metrics-node-exporter +The microshift-metrics-node-exporter package provides the Prometheus node-exporter for MicroShift. +Install this package to expose host-level hardware and OS metrics. + +%package metrics-node-exporter-release-info +Summary: Release information for node-exporter for MicroShift +BuildArch: noarch +Requires: microshift-release-info = %{version} + +%description metrics-node-exporter-release-info +The microshift-metrics-node-exporter-release-info package provides release information files for this +release. These files contain the list of container image references used by node-exporter +and can be used to embed those images into osbuilder blueprints or bootc containerfiles. + %package sriov Summary: SR-IOV Network Operator for MicroShift ExclusiveArch: x86_64 aarch64 @@ -599,6 +618,29 @@ cat assets/optional/cert-manager/manager/images-x86_64.yaml >> %{buildroot}/%{_p mkdir -p -m755 %{buildroot}%{_datadir}/microshift/release install -p -m644 assets/optional/cert-manager/release-cert-manager-{x86_64,aarch64}.json %{buildroot}%{_datadir}/microshift/release/ +# node-exporter +install -d -m755 %{buildroot}/%{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter +install -p -m644 assets/optional/node-exporter/00-namespace.yaml %{buildroot}/%{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter +install -p -m644 assets/optional/node-exporter/01-service-account.yaml %{buildroot}/%{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter +install -p -m644 assets/optional/node-exporter/01-cluster-role.yaml %{buildroot}/%{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter +install -p -m644 assets/optional/node-exporter/01-cluster-role-binding.yaml %{buildroot}/%{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter +install -p -m644 
assets/optional/node-exporter/01-security-context-constraints.yaml %{buildroot}/%{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter +install -p -m644 assets/optional/node-exporter/02-kube-rbac-proxy-secret.yaml %{buildroot}/%{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter +install -p -m644 assets/optional/node-exporter/02-accelerators-collector-configmap.yaml %{buildroot}/%{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter +install -p -m644 assets/optional/node-exporter/03-daemonset.yaml %{buildroot}/%{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter +install -p -m644 assets/optional/node-exporter/04-service.yaml %{buildroot}/%{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter +install -p -m644 assets/optional/node-exporter/kustomization.yaml %{buildroot}/%{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter + +%ifarch %{arm} aarch64 +cat assets/optional/node-exporter/kustomization.aarch64.yaml >> %{buildroot}/%{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter/kustomization.yaml +%endif +%ifarch x86_64 +cat assets/optional/node-exporter/kustomization.x86_64.yaml >> %{buildroot}/%{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter/kustomization.yaml +%endif + +# node-exporter-release-info +install -p -m644 assets/optional/node-exporter/release-node-exporter-{x86_64,aarch64}.json %{buildroot}%{_datadir}/microshift/release/ + # sriov install -d -m755 %{buildroot}/%{_prefix}/lib/microshift/manifests.d/070-microshift-sriov install -d -m755 %{buildroot}/%{_prefix}/lib/microshift/manifests.d/070-microshift-sriov/crd @@ -802,6 +844,13 @@ fi %files cert-manager-release-info %{_datadir}/microshift/release/release-cert-manager-{x86_64,aarch64}.json +%files metrics-node-exporter +%dir %{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter +%{_prefix}/lib/microshift/manifests.d/082-microshift-node-exporter/* + +%files 
metrics-node-exporter-release-info +%{_datadir}/microshift/release/release-node-exporter-{x86_64,aarch64}.json + %files sriov %dir %{_prefix}/lib/microshift/manifests.d/070-microshift-sriov %dir %{_prefix}/lib/microshift/manifests.d/070-microshift-sriov/crd diff --git a/pkg/healthcheck/microshift_optional_workloads.go b/pkg/healthcheck/microshift_optional_workloads.go index 80e2d9a3b0..e104bbec8e 100644 --- a/pkg/healthcheck/microshift_optional_workloads.go +++ b/pkg/healthcheck/microshift_optional_workloads.go @@ -1,6 +1,8 @@ package healthcheck import ( + "slices" + "github.com/openshift/microshift/pkg/config" "github.com/openshift/microshift/pkg/util" "k8s.io/klog/v2" @@ -38,6 +40,20 @@ var optionalWorkloadPaths = map[string]optionalWorkloads{ Namespace: "sriov-network-operator", Workloads: NamespaceWorkloads{Deployments: []string{"sriov-network-operator"}}, }, + + "/usr/lib/microshift/manifests.d/082-microshift-node-exporter": { + Namespace: "openshift-monitoring", + Workloads: NamespaceWorkloads{DaemonSets: []string{"node-exporter"}}, + }, +} + +// mergeWorkloads combines two NamespaceWorkloads into one. 
+func mergeWorkloads(existing, incoming NamespaceWorkloads) NamespaceWorkloads { + return NamespaceWorkloads{ + Deployments: slices.Concat(existing.Deployments, incoming.Deployments), + DaemonSets: slices.Concat(existing.DaemonSets, incoming.DaemonSets), + StatefulSets: slices.Concat(existing.StatefulSets, incoming.StatefulSets), + } } // fillOptionalMicroShiftWorkloads assembles list of optional MicroShift workloads @@ -73,7 +89,7 @@ func fillOptionalMicroShiftWorkloads(workloadsToCheck map[string]NamespaceWorklo } klog.Infof("Optional component path exists and is configured: %s - expecting %v in namespace %q", path, ow.Workloads.String(), ow.Namespace) - workloadsToCheck[ow.Namespace] = ow.Workloads + workloadsToCheck[ow.Namespace] = mergeWorkloads(workloadsToCheck[ow.Namespace], ow.Workloads) } return nil } diff --git a/scripts/auto-rebase/assets_cluster_monitoring_operator.yaml b/scripts/auto-rebase/assets_cluster_monitoring_operator.yaml new file mode 100644 index 0000000000..8b1e6f0cc5 --- /dev/null +++ b/scripts/auto-rebase/assets_cluster_monitoring_operator.yaml @@ -0,0 +1,35 @@ +assets: + - dir: optional/node-exporter/ + no_clean: True + src: cluster-monitoring-operator/assets/node-exporter/ + files: + - file: 00-namespace.yaml + ignore: "MicroShift-specific, no upstream equivalent" + git_restore: True + - file: 01-cluster-role.yaml + src: cluster-role.yaml + - file: 01-cluster-role-binding.yaml + src: cluster-role-binding.yaml + - file: 01-service-account.yaml + src: service-account.yaml + - file: 01-security-context-constraints.yaml + src: security-context-constraints.yaml + - file: 02-accelerators-collector-configmap.yaml + src: accelerators-collector-configmap.yaml + - file: 02-kube-rbac-proxy-secret.yaml + src: kube-rbac-proxy-secret.yaml + - file: 03-daemonset.yaml + src: daemonset.yaml + - file: 04-service.yaml + src: service.yaml + - file: kustomization.yaml + ignore: "MicroShift-specific kustomization" + git_restore: True + - file: 
kustomization.x86_64.yaml + ignore: "gets generated during image rebase" + - file: kustomization.aarch64.yaml + ignore: "gets generated during image rebase" + - file: release-node-exporter-x86_64.json + ignore: "gets generated during image rebase" + - file: release-node-exporter-aarch64.json + ignore: "gets generated during image rebase" diff --git a/scripts/auto-rebase/last_rebase_cluster_monitoring_operator.sh b/scripts/auto-rebase/last_rebase_cluster_monitoring_operator.sh new file mode 100755 index 0000000000..f61200df82 --- /dev/null +++ b/scripts/auto-rebase/last_rebase_cluster_monitoring_operator.sh @@ -0,0 +1,2 @@ +#!/bin/bash -x +./scripts/auto-rebase/rebase_cluster_monitoring_operator.sh to "registry.ci.openshift.org/ocp/release-5:5.0.0-0.nightly-2026-06-19-155631" "registry.ci.openshift.org/ocp-arm64/release-5-arm64:5.0.0-0.nightly-arm64-2026-06-19-154904" diff --git a/scripts/auto-rebase/rebase_cluster_monitoring_operator.sh b/scripts/auto-rebase/rebase_cluster_monitoring_operator.sh new file mode 100755 index 0000000000..be1124f04e --- /dev/null +++ b/scripts/auto-rebase/rebase_cluster_monitoring_operator.sh @@ -0,0 +1,375 @@ +#!/usr/bin/env bash +# shellcheck disable=all +# Copyright 2022 The MicroShift authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
#

set -o errexit
set -o errtrace
set -o nounset
set -o pipefail

shopt -s expand_aliases
shopt -s extglob

#debugging options
#trap 'echo "#L$LINENO: $BASH_COMMAND" >&2' DEBUG
#set -xo functrace
#PS4='+ $LINENO '
REPOROOT="$(readlink -f "$(dirname "${BASH_SOURCE[0]}")/../..")"
STAGING_DIR="$REPOROOT/_output/staging"
PULL_SECRET_FILE="${HOME}/.pull-secret.json"
# Set REBASE_USE_SSH=true to clone over SSH instead of HTTPS (see clone_repo).
REBASE_USE_SSH="${REBASE_USE_SSH:-false}"

declare -a ARCHS=("amd64" "arm64")
declare -A GOARCH_TO_UNAME_MAP=( ["amd64"]="x86_64" ["arm64"]="aarch64" )

# Maps kustomization image name -> OCP release tag name
declare -A IMAGE_MAP=(
    ["quay.io/openshift/kube-metrics-server"]="kube-metrics-server"
    ["quay.io/openshift/kube-state-metrics"]="kube-state-metrics"
    ["quay.io/openshift/node-exporter"]="prometheus-node-exporter"
    ["quay.io/openshift/kube-rbac-proxy"]="kube-rbac-proxy"
)

# Maps component dir -> release JSON key
declare -A COMPONENT_JSON_KEY=(
    ["metrics-server"]="metrics_server"
    ["kube-state-metrics"]="kube_state_metrics"
    ["node-exporter"]="node_exporter"
)

# Maps release JSON key -> OCP release tag name
declare -A EXPORTER_TAG_MAP=(
    ["metrics_server"]="kube-metrics-server"
    ["kube_state_metrics"]="kube-state-metrics"
    ["node_exporter"]="prometheus-node-exporter"
)

# Print a section heading ($1) wrapped in the ANSI escape sequence for blue.
title() {
    echo -e "\E[34m$1\E[00m";
}

# Run a command, retrying with exponential backoff until it succeeds.
#
# Arguments: the command and its arguments ("$@").
# Returns:   0 as soon as one attempt succeeds; otherwise the exit code of
#            the last failed attempt, after 5 attempts in total.
#
# Progress and failure messages go to stderr so that stdout carries only the
# output of the wrapped command. No delay is taken after the final attempt.
retry_cmd() {
    local -r max_attempts=5
    local timeout=1
    local attempt=1
    local exit_code=0

    while (( attempt <= max_attempts )); do
        if "$@"; then
            return 0
        else
            exit_code=$?
        fi

        # Nothing left to retry: skip the pointless sleep and the misleading
        # "Retrying" message, and fall through to the final error report.
        if (( attempt == max_attempts )); then
            break
        fi

        echo "Attempt ${attempt} of ${max_attempts} failed (exit code ${exit_code}). Retrying in ${timeout}s..." >&2
        sleep "${timeout}"
        attempt=$(( attempt + 1 ))
        timeout=$(( timeout * 2 ))
    done

    # "$*" (not "$@") so the whole command line is a single echo argument.
    echo "Command failed after ${max_attempts} attempts: $*" >&2
    return "${exit_code}"
}

# Verify the tools this script depends on.
#
# yq is installed through scripts/fetch_tools.sh when it is missing; a missing
# python3 or a missing python yaml module terminates the script with status 1.
check_preconditions() {
    # The explicit messages below replace the shell's own "not found" noise.
    if ! hash yq 2>/dev/null; then
        title "Installing yq"
        sudo DEST_DIR=/usr/bin/ "${REPOROOT}/scripts/fetch_tools.sh" yq
    fi

    if ! hash python3 2>/dev/null; then
        echo "ERROR: python3 is not present on the system - please install" >&2
        exit 1
    fi

    if ! python3 -c "import yaml"; then
        echo "ERROR: missing python's yaml library - please install" >&2
        exit 1
    fi
}

# Fetch a single commit of a git repository into <destdir>/<repo basename>.
#
# Arguments:
#   $1 - repository URL
#   $2 - commit to fetch and check out
#   $3 - directory under which the checkout is created
#
# Does nothing when the target directory already exists.
clone_repo() {
    local repo="$1"
    local commit="$2"
    local destdir="$3"

    local repodir="${destdir}/${repo##*/}"

    if [[ -d "${repodir}" ]]; then
        return
    fi

    # Compare against the literal "true" instead of executing the variable's
    # value as a command: any other value now simply means "use HTTPS".
    if [[ "${REBASE_USE_SSH}" == "true" ]]; then
        repo="git@github.com:${repo#https://github.com/}"
    fi

    git init "${repodir}"
    pushd "${repodir}" >/dev/null
    git remote add origin "${repo}"
    retry_cmd git fetch origin --quiet --filter=tree:0 --tags "${commit}"
    git checkout "${commit}"
    popd >/dev/null
}

# Recreate the staging directory, store the release info of both release
# images there, and clone cluster-monitoring-operator at the commit recorded
# in the amd64 release payload.
#
# Arguments:
#   $1 - amd64 release image
#   $2 - arm64 release image
#
# Returns 1 when the payload has no cluster-monitoring-operator tag.
download_cluster_monitoring_operator() {
    local release_image_amd64="$1"
    local release_image_arm64="$2"

    rm -rf "${STAGING_DIR}"
    mkdir -p "${STAGING_DIR}"
    pushd "${STAGING_DIR}" >/dev/null

    # Keep the optional "-a <file>" as an array so that a pull-secret path
    # containing whitespace is still passed to oc as one argument.
    local -a authentication=()
    if [[ -f "${PULL_SECRET_FILE}" ]]; then
        authentication=(-a "${PULL_SECRET_FILE}")
    else
        >&2 echo "Warning: no pull secret found at ${PULL_SECRET_FILE}"
    fi

    # The ${arr[@]+...} form expands to nothing for an empty array without
    # tripping "set -o nounset" on older bash releases.
    title "# Fetching release info for ${release_image_amd64} (amd64)"
    oc adm release info ${authentication[@]+"${authentication[@]}"} "${release_image_amd64}" -o json > release_amd64.json
    title "# Fetching release info for ${release_image_arm64} (arm64)"
    oc adm release info ${authentication[@]+"${authentication[@]}"} "${release_image_arm64}" -o json > release_arm64.json

    # One line per release tag: "<name> <source-location> <commit-id>".
    title "# Extracting cluster-monitoring-operator source commit"
    jq -r '.references.spec.tags[] | "\(.name) \(.annotations."io.openshift.build.source-location") \(.annotations."io.openshift.build.commit.id")"' \
        release_amd64.json > source-commits

    local cmo_line
    cmo_line=$(grep '^cluster-monitoring-operator ' source-commits) || {
        >&2 echo "ERROR: cluster-monitoring-operator not found in release payload"
        return 1
    }

    local repo commit
    repo=$(echo "${cmo_line}" | cut -d ' ' -f 2)
    commit=$(echo "${cmo_line}" | cut -d ' ' -f 3)

    title "# Cloning cluster-monitoring-operator at ${commit}"
    clone_repo "${repo}" "${commit}" "."

    popd >/dev/null
}

# Apply the MicroShift-specific edits to the metrics-server manifests.
# Returns 0 without doing anything when assets/optional/metrics-server is absent.
update_metrics_server_manifests() {
    [[ -d "${REPOROOT}/assets/optional/metrics-server" ]] || return 0

    title "Rebasing metrics-server manifests"

    local ms_crb="${REPOROOT}/assets/optional/metrics-server/01-cluster-role-binding.yaml"
    yq -i '.subjects += [{"kind": "User", "name": "system:metrics-server"}]' "$ms_crb"

    # Single replica with a Recreate strategy, no affinity rules, and the
    # placeholder image name that the kustomization files rewrite.
    local ms_deploy="${REPOROOT}/assets/optional/metrics-server/03-deployment.yaml"
    yq -i '.spec.replicas = 1' "$ms_deploy"
    yq -i '.spec.strategy = {"type": "Recreate"}' "$ms_deploy"
    yq -i 'del(.spec.template.spec.affinity)' "$ms_deploy"
    yq -i '.spec.template.spec.containers[0].image = "quay.io/openshift/kube-metrics-server"' "$ms_deploy"
    yq -i '.spec.template.spec.containers[0].securityContext.capabilities.drop = ["ALL"]' "$ms_deploy"
}

# Apply the MicroShift-specific edits to the kube-state-metrics manifests.
# Returns 0 without doing anything when assets/optional/kube-state-metrics is absent.
update_kube_state_metrics_manifests() {
    [[ -d "${REPOROOT}/assets/optional/kube-state-metrics" ]] || return 0

    title "Rebasing kube-state-metrics manifests"

    local ksm_deploy="${REPOROOT}/assets/optional/kube-state-metrics/03-deployment.yaml"

    # Container 0 gets the kube-state-metrics image; containers 1 and 2 get
    # the kube-rbac-proxy image.
    yq -i '.spec.template.spec.containers[0].image = "quay.io/openshift/kube-state-metrics"' "$ksm_deploy"
    yq -i '.spec.template.spec.containers[1].image = "quay.io/openshift/kube-rbac-proxy"' "$ksm_deploy"
    yq -i '.spec.template.spec.containers[2].image = "quay.io/openshift/kube-rbac-proxy"' "$ksm_deploy"

    # Security contexts for all three containers and for the pod.
    yq -i '.spec.template.spec.containers[0].securityContext = {"allowPrivilegeEscalation": false, "readOnlyRootFilesystem": true, "runAsNonRoot": true}' "$ksm_deploy"
    yq -i '.spec.template.spec.containers[1].securityContext = {"allowPrivilegeEscalation": false, "readOnlyRootFilesystem": true, "runAsNonRoot": true}' "$ksm_deploy"
    yq -i '.spec.template.spec.containers[2].securityContext = {"allowPrivilegeEscalation": false, "readOnlyRootFilesystem": true, "runAsNonRoot": true}' "$ksm_deploy"
    yq -i '.spec.template.spec.securityContext = {"runAsNonRoot": true}' "$ksm_deploy"

    # Resource limits per container.
    yq -i '.spec.template.spec.containers[0].resources.limits = {"cpu": "100m", "memory": "200Mi"}' "$ksm_deploy"
    yq -i '.spec.template.spec.containers[1].resources.limits = {"cpu": "20m", "memory": "40Mi"}' "$ksm_deploy"
    yq -i '.spec.template.spec.containers[2].resources.limits = {"cpu": "20m", "memory": "40Mi"}' "$ksm_deploy"

    # Mount the TLS secret read-only in containers 1 and 2.
    yq -i '(.spec.template.spec.containers[1].volumeMounts[] | select(.name == "kube-state-metrics-tls")).readOnly = true' "$ksm_deploy"
    yq -i '(.spec.template.spec.containers[2].volumeMounts[] | select(.name == "kube-state-metrics-tls")).readOnly = true' "$ksm_deploy"

    # Replace the "metrics-client-ca" volume with a hostPath mount of the
    # MicroShift admin-kubeconfig signer CA and point --client-ca-file at it.
    yq -i '(.spec.template.spec.containers[1].args[] | select(test("--client-ca-file="))) |= "--client-ca-file=/etc/tls/client-ca/ca.crt"' "$ksm_deploy"
    yq -i '(.spec.template.spec.containers[2].args[] | select(test("--client-ca-file="))) |= "--client-ca-file=/etc/tls/client-ca/ca.crt"' "$ksm_deploy"
    yq -i 'del(.spec.template.spec.volumes[] | select(.name == "metrics-client-ca"))' "$ksm_deploy"
    yq -i '.spec.template.spec.volumes += [{"hostPath": {"path": "/var/lib/microshift/certs/admin-kubeconfig-signer/ca.crt", "type": "File"}, "name": "admin-kubeconfig-signer-ca"}]' "$ksm_deploy"
    yq -i 'del(.spec.template.spec.containers[1].volumeMounts[] | select(.name == "metrics-client-ca"))' "$ksm_deploy"
    yq -i 'del(.spec.template.spec.containers[2].volumeMounts[] | select(.name == "metrics-client-ca"))' "$ksm_deploy"
    yq -i '.spec.template.spec.containers[1].volumeMounts += [{"mountPath": "/etc/tls/client-ca/ca.crt", "name": "admin-kubeconfig-signer-ca", "readOnly": true}]' "$ksm_deploy"
    yq -i '.spec.template.spec.containers[2].volumeMounts += [{"mountPath": "/etc/tls/client-ca/ca.crt", "name": "admin-kubeconfig-signer-ca", "readOnly": true}]' "$ksm_deploy"

    # Delete every line range running from a "user": line through the next
    # "name": line in the kube-rbac-proxy configuration secret.
    local ksm_secret="${REPOROOT}/assets/optional/kube-state-metrics/02-kube-rbac-proxy-secret.yaml"
    sed -i '/"user":/,/"name":/d' "$ksm_secret"
}
+update_node_exporter_manifests() { + [[ -d "${REPOROOT}/assets/optional/node-exporter" ]] || return 0 + + title "Rebasing node-exporter manifests" + + local ne_ds="${REPOROOT}/assets/optional/node-exporter/03-daemonset.yaml" + + yq -i '.spec.template.spec.containers[0].image = "quay.io/openshift/node-exporter"' "$ne_ds" + yq -i '.spec.template.spec.containers[1].image = "quay.io/openshift/kube-rbac-proxy"' "$ne_ds" + yq -i '.spec.template.spec.initContainers[0].image = "quay.io/openshift/node-exporter"' "$ne_ds" + + yq -i '(.spec.template.spec.containers[1].args[] | select(test("--secure-listen-address="))) |= "--secure-listen-address=0.0.0.0:9100"' "$ne_ds" + + yq -i '(.spec.template.spec.containers[1].args[] | select(test("--client-ca-file="))) |= "--client-ca-file=/etc/tls/client-ca/ca.crt"' "$ne_ds" + yq -i 'del(.spec.template.spec.volumes[] | select(.name == "metrics-client-ca"))' "$ne_ds" + yq -i '.spec.template.spec.volumes += [{"hostPath": {"path": "/var/lib/microshift/certs/admin-kubeconfig-signer/ca.crt", "type": "File"}, "name": "admin-kubeconfig-signer-ca"}]' "$ne_ds" + yq -i 'del(.spec.template.spec.containers[1].volumeMounts[] | select(.name == "metrics-client-ca"))' "$ne_ds" + yq -i '.spec.template.spec.containers[1].volumeMounts += [{"mountPath": "/etc/tls/client-ca/ca.crt", "name": "admin-kubeconfig-signer-ca", "readOnly": true}]' "$ne_ds" + + yq -i '(.spec.template.spec.containers[1].volumeMounts[] | select(.name == "node-exporter-tls")).readOnly = true' "$ne_ds" + + local ne_secret="${REPOROOT}/assets/optional/node-exporter/02-kube-rbac-proxy-secret.yaml" + sed -i '/"user":/,/"name":/d' "$ne_secret" +} + +update_cluster_monitoring_operator_images() { + title "Rebasing metrics component images" + + for goarch in amd64 arm64; do + local arch=${GOARCH_TO_UNAME_MAP["${goarch}"]:-noarch} + local release_file="${STAGING_DIR}/release_${goarch}.json" + + local base_release + base_release=$(jq -r ".metadata.version" "${release_file}") + + for 
component_dir in metrics-server kube-state-metrics node-exporter; do + [[ -d "${REPOROOT}/assets/optional/${component_dir}" ]] || continue + + local json_key="${COMPONENT_JSON_KEY[$component_dir]}" + local release_tag="${EXPORTER_TAG_MAP[$json_key]}" + local new_image + new_image=$(jq -r ".references.spec.tags[] | select(.name == \"${release_tag}\") | .from.name" "${release_file}") + if [[ -z "${new_image}" || "${new_image}" == "null" ]]; then + >&2 echo "ERROR: Release tag '${release_tag}' not found in payload for ${component_dir}" + return 1 + fi + local component_release_json="${REPOROOT}/assets/optional/${component_dir}/release-${component_dir}-${arch}.json" + jq -n --arg base "$base_release" --arg img "${new_image}" \ + "{\"release\": {\"base\": \$base}, \"images\": {\"${json_key}\": \$img}}" > "${component_release_json}" + + local kustomization_arch_file="${REPOROOT}/assets/optional/${component_dir}/kustomization.${arch}.yaml" + + cat < "${kustomization_arch_file}" +images: +EOF + + local image_names + image_names=$(grep -h 'image:' "${REPOROOT}/assets/optional/${component_dir}/"*.yaml 2>/dev/null \ + | sed 's/.*image: *//; s/"//g; s/:.*//; s/@.*//' | sort -u | grep -v '^$') + + for orig_image in ${image_names}; do + local release_tag="${IMAGE_MAP[$orig_image]:-}" + if [[ -z "${release_tag}" ]]; then + >&2 echo "ERROR: Unknown metrics image '${orig_image}' in ${component_dir}" + return 1 + fi + + local new_image + new_image=$(jq -r ".references.spec.tags[] | select(.name == \"${release_tag}\") | .from.name" "${release_file}") + if [[ -z "${new_image}" || "${new_image}" == "null" ]]; then + >&2 echo "ERROR: Image for release tag '${release_tag}' not found in payload for ${component_dir}" + return 1 + fi + local new_image_name="${new_image%@*}" + local new_image_digest="${new_image#*@}" + + cat <> "${kustomization_arch_file}" + - name: ${orig_image} + newName: ${new_image_name} + digest: ${new_image_digest} +EOF + done + done + done +} + +copy_manifests() { + 
title "Copying manifests" + "$REPOROOT/scripts/auto-rebase/handle_assets.py" "./scripts/auto-rebase/assets_cluster_monitoring_operator.yaml" +} + +update_last_rebase() { + local release_image_amd64="$1" + local release_image_arm64="$2" + + title "## Updating last_rebase_cluster_monitoring_operator.sh" + + local last_rebase_script="${REPOROOT}/scripts/auto-rebase/last_rebase_cluster_monitoring_operator.sh" + + rm -f "${last_rebase_script}" + cat - >"${last_rebase_script}" <