diff --git a/hardware/priority_core_turbo/Dockerfile.sst b/hardware/priority_core_turbo/Dockerfile.sst
new file mode 100644
index 0000000..323913f
--- /dev/null
+++ b/hardware/priority_core_turbo/Dockerfile.sst
@@ -0,0 +1,68 @@
+# Dockerfile.sst
+# Build intel-speed-select from the Linux kernel tools tree (KERNEL_TAG,
+# v6.8 by default) on Ubuntu and ship only the binary plus runtime tools.
+#
+# This image provides /usr/local/bin/intel-speed-select inside the container.
+# To *use* it against host hardware, you will typically need --privileged and
+# access to /dev/cpu/*/msr (plus relevant /sys entries) on the host.
+
+# ---- Build stage: compile intel-speed-select --------------------------------
+FROM ubuntu:24.04 AS build
+
+ARG DEBIAN_FRONTEND=noninteractive
+ARG KERNEL_TAG=v6.8
+
+# Build dependencies only; none of these reach the final image.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        ca-certificates \
+        g++ \
+        gcc \
+        git \
+        libc6-dev \
+        libnl-3-dev \
+        libnl-genl-3-dev \
+        libnl-route-3-dev \
+        make \
+        pkg-config \
+    && rm -rf /var/lib/apt/lists/*
+
+# Fetch Linux kernel source (tools tree includes intel-speed-select).
+# The checkout stays in this stage, so it does not bloat the runtime image.
+WORKDIR /opt
+RUN git clone --depth 1 --branch "${KERNEL_TAG}" https://github.com/torvalds/linux.git
+
+# Build only intel-speed-select
+WORKDIR /opt/linux/tools/power/x86/intel-speed-select
+RUN make -j"$(nproc)"
+
+# ---- Runtime stage: binary plus the tools the helper scripts call -----------
+FROM ubuntu:24.04
+
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Runtime basics: libnl shared libraries for intel-speed-select, and utilities
+# such as python3 and lscpu (util-linux) used by the mounted scripts.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        bc \
+        ca-certificates \
+        jq \
+        kmod \
+        libcap2-bin \
+        libnl-3-200 \
+        libnl-genl-3-200 \
+        numactl \
+        python3 \
+        sudo \
+        util-linux \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY --from=build \
+    /opt/linux/tools/power/x86/intel-speed-select/intel-speed-select \
+    /usr/local/bin/intel-speed-select
+
+# No USER directive: the compose services run this image privileged as root
+# (user 0:0) so the tool can access the host CPU interfaces.
+WORKDIR /workspace
+
+# No ENTRYPOINT so docker compose can run arbitrary commands (bash, etc.)
+CMD ["intel-speed-select", "--help"]
diff --git a/hardware/priority_core_turbo/README.md b/hardware/priority_core_turbo/README.md
new file mode 100644
index 0000000..65f3998
--- /dev/null
+++ b/hardware/priority_core_turbo/README.md
@@ -0,0 +1,475 @@
+# Enabling Priority Core Turbo (PCT) for GPU Performance
+
+## Overview
+
+**Intel® Priority Core Turbo (PCT)** is part of **Intel® Speed Select Technology – Turbo Frequency (SST-TF)**.
+It allows a subset of CPU cores to operate at **higher turbo frequencies**, while remaining cores run closer to base frequency.
+ +This is particularly effective for **GPU-accelerated AI inference**, where a small number of CPU threads handle +**latency-critical, mostly serial tasks** such as tokenization, scheduling, and feeding GPUs. +Running these threads on **High-Priority (HP) cores** improves GPU utilization, TTFT, and tail latency. + +Validated platforms: + +- **Intel® Xeon® 6776P** + +## How PCT Works +
+ PCT Details + +PCT relies on **two Intel Speed Select features**: + +- **SST-TF (Turbo Frequency)** + Defines the high-priority turbo buckets and the number of physical cores that can use each bucket. + +- **SST-CP (Core Power / CLOS)** + Assigns CPUs to **Classes of Service (CLOS)**. + CPUs assigned to **CLOS0** are treated as **High-Priority** by PCT. + +> **Important:** PCT is only effective when CPUs are explicitly assigned to **CLOS0** and Core Power / CLOS is enabled. + +### PCT bucket-count interpretation + +`intel-speed-select turbo-freq info -l <tdp_level>` may print the same `bucket-0`, +`bucket-1`, and `bucket-2` SST-TF table under multiple `powerdomain-*` anchors. + +For PCT **capacity**, this flow counts `bucket-0` **once per package/socket**: + +```text +bucket-0 high-priority-cores-count:8 @ 4600 MHz +=> 8 PCT physical cores per package/socket +``` + +On a two-socket Intel® Xeon® 6776P system with Hyper-Threading enabled: + +```text +2 packages × 8 physical PCT cores/package = 16 physical PCT cores total +16 physical PCT cores × 2 threads/core = 32 logical PCT CPUs total +``` + +### Capacity versus placement + +There are two different concepts: + +| Concept | Correct model | +| --- | --- | +| **PCT capacity** | Count `bucket-0` once per package/socket | +| **HP CPU placement** | Dispatch the package-level PCT core budget across the package's PCT reporting powerdomain anchors | + +For an Intel® Xeon® 6776P system, `bucket-0` reports: + +```text +PCT_CORES_PER_PACKAGE=8 +PCT_ACTIVE_PACKAGES=2 +PCT_TOTAL_PHYSICAL_CORES=16 +THREADS_PER_CORE=2 +PCT_TOTAL_LOGICAL_CPUS=32 +``` + +But the `turbo-freq` output shows two reporting anchors per package: + +```text +package 0: anchor cpu0, anchor cpu32 +package 1: anchor cpu64, anchor cpu96 +``` + +Therefore, the set script dispatches the **8 physical PCT cores per package** +across the package's two reporting anchors: + +```text +package 0: 4 physical cores from cpu0 + 4 physical cores from cpu32 +package 1: 4 physical cores 
from cpu64 + 4 physical cores from cpu96 +``` + +With Hyper-Threading included, this becomes: + +```text +0-3,32-35,64-67,96-99,128-131,160-163,192-195,224-227 +``` + +This is the default strict bucket-0 PCT placement used by the updated set script. +
+ +## 1. Build the Environment + +
+ Build Details + +Export the kernel build variables first: + +```bash +source ./set_kernel_env.sh +``` + +Build the Docker image with required tools: + +```bash +docker compose --progress=plain build --no-cache +``` + +Verify `intel-speed-select` exists inside the image: + +```bash +docker compose run --rm intel-speed-select-shell 'which intel-speed-select && intel-speed-select --help | head' +``` + +
+ +## 2. Check PCT Status + +
+ This step verifies: + +- Hardware support for Intel® Speed Select features +- SST-TF/PCT bucket-0 capacity +- Correct package/socket-based PCT capacity counting +- Core Power and CLOS enablement +- Current CPU-to-CLOS mapping +- Whether the current `TARGET_CLOS` CPU count matches the expected PCT logical CPU budget + +
+ +Export the kernel build variables first: + +```bash +source ./set_kernel_env.sh +``` + +Run: + +```bash +docker compose --progress=plain --profile check up --abort-on-container-exit +``` + +Example results when PCT and CLOS are enabled successfully: + +
+ Example results + +```bash +------------------------------------------------------------ +CPU and Intel Speed Select Capability +------------------------------------------------------------ +Intel(R) SST-PP (feature perf-profile) is supported +Intel(R) SST-TF (feature turbo-freq) is supported +Intel(R) SST-BF (feature base-freq) is not supported +Intel(R) SST-CP (feature core-power) is supported +Intel(R) Speed Select Technology +Executing on CPU model:173[0xad] + +------------------------------------------------------------ +PCT Capacity from SST-TF bucket-0 +------------------------------------------------------------ +✅ PCT/SST-TF turbo tables detected. +PCT_BUCKET=bucket-0 +PCT_REPORTING_ANCHORS=4 +PCT_ACTIVE_PACKAGES=2 +PCT_CORES_PER_PACKAGE=8 +PCT_TOTAL_PHYSICAL_CORES=16 +PCT_MAX_FREQ_MHZ=4600 +PCT_DOMAIN_ANCHORS=pkg0/die0/pd0/cpu0:cores8:freq4600,pkg0/die0/pd1/cpu32:cores8:freq4600,pkg1/die1/pd0/cpu64:cores8:freq4600,pkg1/die1/pd1/cpu96:cores8:freq4600 +PCT_PACKAGE_SUMMARY=pkg0:cores8:freq4600:anchors2,pkg1:cores8:freq4600:anchors2 +THREADS_PER_CORE=2 +PCT_TOTAL_LOGICAL_CPUS=32 + +------------------------------------------------------------ +Core Power (CLOS) Feature Status +------------------------------------------------------------ +✅ Core Power feature ENABLED +✅ CLOS ENABLED + +------------------------------------------------------------ +CPU -> CLOS Mapping via get-assoc +------------------------------------------------------------ +CLOS distribution (count by clos id): + clos:0 -> 32 CPUs + clos:2 -> 224 CPUs + +------------------------------------------------------------ +CPU list for TARGET_CLOS=0 +------------------------------------------------------------ +clos:0 CPU list: 0-3,32-35,64-67,96-99,128-131,160-163,192-195,224-227 +Wrote clos:0 CPU list to /workspace/benchmarks/results/clos0_cpulist.txt + +------------------------------------------------------------ +PCT Budget Validation for CLOS0 
+------------------------------------------------------------ +CLOS0 CPU count : 32 +PCT bucket : bucket-0 +PCT reporting anchors : 4 +PCT active packages/sockets : 2 +PCT cores per package/socket : 8 +PCT physical core budget : 16 +PCT max frequency : 4600 MHz +Threads per core : 2 +Expected PCT logical CPU budget : 32 +✅ CLOS0 CPU count exactly matches the bucket-0 PCT logical budget. + +------------------------------------------------------------ +Summary +------------------------------------------------------------ +✅ PCT turbo tables detected +✅ PCT capacity detected: 16 physical HP cores total, 32 logical CPUs with HT=2 + Count model: bucket-0 counted once per package/socket, not once per powerdomain anchor. +✅ Core Power enabled +✅ CLOS enabled +Done. +``` + +
+ +The check script writes the current target-CLOS CPU list to: + +```text +./results/clos0_cpulist.txt +``` + +For the example above, `clos0_cpulist.txt` contains 32 logical CPUs. With +Hyper-Threading enabled, that corresponds to 16 physical PCT cores. + +## 3. Set PCT and Assign HP CPUs + +This step **activates PCT in practice** by assigning selected HP CPUs to **CLOS0**. + +The setup script intentionally **overwrites existing BIOS/runtime CLOS settings**: + +1. Enable Core Power / CLOS. +2. Move **all online CPUs → `OTHER_CLOS`**. +3. Move selected HP CPUs → `HP_CLOS`. + +This prevents stale BIOS or previous runtime CLOS assignments from leaving unexpected +CPUs in CLOS0. + +### Set-script behavior + +
+ The setup script performs the following actions: + +- Detects PCT capacity from `intel-speed-select turbo-freq info -l <tdp_level>`. +- Counts `bucket-0` once per package/socket. +- Derives `HP_PER_PACKAGE` from `PCT_CORES_PER_PACKAGE` unless overridden. +- Reads the PCT reporting anchors from `PCT_DOMAIN_ANCHORS`. +- Dispatches each package's `HP_PER_PACKAGE` physical-core budget across that package's reporting powerdomain anchors. +- Selects contiguous physical CPUs starting from each reporting anchor CPU. +- Includes Hyper-Threading siblings by default with `INCLUDE_HT=1`. +- Assigns: + - **Selected HP CPUs → CLOS0** by default + - **All remaining CPUs → CLOS2** by default + +
+ +Export the kernel build variables first: + +```bash +source ./set_kernel_env.sh +``` + +Run the setup: + +```bash +docker compose --progress=plain --profile set up --abort-on-container-exit +``` + +Or test the selection without changing the system: + +```bash +DRY_RUN=1 docker compose --progress=plain --profile set up --abort-on-container-exit +``` + +### Example: package capacity dispatched across reporting powerdomain anchors + +
+ Example results + +```bash +intel-speed-select-set-1 | ------------------------------------------------------------ +intel-speed-select-set-1 | PCT capacity from SST-TF bucket-0 +intel-speed-select-set-1 | ------------------------------------------------------------ +intel-speed-select-set-1 | PCT_BUCKET=bucket-0 +intel-speed-select-set-1 | PCT_REPORTING_ANCHORS=4 +intel-speed-select-set-1 | PCT_ACTIVE_PACKAGES=2 +intel-speed-select-set-1 | PCT_CORES_PER_PACKAGE=8 +intel-speed-select-set-1 | PCT_TOTAL_PHYSICAL_CORES=16 +intel-speed-select-set-1 | PCT_MAX_FREQ_MHZ=4600 +intel-speed-select-set-1 | PCT_DOMAIN_ANCHORS=pkg0/die0/pd0/cpu0:cores8:freq4600,pkg0/die0/pd1/cpu32:cores8:freq4600,pkg1/die1/pd0/cpu64:cores8:freq4600,pkg1/die1/pd1/cpu96:cores8:freq4600 +intel-speed-select-set-1 | PCT_PACKAGE_SUMMARY=pkg0:cores8:freq4600:anchors2,pkg1:cores8:freq4600:anchors2 +intel-speed-select-set-1 | +intel-speed-select-set-1 | ------------------------------------------------------------ +intel-speed-select-set-1 | Config +intel-speed-select-set-1 | ------------------------------------------------------------ +intel-speed-select-set-1 | ACTION=set +intel-speed-select-set-1 | HP_BUCKET=0 TDP_LEVEL=1 +intel-speed-select-set-1 | HP_PER_PACKAGE=8 +intel-speed-select-set-1 | INCLUDE_HT=1 +intel-speed-select-set-1 | HP_CLOS=0 OTHER_CLOS=2 +intel-speed-select-set-1 | DEBUG_MODE=0 DRY_RUN=0 DEBUG_VERBOSE=0 DEBUG_MAP=0 +intel-speed-select-set-1 | +intel-speed-select-set-1 | ------------------------------------------------------------ +intel-speed-select-set-1 | Powerdomain-anchor HP CPU dispatch +intel-speed-select-set-1 | ------------------------------------------------------------ +intel-speed-select-set-1 | package 0: HP_PER_PACKAGE=8, reporting_anchors=2, dispatch_per_anchor=[4, 4] +intel-speed-select-set-1 | pkg0/pd0/anchor_cpu0 -> 4 physical cores -> core0:0/128 core1:1/129 core2:2/130 core3:3/131 +intel-speed-select-set-1 | pkg0/pd1/anchor_cpu32 -> 4 physical cores -> 
core32:32/160 core33:33/161 core34:34/162 core35:35/163 +intel-speed-select-set-1 | package 1: HP_PER_PACKAGE=8, reporting_anchors=2, dispatch_per_anchor=[4, 4] +intel-speed-select-set-1 | pkg1/pd0/anchor_cpu64 -> 4 physical cores -> core64:64/192 core65:65/193 core66:66/194 core67:67/195 +intel-speed-select-set-1 | pkg1/pd1/anchor_cpu96 -> 4 physical cores -> core96:96/224 core97:97/225 core98:98/226 core99:99/227 +intel-speed-select-set-1 | HP_EFFECTIVE=0-3,32-35,64-67,96-99,128-131,160-163,192-195,224-227 +intel-speed-select-set-1 | ------------------------------------------------------------ +intel-speed-select-set-1 | Computed CPU lists +intel-speed-select-set-1 | ------------------------------------------------------------ +intel-speed-select-set-1 | HP effective : 0-3,32-35,64-67,96-99,128-131,160-163,192-195,224-227 +intel-speed-select-set-1 | HP CPU count : 32 +intel-speed-select-set-1 | Non-HP : 4-31,36-63,68-95,100-127,132-159,164-191,196-223,228-255 +intel-speed-select-set-1 | +intel-speed-select-set-1 | PCT active packages/sockets : 2 +intel-speed-select-set-1 | PCT reporting anchors : 4 +intel-speed-select-set-1 | PCT cores per package/socket : 8 +intel-speed-select-set-1 | PCT physical core budget : 16 +intel-speed-select-set-1 | PCT max frequency : 4600 MHz +intel-speed-select-set-1 | +intel-speed-select-set-1 | Expected HP CPU count for this INCLUDE_HT setting: 32 +intel-speed-select-set-1 | +intel-speed-select-set-1 | ------------------------------------------------------------ +intel-speed-select-set-1 | Apply CLOS assignments (overwrite existing BIOS/runtime mapping) +intel-speed-select-set-1 | ------------------------------------------------------------ +intel-speed-select-set-1 | Setting ALL CPUs -> CLOS2 first +intel-speed-select-set-1 | Setting selected HP CPUs -> CLOS0 +intel-speed-select-set-1 | Applied. 
+intel-speed-select-set-1 | +intel-speed-select-set-1 | ------------------------------------------------------------ +intel-speed-select-set-1 | Verification (concise CPU->CLOS) +intel-speed-select-set-1 | ------------------------------------------------------------ +intel-speed-select-set-1 | HP list should be clos:0 +intel-speed-select-set-1 | cpu-0 clos:0 +intel-speed-select-set-1 | cpu-1 clos:0 +intel-speed-select-set-1 | … (showing first 2 lines) +intel-speed-select-set-1 | +intel-speed-select-set-1 | Non-HP list should be clos:2 +intel-speed-select-set-1 | cpu-4 clos:2 +intel-speed-select-set-1 | cpu-5 clos:2 +intel-speed-select-set-1 | … (showing first 2 lines) +intel-speed-select-set-1 | +intel-speed-select-set-1 | Done. +``` + +
+ +After applying the set flow, run the check flow again. +The check output should show: + +```text +clos:0 -> 32 CPUs +clos:0 CPU list: 0-3,32-35,64-67,96-99,128-131,160-163,192-195,224-227 +Expected PCT logical CPU budget : 32 +✅ CLOS0 CPU count exactly matches the bucket-0 PCT logical budget. +``` + +## 4. Benchmark CLOS0 CPUs with Host PerfSpect + +Use Docker only to configure and verify PCT/CLOS. Run PerfSpect on the host so +the frequency benchmark can access host CPU frequency interfaces directly. + +### Prerequisites + +
+ Details + +The host benchmark script reads the CPU list generated by the check profile: + +```bash +./results/clos0_cpulist.txt +``` + +Install PerfSpect on the host first: + +```bash +mkdir -p "${HOME}/tools" +cd "${HOME}/tools" + +wget -qO- https://github.com/intel/PerfSpect/releases/latest/download/perfspect.tgz | tar -xz + +sudo ln -sf "${HOME}/tools/perfspect/perfspect" /usr/local/bin/perfspect +``` + +Confirm it is available: + +```bash +which perfspect +perfspect --help | head +``` + +
+ +### Run the benchmark + +Run the full flow: + +```bash +docker compose --progress=plain --profile set up --abort-on-container-exit +docker compose --progress=plain --profile check up --abort-on-container-exit + +./run_host_perfspect_benchmark.sh +``` + +
+ Details + +Default host benchmark command: + +```bash +sudo taskset -c "${CLOS_CPUS}" perfspect benchmark --speed --frequency --no-summary --output <output_dir> +``` + +Override the PerfSpect benchmark options with `PERFSPECT_ARGS`: + +```bash +PERFSPECT_ARGS="--speed --frequency --memory --no-summary" \ +./run_host_perfspect_benchmark.sh +``` +
+ +### Analyze results + +Benchmark output is written under: + +```bash +./results/perfspect_host_clos0_<timestamp>/ +``` + +The directory includes: + +```text +clos0_cpulist.txt +perfspect_benchmark.log +perfspect/ +``` +Check the Frequency section in the HTML report. +The diagram below shows the frequency on a Xeon 6776P with PCT enabled. +image + +This is the expected pattern: small active core counts hold the highest PCT turbo +frequency, and frequency gradually steps down as more physical cores become active. + +## 5. Debug / Manual Inspection (Optional) + +
+ Debug Details + +This section is useful for **troubleshooting**, **validation**, or **manual experimentation** +with Intel® Speed Select and PCT behavior. + +Start an interactive shell with the required tools installed: + +```bash +docker compose run --rm intel-speed-select-shell +``` + +Useful commands: + +```bash +intel-speed-select --info +intel-speed-select turbo-freq info -l 1 +intel-speed-select core-power info +intel-speed-select -c 0 core-power get-assoc +``` +
diff --git a/hardware/priority_core_turbo/check_pct_status.sh b/hardware/priority_core_turbo/check_pct_status.sh
new file mode 100644
index 0000000..bbc4224
--- /dev/null
+++ b/hardware/priority_core_turbo/check_pct_status.sh
@@ -0,0 +1,550 @@
+#!/usr/bin/env bash
+#
+# check_pct_status.sh — verify Intel Priority Core Turbo (PCT) / CLOS status.
+#
+# Latest behavior:
+#   - Parse SST-TF bucket capacity from:
+#       intel-speed-select turbo-freq info -l <tdp_level>
+#   - Treat bucket-0 as the PCT bucket by default
+#   - Correctly count PCT capacity ONCE PER PACKAGE/SOCKET, not once per powerdomain anchor
+#   - Report PCT reporting anchors, active packages, PCT cores/package, physical/logical budget
+#   - Print current CLOS distribution
+#   - Print TARGET_CLOS CPU list and compare its count to PCT logical budget
+#
+# Config via env:
+#   TARGET_CLOS=0   # which CLOS to print as HP/PCT list
+#   CHUNK=64        # CPUs per get-assoc call (positive integer)
+#   HP_BUCKET=0     # bucket-0 is the PCT bucket by default
+#   TDP_LEVEL=1     # intel-speed-select turbo-freq info -l <tdp_level>
+#   DEBUG_MAP=0     # 1 = show tmp_map with invisible chars
+#   RESULTS_DIR=/workspace/benchmarks/results   # directory for clos<N>_cpulist.txt
+#
+# Important interpretation:
+#   - intel-speed-select may repeat bucket-0/1/2 under multiple powerdomain anchors.
+#   - Do NOT sum bucket-0 across all anchors.
+#   - For PCT capacity, group by package/socket and count bucket-0 once per package.
+#
+
+set -euo pipefail
+
+TARGET_CLOS="${TARGET_CLOS:-0}"
+CHUNK="${CHUNK:-64}"
+HP_BUCKET="${HP_BUCKET:-0}"
+TDP_LEVEL="${TDP_LEVEL:-1}"
+DEBUG_MAP="${DEBUG_MAP:-0}"
+RESULTS_DIR="${RESULTS_DIR:-/workspace/benchmarks/results}"
+
+# Use sudo only when the script is not already running as root.
+SUDO=""
+if [ "$(id -u)" -ne 0 ]; then
+  SUDO="sudo"
+fi
+
+# Print a section banner around the title in $1.
+print_header() {
+  echo "------------------------------------------------------------"
+  echo "$1"
+  echo "------------------------------------------------------------"
+}
+
+# Print an error message to stderr and exit with status 1.
+die() {
+  echo "ERROR: $*" >&2
+  exit 1
+}
+
+# Abort unless $2 (the value of the setting named $1) is a non-negative integer.
+require_uint() {
+  [[ "$2" =~ ^[0-9]+$ ]] || die "$1 must be a non-negative integer (got '$2')"
+}
+
+# Validate numeric settings up front. CHUNK=0 would make the get-assoc loop
+# below spin forever, and a non-numeric value would otherwise abort mid-run
+# with an opaque arithmetic error.
+require_uint TARGET_CLOS "$TARGET_CLOS"
+require_uint CHUNK "$CHUNK"
+require_uint HP_BUCKET "$HP_BUCKET"
+require_uint TDP_LEVEL "$TDP_LEVEL"
+CHUNK=$(( 10#$CHUNK ))  # force base 10 so a value like "064" is not read as octal
+(( CHUNK > 0 )) || die "CHUNK must be greater than 0"
+
+mkdir -p "${RESULTS_DIR}"
+
+# Print how many CPUs a cpulist such as "0-3,32-35" names (8 for that input).
+count_cpulist() {
+  python3 - "$1" <<'PY'
+import sys
+s = sys.argv[1].strip()
+count = 0
+if s:
+    for part in s.split(','):
+        part = part.strip()
+        if not part:
+            continue
+        if '-' in part:
+            a, b = map(int, part.split('-', 1))
+            count += b - a + 1
+        else:
+            count += 1
+print(count)
+PY
+}
+
+# Print lscpu's "Thread(s) per core" value; fall back to 1 when the field is
+# missing or is not a positive integer.
+get_threads_per_core() {
+  local tpc
+  tpc="$(lscpu | awk -F: '/Thread\(s\) per core/{gsub(/[[:space:]]/,"",$2); print $2}' | head -n1)"
+  if [[ "${tpc:-}" =~ ^[1-9][0-9]*$ ]]; then
+    echo "$tpc"
+  else
+    echo 1
+  fi
+}
+
+# Run `core-power get-assoc` for the cpulist in $1 and print one "<cpu> <clos>"
+# line per clos: entry. The parser remembers the most recent cpu-N line and
+# pairs it with the next clos:M line; "?" is printed if no cpu-N line preceded it.
+get_assoc_for_cpulist() {
+  local cpu_list="$1"
+  local line cur_cpu=""
+  $SUDO intel-speed-select -c "$cpu_list" core-power get-assoc 2>&1 |
+    while IFS= read -r line; do
+      if [[ "$line" =~ cpu-([0-9]+) ]]; then
+        cur_cpu="${BASH_REMATCH[1]}"
+      fi
+      if [[ "$line" =~ clos:([0-9]+) ]]; then
+        printf "%s %s\n" "${cur_cpu:-?}" "${BASH_REMATCH[1]}"
+      fi
+    done
+}
+
+# Print PCT capacity as KEY=VALUE lines: PCT_BUCKET, PCT_REPORTING_ANCHORS,
+# PCT_ACTIVE_PACKAGES, PCT_CORES_PER_PACKAGE, PCT_TOTAL_PHYSICAL_CORES,
+# PCT_MAX_FREQ_MHZ, PCT_DOMAIN_ANCHORS, PCT_PACKAGE_SUMMARY.
+#   $1  TDP level passed to `turbo-freq info -l` (default 1)
+#   $2  SST-TF bucket index treated as the PCT bucket (default 0)
+# Zero/empty values are printed when no usable high-priority bucket is reported.
+detect_pct_capacity() {
+  local tdp_level="${1:-1}"
+  local bucket="${2:-0}"
+  local out
+
+  out="$($SUDO intel-speed-select turbo-freq info -l "$tdp_level" 2>&1 || true)"
+
+  # Here-string instead of `echo | grep -q`: with pipefail, grep -q exiting
+  # early can kill echo with SIGPIPE and make a successful match look failed.
+  if ! grep -q "high-priority-cores-count" <<< "$out"; then
+    echo "PCT_BUCKET=bucket-${bucket}"
+    echo "PCT_REPORTING_ANCHORS=0"
+    echo "PCT_ACTIVE_PACKAGES=0"
+    echo "PCT_CORES_PER_PACKAGE=0"
+    echo "PCT_TOTAL_PHYSICAL_CORES=0"
+    echo "PCT_MAX_FREQ_MHZ=0"
+    echo "PCT_DOMAIN_ANCHORS="
+    echo "PCT_PACKAGE_SUMMARY="
+    return 0
+  fi
+
+  PCT_TF_OUT="$out" python3 - "$bucket" <<'PY'
+import os
+import re
+import sys
+from collections import defaultdict
+
+want_bucket = f"bucket-{sys.argv[1]}"
+lines = os.environ.get('PCT_TF_OUT', '').splitlines()
+
+anchors = []
+cur_pkg = None
+cur_die = None
+cur_pd = None
+cur_cpu = None
+cur_bucket = None
+
+for line in lines:
+    s = line.strip()
+
+    m = re.match(r'package-(\d+)', s)
+    if m:
+        cur_pkg = int(m.group(1))
+        continue
+
+    m = re.match(r'die-(\d+)', s)
+    if m:
+        cur_die = int(m.group(1))
+        continue
+
+    m = re.match(r'powerdomain-(\d+)', s)
+    if m:
+        cur_pd = int(m.group(1))
+        cur_cpu = None
+        continue
+
+    if s == 'cpu-None':
+        cur_cpu = None
+        continue
+
+    m = re.match(r'cpu-(\d+)', s)
+    if m:
+        cur_cpu = int(m.group(1))
+        continue
+
+    m = re.match(r'bucket-(\d+)', s)
+    if m:
+        cur_bucket = f"bucket-{m.group(1)}"
+        continue
+
+    if cur_bucket == want_bucket and 'high-priority-cores-count:' in s:
+        count = int(re.sub(r'.*high-priority-cores-count:\s*', '', s).split()[0])
+        anchors.append({
+            'package': cur_pkg,
+            'die': cur_die,
+            'powerdomain': cur_pd,
+            'cpu': cur_cpu,
+            'count': count,
+            'freq': None,
+        })
+        continue
+
+    if cur_bucket == want_bucket and 'high-priority-max-level-0-frequency(MHz):' in s:
+        freq = int(re.sub(r'.*frequency\(MHz\):\s*', '', s).split()[0])
+        if anchors:
+            anchors[-1]['freq'] = freq
+        continue
+
+# Keep only anchors that have a real CPU. cpu-None sections do not expose HP buckets.
+active_anchors = [d for d in anchors if d['cpu'] is not None]
+
+if not active_anchors:
+    print(f'PCT_BUCKET={want_bucket}')
+    print('PCT_REPORTING_ANCHORS=0')
+    print('PCT_ACTIVE_PACKAGES=0')
+    print('PCT_CORES_PER_PACKAGE=0')
+    print('PCT_TOTAL_PHYSICAL_CORES=0')
+    print('PCT_MAX_FREQ_MHZ=0')
+    print('PCT_DOMAIN_ANCHORS=')
+    print('PCT_PACKAGE_SUMMARY=')
+    raise SystemExit(0)
+
+# Correct capacity model:
+#   Bucket data can repeat under multiple powerdomain anchors.
+#   PCT capacity is counted once per package/socket.
+by_pkg = defaultdict(list)
+for d in active_anchors:
+    by_pkg[d['package']].append(d)
+
+pkg_counts = {}
+pkg_freqs = {}
+for pkg, ds in sorted(by_pkg.items()):
+    counts = sorted(set(d['count'] for d in ds))
+    freqs = sorted(set(d['freq'] for d in ds if d['freq'] is not None))
+    # Normally all reporting anchors in a package agree.
+    # If not, use the smallest count as the safe PCT bucket capacity for that package.
+    pkg_counts[pkg] = min(counts) if counts else 0
+    pkg_freqs[pkg] = max(freqs) if freqs else None
+
+active_packages = len(pkg_counts)
+total_physical = sum(pkg_counts.values())
+all_counts = sorted(set(pkg_counts.values()))
+all_freqs = sorted(set(v for v in pkg_freqs.values() if v is not None))
+
+domain_anchors = ','.join(
+    f"pkg{d['package']}/die{d['die']}/pd{d['powerdomain']}/cpu{d['cpu']}:cores{d['count']}:freq{d['freq']}"
+    for d in active_anchors
+)
+
+package_summary = ','.join(
+    f"pkg{pkg}:cores{pkg_counts[pkg]}:freq{pkg_freqs[pkg]}:anchors{len(by_pkg[pkg])}"
+    for pkg in sorted(pkg_counts)
+)
+
+print(f'PCT_BUCKET={want_bucket}')
+print(f'PCT_REPORTING_ANCHORS={len(active_anchors)}')
+print(f'PCT_ACTIVE_PACKAGES={active_packages}')
+print(f"PCT_CORES_PER_PACKAGE={','.join(map(str, all_counts))}")
+print(f'PCT_TOTAL_PHYSICAL_CORES={total_physical}')
+print(f"PCT_MAX_FREQ_MHZ={','.join(map(str, all_freqs)) if all_freqs else 'unknown'}")
+print(f'PCT_DOMAIN_ANCHORS={domain_anchors}')
+print(f'PCT_PACKAGE_SUMMARY={package_summary}')
+PY
+}
+
+# Load the KEY=VALUE lines in $1 (as printed by detect_pct_capacity) into
+# same-named global variables. Only PCT_* identifiers are accepted and values
+# are assigned literally, so tool-derived text is never evaluated as shell code.
+load_pct_capacity() {
+  local key value
+  while IFS='=' read -r key value; do
+    if [[ "$key" =~ ^PCT_[A-Z_]+$ ]]; then
+      printf -v "$key" '%s' "$value"
+    fi
+  done <<< "$1"
+}
+
+# --- 1. Basic CPU / tool check -------------------------------------------
+
+print_header "CPU and Intel Speed Select Capability"
+
+if ! command -v intel-speed-select &>/dev/null; then
+  echo "❌ intel-speed-select not found. Please install/build it first."
+  exit 1
+fi
+
+command -v python3 >/dev/null 2>&1 || die "python3 not found"
+command -v lscpu >/dev/null 2>&1 || die "lscpu not found"
+
+$SUDO intel-speed-select --info 2>&1 | grep -E "Intel|Executing|Supported|Features" || true
+echo
+
+# --- 2. Check Turbo Frequency / PCT bucket capacity -----------------------
+
+print_header "PCT Capacity from SST-TF bucket-${HP_BUCKET}"
+
+TF_OUT="$($SUDO intel-speed-select turbo-freq info -l "$TDP_LEVEL" 2>&1 || true)"
+
+if grep -qi "Invalid command: specify tdp_level" <<< "$TF_OUT"; then
+  echo "⚠️ Multiple TDP levels detected. Set TDP_LEVEL and retry."
+  echo " Example: TDP_LEVEL=0 $0"
+elif grep -qi "Failed to get turbo-freq info" <<< "$TF_OUT"; then
+  echo "⚠️ turbo-freq info failed at TDP_LEVEL=${TDP_LEVEL}."
+elif grep -qi "high-priority" <<< "$TF_OUT"; then
+  echo "✅ PCT/SST-TF turbo tables detected."
+else
+  echo "⚠️ turbo-freq data not returned. PCT turbo tables may be unavailable or BIOS not configured."
+fi
+
+PCT_CAPACITY="$(detect_pct_capacity "$TDP_LEVEL" "$HP_BUCKET")"
+echo "$PCT_CAPACITY"
+
+# Defaults keep every PCT_* name defined under `set -u` even if a key is
+# missing from the detection output.
+PCT_BUCKET="bucket-${HP_BUCKET}"
+PCT_REPORTING_ANCHORS=0
+PCT_ACTIVE_PACKAGES=0
+PCT_CORES_PER_PACKAGE=0
+PCT_TOTAL_PHYSICAL_CORES=0
+PCT_MAX_FREQ_MHZ=0
+PCT_DOMAIN_ANCHORS=""
+PCT_PACKAGE_SUMMARY=""
+load_pct_capacity "$PCT_CAPACITY"
+
+THREADS_PER_CORE="$(get_threads_per_core)"
+
+if [[ "${PCT_TOTAL_PHYSICAL_CORES:-0}" =~ ^[0-9]+$ ]]; then
+  PCT_TOTAL_LOGICAL_CPUS=$(( PCT_TOTAL_PHYSICAL_CORES * THREADS_PER_CORE ))
+else
+  PCT_TOTAL_LOGICAL_CPUS=0
+fi
+
+echo "THREADS_PER_CORE=${THREADS_PER_CORE}"
+echo "PCT_TOTAL_LOGICAL_CPUS=${PCT_TOTAL_LOGICAL_CPUS}"
+echo
+
+# --- 3. Check Core Power / CLOS status ------------------------------------
+
+print_header "Core Power (CLOS) Feature Status"
+
+CP_OUT="$($SUDO intel-speed-select core-power info 2>&1 || true)"
+
+CORE_POWER_ENABLED=0
+CLOS_ENABLED=0
+
+if grep -q "support-status:supported" <<< "$CP_OUT"; then
+  # Anchor the match so "clos-enable-status:enabled" is not mistaken for the
+  # Core Power "enable-status:enabled" line.
+  if grep -Eq '(^|[[:space:]])enable-status:enabled' <<< "$CP_OUT"; then
+    CORE_POWER_ENABLED=1
+    echo "✅ Core Power feature ENABLED"
+  else
+    echo "⚠️ Core Power supported but DISABLED in BIOS/runtime"
+  fi
+
+  if grep -q "clos-enable-status:enabled" <<< "$CP_OUT"; then
+    CLOS_ENABLED=1
+    echo "✅ CLOS ENABLED"
+  else
+    echo "⚠️ CLOS disabled"
+  fi
+else
+  echo "❌ Core Power not supported on this system"
+fi
+echo
+
+# --- 4. Enumerate CPU -> CLOS mapping -------------------------------------
+
+print_header "CPU -> CLOS Mapping via get-assoc"
+
+MAX_CPU="$(lscpu -p=CPU | grep -v '^#' | cut -d, -f1 | sort -n | tail -n 1 || true)"
+if [[ -z "${MAX_CPU:-}" ]]; then
+  echo "❌ Could not determine CPU range from lscpu."
+  exit 1
+fi
+
+if ! $SUDO intel-speed-select -c 0 core-power get-assoc >/dev/null 2>&1; then
+  echo "❌ This intel-speed-select build does not support: core-power get-assoc"
+  exit 1
+fi
+
+tmp_map="$(mktemp)"
+trap 'rm -f "$tmp_map"' EXIT
+
+# Query get-assoc in CHUNK-sized CPU ranges and collect "<cpu> <clos>" pairs.
+start=0
+while (( start <= MAX_CPU )); do
+  end=$(( start + CHUNK - 1 ))
+  if (( end > MAX_CPU )); then
+    end="$MAX_CPU"
+  fi
+  range="${start}-${end}"
+  get_assoc_for_cpulist "$range" >> "$tmp_map"
+  start=$(( end + 1 ))
+done
+
+if [[ "$DEBUG_MAP" == "1" ]]; then
+  echo "DEBUG_MAP=1: Showing first 40 tmp_map lines with invisible chars:"
+  # head first, so cat is never cut off by SIGPIPE under pipefail.
+  head -n 40 "$tmp_map" | cat -A
+  echo
+fi
+
+echo "CLOS distribution (count by clos id):"
+python3 - "$tmp_map" <<'PY'
+import re
+import sys
+path = sys.argv[1]
+counts = {}
+with open(path, 'r', errors='replace') as f:
+    for line in f:
+        parts = line.strip().split()
+        if len(parts) < 2:
+            continue
+        clos = re.sub(r'[^0-9]', '', parts[1])
+        if not clos:
+            continue
+        counts[clos] = counts.get(clos, 0) + 1
+for k in sorted(counts, key=lambda x: int(x)):
+    print(f' clos:{k} -> {counts[k]} CPUs')
+PY
+echo
+
+# --- 5. Print target CLOS list and validate against PCT budget ------------
+
+print_header "CPU list for TARGET_CLOS=${TARGET_CLOS}"
+
+CLOS_LINE="$(
+python3 - "$tmp_map" "$TARGET_CLOS" <<'PY'
+import re
+import sys
+path = sys.argv[1]
+target = str(sys.argv[2])
+cpus = []
+with open(path, 'r', errors='replace') as f:
+    for line in f:
+        parts = line.strip().split()
+        if len(parts) < 2:
+            continue
+        cpu = re.sub(r'[^0-9]', '', parts[0])
+        clos = re.sub(r'[^0-9]', '', parts[1])
+        if not cpu or not clos:
+            continue
+        if clos == target:
+            cpus.append(int(cpu))
+cpus = sorted(set(cpus))
+if not cpus:
+    print(f'⚠️ No CPUs currently report clos:{target}.')
+    raise SystemExit(0)
+res = []
+i = 0
+while i < len(cpus):
+    j = i
+    while j + 1 < len(cpus) and cpus[j + 1] == cpus[j] + 1:
+        j += 1
+    res.append(str(cpus[i]) if i == j else f'{cpus[i]}-{cpus[j]}')
+    i = j + 1
+print(f"clos:{target} CPU list: {','.join(res)}")
+PY
+)"
+
+echo "${CLOS_LINE}"
+
+OUT_FILE="${RESULTS_DIR}/clos${TARGET_CLOS}_cpulist.txt"
+
+if [[ "${CLOS_LINE}" =~ ^clos:${TARGET_CLOS}[[:space:]]CPU[[:space:]]list:\ (.*)$ ]]; then
+  CLOS_LIST="${BASH_REMATCH[1]}"
+  echo "${CLOS_LIST}" > "${OUT_FILE}"
+  echo "Wrote clos:${TARGET_CLOS} CPU list to ${OUT_FILE}"
+
+  CLOS_CPU_COUNT="$(count_cpulist "$CLOS_LIST")"
+  echo
+  print_header "PCT Budget Validation for CLOS${TARGET_CLOS}"
+  echo "CLOS${TARGET_CLOS} CPU count : ${CLOS_CPU_COUNT}"
+  echo "PCT bucket : ${PCT_BUCKET:-bucket-${HP_BUCKET}}"
+  echo "PCT reporting anchors : ${PCT_REPORTING_ANCHORS:-0}"
+  echo "PCT active packages/sockets : ${PCT_ACTIVE_PACKAGES:-0}"
+  echo "PCT cores per package/socket : ${PCT_CORES_PER_PACKAGE:-0}"
+  echo "PCT physical core budget : ${PCT_TOTAL_PHYSICAL_CORES:-0}"
+  echo "PCT max frequency : ${PCT_MAX_FREQ_MHZ:-0} MHz"
+  echo "Threads per core : ${THREADS_PER_CORE}"
+  echo "Expected PCT logical CPU budget : ${PCT_TOTAL_LOGICAL_CPUS}"
+
+  if (( PCT_TOTAL_LOGICAL_CPUS == 0 )); then
+    echo "⚠️ Could not validate CLOS${TARGET_CLOS} count because PCT logical budget is 0/unknown."
+  elif (( CLOS_CPU_COUNT > PCT_TOTAL_LOGICAL_CPUS )); then
+    echo "⚠️ CLOS${TARGET_CLOS} has more CPUs than the bucket-${HP_BUCKET} PCT logical budget."
+    echo " This may fall into a lower SST-TF TRL bucket instead of true PCT frequency."
+  elif (( CLOS_CPU_COUNT == PCT_TOTAL_LOGICAL_CPUS )); then
+    echo "✅ CLOS${TARGET_CLOS} CPU count exactly matches the bucket-${HP_BUCKET} PCT logical budget."
+  else
+    echo "✅ CLOS${TARGET_CLOS} CPU count is within the bucket-${HP_BUCKET} PCT logical budget."
+    echo " This is a subset of PCT-capable logical CPUs."
+  fi
+
+  if (( CORE_POWER_ENABLED == 0 || CLOS_ENABLED == 0 )); then
+    echo
+    echo "⚠️ CLOS assignments are visible, but Core Power/CLOS enforcement is not fully enabled."
+    echo " get-assoc can report mappings even when core-power info says disabled."
+    echo " For PCT enforcement, run the set flow or enable Core Power/CLOS before benchmarking."
+  fi
+else
+  echo "WARNING: Did not write clos list file; unexpected output: ${CLOS_LINE}" >&2
+  # Remove a list left by an earlier run so later consumers (for example the
+  # host benchmark) cannot pick up CPUs that are no longer in this CLOS.
+  if [[ -e "${OUT_FILE}" ]]; then
+    rm -f "${OUT_FILE}"
+    echo "WARNING: Removed stale ${OUT_FILE}" >&2
+  fi
+fi
+
+echo
+
+# --- 6. Friendly summary ---------------------------------------------------
+
+print_header "Summary"
+
+if grep -qi "high-priority" <<< "$TF_OUT"; then
+  echo "✅ PCT turbo tables detected"
+else
+  echo "⚠️ PCT turbo tables not confirmed via turbo-freq output"
+fi
+
+if [[ "${PCT_TOTAL_PHYSICAL_CORES:-0}" != "0" ]]; then
+  echo "✅ PCT capacity detected: ${PCT_TOTAL_PHYSICAL_CORES} physical HP cores total, ${PCT_TOTAL_LOGICAL_CPUS} logical CPUs with HT=${THREADS_PER_CORE}"
+  echo " Count model: bucket-${HP_BUCKET} counted once per package/socket, not once per powerdomain anchor."
+else
+  echo "⚠️ PCT capacity not detected"
+fi
+
+if (( CORE_POWER_ENABLED == 1 )); then
+  echo "✅ Core Power enabled"
+else
+  echo "❌ Core Power disabled"
+fi
+
+if (( CLOS_ENABLED == 1 )); then
+  echo "✅ CLOS enabled"
+else
+  echo "❌ CLOS disabled"
+fi
+
+echo "Done."
diff --git a/hardware/priority_core_turbo/docker-compose.yml b/hardware/priority_core_turbo/docker-compose.yml
new file mode 100644
index 0000000..1b5f586
--- /dev/null
+++ b/hardware/priority_core_turbo/docker-compose.yml
@@ -0,0 +1,68 @@
+# Shared definition reused by every service below via the YAML merge key.
+x-intel-speed-select-base: &base
+  image: intel-speed-select:${KERNEL_MM:-6.8}
+  build:
+    context: .
+    dockerfile: Dockerfile.sst
+    args:
+      KERNEL_TAG: ${KERNEL_TAG:-v6.8}
+      # ":-" defaults to empty so Compose does not warn when no proxy is set.
+      http_proxy: ${http_proxy:-}
+      https_proxy: ${https_proxy:-}
+      no_proxy: ${no_proxy:-}
+  privileged: true
+  user: "0:0"
+  tty: true
+  stdin_open: true
+  volumes:
+    - /dev/cpu:/dev/cpu
+    - /sys:/sys
+    - "${PWD}/check_pct_status.sh:/workspace/check_pct_status.sh:ro"
+    - "${PWD}/pct_map_and_set_clos.sh:/workspace/pct_map_and_set_clos.sh:ro"
+    - "${PWD}/results:/workspace/benchmarks/results"
+  environment:
+    http_proxy: ${http_proxy:-}
+    https_proxy: ${https_proxy:-}
+    no_proxy: ${no_proxy:-}
+    # Script settings. A key without a value is passed through from the host
+    # environment when set there and left unset otherwise, so the scripts'
+    # own defaults apply. Without these entries a host-side override such as
+    # `DRY_RUN=1 docker compose ...` would never reach the script.
+    TARGET_CLOS:
+    CHUNK:
+    HP_BUCKET:
+    TDP_LEVEL:
+    HP_PER_PACKAGE:
+    HP_PER_DOMAIN:
+    INCLUDE_HT:
+    HP_CLOS:
+    OTHER_CLOS:
+    DRY_RUN:
+    DEBUG_MODE:
+    DEBUG_VERBOSE:
+    DEBUG_MAP:
+    SHOW_VERIFY_LINES:
+  working_dir: /workspace
+  entrypoint: ["bash", "-lc"]
+
+services:
+  intel-speed-select-check:
+    <<: *base
+    profiles: ["check"]
+    command: ["bash /workspace/check_pct_status.sh"]
+
+  intel-speed-select-set:
+    <<: *base
+    profiles: ["set"]
+    command: ["bash /workspace/pct_map_and_set_clos.sh"]
+
+  intel-speed-select-unset:
+    <<: *base
+    profiles: ["unset"]
+    command: ["ACTION=unset bash /workspace/pct_map_and_set_clos.sh"]
+
+  # No profile: services that declare a profile are skipped unless it is
+  # enabled, so this one is what plain `docker compose build` / `run` select.
+  intel-speed-select-shell:
+    <<: *base
+    command: ["bash"]
diff --git a/hardware/priority_core_turbo/pct_map_and_set_clos.sh b/hardware/priority_core_turbo/pct_map_and_set_clos.sh
new file mode 100644
index 0000000..e70b30c
--- /dev/null
+++ b/hardware/priority_core_turbo/pct_map_and_set_clos.sh
@@ -0,0 +1,565 @@
+#!/usr/bin/env bash
+# pct_map_and_set_clos.sh
+#
+# SET mode (default):
+# - Detect PCT capacity from `intel-speed-select turbo-freq info -l <tdp_level>`.
+# - Treat bucket-0 as the PCT bucket by default.
+# - Count PCT capacity ONCE PER PACKAGE/SOCKET, not once per powerdomain anchor.
+# - Dispatch the package-level PCT physical-core budget across that package's
+#   PCT reporting powerdomain anchors.
+# - Select contiguous physical CPUs starting from each reporting anchor CPU.
+# - Include Hyper-Threading siblings by default.
+# - Overwrite existing BIOS/runtime CLOS assignment:
+#   1. all online CPUs -> OTHER_CLOS
+#   2. selected HP CPUs -> HP_CLOS
+#
+# Example on a 2-socket system:
+#   bucket-0 reports 8 PCT physical cores per package.
+#   turbo-freq output has 2 reporting anchors per package:
+#     package 0: cpu0, cpu32
+#     package 1: cpu64, cpu96
+#   Default dispatch:
+#     package 0: 4 physical cores from cpu0 + 4 from cpu32
+#     package 1: 4 physical cores from cpu64 + 4 from cpu96
+#   With INCLUDE_HT=1:
+#     HP effective: 0-3,32-35,64-67,96-99,128-131,160-163,192-195,224-227
+#
+# UNSET mode:
+# - Set ALL CPUs -> OTHER_CLOS
+# - Disable core-power / CLOS best-effort across intel-speed-select builds
+#
+set -euo pipefail
+
+ACTION="${ACTION:-set}" # set | unset
+HP_BUCKET="${HP_BUCKET:-0}" # SST-TF bucket index treated as the PCT bucket
+TDP_LEVEL="${TDP_LEVEL:-1}" # level passed to "turbo-freq info -l"
+
+# Optional override. If unset/0, use bucket-0 cores per package from SST-TF.
+# HP_PER_DOMAIN is accepted as a backward-compatible alias, but is interpreted
+# as per-package budget, not per-domain budget.
+HP_PER_PACKAGE="${HP_PER_PACKAGE:-${HP_PER_DOMAIN:-}}"
+
+INCLUDE_HT="${INCLUDE_HT:-1}" # 1 = also take every sibling logical CPU of a selected core
+HP_CLOS="${HP_CLOS:-0}" # CLOS id assigned to the selected HP CPUs
+OTHER_CLOS="${OTHER_CLOS:-2}" # CLOS id assigned to every CPU first (and in UNSET mode)
+
+DEBUG_MODE="${DEBUG_MODE:-0}" # 1 = compute and print only, exit before any change
+DRY_RUN="${DRY_RUN:-0}" # 1 = print the commands that would run, then exit
+DEBUG_VERBOSE="${DEBUG_VERBOSE:-0}" # 1 = trace enable/disable attempts and show assoc output
+DEBUG_MAP="${DEBUG_MAP:-0}"
+SHOW_VERIFY_LINES="${SHOW_VERIFY_LINES:-40}" # max lines shown per verification listing
+
+SUDO=""
+[[ "$(id -u)" -ne 0 ]] && SUDO="sudo" # prefix the tool with sudo when not running as root
+ISS="${ISS:-$SUDO intel-speed-select}" # command line used for every intel-speed-select call
+
+print_header() { # Print "$1" between two separator rules.
+  echo "------------------------------------------------------------"
+  echo "$1"
+  echo "------------------------------------------------------------"
+}
+
+die() { echo "ERROR: $*" >&2; exit 1; } # Report an error on stderr and exit 1.
+
+count_cpulist() { # Print how many CPUs the cpulist "$1" (e.g. "0-3,8") names.
+  python3 - "$1" <<'PY'
+import sys
+s = sys.argv[1].strip()
+count = 0
+if s:
+    for part in s.split(','):
+        part = part.strip()
+        if not part:
+            continue
+        if '-' in part:
+            a, b = map(int, part.split('-', 1))
+            count += b - a + 1  # ranges are inclusive
+        else:
+            count += 1
+print(count)
+PY
+}
+
+build_non_hp_ranges() { # Print cpulist "$2" minus cpulist "$1", compressed into ranges.
+  local hp_list="$1"
+  local all_cpus="$2"
+  HP_LIST="$hp_list" ALL_CPUS="$all_cpus" python3 - <<'PY'
+import os
+hp = os.environ['HP_LIST']
+all_ = os.environ['ALL_CPUS']
+
+def expand(s):  # "0-2,5" -> {0, 1, 2, 5}
+    out = set()
+    for part in s.split(','):
+        part = part.strip()
+        if not part:
+            continue
+        if '-' in part:
+            a, b = part.split('-', 1)
+            out.update(range(int(a), int(b) + 1))
+        else:
+            out.add(int(part))
+    return out
+
+hp_set = expand(hp)
+all_set = expand(all_)
+non = sorted(all_set - hp_set)
+res = []
+i = 0
+while i < len(non):  # fold runs of consecutive ids into "a-b"
+    j = i
+    while j + 1 < len(non) and non[j + 1] == non[j] + 1:
+        j += 1
+    res.append(str(non[i]) if i == j else f"{non[i]}-{non[j]}")
+    i = j + 1
+print(','.join(res))
+PY
+}
+
+detect_pct_capacity() { # Args: [tdp_level=1] [bucket=0]. Prints PCT_*=value lines (eval'ed by the caller).
+  local tdp_level="${1:-1}"
+  local bucket="${2:-0}"
+  local out
+
+  out="$($ISS turbo-freq info -l "$tdp_level" 2>&1 || true)"
+
+  if ! echo "$out" | grep -q "high-priority-cores-count"; then
+    echo "PCT_BUCKET=bucket-${bucket}"
+    echo "PCT_REPORTING_ANCHORS=0"
+    echo "PCT_ACTIVE_PACKAGES=0"
+    echo "PCT_CORES_PER_PACKAGE=0"
+    echo "PCT_TOTAL_PHYSICAL_CORES=0"
+    echo "PCT_MAX_FREQ_MHZ=0"
+    echo "PCT_DOMAIN_ANCHORS="
+    echo "PCT_PACKAGE_SUMMARY="
+    return 0
+  fi
+
+  PCT_TF_OUT="$out" python3 - "$bucket" <<'PY'
+import os, re, sys
+from collections import defaultdict
+
+want_bucket = f"bucket-{sys.argv[1]}"
+lines = os.environ.get('PCT_TF_OUT', '').splitlines()
+anchors = []
+cur_pkg = cur_die = cur_pd = cur_cpu = cur_bucket = None  # parser context, updated as headings are seen
+
+for line in lines:
+    s = line.strip()
+    m = re.match(r'package-(\d+)', s)
+    if m:
+        cur_pkg = int(m.group(1)); continue
+    m = re.match(r'die-(\d+)', s)
+    if m:
+        cur_die = int(m.group(1)); continue
+    m = re.match(r'powerdomain-(\d+)', s)
+    if m:
+        cur_pd = int(m.group(1)); cur_cpu = None; continue
+    if s == 'cpu-None':
+        cur_cpu = None; continue
+    m = re.match(r'cpu-(\d+)', s)
+    if m:
+        cur_cpu = int(m.group(1)); continue
+    m = re.match(r'bucket-(\d+)', s)
+    if m:
+        cur_bucket = f"bucket-{m.group(1)}"; continue
+
+    if cur_bucket == want_bucket and 'high-priority-cores-count:' in s:
+        count = int(re.sub(r'.*high-priority-cores-count:\s*', '', s).split()[0])
+        anchors.append({'package': cur_pkg, 'die': cur_die, 'powerdomain': cur_pd,
+                        'cpu': cur_cpu, 'count': count, 'freq': None})
+        continue
+
+    if cur_bucket == want_bucket and 'high-priority-max-level-0-frequency(MHz):' in s:
+        freq = int(re.sub(r'.*frequency\(MHz\):\s*', '', s).split()[0])
+        if anchors:
+            anchors[-1]['freq'] = freq  # attach to the most recent count record
+        continue
+
+active = [d for d in anchors if d['cpu'] is not None]  # records with no numeric cpu are ignored
+if not active:
+    print(f'PCT_BUCKET={want_bucket}')
+    print('PCT_REPORTING_ANCHORS=0')
+    print('PCT_ACTIVE_PACKAGES=0')
+    print('PCT_CORES_PER_PACKAGE=0')
+    print('PCT_TOTAL_PHYSICAL_CORES=0')
+    print('PCT_MAX_FREQ_MHZ=0')
+    print('PCT_DOMAIN_ANCHORS=')
+    print('PCT_PACKAGE_SUMMARY=')
+    raise SystemExit(0)
+
+by_pkg = defaultdict(list)
+for d in active:
+    by_pkg[d['package']].append(d)
+
+pkg_counts = {}
+pkg_freqs = {}
+for pkg, ds in sorted(by_pkg.items()):
+    counts = sorted(set(d['count'] for d in ds))
+    freqs = sorted(set(d['freq'] for d in ds if d['freq'] is not None))
+    pkg_counts[pkg] = min(counts) if counts else 0  # one budget per package, not a sum over anchors
+    pkg_freqs[pkg] = max(freqs) if freqs else None
+
+all_counts = sorted(set(pkg_counts.values()))
+all_freqs = sorted(set(v for v in pkg_freqs.values() if v is not None))
+domain_anchors = ','.join(
+    f"pkg{d['package']}/die{d['die']}/pd{d['powerdomain']}/cpu{d['cpu']}:cores{d['count']}:freq{d['freq']}"
+    for d in active
+)
+package_summary = ','.join(
+    f"pkg{pkg}:cores{pkg_counts[pkg]}:freq{pkg_freqs[pkg]}:anchors{len(by_pkg[pkg])}"
+    for pkg in sorted(pkg_counts)
+)
+
+print(f'PCT_BUCKET={want_bucket}')
+print(f'PCT_REPORTING_ANCHORS={len(active)}')
+print(f'PCT_ACTIVE_PACKAGES={len(pkg_counts)}')
+print(f"PCT_CORES_PER_PACKAGE={','.join(map(str, all_counts))}")
+print(f'PCT_TOTAL_PHYSICAL_CORES={sum(pkg_counts.values())}')
+print(f"PCT_MAX_FREQ_MHZ={','.join(map(str, all_freqs)) if all_freqs else 'unknown'}")
+print(f'PCT_DOMAIN_ANCHORS={domain_anchors}')
+print(f'PCT_PACKAGE_SUMMARY={package_summary}')
+PY
+}
+
+select_hp_cpus_by_powerdomain_anchors() { # Args: hp_per_package include_ht domain_anchors. Prints the dispatch, then HP_EFFECTIVE=<cpulist>.
+  local hp_per_package="$1"
+  local include_ht="$2"
+  local domain_anchors="$3"
+
+  [[ "$hp_per_package" =~ ^[0-9]+$ ]] || die "HP_PER_PACKAGE must be numeric, got '$hp_per_package'"
+  (( hp_per_package > 0 )) || die "HP_PER_PACKAGE must be > 0"
+  [[ -n "$domain_anchors" ]] || die "PCT_DOMAIN_ANCHORS is empty"
+
+  LSC_CPU_TOPO="$(lscpu -p=CPU,SOCKET,CORE 2>/dev/null | grep -v '^#' || true)"
+  [[ -n "$LSC_CPU_TOPO" ]] || die "Could not read lscpu -p=CPU,SOCKET,CORE"
+
+  LSC_CPU_TOPO="$LSC_CPU_TOPO" \
+  HP_PER_PACKAGE="$hp_per_package" \
+  INCLUDE_HT="$include_ht" \
+  PCT_DOMAIN_ANCHORS="$domain_anchors" \
+  python3 - <<'PY'
+import os, re
+from collections import defaultdict
+
+topo = os.environ['LSC_CPU_TOPO'].splitlines()
+hp_per_package = int(os.environ['HP_PER_PACKAGE'])
+include_ht = os.environ['INCLUDE_HT'] == '1'
+anchors_text = os.environ['PCT_DOMAIN_ANCHORS'].strip()
+
+logical_by_socket_core = defaultdict(list)  # (socket, core) -> logical CPU ids
+for line in topo:
+    line = line.strip()
+    if not line:
+        continue
+    parts = line.split(',')
+    if len(parts) < 3:
+        continue
+    cpu, socket, core = map(int, parts[:3])
+    logical_by_socket_core[(socket, core)].append(cpu)
+
+ordered_cores = defaultdict(list)  # socket -> [(lowest logical cpu, core id, sorted logical cpus)]
+for (socket, core), cpus in logical_by_socket_core.items():
+    cpus = sorted(cpus)
+    ordered_cores[socket].append((cpus[0], core, cpus))
+for socket in ordered_cores:
+    ordered_cores[socket].sort(key=lambda x: x[0])
+
+anchors_by_pkg = defaultdict(list)
+for rec in anchors_text.split(','):
+    rec = rec.strip()
+    if not rec:
+        continue
+    m = re.match(r'pkg(\d+)/die(\d+)/pd(\d+)/cpu(\d+):cores(\d+):freq([^,]+)', rec)
+    if not m:
+        continue
+    pkg = int(m.group(1))
+    die = int(m.group(2))
+    pd = int(m.group(3))
+    anchor_cpu = int(m.group(4))
+    reported_count = int(m.group(5))
+    freq = m.group(6)
+
+    # On this platform package id maps to socket id. If not, fall back by locating anchor_cpu.
+    socket = pkg
+    found = False
+    for s, items in ordered_cores.items():
+        for _, core, logicals in items:
+            if anchor_cpu in logicals:
+                socket = s
+                found = True
+                break
+        if found:
+            break
+    if not found:
+        raise SystemExit(f'ERROR: anchor cpu{anchor_cpu} not found in lscpu topology')
+
+    anchors_by_pkg[pkg].append({
+        'pkg': pkg, 'socket': socket, 'die': die, 'pd': pd,
+        'anchor_cpu': anchor_cpu, 'reported_count': reported_count, 'freq': freq,
+    })
+
+if not anchors_by_pkg:
+    raise SystemExit('ERROR: no PCT reporting anchors parsed')
+
+selected = []
+for pkg in sorted(anchors_by_pkg):
+    anchors = sorted(anchors_by_pkg[pkg], key=lambda a: a['anchor_cpu'])
+    n = len(anchors)
+    base = hp_per_package // n
+    rem = hp_per_package % n
+    per_anchor = [base + (1 if i < rem else 0) for i in range(n)]  # remainder goes to the lowest-numbered anchors
+
+    print(f'package {pkg}: HP_PER_PACKAGE={hp_per_package}, reporting_anchors={n}, dispatch_per_anchor={per_anchor}')
+
+    for idx, anchor in enumerate(anchors):
+        take = per_anchor[idx]
+        if take == 0:
+            continue
+        socket = anchor['socket']
+        anchor_cpu = anchor['anchor_cpu']
+        items = ordered_cores[socket]
+
+        pos = None
+        for i, (first_cpu, core, logicals) in enumerate(items):
+            if first_cpu == anchor_cpu or anchor_cpu in logicals:
+                pos = i
+                break
+        if pos is None:
+            raise SystemExit(f'ERROR: could not locate anchor cpu{anchor_cpu} in socket {socket}')
+        if pos + take > len(items):
+            raise SystemExit(
+                f'ERROR: anchor cpu{anchor_cpu} in socket {socket} requested {take} cores, '
+                f'but only {len(items)-pos} physical cores remain from that anchor'
+            )
+
+        chosen = items[pos:pos + take]  # contiguous physical cores starting at the anchor's core
+        desc = []
+        chosen_cpus = []
+        for _, core, logicals in chosen:
+            use = logicals if include_ht else logicals[:1]
+            chosen_cpus.extend(use)
+            desc.append(f'core{core}:' + '/'.join(map(str, use)))
+        print(f" pkg{pkg}/pd{anchor['pd']}/anchor_cpu{anchor_cpu} -> {take} physical cores -> " + ' '.join(desc))
+        selected.extend(chosen_cpus)
+
+xs = sorted(set(selected))
+if not xs:
+    raise SystemExit('ERROR: selected HP CPU list is empty')
+res = []
+i = 0
+while i < len(xs):  # fold runs of consecutive ids into "a-b"
+    j = i
+    while j + 1 < len(xs) and xs[j + 1] == xs[j] + 1:
+        j += 1
+    res.append(str(xs[i]) if i == j else f'{xs[i]}-{xs[j]}')
+    i = j + 1
+print('HP_EFFECTIVE=' + ','.join(res))
+PY
+}
+
+enable_clos_or_die() { # Try each known "core-power enable" form in turn; die if none succeeds.
+  local ok=0
+  local cmd
+  for cmd in \
+    "$ISS core-power enable --priority 1" \
+    "$ISS core-power enable --clos" \
+    "$ISS core-power enable"
+  do
+    [[ "$DEBUG_VERBOSE" == "1" ]] && echo "Trying: $cmd"
+    if eval "$cmd" >/dev/null 2>&1; then
+      ok=1
+      [[ "$DEBUG_VERBOSE" == "1" ]] && echo "OK: $cmd"
+      break
+    fi
+  done
+  (( ok == 1 )) || die "Could not enable core-power/CLOS with this intel-speed-select build"
+}
+
+disable_core_power_best_effort() { # Try each "core-power disable" form; only warn if none succeeds.
+  local ok=0
+  local cmd
+  for cmd in \
+    "$ISS core-power disable --clos" \
+    "$ISS core-power disable"
+  do
+    [[ "$DEBUG_VERBOSE" == "1" ]] && echo "Trying: $cmd"
+    if eval "$cmd" >/dev/null 2>&1; then
+      ok=1
+      [[ "$DEBUG_VERBOSE" == "1" ]] && echo "OK: $cmd"
+      break
+    fi
+  done
+  if [[ "$ok" -ne 1 ]]; then
+    echo "WARN: Could not disable core-power via intel-speed-select on this build." >&2
+    echo " You can still consider PCT 'unset' because all CPUs were moved to CLOS${OTHER_CLOS}." >&2
+  fi
+}
+
+apply_assoc_quiet_or_die() { # Args: cpu_list clos. Associate the CPUs with the CLOS; die on failure.
+  local cpu_list="$1"
+  local clos="$2"
+  [[ -n "$cpu_list" ]] || die "apply_assoc got empty cpu_list (clos=$clos)"
+  [[ "$clos" =~ ^[0-9]+$ ]] || die "apply_assoc got non-numeric clos='$clos'"
+
+  if [[ "$DEBUG_VERBOSE" == "1" ]]; then
+    $ISS -c "$cpu_list" core-power assoc --clos "$clos"
+    return 0
+  fi
+
+  local out rc=0
+  out="$($ISS -c "$cpu_list" core-power assoc --clos "$clos" 2>&1 >/dev/null)" || rc=$? # captures stderr only; stdout is discarded
+  if (( rc != 0 )) || echo "$out" | grep -qiE 'malformed arguments|Error:'; then
+    echo "$out" >&2
+    die "intel-speed-select assoc failed (clos=$clos cpu_list=$cpu_list)"
+  fi
+}
+
+get_assoc_pairs() { # Arg: cpu_list. Print one "cpu-N clos:M" line per clos entry in the get-assoc output.
+  local cpu_list="$1"
+  [[ -n "$cpu_list" ]] || return 0
+  $ISS -c "$cpu_list" core-power get-assoc 2>&1 | awk '
+    /cpu-[0-9]+/{
+      cpu=$0; sub(/^.*cpu-/,"",cpu); sub(/[^0-9].*$/, "", cpu); next
+    }
+    /clos:[0-9]+/{
+      clos=$0; sub(/^.*clos:/,"",clos); sub(/[^0-9].*$/, "", clos);
+      if (cpu!="") printf "cpu-%s clos:%s\n", cpu, clos
+    }'
+}
+
+command -v lscpu >/dev/null 2>&1 || die "lscpu not found"
+command -v python3 >/dev/null 2>&1 || die "python3 not found"
+command -v intel-speed-select >/dev/null 2>&1 || die "intel-speed-select not found"
+
+ALL_CPUS_CSV="$(lscpu -p=CPU | grep -v '^#' | cut -d, -f1 | sort -n | uniq | paste -sd, -)"
+[[ -n "$ALL_CPUS_CSV" ]] || die "Could not enumerate online CPUs"
+
+if [[ "$ACTION" == "unset" ]]; then
+  print_header "UNSET: move all CPUs to OTHER_CLOS and disable core-power"
+  echo "OTHER_CLOS=$OTHER_CLOS"
+  echo
+
+  if [[ "$DEBUG_MODE" == "1" || "$DRY_RUN" == "1" ]]; then
+    print_header "READ-ONLY / DRY-RUN"
+    echo "Would run:"
+    echo " $ISS core-power enable --priority 1 (or compatible fallback)"
+    echo " $ISS -c \"$ALL_CPUS_CSV\" core-power assoc --clos $OTHER_CLOS"
+    echo " $ISS core-power disable --clos (or disable)"
+    exit 0
+  fi
+
+  enable_clos_or_die
+  apply_assoc_quiet_or_die "$ALL_CPUS_CSV" "$OTHER_CLOS"
+  disable_core_power_best_effort
+
+  echo
+  print_header "Verification (sample CPU->CLOS after UNSET)"
+  get_assoc_pairs "$ALL_CPUS_CSV" | head -n "$SHOW_VERIFY_LINES" || true
+  echo "… (showing first $SHOW_VERIFY_LINES lines)"
+  echo
+  echo "Done."
+  exit 0
+fi
+
+[[ "$ACTION" == "set" ]] || die "Unknown ACTION='$ACTION' (use set|unset)"
+
+print_header "PCT capacity from SST-TF bucket-${HP_BUCKET}"
+PCT_CAPACITY="$(detect_pct_capacity "$TDP_LEVEL" "$HP_BUCKET")"
+echo "$PCT_CAPACITY"
+eval "$PCT_CAPACITY" # defines the PCT_* variables printed by detect_pct_capacity
+echo
+
+if [[ "${PCT_TOTAL_PHYSICAL_CORES:-0}" == "0" ]]; then
+  die "Could not detect PCT capacity from turbo-freq bucket-${HP_BUCKET}"
+fi
+
+if [[ -z "${HP_PER_PACKAGE}" || "${HP_PER_PACKAGE}" == "0" ]]; then
+  HP_PER_PACKAGE="$(echo "${PCT_CORES_PER_PACKAGE}" | awk -F, '{print $1}')" # first (smallest) per-package count
+fi
+[[ "$HP_PER_PACKAGE" =~ ^[0-9]+$ ]] || die "Could not derive numeric HP_PER_PACKAGE from PCT_CORES_PER_PACKAGE=${PCT_CORES_PER_PACKAGE}"
+
+print_header "Config"
+echo "ACTION=$ACTION"
+echo "HP_BUCKET=$HP_BUCKET TDP_LEVEL=$TDP_LEVEL"
+echo "HP_PER_PACKAGE=$HP_PER_PACKAGE"
+echo "INCLUDE_HT=$INCLUDE_HT"
+echo "HP_CLOS=$HP_CLOS OTHER_CLOS=$OTHER_CLOS"
+echo "DEBUG_MODE=$DEBUG_MODE DRY_RUN=$DRY_RUN DEBUG_VERBOSE=$DEBUG_VERBOSE DEBUG_MAP=$DEBUG_MAP"
+echo
+
+print_header "Powerdomain-anchor HP CPU dispatch"
+SELECTION_OUT="$(select_hp_cpus_by_powerdomain_anchors "$HP_PER_PACKAGE" "$INCLUDE_HT" "$PCT_DOMAIN_ANCHORS")"
+echo "$SELECTION_OUT"
+HP_EFFECTIVE="$(echo "$SELECTION_OUT" | awk -F= '/^HP_EFFECTIVE=/{print $2}' | tail -n1)"
+[[ -n "$HP_EFFECTIVE" ]] || die "HP_EFFECTIVE is empty"
+
+HP_EFFECTIVE_COUNT="$(count_cpulist "$HP_EFFECTIVE")"
+NON_HP_RANGES="$(build_non_hp_ranges "$HP_EFFECTIVE" "$ALL_CPUS_CSV")"
+[[ -n "$NON_HP_RANGES" ]] || die "NON_HP_RANGES is empty"
+
+print_header "Computed CPU lists"
+echo "HP effective : $HP_EFFECTIVE"
+echo "HP CPU count : $HP_EFFECTIVE_COUNT"
+echo "Non-HP : $NON_HP_RANGES"
+echo
+
+echo "PCT active packages/sockets : ${PCT_ACTIVE_PACKAGES:-0}"
+echo "PCT reporting anchors : ${PCT_REPORTING_ANCHORS:-0}"
+echo "PCT cores per package/socket : ${PCT_CORES_PER_PACKAGE:-0}"
+echo "PCT physical core budget : ${PCT_TOTAL_PHYSICAL_CORES:-0}"
+echo "PCT max frequency : ${PCT_MAX_FREQ_MHZ:-0} MHz"
+echo
+
+if [[ "$INCLUDE_HT" == "1" ]]; then
+  EXPECTED_HP_COUNT=$(( HP_PER_PACKAGE * PCT_ACTIVE_PACKAGES * 2 )) # dispatched budget (honours an HP_PER_PACKAGE override); assumes 2 threads per core
+else
+  EXPECTED_HP_COUNT=$(( HP_PER_PACKAGE * PCT_ACTIVE_PACKAGES )) # HP_PER_PACKAGE physical cores are dispatched in every reporting package
+fi
+
+echo "Expected HP CPU count for this INCLUDE_HT setting: $EXPECTED_HP_COUNT"
+if (( HP_EFFECTIVE_COUNT != EXPECTED_HP_COUNT )); then
+  echo "WARN: HP CPU count ($HP_EFFECTIVE_COUNT) does not match expected count ($EXPECTED_HP_COUNT)." >&2
+  echo " Verify PCT_DOMAIN_ANCHORS, lscpu topology, and INCLUDE_HT before benchmarking." >&2
+fi
+
+echo
+
+if [[ "$DEBUG_MODE" == "1" ]]; then
+  print_header "DEBUG_MODE=1 (read-only)"
+  echo "No CLOS changes applied. No verification performed."
+  exit 0
+fi
+
+if [[ "$DRY_RUN" == "1" ]]; then
+  print_header "DRY_RUN=1 (no changes)"
+  echo "Would run:"
+  echo " $ISS core-power enable --priority 1 (or compatible fallback)"
+  echo " $ISS -c \"$ALL_CPUS_CSV\" core-power assoc --clos $OTHER_CLOS"
+  echo " $ISS -c \"$HP_EFFECTIVE\" core-power assoc --clos $HP_CLOS"
+  exit 0
+fi
+
+print_header "Apply CLOS assignments (overwrite existing BIOS/runtime mapping)"
+echo "Setting ALL CPUs -> CLOS${OTHER_CLOS} first"
+echo "Setting selected HP CPUs -> CLOS${HP_CLOS}"
+
+enable_clos_or_die
+apply_assoc_quiet_or_die "$ALL_CPUS_CSV" "$OTHER_CLOS"
+apply_assoc_quiet_or_die "$HP_EFFECTIVE" "$HP_CLOS"
+
+echo "Applied."
+echo
+
+print_header "Verification (concise CPU->CLOS)"
+echo "HP list should be clos:$HP_CLOS"
+get_assoc_pairs "$HP_EFFECTIVE" | head -n "$SHOW_VERIFY_LINES" || true
+echo "… (showing first $SHOW_VERIFY_LINES lines)"
+echo
+
+echo "Non-HP list should be clos:$OTHER_CLOS"
+get_assoc_pairs "$NON_HP_RANGES" | head -n "$SHOW_VERIFY_LINES" || true
+echo "… (showing first $SHOW_VERIFY_LINES lines)"
+echo
+
+echo "Done."
diff --git a/hardware/priority_core_turbo/run_host_perfspect_benchmark.sh b/hardware/priority_core_turbo/run_host_perfspect_benchmark.sh
new file mode 100755
index 0000000..f1e2d76
--- /dev/null
+++ b/hardware/priority_core_turbo/run_host_perfspect_benchmark.sh
@@ -0,0 +1,70 @@
+#!/usr/bin/env bash
+# Run `perfspect benchmark` on the host, pinned with taskset to the CPUs listed
+# in a CLOS cpulist file, and keep the log in a timestamped results directory.
+#
+# Environment overrides: RESULTS_DIR, CLOS_ID, CLOS_CPU_FILE, PERFSPECT_ARGS.
+set -euo pipefail
+
+RESULTS_DIR="${RESULTS_DIR:-results}"
+CLOS_ID="${CLOS_ID:-0}"
+CLOS_CPU_FILE="${CLOS_CPU_FILE:-${RESULTS_DIR}/clos${CLOS_ID}_cpulist.txt}"
+PERFSPECT_ARGS="${PERFSPECT_ARGS:---speed --frequency --no-summary}"
+
+if ! command -v perfspect >/dev/null 2>&1; then
+  echo "ERROR: perfspect not found on host PATH." >&2
+  echo "Install PerfSpect on the host first, then rerun this script." >&2
+  exit 1
+fi
+
+if [[ ! -f "${CLOS_CPU_FILE}" ]]; then
+  echo "ERROR: ${CLOS_CPU_FILE} not found." >&2
+  echo "Run the check profile first:" >&2
+  echo " docker compose --progress=plain --profile check up --abort-on-container-exit" >&2
+  exit 1
+fi
+
+CLOS_CPUS="$(tr -d '[:space:]' < "${CLOS_CPU_FILE}")"
+
+if [[ -z "${CLOS_CPUS}" ]]; then
+  echo "ERROR: empty CLOS CPU list from ${CLOS_CPU_FILE}" >&2
+  exit 1
+fi
+
+OUT_DIR="${RESULTS_DIR}/perfspect_host_clos${CLOS_ID}_$(date +%Y%m%d_%H%M%S)"
+PERFSPECT_OUTPUT="${OUT_DIR}/perfspect"
+mkdir -p "${OUT_DIR}"
+
+echo "------------------------------------------------------------"
+echo "Host PerfSpect benchmark on CLOS${CLOS_ID} CPUs"
+echo "------------------------------------------------------------"
+echo "CLOS_ID=${CLOS_ID}"
+echo "CLOS_CPU_FILE=${CLOS_CPU_FILE}"
+echo "CLOS_CPUS=${CLOS_CPUS}"
+echo "PERFSPECT_ARGS=${PERFSPECT_ARGS}"
+echo "OUT_DIR=${OUT_DIR}"
+echo
+
+echo "${CLOS_CPUS}" > "${OUT_DIR}/clos${CLOS_ID}_cpulist.txt"
+
+echo "Command:"
+echo "sudo taskset -c ${CLOS_CPUS} perfspect benchmark ${PERFSPECT_ARGS} --output ${PERFSPECT_OUTPUT}"
+echo
+
+# PERFSPECT_ARGS is left unquoted on purpose so it splits into separate options.
+# errexit is switched off around the pipeline so a failing run is still logged;
+# PIPESTATUS[0] is the status of the sudo/taskset/perfspect side, not of tee.
+set +e
+sudo taskset -c "${CLOS_CPUS}" perfspect benchmark ${PERFSPECT_ARGS} \
+  --output "${PERFSPECT_OUTPUT}" \
+  2>&1 | tee "${OUT_DIR}/perfspect_benchmark.log"
+RC=${PIPESTATUS[0]}
+set -e
+
+echo
+echo "------------------------------------------------------------"
+echo "Host PerfSpect benchmark completed"
+echo "------------------------------------------------------------"
+echo "Exit code: ${RC}"
+echo "Output dir: ${OUT_DIR}"
+
+exit "${RC}"
diff --git a/hardware/priority_core_turbo/set_kernel_env.sh b/hardware/priority_core_turbo/set_kernel_env.sh
new file mode 100644
index 0000000..87dc8db
--- /dev/null
+++ b/hardware/priority_core_turbo/set_kernel_env.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+#
+# set_kernel_env.sh - derive the docker compose build variables from `uname -r`.
+#
+# Source this file (do not execute it) before running docker compose build:
+#   source ./set_kernel_env.sh
+#
+# Exports:
+#   KERNEL_MM  - image tag suffix, for example 6.8
+#   KERNEL_TAG - Linux kernel git tag, for example v6.8
+
+kernel_release="$(uname -r)"
+# Keep only the first two dot-separated fields of the release string.
+kernel_mm="$(awk -F. '{print $1 "." $2}' <<<"${kernel_release}")"
+
+# Anything other than "<digits>.<digits>" is rejected. `return` handles the
+# sourced case; `exit` is the fallback when the file was executed instead.
+if ! [[ "${kernel_mm}" =~ ^[0-9]+\.[0-9]+$ ]]; then
+  echo "ERROR: failed to detect kernel major.minor from uname -r: ${kernel_release}" >&2
+  return 1 2>/dev/null || exit 1
+fi
+
+KERNEL_MM="${kernel_mm}"
+KERNEL_TAG="v${kernel_mm}"
+export KERNEL_MM KERNEL_TAG
+
+echo "Exported KERNEL_MM=${KERNEL_MM}"
+echo "Exported KERNEL_TAG=${KERNEL_TAG}"
+
+# Warning only: the variables stay exported for any kernel version.
+case "${KERNEL_MM}" in
+  6.8) ;;
+  *)
+    echo "WARN: validated GNR PCT flow expects KERNEL_MM=6.8 and KERNEL_TAG=v6.8." >&2
+    echo " Override manually if needed: export KERNEL_MM=6.8 KERNEL_TAG=v6.8" >&2
+    ;;
+esac