Skip to content

Commit f8cc1b8

Browse files
Fixed BBRv3
Fixed nanopi builds

Signed-off-by: Nicholas Sun <nicholas-sun@outlook.com>
1 parent 85f4456 commit f8cc1b8

4 files changed

Lines changed: 141 additions & 4 deletions
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
From e9a688bcb19348862afe30d7c85bc37c4c293471 Mon Sep 17 00:00:00 2001
2+
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
3+
Date: Sat, 8 Oct 2022 20:42:54 -0600
4+
Subject: [PATCH] random: use rejection sampling for uniform bounded random
5+
integers
6+
7+
Until the very recent commits, many bounded random integers were
8+
calculated using `get_random_u32() % max_plus_one`, which not only
9+
incurs the price of a division -- indicating performance mostly was not
10+
a real issue -- but also does not result in a uniformly distributed
11+
output if max_plus_one is not a power of two. Recent commits moved to
12+
using `prandom_u32_max(max_plus_one)`, which replaces the division with
13+
a faster multiplication, but still does not solve the issue with
14+
non-uniform output.
15+
16+
For some users, maybe this isn't a problem, and for others, maybe it is,
17+
but for the majority of users, probably the question has never been
18+
posed and analyzed, and nobody thought much about it, probably assuming
19+
random is random is random. In other words, the unthinking expectation
20+
of most users is likely that the resultant numbers are uniform.
21+
22+
So we implement here an efficient way of generating uniform bounded
23+
random integers. Through use of compile-time evaluation, and avoiding
24+
divisions as much as possible, this commit introduces no measurable
25+
overhead. At least for hot-path uses tested, any potential difference
26+
was lost in the noise. On both clang and gcc, code generation is pretty
27+
small.
28+
29+
The new function, get_random_u32_below(), lives in random.h, rather than
30+
prandom.h, and has a "get_random_xxx" function name, because it is
31+
suitable for all uses, including cryptography.
32+
33+
In order to be efficient, we implement a kernel-specific variant of
34+
Daniel Lemire's algorithm from "Fast Random Integer Generation in an
35+
Interval", linked below. The kernel's variant takes advantage of
36+
constant folding to avoid divisions entirely in the vast majority of
37+
cases, works on both 32-bit and 64-bit architectures, and requests a
38+
minimal amount of bytes from the RNG.
39+
40+
Link: https://arxiv.org/pdf/1805.10941.pdf
41+
Cc: stable@vger.kernel.org # to ease future backports that use this api
42+
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
43+
---
44+
drivers/char/random.c | 22 ++++++++++++++++++++++
45+
include/linux/prandom.h | 18 ++----------------
46+
include/linux/random.h | 40 ++++++++++++++++++++++++++++++++++++++++
47+
3 files changed, 64 insertions(+), 16 deletions(-)
48+
49+
--- a/drivers/char/random.c
50+
+++ b/drivers/char/random.c
51+
@@ -199,6 +199,7 @@ static void __cold process_random_ready_
52+
*
53+
* void get_random_bytes(void *buf, size_t len)
54+
* u32 get_random_u32()
55+
+ * u32 get_random_u32_below(u32 ceil)
56+
* u64 get_random_u64()
57+
* unsigned int get_random_int()
58+
* unsigned long get_random_long()
59+
@@ -553,6 +554,27 @@ EXPORT_SYMBOL(get_random_ ##type);
60+
DEFINE_BATCHED_ENTROPY(u64)
61+
DEFINE_BATCHED_ENTROPY(u32)
62+
63+
+u32 __get_random_u32_below(u32 ceil)
64+
+{
65+
+ /*
66+
+ * This is the slow path for variable ceil. It is still fast, most of
67+
+ * the time, by doing traditional reciprocal multiplication and
68+
+ * opportunistically comparing the lower half to ceil itself, before
69+
+ * falling back to computing a larger bound, and then rejecting samples
70+
+ * whose lower half would indicate a range indivisible by ceil. The use
71+
+ * of `-ceil % ceil` is analogous to `2^32 % ceil`, but is computable
72+
+ * in 32-bits.
73+
+ */
74+
+ u64 mult = (u64)ceil * get_random_u32();
75+
+ if (unlikely((u32)mult < ceil)) {
76+
+ u32 bound = -ceil % ceil;
77+
+ while (unlikely((u32)mult < bound))
78+
+ mult = (u64)ceil * get_random_u32();
79+
+ }
80+
+ return mult >> 32;
81+
+}
82+
+EXPORT_SYMBOL(__get_random_u32_below);
83+
+
84+
#ifdef CONFIG_SMP
85+
/*
86+
* This function is called when the CPU is coming up, with entry
87+
--- a/include/linux/random.h
88+
+++ b/include/linux/random.h
89+
@@ -45,6 +45,46 @@ static inline unsigned long get_random_l
90+
#endif
91+
}
92+
93+
+u32 __get_random_u32_below(u32 ceil);
94+
+
95+
+/*
96+
+ * Returns a random integer in the interval [0, ceil), with uniform
97+
+ * distribution, suitable for all uses. Fastest when ceil is a constant, but
98+
+ * still fast for variable ceil as well.
99+
+ */
100+
+static inline u32 get_random_u32_below(u32 ceil)
101+
+{
102+
+ if (!__builtin_constant_p(ceil))
103+
+ return __get_random_u32_below(ceil);
104+
+
105+
+ /*
106+
+ * For the fast path, below, all operations on ceil are precomputed by
107+
+ * the compiler, so this incurs no overhead for checking pow2, doing
108+
+ * divisions, or branching based on integer size. The resultant
109+
+ * algorithm does traditional reciprocal multiplication (typically
110+
+ * optimized by the compiler into shifts and adds), rejecting samples
111+
+ * whose lower half would indicate a range indivisible by ceil.
112+
+ */
113+
+ BUILD_BUG_ON_MSG(!ceil, "get_random_u32_below() must take ceil > 0");
114+
+ if (ceil <= 1)
115+
+ return 0;
116+
+ for (;;) {
117+
+ if (ceil <= 1U << 8) {
118+
+ u32 mult = ceil * (get_random_u32() & 0xff);
119+
+ if (likely(is_power_of_2(ceil) || (u8)mult >= (1U << 8) % ceil))
120+
+ return mult >> 8;
121+
+ } else if (ceil <= 1U << 16) {
122+
+ u32 mult = ceil * (get_random_u32() & 0xffff);
123+
+ if (likely(is_power_of_2(ceil) || (u16)mult >= (1U << 16) % ceil))
124+
+ return mult >> 16;
125+
+ } else {
126+
+ u64 mult = (u64)ceil * get_random_u32();
127+
+ if (likely(is_power_of_2(ceil) || (u32)mult >= -ceil % ceil))
128+
+ return mult >> 32;
129+
+ }
130+
+ }
131+
+}
132+
+
133+
/*
134+
* On 64-bit architectures, protect against non-terminated C string overflows
135+
* by zeroing out the first byte of the canary; this leaves 56 bits of entropy.

PATCH/BBRv3/kernel/010-bbr3-0016-net-tcp_bbr-v3-update-TCP-bbr-congestion-control-mod.patch

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1949,10 +1949,10 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
19491949
+
19501950
+ /* Decide the random round-trip bound for wait until probe: */
19511951
+ bbr->rounds_since_probe =
1952-
+ prandom_u32_max(bbr_param(sk, bw_probe_rand_rounds));
1952+
+ get_random_u32_below(bbr_param(sk, bw_probe_rand_rounds));
19531953
+ /* Decide the random wall clock bound for wait until probe: */
19541954
+ bbr->probe_wait_us = bbr_param(sk, bw_probe_base_us) +
1955-
+ prandom_u32_max(bbr_param(sk, bw_probe_rand_us));
1955+
+ get_random_u32_below(bbr_param(sk, bw_probe_rand_us));
19561956
+}
19571957
+
19581958
+static void bbr_set_cycle_idx(struct sock *sk, int cycle_idx)

SCRIPTS/02_prepare_package.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ sed -i 's,rootwait,rootwait mitigations=off,g' target/linux/rockchip/image/mmc.b
7676
sed -i 's,noinitrd,noinitrd mitigations=off,g' target/linux/x86/image/grub-efi.cfg
7777
sed -i 's,noinitrd,noinitrd mitigations=off,g' target/linux/x86/image/grub-iso.cfg
7878
sed -i 's,noinitrd,noinitrd mitigations=off,g' target/linux/x86/image/grub-pc.cfg
79+
# Temp changed
80+
wget -qO - https://github.com/immortalwrt/immortalwrt/commit/4e7e1e851ff3c9b9af9dda83d4a7baea83c8ebdf.patch | patch -Rp1
7981

8082
## Extra Packages
8183
# AutoCore

SCRIPTS/switch

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
#
99
# Change Lines
1010
#
11-
# Happy Holidays
11+
# Happy 2024
1212
#
13-
# 28 Dec
13+
# 5 Jan
1414

0 commit comments

Comments
 (0)