Skip to content

Commit eb259e8

Browse files
authored
Some small perf improvements (#95)
* Do conflict-set hash lookups once, not twice

  This makes the small wasmtime bz2 benchmark 1% faster, per Hyperfine and Sightglass. The effect disappears into the noise on larger benchmarks.

* Inline PosWithPrio::key

  When compiling the pulldown-cmark benchmark from Sightglass, this is the single most frequently called function: it's invoked 2.5 million times. Inlining it reduces instructions retired by 1.5% on that benchmark, according to `valgrind --tool=callgrind`.

  This patch is "1.01 ± 0.01 times faster" according to Hyperfine for the bz2, pulldown-cmark, and spidermonkey benchmarks from Sightglass. Sightglass, in turn, agrees that all three benchmarks are 1.01x faster by instructions retired, and the first two are around 1.01x faster by CPU cycles as well.

* Inline and simplify AdaptiveMap::expand

  Previously, `get_or_insert` would iterate over the keys to find one that matched; then, if none did, iterate over the values to check if any are 0; then iterate again to remove all zero values and compact the map.

  This commit instead focuses on picking an index to use: preferably one where the key already exists; but if it's not in the map, then an unused index; but if there aren't any, then an index where the value is zero. As a result this iterates the two arrays at most once each, and both iterations can stop early. The downside is that keys whose value is zero are not removed as aggressively. It might be worth pruning such keys in `IndexSet::set`.

  Also:
  - `#[inline]` both implementations of `Iterator::next`
  - Replace `set_bits` with using the `SetBitsIter` constructor directly

  These changes together reduce instructions retired when compiling the pulldown-cmark benchmark by 0.9%.
1 parent 1efaa73 commit eb259e8

3 files changed

Lines changed: 34 additions & 66 deletions

File tree

src/indexset.rs

Lines changed: 32 additions & 64 deletions
Original file line number · Diff line number · Diff line change
@@ -36,73 +36,41 @@ impl AdaptiveMap {
3636
}
3737
}
3838

39-
/// Expand into `Large` mode if we are at capacity and have no
40-
/// zero-value pairs that can be trimmed.
41-
#[inline(never)]
42-
fn expand(&mut self) {
43-
match self {
44-
&mut Self::Small {
45-
ref mut len,
46-
ref mut keys,
47-
ref mut values,
48-
} => {
49-
// Note: we *may* remain as `Small` if there are any
50-
// zero elements. Try removing them first, before we
51-
// commit to a memory allocation.
52-
if values.iter().any(|v| *v == 0) {
53-
let mut out = 0;
54-
for i in 0..(*len as usize) {
55-
if values[i] == 0 {
56-
continue;
57-
}
58-
if out < i {
59-
keys[out] = keys[i];
60-
values[out] = values[i];
61-
}
62-
out += 1;
63-
}
64-
*len = out as u32;
65-
} else {
66-
let mut map = FxHashMap::default();
67-
for i in 0..(*len as usize) {
68-
map.insert(keys[i], values[i]);
69-
}
70-
*self = Self::Large(map);
71-
}
72-
}
73-
_ => {}
74-
}
75-
}
7639
#[inline(always)]
7740
fn get_or_insert<'a>(&'a mut self, key: u32) -> &'a mut u64 {
7841
// Check whether the key is present and we are in small mode;
7942
// if no to both, we need to expand first.
80-
let (needs_expand, small_mode_idx) = match self {
81-
&mut Self::Small { len, ref keys, .. } => {
43+
let small_mode_idx = match self {
44+
&mut Self::Small {
45+
len,
46+
ref mut keys,
47+
ref values,
48+
} => {
8249
// Perform this scan but do not return right away;
8350
// doing so runs into overlapping-borrow issues
8451
// because the current non-lexical lifetimes
8552
// implementation is not able to see that the `self`
8653
// mutable borrow on return is only on the
8754
// early-return path.
88-
let small_mode_idx = keys.iter().take(len as usize).position(|k| *k == key);
89-
let needs_expand = small_mode_idx.is_none() && len == SMALL_ELEMS as u32;
90-
(needs_expand, small_mode_idx)
55+
if let Some(i) = keys[..len as usize].iter().position(|&k| k == key) {
56+
Some(i)
57+
} else if len != SMALL_ELEMS as u32 {
58+
debug_assert!(len < SMALL_ELEMS as u32);
59+
None
60+
} else if let Some(i) = values.iter().position(|&v| v == 0) {
61+
// If an existing value is zero, reuse that slot.
62+
keys[i] = key;
63+
Some(i)
64+
} else {
65+
*self = Self::Large(keys.iter().copied().zip(values.iter().copied()).collect());
66+
None
67+
}
9168
}
92-
_ => (false, None),
69+
_ => None,
9370
};
9471

95-
if needs_expand {
96-
debug_assert!(small_mode_idx.is_none());
97-
self.expand();
98-
}
99-
10072
match self {
101-
&mut Self::Small {
102-
ref mut len,
103-
ref mut keys,
104-
ref mut values,
105-
} => {
73+
Self::Small { len, keys, values } => {
10674
// If we found the key already while checking whether
10775
// we need to expand above, use that index to return
10876
// early.
@@ -112,15 +80,16 @@ impl AdaptiveMap {
11280
// Otherwise, the key must not be present; add a new
11381
// entry.
11482
debug_assert!(*len < SMALL_ELEMS as u32);
115-
let idx = *len;
83+
let idx = *len as usize;
11684
*len += 1;
117-
keys[idx as usize] = key;
118-
values[idx as usize] = 0;
119-
&mut values[idx as usize]
85+
keys[idx] = key;
86+
values[idx] = 0;
87+
&mut values[idx]
12088
}
121-
&mut Self::Large(ref mut map) => map.entry(key).or_insert(0),
89+
Self::Large(map) => map.entry(key).or_insert(0),
12290
}
12391
}
92+
12493
#[inline(always)]
12594
fn get_mut(&mut self, key: u32) -> Option<&mut u64> {
12695
match self {
@@ -180,6 +149,8 @@ enum AdaptiveMapIter<'a> {
180149

181150
impl<'a> std::iter::Iterator for AdaptiveMapIter<'a> {
182151
type Item = (u32, u64);
152+
153+
#[inline]
183154
fn next(&mut self) -> Option<Self::Item> {
184155
match self {
185156
&mut Self::Small(ref mut keys, ref mut values) => {
@@ -285,7 +256,7 @@ impl IndexSet {
285256
pub fn iter<'a>(&'a self) -> impl Iterator<Item = usize> + 'a {
286257
self.elems.iter().flat_map(|(word_idx, bits)| {
287258
let word_idx = word_idx as usize;
288-
set_bits(bits).map(move |i| BITS_PER_WORD * word_idx + i)
259+
SetBitsIter(bits).map(move |i| BITS_PER_WORD * word_idx + i)
289260
})
290261
}
291262

@@ -299,15 +270,12 @@ impl IndexSet {
299270
}
300271
}
301272

302-
fn set_bits(bits: u64) -> impl Iterator<Item = usize> {
303-
let iter = SetBitsIter(bits);
304-
iter
305-
}
306-
307273
pub struct SetBitsIter(u64);
308274

309275
impl Iterator for SetBitsIter {
310276
type Item = usize;
277+
278+
#[inline]
311279
fn next(&mut self) -> Option<usize> {
312280
// Build an `Option<NonZeroU64>` so that on the nonzero path,
313281
// the compiler can optimize the trailing-zeroes operator

src/ion/data_structures.rs

Lines changed: 1 addition & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -630,6 +630,7 @@ pub struct PosWithPrio {
630630
}
631631

632632
impl PosWithPrio {
633+
#[inline]
633634
pub fn key(self) -> u64 {
634635
u64_key(self.pos.to_index(), self.prio)
635636
}

src/ion/process.rs

Lines changed: 1 addition & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -157,9 +157,8 @@ impl<'a, F: Function> Env<'a, F> {
157157
// conflicts list.
158158
let conflict_bundle = self.ranges[preg_range.index()].bundle;
159159
trace!(" -> conflict bundle {:?}", conflict_bundle);
160-
if !self.conflict_set.contains(&conflict_bundle) {
160+
if self.conflict_set.insert(conflict_bundle) {
161161
conflicts.push(conflict_bundle);
162-
self.conflict_set.insert(conflict_bundle);
163162
max_conflict_weight = std::cmp::max(
164163
max_conflict_weight,
165164
self.bundles[conflict_bundle.index()].cached_spill_weight(),

0 commit comments

Comments (0)