From a176a870987f61b04e001a7c4d0863fdeb427083 Mon Sep 17 00:00:00 2001 From: Matt Brubeck Date: Wed, 18 Dec 2024 13:38:49 -0800 Subject: [PATCH 01/13] Simplify extend_impl --- src/lib.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 42c04c3..e0f7328 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1502,11 +1502,11 @@ impl SmallVec { let mut iter = iter.fuse(); let (lower_bound, _) = iter.size_hint(); self.reserve(lower_bound); - let mut len = self.len(); let mut capacity = self.capacity(); let mut ptr = self.as_mut_ptr(); unsafe { loop { + let mut len = self.len(); // SAFETY: ptr is valid for `capacity - len` writes ptr = ptr.add(len); let mut guard = DropGuard { ptr, len: 0 }; @@ -1520,7 +1520,6 @@ impl SmallVec { // At this point we either consumed all capacity or the iterator is exhausted (fused) if let Some(item) = iter.next() { self.push(item); - len += 1; } else { return; } From 1289db6817c5369c5450afc2d97622bf513282a7 Mon Sep 17 00:00:00 2001 From: Matt Brubeck Date: Wed, 15 Jan 2025 15:47:17 -0800 Subject: [PATCH 02/13] Impl Send/Sync for IntoIter (#368) Closes #367. --- src/lib.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index e0f7328..37c2123 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -500,6 +500,11 @@ pub struct IntoIter { _marker: PhantomData, } +// SAFETY: IntoIter has unique ownership of its contents. Sending (or sharing) an `IntoIter` +// is equivalent to sending (or sharing) a `SmallVec`. 
+unsafe impl Send for IntoIter where T: Send {} +unsafe impl Sync for IntoIter where T: Sync {} + impl IntoIter { #[inline] const fn is_zst() -> bool { From 9a23ebf1883247f91c50d429714773b46957a688 Mon Sep 17 00:00:00 2001 From: Matt Brubeck Date: Wed, 15 Jan 2025 15:48:28 -0800 Subject: [PATCH 03/13] Version 2.0.0-alpha.10 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 220f8b2..f77070f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smallvec" -version = "2.0.0-alpha.9" +version = "2.0.0-alpha.10" edition = "2021" rust-version = "1.57" authors = ["The Servo Project Developers"] From 30c30a628cc0ca838f6bd4afc3dd1640fa48c2d6 Mon Sep 17 00:00:00 2001 From: Nico Burns Date: Tue, 11 Mar 2025 15:07:28 +1300 Subject: [PATCH 04/13] Implement `MallocSizeOf` for SmallVec (v2) (#369) * Implement MallocSizeOf for SmallVec (v2) Signed-off-by: Nico Burns * Bump malloc_size_of crate to version with lower MSRV Signed-off-by: Nico Burns --------- Signed-off-by: Nico Burns --- .github/workflows/main.yml | 3 +++ Cargo.toml | 1 + src/lib.rs | 24 ++++++++++++++++++++++++ 3 files changed, 28 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 149116b..b17cda8 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -39,6 +39,9 @@ jobs: - name: Cargo test w/ serde run: cargo test --verbose --features serde + - name: Cargo test w/ malloc_size_of + run: cargo test --verbose --features malloc_size_of + - name: Cargo check w/o default features if: matrix.toolchain == 'nightly' run: cargo check --verbose --no-default-features diff --git a/Cargo.toml b/Cargo.toml index f77070f..b2961dc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,7 @@ extract_if = [] [dependencies] serde = { version = "1", optional = true, default-features = false } +malloc_size_of = { version = "0.1.1", optional = true, default-features = false } [dev-dependencies] bincode 
= "1.0.1" diff --git a/src/lib.rs b/src/lib.rs index 37c2123..3459da5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -92,6 +92,8 @@ use core::ptr::copy; use core::ptr::copy_nonoverlapping; use core::ptr::NonNull; +#[cfg(feature = "malloc_size_of")] +use malloc_size_of::{MallocShallowSizeOf, MallocSizeOf, MallocSizeOfOps}; #[cfg(feature = "serde")] use serde::{ de::{Deserialize, Deserializer, SeqAccess, Visitor}, @@ -2176,6 +2178,28 @@ where } } +#[cfg(feature = "malloc_size_of")] +impl MallocShallowSizeOf for SmallVec { + fn shallow_size_of(&self, ops: &mut MallocSizeOfOps) -> usize { + if self.spilled() { + unsafe { ops.malloc_size_of(self.as_ptr()) } + } else { + 0 + } + } +} + +#[cfg(feature = "malloc_size_of")] +impl MallocSizeOf for SmallVec { + fn size_of(&self, ops: &mut MallocSizeOfOps) -> usize { + let mut n = self.shallow_size_of(ops); + for elem in self.iter() { + n += elem.size_of(ops); + } + n + } +} + #[cfg(feature = "std")] #[cfg_attr(docsrs, doc(cfg(feature = "std")))] impl io::Write for SmallVec { From 4aaa02887f2ae18247e0dacb3a64d750bf56729e Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Fri, 21 Mar 2025 00:42:09 +0800 Subject: [PATCH 05/13] feat: impl `bytes::BufMut` for `SmallVec` (v2) (#371) --- Cargo.toml | 1 + src/lib.rs | 70 +++++++++++++++++++++++++++++++++++++ src/tests.rs | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 165 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b2961dc..bb74a99 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,7 @@ may_dangle = [] extract_if = [] [dependencies] +bytes = { version = "1", optional = true, default-features = false } serde = { version = "1", optional = true, default-features = false } malloc_size_of = { version = "0.1.1", optional = true, default-features = false } diff --git a/src/lib.rs b/src/lib.rs index 3459da5..f05bc3f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -92,6 +92,8 @@ use core::ptr::copy; use core::ptr::copy_nonoverlapping; use 
core::ptr::NonNull; +#[cfg(feature = "bytes")] +use bytes::{buf::UninitSlice, BufMut}; #[cfg(feature = "malloc_size_of")] use malloc_size_of::{MallocShallowSizeOf, MallocSizeOf, MallocSizeOfOps}; #[cfg(feature = "serde")] @@ -2220,3 +2222,71 @@ impl io::Write for SmallVec { Ok(()) } } + +#[cfg(feature = "bytes")] +unsafe impl BufMut for SmallVec { + #[inline] + fn remaining_mut(&self) -> usize { + // A vector can never have more than isize::MAX bytes + isize::MAX as usize - self.len() + } + + #[inline] + unsafe fn advance_mut(&mut self, cnt: usize) { + let len = self.len(); + let remaining = self.capacity() - len; + + if remaining < cnt { + panic!("advance out of bounds: the len is {remaining} but advancing by {cnt}"); + } + + // Addition will not overflow since the sum is at most the capacity. + self.set_len(len + cnt); + } + + #[inline] + fn chunk_mut(&mut self) -> &mut UninitSlice { + if self.capacity() == self.len() { + self.reserve(64); // Grow the smallvec + } + + let cap = self.capacity(); + let len = self.len(); + + let ptr = self.as_mut_ptr(); + // SAFETY: Since `ptr` is valid for `cap` bytes, `ptr.add(len)` must be + // valid for `cap - len` bytes. The subtraction will not underflow since + // `len <= cap`. + unsafe { UninitSlice::from_raw_parts_mut(ptr.add(len), cap - len) } + } + + // Specialize these methods so they can skip checking `remaining_mut` + // and `advance_mut`. + #[inline] + fn put(&mut self, mut src: T) + where + Self: Sized, + { + // In case the src isn't contiguous, reserve upfront. + self.reserve(src.remaining()); + + while src.has_remaining() { + let s = src.chunk(); + let l = s.len(); + self.extend_from_slice(s); + src.advance(l); + } + } + + #[inline] + fn put_slice(&mut self, src: &[u8]) { + self.extend_from_slice(src); + } + + #[inline] + fn put_bytes(&mut self, val: u8, cnt: usize) { + // If the addition overflows, then the `resize` will fail. 
+ let new_len = self.len().saturating_add(cnt); + self.resize(new_len, val); + } +} diff --git a/src/tests.rs b/src/tests.rs index 925eb3b..624f44c 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -1154,9 +1154,7 @@ fn collect_from_iter() { #[test] fn test_collect_with_spill() { let input = "0123456"; - let collected: SmallVec = input - .chars() - .collect(); + let collected: SmallVec = input.chars().collect(); assert_eq!(collected, &['0', '1', '2', '3', '4', '5', '6']); } @@ -1186,3 +1184,96 @@ fn test_spare_capacity_mut() { assert!(spare.len() >= 1); assert_eq!(spare.as_ptr().cast::(), unsafe { v.as_ptr().add(3) }); } + +// Adopted from `tests/test_buf_mut.rs` in the `bytes` crate. +#[cfg(feature = "bytes")] +mod buf_mut { + use bytes::BufMut as _; + + type SmallVec = crate::SmallVec; + + #[test] + fn test_smallvec_as_mut_buf() { + let mut buf = SmallVec::with_capacity(64); + + assert_eq!(buf.remaining_mut(), isize::MAX as usize); + + assert!(buf.chunk_mut().len() >= 64); + + buf.put(&b"zomg"[..]); + + assert_eq!(&buf, b"zomg"); + + assert_eq!(buf.remaining_mut(), isize::MAX as usize - 4); + assert_eq!(buf.capacity(), 64); + + for _ in 0..16 { + buf.put(&b"zomg"[..]); + } + + assert_eq!(buf.len(), 68); + } + + #[test] + fn test_smallvec_put_bytes() { + let mut buf = SmallVec::new(); + buf.push(17); + buf.put_bytes(19, 2); + assert_eq!([17, 19, 19], &buf[..]); + } + + #[test] + fn test_put_u8() { + let mut buf = SmallVec::with_capacity(8); + buf.put_u8(33); + assert_eq!(b"\x21", &buf[..]); + } + + #[test] + fn test_put_u16() { + let mut buf = SmallVec::with_capacity(8); + buf.put_u16(8532); + assert_eq!(b"\x21\x54", &buf[..]); + + buf.clear(); + buf.put_u16_le(8532); + assert_eq!(b"\x54\x21", &buf[..]); + } + + #[test] + fn test_put_int() { + let mut buf = SmallVec::with_capacity(8); + buf.put_int(0x1020304050607080, 3); + assert_eq!(b"\x60\x70\x80", &buf[..]); + } + + #[test] + #[should_panic] + fn test_put_int_nbytes_overflow() { + let mut buf = 
SmallVec::with_capacity(8); + buf.put_int(0x1020304050607080, 9); + } + + #[test] + fn test_put_int_le() { + let mut buf = SmallVec::with_capacity(8); + buf.put_int_le(0x1020304050607080, 3); + assert_eq!(b"\x80\x70\x60", &buf[..]); + } + + #[test] + #[should_panic] + fn test_put_int_le_nbytes_overflow() { + let mut buf = SmallVec::with_capacity(8); + buf.put_int_le(0x1020304050607080, 9); + } + + #[test] + #[should_panic(expected = "advance out of bounds: the len is 8 but advancing by 12")] + fn test_smallvec_advance_mut() { + let mut buf = SmallVec::with_capacity(8); + unsafe { + buf.advance_mut(12); + } + } +} From 2390f2d09e47ab30709faad146346b4555506977 Mon Sep 17 00:00:00 2001 From: Matt Brubeck Date: Thu, 20 Mar 2025 09:45:23 -0700 Subject: [PATCH 06/13] Version 2.0.0-alpha.11 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index bb74a99..e08efc6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smallvec" -version = "2.0.0-alpha.10" +version = "2.0.0-alpha.11" edition = "2021" rust-version = "1.57" authors = ["The Servo Project Developers"] From 395763cd928f2d52844aa334a94ca373f7cd8ca0 Mon Sep 17 00:00:00 2001 From: Bruce Mitchener Date: Fri, 21 Mar 2025 13:22:19 +0700 Subject: [PATCH 07/13] Fix typo in comment (#372) --- src/tests.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests.rs b/src/tests.rs index 624f44c..675e35c 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -861,7 +861,7 @@ fn test_from_vec() { #[test] fn test_retain() { - // Test inline data storate + // Test inline data storage let mut sv: SmallVec = SmallVec::from_slice(&[1, 2, 3, 3, 4]); sv.retain(|&mut i| i != 3); assert_eq!(sv.pop(), Some(4)); From 6ba116c54995c5836e6a81a580e093bdb82f4c9e Mon Sep 17 00:00:00 2001 From: otcova <65542832+otcova@users.noreply.github.com> Date: Mon, 28 Apr 2025 06:26:59 +0200 Subject: [PATCH 08/13] reimplement from_buf (#377) --- src/lib.rs | 24 
++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index f05bc3f..21bde96 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -673,11 +673,27 @@ impl SmallVec { } #[inline] - pub const fn from_buf(buf: [T; N]) -> Self { - // SAFETY: all the members in 0..N are initialized + pub const fn from_buf(elements: [T; S]) -> Self { + assert!(S <= N); // Free check since the values are known at compile time + + // Although we create a new buffer, since S and N are known at compile time, + // even with `-C opt-level=1`, it gets optimized as best as it could be. (Checked with ) + let mut buf: MaybeUninit<[T; N]> = MaybeUninit::uninit(); + + // SAFETY: buf and elements do not overlap, are aligned and have space + // for at least S elements since S <= N. + // We will drop the elements only once since we do forget(elements). + unsafe { + copy_nonoverlapping(elements.as_ptr(), buf.as_mut_ptr() as *mut T, S); + } + + // `elements` have been moved into buf and will be dropped by SmallVec + core::mem::forget(elements); + + // SAFETY: all the members in 0..S are initialized Self { - len: TaggedLen::new(N, false, Self::is_zst()), - raw: RawSmallVec::new_inline(MaybeUninit::new(buf)), + len: TaggedLen::new(S, false, Self::is_zst()), + raw: RawSmallVec::new_inline(buf), _marker: PhantomData, } } From 3906fe419285250261af4376a4224f075ca37eab Mon Sep 17 00:00:00 2001 From: Matt Brubeck Date: Mon, 28 Apr 2025 07:18:08 -0700 Subject: [PATCH 09/13] v2: Bump MSRV to 1.83 (#378) * v2: Bump MSRV to 1.83 * Run GitHub checks on merge_group --- .github/workflows/main.yml | 4 +++- Cargo.toml | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b17cda8..79edf65 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -4,6 +4,8 @@ on: push: branches: [v2] pull_request: + merge_group: + types: [checks_requested] workflow_dispatch: jobs: @@ -12,7 +14,7 @@ 
jobs: runs-on: ubuntu-latest strategy: matrix: - toolchain: ["stable", "beta", "nightly", "1.57.0"] + toolchain: ["stable", "beta", "nightly", "1.83.0"] include: - toolchain: stable fuzz: 1 diff --git a/Cargo.toml b/Cargo.toml index e08efc6..d90e58f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ name = "smallvec" version = "2.0.0-alpha.11" edition = "2021" -rust-version = "1.57" +rust-version = "1.83" authors = ["The Servo Project Developers"] license = "MIT OR Apache-2.0" repository = "https://github.com/servo/rust-smallvec" From dff2390f8a80b673a78675a0d4231438c3832b6f Mon Sep 17 00:00:00 2001 From: Tobias Decking Date: Tue, 27 May 2025 22:31:00 +0200 Subject: [PATCH 10/13] Unconditionally implement `core::error::Error` (#380) --- src/lib.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 21bde96..4d04cba 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -121,9 +121,7 @@ impl core::fmt::Display for CollectionAllocErr { } } -#[cfg(feature = "std")] -#[cfg_attr(docsrs, doc(cfg(feature = "std")))] -impl std::error::Error for CollectionAllocErr {} +impl core::error::Error for CollectionAllocErr {} /// Either a stack array with `length <= N` or a heap array /// whose pointer and capacity are stored here. From 05ca4ed6d15d5a77cf706f8d046370aab050526f Mon Sep 17 00:00:00 2001 From: Tobias Decking Date: Wed, 28 May 2025 00:50:05 +0200 Subject: [PATCH 11/13] Update the `extract_if` feature (#381) * Update the `extract_if` feature * doctest --- src/lib.rs | 73 +++++++++++++++++++++++++++++++++++++++++++++------- src/tests.rs | 6 ++--- 2 files changed, 66 insertions(+), 13 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 4d04cba..19d472a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -403,6 +403,8 @@ where vec: &'a mut SmallVec, /// The index of the item that will be inspected by the next call to `next`. idx: usize, + /// Elements at and beyond this point will be retained. Must be equal or smaller than `old_len`. 
+ end: usize, /// The number of items that have been drained (removed) thus far. del: usize, /// The original length of `vec` prior to draining. @@ -433,7 +435,7 @@ where fn next(&mut self) -> Option { unsafe { - while self.idx < self.old_len { + while self.idx < self.end { let i = self.idx; let v = core::slice::from_raw_parts_mut(self.vec.as_mut_ptr(), self.old_len); let drained = (self.pred)(&mut v[i]); @@ -456,7 +458,7 @@ where } fn size_hint(&self) -> (usize, Option) { - (0, Some(self.old_len - self.idx)) + (0, Some(self.end - self.idx)) } } @@ -903,12 +905,15 @@ impl SmallVec { } #[cfg(feature = "extract_if")] - /// Creates an iterator which uses a closure to determine if an element should be removed. + /// Creates an iterator which uses a closure to determine if an element in the range should be removed. /// - /// If the closure returns true, the element is removed and yielded. + /// If the closure returns true, then the element is removed and yielded. /// If the closure returns false, the element will remain in the vector and will not be yielded /// by the iterator. /// + /// Only elements that fall in the provided range are considered for extraction, but any elements + /// after the range will still have to be moved if any element has been extracted. + /// /// If the returned `ExtractIf` is not exhausted, e.g. because it is dropped without iterating /// or the iteration short-circuits, then the remaining elements will be retained. /// Use [`retain`] with a negated predicate if you do not need the returned iterator. 
@@ -918,10 +923,12 @@ impl SmallVec { /// Using this method is equivalent to the following code: /// ``` /// # use smallvec::SmallVec; + /// # use std::cmp::min; /// # let some_predicate = |x: &mut i32| { *x == 2 || *x == 3 || *x == 6 }; /// # let mut vec: SmallVec = SmallVec::from_slice(&[1i32, 2, 3, 4, 5, 6]); + /// # let range = 1..4; /// let mut i = 0; - /// while i < vec.len() { + /// while i < min(vec.len(), range.end) { /// if some_predicate(&mut vec[i]) { /// let val = vec.remove(i); /// // your code here @@ -936,8 +943,12 @@ impl SmallVec { /// But `extract_if` is easier to use. `extract_if` is also more efficient, /// because it can backshift the elements of the array in bulk. /// - /// Note that `extract_if` also lets you mutate every element in the filter closure, - /// regardless of whether you choose to keep or remove it. + /// Note that `extract_if` also lets you mutate the elements passed to the filter closure, + /// regardless of whether you choose to keep or remove them. + /// + /// # Panics + /// + /// If `range` is out of bounds. 
/// /// # Examples /// @@ -947,17 +958,58 @@ impl SmallVec { /// # use smallvec::SmallVec; /// let mut numbers: SmallVec = SmallVec::from_slice(&[1i32, 2, 3, 4, 5, 6, 8, 9, 11, 13, 14, 15]); /// - /// let evens = numbers.extract_if(|x| *x % 2 == 0).collect::>(); + /// let evens = numbers.extract_if(.., |x| *x % 2 == 0).collect::>(); /// let odds = numbers; /// /// assert_eq!(evens, SmallVec::::from_slice(&[2i32, 4, 6, 8, 14])); /// assert_eq!(odds, SmallVec::::from_slice(&[1i32, 3, 5, 9, 11, 13, 15])); /// ``` - pub fn extract_if(&mut self, filter: F) -> ExtractIf<'_, T, N, F> + /// + /// Using the range argument to only process a part of the vector: + /// + /// ``` + /// # use smallvec::SmallVec; + /// let mut items: SmallVec = SmallVec::from_slice(&[0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 2, 1, 2]); + /// let ones = items.extract_if(7.., |x| *x == 1).collect::>(); + /// assert_eq!(items, SmallVec::::from_slice(&[0, 0, 0, 0, 0, 0, 0, 2, 2, 2])); + /// assert_eq!(ones.len(), 3); + /// ``` + pub fn extract_if(&mut self, range: R, filter: F) -> ExtractIf<'_, T, N, F> where F: FnMut(&mut T) -> bool, + R: core::ops::RangeBounds, { let old_len = self.len(); + // This line can be used instead once `core::slice::range` is stable. 
+ //let core::ops::Range { start, end } = core::slice::range(range, ..old_len); + let (start, end) = { + let len = old_len; + + let start = match range.start_bound() { + core::ops::Bound::Included(&start) => start, + core::ops::Bound::Excluded(start) => { + start.checked_add(1).unwrap_or_else(|| panic!("attempted to index slice from after maximum usize")) + } + core::ops::Bound::Unbounded => 0, + }; + + let end = match range.end_bound() { + core::ops::Bound::Included(end) => { + end.checked_add(1).unwrap_or_else(|| panic!("attempted to index slice up to maximum usize")) + } + core::ops::Bound::Excluded(&end) => end, + core::ops::Bound::Unbounded => len, + }; + + if start > end { + panic!("slice index starts at {start} but ends at {end}"); + } + if end > len { + panic!("range end index {end} out of range for slice of length {len}"); + } + + (start, end) + }; // Guard against us getting leaked (leak amplification) unsafe { @@ -966,7 +1018,8 @@ impl SmallVec { ExtractIf { vec: self, - idx: 0, + idx: start, + end, del: 0, old_len, pred: filter, diff --git a/src/tests.rs b/src/tests.rs index 675e35c..a70f281 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -1088,11 +1088,11 @@ fn test_clone_from() { #[cfg(feature = "extract_if")] #[test] fn test_extract_if() { - let mut a: SmallVec = smallvec![1u8, 2, 3, 4, 5, 6, 7, 8]; + let mut a: SmallVec = smallvec![0, 1u8, 2, 3, 4, 5, 6, 7, 8, 0]; - let b: SmallVec = a.extract_if(|x| *x % 3 == 0).collect(); + let b: SmallVec = a.extract_if(1..9, |x| *x % 3 == 0).collect(); - assert_eq!(a, SmallVec::::from_slice(&[1u8, 2, 4, 5, 7, 8])); + assert_eq!(a, SmallVec::::from_slice(&[0, 1u8, 2, 4, 5, 7, 8, 0])); assert_eq!(b, SmallVec::::from_slice(&[3u8, 6])); } From 4fe40688e97555f93a6861ff14720657c5161c07 Mon Sep 17 00:00:00 2001 From: Tobias Decking Date: Sat, 31 May 2025 01:47:45 +0200 Subject: [PATCH 12/13] Add missing `Vec` functionality (#382) --- benches/bench.rs | 19 -- fuzz/fuzz_targets/smallvec_ops.rs | 4 +- src/lib.rs | 517 
+++++++++++++++++++----------- src/tests.rs | 204 ++---------- 4 files changed, 362 insertions(+), 382 deletions(-) diff --git a/benches/bench.rs b/benches/bench.rs index b5a109e..987bef9 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -278,25 +278,6 @@ fn gen_from_elem>(n: usize, b: &mut Bencher) { }); } -#[bench] -fn bench_insert_many(b: &mut Bencher) { - #[inline(never)] - fn insert_many_noinline>( - vec: &mut SmallVec, - index: usize, - iterable: I, - ) { - vec.insert_many(index, iterable) - } - - b.iter(|| { - let mut vec = SmallVec::::new(); - insert_many_noinline(&mut vec, 0, 0..SPILLED_SIZE as _); - insert_many_noinline(&mut vec, 0, 0..SPILLED_SIZE as _); - vec - }); -} - #[bench] fn bench_insert_from_slice(b: &mut Bencher) { let v: Vec = (0..SPILLED_SIZE as _).collect(); diff --git a/fuzz/fuzz_targets/smallvec_ops.rs b/fuzz/fuzz_targets/smallvec_ops.rs index 5733da9..10747a0 100644 --- a/fuzz/fuzz_targets/smallvec_ops.rs +++ b/fuzz/fuzz_targets/smallvec_ops.rs @@ -94,7 +94,7 @@ fn do_test(data: &[u8]) -> SmallVec { let insert_pos = next_usize!(bytes) % (v.len() + 1); let how_many = next_usize!(bytes); let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { - v.insert_many(insert_pos, (0..how_many).map(|_| bytes.next().unwrap())); + v.splice(insert_pos..insert_pos, (0..how_many).map(|_| bytes.next().unwrap())); })); if result.is_err() { @@ -107,7 +107,7 @@ fn do_test(data: &[u8]) -> SmallVec { 19 => { let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { - v.retain(|e| { + v.retain_mut(|e| { let alt_e = bytes.next().unwrap(); let retain = *e >= alt_e; *e = e.wrapping_add(alt_e); diff --git a/src/lib.rs b/src/lib.rs index 19d472a..8ab158e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -176,7 +176,7 @@ impl RawSmallVec { } #[inline] - fn as_mut_ptr_inline(&mut self) -> *mut T { + const fn as_mut_ptr_inline(&mut self) -> *mut T { // SAFETY: See above. 
(unsafe { addr_of_mut!(self.inline) }) as *mut T } @@ -193,7 +193,7 @@ impl RawSmallVec { /// /// The vector must be on the heap #[inline] - unsafe fn as_mut_ptr_heap(&mut self) -> *mut T { + const unsafe fn as_mut_ptr_heap(&mut self) -> *mut T { self.heap.0.as_ptr() } @@ -364,29 +364,123 @@ impl core::iter::FusedIterator for Drain<'_, T, N> {} impl<'a, T: 'a, const N: usize> Drop for Drain<'a, T, N> { fn drop(&mut self) { - if core::mem::needs_drop::() { - self.for_each(drop); + /// Moves back the un-`Drain`ed elements to restore the original `Vec`. + struct DropGuard<'r, 'a, T, const N: usize>(&'r mut Drain<'a, T, N>); + + impl<'r, 'a, T, const N: usize> Drop for DropGuard<'r, 'a, T, N> { + fn drop(&mut self) { + if self.0.tail_len > 0 { + unsafe { + let source_vec = self.0.vec.as_mut(); + // memmove back untouched tail, update to new length + let start = source_vec.len(); + let tail = self.0.tail_start; + if tail != start { + let ptr = source_vec.as_mut_ptr(); + let src = ptr.add(tail); + let dst = ptr.add(start); + core::ptr::copy(src, dst, self.0.tail_len); + } + source_vec.set_len(start + self.0.tail_len); + } + } + } } - if self.tail_len > 0 { - // SAFETY: we're copying initialized members back to the end of the vector - // then updating its length + let iter = core::mem::take(&mut self.iter); + let drop_len = iter.len(); + + let mut vec = self.vec; + + if SmallVec::::is_zst() { + // ZSTs have no identity, so we don't need to move them around, we only need to drop the correct amount. + // this can be achieved by manipulating the Vec length instead of moving values out from `iter`. unsafe { - let source_vec = self.vec.as_mut(); - - let start = source_vec.len(); - let tail = self.tail_start; - if tail != start { - // as_mut_ptr creates a &mut, invalidating other pointers. - // This pattern avoids calling it with a pointer already present. 
- let ptr = source_vec.as_mut_ptr(); - let src = ptr.add(tail); - let dst = ptr.add(start); - copy(src, dst, self.tail_len); - } - source_vec.set_len(start + self.tail_len); + let vec = vec.as_mut(); + let old_len = vec.len(); + vec.set_len(old_len + drop_len + self.tail_len); + vec.truncate(old_len + self.tail_len); + } + + return; + } + + // ensure elements are moved back into their appropriate places, even when drop_in_place panics + let _guard = DropGuard(self); + + if drop_len == 0 { + return; + } + + // as_slice() must only be called when iter.len() is > 0 because + // it also gets touched by vec::Splice which may turn it into a dangling pointer + // which would make it and the vec pointer point to different allocations which would + // lead to invalid pointer arithmetic below. + let drop_ptr = iter.as_slice().as_ptr(); + + unsafe { + // drop_ptr comes from a slice::Iter which only gives us a &[T] but for drop_in_place + // a pointer with mutable provenance is necessary. Therefore we must reconstruct + // it from the original vec but also avoid creating a &mut to the front since that could + // invalidate raw pointers to it which some unsafe code might rely on. + let vec_ptr = vec.as_mut().as_mut_ptr(); + // May be replaced with the line below later, once this crate's MSRV is >= 1.87. + //let drop_offset = drop_ptr.offset_from_unsigned(vec_ptr); + let drop_offset = drop_ptr.offset_from(vec_ptr) as usize; + let to_drop = core::ptr::slice_from_raw_parts_mut(vec_ptr.add(drop_offset), drop_len); + core::ptr::drop_in_place(to_drop); + } + } +} + +impl Drain<'_, T, N> { + #[must_use] + pub fn as_slice(&self) -> &[T] { + self.iter.as_slice() + } + + /// The range from `self.vec.len` to `self.tail_start` contains elements + /// that have been moved out. + /// Fill that range as much as possible with new elements from the `replace_with` iterator. + /// Returns `true` if we filled the entire range. (`replace_with.next()` didn’t return `None`.) 
+ unsafe fn fill>(&mut self, replace_with: &mut I) -> bool { + let vec = unsafe { self.vec.as_mut() }; + let range_start = vec.len(); + let range_end = self.tail_start; + let range_slice = unsafe { + core::slice::from_raw_parts_mut(vec.as_mut_ptr().add(range_start), range_end - range_start) + }; + + for place in range_slice { + if let Some(new_item) = replace_with.next() { + unsafe { core::ptr::write(place, new_item) }; + vec.set_len(vec.len() + 1); + } else { + return false; } } + true + } + + /// Makes room for inserting more elements before the tail. + #[track_caller] + unsafe fn move_tail(&mut self, additional: usize) { + let vec = unsafe { self.vec.as_mut() }; + let len = self.tail_start + self.tail_len; + + // Test + let old_len = vec.len(); + vec.set_len(len); + vec.reserve(additional); + vec.set_len(old_len); + + let new_tail_start = self.tail_start + additional; + unsafe { + let src = vec.as_ptr().add(self.tail_start); + let dst = vec.as_mut_ptr().add(new_tail_start); + core::ptr::copy(src, dst, self.tail_len); + } + self.tail_start = new_tail_start; } } @@ -487,6 +581,88 @@ where } } +pub struct Splice<'a, I: Iterator + 'a, const N: usize> { + drain: Drain<'a, I::Item, N>, + replace_with: I, +} + +impl<'a, I, const N: usize> core::fmt::Debug for Splice<'a, I, N> +where + I: Debug + Iterator + 'a, + ::Item: Debug, +{ + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_tuple("Splice") + .field(&self.drain) + .finish() + } +} + +impl Iterator for Splice<'_, I, N> { + type Item = I::Item; + + fn next(&mut self) -> Option { + self.drain.next() + } + + fn size_hint(&self) -> (usize, Option) { + self.drain.size_hint() + } +} + +impl DoubleEndedIterator for Splice<'_, I, N> { + fn next_back(&mut self) -> Option { + self.drain.next_back() + } +} + +impl ExactSizeIterator for Splice<'_, I, N> {} + +impl Drop for Splice<'_, I, N> { + fn drop(&mut self) { + self.drain.by_ref().for_each(drop); + // At this point draining is done and the 
only remaining tasks are splicing + // and moving things into the final place. + // Which means we can replace the slice::Iter with pointers that won't point to deallocated + // memory, so that Drain::drop is still allowed to call iter.len(), otherwise it would break + // the ptr.sub_ptr contract. + self.drain.iter = (&[]).iter(); + + unsafe { + if self.drain.tail_len == 0 { + self.drain.vec.as_mut().extend(self.replace_with.by_ref()); + return; + } + + // First fill the range left by drain(). + if !self.drain.fill(&mut self.replace_with) { + return; + } + + // There may be more elements. Use the lower bound as an estimate. + // FIXME: Is the upper bound a better guess? Or something else? + let (lower_bound, _upper_bound) = self.replace_with.size_hint(); + if lower_bound > 0 { + self.drain.move_tail(lower_bound); + if !self.drain.fill(&mut self.replace_with) { + return; + } + } + + // Collect any remaining elements. + let mut collected = self.replace_with.by_ref().collect::>().into_iter(); + // Now we have an exact count. + if collected.len() > 0 { + self.drain.move_tail(collected.len()); + let filled = self.drain.fill(&mut collected); + debug_assert!(filled); + debug_assert_eq!(collected.len(), 0); + } + } + // Let `Drain::drop` move the tail back if necessary and restore `vec.len`. + } +} + /// An iterator that consumes a `SmallVec` and yields its items by value. /// /// Returned from [`SmallVec::into_iter`][1]. 
@@ -527,7 +703,7 @@ impl IntoIter { } #[inline] - fn as_mut_ptr(&mut self) -> *mut T { + const fn as_mut_ptr(&mut self) -> *mut T { let on_heap = self.end.on_heap(Self::is_zst()); if on_heap { // SAFETY: vector is on the heap @@ -538,7 +714,7 @@ impl IntoIter { } #[inline] - pub fn as_slice(&self) -> &[T] { + pub const fn as_slice(&self) -> &[T] { // SAFETY: The members in self.begin..self.end.value() are all initialized // So the pointer arithmetic is valid, and so is the construction of the slice unsafe { @@ -551,7 +727,7 @@ impl IntoIter { } #[inline] - pub fn as_mut_slice(&mut self) -> &mut [T] { + pub const fn as_mut_slice(&mut self) -> &mut [T] { // SAFETY: see above unsafe { let ptr = self.as_mut_ptr(); @@ -611,11 +787,6 @@ impl ExactSizeIterator for IntoIter {} impl core::iter::FusedIterator for IntoIter {} impl SmallVec { - #[inline] - const fn is_zst() -> bool { - size_of::() == 0 - } - #[inline] pub const fn new() -> SmallVec { Self { @@ -634,47 +805,9 @@ impl SmallVec { this } - #[inline] - pub fn from_vec(vec: Vec) -> Self { - if vec.capacity() == 0 { - return Self::new(); - } - - if Self::is_zst() { - // "Move" elements to stack buffer. They're ZST so we don't actually have to do - // anything. Just make sure they're not dropped. - // We don't wrap the vector in ManuallyDrop so that when it's dropped, the memory is - // deallocated, if it needs to be. - let mut vec = vec; - let len = vec.len(); - - // SAFETY: `0` is less than the vector's capacity. - // old_len..new_len is an empty range. So there are no uninitialized elements - unsafe { vec.set_len(0) }; - Self { - len: TaggedLen::new(len, false, Self::is_zst()), - raw: RawSmallVec::new(), - _marker: PhantomData, - } - } else { - let mut vec = ManuallyDrop::new(vec); - let len = vec.len(); - let cap = vec.capacity(); - // SAFETY: vec.capacity is not `0` (checked above), so the pointer - // can not dangle and thus specifically cannot be null. 
- let ptr = unsafe { NonNull::new_unchecked(vec.as_mut_ptr()) }; - - Self { - len: TaggedLen::new(len, true, Self::is_zst()), - raw: RawSmallVec::new_heap(ptr, cap), - _marker: PhantomData, - } - } - } - #[inline] pub const fn from_buf(elements: [T; S]) -> Self { - assert!(S <= N); // Free check since the values are known at compile time + const { assert!(S <= N); } // Althought we create a new buffer, since S and N are known at compile time, // even with `-C opt-level=1`, it gets optimized as best as it could be. (Checked with ) @@ -751,6 +884,51 @@ impl SmallVec { _marker: PhantomData, } } +} + +impl SmallVec { + #[inline] + const fn is_zst() -> bool { + size_of::() == 0 + } + + #[inline] + pub fn from_vec(vec: Vec) -> Self { + if vec.capacity() == 0 { + return Self::new(); + } + + if Self::is_zst() { + // "Move" elements to stack buffer. They're ZST so we don't actually have to do + // anything. Just make sure they're not dropped. + // We don't wrap the vector in ManuallyDrop so that when it's dropped, the memory is + // deallocated, if it needs to be. + let mut vec = vec; + let len = vec.len(); + + // SAFETY: `0` is less than the vector's capacity. + // old_len..new_len is an empty range. So there are no uninitialized elements + unsafe { vec.set_len(0) }; + Self { + len: TaggedLen::new(len, false, Self::is_zst()), + raw: RawSmallVec::new(), + _marker: PhantomData, + } + } else { + let mut vec = ManuallyDrop::new(vec); + let len = vec.len(); + let cap = vec.capacity(); + // SAFETY: vec.capacity is not `0` (checked above), so the pointer + // can not dangle and thus specifically cannot be null. 
+ let ptr = unsafe { NonNull::new_unchecked(vec.as_mut_ptr()) }; + + Self { + len: TaggedLen::new(len, true, Self::is_zst()), + raw: RawSmallVec::new_heap(ptr, cap), + _marker: PhantomData, + } + } + } /// Sets the tag to be on the heap /// @@ -804,7 +982,7 @@ impl SmallVec { #[must_use] #[inline] - pub fn is_empty(&self) -> bool { + pub const fn is_empty(&self) -> bool { self.len() == 0 } @@ -900,6 +1078,7 @@ impl SmallVec { iter: range_slice.iter(), // Since self is a &mut, passing it to a function would invalidate the slice iterator. vec: core::ptr::NonNull::new_unchecked(self as *mut _), + //vec: core::ptr::NonNull::from(self), } } } @@ -1026,6 +1205,14 @@ impl SmallVec { } } + pub fn splice(&mut self, range: R, replace_with: I) -> Splice<'_, I::IntoIter, N> + where + R: core::ops::RangeBounds, + I: IntoIterator, + { + Splice { drain: self.drain(range), replace_with: replace_with.into_iter() } + } + #[inline] pub fn push(&mut self, value: T) { let len = self.len(); @@ -1055,6 +1242,12 @@ impl SmallVec { } } + #[inline] + pub fn pop_if(&mut self, predicate: impl FnOnce(&mut T) -> bool) -> Option { + let last = self.last_mut()?; + if predicate(last) { self.pop() } else { None } + } + #[inline] pub fn append(&mut self, other: &mut SmallVec) { // can't overflow since both are smaller than isize::MAX and 2 * isize::MAX < usize::MAX @@ -1200,6 +1393,35 @@ impl SmallVec { } } + #[inline] + pub fn shrink_to(&mut self, min_capacity: usize) { + if !self.spilled() { + return; + } + if self.capacity() > min_capacity { + let len = self.len(); + let target = core::cmp::max(len, min_capacity); + if target <= Self::inline_size() { + // SAFETY: self.spilled() is true, so we're on the heap + unsafe { + let (ptr, capacity) = self.raw.heap; + self.raw = RawSmallVec::new_inline(MaybeUninit::uninit()); + copy_nonoverlapping(ptr.as_ptr(), self.raw.as_mut_ptr_inline(), len); + self.set_inline(); + alloc::alloc::dealloc( + ptr.cast().as_ptr(), + 
Layout::from_size_align_unchecked(capacity * size_of::(), align_of::()), + ); + } + } else if target < self.capacity() { + // SAFETY: len > Self::inline_size() >= 0 + // so new capacity is non zero, it is equal to the length + // T can't be a ZST because SmallVec is never spilled. + unsafe { infallible(self.raw.try_grow_raw(self.len, target)) }; + } + } + } + #[inline] pub fn truncate(&mut self, len: usize) { let old_len = self.len(); @@ -1216,56 +1438,42 @@ impl SmallVec { } } - #[inline] - pub fn as_slice(&self) -> &[T] { - let len = self.len(); - let ptr = self.as_ptr(); - // SAFETY: all the elements in `..len` are initialized - unsafe { core::slice::from_raw_parts(ptr, len) } - } - - #[inline] - pub fn as_mut_slice(&mut self) -> &mut [T] { - let len = self.len(); - let ptr = self.as_mut_ptr(); - // SAFETY: see above - unsafe { core::slice::from_raw_parts_mut(ptr, len) } - } - #[inline] pub fn swap_remove(&mut self, index: usize) -> T { let len = self.len(); - assert!(index < len); + assert!(index < len, "swap_remove index (is {index}) should be < len (is {len})"); // This can't overflow since `len > index >= 0` let new_len = len - 1; unsafe { - // SAFETY: we set len to a smaller value + // We replace self[index] with the last element. Note that if the + // bounds check above succeeds there must be a last element (which + // can be self[index] itself). + let value = core::ptr::read(self.as_ptr().add(index)); + let base_ptr = self.as_mut_ptr(); + core::ptr::copy(base_ptr.add(new_len), base_ptr.add(index), 1); self.set_len(new_len); - let ptr = self.as_mut_ptr(); - let last = ptr.add(new_len); - let ith = ptr.add(index); - // This item is initialized since it was in the vector just before - let last_item = last.read(); - // This item is initialized since index < len - let ith_item = ith.read(); - - // Note that these may be the same element. 
- // This is fine since in this case we just write it back to the pointer past the end of - // the vector, so the vector no longer owns it - ith.write(last_item); - ith_item + value } } #[inline] pub fn clear(&mut self) { - self.truncate(0); + // SAFETY: we set `len` to a smaller value + // then we drop the previously initialized elements + unsafe { + let old_len = self.len(); + self.set_len(0); + core::ptr::drop_in_place(core::ptr::slice_from_raw_parts_mut( + self.as_mut_ptr(), + old_len, + )); + } } #[inline] pub fn remove(&mut self, index: usize) -> T { let len = self.len(); - assert!(index < len); + assert!(index < len, "removal index (is {index}) should be < len (is {len})"); let new_len = len - 1; unsafe { // SAFETY: new_len < len @@ -1282,7 +1490,7 @@ impl SmallVec { #[inline] pub fn insert(&mut self, index: usize, value: T) { let len = self.len(); - assert!(index <= len); + assert!(index <= len, "insertion index (is {index}) should be <= len (is {len})"); self.reserve(1); let ptr = self.as_mut_ptr(); unsafe { @@ -1298,34 +1506,20 @@ impl SmallVec { } } - fn insert_many_impl>(&mut self, mut index: usize, iter: I) { + #[inline] + pub const fn as_slice(&self) -> &[T] { let len = self.len(); - if index == len { - return self.extend(iter); - } - - let mut iter = iter.fuse(); - let (lower_bound, _) = iter.size_hint(); - self.reserve(lower_bound); - - let count = unsafe { - let ptr = self.as_mut_ptr(); - // SAFETY: ptr is valid for `lower_bound` writes since we just reserved that much - let count = insert_many_batch(ptr, index, lower_bound, len, &mut iter); - // SAFETY: insert_many_batch_phase returns the number of elements it initialized, and - // leaves the vector in a valid state, without setting the new length - self.set_len(len + count); - count - }; - - index += count; - iter.enumerate() - .for_each(|(i, item)| self.insert(index + i, item)); + let ptr = self.as_ptr(); + // SAFETY: all the elements in `..len` are initialized + unsafe { 
core::slice::from_raw_parts(ptr, len) } } #[inline] - pub fn insert_many>(&mut self, index: usize, iterable: I) { - self.insert_many_impl(index, iterable.into_iter()); + pub const fn as_mut_slice(&mut self) -> &mut [T] { + let len = self.len(); + let ptr = self.as_mut_ptr(); + // SAFETY: see above + unsafe { core::slice::from_raw_parts_mut(ptr, len) } } #[inline] @@ -1339,7 +1533,7 @@ impl SmallVec { } #[inline] - pub fn as_mut_ptr(&mut self) -> *mut T { + pub const fn as_mut_ptr(&mut self) -> *mut T { if self.len.on_heap(Self::is_zst()) { // SAFETY: see above unsafe { self.raw.as_mut_ptr_heap() } @@ -1400,7 +1594,13 @@ impl SmallVec { } } - pub fn retain bool>(&mut self, mut f: F) { + #[inline] + pub fn retain bool>(&mut self, mut f: F) { + self.retain_mut(|elem| f(elem)) + } + + #[inline] + pub fn retain_mut bool>(&mut self, mut f: F) { let mut del = 0; let len = self.len(); let ptr = self.as_mut_ptr(); @@ -1418,11 +1618,6 @@ impl SmallVec { self.truncate(len - del); } - #[inline] - pub fn retain_mut bool>(&mut self, f: F) { - self.retain(f) - } - #[inline] pub fn dedup(&mut self) where @@ -1489,6 +1684,11 @@ impl SmallVec { } } + pub fn leak<'a>(self) -> &'a mut [T] { + let mut me = ManuallyDrop::new(self); + unsafe { core::slice::from_raw_parts_mut(me.as_mut_ptr(), me.len()) } + } + /// Returns the remaining spare capacity of the vector as a slice of /// `MaybeUninit`. 
/// @@ -1678,7 +1878,7 @@ impl SmallVec { pub fn resize(&mut self, len: usize, value: T) { let old_len = self.len(); if len > old_len { - self.extend(core::iter::repeat(value).take(len - old_len)); + self.extend(core::iter::repeat_n(value, len - old_len)); } else { self.truncate(len); } @@ -1709,22 +1909,6 @@ impl SmallVec { } } -struct DropShiftGuard { - ptr: *mut T, - len: usize, - shifted_ptr: *const T, - shifted_len: usize, -} -impl Drop for DropShiftGuard { - #[inline] - fn drop(&mut self) { - unsafe { - core::ptr::slice_from_raw_parts_mut(self.ptr, self.len).drop_in_place(); - copy(self.shifted_ptr, self.ptr, self.shifted_len); - } - } -} - struct DropGuard { ptr: *mut T, len: usize, @@ -1738,39 +1922,6 @@ impl Drop for DropGuard { } } -// Safety: -// -// `ptr..ptr + lower_bound` must be valid for writes -#[inline] -unsafe fn insert_many_batch>( - ptr: *mut T, - index: usize, - lower_bound: usize, - len: usize, - iter: &mut I, -) -> usize { - // shift elements to the right to make space for the initial elements from the iterator - copy(ptr.add(index), ptr.add(index + lower_bound), len - index); - let ptr_ith = ptr.add(index); - let mut guard = DropShiftGuard { - ptr: ptr_ith, - len: 0, - shifted_ptr: ptr_ith.add(lower_bound), - shifted_len: len - index, - }; - iter.take(lower_bound).enumerate().for_each(|(i, item)| { - ptr_ith.add(i).write(item); - guard.len = i + 1; - }); - let count = guard.len; - core::mem::forget(guard); - - if count < lower_bound { - copy(ptr_ith.add(lower_bound), ptr_ith.add(count), len - index); - } - count -} - impl Extend for SmallVec { #[inline] fn extend>(&mut self, iterable: I) { diff --git a/src/tests.rs b/src/tests.rs index a70f281..18665b4 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -141,6 +141,30 @@ fn drain_forget() { assert_eq!(v.len(), 2); } +#[test] +fn splice() { + // The range starts right before the end. 
+ let mut v: SmallVec = smallvec![0, 1, 2, 3, 4, 5, 6]; + let new = [7, 8, 9, 10]; + let u: SmallVec = v.splice(6.., new).collect(); + assert_eq!(v, [0, 1, 2, 3, 4, 5, 7, 8, 9, 10]); + assert_eq!(u, [6]); + + // The range is empty. + let mut v: SmallVec = smallvec![0, 1, 2, 3, 4, 5, 6]; + let new = [7, 8, 9, 10]; + let u: SmallVec = v.splice(1..1, new).collect(); + assert_eq!(v, [0, 7, 8, 9, 10, 1, 2, 3, 4, 5, 6]); + assert_eq!(u, []); + + // The range is at the beginning and nonempty. + let mut v: SmallVec = smallvec![0, 1, 2, 3, 4, 5, 6]; + let new = [7, 8, 9, 10]; + let u: SmallVec = v.splice(..3, new).collect(); + assert_eq!(v, [7, 8, 9, 10, 3, 4, 5, 6]); + assert_eq!(u, [0, 1, 2]); +} + #[test] fn into_iter() { let mut v: SmallVec = SmallVec::new(); @@ -315,20 +339,6 @@ fn test_split_off_take_all() { assert_ne!(split_off.as_ptr(), orig_ptr); } -#[test] -fn test_insert_many() { - let mut v: SmallVec = SmallVec::new(); - for x in 0..4 { - v.push(x); - } - assert_eq!(v.len(), 4); - v.insert_many(1, [5, 6].iter().cloned()); - assert_eq!( - &v.iter().map(|v| *v).collect::>(), - &[0, 5, 6, 1, 2, 3] - ); -} - #[test] fn test_append() { let mut v: SmallVec = SmallVec::new(); @@ -348,155 +358,6 @@ fn test_append() { ); } -struct MockHintIter { - x: T, - hint: usize, -} -impl Iterator for MockHintIter { - type Item = T::Item; - fn next(&mut self) -> Option { - self.x.next() - } - fn size_hint(&self) -> (usize, Option) { - (self.hint, None) - } -} - -#[test] -fn test_insert_many_short_hint() { - let mut v: SmallVec = SmallVec::new(); - for x in 0..4 { - v.push(x); - } - assert_eq!(v.len(), 4); - v.insert_many( - 1, - MockHintIter { - x: [5, 6].iter().cloned(), - hint: 5, - }, - ); - assert_eq!( - &v.iter().map(|v| *v).collect::>(), - &[0, 5, 6, 1, 2, 3] - ); -} - -#[test] -fn test_insert_many_long_hint() { - let mut v: SmallVec = SmallVec::new(); - for x in 0..4 { - v.push(x); - } - assert_eq!(v.len(), 4); - v.insert_many( - 1, - MockHintIter { - x: [5, 
6].iter().cloned(), - hint: 1, - }, - ); - assert_eq!( - &v.iter().map(|v| *v).collect::>(), - &[0, 5, 6, 1, 2, 3] - ); -} - -// https://github.com/servo/rust-smallvec/issues/96 -mod insert_many_panic { - use crate::{smallvec, SmallVec}; - use alloc::boxed::Box; - - struct PanicOnDoubleDrop { - dropped: Box, - } - - impl PanicOnDoubleDrop { - fn new() -> Self { - Self { - dropped: Box::new(false), - } - } - } - - impl Drop for PanicOnDoubleDrop { - fn drop(&mut self) { - assert!(!*self.dropped, "already dropped"); - *self.dropped = true; - } - } - - /// Claims to yield `hint` items, but actually yields `count`, then panics. - struct BadIter { - hint: usize, - count: usize, - } - - impl Iterator for BadIter { - type Item = PanicOnDoubleDrop; - fn size_hint(&self) -> (usize, Option) { - (self.hint, None) - } - fn next(&mut self) -> Option { - if self.count == 0 { - panic!() - } - self.count -= 1; - Some(PanicOnDoubleDrop::new()) - } - } - - #[test] - fn panic_early_at_start() { - let mut vec: SmallVec = - smallvec![PanicOnDoubleDrop::new(), PanicOnDoubleDrop::new(),]; - let result = ::std::panic::catch_unwind(move || { - vec.insert_many(0, BadIter { hint: 1, count: 0 }); - }); - assert!(result.is_err()); - } - - #[test] - fn panic_early_in_middle() { - let mut vec: SmallVec = - smallvec![PanicOnDoubleDrop::new(), PanicOnDoubleDrop::new(),]; - let result = ::std::panic::catch_unwind(move || { - vec.insert_many(1, BadIter { hint: 4, count: 2 }); - }); - assert!(result.is_err()); - } - - #[test] - fn panic_early_at_end() { - let mut vec: SmallVec = - smallvec![PanicOnDoubleDrop::new(), PanicOnDoubleDrop::new(),]; - let result = ::std::panic::catch_unwind(move || { - vec.insert_many(2, BadIter { hint: 3, count: 1 }); - }); - assert!(result.is_err()); - } - - #[test] - fn panic_late_at_start() { - let mut vec: SmallVec = - smallvec![PanicOnDoubleDrop::new(), PanicOnDoubleDrop::new(),]; - let result = ::std::panic::catch_unwind(move || { - vec.insert_many(0, BadIter { 
hint: 3, count: 5 }); - }); - assert!(result.is_err()); - } - - #[test] - fn panic_late_at_end() { - let mut vec: SmallVec = - smallvec![PanicOnDoubleDrop::new(), PanicOnDoubleDrop::new(),]; - let result = ::std::panic::catch_unwind(move || { - vec.insert_many(2, BadIter { hint: 3, count: 5 }); - }); - assert!(result.is_err()); - } -} - #[test] #[should_panic] fn test_invalid_grow() { @@ -863,7 +724,7 @@ fn test_from_vec() { fn test_retain() { // Test inline data storage let mut sv: SmallVec = SmallVec::from_slice(&[1, 2, 3, 3, 4]); - sv.retain(|&mut i| i != 3); + sv.retain(|&i| i != 3); assert_eq!(sv.pop(), Some(4)); assert_eq!(sv.pop(), Some(2)); assert_eq!(sv.pop(), Some(1)); @@ -871,7 +732,7 @@ fn test_retain() { // Test spilled data storage let mut sv: SmallVec = SmallVec::from_slice(&[1, 2, 3, 3, 4]); - sv.retain(|&mut i| i != 3); + sv.retain(|&i| i != 3); assert_eq!(sv.pop(), Some(4)); assert_eq!(sv.pop(), Some(2)); assert_eq!(sv.pop(), Some(1)); @@ -1050,19 +911,6 @@ fn zero_size_items() { SmallVec::<(), 0>::new().push(()); } -#[test] -fn test_insert_many_overflow() { - let mut v: SmallVec = SmallVec::new(); - v.push(123); - - // Prepare an iterator with small lower bound - let iter = (0u8..5).filter(|n| n % 2 == 0); - assert_eq!(iter.size_hint().0, 0); - - v.insert_many(0, iter); - assert_eq!(&*v, &[0, 2, 4, 123]); -} - #[test] fn test_clone_from() { let mut a: SmallVec = SmallVec::new(); From 71c8abba754b9fb5d004d4e0742fc6d0a856a223 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 5 Jun 2025 11:01:14 +1000 Subject: [PATCH 13/13] Shrink code produced by `smallvec![]`. 
(#385) Currently `smallvec![]` expands to this: ``` { let count = 0usize; #[allow(unused_mut)] let mut vec = ::smallvec::SmallVec::new(); if count <= vec.capacity() { vec } else { ::smallvec::SmallVec::from_vec(::alloc::vec::Vec::new()) } }; ``` This commit adds a rule to the `smallvec!` macro for the zero-length case so it instead expands to this: ``` ::smallvec::SmallVec::new() ``` The `std::vec!` macro already has a similar special case. This commit also improves the non-zero case. - It removes the `#[allow(unused_mut)]`, which was only needed for the zero-length case. - It changes the `*` repetitions to `+`. (Again, like `std::vec`.) --- src/lib.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 8ab158e..a2d1b7d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2140,18 +2140,20 @@ impl Clone for IntoIter { macro_rules! smallvec { // count helper: transform any expression into 1 (@one $x:expr) => (1usize); + () => ( + $crate::SmallVec::new() + ); ($elem:expr; $n:expr) => ({ $crate::SmallVec::from_elem($elem, $n) }); - ($($x:expr),*$(,)?) => ({ - let count = 0usize $(+ $crate::smallvec!(@one $x))*; - #[allow(unused_mut)] + ($($x:expr),+$(,)?) => ({ + let count = 0usize $(+ $crate::smallvec!(@one $x))+; let mut vec = $crate::SmallVec::new(); if count <= vec.capacity() { $(vec.push($x);)* vec } else { - $crate::SmallVec::from_vec($crate::alloc::vec![$($x,)*]) + $crate::SmallVec::from_vec($crate::alloc::vec![$($x,)+]) } }); }