From ef861742ce9f98bf9d3dc2d8c61f7f170b9c74d8 Mon Sep 17 00:00:00 2001 From: Mahdi Ali-Raihan Date: Sat, 28 Feb 2026 00:52:15 -0500 Subject: [PATCH 1/3] Implemented DoubleEndedIterator for Ancestors<'_> + added tests and updated doc comments --- library/std/src/path.rs | 285 ++++++++++++++++++++- library/std/tests/path_ancestors.rs | 369 ++++++++++++++++++++++++++++ 2 files changed, 647 insertions(+), 7 deletions(-) create mode 100644 library/std/tests/path_ancestors.rs diff --git a/library/std/src/path.rs b/library/std/src/path.rs index bf27df7b04281..8bc7a833f461e 100644 --- a/library/std/src/path.rs +++ b/library/std/src/path.rs @@ -1089,6 +1089,22 @@ fn compare_components(mut left: Components<'_>, mut right: Components<'_>) -> cm Iterator::cmp(left, right) } +/// This is what the first component of our path is +/// +/// In previous stable versions of Ancestors<'_> +/// the last component of relative paths produces +/// an "" at the end, so we must preserve that behavior +#[derive(Copy, Clone, Debug)] +enum FirstComponent { + /// For all paths starting with `/` + AbsolutePath, + /// For paths without root path like `.`, `..`, `a/` + RelativePath, + /// For Windows-specific paths like (`C:`, `\\?\UNC\server\share`, + /// `\\.\COM42`, etc.) + Prefix, +} + /// An iterator over [`Path`] and its ancestors. /// /// This `struct` is created by the [`ancestors`] method on [`Path`]. @@ -1111,7 +1127,139 @@ fn compare_components(mut left: Components<'_>, mut right: Components<'_>) -> cm #[must_use = "iterators are lazy and do nothing unless consumed"] #[stable(feature = "path_ancestors", since = "1.28.0")] pub struct Ancestors<'a> { - next: Option<&'a Path>, + path: &'a [u8], + front: usize, + back: usize, + trailing_seps: usize, + first_comp: Option<FirstComponent>, +} + +impl<'a> Ancestors<'a> { + /// This is a helper function for consuming the physical first component in + /// either `Ancestors<'_>` `.next()` or `.next_back()`.
+ /// + /// There are four cases we can have here: + /// - We have an unconsumed absolute component (`/`). We should just output `/` + /// in this case (with trailing separators if this is our original path). + /// - In previous implementations of `Ancestors<'_>`, our last component + /// produced is `""`. We also need to ensure that the first component of the + /// reverse ancestor returns `""` for symmetry as well. + /// - We have an unconsumed prefix component (Windows specific, e.g. `C:`). + /// We should just return that prefix component (with trailing separators + /// if this is our original path). + /// - We don't have a start component (frequent case), which means we just + /// return `None`. + #[inline] + fn consume_first_component(&mut self, dir_front: bool) -> Option<&'a Path> { + match self.first_comp { + Some(first_comp) => { + let sliced_path: &Path; + let path_len = self.path.len(); + match first_comp { + FirstComponent::AbsolutePath => { + sliced_path = if dir_front { + self.advance_through_trailing_sep_front(); + // This won't overflow because advance_through_trailing_sep_front() + // stops where `self.back` is at (and we know `self.back` max value is + // `path_len` - `self.trailing_seps`) + if self.front + self.trailing_seps == path_len { + // SAFETY: This contains the whole original path + unsafe { Path::from_u8_slice(&self.path[0..path_len]) } + } else { + Path::new("/") + } + } else { + if self.back + self.trailing_seps == path_len { + // SAFETY: This contains the whole original path + unsafe { Path::from_u8_slice(&self.path[0..path_len]) } + } else { + Path::new("/") + } + }; + } + FirstComponent::RelativePath => { + sliced_path = Path::new(""); + } + FirstComponent::Prefix => { + if dir_front { + let curr_front = self.front; + self.advance_through_trailing_sep_front(); + // SAFETY: We either get the original path + // or slice at an ascii separator byte + sliced_path = unsafe { + if self.front + self.trailing_seps == path_len { + 
Path::from_u8_slice(&self.path[0..path_len]) + } else { + Path::from_u8_slice(&self.path[0..curr_front]) + } + }; + } else { + sliced_path = unsafe { + if self.back + self.trailing_seps == path_len { + Path::from_u8_slice(&self.path[0..path_len]) + } else { + Path::from_u8_slice(&self.path[0..self.back]) + } + }; + } + } + } + self.first_comp = None; + return Some(Path::new(sliced_path)); + } + None => return None, + } + } + + /// Skip any trailing separators in the forward direction + #[inline] + fn advance_through_trailing_sep_front(&mut self) { + loop { + if self.front == self.back || !is_sep_byte(self.path[self.front]) { + break; + } + self.front += 1; + } + } + + /// Skip any trailing separators in the backward direction + #[inline] + fn advance_through_trailing_sep_back(&mut self) { + loop { + if self.back == self.front || !is_sep_byte(self.path[self.back - 1]) { + break; + } + self.back -= 1; + } + } + + /// Increments our front pointer until we find the + /// next separator byte or have reached the component + /// that back index is pointing at + #[inline] + fn find_next_separator_front(&mut self) { + while self.front < self.back { + if is_sep_byte(self.path[self.front]) { + self.front += 1; + break; + } + self.front += 1; + } + } + + /// Decrements our back pointer until we find the + /// next separator byte or have reached the component + /// that front index is pointing to + #[inline] + fn find_next_separator_back(&mut self) { + while self.back > self.front { + if is_sep_byte(self.path[self.back - 1]) { + self.back -= 1; + break; + } + self.back -= 1; + } + } } #[stable(feature = "path_ancestors", since = "1.28.0")] @@ -1120,9 +1268,74 @@ impl<'a> Iterator for Ancestors<'a> { #[inline] fn next(&mut self) -> Option<Self::Item> { - let next = self.next; - self.next = next.and_then(Path::parent); - next + // We reach here when we no longer have any more paths + // to consume, we're dealing with relative paths and + // need to output "", or we need to output Prefix
component + if self.back <= self.front { + return self.consume_first_component(false); + } + + let path_len = self.path.len(); + // Our current `self.back` index at this point encompasses + // the parent path + let curr_back = self.back; + + // We trace our `self.back` idx up the path until we reach a + // separator byte. This prepares the path we return on the next + // call to this function. + self.find_next_separator_back(); + // Skip trailing seps + self.advance_through_trailing_sep_back(); + + // The first path our back pointer must return is the original path + if curr_back + self.trailing_seps == path_len { + // SAFETY: This contains the whole original path + let sliced_path = unsafe { Path::from_u8_slice(&self.path[0..path_len]) }; + return Some(Path::new(sliced_path)); + } + + // SAFETY: Our curr_back index is always stationed at an ascii separator byte + // so our u8 slice will always contain a valid path + let sliced_path = unsafe { Path::from_u8_slice(&self.path[0..curr_back]) }; + // We don't have to trim separator here because it's excluded by 0..curr_back + Some(Path::new(sliced_path)) + } +} + +#[stable(feature = "reverse_ancestors", since = "CURRENT_RUSTC_VERSION")] +impl<'a> DoubleEndedIterator for Ancestors<'a> { + #[inline] + fn next_back(&mut self) -> Option<Self::Item> { + // We reach this case when we no longer have any more paths + // to consume (return `None`), or if our front idx was initially + // equal to back idx (e.g. if we had `C:`, `.`, `/`) + if self.front >= self.back { + return self.consume_first_component(true); + } + + // Consume our first component if we haven't already. + if let Some(sliced_path) = self.consume_first_component(true) { + return Some(sliced_path); + } + + let path_len = self.path.len(); + // We trace our `self.front` idx down the path until + // we hit a separator.
+ self.find_next_separator_front(); + // Skip trailing seps + self.advance_through_trailing_sep_front(); + + // The last path front must return is the original path + if self.front + self.trailing_seps == path_len { + // SAFETY: This contains the whole original path + let sliced_path = unsafe { Path::from_u8_slice(&self.path[0..path_len]) }; + return Some(Path::new(sliced_path)); + } + + // SAFETY: Our front index always stops at an ascii separator byte + // so our u8 slice will always contain a valid path + let sliced_path = unsafe { Path::from_u8_slice(&self.path[0..self.front]) }; + Some(Path::new(sliced_path).trim_trailing_sep()) } } @@ -2616,11 +2829,13 @@ impl Path { /// Produces an iterator over `Path` and its ancestors. /// - /// The iterator will yield the `Path` that is returned if the [`parent`] method is used zero - /// or more times. If the [`parent`] method returns [`None`], the iterator will do likewise. /// The iterator will always yield at least one value, namely `Some(&self)`. Next it will yield /// `&self.parent()`, `&self.parent().and_then(Path::parent)` and so on. /// + /// The iterator also allows you to yield `Path`(s) in the forward direction using + /// `.next_back()` or `.rev().next()`. It will always be symmetrical with the `.next()` + /// direction. 
+ /// /// # Examples /// /// ``` @@ -2640,11 +2855,67 @@ impl Path { /// assert_eq!(ancestors.next(), None); /// ``` /// + /// ``` + /// use std::path::Path; + /// + /// let mut ancestors = Path::new("/foo/bar").ancestors(); + /// assert_eq!(ancestors.next_back(), Some(Path::new("/"))); + /// assert_eq!(ancestors.next_back(), Some(Path::new("/foo"))); + /// assert_eq!(ancestors.next_back(), Some(Path::new("/foo/bar"))); + /// assert_eq!(ancestors.next_back(), None); + /// + /// let mut ancestors = Path::new("../foo/bar").ancestors(); + /// assert_eq!(ancestors.next_back(), Some(Path::new(""))); + /// assert_eq!(ancestors.next_back(), Some(Path::new(".."))); + /// assert_eq!(ancestors.next_back(), Some(Path::new("../foo"))); + /// assert_eq!(ancestors.next_back(), Some(Path::new("../foo/bar"))); + /// assert_eq!(ancestors.next_back(), None); + /// ``` + /// /// [`parent`]: Path::parent #[stable(feature = "path_ancestors", since = "1.28.0")] #[inline] pub fn ancestors(&self) -> Ancestors<'_> { - Ancestors { next: Some(&self) } + let os_str_path = self.as_os_str(); + let path_bytes = os_str_path.as_encoded_bytes(); + let path_len = path_bytes.len(); + let trailing_seps = if self.has_trailing_sep() { + // this won't panic because "" does not have + // a trailing separator + let mut idx = path_len; + while idx > 0 { + if !is_sep_byte(path_bytes[idx - 1]) { + break; + } + idx -= 1; + } + path_len - idx + } else { + 0 + }; + + // Windows specific component + let prefix = parse_prefix(os_str_path); + let prefix_exist = prefix.map(|_| true).unwrap_or(false); + + // Parse what our start component, which is needed in cases where + // `self.front` == `self.back`, or we're dealing with symmetry with + // relative path on returning `""` at the start/end of an iterator + let first_comp = if prefix_exist { + Some(FirstComponent::Prefix) + } else if self.is_relative() { + Some(FirstComponent::RelativePath) + } else { + Some(FirstComponent::AbsolutePath) + }; + + // If we have a 
prefix, we encode that index into front + let front = prefix.map(|prefix| prefix.len()).unwrap_or(0); + // Set our back pointer to the last separator byte (without trailing) + // or last byte + let back = path_len - trailing_seps; + + Ancestors { path: path_bytes, front, back, trailing_seps, first_comp } } /// Returns the final component of the `Path`, if there is one. diff --git a/library/std/tests/path_ancestors.rs b/library/std/tests/path_ancestors.rs new file mode 100644 index 0000000000000..d26ffe03dcf96 --- /dev/null +++ b/library/std/tests/path_ancestors.rs @@ -0,0 +1,369 @@ +use std::ffi::OsStr; +use std::path::Path; + +#[test] +fn empty_path_ancestors() { + let path = Path::new(""); + + let mut ancestors = path.ancestors(); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("")); + assert_eq!(ancestors.next(), None); + + let mut rev_ancestors = path.ancestors(); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("")); + assert_eq!(rev_ancestors.next_back(), None); +} + +#[test] +fn curr_dir_only_path_ancestors() { + let path = Path::new("."); + let mut ancestors = path.ancestors(); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new(".")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("")); + assert_eq!(ancestors.next(), None); + + let mut rev_ancestors = path.ancestors(); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("")); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new(".")); + assert_eq!(rev_ancestors.next_back(), None); +} + +#[test] +fn curr_dir_only_path_ancestors_rev() { + let path = Path::new("."); + let mut ancestors = path.ancestors(); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new(".")); + // next_back() should only see "" leftover + assert_eq!(ancestors.next_back().unwrap().as_os_str(), OsStr::new("")); + // We have consumed "."
and "", we should only observe + // `None` being returned from either end + assert_eq!(ancestors.next(), None); + assert_eq!(ancestors.next_back(), None); + + // operates like next_back() + let mut rev_ancestors = path.ancestors().rev(); + assert_eq!(rev_ancestors.next().unwrap().as_os_str(), OsStr::new("")); + + // operates like next() + let mut rev_ancestors = rev_ancestors.rev(); + assert_eq!(rev_ancestors.next().unwrap().as_os_str(), OsStr::new(".")); + + // fully consumed, should return None + let mut rev_ancestors = rev_ancestors.rev(); + assert_eq!(rev_ancestors.next_back(), None); +} + +#[test] +fn curr_dir_only_path_ancestors_rev_2() { + let path = Path::new("."); + let mut ancestors = path.ancestors(); + assert_eq!(ancestors.next_back().unwrap().as_os_str(), OsStr::new("")); + // next() should only see "." leftover + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new(".")); + // We have consumed "" and ".", we should only observe + // `None` being returned from either end + assert_eq!(ancestors.next(), None); + assert_eq!(ancestors.next_back(), None); + + // operates like next() + let mut rev_ancestors = path.ancestors().rev(); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new(".")); + + // operates like next_back() + let mut rev_ancestors = rev_ancestors.rev(); + assert_eq!(rev_ancestors.next().unwrap().as_os_str(), OsStr::new("")); + + // fully consumed, should return None + let mut rev_ancestors = rev_ancestors.rev(); + assert_eq!(rev_ancestors.next_back(), None); +} + +#[test] +fn single_letter_path_ancestors() { + let path = Path::new("a"); + let mut ancestors = path.ancestors(); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("a")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("")); + assert_eq!(ancestors.next(), None); + + let mut rev_ancestors = path.ancestors(); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("")); +
assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("a")); + assert_eq!(rev_ancestors.next_back(), None); +} + +#[test] +fn single_letter_trailing_path_ancestors() { + let path = Path::new("a/"); + let mut ancestors = path.ancestors(); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("a/")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("")); + assert_eq!(ancestors.next(), None); + + let mut rev_ancestors = path.ancestors(); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("")); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("a/")); + assert_eq!(rev_ancestors.next_back(), None); +} + +#[test] +fn curr_dir_relative_path_ancestors() { + let path = Path::new("./foo/bar"); + let mut ancestors = path.ancestors(); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("./foo/bar")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("./foo")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new(".")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("")); + assert_eq!(ancestors.next(), None); + + let mut rev_ancestors = path.ancestors(); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("")); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new(".")); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("./foo")); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("./foo/bar")); + assert_eq!(rev_ancestors.next_back(), None); +} + +#[test] +fn parent_dir_only_path_ancestors() { + let path = Path::new(".."); + let mut ancestors = path.ancestors(); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("..")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("")); + assert_eq!(ancestors.next(), None); + + let mut rev_ancestors = path.ancestors(); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("")); + 
assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("..")); + assert_eq!(rev_ancestors.next_back(), None); +} + +#[test] +fn parent_dir_relative_path_ancestors() { + let path = Path::new("../foo/bar/"); + let mut ancestors = path.ancestors(); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("../foo/bar/")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("../foo")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("..")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("")); + assert_eq!(ancestors.next(), None); + + let mut rev_ancestors = path.ancestors(); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("")); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("..")); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("../foo")); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("../foo/bar/")); + assert_eq!(rev_ancestors.next_back(), None); +} + +#[test] +fn relative_path_ancestors() { + let path = Path::new("foo/bar/baz/"); + let mut ancestors = path.ancestors(); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("foo/bar/baz/")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("foo/bar")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("foo")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("")); + assert_eq!(ancestors.next(), None); + + let mut rev_ancestors = path.ancestors(); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("")); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("foo")); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("foo/bar")); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("foo/bar/baz/")); + assert_eq!(rev_ancestors.next_back(), None); +} + +#[test] +fn one_letter_relative_path_ancestors() { + let path = Path::new("a/"); + let 
mut ancestors = path.ancestors(); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("a/")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("")); + assert_eq!(ancestors.next(), None); + + let mut rev_ancestors = path.ancestors(); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("")); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("a/")); + assert_eq!(rev_ancestors.next_back(), None); +} + +#[test] +fn root_dir_only_path_ancestors() { + let path = Path::new("/"); + let mut ancestors = path.ancestors(); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("/")); + assert_eq!(ancestors.next(), None); + + let mut rev_ancestors = path.ancestors(); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("/")); + assert_eq!(rev_ancestors.next_back(), None); +} + +#[test] +fn root_dir_trailing_path_ancestors() { + let path = Path::new("////"); + let mut ancestors = path.ancestors(); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("////")); + assert_eq!(ancestors.next(), None); + + let mut rev_ancestors = path.ancestors(); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("////")); + assert_eq!(rev_ancestors.next_back(), None); +} + +#[test] +fn absolute_path_ancestors() { + let path = Path::new("/foo/bar/"); + let mut ancestors = path.ancestors(); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("/foo/bar/")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("/foo")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("/")); + assert_eq!(ancestors.next(), None); + + let mut rev_ancestors = path.ancestors(); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("/")); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("/foo")); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("/foo/bar/")); + assert_eq!(rev_ancestors.next_back(), None); 
+} + +#[test] +fn absolute_with_in_between_trailing_seps_path_ancestors() { + let path = Path::new("/foo/////bar/"); + let mut ancestors = path.ancestors(); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("/foo/////bar/")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("/foo")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("/")); + assert_eq!(ancestors.next(), None); + + let mut rev_ancestors = path.ancestors(); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("/")); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("/foo")); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("/foo/////bar/")); + assert_eq!(rev_ancestors.next_back(), None); +} + +#[test] +fn absolute_rev_path_ancestors() { + let path = Path::new("/foo/bar/baz/"); + let mut ancestors = path.ancestors(); + + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("/foo/bar/baz/")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("/foo/bar")); + assert_eq!(ancestors.next_back().unwrap().as_os_str(), OsStr::new("/")); + assert_eq!(ancestors.next_back().unwrap().as_os_str(), OsStr::new("/foo")); + assert_eq!(ancestors.next(), None); // Fully consumed + assert_eq!(ancestors.next_back(), None); // Fully consumed +} + +#[cfg(windows)] +#[test] +fn verbatim_prefix_component_path_ancestors() { + let path = Path::new(r"\\\\?\\UNC\\server\\share\\.."); + let mut ancestors = path.ancestors(); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new(r"\\\\?\\UNC\\server\\share\\..")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new(r"\\\\?\\UNC\\server\\share\\")); + assert_eq!(ancestors.next(), None); + + let mut ancestors = path.ancestors(); + assert_eq!( + ancestors.next_back().unwrap().as_os_str(), + OsStr::new(r"\\\\?\\UNC\\server\\share\\") + ); + assert_eq!( + ancestors.next_back().unwrap().as_os_str(), + OsStr::new(r"\\\\?\\UNC\\server\\share\\..") + 
); + assert_eq!(ancestors.next_back(), None); +} + +#[cfg(windows)] +#[test] +fn verbatim_unc_prefix_component_path_ancestors() { + let path = Path::new(r"\\?\pictures\kittens"); + let mut ancestors = path.ancestors(); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new(r"\\?\pictures\kittens")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new(r"\\?\pictures")); + assert_eq!(ancestors.next(), None); + + let mut ancestors = path.ancestors(); + assert_eq!(ancestors.next_back().unwrap().as_os_str(), OsStr::new(r"\\?\pictures\")); + assert_eq!(ancestors.next_back().unwrap().as_os_str(), OsStr::new(r"\\?\pictures\kittens")); + assert_eq!(ancestors.next_back(), None); +} + +#[cfg(windows)] +#[test] +fn verbatim_disk_prefix_component_path_ancestors() { + let path = Path::new(r"\\?\c:\Test"); + let mut ancestors = path.ancestors(); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new(r"\\?\c:\Test")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new(r"\\?\c:\")); + assert_eq!(ancestors.next(), None); + + let mut ancestors = path.ancestors(); + assert_eq!(ancestors.next_back().unwrap().as_os_str(), OsStr::new(r"\\?\c:\")); + assert_eq!(ancestors.next_back().unwrap().as_os_str(), OsStr::new(r"\\?\c:\Test")); + assert_eq!(ancestors.next_back(), None); +} + +#[cfg(windows)] +#[test] +fn device_ns_prefix_component_path_ancestors() { + // No this will not execute notepad.exe + let path = Path::new(r"\\.\c:\Windows\System32\notepad.exe"); + let mut ancestors = path.ancestors(); + assert_eq!( + ancestors.next().unwrap().as_os_str(), + OsStr::new(r"\\.\c:\Windows\System32\notepad.exe") + ); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new(r"\\.\c:\Windows\System32")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new(r"\\.\c:\Windows")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new(r"\\.\c:\")); + assert_eq!(ancestors.next(), None); + + let mut ancestors = path.ancestors(); + 
assert_eq!(ancestors.next_back().unwrap().as_os_str(), OsStr::new(r"\\.\c:")); + assert_eq!(ancestors.next_back().unwrap().as_os_str(), OsStr::new(r"\\.\c:\Windows")); + assert_eq!(ancestors.next_back().unwrap().as_os_str(), OsStr::new(r"\\.\c:\Windows\System32")); + assert_eq!( + ancestors.next_back().unwrap().as_os_str(), + OsStr::new(r"\\.\c:\Windows\System32\notepad.exe") + ); + assert_eq!(ancestors.next_back(), None); +} + +#[cfg(windows)] +#[test] +fn unc_prefix_component_path_ancestors() { + let path = Path::new(r"\\server\share\test"); + let mut ancestors = path.ancestors(); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new(r"\\server\share\test")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new(r"\\server\share")); + assert_eq!(ancestors.next(), None); + + let mut ancestors = path.ancestors(); + assert_eq!(ancestors.next_back().unwrap().as_os_str(), OsStr::new(r"\\server\share")); + assert_eq!(ancestors.next_back().unwrap().as_os_str(), OsStr::new(r"\\server\share\test")); + assert_eq!(ancestors.next_back(), None); +} + +#[cfg(windows)] +#[test] +fn disk_prefix_component_path_ancestors() { + let path = Path::new(r"C:a\..\.."); + let mut ancestors = path.ancestors(); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new(r"C:a\..\..")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new(r"C:a\..")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new(r"C:a")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new(r"C:")); + assert_eq!(ancestors.next(), None); + + let mut ancestors = path.ancestors(); + assert_eq!(ancestors.next_back().unwrap().as_os_str(), OsStr::new(r"C:")); + assert_eq!(ancestors.next_back().unwrap().as_os_str(), OsStr::new(r"C:a")); + assert_eq!(ancestors.next_back().unwrap().as_os_str(), OsStr::new(r"C:a\..")); + assert_eq!(ancestors.next_back().unwrap().as_os_str(), OsStr::new(r"C:a\..\..")); + assert_eq!(ancestors.next_back(), None); +} From 
1a0f4372ea405bb56f4a4d38596c87b4361ecde5 Mon Sep 17 00:00:00 2001 From: Mahdi Ali-Raihan Date: Tue, 24 Mar 2026 20:41:57 -0400 Subject: [PATCH 2/3] Add normalization for current directory components (trailing and non-trailing) in Ancestors<'_> --- library/std/src/path.rs | 168 ++++++++++++++++++++++++---- library/std/tests/path_ancestors.rs | 90 +++++++++++++++ 2 files changed, 239 insertions(+), 19 deletions(-) diff --git a/library/std/src/path.rs b/library/std/src/path.rs index 8bc7a833f461e..555b8f14874d0 100644 --- a/library/std/src/path.rs +++ b/library/std/src/path.rs @@ -1211,31 +1211,116 @@ impl<'a> Ancestors<'a> { } } - /// Skip any trailing separators in the forward direction + /// Normalizes away trailing separators and current directory ('.') components + /// in the forward direction. #[inline] fn advance_through_trailing_sep_front(&mut self) { + // `Some(false)` is used to denote that + // we haven't seen a '.' component *yet*, + // `Some(true)` means we have seen a '.' component, + // and `None` means that the component is not '.' + let mut curr_dir = Some(false); + // We rebound to the original index for path components + // like '..' or 'abc.' + let mut rebound_ind: Option<usize> = None; loop { - if self.front == self.back || !is_sep_byte(self.path[self.front]) { + if self.front == self.back { + if let Some(front_ind) = rebound_ind { + self.front = front_ind; + } break; } + + if is_sep_byte(self.path[self.front]) { + if let Some(curr_dir_present) = curr_dir + && curr_dir_present + { + curr_dir = Some(false); + rebound_ind = None; + } + } else { + if self.path[self.front] == b'.'
{ + if let Some(curr_dir_present) = curr_dir { + if !curr_dir_present { + curr_dir = Some(true); + rebound_ind = Some(self.front); + } else { + curr_dir = None; + } + } else { + if let Some(front_ind) = rebound_ind { + self.front = front_ind; + } + break; + } + } else { + if let Some(front_ind) = rebound_ind { + self.front = front_ind; + } + break; + } + } + self.front += 1; } } - /// Skip any trailing separators in the backward direction + /// Normalizes away trailing separators and current directory ('.') components + /// in the backward direction #[inline] fn advance_through_trailing_sep_back(&mut self) { + // `Some(false)` is used to denote that + // we haven't seen a '.' component *yet*, + // `Some(true)` means we have seen a '.' component, + // and `None` means that the component is not '.' + let mut curr_dir = Some(false); + // We rebound to the original index for path components + // like '..' or 'abc.' + let mut rebound_ind: Option<usize> = None; loop { - if self.back == self.front || !is_sep_byte(self.path[self.back - 1]) { + if self.back == self.front { + if let Some(back_ind) = rebound_ind { + self.back = back_ind; + } break; } + + if is_sep_byte(self.path[self.back - 1]) { + if let Some(curr_dir_present) = curr_dir + && curr_dir_present + { + curr_dir = Some(false); + rebound_ind = None; + } + } else { + if self.path[self.back - 1] == b'.' { + if let Some(curr_dir_present) = curr_dir { + if !curr_dir_present { + curr_dir = Some(true); + rebound_ind = Some(self.back); + } else { + curr_dir = None; + } + } else { + if let Some(back_ind) = rebound_ind { + self.back = back_ind; + } + break; + } + } else { + if let Some(back_ind) = rebound_ind { + self.back = back_ind; + } + break; + } + } self.back -= 1; } } /// Increments our front pointer until we find the /// next separator byte or have reached the component - /// that back index is pointing at + /// that back index is pointing at.
#[inline] fn find_next_separator_front(&mut self) { while self.front < self.back { @@ -1249,7 +1334,7 @@ impl<'a> Ancestors<'a> { /// Decrements our back pointer until we find the /// next separator byte or have reached the component - /// that front index is pointing to + /// that front index is pointing to. #[inline] fn find_next_separator_back(&mut self) { while self.back > self.front { @@ -1284,7 +1369,8 @@ impl<'a> Iterator for Ancestors<'a> { // separator byte. This prepares the path we return on the next // call to this function. self.find_next_separator_back(); - // Skip trailing seps + // Normalizes trailing seps and curr dirs in preparation for + // next front component self.advance_through_trailing_sep_back(); // The first path our back pointer must return is the original path @@ -1322,7 +1408,11 @@ impl<'a> DoubleEndedIterator for Ancestors<'a> { // We trace our `self.front` idx down the path until // we hit a separator. self.find_next_separator_front(); - // Skip trailing seps + // In case paths like "././././a", we just want the first + // '.' 
path and normalize the rest away + let curr_front = self.front; + // Normalizes trailing seps and curr dirs in preparation for + // next front component self.advance_through_trailing_sep_front(); // The last path front must return is the original path @@ -1334,7 +1424,7 @@ impl<'a> DoubleEndedIterator for Ancestors<'a> { // SAFETY: Our front index always stops at an ascii separator byte // so our u8 slice will always contain a valid path - let sliced_path = unsafe { Path::from_u8_slice(&self.path[0..self.front]) }; + let sliced_path = unsafe { Path::from_u8_slice(&self.path[0..curr_front]) }; Some(Path::new(sliced_path).trim_trailing_sep()) } } @@ -2876,23 +2966,63 @@ impl Path { #[stable(feature = "path_ancestors", since = "1.28.0")] #[inline] pub fn ancestors(&self) -> Ancestors<'_> { - let os_str_path = self.as_os_str(); - let path_bytes = os_str_path.as_encoded_bytes(); - let path_len = path_bytes.len(); - let trailing_seps = if self.has_trailing_sep() { + /// Normalizes the trailing portion of given path + /// and returns the number of bytes that it occupied + #[inline] + fn trailing_path_length(path_bytes: &[u8]) -> usize { + let path_len = path_bytes.len(); // this won't panic because "" does not have // a trailing separator let mut idx = path_len; + + // `false` is used to denote that we haven't + // seen a '.' byte since the last separator *yet*, + // and `true` means we have seen one; a second + // non-separator byte ends the scan below + let mut curr_dir = false; + // We rebound to the original index for path components + // like '..' or 'abc.' + let mut rebound_idx: Option<usize> = None; while idx > 0 { - if !is_sep_byte(path_bytes[idx - 1]) { - break; + if is_sep_byte(path_bytes[idx - 1]) { + if curr_dir { + rebound_idx = None; + curr_dir = false; + } + } else { + if path_bytes[idx - 1] == b'.'
{ + if !curr_dir { + rebound_idx = Some(idx); + curr_dir = true; + } else { + if let Some(r_idx) = rebound_idx { + curr_dir = false; + idx = r_idx; + } + break; + } + } else { + if let Some(r_idx) = rebound_idx { + curr_dir = false; + idx = r_idx; + } + break; + } } idx -= 1; } + + // If our path is `./a/b/c`, this `.` is not normalized + // away because it's treated as its own component + if curr_dir { + idx += 1; + } path_len - idx - } else { - 0 - }; + } + + let os_str_path = self.as_os_str(); + let path_bytes = os_str_path.as_encoded_bytes(); + let trailing_seps = trailing_path_length(path_bytes); // Windows specific component let prefix = parse_prefix(os_str_path); @@ -2913,7 +3043,7 @@ impl Path { let front = prefix.map(|prefix| prefix.len()).unwrap_or(0); // Set our back pointer to the last separator byte (without trailing) // or last byte - let back = path_len - trailing_seps; + let back = path_bytes.len() - trailing_seps; Ancestors { path: path_bytes, front, back, trailing_seps, first_comp } } diff --git a/library/std/tests/path_ancestors.rs b/library/std/tests/path_ancestors.rs index d26ffe03dcf96..697c3910e3a79 100644 --- a/library/std/tests/path_ancestors.rs +++ b/library/std/tests/path_ancestors.rs @@ -124,6 +124,26 @@ fn curr_dir_relative_path_ancestors() { assert_eq!(rev_ancestors.next_back(), None); } +#[test] +fn multiple_curr_dir_relative_path_ancestors() { + let path = Path::new("././././baz/beam/boo"); + let mut ancestors = path.ancestors(); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("././././baz/beam/boo")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("././././baz/beam")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("././././baz")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new(".")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("")); + assert_eq!(ancestors.next(), None); + + let mut rev_ancestors = path.ancestors(); + 
assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("")); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new(".")); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("././././baz")); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("././././baz/beam")); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("././././baz/beam/boo")); + assert_eq!(rev_ancestors.next_back(), None); +} + #[test] fn parent_dir_only_path_ancestors() { let path = Path::new(".."); @@ -228,6 +248,44 @@ fn absolute_path_ancestors() { assert_eq!(rev_ancestors.next_back(), None); } +#[test] +fn absolute_path_with_curr_dir_path_ancestors() { + let path = Path::new("/."); + let mut ancestors = path.ancestors(); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("/.")); + assert_eq!(ancestors.next(), None); + + let mut rev_ancestors = path.ancestors(); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("/.")); + assert_eq!(rev_ancestors.next_back(), None); +} + +#[test] +fn absolute_path_with_trailing_curr_dir_path_ancestors() { + let path = Path::new("/./././././."); + let mut ancestors = path.ancestors(); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("/./././././.")); + assert_eq!(ancestors.next(), None); + + let mut rev_ancestors = path.ancestors(); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("/./././././.")); + assert_eq!(rev_ancestors.next_back(), None); +} + +#[test] +fn absolute_path_with_parent_dir_path_ancestors() { + let path = Path::new("/.."); + let mut ancestors = path.ancestors(); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("/..")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("/")); + assert_eq!(ancestors.next(), None); + + let mut rev_ancestors = path.ancestors(); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("/")); + 
assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("/..")); + assert_eq!(rev_ancestors.next_back(), None); +} + #[test] fn absolute_with_in_between_trailing_seps_path_ancestors() { let path = Path::new("/foo/////bar/"); @@ -244,6 +302,38 @@ fn absolute_with_in_between_trailing_seps_path_ancestors() { assert_eq!(rev_ancestors.next_back(), None); } +#[test] +fn absolute_curr_dir_and_trailing_seps_path_ancestors() { + let path = Path::new("/foo/bar/./././."); + let mut ancestors = path.ancestors(); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("/foo/bar/./././.")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("/foo")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("/")); + assert_eq!(ancestors.next(), None); + + let mut rev_ancestors = path.ancestors(); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("/")); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("/foo")); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("/foo/bar/./././.")); + assert_eq!(rev_ancestors.next_back(), None); +} + +#[test] +fn absolute_curr_dir_and_in_between_trailing_seps_path_ancestors() { + let path = Path::new("/foo////.////bar"); + let mut ancestors = path.ancestors(); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("/foo////.////bar")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("/foo")); + assert_eq!(ancestors.next().unwrap().as_os_str(), OsStr::new("/")); + assert_eq!(ancestors.next(), None); + + let mut rev_ancestors = path.ancestors(); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("/")); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("/foo")); + assert_eq!(rev_ancestors.next_back().unwrap().as_os_str(), OsStr::new("/foo////.////bar")); + assert_eq!(rev_ancestors.next_back(), None); +} + #[test] fn absolute_rev_path_ancestors() { let path = 
Path::new("/foo/bar/baz/"); From 4a9b6e9b23180ffa10e1eff4162ec6cc1402e627 Mon Sep 17 00:00:00 2001 From: Mahdi Ali-Raihan Date: Mon, 27 Apr 2026 12:41:25 -0400 Subject: [PATCH 3/3] Implement DoubleEndedIterator trait for Ancestors<'_> using Path::components --- library/std/src/path.rs | 439 ++++++++-------------------------------- 1 file changed, 79 insertions(+), 360 deletions(-) diff --git a/library/std/src/path.rs b/library/std/src/path.rs index 555b8f14874d0..f8e8ab0c56725 100644 --- a/library/std/src/path.rs +++ b/library/std/src/path.rs @@ -604,7 +604,7 @@ impl AsRef for Component<'_> { /// ``` /// /// [`components`]: Path::components -#[derive(Clone)] +#[derive(Copy, Clone)] #[must_use = "iterators are lazy and do nothing unless consumed"] #[stable(feature = "rust1", since = "1.0.0")] pub struct Components<'a> { @@ -1089,22 +1089,6 @@ fn compare_components(mut left: Components<'_>, mut right: Components<'_>) -> cm Iterator::cmp(left, right) } -/// This is what the first component of our path is -/// -/// In previous stable versions of Ancestors<'_> -/// the last component of relative paths produces -/// an "" at the end, so we must preserve that behavior -#[derive(Copy, Clone, Debug)] -enum FirstComponent { - /// For all paths starting with `/` - AbsolutePath, - /// For paths without root path like `.`, `..`, `a/` - RelativePath, - /// For Window specific paths like (`C:`, `\\?\UNC\server\share`, - /// `\\.\COM42`, etc.) - Prefix, -} - /// An iterator over [`Path`] and its ancestors. /// /// This `struct` is created by the [`ancestors`] method on [`Path`]. 
@@ -1128,223 +1112,11 @@ enum FirstComponent { #[stable(feature = "path_ancestors", since = "1.28.0")] pub struct Ancestors<'a> { path: &'a [u8], - front: usize, - back: usize, + components: Components<'a>, + front_bytes: usize, + back_bytes: usize, trailing_seps: usize, - first_comp: Option, -} - -impl<'a> Ancestors<'a> { - /// This is a helper function for consuming the physical first component in - /// either `Ancestors<'_>` `.next()` or `.next_back()`. - /// - /// There are four cases we can have here: - /// - We have an unconsumed absolute component (`/`). We should just output `/` - /// in this case (with trailing separators if this is our original path). - /// - In previous implementations of `Ancestors<'_>`, our last component - /// produced is `""`. We also need to ensure that the first component of the - /// reverse ancestor returns `""` for symmetry as well. - /// - We have an unconsumed prefix component (Windows specific, e.g. `C:`). - /// We should just return that prefix component (with trailing separators - /// if this is our original path). - /// - We don't have a start component (frequent case), which means we just - /// return `None`. 
- #[inline] - fn consume_first_component(&mut self, dir_front: bool) -> Option<&'a Path> { - match self.first_comp { - Some(first_comp) => { - let sliced_path: &Path; - let path_len = self.path.len(); - match first_comp { - FirstComponent::AbsolutePath => { - sliced_path = if dir_front { - self.advance_through_trailing_sep_front(); - // This won't overflow because advance_through_trailing_sep_front() - // stops where `self.back` is at (and we know `self.back` max value is - // `path_len` - `self.trailing_seps`) - if self.front + self.trailing_seps == path_len { - // SAFETY: This contains the whole original path - unsafe { Path::from_u8_slice(&self.path[0..path_len]) } - } else { - Path::new("/") - } - } else { - if self.back + self.trailing_seps == path_len { - // SAFETY: This contains the whole original path - unsafe { Path::from_u8_slice(&self.path[0..path_len]) } - } else { - Path::new("/") - } - }; - } - FirstComponent::RelativePath => { - sliced_path = Path::new(""); - } - FirstComponent::Prefix => { - if dir_front { - let curr_front = self.front; - self.advance_through_trailing_sep_front(); - // SAFETY: We either get the original path - // or slice at an ascii separator byte - sliced_path = unsafe { - if self.front + self.trailing_seps == path_len { - Path::from_u8_slice(&self.path[0..path_len]) - } else { - Path::from_u8_slice(&self.path[0..curr_front]) - } - }; - } else { - sliced_path = unsafe { - if self.back + self.trailing_seps == path_len { - Path::from_u8_slice(&self.path[0..path_len]) - } else { - Path::from_u8_slice(&self.path[0..self.back]) - } - }; - } - } - } - self.first_comp = None; - return Some(Path::new(sliced_path)); - } - None => return None, - } - } - - /// Normalizes away trailing separators and current directory ('.') components - /// in the forward direction. - #[inline] - fn advance_through_trailing_sep_front(&mut self) { - // `Some(false)` is used to denote that - // we haven't seen a '.' 
component *yet*, - // `Some(true)` means we have seen a '.' component, - // and `None` means that the component is not '.' - let mut curr_dir = Some(false); - // We rebound to the original index for path components - // like '..' or 'abc.' - let mut rebound_ind: Option = None; - loop { - if self.front == self.back { - if let Some(front_ind) = rebound_ind { - self.front = front_ind; - } - break; - } - - if is_sep_byte(self.path[self.front]) { - if let Some(curr_dir_present) = curr_dir - && curr_dir_present - { - curr_dir = Some(false); - rebound_ind = None; - } - } else { - if self.path[self.front] == b'.' { - if let Some(curr_dir_present) = curr_dir { - if !curr_dir_present { - curr_dir = Some(true); - rebound_ind = Some(self.front); - } else { - curr_dir = None; - } - } else { - if let Some(front_ind) = rebound_ind { - self.front = front_ind; - } - break; - } - } else { - if let Some(front_ind) = rebound_ind { - self.front = front_ind; - } - break; - } - } - - self.front += 1; - } - } - - /// Normalizes away trailing separators and current directory ('.') components - /// in the backward direction - #[inline] - fn advance_through_trailing_sep_back(&mut self) { - // `Some(false)` is used to denote that - // we haven't seen a '.' component *yet*, - // `Some(true)` means we have seen a '.' component, - // and `None` means that the component is not '.' - let mut curr_dir = Some(false); - // We rebound to the original index for path components - // like '..' or 'abc.' - let mut rebound_ind: Option = None; - loop { - if self.back == self.front { - if let Some(back_ind) = rebound_ind { - self.back = back_ind; - } - break; - } - - if is_sep_byte(self.path[self.back - 1]) { - if let Some(curr_dir_present) = curr_dir - && curr_dir_present - { - curr_dir = Some(false); - rebound_ind = None; - } - } else { - if self.path[self.back - 1] == b'.' 
{ - if let Some(curr_dir_present) = curr_dir { - if !curr_dir_present { - curr_dir = Some(true); - rebound_ind = Some(self.back); - } else { - curr_dir = None; - } - } else { - if let Some(back_ind) = rebound_ind { - self.back = back_ind; - } - break; - } - } else { - if let Some(back_ind) = rebound_ind { - self.back = back_ind; - } - break; - } - } - self.back -= 1; - } - } - - /// Increments our front pointer until we find the - /// next separator byte or have reached the component - /// that back index is pointing at. - #[inline] - fn find_next_separator_front(&mut self) { - while self.front < self.back { - if is_sep_byte(self.path[self.front]) { - self.front += 1; - break; - } - self.front += 1; - } - } - - /// Decrements our back pointer until we find the - /// next separator byte or have reached the component - /// that front index is pointing to. - #[inline] - fn find_next_separator_back(&mut self) { - while self.back > self.front { - if is_sep_byte(self.path[self.back - 1]) { - self.back -= 1; - break; - } - self.back -= 1; - } - } + is_relative: bool, } #[stable(feature = "path_ancestors", since = "1.28.0")] @@ -1353,38 +1125,42 @@ impl<'a> Iterator for Ancestors<'a> { #[inline] fn next(&mut self) -> Option { + let path_len = self.path.len(); // We reach here when we no longer have anymore paths // to consume, we're dealing with relative paths and - // need to output "", or we need to output Prefix component - if self.back <= self.front { - return self.consume_first_component(false); + // need to output "" + if self.front_bytes + self.back_bytes >= path_len - self.trailing_seps { + if self.is_relative { + self.is_relative = false; + return Some(Path::new("")); + } + return None; } - let path_len = self.path.len(); - // Our current `self.back` index at this point encompasses - // the parent path - let curr_back = self.back; - - // We trace our `self.back` idx up the path until we reach a - // separator byte. 
This prepares the path we return on the next - // call to this function. - self.find_next_separator_back(); - // Normalizes trailing seps and curr dirs in preparation for - // next front component - self.advance_through_trailing_sep_back(); - - // The first path our back pointer must return is the original path - if curr_back + self.trailing_seps == path_len { + // `Ancestors::next` presents the current path + let curr_back_bytes = self.back_bytes; + let curr_path_bytes = self.components.as_path().as_u8_slice(); + + // Consume back component + self.components.next_back(); + let next_path_bytes = self.components.as_path().as_u8_slice(); + self.back_bytes += curr_path_bytes.len() - next_path_bytes.len(); + + // `Ancestors::next` first path to present will always be the entire + // path untrimmed + if curr_back_bytes == 0 { // SAFETY: This contains the whole original path let sliced_path = unsafe { Path::from_u8_slice(&self.path[0..path_len]) }; return Some(Path::new(sliced_path)); } - // SAFETY: Our curr_back index is always stationed at an ascii separator byte - // so our u8 slice will always contain a valid path - let sliced_path = unsafe { Path::from_u8_slice(&self.path[0..curr_back]) }; - // We don't have to trim separator here because it's excluded by 0..curr_back - Some(Path::new(sliced_path)) + let back_ind = path_len - self.trailing_seps - curr_back_bytes; + // SAFETY: Traversing through component should stop at a valid separator byte + // so this should always be a valid u8 slice + let sliced_path = unsafe { Path::from_u8_slice(&self.path[0..back_ind]) }; + // we use `Path::components` here instead of `Path::trim_trailing_seps` because + // the latter method does not normalize curr dir components (i.e. 
"/foo////.////bar") + Some(Path::components(sliced_path).as_path()) } } @@ -1392,40 +1168,52 @@ impl<'a> Iterator for Ancestors<'a> { impl<'a> DoubleEndedIterator for Ancestors<'a> { #[inline] fn next_back(&mut self) -> Option { - // We reach this case when we no longer have anymore paths - // to consume (return `None`), or if our front idx was initially - // equal to back idx (e.g. if we had `C:`, `.`, `/`) - if self.front >= self.back { - return self.consume_first_component(true); + let path_len = self.path.len(); + // We reach here when we no longer have anymore paths + // to consume, we're dealing with relative paths and + // need to output "" + if self.front_bytes + self.back_bytes >= path_len - self.trailing_seps { + // This is needed for mixing `Ancestors::next`/`Ancestors::next_back` + // on "." directory + if self.is_relative { + self.is_relative = false; + return Some(Path::new("")); + } + return None; } - // Consume our first component if we haven't already. - if let Some(sliced_path) = self.consume_first_component(true) { - return Some(sliced_path); + // If path is relative, the first path that `Ancestors::next_back` + // produce is an empty path + if self.is_relative { + self.is_relative = false; + return Some(Path::new("")); } - let path_len = self.path.len(); - // We trace our `self.front` idx down the path until - // we hit a separator. - self.find_next_separator_front(); - // In case paths like "././././a", we just want the first - // '.' 
path and normalize the rest away - let curr_front = self.front; - // Normalizes trailing seps and curr dirs in preparation for - // next front component - self.advance_through_trailing_sep_front(); - - // The last path front must return is the original path - if self.front + self.trailing_seps == path_len { + // `Ancestors::next_back` presents the path given by the next consumed + // components + let curr_path_bytes = self.components.as_path().as_u8_slice(); + // Consume front component + self.components.next(); + let next_path_bytes = self.components.as_path().as_u8_slice(); + + // We add up how many bytes we have read between curr_path_bytes and + // next_path_bytes to check if our `Ancestors::next_back` is presenting + // the last path component (in which case we need to present the whole + // path untrimmed) + self.front_bytes += curr_path_bytes.len() - next_path_bytes.len(); + if self.front_bytes == path_len - self.trailing_seps { + self.front_bytes += self.trailing_seps; // SAFETY: This contains the whole original path let sliced_path = unsafe { Path::from_u8_slice(&self.path[0..path_len]) }; return Some(Path::new(sliced_path)); } - // SAFETY: Our front index always stops at an ascii separator byte - // so our u8 slice will always contain a valid path - let sliced_path = unsafe { Path::from_u8_slice(&self.path[0..curr_front]) }; - Some(Path::new(sliced_path).trim_trailing_sep()) + // SAFETY: Traversing through component should stop at a valid separator byte + // so this should always be a valid u8 slice + let sliced_path = unsafe { Path::from_u8_slice(&self.path[0..self.front_bytes]) }; + // we use `Path::components` here instead of `Path::trim_trailing_seps` because + // the latter method does not normalize curr dir components (i.e. 
"/foo////.////bar") + Some(Path::components(sliced_path).as_path()) } } @@ -2966,86 +2754,17 @@ impl Path { #[stable(feature = "path_ancestors", since = "1.28.0")] #[inline] pub fn ancestors(&self) -> Ancestors<'_> { - /// Normalizes the trailing portion of given path - /// and returns the number of bytes that it occupied - #[inline] - fn trailing_path_length(path_bytes: &[u8]) -> usize { - let path_len = path_bytes.len(); - // this won't panic because "" does not have - // a trailing separator - let mut idx = path_len; - - // `Some(false)` is used to denote that - // we haven't seen a '.' component *yet*, - // `Some(true)` means we have seen a '.' component, - // and `None` means that the component is not '.' - let mut curr_dir = false; - // We rebound to the original index for path components - // like '..' or 'abc.' - let mut rebound_idx: Option = None; - while idx > 0 { - if is_sep_byte(path_bytes[idx - 1]) { - if curr_dir { - rebound_idx = None; - curr_dir = false; - } - } else { - if path_bytes[idx - 1] == b'.' 
{ - if !curr_dir { - rebound_idx = Some(idx); - curr_dir = true; - } else { - if let Some(r_idx) = rebound_idx { - curr_dir = false; - idx = r_idx; - } - break; - } - } else { - if let Some(r_idx) = rebound_idx { - curr_dir = false; - idx = r_idx; - } - break; - } - } - idx -= 1; - } - - // If our path is `./a/b/c`, this `.` is not normalized - // away because it's treated as its own component - if curr_dir { - idx += 1; - } - path_len - idx + let path = self.as_os_str().as_encoded_bytes(); + let trailing_seps = path.len() - self.components().as_path().as_u8_slice().len(); + let is_relative = self.is_relative(); + Ancestors { + path, + components: self.components(), + front_bytes: 0, + back_bytes: 0, + trailing_seps, + is_relative, } - - let os_str_path = self.as_os_str(); - let path_bytes = os_str_path.as_encoded_bytes(); - let trailing_seps = trailing_path_length(path_bytes); - - // Windows specific component - let prefix = parse_prefix(os_str_path); - let prefix_exist = prefix.map(|_| true).unwrap_or(false); - - // Parse what our start component, which is needed in cases where - // `self.front` == `self.back`, or we're dealing with symmetry with - // relative path on returning `""` at the start/end of an iterator - let first_comp = if prefix_exist { - Some(FirstComponent::Prefix) - } else if self.is_relative() { - Some(FirstComponent::RelativePath) - } else { - Some(FirstComponent::AbsolutePath) - }; - - // If we have a prefix, we encode that index into front - let front = prefix.map(|prefix| prefix.len()).unwrap_or(0); - // Set our back pointer to the last separator byte (without trailing) - // or last byte - let back = path_bytes.len() - trailing_seps; - - Ancestors { path: path_bytes, front, back, trailing_seps, first_comp } } /// Returns the final component of the `Path`, if there is one.