Added str::char_offset_iter() and str::rev_char_offset_iter()

Renamed bytes_iter to byte_iter to match other iterators
Refactored str Iterators to use DoubleEnded Iterators and typedefs instead of wrapper structs
Reordered the Iterator section
Whitespace fixup
Moved clunky `each_split_within` function to the one place in the tree where it's actually needed
Replaced all block doccomments in str with line doccomments
This commit is contained in:
Marvin Löbel
2013-07-27 23:38:38 +02:00
parent d75ab4a5d7
commit e33fca9ffe
7 changed files with 592 additions and 597 deletions
+98 -2
View File
@@ -476,7 +476,6 @@ pub mod groups {
use getopts::{HasArg, Long, Maybe, Multi, No, Occur, Opt, Optional, Req};
use getopts::{Short, Yes};
use std::str;
use std::vec;
/** one group of options, e.g., both -h and --help, along with
@@ -667,7 +666,7 @@ pub fn usage(brief: &str, opts: &[OptGroup]) -> ~str {
// FIXME: #5516
let mut desc_rows = ~[];
for str::each_split_within(desc_normalized_whitespace, 54) |substr| {
for each_split_within(desc_normalized_whitespace, 54) |substr| {
desc_rows.push(substr.to_owned());
}
@@ -683,6 +682,103 @@ pub fn usage(brief: &str, opts: &[OptGroup]) -> ~str {
rows.collect::<~[~str]>().connect("\n") +
"\n\n";
}
/** Splits a string into substrings with possibly internal whitespace,
* each of them at most `lim` bytes long. The substrings have leading and trailing
* whitespace removed, and are only cut at whitespace boundaries.
*
* Note: Function was moved here from `std::str` because this module is the only place that
* uses it, and because it was to specific for a general string function.
*
* #Failure:
*
* Fails during iteration if the string contains a non-whitespace
* sequence longer than the limit.
*/
priv fn each_split_within<'a>(ss: &'a str,
lim: uint,
it: &fn(&'a str) -> bool) -> bool {
// Just for fun, let's write this as an state machine:
enum SplitWithinState {
A, // leading whitespace, initial state
B, // words
C, // internal and trailing whitespace
}
enum Whitespace {
Ws, // current char is whitespace
Cr // current char is not whitespace
}
enum LengthLimit {
UnderLim, // current char makes current substring still fit in limit
OverLim // current char makes current substring no longer fit in limit
}
let mut slice_start = 0;
let mut last_start = 0;
let mut last_end = 0;
let mut state = A;
let mut fake_i = ss.len();
let mut lim = lim;
let mut cont = true;
let slice: &fn() = || { cont = it(ss.slice(slice_start, last_end)) };
// if the limit is larger than the string, lower it to save cycles
if (lim >= fake_i) {
lim = fake_i;
}
let machine: &fn((uint, char)) -> bool = |(i, c)| {
let whitespace = if ::std::char::is_whitespace(c) { Ws } else { Cr };
let limit = if (i - slice_start + 1) <= lim { UnderLim } else { OverLim };
state = match (state, whitespace, limit) {
(A, Ws, _) => { A }
(A, Cr, _) => { slice_start = i; last_start = i; B }
(B, Cr, UnderLim) => { B }
(B, Cr, OverLim) if (i - last_start + 1) > lim
=> fail!("word starting with %? longer than limit!",
ss.slice(last_start, i + 1)),
(B, Cr, OverLim) => { slice(); slice_start = last_start; B }
(B, Ws, UnderLim) => { last_end = i; C }
(B, Ws, OverLim) => { last_end = i; slice(); A }
(C, Cr, UnderLim) => { last_start = i; B }
(C, Cr, OverLim) => { slice(); slice_start = i; last_start = i; last_end = i; B }
(C, Ws, OverLim) => { slice(); A }
(C, Ws, UnderLim) => { C }
};
cont
};
ss.iter().enumerate().advance(|x| machine(x));
// Let the automaton 'run out' by supplying trailing whitespace
while cont && match state { B | C => true, A => false } {
machine((fake_i, ' '));
fake_i += 1;
}
return cont;
}
#[test]
priv fn test_split_within() {
fn t(s: &str, i: uint, u: &[~str]) {
let mut v = ~[];
for each_split_within(s, i) |s| { v.push(s.to_owned()) }
assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
}
t("", 0, []);
t("", 15, []);
t("hello", 15, [~"hello"]);
t("\nMary had a little lamb\nLittle lamb\n", 15,
[~"Mary had a", ~"little lamb", ~"Little lamb"]);
t("\nMary had a little lamb\nLittle lamb\n", ::std::uint::max_value,
[~"Mary had a little lamb\nLittle lamb"]);
}
} // end groups module
#[cfg(test)]
+1 -1
View File
@@ -260,7 +260,7 @@ pub fn rfc3339(&self) -> ~str {
priv fn do_strptime(s: &str, format: &str) -> Result<Tm, ~str> {
fn match_str(s: &str, pos: uint, needle: &str) -> bool {
let mut i = pos;
for needle.bytes_iter().advance |ch| {
for needle.byte_iter().advance |ch| {
if s[i] != ch {
return false;
}
+489 -590
View File
@@ -25,6 +25,7 @@
use iter::Times;
use iterator::{Iterator, FromIterator, Extendable, IteratorUtil};
use iterator::{Filter, AdditiveIterator, Map};
use iterator::{Invert, DoubleEndedIterator, DoubleEndedIteratorUtil};
use libc;
use num::Zero;
use option::{None, Option, Some};
@@ -39,6 +40,7 @@
/*
Section: Conditions
*/
condition! {
not_utf8: (~str) -> ~str;
}
@@ -47,13 +49,11 @@
Section: Creating a string
*/
/**
* Convert a vector of bytes to a new UTF-8 string
*
* # Failure
*
* Raises the `not_utf8` condition if invalid UTF-8
*/
/// Convert a vector of bytes to a new UTF-8 string
///
/// # Failure
///
/// Raises the `not_utf8` condition if invalid UTF-8
pub fn from_bytes(vv: &[u8]) -> ~str {
use str::not_utf8::cond;
@@ -61,19 +61,16 @@ pub fn from_bytes(vv: &[u8]) -> ~str {
let first_bad_byte = *vv.iter().find_(|&b| !is_utf8([*b])).get();
cond.raise(fmt!("from_bytes: input is not UTF-8; first bad byte is %u",
first_bad_byte as uint))
}
else {
} else {
return unsafe { raw::from_bytes(vv) }
}
}
/**
* Consumes a vector of bytes to create a new utf-8 string
*
* # Failure
*
* Raises the `not_utf8` condition if invalid UTF-8
*/
/// Consumes a vector of bytes to create a new utf-8 string
///
/// # Failure
///
/// Raises the `not_utf8` condition if invalid UTF-8
pub fn from_bytes_owned(vv: ~[u8]) -> ~str {
use str::not_utf8::cond;
@@ -86,33 +83,29 @@ pub fn from_bytes_owned(vv: ~[u8]) -> ~str {
}
}
/**
* Convert a vector of bytes to a UTF-8 string.
* The vector needs to be one byte longer than the string, and end with a 0 byte.
*
* Compared to `from_bytes()`, this fn doesn't need to allocate a new owned str.
*
* # Failure
*
* Fails if invalid UTF-8
* Fails if not null terminated
*/
/// Convert a vector of bytes to a UTF-8 string.
/// The vector needs to be one byte longer than the string, and end with a 0 byte.
///
/// Compared to `from_bytes()`, this fn doesn't need to allocate a new owned str.
///
/// # Failure
///
/// Fails if invalid UTF-8
/// Fails if not null terminated
pub fn from_bytes_with_null<'a>(vv: &'a [u8]) -> &'a str {
assert_eq!(vv[vv.len() - 1], 0);
assert!(is_utf8(vv));
return unsafe { raw::from_bytes_with_null(vv) };
}
/**
* Converts a vector to a string slice without performing any allocations.
*
* Once the slice has been validated as utf-8, it is transmuted in-place and
* returned as a '&str' instead of a '&[u8]'
*
* # Failure
*
* Fails if invalid UTF-8
*/
/// Converts a vector to a string slice without performing any allocations.
///
/// Once the slice has been validated as utf-8, it is transmuted in-place and
/// returned as a '&str' instead of a '&[u8]'
///
/// # Failure
///
/// Fails if invalid UTF-8
pub fn from_bytes_slice<'a>(vector: &'a [u8]) -> &'a str {
unsafe {
assert!(is_utf8(vector));
@@ -135,13 +128,11 @@ impl ToStr for @str {
fn to_str(&self) -> ~str { self.to_owned() }
}
/**
* Convert a byte to a UTF-8 string
*
* # Failure
*
* Fails if invalid UTF-8
*/
/// Convert a byte to a UTF-8 string
///
/// # Failure
///
/// Fails if invalid UTF-8
pub fn from_byte(b: u8) -> ~str {
assert!(b < 128u8);
unsafe { cast::transmute(~[b, 0u8]) }
@@ -249,18 +240,21 @@ pub trait CharEq {
/// which can allow for a faster implementation.
fn only_ascii(&self) -> bool;
}
impl CharEq for char {
#[inline]
fn matches(&self, c: char) -> bool { *self == c }
fn only_ascii(&self) -> bool { (*self as uint) < 128 }
}
impl<'self> CharEq for &'self fn(char) -> bool {
#[inline]
fn matches(&self, c: char) -> bool { (*self)(c) }
fn only_ascii(&self) -> bool { false }
}
impl CharEq for extern "Rust" fn(char) -> bool {
#[inline]
fn matches(&self, c: char) -> bool { (*self)(c) }
@@ -279,6 +273,70 @@ fn only_ascii(&self) -> bool {
}
}
/*
Section: Iterators
*/
/// External iterator for a string's characters and their byte offsets.
/// Use with the `std::iterator` module.
#[deriving(Clone)]
pub struct CharOffsetIterator<'self> {
priv index_front: uint,
priv index_back: uint,
priv string: &'self str,
}
impl<'self> Iterator<(uint, char)> for CharOffsetIterator<'self> {
#[inline]
fn next(&mut self) -> Option<(uint, char)> {
if self.index_front < self.index_back {
let CharRange {ch, next} = self.string.char_range_at(self.index_front);
let index = self.index_front;
self.index_front = next;
Some((index, ch))
} else {
None
}
}
}
impl<'self> DoubleEndedIterator<(uint, char)> for CharOffsetIterator<'self> {
#[inline]
fn next_back(&mut self) -> Option<(uint, char)> {
if self.index_front < self.index_back {
let CharRange {ch, next} = self.string.char_range_at_reverse(self.index_back);
self.index_back = next;
Some((next, ch))
} else {
None
}
}
}
/// External iterator for a string's characters and their byte offsets in reverse order.
/// Use with the `std::iterator` module.
pub type CharOffsetRevIterator<'self> =
Invert<CharOffsetIterator<'self>>;
/// External iterator for a string's characters.
/// Use with the `std::iterator` module.
pub type CharIterator<'self> =
Map<'self, (uint, char), char, CharOffsetIterator<'self>>;
/// External iterator for a string's characters in reverse order.
/// Use with the `std::iterator` module.
pub type CharRevIterator<'self> =
Invert<Map<'self, (uint, char), char, CharOffsetIterator<'self>>>;
/// External iterator for a string's bytes.
/// Use with the `std::iterator` module.
pub type ByteIterator<'self> =
Map<'self, &'self u8, u8, vec::VecIterator<'self, u8>>;
/// External iterator for a string's bytes in reverse order.
/// Use with the `std::iterator` module.
pub type ByteRevIterator<'self> =
Invert<Map<'self, &'self u8, u8, vec::VecIterator<'self, u8>>>;
/// An iterator over the substrings of a string, separated by `sep`.
#[deriving(Clone)]
@@ -414,97 +472,17 @@ fn next(&mut self) -> Option<&'self str> {
}
}
/** Splits a string into substrings with possibly internal whitespace,
* each of them at most `lim` bytes long. The substrings have leading and trailing
* whitespace removed, and are only cut at whitespace boundaries.
*
* #Failure:
*
* Fails during iteration if the string contains a non-whitespace
* sequence longer than the limit.
*/
pub fn each_split_within<'a>(ss: &'a str,
lim: uint,
it: &fn(&'a str) -> bool) -> bool {
// Just for fun, let's write this as an state machine:
enum SplitWithinState {
A, // leading whitespace, initial state
B, // words
C, // internal and trailing whitespace
}
enum Whitespace {
Ws, // current char is whitespace
Cr // current char is not whitespace
}
enum LengthLimit {
UnderLim, // current char makes current substring still fit in limit
OverLim // current char makes current substring no longer fit in limit
}
let mut slice_start = 0;
let mut last_start = 0;
let mut last_end = 0;
let mut state = A;
let mut fake_i = ss.len();
let mut lim = lim;
let mut cont = true;
let slice: &fn() = || { cont = it(ss.slice(slice_start, last_end)) };
// if the limit is larger than the string, lower it to save cycles
if (lim >= fake_i) {
lim = fake_i;
}
let machine: &fn((uint, char)) -> bool = |(i, c)| {
let whitespace = if char::is_whitespace(c) { Ws } else { Cr };
let limit = if (i - slice_start + 1) <= lim { UnderLim } else { OverLim };
state = match (state, whitespace, limit) {
(A, Ws, _) => { A }
(A, Cr, _) => { slice_start = i; last_start = i; B }
(B, Cr, UnderLim) => { B }
(B, Cr, OverLim) if (i - last_start + 1) > lim
=> fail!("word starting with %? longer than limit!",
ss.slice(last_start, i + 1)),
(B, Cr, OverLim) => { slice(); slice_start = last_start; B }
(B, Ws, UnderLim) => { last_end = i; C }
(B, Ws, OverLim) => { last_end = i; slice(); A }
(C, Cr, UnderLim) => { last_start = i; B }
(C, Cr, OverLim) => { slice(); slice_start = i; last_start = i; last_end = i; B }
(C, Ws, OverLim) => { slice(); A }
(C, Ws, UnderLim) => { C }
};
cont
};
ss.iter().enumerate().advance(|x| machine(x));
// Let the automaton 'run out' by supplying trailing whitespace
while cont && match state { B | C => true, A => false } {
machine((fake_i, ' '));
fake_i += 1;
}
return cont;
}
/**
* Replace all occurrences of one string with another
*
* # Arguments
*
* * s - The string containing substrings to replace
* * from - The string to replace
* * to - The replacement string
*
* # Return value
*
* The original string with all occurances of `from` replaced with `to`
*/
/// Replace all occurrences of one string with another
///
/// # Arguments
///
/// * s - The string containing substrings to replace
/// * from - The string to replace
/// * to - The replacement string
///
/// # Return value
///
/// The original string with all occurances of `from` replaced with `to`
pub fn replace(s: &str, from: &str, to: &str) -> ~str {
let mut result = ~"";
let mut last_end = 0;
@@ -578,7 +556,7 @@ pub fn eq(a: &~str, b: &~str) -> bool {
// Utility used by various searching functions
fn match_at<'a,'b>(haystack: &'a str, needle: &'b str, at: uint) -> bool {
let mut i = at;
for needle.bytes_iter().advance |c| { if haystack[i] != c { return false; } i += 1u; }
for needle.byte_iter().advance |c| { if haystack[i] != c { return false; } i += 1u; }
return true;
}
@@ -663,9 +641,7 @@ pub fn utf16_chars(v: &[u16], f: &fn(char)) {
}
}
/**
* Allocates a new string from the utf-16 slice provided
*/
/// Allocates a new string from the utf-16 slice provided
pub fn from_utf16(v: &[u16]) -> ~str {
let mut buf = ~"";
buf.reserve(v.len());
@@ -673,10 +649,8 @@ pub fn from_utf16(v: &[u16]) -> ~str {
buf
}
/**
* Allocates a new string with the specified capacity. The string returned is
* the empty string, but has capacity for much more.
*/
/// Allocates a new string with the specified capacity. The string returned is
/// the empty string, but has capacity for much more.
#[inline]
pub fn with_capacity(capacity: uint) -> ~str {
let mut buf = ~"";
@@ -684,19 +658,17 @@ pub fn with_capacity(capacity: uint) -> ~str {
buf
}
/**
* As char_len but for a slice of a string
*
* # Arguments
*
* * s - A valid string
* * start - The position inside `s` where to start counting in bytes
* * end - The position where to stop counting
*
* # Return value
*
* The number of Unicode characters in `s` between the given indices.
*/
/// As char_len but for a slice of a string
///
/// # Arguments
///
/// * s - A valid string
/// * start - The position inside `s` where to start counting in bytes
/// * end - The position where to stop counting
///
/// # Return value
///
/// The number of Unicode characters in `s` between the given indices.
pub fn count_chars(s: &str, start: uint, end: uint) -> uint {
assert!(s.is_char_boundary(start));
assert!(s.is_char_boundary(end));
@@ -851,16 +823,14 @@ pub unsafe fn c_str_to_static_slice(s: *libc::c_char) -> &'static str {
cast::transmute(v)
}
/**
* Takes a bytewise (not UTF-8) slice from a string.
*
* Returns the substring from [`begin`..`end`).
*
* # Failure
*
* If begin is greater than end.
* If end is greater than the length of the string.
*/
/// Takes a bytewise (not UTF-8) slice from a string.
///
/// Returns the substring from [`begin`..`end`).
///
/// # Failure
///
/// If begin is greater than end.
/// If end is greater than the length of the string.
#[inline]
pub unsafe fn slice_bytes(s: &str, begin: uint, end: uint) -> &str {
do s.as_imm_buf |sbuf, n| {
@@ -931,6 +901,10 @@ fn test_from_buf_len() {
}
/*
Section: Trait implementations
*/
#[cfg(not(test))]
pub mod traits {
use ops::Add;
@@ -949,7 +923,7 @@ fn add(&self, rhs: & &'self str) -> ~str {
impl<'self> TotalOrd for &'self str {
#[inline]
fn cmp(&self, other: & &'self str) -> Ordering {
for self.bytes_iter().zip(other.bytes_iter()).advance |(s_b, o_b)| {
for self.byte_iter().zip(other.byte_iter()).advance |(s_b, o_b)| {
match s_b.cmp(&o_b) {
Greater => return Greater,
Less => return Less,
@@ -1081,12 +1055,14 @@ impl<'self> Str for &'self str {
#[inline]
fn as_slice<'a>(&'a self) -> &'a str { *self }
}
impl<'self> Str for ~str {
#[inline]
fn as_slice<'a>(&'a self) -> &'a str {
let s: &'a str = *self; s
}
}
impl<'self> Str for @str {
#[inline]
fn as_slice<'a>(&'a self) -> &'a str {
@@ -1121,15 +1097,16 @@ fn clear(&mut self) {
}
}
#[allow(missing_doc)]
pub trait StrSlice<'self> {
fn contains<'a>(&self, needle: &'a str) -> bool;
fn contains_char(&self, needle: char) -> bool;
fn iter(&self) -> CharIterator<'self>;
fn rev_iter(&self) -> CharRevIterator<'self>;
fn bytes_iter(&self) -> BytesIterator<'self>;
fn bytes_rev_iter(&self) -> BytesRevIterator<'self>;
fn byte_iter(&self) -> ByteIterator<'self>;
fn byte_rev_iter(&self) -> ByteRevIterator<'self>;
fn char_offset_iter(&self) -> CharOffsetIterator<'self>;
fn char_offset_rev_iter(&self) -> CharOffsetRevIterator<'self>;
fn split_iter<Sep: CharEq>(&self, sep: Sep) -> CharSplitIterator<'self, Sep>;
fn splitn_iter<Sep: CharEq>(&self, sep: Sep, count: uint) -> CharSplitIterator<'self, Sep>;
fn split_options_iter<Sep: CharEq>(&self, sep: Sep, count: uint, allow_trailing_empty: bool)
@@ -1190,28 +1167,26 @@ fn split_options_iter<Sep: CharEq>(&self, sep: Sep, count: uint, allow_trailing_
/// Extension methods for strings
impl<'self> StrSlice<'self> for &'self str {
/**
* Returns true if one string contains another
*
* # Arguments
*
* * needle - The string to look for
*/
/// Returns true if one string contains another
///
/// # Arguments
///
/// * needle - The string to look for
#[inline]
fn contains<'a>(&self, needle: &'a str) -> bool {
self.find_str(needle).is_some()
}
/**
* Returns true if a string contains a char.
*
* # Arguments
*
* * needle - The char to look for
*/
/// Returns true if a string contains a char.
///
/// # Arguments
///
/// * needle - The char to look for
#[inline]
fn contains_char(&self, needle: char) -> bool {
self.find(needle).is_some()
}
/// An iterator over the characters of `self`. Note, this iterates
/// over unicode code-points, not unicode graphemes.
///
@@ -1223,29 +1198,41 @@ fn contains_char(&self, needle: char) -> bool {
/// ~~~
#[inline]
fn iter(&self) -> CharIterator<'self> {
CharIterator {
index: 0,
string: *self
}
self.char_offset_iter().transform(|(_, c)| c)
}
/// An iterator over the characters of `self`, in reverse order.
#[inline]
fn rev_iter(&self) -> CharRevIterator<'self> {
CharRevIterator {
index: self.len(),
string: *self
}
self.iter().invert()
}
/// An iterator over the bytes of `self`
#[inline]
fn bytes_iter(&self) -> BytesIterator<'self> {
BytesIterator { it: self.as_bytes().iter() }
fn byte_iter(&self) -> ByteIterator<'self> {
self.as_bytes().iter().transform(|&b| b)
}
/// An iterator over the bytes of `self`, in reverse order
#[inline]
fn bytes_rev_iter(&self) -> BytesRevIterator<'self> {
BytesRevIterator { it: self.as_bytes().rev_iter() }
fn byte_rev_iter(&self) -> ByteRevIterator<'self> {
self.byte_iter().invert()
}
/// An iterator over the characters of `self` and their byte offsets.
#[inline]
fn char_offset_iter(&self) -> CharOffsetIterator<'self> {
CharOffsetIterator {
index_front: 0,
index_back: self.len(),
string: *self
}
}
/// An iterator over the characters of `self` and their byte offsets.
#[inline]
fn char_offset_rev_iter(&self) -> CharOffsetRevIterator<'self> {
self.char_offset_iter().invert()
}
/// An iterator over substrings of `self`, separated by characters
@@ -1291,6 +1278,7 @@ fn split_options_iter<Sep: CharEq>(&self, sep: Sep, count: uint, allow_trailing_
only_ascii: only_ascii
}
}
/// An iterator over the start and end indices of each match of
/// `sep` within `self`.
#[inline]
@@ -1302,16 +1290,15 @@ fn matches_index_iter(&self, sep: &'self str) -> MatchesIndexIterator<'self> {
position: 0
}
}
/**
* An iterator over the substrings of `self` separated by `sep`.
*
* # Example
*
* ~~~ {.rust}
* let v: ~[&str] = "abcXXXabcYYYabc".split_str_iter("abc").collect()
* assert_eq!(v, ["", "XXX", "YYY", ""]);
* ~~~
*/
/// An iterator over the substrings of `self` separated by `sep`.
///
/// # Example
///
/// ~~~ {.rust}
/// let v: ~[&str] = "abcXXXabcYYYabc".split_str_iter("abc").collect()
/// assert_eq!(v, ["", "XXX", "YYY", ""]);
/// ~~~
#[inline]
fn split_str_iter(&self, sep: &'self str) -> StrSplitIterator<'self> {
StrSplitIterator {
@@ -1345,37 +1332,34 @@ fn word_iter(&self) -> WordIterator<'self> {
self.split_iter(char::is_whitespace).filter(|s| !s.is_empty())
}
/**
* Returns true if the string contains only whitespace
*
* Whitespace characters are determined by `char::is_whitespace`
*/
/// Returns true if the string contains only whitespace
///
/// Whitespace characters are determined by `char::is_whitespace`
#[inline]
fn is_whitespace(&self) -> bool { self.iter().all(char::is_whitespace) }
/**
* Returns true if the string contains only alphanumerics
*
* Alphanumeric characters are determined by `char::is_alphanumeric`
*/
/// Returns true if the string contains only alphanumerics
///
/// Alphanumeric characters are determined by `char::is_alphanumeric`
#[inline]
fn is_alphanumeric(&self) -> bool { self.iter().all(char::is_alphanumeric) }
/// Returns the number of characters that a string holds
#[inline]
fn char_len(&self) -> uint { self.iter().len_() }
/**
* Returns a slice of the given string from the byte range
* [`begin`..`end`)
*
* Fails when `begin` and `end` do not point to valid characters or
* beyond the last character of the string
*/
/// Returns a slice of the given string from the byte range
/// [`begin`..`end`)
///
/// Fails when `begin` and `end` do not point to valid characters or
/// beyond the last character of the string
#[inline]
fn slice(&self, begin: uint, end: uint) -> &'self str {
assert!(self.is_char_boundary(begin));
assert!(self.is_char_boundary(end));
unsafe { raw::slice_bytes(*self, begin, end) }
}
/// Returns a slice of the string from `begin` to its end.
///
/// Fails when `begin` does not point to a valid character, or is
@@ -1384,6 +1368,7 @@ fn slice(&self, begin: uint, end: uint) -> &'self str {
fn slice_from(&self, begin: uint) -> &'self str {
self.slice(begin, self.len())
}
/// Returns a slice of the string from the beginning to byte
/// `end`.
///
@@ -1427,6 +1412,7 @@ fn starts_with<'a>(&self, needle: &'a str) -> bool {
else if needle_len > self_len { false }
else { match_at(*self, needle, 0u) }
}
/// Returns true if `needle` is a suffix of the string.
fn ends_with(&self, needle: &str) -> bool {
let (self_len, needle_len) = (self.len(), needle.len());
@@ -1464,51 +1450,50 @@ fn escape_unicode(&self) -> ~str {
fn trim(&self) -> &'self str {
self.trim_left().trim_right()
}
/// Returns a string with leading whitespace removed
#[inline]
fn trim_left(&self) -> &'self str {
self.trim_left_chars(&char::is_whitespace)
}
/// Returns a string with trailing whitespace removed
#[inline]
fn trim_right(&self) -> &'self str {
self.trim_right_chars(&char::is_whitespace)
}
/**
* Returns a string with characters that match `to_trim` removed.
*
* # Arguments
*
* * to_trim - a character matcher
*
* # Example
*
* ~~~ {.rust}
* assert_eq!("11foo1bar11".trim_chars(&'1'), "foo1bar")
* assert_eq!("12foo1bar12".trim_chars(& &['1', '2']), "foo1bar")
* assert_eq!("123foo1bar123".trim_chars(&|c: char| c.is_digit()), "foo1bar")
* ~~~
*/
/// Returns a string with characters that match `to_trim` removed.
///
/// # Arguments
///
/// * to_trim - a character matcher
///
/// # Example
///
/// ~~~ {.rust}
/// assert_eq!("11foo1bar11".trim_chars(&'1'), "foo1bar")
/// assert_eq!("12foo1bar12".trim_chars(& &['1', '2']), "foo1bar")
/// assert_eq!("123foo1bar123".trim_chars(&|c: char| c.is_digit()), "foo1bar")
/// ~~~
#[inline]
fn trim_chars<C: CharEq>(&self, to_trim: &C) -> &'self str {
self.trim_left_chars(to_trim).trim_right_chars(to_trim)
}
/**
* Returns a string with leading `chars_to_trim` removed.
*
* # Arguments
*
* * to_trim - a character matcher
*
* # Example
*
* ~~~ {.rust}
* assert_eq!("11foo1bar11".trim_left_chars(&'1'), "foo1bar11")
* assert_eq!("12foo1bar12".trim_left_chars(& &['1', '2']), "foo1bar12")
* assert_eq!("123foo1bar123".trim_left_chars(&|c: char| c.is_digit()), "foo1bar123")
* ~~~
*/
/// Returns a string with leading `chars_to_trim` removed.
///
/// # Arguments
///
/// * to_trim - a character matcher
///
/// # Example
///
/// ~~~ {.rust}
/// assert_eq!("11foo1bar11".trim_left_chars(&'1'), "foo1bar11")
/// assert_eq!("12foo1bar12".trim_left_chars(& &['1', '2']), "foo1bar12")
/// assert_eq!("123foo1bar123".trim_left_chars(&|c: char| c.is_digit()), "foo1bar123")
/// ~~~
#[inline]
fn trim_left_chars<C: CharEq>(&self, to_trim: &C) -> &'self str {
match self.find(|c: char| !to_trim.matches(c)) {
@@ -1516,21 +1501,20 @@ fn trim_left_chars<C: CharEq>(&self, to_trim: &C) -> &'self str {
Some(first) => unsafe { raw::slice_bytes(*self, first, self.len()) }
}
}
/**
* Returns a string with trailing `chars_to_trim` removed.
*
* # Arguments
*
* * to_trim - a character matcher
*
* # Example
*
* ~~~ {.rust}
* assert_eq!("11foo1bar11".trim_right_chars(&'1'), "11foo1bar")
* assert_eq!("12foo1bar12".trim_right_chars(& &['1', '2']), "12foo1bar")
* assert_eq!("123foo1bar123".trim_right_chars(&|c: char| c.is_digit()), "123foo1bar")
* ~~~
*/
/// Returns a string with trailing `chars_to_trim` removed.
///
/// # Arguments
///
/// * to_trim - a character matcher
///
/// # Example
///
/// ~~~ {.rust}
/// assert_eq!("11foo1bar11".trim_right_chars(&'1'), "11foo1bar")
/// assert_eq!("12foo1bar12".trim_right_chars(& &['1', '2']), "12foo1bar")
/// assert_eq!("123foo1bar123".trim_right_chars(&|c: char| c.is_digit()), "123foo1bar")
/// ~~~
#[inline]
fn trim_right_chars<C: CharEq>(&self, to_trim: &C) -> &'self str {
match self.rfind(|c: char| !to_trim.matches(c)) {
@@ -1542,18 +1526,16 @@ fn trim_right_chars<C: CharEq>(&self, to_trim: &C) -> &'self str {
}
}
/**
* Replace all occurrences of one string with another
*
* # Arguments
*
* * from - The string to replace
* * to - The replacement string
*
* # Return value
*
* The original string with all occurances of `from` replaced with `to`
*/
/// Replace all occurrences of one string with another
///
/// # Arguments
///
/// * from - The string to replace
/// * to - The replacement string
///
/// # Return value
///
/// The original string with all occurances of `from` replaced with `to`
pub fn replace(&self, from: &str, to: &str) -> ~str {
let mut result = ~"";
let mut last_end = 0;
@@ -1616,65 +1598,61 @@ fn to_utf16(&self) -> ~[u16] {
u
}
/**
* Returns false if the index points into the middle of a multi-byte
* character sequence.
*/
/// Returns false if the index points into the middle of a multi-byte
/// character sequence.
fn is_char_boundary(&self, index: uint) -> bool {
if index == self.len() { return true; }
let b = self[index];
return b < 128u8 || b >= 192u8;
}
/**
* Pluck a character out of a string and return the index of the next
* character.
*
* This function can be used to iterate over the unicode characters of a
* string.
*
* # Example
*
* ~~~ {.rust}
* let s = "中华Việt Nam";
* let i = 0u;
* while i < s.len() {
* let CharRange {ch, next} = s.char_range_at(i);
* printfln!("%u: %c", i, ch);
* i = next;
* }
* ~~~
*
* # Example output
*
* ~~~
* 0:
* 3:
* 6: V
* 7: i
* 8:
* 11: t
* 12:
* 13: N
* 14: a
* 15: m
* ~~~
*
* # Arguments
*
* * s - The string
* * i - The byte offset of the char to extract
*
* # Return value
*
* A record {ch: char, next: uint} containing the char value and the byte
* index of the next unicode character.
*
* # Failure
*
* If `i` is greater than or equal to the length of the string.
* If `i` is not the index of the beginning of a valid UTF-8 character.
*/
/// Pluck a character out of a string and return the index of the next
/// character.
///
/// This function can be used to iterate over the unicode characters of a
/// string.
///
/// # Example
///
/// ~~~ {.rust}
/// let s = "中华Việt Nam";
/// let i = 0u;
/// while i < s.len() {
/// let CharRange {ch, next} = s.char_range_at(i);
/// printfln!("%u: %c", i, ch);
/// i = next;
/// }
/// ~~~
///
/// # Example output
///
/// ~~~
/// 0: 中
/// 3:
/// 6: V
/// 7: i
/// 8:
/// 11: t
/// 12:
/// 13: N
/// 14: a
/// 15: m
/// ~~~
///
/// # Arguments
///
/// * s - The string
/// * i - The byte offset of the char to extract
///
/// # Return value
///
/// A record {ch: char, next: uint} containing the char value and the byte
/// index of the next unicode character.
///
/// # Failure
///
/// If `i` is greater than or equal to the length of the string.
/// If `i` is not the index of the beginning of a valid UTF-8 character.
#[inline]
fn char_range_at(&self, i: uint) -> CharRange {
if (self[i] < 128u8) {
@@ -1704,13 +1682,11 @@ fn multibyte_char_range_at(s: &str, i: uint) -> CharRange {
#[inline]
fn char_at(&self, i: uint) -> char { self.char_range_at(i).ch }
/**
* Given a byte position and a str, return the previous char and its position.
*
* This function can be used to iterate over a unicode string in reverse.
*
* Returns 0 for next index if called on start index 0.
*/
/// Given a byte position and a str, return the previous char and its position.
///
/// This function can be used to iterate over a unicode string in reverse.
///
/// Returns 0 for next index if called on start index 0.
fn char_range_at_reverse(&self, start: uint) -> CharRange {
let mut prev = start;
@@ -1737,11 +1713,9 @@ fn char_at_reverse(&self, i: uint) -> char {
self.char_range_at_reverse(i).ch
}
/**
* Work with the byte buffer of a string as a byte slice.
*
* The byte slice does not include the null terminator.
*/
/// Work with the byte buffer of a string as a byte slice.
///
/// The byte slice does not include the null terminator.
fn as_bytes(&self) -> &'self [u8] {
unsafe {
let mut slice = self.repr();
@@ -1750,17 +1724,15 @@ fn as_bytes(&self) -> &'self [u8] {
}
}
/**
* Returns the byte index of the first character of `self` that matches `search`
*
* # Return value
*
* `Some` containing the byte index of the last matching character
* or `None` if there is no match
*/
/// Returns the byte index of the first character of `self` that matches `search`
///
/// # Return value
///
/// `Some` containing the byte index of the last matching character
/// or `None` if there is no match
fn find<C: CharEq>(&self, search: C) -> Option<uint> {
if search.only_ascii() {
for self.bytes_iter().enumerate().advance |(i, b)| {
for self.byte_iter().enumerate().advance |(i, b)| {
if search.matches(b as char) { return Some(i) }
}
} else {
@@ -1773,18 +1745,17 @@ fn find<C: CharEq>(&self, search: C) -> Option<uint> {
None
}
/**
* Returns the byte index of the last character of `self` that matches `search`
*
* # Return value
*
* `Some` containing the byte index of the last matching character
* or `None` if there is no match
*/
/// Returns the byte index of the last character of `self` that matches `search`
///
/// # Return value
///
/// `Some` containing the byte index of the last matching character
/// or `None` if there is no match
fn rfind<C: CharEq>(&self, search: C) -> Option<uint> {
let mut index = self.len();
if search.only_ascii() {
for self.bytes_rev_iter().advance |b| {
for self.byte_rev_iter().advance |b| {
index -= 1;
if search.matches(b as char) { return Some(index); }
}
@@ -1798,18 +1769,16 @@ fn rfind<C: CharEq>(&self, search: C) -> Option<uint> {
None
}
/**
* Returns the byte index of the first matching substring
*
* # Arguments
*
* * `needle` - The string to search for
*
* # Return value
*
* `Some` containing the byte index of the first matching substring
* or `None` if there is no match
*/
/// Returns the byte index of the first matching substring
///
/// # Arguments
///
/// * `needle` - The string to search for
///
/// # Return value
///
/// `Some` containing the byte index of the first matching substring
/// or `None` if there is no match
fn find_str(&self, needle: &str) -> Option<uint> {
if needle.is_empty() {
Some(0)
@@ -1842,16 +1811,14 @@ fn repeat(&self, nn: uint) -> ~str {
}
}
/**
* Retrieves the first character from a string slice and returns
* it. This does not allocate a new string; instead, it returns a
* slice that point one character beyond the character that was
* shifted.
*
* # Failure
*
* If the string does not contain any characters
*/
/// Retrieves the first character from a string slice and returns
/// it. This does not allocate a new string; instead, it returns a
/// slice that point one character beyond the character that was
/// shifted.
///
/// # Failure
///
/// If the string does not contain any characters
#[inline]
fn slice_shift_char(&self) -> (char, &'self str) {
let CharRange {ch, next} = self.char_range_at(0u);
@@ -1859,7 +1826,6 @@ fn slice_shift_char(&self) -> (char, &'self str) {
return (ch, next_s);
}
/// Apply a function to each character.
fn map_chars(&self, ff: &fn(char) -> char) -> ~str {
let mut result = with_capacity(self.len());
@@ -1902,24 +1868,21 @@ fn lev_distance(&self, t: &str) -> uint {
return dcol[tlen];
}
/**
* Returns the byte offset of an inner slice relative to an enclosing outer slice.
*
* Fails if `inner` is not a direct slice contained within self.
*
* # Example
*
* ~~~ {.rust}
* let string = "a\nb\nc";
* let mut lines = ~[];
* for string.line_iter().advance |line| { lines.push(line) }
*
* assert!(string.subslice_offset(lines[0]) == 0); // &"a"
* assert!(string.subslice_offset(lines[1]) == 2); // &"b"
* assert!(string.subslice_offset(lines[2]) == 4); // &"c"
* ~~~
*/
/// Returns the byte offset of an inner slice relative to an enclosing outer slice.
///
/// Fails if `inner` is not a direct slice contained within self.
///
/// # Example
///
/// ~~~ {.rust}
/// let string = "a\nb\nc";
/// let mut lines = ~[];
/// for string.line_iter().advance |line| { lines.push(line) }
///
/// assert!(string.subslice_offset(lines[0]) == 0); // &"a"
/// assert!(string.subslice_offset(lines[1]) == 2); // &"b"
/// assert!(string.subslice_offset(lines[2]) == 4); // &"c"
/// ~~~
#[inline]
fn subslice_offset(&self, inner: &str) -> uint {
do self.as_imm_buf |a, a_len| {
@@ -1939,35 +1902,31 @@ fn subslice_offset(&self, inner: &str) -> uint {
}
}
/**
* Work with the byte buffer and length of a slice.
*
* The given length is one byte longer than the 'official' indexable
* length of the string. This is to permit probing the byte past the
* indexable area for a null byte, as is the case in slices pointing
* to full strings, or suffixes of them.
*/
/// Work with the byte buffer and length of a slice.
///
/// The given length is one byte longer than the 'official' indexable
/// length of the string. This is to permit probing the byte past the
/// indexable area for a null byte, as is the case in slices pointing
/// to full strings, or suffixes of them.
#[inline]
fn as_imm_buf<T>(&self, f: &fn(*u8, uint) -> T) -> T {
let v: &[u8] = unsafe { cast::transmute(*self) };
v.as_imm_buf(f)
}
/**
* Work with the byte buffer of a string as a null-terminated C string.
*
* Allows for unsafe manipulation of strings, which is useful for foreign
* interop. This is similar to `str::as_buf`, but guarantees null-termination.
* If the given slice is not already null-terminated, this function will
* allocate a temporary, copy the slice, null terminate it, and pass
* that instead.
*
* # Example
*
* ~~~ {.rust}
* let s = "PATH".as_c_str(|path| libc::getenv(path));
* ~~~
*/
/// Work with the byte buffer of a string as a null-terminated C string.
///
/// Allows for unsafe manipulation of strings, which is useful for foreign
/// interop. This is similar to `str::as_buf`, but guarantees null-termination.
/// If the given slice is not already null-terminated, this function will
/// allocate a temporary, copy the slice, null terminate it, and pass
/// that instead.
///
/// # Example
///
/// ~~~ {.rust}
/// let s = "PATH".as_c_str(|path| libc::getenv(path));
/// ~~~
#[inline]
fn as_c_str<T>(&self, f: &fn(*libc::c_char) -> T) -> T {
do self.as_imm_buf |buf, len| {
@@ -1988,11 +1947,9 @@ pub trait NullTerminatedStr {
}
impl NullTerminatedStr for ~str {
/**
* Work with the byte buffer of a string as a byte slice.
*
* The byte slice does include the null terminator.
*/
/// Work with the byte buffer of a string as a byte slice.
///
/// The byte slice does include the null terminator.
#[inline]
fn as_bytes_with_null<'a>(&'a self) -> &'a [u8] {
let ptr: &'a ~[u8] = unsafe { cast::transmute(self) };
@@ -2000,12 +1957,11 @@ fn as_bytes_with_null<'a>(&'a self) -> &'a [u8] {
slice
}
}
impl NullTerminatedStr for @str {
/**
* Work with the byte buffer of a string as a byte slice.
*
* The byte slice does include the null terminator.
*/
/// Work with the byte buffer of a string as a byte slice.
///
/// The byte slice does include the null terminator.
#[inline]
fn as_bytes_with_null<'a>(&'a self) -> &'a [u8] {
let ptr: &'a @[u8] = unsafe { cast::transmute(self) };
@@ -2028,16 +1984,14 @@ pub trait OwnedStr {
fn capacity(&self) -> uint;
fn to_bytes_with_null(self) -> ~[u8];
/**
* Work with the mutable byte buffer and length of a slice.
*
* The given length is one byte longer than the 'official' indexable
* length of the string. This is to permit probing the byte past the
* indexable area for a null byte, as is the case in slices pointing
* to full strings, or suffixes of them.
*
* Make sure any mutations to this buffer keep this string valid UTF8.
*/
/// Work with the mutable byte buffer and length of a slice.
///
/// The given length is one byte longer than the 'official' indexable
/// length of the string. This is to permit probing the byte past the
/// indexable area for a null byte, as is the case in slices pointing
/// to full strings, or suffixes of them.
///
/// Make sure any mutations to this buffer keep this string valid UTF8.
fn as_mut_buf<T>(&mut self, f: &fn(*mut u8, uint) -> T) -> T;
}
@@ -2077,6 +2031,7 @@ fn push_str(&mut self, rhs: &str) {
raw::set_len(self, llen + rlen);
}
}
/// Appends a character to the back of a string
#[inline]
fn push_char(&mut self, c: char) {
@@ -2117,13 +2072,12 @@ fn push_char(&mut self, c: char) {
raw::set_len(self, new_len);
}
}
/**
* Remove the final character from a string and return it
*
* # Failure
*
* If the string does not contain any characters
*/
/// Remove the final character from a string and return it
///
/// # Failure
///
/// If the string does not contain any characters
fn pop_char(&mut self) -> char {
let end = self.len();
assert!(end > 0u);
@@ -2132,13 +2086,11 @@ fn pop_char(&mut self) -> char {
return ch;
}
/**
* Remove the first character from a string and return it
*
* # Failure
*
* If the string does not contain any characters
*/
/// Remove the first character from a string and return it
///
/// # Failure
///
/// If the string does not contain any characters
fn shift_char(&mut self) -> char {
let CharRange {ch, next} = self.char_range_at(0u);
*self = self.slice(next, self.len()).to_owned();
@@ -2162,22 +2114,20 @@ fn append(self, rhs: &str) -> ~str {
new_str
}
/**
* Reserves capacity for exactly `n` bytes in the given string, not including
* the null terminator.
*
* Assuming single-byte characters, the resulting string will be large
* enough to hold a string of length `n`. To account for the null terminator,
* the underlying buffer will have the size `n` + 1.
*
* If the capacity for `s` is already equal to or greater than the requested
* capacity, then no action is taken.
*
* # Arguments
*
* * s - A string
* * n - The number of bytes to reserve space for
*/
/// Reserves capacity for exactly `n` bytes in the given string, not including
/// the null terminator.
///
/// Assuming single-byte characters, the resulting string will be large
/// enough to hold a string of length `n`. To account for the null terminator,
/// the underlying buffer will have the size `n` + 1.
///
/// If the capacity for `s` is already equal to or greater than the requested
/// capacity, then no action is taken.
///
/// # Arguments
///
/// * s - A string
/// * n - The number of bytes to reserve space for
#[inline]
pub fn reserve(&mut self, n: uint) {
unsafe {
@@ -2186,35 +2136,31 @@ pub fn reserve(&mut self, n: uint) {
}
}
/**
* Reserves capacity for at least `n` bytes in the given string, not including
* the null terminator.
*
* Assuming single-byte characters, the resulting string will be large
* enough to hold a string of length `n`. To account for the null terminator,
* the underlying buffer will have the size `n` + 1.
*
* This function will over-allocate in order to amortize the allocation costs
* in scenarios where the caller may need to repeatedly reserve additional
* space.
*
* If the capacity for `s` is already equal to or greater than the requested
* capacity, then no action is taken.
*
* # Arguments
*
* * s - A string
* * n - The number of bytes to reserve space for
*/
/// Reserves capacity for at least `n` bytes in the given string, not including
/// the null terminator.
///
/// Assuming single-byte characters, the resulting string will be large
/// enough to hold a string of length `n`. To account for the null terminator,
/// the underlying buffer will have the size `n` + 1.
///
/// This function will over-allocate in order to amortize the allocation costs
/// in scenarios where the caller may need to repeatedly reserve additional
/// space.
///
/// If the capacity for `s` is already equal to or greater than the requested
/// capacity, then no action is taken.
///
/// # Arguments
///
/// * s - A string
/// * n - The number of bytes to reserve space for
#[inline]
fn reserve_at_least(&mut self, n: uint) {
self.reserve(uint::next_power_of_two(n + 1u) - 1u)
}
/**
* Returns the number of single-byte characters the string can hold without
* reallocating
*/
/// Returns the number of single-byte characters the string can hold without
/// reallocating
fn capacity(&self) -> uint {
let buf: &~[u8] = unsafe { cast::transmute(self) };
let vcap = buf.capacity();
@@ -2250,75 +2196,6 @@ fn clone(&self) -> @str {
}
}
/// External iterator for a string's characters. Use with the `std::iterator`
/// module.
#[deriving(Clone)]
pub struct CharIterator<'self> {
priv index: uint,
priv string: &'self str,
}
impl<'self> Iterator<char> for CharIterator<'self> {
#[inline]
fn next(&mut self) -> Option<char> {
if self.index < self.string.len() {
let CharRange {ch, next} = self.string.char_range_at(self.index);
self.index = next;
Some(ch)
} else {
None
}
}
}
/// External iterator for a string's characters in reverse order. Use
/// with the `std::iterator` module.
#[deriving(Clone)]
pub struct CharRevIterator<'self> {
priv index: uint,
priv string: &'self str,
}
impl<'self> Iterator<char> for CharRevIterator<'self> {
#[inline]
fn next(&mut self) -> Option<char> {
if self.index > 0 {
let CharRange {ch, next} = self.string.char_range_at_reverse(self.index);
self.index = next;
Some(ch)
} else {
None
}
}
}
/// External iterator for a string's bytes. Use with the `std::iterator`
/// module.
#[deriving(Clone)]
pub struct BytesIterator<'self> {
priv it: vec::VecIterator<'self, u8>
}
impl<'self> Iterator<u8> for BytesIterator<'self> {
#[inline]
fn next(&mut self) -> Option<u8> {
self.it.next().map_consume(|&x| x)
}
}
/// External iterator for a string's bytes in reverse order. Use with
/// the `std::iterator` module.
#[deriving(Clone)]
pub struct BytesRevIterator<'self> {
priv it: vec::RevIterator<'self, u8>
}
impl<'self> Iterator<u8> for BytesRevIterator<'self> {
#[inline]
fn next(&mut self) -> Option<u8> {
self.it.next().map_consume(|&x| x)
}
}
impl<T: Iterator<char>> FromIterator<char, T> for ~str {
#[inline]
fn from_iterator(iterator: &mut T) -> ~str {
@@ -2443,6 +2320,7 @@ fn test_push_str() {
s.push_str("ประเทศไทย中华Việt Nam");
assert_eq!(s.slice_from(0), "abcประเทศไทย中华Việt Nam");
}
#[test]
fn test_append() {
let mut s = ~"";
@@ -2536,22 +2414,6 @@ fn test_clear() {
assert_eq!("", data.as_slice());
}
#[test]
fn test_split_within() {
fn t(s: &str, i: uint, u: &[~str]) {
let mut v = ~[];
for each_split_within(s, i) |s| { v.push(s.to_owned()) }
assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
}
t("", 0, []);
t("", 15, []);
t("hello", 15, [~"hello"]);
t("\nMary had a little lamb\nLittle lamb\n", 15,
[~"Mary had a", ~"little lamb", ~"Little lamb"]);
t("\nMary had a little lamb\nLittle lamb\n", uint::max_value,
[~"Mary had a little lamb\nLittle lamb"]);
}
#[test]
fn test_find_str() {
// byte positions
@@ -3398,7 +3260,7 @@ fn test_rev_iterator() {
}
#[test]
fn test_bytes_iterator() {
fn test_byte_iterator() {
let s = ~"ศไทย中华Việt Nam";
let v = [
224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
@@ -3407,14 +3269,14 @@ fn test_bytes_iterator() {
];
let mut pos = 0;
for s.bytes_iter().advance |b| {
for s.byte_iter().advance |b| {
assert_eq!(b, v[pos]);
pos += 1;
}
}
#[test]
fn test_bytes_rev_iterator() {
fn test_byte_rev_iterator() {
let s = ~"ศไทย中华Việt Nam";
let v = [
224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
@@ -3423,12 +3285,48 @@ fn test_bytes_rev_iterator() {
];
let mut pos = v.len();
for s.bytes_rev_iter().advance |b| {
for s.byte_rev_iter().advance |b| {
pos -= 1;
assert_eq!(b, v[pos]);
}
}
#[test]
fn test_char_offset_iterator() {
use iterator::*;
let s = "ศไทย中华Việt Nam";
let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
let mut pos = 0;
let mut it = s.char_offset_iter();
for it.advance |c| {
assert_eq!(c, (p[pos], v[pos]));
pos += 1;
}
assert_eq!(pos, v.len());
assert_eq!(pos, p.len());
}
#[test]
fn test_char_offset_rev_iterator() {
use iterator::*;
let s = "ศไทย中华Việt Nam";
let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
let mut pos = 0;
let mut it = s.char_offset_rev_iter();
for it.advance |c| {
assert_eq!(c, (p[pos], v[pos]));
pos += 1;
}
assert_eq!(pos, v.len());
assert_eq!(pos, p.len());
}
#[test]
fn test_split_char_iterator() {
let data = "\nMäry häd ä little lämb\nLittle lämb\n";
@@ -3446,6 +3344,7 @@ fn test_split_char_iterator() {
let split: ~[&str] = data.split_iter(|c: char| c == 'ä').collect();
assert_eq!(split, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
}
#[test]
fn test_splitn_char_iterator() {
let data = "\nMäry häd ä little lämb\nLittle lämb\n";
+1 -1
View File
@@ -115,7 +115,7 @@ unsafe fn to_ascii_nocheck(&self) -> &'self[Ascii] {
#[inline]
fn is_ascii(&self) -> bool {
self.bytes_iter().all(|b| b.is_ascii())
self.byte_iter().all(|b| b.is_ascii())
}
}
+1 -1
View File
@@ -27,7 +27,7 @@ pub fn expand_syntax_ext(cx: @ExtCtxt, sp: span, tts: &[ast::token_tree]) -> bas
ast::expr_lit(lit) => match lit.node {
// string literal, push each byte to vector expression
ast::lit_str(s) => {
for s.bytes_iter().advance |byte| {
for s.byte_iter().advance |byte| {
bytes.push(cx.expr_u8(sp, byte));
}
}
+1 -1
View File
@@ -16,7 +16,7 @@ pub fn main() {
assert_eq!(y, 6);
let s = ~"hello there";
let mut i: int = 0;
for s.bytes_iter().advance |c| {
for s.byte_iter().advance |c| {
if i == 0 { assert!((c == 'h' as u8)); }
if i == 1 { assert!((c == 'e' as u8)); }
if i == 2 { assert!((c == 'l' as u8)); }
+1 -1
View File
@@ -41,7 +41,7 @@ pub fn main() {
fn check_str_eq(a: ~str, b: ~str) {
let mut i: int = 0;
for a.bytes_iter().advance |ab| {
for a.byte_iter().advance |ab| {
info!(i);
info!(ab);
let bb: u8 = b[i];