Skip to content

Commit

Permalink
Switched over a bunch of splitting functions to non-allocating iterators
Browse files Browse the repository at this point in the history
  • Loading branch information
Kimundi committed Mar 26, 2013
1 parent d74606e commit b9de2b5
Show file tree
Hide file tree
Showing 13 changed files with 135 additions and 98 deletions.
8 changes: 4 additions & 4 deletions src/libcore/num/strconv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,10 @@ impl_NumStrConv_Integer!(u64)


// Special value strings as [u8] consts.
const inf_buf: [u8*3] = ['i' as u8, 'n' as u8, 'f' as u8];
const positive_inf_buf: [u8*4] = ['+' as u8, 'i' as u8, 'n' as u8, 'f' as u8];
const negative_inf_buf: [u8*4] = ['-' as u8, 'i' as u8, 'n' as u8, 'f' as u8];
const nan_buf: [u8*3] = ['N' as u8, 'a' as u8, 'N' as u8];
static inf_buf: [u8*3] = ['i' as u8, 'n' as u8, 'f' as u8];
static positive_inf_buf: [u8*4] = ['+' as u8, 'i' as u8, 'n' as u8, 'f' as u8];
static negative_inf_buf: [u8*4] = ['-' as u8, 'i' as u8, 'n' as u8, 'f' as u8];
static nan_buf: [u8*3] = ['N' as u8, 'a' as u8, 'N' as u8];

/**
* Converts a number to its string representation as a byte vector.
Expand Down
3 changes: 2 additions & 1 deletion src/libcore/os.rs
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,8 @@ pub fn env() -> ~[(~str,~str)] {
fn env_convert(input: ~[~str]) -> ~[(~str, ~str)] {
let mut pairs = ~[];
for input.each |p| {
let vs = str::splitn_char(*p, '=', 1);
let mut vs = ~[];
for str::each_splitn_char(*p, '=', 1) |s| { vs.push(s.to_owned()) }
debug!("splitting: len: %u",
vs.len());
fail_unless!(vs.len() == 2);
Expand Down
33 changes: 22 additions & 11 deletions src/libcore/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,8 @@ impl ToStr for PosixPath {
impl GenericPath for PosixPath {

fn from_str(s: &str) -> PosixPath {
let mut components = str::split_nonempty(s, |c| c == '/');
let mut components = ~[];
for str::each_split_nonempty(s, |c| c == '/') |s| { components.push(s.to_owned()) }
let is_absolute = (s.len() != 0 && s[0] == '/' as u8);
return PosixPath { is_absolute: is_absolute,
components: components }
Expand Down Expand Up @@ -504,9 +505,10 @@ impl GenericPath for PosixPath {
fn push_many(&self, cs: &[~str]) -> PosixPath {
let mut v = copy self.components;
for cs.each |e| {
let mut ss = str::split_nonempty(
*e,
|c| windows::is_sep(c as u8));
let mut ss = ~[];
for str::each_split_nonempty(*e, |c| windows::is_sep(c as u8)) |s| {
ss.push(s.to_owned())
}
unsafe { v.push_all_move(ss); }
}
PosixPath { is_absolute: self.is_absolute,
Expand All @@ -515,7 +517,10 @@ impl GenericPath for PosixPath {

fn push(&self, s: &str) -> PosixPath {
let mut v = copy self.components;
let mut ss = str::split_nonempty(s, |c| windows::is_sep(c as u8));
let mut ss = ~[];
for str::each_split_nonempty(s, |c| windows::is_sep(c as u8)) |s| {
ss.push(s.to_owned())
}
unsafe { v.push_all_move(ss); }
PosixPath { components: v, ..copy *self }
}
Expand Down Expand Up @@ -590,8 +595,10 @@ impl GenericPath for WindowsPath {
}
}

let mut components =
str::split_nonempty(rest, |c| windows::is_sep(c as u8));
let mut components = ~[];
for str::each_split_nonempty(rest, |c| windows::is_sep(c as u8)) |s| {
components.push(s.to_owned())
}
let is_absolute = (rest.len() != 0 && windows::is_sep(rest[0]));
return WindowsPath { host: host,
device: device,
Expand Down Expand Up @@ -759,9 +766,10 @@ impl GenericPath for WindowsPath {
fn push_many(&self, cs: &[~str]) -> WindowsPath {
let mut v = copy self.components;
for cs.each |e| {
let mut ss = str::split_nonempty(
*e,
|c| windows::is_sep(c as u8));
let mut ss = ~[];
for str::each_split_nonempty(*e, |c| windows::is_sep(c as u8)) |s| {
ss.push(s.to_owned())
}
unsafe { v.push_all_move(ss); }
}
// tedious, but as-is, we can't use ..self
Expand All @@ -775,7 +783,10 @@ impl GenericPath for WindowsPath {

fn push(&self, s: &str) -> WindowsPath {
let mut v = copy self.components;
let mut ss = str::split_nonempty(s, |c| windows::is_sep(c as u8));
let mut ss = ~[];
for str::each_split_nonempty(s, |c| windows::is_sep(c as u8)) |s| {
ss.push(s.to_owned())
}
unsafe { v.push_all_move(ss); }
return WindowsPath { components: v, ..copy *self }
}
Expand Down
4 changes: 3 additions & 1 deletion src/libcore/rand.rs
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,9 @@ impl RngUtil for @Rng {
*/
fn gen_char_from(&self, chars: &str) -> char {
fail_unless!(!chars.is_empty());
self.choose(str::chars(chars))
let mut cs = ~[];
for str::each_char(chars) |c| { cs.push(c) }
self.choose(cs)
}

/// Return a random bool
Expand Down
114 changes: 61 additions & 53 deletions src/libcore/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,7 @@ pub fn each_split_char_nonempty(s: &str, sep: char, it: &fn(&str) -> bool) {
}

fn each_split_char_inner(s: &str, sep: char, count: uint, allow_empty: bool,
allow_trailing_empty: bool), it: &fn(&str) -> bool) {
allow_trailing_empty: bool, it: &fn(&str) -> bool) {
if sep < 128u as char {
let b = sep as u8, l = len(s);
let mut done = 0u;
Expand Down Expand Up @@ -513,8 +513,8 @@ pub fn each_split_nonempty(s: &str, sepfn: &fn(char) -> bool, it: &fn(&str) -> b
each_split_inner(s, sepfn, len(s), false, false, it)
}

pure fn each_split_inner(s: &str, sepfn: &fn(cc: char) -> bool, count: uint,
allow_empty: bool, allow_trailing_empty: bool), it: &fn(&str) -> bool) {
fn each_split_inner(s: &str, sepfn: &fn(cc: char) -> bool, count: uint,
allow_empty: bool, allow_trailing_empty: bool, it: &fn(&str) -> bool) {
let l = len(s);
let mut i = 0u, start = 0u, done = 0u;
while i < l && done < count {
Expand All @@ -534,7 +534,7 @@ pure fn each_split_inner(s: &str, sepfn: &fn(cc: char) -> bool, count: uint,
}

// See Issue #1932 for why this is a naive search
fn iter_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) {
fn iter_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint) -> bool) {
let sep_len = len(sep), l = len(s);
fail_unless!(sep_len > 0u);
let mut i = 0u, match_start = 0u, match_i = 0u;
Expand All @@ -545,7 +545,7 @@ fn iter_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) {
match_i += 1u;
// Found a match
if match_i == sep_len {
f(match_start, i + 1u);
if !f(match_start, i + 1u) { return; }
match_i = 0u;
}
i += 1u;
Expand All @@ -561,10 +561,10 @@ fn iter_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) {
}
}

fn iter_between_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) {
fn iter_between_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint) -> bool) {
let mut last_end = 0u;
do iter_matches(s, sep) |from, to| {
f(last_end, from);
for iter_matches(s, sep) |from, to| {
if !f(last_end, from) { return; }
last_end = to;
}
f(last_end, len(s));
Expand All @@ -580,13 +580,13 @@ fn iter_between_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) {
* ~~~
*/
pub fn each_split_str(s: &'a str, sep: &'b str, it: &fn(&str) -> bool) {
do iter_between_matches(s, sep) |from, to| {
for iter_between_matches(s, sep) |from, to| {
if !it( unsafe { raw::slice_bytes(s, from, to) } ) { return; }
}
}

pub fn each_split_str_nonempty(s: &'a str, sep: &'b str, it: &fn(&str) -> bool) {
do iter_between_matches(s, sep) |from, to| {
for iter_between_matches(s, sep) |from, to| {
if to > from {
if !it( unsafe { raw::slice_bytes(s, from, to) } ) { return; }
}
Expand Down Expand Up @@ -630,7 +630,7 @@ pub fn levdistance(s: &str, t: &str) -> uint {
/**
* Splits a string at each LF ('\n') and passes the substrings to `it`, rather than returning a vector.
*/
pub fn each_line(s: &str, it: &fn(&str) -> bool) { each_split_char(s, '\n', it) }
pub fn each_line(s: &str, it: &fn(&str) -> bool) { each_split_char_no_trailing(s, '\n', it) }

/**
* Splits a string into a vector of the substrings separated by LF ('\n')
Expand All @@ -656,52 +656,56 @@ pub fn each_word(s: &str, it: &fn(&str) -> bool) {
* each of which is less bytes long than a limit
*/
pub fn each_split_within(ss: &str, lim: uint, it: &fn(&str) -> bool) {
let words = str::words(ss);

// empty?
if words == ~[] { return ~[]; }

let mut rows : ~[~str] = ~[];
let mut row : ~str = ~"";

for words.each |wptr| {
let word = copy *wptr;

// if adding this word to the row would go over the limit,
// then start a new row
if row.len() + word.len() + 1 > lim {
rows.push(copy row); // save previous row
row = word; // start a new one
} else {
if row.len() > 0 { row += ~" " } // separate words
row += word; // append to this row
}
// Just for fun, let's write this as an automaton
enum SplitWithinState {
A, // Leading whitespace, initial state
B, // Words
C, // Internal and trailing whitespace
}
enum Whitespace { Ws, Cr }
enum LengthLimit { UnderLim, OverLim }

// save the last row
if row != ~"" { rows.push(row); }
let mut slice_start = 0;
let mut last_start = 0;
let mut last_end = 0;
let mut state = A;

rows
// NOTE: Finish change here
let mut cont = true;
let slice = || { cont = it(ss.slice(slice_start, last_end)) };

let mut last_slice_i = 0, last_word_i = 0, word_start = true;
for each_chari(s) |i, c| {
if (i - last_slice_i) <= lim {
if char::is_whitespace(c) {
let machine = |i: uint, c: char| {
let whitespace = if char::is_whitespace(c) { Ws } else { Cr };
let limit = if (i - slice_start + 1) <= lim { UnderLim } else { OverLim };

} else {
state = match (state, whitespace, limit) {
(A, Ws, _) => { A }
(A, Cr, _) => { slice_start = i; last_start = i; B }

}
} else {
(B, Cr, UnderLim) => { B }
(B, Cr, OverLim) if (i - last_start + 1) > lim
=> { fail!(~"word longer than limit!") }
(B, Cr, OverLim) => { slice(); slice_start = last_start; B }
(B, Ws, UnderLim) => { last_end = i; C }
(B, Ws, OverLim) => { last_end = i; slice(); A }
}
(C, Cr, UnderLim) => { last_start = i; B }
(C, Cr, OverLim) => { slice(); slice_start = i; last_start = i; last_end = i; B }
(C, Ws, OverLim) => { slice(); A }
(C, Ws, UnderLim) => { C }
};
cont
};
str::each_chari(ss, machine);
// Let the automaton 'run out'
let mut fake_i = ss.len();
while cont && match state { B | C => true, A => false } {
machine(fake_i, ' ');
fake_i += 1;
}
}


/// Convert a string to lowercase. ASCII only
pub fn to_lower(s: &str) -> ~str {
map(s,
Expand Down Expand Up @@ -731,7 +735,7 @@ pub fn to_upper(s: &str) -> ~str {
*/
pub fn replace(s: &str, from: &str, to: &str) -> ~str {
let mut result = ~"", first = true;
do iter_between_matches(s, from) |start, end| {
for iter_between_matches(s, from) |start, end| {
if first {
first = false;
} else {
Expand Down Expand Up @@ -2286,9 +2290,9 @@ pub trait StrSlice {
fn len(&self) -> uint;
fn char_len(&self) -> uint;
fn slice(&self, begin: uint, end: uint) -> &'self str;
fn split(&self, sepfn: &fn(char) -> bool) -> ~[~str];
fn split_char(&self, sep: char) -> ~[~str];
fn split_str(&self, sep: &'a str) -> ~[~str];
fn each_split(&self, sepfn: &fn(char) -> bool, it: &fn(&str) -> bool);
fn each_split_char(&self, sep: char, it: &fn(&str) -> bool);
fn each_split_str(&self, sep: &'a str, it: &fn(&str) -> bool);
fn starts_with(&self, needle: &'a str) -> bool;
fn substr(&self, begin: uint, n: uint) -> &'self str;
fn to_lower(&self) -> ~str;
Expand Down Expand Up @@ -2408,20 +2412,24 @@ impl StrSlice for &'self str {
}
/// Splits a string into substrings using a character function
#[inline]
fn split(&self, sepfn: &fn(char) -> bool) -> ~[~str] {
split(*self, sepfn)
fn each_split(&self, sepfn: &fn(char) -> bool, it: &fn(&str) -> bool) {
each_split(*self, sepfn, it)
}
/**
* Splits a string into substrings at each occurrence of a given character
*/
#[inline]
fn split_char(&self, sep: char) -> ~[~str] { split_char(*self, sep) }
fn each_split_char(&self, sep: char, it: &fn(&str) -> bool) {
each_split_char(*self, sep, it)
}
/**
* Splits a string into the substrings separated by a given string,
* calling `it` on each one instead of collecting them into a vector
*/
#[inline]
fn split_str(&self, sep: &'a str) -> ~[~str] { split_str(*self, sep) }
fn each_split_str(&self, sep: &'a str, it: &fn(&str) -> bool) {
each_split_str(*self, sep, it)
}
/// Returns true if one string starts with another
#[inline]
fn starts_with(&self, needle: &'a str) -> bool {
Expand Down
4 changes: 3 additions & 1 deletion src/librustc/metadata/cstore.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,9 @@ pub fn get_used_libraries(cstore: @mut CStore) -> ~[~str] {
}

pub fn add_used_link_args(cstore: @mut CStore, args: &str) {
cstore.used_link_args.push_all(args.split_char(' '));
for args.each_split_char(' ') |s| {
cstore.used_link_args.push(s.to_owned());
}
}

pub fn get_used_link_args(cstore: @mut CStore) -> ~[~str] {
Expand Down
5 changes: 3 additions & 2 deletions src/librustc/middle/resolve.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ use syntax::visit::{visit_mod, visit_ty, vt};
use syntax::opt_vec::OptVec;

use core::option::{Some, get, is_some, is_none};
use core::str::{connect, split_str};
use core::str::{connect, each_split_str};
use core::hashmap::linear::LinearMap;
use std::oldmap::HashMap;

Expand Down Expand Up @@ -1696,7 +1696,8 @@ pub impl Resolver {
entry: %s (%?)",
path_string, def_like);

let mut pieces = split_str(path_string, ~"::");
let mut pieces = ~[];
for each_split_str(path_string, "::") |s| { pieces.push(s.to_owned()) }
let final_ident_str = pieces.pop();
let final_ident = self.session.ident_of(final_ident_str);

Expand Down
Loading

0 comments on commit b9de2b5

Please sign in to comment.