From 3129b17aded7fcedc99f3bf97ec4ad6f28e0cf3b Mon Sep 17 00:00:00 2001
From: Nick Johnson <arachnid@notdot.net>
Date: Sun, 22 May 2016 13:27:57 +0100
Subject: [PATCH] Updated readme; deleted stringutils

---
 README.md            | 131 ++++++++++++++++++++++++------
 StringUtils.sol      | 187 -------------------------------------------
 StringUtils_test.sol |  90 ---------------------
 3 files changed, 106 insertions(+), 302 deletions(-)
 delete mode 100644 StringUtils.sol
 delete mode 100644 StringUtils_test.sol

diff --git a/README.md b/README.md
index dec83eee..750a4ba1 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,6 @@
 
 # String & slice utility library for Solidity
+## Overview
 Functionality in this library is largely implemented using an abstraction called a 'slice'. A slice represents a part of a string - anything from the entire string to a single character, or even no characters at all (a 0-length slice). Since a slice only has to specify an offset and a length, copying and manipulating slices is a lot less expensive than copying and manipulating the strings they reference.
 
 To further reduce gas costs, most functions on slice that need to return a slice modify the original one instead of allocating a new one; for instance, `s.split(".")` will return the text up to the first '.', modifying s to only contain the remainder of the string after the '.'. In situations where you do not want to modify the original slice, you can make a copy first with `.copy()`, for example: `s.copy().split(".")`. Try and avoid using this idiom in loops; since Solidity has no memory management, it will result in allocating many short-lived slices that are later discarded.
@@ -8,7 +9,87 @@ Functions that return two slices come in two versions: a non-allocating version
 
 Functions that have to copy string data will return strings rather than slices; these can be cast back to slices for further processing if required.
 
-## toSlice(string self) internal returns (slice)
+## Examples
+### Basic usage
+    import "github.com/Arachnid/solidity-stringutils/strings.sol";
+
+    contract Contract {
+        using strings for *;
+
+        // ...
+    }
+
+### Getting the character length of a string
+    var len = "Unicode snowman ☃".toSlice().len(); // 17
+
+### Splitting a string around a delimiter
+    var s = "foo bar baz".toSlice();
+    var foo = s.split(" ".toSlice());
+
+After the above code executes, `s` is now "bar baz", and `foo` is now "foo".
+
+### Splitting a string into an array
+    var s = "www.google.com".toSlice();
+    var delim = ".".toSlice();
+    var parts = new string[](s.count(delim) + 1);
+    for(uint i = 0; i < parts.length; i++) {
+        parts[i] = s.split(delim).toString();
+    }
+
+### Extracting the middle part of a string
+    var s = "www.google.com".toSlice();
+    strings.slice memory part;
+    s.split(".".toSlice(), part); // part and the return value are both "www"
+    s.split(".".toSlice(), part); // part and the return value are both "google"
+
+This approach uses less memory than the above, by reusing the slice `part` for each section of string extracted.
+
+### Converting a slice back to a string
+    var myString = mySlice.toString();
+
+### Finding and returning the first occurrence of a substring
+    var s = "A B C B D".toSlice();
+    s.find("B".toSlice()); // "B C B D"
+
+`find` modifies `s` to contain the part of the string from the first match onwards.
+
+### Finding and returning the last occurrence of a substring
+    var s = "A B C B D".toSlice();
+    s.rfind("B".toSlice()); // "A B C B"
+
+`rfind` modifies `s` to contain the part of the string from the last match back to the start.
+
+### Finding without modifying the original slice
+    var s = "A B C B D".toSlice();
+    var substring = s.copy().rfind("B".toSlice()); // "A B C B"
+
+`copy` lets you cheaply duplicate a slice so you don't modify the original.
+
+### Prefix and suffix matching
+    var s = "A B C B D".toSlice();
+    s.startsWith("A".toSlice()); // True
+    s.endsWith("D".toSlice()); // True
+    s.startsWith("B".toSlice()); // False
+
+### Removing a prefix or suffix
+    var s = "A B C B D".toSlice();
+    s.beyond("A ".toSlice()).until(" D".toSlice()); // "B C B"
+
+`beyond` modifies `s` to contain the text after its argument; `until` modifies `s` to contain the text up to its argument. If the argument isn't found, `s` is unmodified.
+
+### Finding and returning the string up to the first match
+    var s = "A B C B D".toSlice();
+    var needle = "B".toSlice();
+    var substring = s.until(s.copy().find(needle).beyond(needle));
+
+Calling `find` on a copy of `s` returns the part of the string from `needle` onwards; calling `.beyond(needle)` removes `needle` as a prefix, and finally calling `s.until()` removes the entire end of the string, leaving everything up to and including the first match.
+
+### Concatenating strings
+    var s = "abc".toSlice().concat("def".toSlice()); // "abcdef"
+
+## Reference
+
+### toSlice(string self) internal returns (slice)
 Returns a slice containing the entire string.
 
 Arguments:
@@ -17,7 +98,7 @@ Arguments:
 
 Returns A newly allocated slice containing the entire string.
          
-## copy(slice self) internal returns (slice)
+### copy(slice self) internal returns (slice)
 Returns a new slice containing the same data as the current slice.
 
 Arguments:
@@ -26,7 +107,7 @@ Arguments:
 
 Returns A new slice containing the same data as `self`.
      
-## toString(slice self) internal returns (string)
+### toString(slice self) internal returns (string)
     
 Copies a slice to a new string.
 
@@ -36,7 +117,7 @@ Arguments:
 
 Returns A newly allocated string containing the slice's text.
      
-## len(slice self) internal returns (uint)
+### len(slice self) internal returns (uint)
 
 Returns the length in runes of the slice. Note that this operation takes time proportional to the length of the slice; avoid using it in loops, and call `slice.empty()` if you only need to know whether the slice is empty or not.
 
@@ -46,7 +127,7 @@ Arguments:
 
 Returns The length of the slice in runes.
      
-## empty(slice self) internal returns (bool)
+### empty(slice self) internal returns (bool)
     
 Returns true if the slice is empty (has a length of 0).
 
@@ -56,7 +137,7 @@ Arguments:
 
 Returns True if the slice is empty, False otherwise.
      
-## compare(slice self, slice other) internal returns (int)
+### compare(slice self, slice other) internal returns (int)
 
 Returns a positive number if `other` comes lexicographically after `self`, a negative number if it comes before, or zero if the contents of the two slices are equal. Comparison is done per-rune, on unicode codepoints.
 
@@ -67,7 +148,7 @@ Arguments:
 
 Returns The result of the comparison.
      
-## equals(slice self, slice other) internal returns (bool)
+### equals(slice self, slice other) internal returns (bool)
     
 Returns true if the two slices contain the same text.
 
@@ -78,7 +159,7 @@ Arguments:
 
 Returns True if the slices are equal, false otherwise.
      
-## nextRune(slice self, slice rune) internal returns (slice)
+### nextRune(slice self, slice rune) internal returns (slice)
     
 Extracts the first rune in the slice into `rune`, advancing the slice to point to the next rune and returning `self`.
 
@@ -89,7 +170,7 @@ Arguments:
 
 Returns `rune`.
      
-## nextRune(slice self) internal returns (slice ret)
+### nextRune(slice self) internal returns (slice ret)
     
 Returns the first rune in the slice, advancing the slice to point to the next rune.
 
@@ -99,7 +180,7 @@ Arguments:
 
 Returns A slice containing only the first rune from `self`.
      
-## ord(slice self) internal returns (uint ret)
+### ord(slice self) internal returns (uint ret)
     
 Returns the number of the first codepoint in the slice.
 
@@ -109,7 +190,7 @@ Arguments:
 
 Returns The number of the first codepoint in the slice.
      
-## keccak(slice self) internal returns (bytes32 ret)
+### keccak(slice self) internal returns (bytes32 ret)
     
 Returns the keccak-256 hash of the slice.
 
@@ -119,7 +200,7 @@ Arguments:
 
 Returns The hash of the slice.
      
-## startsWith(slice self, slice needle) internal returns (bool)
+### startsWith(slice self, slice needle) internal returns (bool)
 
 Returns true if `self` starts with `needle`.
 
@@ -130,7 +211,7 @@ Arguments:
 
 Returns True if the slice starts with the provided text, false otherwise.
      
-## beyond(slice self, slice needle) internal returns (slice)
+### beyond(slice self, slice needle) internal returns (slice)
     
 If `self` starts with `needle`, `needle` is removed from the beginning of `self`. Otherwise, `self` is unmodified.
 
@@ -141,7 +222,7 @@ Arguments:
 
 Returns `self`
      
-## endsWith(slice self, slice needle) internal returns (bool)
+### endsWith(slice self, slice needle) internal returns (bool)
     
 Returns true if the slice ends with `needle`.
 
@@ -152,7 +233,7 @@ Arguments:
 
 Returns True if the slice starts with the provided text, false otherwise.
      
-## until(slice self, slice needle) internal returns (slice)
+### until(slice self, slice needle) internal returns (slice)
     
 If `self` ends with `needle`, `needle` is removed from the end of `self`. Otherwise, `self` is unmodified.
 
@@ -163,7 +244,7 @@ Arguments:
 
 Returns `self`
      
-## find(slice self, slice needle) internal returns (slice)
+### find(slice self, slice needle) internal returns (slice)
     
 Modifies `self` to contain everything from the first occurrence of `needle` to the end of the slice. `self` is set to the empty slice if `needle` is not found.
 
@@ -174,7 +255,7 @@ Arguments:
 
 Returns `self`.
      
-## rfind(slice self, slice needle) internal returns (slice)
+### rfind(slice self, slice needle) internal returns (slice)
     
 Modifies `self` to contain the part of the string from the start of `self` to the end of the first occurrence of `needle`. If `needle` is not found, `self` is set to the empty slice.
 
@@ -185,7 +266,7 @@ Arguments:
 
 Returns `self`.
      
-## split(slice self, slice needle, slice token) internal returns (slice)
+### split(slice self, slice needle, slice token) internal returns (slice)
     
 Splits the slice, setting `self` to everything after the first occurrence of `needle`, and `token` to everything before it. If `needle` does not occur in `self`, `self` is set to the empty slice, and `token` is set to the entirety of `self`.
 
@@ -197,7 +278,7 @@ Arguments:
 
 Returns `token`.
      
-## split(slice self, slice needle) internal returns (slice token)
+### split(slice self, slice needle) internal returns (slice token)
     
 Splits the slice, setting `self` to everything after the first occurrence of `needle`, and returning everything before it. If `needle` does not occur in `self`, `self` is set to the empty slice, and the entirety of `self` is returned.
 
@@ -208,7 +289,7 @@ Arguments:
 
 Returns The part of `self` up to the first occurrence of `delim`.
      
-## rsplit(slice self, slice needle, slice token) internal returns (slice)
+### rsplit(slice self, slice needle, slice token) internal returns (slice)
     
 Splits the slice, setting `self` to everything before the last occurrence of `needle`, and `token` to everything after it. If `needle` does not occur in `self`, `self` is set to the empty slice, and `token` is set to the entirety of `self`.
 
@@ -220,7 +301,7 @@ Arguments:
 
 Returns `token`.
      
-## rsplit(slice self, slice needle) internal returns (slice token)
+### rsplit(slice self, slice needle) internal returns (slice token)
     
 Splits the slice, setting `self` to everything before the last occurrence of `needle`, and returning everything after it. If `needle` does not occur in `self`, `self` is set to the empty slice, and the entirety of `self` is returned.
 
@@ -231,7 +312,7 @@ Arguments:
 
 Returns The part of `self` after the last occurrence of `delim`.
      
-## count(slice self, slice needle) internal returns (uint count)
+### count(slice self, slice needle) internal returns (uint count)
     
 Counts the number of nonoverlapping occurrences of `needle` in `self`.
 
@@ -242,7 +323,7 @@ Arguments:
 
 Returns The number of occurrences of `needle` found in `self`.
      
-## contains(slice self, slice needle) internal returns (bool)
+### contains(slice self, slice needle) internal returns (bool)
     
 Returns True if `self` contains `needle`.
 
@@ -253,7 +334,7 @@ Arguments:
 
 Returns True if `needle` is found in `self`, false otherwise.
      
-## concat(slice self, slice other) internal returns (string)
+### concat(slice self, slice other) internal returns (string)
     
 Returns a newly allocated string containing the concatenation of `self` and `other`.
 
@@ -264,7 +345,7 @@ Arguments:
 
 Returns The concatenation of the two strings.
      
-## join(slice self, slice[] parts) internal returns (string)
+### join(slice self, slice[] parts) internal returns (string)
     
 Joins an array of slices, using `self` as a delimiter, returning a newly allocated string.
 
diff --git a/StringUtils.sol b/StringUtils.sol
deleted file mode 100644
index 262047da..00000000
--- a/StringUtils.sol
+++ /dev/null
@@ -1,187 +0,0 @@
-/**
- * @title String utility functions for Solidity contracts.
- * @author Nick Johnson <arachnid@notdot.net>
- *
- * @dev All functions are UTF-8 friendly, if input strings are valid UTF-8.
- *      Offsets and sizes are specified in bytes, not characters, and so will
- *      not respect UTF-8 character boundaries; be careful to only pass values
- *      that you know are between characters.
- */
-contract StringUtils {
-    function readWord(bytes a, uint idx) private returns (bytes32 word) {
-        assembly {
-            word := mload(add(add(a, idx), 32))
-        }
-    }
-    
-    /**
-     * @dev Compares two strings, returning a negative number if a is smaller,
-     *      a positive number if a is larger, and zero if the strings are equal.
-     * @param a The first string to compare.
-     * @param b The second string to compare.
-     * @return An integer whose sign indicates the value of the comparison.
-     */
-    function strcmp(string a, string b) internal returns (int) {
-        uint shortest = bytes(a).length;
-        if (bytes(b).length < bytes(a).length)
-            shortest = bytes(b).length;
-
-        for (uint idx = 0; idx < shortest; idx += 32) {
-            var diff = int(
-                uint(readWord(bytes(a), idx)) - uint(readWord(bytes(b), idx)));
-            if (diff != 0)
-                return diff;
-        }
-        return int(bytes(a).length - bytes(b).length);
-    }
-
-    /**
-     * @dev Finds the first occurrence of a substring in a string, returning its
-     *      index, or -1 if the substring is not found.
-     * @param haystack The string to search.
-     * @param needle The string to look for.
-     * @param idx The string index at which to start searching.
-     * @return The index of the first character of the substring, or -1 if not
-     *         found.
-     */
-    function strstr(string haystack, string needle, uint idx) internal
-        returns (int)
-    {
-        uint needleSize = bytes(needle).length;
-        bytes32 hash;
-        assembly {
-            hash := sha3(add(needle, 32), needleSize)
-        }
-        for (; idx <= bytes(haystack).length - needleSize; idx++) {
-            bytes32 testHash;
-            assembly {
-                testHash := sha3(add(add(haystack, idx), 32), needleSize)
-            }
-            if (hash == testHash)
-                return int(idx);
-        }
-        return -1;
-    }
-    
-    /**
-     * @dev Finds the last occurrence of a substring in a string, returning its
-     *      index, or -1 if the substring is not found.
-     * @param haystack The string to search.
-     * @param needle The string to look for.
-     * @param idx The string index at which to start searching.
-     * @return The index of the first character of the substring, or -1 if not
-     *         found.
-     */
-    function strrstr(string haystack, string needle, uint idx) internal
-        returns (int)
-    {
-        uint needleSize = bytes(needle).length;
-        bytes32 hash;
-        assembly {
-            hash := sha3(add(needle, 32), needleSize)
-        }
-        for (int i = int(idx); i >= 0; i--) {
-            bytes32 testHash;
-            assembly {
-                testHash := sha3(add(add(haystack, i), 32), needleSize)
-            }
-            if (hash == testHash)
-                return i;
-        }
-        return -1;
-    }
-
-    /**
-     * @dev Copies part of one string into another. If the requested range
-     *      extends past the end of the source or target strings, the range will
-     *      be truncated. If src and dest are the same, the ranges must either
-     *      not overlap, or idx must be less than start.
-     * @param dest The destination string to copy into.
-     * @param idx The start index in the destination string.
-     * @param src The string to copy from.
-     * @param start The index into the source string to start copying.
-     * @param len The number of bytes to copy.
-     */
-    function strncpy(string dest, uint idx, string src, uint start, uint len)
-        internal
-    {
-        if (idx + len > bytes(dest).length)
-            len = bytes(dest).length - idx;
-        if (start > bytes(src).length)
-            return;
-        if (start + len > bytes(src).length)
-            len = bytes(src).length - start;
-
-        // From here, we treat idx and start as memory offsets for dest and idx.
-        // Skip over the first word, which contains the length of each string.
-        idx += 32;
-        start += 32;
-
-        // Copy word-length chunks while possible
-        for(; len >= 32; len -= 32) {
-            assembly {
-                mstore(add(dest, idx), mload(add(src, start)))
-            }
-            idx += 32;
-            start += 32;
-        }
-
-        // Copy remaining bytes
-        uint mask = 256 ** (32 - len) - 1;
-        assembly {
-            let destaddr := add(dest, idx)
-            let srcpart := and(mload(add(src, start)), bnot(mask))
-            let destpart := and(mload(destaddr), mask)
-            mstore(destaddr, or(destpart, srcpart))
-        }
-    }
-    
-    /**
-     * @dev Returns a substring starting at idx and continuing until the first
-     *      occurrence of delim. If delim is not found, returns the remainder of
-     *      the string.
-     * @param str The string to return a substring of.
-     * @param delim The delimiter to search for.
-     * @param idx The start index.
-     * @return A newly allocated string consisting of bytes between idx and the
-     *         first occurrence of delim.
-     */
-    function strsep(string str, string delim, uint idx) internal
-        returns (string ret)
-    {
-        int endIdx = strstr(str, delim, idx);
-        if (endIdx == -1) {
-            endIdx = int(bytes(str).length);
-        }
-        ret = new string(uint(endIdx) - idx);
-        strncpy(ret, 0, str, idx, uint(endIdx) - idx);
-    }
-
-    /**
-     * @dev Returns the length of a string, in characters.
-     * @param str The string to return the length of.
-     * @return The length of the string, in characters.
-     */
-    function strchrlen(string str) internal returns (uint len) {
-        bytes memory strdata = bytes(str);
-        for (uint i = 0; i < strdata.length; i++)
-            // Don't count continuation bytes, of the form 0b10xxxxxx
-            if (strdata[i] & 0xC0 != 0x80)
-                len += 1;
-    }
-
-    /**
-     * @dev Cheaply computes the SHA3 hash of a substring.
-     * @param str The string to hash (part of).
-     * @param idx The start index for the section to hash.
-     * @param len The number of bytes to hash.
-     * @return The SHA3 sum of the selected substring.
-     */
-    function sha3_substring(string str, uint idx, uint len)
-        internal returns (bytes32 ret)
-    {
-        assembly {
-            ret := sha3(add(add(str, 32), idx), len)
-        }
-    }
-}
diff --git a/StringUtils_test.sol b/StringUtils_test.sol
deleted file mode 100644
index 9824ae5a..00000000
--- a/StringUtils_test.sol
+++ /dev/null
@@ -1,90 +0,0 @@
-import 'dapple/test.sol';
-import 'StringUtils.sol';
-
-contract StringUtilsTest is Test, StringUtils {
-    function abs(int x) returns (int) {
-        if(x < 0)
-            return -x;
-        return x;
-    }
-
-    function sign(int x) returns (int) {
-        return x/abs(x);
-    }
-
-    function assertEq(string a, string b) {
-        assertEq(strcmp(a, b), 0);
-    }
-
-    function assertEq(bytes32 a, bytes32 b) {
-        assertEq(uint(a), uint(b));
-    }
-
-    function testStrcmp() logs_gas {
-        assertEq(sign(strcmp("foobie", "foobie")), 0);
-        assertEq(sign(strcmp("foobie", "foobif")), -1);
-        assertEq(sign(strcmp("foobie", "foobid")), 1);
-        assertEq(sign(strcmp("foobie", "foobies")), -1);
-        assertEq(sign(strcmp("foobie", "foobi")), 1);
-        assertEq(sign(strcmp("foobie", "doobie")), 1);
-        assertEq(sign(strcmp("01234567890123456789012345678901", "012345678901234567890123456789012")), -1);
-    }
-
-    function testStrstr() logs_gas {
-        assertEq(strstr("abracadabra", "bra", 0), 1);
-        assertEq(strstr("abracadabra", "bra", 2), 8);
-        assertEq(strstr("abracadabra", "rab", 0), -1);
-        assertEq(strstr("ABC ABCDAB ABCDABCDABDE", "ABCDABD", 0), 15);
-    }
-
-    function testStrrstr() logs_gas {
-        assertEq(strrstr("abracadabra", "bra", 8), 8);
-        assertEq(strrstr("abracadabra", "bra", 7), 1);
-        assertEq(strrstr("abracadabra", "rab", 11), -1);
-        assertEq(strrstr("ABC ABCDAB ABCDABCDABDE", "ABCDABD", 16), 15);
-    }
-
-    function testStrncpy() logs_gas {
-        string memory target = "0123456789";
-        
-        // Basic nonoverlapping copy
-        strncpy(target, 0, target, 5, 5);
-        assertEq(target, "5678956789");
-
-        // Truncate input range
-        strncpy(target, 0, target, 8, 5);
-        assertEq(target, "8978956789");
-
-        // Truncate output range
-        strncpy(target, 8, target, 1, 5);
-        assertEq(target, "8978956797");
-
-        // Overlapping copy
-        strncpy(target, 0, target, 2, 8);
-        assertEq(target, "7895679797");
-
-        // Copy a longer string
-        string memory longer = "0123456789012345678901234567890123456789012345";
-        strncpy(longer, 0, longer, 1, 45);
-        assertEq(longer, "1234567890123456789012345678901234567890123455");
-    }
-
-    function testStrsep() logs_gas {
-        assertEq(strsep("www.google.com", ".", 0), "www");
-        assertEq(strsep("www.google.com", ".", 4), "google");
-        assertEq(strsep("www.google.com", ".", 11), "com");
-        assertEq(strsep("www.google.com", ".", 15), "");
-        assertEq(strsep("foo->bar->baz", "->", 0), "foo");      
-        assertEq(strsep("foo->bar->baz", "->", 5), "bar");      
-    }
-
-    function testStrchrlen() logs_gas {
-        assertEq(strchrlen(""), 0);
-        assertEq(strchrlen("foobar"), 6);
-        assertEq(strchrlen("I ♥ ethereum"), 12);
-    }
-
-    function testSha3Substring() logs_gas {
-        assertEq(sha3_substring("Hello, world!", 7, 5), sha3("world"));
-    }
-}