From 3129b17aded7fcedc99f3bf97ec4ad6f28e0cf3b Mon Sep 17 00:00:00 2001 From: Nick Johnson Date: Sun, 22 May 2016 13:27:57 +0100 Subject: [PATCH] Updated readme; deleted stringutils --- README.md | 131 ++++++++++++++++++++++++------ StringUtils.sol | 187 ------------------------------------------- StringUtils_test.sol | 90 --------------------- 3 files changed, 106 insertions(+), 302 deletions(-) delete mode 100644 StringUtils.sol delete mode 100644 StringUtils_test.sol diff --git a/README.md b/README.md index dec83eee..750a4ba1 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ # String & slice utility library for Solidity +## Overview Functionality in this library is largely implemented using an abstraction called a 'slice'. A slice represents a part of a string - anything from the entire string to a single character, or even no characters at all (a 0-length slice). Since a slice only has to specify an offset and a length, copying and manipulating slices is a lot less expensive than copying and manipulating the strings they reference. To further reduce gas costs, most functions on slice that need to return a slice modify the original one instead of allocating a new one; for instance, `s.split(".")` will return the text up to the first '.', modifying s to only contain the remainder of the string after the '.'. In situations where you do not want to modify the original slice, you can make a copy first with `.copy()`, for example: `s.copy().split(".")`. Try and avoid using this idiom in loops; since Solidity has no memory management, it will result in allocating many short-lived slices that are later discarded. @@ -8,7 +9,87 @@ Functions that return two slices come in two versions: a non-allocating version Functions that have to copy string data will return strings rather than slices; these can be cast back to slices for further processing if required. 
-## toSlice(string self) internal returns (slice) +## Examples +### Basic usage + import "github.com/Arachnid/solidity-stringutils/strings.sol"; + + contract Contract { + using strings for *; + + // ... + } + +### Getting the character length of a string + var len = "Unicode snowman ☃".toSlice().len(); // 17 + +### Splitting a string around a delimiter + var s = "foo bar baz".toSlice(); + var foo = s.split(" ".toSlice()); + +After the above code executes, `s` is now "bar baz", and `foo` is now "foo". + +### Splitting a string into an array + var s = "www.google.com".toSlice(); + var delim = ".".toSlice(); + var parts = new string[](s.count(delim) + 1); + for(uint i = 0; i < parts.length; i++) { + parts[i] = s.split(delim).toString(); + } + +### Extracting the middle part of a string + var s = "www.google.com".toSlice(); + strings.slice memory part; + s.split(".".toSlice(), part); // part and return value is "www" + s.split(".".toSlice(), part); // part and return value is "google" + +This approach uses less memory than the above, by reusing the slice `part` for each section of string extracted. + +### Converting a slice back to a string + var myString = mySlice.toString(); + +### Finding and returning the first occurrence of a substring + var s = "A B C B D".toSlice(); + s.find("B".toSlice()); // "B C B D" + +`find` modifies `s` to contain the part of the string from the first match onwards. + +### Finding and returning the last occurrence of a substring + var s = "A B C B D".toSlice(); + s.rfind("B".toSlice()); // "A B C B" + +`rfind` modifies `s` to contain the part of the string from the last match back to the start. + +### Finding without modifying the original slice + var s = "A B C B D".toSlice(); + var substring = s.copy().rfind("B".toSlice()); // "A B C B" + +`copy` lets you cheaply duplicate a slice so you don't modify the original. 
+ +### Prefix and suffix matching + var s = "A B C B D".toSlice(); + s.startsWith("A".toSlice()); // True + s.endsWith("D".toSlice()); // True + s.startsWith("B".toSlice()); // False + +### Removing a prefix or suffix + var s = "A B C B D".toSlice(); + s.beyond("A ".toSlice()).until(" D".toSlice()); // "B C B" + +`beyond` removes its argument from the start of `s` if `s` starts with it; `until` removes its argument from the end of `s` if `s` ends with it. If `s` does not start (or end) with the argument, `s` is unmodified. + +### Finding and returning the string up to the first match + var s = "A B C B D".toSlice(); + var needle = "B".toSlice(); + var substring = s.until(s.copy().find(needle).beyond(needle)); + +Calling `find` on a copy of `s` returns the part of the string from `needle` onwards; calling `.beyond(needle)` removes `needle` as a prefix, and finally calling `s.until()` removes the entire end of the string, leaving everything up to and including the first match. + +### Concatenating strings + var s = "abc".toSlice().concat("def".toSlice()); // "abcdef" + +## Reference + +### toSlice(string self) internal returns (slice) Returns a slice containing the entire string. Arguments: @@ -17,7 +98,7 @@ Arguments: Returns A newly allocated slice containing the entire string. -## copy(slice self) internal returns (slice) +### copy(slice self) internal returns (slice) Returns a new slice containing the same data as the current slice. Arguments: @@ -26,7 +107,7 @@ Arguments: Returns A new slice containing the same data as `self`. -## toString(slice self) internal returns (string) +### toString(slice self) internal returns (string) Copies a slice to a new string. @@ -36,7 +117,7 @@ Arguments: Returns A newly allocated string containing the slice's text. -## len(slice self) internal returns (uint) +### len(slice self) internal returns (uint) Returns the length in runes of the slice. 
Note that this operation takes time proportional to the length of the slice; avoid using it in loops, and call `slice.empty()` if you only need to know whether the slice is empty or not. @@ -46,7 +127,7 @@ Arguments: Returns The length of the slice in runes. -## empty(slice self) internal returns (bool) +### empty(slice self) internal returns (bool) Returns true if the slice is empty (has a length of 0). @@ -56,7 +137,7 @@ Arguments: Returns True if the slice is empty, False otherwise. -## compare(slice self, slice other) internal returns (int) +### compare(slice self, slice other) internal returns (int) Returns a positive number if `other` comes lexicographically after `self`, a negative number if it comes before, or zero if the contents of the two slices are equal. Comparison is done per-rune, on unicode codepoints. @@ -67,7 +148,7 @@ Arguments: Returns The result of the comparison. -## equals(slice self, slice other) internal returns (bool) +### equals(slice self, slice other) internal returns (bool) Returns true if the two slices contain the same text. @@ -78,7 +159,7 @@ Arguments: Returns True if the slices are equal, false otherwise. -## nextRune(slice self, slice rune) internal returns (slice) +### nextRune(slice self, slice rune) internal returns (slice) Extracts the first rune in the slice into `rune`, advancing the slice to point to the next rune and returning `self`. @@ -89,7 +170,7 @@ Arguments: Returns `rune`. -## nextRune(slice self) internal returns (slice ret) +### nextRune(slice self) internal returns (slice ret) Returns the first rune in the slice, advancing the slice to point to the next rune. @@ -99,7 +180,7 @@ Arguments: Returns A slice containing only the first rune from `self`. -## ord(slice self) internal returns (uint ret) +### ord(slice self) internal returns (uint ret) Returns the number of the first codepoint in the slice. @@ -109,7 +190,7 @@ Arguments: Returns The number of the first codepoint in the slice. 
-## keccak(slice self) internal returns (bytes32 ret) +### keccak(slice self) internal returns (bytes32 ret) Returns the keccak-256 hash of the slice. @@ -119,7 +200,7 @@ Arguments: Returns The hash of the slice. -## startsWith(slice self, slice needle) internal returns (bool) +### startsWith(slice self, slice needle) internal returns (bool) Returns true if `self` starts with `needle`. @@ -130,7 +211,7 @@ Arguments: Returns True if the slice starts with the provided text, false otherwise. -## beyond(slice self, slice needle) internal returns (slice) +### beyond(slice self, slice needle) internal returns (slice) If `self` starts with `needle`, `needle` is removed from the beginning of `self`. Otherwise, `self` is unmodified. @@ -141,7 +222,7 @@ Arguments: Returns `self` -## endsWith(slice self, slice needle) internal returns (bool) +### endsWith(slice self, slice needle) internal returns (bool) Returns true if the slice ends with `needle`. @@ -152,7 +233,7 @@ Arguments: Returns True if the slice starts with the provided text, false otherwise. -## until(slice self, slice needle) internal returns (slice) +### until(slice self, slice needle) internal returns (slice) If `self` ends with `needle`, `needle` is removed from the end of `self`. Otherwise, `self` is unmodified. @@ -163,7 +244,7 @@ Arguments: Returns `self` -## find(slice self, slice needle) internal returns (slice) +### find(slice self, slice needle) internal returns (slice) Modifies `self` to contain everything from the first occurrence of `needle` to the end of the slice. `self` is set to the empty slice if `needle` is not found. @@ -174,7 +255,7 @@ Arguments: Returns `self`. -## rfind(slice self, slice needle) internal returns (slice) +### rfind(slice self, slice needle) internal returns (slice) Modifies `self` to contain the part of the string from the start of `self` to the end of the first occurrence of `needle`. If `needle` is not found, `self` is set to the empty slice. 
@@ -185,7 +266,7 @@ Arguments: Returns `self`. -## split(slice self, slice needle, slice token) internal returns (slice) +### split(slice self, slice needle, slice token) internal returns (slice) Splits the slice, setting `self` to everything after the first occurrence of `needle`, and `token` to everything before it. If `needle` does not occur in `self`, `self` is set to the empty slice, and `token` is set to the entirety of `self`. @@ -197,7 +278,7 @@ Arguments: Returns `token`. -## split(slice self, slice needle) internal returns (slice token) +### split(slice self, slice needle) internal returns (slice token) Splits the slice, setting `self` to everything after the first occurrence of `needle`, and returning everything before it. If `needle` does not occur in `self`, `self` is set to the empty slice, and the entirety of `self` is returned. @@ -208,7 +289,7 @@ Arguments: Returns The part of `self` up to the first occurrence of `delim`. -## rsplit(slice self, slice needle, slice token) internal returns (slice) +### rsplit(slice self, slice needle, slice token) internal returns (slice) Splits the slice, setting `self` to everything before the last occurrence of `needle`, and `token` to everything after it. If `needle` does not occur in `self`, `self` is set to the empty slice, and `token` is set to the entirety of `self`. @@ -220,7 +301,7 @@ Arguments: Returns `token`. -## rsplit(slice self, slice needle) internal returns (slice token) +### rsplit(slice self, slice needle) internal returns (slice token) Splits the slice, setting `self` to everything before the last occurrence of `needle`, and returning everything after it. If `needle` does not occur in `self`, `self` is set to the empty slice, and the entirety of `self` is returned. @@ -231,7 +312,7 @@ Arguments: Returns The part of `self` after the last occurrence of `delim`. 
-## count(slice self, slice needle) internal returns (uint count) +### count(slice self, slice needle) internal returns (uint count) Counts the number of nonoverlapping occurrences of `needle` in `self`. @@ -242,7 +323,7 @@ Arguments: Returns The number of occurrences of `needle` found in `self`. -## contains(slice self, slice needle) internal returns (bool) +### contains(slice self, slice needle) internal returns (bool) Returns True if `self` contains `needle`. @@ -253,7 +334,7 @@ Arguments: Returns True if `needle` is found in `self`, false otherwise. -## concat(slice self, slice other) internal returns (string) +### concat(slice self, slice other) internal returns (string) Returns a newly allocated string containing the concatenation of `self` and `other`. @@ -264,7 +345,7 @@ Arguments: Returns The concatenation of the two strings. -## join(slice self, slice[] parts) internal returns (string) +### join(slice self, slice[] parts) internal returns (string) Joins an array of slices, using `self` as a delimiter, returning a newly allocated string. diff --git a/StringUtils.sol b/StringUtils.sol deleted file mode 100644 index 262047da..00000000 --- a/StringUtils.sol +++ /dev/null @@ -1,187 +0,0 @@ -/** - * @title String utility functions for Solidity contracts. - * @author Nick Johnson - * - * @dev All functions are UTF-8 friendly, if input strings are valid UTF-8. - * Offsets and sizes are specified in bytes, not characters, and so will - * not respect UTF-8 character boundaries; be careful to only pass values - * that you know are between characters. - */ -contract StringUtils { - function readWord(bytes a, uint idx) private returns (bytes32 word) { - assembly { - word := mload(add(add(a, idx), 32)) - } - } - - /** - * @dev Compares two strings, returning a negative number if a is smaller, - * a positive number if a is larger, and zero if the strings are equal. - * @param a The first string to compare. - * @param b The second string to compare. 
- * @return An integer whose sign indicates the value of the comparison. - */ - function strcmp(string a, string b) internal returns (int) { - uint shortest = bytes(a).length; - if (bytes(b).length < bytes(a).length) - shortest = bytes(b).length; - - for (uint idx = 0; idx < shortest; idx += 32) { - var diff = int( - uint(readWord(bytes(a), idx)) - uint(readWord(bytes(b), idx))); - if (diff != 0) - return diff; - } - return int(bytes(a).length - bytes(b).length); - } - - /** - * @dev Finds the first occurrence of a substring in a string, returning its - * index, or -1 if the substring is not found. - * @param haystack The string to search. - * @param needle The string to look for. - * @param idx The string index at which to start searching. - * @return The index of the first character of the substring, or -1 if not - * found. - */ - function strstr(string haystack, string needle, uint idx) internal - returns (int) - { - uint needleSize = bytes(needle).length; - bytes32 hash; - assembly { - hash := sha3(add(needle, 32), needleSize) - } - for (; idx <= bytes(haystack).length - needleSize; idx++) { - bytes32 testHash; - assembly { - testHash := sha3(add(add(haystack, idx), 32), needleSize) - } - if (hash == testHash) - return int(idx); - } - return -1; - } - - /** - * @dev Finds the last occurrence of a substring in a string, returning its - * index, or -1 if the substring is not found. - * @param haystack The string to search. - * @param needle The string to look for. - * @param idx The string index at which to start searching. - * @return The index of the first character of the substring, or -1 if not - * found. 
- */ - function strrstr(string haystack, string needle, uint idx) internal - returns (int) - { - uint needleSize = bytes(needle).length; - bytes32 hash; - assembly { - hash := sha3(add(needle, 32), needleSize) - } - for (int i = int(idx); i >= 0; i--) { - bytes32 testHash; - assembly { - testHash := sha3(add(add(haystack, i), 32), needleSize) - } - if (hash == testHash) - return i; - } - return -1; - } - - /** - * @dev Copies part of one string into another. If the requested range - * extends past the end of the source or target strings, the range will - * be truncated. If src and dest are the same, the ranges must either - * not overlap, or idx must be less than start. - * @param dest The destination string to copy into. - * @param idx The start index in the destination string. - * @param src The string to copy from. - * @param start The index into the source string to start copying. - * @param len The number of bytes to copy. - */ - function strncpy(string dest, uint idx, string src, uint start, uint len) - internal - { - if (idx + len > bytes(dest).length) - len = bytes(dest).length - idx; - if (start > bytes(src).length) - return; - if (start + len > bytes(src).length) - len = bytes(src).length - start; - - // From here, we treat idx and start as memory offsets for dest and idx. - // Skip over the first word, which contains the length of each string. - idx += 32; - start += 32; - - // Copy word-length chunks while possible - for(; len >= 32; len -= 32) { - assembly { - mstore(add(dest, idx), mload(add(src, start))) - } - idx += 32; - start += 32; - } - - // Copy remaining bytes - uint mask = 256 ** (32 - len) - 1; - assembly { - let destaddr := add(dest, idx) - let srcpart := and(mload(add(src, start)), bnot(mask)) - let destpart := and(mload(destaddr), mask) - mstore(destaddr, or(destpart, srcpart)) - } - } - - /** - * @dev Returns a substring starting at idx and continuing until the first - * occurrence of delim. 
If delim is not found, returns the remainder of - * the string. - * @param str The string to return a substring of. - * @param delim The delimiter to search for. - * @param idx The start index. - * @return A newly allocated string consisting of bytes between idx and the - * first occurrence of delim. - */ - function strsep(string str, string delim, uint idx) internal - returns (string ret) - { - int endIdx = strstr(str, delim, idx); - if (endIdx == -1) { - endIdx = int(bytes(str).length); - } - ret = new string(uint(endIdx) - idx); - strncpy(ret, 0, str, idx, uint(endIdx) - idx); - } - - /** - * @dev Returns the length of a string, in characters. - * @param str The string to return the length of. - * @return The length of the string, in characters. - */ - function strchrlen(string str) internal returns (uint len) { - bytes memory strdata = bytes(str); - for (uint i = 0; i < strdata.length; i++) - // Don't count continuation bytes, of the form 0b10xxxxxx - if (strdata[i] & 0xC0 != 0x80) - len += 1; - } - - /** - * @dev Cheaply computes the SHA3 hash of a substring. - * @param str The string to hash (part of). - * @param idx The start index for the section to hash. - * @param len The number of bytes to hash. - * @return The SHA3 sum of the selected substring. 
- */ - function sha3_substring(string str, uint idx, uint len) - internal returns (bytes32 ret) - { - assembly { - ret := sha3(add(add(str, 32), idx), len) - } - } -} diff --git a/StringUtils_test.sol b/StringUtils_test.sol deleted file mode 100644 index 9824ae5a..00000000 --- a/StringUtils_test.sol +++ /dev/null @@ -1,90 +0,0 @@ -import 'dapple/test.sol'; -import 'StringUtils.sol'; - -contract StringUtilsTest is Test, StringUtils { - function abs(int x) returns (int) { - if(x < 0) - return -x; - return x; - } - - function sign(int x) returns (int) { - return x/abs(x); - } - - function assertEq(string a, string b) { - assertEq(strcmp(a, b), 0); - } - - function assertEq(bytes32 a, bytes32 b) { - assertEq(uint(a), uint(b)); - } - - function testStrcmp() logs_gas { - assertEq(sign(strcmp("foobie", "foobie")), 0); - assertEq(sign(strcmp("foobie", "foobif")), -1); - assertEq(sign(strcmp("foobie", "foobid")), 1); - assertEq(sign(strcmp("foobie", "foobies")), -1); - assertEq(sign(strcmp("foobie", "foobi")), 1); - assertEq(sign(strcmp("foobie", "doobie")), 1); - assertEq(sign(strcmp("01234567890123456789012345678901", "012345678901234567890123456789012")), -1); - } - - function testStrstr() logs_gas { - assertEq(strstr("abracadabra", "bra", 0), 1); - assertEq(strstr("abracadabra", "bra", 2), 8); - assertEq(strstr("abracadabra", "rab", 0), -1); - assertEq(strstr("ABC ABCDAB ABCDABCDABDE", "ABCDABD", 0), 15); - } - - function testStrrstr() logs_gas { - assertEq(strrstr("abracadabra", "bra", 8), 8); - assertEq(strrstr("abracadabra", "bra", 7), 1); - assertEq(strrstr("abracadabra", "rab", 11), -1); - assertEq(strrstr("ABC ABCDAB ABCDABCDABDE", "ABCDABD", 16), 15); - } - - function testStrncpy() logs_gas { - string memory target = "0123456789"; - - // Basic nonoverlapping copy - strncpy(target, 0, target, 5, 5); - assertEq(target, "5678956789"); - - // Truncate input range - strncpy(target, 0, target, 8, 5); - assertEq(target, "8978956789"); - - // Truncate output range - 
strncpy(target, 8, target, 1, 5); - assertEq(target, "8978956797"); - - // Overlapping copy - strncpy(target, 0, target, 2, 8); - assertEq(target, "7895679797"); - - // Copy a longer string - string memory longer = "0123456789012345678901234567890123456789012345"; - strncpy(longer, 0, longer, 1, 45); - assertEq(longer, "1234567890123456789012345678901234567890123455"); - } - - function testStrsep() logs_gas { - assertEq(strsep("www.google.com", ".", 0), "www"); - assertEq(strsep("www.google.com", ".", 4), "google"); - assertEq(strsep("www.google.com", ".", 11), "com"); - assertEq(strsep("www.google.com", ".", 15), ""); - assertEq(strsep("foo->bar->baz", "->", 0), "foo"); - assertEq(strsep("foo->bar->baz", "->", 5), "bar"); - } - - function testStrchrlen() logs_gas { - assertEq(strchrlen(""), 0); - assertEq(strchrlen("foobar"), 6); - assertEq(strchrlen("I ♥ ethereum"), 12); - } - - function testSha3Substring() logs_gas { - assertEq(sha3_substring("Hello, world!", 7, 5), sha3("world")); - } -}