aboutsummaryrefslogtreecommitdiffstats
path: root/test/compilationTests/stringutils/strings.sol
diff options
context:
space:
mode:
Diffstat (limited to 'test/compilationTests/stringutils/strings.sol')
-rw-r--r--test/compilationTests/stringutils/strings.sol709
1 files changed, 709 insertions, 0 deletions
diff --git a/test/compilationTests/stringutils/strings.sol b/test/compilationTests/stringutils/strings.sol
new file mode 100644
index 00000000..0a2d68bd
--- /dev/null
+++ b/test/compilationTests/stringutils/strings.sol
@@ -0,0 +1,709 @@
+/*
+ * @title String & slice utility library for Solidity contracts.
+ * @author Nick Johnson <arachnid@notdot.net>
+ *
+ * @dev Functionality in this library is largely implemented using an
+ * abstraction called a 'slice'. A slice represents a part of a string -
+ * anything from the entire string to a single character, or even no
+ * characters at all (a 0-length slice). Since a slice only has to specify
+ * an offset and a length, copying and manipulating slices is a lot less
+ * expensive than copying and manipulating the strings they reference.
+ *
+ * To further reduce gas costs, most functions on slice that need to return
+ * a slice modify the original one instead of allocating a new one; for
+ * instance, `s.split(".")` will return the text up to the first '.',
+ * modifying s to only contain the remainder of the string after the '.'.
+ * In situations where you do not want to modify the original slice, you
+ * can make a copy first with `.copy()`, for example:
+ * `s.copy().split(".")`. Try and avoid using this idiom in loops; since
+ * Solidity has no memory management, it will result in allocating many
+ * short-lived slices that are later discarded.
+ *
+ * Functions that return two slices come in two versions: a non-allocating
+ * version that takes the second slice as an argument, modifying it in
+ * place, and an allocating version that allocates and returns the second
+ * slice; see `nextRune` for example.
+ *
+ * Functions that have to copy string data will return strings rather than
+ * slices; these can be cast back to slices for further processing if
+ * required.
+ *
+ * For convenience, some functions are provided with non-modifying
+ * variants that create a new slice and return both; for instance,
+ * `s.splitNew('.')` leaves s unmodified, and returns two values
+ * corresponding to the left and right parts of the string.
+ */
+library strings {
+ struct slice {
+ uint _len;
+ uint _ptr;
+ }
+
+ function memcpy(uint dest, uint src, uint len) private {
+ // Copy word-length chunks while possible
+ for(; len >= 32; len -= 32) {
+ assembly {
+ mstore(dest, mload(src))
+ }
+ dest += 32;
+ src += 32;
+ }
+
+ // Copy remaining bytes
+ uint mask = 256 ** (32 - len) - 1;
+ assembly {
+ let srcpart := and(mload(src), not(mask))
+ let destpart := and(mload(dest), mask)
+ mstore(dest, or(destpart, srcpart))
+ }
+ }
+
+ /*
+ * @dev Returns a slice containing the entire string.
+ * @param self The string to make a slice from.
+ * @return A newly allocated slice containing the entire string.
+ */
+ function toSlice(string self) internal returns (slice) {
+ uint ptr;
+ assembly {
+ ptr := add(self, 0x20)
+ }
+ return slice(bytes(self).length, ptr);
+ }
+
+ /*
+ * @dev Returns the length of a null-terminated bytes32 string.
+ * @param self The value to find the length of.
+ * @return The length of the string, from 0 to 32.
+ */
+ function len(bytes32 self) internal returns (uint) {
+ uint ret;
+ if (self == 0)
+ return 0;
+ if (self & 0xffffffffffffffffffffffffffffffff == 0) {
+ ret += 16;
+ self = bytes32(uint(self) / 0x100000000000000000000000000000000);
+ }
+ if (self & 0xffffffffffffffff == 0) {
+ ret += 8;
+ self = bytes32(uint(self) / 0x10000000000000000);
+ }
+ if (self & 0xffffffff == 0) {
+ ret += 4;
+ self = bytes32(uint(self) / 0x100000000);
+ }
+ if (self & 0xffff == 0) {
+ ret += 2;
+ self = bytes32(uint(self) / 0x10000);
+ }
+ if (self & 0xff == 0) {
+ ret += 1;
+ }
+ return 32 - ret;
+ }
+
+ /*
+ * @dev Returns a slice containing the entire bytes32, interpreted as a
+ * null-termintaed utf-8 string.
+ * @param self The bytes32 value to convert to a slice.
+ * @return A new slice containing the value of the input argument up to the
+ * first null.
+ */
+ function toSliceB32(bytes32 self) internal returns (slice ret) {
+ // Allocate space for `self` in memory, copy it there, and point ret at it
+ assembly {
+ let ptr := mload(0x40)
+ mstore(0x40, add(ptr, 0x20))
+ mstore(ptr, self)
+ mstore(add(ret, 0x20), ptr)
+ }
+ ret._len = len(self);
+ }
+
+ /*
+ * @dev Returns a new slice containing the same data as the current slice.
+ * @param self The slice to copy.
+ * @return A new slice containing the same data as `self`.
+ */
+ function copy(slice self) internal returns (slice) {
+ return slice(self._len, self._ptr);
+ }
+
+ /*
+ * @dev Copies a slice to a new string.
+ * @param self The slice to copy.
+ * @return A newly allocated string containing the slice's text.
+ */
+ function toString(slice self) internal returns (string) {
+ var ret = new string(self._len);
+ uint retptr;
+ assembly { retptr := add(ret, 32) }
+
+ memcpy(retptr, self._ptr, self._len);
+ return ret;
+ }
+
+ /*
+ * @dev Returns the length in runes of the slice. Note that this operation
+ * takes time proportional to the length of the slice; avoid using it
+ * in loops, and call `slice.empty()` if you only need to know whether
+ * the slice is empty or not.
+ * @param self The slice to operate on.
+ * @return The length of the slice in runes.
+ */
+ function len(slice self) internal returns (uint) {
+ // Starting at ptr-31 means the LSB will be the byte we care about
+ var ptr = self._ptr - 31;
+ var end = ptr + self._len;
+ for (uint len = 0; ptr < end; len++) {
+ uint8 b;
+ assembly { b := and(mload(ptr), 0xFF) }
+ if (b < 0x80) {
+ ptr += 1;
+ } else if(b < 0xE0) {
+ ptr += 2;
+ } else if(b < 0xF0) {
+ ptr += 3;
+ } else if(b < 0xF8) {
+ ptr += 4;
+ } else if(b < 0xFC) {
+ ptr += 5;
+ } else {
+ ptr += 6;
+ }
+ }
+ return len;
+ }
+
+ /*
+ * @dev Returns true if the slice is empty (has a length of 0).
+ * @param self The slice to operate on.
+ * @return True if the slice is empty, False otherwise.
+ */
+ function empty(slice self) internal returns (bool) {
+ return self._len == 0;
+ }
+
+ /*
+ * @dev Returns a positive number if `other` comes lexicographically after
+ * `self`, a negative number if it comes before, or zero if the
+ * contents of the two slices are equal. Comparison is done per-rune,
+ * on unicode codepoints.
+ * @param self The first slice to compare.
+ * @param other The second slice to compare.
+ * @return The result of the comparison.
+ */
+ function compare(slice self, slice other) internal returns (int) {
+ uint shortest = self._len;
+ if (other._len < self._len)
+ shortest = other._len;
+
+ var selfptr = self._ptr;
+ var otherptr = other._ptr;
+ for (uint idx = 0; idx < shortest; idx += 32) {
+ uint a;
+ uint b;
+ assembly {
+ a := mload(selfptr)
+ b := mload(otherptr)
+ }
+ if (a != b) {
+ // Mask out irrelevant bytes and check again
+ uint mask = ~(2 ** (8 * (32 - shortest + idx)) - 1);
+ var diff = (a & mask) - (b & mask);
+ if (diff != 0)
+ return int(diff);
+ }
+ selfptr += 32;
+ otherptr += 32;
+ }
+ return int(self._len) - int(other._len);
+ }
+
+ /*
+ * @dev Returns true if the two slices contain the same text.
+ * @param self The first slice to compare.
+ * @param self The second slice to compare.
+ * @return True if the slices are equal, false otherwise.
+ */
+ function equals(slice self, slice other) internal returns (bool) {
+ return compare(self, other) == 0;
+ }
+
+ /*
+ * @dev Extracts the first rune in the slice into `rune`, advancing the
+ * slice to point to the next rune and returning `self`.
+ * @param self The slice to operate on.
+ * @param rune The slice that will contain the first rune.
+ * @return `rune`.
+ */
+ function nextRune(slice self, slice rune) internal returns (slice) {
+ rune._ptr = self._ptr;
+
+ if (self._len == 0) {
+ rune._len = 0;
+ return rune;
+ }
+
+ uint len;
+ uint b;
+ // Load the first byte of the rune into the LSBs of b
+ assembly { b := and(mload(sub(mload(add(self, 32)), 31)), 0xFF) }
+ if (b < 0x80) {
+ len = 1;
+ } else if(b < 0xE0) {
+ len = 2;
+ } else if(b < 0xF0) {
+ len = 3;
+ } else {
+ len = 4;
+ }
+
+ // Check for truncated codepoints
+ if (len > self._len) {
+ rune._len = self._len;
+ self._ptr += self._len;
+ self._len = 0;
+ return rune;
+ }
+
+ self._ptr += len;
+ self._len -= len;
+ rune._len = len;
+ return rune;
+ }
+
+ /*
+ * @dev Returns the first rune in the slice, advancing the slice to point
+ * to the next rune.
+ * @param self The slice to operate on.
+ * @return A slice containing only the first rune from `self`.
+ */
+ function nextRune(slice self) internal returns (slice ret) {
+ nextRune(self, ret);
+ }
+
+ /*
+ * @dev Returns the number of the first codepoint in the slice.
+ * @param self The slice to operate on.
+ * @return The number of the first codepoint in the slice.
+ */
+ function ord(slice self) internal returns (uint ret) {
+ if (self._len == 0) {
+ return 0;
+ }
+
+ uint word;
+ uint len;
+ uint div = 2 ** 248;
+
+ // Load the rune into the MSBs of b
+ assembly { word:= mload(mload(add(self, 32))) }
+ var b = word / div;
+ if (b < 0x80) {
+ ret = b;
+ len = 1;
+ } else if(b < 0xE0) {
+ ret = b & 0x1F;
+ len = 2;
+ } else if(b < 0xF0) {
+ ret = b & 0x0F;
+ len = 3;
+ } else {
+ ret = b & 0x07;
+ len = 4;
+ }
+
+ // Check for truncated codepoints
+ if (len > self._len) {
+ return 0;
+ }
+
+ for (uint i = 1; i < len; i++) {
+ div = div / 256;
+ b = (word / div) & 0xFF;
+ if (b & 0xC0 != 0x80) {
+ // Invalid UTF-8 sequence
+ return 0;
+ }
+ ret = (ret * 64) | (b & 0x3F);
+ }
+
+ return ret;
+ }
+
+ /*
+ * @dev Returns the keccak-256 hash of the slice.
+ * @param self The slice to hash.
+ * @return The hash of the slice.
+ */
+ function keccak(slice self) internal returns (bytes32 ret) {
+ assembly {
+ ret := sha3(mload(add(self, 32)), mload(self))
+ }
+ }
+
+ /*
+ * @dev Returns true if `self` starts with `needle`.
+ * @param self The slice to operate on.
+ * @param needle The slice to search for.
+ * @return True if the slice starts with the provided text, false otherwise.
+ */
+ function startsWith(slice self, slice needle) internal returns (bool) {
+ if (self._len < needle._len) {
+ return false;
+ }
+
+ if (self._ptr == needle._ptr) {
+ return true;
+ }
+
+ bool equal;
+ assembly {
+ let len := mload(needle)
+ let selfptr := mload(add(self, 0x20))
+ let needleptr := mload(add(needle, 0x20))
+ equal := eq(sha3(selfptr, len), sha3(needleptr, len))
+ }
+ return equal;
+ }
+
+ /*
+ * @dev If `self` starts with `needle`, `needle` is removed from the
+ * beginning of `self`. Otherwise, `self` is unmodified.
+ * @param self The slice to operate on.
+ * @param needle The slice to search for.
+ * @return `self`
+ */
+ function beyond(slice self, slice needle) internal returns (slice) {
+ if (self._len < needle._len) {
+ return self;
+ }
+
+ bool equal = true;
+ if (self._ptr != needle._ptr) {
+ assembly {
+ let len := mload(needle)
+ let selfptr := mload(add(self, 0x20))
+ let needleptr := mload(add(needle, 0x20))
+ equal := eq(sha3(selfptr, len), sha3(needleptr, len))
+ }
+ }
+
+ if (equal) {
+ self._len -= needle._len;
+ self._ptr += needle._len;
+ }
+
+ return self;
+ }
+
+ /*
+ * @dev Returns true if the slice ends with `needle`.
+ * @param self The slice to operate on.
+ * @param needle The slice to search for.
+ * @return True if the slice starts with the provided text, false otherwise.
+ */
+ function endsWith(slice self, slice needle) internal returns (bool) {
+ if (self._len < needle._len) {
+ return false;
+ }
+
+ var selfptr = self._ptr + self._len - needle._len;
+
+ if (selfptr == needle._ptr) {
+ return true;
+ }
+
+ bool equal;
+ assembly {
+ let len := mload(needle)
+ let needleptr := mload(add(needle, 0x20))
+ equal := eq(sha3(selfptr, len), sha3(needleptr, len))
+ }
+
+ return equal;
+ }
+
+ /*
+ * @dev If `self` ends with `needle`, `needle` is removed from the
+ * end of `self`. Otherwise, `self` is unmodified.
+ * @param self The slice to operate on.
+ * @param needle The slice to search for.
+ * @return `self`
+ */
+ function until(slice self, slice needle) internal returns (slice) {
+ if (self._len < needle._len) {
+ return self;
+ }
+
+ var selfptr = self._ptr + self._len - needle._len;
+ bool equal = true;
+ if (selfptr != needle._ptr) {
+ assembly {
+ let len := mload(needle)
+ let needleptr := mload(add(needle, 0x20))
+ equal := eq(sha3(selfptr, len), sha3(needleptr, len))
+ }
+ }
+
+ if (equal) {
+ self._len -= needle._len;
+ }
+
+ return self;
+ }
+
+ // Returns the memory address of the first byte of the first occurrence of
+ // `needle` in `self`, or the first byte after `self` if not found.
+ function findPtr(uint selflen, uint selfptr, uint needlelen, uint needleptr) private returns (uint) {
+ uint ptr;
+ uint idx;
+
+ if (needlelen <= selflen) {
+ if (needlelen <= 32) {
+ // Optimized assembly for 68 gas per byte on short strings
+ assembly {
+ let mask := not(sub(exp(2, mul(8, sub(32, needlelen))), 1))
+ let needledata := and(mload(needleptr), mask)
+ let end := add(selfptr, sub(selflen, needlelen))
+ ptr := selfptr
+ loop:
+ jumpi(exit, eq(and(mload(ptr), mask), needledata))
+ ptr := add(ptr, 1)
+ jumpi(loop, lt(sub(ptr, 1), end))
+ ptr := add(selfptr, selflen)
+ exit:
+ }
+ return ptr;
+ } else {
+ // For long needles, use hashing
+ bytes32 hash;
+ assembly { hash := sha3(needleptr, needlelen) }
+ ptr = selfptr;
+ for (idx = 0; idx <= selflen - needlelen; idx++) {
+ bytes32 testHash;
+ assembly { testHash := sha3(ptr, needlelen) }
+ if (hash == testHash)
+ return ptr;
+ ptr += 1;
+ }
+ }
+ }
+ return selfptr + selflen;
+ }
+
+ // Returns the memory address of the first byte after the last occurrence of
+ // `needle` in `self`, or the address of `self` if not found.
+ function rfindPtr(uint selflen, uint selfptr, uint needlelen, uint needleptr) private returns (uint) {
+ uint ptr;
+
+ if (needlelen <= selflen) {
+ if (needlelen <= 32) {
+ // Optimized assembly for 69 gas per byte on short strings
+ assembly {
+ let mask := not(sub(exp(2, mul(8, sub(32, needlelen))), 1))
+ let needledata := and(mload(needleptr), mask)
+ ptr := add(selfptr, sub(selflen, needlelen))
+ loop:
+ jumpi(ret, eq(and(mload(ptr), mask), needledata))
+ ptr := sub(ptr, 1)
+ jumpi(loop, gt(add(ptr, 1), selfptr))
+ ptr := selfptr
+ jump(exit)
+ ret:
+ ptr := add(ptr, needlelen)
+ exit:
+ }
+ return ptr;
+ } else {
+ // For long needles, use hashing
+ bytes32 hash;
+ assembly { hash := sha3(needleptr, needlelen) }
+ ptr = selfptr + (selflen - needlelen);
+ while (ptr >= selfptr) {
+ bytes32 testHash;
+ assembly { testHash := sha3(ptr, needlelen) }
+ if (hash == testHash)
+ return ptr + needlelen;
+ ptr -= 1;
+ }
+ }
+ }
+ return selfptr;
+ }
+
+ /*
+ * @dev Modifies `self` to contain everything from the first occurrence of
+ * `needle` to the end of the slice. `self` is set to the empty slice
+ * if `needle` is not found.
+ * @param self The slice to search and modify.
+ * @param needle The text to search for.
+ * @return `self`.
+ */
+ function find(slice self, slice needle) internal returns (slice) {
+ uint ptr = findPtr(self._len, self._ptr, needle._len, needle._ptr);
+ self._len -= ptr - self._ptr;
+ self._ptr = ptr;
+ return self;
+ }
+
+ /*
+ * @dev Modifies `self` to contain the part of the string from the start of
+ * `self` to the end of the first occurrence of `needle`. If `needle`
+ * is not found, `self` is set to the empty slice.
+ * @param self The slice to search and modify.
+ * @param needle The text to search for.
+ * @return `self`.
+ */
+ function rfind(slice self, slice needle) internal returns (slice) {
+ uint ptr = rfindPtr(self._len, self._ptr, needle._len, needle._ptr);
+ self._len = ptr - self._ptr;
+ return self;
+ }
+
+ /*
+ * @dev Splits the slice, setting `self` to everything after the first
+ * occurrence of `needle`, and `token` to everything before it. If
+ * `needle` does not occur in `self`, `self` is set to the empty slice,
+ * and `token` is set to the entirety of `self`.
+ * @param self The slice to split.
+ * @param needle The text to search for in `self`.
+ * @param token An output parameter to which the first token is written.
+ * @return `token`.
+ */
+ function split(slice self, slice needle, slice token) internal returns (slice) {
+ uint ptr = findPtr(self._len, self._ptr, needle._len, needle._ptr);
+ token._ptr = self._ptr;
+ token._len = ptr - self._ptr;
+ if (ptr == self._ptr + self._len) {
+ // Not found
+ self._len = 0;
+ } else {
+ self._len -= token._len + needle._len;
+ self._ptr = ptr + needle._len;
+ }
+ return token;
+ }
+
+ /*
+ * @dev Splits the slice, setting `self` to everything after the first
+ * occurrence of `needle`, and returning everything before it. If
+ * `needle` does not occur in `self`, `self` is set to the empty slice,
+ * and the entirety of `self` is returned.
+ * @param self The slice to split.
+ * @param needle The text to search for in `self`.
+ * @return The part of `self` up to the first occurrence of `delim`.
+ */
+ function split(slice self, slice needle) internal returns (slice token) {
+ split(self, needle, token);
+ }
+
+ /*
+ * @dev Splits the slice, setting `self` to everything before the last
+ * occurrence of `needle`, and `token` to everything after it. If
+ * `needle` does not occur in `self`, `self` is set to the empty slice,
+ * and `token` is set to the entirety of `self`.
+ * @param self The slice to split.
+ * @param needle The text to search for in `self`.
+ * @param token An output parameter to which the first token is written.
+ * @return `token`.
+ */
+ function rsplit(slice self, slice needle, slice token) internal returns (slice) {
+ uint ptr = rfindPtr(self._len, self._ptr, needle._len, needle._ptr);
+ token._ptr = ptr;
+ token._len = self._len - (ptr - self._ptr);
+ if (ptr == self._ptr) {
+ // Not found
+ self._len = 0;
+ } else {
+ self._len -= token._len + needle._len;
+ }
+ return token;
+ }
+
+ /*
+ * @dev Splits the slice, setting `self` to everything before the last
+ * occurrence of `needle`, and returning everything after it. If
+ * `needle` does not occur in `self`, `self` is set to the empty slice,
+ * and the entirety of `self` is returned.
+ * @param self The slice to split.
+ * @param needle The text to search for in `self`.
+ * @return The part of `self` after the last occurrence of `delim`.
+ */
+ function rsplit(slice self, slice needle) internal returns (slice token) {
+ rsplit(self, needle, token);
+ }
+
+ /*
+ * @dev Counts the number of nonoverlapping occurrences of `needle` in `self`.
+ * @param self The slice to search.
+ * @param needle The text to search for in `self`.
+ * @return The number of occurrences of `needle` found in `self`.
+ */
+ function count(slice self, slice needle) internal returns (uint count) {
+ uint ptr = findPtr(self._len, self._ptr, needle._len, needle._ptr) + needle._len;
+ while (ptr <= self._ptr + self._len) {
+ count++;
+ ptr = findPtr(self._len - (ptr - self._ptr), ptr, needle._len, needle._ptr) + needle._len;
+ }
+ }
+
+ /*
+ * @dev Returns True if `self` contains `needle`.
+ * @param self The slice to search.
+ * @param needle The text to search for in `self`.
+ * @return True if `needle` is found in `self`, false otherwise.
+ */
+ function contains(slice self, slice needle) internal returns (bool) {
+ return rfindPtr(self._len, self._ptr, needle._len, needle._ptr) != self._ptr;
+ }
+
+ /*
+ * @dev Returns a newly allocated string containing the concatenation of
+ * `self` and `other`.
+ * @param self The first slice to concatenate.
+ * @param other The second slice to concatenate.
+ * @return The concatenation of the two strings.
+ */
+ function concat(slice self, slice other) internal returns (string) {
+ var ret = new string(self._len + other._len);
+ uint retptr;
+ assembly { retptr := add(ret, 32) }
+ memcpy(retptr, self._ptr, self._len);
+ memcpy(retptr + self._len, other._ptr, other._len);
+ return ret;
+ }
+
+ /*
+ * @dev Joins an array of slices, using `self` as a delimiter, returning a
+ * newly allocated string.
+ * @param self The delimiter to use.
+ * @param parts A list of slices to join.
+ * @return A newly allocated string containing all the slices in `parts`,
+ * joined with `self`.
+ */
+ function join(slice self, slice[] parts) internal returns (string) {
+ if (parts.length == 0)
+ return "";
+
+ uint len = self._len * (parts.length - 1);
+ for(uint i = 0; i < parts.length; i++)
+ len += parts[i]._len;
+
+ var ret = new string(len);
+ uint retptr;
+ assembly { retptr := add(ret, 32) }
+
+ for(i = 0; i < parts.length; i++) {
+ memcpy(retptr, parts[i]._ptr, parts[i]._len);
+ retptr += parts[i]._len;
+ if (i < parts.length - 1) {
+ memcpy(retptr, self._ptr, self._len);
+ retptr += self._len;
+ }
+ }
+
+ return ret;
+ }
+}