// Copyright 2015 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package strings // Compare returns an integer comparing two strings lexicographically. // The result will be 0 if a==b, -1 if a < b, and +1 if a > b. // // Compare is included only for symmetry with package bytes. // It is usually clearer and always faster to use the built-in // string comparison operators ==, <, >, and so on. func Compare(a, b string) int { // NOTE(rsc): This function does NOT call the runtime cmpstring function, // because we do not want to provide any performance justification for // using strings.Compare. Basically no one should use strings.Compare. // As the comment above says, it is here only for symmetry with package bytes. // If performance is important, the compiler should be changed to recognize // the pattern so that all code doing three-way comparisons, not just code // using strings.Compare, can benefit. if a == b { return 0 } if a < b { return -1 } return +1 }
// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package strings import ( "errors" "io" "unicode/utf8" ) // A Reader implements the io.Reader, io.ReaderAt, io.Seeker, io.WriterTo, // io.ByteScanner, and io.RuneScanner interfaces by reading // from a string. type Reader struct { s string i int64 // current reading index prevRune int // index of previous rune; or < 0 } // Len returns the number of bytes of the unread portion of the // string. func (r *Reader) Len() int { if r.i >= int64(len(r.s)) { return 0 } return int(int64(len(r.s)) - r.i) } // Size returns the original length of the underlying string. // Size is the number of bytes available for reading via ReadAt. // The returned value is always the same and is not affected by calls // to any other method. func (r *Reader) Size() int64 { return int64(len(r.s)) } func (r *Reader) Read(b []byte) (n int, err error) { if r.i >= int64(len(r.s)) { return 0, io.EOF } r.prevRune = -1 n = copy(b, r.s[r.i:]) r.i += int64(n) return } func (r *Reader) ReadAt(b []byte, off int64) (n int, err error) { // cannot modify state - see io.ReaderAt if off < 0 { return 0, errors.New("strings.Reader.ReadAt: negative offset") } if off >= int64(len(r.s)) { return 0, io.EOF } n = copy(b, r.s[off:]) if n < len(b) { err = io.EOF } return } func (r *Reader) ReadByte() (byte, error) { r.prevRune = -1 if r.i >= int64(len(r.s)) { return 0, io.EOF } b := r.s[r.i] r.i++ return b, nil } func (r *Reader) UnreadByte() error { r.prevRune = -1 if r.i <= 0 { return errors.New("strings.Reader.UnreadByte: at beginning of string") } r.i-- return nil } func (r *Reader) ReadRune() (ch rune, size int, err error) { if r.i >= int64(len(r.s)) { r.prevRune = -1 return 0, 0, io.EOF } r.prevRune = int(r.i) if c := r.s[r.i]; c < utf8.RuneSelf { r.i++ return rune(c), 1, nil } ch, size = utf8.DecodeRuneInString(r.s[r.i:]) r.i += int64(size) return } func (r *Reader) UnreadRune() error { if r.prevRune < 0 { return errors.New("strings.Reader.UnreadRune: previous operation was not ReadRune") } r.i = int64(r.prevRune) r.prevRune = -1 return nil } // Seek implements the io.Seeker interface. func (r *Reader) Seek(offset int64, whence int) (int64, error) { r.prevRune = -1 var abs int64 switch whence { case io.SeekStart: abs = offset case io.SeekCurrent: abs = r.i + offset case io.SeekEnd: abs = int64(len(r.s)) + offset default: return 0, errors.New("strings.Reader.Seek: invalid whence") } if abs < 0 { return 0, errors.New("strings.Reader.Seek: negative position") } r.i = abs return abs, nil } // WriteTo implements the io.WriterTo interface. func (r *Reader) WriteTo(w io.Writer) (n int64, err error) { r.prevRune = -1 if r.i >= int64(len(r.s)) { return 0, nil } s := r.s[r.i:] m, err := io.WriteString(w, s) if m > len(s) { panic("strings.Reader.WriteTo: invalid WriteString count") } r.i += int64(m) n = int64(m) if m != len(s) && err == nil { err = io.ErrShortWrite } return } // Reset resets the Reader to be reading from s. func (r *Reader) Reset(s string) { *r = Reader{s, 0, -1} } // NewReader returns a new Reader reading from s. // It is similar to bytes.NewBufferString but more efficient and read-only. func NewReader(s string) *Reader { return &Reader{s, 0, -1} }
// Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package strings import "io" // Replacer replaces a list of strings with replacements. // It is safe for concurrent use by multiple goroutines. type Replacer struct { r replacer } // replacer is the interface that a replacement algorithm needs to implement. type replacer interface { Replace(s string) string WriteString(w io.Writer, s string) (n int, err error) } // NewReplacer returns a new Replacer from a list of old, new string pairs. // Replacements are performed in order, without overlapping matches. func NewReplacer(oldnew ...string) *Replacer { if len(oldnew)%2 == 1 { panic("strings.NewReplacer: odd argument count") } if len(oldnew) == 2 && len(oldnew[0]) > 1 { return &Replacer{r: makeSingleStringReplacer(oldnew[0], oldnew[1])} } allNewBytes := true for i := 0; i < len(oldnew); i += 2 { if len(oldnew[i]) != 1 { return &Replacer{r: makeGenericReplacer(oldnew)} } if len(oldnew[i+1]) != 1 { allNewBytes = false } } if allNewBytes { r := byteReplacer{} for i := range r { r[i] = byte(i) } // The first occurrence of old->new map takes precedence // over the others with the same old string. for i := len(oldnew) - 2; i >= 0; i -= 2 { o := oldnew[i][0] n := oldnew[i+1][0] r[o] = n } return &Replacer{r: &r} } r := byteStringReplacer{} // The first occurrence of old->new map takes precedence // over the others with the same old string. for i := len(oldnew) - 2; i >= 0; i -= 2 { o := oldnew[i][0] n := oldnew[i+1] r[o] = []byte(n) } return &Replacer{r: &r} } // Replace returns a copy of s with all replacements performed. func (r *Replacer) Replace(s string) string { return r.r.Replace(s) } // WriteString writes s to w with all replacements performed. func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error) { return r.r.WriteString(w, s) } // trieNode is a node in a lookup trie for prioritized key/value pairs. Keys // and values may be empty. For example, the trie containing keys "ax", "ay", // "bcbc", "x" and "xy" could have eight nodes: // // n0 - // n1 a- // n2 .x+ // n3 .y+ // n4 b- // n5 .cbc+ // n6 x+ // n7 .y+ // // n0 is the root node, and its children are n1, n4 and n6; n1's children are // n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked // with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7 // (marked with a trailing "+") are complete keys. type trieNode struct { // value is the value of the trie node's key/value pair. It is empty if // this node is not a complete key. value string // priority is the priority (higher is more important) of the trie node's // key/value pair; keys are not necessarily matched shortest- or longest- // first. Priority is positive if this node is a complete key, and zero // otherwise. In the example above, positive/zero priorities are marked // with a trailing "+" or "-". priority int // A trie node may have zero, one or more child nodes: // * if the remaining fields are zero, there are no children. // * if prefix and next are non-zero, there is one child in next. // * if table is non-zero, it defines all the children. // // Prefixes are preferred over tables when there is one child, but the // root node always uses a table for lookup efficiency. // prefix is the difference in keys between this trie node and the next. // In the example above, node n4 has prefix "cbc" and n4's next node is n5. // Node n5 has no children and so has zero prefix, next and table fields. prefix string next *trieNode // table is a lookup table indexed by the next byte in the key, after // remapping that byte through genericReplacer.mapping to create a dense // index. In the example above, the keys only use 'a', 'b', 'c', 'x' and // 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and // genericReplacer.tableSize will be 5. Node n0's table will be // []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped // 'a', 'b' and 'x'. table []*trieNode } func (t *trieNode) add(key, val string, priority int, r *genericReplacer) { if key == "" { if t.priority == 0 { t.value = val t.priority = priority } return } if t.prefix != "" { // Need to split the prefix among multiple nodes. var n int // length of the longest common prefix for ; n < len(t.prefix) && n < len(key); n++ { if t.prefix[n] != key[n] { break } } if n == len(t.prefix) { t.next.add(key[n:], val, priority, r) } else if n == 0 { // First byte differs, start a new lookup table here. Looking up // what is currently t.prefix[0] will lead to prefixNode, and // looking up key[0] will lead to keyNode. var prefixNode *trieNode if len(t.prefix) == 1 { prefixNode = t.next } else { prefixNode = &trieNode{ prefix: t.prefix[1:], next: t.next, } } keyNode := new(trieNode) t.table = make([]*trieNode, r.tableSize) t.table[r.mapping[t.prefix[0]]] = prefixNode t.table[r.mapping[key[0]]] = keyNode t.prefix = "" t.next = nil keyNode.add(key[1:], val, priority, r) } else { // Insert new node after the common section of the prefix. next := &trieNode{ prefix: t.prefix[n:], next: t.next, } t.prefix = t.prefix[:n] t.next = next next.add(key[n:], val, priority, r) } } else if t.table != nil { // Insert into existing table. m := r.mapping[key[0]] if t.table[m] == nil { t.table[m] = new(trieNode) } t.table[m].add(key[1:], val, priority, r) } else { t.prefix = key t.next = new(trieNode) t.next.add("", val, priority, r) } } func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) { // Iterate down the trie to the end, and grab the value and keylen with // the highest priority. bestPriority := 0 node := &r.root n := 0 for node != nil { if node.priority > bestPriority && !(ignoreRoot && node == &r.root) { bestPriority = node.priority val = node.value keylen = n found = true } if s == "" { break } if node.table != nil { index := r.mapping[s[0]] if int(index) == r.tableSize { break } node = node.table[index] s = s[1:] n++ } else if node.prefix != "" && HasPrefix(s, node.prefix) { n += len(node.prefix) s = s[len(node.prefix):] node = node.next } else { break } } return } // genericReplacer is the fully generic algorithm. // It's used as a fallback when nothing faster can be used. type genericReplacer struct { root trieNode // tableSize is the size of a trie node's lookup table. It is the number // of unique key bytes. tableSize int // mapping maps from key bytes to a dense index for trieNode.table. mapping [256]byte } func makeGenericReplacer(oldnew []string) *genericReplacer { r := new(genericReplacer) // Find each byte used, then assign them each an index. for i := 0; i < len(oldnew); i += 2 { key := oldnew[i] for j := 0; j < len(key); j++ { r.mapping[key[j]] = 1 } } for _, b := range r.mapping { r.tableSize += int(b) } var index byte for i, b := range r.mapping { if b == 0 { r.mapping[i] = byte(r.tableSize) } else { r.mapping[i] = index index++ } } // Ensure root node uses a lookup table (for performance). r.root.table = make([]*trieNode, r.tableSize) for i := 0; i < len(oldnew); i += 2 { r.root.add(oldnew[i], oldnew[i+1], len(oldnew)-i, r) } return r } type appendSliceWriter []byte // Write writes to the buffer to satisfy io.Writer. func (w *appendSliceWriter) Write(p []byte) (int, error) { *w = append(*w, p...) return len(p), nil } // WriteString writes to the buffer without string->[]byte->string allocations. func (w *appendSliceWriter) WriteString(s string) (int, error) { *w = append(*w, s...) return len(s), nil } type stringWriterIface interface { WriteString(string) (int, error) } type stringWriter struct { w io.Writer } func (w stringWriter) WriteString(s string) (int, error) { return w.w.Write([]byte(s)) } func getStringWriter(w io.Writer) stringWriterIface { sw, ok := w.(stringWriterIface) if !ok { sw = stringWriter{w} } return sw } func (r *genericReplacer) Replace(s string) string { buf := make(appendSliceWriter, 0, len(s)) r.WriteString(&buf, s) return string(buf) } func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) { sw := getStringWriter(w) var last, wn int var prevMatchEmpty bool for i := 0; i <= len(s); { // Fast path: s[i] is not a prefix of any pattern. if i != len(s) && r.root.priority == 0 { index := int(r.mapping[s[i]]) if index == r.tableSize || r.root.table[index] == nil { i++ continue } } // Ignore the empty match iff the previous loop found the empty match. val, keylen, match := r.lookup(s[i:], prevMatchEmpty) prevMatchEmpty = match && keylen == 0 if match { wn, err = sw.WriteString(s[last:i]) n += wn if err != nil { return } wn, err = sw.WriteString(val) n += wn if err != nil { return } i += keylen last = i continue } i++ } if last != len(s) { wn, err = sw.WriteString(s[last:]) n += wn } return } // singleStringReplacer is the implementation that's used when there is only // one string to replace (and that string has more than one byte). type singleStringReplacer struct { finder *stringFinder // value is the new string that replaces that pattern when it's found. value string } func makeSingleStringReplacer(pattern string, value string) *singleStringReplacer { return &singleStringReplacer{finder: makeStringFinder(pattern), value: value} } func (r *singleStringReplacer) Replace(s string) string { var buf []byte i, matched := 0, false for { match := r.finder.next(s[i:]) if match == -1 { break } matched = true buf = append(buf, s[i:i+match]...) buf = append(buf, r.value...) i += match + len(r.finder.pattern) } if !matched { return s } buf = append(buf, s[i:]...) return string(buf) } func (r *singleStringReplacer) WriteString(w io.Writer, s string) (n int, err error) { sw := getStringWriter(w) var i, wn int for { match := r.finder.next(s[i:]) if match == -1 { break } wn, err = sw.WriteString(s[i : i+match]) n += wn if err != nil { return } wn, err = sw.WriteString(r.value) n += wn if err != nil { return } i += match + len(r.finder.pattern) } wn, err = sw.WriteString(s[i:]) n += wn return } // byteReplacer is the implementation that's used when all the "old" // and "new" values are single ASCII bytes. // The array contains replacement bytes indexed by old byte. type byteReplacer [256]byte func (r *byteReplacer) Replace(s string) string { var buf []byte // lazily allocated for i := 0; i < len(s); i++ { b := s[i] if r[b] != b { if buf == nil { buf = []byte(s) } buf[i] = r[b] } } if buf == nil { return s } return string(buf) } func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) { // TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation. bufsize := 32 << 10 if len(s) < bufsize { bufsize = len(s) } buf := make([]byte, bufsize) for len(s) > 0 { ncopy := copy(buf, s[:]) s = s[ncopy:] for i, b := range buf[:ncopy] { buf[i] = r[b] } wn, err := w.Write(buf[:ncopy]) n += wn if err != nil { return n, err } } return n, nil } // byteStringReplacer is the implementation that's used when all the // "old" values are single ASCII bytes but the "new" values vary in size. // The array contains replacement byte slices indexed by old byte. // A nil []byte means that the old byte should not be replaced. type byteStringReplacer [256][]byte func (r *byteStringReplacer) Replace(s string) string { newSize := len(s) anyChanges := false for i := 0; i < len(s); i++ { b := s[i] if r[b] != nil { anyChanges = true // The -1 is because we are replacing 1 byte with len(r[b]) bytes. newSize += len(r[b]) - 1 } } if !anyChanges { return s } buf := make([]byte, newSize) bi := buf for i := 0; i < len(s); i++ { b := s[i] if r[b] != nil { n := copy(bi, r[b]) bi = bi[n:] } else { bi[0] = b bi = bi[1:] } } return string(buf) } func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err error) { sw := getStringWriter(w) last := 0 for i := 0; i < len(s); i++ { b := s[i] if r[b] == nil { continue } if last != i { nw, err := sw.WriteString(s[last:i]) n += nw if err != nil { return n, err } } last = i + 1 nw, err := w.Write(r[b]) n += nw if err != nil { return n, err } } if last != len(s) { var nw int nw, err = sw.WriteString(s[last:]) n += nw } return }
// Copyright 2012 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package strings // stringFinder efficiently finds strings in a source text. It's implemented // using the Boyer-Moore string search algorithm: // http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm // http://www.cs.utexas.edu/~moore/publications/fstrpos.pdf (note: this aged // document uses 1-based indexing) type stringFinder struct { // pattern is the string that we are searching for in the text. pattern string // badCharSkip[b] contains the distance between the last byte of pattern // and the rightmost occurrence of b in pattern. If b is not in pattern, // badCharSkip[b] is len(pattern). // // Whenever a mismatch is found with byte b in the text, we can safely // shift the matching frame at least badCharSkip[b] until the next time // the matching char could be in alignment. badCharSkip [256]int // goodSuffixSkip[i] defines how far we can shift the matching frame given // that the suffix pattern[i+1:] matches, but the byte pattern[i] does // not. There are two cases to consider: // // 1. The matched suffix occurs elsewhere in pattern (with a different // byte preceding it that we might possibly match). In this case, we can // shift the matching frame to align with the next suffix chunk. For // example, the pattern "mississi" has the suffix "issi" next occurring // (in right-to-left order) at index 1, so goodSuffixSkip[3] == // shift+len(suffix) == 3+4 == 7. // // 2. If the matched suffix does not occur elsewhere in pattern, then the // matching frame may share part of its prefix with the end of the // matching suffix. In this case, goodSuffixSkip[i] will contain how far // to shift the frame to align this portion of the prefix to the // suffix. For example, in the pattern "abcxxxabc", when the first // mismatch from the back is found to be in position 3, the matching // suffix "xxabc" is not found elsewhere in the pattern. However, its // rightmost "abc" (at position 6) is a prefix of the whole pattern, so // goodSuffixSkip[3] == shift+len(suffix) == 6+5 == 11. goodSuffixSkip []int } func makeStringFinder(pattern string) *stringFinder { f := &stringFinder{ pattern: pattern, goodSuffixSkip: make([]int, len(pattern)), } // last is the index of the last character in the pattern. last := len(pattern) - 1 // Build bad character table. // Bytes not in the pattern can skip one pattern's length. for i := range f.badCharSkip { f.badCharSkip[i] = len(pattern) } // The loop condition is < instead of <= so that the last byte does not // have a zero distance to itself. Finding this byte out of place implies // that it is not in the last position. for i := 0; i < last; i++ { f.badCharSkip[pattern[i]] = last - i } // Build good suffix table. // First pass: set each value to the next index which starts a prefix of // pattern. lastPrefix := last for i := last; i >= 0; i-- { if HasPrefix(pattern, pattern[i+1:]) { lastPrefix = i + 1 } // lastPrefix is the shift, and (last-i) is len(suffix). f.goodSuffixSkip[i] = lastPrefix + last - i } // Second pass: find repeats of pattern's suffix starting from the front. for i := 0; i < last; i++ { lenSuffix := longestCommonSuffix(pattern, pattern[1:i+1]) if pattern[i-lenSuffix] != pattern[last-lenSuffix] { // (last-i) is the shift, and lenSuffix is len(suffix). f.goodSuffixSkip[last-lenSuffix] = lenSuffix + last - i } } return f } func longestCommonSuffix(a, b string) (i int) { for ; i < len(a) && i < len(b); i++ { if a[len(a)-1-i] != b[len(b)-1-i] { break } } return } // next returns the index in text of the first occurrence of the pattern. If // the pattern is not found, it returns -1. func (f *stringFinder) next(text string) int { i := len(f.pattern) - 1 for i < len(text) { // Compare backwards from the end until the first unmatching character. j := len(f.pattern) - 1 for j >= 0 && text[i] == f.pattern[j] { i-- j-- } if j < 0 { return i + 1 // match } i += max(f.badCharSkip[text[i]], f.goodSuffixSkip[j]) } return -1 } func max(a, b int) int { if a > b { return a } return b }
// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package strings implements simple functions to manipulate UTF-8 encoded strings. // // For information about UTF-8 strings in Go, see https://blog.golang.org/strings. package strings import ( "unicode" "unicode/utf8" ) // explode splits s into a slice of UTF-8 strings, // one string per Unicode character up to a maximum of n (n < 0 means no limit). // Invalid UTF-8 sequences become correct encodings of U+FFFD. func explode(s string, n int) []string { l := utf8.RuneCountInString(s) if n < 0 || n > l { n = l } a := make([]string, n) for i := 0; i < n-1; i++ { ch, size := utf8.DecodeRuneInString(s) a[i] = s[:size] s = s[size:] if ch == utf8.RuneError { a[i] = string(utf8.RuneError) } } if n > 0 { a[n-1] = s } return a } // primeRK is the prime base used in Rabin-Karp algorithm. const primeRK = 16777619 // hashStr returns the hash and the appropriate multiplicative // factor for use in Rabin-Karp algorithm. func hashStr(sep string) (uint32, uint32) { hash := uint32(0) for i := 0; i < len(sep); i++ { hash = hash*primeRK + uint32(sep[i]) } var pow, sq uint32 = 1, primeRK for i := len(sep); i > 0; i >>= 1 { if i&1 != 0 { pow *= sq } sq *= sq } return hash, pow } // hashStrRev returns the hash of the reverse of sep and the // appropriate multiplicative factor for use in Rabin-Karp algorithm. func hashStrRev(sep string) (uint32, uint32) { hash := uint32(0) for i := len(sep) - 1; i >= 0; i-- { hash = hash*primeRK + uint32(sep[i]) } var pow, sq uint32 = 1, primeRK for i := len(sep); i > 0; i >>= 1 { if i&1 != 0 { pow *= sq } sq *= sq } return hash, pow } // Count counts the number of non-overlapping instances of sep in s. // If sep is an empty string, Count returns 1 + the number of Unicode code points in s. func Count(s, sep string) int { n := 0 // special cases switch { case len(sep) == 0: return utf8.RuneCountInString(s) + 1 case len(sep) == 1: // special case worth making fast c := sep[0] for i := 0; i < len(s); i++ { if s[i] == c { n++ } } return n case len(sep) > len(s): return 0 case len(sep) == len(s): if sep == s { return 1 } return 0 } // Rabin-Karp search hashsep, pow := hashStr(sep) h := uint32(0) for i := 0; i < len(sep); i++ { h = h*primeRK + uint32(s[i]) } lastmatch := 0 if h == hashsep && s[:len(sep)] == sep { n++ lastmatch = len(sep) } for i := len(sep); i < len(s); { h *= primeRK h += uint32(s[i]) h -= pow * uint32(s[i-len(sep)]) i++ if h == hashsep && lastmatch <= i-len(sep) && s[i-len(sep):i] == sep { n++ lastmatch = i } } return n } // Contains reports whether substr is within s. func Contains(s, substr string) bool { return Index(s, substr) >= 0 } // ContainsAny reports whether any Unicode code points in chars are within s. func ContainsAny(s, chars string) bool { return IndexAny(s, chars) >= 0 } // ContainsRune reports whether the Unicode code point r is within s. func ContainsRune(s string, r rune) bool { return IndexRune(s, r) >= 0 } // LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s. func LastIndex(s, sep string) int { n := len(sep) switch { case n == 0: return len(s) case n == 1: return LastIndexByte(s, sep[0]) case n == len(s): if sep == s { return 0 } return -1 case n > len(s): return -1 } // Rabin-Karp search from the end of the string hashsep, pow := hashStrRev(sep) last := len(s) - n var h uint32 for i := len(s) - 1; i >= last; i-- { h = h*primeRK + uint32(s[i]) } if h == hashsep && s[last:] == sep { return last } for i := last - 1; i >= 0; i-- { h *= primeRK h += uint32(s[i]) h -= pow * uint32(s[i+n]) if h == hashsep && s[i:i+n] == sep { return i } } return -1 } // IndexRune returns the index of the first instance of the Unicode code point // r, or -1 if rune is not present in s. func IndexRune(s string, r rune) int { switch { case r < utf8.RuneSelf: return IndexByte(s, byte(r)) default: for i, c := range s { if c == r { return i } } } return -1 } // IndexAny returns the index of the first instance of any Unicode code point // from chars in s, or -1 if no Unicode code point from chars is present in s. func IndexAny(s, chars string) int { if len(chars) > 0 { for i, c := range s { for _, m := range chars { if c == m { return i } } } } return -1 } // LastIndexAny returns the index of the last instance of any Unicode code // point from chars in s, or -1 if no Unicode code point from chars is // present in s. func LastIndexAny(s, chars string) int { if len(chars) > 0 { for i := len(s); i > 0; { rune, size := utf8.DecodeLastRuneInString(s[0:i]) i -= size for _, m := range chars { if rune == m { return i } } } } return -1 } // LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s. func LastIndexByte(s string, c byte) int { for i := len(s) - 1; i >= 0; i-- { if s[i] == c { return i } } return -1 } // Generic split: splits after each instance of sep, // including sepSave bytes of sep in the subarrays. func genSplit(s, sep string, sepSave, n int) []string { if n == 0 { return nil } if sep == "" { return explode(s, n) } if n < 0 { n = Count(s, sep) + 1 } c := sep[0] start := 0 a := make([]string, n) na := 0 for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ { if s[i] == c && (len(sep) == 1 || s[i:i+len(sep)] == sep) { a[na] = s[start : i+sepSave] na++ start = i + len(sep) i += len(sep) - 1 } } a[na] = s[start:] return a[0 : na+1] } // SplitN slices s into substrings separated by sep and returns a slice of // the substrings between those separators. // If sep is empty, SplitN splits after each UTF-8 sequence. // The count determines the number of substrings to return: // n > 0: at most n substrings; the last substring will be the unsplit remainder. // n == 0: the result is nil (zero substrings) // n < 0: all substrings func SplitN(s, sep string, n int) []string { return genSplit(s, sep, 0, n) } // SplitAfterN slices s into substrings after each instance of sep and // returns a slice of those substrings. // If sep is empty, SplitAfterN splits after each UTF-8 sequence. // The count determines the number of substrings to return: // n > 0: at most n substrings; the last substring will be the unsplit remainder. // n == 0: the result is nil (zero substrings) // n < 0: all substrings func SplitAfterN(s, sep string, n int) []string { return genSplit(s, sep, len(sep), n) } // Split slices s into all substrings separated by sep and returns a slice of // the substrings between those separators. // If sep is empty, Split splits after each UTF-8 sequence. // It is equivalent to SplitN with a count of -1. func Split(s, sep string) []string { return genSplit(s, sep, 0, -1) } // SplitAfter slices s into all substrings after each instance of sep and // returns a slice of those substrings. // If sep is empty, SplitAfter splits after each UTF-8 sequence. // It is equivalent to SplitAfterN with a count of -1. func SplitAfter(s, sep string) []string { return genSplit(s, sep, len(sep), -1) } // Fields splits the string s around each instance of one or more consecutive white space // characters, as defined by unicode.IsSpace, returning an array of substrings of s or an // empty list if s contains only white space. func Fields(s string) []string { return FieldsFunc(s, unicode.IsSpace) } // FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c) // and returns an array of slices of s. If all code points in s satisfy f(c) or the // string is empty, an empty slice is returned. // FieldsFunc makes no guarantees about the order in which it calls f(c). // If f does not return consistent results for a given c, FieldsFunc may crash. func FieldsFunc(s string, f func(rune) bool) []string { // First count the fields. n := 0 inField := false for _, rune := range s { wasInField := inField inField = !f(rune) if inField && !wasInField { n++ } } // Now create them. a := make([]string, n) na := 0 fieldStart := -1 // Set to -1 when looking for start of field. for i, rune := range s { if f(rune) { if fieldStart >= 0 { a[na] = s[fieldStart:i] na++ fieldStart = -1 } } else if fieldStart == -1 { fieldStart = i } } if fieldStart >= 0 { // Last field might end at EOF. a[na] = s[fieldStart:] } return a } // Join concatenates the elements of a to create a single string. The separator string // sep is placed between elements in the resulting string. func Join(a []string, sep string) string { switch len(a) { case 0: return "" case 1: return a[0] case 2: // Special case for common small values. // Remove if golang.org/issue/6714 is fixed return a[0] + sep + a[1] case 3: // Special case for common small values. // Remove if golang.org/issue/6714 is fixed return a[0] + sep + a[1] + sep + a[2] } n := len(sep) * (len(a) - 1) for i := 0; i < len(a); i++ { n += len(a[i]) } b := make([]byte, n) bp := copy(b, a[0]) for _, s := range a[1:] { bp += copy(b[bp:], sep) bp += copy(b[bp:], s) } return string(b) } // HasPrefix tests whether the string s begins with prefix. func HasPrefix(s, prefix string) bool { return len(s) >= len(prefix) && s[0:len(prefix)] == prefix } // HasSuffix tests whether the string s ends with suffix. func HasSuffix(s, suffix string) bool { return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix } // Map returns a copy of the string s with all its characters modified // according to the mapping function. If mapping returns a negative value, the character is // dropped from the string with no replacement. func Map(mapping func(rune) rune, s string) string { // In the worst case, the string can grow when mapped, making // things unpleasant. But it's so rare we barge in assuming it's // fine. It could also shrink but that falls out naturally. maxbytes := len(s) // length of b nbytes := 0 // number of bytes encoded in b // The output buffer b is initialized on demand, the first // time a character differs. var b []byte for i, c := range s { r := mapping(c) if b == nil { if r == c { continue } b = make([]byte, maxbytes) nbytes = copy(b, s[:i]) } if r >= 0 { wid := 1 if r >= utf8.RuneSelf { wid = utf8.RuneLen(r) } if nbytes+wid > maxbytes { // Grow the buffer. maxbytes = maxbytes*2 + utf8.UTFMax nb := make([]byte, maxbytes) copy(nb, b[0:nbytes]) b = nb } nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r) } } if b == nil { return s } return string(b[0:nbytes]) } // Repeat returns a new string consisting of count copies of the string s. func Repeat(s string, count int) string { b := make([]byte, len(s)*count) bp := copy(b, s) for bp < len(b) { copy(b[bp:], b[:bp]) bp *= 2 } return string(b) } // ToUpper returns a copy of the string s with all Unicode letters mapped to their upper case. func ToUpper(s string) string { return Map(unicode.ToUpper, s) } // ToLower returns a copy of the string s with all Unicode letters mapped to their lower case. func ToLower(s string) string { return Map(unicode.ToLower, s) } // ToTitle returns a copy of the string s with all Unicode letters mapped to their title case. func ToTitle(s string) string { return Map(unicode.ToTitle, s) } // ToUpperSpecial returns a copy of the string s with all Unicode letters mapped to their // upper case, giving priority to the special casing rules. func ToUpperSpecial(_case unicode.SpecialCase, s string) string { return Map(func(r rune) rune { return _case.ToUpper(r) }, s) } // ToLowerSpecial returns a copy of the string s with all Unicode letters mapped to their // lower case, giving priority to the special casing rules. func ToLowerSpecial(_case unicode.SpecialCase, s string) string { return Map(func(r rune) rune { return _case.ToLower(r) }, s) } // ToTitleSpecial returns a copy of the string s with all Unicode letters mapped to their // title case, giving priority to the special casing rules. func ToTitleSpecial(_case unicode.SpecialCase, s string) string { return Map(func(r rune) rune { return _case.ToTitle(r) }, s) } // isSeparator reports whether the rune could mark a word boundary. // TODO: update when package unicode captures more of the properties. func isSeparator(r rune) bool { // ASCII alphanumerics and underscore are not separators if r <= 0x7F { switch { case '0' <= r && r <= '9': return false case 'a' <= r && r <= 'z': return false case 'A' <= r && r <= 'Z': return false case r == '_': return false } return true } // Letters and digits are not separators if unicode.IsLetter(r) || unicode.IsDigit(r) { return false } // Otherwise, all we can do for now is treat spaces as separators. return unicode.IsSpace(r) } // Title returns a copy of the string s with all Unicode letters that begin words // mapped to their title case. // // BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly. func Title(s string) string { // Use a closure here to remember state. // Hackish but effective. Depends on Map scanning in order and calling // the closure once per rune. prev := ' ' return Map( func(r rune) rune { if isSeparator(prev) { prev = r return unicode.ToTitle(r) } prev = r return r }, s) } // TrimLeftFunc returns a slice of the string s with all leading // Unicode code points c satisfying f(c) removed. func TrimLeftFunc(s string, f func(rune) bool) string { i := indexFunc(s, f, false) if i == -1 { return "" } return s[i:] } // TrimRightFunc returns a slice of the string s with all trailing // Unicode code points c satisfying f(c) removed. func TrimRightFunc(s string, f func(rune) bool) string { i := lastIndexFunc(s, f, false) if i >= 0 && s[i] >= utf8.RuneSelf { _, wid := utf8.DecodeRuneInString(s[i:]) i += wid } else { i++ } return s[0:i] } // TrimFunc returns a slice of the string s with all leading // and trailing Unicode code points c satisfying f(c) removed. func TrimFunc(s string, f func(rune) bool) string { return TrimRightFunc(TrimLeftFunc(s, f), f) } // IndexFunc returns the index into s of the first Unicode // code point satisfying f(c), or -1 if none do. func IndexFunc(s string, f func(rune) bool) int { return indexFunc(s, f, true) } // LastIndexFunc returns the index into s of the last // Unicode code point satisfying f(c), or -1 if none do. func LastIndexFunc(s string, f func(rune) bool) int { return lastIndexFunc(s, f, true) } // indexFunc is the same as IndexFunc except that if // truth==false, the sense of the predicate function is // inverted. func indexFunc(s string, f func(rune) bool, truth bool) int { start := 0 for start < len(s) { wid := 1 r := rune(s[start]) if r >= utf8.RuneSelf { r, wid = utf8.DecodeRuneInString(s[start:]) } if f(r) == truth { return start } start += wid } return -1 } // lastIndexFunc is the same as LastIndexFunc except that if // truth==false, the sense of the predicate function is // inverted. func lastIndexFunc(s string, f func(rune) bool, truth bool) int { for i := len(s); i > 0; { r, size := utf8.DecodeLastRuneInString(s[0:i]) i -= size if f(r) == truth { return i } } return -1 } func makeCutsetFunc(cutset string) func(rune) bool { return func(r rune) bool { return IndexRune(cutset, r) >= 0 } } // Trim returns a slice of the string s with all leading and // trailing Unicode code points contained in cutset removed. func Trim(s string, cutset string) string { if s == "" || cutset == "" { return s } return TrimFunc(s, makeCutsetFunc(cutset)) } // TrimLeft returns a slice of the string s with all leading // Unicode code points contained in cutset removed. func TrimLeft(s string, cutset string) string { if s == "" || cutset == "" { return s } return TrimLeftFunc(s, makeCutsetFunc(cutset)) } // TrimRight returns a slice of the string s, with all trailing // Unicode code points contained in cutset removed. func TrimRight(s string, cutset string) string { if s == "" || cutset == "" { return s } return TrimRightFunc(s, makeCutsetFunc(cutset)) } // TrimSpace returns a slice of the string s, with all leading // and trailing white space removed, as defined by Unicode. func TrimSpace(s string) string { return TrimFunc(s, unicode.IsSpace) } // TrimPrefix returns s without the provided leading prefix string. // If s doesn't start with prefix, s is returned unchanged. func TrimPrefix(s, prefix string) string { if HasPrefix(s, prefix) { return s[len(prefix):] } return s } // TrimSuffix returns s without the provided trailing suffix string. // If s doesn't end with suffix, s is returned unchanged. func TrimSuffix(s, suffix string) string { if HasSuffix(s, suffix) { return s[:len(s)-len(suffix)] } return s } // Replace returns a copy of the string s with the first n // non-overlapping instances of old replaced by new. // If old is empty, it matches at the beginning of the string // and after each UTF-8 sequence, yielding up to k+1 replacements // for a k-rune string. // If n < 0, there is no limit on the number of replacements. func Replace(s, old, new string, n int) string { if old == new || n == 0 { return s // avoid allocation } // Compute number of replacements. if m := Count(s, old); m == 0 { return s // avoid allocation } else if n < 0 || m < n { n = m } // Apply replacements to buffer. t := make([]byte, len(s)+n*(len(new)-len(old))) w := 0 start := 0 for i := 0; i < n; i++ { j := start if len(old) == 0 { if i > 0 { _, wid := utf8.DecodeRuneInString(s[start:]) j += wid } } else { j += Index(s[start:], old) } w += copy(t[w:], s[start:j]) w += copy(t[w:], new) start = j + len(old) } w += copy(t[w:], s[start:]) return string(t[0:w]) } // EqualFold reports whether s and t, interpreted as UTF-8 strings, // are equal under Unicode case-folding. func EqualFold(s, t string) bool { for s != "" && t != "" { // Extract first rune from each string. var sr, tr rune if s[0] < utf8.RuneSelf { sr, s = rune(s[0]), s[1:] } else { r, size := utf8.DecodeRuneInString(s) sr, s = r, s[size:] } if t[0] < utf8.RuneSelf { tr, t = rune(t[0]), t[1:] } else { r, size := utf8.DecodeRuneInString(t) tr, t = r, t[size:] } // If they match, keep going; if not, return false. // Easy case. if tr == sr { continue } // Make sr < tr to simplify what follows. if tr < sr { tr, sr = sr, tr } // Fast check for ASCII. if tr < utf8.RuneSelf && 'A' <= sr && sr <= 'Z' { // ASCII, and sr is upper case. tr must be lower case. if tr == sr+'a'-'A' { continue } return false } // General case. SimpleFold(x) returns the next equivalent rune > x // or wraps around to smaller values. r := unicode.SimpleFold(sr) for r != sr && r < tr { r = unicode.SimpleFold(r) } if r == tr { continue } return false } // One string is empty. Are both? return s == t }
// Copyright 2015 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package strings // indexShortStr returns the index of the first instance of c in s, or -1 if c is not present in s. // indexShortStr requires 2 <= len(c) <= shortStringLen func indexShortStr(s, c string) int // ../runtime/asm_$GOARCH.s const shortStringLen = 31 // Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. func Index(s, sep string) int { n := len(sep) switch { case n == 0: return 0 case n == 1: return IndexByte(s, sep[0]) case n <= shortStringLen: return indexShortStr(s, sep) case n == len(s): if sep == s { return 0 } return -1 case n > len(s): return -1 } // Rabin-Karp search hashsep, pow := hashStr(sep) var h uint32 for i := 0; i < n; i++ { h = h*primeRK + uint32(s[i]) } if h == hashsep && s[:n] == sep { return 0 } for i := n; i < len(s); { h *= primeRK h += uint32(s[i]) h -= pow * uint32(s[i-n]) i++ if h == hashsep && s[i-n:i] == sep { return i - n } } return -1 }