tinygo/src/runtime/string.go
Michael Teichgraeber 7461c298dd runtime: make stringNext use byteindex only, fix index offset
Use stringIterator.byteindex as the loop index, and remove
stringIterator.rangeindex, as "the index of the loop is the starting
position of the current rune, measured in bytes".  This patch also fixes
the current loop index returned by stringNext, using `it.byteindex'
before - not after - `length' is added.
2019-01-27 23:31:43 +01:00

178 строки
4,2 КиБ
Go

package runtime
// This file implements functions related to Go strings.
import (
"unsafe"
)
// The underlying struct for the Go string type.
// The functions in this file build and read string values through this
// layout: a pointer to the data plus a byte count.
type _string struct {
// Pointer to the first byte of the string data.
ptr *byte
// Number of bytes (not runes) of string data starting at ptr.
length uintptr
}
// The iterator state for a range over a string.
type stringIterator struct {
// Byte offset into the string at which the next rune starts. stringNext
// reads it as the loop index and then advances it by the number of bytes
// the decoded rune occupied.
byteindex uintptr
}
// stringEqual reports whether x and y consist of exactly the same bytes.
//
// The comparison is spelled out byte by byte instead of being written as
// x == y.
// NOTE(review): presumably the compiler lowers string equality to this
// function, in which case using == here would recurse - confirm.
//
//go:nobounds
func stringEqual(x, y string) bool {
	n := len(x)
	if n != len(y) {
		// Strings of different lengths can never match.
		return false
	}
	for pos := 0; pos != n; pos++ {
		if x[pos] == y[pos] {
			continue
		}
		return false
	}
	return true
}
// stringLess reports whether x sorts before y when the two strings are
// compared byte by byte. When one string is a prefix of the other, the shorter
// string sorts first; equal strings are not less than each other.
//
//go:nobounds
func stringLess(x, y string) bool {
	// Only the bytes that both strings have can be compared pairwise.
	common := len(y)
	if len(x) < common {
		common = len(x)
	}
	for pos := 0; pos < common; pos++ {
		// The first differing byte decides the ordering.
		if a, b := x[pos], y[pos]; a != b {
			return a < b
		}
	}
	// All shared bytes match, so the ordering is decided by length alone.
	return len(x) < len(y)
}
// stringConcat returns the concatenation of x and y.
//
// When either operand is empty the other operand is returned unchanged and
// nothing is copied. Otherwise a buffer of x.length+y.length bytes is obtained
// from alloc and the bytes of x followed by the bytes of y are copied into it.
func stringConcat(x, y _string) _string {
	if x.length == 0 {
		return y
	}
	if y.length == 0 {
		return x
	}

	total := x.length + y.length
	dst := alloc(total)
	memcpy(dst, unsafe.Pointer(x.ptr), x.length)
	// The bytes of y are placed directly after the bytes of x.
	tail := unsafe.Pointer(uintptr(dst) + x.length)
	memcpy(tail, unsafe.Pointer(y.ptr), y.length)
	return _string{ptr: (*byte)(dst), length: total}
}
// stringFromBytes creates a string from a []byte slice, passed in as its
// (ptr, len, cap) representation.
//
// The first x.len bytes are copied into a buffer obtained from alloc, and the
// returned string points at that copy rather than at the slice's own memory.
// The capacity of the slice is not used.
func stringFromBytes(x struct {
	ptr *byte
	len uintptr
	cap uintptr
}) _string {
	n := x.len
	dst := alloc(n)
	memcpy(dst, unsafe.Pointer(x.ptr), n)
	return _string{ptr: (*byte)(dst), length: n}
}
// stringToBytes converts a string to a []byte slice, returned as its
// (ptr, len, cap) representation.
//
// The string bytes are copied into a buffer obtained from alloc; the returned
// slice points at that copy, with both its length and its capacity set to the
// length of the string.
func stringToBytes(x _string) (slice struct {
	ptr *byte
	len uintptr
	cap uintptr
}) {
	n := x.length
	dst := alloc(n)
	memcpy(dst, unsafe.Pointer(x.ptr), n)
	slice.ptr, slice.len, slice.cap = (*byte)(dst), n, n
	return slice
}
// stringFromUnicode creates a string holding the UTF-8 encoding of the code
// point x, as produced by encodeUTF8.
func stringFromUnicode(x rune) _string {
	encoded, n := encodeUTF8(x)
	// The returned string points straight into the 4-byte array, so (per the
	// original author's note) the array will be heap allocated. The heap most
	// likely doesn't work with blocks below 4 bytes, so there's no point in
	// allocating a smaller buffer for the string here.
	data := (*byte)(unsafe.Pointer(&encoded))
	return _string{ptr: data, length: n}
}
// stringNext performs one step of a range loop over the string s.
//
// Returns (ok, key, value). ok is false once the iterator has reached the end
// of s, in which case key and value are zero. Otherwise key is the byte offset
// at which the current rune starts, value is the rune decoded at that offset,
// and the iterator is moved past the bytes of that rune.
func stringNext(s string, it *stringIterator) (bool, int, rune) {
	start := int(it.byteindex)
	if start >= len(s) {
		return false, 0, 0
	}
	value, width := decodeUTF8(s, it.byteindex)
	// The key is the offset before advancing: it marks where the rune starts.
	it.byteindex += width
	return true, start, value
}
// encodeUTF8 converts a Unicode code point into its UTF-8 encoding.
//
// It returns a 4-byte array that holds the encoded bytes, padded with zeros,
// together with the number of bytes that are actually used (1 to 4).
//
// Values that are not valid Unicode code points - negative values, UTF-16
// surrogates (U+D800 through U+DFFF) and anything above U+10FFFF - are encoded
// as U+FFFD, the replacement character. This matches the result of converting
// such a rune to a string in Go.
func encodeUTF8(x rune) ([4]byte, uintptr) {
	// https://stackoverflow.com/questions/6240055/manually-converting-unicode-codepoints-into-utf-8-and-utf-16
	// Note: this code can probably be optimized (in size and speed).
	switch {
	case x < 0 || (0xd800 <= x && x <= 0xdfff) || x > 0x10ffff:
		// Invalid Unicode code point. This has to be checked first: a negative
		// value would otherwise pass the x <= 0x7f test below, and a surrogate
		// would be encoded as an (invalid) three-byte sequence.
		return [4]byte{0xef, 0xbf, 0xbd, 0}, 3
	case x <= 0x7f:
		return [4]byte{byte(x), 0, 0, 0}, 1
	case x <= 0x7ff:
		b1 := 0xc0 | byte(x>>6)
		b2 := 0x80 | byte(x&0x3f)
		return [4]byte{b1, b2, 0, 0}, 2
	case x <= 0xffff:
		b1 := 0xe0 | byte(x>>12)
		b2 := 0x80 | byte((x>>6)&0x3f)
		b3 := 0x80 | byte((x>>0)&0x3f)
		return [4]byte{b1, b2, b3, 0}, 3
	default:
		// 0x10000 <= x <= 0x10ffff: everything else was handled above.
		b1 := 0xf0 | byte(x>>18)
		b2 := 0x80 | byte((x>>12)&0x3f)
		b3 := 0x80 | byte((x>>6)&0x3f)
		b4 := 0x80 | byte((x>>0)&0x3f)
		return [4]byte{b1, b2, b3, b4}, 4
	}
}
// decodeUTF8 decodes the single UTF-8 encoded character that starts at byte
// offset index in s. The caller must make sure that index < len(s).
//
// It returns the decoded rune and the number of bytes it occupies (1 to 4).
// If the bytes at index are not a valid UTF-8 sequence, it returns U+FFFD (the
// replacement character) with a width of 1, so that the caller resumes
// decoding at the very next byte. A sequence is invalid when:
//   - the first byte is not a valid lead byte (e.g. a stray continuation byte),
//   - the string ends before the sequence is complete,
//   - a trailing byte is not a continuation byte of the form 10xxxxxx,
//   - the value is encoded in more bytes than necessary (overlong encoding),
//   - the value is a UTF-16 surrogate or lies above U+10FFFF.
//
//go:nobounds
func decodeUTF8(s string, index uintptr) (rune, uintptr) {
	remaining := uintptr(len(s)) - index // must be >= 1 before calling this function
	x := s[index]
	switch {
	case x&0x80 == 0x00: // 0xxxxxxx
		return rune(x), 1
	case x&0xe0 == 0xc0: // 110xxxxx 10xxxxxx
		if remaining < 2 {
			break
		}
		c1 := s[index+1]
		if c1&0xc0 != 0x80 {
			break
		}
		r := rune(x&0x1f)<<6 | rune(c1&0x3f)
		if r < 0x80 {
			// Overlong: the value would have fit in a single byte.
			break
		}
		return r, 2
	case x&0xf0 == 0xe0: // 1110xxxx 10xxxxxx 10xxxxxx
		if remaining < 3 {
			break
		}
		c1, c2 := s[index+1], s[index+2]
		if c1&0xc0 != 0x80 || c2&0xc0 != 0x80 {
			break
		}
		r := rune(x&0x0f)<<12 | rune(c1&0x3f)<<6 | rune(c2&0x3f)
		if r < 0x800 || (0xd800 <= r && r <= 0xdfff) {
			// Overlong encoding, or a UTF-16 surrogate (not a valid code point).
			break
		}
		return r, 3
	case x&0xf8 == 0xf0: // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
		if remaining < 4 {
			break
		}
		c1, c2, c3 := s[index+1], s[index+2], s[index+3]
		if c1&0xc0 != 0x80 || c2&0xc0 != 0x80 || c3&0xc0 != 0x80 {
			break
		}
		r := rune(x&0x07)<<18 | rune(c1&0x3f)<<12 | rune(c2&0x3f)<<6 | rune(c3&0x3f)
		if r < 0x10000 || r > 0x10ffff {
			// Overlong encoding, or beyond the last Unicode code point.
			break
		}
		return r, 4
	}
	// Every invalid sequence ends up here and consumes exactly one byte.
	return 0xfffd, 1
}
// indexByte returns the index of the first instance of c in s, or -1 if c is
// not present in s. The index counts bytes, and s is scanned byte by byte from
// the start.
//
//go:linkname indexByte strings.IndexByte
func indexByte(s string, c byte) int {
	for pos, n := 0, len(s); pos < n; pos++ {
		if s[pos] != c {
			continue
		}
		return pos
	}
	return -1
}