From dab8ef6f9f0a4a945114a96c28a422a73eb1c770 Mon Sep 17 00:00:00 2001 From: Ting-Wei Lan Date: Mon, 28 Jan 2019 18:03:53 +0800 Subject: core: vm: sqlvm: process non-UTF-8 input and escape sequences Our parser is able to process queries with invalid UTF-8, provided that it is compatible with ASCII. Since doing so requires encoding the input before passing it to pigeon, Parse* functions generated by pigeon are unexported because they should not be used directly. Escape sequences in string literals and identifiers are now recognized. In addition to escape sequences supported by Solidity, we support \U similar to the one supported by Go to allow users to specify non-BMP Unicode code points without using multiple \x escapes. The AST printer is modified to quote non-printable characters in strings to prevent control characters from messing up the terminal. --- core/vm/sqlvm/ast/printer.go | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) (limited to 'core/vm/sqlvm/ast') diff --git a/core/vm/sqlvm/ast/printer.go b/core/vm/sqlvm/ast/printer.go index e8c612637..4d977aeea 100644 --- a/core/vm/sqlvm/ast/printer.go +++ b/core/vm/sqlvm/ast/printer.go @@ -3,9 +3,36 @@ package ast import ( "fmt" "reflect" + "strconv" + "unicode" "unicode/utf8" ) +func formatBytes(b []byte) string { + if utf8.Valid(b) { + for r, i, size := rune(0), 0, 0; i < len(b); i += size { + r, size = utf8.DecodeRune(b[i:]) + if !unicode.IsPrint(r) { + return strconv.Quote(string(b)) + } + } + return string(b) + } + return fmt.Sprintf("%v", b) +} + +func formatString(s string) string { + if utf8.ValidString(s) { + for _, r := range s { + if !unicode.IsPrint(r) { + return strconv.Quote(s) + } + } + return s + } + return fmt.Sprintf("%v", []byte(s)) +} + // PrintAST prints ast to stdout. 
func PrintAST(n interface{}, indent string, detail bool) { if n == nil { @@ -53,7 +80,8 @@ func PrintAST(n interface{}, indent string, detail bool) { return } if stringer, ok := n.(fmt.Stringer); ok { - fmt.Printf("%s%s: %s\n", indent, name, stringer.String()) + s := stringer.String() + fmt.Printf("%s%s: %s\n", indent, name, formatString(s)) return } if typeOf.Kind() == reflect.Struct { @@ -75,10 +103,8 @@ func PrintAST(n interface{}, indent string, detail bool) { return } if bs, ok := n.([]byte); ok { - if utf8.Valid(bs) { - fmt.Printf("%s%s\n", indent, bs) - return - } + fmt.Printf("%s%s\n", indent, formatBytes(bs)) + return } fmt.Printf("%s%+v\n", indent, valueOf.Interface()) } -- cgit v1.2.3