From 230cf2ec9142b6a8f421cb8873deb5df1566e89c Mon Sep 17 00:00:00 2001 From: Jeffrey Wilcke Date: Wed, 1 Mar 2017 01:11:24 +0100 Subject: cmd/evm, core/asm: add EVM assembler (#3686) The evm compile command implements a simple assembly language that compiles to EVM bytecode. --- core/asm/compiler.go | 281 +++++++++++++++++++++++++++++++++++++++++++++++++ core/asm/lex_test.go | 36 +++++++ core/asm/lexer.go | 291 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 608 insertions(+) create mode 100644 core/asm/compiler.go create mode 100644 core/asm/lex_test.go create mode 100644 core/asm/lexer.go (limited to 'core/asm') diff --git a/core/asm/compiler.go b/core/asm/compiler.go new file mode 100644 index 000000000..b2c85375c --- /dev/null +++ b/core/asm/compiler.go @@ -0,0 +1,281 @@ +// Copyright 2017 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package asm + +import ( + "errors" + "fmt" + "math/big" + "os" + "strings" + + "github.com/ethereum/go-ethereum/common/math" + "github.com/ethereum/go-ethereum/core/vm" +) + +// Compiler contains information about the parsed source +// and holds the tokens for the program. +type Compiler struct { + tokens []token + binary []interface{} + + labels map[string]int + + pc, pos int + + debug bool +} + +// newCompiler returns a new allocated compiler. +func NewCompiler(debug bool) *Compiler { + return &Compiler{ + labels: make(map[string]int), + debug: debug, + } +} + +// Feed feeds tokens in to ch and are interpreted by +// the compiler. +// +// feed is the first pass in the compile stage as it +// collect the used labels in the program and keeps a +// program counter which is used to determine the locations +// of the jump dests. The labels can than be used in the +// second stage to push labels and determine the right +// position. +func (c *Compiler) Feed(ch <-chan token) { + for i := range ch { + switch i.typ { + case number: + num := math.MustParseBig256(i.text).Bytes() + if len(num) == 0 { + num = []byte{0} + } + c.pc += len(num) + case stringValue: + c.pc += len(i.text) - 2 + case element: + c.pc++ + case labelDef: + c.labels[i.text] = c.pc + c.pc++ + case label: + c.pc += 5 + } + + c.tokens = append(c.tokens, i) + } + if c.debug { + fmt.Fprintln(os.Stderr, "found", len(c.labels), "labels") + } +} + +// Compile compiles the current tokens and returns a +// binary string that can be interpreted by the EVM +// and an error if it failed. +// +// compile is the second stage in the compile phase +// which compiles the tokens to EVM instructions. +func (c *Compiler) Compile() (string, []error) { + var errors []error + // continue looping over the tokens until + // the stack has been exhausted. + for c.pos < len(c.tokens) { + if err := c.compileLine(); err != nil { + errors = append(errors, err) + } + } + + // turn the binary to hex + var bin string + for _, v := range c.binary { + switch v := v.(type) { + case vm.OpCode: + bin += fmt.Sprintf("%x", []byte{byte(v)}) + case []byte: + bin += fmt.Sprintf("%x", v) + } + } + return bin, errors +} + +// next returns the next token and increments the +// posititon. +func (c *Compiler) next() token { + token := c.tokens[c.pos] + c.pos++ + return token +} + +// compile line compiles a single line instruction e.g. +// "push 1", "jump @labal". +func (c *Compiler) compileLine() error { + n := c.next() + if n.typ != lineStart { + return compileErr(n, n.typ.String(), lineStart.String()) + } + + lvalue := c.next() + switch lvalue.typ { + case eof: + return nil + case element: + if err := c.compileElement(lvalue); err != nil { + return err + } + case labelDef: + c.compileLabel() + case lineEnd: + return nil + default: + return compileErr(lvalue, lvalue.text, fmt.Sprintf("%v or %v", labelDef, element)) + } + + if n := c.next(); n.typ != lineEnd { + return compileErr(n, n.text, lineEnd.String()) + } + + return nil +} + +// compileNumber compiles the number to bytes +func (c *Compiler) compileNumber(element token) (int, error) { + num := math.MustParseBig256(element.text).Bytes() + if len(num) == 0 { + num = []byte{0} + } + c.pushBin(num) + return len(num), nil +} + +// compileElement compiles the element (push & label or both) +// to a binary representation and may error if incorrect statements +// where fed. +func (c *Compiler) compileElement(element token) error { + // check for a jump. jumps must be read and compiled + // from right to left. + if isJump(element.text) { + rvalue := c.next() + switch rvalue.typ { + case number: + // TODO figure out how to return the error properly + c.compileNumber(rvalue) + case stringValue: + // strings are quoted, remove them. + c.pushBin(rvalue.text[1 : len(rvalue.text)-2]) + case label: + c.pushBin(vm.PUSH4) + pos := big.NewInt(int64(c.labels[rvalue.text])).Bytes() + pos = append(make([]byte, 4-len(pos)), pos...) + c.pushBin(pos) + default: + return compileErr(rvalue, rvalue.text, "number, string or label") + } + // push the operation + c.pushBin(toBinary(element.text)) + return nil + } else if isPush(element.text) { + // handle pushes. pushes are read from left to right. + var value []byte + + rvalue := c.next() + switch rvalue.typ { + case number: + value = math.MustParseBig256(rvalue.text).Bytes() + if len(value) == 0 { + value = []byte{0} + } + case stringValue: + value = []byte(rvalue.text[1 : len(rvalue.text)-1]) + case label: + value = make([]byte, 4) + copy(value, big.NewInt(int64(c.labels[rvalue.text])).Bytes()) + default: + return compileErr(rvalue, rvalue.text, "number, string or label") + } + + if len(value) > 32 { + return fmt.Errorf("%d type error: unsupported string or number with size > 32", rvalue.lineno) + } + + c.pushBin(vm.OpCode(int(vm.PUSH1) - 1 + len(value))) + c.pushBin(value) + } else { + c.pushBin(toBinary(element.text)) + } + + return nil +} + +// compileLabel pushes a jumpdest to the binary slice. +func (c *Compiler) compileLabel() { + c.pushBin(vm.JUMPDEST) +} + +// pushBin pushes the value v to the binary stack. +func (c *Compiler) pushBin(v interface{}) { + if c.debug { + fmt.Printf("%d: %v\n", len(c.binary), v) + } + c.binary = append(c.binary, v) +} + +// isPush returns whether the string op is either any of +// push(N). +func isPush(op string) bool { + if op == "push" { + return true + } + return false +} + +// isJump returns whether the string op is jump(i) +func isJump(op string) bool { + return op == "jumpi" || op == "jump" +} + +// toBinary converts text to a vm.OpCode +func toBinary(text string) vm.OpCode { + if isPush(text) { + text = "push1" + } + return vm.StringToOp(strings.ToUpper(text)) +} + +type compileError struct { + got string + want string + + lineno int +} + +func (err compileError) Error() string { + return fmt.Sprintf("%d syntax error: unexpected %v, expected %v", err.lineno, err.got, err.want) +} + +var ( + errExpBol = errors.New("expected beginning of line") + errExpElementOrLabel = errors.New("expected beginning of line") +) + +func compileErr(c token, got, want string) error { + return compileError{ + got: got, + want: want, + lineno: c.lineno, + } +} diff --git a/core/asm/lex_test.go b/core/asm/lex_test.go new file mode 100644 index 000000000..36e67bcf7 --- /dev/null +++ b/core/asm/lex_test.go @@ -0,0 +1,36 @@ +// Copyright 2017 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package asm + +import "testing" + +func lexAll(src string) []token { + ch := Lex("test.asm", []byte(src), false) + + var tokens []token + for i := range ch { + tokens = append(tokens, i) + } + return tokens +} + +func TestComment(t *testing.T) { + tokens := lexAll(";; this is a comment") + if len(tokens) != 2 { // {new line, EOF} + t.Error("expected no tokens") + } +} diff --git a/core/asm/lexer.go b/core/asm/lexer.go new file mode 100644 index 000000000..2770bd35f --- /dev/null +++ b/core/asm/lexer.go @@ -0,0 +1,291 @@ +// Copyright 2017 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package asm + +import ( + "fmt" + "os" + "strings" + "unicode" + "unicode/utf8" +) + +// stateFn is used through the lifetime of the +// lexer to parse the different values at the +// current state. +type stateFn func(*lexer) stateFn + +// token is emitted when the lexer has discovered +// a new parsable token. These are delivered over +// the tokens channels of the lexer +type token struct { + typ tokenType + lineno int + text string +} + +// tokenType are the different types the lexer +// is able to parse and return. +type tokenType int + +const ( + eof tokenType = iota // end of file + lineStart // emitted when a line starts + lineEnd // emitted when a line ends + invalidStatement // any invalid statement + element // any element during element parsing + label // label is emitted when a labal is found + labelDef // label definition is emitted when a new label is found + number // number is emitted when a number is found + stringValue // stringValue is emitted when a string has been found + + Numbers = "1234567890" // characters representing any decimal number + HexadecimalNumbers = Numbers + "aAbBcCdDeEfF" // characters representing any hexadecimal + Alpha = "abcdefghijklmnopqrstuwvxyzABCDEFGHIJKLMNOPQRSTUWVXYZ" // characters representing alphanumeric +) + +// String implements stringer +func (it tokenType) String() string { + if int(it) > len(stringtokenTypes) { + return "invalid" + } + return stringtokenTypes[it] +} + +var stringtokenTypes = []string{ + eof: "EOF", + invalidStatement: "invalid statement", + element: "element", + lineEnd: "end of line", + lineStart: "new line", + label: "label", + labelDef: "label definition", + number: "number", + stringValue: "string", +} + +// lexer is the basic construct for parsing +// source code and turning them in to tokens. +// Tokens are interpreted by the compiler. +type lexer struct { + input string // input contains the source code of the program + + tokens chan token // tokens is used to deliver tokens to the listener + state stateFn // the current state function + + lineno int // current line number in the source file + start, pos, width int // positions for lexing and returning value + + debug bool // flag for triggering debug output +} + +// lex lexes the program by name with the given source. It returns a +// channel on which the tokens are delivered. +func Lex(name string, source []byte, debug bool) <-chan token { + ch := make(chan token) + l := &lexer{ + input: string(source), + tokens: ch, + state: lexLine, + debug: debug, + } + go func() { + l.emit(lineStart) + for l.state != nil { + l.state = l.state(l) + } + l.emit(eof) + close(l.tokens) + }() + + return ch +} + +// next returns the next rune in the program's source. +func (l *lexer) next() (rune rune) { + if l.pos >= len(l.input) { + l.width = 0 + return 0 + } + rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) + l.pos += l.width + return rune +} + +// backup backsup the last parsed element (multi-character) +func (l *lexer) backup() { + l.pos -= l.width +} + +// peek returns the next rune but does not advance the seeker +func (l *lexer) peek() rune { + r := l.next() + l.backup() + return r +} + +// ignore advances the seeker and ignores the value +func (l *lexer) ignore() { + l.start = l.pos +} + +// Accepts checks whether the given input matches the next rune +func (l *lexer) accept(valid string) bool { + if strings.IndexRune(valid, l.next()) >= 0 { + return true + } + + l.backup() + + return false +} + +// acceptRun will continue to advance the seeker until valid +// can no longer be met. +func (l *lexer) acceptRun(valid string) { + for strings.IndexRune(valid, l.next()) >= 0 { + } + l.backup() +} + +// acceptRunUntil is the inverse of acceptRun and will continue +// to advance the seeker until the rune has been found. +func (l *lexer) acceptRunUntil(until rune) bool { + // Continues running until a rune is found + for i := l.next(); strings.IndexRune(string(until), i) == -1; i = l.next() { + if i == 0 { + return false + } + } + + return true +} + +// blob returns the current value +func (l *lexer) blob() string { + return l.input[l.start:l.pos] +} + +// Emits a new token on to token channel for processing +func (l *lexer) emit(t tokenType) { + token := token{t, l.lineno, l.blob()} + + if l.debug { + fmt.Fprintf(os.Stderr, "%04d: (%-20v) %s\n", token.lineno, token.typ, token.text) + } + + l.tokens <- token + l.start = l.pos +} + +// lexLine is state function for lexing lines +func lexLine(l *lexer) stateFn { + for { + switch r := l.next(); { + case r == '\n': + l.emit(lineEnd) + l.ignore() + l.lineno++ + + l.emit(lineStart) + case r == ';' && l.peek() == ';': + return lexComment + case isSpace(r): + l.ignore() + case isAlphaNumeric(r) || r == '_': + return lexElement + case isNumber(r): + return lexNumber + case r == '@': + l.ignore() + return lexLabel + case r == '"': + return lexInsideString + default: + return nil + } + } +} + +// lexComment parses the current position until the end +// of the line and discards the text. +func lexComment(l *lexer) stateFn { + l.acceptRunUntil('\n') + l.ignore() + + return lexLine +} + +// lexLabel parses the current label, emits and returns +// the lex text state function to advance the parsing +// process. +func lexLabel(l *lexer) stateFn { + l.acceptRun(Alpha + "_") + + l.emit(label) + + return lexLine +} + +// lexInsideString lexes the inside of a string until +// until the state function finds the closing quote. +// It returns the lex text state function. +func lexInsideString(l *lexer) stateFn { + if l.acceptRunUntil('"') { + l.emit(stringValue) + } + + return lexLine +} + +func lexNumber(l *lexer) stateFn { + acceptance := Numbers + if l.accept("0") && l.accept("xX") { + acceptance = HexadecimalNumbers + } + l.acceptRun(acceptance) + + l.emit(number) + + return lexLine +} + +func lexElement(l *lexer) stateFn { + l.acceptRun(Alpha + "_" + Numbers) + + if l.peek() == ':' { + l.emit(labelDef) + + l.accept(":") + l.ignore() + } else { + l.emit(element) + } + return lexLine +} + +func isAlphaNumeric(t rune) bool { + return unicode.IsLetter(t) +} + +func isSpace(t rune) bool { + return unicode.IsSpace(t) +} + +func isNumber(t rune) bool { + return unicode.IsNumber(t) +} -- cgit v1.2.3