Diffstat (limited to 'core/asm/lexer.go')
-rw-r--r--  core/asm/lexer.go  296
1 file changed, 296 insertions, 0 deletions
diff --git a/core/asm/lexer.go b/core/asm/lexer.go
new file mode 100644
index 000000000..2770bd35f
--- /dev/null
+++ b/core/asm/lexer.go
@@ -0,0 +1,296 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package asm
+
+import (
+ "fmt"
+ "os"
+ "strings"
+ "unicode"
+ "unicode/utf8"
+)
+
+// stateFn is used throughout the lifetime of the
+// lexer to parse the different values at the
+// current state.
+type stateFn func(*lexer) stateFn
+
+// token is emitted when the lexer has discovered
+// a new parsable token. Tokens are delivered over
+// the tokens channel of the lexer.
+type token struct {
+ typ tokenType
+ lineno int
+ text string
+}
+
+// tokenType denotes the different types of tokens
+// the lexer is able to parse and return.
+type tokenType int
+
+const (
+ eof tokenType = iota // end of file
+ lineStart // emitted when a line starts
+ lineEnd // emitted when a line ends
+ invalidStatement // any invalid statement
+ element // any element during element parsing
+ label // label is emitted when a label is found
+ labelDef // labelDef is emitted when a label definition is found
+ number // number is emitted when a number is found
+ stringValue // stringValue is emitted when a string has been found
+
+ Numbers = "1234567890" // characters representing any decimal number
+ HexadecimalNumbers = Numbers + "aAbBcCdDeEfF" // characters representing any hexadecimal number
+ Alpha = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" // characters representing any alphabetical character
+)
+
+// String implements fmt.Stringer.
+func (it tokenType) String() string {
+ if int(it) >= len(stringtokenTypes) {
+ return "invalid"
+ }
+ return stringtokenTypes[it]
+}
+
+var stringtokenTypes = []string{
+ eof: "EOF",
+ invalidStatement: "invalid statement",
+ element: "element",
+ lineEnd: "end of line",
+ lineStart: "new line",
+ label: "label",
+ labelDef: "label definition",
+ number: "number",
+ stringValue: "string",
+}
+
+// lexer is the basic construct for parsing
+// source code and turning it into tokens.
+// Tokens are interpreted by the compiler.
+type lexer struct {
+ input string // input contains the source code of the program
+
+ tokens chan token // tokens is used to deliver tokens to the listener
+ state stateFn // the current state function
+
+ lineno int // current line number in the source file
+ start, pos, width int // positions for lexing and returning value
+
+ debug bool // flag for triggering debug output
+}
+
+// Lex lexes the program by name with the given source. It returns a
+// channel on which the tokens are delivered.
+func Lex(name string, source []byte, debug bool) <-chan token {
+ ch := make(chan token)
+ l := &lexer{
+ input: string(source),
+ tokens: ch,
+ state: lexLine,
+ debug: debug,
+ }
+ go func() {
+ l.emit(lineStart)
+ for l.state != nil {
+ l.state = l.state(l)
+ }
+ l.emit(eof)
+ close(l.tokens)
+ }()
+
+ return ch
+}
+
+// next returns the next rune in the program's source.
+func (l *lexer) next() (r rune) {
+ if l.pos >= len(l.input) {
+ l.width = 0
+ return 0
+ }
+ r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
+ l.pos += l.width
+ return r
+}
+
+// backup steps back to the start of the last rune read by next (multi-byte safe).
+func (l *lexer) backup() {
+ l.pos -= l.width
+}
+
+// peek returns the next rune but does not advance the seeker
+func (l *lexer) peek() rune {
+ r := l.next()
+ l.backup()
+ return r
+}
+
+// ignore discards the input scanned so far by moving the start position up to the current position.
+func (l *lexer) ignore() {
+ l.start = l.pos
+}
+
+// accept consumes the next rune if it is contained in valid and reports whether it did so.
+func (l *lexer) accept(valid string) bool {
+ if strings.IndexRune(valid, l.next()) >= 0 {
+ return true
+ }
+
+ l.backup()
+
+ return false
+}
+
+// acceptRun advances the seeker for as long as the next
+// rune is contained in valid.
+func (l *lexer) acceptRun(valid string) {
+ for strings.IndexRune(valid, l.next()) >= 0 {
+ }
+ l.backup()
+}
+
+// acceptRunUntil is the inverse of acceptRun: it advances the seeker
+// until the given rune is found, returning false if the end of the input is reached first.
+func (l *lexer) acceptRunUntil(until rune) bool {
+ // Keep consuming runes until the terminating rune (or end of input) is found.
+ for i := l.next(); i != until; i = l.next() {
+ if i == 0 {
+ return false
+ }
+ }
+
+ return true
+}
+
+// blob returns the text scanned since the last emit or ignore.
+func (l *lexer) blob() string {
+ return l.input[l.start:l.pos]
+}
+
+// emit pushes a new token onto the token channel for processing.
+func (l *lexer) emit(t tokenType) {
+ token := token{t, l.lineno, l.blob()}
+
+ if l.debug {
+ fmt.Fprintf(os.Stderr, "%04d: (%-20v) %s\n", token.lineno, token.typ, token.text)
+ }
+
+ l.tokens <- token
+ l.start = l.pos
+}
+
+// lexLine is the state function for lexing lines.
+func lexLine(l *lexer) stateFn {
+ for {
+ switch r := l.next(); {
+ case r == '\n':
+ l.emit(lineEnd)
+ l.ignore()
+ l.lineno++
+
+ l.emit(lineStart)
+ case r == ';' && l.peek() == ';':
+ return lexComment
+ case isSpace(r):
+ l.ignore()
+ case isAlphaNumeric(r) || r == '_':
+ return lexElement
+ case isNumber(r):
+ return lexNumber
+ case r == '@':
+ l.ignore()
+ return lexLabel
+ case r == '"':
+ return lexInsideString
+ default:
+ return nil
+ }
+ }
+}
+
+// lexComment parses the current position until the end
+// of the line and discards the text.
+func lexComment(l *lexer) stateFn {
+ l.acceptRunUntil('\n')
+ l.ignore()
+
+ return lexLine
+}
+
+// lexLabel parses the current label, emits it and returns
+// the lexLine state function to advance the parsing
+// process.
+func lexLabel(l *lexer) stateFn {
+ l.acceptRun(Alpha + "_" + Numbers) // labels may contain letters, digits and underscores, matching label definitions in lexElement
+
+ l.emit(label)
+
+ return lexLine
+}
+
+// lexInsideString lexes the inside of a string until
+// the state function finds the closing quote.
+// It returns the lexLine state function.
+func lexInsideString(l *lexer) stateFn {
+ if l.acceptRunUntil('"') {
+ l.emit(stringValue)
+ }
+
+ return lexLine
+}
+
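+// lexNumber scans a decimal or hexadecimal number and emits it.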
+func lexNumber(l *lexer) stateFn {
+ acceptance := Numbers
+ if l.accept("xX") { // the leading digit was already consumed by lexLine, so a "0x" prefix appears here as just "x"
+ acceptance = HexadecimalNumbers
+ }
+ l.acceptRun(acceptance)
+
+ l.emit(number)
+
+ return lexLine
+}
+
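+// lexElement scans an instruction or label definition and emits the appropriate token.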
+func lexElement(l *lexer) stateFn {
+ l.acceptRun(Alpha + "_" + Numbers)
+
+ if l.peek() == ':' {
+ l.emit(labelDef)
+
+ l.accept(":")
+ l.ignore()
+ } else {
+ l.emit(element)
+ }
+ return lexLine
+}
+
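+// isAlphaNumeric reports whether the rune is a letter; digits are matched separately by isNumber.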
+func isAlphaNumeric(t rune) bool {
+ return unicode.IsLetter(t)
+}
+
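+// isSpace reports whether the rune is a space character.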
+func isSpace(t rune) bool {
+ return unicode.IsSpace(t)
+}
+
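+// isNumber reports whether the rune is a decimal digit.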
+func isNumber(t rune) bool {
+ return unicode.IsNumber(t)
+}
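
Lex delivers tokens over a channel that is closed after the eof token, so a consumer in the same package can simply range over it. A minimal sketch of such a consumer, assuming a hypothetical in-package helper named dumpTokens and an arbitrary file name:

package asm

import "fmt"

// dumpTokens lexes src and prints every token the lexer emits.
// The loop terminates when Lex closes the channel after the eof token.
func dumpTokens(src []byte) {
	for tok := range Lex("example.asm", src, false) {
		fmt.Printf("%04d: %-20v %q\n", tok.lineno, tok.typ, tok.text)
	}
}

For a line such as "push 0x80", this would print an element token for "push" and a number token for "0x80", bracketed by the lineStart and lineEnd markers.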