aboutsummaryrefslogblamecommitdiffstats
path: root/core/asm/lexer.go
blob: 2770bd35fc2c43ae1528fce66a0af9666ef4489f (plain) (tree)


































































































































































































































































































                                                                                                                                 
// Copyright 2017 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package asm

import (
    "fmt"
    "os"
    "strings"
    "unicode"
    "unicode/utf8"
)

// stateFn is a single state of the lexer's state machine. A state
// consumes input from the lexer it is given and returns the state to
// run next; Lex keeps invoking the returned state until one of them
// returns nil.
type stateFn func(*lexer) stateFn

// token is emitted when the lexer has discovered
// a new parsable token. Tokens are delivered over
// the tokens channel of the lexer.
type token struct {
    typ    tokenType // kind of token
    lineno int       // value of the lexer's lineno when the token was emitted
    text   string    // slice of the input covered by the token
}

// tokenType enumerates the kinds of token the lexer
// is able to produce.
type tokenType int

const (
    eof              tokenType = iota // end of file
    lineStart                         // emitted when a line starts
    lineEnd                           // emitted when a line ends
    invalidStatement                  // any invalid statement
    element                           // any element during element parsing
    label                             // label is emitted when a label is found
    labelDef                          // label definition is emitted when a new label is found
    number                            // number is emitted when a number is found
    stringValue                       // stringValue is emitted when a string has been found

    Numbers            = "1234567890"                                           // characters representing any decimal number
    HexadecimalNumbers = Numbers + "aAbBcCdDeEfF"                               // characters representing any hexadecimal number
    Alpha              = "abcdefghijklmnopqrstuwvxyzABCDEFGHIJKLMNOPQRSTUWVXYZ" // upper- and lower-case ASCII letters
)

// String implements fmt.Stringer. It returns the human readable name of
// the token type, or "invalid" for any value that has no entry in
// stringtokenTypes (negative values and values past the end of the table).
func (it tokenType) String() string {
    // The upper bound must be exclusive: an index equal to the table
    // length is already out of range.
    if it < 0 || int(it) >= len(stringtokenTypes) {
        return "invalid"
    }
    return stringtokenTypes[it]
}

// stringtokenTypes maps every tokenType to the name returned by String.
var stringtokenTypes = []string{
    eof:              "EOF",
    invalidStatement: "invalid statement",
    element:          "element",
    lineEnd:          "end of line",
    lineStart:        "new line",
    label:            "label",
    labelDef:         "label definition",
    number:           "number",
    stringValue:      "string",
}

// lexer holds the state needed to turn source code into tokens.
// It walks over input rune by rune, driven by the current state
// function, and sends every token it recognises on the tokens channel.
type lexer struct {
    input string // input contains the source code of the program

    tokens chan token // tokens is used to deliver tokens to the listener
    state  stateFn    // the current state function

    lineno            int // current line number in the source file; incremented by lexLine on '\n'
    start, pos, width int // byte offset where the pending token starts, current read offset, and byte width of the rune last read by next

    debug bool // when set, emit also prints every token to stderr
}

// Lex tokenizes source and returns a receive-only channel on which the
// resulting tokens are delivered. The work happens in a separate
// goroutine: it emits a lineStart token, runs the state machine
// starting at lexLine until a state returns nil, then emits eof and
// closes the channel. The channel is unbuffered, so the goroutine
// blocks on every token until it is received.
//
// The name argument is not used by the lexer. When debug is true every
// emitted token is additionally printed to stderr.
func Lex(name string, source []byte, debug bool) <-chan token {
    out := make(chan token)
    lx := &lexer{
        input:  string(source),
        tokens: out,
        state:  lexLine,
        debug:  debug,
    }

    go func() {
        lx.emit(lineStart)
        // Run each state and store the one it hands back until the
        // machine terminates with a nil state.
        for fn := lx.state; fn != nil; fn = lx.state {
            lx.state = fn(lx)
        }
        lx.emit(eof)
        close(out)
    }()

    return out
}

// next decodes the rune at the current position, advances past it and
// records its byte width in l.width so that backup can undo the read.
// At the end of the input it returns 0 and sets the width to 0.
func (l *lexer) next() rune {
    if l.pos >= len(l.input) {
        l.width = 0
        return 0
    }
    r, size := utf8.DecodeRuneInString(l.input[l.pos:])
    l.width = size
    l.pos += size
    return r
}

// backup rewinds the position by the width recorded by the most recent
// call to next, un-reading that rune. After a read at the end of the
// input the recorded width is 0 and backup has no effect.
func (l *lexer) backup() {
    l.pos = l.pos - l.width
}

// peek returns the upcoming rune without consuming it. It reads the
// rune with next and immediately steps back over it, so l.width is left
// holding the width of the peeked rune.
func (l *lexer) peek() rune {
    upcoming := l.next()
    // Undo the read so the position is unchanged for the caller.
    l.pos -= l.width
    return upcoming
}

// ignore discards the text consumed so far by moving the start of the
// pending token up to the current position. The position itself is not
// changed.
func (l *lexer) ignore() {
    l.start = l.pos
}

// accept consumes the next rune if it is one of the runes in valid and
// reports whether it did. On a mismatch the rune is un-read and the
// position is left where it was.
func (l *lexer) accept(valid string) bool {
    if !strings.ContainsRune(valid, l.next()) {
        l.backup()
        return false
    }
    return true
}

// acceptRun consumes runes for as long as they are contained in valid.
// The first rune that is not part of valid ends the run and is left
// unconsumed.
func (l *lexer) acceptRun(valid string) {
    for {
        if r := l.next(); !strings.ContainsRune(valid, r) {
            break
        }
    }
    // Un-read the rune that terminated the run.
    l.backup()
}

// acceptRunUntil consumes runes up to and including the first
// occurrence of until and reports true once it has been consumed. It
// reports false if next yields 0 (which is what next returns at the end
// of the input) before until is seen.
func (l *lexer) acceptRunUntil(until rune) bool {
    target := string(until)
    for {
        r := l.next()
        if strings.ContainsRune(target, r) {
            return true
        }
        if r == 0 {
            return false
        }
    }
}

// blob returns the text of the pending token: the part of the input
// between the token start and the current position.
func (l *lexer) blob() string {
    return l.input[l.start:l.pos]
}

// emit sends a token of type t on the tokens channel. The token carries
// the current line number and the pending text (see blob). With debug
// enabled the token is printed to stderr before it is sent. Afterwards
// the pending text is reset so the next token starts at the current
// position.
func (l *lexer) emit(t tokenType) {
    tok := token{
        typ:    t,
        lineno: l.lineno,
        text:   l.blob(),
    }
    if l.debug {
        fmt.Fprintf(os.Stderr, "%04d: (%-20v) %s\n", tok.lineno, tok.typ, tok.text)
    }

    l.tokens <- tok
    l.start = l.pos
}

// lexLine is the top-level state. It reads one rune at a time, handles
// line breaks and whitespace itself and dispatches to the state that
// lexes the construct the rune introduces:
//
//   - '\n' emits lineEnd, increments the line number and emits lineStart
//   - ";;" starts a comment (lexComment)
//   - other whitespace is skipped
//   - a letter or '_' starts an element or label definition (lexElement)
//   - a number rune starts a number (lexNumber)
//   - '@' starts a label reference (lexLabel); the '@' itself is dropped
//   - '"' starts a string (lexInsideString)
//
// Any other rune, including the 0 that next returns at the end of the
// input, terminates the state machine by returning nil.
func lexLine(l *lexer) stateFn {
    for {
        r := l.next()

        if r == '\n' {
            l.emit(lineEnd)
            l.ignore()
            l.lineno++

            l.emit(lineStart)
            continue
        }
        if r == ';' && l.peek() == ';' {
            return lexComment
        }
        if isSpace(r) {
            l.ignore()
            continue
        }

        switch {
        case isAlphaNumeric(r) || r == '_':
            return lexElement
        case isNumber(r):
            return lexNumber
        case r == '@':
            l.ignore()
            return lexLabel
        case r == '"':
            return lexInsideString
        }
        return nil
    }
}

// lexComment discards a comment, i.e. everything from the current
// position up to (but not including) the end of the line, and returns
// lexLine.
//
// The terminating newline is deliberately left in the input. lexLine is
// the state that emits lineEnd/lineStart and increments the line number
// when it sees '\n'; swallowing the newline here would drop those
// tokens and leave the line count one short for every commented line.
func lexComment(l *lexer) stateFn {
    if l.acceptRunUntil('\n') {
        // Un-read the newline so lexLine processes it.
        l.backup()
    }
    l.ignore()

    return lexLine
}

// lexLabel lexes a label reference. It is entered after the leading '@'
// has been consumed and discarded, consumes the label name, emits it as
// a label token and returns lexLine.
//
// The name may consist of letters, '_' and digits. This is the same
// character set lexElement accepts for label definitions, so a label
// defined as "loop1:" can be referenced as "@loop1" instead of being
// split into the label "loop" followed by the number "1".
func lexLabel(l *lexer) stateFn {
    l.acceptRun(Alpha + "_" + Numbers)

    l.emit(label)

    return lexLine
}

// lexInsideString lexes a string literal. It is entered with the
// opening quote already consumed but still part of the pending text,
// and consumes input up to and including the closing quote. If a
// closing quote is found a stringValue token is emitted, whose text
// therefore includes both quotes; otherwise nothing is emitted. In
// either case lexLine is returned.
func lexInsideString(l *lexer) stateFn {
    closed := l.acceptRunUntil('"')
    if closed {
        l.emit(stringValue)
    }

    return lexLine
}

// lexNumber lexes a decimal or hexadecimal number, emits it as a number
// token and returns lexLine.
//
// lexLine has already consumed the first digit when this state is
// entered, so the hexadecimal prefix is recognised by checking that the
// text consumed so far is exactly "0" and that it is followed by 'x' or
// 'X'. Only then are hexadecimal digits accepted; in every other case
// the remainder of the number is restricted to decimal digits.
func lexNumber(l *lexer) stateFn {
    acceptance := Numbers
    if l.blob() == "0" && l.accept("xX") {
        acceptance = HexadecimalNumbers
    }
    l.acceptRun(acceptance)

    l.emit(number)

    return lexLine
}

// lexElement lexes an identifier made of letters, '_' and digits. If
// the identifier is directly followed by ':' it is emitted as a label
// definition and the ':' is consumed and discarded; otherwise it is
// emitted as an element. It returns lexLine.
func lexElement(l *lexer) stateFn {
    l.acceptRun(Alpha + "_" + Numbers)

    if l.peek() != ':' {
        l.emit(element)
        return lexLine
    }

    l.emit(labelDef)
    // Drop the ':' that marks the definition.
    l.accept(":")
    l.ignore()
    return lexLine
}

// isAlphaNumeric reports whether t is a Unicode letter. Despite its
// name it does not report true for digits.
func isAlphaNumeric(t rune) bool {
    return unicode.IsLetter(t)
}

// isSpace reports whether t is a white space character as defined by
// unicode.IsSpace.
func isSpace(t rune) bool {
    return unicode.IsSpace(t)
}

// isNumber reports whether t is a number character as defined by
// unicode.IsNumber, which is not limited to the ASCII digits.
func isNumber(t rune) bool {
    return unicode.IsNumber(t)
}