diff options
author | Jeffrey Wilcke <jeffrey@ethereum.org> | 2015-01-16 05:28:48 +0800 |
---|---|---|
committer | Jeffrey Wilcke <jeffrey@ethereum.org> | 2015-01-16 05:28:48 +0800 |
commit | 52bb14954190fd9041eda866e1f70b4b60eee521 (patch) | |
tree | 98d2496f6ea4a409493328043e323c35eb2abf1c /rlp/encode.go | |
parent | bb55307a9d8fa73b0fbc0727f8b80925a87627b7 (diff) | |
parent | 29c46cdf3405d6462deb2cb5cef87b1b4fb2bdc7 (diff) | |
download | dexon-52bb14954190fd9041eda866e1f70b4b60eee521.tar dexon-52bb14954190fd9041eda866e1f70b4b60eee521.tar.gz dexon-52bb14954190fd9041eda866e1f70b4b60eee521.tar.bz2 dexon-52bb14954190fd9041eda866e1f70b4b60eee521.tar.lz dexon-52bb14954190fd9041eda866e1f70b4b60eee521.tar.xz dexon-52bb14954190fd9041eda866e1f70b4b60eee521.tar.zst dexon-52bb14954190fd9041eda866e1f70b4b60eee521.zip |
Merge pull request #257 from fjl/rlp-encoder
rlp: add functions for encoding
Diffstat (limited to 'rlp/encode.go')
-rw-r--r-- | rlp/encode.go | 532 |
1 files changed, 532 insertions, 0 deletions
diff --git a/rlp/encode.go b/rlp/encode.go new file mode 100644 index 000000000..689d25dd8 --- /dev/null +++ b/rlp/encode.go @@ -0,0 +1,532 @@ +package rlp + +import ( + "fmt" + "io" + "math/big" + "reflect" +) + +// TODO: put encbufs in a sync.Pool. +// Doing that requires zeroing the buffers after use. +// encReader will need to drop it's buffer when done. + +var ( + // Common encoded values. + // These are useful when implementing EncodeRLP. + EmptyString = []byte{0x80} + EmptyList = []byte{0xC0} +) + +// Encoder is implemented by types that require custom +// encoding rules or want to encode private fields. +type Encoder interface { + // EncodeRLP should write the RLP encoding of its receiver to w. + // If the implementation is a pointer method, it may also be + // called for nil pointers. + // + // Implementations should generate valid RLP. The data written is + // not verified at the moment, but a future version might. It is + // recommended to write only a single value but writing multiple + // values or no value at all is also permitted. + EncodeRLP(io.Writer) error +} + +// Encode writes the RLP encoding of val to w. Note that Encode may +// perform many small writes in some cases. Consider making w +// buffered. +// +// Encode uses the following type-dependent encoding rules: +// +// If the type implements the Encoder interface, Encode calls +// EncodeRLP. This is true even for nil pointers, please see the +// documentation for Encoder. +// +// To encode a pointer, the value being pointed to is encoded. For nil +// pointers, Encode will encode the zero value of the type. A nil +// pointer to a struct type always encodes as an empty RLP list. +// +// Struct values are encoded as an RLP list of all their encoded +// public fields. Recursive struct types are supported. +// +// To encode slices and arrays, the elements are encoded as an RLP +// list of the value's elements. Note that arrays and slices with +// element type uint8 or byte are always encoded as an RLP string. +// +// A Go string is encoded as an RLP string. +// +// An unsigned integer value is encoded as an RLP string. Zero always +// encodes as an empty RLP string. Encode also supports *big.Int. +// +// An interface value encodes as the value contained in the interface. +// +// Boolean values are not supported, nor are signed integers, floating +// point numbers, maps, channels and functions. +func Encode(w io.Writer, val interface{}) error { + if outer, ok := w.(*encbuf); ok { + // Encode was called by some type's EncodeRLP. + // Avoid copying by writing to the outer encbuf directly. + return outer.encode(val) + } + eb := newencbuf() + if err := eb.encode(val); err != nil { + return err + } + return eb.toWriter(w) +} + +// EncodeBytes returns the RLP encoding of val. +// Please see the documentation of Encode for the encoding rules. +func EncodeToBytes(val interface{}) ([]byte, error) { + eb := newencbuf() + if err := eb.encode(val); err != nil { + return nil, err + } + return eb.toBytes(), nil +} + +// EncodeReader returns a reader from which the RLP encoding of val +// can be read. The returned size is the total size of the encoded +// data. +// +// Please see the documentation of Encode for the encoding rules. +func EncodeToReader(val interface{}) (size int, r io.Reader, err error) { + eb := newencbuf() + if err := eb.encode(val); err != nil { + return 0, nil, err + } + return eb.size(), &encReader{buf: eb}, nil +} + +type encbuf struct { + str []byte // string data, contains everything except list headers + lheads []*listhead // all list headers + lhsize int // sum of sizes of all encoded list headers + sizebuf []byte // 9-byte auxiliary buffer for uint encoding +} + +type listhead struct { + offset int // index of this header in string data + size int // total size of encoded data (including list headers) +} + +// encode writes head to the given buffer, which must be at least +// 9 bytes long. It returns the encoded bytes. +func (head *listhead) encode(buf []byte) []byte { + if head.size < 56 { + buf[0] = 0xC0 + byte(head.size) + return buf[:1] + } else { + sizesize := putint(buf[1:], uint64(head.size)) + buf[0] = 0xF7 + byte(sizesize) + return buf[:sizesize+1] + } +} + +func newencbuf() *encbuf { + return &encbuf{sizebuf: make([]byte, 9)} +} + +// encbuf implements io.Writer so it can be passed it into EncodeRLP. +func (w *encbuf) Write(b []byte) (int, error) { + w.str = append(w.str, b...) + return len(b), nil +} + +func (w *encbuf) encode(val interface{}) error { + rval := reflect.ValueOf(val) + ti, err := cachedTypeInfo(rval.Type()) + if err != nil { + return err + } + return ti.writer(rval, w) +} + +func (w *encbuf) encodeStringHeader(size int) { + if size < 56 { + w.str = append(w.str, 0x80+byte(size)) + } else { + // TODO: encode to w.str directly + sizesize := putint(w.sizebuf[1:], uint64(size)) + w.sizebuf[0] = 0xB7 + byte(sizesize) + w.str = append(w.str, w.sizebuf[:sizesize+1]...) + } +} + +func (w *encbuf) encodeString(b []byte) { + w.encodeStringHeader(len(b)) + w.str = append(w.str, b...) +} + +func (w *encbuf) list() *listhead { + lh := &listhead{offset: len(w.str), size: w.lhsize} + w.lheads = append(w.lheads, lh) + return lh +} + +func (w *encbuf) listEnd(lh *listhead) { + lh.size = w.size() - lh.offset - lh.size + if lh.size < 56 { + w.lhsize += 1 // length encoded into kind tag + } else { + w.lhsize += 1 + intsize(uint64(lh.size)) + } +} + +func (w *encbuf) size() int { + return len(w.str) + w.lhsize +} + +func (w *encbuf) toBytes() []byte { + out := make([]byte, w.size()) + strpos := 0 + pos := 0 + for _, head := range w.lheads { + // write string data before header + n := copy(out[pos:], w.str[strpos:head.offset]) + pos += n + strpos += n + // write the header + enc := head.encode(out[pos:]) + pos += len(enc) + } + // copy string data after the last list header + copy(out[pos:], w.str[strpos:]) + return out +} + +func (w *encbuf) toWriter(out io.Writer) (err error) { + strpos := 0 + for _, head := range w.lheads { + // write string data before header + if head.offset-strpos > 0 { + n, err := out.Write(w.str[strpos:head.offset]) + strpos += n + if err != nil { + return err + } + } + // write the header + enc := head.encode(w.sizebuf) + if _, err = out.Write(enc); err != nil { + return err + } + } + if strpos < len(w.str) { + // write string data after the last list header + _, err = out.Write(w.str[strpos:]) + } + return err +} + +// encReader is the io.Reader returned by EncodeToReader. +// It releases its encbuf at EOF. +type encReader struct { + buf *encbuf // the buffer we're reading from. this is nil when we're at EOF. + lhpos int // index of list header that we're reading + strpos int // current position in string buffer + piece []byte // next piece to be read +} + +func (r *encReader) Read(b []byte) (n int, err error) { + for { + if r.piece = r.next(); r.piece == nil { + return n, io.EOF + } + nn := copy(b[n:], r.piece) + n += nn + if nn < len(r.piece) { + // piece didn't fit, see you next time. + r.piece = r.piece[nn:] + return n, nil + } + r.piece = nil + } + panic("not reached") +} + +// next returns the next piece of data to be read. +// it returns nil at EOF. +func (r *encReader) next() []byte { + switch { + case r.piece != nil: + // There is still data available for reading. + return r.piece + + case r.lhpos < len(r.buf.lheads): + // We're before the last list header. + head := r.buf.lheads[r.lhpos] + sizebefore := head.offset - r.strpos + if sizebefore > 0 { + // String data before header. + p := r.buf.str[r.strpos:head.offset] + r.strpos += sizebefore + return p + } else { + r.lhpos++ + return head.encode(r.buf.sizebuf) + } + + case r.strpos < len(r.buf.str): + // String data at the end, after all list headers. + p := r.buf.str[r.strpos:] + r.strpos = len(r.buf.str) + return p + + default: + return nil + } +} + +var ( + encoderInterface = reflect.TypeOf(new(Encoder)).Elem() + emptyInterface = reflect.TypeOf(new(interface{})).Elem() + big0 = big.NewInt(0) +) + +// makeWriter creates a writer function for the given type. +func makeWriter(typ reflect.Type) (writer, error) { + kind := typ.Kind() + switch { + case typ.Implements(encoderInterface): + return writeEncoder, nil + case kind != reflect.Ptr && reflect.PtrTo(typ).Implements(encoderInterface): + return writeEncoderNoPtr, nil + case typ == emptyInterface: + return writeInterface, nil + case typ.AssignableTo(reflect.PtrTo(bigInt)): + return writeBigIntPtr, nil + case typ.AssignableTo(bigInt): + return writeBigIntNoPtr, nil + case isUint(kind): + return writeUint, nil + case kind == reflect.String: + return writeString, nil + case kind == reflect.Slice && typ.Elem().Kind() == reflect.Uint8 && !typ.Elem().Implements(encoderInterface): + return writeBytes, nil + case kind == reflect.Slice || kind == reflect.Array: + return makeSliceWriter(typ) + case kind == reflect.Struct: + return makeStructWriter(typ) + case kind == reflect.Ptr: + return makePtrWriter(typ) + default: + return nil, fmt.Errorf("rlp: type %v is not RLP-serializable", typ) + } +} + +func writeUint(val reflect.Value, w *encbuf) error { + i := val.Uint() + if i == 0 { + w.str = append(w.str, 0x80) + } else if i < 128 { + // fits single byte + w.str = append(w.str, byte(i)) + } else { + // TODO: encode int to w.str directly + s := putint(w.sizebuf[1:], i) + w.sizebuf[0] = 0x80 + byte(s) + w.str = append(w.str, w.sizebuf[:s+1]...) + } + return nil +} + +func writeBigIntPtr(val reflect.Value, w *encbuf) error { + return writeBigInt(val.Interface().(*big.Int), w) +} + +func writeBigIntNoPtr(val reflect.Value, w *encbuf) error { + i := val.Interface().(big.Int) + return writeBigInt(&i, w) +} + +func writeBigInt(i *big.Int, w *encbuf) error { + if cmp := i.Cmp(big0); cmp == -1 { + return fmt.Errorf("rlp: cannot encode negative *big.Int") + } else if cmp == 0 { + w.str = append(w.str, 0x80) + } else if bits := i.BitLen(); bits < 8 { + // fits single byte + w.str = append(w.str, byte(i.Uint64())) + } else { + w.encodeString(i.Bytes()) + } + return nil +} + +func writeBytes(val reflect.Value, w *encbuf) error { + w.encodeString(val.Bytes()) + return nil +} + +func writeString(val reflect.Value, w *encbuf) error { + s := val.String() + w.encodeStringHeader(len(s)) + w.str = append(w.str, s...) + return nil +} + +func writeEncoder(val reflect.Value, w *encbuf) error { + return val.Interface().(Encoder).EncodeRLP(w) +} + +// writeEncoderNoPtr handles non-pointer values that implement Encoder +// with a pointer receiver. +func writeEncoderNoPtr(val reflect.Value, w *encbuf) error { + if !val.CanAddr() { + // We can't get the address. It would be possible make the + // value addressable by creating a shallow copy, but this + // creates other problems so we're not doing it (yet). + // + // package json simply doesn't call MarshalJSON for cases like + // this, but encodes the value as if it didn't implement the + // interface. We don't want to handle it that way. + return fmt.Errorf("rlp: game over: unadressable value of type %v, EncodeRLP is pointer method", val.Type()) + } + return val.Addr().Interface().(Encoder).EncodeRLP(w) +} + +func writeInterface(val reflect.Value, w *encbuf) error { + if val.IsNil() { + // Write empty list. This is consistent with the previous RLP + // encoder that we had and should therefore avoid any + // problems. + w.str = append(w.str, 0xC0) + return nil + } + eval := val.Elem() + ti, err := cachedTypeInfo(eval.Type()) + if err != nil { + return err + } + return ti.writer(eval, w) +} + +func makeSliceWriter(typ reflect.Type) (writer, error) { + etypeinfo, err := cachedTypeInfo1(typ.Elem()) + if err != nil { + return nil, err + } + writer := func(val reflect.Value, w *encbuf) error { + lh := w.list() + vlen := val.Len() + for i := 0; i < vlen; i++ { + if err := etypeinfo.writer(val.Index(i), w); err != nil { + return err + } + } + w.listEnd(lh) + return nil + } + return writer, nil +} + +func makeStructWriter(typ reflect.Type) (writer, error) { + fields, err := structFields(typ) + if err != nil { + return nil, err + } + writer := func(val reflect.Value, w *encbuf) error { + lh := w.list() + for _, f := range fields { + if err := f.info.writer(val.Field(f.index), w); err != nil { + return err + } + } + w.listEnd(lh) + return nil + } + return writer, nil +} + +func makePtrWriter(typ reflect.Type) (writer, error) { + etypeinfo, err := cachedTypeInfo1(typ.Elem()) + if err != nil { + return nil, err + } + zero := reflect.Zero(typ.Elem()) + kind := typ.Elem().Kind() + writer := func(val reflect.Value, w *encbuf) error { + switch { + case !val.IsNil(): + return etypeinfo.writer(val.Elem(), w) + case kind == reflect.Struct: + // encoding the zero value of a struct could trigger + // infinite recursion, avoid that. + w.listEnd(w.list()) + return nil + default: + return etypeinfo.writer(zero, w) + } + } + return writer, err +} + +// putint writes i to the beginning of b in with big endian byte +// order, using the least number of bytes needed to represent i. +func putint(b []byte, i uint64) (size int) { + switch { + case i < (1 << 8): + b[0] = byte(i) + return 1 + case i < (1 << 16): + b[0] = byte(i >> 8) + b[1] = byte(i) + return 2 + case i < (1 << 24): + b[0] = byte(i >> 16) + b[1] = byte(i >> 8) + b[2] = byte(i) + return 3 + case i < (1 << 32): + b[0] = byte(i >> 24) + b[1] = byte(i >> 16) + b[2] = byte(i >> 8) + b[3] = byte(i) + return 4 + case i < (1 << 40): + b[0] = byte(i >> 32) + b[1] = byte(i >> 24) + b[2] = byte(i >> 16) + b[3] = byte(i >> 8) + b[4] = byte(i) + return 5 + case i < (1 << 48): + b[0] = byte(i >> 40) + b[1] = byte(i >> 32) + b[2] = byte(i >> 24) + b[3] = byte(i >> 16) + b[4] = byte(i >> 8) + b[5] = byte(i) + return 6 + case i < (1 << 56): + b[0] = byte(i >> 48) + b[1] = byte(i >> 40) + b[2] = byte(i >> 32) + b[3] = byte(i >> 24) + b[4] = byte(i >> 16) + b[5] = byte(i >> 8) + b[6] = byte(i) + return 7 + default: + b[0] = byte(i >> 56) + b[1] = byte(i >> 48) + b[2] = byte(i >> 40) + b[3] = byte(i >> 32) + b[4] = byte(i >> 24) + b[5] = byte(i >> 16) + b[6] = byte(i >> 8) + b[7] = byte(i) + return 8 + } +} + +// intsize computes the minimum number of bytes required to store i. +func intsize(i uint64) (size int) { + for size = 1; ; size++ { + if i >>= 8; i == 0 { + return size + } + } + panic("not reached") +} |