Merge pull request #1096 from letsencrypt/godeps_gsb

add Godeps for Google Safe Browsing PR
This commit is contained in:
Roland Bracewell Shoemaker 2015-11-06 11:27:11 -08:00
commit a6a669f4a6
61 changed files with 13602 additions and 40 deletions

19
Godeps/Godeps.json generated
View File

@ -1,6 +1,6 @@
{
"ImportPath": "github.com/letsencrypt/boulder",
"GoVersion": "go1.5",
"GoVersion": "go1.5.1",
"Packages": [
"./..."
],
@ -100,6 +100,10 @@
"ImportPath": "github.com/golang/mock/gomock",
"Rev": "06883d979f10cc178f2716846215c8cf90f9e363"
},
{
"ImportPath": "github.com/golang/protobuf/proto",
"Rev": "a1dfa5ef89a13a0aa4be5a6f81179db10bfeea36"
},
{
"ImportPath": "github.com/jmhodges/clock",
"Rev": "3c4ebd218625c9364c33db6d39c276d80c3090c6"
@ -108,6 +112,15 @@
"ImportPath": "github.com/letsencrypt/go-jose",
"Rev": "e7bd87a386998d423741e8e370af1a22638767e0"
},
{
"ImportPath": "github.com/letsencrypt/go-safe-browsing-api",
"Comment": "2.0.0-2-g814cea4",
"Rev": "814cea4d6d3063540dc15c3d93754eff4eaa756b"
},
{
"ImportPath": "github.com/letsencrypt/net/publicsuffix",
"Rev": "adbe5512bf2d8766546da71cf90b681c519cb39f"
},
{
"ImportPath": "github.com/miekg/dns",
"Rev": "7ff8d29c8b70b10f383a11f03b7bf5b7408bf41a"
@ -125,8 +138,8 @@
"Rev": "287a1d87db5d649b01d6193bd9d07e909f08094c"
},
{
"ImportPath": "github.com/letsencrypt/net/publicsuffix",
"Rev": "adbe5512bf2d8766546da71cf90b681c519cb39f"
"ImportPath": "golang.org/x/net/publicsuffix",
"Rev": "ce84af2e5bf21582345e478b116afc7d4efaba3d"
},
{
"ImportPath": "gopkg.in/gorp.v1",

View File

@ -0,0 +1,31 @@
Go support for Protocol Buffers - Google's data interchange format
Copyright 2010 The Go Authors. All rights reserved.
https://github.com/golang/protobuf
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -0,0 +1,43 @@
# Go support for Protocol Buffers - Google's data interchange format
#
# Copyright 2010 The Go Authors. All rights reserved.
# https://github.com/golang/protobuf
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Build and install the proto package.
install:
	go install

# Run the test suite (requires the package installed and test protos generated).
test: install generate-test-pbs
	go test

# Regenerate the .pb.go files used by the tests.
generate-test-pbs:
	make install
	make -C testdata
	protoc --go_out=Mtestdata/test.proto=github.com/golang/protobuf/proto/testdata:. proto3_proto/proto3.proto
	make

View File

@ -0,0 +1,223 @@
// Go support for Protocol Buffers - Google's data interchange format
//
// Copyright 2011 The Go Authors. All rights reserved.
// https://github.com/golang/protobuf
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Protocol buffer deep copy and merge.
// TODO: MessageSet and RawMessage.
package proto
import (
"log"
"reflect"
"strings"
)
// Clone returns a deep copy of a protocol buffer.
// A nil message is returned unchanged.
func Clone(pb Message) Message {
	src := reflect.ValueOf(pb)
	if src.IsNil() {
		// Nothing to copy.
		return pb
	}
	// Allocate a fresh, zero-valued message of the same concrete type.
	dst := reflect.New(src.Type().Elem())
	// Merging into an empty destination is equivalent to a deep copy.
	mergeStruct(dst.Elem(), src.Elem())
	return dst.Interface().(Message)
}
// Merge merges src into dst.
// Required and optional fields that are set in src will be set to that value in dst.
// Elements of repeated fields will be appended.
// Merge panics if src and dst are not the same type, or if dst is nil.
func Merge(dst, src Message) {
	srcVal := reflect.ValueOf(src)
	dstVal := reflect.ValueOf(dst)
	switch {
	case dstVal.IsNil():
		panic("proto: nil destination")
	case srcVal.Type() != dstVal.Type():
		// Explicit test prior to mergeStruct so that mistyped nils will fail
		panic("proto: type mismatch")
	case srcVal.IsNil():
		// Merging nil into non-nil is a quiet no-op
		return
	}
	mergeStruct(dstVal.Elem(), srcVal.Elem())
}
// mergeStruct merges the fields of struct value in into out.
// Both must be addressable struct values of the same type.
func mergeStruct(out, in reflect.Value) {
	sprop := GetProperties(in.Type())
	for i := 0; i < in.NumField(); i++ {
		f := in.Type().Field(i)
		// Skip generated bookkeeping fields (XXX_unrecognized, XXX_extensions, ...);
		// they are handled specially below.
		if strings.HasPrefix(f.Name, "XXX_") {
			continue
		}
		mergeAny(out.Field(i), in.Field(i), false, sprop.Prop[i])
	}
	// If the message supports extensions, deep-copy those as well.
	if emIn, ok := in.Addr().Interface().(extendableProto); ok {
		emOut := out.Addr().Interface().(extendableProto)
		mergeExtension(emOut.ExtensionMap(), emIn.ExtensionMap())
	}
	// Append any unrecognized wire data, copying the bytes so the two
	// messages do not share a backing array.
	uf := in.FieldByName("XXX_unrecognized")
	if !uf.IsValid() {
		return
	}
	uin := uf.Bytes()
	if len(uin) > 0 {
		out.FieldByName("XXX_unrecognized").SetBytes(append([]byte(nil), uin...))
	}
}
// mergeAny performs a merge between two values of the same type.
// viaPtr indicates whether the values were indirected through a pointer (implying proto2).
// prop is set if this is a struct field (it may be nil).
func mergeAny(out, in reflect.Value, viaPtr bool, prop *Properties) {
	if in.Type() == protoMessageType {
		if !in.IsNil() {
			if out.IsNil() {
				out.Set(reflect.ValueOf(Clone(in.Interface().(Message))))
			} else {
				Merge(out.Interface().(Message), in.Interface().(Message))
			}
		}
		return
	}
	switch in.Kind() {
	case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Int32, reflect.Int64,
		reflect.String, reflect.Uint32, reflect.Uint64:
		// Scalar field. Without pointer indirection (proto3 style) the zero
		// value means "unset" and must not overwrite the destination.
		if !viaPtr && isProto3Zero(in) {
			return
		}
		out.Set(in)
	case reflect.Interface:
		// Probably a oneof field; copy non-nil values.
		if in.IsNil() {
			return
		}
		// Allocate destination if it is not set, or set to a different type.
		// Otherwise we will merge as normal.
		if out.IsNil() || out.Elem().Type() != in.Elem().Type() {
			out.Set(reflect.New(in.Elem().Elem().Type())) // interface -> *T -> T -> new(T)
		}
		mergeAny(out.Elem(), in.Elem(), false, nil)
	case reflect.Map:
		if in.Len() == 0 {
			return
		}
		if out.IsNil() {
			out.Set(reflect.MakeMap(in.Type()))
		}
		// For maps with value types of *T or []byte we need to deep copy each value.
		elemKind := in.Type().Elem().Kind()
		for _, key := range in.MapKeys() {
			var val reflect.Value
			switch elemKind {
			case reflect.Ptr:
				// Message-valued map: allocate and deep-merge.
				val = reflect.New(in.Type().Elem().Elem())
				mergeAny(val, in.MapIndex(key), false, nil)
			case reflect.Slice:
				// []byte value: copy the bytes.
				val = in.MapIndex(key)
				val = reflect.ValueOf(append([]byte{}, val.Bytes()...))
			default:
				// Scalar value: safe to share.
				val = in.MapIndex(key)
			}
			out.SetMapIndex(key, val)
		}
	case reflect.Ptr:
		if in.IsNil() {
			return
		}
		if out.IsNil() {
			out.Set(reflect.New(in.Elem().Type()))
		}
		// Values reached through a pointer are proto2-style: viaPtr=true.
		mergeAny(out.Elem(), in.Elem(), true, nil)
	case reflect.Slice:
		if in.IsNil() {
			return
		}
		if in.Type().Elem().Kind() == reflect.Uint8 {
			// []byte is a scalar bytes field, not a repeated field.
			// Edge case: if this is in a proto3 message, a zero length
			// bytes field is considered the zero value, and should not
			// be merged.
			if prop != nil && prop.proto3 && in.Len() == 0 {
				return
			}
			// Make a deep copy.
			// Append to []byte{} instead of []byte(nil) so that we never end up
			// with a nil result.
			out.SetBytes(append([]byte{}, in.Bytes()...))
			return
		}
		n := in.Len()
		if out.IsNil() {
			out.Set(reflect.MakeSlice(in.Type(), 0, n))
		}
		switch in.Type().Elem().Kind() {
		case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Int32, reflect.Int64,
			reflect.String, reflect.Uint32, reflect.Uint64:
			// Slices of scalars can be appended wholesale.
			out.Set(reflect.AppendSlice(out, in))
		default:
			// Slices of aggregates need a per-element deep merge.
			for i := 0; i < n; i++ {
				x := reflect.Indirect(reflect.New(in.Type().Elem()))
				mergeAny(x, in.Index(i), false, nil)
				out.Set(reflect.Append(out, x))
			}
		}
	case reflect.Struct:
		mergeStruct(out, in)
	default:
		// unknown type, so not a protocol buffer
		log.Printf("proto: don't know how to copy %v", in)
	}
}
// mergeExtension deep-copies every extension in in into out,
// duplicating both the decoded value and any raw encoding.
func mergeExtension(out, in map[int32]Extension) {
	for num, src := range in {
		dst := Extension{desc: src.desc}
		if src.value != nil {
			// Deep-copy the decoded value via mergeAny.
			tmp := reflect.New(reflect.TypeOf(src.value)).Elem()
			mergeAny(tmp, reflect.ValueOf(src.value), false, nil)
			dst.value = tmp.Interface()
		}
		if src.enc != nil {
			// Duplicate the raw encoded bytes.
			dst.enc = make([]byte, len(src.enc))
			copy(dst.enc, src.enc)
		}
		out[num] = dst
	}
}

View File

@ -0,0 +1,867 @@
// Go support for Protocol Buffers - Google's data interchange format
//
// Copyright 2010 The Go Authors. All rights reserved.
// https://github.com/golang/protobuf
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package proto
/*
* Routines for decoding protocol buffer data to construct in-memory representations.
*/
import (
"errors"
"fmt"
"io"
"os"
"reflect"
)
// errOverflow is returned when an integer is too large to be represented.
var errOverflow = errors.New("proto: integer overflow")
// ErrInternalBadWireType is returned by generated code when an incorrect
// wire type is encountered. It does not get returned to user code.
var ErrInternalBadWireType = errors.New("proto: internal error: bad wiretype for oneof")
// The fundamental decoders that interpret bytes on the wire.
// Those that take integer types all return uint64 and are
// therefore of type valueDecoder.
// DecodeVarint reads a varint-encoded integer from the slice.
// It returns the integer and the number of bytes consumed, or
// zero if there is not enough.
// This is the format for the
// int32, int64, uint32, uint64, bool, and enum
// protocol buffer types.
func DecodeVarint(buf []byte) (x uint64, n int) {
	var shift uint
	for n < len(buf) {
		if shift >= 64 {
			// More than ten continuation bytes: overflows 64 bits.
			return 0, 0
		}
		b := buf[n]
		n++
		x |= uint64(b&0x7F) << shift
		if b < 0x80 {
			// High bit clear: this was the final byte.
			return x, n
		}
		shift += 7
	}
	// Ran out of bytes mid-varint.
	return 0, 0
}
// DecodeVarint reads a varint-encoded integer from the Buffer.
// This is the format for the
// int32, int64, uint32, uint64, bool, and enum
// protocol buffer types.
func (p *Buffer) DecodeVarint() (x uint64, err error) {
	idx, limit := p.index, len(p.buf)
	for shift := uint(0); shift < 64; shift += 7 {
		if idx >= limit {
			// Ran off the end of the buffer mid-varint.
			// p.index is deliberately left unchanged on error.
			return x, io.ErrUnexpectedEOF
		}
		c := p.buf[idx]
		idx++
		x |= (uint64(c) & 0x7F) << shift
		if c < 0x80 {
			// Final byte: commit the new read position.
			p.index = idx
			return x, nil
		}
	}
	// The number is too large to represent in a 64-bit value.
	return x, errOverflow
}
// DecodeFixed64 reads a 64-bit little-endian integer from the Buffer.
// This is the format for the
// fixed64, sfixed64, and double protocol buffer types.
func (p *Buffer) DecodeFixed64() (x uint64, err error) {
	end := p.index + 8
	// end < 0 guards against integer overflow of the index.
	if end < 0 || end > len(p.buf) {
		return 0, io.ErrUnexpectedEOF
	}
	b := p.buf[p.index:end]
	p.index = end
	// Assemble the eight payload bytes little-endian, highest byte first.
	for j := 7; j >= 0; j-- {
		x = x<<8 | uint64(b[j])
	}
	return x, nil
}
// DecodeFixed32 reads a 32-bit little-endian integer from the Buffer.
// This is the format for the
// fixed32, sfixed32, and float protocol buffer types.
func (p *Buffer) DecodeFixed32() (x uint64, err error) {
	end := p.index + 4
	// end < 0 guards against integer overflow of the index.
	if end < 0 || end > len(p.buf) {
		return 0, io.ErrUnexpectedEOF
	}
	b := p.buf[p.index:end]
	p.index = end
	// Assemble the four payload bytes little-endian, highest byte first.
	for j := 3; j >= 0; j-- {
		x = x<<8 | uint64(b[j])
	}
	return x, nil
}
// DecodeZigzag64 reads a zigzag-encoded 64-bit integer
// from the Buffer.
// This is the format used for the sint64 protocol buffer type.
func (p *Buffer) DecodeZigzag64() (x uint64, err error) {
	v, err := p.DecodeVarint()
	if err != nil {
		return v, err
	}
	// Undo zigzag: (0, -1, 1, -2, ...) back to two's complement.
	return (v >> 1) ^ uint64((int64(v&1)<<63)>>63), nil
}
// DecodeZigzag32 reads a zigzag-encoded 32-bit integer
// from the Buffer.
// This is the format used for the sint32 protocol buffer type.
func (p *Buffer) DecodeZigzag32() (x uint64, err error) {
	v, err := p.DecodeVarint()
	if err != nil {
		return v, err
	}
	// Undo zigzag in 32-bit space, then widen to uint64.
	return uint64((uint32(v) >> 1) ^ uint32((int32(v&1)<<31)>>31)), nil
}
// These are not ValueDecoders: they produce an array of bytes or a string.
// bytes, embedded messages
// DecodeRawBytes reads a count-delimited byte buffer from the Buffer.
// This is the format used for the bytes protocol buffer
// type and for embedded messages.
// When alloc is false the returned slice aliases the Buffer's storage.
func (p *Buffer) DecodeRawBytes(alloc bool) (buf []byte, err error) {
	count, err := p.DecodeVarint()
	if err != nil {
		return nil, err
	}

	nb := int(count)
	if nb < 0 {
		return nil, fmt.Errorf("proto: bad byte length %d", nb)
	}
	end := p.index + nb
	// end < p.index catches integer overflow of the index.
	if end < p.index || end > len(p.buf) {
		return nil, io.ErrUnexpectedEOF
	}

	if !alloc {
		// todo: check if can get more uses of alloc=false
		buf = p.buf[p.index:end]
		p.index += nb
		return buf, nil
	}

	// Caller needs an independent copy.
	buf = make([]byte, nb)
	copy(buf, p.buf[p.index:])
	p.index += nb
	return buf, nil
}
// DecodeStringBytes reads an encoded string from the Buffer.
// This is the format used for the proto2 string type.
func (p *Buffer) DecodeStringBytes() (s string, err error) {
	raw, err := p.DecodeRawBytes(false)
	if err != nil {
		return "", err
	}
	// string() copies, so aliasing the buffer above is safe.
	return string(raw), nil
}
// Skip the next item in the buffer. Its wire type is decoded and presented as an argument.
// If the protocol buffer has extensions, and the field matches, add it as an extension.
// Otherwise, if the XXX_unrecognized field exists, append the skipped data there.
func (o *Buffer) skipAndSave(t reflect.Type, tag, wire int, base structPointer, unrecField field) error {
	oi := o.index // start of the skipped field's bytes
	err := o.skip(t, tag, wire)
	if err != nil {
		return err
	}
	// Without an XXX_unrecognized field there is nowhere to save the data.
	if !unrecField.IsValid() {
		return nil
	}
	ptr := structPointer_Bytes(base, unrecField)
	// Add the skipped field to struct field. Temporarily swap the
	// unrecognized-bytes slice in as o.buf so EncodeVarint appends the
	// tag/wire header to it, then append the raw skipped payload.
	obuf := o.buf
	o.buf = *ptr
	o.EncodeVarint(uint64(tag<<3 | wire))
	*ptr = append(o.buf, obuf[oi:o.index]...)
	o.buf = obuf // restore the original buffer
	return nil
}
// Skip the next item in the buffer. Its wire type is decoded and presented as an argument.
func (o *Buffer) skip(t reflect.Type, tag, wire int) error {
	switch wire {
	case WireVarint:
		_, err := o.DecodeVarint()
		return err
	case WireFixed64:
		_, err := o.DecodeFixed64()
		return err
	case WireBytes:
		_, err := o.DecodeRawBytes(false)
		return err
	case WireFixed32:
		_, err := o.DecodeFixed32()
		return err
	case WireStartGroup:
		// Recursively consume nested items until the end-group marker.
		for {
			u, err := o.DecodeVarint()
			if err != nil {
				return err
			}
			fwire := int(u & 0x7)
			if fwire == WireEndGroup {
				return nil
			}
			if err := o.skip(t, int(u>>3), fwire); err != nil {
				return err
			}
		}
	default:
		return fmt.Errorf("proto: can't skip unknown wire type %d for %s", wire, t)
	}
}
// Unmarshaler is the interface representing objects that can
// unmarshal themselves. The method should reset the receiver before
// decoding starts. The argument points to data that may be
// overwritten, so implementations should not keep references to the
// buffer.
type Unmarshaler interface {
	// Unmarshal decodes the wire-format bytes into the receiver.
	Unmarshal([]byte) error
}
// Unmarshal parses the protocol buffer representation in buf and places the
// decoded result in pb. If the struct underlying pb does not match
// the data in buf, the results can be unpredictable.
//
// Unmarshal resets pb before starting to unmarshal, so any
// existing data in pb is always removed. Use UnmarshalMerge
// to preserve and append to existing data.
func Unmarshal(buf []byte, pb Message) error {
	// Clear the destination, then decode into the now-empty message.
	pb.Reset()
	return UnmarshalMerge(buf, pb)
}
// UnmarshalMerge parses the protocol buffer representation in buf and
// writes the decoded result to pb. If the struct underlying pb does not match
// the data in buf, the results can be unpredictable.
//
// UnmarshalMerge merges into existing data in pb.
// Most code should use Unmarshal instead.
func UnmarshalMerge(buf []byte, pb Message) error {
	// Messages implementing Unmarshaler decode themselves.
	if u, ok := pb.(Unmarshaler); ok {
		return u.Unmarshal(buf)
	}
	// Otherwise decode via a fresh Buffer over buf.
	return NewBuffer(buf).Unmarshal(pb)
}
// DecodeMessage reads a count-delimited message from the Buffer.
func (p *Buffer) DecodeMessage(pb Message) error {
	payload, err := p.DecodeRawBytes(false)
	if err != nil {
		return err
	}
	// Decode the embedded message from its own temporary buffer.
	return NewBuffer(payload).Unmarshal(pb)
}
// DecodeGroup reads a tag-delimited group from the Buffer.
func (p *Buffer) DecodeGroup(pb Message) error {
	mt, mbase, err := getbase(pb)
	if err != nil {
		return err
	}
	// is_group=true: decoding stops at the matching end-group marker.
	return p.unmarshalType(mt.Elem(), GetProperties(mt.Elem()), true, mbase)
}
// Unmarshal parses the protocol buffer representation in the
// Buffer and places the decoded result in pb. If the struct
// underlying pb does not match the data in the buffer, the results can be
// unpredictable.
func (p *Buffer) Unmarshal(pb Message) error {
	// A self-unmarshaling message consumes everything that remains.
	if u, ok := pb.(Unmarshaler); ok {
		err := u.Unmarshal(p.buf[p.index:])
		p.index = len(p.buf)
		return err
	}

	mt, mbase, err := getbase(pb)
	if err != nil {
		return err
	}

	err = p.unmarshalType(mt.Elem(), GetProperties(mt.Elem()), false, mbase)

	if collectStats {
		stats.Decode++
	}

	return err
}
// unmarshalType does the work of unmarshaling a structure.
// st is the struct type, prop its decoded properties, is_group selects
// group (tag-delimited) vs. message (EOF-delimited) framing, and base
// points at the destination struct.
func (o *Buffer) unmarshalType(st reflect.Type, prop *StructProperties, is_group bool, base structPointer) error {
	var state errorState
	// required counts down as required fields are seen; reqFields is a
	// bitmap of required tags 1-64 already decoded (to detect reuse).
	required, reqFields := prop.reqCount, uint64(0)
	var err error
	for err == nil && o.index < len(o.buf) {
		oi := o.index // start of this field's bytes, for extension capture
		var u uint64
		u, err = o.DecodeVarint()
		if err != nil {
			break
		}
		wire := int(u & 0x7)
		if wire == WireEndGroup {
			if is_group {
				return nil // input is satisfied
			}
			return fmt.Errorf("proto: %s: wiretype end group for non-group", st)
		}
		tag := int(u >> 3)
		if tag <= 0 {
			return fmt.Errorf("proto: %s: illegal tag %d (wire type %d)", st, tag, wire)
		}
		fieldnum, ok := prop.decoderTags.get(tag)
		if !ok {
			// Maybe it's an extension?
			if prop.extendable {
				if e := structPointer_Interface(base, st).(extendableProto); isExtensionField(e, int32(tag)) {
					if err = o.skip(st, tag, wire); err == nil {
						// Append the raw bytes (tag included) to the
						// extension's encoded form.
						ext := e.ExtensionMap()[int32(tag)] // may be missing
						ext.enc = append(ext.enc, o.buf[oi:o.index]...)
						e.ExtensionMap()[int32(tag)] = ext
					}
					continue
				}
			}
			// Maybe it's a oneof?
			if prop.oneofUnmarshaler != nil {
				m := structPointer_Interface(base, st).(Message)
				// First return value indicates whether tag is a oneof field.
				ok, err = prop.oneofUnmarshaler(m, tag, wire, o)
				if err == ErrInternalBadWireType {
					// Map the error to something more descriptive.
					// Do the formatting here to save generated code space.
					err = fmt.Errorf("bad wiretype for oneof field in %T", m)
				}
				if ok {
					continue
				}
			}
			// Unknown field: skip it, saving the bytes if possible.
			err = o.skipAndSave(st, tag, wire, base, prop.unrecField)
			continue
		}
		p := prop.Prop[fieldnum]
		if p.dec == nil {
			fmt.Fprintf(os.Stderr, "proto: no protobuf decoder for %s.%s\n", st, st.Field(fieldnum).Name)
			continue
		}
		dec := p.dec
		if wire != WireStartGroup && wire != p.WireType {
			if wire == WireBytes && p.packedDec != nil {
				// a packable field
				dec = p.packedDec
			} else {
				err = fmt.Errorf("proto: bad wiretype for field %s.%s: got wiretype %d, want %d", st, st.Field(fieldnum).Name, wire, p.WireType)
				continue
			}
		}
		decErr := dec(o, p, base)
		if decErr != nil && !state.shouldContinue(decErr, p) {
			err = decErr
		}
		if err == nil && p.Required {
			// Successfully decoded a required field.
			if tag <= 64 {
				// use bitmap for fields 1-64 to catch field reuse.
				var mask uint64 = 1 << uint64(tag-1)
				if reqFields&mask == 0 {
					// new required field
					reqFields |= mask
					required--
				}
			} else {
				// This is imprecise. It can be fooled by a required field
				// with a tag > 64 that is encoded twice; that's very rare.
				// A fully correct implementation would require allocating
				// a data structure, which we would like to avoid.
				required--
			}
		}
	}
	if err == nil {
		if is_group {
			// A group must be terminated by WireEndGroup, not by EOF.
			return io.ErrUnexpectedEOF
		}
		if state.err != nil {
			return state.err
		}
		if required > 0 {
			// Not enough information to determine the exact field. If we use extra
			// CPU, we could determine the field only if the missing required field
			// has a tag <= 64 and we check reqFields.
			return &RequiredNotSetError{"{Unknown}"}
		}
	}
	return err
}
// Individual type decoders
// For each,
//	u is the decoded value,
//	v is a pointer to the field (pointer) in the struct

// Sizes of the pools to allocate inside the Buffer.
// The goal is modest amortization and allocation
// on at least 16-byte boundaries.
const (
	boolPoolSize   = 16 // bools are 1 byte each
	uint32PoolSize = 8  // uint32s are 4 bytes each
	uint64PoolSize = 4  // uint64s are 8 bytes each
)
// Decode a bool.
func (o *Buffer) dec_bool(p *Properties, base structPointer) error {
	v, err := p.valDec(o)
	if err != nil {
		return err
	}
	// Hand out pointers from a small pool so each decoded bool does not
	// cost its own allocation; refill the pool when it is exhausted.
	if len(o.bools) == 0 {
		o.bools = make([]bool, boolPoolSize)
	}
	o.bools[0] = v != 0
	*structPointer_Bool(base, p.field) = &o.bools[0]
	o.bools = o.bools[1:]
	return nil
}
// Decode a proto3 bool (stored by value, not pointer).
func (o *Buffer) dec_proto3_bool(p *Properties, base structPointer) error {
	v, err := p.valDec(o)
	if err != nil {
		return err
	}
	*structPointer_BoolVal(base, p.field) = v != 0
	return nil
}
// Decode an int32.
func (o *Buffer) dec_int32(p *Properties, base structPointer) error {
	v, err := p.valDec(o)
	if err != nil {
		return err
	}
	// Store into the pooled word32 backing the *int32 field.
	word32_Set(structPointer_Word32(base, p.field), o, uint32(v))
	return nil
}
// Decode a proto3 int32 (stored by value, not pointer).
func (o *Buffer) dec_proto3_int32(p *Properties, base structPointer) error {
	v, err := p.valDec(o)
	if err != nil {
		return err
	}
	word32Val_Set(structPointer_Word32Val(base, p.field), uint32(v))
	return nil
}
// Decode an int64.
func (o *Buffer) dec_int64(p *Properties, base structPointer) error {
	v, err := p.valDec(o)
	if err != nil {
		return err
	}
	// Store into the pooled word64 backing the *int64 field.
	word64_Set(structPointer_Word64(base, p.field), o, v)
	return nil
}
// Decode a proto3 int64 (stored by value, not pointer).
func (o *Buffer) dec_proto3_int64(p *Properties, base structPointer) error {
	v, err := p.valDec(o)
	if err != nil {
		return err
	}
	word64Val_Set(structPointer_Word64Val(base, p.field), o, v)
	return nil
}
// Decode a string.
func (o *Buffer) dec_string(p *Properties, base structPointer) error {
	str, err := o.DecodeStringBytes()
	if err != nil {
		return err
	}
	// proto2 string fields are pointers; point at the fresh copy.
	*structPointer_String(base, p.field) = &str
	return nil
}
// Decode a proto3 string (stored by value, not pointer).
func (o *Buffer) dec_proto3_string(p *Properties, base structPointer) error {
	str, err := o.DecodeStringBytes()
	if err != nil {
		return err
	}
	*structPointer_StringVal(base, p.field) = str
	return nil
}
// Decode a slice of bytes ([]byte).
func (o *Buffer) dec_slice_byte(p *Properties, base structPointer) error {
	// alloc=true: the field must own its bytes, not alias the buffer.
	raw, err := o.DecodeRawBytes(true)
	if err != nil {
		return err
	}
	*structPointer_Bytes(base, p.field) = raw
	return nil
}
// Decode a slice of bools ([]bool).
func (o *Buffer) dec_slice_bool(p *Properties, base structPointer) error {
	v, err := p.valDec(o)
	if err != nil {
		return err
	}
	dst := structPointer_BoolSlice(base, p.field)
	*dst = append(*dst, v != 0)
	return nil
}
// Decode a slice of bools ([]bool) in packed format.
func (o *Buffer) dec_slice_packed_bool(p *Properties, base structPointer) error {
	dst := structPointer_BoolSlice(base, p.field)

	// A packed field is prefixed with its byte length.
	count, err := o.DecodeVarint()
	if err != nil {
		return err
	}
	end := o.index + int(count) // first index past the packed payload
	if end < o.index {
		// Index overflow: the claimed byte length is bogus.
		return errOverflow
	}

	vals := *dst
	for o.index < end {
		u, err := p.valDec(o)
		if err != nil {
			return err
		}
		vals = append(vals, u != 0)
	}
	*dst = vals
	return nil
}
// Decode a slice of int32s ([]int32).
func (o *Buffer) dec_slice_int32(p *Properties, base structPointer) error {
	v, err := p.valDec(o)
	if err != nil {
		return err
	}
	structPointer_Word32Slice(base, p.field).Append(uint32(v))
	return nil
}
// Decode a slice of int32s ([]int32) in packed format.
func (o *Buffer) dec_slice_packed_int32(p *Properties, base structPointer) error {
	dst := structPointer_Word32Slice(base, p.field)

	// A packed field is prefixed with its byte length.
	count, err := o.DecodeVarint()
	if err != nil {
		return err
	}
	end := o.index + int(count) // first index past the packed payload
	if end < o.index {
		// Index overflow: the claimed byte length is bogus.
		return errOverflow
	}
	for o.index < end {
		v, err := p.valDec(o)
		if err != nil {
			return err
		}
		dst.Append(uint32(v))
	}
	return nil
}
// Decode a slice of int64s ([]int64).
func (o *Buffer) dec_slice_int64(p *Properties, base structPointer) error {
	v, err := p.valDec(o)
	if err != nil {
		return err
	}
	structPointer_Word64Slice(base, p.field).Append(v)
	return nil
}
// Decode a slice of int64s ([]int64) in packed format.
func (o *Buffer) dec_slice_packed_int64(p *Properties, base structPointer) error {
	dst := structPointer_Word64Slice(base, p.field)

	// A packed field is prefixed with its byte length.
	count, err := o.DecodeVarint()
	if err != nil {
		return err
	}
	end := o.index + int(count) // first index past the packed payload
	if end < o.index {
		// Index overflow: the claimed byte length is bogus.
		return errOverflow
	}
	for o.index < end {
		v, err := p.valDec(o)
		if err != nil {
			return err
		}
		dst.Append(v)
	}
	return nil
}
// Decode a slice of strings ([]string).
func (o *Buffer) dec_slice_string(p *Properties, base structPointer) error {
	str, err := o.DecodeStringBytes()
	if err != nil {
		return err
	}
	dst := structPointer_StringSlice(base, p.field)
	*dst = append(*dst, str)
	return nil
}
// Decode a slice of slice of bytes ([][]byte).
func (o *Buffer) dec_slice_slice_byte(p *Properties, base structPointer) error {
	// alloc=true: each element must own its bytes.
	raw, err := o.DecodeRawBytes(true)
	if err != nil {
		return err
	}
	dst := structPointer_BytesSlice(base, p.field)
	*dst = append(*dst, raw)
	return nil
}
// Decode a map field.
// A map entry is encoded as an embedded message with field 1 = key and
// field 2 = value; this decodes one such entry and inserts it.
func (o *Buffer) dec_new_map(p *Properties, base structPointer) error {
	raw, err := o.DecodeRawBytes(false)
	if err != nil {
		return err
	}
	oi := o.index       // index at the end of this map entry
	o.index -= len(raw) // move buffer back to start of map entry

	mptr := structPointer_NewAt(base, p.field, p.mtype) // *map[K]V
	if mptr.Elem().IsNil() {
		// Lazily allocate the map on first entry.
		mptr.Elem().Set(reflect.MakeMap(mptr.Type().Elem()))
	}
	v := mptr.Elem() // map[K]V

	// Prepare addressable doubly-indirect placeholders for the key and value types.
	// See enc_new_map for why.
	keyptr := reflect.New(reflect.PtrTo(p.mtype.Key())).Elem() // addressable *K
	keybase := toStructPointer(keyptr.Addr())                  // **K

	var valbase structPointer
	var valptr reflect.Value
	switch p.mtype.Elem().Kind() {
	case reflect.Slice:
		// []byte
		var dummy []byte
		valptr = reflect.ValueOf(&dummy)  // *[]byte
		valbase = toStructPointer(valptr) // *[]byte
	case reflect.Ptr:
		// message; valptr is **Msg; need to allocate the intermediate pointer
		valptr = reflect.New(reflect.PtrTo(p.mtype.Elem())).Elem() // addressable *V
		valptr.Set(reflect.New(valptr.Type().Elem()))
		valbase = toStructPointer(valptr)
	default:
		// everything else
		valptr = reflect.New(reflect.PtrTo(p.mtype.Elem())).Elem() // addressable *V
		valbase = toStructPointer(valptr.Addr())                   // **V
	}

	// Decode.
	// This parses a restricted wire format, namely the encoding of a message
	// with two fields. See enc_new_map for the format.
	for o.index < oi {
		// tagcode for key and value properties are always a single byte
		// because they have tags 1 and 2.
		tagcode := o.buf[o.index]
		o.index++
		switch tagcode {
		case p.mkeyprop.tagcode[0]:
			if err := p.mkeyprop.dec(o, p.mkeyprop, keybase); err != nil {
				return err
			}
		case p.mvalprop.tagcode[0]:
			if err := p.mvalprop.dec(o, p.mvalprop, valbase); err != nil {
				return err
			}
		default:
			// TODO: Should we silently skip this instead?
			return fmt.Errorf("proto: bad map data tag %d", raw[0])
		}
	}
	keyelem, valelem := keyptr.Elem(), valptr.Elem()
	if !keyelem.IsValid() || !valelem.IsValid() {
		// We did not decode the key or the value in the map entry.
		// Either way, it's an invalid map entry.
		return fmt.Errorf("proto: bad map data: missing key/val")
	}

	v.SetMapIndex(keyelem, valelem)
	return nil
}
// Decode a group.
func (o *Buffer) dec_struct_group(p *Properties, base structPointer) error {
	ptr := structPointer_GetStructPointer(base, p.field)
	if structPointer_IsNil(ptr) {
		// allocate new nested message
		ptr = toStructPointer(reflect.New(p.stype))
		structPointer_SetStructPointer(base, p.field, ptr)
	}
	// Groups are tag-delimited: is_group=true stops at the end-group marker.
	return o.unmarshalType(p.stype, p.sprop, true, ptr)
}
// Decode an embedded message.
func (o *Buffer) dec_struct_message(p *Properties, base structPointer) (err error) {
	raw, e := o.DecodeRawBytes(false)
	if e != nil {
		return e
	}

	bas := structPointer_GetStructPointer(base, p.field)
	if structPointer_IsNil(bas) {
		// allocate new nested message
		bas = toStructPointer(reflect.New(p.stype))
		structPointer_SetStructPointer(base, p.field, bas)
	}

	// If the object can unmarshal itself, let it.
	if p.isUnmarshaler {
		iv := structPointer_Interface(bas, p.stype)
		return iv.(Unmarshaler).Unmarshal(raw)
	}

	// Temporarily point the Buffer at the embedded payload, decode,
	// then restore the outer buffer and position.
	obuf := o.buf
	oi := o.index
	o.buf = raw
	o.index = 0

	err = o.unmarshalType(p.stype, p.sprop, false, bas)
	o.buf = obuf
	o.index = oi

	return err
}
// Decode a slice of embedded messages.
// Thin wrapper over dec_slice_struct with is_group=false (length-delimited).
func (o *Buffer) dec_slice_struct_message(p *Properties, base structPointer) error {
	return o.dec_slice_struct(p, false, base)
}
// Decode a slice of embedded groups.
// Thin wrapper over dec_slice_struct with is_group=true (start/end-group tags).
func (o *Buffer) dec_slice_struct_group(p *Properties, base structPointer) error {
	return o.dec_slice_struct(p, true, base)
}
// Decode a slice of structs ([]*struct).
// A new element is allocated and appended to the slice before decoding, so a
// decoding error can leave a partially-filled element in place.
func (o *Buffer) dec_slice_struct(p *Properties, is_group bool, base structPointer) error {
	v := reflect.New(p.stype)
	bas := toStructPointer(v)
	structPointer_StructPointerSlice(base, p.field).Append(bas)
	if is_group {
		// Groups are decoded inline; no length prefix to read.
		err := o.unmarshalType(p.stype, p.sprop, is_group, bas)
		return err
	}
	raw, err := o.DecodeRawBytes(false)
	if err != nil {
		return err
	}
	// If the object can unmarshal itself, let it.
	if p.isUnmarshaler {
		iv := v.Interface()
		return iv.(Unmarshaler).Unmarshal(raw)
	}
	// Save the buffer state, decode the element from raw,
	// then restore so the caller continues where it left off.
	obuf := o.buf
	oi := o.index
	o.buf = raw
	o.index = 0
	err = o.unmarshalType(p.stype, p.sprop, is_group, bas)
	o.buf = obuf
	o.index = oi
	return err
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,267 @@
// Go support for Protocol Buffers - Google's data interchange format
//
// Copyright 2011 The Go Authors. All rights reserved.
// https://github.com/golang/protobuf
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Protocol buffer comparison.
// TODO: MessageSet.
package proto
import (
"bytes"
"log"
"reflect"
"strings"
)
/*
Equal returns true iff protocol buffers a and b are equal.
The arguments must both be pointers to protocol buffer structs.
Equality is defined in this way:
- Two messages are equal iff they are the same type,
corresponding fields are equal, unknown field sets
are equal, and extensions sets are equal.
- Two set scalar fields are equal iff their values are equal.
If the fields are of a floating-point type, remember that
NaN != x for all x, including NaN.
- Two repeated fields are equal iff their lengths are the same,
and their corresponding elements are equal (a "bytes" field,
although represented by []byte, is not a repeated field)
- Two unset fields are equal.
- Two unknown field sets are equal if their current
encoded state is equal.
- Two extension sets are equal iff they have corresponding
elements that are pairwise equal.
- Every other combination of things are not equal.
The return value is undefined if a and b are not protocol buffers.
*/
func Equal(a, b Message) bool {
	if a == nil || b == nil {
		// Two nils are equal to each other; a lone nil is not equal to anything.
		return a == b
	}
	va := reflect.ValueOf(a)
	vb := reflect.ValueOf(b)
	if va.Type() != vb.Type() {
		return false
	}
	if va.Kind() == reflect.Ptr {
		switch {
		case va.IsNil():
			return vb.IsNil()
		case vb.IsNil():
			return false
		}
		va = va.Elem()
		vb = vb.Elem()
	}
	// Only struct-backed messages can be compared field by field.
	if va.Kind() != reflect.Struct {
		return false
	}
	return equalStruct(va, vb)
}
// v1 and v2 are known to have the same type.
// Compares regular fields, then the extension map (if present), then the
// unrecognized-bytes field (if present).
func equalStruct(v1, v2 reflect.Value) bool {
	for i := 0; i < v1.NumField(); i++ {
		f := v1.Type().Field(i)
		// XXX_-prefixed bookkeeping fields are handled separately below.
		if strings.HasPrefix(f.Name, "XXX_") {
			continue
		}
		f1, f2 := v1.Field(i), v2.Field(i)
		if f.Type.Kind() == reflect.Ptr {
			if n1, n2 := f1.IsNil(), f2.IsNil(); n1 && n2 {
				// both unset
				continue
			} else if n1 != n2 {
				// set/unset mismatch
				return false
			}
			b1, ok := f1.Interface().(raw)
			if ok {
				b2 := f2.Interface().(raw)
				// RawMessage
				if !bytes.Equal(b1.Bytes(), b2.Bytes()) {
					return false
				}
				continue
			}
			// Both set: compare the pointed-to values.
			f1, f2 = f1.Elem(), f2.Elem()
		}
		if !equalAny(f1, f2) {
			return false
		}
	}
	if em1 := v1.FieldByName("XXX_extensions"); em1.IsValid() {
		em2 := v2.FieldByName("XXX_extensions")
		if !equalExtensions(v1.Type(), em1.Interface().(map[int32]Extension), em2.Interface().(map[int32]Extension)) {
			return false
		}
	}
	uf := v1.FieldByName("XXX_unrecognized")
	if !uf.IsValid() {
		return true
	}
	// Unknown fields are equal iff their current encoded bytes are equal.
	u1 := uf.Bytes()
	u2 := v2.FieldByName("XXX_unrecognized").Bytes()
	if !bytes.Equal(u1, u2) {
		return false
	}
	return true
}
// v1 and v2 are known to have the same type.
// Recursively compares two field values of any supported protobuf kind.
func equalAny(v1, v2 reflect.Value) bool {
	if v1.Type() == protoMessageType {
		m1, _ := v1.Interface().(Message)
		m2, _ := v2.Interface().(Message)
		return Equal(m1, m2)
	}
	switch v1.Kind() {
	case reflect.Bool:
		return v1.Bool() == v2.Bool()
	case reflect.Float32, reflect.Float64:
		// Note: NaN != NaN under ==, matching protobuf equality semantics.
		return v1.Float() == v2.Float()
	case reflect.Int32, reflect.Int64:
		return v1.Int() == v2.Int()
	case reflect.Interface:
		// Probably a oneof field; compare the inner values.
		n1, n2 := v1.IsNil(), v2.IsNil()
		if n1 || n2 {
			return n1 == n2
		}
		e1, e2 := v1.Elem(), v2.Elem()
		if e1.Type() != e2.Type() {
			return false
		}
		return equalAny(e1, e2)
	case reflect.Map:
		if v1.Len() != v2.Len() {
			return false
		}
		for _, key := range v1.MapKeys() {
			val2 := v2.MapIndex(key)
			if !val2.IsValid() {
				// This key was not found in the second map.
				return false
			}
			if !equalAny(v1.MapIndex(key), val2) {
				return false
			}
		}
		return true
	case reflect.Ptr:
		return equalAny(v1.Elem(), v2.Elem())
	case reflect.Slice:
		if v1.Type().Elem().Kind() == reflect.Uint8 {
			// short circuit: []byte
			if v1.IsNil() != v2.IsNil() {
				return false
			}
			return bytes.Equal(v1.Interface().([]byte), v2.Interface().([]byte))
		}
		if v1.Len() != v2.Len() {
			return false
		}
		for i := 0; i < v1.Len(); i++ {
			if !equalAny(v1.Index(i), v2.Index(i)) {
				return false
			}
		}
		return true
	case reflect.String:
		return v1.Interface().(string) == v2.Interface().(string)
	case reflect.Struct:
		return equalStruct(v1, v2)
	case reflect.Uint32, reflect.Uint64:
		return v1.Uint() == v2.Uint()
	}
	// unknown type, so not a protocol buffer
	log.Printf("proto: don't know how to compare %v", v1)
	return false
}
// base is the struct type that the extensions are based on.
// em1 and em2 are extension maps.
// An extension may be present in decoded form (value) or still encoded (enc);
// mixed cases are resolved by decoding before comparing.
func equalExtensions(base reflect.Type, em1, em2 map[int32]Extension) bool {
	if len(em1) != len(em2) {
		return false
	}
	for extNum, e1 := range em1 {
		e2, ok := em2[extNum]
		if !ok {
			return false
		}
		m1, m2 := e1.value, e2.value
		if m1 != nil && m2 != nil {
			// Both are unencoded.
			if !equalAny(reflect.ValueOf(m1), reflect.ValueOf(m2)) {
				return false
			}
			continue
		}
		// At least one is encoded. To do a semantically correct comparison
		// we need to unmarshal them first.
		var desc *ExtensionDesc
		if m := extensionMaps[base]; m != nil {
			desc = m[extNum]
		}
		if desc == nil {
			// No registered descriptor: cannot decode, so skip rather than fail.
			log.Printf("proto: don't know how to compare extension %d of %v", extNum, base)
			continue
		}
		var err error
		if m1 == nil {
			m1, err = decodeExtension(e1.enc, desc)
		}
		if m2 == nil && err == nil {
			m2, err = decodeExtension(e2.enc, desc)
		}
		if err != nil {
			// The encoded form is invalid.
			log.Printf("proto: badly encoded extension %d of %v: %v", extNum, base, err)
			return false
		}
		if !equalAny(reflect.ValueOf(m1), reflect.ValueOf(m2)) {
			return false
		}
	}
	return true
}

View File

@ -0,0 +1,400 @@
// Go support for Protocol Buffers - Google's data interchange format
//
// Copyright 2010 The Go Authors. All rights reserved.
// https://github.com/golang/protobuf
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package proto
/*
* Types and routines for supporting protocol buffer extensions.
*/
import (
"errors"
"fmt"
"reflect"
"strconv"
"sync"
)
// ErrMissingExtension is the error returned by GetExtension if the named extension is not in the message.
var ErrMissingExtension = errors.New("proto: missing extension")
// ExtensionRange represents a range of message extensions for a protocol buffer.
// Used in code generated by the protocol compiler.
type ExtensionRange struct {
	Start, End int32 // both inclusive
}
// extendableProto is an interface implemented by any protocol buffer that may be extended.
type extendableProto interface {
	Message
	ExtensionRangeArray() []ExtensionRange
	ExtensionMap() map[int32]Extension
}
// extendableProtoType is the reflect.Type of the extendableProto interface,
// for reflection-based interface checks.
var extendableProtoType = reflect.TypeOf((*extendableProto)(nil)).Elem()
// ExtensionDesc represents an extension specification.
// Used in generated code from the protocol compiler.
type ExtensionDesc struct {
	ExtendedType  Message     // nil pointer to the type that is being extended
	ExtensionType interface{} // nil pointer to the extension type
	Field         int32       // field number
	Name          string      // fully-qualified name of extension, for text formatting
	Tag           string      // protobuf tag style
}
// repeated reports whether the extension is a repeated field.
// A []byte extension is a scalar "bytes" field, not a repeated field.
func (ed *ExtensionDesc) repeated() bool {
	t := reflect.TypeOf(ed.ExtensionType)
	return t.Kind() == reflect.Slice && t.Elem().Kind() != reflect.Uint8
}
// Extension represents an extension in a message.
type Extension struct {
	// When an extension is stored in a message using SetExtension
	// only desc and value are set. When the message is marshaled
	// enc will be set to the encoded form of the message.
	//
	// When a message is unmarshaled and contains extensions, each
	// extension will have only enc set. When such an extension is
	// accessed using GetExtension (or GetExtensions) desc and value
	// will be set.
	desc  *ExtensionDesc // descriptor identifying the extension
	value interface{}    // decoded Go value, if available
	enc   []byte         // wire-format encoding, if available
}
// SetRawExtension is for testing only.
// It stores raw encoded bytes for extension id without any validation.
func SetRawExtension(base extendableProto, id int32, b []byte) {
	base.ExtensionMap()[id] = Extension{enc: b}
}
// isExtensionField returns true iff the given field number is in an extension range.
func isExtensionField(pb extendableProto, field int32) bool {
	ranges := pb.ExtensionRangeArray()
	for i := range ranges {
		// Both endpoints of a range are inclusive.
		if field >= ranges[i].Start && field <= ranges[i].End {
			return true
		}
	}
	return false
}
// checkExtensionTypes checks that the given extension is valid for pb.
// It verifies both that the extension extends pb's concrete type and that
// its field number falls inside one of pb's declared extension ranges.
func checkExtensionTypes(pb extendableProto, extension *ExtensionDesc) error {
	// Check the extended type.
	if a, b := reflect.TypeOf(pb), reflect.TypeOf(extension.ExtendedType); a != b {
		return errors.New("proto: bad extended type; " + b.String() + " does not extend " + a.String())
	}
	// Check the range.
	if !isExtensionField(pb, extension.Field) {
		return errors.New("proto: bad extension number; not in declared ranges")
	}
	return nil
}
// extPropKey is sufficient to uniquely identify an extension.
type extPropKey struct {
	base  reflect.Type
	field int32
}
// extProp caches Properties per extension so tag parsing happens once.
var extProp = struct {
	sync.RWMutex
	m map[extPropKey]*Properties
}{
	m: make(map[extPropKey]*Properties),
}
// extensionProperties returns the (possibly cached) Properties for ed.
// It uses a double-checked read-lock/write-lock pattern: a fast path under
// the read lock, then a re-check under the write lock before initializing.
func extensionProperties(ed *ExtensionDesc) *Properties {
	key := extPropKey{base: reflect.TypeOf(ed.ExtendedType), field: ed.Field}
	extProp.RLock()
	if prop, ok := extProp.m[key]; ok {
		extProp.RUnlock()
		return prop
	}
	extProp.RUnlock()
	extProp.Lock()
	defer extProp.Unlock()
	// Check again.
	if prop, ok := extProp.m[key]; ok {
		return prop
	}
	prop := new(Properties)
	prop.Init(reflect.TypeOf(ed.ExtensionType), "unknown_name", ed.Tag, nil)
	extProp.m[key] = prop
	return prop
}
// encodeExtensionMap encodes any unmarshaled (unencoded) extensions in m.
// On success each affected entry's enc field holds the fresh encoding.
func encodeExtensionMap(m map[int32]Extension) error {
	for k, e := range m {
		if e.value == nil || e.desc == nil {
			// Extension is only in its encoded form.
			continue
		}
		// We don't skip extensions that have an encoded form set,
		// because the extension value may have been mutated after
		// the last time this function was called.
		et := reflect.TypeOf(e.desc.ExtensionType)
		props := extensionProperties(e.desc)
		p := NewBuffer(nil)
		// If e.value has type T, the encoder expects a *struct{ X T }.
		// Pass a *T with a zero field and hope it all works out.
		x := reflect.New(et)
		x.Elem().Set(reflect.ValueOf(e.value))
		if err := props.enc(p, props, toStructPointer(x)); err != nil {
			return err
		}
		// e is a copy; write the updated entry back into the map.
		e.enc = p.buf
		m[k] = e
	}
	return nil
}
// sizeExtensionMap returns the encoded size, in bytes, of all extensions in m.
// Mirrors encodeExtensionMap but only measures; it does not mutate the map.
func sizeExtensionMap(m map[int32]Extension) (n int) {
	for _, e := range m {
		if e.value == nil || e.desc == nil {
			// Extension is only in its encoded form.
			n += len(e.enc)
			continue
		}
		// We don't skip extensions that have an encoded form set,
		// because the extension value may have been mutated after
		// the last time this function was called.
		et := reflect.TypeOf(e.desc.ExtensionType)
		props := extensionProperties(e.desc)
		// If e.value has type T, the encoder expects a *struct{ X T }.
		// Pass a *T with a zero field and hope it all works out.
		x := reflect.New(et)
		x.Elem().Set(reflect.ValueOf(e.value))
		n += props.size(props, toStructPointer(x))
	}
	return
}
// HasExtension returns whether the given extension is present in pb.
// Presence means an entry exists for the field number, whether decoded or
// still in encoded form.
func HasExtension(pb extendableProto, extension *ExtensionDesc) bool {
	// TODO: Check types, field numbers, etc.?
	_, ok := pb.ExtensionMap()[extension.Field]
	return ok
}
// ClearExtension removes the given extension from pb.
// Removing an absent extension is a no-op.
func ClearExtension(pb extendableProto, extension *ExtensionDesc) {
	// TODO: Check types, field numbers, etc.?
	delete(pb.ExtensionMap(), extension.Field)
}
// GetExtension parses and returns the given extension of pb.
// If the extension is not present and has no default value it returns ErrMissingExtension.
// Decoding is lazy: an encoded-only extension is decoded on first access and
// the decoded value is cached back into the extension map.
func GetExtension(pb extendableProto, extension *ExtensionDesc) (interface{}, error) {
	if err := checkExtensionTypes(pb, extension); err != nil {
		return nil, err
	}
	emap := pb.ExtensionMap()
	e, ok := emap[extension.Field]
	if !ok {
		// defaultExtensionValue returns the default value or
		// ErrMissingExtension if there is no default.
		return defaultExtensionValue(extension)
	}
	if e.value != nil {
		// Already decoded. Check the descriptor, though.
		if e.desc != extension {
			// This shouldn't happen. If it does, it means that
			// GetExtension was called twice with two different
			// descriptors with the same field number.
			return nil, errors.New("proto: descriptor conflict")
		}
		return e.value, nil
	}
	v, err := decodeExtension(e.enc, extension)
	if err != nil {
		return nil, err
	}
	// Remember the decoded version and drop the encoded version.
	// That way it is safe to mutate what we return.
	e.value = v
	e.desc = extension
	e.enc = nil
	emap[extension.Field] = e
	return e.value, nil
}
// defaultExtensionValue returns the default value for extension.
// If no default for an extension is defined ErrMissingExtension is returned.
// For pointer-typed extensions a freshly allocated pointer to a copy of the
// default is returned, so callers cannot mutate the shared default.
func defaultExtensionValue(extension *ExtensionDesc) (interface{}, error) {
	t := reflect.TypeOf(extension.ExtensionType)
	props := extensionProperties(extension)
	sf, _, err := fieldDefault(t, props)
	if err != nil {
		return nil, err
	}
	if sf == nil || sf.value == nil {
		// There is no default value.
		return nil, ErrMissingExtension
	}
	if t.Kind() != reflect.Ptr {
		// We do not need to return a Ptr, we can directly return sf.value.
		return sf.value, nil
	}
	// We need to return an interface{} that is a pointer to sf.value.
	value := reflect.New(t).Elem()
	value.Set(reflect.New(value.Type().Elem()))
	if sf.kind == reflect.Int32 {
		// We may have an int32 or an enum, but the underlying data is int32.
		// Since we can't set an int32 into a non int32 reflect.value directly
		// set it as a int32.
		value.Elem().SetInt(int64(sf.value.(int32)))
	} else {
		value.Elem().Set(reflect.ValueOf(sf.value))
	}
	return value.Interface(), nil
}
// decodeExtension decodes an extension encoded in b.
// For repeated extensions it loops, appending one element per iteration,
// until the buffer is exhausted.
func decodeExtension(b []byte, extension *ExtensionDesc) (interface{}, error) {
	o := NewBuffer(b)
	t := reflect.TypeOf(extension.ExtensionType)
	rep := extension.repeated()
	props := extensionProperties(extension)
	// t is a pointer to a struct, pointer to basic type or a slice.
	// Allocate a "field" to store the pointer/slice itself; the
	// pointer/slice will be stored here. We pass
	// the address of this field to props.dec.
	// This passes a zero field and a *t and lets props.dec
	// interpret it as a *struct{ x t }.
	value := reflect.New(t).Elem()
	for {
		// Discard wire type and field number varint. It isn't needed.
		if _, err := o.DecodeVarint(); err != nil {
			return nil, err
		}
		if err := props.dec(o, props, toStructPointer(value.Addr())); err != nil {
			return nil, err
		}
		if !rep || o.index >= len(o.buf) {
			break
		}
	}
	return value.Interface(), nil
}
// GetExtensions returns a slice of the extensions present in pb that are also listed in es.
// The returned slice has the same length as es; missing extensions will appear as nil elements.
func GetExtensions(pb Message, es []*ExtensionDesc) (extensions []interface{}, err error) {
	epb, ok := pb.(extendableProto)
	if !ok {
		return nil, errors.New("proto: not an extendable proto")
	}
	extensions = make([]interface{}, len(es))
	for i, e := range es {
		ext, extErr := GetExtension(epb, e)
		extensions[i] = ext
		if extErr == ErrMissingExtension {
			// A missing extension is reported as a nil element, not an error.
			continue
		}
		if extErr != nil {
			return extensions, extErr
		}
	}
	return extensions, nil
}
// SetExtension sets the specified extension of pb to the specified value.
// value must have exactly the extension's declared Go type and must be non-nil.
func SetExtension(pb extendableProto, extension *ExtensionDesc, value interface{}) error {
	if err := checkExtensionTypes(pb, extension); err != nil {
		return err
	}
	typ := reflect.TypeOf(extension.ExtensionType)
	if typ != reflect.TypeOf(value) {
		return errors.New("proto: bad extension value type")
	}
	// nil extension values need to be caught early, because the
	// encoder can't distinguish an ErrNil due to a nil extension
	// from an ErrNil due to a missing field. Extensions are
	// always optional, so the encoder would just swallow the error
	// and drop all the extensions from the encoded message.
	if reflect.ValueOf(value).IsNil() {
		return fmt.Errorf("proto: SetExtension called with nil value of type %T", value)
	}
	pb.ExtensionMap()[extension.Field] = Extension{desc: extension, value: value}
	return nil
}
// A global registry of extensions.
// The generated code will register the generated descriptors by calling RegisterExtension.
// Keyed by the extended message's struct type, then by extension field number.
var extensionMaps = make(map[reflect.Type]map[int32]*ExtensionDesc)
// RegisterExtension is called from the generated code.
// NOTE(review): access to extensionMaps is not synchronized; registration is
// presumably expected to happen during package initialization.
func RegisterExtension(desc *ExtensionDesc) {
	st := reflect.TypeOf(desc.ExtendedType).Elem()
	m := extensionMaps[st]
	if m == nil {
		m = make(map[int32]*ExtensionDesc)
		extensionMaps[st] = m
	}
	// Registering the same field number twice is a programmer error.
	if _, ok := m[desc.Field]; ok {
		panic("proto: duplicate extension registered: " + st.String() + " " + strconv.Itoa(int(desc.Field)))
	}
	m[desc.Field] = desc
}
// RegisteredExtensions returns a map of the registered extensions of a
// protocol buffer struct, indexed by the extension number.
// The argument pb should be a nil pointer to the struct type.
// Returns nil if no extensions are registered for the type.
func RegisteredExtensions(pb Message) map[int32]*ExtensionDesc {
	return extensionMaps[reflect.TypeOf(pb).Elem()]
}

View File

@ -0,0 +1,883 @@
// Go support for Protocol Buffers - Google's data interchange format
//
// Copyright 2010 The Go Authors. All rights reserved.
// https://github.com/golang/protobuf
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/*
Package proto converts data structures to and from the wire format of
protocol buffers. It works in concert with the Go source code generated
for .proto files by the protocol compiler.
A summary of the properties of the protocol buffer interface
for a protocol buffer variable v:
- Names are turned from camel_case to CamelCase for export.
- There are no methods on v to set fields; just treat
them as structure fields.
- There are getters that return a field's value if set,
and return the field's default value if unset.
The getters work even if the receiver is a nil message.
- The zero value for a struct is its correct initialization state.
All desired fields must be set before marshaling.
- A Reset() method will restore a protobuf struct to its zero state.
- Non-repeated fields are pointers to the values; nil means unset.
That is, optional or required field int32 f becomes F *int32.
- Repeated fields are slices.
- Helper functions are available to aid the setting of fields.
msg.Foo = proto.String("hello") // set field
- Constants are defined to hold the default values of all fields that
have them. They have the form Default_StructName_FieldName.
Because the getter methods handle defaulted values,
direct use of these constants should be rare.
- Enums are given type names and maps from names to values.
Enum values are prefixed by the enclosing message's name, or by the
enum's type name if it is a top-level enum. Enum types have a String
method, and a Enum method to assist in message construction.
- Nested messages, groups and enums have type names prefixed with the name of
the surrounding message type.
- Extensions are given descriptor names that start with E_,
followed by an underscore-delimited list of the nested messages
that contain it (if any) followed by the CamelCased name of the
extension field itself. HasExtension, ClearExtension, GetExtension
and SetExtension are functions for manipulating extensions.
- Oneof field sets are given a single field in their message,
with distinguished wrapper types for each possible field value.
- Marshal and Unmarshal are functions to encode and decode the wire format.
The simplest way to describe this is to see an example.
Given file test.proto, containing
package example;
enum FOO { X = 17; }
message Test {
required string label = 1;
optional int32 type = 2 [default=77];
repeated int64 reps = 3;
optional group OptionalGroup = 4 {
required string RequiredField = 5;
}
oneof union {
int32 number = 6;
string name = 7;
}
}
The resulting file, test.pb.go, is:
package example
import proto "github.com/golang/protobuf/proto"
import math "math"
type FOO int32
const (
FOO_X FOO = 17
)
var FOO_name = map[int32]string{
17: "X",
}
var FOO_value = map[string]int32{
"X": 17,
}
func (x FOO) Enum() *FOO {
p := new(FOO)
*p = x
return p
}
func (x FOO) String() string {
return proto.EnumName(FOO_name, int32(x))
}
func (x *FOO) UnmarshalJSON(data []byte) error {
value, err := proto.UnmarshalJSONEnum(FOO_value, data)
if err != nil {
return err
}
*x = FOO(value)
return nil
}
type Test struct {
Label *string `protobuf:"bytes,1,req,name=label" json:"label,omitempty"`
Type *int32 `protobuf:"varint,2,opt,name=type,def=77" json:"type,omitempty"`
Reps []int64 `protobuf:"varint,3,rep,name=reps" json:"reps,omitempty"`
Optionalgroup *Test_OptionalGroup `protobuf:"group,4,opt,name=OptionalGroup" json:"optionalgroup,omitempty"`
// Types that are valid to be assigned to Union:
// *Test_Number
// *Test_Name
Union isTest_Union `protobuf_oneof:"union"`
XXX_unrecognized []byte `json:"-"`
}
func (m *Test) Reset() { *m = Test{} }
func (m *Test) String() string { return proto.CompactTextString(m) }
func (*Test) ProtoMessage() {}
type isTest_Union interface {
isTest_Union()
}
type Test_Number struct {
Number int32 `protobuf:"varint,6,opt,name=number"`
}
type Test_Name struct {
Name string `protobuf:"bytes,7,opt,name=name"`
}
func (*Test_Number) isTest_Union() {}
func (*Test_Name) isTest_Union() {}
func (m *Test) GetUnion() isTest_Union {
if m != nil {
return m.Union
}
return nil
}
const Default_Test_Type int32 = 77
func (m *Test) GetLabel() string {
if m != nil && m.Label != nil {
return *m.Label
}
return ""
}
func (m *Test) GetType() int32 {
if m != nil && m.Type != nil {
return *m.Type
}
return Default_Test_Type
}
func (m *Test) GetOptionalgroup() *Test_OptionalGroup {
if m != nil {
return m.Optionalgroup
}
return nil
}
type Test_OptionalGroup struct {
RequiredField *string `protobuf:"bytes,5,req" json:"RequiredField,omitempty"`
}
func (m *Test_OptionalGroup) Reset() { *m = Test_OptionalGroup{} }
func (m *Test_OptionalGroup) String() string { return proto.CompactTextString(m) }
func (m *Test_OptionalGroup) GetRequiredField() string {
if m != nil && m.RequiredField != nil {
return *m.RequiredField
}
return ""
}
func (m *Test) GetNumber() int32 {
if x, ok := m.GetUnion().(*Test_Number); ok {
return x.Number
}
return 0
}
func (m *Test) GetName() string {
if x, ok := m.GetUnion().(*Test_Name); ok {
return x.Name
}
return ""
}
func init() {
proto.RegisterEnum("example.FOO", FOO_name, FOO_value)
}
To create and play with a Test object:
package main
import (
"log"
"github.com/golang/protobuf/proto"
pb "./example.pb"
)
func main() {
test := &pb.Test{
Label: proto.String("hello"),
Type: proto.Int32(17),
Optionalgroup: &pb.Test_OptionalGroup{
RequiredField: proto.String("good bye"),
},
Union: &pb.Test_Name{"fred"},
}
data, err := proto.Marshal(test)
if err != nil {
log.Fatal("marshaling error: ", err)
}
newTest := &pb.Test{}
err = proto.Unmarshal(data, newTest)
if err != nil {
log.Fatal("unmarshaling error: ", err)
}
// Now test and newTest contain the same data.
if test.GetLabel() != newTest.GetLabel() {
log.Fatalf("data mismatch %q != %q", test.GetLabel(), newTest.GetLabel())
}
// Use a type switch to determine which oneof was set.
switch u := test.Union.(type) {
case *pb.Test_Number: // u.Number contains the number.
case *pb.Test_Name: // u.Name contains the string.
}
// etc.
}
*/
package proto
import (
"encoding/json"
"fmt"
"log"
"reflect"
"sort"
"strconv"
"sync"
)
// Message is implemented by generated protocol buffer messages.
type Message interface {
	Reset()          // restore the message to its zero state
	String() string  // human-readable representation
	ProtoMessage()   // marker method distinguishing protobuf messages
}
// Stats records allocation details about the protocol buffer encoders
// and decoders.  Useful for tuning the library itself.
type Stats struct {
	Emalloc uint64 // mallocs in encode
	Dmalloc uint64 // mallocs in decode
	Encode  uint64 // number of encodes
	Decode  uint64 // number of decodes
	Chit    uint64 // number of cache hits
	Cmiss   uint64 // number of cache misses
	Size    uint64 // number of sizes
}
// Set to true to enable stats collection.
// Compile-time constant so stat-collection code can be eliminated when off.
const collectStats = false
// stats is the package-global counter set updated by the codec internals.
var stats Stats
// GetStats returns a copy of the global Stats structure.
func GetStats() Stats { return stats }
// A Buffer is a buffer manager for marshaling and unmarshaling
// protocol buffers.  It may be reused between invocations to
// reduce memory usage.  It is not necessary to use a Buffer;
// the global functions Marshal and Unmarshal create a
// temporary Buffer and are fine for most applications.
type Buffer struct {
	buf   []byte // encode/decode byte stream
	index int    // write point

	// pools of basic types to amortize allocation.
	bools   []bool
	uint32s []uint32
	uint64s []uint64

	// extra pools, only used with pointer_reflect.go
	int32s   []int32
	int64s   []int64
	float32s []float32
	float64s []float64
}

// NewBuffer allocates a new Buffer and initializes its internal data to
// the contents of the argument slice.
func NewBuffer(e []byte) *Buffer {
	b := new(Buffer)
	b.buf = e
	return b
}

// Reset resets the Buffer, ready for marshaling a new protocol buffer.
func (p *Buffer) Reset() {
	p.index = 0         // for reading
	p.buf = p.buf[:0]   // for reading/writing; keeps the allocated capacity
}

// SetBuf replaces the internal buffer with the slice,
// ready for unmarshaling the contents of the slice.
func (p *Buffer) SetBuf(s []byte) {
	p.index = 0
	p.buf = s
}

// Bytes returns the contents of the Buffer.
func (p *Buffer) Bytes() []byte {
	return p.buf
}
/*
 * Helper routines for simplifying the creation of optional fields of basic type.
 */

// Bool is a helper routine that allocates a new bool value
// to store v and returns a pointer to it.
func Bool(v bool) *bool {
	p := new(bool)
	*p = v
	return p
}

// Int32 is a helper routine that allocates a new int32 value
// to store v and returns a pointer to it.
func Int32(v int32) *int32 {
	p := new(int32)
	*p = v
	return p
}

// Int is a helper routine that allocates a new int32 value
// to store v and returns a pointer to it, but unlike Int32
// its argument value is an int.
func Int(v int) *int32 {
	w := int32(v)
	return &w
}

// Int64 is a helper routine that allocates a new int64 value
// to store v and returns a pointer to it.
func Int64(v int64) *int64 {
	p := new(int64)
	*p = v
	return p
}

// Float32 is a helper routine that allocates a new float32 value
// to store v and returns a pointer to it.
func Float32(v float32) *float32 {
	p := new(float32)
	*p = v
	return p
}

// Float64 is a helper routine that allocates a new float64 value
// to store v and returns a pointer to it.
func Float64(v float64) *float64 {
	p := new(float64)
	*p = v
	return p
}

// Uint32 is a helper routine that allocates a new uint32 value
// to store v and returns a pointer to it.
func Uint32(v uint32) *uint32 {
	p := new(uint32)
	*p = v
	return p
}

// Uint64 is a helper routine that allocates a new uint64 value
// to store v and returns a pointer to it.
func Uint64(v uint64) *uint64 {
	p := new(uint64)
	*p = v
	return p
}

// String is a helper routine that allocates a new string value
// to store v and returns a pointer to it.
func String(v string) *string {
	p := new(string)
	*p = v
	return p
}
// EnumName is a helper function to simplify printing protocol buffer enums
// by name. Given an enum map and a value, it returns the mapped name when
// one exists and the decimal representation of v otherwise.
func EnumName(m map[int32]string, v int32) string {
	if name, ok := m[v]; ok {
		return name
	}
	return strconv.Itoa(int(v))
}
// UnmarshalJSONEnum is a helper function to simplify recovering enum int values
// from their JSON-encoded representation. Given a map from the enum's symbolic
// names to its int values, and a byte buffer containing the JSON-encoded
// value, it returns an int32 that can be cast to the enum type by the caller.
//
// The function can deal with both JSON representations, numeric and symbolic.
func UnmarshalJSONEnum(m map[string]int32, data []byte, enumName string) (int32, error) {
	// Guard against empty input: indexing data[0] below would panic.
	if len(data) == 0 {
		return 0, fmt.Errorf("cannot unmarshal empty value into enum %s", enumName)
	}
	if data[0] == '"' {
		// New style: enums are strings.
		var repr string
		if err := json.Unmarshal(data, &repr); err != nil {
			return -1, err
		}
		val, ok := m[repr]
		if !ok {
			return 0, fmt.Errorf("unrecognized enum %s value %q", enumName, repr)
		}
		return val, nil
	}
	// Old style: enums are ints.
	var val int32
	if err := json.Unmarshal(data, &val); err != nil {
		return 0, fmt.Errorf("cannot unmarshal %#q into enum %s", data, enumName)
	}
	return val, nil
}
// DebugPrint dumps the encoded data in b in a debugging format with a header
// including the string s. Used in testing but made available for general debugging.
// The Buffer's own buf and read index are saved on entry and restored on exit,
// so p may be a Buffer that is in active use.
func (p *Buffer) DebugPrint(s string, b []byte) {
	var u uint64
	// Save the Buffer's state and point it at b for decoding.
	obuf := p.buf
	index := p.index
	p.buf = b
	p.index = 0
	depth := 0
	fmt.Printf("\n--- %s ---\n", s)
out:
	for {
		// Indent one step per currently-open group.
		for i := 0; i < depth; i++ {
			fmt.Print(" ")
		}
		// This inner index intentionally shadows the saved one above; it
		// records the byte offset of the key being decoded, for the printout.
		index := p.index
		if index == len(p.buf) {
			break
		}
		op, err := p.DecodeVarint()
		if err != nil {
			fmt.Printf("%3d: fetching op err %v\n", index, err)
			break out
		}
		// A wire-format key packs field number (tag) and wire type together.
		tag := op >> 3
		wire := op & 7
		switch wire {
		default:
			fmt.Printf("%3d: t=%3d unknown wire=%d\n",
				index, tag, wire)
			break out
		case WireBytes:
			var r []byte
			r, err = p.DecodeRawBytes(false)
			if err != nil {
				break out
			}
			fmt.Printf("%3d: t=%3d bytes [%d]", index, tag, len(r))
			// Short payloads print in full; long ones are abbreviated to
			// the first three and last three bytes.
			if len(r) <= 6 {
				for i := 0; i < len(r); i++ {
					fmt.Printf(" %.2x", r[i])
				}
			} else {
				for i := 0; i < 3; i++ {
					fmt.Printf(" %.2x", r[i])
				}
				fmt.Printf(" ..")
				for i := len(r) - 3; i < len(r); i++ {
					fmt.Printf(" %.2x", r[i])
				}
			}
			fmt.Printf("\n")
		case WireFixed32:
			u, err = p.DecodeFixed32()
			if err != nil {
				fmt.Printf("%3d: t=%3d fix32 err %v\n", index, tag, err)
				break out
			}
			fmt.Printf("%3d: t=%3d fix32 %d\n", index, tag, u)
		case WireFixed64:
			u, err = p.DecodeFixed64()
			if err != nil {
				fmt.Printf("%3d: t=%3d fix64 err %v\n", index, tag, err)
				break out
			}
			fmt.Printf("%3d: t=%3d fix64 %d\n", index, tag, u)
		case WireVarint:
			u, err = p.DecodeVarint()
			if err != nil {
				fmt.Printf("%3d: t=%3d varint err %v\n", index, tag, err)
				break out
			}
			fmt.Printf("%3d: t=%3d varint %d\n", index, tag, u)
		case WireStartGroup:
			fmt.Printf("%3d: t=%3d start\n", index, tag)
			depth++
		case WireEndGroup:
			depth--
			fmt.Printf("%3d: t=%3d end\n", index, tag)
		}
	}
	// Unbalanced start/end groups indicate malformed input.
	if depth != 0 {
		fmt.Printf("%3d: start-end not balanced %d\n", p.index, depth)
	}
	fmt.Printf("\n")
	// Restore the Buffer's original state.
	p.buf = obuf
	p.index = index
}
// SetDefaults sets unset protocol buffer fields to their default values.
// It only modifies fields that are both unset and have defined defaults.
// It recursively sets default values in any non-nil sub-messages.
func SetDefaults(pb Message) {
	// zeros=false: fields with no explicit proto-declared default stay unset.
	setDefaults(reflect.ValueOf(pb), true, false)
}
// v is a pointer to a struct.
// setDefaults fills in default values for unset scalar fields of *v and
// recurses into non-nil nested messages. recur is threaded through to
// nested calls; when zeros is true, fields without an explicit
// proto-declared default are also allocated (with their zero value).
func setDefaults(v reflect.Value, recur, zeros bool) {
	v = v.Elem()
	defaultMu.RLock()
	dm, ok := defaults[v.Type()]
	defaultMu.RUnlock()
	if !ok {
		// Cache miss: compute the default layout for this message type and
		// memoize it. Concurrent builders may duplicate the work, but they
		// produce the same value, so the last write wins harmlessly.
		dm = buildDefaultMessage(v.Type())
		defaultMu.Lock()
		defaults[v.Type()] = dm
		defaultMu.Unlock()
	}
	for _, sf := range dm.scalars {
		f := v.Field(sf.index)
		if !f.IsNil() {
			// field already set
			continue
		}
		dv := sf.value
		if dv == nil && !zeros {
			// no explicit default, and don't want to set zeros
			continue
		}
		fptr := f.Addr().Interface() // **T
		// TODO: Consider batching the allocations we do here.
		switch sf.kind {
		case reflect.Bool:
			b := new(bool)
			if dv != nil {
				*b = dv.(bool)
			}
			*(fptr.(**bool)) = b
		case reflect.Float32:
			f := new(float32)
			if dv != nil {
				*f = dv.(float32)
			}
			*(fptr.(**float32)) = f
		case reflect.Float64:
			f := new(float64)
			if dv != nil {
				*f = dv.(float64)
			}
			*(fptr.(**float64)) = f
		case reflect.Int32:
			// might be an enum
			if ft := f.Type(); ft != int32PtrType {
				// enum
				f.Set(reflect.New(ft.Elem()))
				if dv != nil {
					f.Elem().SetInt(int64(dv.(int32)))
				}
			} else {
				// int32 field
				i := new(int32)
				if dv != nil {
					*i = dv.(int32)
				}
				*(fptr.(**int32)) = i
			}
		case reflect.Int64:
			i := new(int64)
			if dv != nil {
				*i = dv.(int64)
			}
			*(fptr.(**int64)) = i
		case reflect.String:
			s := new(string)
			if dv != nil {
				*s = dv.(string)
			}
			*(fptr.(**string)) = s
		case reflect.Uint8:
			// exceptional case: []byte
			var b []byte
			if dv != nil {
				// Copy the declared default so callers cannot mutate the
				// shared value cached in the defaults map.
				db := dv.([]byte)
				b = make([]byte, len(db))
				copy(b, db)
			} else {
				b = []byte{}
			}
			*(fptr.(*[]byte)) = b
		case reflect.Uint32:
			u := new(uint32)
			if dv != nil {
				*u = dv.(uint32)
			}
			*(fptr.(**uint32)) = u
		case reflect.Uint64:
			u := new(uint64)
			if dv != nil {
				*u = dv.(uint64)
			}
			*(fptr.(**uint64)) = u
		default:
			log.Printf("proto: can't set default for field %v (sf.kind=%v)", f, sf.kind)
		}
	}
	// Recurse into nested messages so their defaults get set too.
	for _, ni := range dm.nested {
		f := v.Field(ni)
		// f is *T or []*T or map[T]*T
		switch f.Kind() {
		case reflect.Ptr:
			if f.IsNil() {
				continue
			}
			setDefaults(f, recur, zeros)
		case reflect.Slice:
			for i := 0; i < f.Len(); i++ {
				e := f.Index(i)
				if e.IsNil() {
					continue
				}
				setDefaults(e, recur, zeros)
			}
		case reflect.Map:
			for _, k := range f.MapKeys() {
				e := f.MapIndex(k)
				if e.IsNil() {
					continue
				}
				setDefaults(e, recur, zeros)
			}
		}
	}
}
var (
	// defaults maps a protocol buffer struct type to a slice of the fields,
	// with its scalar fields set to their proto-declared non-zero default values.
	// defaultMu guards all access to the defaults map.
	defaultMu sync.RWMutex
	defaults  = make(map[reflect.Type]defaultMessage)

	// int32PtrType lets setDefaults distinguish plain *int32 fields from enums.
	int32PtrType = reflect.TypeOf((*int32)(nil))
)

// defaultMessage represents information about the default values of a message.
type defaultMessage struct {
	scalars []scalarField
	nested  []int // struct field index of nested messages
}

// scalarField describes one scalar field of a message and its default value.
type scalarField struct {
	index int          // struct field index
	kind  reflect.Kind // element type (the T in *T or []T)
	value interface{}  // the proto-declared default value, or nil
}
// t is a struct type.
// buildDefaultMessage computes, for message type t, which fields are scalars
// (with their parsed defaults) and which are nested messages that need
// recursive default-setting. Parse errors are logged and the field skipped.
func buildDefaultMessage(t reflect.Type) (dm defaultMessage) {
	sprop := GetProperties(t)
	for _, prop := range sprop.Prop {
		fi, ok := sprop.decoderTags.get(prop.Tag)
		if !ok {
			// XXX_unrecognized
			continue
		}
		ft := t.Field(fi).Type
		sf, nested, err := fieldDefault(ft, prop)
		switch {
		case err != nil:
			log.Print(err)
		case nested:
			dm.nested = append(dm.nested, fi)
		case sf != nil:
			sf.index = fi
			dm.scalars = append(dm.scalars, *sf)
		}
	}
	return dm
}
// fieldDefault returns the scalarField for field type ft.
// sf will be nil if the field can not have a default.
// nestedMessage will be true if this is a nested message.
// Note that sf.index is not set on return.
func fieldDefault(ft reflect.Type, prop *Properties) (sf *scalarField, nestedMessage bool, err error) {
	// Classify the field: nested message (recurse later), scalar that may
	// carry a default, or neither.
	var canHaveDefault bool
	switch ft.Kind() {
	case reflect.Ptr:
		if ft.Elem().Kind() == reflect.Struct {
			nestedMessage = true
		} else {
			canHaveDefault = true // proto2 scalar field
		}
	case reflect.Slice:
		switch ft.Elem().Kind() {
		case reflect.Ptr:
			nestedMessage = true // repeated message
		case reflect.Uint8:
			canHaveDefault = true // bytes field
		}
	case reflect.Map:
		if ft.Elem().Kind() == reflect.Ptr {
			nestedMessage = true // map with message values
		}
	}
	if !canHaveDefault {
		if nestedMessage {
			return nil, true, nil
		}
		return nil, false, nil
	}
	// We now know that ft is a pointer or slice.
	sf = &scalarField{kind: ft.Elem().Kind()}
	// scalar fields without defaults
	if !prop.HasDefault {
		return sf, false, nil
	}
	// a scalar field: either *T or []byte
	// Parse the textual default from the struct tag into a typed value.
	switch ft.Elem().Kind() {
	case reflect.Bool:
		x, err := strconv.ParseBool(prop.Default)
		if err != nil {
			return nil, false, fmt.Errorf("proto: bad default bool %q: %v", prop.Default, err)
		}
		sf.value = x
	case reflect.Float32:
		x, err := strconv.ParseFloat(prop.Default, 32)
		if err != nil {
			return nil, false, fmt.Errorf("proto: bad default float32 %q: %v", prop.Default, err)
		}
		sf.value = float32(x)
	case reflect.Float64:
		x, err := strconv.ParseFloat(prop.Default, 64)
		if err != nil {
			return nil, false, fmt.Errorf("proto: bad default float64 %q: %v", prop.Default, err)
		}
		sf.value = x
	case reflect.Int32:
		x, err := strconv.ParseInt(prop.Default, 10, 32)
		if err != nil {
			return nil, false, fmt.Errorf("proto: bad default int32 %q: %v", prop.Default, err)
		}
		sf.value = int32(x)
	case reflect.Int64:
		x, err := strconv.ParseInt(prop.Default, 10, 64)
		if err != nil {
			return nil, false, fmt.Errorf("proto: bad default int64 %q: %v", prop.Default, err)
		}
		sf.value = x
	case reflect.String:
		sf.value = prop.Default
	case reflect.Uint8:
		// []byte (not *uint8)
		sf.value = []byte(prop.Default)
	case reflect.Uint32:
		x, err := strconv.ParseUint(prop.Default, 10, 32)
		if err != nil {
			return nil, false, fmt.Errorf("proto: bad default uint32 %q: %v", prop.Default, err)
		}
		sf.value = uint32(x)
	case reflect.Uint64:
		x, err := strconv.ParseUint(prop.Default, 10, 64)
		if err != nil {
			return nil, false, fmt.Errorf("proto: bad default uint64 %q: %v", prop.Default, err)
		}
		sf.value = x
	default:
		return nil, false, fmt.Errorf("proto: unhandled def kind %v", ft.Elem().Kind())
	}
	return sf, false, nil
}
// Map fields may have key types of non-float scalars, strings and enums.
// The easiest way to sort them in some deterministic order is to use fmt.
// If this turns out to be inefficient we can always consider other options,
// such as doing a Schwartzian transform.
func mapKeys(vs []reflect.Value) sort.Interface {
s := mapKeySorter{
vs: vs,
// default Less function: textual comparison
less: func(a, b reflect.Value) bool {
return fmt.Sprint(a.Interface()) < fmt.Sprint(b.Interface())
},
}
// Type specialization per https://developers.google.com/protocol-buffers/docs/proto#maps;
// numeric keys are sorted numerically.
if len(vs) == 0 {
return s
}
switch vs[0].Kind() {
case reflect.Int32, reflect.Int64:
s.less = func(a, b reflect.Value) bool { return a.Int() < b.Int() }
case reflect.Uint32, reflect.Uint64:
s.less = func(a, b reflect.Value) bool { return a.Uint() < b.Uint() }
}
return s
}
type mapKeySorter struct {
vs []reflect.Value
less func(a, b reflect.Value) bool
}
func (s mapKeySorter) Len() int { return len(s.vs) }
func (s mapKeySorter) Swap(i, j int) { s.vs[i], s.vs[j] = s.vs[j], s.vs[i] }
func (s mapKeySorter) Less(i, j int) bool {
return s.less(s.vs[i], s.vs[j])
}
// isProto3Zero reports whether v is a zero proto3 value.
func isProto3Zero(v reflect.Value) bool {
switch v.Kind() {
case reflect.Bool:
return !v.Bool()
case reflect.Int32, reflect.Int64:
return v.Int() == 0
case reflect.Uint32, reflect.Uint64:
return v.Uint() == 0
case reflect.Float32, reflect.Float64:
return v.Float() == 0
case reflect.String:
return v.String() == ""
}
return false
}

View File

@ -0,0 +1,287 @@
// Go support for Protocol Buffers - Google's data interchange format
//
// Copyright 2010 The Go Authors. All rights reserved.
// https://github.com/golang/protobuf
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package proto
/*
* Support for message sets.
*/
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"reflect"
"sort"
)
// ErrNoMessageTypeId occurs when a protocol buffer does not have a message type ID.
// A message type ID is required for storing a protocol buffer in a message set.
var ErrNoMessageTypeId = errors.New("proto does not have a message type ID")

// The first two types (_MessageSet_Item and MessageSet)
// model what the protocol compiler produces for the following protocol message:
//   message MessageSet {
//     repeated group Item = 1 {
//       required int32 type_id = 2;
//       required string message = 3;
//     };
//   }
// That is the MessageSet wire format. We can't use a proto to generate these
// because that would introduce a circular dependency between it and this package.
//
// When a proto1 proto has a field that looks like:
//   optional message<MessageSet> info = 3;
// the protocol compiler produces a field in the generated struct that looks like:
//   Info *_proto_.MessageSet `protobuf:"bytes,3,opt,name=info"`
// The package is automatically inserted so there is no need for that proto file to
// import this package.

// _MessageSet_Item is one entry of a MessageSet: a message type ID plus the
// already-encoded bytes of that message.
type _MessageSet_Item struct {
	TypeId  *int32 `protobuf:"varint,2,req,name=type_id"`
	Message []byte `protobuf:"bytes,3,req,name=message"`
}
// MessageSet is the generated-code representation of the MessageSet wire
// format described above.
type MessageSet struct {
	Item             []*_MessageSet_Item `protobuf:"group,1,rep"`
	XXX_unrecognized []byte
	// TODO: caching?
}

// Make sure MessageSet is a Message.
var _ Message = (*MessageSet)(nil)

// messageTypeIder is an interface satisfied by a protocol buffer type
// that may be stored in a MessageSet.
type messageTypeIder interface {
	MessageTypeId() int32
}
// find returns the set item whose type ID matches pb's, or nil when pb does
// not implement messageTypeIder or when no item matches.
func (ms *MessageSet) find(pb Message) *_MessageSet_Item {
	mti, ok := pb.(messageTypeIder)
	if !ok {
		return nil
	}
	want := mti.MessageTypeId()
	for _, it := range ms.Item {
		if *it.TypeId == want {
			return it
		}
	}
	return nil
}
// Has reports whether the set contains an entry for pb's message type.
func (ms *MessageSet) Has(pb Message) bool {
	// find returns nil exactly when no matching item exists, so the
	// boolean can be returned directly (was an if/true/false chain).
	return ms.find(pb) != nil
}
// Unmarshal looks up pb's entry in the set and, if present, unmarshals the
// stored bytes into pb (via the package-level Unmarshal). If pb cannot supply
// a message type ID, ErrNoMessageTypeId is returned. A missing entry is not
// an error: pb is left untouched and nil is returned.
func (ms *MessageSet) Unmarshal(pb Message) error {
	if item := ms.find(pb); item != nil {
		return Unmarshal(item.Message, pb)
	}
	if _, ok := pb.(messageTypeIder); !ok {
		return ErrNoMessageTypeId
	}
	return nil // TODO: return error instead?
}
// Marshal serializes pb and stores the bytes in the set, replacing any
// existing entry with the same message type ID. It returns
// ErrNoMessageTypeId when pb cannot supply a type ID for a new entry.
func (ms *MessageSet) Marshal(pb Message) error {
	msg, err := Marshal(pb)
	if err != nil {
		return err
	}
	if existing := ms.find(pb); existing != nil {
		// Reuse the slot already allocated for this type.
		existing.Message = msg
		return nil
	}
	mti, ok := pb.(messageTypeIder)
	if !ok {
		return ErrNoMessageTypeId
	}
	id := mti.MessageTypeId()
	item := &_MessageSet_Item{
		TypeId:  &id,
		Message: msg,
	}
	ms.Item = append(ms.Item, item)
	return nil
}
// Reset clears the set to its zero state.
func (ms *MessageSet) Reset() { *ms = MessageSet{} }

// String returns the compact text representation of the set.
func (ms *MessageSet) String() string { return CompactTextString(ms) }

// ProtoMessage marks *MessageSet as a protocol buffer message.
func (*MessageSet) ProtoMessage() {}
// Support for the message_set_wire_format message option.

// skipVarint returns the contents of buf following its leading varint.
// buf must begin with a well-formed varint; input whose varint never
// terminates will index past the end of buf and panic, as before.
func skipVarint(buf []byte) []byte {
	i := 0
	for buf[i]&0x80 != 0 {
		i++
	}
	return buf[i+1:]
}
// MarshalMessageSet encodes the extension map represented by m in the message set wire format.
// It is called by generated Marshal methods on protocol buffer messages with the message_set_wire_format option.
func MarshalMessageSet(m map[int32]Extension) ([]byte, error) {
	// Make sure every extension's encoded form (e.enc) is populated first.
	if err := encodeExtensionMap(m); err != nil {
		return nil, err
	}

	// Sort extension IDs to provide a deterministic encoding.
	// See also enc_map in encode.go.
	ids := make([]int, 0, len(m))
	for id := range m {
		ids = append(ids, int(id))
	}
	sort.Ints(ids)

	ms := &MessageSet{Item: make([]*_MessageSet_Item, 0, len(m))}
	for _, id := range ids {
		e := m[int32(id)]
		// Remove the wire type and field number varint, as well as the length varint.
		msg := skipVarint(skipVarint(e.enc))

		ms.Item = append(ms.Item, &_MessageSet_Item{
			TypeId:  Int32(int32(id)),
			Message: msg,
		})
	}
	return Marshal(ms)
}
// UnmarshalMessageSet decodes the extension map encoded in buf in the message set wire format.
// It is called by generated Unmarshal methods on protocol buffer messages with the message_set_wire_format option.
// Duplicate items for the same type ID are concatenated into one extension entry.
func UnmarshalMessageSet(buf []byte, m map[int32]Extension) error {
	ms := new(MessageSet)
	if err := Unmarshal(buf, ms); err != nil {
		return err
	}
	for _, item := range ms.Item {
		id := *item.TypeId
		msg := item.Message

		// Restore wire type and field number varint, plus length varint.
		// Be careful to preserve duplicate items.
		b := EncodeVarint(uint64(id)<<3 | WireBytes)
		if ext, ok := m[id]; ok {
			// Existing data; rip off the tag and length varint
			// so we join the new data correctly.
			// We can assume that ext.enc is set because we are unmarshaling.
			o := ext.enc[len(b):]   // skip wire type and field number
			_, n := DecodeVarint(o) // calculate length of length varint
			o = o[n:]               // skip length varint
			msg = append(o, msg...) // join old data and new data
		}
		b = append(b, EncodeVarint(uint64(len(msg)))...)
		b = append(b, msg...)

		m[id] = Extension{enc: b}
	}
	return nil
}
// MarshalMessageSetJSON encodes the extension map represented by m in JSON format.
// It is called by generated MarshalJSON methods on protocol buffer messages with the message_set_wire_format option.
func MarshalMessageSetJSON(m map[int32]Extension) ([]byte, error) {
	var b bytes.Buffer
	b.WriteByte('{')

	// Process the map in key order for deterministic output.
	ids := make([]int32, 0, len(m))
	for id := range m {
		ids = append(ids, id)
	}
	sort.Sort(int32Slice(ids)) // int32Slice defined in text.go

	// Track whether a field has been emitted so commas are only written
	// between rendered fields. (Previously the comma was written before the
	// unknown-type skip below, which could emit "{," or ",," and produce
	// invalid JSON when an unregistered extension was present.)
	wroteField := false
	for _, id := range ids {
		ext := m[id]

		msd, ok := messageSetMap[id]
		if !ok {
			// Unknown type; we can't render it, so skip it.
			continue
		}
		if wroteField {
			b.WriteByte(',')
		}
		wroteField = true
		fmt.Fprintf(&b, `"[%s]":`, msd.name)

		x := ext.value
		if x == nil {
			// Lazily decode the stored bytes into a fresh message value.
			x = reflect.New(msd.t.Elem()).Interface()
			if err := Unmarshal(ext.enc, x.(Message)); err != nil {
				return nil, err
			}
		}
		d, err := json.Marshal(x)
		if err != nil {
			return nil, err
		}
		b.Write(d)
	}
	b.WriteByte('}')
	return b.Bytes(), nil
}
// UnmarshalMessageSetJSON decodes the extension map encoded in buf in JSON format.
// It is called by generated UnmarshalJSON methods on protocol buffer messages with the message_set_wire_format option.
func UnmarshalMessageSetJSON(buf []byte, m map[int32]Extension) error {
	// Common-case fast path: no data to decode.
	isEmpty := len(buf) == 0 || bytes.Equal(buf, []byte("{}"))
	if isEmpty {
		return nil
	}

	// This is fairly tricky, and it's not clear that it is needed.
	return errors.New("TODO: UnmarshalMessageSetJSON not yet implemented")
}
// A global registry of types that can be used in a MessageSet.
var messageSetMap = make(map[int32]messageSetDesc)

// messageSetDesc pairs a registered message type with its proto name.
type messageSetDesc struct {
	t    reflect.Type // pointer to struct
	name string
}

// RegisterMessageSetType is called from the generated code.
// It records the concrete type and proto name for the given MessageSet
// field number. NOTE(review): map writes here are unsynchronized —
// presumably this is only invoked from generated init-time code; confirm.
func RegisterMessageSetType(m Message, fieldNum int32, name string) {
	messageSetMap[fieldNum] = messageSetDesc{
		t:    reflect.TypeOf(m),
		name: name,
	}
}

View File

@ -0,0 +1,479 @@
// Go support for Protocol Buffers - Google's data interchange format
//
// Copyright 2012 The Go Authors. All rights reserved.
// https://github.com/golang/protobuf
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// +build appengine
// This file contains an implementation of proto field accesses using package reflect.
// It is slower than the code in pointer_unsafe.go but it avoids package unsafe and can
// be used on App Engine.
package proto
import (
"math"
"reflect"
)
// A structPointer is a pointer to a struct.
// In this reflect-based implementation it wraps the reflect.Value of the
// pointer (contrast with the unsafe-pointer version in pointer_unsafe.go).
type structPointer struct {
	v reflect.Value
}

// toStructPointer returns a structPointer equivalent to the given reflect value.
// The reflect value must itself be a pointer to a struct.
func toStructPointer(v reflect.Value) structPointer {
	return structPointer{v}
}

// IsNil reports whether p is nil.
func structPointer_IsNil(p structPointer) bool {
	return p.v.IsNil()
}

// Interface returns the struct pointer as an interface value.
// The type argument is unused here; it exists for parity with the
// unsafe implementation's signature.
func structPointer_Interface(p structPointer, _ reflect.Type) interface{} {
	return p.v.Interface()
}
// A field identifies a field in a struct, accessible from a structPointer.
// In this implementation, a field is identified by the sequence of field indices
// passed to reflect's FieldByIndex.
type field []int

// toField returns a field equivalent to the given reflect field.
func toField(f *reflect.StructField) field {
	return f.Index
}

// invalidField is an invalid field identifier.
var invalidField = field(nil)

// IsValid reports whether the field identifier is valid.
func (f field) IsValid() bool { return f != nil }

// field returns the given field in the struct as a reflect value.
func structPointer_field(p structPointer, f field) reflect.Value {
	// Special case: an extension map entry with a value of type T
	// passes a *T to the struct-handling code with a zero field,
	// expecting that it will be treated as equivalent to *struct{ X T },
	// which has the same memory layout. We have to handle that case
	// specially, because reflect will panic if we call FieldByIndex on a
	// non-struct.
	if f == nil {
		return p.v.Elem()
	}
	return p.v.Elem().FieldByIndex(f)
}

// ifield returns the given field in the struct as an interface value.
// The result wraps a pointer to the field (obtained via Addr), so callers
// can both read and write through it.
func structPointer_ifield(p structPointer, f field) interface{} {
	return structPointer_field(p, f).Addr().Interface()
}
// Typed field accessors follow. Each delegates to structPointer_ifield and
// type-asserts the result; calling one with a field of a different type
// panics on the assertion.

// Bytes returns the address of a []byte field in the struct.
func structPointer_Bytes(p structPointer, f field) *[]byte {
	return structPointer_ifield(p, f).(*[]byte)
}

// BytesSlice returns the address of a [][]byte field in the struct.
func structPointer_BytesSlice(p structPointer, f field) *[][]byte {
	return structPointer_ifield(p, f).(*[][]byte)
}

// Bool returns the address of a *bool field in the struct.
func structPointer_Bool(p structPointer, f field) **bool {
	return structPointer_ifield(p, f).(**bool)
}

// BoolVal returns the address of a bool field in the struct.
func structPointer_BoolVal(p structPointer, f field) *bool {
	return structPointer_ifield(p, f).(*bool)
}

// BoolSlice returns the address of a []bool field in the struct.
func structPointer_BoolSlice(p structPointer, f field) *[]bool {
	return structPointer_ifield(p, f).(*[]bool)
}

// String returns the address of a *string field in the struct.
func structPointer_String(p structPointer, f field) **string {
	return structPointer_ifield(p, f).(**string)
}

// StringVal returns the address of a string field in the struct.
func structPointer_StringVal(p structPointer, f field) *string {
	return structPointer_ifield(p, f).(*string)
}

// StringSlice returns the address of a []string field in the struct.
func structPointer_StringSlice(p structPointer, f field) *[]string {
	return structPointer_ifield(p, f).(*[]string)
}

// ExtMap returns the address of an extension map field in the struct.
func structPointer_ExtMap(p structPointer, f field) *map[int32]Extension {
	return structPointer_ifield(p, f).(*map[int32]Extension)
}
// NewAt returns the reflect.Value for a pointer to a field in the struct.
// The typ argument is unused; it exists for parity with the unsafe implementation.
func structPointer_NewAt(p structPointer, f field, typ reflect.Type) reflect.Value {
	return structPointer_field(p, f).Addr()
}

// SetStructPointer writes a *struct field in the struct.
func structPointer_SetStructPointer(p structPointer, f field, q structPointer) {
	structPointer_field(p, f).Set(q.v)
}

// GetStructPointer reads a *struct field in the struct.
func structPointer_GetStructPointer(p structPointer, f field) structPointer {
	return structPointer{structPointer_field(p, f)}
}

// StructPointerSlice returns the address of a []*struct field in the struct.
func structPointer_StructPointerSlice(p structPointer, f field) structPointerSlice {
	return structPointerSlice{structPointer_field(p, f)}
}

// A structPointerSlice represents the address of a slice of pointers to structs
// (themselves messages or groups). That is, v.Type() is *[]*struct{...}.
type structPointerSlice struct {
	v reflect.Value
}

// Len returns the number of elements in the slice.
func (p structPointerSlice) Len() int { return p.v.Len() }

// Index returns element i as a structPointer.
func (p structPointerSlice) Index(i int) structPointer { return structPointer{p.v.Index(i)} }

// Append appends q to the slice.
func (p structPointerSlice) Append(q structPointer) {
	p.v.Set(reflect.Append(p.v, q.v))
}
// Canonical reflect.Types for the supported scalar element types; the
// word32/word64 Set helpers below dispatch on exact type against these.
var (
	int32Type   = reflect.TypeOf(int32(0))
	uint32Type  = reflect.TypeOf(uint32(0))
	float32Type = reflect.TypeOf(float32(0))
	int64Type   = reflect.TypeOf(int64(0))
	uint64Type  = reflect.TypeOf(uint64(0))
	float64Type = reflect.TypeOf(float64(0))
)
// A word32 represents a field of type *int32, *uint32, *float32, or *enum.
// That is, v.Type() is *int32, *uint32, *float32, or *enum and v is assignable.
type word32 struct {
	v reflect.Value
}

// IsNil reports whether p is nil.
func word32_IsNil(p word32) bool {
	return p.v.IsNil()
}

// Set sets p to point at a newly allocated word with bits set to x.
// Allocations are amortized by handing out pointers into the Buffer's
// per-type pools (o.int32s etc.), refilled uint32PoolSize at a time.
func word32_Set(p word32, o *Buffer, x uint32) {
	t := p.v.Type().Elem()
	switch t {
	case int32Type:
		if len(o.int32s) == 0 {
			o.int32s = make([]int32, uint32PoolSize)
		}
		o.int32s[0] = int32(x)
		p.v.Set(reflect.ValueOf(&o.int32s[0]))
		o.int32s = o.int32s[1:]
		return
	case uint32Type:
		if len(o.uint32s) == 0 {
			o.uint32s = make([]uint32, uint32PoolSize)
		}
		o.uint32s[0] = x
		p.v.Set(reflect.ValueOf(&o.uint32s[0]))
		o.uint32s = o.uint32s[1:]
		return
	case float32Type:
		if len(o.float32s) == 0 {
			o.float32s = make([]float32, uint32PoolSize)
		}
		o.float32s[0] = math.Float32frombits(x)
		p.v.Set(reflect.ValueOf(&o.float32s[0]))
		o.float32s = o.float32s[1:]
		return
	}
	// must be enum
	// Enums need a fresh allocation of the enum's own named type.
	p.v.Set(reflect.New(t))
	p.v.Elem().SetInt(int64(int32(x)))
}

// Get gets the bits pointed at by p, as a uint32.
func word32_Get(p word32) uint32 {
	elem := p.v.Elem()
	switch elem.Kind() {
	case reflect.Int32:
		return uint32(elem.Int())
	case reflect.Uint32:
		return uint32(elem.Uint())
	case reflect.Float32:
		return math.Float32bits(float32(elem.Float()))
	}
	panic("unreachable")
}

// Word32 returns a reference to a *int32, *uint32, *float32, or *enum field in the struct.
func structPointer_Word32(p structPointer, f field) word32 {
	return word32{structPointer_field(p, f)}
}
// A word32Val represents a field of type int32, uint32, float32, or enum.
// That is, v.Type() is int32, uint32, float32, or enum and v is assignable.
type word32Val struct {
	v reflect.Value
}

// Set sets *p to x, reinterpreting the bits according to the field's type.
func word32Val_Set(p word32Val, x uint32) {
	switch p.v.Type() {
	case int32Type:
		p.v.SetInt(int64(x))
		return
	case uint32Type:
		p.v.SetUint(uint64(x))
		return
	case float32Type:
		p.v.SetFloat(float64(math.Float32frombits(x)))
		return
	}
	// must be enum
	p.v.SetInt(int64(int32(x)))
}

// Get gets the bits pointed at by p, as a uint32.
func word32Val_Get(p word32Val) uint32 {
	elem := p.v
	switch elem.Kind() {
	case reflect.Int32:
		return uint32(elem.Int())
	case reflect.Uint32:
		return uint32(elem.Uint())
	case reflect.Float32:
		return math.Float32bits(float32(elem.Float()))
	}
	panic("unreachable")
}

// Word32Val returns a reference to a int32, uint32, float32, or enum field in the struct.
func structPointer_Word32Val(p structPointer, f field) word32Val {
	return word32Val{structPointer_field(p, f)}
}
// A word32Slice is a slice of 32-bit values.
// That is, v.Type() is []int32, []uint32, []float32, or []enum.
type word32Slice struct {
	v reflect.Value
}

// Append adds x to the end of the slice, reinterpreting the bits per the
// slice's element kind.
func (p word32Slice) Append(x uint32) {
	n, m := p.v.Len(), p.v.Cap()
	if n < m {
		// Spare capacity: extend in place without reallocating.
		p.v.SetLen(n + 1)
	} else {
		t := p.v.Type().Elem()
		p.v.Set(reflect.Append(p.v, reflect.Zero(t)))
	}
	elem := p.v.Index(n)
	switch elem.Kind() {
	case reflect.Int32:
		elem.SetInt(int64(int32(x)))
	case reflect.Uint32:
		elem.SetUint(uint64(x))
	case reflect.Float32:
		elem.SetFloat(float64(math.Float32frombits(x)))
	}
}

// Len returns the number of elements in the slice.
func (p word32Slice) Len() int {
	return p.v.Len()
}

// Index returns element i as its raw 32-bit representation.
func (p word32Slice) Index(i int) uint32 {
	elem := p.v.Index(i)
	switch elem.Kind() {
	case reflect.Int32:
		return uint32(elem.Int())
	case reflect.Uint32:
		return uint32(elem.Uint())
	case reflect.Float32:
		return math.Float32bits(float32(elem.Float()))
	}
	panic("unreachable")
}

// Word32Slice returns a reference to a []int32, []uint32, []float32, or []enum field in the struct.
func structPointer_Word32Slice(p structPointer, f field) word32Slice {
	return word32Slice{structPointer_field(p, f)}
}
// word64 is like word32 but for 64-bit values.
type word64 struct {
	v reflect.Value
}

// word64_Set sets p to point at a newly allocated word with bits set to x,
// drawing the allocation from the Buffer's per-type pools. Unlike word32_Set
// there is no enum fallback; unexpected element types panic.
func word64_Set(p word64, o *Buffer, x uint64) {
	t := p.v.Type().Elem()
	switch t {
	case int64Type:
		if len(o.int64s) == 0 {
			o.int64s = make([]int64, uint64PoolSize)
		}
		o.int64s[0] = int64(x)
		p.v.Set(reflect.ValueOf(&o.int64s[0]))
		o.int64s = o.int64s[1:]
		return
	case uint64Type:
		if len(o.uint64s) == 0 {
			o.uint64s = make([]uint64, uint64PoolSize)
		}
		o.uint64s[0] = x
		p.v.Set(reflect.ValueOf(&o.uint64s[0]))
		o.uint64s = o.uint64s[1:]
		return
	case float64Type:
		if len(o.float64s) == 0 {
			o.float64s = make([]float64, uint64PoolSize)
		}
		o.float64s[0] = math.Float64frombits(x)
		p.v.Set(reflect.ValueOf(&o.float64s[0]))
		o.float64s = o.float64s[1:]
		return
	}
	panic("unreachable")
}

// word64_IsNil reports whether p is nil.
func word64_IsNil(p word64) bool {
	return p.v.IsNil()
}

// word64_Get gets the bits pointed at by p, as a uint64.
func word64_Get(p word64) uint64 {
	elem := p.v.Elem()
	switch elem.Kind() {
	case reflect.Int64:
		return uint64(elem.Int())
	case reflect.Uint64:
		return elem.Uint()
	case reflect.Float64:
		return math.Float64bits(elem.Float())
	}
	panic("unreachable")
}

// structPointer_Word64 returns a reference to a *int64, *uint64, or *float64 field in the struct.
func structPointer_Word64(p structPointer, f field) word64 {
	return word64{structPointer_field(p, f)}
}
// word64Val is like word32Val but for 64-bit values.
type word64Val struct {
	v reflect.Value
}

// word64Val_Set sets *p to x, reinterpreting the bits per the field's type.
// The o parameter is unused here — presumably kept for signature parity with
// the unsafe implementation in pointer_unsafe.go.
func word64Val_Set(p word64Val, o *Buffer, x uint64) {
	switch p.v.Type() {
	case int64Type:
		p.v.SetInt(int64(x))
		return
	case uint64Type:
		p.v.SetUint(x)
		return
	case float64Type:
		p.v.SetFloat(math.Float64frombits(x))
		return
	}
	panic("unreachable")
}

// word64Val_Get gets the bits in p, as a uint64.
func word64Val_Get(p word64Val) uint64 {
	elem := p.v
	switch elem.Kind() {
	case reflect.Int64:
		return uint64(elem.Int())
	case reflect.Uint64:
		return elem.Uint()
	case reflect.Float64:
		return math.Float64bits(elem.Float())
	}
	panic("unreachable")
}

// structPointer_Word64Val returns a reference to a int64, uint64, or float64 field in the struct.
func structPointer_Word64Val(p structPointer, f field) word64Val {
	return word64Val{structPointer_field(p, f)}
}
type word64Slice struct {
v reflect.Value
}
func (p word64Slice) Append(x uint64) {
n, m := p.v.Len(), p.v.Cap()
if n < m {
p.v.SetLen(n + 1)
} else {
t := p.v.Type().Elem()
p.v.Set(reflect.Append(p.v, reflect.Zero(t)))
}
elem := p.v.Index(n)
switch elem.Kind() {
case reflect.Int64:
elem.SetInt(int64(int64(x)))
case reflect.Uint64:
elem.SetUint(uint64(x))
case reflect.Float64:
elem.SetFloat(float64(math.Float64frombits(x)))
}
}
func (p word64Slice) Len() int {
return p.v.Len()
}
func (p word64Slice) Index(i int) uint64 {
elem := p.v.Index(i)
switch elem.Kind() {
case reflect.Int64:
return uint64(elem.Int())
case reflect.Uint64:
return uint64(elem.Uint())
case reflect.Float64:
return math.Float64bits(float64(elem.Float()))
}
panic("unreachable")
}
// structPointer_Word64Slice returns a reference to a []int64, []uint64, or
// []float64 field in the struct, wrapped as a word64Slice.
func structPointer_Word64Slice(p structPointer, f field) word64Slice {
	return word64Slice{structPointer_field(p, f)}
}

View File

@ -0,0 +1,266 @@
// Go support for Protocol Buffers - Google's data interchange format
//
// Copyright 2012 The Go Authors. All rights reserved.
// https://github.com/golang/protobuf
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// +build !appengine
// This file contains the implementation of the proto field accesses using package unsafe.
package proto
import (
"reflect"
"unsafe"
)
// NOTE: These type_Foo functions would more idiomatically be methods,
// but Go does not allow methods on pointer types, and we must preserve
// some pointer type for the garbage collector. We use these
// funcs with clunky names as our poor approximation to methods.
//
// An alternative would be
// type structPointer struct { p unsafe.Pointer }
// but that does not registerize as well.
// A structPointer is a pointer to a struct.
type structPointer unsafe.Pointer

// toStructPointer returns a structPointer equivalent to the given reflect value.
// v must be a pointer-to-struct reflect.Value; v.Pointer() yields its address.
func toStructPointer(v reflect.Value) structPointer {
	return structPointer(unsafe.Pointer(v.Pointer()))
}

// IsNil reports whether p is nil.
func structPointer_IsNil(p structPointer) bool {
	return p == nil
}

// Interface returns the struct pointer, assumed to have element type t,
// as an interface value.
func structPointer_Interface(p structPointer, t reflect.Type) interface{} {
	return reflect.NewAt(t, unsafe.Pointer(p)).Interface()
}
// A field identifies a field in a struct, accessible from a structPointer.
// In this implementation, a field is identified by its byte offset from the start of the struct.
type field uintptr
// toField returns a field equivalent to the given reflect field.
func toField(f *reflect.StructField) field {
return field(f.Offset)
}
// invalidField is an invalid field identifier.
const invalidField = ^field(0)
// IsValid reports whether the field identifier is valid.
func (f field) IsValid() bool {
return f != ^field(0)
}
// Bytes returns the address of a []byte field in the struct.
// (All accessors below compute the field address as base pointer plus byte
// offset and reinterpret it through unsafe.Pointer; they are only valid for
// the field type named in each comment.)
func structPointer_Bytes(p structPointer, f field) *[]byte {
	return (*[]byte)(unsafe.Pointer(uintptr(p) + uintptr(f)))
}

// BytesSlice returns the address of a [][]byte field in the struct.
func structPointer_BytesSlice(p structPointer, f field) *[][]byte {
	return (*[][]byte)(unsafe.Pointer(uintptr(p) + uintptr(f)))
}

// Bool returns the address of a *bool field in the struct.
func structPointer_Bool(p structPointer, f field) **bool {
	return (**bool)(unsafe.Pointer(uintptr(p) + uintptr(f)))
}

// BoolVal returns the address of a bool field in the struct.
func structPointer_BoolVal(p structPointer, f field) *bool {
	return (*bool)(unsafe.Pointer(uintptr(p) + uintptr(f)))
}

// BoolSlice returns the address of a []bool field in the struct.
func structPointer_BoolSlice(p structPointer, f field) *[]bool {
	return (*[]bool)(unsafe.Pointer(uintptr(p) + uintptr(f)))
}

// String returns the address of a *string field in the struct.
func structPointer_String(p structPointer, f field) **string {
	return (**string)(unsafe.Pointer(uintptr(p) + uintptr(f)))
}

// StringVal returns the address of a string field in the struct.
func structPointer_StringVal(p structPointer, f field) *string {
	return (*string)(unsafe.Pointer(uintptr(p) + uintptr(f)))
}

// StringSlice returns the address of a []string field in the struct.
func structPointer_StringSlice(p structPointer, f field) *[]string {
	return (*[]string)(unsafe.Pointer(uintptr(p) + uintptr(f)))
}

// ExtMap returns the address of an extension map field in the struct.
func structPointer_ExtMap(p structPointer, f field) *map[int32]Extension {
	return (*map[int32]Extension)(unsafe.Pointer(uintptr(p) + uintptr(f)))
}

// NewAt returns the reflect.Value for a pointer to a field in the struct.
func structPointer_NewAt(p structPointer, f field, typ reflect.Type) reflect.Value {
	return reflect.NewAt(typ, unsafe.Pointer(uintptr(p)+uintptr(f)))
}

// SetStructPointer writes a *struct field in the struct.
func structPointer_SetStructPointer(p structPointer, f field, q structPointer) {
	*(*structPointer)(unsafe.Pointer(uintptr(p) + uintptr(f))) = q
}

// GetStructPointer reads a *struct field in the struct.
func structPointer_GetStructPointer(p structPointer, f field) structPointer {
	return *(*structPointer)(unsafe.Pointer(uintptr(p) + uintptr(f)))
}

// StructPointerSlice the address of a []*struct field in the struct.
func structPointer_StructPointerSlice(p structPointer, f field) *structPointerSlice {
	return (*structPointerSlice)(unsafe.Pointer(uintptr(p) + uintptr(f)))
}
// A structPointerSlice represents a slice of pointers to structs (themselves submessages or groups).
// Len, Index, and Append are the minimal slice operations the generic
// encode/decode paths need.
type structPointerSlice []structPointer

func (v *structPointerSlice) Len() int                  { return len(*v) }
func (v *structPointerSlice) Index(i int) structPointer { return (*v)[i] }
func (v *structPointerSlice) Append(p structPointer)    { *v = append(*v, p) }
// A word32 is the address of a "pointer to 32-bit value" field,
// i.e. a **uint32 view of a *int32/*uint32/*float32/*enum field.
type word32 **uint32

// IsNil reports whether *v is nil.
func word32_IsNil(p word32) bool {
	return *p == nil
}

// Set sets *v to point at a newly allocated word set to x.
// Words are carved out of the Buffer's uint32 pool so that one backing
// allocation serves many fields.
func word32_Set(p word32, o *Buffer, x uint32) {
	if len(o.uint32s) == 0 {
		o.uint32s = make([]uint32, uint32PoolSize)
	}
	o.uint32s[0] = x
	*p = &o.uint32s[0]
	o.uint32s = o.uint32s[1:]
}

// Get gets the value pointed at by *v.
func word32_Get(p word32) uint32 {
	return **p
}

// Word32 returns the address of a *int32, *uint32, *float32, or *enum field in the struct.
func structPointer_Word32(p structPointer, f field) word32 {
	return word32((**uint32)(unsafe.Pointer(uintptr(p) + uintptr(f))))
}
// A word32Val is the address of a 32-bit value field
// (an int32, uint32, float32, or enum stored by value).
type word32Val *uint32

// Set sets *p to x.
func word32Val_Set(p word32Val, x uint32) {
	*p = x
}

// Get gets the value pointed at by p.
func word32Val_Get(p word32Val) uint32 {
	return *p
}

// Word32Val returns the address of an int32, uint32, float32, or enum
// value field in the struct, viewed as a word32Val.
func structPointer_Word32Val(p structPointer, f field) word32Val {
	return word32Val((*uint32)(unsafe.Pointer(uintptr(p) + uintptr(f))))
}

// A word32Slice is a slice of 32-bit values, aliasing a []int32/[]uint32/
// []float32/[]enum field via unsafe cast.
type word32Slice []uint32

func (v *word32Slice) Append(x uint32)    { *v = append(*v, x) }
func (v *word32Slice) Len() int           { return len(*v) }
func (v *word32Slice) Index(i int) uint32 { return (*v)[i] }

// Word32Slice returns the address of a []int32, []uint32, []float32, or []enum field in the struct.
func structPointer_Word32Slice(p structPointer, f field) *word32Slice {
	return (*word32Slice)(unsafe.Pointer(uintptr(p) + uintptr(f)))
}
// word64 is like word32 but for 64-bit values.
type word64 **uint64

// word64_Set points the field at a newly allocated word holding x,
// carved out of the Buffer's uint64 pool.
func word64_Set(p word64, o *Buffer, x uint64) {
	if len(o.uint64s) == 0 {
		o.uint64s = make([]uint64, uint64PoolSize)
	}
	o.uint64s[0] = x
	*p = &o.uint64s[0]
	o.uint64s = o.uint64s[1:]
}

// word64_IsNil reports whether the pointer field is nil.
func word64_IsNil(p word64) bool {
	return *p == nil
}

// word64_Get returns the pointed-to 64-bit value.
func word64_Get(p word64) uint64 {
	return **p
}

// structPointer_Word64 returns the address of a *int64, *uint64, or
// *float64 field in the struct, viewed as a word64.
func structPointer_Word64(p structPointer, f field) word64 {
	return word64((**uint64)(unsafe.Pointer(uintptr(p) + uintptr(f))))
}

// word64Val is like word32Val but for 64-bit values.
type word64Val *uint64

// word64Val_Set stores x directly into the value field. o is unused;
// the parameter keeps the signature in sync with word64_Set.
func word64Val_Set(p word64Val, o *Buffer, x uint64) {
	*p = x
}

// word64Val_Get returns the value field's bits.
func word64Val_Get(p word64Val) uint64 {
	return *p
}

// structPointer_Word64Val returns the address of an int64, uint64, or
// float64 value field in the struct, viewed as a word64Val.
func structPointer_Word64Val(p structPointer, f field) word64Val {
	return word64Val((*uint64)(unsafe.Pointer(uintptr(p) + uintptr(f))))
}

// word64Slice is like word32Slice but for 64-bit values.
type word64Slice []uint64

func (v *word64Slice) Append(x uint64)    { *v = append(*v, x) }
func (v *word64Slice) Len() int           { return len(*v) }
func (v *word64Slice) Index(i int) uint64 { return (*v)[i] }

// structPointer_Word64Slice returns the address of a []int64, []uint64, or
// []float64 field in the struct, viewed as a *word64Slice.
func structPointer_Word64Slice(p structPointer, f field) *word64Slice {
	return (*word64Slice)(unsafe.Pointer(uintptr(p) + uintptr(f)))
}

View File

@ -0,0 +1,827 @@
// Go support for Protocol Buffers - Google's data interchange format
//
// Copyright 2010 The Go Authors. All rights reserved.
// https://github.com/golang/protobuf
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package proto
/*
* Routines for encoding data into the wire format for protocol buffers.
*/
import (
"fmt"
"log"
"os"
"reflect"
"sort"
"strconv"
"strings"
"sync"
)
// debug gates the verbose property-tracing prints in getPropertiesLocked.
const debug bool = false

// Constants that identify the encoding of a value on the wire.
// These match the wire-type numbers of the protocol buffer wire format.
const (
	WireVarint     = 0
	WireFixed64    = 1
	WireBytes      = 2
	WireStartGroup = 3
	WireEndGroup   = 4
	WireFixed32    = 5
)

const startSize = 10 // initial slice/string sizes
// Encoders are defined in encode.go
// An encoder outputs the full representation of a field, including its
// tag and encoder type, for the struct at base.
type encoder func(p *Buffer, prop *Properties, base structPointer) error

// A valueEncoder encodes a single integer in a particular encoding.
type valueEncoder func(o *Buffer, x uint64) error

// Sizers are defined in encode.go
// A sizer returns the encoded size of a field, including its tag and encoder
// type.
type sizer func(prop *Properties, base structPointer) int

// A valueSizer returns the encoded size of a single integer in a particular
// encoding.
type valueSizer func(x uint64) int

// Decoders are defined in decode.go
// A decoder creates a value from its wire representation.
// Unrecognized subelements are saved in unrec.
type decoder func(p *Buffer, prop *Properties, base structPointer) error

// A valueDecoder decodes a single integer in a particular encoding.
type valueDecoder func(o *Buffer) (x uint64, err error)

// A oneofMarshaler does the marshaling for all oneof fields in a message.
type oneofMarshaler func(Message, *Buffer) error

// A oneofUnmarshaler does the unmarshaling for a oneof field in a message.
type oneofUnmarshaler func(Message, int, int, *Buffer) (bool, error)
// tagMap is an optimization over map[int]int for typical protocol buffer
// use-cases. Encoded protocol buffers are often in tag order with small tag
// numbers, so small tags live in a dense slice and everything else falls
// back to a map.
type tagMap struct {
	fastTags []int
	slowTags map[int]int
}

// tagMapFastLimit is the upper bound on the tag number that will be stored in
// the tagMap slice rather than its map.
const tagMapFastLimit = 1024

// get looks up the field index recorded for tag t, reporting whether it
// was present. Unset slots in the fast slice hold -1.
func (p *tagMap) get(t int) (int, bool) {
	if t <= 0 || t >= tagMapFastLimit {
		fi, ok := p.slowTags[t]
		return fi, ok
	}
	if t >= len(p.fastTags) {
		return 0, false
	}
	fi := p.fastTags[t]
	return fi, fi >= 0
}

// put records fi as the field index for tag t, growing the fast slice
// (padded with -1 sentinels) or lazily allocating the slow map as needed.
func (p *tagMap) put(t int, fi int) {
	if t <= 0 || t >= tagMapFastLimit {
		if p.slowTags == nil {
			p.slowTags = make(map[int]int)
		}
		p.slowTags[t] = fi
		return
	}
	for len(p.fastTags) <= t {
		p.fastTags = append(p.fastTags, -1)
	}
	p.fastTags[t] = fi
}
// StructProperties represents properties for all the fields of a struct.
// decoderTags and decoderOrigNames should only be used by the decoder.
type StructProperties struct {
	Prop             []*Properties  // properties for each field
	reqCount         int            // required count
	decoderTags      tagMap         // map from proto tag to struct field number
	decoderOrigNames map[string]int // map from original name to struct field number
	order            []int          // list of struct field numbers in tag order
	unrecField       field          // field id of the XXX_unrecognized []byte field
	extendable       bool           // is this an extendable proto

	oneofMarshaler   oneofMarshaler   // generated marshaler for all oneof fields
	oneofUnmarshaler oneofUnmarshaler // generated unmarshaler for oneof fields
	stype            reflect.Type     // message struct type; set when oneofs are present

	// OneofTypes contains information about the oneof fields in this message.
	// It is keyed by the original name of a field.
	OneofTypes map[string]*OneofProperties
}

// OneofProperties represents information about a specific field in a oneof.
type OneofProperties struct {
	Type  reflect.Type // pointer to generated struct type for this oneof field
	Field int          // struct field number of the containing oneof in the message
	Prop  *Properties
}
// Implement the sorting interface so we can sort the fields in tag order, as recommended by the spec.
// See encode.go, (*Buffer).enc_struct.
// Only sp.order (the permutation) is rearranged; sp.Prop keeps declaration order.
func (sp *StructProperties) Len() int { return len(sp.order) }

// Less orders fields by ascending proto tag number.
func (sp *StructProperties) Less(i, j int) bool {
	return sp.Prop[sp.order[i]].Tag < sp.Prop[sp.order[j]].Tag
}

// Swap exchanges two entries of the tag-order permutation.
func (sp *StructProperties) Swap(i, j int) { sp.order[i], sp.order[j] = sp.order[j], sp.order[i] }
// Properties represents the protocol-specific behavior of a single struct field.
type Properties struct {
	Name     string // name of the field, for error messages
	OrigName string // original name before protocol compiler (always set)
	Wire     string // wire encoding name from the tag ("varint", "bytes", ...)
	WireType int    // numeric wire type (WireVarint, WireFixed64, ...)
	Tag      int    // proto field tag number
	Required bool
	Optional bool
	Repeated bool
	Packed   bool   // relevant for repeated primitives only
	Enum     string // set for enum types only
	proto3   bool   // whether this is known to be a proto3 field; set for []byte only
	oneof    bool   // whether this is a oneof field

	Default    string // default value
	HasDefault bool   // whether an explicit default was provided
	def_uint64 uint64 // presumably a cached numeric form of Default — set outside this file; verify in defaults handling

	enc           encoder
	valEnc        valueEncoder // set for bool and numeric types only
	field         field        // byte offset of the field within the struct
	tagcode       []byte       // encoding of EncodeVarint((Tag<<3)|WireType)
	tagbuf        [8]byte      // backing storage for tagcode
	stype         reflect.Type // set for struct types only
	sprop         *StructProperties // set for struct types only
	isMarshaler   bool
	isUnmarshaler bool

	mtype    reflect.Type // set for map types only
	mkeyprop *Properties  // set for map types only
	mvalprop *Properties  // set for map types only

	size    sizer
	valSize valueSizer // set for bool and numeric types only

	dec    decoder
	valDec valueDecoder // set for bool and numeric types only

	// If this is a packable field, this will be the decoder for the packed version of the field.
	packedDec decoder
}
// String formats the properties in the protobuf struct field tag style,
// i.e. the same "wire,tag[,req|opt|rep][,packed][,name=...][,proto3]
// [,oneof][,enum=...][,def=...]" form that Parse consumes.
func (p *Properties) String() string {
	s := p.Wire
	// The wire encoding must be kept as the first component; the previous
	// code overwrote s with "," (s = ","), silently dropping p.Wire and
	// breaking the String/Parse round-trip.
	s += ","
	s += strconv.Itoa(p.Tag)
	if p.Required {
		s += ",req"
	}
	if p.Optional {
		s += ",opt"
	}
	if p.Repeated {
		s += ",rep"
	}
	if p.Packed {
		s += ",packed"
	}
	if p.OrigName != p.Name {
		s += ",name=" + p.OrigName
	}
	if p.proto3 {
		s += ",proto3"
	}
	if p.oneof {
		s += ",oneof"
	}
	if len(p.Enum) > 0 {
		s += ",enum=" + p.Enum
	}
	if p.HasDefault {
		s += ",def=" + p.Default
	}
	return s
}
// Parse populates p by parsing a string in the protobuf struct field tag
// style, e.g. "bytes,49,opt,name=foo,def=hello!". The form is
// "<wire>,<tag>[,<flag-or-kv>...]". On malformed input it logs to stderr
// and returns with p only partially populated (no error is reported).
func (p *Properties) Parse(s string) {
	// "bytes,49,opt,name=foo,def=hello!"
	fields := strings.Split(s, ",") // breaks def=, but handled below.
	if len(fields) < 2 {
		fmt.Fprintf(os.Stderr, "proto: tag has too few fields: %q\n", s)
		return
	}

	// The first component selects the wire type and the matching
	// value encoder/decoder/sizer trio.
	p.Wire = fields[0]
	switch p.Wire {
	case "varint":
		p.WireType = WireVarint
		p.valEnc = (*Buffer).EncodeVarint
		p.valDec = (*Buffer).DecodeVarint
		p.valSize = sizeVarint
	case "fixed32":
		p.WireType = WireFixed32
		p.valEnc = (*Buffer).EncodeFixed32
		p.valDec = (*Buffer).DecodeFixed32
		p.valSize = sizeFixed32
	case "fixed64":
		p.WireType = WireFixed64
		p.valEnc = (*Buffer).EncodeFixed64
		p.valDec = (*Buffer).DecodeFixed64
		p.valSize = sizeFixed64
	case "zigzag32":
		p.WireType = WireVarint
		p.valEnc = (*Buffer).EncodeZigzag32
		p.valDec = (*Buffer).DecodeZigzag32
		p.valSize = sizeZigzag32
	case "zigzag64":
		p.WireType = WireVarint
		p.valEnc = (*Buffer).EncodeZigzag64
		p.valDec = (*Buffer).DecodeZigzag64
		p.valSize = sizeZigzag64
	case "bytes", "group":
		p.WireType = WireBytes
		// no numeric converter for non-numeric types
	default:
		fmt.Fprintf(os.Stderr, "proto: tag has unknown wire type: %q\n", s)
		return
	}

	// Second component is the tag number; a parse failure silently aborts.
	var err error
	p.Tag, err = strconv.Atoi(fields[1])
	if err != nil {
		return
	}

	// Remaining components are boolean flags or key=value options.
	for i := 2; i < len(fields); i++ {
		f := fields[i]
		switch {
		case f == "req":
			p.Required = true
		case f == "opt":
			p.Optional = true
		case f == "rep":
			p.Repeated = true
		case f == "packed":
			p.Packed = true
		case strings.HasPrefix(f, "name="):
			p.OrigName = f[5:]
		case strings.HasPrefix(f, "enum="):
			p.Enum = f[5:]
		case f == "proto3":
			p.proto3 = true
		case f == "oneof":
			p.oneof = true
		case strings.HasPrefix(f, "def="):
			p.HasDefault = true
			p.Default = f[4:] // rest of string
			if i+1 < len(fields) {
				// Commas aren't escaped, and def is always last.
				p.Default += "," + strings.Join(fields[i+1:], ",")
				break
			}
		}
	}
}
func logNoSliceEnc(t1, t2 reflect.Type) {
fmt.Fprintf(os.Stderr, "proto: no slice oenc for %T = []%T\n", t1, t2)
}
var protoMessageType = reflect.TypeOf((*Message)(nil)).Elem()
// Initialize the fields for encoding and decoding.
// setEncAndDec selects p.enc/p.dec/p.size (and, for packable fields,
// p.packedDec) based on the Go kind of the field type typ. f is the struct
// field (nil when typ is the key/value of a map entry); lockGetProp selects
// locking vs. lock-held property resolution for nested struct types.
// Unsupported kinds are logged to stderr and leave the coders nil.
func (p *Properties) setEncAndDec(typ reflect.Type, f *reflect.StructField, lockGetProp bool) {
	p.enc = nil
	p.dec = nil
	p.size = nil

	switch t1 := typ; t1.Kind() {
	default:
		fmt.Fprintf(os.Stderr, "proto: no coders for %v\n", t1)

	// proto3 scalar types (stored by value, not behind a pointer)

	case reflect.Bool:
		p.enc = (*Buffer).enc_proto3_bool
		p.dec = (*Buffer).dec_proto3_bool
		p.size = size_proto3_bool
	case reflect.Int32:
		p.enc = (*Buffer).enc_proto3_int32
		p.dec = (*Buffer).dec_proto3_int32
		p.size = size_proto3_int32
	case reflect.Uint32:
		p.enc = (*Buffer).enc_proto3_uint32
		p.dec = (*Buffer).dec_proto3_int32 // can reuse
		p.size = size_proto3_uint32
	case reflect.Int64, reflect.Uint64:
		p.enc = (*Buffer).enc_proto3_int64
		p.dec = (*Buffer).dec_proto3_int64
		p.size = size_proto3_int64
	case reflect.Float32:
		p.enc = (*Buffer).enc_proto3_uint32 // can just treat them as bits
		p.dec = (*Buffer).dec_proto3_int32
		p.size = size_proto3_uint32
	case reflect.Float64:
		p.enc = (*Buffer).enc_proto3_int64 // can just treat them as bits
		p.dec = (*Buffer).dec_proto3_int64
		p.size = size_proto3_int64
	case reflect.String:
		p.enc = (*Buffer).enc_proto3_string
		p.dec = (*Buffer).dec_proto3_string
		p.size = size_proto3_string

	// proto2 optional/required scalars and messages (behind a pointer)
	case reflect.Ptr:
		switch t2 := t1.Elem(); t2.Kind() {
		default:
			fmt.Fprintf(os.Stderr, "proto: no encoder function for %v -> %v\n", t1, t2)
			break
		case reflect.Bool:
			p.enc = (*Buffer).enc_bool
			p.dec = (*Buffer).dec_bool
			p.size = size_bool
		case reflect.Int32:
			p.enc = (*Buffer).enc_int32
			p.dec = (*Buffer).dec_int32
			p.size = size_int32
		case reflect.Uint32:
			p.enc = (*Buffer).enc_uint32
			p.dec = (*Buffer).dec_int32 // can reuse
			p.size = size_uint32
		case reflect.Int64, reflect.Uint64:
			p.enc = (*Buffer).enc_int64
			p.dec = (*Buffer).dec_int64
			p.size = size_int64
		case reflect.Float32:
			p.enc = (*Buffer).enc_uint32 // can just treat them as bits
			p.dec = (*Buffer).dec_int32
			p.size = size_uint32
		case reflect.Float64:
			p.enc = (*Buffer).enc_int64 // can just treat them as bits
			p.dec = (*Buffer).dec_int64
			p.size = size_int64
		case reflect.String:
			p.enc = (*Buffer).enc_string
			p.dec = (*Buffer).dec_string
			p.size = size_string
		case reflect.Struct:
			// Submessage (length-delimited) or group, per the wire type.
			p.stype = t1.Elem()
			p.isMarshaler = isMarshaler(t1)
			p.isUnmarshaler = isUnmarshaler(t1)
			if p.Wire == "bytes" {
				p.enc = (*Buffer).enc_struct_message
				p.dec = (*Buffer).dec_struct_message
				p.size = size_struct_message
			} else {
				p.enc = (*Buffer).enc_struct_group
				p.dec = (*Buffer).dec_struct_group
				p.size = size_struct_group
			}
		}

	// repeated fields
	case reflect.Slice:
		switch t2 := t1.Elem(); t2.Kind() {
		default:
			logNoSliceEnc(t1, t2)
			break
		case reflect.Bool:
			if p.Packed {
				p.enc = (*Buffer).enc_slice_packed_bool
				p.size = size_slice_packed_bool
			} else {
				p.enc = (*Buffer).enc_slice_bool
				p.size = size_slice_bool
			}
			p.dec = (*Buffer).dec_slice_bool
			p.packedDec = (*Buffer).dec_slice_packed_bool
		case reflect.Int32:
			if p.Packed {
				p.enc = (*Buffer).enc_slice_packed_int32
				p.size = size_slice_packed_int32
			} else {
				p.enc = (*Buffer).enc_slice_int32
				p.size = size_slice_int32
			}
			p.dec = (*Buffer).dec_slice_int32
			p.packedDec = (*Buffer).dec_slice_packed_int32
		case reflect.Uint32:
			if p.Packed {
				p.enc = (*Buffer).enc_slice_packed_uint32
				p.size = size_slice_packed_uint32
			} else {
				p.enc = (*Buffer).enc_slice_uint32
				p.size = size_slice_uint32
			}
			p.dec = (*Buffer).dec_slice_int32
			p.packedDec = (*Buffer).dec_slice_packed_int32
		case reflect.Int64, reflect.Uint64:
			if p.Packed {
				p.enc = (*Buffer).enc_slice_packed_int64
				p.size = size_slice_packed_int64
			} else {
				p.enc = (*Buffer).enc_slice_int64
				p.size = size_slice_int64
			}
			p.dec = (*Buffer).dec_slice_int64
			p.packedDec = (*Buffer).dec_slice_packed_int64
		case reflect.Uint8:
			p.enc = (*Buffer).enc_slice_byte
			p.dec = (*Buffer).dec_slice_byte
			p.size = size_slice_byte
			// This is a []byte, which is either a bytes field,
			// or the value of a map field. In the latter case,
			// we always encode an empty []byte, so we should not
			// use the proto3 enc/size funcs.
			// f == nil iff this is the key/value of a map field.
			if p.proto3 && f != nil {
				p.enc = (*Buffer).enc_proto3_slice_byte
				p.size = size_proto3_slice_byte
			}
		case reflect.Float32, reflect.Float64:
			switch t2.Bits() {
			case 32:
				// can just treat them as bits
				if p.Packed {
					p.enc = (*Buffer).enc_slice_packed_uint32
					p.size = size_slice_packed_uint32
				} else {
					p.enc = (*Buffer).enc_slice_uint32
					p.size = size_slice_uint32
				}
				p.dec = (*Buffer).dec_slice_int32
				p.packedDec = (*Buffer).dec_slice_packed_int32
			case 64:
				// can just treat them as bits
				if p.Packed {
					p.enc = (*Buffer).enc_slice_packed_int64
					p.size = size_slice_packed_int64
				} else {
					p.enc = (*Buffer).enc_slice_int64
					p.size = size_slice_int64
				}
				p.dec = (*Buffer).dec_slice_int64
				p.packedDec = (*Buffer).dec_slice_packed_int64
			default:
				logNoSliceEnc(t1, t2)
				break
			}
		case reflect.String:
			p.enc = (*Buffer).enc_slice_string
			p.dec = (*Buffer).dec_slice_string
			p.size = size_slice_string
		case reflect.Ptr:
			switch t3 := t2.Elem(); t3.Kind() {
			default:
				// NOTE(review): %T on a reflect.Type prints the reflect
				// implementation type, not the field type; compare the %v
				// used in the Ptr default branch above.
				fmt.Fprintf(os.Stderr, "proto: no ptr oenc for %T -> %T -> %T\n", t1, t2, t3)
				break
			case reflect.Struct:
				// Repeated submessage/group.
				p.stype = t2.Elem()
				p.isMarshaler = isMarshaler(t2)
				p.isUnmarshaler = isUnmarshaler(t2)
				if p.Wire == "bytes" {
					p.enc = (*Buffer).enc_slice_struct_message
					p.dec = (*Buffer).dec_slice_struct_message
					p.size = size_slice_struct_message
				} else {
					p.enc = (*Buffer).enc_slice_struct_group
					p.dec = (*Buffer).dec_slice_struct_group
					p.size = size_slice_struct_group
				}
			}
		case reflect.Slice:
			switch t2.Elem().Kind() {
			default:
				fmt.Fprintf(os.Stderr, "proto: no slice elem oenc for %T -> %T -> %T\n", t1, t2, t2.Elem())
				break
			case reflect.Uint8:
				// repeated bytes ([][]byte)
				p.enc = (*Buffer).enc_slice_slice_byte
				p.dec = (*Buffer).dec_slice_slice_byte
				p.size = size_slice_slice_byte
			}
		}

	// map fields: build key/value sub-properties from the auxiliary tags
	case reflect.Map:
		p.enc = (*Buffer).enc_new_map
		p.dec = (*Buffer).dec_new_map
		p.size = size_new_map

		p.mtype = t1
		p.mkeyprop = &Properties{}
		p.mkeyprop.init(reflect.PtrTo(p.mtype.Key()), "Key", f.Tag.Get("protobuf_key"), nil, lockGetProp)
		p.mvalprop = &Properties{}
		vtype := p.mtype.Elem()
		if vtype.Kind() != reflect.Ptr && vtype.Kind() != reflect.Slice {
			// The value type is not a message (*T) or bytes ([]byte),
			// so we need encoders for the pointer to this type.
			vtype = reflect.PtrTo(vtype)
		}
		p.mvalprop.init(vtype, "Value", f.Tag.Get("protobuf_val"), nil, lockGetProp)
	}

	// precalculate tag code: the varint encoding of (Tag<<3)|wire,
	// written into the fixed tagbuf to avoid a per-field allocation.
	wire := p.WireType
	if p.Packed {
		wire = WireBytes
	}
	x := uint32(p.Tag)<<3 | uint32(wire)
	i := 0
	for i = 0; x > 127; i++ {
		p.tagbuf[i] = 0x80 | uint8(x&0x7F)
		x >>= 7
	}
	p.tagbuf[i] = uint8(x)
	p.tagcode = p.tagbuf[0 : i+1]

	// Resolve nested struct properties, honoring the caller's lock state.
	if p.stype != nil {
		if lockGetProp {
			p.sprop = GetProperties(p.stype)
		} else {
			p.sprop = getPropertiesLocked(p.stype)
		}
	}
}
// Interface types used to detect custom (un)marshaling support.
var (
	marshalerType   = reflect.TypeOf((*Marshaler)(nil)).Elem()
	unmarshalerType = reflect.TypeOf((*Unmarshaler)(nil)).Elem()
)

// isMarshaler reports whether type t implements Marshaler.
func isMarshaler(t reflect.Type) bool {
	// We're checking for (likely) pointer-receiver methods
	// so if t is not a pointer, something is very wrong.
	// The calls above only invoke isMarshaler on pointer types.
	if t.Kind() != reflect.Ptr {
		panic("proto: misuse of isMarshaler")
	}
	return t.Implements(marshalerType)
}

// isUnmarshaler reports whether type t implements Unmarshaler.
func isUnmarshaler(t reflect.Type) bool {
	// We're checking for (likely) pointer-receiver methods
	// so if t is not a pointer, something is very wrong.
	// The calls above only invoke isUnmarshaler on pointer types.
	if t.Kind() != reflect.Ptr {
		panic("proto: misuse of isUnmarshaler")
	}
	return t.Implements(unmarshalerType)
}
// Init populates the properties from a protocol buffer struct tag.
func (p *Properties) Init(typ reflect.Type, name, tag string, f *reflect.StructField) {
	p.init(typ, name, tag, f, true)
}

// init is the lock-aware implementation of Init. lockGetProp selects
// between GetProperties (takes propertiesMu) and getPropertiesLocked
// (caller already holds it) when resolving nested struct properties.
// An empty tag leaves only Name/OrigName/field populated.
func (p *Properties) init(typ reflect.Type, name, tag string, f *reflect.StructField, lockGetProp bool) {
	// "bytes,49,opt,def=hello!"
	p.Name = name
	p.OrigName = name
	if f != nil {
		p.field = toField(f)
	}
	if tag == "" {
		return
	}
	p.Parse(tag)
	p.setEncAndDec(typ, f, lockGetProp)
}
// propertiesMu guards propertiesMap, the global cache of computed
// StructProperties keyed by message struct type.
var (
	propertiesMu  sync.RWMutex
	propertiesMap = make(map[reflect.Type]*StructProperties)
)

// GetProperties returns the list of properties for the type represented by t.
// t must represent a generated struct type of a protocol message.
func GetProperties(t reflect.Type) *StructProperties {
	if t.Kind() != reflect.Struct {
		panic("proto: type must have kind struct")
	}

	// Most calls to GetProperties in a long-running program will be
	// retrieving details for types we have seen before.
	propertiesMu.RLock()
	sprop, ok := propertiesMap[t]
	propertiesMu.RUnlock()
	if ok {
		if collectStats {
			stats.Chit++
		}
		return sprop
	}

	// Slow path: another goroutine may have inserted t between the
	// RUnlock and the Lock; getPropertiesLocked re-checks the map.
	propertiesMu.Lock()
	sprop = getPropertiesLocked(t)
	propertiesMu.Unlock()
	return sprop
}
// getPropertiesLocked requires that propertiesMu is held.
// It computes (and caches) the StructProperties for message struct type t:
// per-field Properties, the tag-order permutation, oneof metadata, and the
// decoder's tag/name lookup tables.
func getPropertiesLocked(t reflect.Type) *StructProperties {
	if prop, ok := propertiesMap[t]; ok {
		if collectStats {
			stats.Chit++
		}
		return prop
	}
	if collectStats {
		stats.Cmiss++
	}

	prop := new(StructProperties)
	// in case of recursive protos, fill this in now.
	propertiesMap[t] = prop

	// build properties
	prop.extendable = reflect.PtrTo(t).Implements(extendableProtoType)
	prop.unrecField = invalidField
	prop.Prop = make([]*Properties, t.NumField())
	prop.order = make([]int, t.NumField())

	for i := 0; i < t.NumField(); i++ {
		f := t.Field(i)
		p := new(Properties)
		name := f.Name
		p.init(f.Type, name, f.Tag.Get("protobuf"), &f, false)

		if f.Name == "XXX_extensions" { // special case
			p.enc = (*Buffer).enc_map
			p.dec = nil // not needed
			p.size = size_map
		}
		if f.Name == "XXX_unrecognized" { // special case
			prop.unrecField = toField(&f)
		}
		oneof := f.Tag.Get("protobuf_oneof") != "" // special case
		prop.Prop[i] = p
		prop.order[i] = i
		if debug {
			print(i, " ", f.Name, " ", t.String(), " ")
			if p.Tag > 0 {
				print(p.String())
			}
			print("\n")
		}
		// Missing an encoder is only worth a warning for ordinary fields;
		// XXX_ internals and oneof wrappers legitimately have none.
		if p.enc == nil && !strings.HasPrefix(f.Name, "XXX_") && !oneof {
			fmt.Fprintln(os.Stderr, "proto: no encoder for", f.Name, f.Type.String(), "[GetProperties]")
		}
	}

	// Re-order prop.order.
	sort.Sort(prop)

	// Pick up oneof metadata from the generated XXX_OneofFuncs method,
	// when the message declares one.
	type oneofMessage interface {
		XXX_OneofFuncs() (func(Message, *Buffer) error, func(Message, int, int, *Buffer) (bool, error), []interface{})
	}
	if om, ok := reflect.Zero(reflect.PtrTo(t)).Interface().(oneofMessage); ok {
		var oots []interface{}
		prop.oneofMarshaler, prop.oneofUnmarshaler, oots = om.XXX_OneofFuncs()
		prop.stype = t

		// Interpret oneof metadata.
		prop.OneofTypes = make(map[string]*OneofProperties)
		for _, oot := range oots {
			oop := &OneofProperties{
				Type: reflect.ValueOf(oot).Type(), // *T
				Prop: new(Properties),
			}
			sft := oop.Type.Elem().Field(0)
			oop.Prop.Name = sft.Name
			oop.Prop.Parse(sft.Tag.Get("protobuf"))
			// There will be exactly one interface field that
			// this new value is assignable to.
			for i := 0; i < t.NumField(); i++ {
				f := t.Field(i)
				if f.Type.Kind() != reflect.Interface {
					continue
				}
				if !oop.Type.AssignableTo(f.Type) {
					continue
				}
				oop.Field = i
				break
			}
			prop.OneofTypes[oop.Prop.OrigName] = oop
		}
	}

	// build required counts
	// build tags
	reqCount := 0
	prop.decoderOrigNames = make(map[string]int)
	for i, p := range prop.Prop {
		if strings.HasPrefix(p.Name, "XXX_") {
			// Internal fields should not appear in tags/origNames maps.
			// They are handled specially when encoding and decoding.
			continue
		}
		if p.Required {
			reqCount++
		}
		prop.decoderTags.put(p.Tag, i)
		prop.decoderOrigNames[p.OrigName] = i
	}
	prop.reqCount = reqCount

	return prop
}
// Return the Properties object for the x[0]'th field of the structure.
// x must be a single-element index path; multi-level indices (as produced
// for promoted/embedded fields) are unsupported and log an error,
// returning nil.
func propByIndex(t reflect.Type, x []int) *Properties {
	if len(x) != 1 {
		fmt.Fprintf(os.Stderr, "proto: field index dimension %d (not 1) for type %s\n", len(x), t)
		return nil
	}
	prop := GetProperties(t)
	return prop.Prop[x[0]]
}
// Get the address and type of a pointer to a struct from an interface.
// A nil message yields ErrNil (with t and b left as zero values).
func getbase(pb Message) (t reflect.Type, b structPointer, err error) {
	if pb == nil {
		err = ErrNil
		return
	}
	// get the reflect type of the pointer to the struct.
	t = reflect.TypeOf(pb)
	// get the address of the struct.
	value := reflect.ValueOf(pb)
	b = toStructPointer(value)
	return
}
// A global registry of enum types.
// The generated code will register the generated maps by calling RegisterEnum.
var enumValueMaps = make(map[string]map[string]int32)

// RegisterEnum is called from the generated code to install the enum descriptor
// maps into the global table to aid parsing text format protocol buffers.
// unusedNameMap is accepted for signature compatibility with generated code
// but is not stored. Registering the same typeName twice panics.
func RegisterEnum(typeName string, unusedNameMap map[int32]string, valueMap map[string]int32) {
	if _, ok := enumValueMaps[typeName]; ok {
		panic("proto: duplicate enum registered: " + typeName)
	}
	enumValueMaps[typeName] = valueMap
}
// EnumValueMap returns the mapping from names to integers of the
// enum type enumType, or nil if the type has not been registered
// via RegisterEnum.
func EnumValueMap(enumType string) map[string]int32 {
	return enumValueMaps[enumType]
}
// protoTypes maps fully-qualified proto names ("pkg.Message") to the Go
// type (pointer to struct) registered for them.
var protoTypes = make(map[string]reflect.Type)

// RegisterType is called from generated code and maps from the fully
// qualified proto name to the type (pointer to struct) of the protocol
// buffer. A duplicate registration is logged and otherwise ignored.
func RegisterType(x interface{}, name string) {
	if _, dup := protoTypes[name]; dup {
		// TODO: Some day, make this a panic.
		log.Printf("proto: duplicate proto type registered: %s", name)
		return
	}
	protoTypes[name] = reflect.TypeOf(x)
}

View File

@ -0,0 +1,122 @@
// Code generated by protoc-gen-go.
// source: proto3_proto/proto3.proto
// DO NOT EDIT!

/*
Package proto3_proto is a generated protocol buffer package.

It is generated from these files:
	proto3_proto/proto3.proto

It has these top-level messages:
	Message
	Nested
	MessageWithMap
*/
package proto3_proto

import proto "github.com/letsencrypt/boulder/Godeps/_workspace/src/github.com/golang/protobuf/proto"
import testdata "github.com/letsencrypt/boulder/Godeps/_workspace/src/github.com/golang/protobuf/proto/testdata"

// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal

// Message_Humour mirrors the Humour enum nested in message Message of
// proto3_proto/proto3.proto.
// NOTE(review): generated file — hand edits will be lost on regeneration.
type Message_Humour int32

const (
	Message_UNKNOWN     Message_Humour = 0
	Message_PUNS        Message_Humour = 1
	Message_SLAPSTICK   Message_Humour = 2
	Message_BILL_BAILEY Message_Humour = 3
)

// Message_Humour_name maps enum values to their proto names.
var Message_Humour_name = map[int32]string{
	0: "UNKNOWN",
	1: "PUNS",
	2: "SLAPSTICK",
	3: "BILL_BAILEY",
}

// Message_Humour_value maps proto names back to enum values.
var Message_Humour_value = map[string]int32{
	"UNKNOWN":     0,
	"PUNS":        1,
	"SLAPSTICK":   2,
	"BILL_BAILEY": 3,
}

// String returns the proto name of the enum value.
func (x Message_Humour) String() string {
	return proto.EnumName(Message_Humour_name, int32(x))
}
// Message is the generated struct for proto3_proto.Message, exercising
// scalar, repeated, map, and cross-file (proto2) field kinds.
type Message struct {
	Name         string                           `protobuf:"bytes,1,opt,name=name" json:"name,omitempty"`
	Hilarity     Message_Humour                   `protobuf:"varint,2,opt,name=hilarity,enum=proto3_proto.Message_Humour" json:"hilarity,omitempty"`
	HeightInCm   uint32                           `protobuf:"varint,3,opt,name=height_in_cm" json:"height_in_cm,omitempty"`
	Data         []byte                           `protobuf:"bytes,4,opt,name=data,proto3" json:"data,omitempty"`
	ResultCount  int64                            `protobuf:"varint,7,opt,name=result_count" json:"result_count,omitempty"`
	TrueScotsman bool                             `protobuf:"varint,8,opt,name=true_scotsman" json:"true_scotsman,omitempty"`
	Score        float32                          `protobuf:"fixed32,9,opt,name=score" json:"score,omitempty"`
	Key          []uint64                         `protobuf:"varint,5,rep,name=key" json:"key,omitempty"`
	Nested       *Nested                          `protobuf:"bytes,6,opt,name=nested" json:"nested,omitempty"`
	Terrain      map[string]*Nested               `protobuf:"bytes,10,rep,name=terrain" json:"terrain,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"`
	Proto2Field  *testdata.SubDefaults            `protobuf:"bytes,11,opt,name=proto2_field" json:"proto2_field,omitempty"`
	Proto2Value  map[string]*testdata.SubDefaults `protobuf:"bytes,13,rep,name=proto2_value" json:"proto2_value,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"`
}

func (m *Message) Reset()         { *m = Message{} }
func (m *Message) String() string { return proto.CompactTextString(m) }
func (*Message) ProtoMessage()    {}

// GetNested returns the nested message, or nil if m is nil or the field is unset.
func (m *Message) GetNested() *Nested {
	if m != nil {
		return m.Nested
	}
	return nil
}

// GetTerrain returns the terrain map, or nil if m is nil.
func (m *Message) GetTerrain() map[string]*Nested {
	if m != nil {
		return m.Terrain
	}
	return nil
}

// GetProto2Field returns the proto2 submessage, or nil if m is nil or unset.
func (m *Message) GetProto2Field() *testdata.SubDefaults {
	if m != nil {
		return m.Proto2Field
	}
	return nil
}

// GetProto2Value returns the proto2-valued map, or nil if m is nil.
func (m *Message) GetProto2Value() map[string]*testdata.SubDefaults {
	if m != nil {
		return m.Proto2Value
	}
	return nil
}
// Nested is the generated struct for proto3_proto.Nested.
type Nested struct {
	Bunny string `protobuf:"bytes,1,opt,name=bunny" json:"bunny,omitempty"`
}

func (m *Nested) Reset()         { *m = Nested{} }
func (m *Nested) String() string { return proto.CompactTextString(m) }
func (*Nested) ProtoMessage()    {}

// MessageWithMap is the generated struct for proto3_proto.MessageWithMap.
type MessageWithMap struct {
	ByteMapping map[bool][]byte `protobuf:"bytes,1,rep,name=byte_mapping" json:"byte_mapping,omitempty" protobuf_key:"varint,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value,proto3"`
}

func (m *MessageWithMap) Reset()         { *m = MessageWithMap{} }
func (m *MessageWithMap) String() string { return proto.CompactTextString(m) }
func (*MessageWithMap) ProtoMessage()    {}

// GetByteMapping returns the map field, or nil if m is nil.
func (m *MessageWithMap) GetByteMapping() map[bool][]byte {
	if m != nil {
		return m.ByteMapping
	}
	return nil
}

// init registers the Humour enum descriptor maps with the proto package.
func init() {
	proto.RegisterEnum("proto3_proto.Message_Humour", Message_Humour_name, Message_Humour_value)
}

View File

@ -0,0 +1,68 @@
// Go support for Protocol Buffers - Google's data interchange format
//
// Copyright 2014 The Go Authors. All rights reserved.
// https://github.com/golang/protobuf
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Test schema exercising proto3 support in the Go proto package.
syntax = "proto3";

import "testdata/test.proto";

package proto3_proto;

message Message {
  // Humour is a nested enum; value 0 is the proto3 default.
  enum Humour {
    UNKNOWN = 0;
    PUNS = 1;
    SLAPSTICK = 2;
    BILL_BAILEY = 3;
  }

  string name = 1;
  Humour hilarity = 2;
  uint32 height_in_cm = 3;
  bytes data = 4;
  int64 result_count = 7;
  bool true_scotsman = 8;
  float score = 9;

  repeated uint64 key = 5;
  Nested nested = 6;

  // Map fields, including ones whose value type comes from a proto2 file.
  map<string, Nested> terrain = 10;
  testdata.SubDefaults proto2_field = 11;
  map<string, testdata.SubDefaults> proto2_value = 13;
}

message Nested {
  string bunny = 1;
}

message MessageWithMap {
  map<bool, bytes> byte_mapping = 1;
}

View File

@ -0,0 +1,797 @@
// Go support for Protocol Buffers - Google's data interchange format
//
// Copyright 2010 The Go Authors. All rights reserved.
// https://github.com/golang/protobuf
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package proto
// Functions for writing the text protocol buffer format.
import (
"bufio"
"bytes"
"encoding"
"errors"
"fmt"
"io"
"log"
"math"
"reflect"
"sort"
"strings"
)
// Byte sequences reused while rendering the text format.
var (
	newline         = []byte("\n")
	spaces          = []byte(" ")
	gtNewline       = []byte(">\n")
	endBraceNewline = []byte("}\n")
	backslashN      = []byte{'\\', 'n'}
	backslashR      = []byte{'\\', 'r'}
	backslashT      = []byte{'\\', 't'}
	backslashDQ     = []byte{'\\', '"'}
	backslashBS     = []byte{'\\', '\\'}
	posInf          = []byte("inf")
	negInf          = []byte("-inf")
	nan             = []byte("nan")
)
// writer is the minimal destination needed by textWriter: an io.Writer
// that can also emit single bytes without allocation.
type writer interface {
	io.Writer
	WriteByte(byte) error
}
// textWriter is an io.Writer that tracks its indentation level and
// inserts the indent at the start of each line it writes.
type textWriter struct {
	ind      int  // current indentation level (two spaces per level)
	complete bool // if the current position is a complete line
	compact  bool // whether to write out as a one-liner
	w        writer
}
// WriteString writes s, indenting at line starts exactly as Write does.
// Strings without a newline take a fast path; anything containing one is
// delegated to Write to avoid duplicating its line-splitting logic.
func (w *textWriter) WriteString(s string) (n int, err error) {
	if strings.Contains(s, "\n") {
		// Rare path: WriteString is typically called without newlines.
		return w.Write([]byte(s))
	}
	if w.complete && !w.compact {
		w.writeIndent()
	}
	w.complete = false
	return io.WriteString(w.w, s)
}
// Write writes p, emitting indentation at the start of each output line
// and (in compact mode) replacing newlines with single spaces. The
// returned count n follows io.Writer conventions for the bytes actually
// written, which may differ from len(p) in compact mode.
func (w *textWriter) Write(p []byte) (n int, err error) {
	newlines := bytes.Count(p, newline)
	if newlines == 0 {
		// Fast path: no line breaks in p.
		if !w.compact && w.complete {
			w.writeIndent()
		}
		n, err = w.w.Write(p)
		w.complete = false
		return n, err
	}
	frags := bytes.SplitN(p, newline, newlines+1)
	if w.compact {
		// Join fragments with single spaces instead of newlines.
		for i, frag := range frags {
			if i > 0 {
				if err := w.w.WriteByte(' '); err != nil {
					return n, err
				}
				n++
			}
			nn, err := w.w.Write(frag)
			n += nn
			if err != nil {
				return n, err
			}
		}
		return n, nil
	}
	for i, frag := range frags {
		if w.complete {
			w.writeIndent()
		}
		nn, err := w.w.Write(frag)
		n += nn
		if err != nil {
			return n, err
		}
		if i+1 < len(frags) {
			if err := w.w.WriteByte('\n'); err != nil {
				return n, err
			}
			n++
		}
	}
	// The line is complete iff p ended with a newline (empty last fragment).
	w.complete = len(frags[len(frags)-1]) == 0
	return n, nil
}
// WriteByte writes one byte, translating newlines to spaces in compact
// mode and emitting indentation when at the start of a line.
func (w *textWriter) WriteByte(c byte) error {
	if c == '\n' && w.compact {
		c = ' '
	}
	if w.complete && !w.compact {
		w.writeIndent()
	}
	err := w.w.WriteByte(c)
	w.complete = c == '\n'
	return err
}
// indent increases the indentation level by one.
func (w *textWriter) indent() { w.ind++ }

// unindent decreases the indentation level, logging (rather than
// panicking) if it is already at zero.
func (w *textWriter) unindent() {
	if w.ind <= 0 {
		log.Printf("proto: textWriter unindented too far")
		return
	}
	w.ind--
}
// writeName writes the field name for props, followed by a colon unless
// the field is a group (groups are rendered without one).
func writeName(w *textWriter, props *Properties) error {
	_, err := w.WriteString(props.OrigName)
	if err != nil {
		return err
	}
	if props.Wire == "group" {
		return nil
	}
	return w.WriteByte(':')
}
var (
	// messageSetType is the reflect.Type of MessageSet, which gets
	// special-cased rendering at the top of writeStruct.
	messageSetType = reflect.TypeOf((*MessageSet)(nil)).Elem()
)

// raw is the interface satisfied by RawMessage.
type raw interface {
	Bytes() []byte
}
// writeStruct renders every set field of the struct value sv in text
// format. MessageSet structs are special-cased; XXX_ internal fields,
// repeated fields, map fields, oneofs, raw messages, and extensions each
// take their own path through the loop.
func writeStruct(w *textWriter, sv reflect.Value) error {
	if sv.Type() == messageSetType {
		return writeMessageSet(w, sv.Addr().Interface().(*MessageSet))
	}
	st := sv.Type()
	sprops := GetProperties(st)
	for i := 0; i < sv.NumField(); i++ {
		fv := sv.Field(i)
		props := sprops.Prop[i]
		name := st.Field(i).Name
		if strings.HasPrefix(name, "XXX_") {
			// There are two XXX_ fields:
			//   XXX_unrecognized []byte
			//   XXX_extensions   map[int32]proto.Extension
			// The first is handled here;
			// the second is handled at the bottom of this function.
			if name == "XXX_unrecognized" && !fv.IsNil() {
				if err := writeUnknownStruct(w, fv.Interface().([]byte)); err != nil {
					return err
				}
			}
			continue
		}
		if fv.Kind() == reflect.Ptr && fv.IsNil() {
			// Field not filled in. This could be an optional field or
			// a required field that wasn't filled in. Either way, there
			// isn't anything we can show for it.
			continue
		}
		if fv.Kind() == reflect.Slice && fv.IsNil() {
			// Repeated field that is empty, or a bytes field that is unused.
			continue
		}
		if props.Repeated && fv.Kind() == reflect.Slice {
			// Repeated field: one "name: value" line per element.
			for j := 0; j < fv.Len(); j++ {
				if err := writeName(w, props); err != nil {
					return err
				}
				if !w.compact {
					if err := w.WriteByte(' '); err != nil {
						return err
					}
				}
				v := fv.Index(j)
				if v.Kind() == reflect.Ptr && v.IsNil() {
					// A nil message in a repeated field is not valid,
					// but we can handle that more gracefully than panicking.
					if _, err := w.Write([]byte("<nil>\n")); err != nil {
						return err
					}
					continue
				}
				if err := writeAny(w, v, props); err != nil {
					return err
				}
				if err := w.WriteByte('\n'); err != nil {
					return err
				}
			}
			continue
		}
		if fv.Kind() == reflect.Map {
			// Map fields are rendered as a repeated struct with key/value fields.
			keys := fv.MapKeys()
			sort.Sort(mapKeys(keys)) // deterministic output order
			for _, key := range keys {
				val := fv.MapIndex(key)
				if err := writeName(w, props); err != nil {
					return err
				}
				if !w.compact {
					if err := w.WriteByte(' '); err != nil {
						return err
					}
				}
				// open struct
				if err := w.WriteByte('<'); err != nil {
					return err
				}
				if !w.compact {
					if err := w.WriteByte('\n'); err != nil {
						return err
					}
				}
				w.indent()
				// key
				if _, err := w.WriteString("key:"); err != nil {
					return err
				}
				if !w.compact {
					if err := w.WriteByte(' '); err != nil {
						return err
					}
				}
				if err := writeAny(w, key, props.mkeyprop); err != nil {
					return err
				}
				if err := w.WriteByte('\n'); err != nil {
					return err
				}
				// nil values aren't legal, but we can avoid panicking because of them.
				if val.Kind() != reflect.Ptr || !val.IsNil() {
					// value
					if _, err := w.WriteString("value:"); err != nil {
						return err
					}
					if !w.compact {
						if err := w.WriteByte(' '); err != nil {
							return err
						}
					}
					if err := writeAny(w, val, props.mvalprop); err != nil {
						return err
					}
					if err := w.WriteByte('\n'); err != nil {
						return err
					}
				}
				// close struct
				w.unindent()
				if err := w.WriteByte('>'); err != nil {
					return err
				}
				if err := w.WriteByte('\n'); err != nil {
					return err
				}
			}
			continue
		}
		if props.proto3 && fv.Kind() == reflect.Slice && fv.Len() == 0 {
			// empty bytes field
			continue
		}
		if fv.Kind() != reflect.Ptr && fv.Kind() != reflect.Slice {
			// proto3 non-repeated scalar field; skip if zero value
			if isProto3Zero(fv) {
				continue
			}
		}
		if fv.Kind() == reflect.Interface {
			// Check if it is a oneof.
			if st.Field(i).Tag.Get("protobuf_oneof") != "" {
				// fv is nil, or holds a pointer to generated struct.
				// That generated struct has exactly one field,
				// which has a protobuf struct tag.
				if fv.IsNil() {
					continue
				}
				inner := fv.Elem().Elem() // interface -> *T -> T
				tag := inner.Type().Field(0).Tag.Get("protobuf")
				props = new(Properties) // Overwrite the outer props var, but not its pointee.
				props.Parse(tag)
				// Write the value in the oneof, not the oneof itself.
				fv = inner.Field(0)
				// Special case to cope with malformed messages gracefully:
				// If the value in the oneof is a nil pointer, don't panic
				// in writeAny.
				if fv.Kind() == reflect.Ptr && fv.IsNil() {
					// Use errors.New so writeAny won't render quotes.
					msg := errors.New("/* nil */")
					fv = reflect.ValueOf(&msg).Elem()
				}
			}
		}
		if err := writeName(w, props); err != nil {
			return err
		}
		if !w.compact {
			if err := w.WriteByte(' '); err != nil {
				return err
			}
		}
		if b, ok := fv.Interface().(raw); ok {
			if err := writeRaw(w, b.Bytes()); err != nil {
				return err
			}
			continue
		}
		// Enums have a String method, so writeAny will work fine.
		if err := writeAny(w, fv, props); err != nil {
			return err
		}
		if err := w.WriteByte('\n'); err != nil {
			return err
		}
	}
	// Extensions (the XXX_extensions field).
	pv := sv.Addr()
	if pv.Type().Implements(extendableProtoType) {
		if err := writeExtensions(w, pv); err != nil {
			return err
		}
	}
	return nil
}
// writeRaw writes an uninterpreted raw message, bracketed by < and > and
// rendered via writeUnknownStruct.
func writeRaw(w *textWriter, b []byte) error {
	if err := w.WriteByte('<'); err != nil {
		return err
	}
	if !w.compact {
		if err := w.WriteByte('\n'); err != nil {
			return err
		}
	}
	w.indent()
	if err := writeUnknownStruct(w, b); err != nil {
		return err
	}
	w.unindent()
	return w.WriteByte('>')
}
// writeAny writes an arbitrary field value: []byte and string as quoted
// strings, structs as bracketed sub-messages (honoring group wire type
// and encoding.TextMarshaler), floats with inf/nan special cases, and
// everything else via fmt.
func writeAny(w *textWriter, v reflect.Value, props *Properties) error {
	v = reflect.Indirect(v)
	// Floats have special cases.
	if v.Kind() == reflect.Float32 || v.Kind() == reflect.Float64 {
		x := v.Float()
		var b []byte
		switch {
		case math.IsInf(x, 1):
			b = posInf
		case math.IsInf(x, -1):
			b = negInf
		case math.IsNaN(x):
			b = nan
		}
		if b != nil {
			_, err := w.Write(b)
			return err
		}
		// Other values are handled below.
	}
	// We don't attempt to serialise every possible value type; only those
	// that can occur in protocol buffers.
	switch v.Kind() {
	case reflect.Slice:
		// Should only be a []byte; repeated fields are handled in writeStruct.
		if err := writeString(w, string(v.Interface().([]byte))); err != nil {
			return err
		}
	case reflect.String:
		if err := writeString(w, v.String()); err != nil {
			return err
		}
	case reflect.Struct:
		// Required/optional group/message: groups use {}, messages use <>.
		var bra, ket byte = '<', '>'
		if props != nil && props.Wire == "group" {
			bra, ket = '{', '}'
		}
		if err := w.WriteByte(bra); err != nil {
			return err
		}
		if !w.compact {
			if err := w.WriteByte('\n'); err != nil {
				return err
			}
		}
		w.indent()
		if tm, ok := v.Interface().(encoding.TextMarshaler); ok {
			text, err := tm.MarshalText()
			if err != nil {
				return err
			}
			if _, err = w.Write(text); err != nil {
				return err
			}
		} else if err := writeStruct(w, v); err != nil {
			return err
		}
		w.unindent()
		if err := w.WriteByte(ket); err != nil {
			return err
		}
	default:
		_, err := fmt.Fprint(w, v.Interface())
		return err
	}
	return nil
}
// isprint reports whether c is a printable ASCII character — the
// equivalent of C's isprint for the ASCII range (0x20..0x7e).
func isprint(c byte) bool {
	return 0x20 <= c && c <= 0x7e
}
// writeString writes a string in the protocol buffer text format.
// It is similar to strconv.Quote except we don't use Go escape sequences,
// we treat the string as a byte sequence, and we use octal escapes.
// These differences are to maintain interoperability with the other
// languages' implementations of the text format.
func writeString(w *textWriter, s string) error {
	// use WriteByte here to get any needed indent
	if err := w.WriteByte('"'); err != nil {
		return err
	}
	// Loop over the bytes, not the runes.
	for i := 0; i < len(s); i++ {
		var err error
		// Divergence from C++: we don't escape apostrophes.
		// There's no need to escape them, and the C++ parser
		// copes with a naked apostrophe.
		switch c := s[i]; c {
		case '\n':
			_, err = w.w.Write(backslashN)
		case '\r':
			_, err = w.w.Write(backslashR)
		case '\t':
			_, err = w.w.Write(backslashT)
		case '"':
			_, err = w.w.Write(backslashDQ)
		case '\\':
			_, err = w.w.Write(backslashBS)
		default:
			if isprint(c) {
				err = w.w.WriteByte(c)
			} else {
				// Non-printable bytes become 3-digit octal escapes.
				_, err = fmt.Fprintf(w.w, "\\%03o", c)
			}
		}
		if err != nil {
			return err
		}
	}
	return w.WriteByte('"')
}
// writeMessageSet renders a MessageSet: items with a registered type are
// decoded and written as structs under "[name]: <...>"; unknown types are
// dumped via writeUnknownStruct under their numeric id.
func writeMessageSet(w *textWriter, ms *MessageSet) error {
	for _, item := range ms.Item {
		id := *item.TypeId
		if msd, ok := messageSetMap[id]; ok {
			// Known message set type.
			if _, err := fmt.Fprintf(w, "[%s]: <\n", msd.name); err != nil {
				return err
			}
			w.indent()
			pb := reflect.New(msd.t.Elem())
			if err := Unmarshal(item.Message, pb.Interface().(Message)); err != nil {
				// Decode failure is reported inline rather than aborting.
				if _, err := fmt.Fprintf(w, "/* bad message: %v */\n", err); err != nil {
					return err
				}
			} else {
				if err := writeStruct(w, pb.Elem()); err != nil {
					return err
				}
			}
		} else {
			// Unknown type.
			if _, err := fmt.Fprintf(w, "[%d]: <\n", id); err != nil {
				return err
			}
			w.indent()
			if err := writeUnknownStruct(w, item.Message); err != nil {
				return err
			}
		}
		w.unindent()
		if _, err := w.Write(gtNewline); err != nil {
			return err
		}
	}
	return nil
}
// writeUnknownStruct renders raw, undecoded protobuf bytes as best it
// can: one "tag: value" line per field, with group wire types opening
// and closing braces, and decode failures reported inline as comments.
func writeUnknownStruct(w *textWriter, data []byte) (err error) {
	if !w.compact {
		if _, err := fmt.Fprintf(w, "/* %d unknown bytes */\n", len(data)); err != nil {
			return err
		}
	}
	b := NewBuffer(data)
	for b.index < len(b.buf) {
		x, err := b.DecodeVarint()
		if err != nil {
			// The inner err deliberately shadows the named return; the
			// write error (or nil) is what gets returned here.
			_, err := fmt.Fprintf(w, "/* %v */\n", err)
			return err
		}
		wire, tag := x&7, x>>3
		if wire == WireEndGroup {
			w.unindent()
			if _, err := w.Write(endBraceNewline); err != nil {
				return err
			}
			continue
		}
		if _, err := fmt.Fprint(w, tag); err != nil {
			return err
		}
		if wire != WireStartGroup {
			if err := w.WriteByte(':'); err != nil {
				return err
			}
		}
		if !w.compact || wire == WireStartGroup {
			if err := w.WriteByte(' '); err != nil {
				return err
			}
		}
		switch wire {
		case WireBytes:
			buf, e := b.DecodeRawBytes(false)
			if e == nil {
				_, err = fmt.Fprintf(w, "%q", buf)
			} else {
				_, err = fmt.Fprintf(w, "/* %v */", e)
			}
		case WireFixed32:
			x, err = b.DecodeFixed32()
			err = writeUnknownInt(w, x, err)
		case WireFixed64:
			x, err = b.DecodeFixed64()
			err = writeUnknownInt(w, x, err)
		case WireStartGroup:
			err = w.WriteByte('{')
			w.indent()
		case WireVarint:
			x, err = b.DecodeVarint()
			err = writeUnknownInt(w, x, err)
		default:
			_, err = fmt.Fprintf(w, "/* unknown wire type %d */", wire)
		}
		if err != nil {
			return err
		}
		if err = w.WriteByte('\n'); err != nil {
			return err
		}
	}
	return nil
}
// writeUnknownInt prints x, or an inline comment describing err when the
// preceding decode step failed. The returned error is the write error.
func writeUnknownInt(w *textWriter, x uint64, err error) error {
	if err != nil {
		_, werr := fmt.Fprintf(w, "/* %v */", err)
		return werr
	}
	_, werr := fmt.Fprint(w, x)
	return werr
}
// int32Slice implements sort.Interface for []int32, used to emit
// extension field numbers in ascending order.
type int32Slice []int32

func (s int32Slice) Len() int           { return len(s) }
func (s int32Slice) Less(i, j int) bool { return s[i] < s[j] }
func (s int32Slice) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
// writeExtensions writes all the extensions in pv.
// pv is assumed to be a pointer to a protocol message struct that is extendable.
func writeExtensions(w *textWriter, pv reflect.Value) error {
	emap := extensionMaps[pv.Type().Elem()]
	ep := pv.Interface().(extendableProto)
	// Order the extensions by ID.
	// This isn't strictly necessary, but it will give us
	// canonical output, which will also make testing easier.
	m := ep.ExtensionMap()
	ids := make([]int32, 0, len(m))
	for id := range m {
		ids = append(ids, id)
	}
	sort.Sort(int32Slice(ids))
	for _, extNum := range ids {
		ext := m[extNum]
		var desc *ExtensionDesc
		if emap != nil {
			desc = emap[extNum]
		}
		if desc == nil {
			// Unknown extension: dump the raw encoded bytes.
			if err := writeUnknownStruct(w, ext.enc); err != nil {
				return err
			}
			continue
		}
		pb, err := GetExtension(ep, desc)
		if err != nil {
			return fmt.Errorf("failed getting extension: %v", err)
		}
		// Repeated extensions will appear as a slice.
		if !desc.repeated() {
			if err := writeExtension(w, desc.Name, pb); err != nil {
				return err
			}
		} else {
			v := reflect.ValueOf(pb)
			for i := 0; i < v.Len(); i++ {
				if err := writeExtension(w, desc.Name, v.Index(i).Interface()); err != nil {
					return err
				}
			}
		}
	}
	return nil
}
// writeExtension writes a single extension value as "[name]: value".
func writeExtension(w *textWriter, name string, pb interface{}) error {
	if _, err := fmt.Fprintf(w, "[%s]:", name); err != nil {
		return err
	}
	if !w.compact {
		if err := w.WriteByte(' '); err != nil {
			return err
		}
	}
	if err := writeAny(w, reflect.ValueOf(pb), nil); err != nil {
		return err
	}
	return w.WriteByte('\n')
}
// writeIndent emits two spaces per indentation level at the start of a
// line, writing the shared spaces buffer in chunks.
func (w *textWriter) writeIndent() {
	if !w.complete {
		return
	}
	for remain := w.ind * 2; remain > 0; {
		n := len(spaces)
		if n > remain {
			n = remain
		}
		w.w.Write(spaces[:n])
		remain -= n
	}
	w.complete = false
}
// marshalText is the shared implementation behind MarshalText and
// CompactText. A nil message writes the literal "<nil>". If w is not
// already a byte-capable writer it is wrapped in a bufio.Writer, which is
// flushed before returning.
func marshalText(w io.Writer, pb Message, compact bool) error {
	val := reflect.ValueOf(pb)
	if pb == nil || val.IsNil() {
		w.Write([]byte("<nil>"))
		return nil
	}
	var bw *bufio.Writer
	ww, ok := w.(writer)
	if !ok {
		bw = bufio.NewWriter(w)
		ww = bw
	}
	aw := &textWriter{
		w:        ww,
		complete: true,
		compact:  compact,
	}
	// Messages implementing encoding.TextMarshaler render themselves.
	if tm, ok := pb.(encoding.TextMarshaler); ok {
		text, err := tm.MarshalText()
		if err != nil {
			return err
		}
		if _, err = aw.Write(text); err != nil {
			return err
		}
		if bw != nil {
			return bw.Flush()
		}
		return nil
	}
	// Dereference the received pointer so we don't have outer < and >.
	v := reflect.Indirect(val)
	if err := writeStruct(aw, v); err != nil {
		return err
	}
	if bw != nil {
		return bw.Flush()
	}
	return nil
}
// MarshalText writes a given protocol buffer in text format.
// The only errors returned are from w.
func MarshalText(w io.Writer, pb Message) error {
	return marshalText(w, pb, false)
}

// MarshalTextString is the same as MarshalText, but returns the string directly.
func MarshalTextString(pb Message) string {
	var buf bytes.Buffer
	// Writes to a bytes.Buffer cannot fail, so the error is ignored.
	marshalText(&buf, pb, false)
	return buf.String()
}

// CompactText writes a given protocol buffer in compact text format (one line).
func CompactText(w io.Writer, pb Message) error { return marshalText(w, pb, true) }

// CompactTextString is the same as CompactText, but returns the string directly.
func CompactTextString(pb Message) string {
	var buf bytes.Buffer
	// See MarshalTextString for why the error can be ignored.
	marshalText(&buf, pb, true)
	return buf.String()
}

View File

@ -0,0 +1,798 @@
// Go support for Protocol Buffers - Google's data interchange format
//
// Copyright 2010 The Go Authors. All rights reserved.
// https://github.com/golang/protobuf
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package proto
// Functions for parsing the Text protocol buffer format.
// TODO: message sets.
import (
"encoding"
"errors"
"fmt"
"reflect"
"strconv"
"strings"
"unicode/utf8"
)
// ParseError describes a failure while parsing text-format input.
type ParseError struct {
	Message string
	Line    int // 1-based line number
	Offset  int // 0-based byte offset from start of input
}

// Error renders the error; the byte offset is only shown for errors on
// the first line, where the line number alone is uninformative.
func (p *ParseError) Error() string {
	if p.Line != 1 {
		return fmt.Sprintf("line %d: %v", p.Line, p.Message)
	}
	return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message)
}
// token is one lexical element of the text-format input.
type token struct {
	value    string
	err      *ParseError
	line     int    // line number
	offset   int    // byte number from start of input, not start of line
	unquoted string // the unquoted version of value, if it was a quoted string
}

// String renders the token for debugging, or the parse error if the
// token represents one.
func (t *token) String() string {
	if t.err != nil {
		return fmt.Sprintf("parse error: %v", t.err)
	}
	return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset)
}
// textParser holds the state of an in-progress text-format parse.
type textParser struct {
	s            string // remaining input
	done         bool   // whether the parsing is finished (success or error)
	backed       bool   // whether back() was called
	offset, line int
	cur          token
}
// newTextParser returns a parser positioned at the start of s, with line
// counting starting at 1.
func newTextParser(s string) *textParser {
	p := &textParser{s: s, line: 1}
	p.cur.line = 1
	return p
}
// errorf records a parse error at the current token's position, marks the
// parse as finished, and returns the error for convenience.
func (p *textParser) errorf(format string, a ...interface{}) *ParseError {
	pe := &ParseError{
		Message: fmt.Sprintf(format, a...),
		Line:    p.cur.line,
		Offset:  p.cur.offset,
	}
	p.cur.err = pe
	p.done = true
	return pe
}
// isIdentOrNumberChar reports whether c may appear in an identifier or
// number token: the set [-+._A-Za-z0-9].
func isIdentOrNumberChar(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
		return true
	case '0' <= c && c <= '9':
		return true
	case c == '-', c == '+', c == '.', c == '_':
		return true
	}
	return false
}
// isWhitespace reports whether c is a text-format whitespace byte.
func isWhitespace(c byte) bool {
	return c == ' ' || c == '\t' || c == '\n' || c == '\r'
}
// skipWhitespace advances past whitespace and '#'-to-end-of-line
// comments, updating the offset and line counters and marking the parse
// done when the input is exhausted.
func (p *textParser) skipWhitespace() {
	i := 0
	for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') {
		if p.s[i] == '#' {
			// comment; skip to end of line or input
			for i < len(p.s) && p.s[i] != '\n' {
				i++
			}
			if i == len(p.s) {
				break
			}
		}
		if p.s[i] == '\n' {
			p.line++
		}
		i++
	}
	p.offset += i
	p.s = p.s[i:len(p.s)]
	if len(p.s) == 0 {
		p.done = true
	}
}
// advance lexes the next token into p.cur: a single punctuation symbol, a
// quoted string (with its unquoted form), or a run of identifier/number
// characters. Lexing errors are recorded on p.cur via errorf.
func (p *textParser) advance() {
	// Skip whitespace
	p.skipWhitespace()
	if p.done {
		return
	}
	// Start of non-whitespace
	p.cur.err = nil
	p.cur.offset, p.cur.line = p.offset, p.line
	p.cur.unquoted = ""
	switch p.s[0] {
	case '<', '>', '{', '}', ':', '[', ']', ';', ',':
		// Single symbol
		p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)]
	case '"', '\'':
		// Quoted string: scan to the matching quote, honoring escapes.
		i := 1
		for i < len(p.s) && p.s[i] != p.s[0] && p.s[i] != '\n' {
			if p.s[i] == '\\' && i+1 < len(p.s) {
				// skip escaped char
				i++
			}
			i++
		}
		if i >= len(p.s) || p.s[i] != p.s[0] {
			p.errorf("unmatched quote")
			return
		}
		unq, err := unquoteC(p.s[1:i], rune(p.s[0]))
		if err != nil {
			p.errorf("invalid quoted string %s: %v", p.s[0:i+1], err)
			return
		}
		p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)]
		p.cur.unquoted = unq
	default:
		i := 0
		for i < len(p.s) && isIdentOrNumberChar(p.s[i]) {
			i++
		}
		if i == 0 {
			p.errorf("unexpected byte %#x", p.s[0])
			return
		}
		p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)]
	}
	p.offset += len(p.cur.value)
}
// Errors shared by the unquoting routines below.
var (
	errBadUTF8 = errors.New("proto: bad UTF-8")
	errBadHex  = errors.New("proto: bad hexadecimal")
)
// unquoteC decodes the contents of a quoted string (without its
// surrounding quotes), expanding backslash escapes via unescape.
func unquoteC(s string, quote rune) (string, error) {
	// This is based on C++'s tokenizer.cc.
	// Despite its name, this is *not* parsing C syntax.
	// For instance, "\0" is an invalid quoted string.

	// Avoid allocation in trivial cases.
	simple := true
	for _, r := range s {
		if r == '\\' || r == quote {
			simple = false
			break
		}
	}
	if simple {
		return s, nil
	}

	buf := make([]byte, 0, 3*len(s)/2)
	for len(s) > 0 {
		r, n := utf8.DecodeRuneInString(s)
		if r == utf8.RuneError && n == 1 {
			return "", errBadUTF8
		}
		s = s[n:]
		if r != '\\' {
			if r < utf8.RuneSelf {
				buf = append(buf, byte(r))
			} else {
				buf = append(buf, string(r)...)
			}
			continue
		}
		// r was a backslash: decode the escape sequence that follows.
		ch, tail, err := unescape(s)
		if err != nil {
			return "", err
		}
		buf = append(buf, ch...)
		s = tail
	}
	return string(buf), nil
}
// unescape decodes one escape sequence at the start of s (the backslash
// itself already consumed), returning the decoded characters and the
// remaining input. It supports C-style single-char escapes, octal and
// hex byte escapes, and \u/\U Unicode escapes (decoded as raw hex bytes).
func unescape(s string) (ch string, tail string, err error) {
	r, n := utf8.DecodeRuneInString(s)
	if r == utf8.RuneError && n == 1 {
		return "", "", errBadUTF8
	}
	s = s[n:]
	switch r {
	case 'a':
		return "\a", s, nil
	case 'b':
		return "\b", s, nil
	case 'f':
		return "\f", s, nil
	case 'n':
		return "\n", s, nil
	case 'r':
		return "\r", s, nil
	case 't':
		return "\t", s, nil
	case 'v':
		return "\v", s, nil
	case '?':
		return "?", s, nil // trigraph workaround
	case '\'', '"', '\\':
		return string(r), s, nil
	case '0', '1', '2', '3', '4', '5', '6', '7', 'x', 'X':
		if len(s) < 2 {
			return "", "", fmt.Errorf(`\%c requires 2 following digits`, r)
		}
		base := 8
		ss := s[:2]
		s = s[2:]
		if r == 'x' || r == 'X' {
			base = 16
		} else {
			// Octal escapes include the leading digit itself.
			ss = string(r) + ss
		}
		i, err := strconv.ParseUint(ss, base, 8)
		if err != nil {
			return "", "", err
		}
		return string([]byte{byte(i)}), s, nil
	case 'u', 'U':
		n := 4
		if r == 'U' {
			n = 8
		}
		if len(s) < n {
			return "", "", fmt.Errorf(`\%c requires %d digits`, r, n)
		}
		bs := make([]byte, n/2)
		for i := 0; i < n; i += 2 {
			a, ok1 := unhex(s[i])
			b, ok2 := unhex(s[i+1])
			if !ok1 || !ok2 {
				return "", "", errBadHex
			}
			bs[i/2] = a<<4 | b
		}
		s = s[n:]
		return string(bs), s, nil
	}
	return "", "", fmt.Errorf(`unknown escape \%c`, r)
}
// Adapted from src/pkg/strconv/quote.go.
// unhex converts one hex digit to its value; ok is false for non-hex bytes.
func unhex(b byte) (v byte, ok bool) {
	switch {
	case b >= '0' && b <= '9':
		return b - '0', true
	case b >= 'a' && b <= 'f':
		return b - 'a' + 10, true
	case b >= 'A' && b <= 'F':
		return b - 'A' + 10, true
	default:
		return 0, false
	}
}
// Back off the parser by one token. Can only be done between calls to next().
// It makes the next advance() a no-op.
func (p *textParser) back() { p.backed = true }
// next advances the parser and returns the new current token. Adjacent
// double-quoted strings separated only by whitespace are concatenated
// into a single token, matching C++ string-literal behavior.
func (p *textParser) next() *token {
	if p.backed || p.done {
		p.backed = false
		return &p.cur
	}
	p.advance()
	if p.done {
		p.cur.value = ""
	} else if len(p.cur.value) > 0 && p.cur.value[0] == '"' {
		// Look for multiple quoted strings separated by whitespace,
		// and concatenate them.
		cat := p.cur
		for {
			p.skipWhitespace()
			if p.done || p.s[0] != '"' {
				break
			}
			p.advance()
			if p.cur.err != nil {
				return &p.cur
			}
			cat.value += " " + p.cur.value
			cat.unquoted += p.cur.unquoted
		}
		p.done = false // parser may have seen EOF, but we want to return cat
		p.cur = cat
	}
	return &p.cur
}
// consumeToken reads the next token and verifies it is exactly s.
// On mismatch the token is pushed back and an error is returned.
func (p *textParser) consumeToken(s string) error {
	tok := p.next()
	switch {
	case tok.err != nil:
		return tok.err
	case tok.value != s:
		p.back()
		return p.errorf("expected %q, found %q", s, tok.value)
	}
	return nil
}
// missingRequiredFieldError returns a RequiredNotSetError naming the first
// required field of sv that is still nil (or a generic one if none is found,
// which should not happen).
func (p *textParser) missingRequiredFieldError(sv reflect.Value) *RequiredNotSetError {
	st := sv.Type()
	sprops := GetProperties(st)
	for i := 0; i < st.NumField(); i++ {
		prop := sprops.Prop[i]
		if prop.Required && isNil(sv.Field(i)) {
			return &RequiredNotSetError{fmt.Sprintf("%v.%v", st, prop.OrigName)}
		}
	}
	return &RequiredNotSetError{fmt.Sprintf("%v.<unknown field name>", st)} // should not happen
}
// structFieldByName returns the index in the struct for the named field,
// as well as the parsed tag properties; the bool reports whether the
// field name was known.
func structFieldByName(sprops *StructProperties, name string) (int, *Properties, bool) {
	if i, ok := sprops.decoderOrigNames[name]; ok {
		return i, sprops.Prop[i], true
	}
	return -1, nil, false
}
// Consume a ':' from the input stream (if the next token is a colon),
// returning an error if a colon is needed but not present.
// The colon is mandatory for scalar fields and optional for groups and
// messages; the "bytes" wire type needs reflection on typ to tell
// strings (colon required) apart from messages/repeated messages.
func (p *textParser) checkForColon(props *Properties, typ reflect.Type) *ParseError {
	tok := p.next()
	if tok.err != nil {
		return tok.err
	}
	if tok.value != ":" {
		// Colon is optional when the field is a group or message.
		needColon := true
		switch props.Wire {
		case "group":
			needColon = false
		case "bytes":
			// A "bytes" field is either a message, a string, or a repeated field;
			// those three become *T, *string and []T respectively, so we can check for
			// this field being a pointer to a non-string.
			if typ.Kind() == reflect.Ptr {
				// *T or *string
				if typ.Elem().Kind() == reflect.String {
					break
				}
			} else if typ.Kind() == reflect.Slice {
				// []T or []*T
				if typ.Elem().Kind() != reflect.Ptr {
					break
				}
			} else if typ.Kind() == reflect.String {
				// The proto3 exception is for a string field,
				// which requires a colon.
				break
			}
			needColon = false
		}
		if needColon {
			return p.errorf("expected ':', found %q", tok.value)
		}
		// No colon needed: give the token back for the value parser.
		p.back()
	}
	return nil
}
// readStruct parses a sequence of "name: value" fields into sv until the
// given terminator token ('}', '>', or "" for end of input) is seen.
// Extension fields appear as "[full.extension.name]". It returns
// *RequiredNotSetError (via reqFieldErr) when parsing otherwise succeeded
// but one or more required fields were left unset.
func (p *textParser) readStruct(sv reflect.Value, terminator string) error {
	st := sv.Type()
	sprops := GetProperties(st)
	reqCount := sprops.reqCount
	var reqFieldErr error
	fieldSet := make(map[string]bool)
	// A struct is a sequence of "name: value", terminated by one of
	// '>' or '}', or the end of the input. A name may also be
	// "[extension]".
	for {
		tok := p.next()
		if tok.err != nil {
			return tok.err
		}
		if tok.value == terminator {
			break
		}
		if tok.value == "[" {
			// Looks like an extension.
			//
			// TODO: Check whether we need to handle
			// namespace rooted names (e.g. ".something.Foo").
			tok = p.next()
			if tok.err != nil {
				return tok.err
			}
			var desc *ExtensionDesc
			// This could be faster, but it's functional.
			// TODO: Do something smarter than a linear scan.
			for _, d := range RegisteredExtensions(reflect.New(st).Interface().(Message)) {
				if d.Name == tok.value {
					desc = d
					break
				}
			}
			if desc == nil {
				return p.errorf("unrecognized extension %q", tok.value)
			}
			// Check the extension terminator.
			tok = p.next()
			if tok.err != nil {
				return tok.err
			}
			if tok.value != "]" {
				return p.errorf("unrecognized extension terminator %q", tok.value)
			}
			props := &Properties{}
			props.Parse(desc.Tag)
			typ := reflect.TypeOf(desc.ExtensionType)
			if err := p.checkForColon(props, typ); err != nil {
				return err
			}
			rep := desc.repeated()
			// Read the extension structure, and set it in
			// the value we're constructing.
			var ext reflect.Value
			if !rep {
				ext = reflect.New(typ).Elem()
			} else {
				ext = reflect.New(typ.Elem()).Elem()
			}
			if err := p.readAny(ext, props); err != nil {
				if _, ok := err.(*RequiredNotSetError); !ok {
					return err
				}
				// Remember the missing-required error but keep parsing.
				reqFieldErr = err
			}
			ep := sv.Addr().Interface().(extendableProto)
			if !rep {
				SetExtension(ep, desc, ext.Interface())
			} else {
				// Append to the existing extension slice, if any.
				old, err := GetExtension(ep, desc)
				var sl reflect.Value
				if err == nil {
					sl = reflect.ValueOf(old) // existing slice
				} else {
					sl = reflect.MakeSlice(typ, 0, 1)
				}
				sl = reflect.Append(sl, ext)
				SetExtension(ep, desc, sl.Interface())
			}
			if err := p.consumeOptionalSeparator(); err != nil {
				return err
			}
			continue
		}
		// This is a normal, non-extension field.
		name := tok.value
		var dst reflect.Value
		fi, props, ok := structFieldByName(sprops, name)
		if ok {
			dst = sv.Field(fi)
		} else if oop, ok := sprops.OneofTypes[name]; ok {
			// It is a oneof.
			props = oop.Prop
			nv := reflect.New(oop.Type.Elem())
			dst = nv.Elem().Field(0)
			sv.Field(oop.Field).Set(nv)
		}
		if !dst.IsValid() {
			return p.errorf("unknown field name %q in %v", name, st)
		}
		if dst.Kind() == reflect.Map {
			// Consume any colon.
			if err := p.checkForColon(props, dst.Type()); err != nil {
				return err
			}
			// Construct the map if it doesn't already exist.
			if dst.IsNil() {
				dst.Set(reflect.MakeMap(dst.Type()))
			}
			key := reflect.New(dst.Type().Key()).Elem()
			val := reflect.New(dst.Type().Elem()).Elem()
			// The map entry should be this sequence of tokens:
			//	< key : KEY value : VALUE >
			// Technically the "key" and "value" could come in any order,
			// but in practice they won't.
			tok := p.next()
			var terminator string
			switch tok.value {
			case "<":
				terminator = ">"
			case "{":
				terminator = "}"
			default:
				return p.errorf("expected '{' or '<', found %q", tok.value)
			}
			if err := p.consumeToken("key"); err != nil {
				return err
			}
			if err := p.consumeToken(":"); err != nil {
				return err
			}
			if err := p.readAny(key, props.mkeyprop); err != nil {
				return err
			}
			if err := p.consumeOptionalSeparator(); err != nil {
				return err
			}
			if err := p.consumeToken("value"); err != nil {
				return err
			}
			if err := p.checkForColon(props.mvalprop, dst.Type().Elem()); err != nil {
				return err
			}
			if err := p.readAny(val, props.mvalprop); err != nil {
				return err
			}
			if err := p.consumeOptionalSeparator(); err != nil {
				return err
			}
			if err := p.consumeToken(terminator); err != nil {
				return err
			}
			dst.SetMapIndex(key, val)
			continue
		}
		// Check that it's not already set if it's not a repeated field.
		if !props.Repeated && fieldSet[name] {
			return p.errorf("non-repeated field %q was repeated", name)
		}
		if err := p.checkForColon(props, dst.Type()); err != nil {
			return err
		}
		// Parse into the field.
		fieldSet[name] = true
		if err := p.readAny(dst, props); err != nil {
			if _, ok := err.(*RequiredNotSetError); !ok {
				return err
			}
			// Remember the missing-required error but keep parsing.
			reqFieldErr = err
		} else if props.Required {
			reqCount--
		}
		if err := p.consumeOptionalSeparator(); err != nil {
			return err
		}
	}
	if reqCount > 0 {
		return p.missingRequiredFieldError(sv)
	}
	return reqFieldErr
}
// consumeOptionalSeparator consumes an optional semicolon or comma.
// It is used in readStruct to provide backward compatibility.
func (p *textParser) consumeOptionalSeparator() error {
	tok := p.next()
	if tok.err != nil {
		return tok.err
	}
	if tok.value == ";" || tok.value == "," {
		// Separator consumed.
		return nil
	}
	// Not a separator: push the token back for the next reader.
	p.back()
	return nil
}
// readAny parses a single value from the token stream into v, dispatching
// on v's reflected kind: []byte and repeated fields (Slice), scalars
// (Bool/ints/uints/floats/String), enum names (Int32), pointers to basic
// fields, and nested messages/groups (Struct).
func (p *textParser) readAny(v reflect.Value, props *Properties) error {
	tok := p.next()
	if tok.err != nil {
		return tok.err
	}
	if tok.value == "" {
		return p.errorf("unexpected EOF")
	}
	switch fv := v; fv.Kind() {
	case reflect.Slice:
		at := v.Type()
		if at.Elem().Kind() == reflect.Uint8 {
			// Special case for []byte
			if tok.value[0] != '"' && tok.value[0] != '\'' {
				// Deliberately written out here, as the error after
				// this switch statement would write "invalid []byte: ...",
				// which is not as user-friendly.
				return p.errorf("invalid string: %v", tok.value)
			}
			bytes := []byte(tok.unquoted)
			fv.Set(reflect.ValueOf(bytes))
			return nil
		}
		// Repeated field.
		if tok.value == "[" {
			// Repeated field with list notation, like [1,2,3].
			for {
				// Grow the slice by one zero element, then parse into it.
				fv.Set(reflect.Append(fv, reflect.New(at.Elem()).Elem()))
				err := p.readAny(fv.Index(fv.Len()-1), props)
				if err != nil {
					return err
				}
				tok := p.next()
				if tok.err != nil {
					return tok.err
				}
				if tok.value == "]" {
					break
				}
				if tok.value != "," {
					return p.errorf("Expected ']' or ',' found %q", tok.value)
				}
			}
			return nil
		}
		// One value of the repeated field.
		p.back()
		fv.Set(reflect.Append(fv, reflect.New(at.Elem()).Elem()))
		return p.readAny(fv.Index(fv.Len()-1), props)
	case reflect.Bool:
		// Either "true", "false", 1 or 0.
		switch tok.value {
		case "true", "1":
			fv.SetBool(true)
			return nil
		case "false", "0":
			fv.SetBool(false)
			return nil
		}
	case reflect.Float32, reflect.Float64:
		v := tok.value
		// Ignore 'f' for compatibility with output generated by C++, but don't
		// remove 'f' when the value is "-inf" or "inf".
		if strings.HasSuffix(v, "f") && tok.value != "-inf" && tok.value != "inf" {
			v = v[:len(v)-1]
		}
		if f, err := strconv.ParseFloat(v, fv.Type().Bits()); err == nil {
			fv.SetFloat(f)
			return nil
		}
	case reflect.Int32:
		if x, err := strconv.ParseInt(tok.value, 0, 32); err == nil {
			fv.SetInt(x)
			return nil
		}
		// Not a number: maybe a symbolic enum value name.
		if len(props.Enum) == 0 {
			break
		}
		m, ok := enumValueMaps[props.Enum]
		if !ok {
			break
		}
		x, ok := m[tok.value]
		if !ok {
			break
		}
		fv.SetInt(int64(x))
		return nil
	case reflect.Int64:
		if x, err := strconv.ParseInt(tok.value, 0, 64); err == nil {
			fv.SetInt(x)
			return nil
		}
	case reflect.Ptr:
		// A basic field (indirected through pointer), or a repeated message/group
		p.back()
		fv.Set(reflect.New(fv.Type().Elem()))
		return p.readAny(fv.Elem(), props)
	case reflect.String:
		if tok.value[0] == '"' || tok.value[0] == '\'' {
			fv.SetString(tok.unquoted)
			return nil
		}
	case reflect.Struct:
		var terminator string
		switch tok.value {
		case "{":
			terminator = "}"
		case "<":
			terminator = ">"
		default:
			return p.errorf("expected '{' or '<', found %q", tok.value)
		}
		// TODO: Handle nested messages which implement encoding.TextUnmarshaler.
		return p.readStruct(fv, terminator)
	case reflect.Uint32:
		if x, err := strconv.ParseUint(tok.value, 0, 32); err == nil {
			fv.SetUint(uint64(x))
			return nil
		}
	case reflect.Uint64:
		if x, err := strconv.ParseUint(tok.value, 0, 64); err == nil {
			fv.SetUint(x)
			return nil
		}
	}
	// Fallthrough from any case above means the token did not parse for
	// the field's kind.
	return p.errorf("invalid %v: %v", v.Type(), tok.value)
}
// UnmarshalText reads a protocol buffer in Text format. UnmarshalText resets pb
// before starting to unmarshal, so any existing data in pb is always removed.
// If a required field is not set and no other error occurs,
// UnmarshalText returns *RequiredNotSetError.
// Messages implementing encoding.TextUnmarshaler are delegated to directly.
func UnmarshalText(s string, pb Message) error {
	if um, ok := pb.(encoding.TextUnmarshaler); ok {
		return um.UnmarshalText([]byte(s))
	}
	pb.Reset()
	return newTextParser(s).readStruct(reflect.ValueOf(pb).Elem(), "")
}

View File

@ -0,0 +1,10 @@
language: go
go:
- 1.2
before_install:
- go get github.com/axw/gocov/gocov
- go get code.google.com/p/go.tools/cmd/cover
- go get github.com/mattn/goveralls
script:
- go test -covermode=count -coverprofile=profile.cov
- $HOME/gopath/bin/goveralls -repotoken z9bxGeTThOUlkUvGFuZLJ6PezhGyLXCGL -coverprofile=profile.cov -service=travis-ci

View File

@ -0,0 +1,195 @@
Google Safe Browsing API
========================
[![Build Status](https://travis-ci.org/rjohnsondev/go-safe-browsing-api.svg)](https://travis-ci.org/rjohnsondev/go-safe-browsing-api)
[![Coverage Status](https://coveralls.io/repos/rjohnsondev/go-safe-browsing-api/badge.png?branch=master)](https://coveralls.io/r/rjohnsondev/go-safe-browsing-api?branch=master)
This library provides client functionality for version 3 of the Google safe
browsing API as per:
https://developers.google.com/safe-browsing/developers_guide_v3
Installation
------------
This should do the trick:
go get github.com/golang/protobuf/proto
go get github.com/rjohnsondev/go-safe-browsing-api
Usage
-----
The library requires at least your Safe Browsing API key and a writable
directory to store the list data.
It is recommended you also set the <code>Client</code>, <code>AppVersion</code> and
<code>ProtocolVersion</code> globals to something appropriate:
```go
safebrowsing.Client = "api"
safebrowsing.AppVersion = "1.5.2"
safebrowsing.ProtocolVersion = "3.0"
```
Calling <code>NewSafeBrowsing</code> immediately attempts to contact the Google
servers and perform an update/initial download. If this succeeds, it returns a
SafeBrowsing instance after spawning a new goroutine which will update itself
at the interval requested by google.
```go
package main
import (
safebrowsing "github.com/rjohnsondev/go-safe-browsing-api"
"os"
"fmt"
)
func main() {
key := "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA_BBBBBBBBB"
dataDir := "./data"
sb, err := safebrowsing.NewSafeBrowsing(key, dataDir)
if err != nil {
fmt.Println(err)
os.Exit(1)
}
}
```
### Looking up a URL
There are two methods for looking up URLs, <code>IsListed</code> and
<code>MightBeListed</code>. Both of these return either an empty string in the
case of an unlisted URL, or the name of the list on which the URL is listed.
If there was an error requesting confirmation from Google for a listed URL, or
if the last update request was over 45 mins ago, it will be returned along with
an empty string.
<code>IsListed(string)</code> is the recommended method to use if displaying a
message to a user. It may however make a blocking request to Google's servers
for pages that have partial hash matches to perform a full hash match (if it
has not already done so for that URL) which can be slow.
```go
response, err := sb.IsListed(url)
if err != nil {
fmt.Println("Error querying URL:", err)
}
if response == "" {
fmt.Println("not listed")
} else {
fmt.Println("URL listed on:", response)
}
```
If a quick return time is required, it may be worth using the
MightBeListed(string) method. This will not contact Google for confirmation,
so it can only be used to display a message to the user if the fullHashMatch
return value is True AND the last successful update from Google was in the last
45 mins:
```go
response, fullHashMatch, err := sb.MightBeListed(url)
if err != nil {
fmt.Println("Error querying URL:", err)
}
if response == "" {
fmt.Println("not listed")
} else {
if fullHashMatch && sb.IsUpToDate() {
fmt.Println("URL listed on:", response)
} else {
fmt.Println("URL may be listed on:", response)
}
}
```
It is recommended you combine the two calls when a non-blocking response is
required, so a full hash can be requested and used for future queries about the
same url:
```go
response, fullHashMatch, err := sb.MightBeListed(url)
if err != nil {
fmt.Println("Error querying URL:", err)
}
if response != "" {
if fullHashMatch && sb.IsUpToDate() {
fmt.Println("URL listed on:", response)
} else {
fmt.Println("URL may be listed on:", response)
// Requesting full hash in background...
go sb.IsListed(url)
}
}
```
### Offline Mode
The library can work in "offline" mode, where it will not attempt to contact
Google's servers and work purely from local files. This can be activated by
setting the <code>OfflineMode</code> global variable:
```go
package main
import (
safebrowsing "github.com/rjohnsondev/go-safe-browsing-api"
)
func main() {
key := "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA_BBBBBBBBB"
dataDir := "./data"
// only work from local files.
safebrowsing.OfflineMode = true
sb, err := safebrowsing.NewSafeBrowsing(key, dataDir)
...
}
```
In this mode <code>IsListed</code> will always return an error complaining that
the list has not been updated within the last 45 mins and no warnings may be
shown to users.
Example Webserver
-----------------
The package also includes a small JSON endpoint for the bulk querying of URLs.
It has an additional config dependency, so it can be installed with something
like:
go get github.com/rjohnsondev/go-safe-browsing-api
go get github.com/BurntSushi/toml
go install github.com/rjohnsondev/go-safe-browsing-api/webserver
The server takes a config file as a parameter, an example one is provided with
the source, but here's the contents for convenience:
# example config file for safe browsing server
address = "0.0.0.0:8080"
googleApiKey = ""
dataDir = "/tmp/safe-browsing-data"
# enable example usage page at /form
enableFormPage = true
The config requires at a minimum your Google API key to be added (otherwise
you'll get a nice non-friendly go panic). Once up and running it provides a
helpful example page at http://localhost:8080/form
Other Notes
-----------
### Memory Usage
The current implementation stores hashes in a reasonably efficient hat-trie
data structure (bundled from https://github.com/dcjones/hat-trie). This
results in a memory footprint of approximately 35MB.
### File Format
The files stored by the library are gob streams of Chunks. They should be
portable between identical versions of the library.

View File

@ -0,0 +1,303 @@
/*
Copyright (c) 2013, Richard Johnson
Copyright (c) 2014, Kilian Gilonne
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package safebrowsing
import (
"bytes"
"encoding/hex"
"fmt"
"net"
"regexp"
"strconv"
"strings"
)
// Canonicalize normalizes a URL as required for safe browsing lookups.
// This must run before obtaining the host key or generating the lookup
// iterations of a URL.
func Canonicalize(fullurl string) (canonicalized string) {
	fullurl = strings.TrimSpace(fullurl)
	// Default to http:// when no scheme is present.
	schemeRe := regexp.MustCompile("[a-zA-Z][a-zA-Z0-9+-.]*://.*")
	if !schemeRe.MatchString(fullurl) {
		fullurl = "http://" + fullurl
	}
	// Drop any fragment.
	fullurl = strings.Split(fullurl, "#")[0]
	// Strip embedded tab (0x09), LF (0x0a) and CR (0x0d) characters.
	for _, ch := range []string{"\t", "\n", "\r"} {
		fullurl = strings.Replace(fullurl, ch, "", -1)
	}
	// Repeatedly percent-decode until nothing encoded remains.
	for {
		decoded, changed := unescape(fullurl)
		if !changed {
			break
		}
		fullurl = decoded
	}
	// Normalize host and path, then re-escape the result.
	fullurl = canonicalizeHostname(fullurl)
	fullurl = canonicalizePath(fullurl)
	return escapeUrl(fullurl)
}
// canonicalizeHostname rewrites the hostname portion of fullurl per the
// safe browsing canonicalization rules: strip leading/trailing dots,
// collapse runs of dots, normalize IP-address hostnames (including a
// hostname that is a bare decimal integer), and lowercase. The rest of
// the URL is left untouched.
func canonicalizeHostname(fullurl string) (canonicalized string) {
	// extract the hostname from the url
	re := regexp.MustCompile("[a-zA-Z][a-zA-Z0-9+-.]*://([^/]+)/.*")
	matches := re.FindAllSubmatch([]byte(fullurl), 1)
	if len(matches) > 0 {
		hostname := string(matches[0][1])
		// remove all leading and trailing dots
		canonicalized = strings.Trim(hostname, ".")
		// Replace consecutive dots with a single dot.
		re = regexp.MustCompile("\\.\\.*")
		canonicalized = re.ReplaceAllString(canonicalized, ".")
		// attempt to parse as a IP address.
		ip := net.ParseIP(canonicalized)
		if ip != nil {
			canonicalized = ip.String()
		}
		// A purely numeric hostname is interpreted as a 32-bit IP.
		// NOTE(review): bitSize 0 parses up to the native uint size, so
		// values above 2^32 wrap silently through the shifts below —
		// confirm against the canonicalization spec.
		ipInt, err := strconv.ParseUint(canonicalized, 10, 0)
		if err == nil {
			// we were an int!
			canonicalized = fmt.Sprintf("%d.%d.%d.%d",
				(ipInt>>24)&0xFF,
				(ipInt>>16)&0xFF,
				(ipInt>>8)&0xFF,
				ipInt&0xFF)
		}
		canonicalized = strings.ToLower(canonicalized)
		// Splice the canonical hostname back into the full URL
		// (first occurrence only).
		canonicalized = strings.Replace(fullurl, hostname, canonicalized, 1)
		return canonicalized
	}
	// No host component matched (e.g. no trailing slash); leave unchanged.
	return fullurl
}
// canonicalizePath resolves "/./" and "/../" sequences and collapses
// consecutive slashes in the path component, leaving the query string
// untouched. URLs without a path component get a trailing "/" appended.
func canonicalizePath(fullurl string) (canonicalized string) {
	re := regexp.MustCompile("[a-zA-Z][a-zA-Z0-9+-.]*://[^/]+(/[^?]+)")
	matches := re.FindAllSubmatch([]byte(fullurl), 1)
	if len(matches) > 0 {
		path := string(matches[0][1])
		// The sequences "/../" and "/./" in the path should be resolved,
		// by replacing "/./" with "/", and removing "/../" along with
		// the preceding path component.
		canonicalized = strings.Replace(path, "/./", "/", -1)
		// NOTE(review): a single ReplaceAll pass does not fully resolve
		// nested sequences like "/a/b/../../c" — confirm whether that is
		// acceptable per the canonicalization spec.
		re = regexp.MustCompile("/?[^/]+/\\.\\.(/|$)")
		canonicalized = re.ReplaceAllString(canonicalized, "/")
		// Collapse runs of slashes into one.
		re = regexp.MustCompile("//*")
		canonicalized = re.ReplaceAllString(canonicalized, "/")
		canonicalized = strings.Replace(fullurl, path, canonicalized, 1)
		return canonicalized
	}
	// No path component: ensure the URL ends with "/".
	// (Assumes fullurl is non-empty; callers pass a scheme-prefixed URL.)
	if fullurl[len(fullurl)-1] != '/' {
		fullurl = fullurl + "/"
	}
	return fullurl
}
// escapeUrl percent-escapes every byte that is <= ASCII 32, >= 127, '#',
// or '%', as required by the safe browsing canonicalization rules.
// The spec requires UPPERCASE hex digits in the escapes; the previous
// implementation used hex.EncodeToString directly, which emits lowercase
// and therefore produced non-canonical URLs (and wrong hashes) for any
// byte whose hex form contains a letter.
func escapeUrl(url string) string {
	buf := bytes.Buffer{}
	buf.Grow(len(url))
	for _, b := range []byte(url) {
		switch {
		case b <= 32 || b >= 127 || b == '#' || b == '%':
			buf.WriteByte('%')
			// Uppercase the two hex digits to match the spec.
			buf.WriteString(strings.ToUpper(hex.EncodeToString([]byte{b})))
		default:
			buf.WriteByte(b)
		}
	}
	return buf.String()
}
// unescape percent-decodes s, tolerating malformed escapes by copying
// them through verbatim (unlike url.QueryUnescape, which would error).
// The bool reports whether any escape was decoded; callers loop on it so
// nested encodings like "%2541" are fully resolved.
// (Previous version carried a dead "count %" variable and stale comment
// copied from the stdlib; the output buffer is now simply pre-sized to
// the input length, which is always sufficient.)
func unescape(s string) (string, bool) {
	t := make([]byte, len(s))
	j := 0
	performedUnescape := false
	for i := 0; i < len(s); {
		switch s[i] {
		case '%':
			if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
				// we were an invalid encoding, copy a char and keep going
				t[j] = s[i]
				j++
				i++
			} else {
				t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
				performedUnescape = true
				j++
				i += 3
			}
		default:
			t[j] = s[i]
			j++
			i++
		}
	}
	return string(t[:j]), performedUnescape
}
// unhex returns the numeric value of the hex digit c, or 0 when c is not
// a hex digit (callers gate on ishex first).
func unhex(c byte) byte {
	if c >= '0' && c <= '9' {
		return c - '0'
	}
	if c >= 'a' && c <= 'f' {
		return c - 'a' + 10
	}
	if c >= 'A' && c <= 'F' {
		return c - 'A' + 10
	}
	return 0
}
// ishex reports whether c is an ASCII hexadecimal digit.
func ishex(c byte) bool {
	return ('0' <= c && c <= '9') ||
		('a' <= c && c <= 'f') ||
		('A' <= c && c <= 'F')
}
// iterateHostnames returns the URL plus variants whose hostname is
// successively reduced: the last two labels first, then three, and so on
// (capped at the last six labels). IP-address hosts and unparseable URLs
// yield only the original URL.
func iterateHostnames(fullurl string) (urls []string) {
	hostRe := regexp.MustCompile("([a-zA-Z][a-zA-Z0-9+-.]*://)([^/]+)/.*")
	m := hostRe.FindStringSubmatch(fullurl)
	if m == nil {
		return []string{fullurl}
	}
	hostname := m[2]
	if net.ParseIP(hostname) != nil {
		// IP addresses get no hostname iterations.
		return []string{fullurl}
	}
	labels := strings.Split(hostname, ".")
	urls = make([]string, 0, len(labels))
	urls = append(urls, fullurl)
	if len(labels) < 2 {
		return urls
	}
	// Consider at most the trailing six labels.
	if len(labels) > 6 {
		labels = labels[len(labels)-6:]
	}
	candidate := labels[len(labels)-2] + "." + labels[len(labels)-1]
	urls = append(urls, strings.Replace(fullurl, hostname, candidate, 1))
	for x := len(labels) - 3; x > 0; x-- {
		candidate = labels[x] + "." + candidate
		urls = append(urls, strings.Replace(fullurl, hostname, candidate, 1))
	}
	return urls
}
// iteratePaths returns the URL variants produced by trimming the path:
// the original URL (only when it has a query string), the URL without its
// query string, the bare host, and then each successively longer path
// prefix (at most the first four components).
func iteratePaths(fullurl string) (urls []string) {
	pathRe := regexp.MustCompile("([a-zA-Z][a-zA-Z0-9+-.]*://[^/]+)(/[^?]*)")
	urls = make([]string, 0)
	if strings.ContainsRune(fullurl, '?') {
		// Keep the original query-bearing URL as a candidate.
		urls = append(urls, fullurl)
	}
	m := pathRe.FindStringSubmatch(fullurl)
	if m == nil {
		return urls
	}
	prefix, path := m[1], m[2]
	// URL without query string.
	urls = append(urls, prefix+path)
	segments := strings.Split(path, "/")
	if len(segments) > 1 {
		// URL without any path.
		prefix += "/"
		urls = append(urls, prefix)
		for x := 1; x < len(segments)-1 && x < 4; x++ {
			prefix += segments[x] + "/"
			urls = append(urls, prefix)
		}
	}
	return urls
}
// stripProtocol removes a leading "scheme://" from fullurl, returning the
// input unchanged when no scheme separator is present.
func stripProtocol(fullurl string) (url string) {
	const sep = "://"
	idx := strings.Index(fullurl, sep)
	if idx < 0 {
		return fullurl
	}
	return fullurl[idx+len(sep):]
}
// GenerateTestCandidates produces every hostname/path combination of a URL
// that must be checked against the lookup table, with the protocol
// stripped from each candidate.
// NOTE: We assume that the URL has already been Canonicalized.
func GenerateTestCandidates(url string) (urls []string) {
	urls = make([]string, 0)
	for _, hostVariant := range iterateHostnames(url) {
		for _, candidate := range iteratePaths(hostVariant) {
			urls = append(urls, stripProtocol(candidate))
		}
	}
	return urls
}
// ExtractHostKey extracts the host from a URL in the form used for
// hashing into a Host Key: the host plus a trailing slash, reduced to at
// most the last three labels.
// NOTE: We assume that the URL has already been Canonicalized.
func ExtractHostKey(fullUrl string) (url string) {
	url = stripProtocol(fullUrl)
	// Keep only the host, with a trailing slash.
	if slash := strings.Index(url, "/"); slash > 0 {
		url = url[:slash+1]
	} else {
		url = url + "/"
	}
	// Trim leading labels until at most two dots remain (the trailing "/"
	// means up to three host labels survive).
	for strings.Count(url, ".") > 2 {
		url = url[strings.Index(url, ".")+1:]
	}
	return url
}

View File

@ -0,0 +1,353 @@
/*
Copyright (c) 2013, Richard Johnson
Copyright (c) 2014, Kilian Gilonne
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package safebrowsing
import (
"bytes"
"crypto/sha256"
"errors"
"fmt"
"io/ioutil"
"math/rand"
"net/http"
// "runtime/debug"
"strconv"
"strings"
"time"
)
//import "encoding/hex"
type FullHashCache struct {
CreationDate time.Time
CacheLifeTime int
}
func newFullHashCache(creationDate time.Time, cacheLifeTime int) (fch *FullHashCache) {
fch = &FullHashCache{
CreationDate: creationDate,
CacheLifeTime: cacheLifeTime,
}
return fch
}
// IsListed reports which list, if any, marks the given URL as unsafe; an
// empty string means the URL is not listed. This call may perform a
// blocking HTTP "request for full hashes" to Google — if speed matters,
// prefer MightBeListed. Any warning shown to a user MUST be based on
// this call.
func (sb *SafeBrowsing) IsListed(url string) (list string, err error) {
	listName, _, lookupErr := sb.queryUrl(url, true)
	return listName, lookupErr
}
// Check to see if a URL is likely marked as unsafe by Google.
// Returns what list the URL may be listed on, or an empty string if the URL is not listed.
// Note that this query does not perform a "request for full hashes" and MUST NOT be
// used to show a warning to the user.
func (sb *SafeBrowsing) MightBeListed(url string) (list string, fullHashMatch bool, err error) {
	return sb.queryUrl(url, false)
}

// ErrOutOfDateHashes is returned when the local list data has not been
// successfully updated within the last 45 minutes; listing verdicts may
// not be trusted (or shown to users) in that state.
var ErrOutOfDateHashes = errors.New("Unable to check listing, list hasn't been updated for 45 mins")
// Here is where we actually look up the hashes against our map.
// queryUrl canonicalizes the URL, generates every host/path candidate,
// and checks each against every list: a full-hash hit returns
// (list, true, nil); a prefix-only hit either returns immediately
// (matchFullHash false or offline) or triggers a blocking full-hash
// request to Google and re-checks.
func (sb *SafeBrowsing) queryUrl(url string, matchFullHash bool) (list string, fullHashMatch bool, err error) {
	// defer debug.FreeOSMemory()
	if matchFullHash && !sb.IsUpToDate() {
		// we haven't had a successful update in the last 45 mins! abort!
		return "", false, ErrOutOfDateHashes
	}
	// first Canonicalize
	url = Canonicalize(url)
	urls := GenerateTestCandidates(url)
	// sb.Logger.Debug("Checking %d iterations of url", len(urls))
	for list, sbl := range sb.Lists {
		for _, url := range urls {
			hostKey := ExtractHostKey(url)
			hostKeyHash := HostHash(getHash(hostKey)[:4])
			// sb.Logger.Debug("Host hash: %s", hex.EncodeToString([]byte(hostKeyHash)))
			sbl.updateLock.RLock()
			// hash it up
			// sb.Logger.Debug("Hashing %s", url)
			urlHash := getHash(url)
			prefix := urlHash[:PREFIX_4B_SZ]
			lookupHash := string(prefix)
			fullLookupHash := string(urlHash)
			// sb.Logger.Debug("testing hash: %s, full = %s",
			// hex.EncodeToString([]byte(lookupHash)),
			// hex.EncodeToString([]byte(fullLookupHash)))
			// Expire any stale full-hash cache entry for this host key.
			fhc, ok := sb.Cache[hostKeyHash]
			if ok && !fhc.checkValidity() {
				// NOTE(review): this deletes from sb.Cache (and the tries)
				// while holding only the read lock; if another goroutine
				// writes sb.Cache concurrently this is a data race —
				// confirm the intended locking discipline.
				delete(sb.Cache, hostKeyHash)
				// sbl.Logger.Debug("Delete full length hash: %s",
				// hex.EncodeToString([]byte(fullLookupHash)))
				sbl.FullHashRequested.Delete(lookupHash)
				sbl.FullHashes.Delete(fullLookupHash)
			}
			// look up full hash matches
			if sbl.FullHashes.Get(fullLookupHash) {
				sbl.updateLock.RUnlock()
				return list, true, nil
			}
			// now see if there is a match in our prefix trie
			keysToLookupMap := make(map[LookupHash]bool)
			if sbl.Lookup.Get(lookupHash) {
				if !matchFullHash || OfflineMode {
					// sb.Logger.Debug("Partial hash hit")
					sbl.updateLock.RUnlock()
					return list, false, nil
				}
				// have we have already asked for full hashes for this prefix?
				if sbl.FullHashRequested.Get(string(lookupHash)) {
					// sb.Logger.Debug("Full length hash miss")
					sbl.updateLock.RUnlock()
					continue
				}
				// we matched a prefix and need to request a full hash
				// sb.Logger.Debug("Need to request full length hashes for %s",
				// hex.EncodeToString([]byte(prefix)))
				keysToLookupMap[prefix] = true
			}
			sbl.updateLock.RUnlock()
			if len(keysToLookupMap) > 0 {
				// Blocking round-trip to Google for the full hashes.
				err := sb.requestFullHashes(list, hostKeyHash, keysToLookupMap)
				if err != nil {
					return "", false, err
				}
				sbl.updateLock.RLock()
				// re-check for full hash hit.
				if sbl.FullHashes.Get(string(fullLookupHash)) {
					sbl.updateLock.RUnlock()
					return list, true, nil
				}
				sbl.updateLock.RUnlock()
			}
		}
		// debug.FreeOSMemory()
	}
	return "", false, nil
}
// IsUpToDate reports whether we have had a successful update within the
// last 45 minutes (always false in offline mode).
func (sb *SafeBrowsing) IsUpToDate() bool {
	if OfflineMode {
		return false
	}
	return time.Since(sb.LastUpdated) < 45*time.Minute
}
// getHash returns the SHA-256 digest of input as a LookupHash.
func getHash(input string) (hash LookupHash) {
	sum := sha256.Sum256([]byte(input))
	return LookupHash(sum[:])
}
// checkValidity reports whether this full-hash response is still within
// its server-assigned cache lifetime.
func (fhc *FullHashCache) checkValidity() bool {
	ttl := time.Duration(fhc.CacheLifeTime) * time.Second
	return time.Since(fhc.CreationDate) < ttl
}
// requestFullHashes asks Google for the full-length hashes matching a set
// of lookup prefixes, marks those prefixes as requested, and feeds the
// response into the cache via processFullHashes. All prefixes in a single
// request must share the same length.
// (Cleanups vs. the previous version: `for k := range` instead of
// `for k, _ := range`, buf.Len() instead of len(buf.String()), and a typo
// fixed in the variable-length error message.)
func (sb *SafeBrowsing) requestFullHashes(list string, host HostHash, prefixes map[LookupHash]bool) error {
	if len(prefixes) == 0 {
		return nil
	}
	// Request body: "PREFIXLEN:TOTALLEN\n" followed by the raw prefixes.
	query := "%d:%d\n%s"
	buf := bytes.Buffer{}
	firstPrefixLen := 0
	for prefix := range prefixes {
		_, err := buf.Write([]byte(prefix))
		if err != nil {
			return err
		}
		if firstPrefixLen == 0 {
			firstPrefixLen = len(prefix)
		}
		if firstPrefixLen != len(prefix) {
			return fmt.Errorf("Attempted to use variable length hashes in lookup!")
		}
	}
	body := fmt.Sprintf(query,
		firstPrefixLen,
		buf.Len(),
		buf.String())
	url := fmt.Sprintf(
		"https://safebrowsing.google.com/safebrowsing/gethash?"+
			"client=%s&key=%s&appver=%s&pver=%s",
		sb.Client, sb.Key, sb.AppVersion, sb.ProtocolVersion)
	response, err := sb.request(url, body, true)
	if err != nil {
		return err // non-server error with HTTP
	}
	defer response.Body.Close()
	// Mark these prefixes as having been requested so we don't re-ask.
	sb.Lists[list].updateLock.Lock()
	for prefix := range prefixes {
		sb.Lists[list].FullHashRequested.Set(string(prefix))
	}
	sb.Lists[list].updateLock.Unlock()
	if response.StatusCode != 200 {
		if response.StatusCode == 503 {
			// Retry in background with a new thread
			go sb.doFullHashBackOffRequest(host, url, body)
			return fmt.Errorf("Service temporarily Unavailable")
		}
		return fmt.Errorf("Unable to lookup full hash, server returned %d",
			response.StatusCode)
	}
	data, err := ioutil.ReadAll(response.Body)
	if err != nil {
		return err
	}
	return sb.processFullHashes(string(data), host)
}
// Process the retrieved full hashes, saving them to disk.
//
// The gethash response body looks like (line 0 is a cache lifetime in
// seconds, then repeating chunk headers of the form
// "LISTNAME:CHUNKNUM:HASHLEN[:NUMRESP]" each followed by hash data).
// NOTE(review): the exact header layout is inferred from the parsing below
// (3 vs 4 colon-separated fields) — confirm against the protocol spec.
func (sb *SafeBrowsing) processFullHashes(data string, host HostHash) (err error) {
	// defer debug.FreeOSMemory()
	split := strings.Split(data, "\n")
	split_sz := len(split)
	if split_sz == 0 {
		return nil
	}
	// First line: how long (seconds) the full-hash result may be cached.
	cacheLifeTime, err := strconv.Atoi(split[0])
	if err != nil {
		return err
	}
	sb.Cache[host] = newFullHashCache(time.Now(), cacheLifeTime)
	if split_sz <= 2 {
		// Lifetime only, no hash chunks in the response.
		return nil
	}
	// Walk the remaining lines in variable-size steps: each chunk header is
	// followed by its data line(s); chunk_sz is recomputed per iteration.
	for i, len_splitsplit, chunk_sz := 1, 0, 0; (i+1) < split_sz && err == nil; i += chunk_sz {
		splitsplit := strings.Split(split[i], ":")
		len_splitsplit = len(splitsplit)
		if len_splitsplit < 3 {
			return fmt.Errorf("Malformated response: %s", split[i])
		} else if len_splitsplit == 4 {
			// Four-field headers carry an explicit per-chunk line count.
			num_resp, err := strconv.Atoi(splitsplit[2])
			if err != nil {
				return err
			} else if (num_resp + 2 + i) > split_sz {
				// Header claims more data lines than the response contains.
				return fmt.Errorf("Malformated response: %s", split[i])
			}
			chunk_sz = 2 + num_resp
		} else {
			// Three-field header: exactly one data line follows.
			chunk_sz = 2
		}
		// splitsplit[0] is the list name; split[i+1] the raw hash bytes.
		err = sb.readFullHashChunk(split[i+1], splitsplit[0], host)
	}
	return err
}
// readFullHashChunk splits a block of concatenated 32-byte full hashes and
// stores each one in the named list's full-hash set. Any trailing partial
// hash (fewer than 32 bytes) is silently ignored, matching the original
// loop bound.
//
// Fixes over the original: the sb.Lists nil checks are loop-invariant and
// are now performed once up front instead of per hash, and the typos in the
// error message ("Imcomplete", "readFullHashChunck") are corrected.
func (sb *SafeBrowsing) readFullHashChunk(hashes string, list string, host HostHash) (err error) {
	if hashes == "" || list == "" || host == "" {
		return fmt.Errorf("Incomplete data to readFullHashChunk()")
	}
	if sb.Lists == nil {
		return fmt.Errorf("Google safe browsing lists have not been initialized")
	} else if sb.Lists[list] == nil {
		return fmt.Errorf("Google safe browsing list (%s) have not been initialized", list)
	}
	const hashlen = 32 // a full hash is a whole SHA-256 digest
	for i := 0; i+hashlen <= len(hashes); i += hashlen {
		hash := hashes[i : i+hashlen]
		//sb.Lists[list].Logger.Debug("Adding full length hash: %s",
		//hex.EncodeToString([]byte(hash)))
		sb.Lists[list].updateLock.Lock()
		sb.Lists[list].FullHashes.Set(hash)
		sb.Lists[list].updateLock.Unlock()
	}
	return nil
}
// doFullHashBackOffRequest continues the attempt to request full hashes in
// the background, observing the required back-off behaviour: wait 1 minute
// first, then a randomised 30-60 minute base multiplied by the attempt
// count, giving up once the wait would exceed 8 hours.
//
// BUG FIX: the original declared `var response *http.Response` and then
// assigned response.StatusCode = 503, dereferencing a nil pointer and
// panicking before the first retry; it also dereferenced a possibly-nil
// response in the loop condition after a failed request, never closed the
// response body, processed the body even when reading it failed, and used
// the %d verb on a float64 wait time. All of those are corrected here; the
// retry schedule and log wording are otherwise unchanged.
func (sb *SafeBrowsing) doFullHashBackOffRequest(host HostHash, url string, body string) {
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	randomFloat := r.Float64()
	var response *http.Response
	var err error
	for x := 0; ; x++ {
		// first we wait 1 min, then some time between 30-60 mins scaled by
		// the attempt count, until we pass 480 mins or succeed
		mins := (30 * (randomFloat + 1) * float64(x)) + 1
		if mins > 480 {
			sb.Logger.Warn(
				"Back-off for full hash %s exceeded 8 hours, it ain't going to happen, giving up: %s",
				body,
				response,
			)
			return
		}
		sb.Logger.Warn(
			"Update failed, in full hash back-off mode (waiting %.0f mins)",
			mins,
		)
		time.Sleep(time.Duration(mins) * time.Minute)
		response, err = sb.request(url, body, true)
		if err != nil {
			sb.Logger.Error(
				"Unable to request full hashes from response in back-off mode: %s",
				err,
			)
			continue
		}
		if response.StatusCode != 503 {
			break
		}
	}
	defer response.Body.Close()
	data, err := ioutil.ReadAll(response.Body)
	if err != nil {
		sb.Logger.Error(
			"Unable to request full hashes from response in back-off mode: %s",
			err,
		)
		return
	}
	err = sb.processFullHashes(string(data), host)
	if err != nil {
		sb.Logger.Error(
			"Unable process full hashes from response in back-off mode: %s; trying again.",
			err,
		)
		sb.doFullHashBackOffRequest(host, url, body)
	}
}

View File

@ -0,0 +1,65 @@
/*
Copyright (c) 2014, Kilian Gilonne
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package safebrowsing
import proto "github.com/letsencrypt/boulder/Godeps/_workspace/src/github.com/golang/protobuf/proto"
import (
"encoding/binary"
"unsafe"
)
// ChunkNum identifies a single add/sub chunk within a list.
type ChunkNum int32

// Convenience aliases for the generated ChunkData enum values.
const CHUNK_TYPE_ADD = ChunkData_ChunkType(0)
const CHUNK_TYPE_SUB = ChunkData_ChunkType(1)
const PREFIX_4B = ChunkData_PrefixType(0)
const PREFIX_32B = ChunkData_PrefixType(1)

// Byte widths of the two prefix encodings: 4-byte truncated prefixes and
// 32-byte full SHA-256 hashes.
const PREFIX_4B_SZ = 4
const PREFIX_32B_SZ = 32
// ReadChunk decodes one length-prefixed ChunkData message from the front of
// data. The wire layout is a 4-byte big-endian message length followed by
// the protobuf payload. `length` is the number of unread bytes remaining;
// new_len is that count after consuming this chunk. A nil chunk with a nil
// error means there was no complete chunk to read.
//
// Fixes over the original: the 4-byte prefix was read from data BEFORE
// verifying that 4 bytes were available (a potential slice-bounds panic),
// the payload slice was not bounds-checked against len(data), and the
// `n <= 0` test on an unsigned value could never see a negative n.
func ReadChunk(data []byte, length uint32) (chunk *ChunkData, new_len uint32, err error) {
	uint32_sz := uint32(unsafe.Sizeof(uint32(1)))
	// Bounds-check before touching data.
	if length < uint32_sz || uint32(len(data)) < uint32_sz {
		return nil, 0, nil
	}
	n := binary.BigEndian.Uint32(data[:uint32_sz])
	new_len = length - uint32_sz
	if n == 0 || n > new_len {
		// Empty or truncated payload: nothing decodable.
		return nil, new_len, nil
	}
	if uint32(len(data)) < uint32_sz+n {
		// The declared payload extends past the buffer we were handed.
		return nil, new_len, nil
	}
	new_len = length - (n + uint32_sz)
	chunk = new(ChunkData)
	if err = proto.Unmarshal(data[uint32_sz:uint32_sz+n], chunk); err != nil {
		return nil, new_len, err
	}
	return chunk, new_len, nil
}

View File

@ -0,0 +1,152 @@
// Code generated by protoc-gen-go.
// source: chunkdata.proto
// DO NOT EDIT!
/*
Package safebrowsing is a generated protocol buffer package.
It is generated from these files:
chunkdata.proto
It has these top-level messages:
ChunkData
*/
package safebrowsing
import proto "github.com/letsencrypt/boulder/Godeps/_workspace/src/github.com/golang/protobuf/proto"
import math "math"
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal
var _ = math.Inf
// NOTE: this file is generated by protoc-gen-go; only comments have been
// added here — regenerate from chunkdata.proto for code changes.

// The chunk type is either an add or sub chunk.
type ChunkData_ChunkType int32

const (
	ChunkData_ADD ChunkData_ChunkType = 0
	ChunkData_SUB ChunkData_ChunkType = 1
)

// Name/value maps used by the generated String and JSON helpers below.
var ChunkData_ChunkType_name = map[int32]string{
	0: "ADD",
	1: "SUB",
}
var ChunkData_ChunkType_value = map[string]int32{
	"ADD": 0,
	"SUB": 1,
}

// Enum returns a pointer to a copy of x, as required when populating
// optional proto2 enum fields.
func (x ChunkData_ChunkType) Enum() *ChunkData_ChunkType {
	p := new(ChunkData_ChunkType)
	*p = x
	return p
}
func (x ChunkData_ChunkType) String() string {
	return proto.EnumName(ChunkData_ChunkType_name, int32(x))
}
func (x *ChunkData_ChunkType) UnmarshalJSON(data []byte) error {
	value, err := proto.UnmarshalJSONEnum(ChunkData_ChunkType_value, data, "ChunkData_ChunkType")
	if err != nil {
		return err
	}
	*x = ChunkData_ChunkType(value)
	return nil
}
// Prefix type which currently is either 4B or 32B. The default is set
// to the prefix length, so it doesn't have to be set at all for most
// chunks. (Generated by protoc-gen-go; only comments added here.)
type ChunkData_PrefixType int32

const (
	ChunkData_PREFIX_4B ChunkData_PrefixType = 0
	ChunkData_FULL_32B  ChunkData_PrefixType = 1
)

// Name/value maps used by the generated String and JSON helpers below.
var ChunkData_PrefixType_name = map[int32]string{
	0: "PREFIX_4B",
	1: "FULL_32B",
}
var ChunkData_PrefixType_value = map[string]int32{
	"PREFIX_4B": 0,
	"FULL_32B":  1,
}

// Enum returns a pointer to a copy of x, as required when populating
// optional proto2 enum fields.
func (x ChunkData_PrefixType) Enum() *ChunkData_PrefixType {
	p := new(ChunkData_PrefixType)
	*p = x
	return p
}
func (x ChunkData_PrefixType) String() string {
	return proto.EnumName(ChunkData_PrefixType_name, int32(x))
}
func (x *ChunkData_PrefixType) UnmarshalJSON(data []byte) error {
	value, err := proto.UnmarshalJSONEnum(ChunkData_PrefixType_value, data, "ChunkData_PrefixType")
	if err != nil {
		return err
	}
	*x = ChunkData_PrefixType(value)
	return nil
}
// Chunk data encoding format for the shavar-proto list format.
// (Generated by protoc-gen-go; only comments added here. Getters return
// the proto2 defaults — ADD / PREFIX_4B — when the field is unset.)
type ChunkData struct {
	ChunkNumber *int32               `protobuf:"varint,1,req,name=chunk_number" json:"chunk_number,omitempty"`
	ChunkType   *ChunkData_ChunkType `protobuf:"varint,2,opt,name=chunk_type,enum=safebrowsing.ChunkData_ChunkType,def=0" json:"chunk_type,omitempty"`
	PrefixType  *ChunkData_PrefixType `protobuf:"varint,3,opt,name=prefix_type,enum=safebrowsing.ChunkData_PrefixType,def=0" json:"prefix_type,omitempty"`
	// Stores all SHA256 add or sub prefixes or full-length hashes. The number
	// of hashes can be inferred from the length of the hashes string and the
	// prefix type above.
	Hashes []byte `protobuf:"bytes,4,opt,name=hashes" json:"hashes,omitempty"`
	// Sub chunks also encode one add chunk number for every hash stored above.
	AddNumbers       []int32 `protobuf:"varint,5,rep,packed,name=add_numbers" json:"add_numbers,omitempty"`
	XXX_unrecognized []byte  `json:"-"`
}

func (m *ChunkData) Reset()         { *m = ChunkData{} }
func (m *ChunkData) String() string { return proto.CompactTextString(m) }
func (*ChunkData) ProtoMessage()    {}

// Defaults for the optional enum fields (from the .proto [default = ...]).
const Default_ChunkData_ChunkType ChunkData_ChunkType = ChunkData_ADD
const Default_ChunkData_PrefixType ChunkData_PrefixType = ChunkData_PREFIX_4B

func (m *ChunkData) GetChunkNumber() int32 {
	if m != nil && m.ChunkNumber != nil {
		return *m.ChunkNumber
	}
	return 0
}
func (m *ChunkData) GetChunkType() ChunkData_ChunkType {
	if m != nil && m.ChunkType != nil {
		return *m.ChunkType
	}
	return Default_ChunkData_ChunkType
}
func (m *ChunkData) GetPrefixType() ChunkData_PrefixType {
	if m != nil && m.PrefixType != nil {
		return *m.PrefixType
	}
	return Default_ChunkData_PrefixType
}
func (m *ChunkData) GetHashes() []byte {
	if m != nil {
		return m.Hashes
	}
	return nil
}
func (m *ChunkData) GetAddNumbers() []int32 {
	if m != nil {
		return m.AddNumbers
	}
	return nil
}
// init registers the generated enums with the proto runtime so text/JSON
// (un)marshalling can resolve their names.
func init() {
	proto.RegisterEnum("safebrowsing.ChunkData_ChunkType", ChunkData_ChunkType_name, ChunkData_ChunkType_value)
	proto.RegisterEnum("safebrowsing.ChunkData_PrefixType", ChunkData_PrefixType_name, ChunkData_PrefixType_value)
}

View File

@ -0,0 +1,29 @@
package safebrowsing;
// Chunk data encoding format for the shavar-proto list format.
// NOTE: field tag numbers are part of the wire format and must never be
// changed or reused.
message ChunkData {
  required int32 chunk_number = 1;

  // The chunk type is either an add or sub chunk.
  enum ChunkType {
    ADD = 0;
    SUB = 1;
  }
  optional ChunkType chunk_type = 2 [default = ADD];

  // Prefix type which currently is either 4B or 32B. The default is set
  // to the prefix length, so it doesn't have to be set at all for most
  // chunks.
  enum PrefixType {
    PREFIX_4B = 0;
    FULL_32B = 1;
  }
  optional PrefixType prefix_type = 3 [default = PREFIX_4B];

  // Stores all SHA256 add or sub prefixes or full-length hashes. The number
  // of hashes can be inferred from the length of the hashes string and the
  // prefix type above.
  optional bytes hashes = 4;

  // Sub chunks also encode one add chunk number for every hash stored above.
  repeated int32 add_numbers = 5 [packed = true];
}

View File

@ -0,0 +1,82 @@
/*
Copyright (c) 2013, Richard Johnson
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package safebrowsing
import (
"fmt"
"time"
)
// logger interface deliberately matches the log4go.Logger signature
// to allow for drop-in injection. Warn, Error and Critical return an error
// so call sites can log and propagate in one step (log4go convention).
type logger interface {
	Finest(arg0 interface{}, args ...interface{})
	Fine(arg0 interface{}, args ...interface{})
	Debug(arg0 interface{}, args ...interface{})
	Trace(arg0 interface{}, args ...interface{})
	Info(arg0 interface{}, args ...interface{})
	Warn(arg0 interface{}, args ...interface{}) error
	Error(arg0 interface{}, args ...interface{}) error
	Critical(arg0 interface{}, args ...interface{}) error
}
// Default logger provides a simple console output implementation of the logger
// interface. This is intended for logger dependency injection, such as log4go.
type DefaultLogger struct{}

// log writes one timestamped, level-tagged line to stdout. arg0 is normally
// a printf-style format string (log4go convention).
//
// BUG FIX: the original did an unchecked arg0.(string) assertion, which
// panicked whenever a caller passed any non-string first argument. We now
// fall back to fmt.Sprint for non-string values; string callers behave
// exactly as before.
func (dl *DefaultLogger) log(level string, arg0 interface{}, args ...interface{}) {
	prefix := fmt.Sprintf(
		"[%v] [%s] ",
		time.Now().Format("2006-01-02 15:04:05"),
		level)
	format, ok := arg0.(string)
	if !ok {
		format = fmt.Sprint(arg0)
	}
	fmt.Printf(prefix+format+"\n", args...)
}
// The level methods below satisfy the logger interface by delegating to
// log with a fixed level tag. Finest and Fine share the "FINE" tag, and
// Warn/Error/Critical always return nil (nothing here can fail).
func (dl *DefaultLogger) Finest(arg0 interface{}, args ...interface{}) {
	dl.log("FINE", arg0, args...)
}
func (dl *DefaultLogger) Fine(arg0 interface{}, args ...interface{}) {
	dl.log("FINE", arg0, args...)
}
func (dl *DefaultLogger) Debug(arg0 interface{}, args ...interface{}) {
	dl.log("DEBG", arg0, args...)
}
func (dl *DefaultLogger) Trace(arg0 interface{}, args ...interface{}) {
	dl.log("TRAC", arg0, args...)
}
func (dl *DefaultLogger) Info(arg0 interface{}, args ...interface{}) {
	dl.log("INFO", arg0, args...)
}
func (dl *DefaultLogger) Warn(arg0 interface{}, args ...interface{}) error {
	dl.log("WARN", arg0, args...)
	return nil
}
func (dl *DefaultLogger) Error(arg0 interface{}, args ...interface{}) error {
	dl.log("EROR", arg0, args...)
	return nil
}
func (dl *DefaultLogger) Critical(arg0 interface{}, args ...interface{}) error {
	dl.log("CRIT", arg0, args...)
	return nil
}

View File

@ -0,0 +1,19 @@
Copyright (C) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@ -0,0 +1,9 @@
SUBDIRS = src test
EXTRA_DIST = README.md COPYING
pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = hat-trie-0.1.pc

View File

@ -0,0 +1,47 @@
Hat-Trie
========
[![Build Status](https://travis-ci.org/dcjones/hat-trie.svg)](https://travis-ci.org/dcjones/hat-trie)
This is an ANSI C99 implementation of the HAT-trie data structure of Askitis and
Sinha, an extremely efficient (space and time) modern variant of tries.
The version implemented here maps arrays of bytes to words (i.e., unsigned
longs), which can be used to store counts, pointers, etc, or not used at all if
you simply want to maintain a set of unique strings.
For details see,
1. Askitis, N., & Sinha, R. (2007). HAT-trie: a cache-conscious trie-based data
structure for strings. Proceedings of the thirtieth Australasian conference on
Computer science-Volume 62 (pp. 97-105). Australian Computer Society, Inc.
2. Askitis, N., & Zobel, J. (2005). Cache-conscious collision resolution in
string hash tables. String Processing and Information Retrieval (pp.
91-102). Springer.
Installation
------------
git clone git@github.com:dcjones/hat-trie.git
cd hat-trie
autoreconf -i
./configure
make install
To use the library, include `hat-trie.h` and link using `-lhat-trie`.
Tests
-----
Build and run the tests:
make check
Other Language Bindings
-----------------------
* Ruby - https://github.com/luikore/triez
* Python - https://github.com/kmike/hat-trie

View File

@ -0,0 +1,6 @@
todo:
* Deletion in ahtable.
* Deletion in hattrie.

View File

@ -0,0 +1,34 @@
AC_INIT([hat-trie], [0.1.0], [dcjones@cs.washington.edu])
AM_INIT_AUTOMAKE([foreign])
m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES([yes])])
AC_CONFIG_MACRO_DIR([m4])
base_CFLAGS="-std=c99 -Wall -Wextra -pedantic"
opt_CFLAGS="${base_CFLAGS} -O3"
dbg_CFLAGS="${base_CFLAGS} -g -O0"
AC_ARG_ENABLE([debugging],
[AS_HELP_STRING([--enable-debugging],
[enable debugging info (default is no)])],
[], [enable_debugging=no])
AS_IF([test "x$enable_debugging" = xyes],
[CFLAGS="$dbg_CFLAGS"],
[CFLAGS="$opt_CFLAGS"])
AC_PROG_CC
AC_PROG_CPP
AC_PROG_INSTALL
AC_PROG_LN_S
AC_PROG_MAKE_SET
AC_DISABLE_SHARED
AC_PROG_LIBTOOL
AC_C_BIGENDIAN([AC_MSG_ERROR([Big-endian systems are not currently supported.])])
AC_HEADER_STDBOOL
AC_CONFIG_FILES([hat-trie-0.1.pc Makefile src/Makefile test/Makefile])
AC_OUTPUT

View File

@ -0,0 +1,12 @@
prefix=@prefix@
exec_prefix=@exec_prefix@
libdir=@libdir@
includedir=@includedir@
Name: @PACKAGE_NAME@
Description: An efficient trie implementation.
Version: @PACKAGE_VERSION@
Cflags: -I${includedir}
Libs: -L${libdir}

View File

@ -0,0 +1,11 @@
lib_LTLIBRARIES = libhat-trie.la
libhat_trie_la_SOURCES = common.h \
ahtable.h ahtable.c \
hat-trie.h hat-trie.c \
misc.h misc.c \
murmurhash3.h murmurhash3.c
pkginclude_HEADERS = hat-trie.h ahtable.h common.h pstdint.h

View File

@ -0,0 +1,552 @@
/*
* This file is part of hat-trie.
*
* Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
*
* See ahtable.h for description of the Array Hash Table.
*
*/
#include "ahtable.h"
#include "misc.h"
#include "murmurhash3.h"
#include <assert.h>
#include <string.h>
const double ahtable_max_load_factor = 100000.0; /* arbitrary large number => don't resize */
const size_t ahtable_initial_size = 4096;
/* Decode the length prefix at the start of slot entry s: when the low bit
 * of the first byte is set the length occupies two bytes (uint16, shifted
 * right one), otherwise a single byte (shifted right one). */
static size_t keylen(slot_t s)
{
    return (0x1 & *s) ? (size_t) (*((uint16_t*) s) >> 1)
                      : (size_t) (*s >> 1);
}
/* Create an empty table with the default (ahtable_initial_size) slot count. */
ahtable_t* ahtable_create()
{
    return ahtable_create_n(ahtable_initial_size);
}
/* Create an empty table with n slots reserved. All slots start NULL with
 * size zero; max_m is the pair count at which the table will expand. */
ahtable_t* ahtable_create_n(size_t n)
{
    ahtable_t* table = malloc_or_die(sizeof(ahtable_t));
    /* flag/c0/c1 belong to the enclosing hattrie, not the table itself. */
    table->flag = 0;
    table->c0 = table->c1 = '\0';

    table->n = n;
    table->m = 0;
    table->max_m = (size_t) (ahtable_max_load_factor * (double) table->n);
    table->slots = malloc_or_die(n * sizeof(slot_t));
    memset(table->slots, 0, n * sizeof(slot_t));

    table->slot_sizes = malloc_or_die(n * sizeof(size_t));
    memset(table->slot_sizes, 0, n * sizeof(size_t));

    return table;
}
/* Free all memory used by a table. Safe to call with NULL. */
void ahtable_free(ahtable_t* table)
{
    if (table == NULL) return;
    size_t i;
    for (i = 0; i < table->n; ++i) free(table->slots[i]);
    free(table->slots);
    free(table->slot_sizes);
    free(table);
}
/* Number of key/value pairs currently stored. */
size_t ahtable_size(const ahtable_t* table)
{
    return table->m;
}
/* Remove all entries and shrink the table back to its initial slot count.
 *
 * BUG FIX: the original left table->m (the stored-pair count) and
 * table->max_m untouched, so ahtable_size() reported a stale count after a
 * clear, and the expand threshold in get_key no longer matched the reset
 * slot count. Both are now reset consistently with ahtable_create_n. */
void ahtable_clear(ahtable_t* table)
{
    size_t i;
    for (i = 0; i < table->n; ++i) free(table->slots[i]);
    table->n = ahtable_initial_size;
    table->m = 0;
    table->max_m = (size_t) (ahtable_max_load_factor * (double) table->n);

    table->slots = realloc_or_die(table->slots, table->n * sizeof(slot_t));
    memset(table->slots, 0, table->n * sizeof(slot_t));

    table->slot_sizes = realloc_or_die(table->slot_sizes, table->n * sizeof(size_t));
    memset(table->slot_sizes, 0, table->n * sizeof(size_t));
}
/** Inserts a key with value into slot s, and returns a pointer to the
 * space immediately after. The caller must have already grown the slot to
 * hold the encoded entry; *val points at the zero-initialised value cell.
 */
static slot_t ins_key(slot_t s, const char* key, size_t len, value_t** val)
{
    // key length: one byte for lengths < 128, else two bytes
    if (len < 128) {
        s[0] = (unsigned char) (len << 1);
        s += 1;
    }
    else {
        /* The least significant bit is set to indicate that two bytes are
         * being used to store the key length. */
        *((uint16_t*) s) = ((uint16_t) len << 1) | 0x1;
        s += 2;
    }

    // key bytes follow the length prefix
    memcpy(s, key, len * sizeof(unsigned char));
    s += len;

    // value cell comes last; initialised to zero
    *val = (value_t*) s;
    **val = 0;
    s += sizeof(value_t);

    return s;
}
/* Double the slot count and rehash every entry into a freshly allocated
 * slot array, then swap the new arrays into the table. */
static void ahtable_expand(ahtable_t* table)
{
    /* Resizing a table is essentially building a brand new one.
     * One little shortcut we can take on the memory allocation front is to
     * figure out how much memory each slot needs in advance.
     */
    assert(table->n > 0);
    size_t new_n = 2 * table->n;
    size_t* slot_sizes = malloc_or_die(new_n * sizeof(size_t));
    memset(slot_sizes, 0, new_n * sizeof(size_t));

    /* First pass: compute the exact byte size of every new slot
     * (length prefix + key + value per entry). */
    const char* key;
    size_t len = 0;
    size_t m = 0;
    ahtable_iter_t* i = ahtable_iter_begin(table, false);
    while (!ahtable_iter_finished(i)) {
        key = ahtable_iter_key(i, &len);
        slot_sizes[hash(key, len) % new_n] +=
            len + sizeof(value_t) + (len >= 128 ? 2 : 1);

        ++m;
        ahtable_iter_next(i);
    }
    assert(m == table->m);
    ahtable_iter_free(i);

    /* allocate slots */
    slot_t* slots = malloc_or_die(new_n * sizeof(slot_t));
    size_t j;
    for (j = 0; j < new_n; ++j) {
        if (slot_sizes[j] > 0) {
            slots[j] = malloc_or_die(slot_sizes[j]);
        }
        else slots[j] = NULL;
    }

    /* rehash values. A few shortcuts can be taken here as well, as we know
     * there will be no collisions. Instead of the regular insertion routine,
     * we keep track of the ends of every slot and simply insert keys.
     * */
    slot_t* slots_next = malloc_or_die(new_n * sizeof(slot_t));
    memcpy(slots_next, slots, new_n * sizeof(slot_t));
    size_t h;
    m = 0;
    value_t* u;
    value_t* v;
    i = ahtable_iter_begin(table, false);
    while (!ahtable_iter_finished(i)) {
        key = ahtable_iter_key(i, &len);
        h = hash(key, len) % new_n;

        slots_next[h] = ins_key(slots_next[h], key, len, &u);
        v = ahtable_iter_val(i);
        *u = *v;

        ++m;
        ahtable_iter_next(i);
    }
    assert(m == table->m);
    ahtable_iter_free(i);
    free(slots_next);

    /* Swap the new arrays in and recompute the expand threshold. */
    for (j = 0; j < table->n; ++j) free(table->slots[j]);

    free(table->slots);
    table->slots = slots;

    free(table->slot_sizes);
    table->slot_sizes = slot_sizes;

    table->n = new_n;
    table->max_m = (size_t) (ahtable_max_load_factor * (double) table->n);
}
/* Look up key in the table, optionally inserting it when missing.
 * Returns a pointer to the entry's value cell, or NULL when the key is
 * absent and insert_missing is false. Pointers returned here are
 * invalidated by any subsequent mutation of the table. */
static value_t* get_key(ahtable_t* table, const char* key, size_t len, bool insert_missing)
{
    /* if we are at capacity, preemptively resize */
    if (insert_missing && table->m >= table->max_m) {
        ahtable_expand(table);
    }

    uint32_t i = hash(key, len) % table->n;
    size_t k;
    slot_t s;
    value_t* val;

    /* search the array for our key */
    s = table->slots[i];
    while ((size_t) (s - table->slots[i]) < table->slot_sizes[i]) {
        /* get the key length */
        k = keylen(s);
        s += k < 128 ? 1 : 2;

        /* skip keys that are longer than ours */
        if (k != len) {
            s += k + sizeof(value_t);
            continue;
        }

        /* key found. */
        if (memcmp(s, key, len) == 0) {
            return (value_t*) (s + len);
        }
        /* key not found. */
        else {
            s += k + sizeof(value_t);
            continue;
        }
    }

    if (insert_missing) {
        /* the key was not found, so we must insert it: grow the slot by the
         * encoded entry size and append with ins_key. */
        size_t new_size = table->slot_sizes[i];
        new_size += 1 + (len >= 128 ? 1 : 0);    // key length
        new_size += len * sizeof(unsigned char); // key
        new_size += sizeof(value_t);             // value

        table->slots[i] = realloc_or_die(table->slots[i], new_size);

        ++table->m;
        ins_key(table->slots[i] + table->slot_sizes[i], key, len, &val);
        table->slot_sizes[i] = new_size;

        return val;
    }
    else return NULL;
}
/* Find key, inserting it with a zero value if absent; returns its value cell. */
value_t* ahtable_get(ahtable_t* table, const char* key, size_t len)
{
    return get_key(table, key, len, true);
}
/* Find key without inserting; returns NULL when it is not present. */
value_t* ahtable_tryget(ahtable_t* table, const char* key, size_t len )
{
    return get_key(table, key, len, false);
}
/* Delete key from the table by compacting the remainder of its slot over
 * it. Returns 0 on success, -1 when the key was not found. */
int ahtable_del(ahtable_t* table, const char* key, size_t len)
{
    uint32_t i = hash(key, len) % table->n;
    size_t k;
    slot_t s;

    /* search the array for our key */
    s = table->slots[i];
    while ((size_t) (s - table->slots[i]) < table->slot_sizes[i]) {
        /* get the key length */
        k = keylen(s);
        s += k < 128 ? 1 : 2;

        /* skip keys that are longer than ours */
        if (k != len) {
            s += k + sizeof(value_t);
            continue;
        }

        /* key found. */
        if (memcmp(s, key, len) == 0) {
            /* move everything over, resize the array */
            unsigned char* t = s + len + sizeof(value_t);
            s -= k < 128 ? 1 : 2;
            memmove(s, t, table->slot_sizes[i] - (size_t) (t - table->slots[i]));
            table->slot_sizes[i] -= (size_t) (t - s);
            --table->m;
            return 0;
        }
        /* key not found. */
        else {
            s += k + sizeof(value_t);
            continue;
        }
    }

    // Key was not found. Do nothing.
    return -1;
}
/* qsort comparator over slot entry pointers: lexicographic byte order,
 * with the shorter key ordered first on a tie. (Keys are bounded well
 * below INT_MAX, so the (int) length subtraction cannot overflow here.) */
static int cmpkey(const void* a_, const void* b_)
{
    slot_t a = *(slot_t*) a_;
    slot_t b = *(slot_t*) b_;
    size_t ka = keylen(a), kb = keylen(b);
    a += ka < 128 ? 1 : 2;
    b += kb < 128 ? 1 : 2;
    int c = memcmp(a, b, ka < kb ? ka : kb);
    return c == 0 ? (int) ka - (int) kb : c;
}
/* Sorted/unsorted iterators are kept private and exposed by passing the
   sorted flag to ahtable_iter_begin. */

/* The sorted iterator snapshots a pointer to every entry up front and
 * qsorts them; it costs O(m log m) at begin and O(m) extra memory. */
typedef struct ahtable_sorted_iter_t_
{
    const ahtable_t* table; // parent
    slot_t* xs; // pointers to keys
    size_t i; // current key
} ahtable_sorted_iter_t;

static ahtable_sorted_iter_t* ahtable_sorted_iter_begin(const ahtable_t* table)
{
    ahtable_sorted_iter_t* i = malloc_or_die(sizeof(ahtable_sorted_iter_t));
    i->table = table;
    i->xs = malloc_or_die(table->m * sizeof(slot_t));
    i->i = 0;

    /* Collect a pointer to the start of every encoded entry. */
    slot_t s;
    size_t j, k, u;
    for (j = 0, u = 0; j < table->n; ++j) {
        s = table->slots[j];
        while (s < table->slots[j] + table->slot_sizes[j]) {
            i->xs[u++] = s;
            k = keylen(s);
            s += k < 128 ? 1 : 2;
            s += k + sizeof(value_t);
        }
    }

    qsort(i->xs, table->m, sizeof(slot_t), cmpkey);

    return i;
}

static bool ahtable_sorted_iter_finished(ahtable_sorted_iter_t* i)
{
    return i->i >= i->table->m;
}

static void ahtable_sorted_iter_next(ahtable_sorted_iter_t* i)
{
    if (ahtable_sorted_iter_finished(i)) return;
    ++i->i;
}

static void ahtable_sorted_iter_free(ahtable_sorted_iter_t* i)
{
    if (i == NULL) return;
    free(i->xs);
    free(i);
}

/* Returns the current key (not NUL-terminated) and writes its length. */
static const char* ahtable_sorted_iter_key(ahtable_sorted_iter_t* i, size_t* len)
{
    if (ahtable_sorted_iter_finished(i)) return NULL;

    slot_t s = i->xs[i->i];
    *len = keylen(s);

    return (const char*) (s + (*len < 128 ? 1 : 2));
}

/* Returns a pointer to the current entry's value cell. */
static value_t* ahtable_sorted_iter_val(ahtable_sorted_iter_t* i)
{
    if (ahtable_sorted_iter_finished(i)) return NULL;

    slot_t s = i->xs[i->i];
    size_t k = keylen(s);

    s += k < 128 ? 1 : 2;
    s += k;

    return (value_t*) s;
}
/* The unsorted iterator walks the slot arrays in place with no extra
 * memory, visiting entries in storage order. */
typedef struct ahtable_unsorted_iter_t_
{
    const ahtable_t* table; // parent
    size_t i; // slot index
    slot_t s; // slot position
} ahtable_unsorted_iter_t;

static ahtable_unsorted_iter_t* ahtable_unsorted_iter_begin(const ahtable_t* table)
{
    ahtable_unsorted_iter_t* i = malloc_or_die(sizeof(ahtable_unsorted_iter_t));
    i->table = table;

    /* Advance to the first non-empty slot (the subtraction is zero at the
     * start of a slot, so the condition effectively skips size-0 slots). */
    for (i->i = 0; i->i < i->table->n; ++i->i) {
        i->s = table->slots[i->i];
        if ((size_t) (i->s - table->slots[i->i]) >= table->slot_sizes[i->i]) continue;
        break;
    }

    return i;
}

static bool ahtable_unsorted_iter_finished(ahtable_unsorted_iter_t* i)
{
    return i->i >= i->table->n;
}

static void ahtable_unsorted_iter_next(ahtable_unsorted_iter_t* i)
{
    if (ahtable_unsorted_iter_finished(i)) return;

    /* get the key length */
    size_t k = keylen(i->s);
    i->s += k < 128 ? 1 : 2;

    /* skip to the next key */
    i->s += k + sizeof(value_t);

    /* Reached the end of this slot: move on to the next non-empty one. */
    if ((size_t) (i->s - i->table->slots[i->i]) >= i->table->slot_sizes[i->i]) {
        do {
            ++i->i;
        } while(i->i < i->table->n &&
                i->table->slot_sizes[i->i] == 0);

        if (i->i < i->table->n) i->s = i->table->slots[i->i];
        else i->s = NULL;
    }
}

static void ahtable_unsorted_iter_free(ahtable_unsorted_iter_t* i)
{
    free(i);
}

/* Returns the current key (not NUL-terminated) and writes its length. */
static const char* ahtable_unsorted_iter_key(ahtable_unsorted_iter_t* i, size_t* len)
{
    if (ahtable_unsorted_iter_finished(i)) return NULL;

    /* Decode the one- or two-byte length prefix (low bit set => two bytes). */
    slot_t s = i->s;
    size_t k;
    if (0x1 & *s) {
        k = (size_t) (*((uint16_t*) s)) >> 1;
        s += 2;
    }
    else {
        k = (size_t) (*s >> 1);
        s += 1;
    }

    *len = k;
    return (const char*) s;
}

/* Returns a pointer to the current entry's value cell. */
static value_t* ahtable_unsorted_iter_val(ahtable_unsorted_iter_t* i)
{
    if (ahtable_unsorted_iter_finished(i)) return NULL;

    slot_t s = i->s;
    size_t k;
    if (0x1 & *s) {
        k = (size_t) (*((uint16_t*) s)) >> 1;
        s += 2;
    }
    else {
        k = (size_t) (*s >> 1);
        s += 1;
    }

    s += k;
    return (value_t*) s;
}
/* Public iterator: a tagged union dispatching to the private sorted or
 * unsorted implementation chosen at ahtable_iter_begin. */
struct ahtable_iter_t_
{
    bool sorted;
    union {
        ahtable_unsorted_iter_t* unsorted;
        ahtable_sorted_iter_t* sorted;
    } i;
};

ahtable_iter_t* ahtable_iter_begin(const ahtable_t* table, bool sorted) {
    ahtable_iter_t* i = malloc_or_die(sizeof(ahtable_iter_t));
    i->sorted = sorted;
    if (sorted) i->i.sorted   = ahtable_sorted_iter_begin(table);
    else        i->i.unsorted = ahtable_unsorted_iter_begin(table);
    return i;
}

void ahtable_iter_next(ahtable_iter_t* i)
{
    if (i->sorted) ahtable_sorted_iter_next(i->i.sorted);
    else           ahtable_unsorted_iter_next(i->i.unsorted);
}

bool ahtable_iter_finished(ahtable_iter_t* i)
{
    if (i->sorted) return ahtable_sorted_iter_finished(i->i.sorted);
    else           return ahtable_unsorted_iter_finished(i->i.unsorted);
}

void ahtable_iter_free(ahtable_iter_t* i)
{
    if (i == NULL) return;
    if (i->sorted) ahtable_sorted_iter_free(i->i.sorted);
    else           ahtable_unsorted_iter_free(i->i.unsorted);
    free(i);
}

const char* ahtable_iter_key(ahtable_iter_t* i, size_t* len)
{
    if (i->sorted) return ahtable_sorted_iter_key(i->i.sorted, len);
    else           return ahtable_unsorted_iter_key(i->i.unsorted, len);
}

value_t* ahtable_iter_val(ahtable_iter_t* i)
{
    if (i->sorted) return ahtable_sorted_iter_val(i->i.sorted);
    else           return ahtable_unsorted_iter_val(i->i.unsorted);
}

View File

@ -0,0 +1,114 @@
/*
* This file is part of hat-trie.
*
* Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
*
*
* This is an implementation of the 'cache-conscious' hash tables described in,
*
* Askitis, N., & Zobel, J. (2005). Cache-conscious collision resolution in
* string hash tables. String Processing and Information Retrieval (pp.
* 91-102). Springer.
*
* http://naskitis.com/naskitis-spire05.pdf
*
* Briefly, the idea behind an Array Hash Table is, as opposed to separate
* chaining with linked lists, to store keys contiguously in one big array,
* thereby improving the caching behavior, and reducing space requirements.
*
* ahtable keeps a fixed number (array) of slots, each of which contains a
* variable number of key/value pairs. Each key is preceded by its length--
* one byte for lengths < 128 bytes, and TWO bytes for longer keys. The least
* significant bit of the first byte indicates, if set, that the size is two
* bytes. The slot number where a key/value pair goes is determined by finding
* the murmurhashed integer value of its key, modulus the number of slots.
* The number of slots expands in a stepwise fashion when the number of
* key/value pairs reaches an arbitrarily large number.
*
* +-------+-------+-------+-------+-------+-------+
* | 0 | 1 | 2 | 3 | ... | N |
* +-------+-------+-------+-------+-------+-------+
* | | | | |
* v | | v v
* NULL | | 4html[VALUE] etc.
* | v
* | 5space[VALUE]4jury[VALUE]
* v
* 6justice[VALUE]3car[VALUE]4star[VALUE]
*
*/
#ifndef HATTRIE_AHTABLE_H
#define HATTRIE_AHTABLE_H

#ifdef __cplusplus
extern "C" {
#endif

#include <stdlib.h>
#include <stdbool.h>
#include "pstdint.h"
#include "common.h"

/* A slot is a raw byte buffer of consecutively encoded entries:
 * [len-prefix][key bytes][value_t], repeated. */
typedef unsigned char* slot_t;

typedef struct ahtable_t_
{
    /* these fields are reserved for hattrie to fiddle with */
    uint8_t flag;
    unsigned char c0;
    unsigned char c1;

    size_t n;     // number of slots
    size_t m;     // number of key/value pairs stored
    size_t max_m; // number of stored keys before we resize

    size_t* slot_sizes;
    slot_t* slots;
} ahtable_t;

extern const double ahtable_max_load_factor;
extern const size_t ahtable_initial_size;

ahtable_t* ahtable_create   (void);     // Create an empty hash table.
ahtable_t* ahtable_create_n (size_t n); // Create an empty hash table, with
                                        //  n slots reserved.

void       ahtable_free  (ahtable_t*);  // Free all memory used by a table.
void       ahtable_clear (ahtable_t*);  // Remove all entries.
size_t     ahtable_size  (const ahtable_t*); // Number of stored keys.

/** Find the given key in the table, inserting it if it does not exist, and
 * returning a pointer to its value.
 *
 * This pointer is not guaranteed to be valid after additional calls to
 * ahtable_get, ahtable_del, ahtable_clear, or other functions that modify the
 * table.
 */
value_t* ahtable_get (ahtable_t*, const char* key, size_t len);

/* Find a given key in the table, return a NULL pointer if it does not exist. */
value_t* ahtable_tryget (ahtable_t*, const char* key, size_t len);

/* Remove a key; returns 0 on success, -1 when the key was absent. */
int ahtable_del(ahtable_t*, const char* key, size_t len);

/* Iteration: pass sorted=true for lexicographic key order (costs extra
 * memory and a sort), false for fast in-storage order. */
typedef struct ahtable_iter_t_ ahtable_iter_t;

ahtable_iter_t* ahtable_iter_begin     (const ahtable_t*, bool sorted);
void            ahtable_iter_next      (ahtable_iter_t*);
bool            ahtable_iter_finished  (ahtable_iter_t*);
void            ahtable_iter_free      (ahtable_iter_t*);
const char*     ahtable_iter_key       (ahtable_iter_t*, size_t* len);
value_t*        ahtable_iter_val       (ahtable_iter_t*);

#ifdef __cplusplus
}
#endif

#endif

View File

@ -0,0 +1,22 @@
/*
* This file is part of hat-trie.
*
* Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
*
*
* Common typedefs, etc.
*
*/
#ifndef HATTRIE_COMMON_H
#define HATTRIE_COMMON_H
#include "pstdint.h"
// The value type stored in the trie and hash table: an unsigned int that is
// guaranteed to be the same size as a pointer, so a pointer may be stored
// directly in it.
typedef uintptr_t value_t;
#endif

View File

@ -0,0 +1,656 @@
/*
* This file is part of hat-trie.
*
* Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
*
*/
#include "hat-trie.h"
#include "ahtable.h"
#include "misc.h"
#include "pstdint.h"
#include <assert.h>
#include <string.h>
/* Silence unused-parameter warnings without attributes. */
#define HT_UNUSED(x) x=x
/* maximum number of keys that may be stored in a bucket before it is burst */
static const size_t MAX_BUCKET_SIZE = 16384;
#define NODE_MAXCHAR 0xff // 0x7f for 7-bit ASCII
#define NODE_CHILDS (NODE_MAXCHAR+1)
/* node.flag bits: exactly one of the three type bits is set per node */
static const uint8_t NODE_TYPE_TRIE = 0x1;
static const uint8_t NODE_TYPE_PURE_BUCKET = 0x2;
static const uint8_t NODE_TYPE_HYBRID_BUCKET = 0x4;
/* set on a trie node that itself stores a value (key consumed exactly there) */
static const uint8_t NODE_HAS_VAL = 0x8;
struct trie_node_t_;
/* Nodes may be trie nodes or buckets. This union allows us to keep a
 * non-specific pointer. The first byte of either pointee is its flag, so
 * *ptr.flag identifies the actual type. */
typedef union node_ptr_
{
    ahtable_t* b;            // when the node is a bucket
    struct trie_node_t_* t;  // when the node is a trie node
    uint8_t* flag;           // view of the common leading flag byte
} node_ptr;
typedef struct trie_node_t_
{
    uint8_t flag;
    /* the value for the key that is consumed on a trie node */
    value_t val;
    /* Map a character to either a trie_node_t or a ahtable_t. The first byte
     * must be examined to determine which. */
    node_ptr xs[NODE_CHILDS];
} trie_node_t;
struct hattrie_t_
{
    node_ptr root; // root node
    size_t m;      // number of stored keys
};
/* Create a new trie node with all pointers pointing to the given child (which
 * can be NULL). */
static trie_node_t* alloc_trie_node(hattrie_t* T, node_ptr child)
{
    /* T is accepted so a custom per-trie allocator could be threaded through
     * later; it is unused for now. */
    HT_UNUSED(T);

    trie_node_t* n = malloc_or_die(sizeof(trie_node_t));
    n->flag = NODE_TYPE_TRIE;
    n->val = 0;

    size_t c;
    for (c = 0; c < NODE_CHILDS; ++c) {
        n->xs[c] = child;
    }
    return n;
}
/* Iterate trie nodes until the string is consumed or a bucket is found.
 *
 * On return, *p is the deepest trie node reached (the parent), and *k/*l have
 * been advanced past every consumed character. brk is the number of
 * characters that must be left unconsumed (1 leaves the last char for a
 * pure-bucket lookup; 0 consumes everything). The returned node is the child
 * at which the walk stopped: either a bucket or a trie node. */
static node_ptr hattrie_consume(node_ptr *p, const char **k, size_t *l, unsigned brk)
{
    node_ptr node = p->t->xs[(unsigned char) **k];
    while (*node.flag & NODE_TYPE_TRIE && *l > brk) {
        ++*k;
        --*l;
        *p = node;
        node = node.t->xs[(unsigned char) **k];
    }
    /* copy and writeback variables if it's faster */
    assert(*p->flag & NODE_TYPE_TRIE);
    return node;
}
/* Use the trie node's stored value, marking it present (and counting it as a
 * new key) on first use. Returns a pointer to the value slot. */
static inline value_t* hattrie_useval(hattrie_t *T, node_ptr n)
{
    trie_node_t* t = n.t;
    if ((t->flag & NODE_HAS_VAL) == 0) {
        /* first time this node's value is used: it becomes a stored key */
        t->flag |= NODE_HAS_VAL;
        T->m += 1;
    }
    return &t->val;
}
/* Clear the trie node's value if it exists. Returns 0 when a value was
 * cleared, -1 when the node held no value. */
static inline int hattrie_clrval(hattrie_t *T, node_ptr n)
{
    trie_node_t* t = n.t;
    if ((t->flag & NODE_HAS_VAL) == 0) {
        return -1;
    }
    t->flag &= ~NODE_HAS_VAL;
    t->val = 0;
    T->m -= 1;
    return 0;
}
/* Find the node holding *key in the trie without inserting.
 *
 * *key/*len are advanced to the suffix remaining after the trie walk (and
 * past the pure-bucket discriminator character, which pure buckets do not
 * store). Returns the node, with node.flag set to NULL when the key was
 * consumed on a trie node that holds no value (i.e. not found there). For a
 * bucket result, the bucket itself is NOT searched here. */
static node_ptr hattrie_find(hattrie_t* T, const char **key, size_t *len)
{
    node_ptr parent = T->root;
    assert(*parent.flag & NODE_TYPE_TRIE);
    if (*len == 0) return parent;
    /* leave at least one character unconsumed (brk = 1) */
    node_ptr node = hattrie_consume(&parent, key, len, 1);
    /* if the trie node consumes value, use it */
    if (*node.flag & NODE_TYPE_TRIE) {
        if (!(node.t->flag & NODE_HAS_VAL)) {
            node.flag = NULL;
        }
        return node;
    }
    /* pure bucket holds only key suffixes, skip current char */
    if (*node.flag & NODE_TYPE_PURE_BUCKET) {
        *key += 1;
        *len -= 1;
    }
    /* do not scan bucket, it's not needed for this operation */
    return node;
}
/* Create an empty hat-trie: a root trie node whose 256 children all share one
 * hybrid bucket covering the full character range. */
hattrie_t* hattrie_create()
{
    hattrie_t* T = malloc_or_die(sizeof(hattrie_t));
    T->m = 0;

    node_ptr bucket;
    bucket.b = ahtable_create();
    bucket.b->flag = NODE_TYPE_HYBRID_BUCKET;
    bucket.b->c0 = 0x00;
    bucket.b->c1 = NODE_MAXCHAR;

    T->root.t = alloc_trie_node(T, bucket);
    return T;
}
/* Recursively free a node and everything below it. */
static void hattrie_free_node(node_ptr node)
{
    if (!(*node.flag & NODE_TYPE_TRIE)) {
        ahtable_free(node.b);
        return;
    }

    size_t c;
    for (c = 0; c < NODE_CHILDS; ++c) {
        /* consecutive children may alias the same hybrid bucket; free once */
        if (c > 0 && node.t->xs[c].t == node.t->xs[c - 1].t) continue;
        /* XXX: recursion might not be the best choice here. It is possible
         * to build a very deep trie. */
        if (node.t->xs[c].t) hattrie_free_node(node.t->xs[c]);
    }
    free(node.t);
}
/* Free all memory used by the trie, including every node and bucket. */
void hattrie_free(hattrie_t* T)
{
    hattrie_free_node(T->root);
    free(T);
}
/* Perform one split operation on the given node with the given parent.
 *
 * A pure bucket is converted in place into a hybrid bucket hanging off a new
 * trie node. A hybrid bucket is split into two buckets at a character chosen
 * to balance the key counts, and its keys are redistributed. Either way the
 * caller's node pointer is invalidated.
 */
static void hattrie_split(hattrie_t* T, node_ptr parent, node_ptr node)
{
    /* only buckets may be split */
    assert(*node.flag & NODE_TYPE_PURE_BUCKET ||
           *node.flag & NODE_TYPE_HYBRID_BUCKET);
    assert(*parent.flag & NODE_TYPE_TRIE);
    if (*node.flag & NODE_TYPE_PURE_BUCKET) {
        /* turn the pure bucket into a hybrid bucket */
        parent.t->xs[node.b->c0].t = alloc_trie_node(T, node);
        /* if the bucket had an empty key, move it to the new trie node */
        value_t* val = ahtable_tryget(node.b, NULL, 0);
        if (val) {
            parent.t->xs[node.b->c0].t->val = *val;
            parent.t->xs[node.b->c0].t->flag |= NODE_HAS_VAL;
            *val = 0;
            ahtable_del(node.b, NULL, 0);
        }
        /* the bucket now covers the full range under the new trie node */
        node.b->c0 = 0x00;
        node.b->c1 = NODE_MAXCHAR;
        node.b->flag = NODE_TYPE_HYBRID_BUCKET;
        return;
    }
    /* This is a hybrid bucket. Perform a proper split. */
    /* count the number of occurrences of every leading character */
    unsigned int cs[NODE_CHILDS]; // occurrence count for leading chars
    memset(cs, 0, NODE_CHILDS * sizeof(unsigned int));
    size_t len;
    const char* key;
    ahtable_iter_t* i = ahtable_iter_begin(node.b, false);
    while (!ahtable_iter_finished(i)) {
        key = ahtable_iter_key(i, &len);
        assert(len > 0);
        cs[(unsigned char) key[0]] += 1;
        ahtable_iter_next(i);
    }
    ahtable_iter_free(i);
    /* choose a split point: greedily advance j while doing so makes the
     * left/right key counts more balanced (and keeps the left side proper) */
    unsigned int left_m, right_m, all_m;
    unsigned char j = node.b->c0;
    all_m = ahtable_size(node.b);
    left_m = cs[j];
    right_m = all_m - left_m;
    int d;
    while (j + 1 < node.b->c1) {
        d = abs((int) (left_m + cs[j + 1]) - (int) (right_m - cs[j + 1]));
        if (d <= abs((int) (left_m - right_m)) && left_m + cs[j + 1] < all_m) {
            j += 1;
            left_m += cs[j];
            right_m -= cs[j];
        }
        else break;
    }
    /* now split into two nodes corresponding to ranges [0, j] and
     * [j + 1, NODE_MAXCHAR], respectively. */
    /* create new left and right nodes */
    /* TODO: Add a special case if either node is a hybrid bucket containing all
     * the keys. In such a case, do not build a new table, just use the old one.
     * */
    /* size each new table so its expected load stays under the max factor */
    size_t num_slots;
    for (num_slots = ahtable_initial_size;
         (double) left_m > ahtable_max_load_factor * (double) num_slots;
         num_slots *= 2);
    node_ptr left, right;
    left.b = ahtable_create_n(num_slots);
    left.b->c0 = node.b->c0;
    left.b->c1 = j;
    /* a single-character range makes the bucket pure */
    left.b->flag = left.b->c0 == left.b->c1 ?
        NODE_TYPE_PURE_BUCKET : NODE_TYPE_HYBRID_BUCKET;
    for (num_slots = ahtable_initial_size;
         (double) right_m > ahtable_max_load_factor * (double) num_slots;
         num_slots *= 2);
    right.b = ahtable_create_n(num_slots);
    right.b->c0 = j + 1;
    right.b->c1 = node.b->c1;
    right.b->flag = right.b->c0 == right.b->c1 ?
        NODE_TYPE_PURE_BUCKET : NODE_TYPE_HYBRID_BUCKET;
    /* update the parent's pointer */
    unsigned int c;
    for (c = node.b->c0; c <= j; ++c) parent.t->xs[c] = left;
    for (; c <= node.b->c1; ++c) parent.t->xs[c] = right;
    /* distribute keys to the new left or right node */
    value_t* u;
    value_t* v;
    i = ahtable_iter_begin(node.b, false);
    while (!ahtable_iter_finished(i)) {
        key = ahtable_iter_key(i, &len);
        u = ahtable_iter_val(i);
        assert(len > 0);
        /* left: pure buckets store only the suffix after the first char */
        if ((unsigned char) key[0] <= j) {
            if (*left.flag & NODE_TYPE_PURE_BUCKET) {
                v = ahtable_get(left.b, key + 1, len - 1);
            }
            else {
                v = ahtable_get(left.b, key, len);
            }
            *v = *u;
        }
        /* right */
        else {
            if (*right.flag & NODE_TYPE_PURE_BUCKET) {
                v = ahtable_get(right.b, key + 1, len - 1);
            }
            else {
                v = ahtable_get(right.b, key, len);
            }
            *v = *u;
        }
        ahtable_iter_next(i);
    }
    ahtable_iter_free(i);
    ahtable_free(node.b);
}
/* Find the given key in the trie, inserting it if it does not exist, and
 * return a pointer to its value slot. The pointer may be invalidated by any
 * later call that modifies the trie. */
value_t* hattrie_get(hattrie_t* T, const char* key, size_t len)
{
    node_ptr parent = T->root;
    assert(*parent.flag & NODE_TYPE_TRIE);
    /* the empty key lives on the root trie node itself */
    if (len == 0) return &parent.t->val;
    /* consume all trie nodes, now parent must be trie and child anything */
    node_ptr node = hattrie_consume(&parent, &key, &len, 0);
    assert(*parent.flag & NODE_TYPE_TRIE);
    /* if the key has been consumed on a trie node, use its value */
    if (len == 0) {
        if (*node.flag & NODE_TYPE_TRIE) {
            return hattrie_useval(T, node);
        }
        /* a hybrid bucket means the final char was consumed stepping into the
         * parent trie node, so the value belongs to the parent */
        else if (*node.flag & NODE_TYPE_HYBRID_BUCKET) {
            return hattrie_useval(T, parent);
        }
    }
    /* preemptively split the bucket if it is full */
    while (ahtable_size(node.b) >= MAX_BUCKET_SIZE) {
        hattrie_split(T, parent, node);
        /* after the split, the node pointer is invalidated, so we search from
         * the parent again. */
        node = hattrie_consume(&parent, &key, &len, 0);
        /* if the key has been consumed on a trie node, use its value */
        if (len == 0) {
            if (*node.flag & NODE_TYPE_TRIE) {
                return hattrie_useval(T, node);
            }
            else if (*node.flag & NODE_TYPE_HYBRID_BUCKET) {
                return hattrie_useval(T, parent);
            }
        }
    }
    assert(*node.flag & NODE_TYPE_PURE_BUCKET || *node.flag & NODE_TYPE_HYBRID_BUCKET);
    assert(len > 0);
    /* track the bucket's size delta so T->m counts a fresh insertion */
    size_t m_old = node.b->m;
    value_t* val;
    /* pure buckets store only the suffix after the discriminator char */
    if (*node.flag & NODE_TYPE_PURE_BUCKET) {
        val = ahtable_get(node.b, key + 1, len - 1);
    }
    else {
        val = ahtable_get(node.b, key, len);
    }
    T->m += (node.b->m - m_old);
    return val;
}
/* Look a key up without inserting. Returns a pointer to its value, or NULL
 * when the key is not present. */
value_t* hattrie_tryget(hattrie_t* T, const char* key, size_t len)
{
    /* walk the trie; key/len are advanced to the remaining suffix */
    node_ptr node = hattrie_find(T, &key, &len);
    if (node.flag == NULL) return NULL;

    /* key consumed exactly on a trie node: its value lives in that node */
    if (*node.flag & NODE_TYPE_TRIE) return &node.t->val;

    /* otherwise look up the remaining suffix in the bucket */
    return ahtable_tryget(node.b, key, len);
}
/* Delete a key from the trie. Returns 0 on success, -1 when not found. */
int hattrie_del(hattrie_t* T, const char* key, size_t len)
{
    node_ptr root = T->root;
    assert(*root.flag & NODE_TYPE_TRIE);

    /* find node for deletion */
    node_ptr node = hattrie_find(T, &key, &len);
    if (node.flag == NULL) return -1;

    /* key consumed on a trie node: just clear that node's value */
    if (*node.flag & NODE_TYPE_TRIE) return hattrie_clrval(T, node);

    /* remove the suffix from the bucket, keeping the global count in sync */
    size_t before = ahtable_size(node.b);
    int ret = ahtable_del(node.b, key, len);
    T->m -= (before - ahtable_size(node.b));

    /* merge empty buckets */
    /*! \todo */
    return ret;
}
/* plan for iteration:
 * This is tricky, as we have no parent pointers currently, and I would like to
 * avoid adding them. That means maintaining a stack
 *
 */
/* One pending node on the iteration stack: the node, the character by which
 * it was reached, and its depth in the trie. */
typedef struct hattrie_node_stack_t_
{
    unsigned char c;
    size_t level;
    node_ptr node;
    struct hattrie_node_stack_t_* next;
} hattrie_node_stack_t;
struct hattrie_iter_t_
{
    char* key;           // buffer holding the key prefix accumulated so far
    size_t keysize; // space reserved for the key
    size_t level;        // current depth, i.e. valid prefix length in key
    /* keep track of keys stored in trie nodes */
    bool has_nil_key;    // a trie-node value is pending before bucket keys
    value_t nil_val;     // that pending value
    const hattrie_t* T;
    bool sorted;         // whether bucket iteration is in sorted key order
    ahtable_iter_t* i;   // live iterator over the current bucket, or NULL
    hattrie_node_stack_t* stack;  // nodes not yet visited
};
/* Record that the iterator descended to `level` via character `c`: grow the
 * key buffer if needed, append c, and set the current level.
 *
 * Fix: the original doubled keysize only once, which keeps the buffer large
 * enough only because levels happen to grow one step at a time; looping until
 * keysize >= level makes the invariant hold unconditionally. */
static void hattrie_iter_pushchar(hattrie_iter_t* i, size_t level, char c)
{
    if (i->keysize < level) {
        while (i->keysize < level) i->keysize *= 2;
        i->key = realloc_or_die(i->key, i->keysize * sizeof(char));
    }
    if (level > 0) {
        i->key[level - 1] = c;
    }
    i->level = level;
}
/* Advance the iterator to the next node: pop the stack, and either (for a
 * trie node) surface its stored value as the pending nil key and push its
 * distinct children, or (for a bucket) start a bucket iterator. */
static void hattrie_iter_nextnode(hattrie_iter_t* i)
{
    if (i->stack == NULL) return;
    /* pop the stack */
    node_ptr node;
    hattrie_node_stack_t* next;
    unsigned char c;
    size_t level;
    node = i->stack->node;
    next = i->stack->next;
    c = i->stack->c;
    level = i->stack->level;
    free(i->stack);
    i->stack = next;
    if (*node.flag & NODE_TYPE_TRIE) {
        hattrie_iter_pushchar(i, level, c);
        /* a value stored on the trie node itself is emitted before any of the
         * node's descendants */
        if(node.t->flag & NODE_HAS_VAL) {
            i->has_nil_key = true;
            i->nil_val = node.t->val;
        }
        /* push all child nodes from right to left */
        int j;
        for (j = NODE_MAXCHAR; j >= 0; --j) {
            /* skip repeated pointers to hybrid bucket */
            if (j < NODE_MAXCHAR && node.t->xs[j].t == node.t->xs[j + 1].t) continue;
            // push stack
            next = i->stack;
            i->stack = malloc_or_die(sizeof(hattrie_node_stack_t));
            i->stack->node = node.t->xs[j];
            i->stack->next = next;
            i->stack->level = level + 1;
            i->stack->c = (unsigned char) j;
        }
    }
    else {
        if (*node.flag & NODE_TYPE_PURE_BUCKET) {
            /* pure bucket: c is part of every key, so it joins the prefix */
            hattrie_iter_pushchar(i, level, c);
        }
        else {
            /* hybrid bucket: keys begin with their own first char, so the
             * prefix ends one level up */
            i->level = level - 1;
        }
        i->i = ahtable_iter_begin(node.b, i->sorted);
    }
}
/* Begin iterating over all key/value pairs in the trie. Pass sorted=true to
 * visit keys in lexicographic order within each bucket. */
hattrie_iter_t* hattrie_iter_begin(const hattrie_t* T, bool sorted)
{
    hattrie_iter_t* i = malloc_or_die(sizeof(hattrie_iter_t));
    i->T = T;
    i->sorted = sorted;
    i->i = NULL;
    i->keysize = 16;
    i->key = malloc_or_die(i->keysize * sizeof(char));
    i->level = 0;
    i->has_nil_key = false;
    i->nil_val = 0;
    /* seed the stack with the root node */
    i->stack = malloc_or_die(sizeof(hattrie_node_stack_t));
    i->stack->next = NULL;
    i->stack->node = T->root;
    i->stack->c = '\0';
    i->stack->level = 0;
    /* advance until something is available: a live bucket iterator or a
     * pending trie-node (nil) key, or the stack runs out */
    while (((i->i == NULL || ahtable_iter_finished(i->i)) && !i->has_nil_key) &&
           i->stack != NULL ) {
        ahtable_iter_free(i->i);
        i->i = NULL;
        hattrie_iter_nextnode(i);
    }
    /* drop an exhausted bucket iterator so finished() reports correctly */
    if (i->i != NULL && ahtable_iter_finished(i->i)) {
        ahtable_iter_free(i->i);
        i->i = NULL;
    }
    return i;
}
/* Advance the iterator by one key/value pair. No-op once finished. */
void hattrie_iter_next(hattrie_iter_t* i)
{
    if (hattrie_iter_finished(i)) return;
    if (i->i != NULL && !ahtable_iter_finished(i->i)) {
        ahtable_iter_next(i->i);
    }
    /* the pending trie-node (nil) key was just consumed; move on */
    else if (i->has_nil_key) {
        i->has_nil_key = false;
        i->nil_val = 0;
        hattrie_iter_nextnode(i);
    }
    /* advance until something is available again or the stack is empty */
    while (((i->i == NULL || ahtable_iter_finished(i->i)) && !i->has_nil_key) &&
           i->stack != NULL ) {
        ahtable_iter_free(i->i);
        i->i = NULL;
        hattrie_iter_nextnode(i);
    }
    if (i->i != NULL && ahtable_iter_finished(i->i)) {
        ahtable_iter_free(i->i);
        i->i = NULL;
    }
}
/* True when iteration is complete: no nodes left on the stack, no live
 * bucket iterator, and no pending trie-node (nil) key. */
bool hattrie_iter_finished(hattrie_iter_t* i)
{
    return i->stack == NULL && i->i == NULL && !i->has_nil_key;
}
/* Free the iterator and any remaining traversal state. NULL-safe. */
void hattrie_iter_free(hattrie_iter_t* i)
{
    if (i == NULL) return;
    if (i->i) ahtable_iter_free(i->i);

    /* drain whatever is left of the node stack */
    while (i->stack) {
        hattrie_node_stack_t* top = i->stack;
        i->stack = top->next;
        free(top);
    }

    free(i->key);
    free(i);
}
/* Return the current key (NUL-terminated; *len excludes the terminator).
 * The key is the accumulated trie prefix plus the current bucket suffix, or
 * just the prefix for a trie-node (nil) key. Returns NULL when finished.
 *
 * Fix: the original unconditionally called memcpy with subkey == NULL for
 * the nil key; memcpy with a NULL source is undefined behavior per the C
 * standard even when the length is 0, so the copy is now guarded. */
const char* hattrie_iter_key(hattrie_iter_t* i, size_t* len)
{
    if (hattrie_iter_finished(i)) return NULL;

    size_t sublen;
    const char* subkey;
    if (i->has_nil_key) {
        subkey = NULL;
        sublen = 0;
    }
    else subkey = ahtable_iter_key(i->i, &sublen);

    /* grow the buffer to hold prefix + suffix + NUL */
    if (i->keysize < i->level + sublen + 1) {
        while (i->keysize < i->level + sublen + 1) i->keysize *= 2;
        i->key = realloc_or_die(i->key, i->keysize * sizeof(char));
    }

    if (sublen > 0) {
        memcpy(i->key + i->level, subkey, sublen);
    }
    i->key[i->level + sublen] = '\0';

    *len = i->level + sublen;
    return i->key;
}
/* Return a pointer to the current value: the pending trie-node value when
 * one is queued, otherwise the current bucket entry. NULL when finished. */
value_t* hattrie_iter_val(hattrie_iter_t* i)
{
    if (i->has_nil_key) return &i->nil_val;
    if (hattrie_iter_finished(i)) return NULL;
    return ahtable_iter_val(i->i);
}

View File

@ -0,0 +1,68 @@
/*
* This file is part of hat-trie
*
* Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
*
*
* This is an implementation of the HAT-trie data structure described in,
*
* Askitis, N., & Sinha, R. (2007). HAT-trie: a cache-conscious trie-based data
* structure for strings. Proceedings of the thirtieth Australasian conference on
 * Computer science-Volume 62 (pp. 97–105). Australian Computer Society, Inc.
*
* The HAT-trie is in essence a hybrid data structure, combining tries and hash
* tables in a clever way to try to get the best of both worlds.
*
*/
#ifndef HATTRIE_HATTRIE_H
#define HATTRIE_HATTRIE_H
#ifdef __cplusplus
extern "C" {
#endif
#include "common.h"
#include <stdlib.h>
#include <stdbool.h>
typedef struct hattrie_t_ hattrie_t;
hattrie_t* hattrie_create (void); //< Create an empty hat-trie.
void hattrie_free (hattrie_t*); //< Free all memory used by a trie.
hattrie_t* hattrie_dup (const hattrie_t*); //< Duplicate an existing trie.
void hattrie_clear (hattrie_t*); //< Remove all entries.
/** Find the given key in the trie, inserting it if it does not exist, and
 * returning a pointer to its value.
 *
 * This pointer is not guaranteed to be valid after additional calls to
 * hattrie_get, hattrie_del, hattrie_clear, or other functions that modify the
 * trie.
 */
value_t* hattrie_get (hattrie_t*, const char* key, size_t len);
/** Find a given key in the table, returning a NULL pointer if it does not
 * exist. */
value_t* hattrie_tryget (hattrie_t*, const char* key, size_t len);
/** Delete a given key from trie. Returns 0 if successful or -1 if not found.
 */
int hattrie_del(hattrie_t* T, const char* key, size_t len);
/* Iterator over all key/value pairs. Pass sorted=true for lexicographic
 * order within each bucket. */
typedef struct hattrie_iter_t_ hattrie_iter_t;
hattrie_iter_t* hattrie_iter_begin (const hattrie_t*, bool sorted);
void hattrie_iter_next (hattrie_iter_t*);
bool hattrie_iter_finished (hattrie_iter_t*);
void hattrie_iter_free (hattrie_iter_t*);
const char* hattrie_iter_key (hattrie_iter_t*, size_t* len);
value_t* hattrie_iter_val (hattrie_iter_t*);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,46 @@
/*
* This file is part of hat-trie.
*
* Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
*
*/
#include "misc.h"
#include <stdlib.h>
/* Allocate n bytes, printing a message and exiting on failure. A NULL result
 * for a zero-byte request is legal and returned as-is. */
void* malloc_or_die(size_t n)
{
    void* p = malloc(n);
    if (p != NULL || n == 0) {
        return p;
    }
    fprintf(stderr, "Cannot allocate %zu bytes.\n", n);
    exit(EXIT_FAILURE);
}
/* Resize ptr to n bytes, printing a message and exiting on failure. A NULL
 * result for a zero-byte request is legal and returned as-is. */
void* realloc_or_die(void* ptr, size_t n)
{
    void* p = realloc(ptr, n);
    if (p != NULL || n == 0) {
        return p;
    }
    fprintf(stderr, "Cannot allocate %zu bytes.\n", n);
    exit(EXIT_FAILURE);
}
/* Open a file with fopen, printing a message and exiting if it fails.
 * Never returns NULL. */
FILE* fopen_or_die(const char* path, const char* mode)
{
    FILE* f = fopen(path, mode);
    if (f == NULL) {
        fprintf(stderr, "Cannot open file %s with mode %s.\n", path, mode);
        exit(EXIT_FAILURE);
    }
    return f;
}

View File

@ -0,0 +1,22 @@
/*
* This file is part of hat-trie.
*
* Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
*
 * misc :
 * miscellaneous functions.
*
*/
#ifndef LINESET_MISC_H
#define LINESET_MISC_H
#include <stdio.h>
void* malloc_or_die(size_t);
void* realloc_or_die(void*, size_t);
FILE* fopen_or_die(const char*, const char*);
#endif

View File

@ -0,0 +1,77 @@
/* This is MurmurHash3. The original C++ code was placed in the public domain
* by its author, Austin Appleby. */
#include "murmurhash3.h"
/* MurmurHash3 32-bit finalization mix: xor-shift/multiply steps that force
 * the final bits to avalanche. The constants are from the reference
 * implementation. */
static inline uint32_t fmix(uint32_t h)
{
    h ^= h >> 16;
    h *= 0x85ebca6b;
    h ^= h >> 13;
    h *= 0xc2b2ae35;
    h ^= h >> 16;
    return h;
}
/* Rotate x left by r bits. NOTE(review): assumes 0 < r < 32 — r == 0 or 32
 * would shift by 32, which is undefined behavior; callers here use 13 and 15. */
static inline uint32_t rotl32(uint32_t x, int8_t r)
{
    return (x << r) | (x >> (32 - r));
}
/* MurmurHash3 (32-bit variant) of len_ bytes at data, with a fixed seed
 * (0xc062fb4a). Processes the input as 4-byte blocks, then mixes in the
 * 0-3 trailing bytes, then finalizes. */
uint32_t hash(const char* data, size_t len_)
{
    const int len = (int) len_;
    const int nblocks = len / 4;
    uint32_t h1 = 0xc062fb4a;   // seed
    uint32_t c1 = 0xcc9e2d51;   // block-mix constants from the reference code
    uint32_t c2 = 0x1b873593;
    //----------
    // body
    /* NOTE(review): reads the input through a uint32_t*, which assumes the
     * platform tolerates unaligned/type-punned loads (as the original
     * MurmurHash3 code does). The pointer is positioned past the blocks and
     * indexed negatively back toward the start. */
    const uint32_t * blocks = (const uint32_t*) (data + nblocks * 4);
    int i;
    for(i = -nblocks; i; i++)
    {
        uint32_t k1 = blocks[i];
        k1 *= c1;
        k1 = rotl32(k1, 15);
        k1 *= c2;
        h1 ^= k1;
        h1 = rotl32(h1, 13);
        h1 = h1*5+0xe6546b64;
    }
    //----------
    // tail
    const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
    uint32_t k1 = 0;
    /* intentional fallthrough: each case accumulates one trailing byte */
    switch(len & 3)
    {
        case 3: k1 ^= tail[2] << 16;
        case 2: k1 ^= tail[1] << 8;
        case 1: k1 ^= tail[0];
                k1 *= c1; k1 = rotl32(k1,15); k1 *= c2; h1 ^= k1;
    }
    //----------
    // finalization
    h1 ^= len;
    h1 = fmix(h1);
    return h1;
}

View File

@ -0,0 +1,12 @@
#ifndef MURMURHASH3_H
#define MURMURHASH3_H
#include <stdlib.h>
#include "pstdint.h"
uint32_t hash(const char* data, size_t len);
#endif

View File

@ -0,0 +1,813 @@
/* A portable stdint.h
****************************************************************************
* BSD License:
****************************************************************************
*
* Copyright (c) 2005-2014 Paul Hsieh
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
****************************************************************************
*
* Version 0.1.14
*
* The ANSI C standard committee, for the C99 standard, specified the
* inclusion of a new standard include file called stdint.h. This is
* a very useful and long desired include file which contains several
* very precise definitions for integer scalar types that is
* critically important for making portable several classes of
* applications including cryptography, hashing, variable length
* integer libraries and so on. But for most developers its likely
* useful just for programming sanity.
*
* The problem is that most compiler vendors have decided not to
* implement the C99 standard, and the next C++ language standard
* (which has a lot more mindshare these days) will be a long time in
* coming and its unknown whether or not it will include stdint.h or
* how much adoption it will have. Either way, it will be a long time
* before all compilers come with a stdint.h and it also does nothing
* for the extremely large number of compilers available today which
* do not include this file, or anything comparable to it.
*
* So that's what this file is all about. Its an attempt to build a
* single universal include file that works on as many platforms as
* possible to deliver what stdint.h is supposed to. A few things
* that should be noted about this file:
*
* 1) It is not guaranteed to be portable and/or present an identical
* interface on all platforms. The extreme variability of the
* ANSI C standard makes this an impossibility right from the
* very get go. Its really only meant to be useful for the vast
* majority of platforms that possess the capability of
* implementing usefully and precisely defined, standard sized
* integer scalars. Systems which are not intrinsically 2s
* complement may produce invalid constants.
*
* 2) There is an unavoidable use of non-reserved symbols.
*
* 3) Other standard include files are invoked.
*
* 4) This file may come in conflict with future platforms that do
* include stdint.h. The hope is that one or the other can be
* used with no real difference.
*
 *  5) In the current version, if your platform can't represent
* int32_t, int16_t and int8_t, it just dumps out with a compiler
* error.
*
* 6) 64 bit integers may or may not be defined. Test for their
* presence with the test: #ifdef INT64_MAX or #ifdef UINT64_MAX.
* Note that this is different from the C99 specification which
* requires the existence of 64 bit support in the compiler. If
* this is not defined for your platform, yet it is capable of
* dealing with 64 bits then it is because this file has not yet
* been extended to cover all of your system's capabilities.
*
* 7) (u)intptr_t may or may not be defined. Test for its presence
* with the test: #ifdef PTRDIFF_MAX. If this is not defined
* for your platform, then it is because this file has not yet
* been extended to cover all of your system's capabilities, not
* because its optional.
*
* 8) The following might not been defined even if your platform is
* capable of defining it:
*
* WCHAR_MIN
* WCHAR_MAX
* (u)int64_t
* PTRDIFF_MIN
* PTRDIFF_MAX
* (u)intptr_t
*
* 9) The following have not been defined:
*
* WINT_MIN
* WINT_MAX
*
* 10) The criteria for defining (u)int_least(*)_t isn't clear,
* except for systems which don't have a type that precisely
* defined 8, 16, or 32 bit types (which this include file does
* not support anyways). Default definitions have been given.
*
* 11) The criteria for defining (u)int_fast(*)_t isn't something I
* would trust to any particular compiler vendor or the ANSI C
* committee. It is well known that "compatible systems" are
* commonly created that have very different performance
* characteristics from the systems they are compatible with,
* especially those whose vendors make both the compiler and the
* system. Default definitions have been given, but its strongly
* recommended that users never use these definitions for any
* reason (they do *NOT* deliver any serious guarantee of
* improved performance -- not in this file, nor any vendor's
* stdint.h).
*
* 12) The following macros:
*
* PRINTF_INTMAX_MODIFIER
* PRINTF_INT64_MODIFIER
* PRINTF_INT32_MODIFIER
* PRINTF_INT16_MODIFIER
* PRINTF_LEAST64_MODIFIER
* PRINTF_LEAST32_MODIFIER
* PRINTF_LEAST16_MODIFIER
* PRINTF_INTPTR_MODIFIER
*
* are strings which have been defined as the modifiers required
* for the "d", "u" and "x" printf formats to correctly output
* (u)intmax_t, (u)int64_t, (u)int32_t, (u)int16_t, (u)least64_t,
* (u)least32_t, (u)least16_t and (u)intptr_t types respectively.
* PRINTF_INTPTR_MODIFIER is not defined for some systems which
* provide their own stdint.h. PRINTF_INT64_MODIFIER is not
* defined if INT64_MAX is not defined. These are an extension
* beyond what C99 specifies must be in stdint.h.
*
* In addition, the following macros are defined:
*
* PRINTF_INTMAX_HEX_WIDTH
* PRINTF_INT64_HEX_WIDTH
* PRINTF_INT32_HEX_WIDTH
* PRINTF_INT16_HEX_WIDTH
* PRINTF_INT8_HEX_WIDTH
* PRINTF_INTMAX_DEC_WIDTH
* PRINTF_INT64_DEC_WIDTH
* PRINTF_INT32_DEC_WIDTH
* PRINTF_INT16_DEC_WIDTH
* PRINTF_INT8_DEC_WIDTH
*
* Which specifies the maximum number of characters required to
* print the number of that type in either hexadecimal or decimal.
* These are an extension beyond what C99 specifies must be in
* stdint.h.
*
* Compilers tested (all with 0 warnings at their highest respective
* settings): Borland Turbo C 2.0, WATCOM C/C++ 11.0 (16 bits and 32
* bits), Microsoft Visual C++ 6.0 (32 bit), Microsoft Visual Studio
* .net (VC7), Intel C++ 4.0, GNU gcc v3.3.3
*
* This file should be considered a work in progress. Suggestions for
* improvements, especially those which increase coverage are strongly
* encouraged.
*
* Acknowledgements
*
* The following people have made significant contributions to the
* development and testing of this file:
*
* Chris Howie
* John Steele Scott
* Dave Thorup
* John Dill
* Florian Wobbe
* Christopher Sean Morrison
*
*/
#include <stddef.h>
#include <limits.h>
#include <signal.h>
/*
* For gcc with _STDINT_H, fill in the PRINTF_INT*_MODIFIER macros, and
* do nothing else. On the Mac OS X version of gcc this is _STDINT_H_.
*/
#if ((defined(__STDC__) && __STDC__ && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || (defined (__WATCOMC__) && (defined (_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (__GNUC__ > 3 || defined(_STDINT_H) || defined(_STDINT_H_) || defined (__UINT_FAST64_TYPE__)) )) && !defined (_PSTDINT_H_INCLUDED)
#include <stdint.h>
#define _PSTDINT_H_INCLUDED
# if defined(__GNUC__) && (defined(__x86_64__) || defined(__ppc64__))
# ifndef PRINTF_INT64_MODIFIER
# define PRINTF_INT64_MODIFIER "l"
# endif
# ifndef PRINTF_INT32_MODIFIER
# define PRINTF_INT32_MODIFIER ""
# endif
# else
# ifndef PRINTF_INT64_MODIFIER
# define PRINTF_INT64_MODIFIER "ll"
# endif
# ifndef PRINTF_INT32_MODIFIER
# define PRINTF_INT32_MODIFIER "l"
# endif
# endif
# ifndef PRINTF_INT16_MODIFIER
# define PRINTF_INT16_MODIFIER "h"
# endif
# ifndef PRINTF_INTMAX_MODIFIER
# define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
# endif
# ifndef PRINTF_INT64_HEX_WIDTH
# define PRINTF_INT64_HEX_WIDTH "16"
# endif
# ifndef PRINTF_INT32_HEX_WIDTH
# define PRINTF_INT32_HEX_WIDTH "8"
# endif
# ifndef PRINTF_INT16_HEX_WIDTH
# define PRINTF_INT16_HEX_WIDTH "4"
# endif
# ifndef PRINTF_INT8_HEX_WIDTH
# define PRINTF_INT8_HEX_WIDTH "2"
# endif
# ifndef PRINTF_INT64_DEC_WIDTH
# define PRINTF_INT64_DEC_WIDTH "20"
# endif
# ifndef PRINTF_INT32_DEC_WIDTH
# define PRINTF_INT32_DEC_WIDTH "10"
# endif
# ifndef PRINTF_INT16_DEC_WIDTH
# define PRINTF_INT16_DEC_WIDTH "5"
# endif
# ifndef PRINTF_INT8_DEC_WIDTH
# define PRINTF_INT8_DEC_WIDTH "3"
# endif
# ifndef PRINTF_INTMAX_HEX_WIDTH
# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH
# endif
# ifndef PRINTF_INTMAX_DEC_WIDTH
# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH
# endif
/*
* Something really weird is going on with Open Watcom. Just pull some of
* these duplicated definitions from Open Watcom's stdint.h file for now.
*/
# if defined (__WATCOMC__) && __WATCOMC__ >= 1250
# if !defined (INT64_C)
# define INT64_C(x) (x + (INT64_MAX - INT64_MAX))
# endif
# if !defined (UINT64_C)
# define UINT64_C(x) (x + (UINT64_MAX - UINT64_MAX))
# endif
# if !defined (INT32_C)
# define INT32_C(x) (x + (INT32_MAX - INT32_MAX))
# endif
# if !defined (UINT32_C)
# define UINT32_C(x) (x + (UINT32_MAX - UINT32_MAX))
# endif
# if !defined (INT16_C)
# define INT16_C(x) (x)
# endif
# if !defined (UINT16_C)
# define UINT16_C(x) (x)
# endif
# if !defined (INT8_C)
# define INT8_C(x) (x)
# endif
# if !defined (UINT8_C)
# define UINT8_C(x) (x)
# endif
# if !defined (UINT64_MAX)
# define UINT64_MAX 18446744073709551615ULL
# endif
# if !defined (INT64_MAX)
# define INT64_MAX 9223372036854775807LL
# endif
# if !defined (UINT32_MAX)
# define UINT32_MAX 4294967295UL
# endif
# if !defined (INT32_MAX)
# define INT32_MAX 2147483647L
# endif
# if !defined (INTMAX_MAX)
# define INTMAX_MAX INT64_MAX
# endif
# if !defined (INTMAX_MIN)
# define INTMAX_MIN INT64_MIN
# endif
# endif
#endif
#ifndef _PSTDINT_H_INCLUDED
#define _PSTDINT_H_INCLUDED
#ifndef SIZE_MAX
# define SIZE_MAX (~(size_t)0)
#endif
/*
* Deduce the type assignments from limits.h under the assumption that
* integer sizes in bits are powers of 2, and follow the ANSI
* definitions.
*/
#ifndef UINT8_MAX
# define UINT8_MAX 0xff
#endif
#if !defined(uint8_t) && !defined(_UINT8_T)
# if (UCHAR_MAX == UINT8_MAX) || defined (S_SPLINT_S)
typedef unsigned char uint8_t;
# define UINT8_C(v) ((uint8_t) v)
# else
# error "Platform not supported"
# endif
#endif
#ifndef INT8_MAX
# define INT8_MAX 0x7f
#endif
#ifndef INT8_MIN
# define INT8_MIN INT8_C(0x80)
#endif
#if !defined(int8_t) && !defined(_INT8_T)
# if (SCHAR_MAX == INT8_MAX) || defined (S_SPLINT_S)
typedef signed char int8_t;
# define INT8_C(v) ((int8_t) v)
# else
# error "Platform not supported"
# endif
#endif
#ifndef UINT16_MAX
# define UINT16_MAX 0xffff
#endif
#if !defined(uint16_t) && !defined(_UINT16_T)
#if (UINT_MAX == UINT16_MAX) || defined (S_SPLINT_S)
typedef unsigned int uint16_t;
# ifndef PRINTF_INT16_MODIFIER
# define PRINTF_INT16_MODIFIER ""
# endif
# define UINT16_C(v) ((uint16_t) (v))
#elif (USHRT_MAX == UINT16_MAX)
typedef unsigned short uint16_t;
# define UINT16_C(v) ((uint16_t) (v))
# ifndef PRINTF_INT16_MODIFIER
# define PRINTF_INT16_MODIFIER "h"
# endif
#else
#error "Platform not supported"
#endif
#endif
#ifndef INT16_MAX
# define INT16_MAX 0x7fff
#endif
#ifndef INT16_MIN
# define INT16_MIN INT16_C(0x8000)
#endif
#if !defined(int16_t) && !defined(_INT16_T)
#if (INT_MAX == INT16_MAX) || defined (S_SPLINT_S)
typedef signed int int16_t;
# define INT16_C(v) ((int16_t) (v))
# ifndef PRINTF_INT16_MODIFIER
# define PRINTF_INT16_MODIFIER ""
# endif
#elif (SHRT_MAX == INT16_MAX)
typedef signed short int16_t;
# define INT16_C(v) ((int16_t) (v))
# ifndef PRINTF_INT16_MODIFIER
# define PRINTF_INT16_MODIFIER "h"
# endif
#else
#error "Platform not supported"
#endif
#endif
#ifndef UINT32_MAX
# define UINT32_MAX (0xffffffffUL)
#endif
#if !defined(uint32_t) && !defined(_UINT32_T)
#if (ULONG_MAX == UINT32_MAX) || defined (S_SPLINT_S)
typedef unsigned long uint32_t;
# define UINT32_C(v) v ## UL
# ifndef PRINTF_INT32_MODIFIER
# define PRINTF_INT32_MODIFIER "l"
# endif
#elif (UINT_MAX == UINT32_MAX)
typedef unsigned int uint32_t;
# ifndef PRINTF_INT32_MODIFIER
# define PRINTF_INT32_MODIFIER ""
# endif
# define UINT32_C(v) v ## U
#elif (USHRT_MAX == UINT32_MAX)
typedef unsigned short uint32_t;
# define UINT32_C(v) ((unsigned short) (v))
# ifndef PRINTF_INT32_MODIFIER
# define PRINTF_INT32_MODIFIER ""
# endif
#else
#error "Platform not supported"
#endif
#endif
#ifndef INT32_MAX
# define INT32_MAX (0x7fffffffL)
#endif
#ifndef INT32_MIN
# define INT32_MIN INT32_C(0x80000000)
#endif
#if !defined(int32_t) && !defined(_INT32_T)
#if (LONG_MAX == INT32_MAX) || defined (S_SPLINT_S)
typedef signed long int32_t;
# define INT32_C(v) v ## L
# ifndef PRINTF_INT32_MODIFIER
# define PRINTF_INT32_MODIFIER "l"
# endif
#elif (INT_MAX == INT32_MAX)
typedef signed int int32_t;
# define INT32_C(v) v
# ifndef PRINTF_INT32_MODIFIER
# define PRINTF_INT32_MODIFIER ""
# endif
#elif (SHRT_MAX == INT32_MAX)
typedef signed short int32_t;
# define INT32_C(v) ((short) (v))
# ifndef PRINTF_INT32_MODIFIER
# define PRINTF_INT32_MODIFIER ""
# endif
#else
#error "Platform not supported"
#endif
#endif
/*
* The macro stdint_int64_defined is temporarily used to record
* whether or not 64 integer support is available. It must be
* defined for any 64 integer extensions for new platforms that are
* added.
*/
#undef stdint_int64_defined
#if (defined(__STDC__) && defined(__STDC_VERSION__)) || defined (S_SPLINT_S)
# if (__STDC__ && __STDC_VERSION__ >= 199901L) || defined (S_SPLINT_S)
# define stdint_int64_defined
typedef long long int64_t;
typedef unsigned long long uint64_t;
# define UINT64_C(v) v ## ULL
# define INT64_C(v) v ## LL
# ifndef PRINTF_INT64_MODIFIER
# define PRINTF_INT64_MODIFIER "ll"
# endif
# endif
#endif
#if !defined (stdint_int64_defined)
# if defined(__GNUC__)
# define stdint_int64_defined
__extension__ typedef long long int64_t;
__extension__ typedef unsigned long long uint64_t;
# define UINT64_C(v) v ## ULL
# define INT64_C(v) v ## LL
# ifndef PRINTF_INT64_MODIFIER
# define PRINTF_INT64_MODIFIER "ll"
# endif
# elif defined(__MWERKS__) || defined (__SUNPRO_C) || defined (__SUNPRO_CC) || defined (__APPLE_CC__) || defined (_LONG_LONG) || defined (_CRAYC) || defined (S_SPLINT_S)
# define stdint_int64_defined
typedef long long int64_t;
typedef unsigned long long uint64_t;
# define UINT64_C(v) v ## ULL
# define INT64_C(v) v ## LL
# ifndef PRINTF_INT64_MODIFIER
# define PRINTF_INT64_MODIFIER "ll"
# endif
# elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined (__BORLANDC__) && __BORLANDC__ > 0x460) || defined (__alpha) || defined (__DECC)
# define stdint_int64_defined
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
# define UINT64_C(v) v ## UI64
# define INT64_C(v) v ## I64
# ifndef PRINTF_INT64_MODIFIER
# define PRINTF_INT64_MODIFIER "I64"
# endif
# endif
#endif
#if !defined (LONG_LONG_MAX) && defined (INT64_C)
# define LONG_LONG_MAX INT64_C (9223372036854775807)
#endif
#ifndef ULONG_LONG_MAX
# define ULONG_LONG_MAX UINT64_C (18446744073709551615)
#endif
#if !defined (INT64_MAX) && defined (INT64_C)
# define INT64_MAX INT64_C (9223372036854775807)
#endif
#if !defined (INT64_MIN) && defined (INT64_C)
# define INT64_MIN INT64_C (-9223372036854775808)
#endif
#if !defined (UINT64_MAX) && defined (INT64_C)
# define UINT64_MAX UINT64_C (18446744073709551615)
#endif
/*
* Width of hexadecimal for number field.
*/
#ifndef PRINTF_INT64_HEX_WIDTH
# define PRINTF_INT64_HEX_WIDTH "16"
#endif
#ifndef PRINTF_INT32_HEX_WIDTH
# define PRINTF_INT32_HEX_WIDTH "8"
#endif
#ifndef PRINTF_INT16_HEX_WIDTH
# define PRINTF_INT16_HEX_WIDTH "4"
#endif
#ifndef PRINTF_INT8_HEX_WIDTH
# define PRINTF_INT8_HEX_WIDTH "2"
#endif
#ifndef PRINTF_INT64_DEC_WIDTH
# define PRINTF_INT64_DEC_WIDTH "20"
#endif
#ifndef PRINTF_INT32_DEC_WIDTH
# define PRINTF_INT32_DEC_WIDTH "10"
#endif
#ifndef PRINTF_INT16_DEC_WIDTH
# define PRINTF_INT16_DEC_WIDTH "5"
#endif
#ifndef PRINTF_INT8_DEC_WIDTH
# define PRINTF_INT8_DEC_WIDTH "3"
#endif
/*
* Ok, lets not worry about 128 bit integers for now. Moore's law says
* we don't need to worry about that until about 2040 at which point
* we'll have bigger things to worry about.
*/
#ifdef stdint_int64_defined
typedef int64_t intmax_t;
typedef uint64_t uintmax_t;
# define INTMAX_MAX INT64_MAX
# define INTMAX_MIN INT64_MIN
# define UINTMAX_MAX UINT64_MAX
# define UINTMAX_C(v) UINT64_C(v)
# define INTMAX_C(v) INT64_C(v)
# ifndef PRINTF_INTMAX_MODIFIER
# define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
# endif
# ifndef PRINTF_INTMAX_HEX_WIDTH
# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH
# endif
# ifndef PRINTF_INTMAX_DEC_WIDTH
# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH
# endif
#else
typedef int32_t intmax_t;
typedef uint32_t uintmax_t;
# define INTMAX_MAX INT32_MAX
# define UINTMAX_MAX UINT32_MAX
# define UINTMAX_C(v) UINT32_C(v)
# define INTMAX_C(v) INT32_C(v)
# ifndef PRINTF_INTMAX_MODIFIER
# define PRINTF_INTMAX_MODIFIER PRINTF_INT32_MODIFIER
# endif
# ifndef PRINTF_INTMAX_HEX_WIDTH
# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT32_HEX_WIDTH
# endif
# ifndef PRINTF_INTMAX_DEC_WIDTH
# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT32_DEC_WIDTH
# endif
#endif
/*
* Because this file currently only supports platforms which have
* precise powers of 2 as bit sizes for the default integers, the
* least definitions are all trivial. Its possible that a future
* version of this file could have different definitions.
*/
#ifndef stdint_least_defined
typedef int8_t int_least8_t;
typedef uint8_t uint_least8_t;
typedef int16_t int_least16_t;
typedef uint16_t uint_least16_t;
typedef int32_t int_least32_t;
typedef uint32_t uint_least32_t;
# define PRINTF_LEAST32_MODIFIER PRINTF_INT32_MODIFIER
# define PRINTF_LEAST16_MODIFIER PRINTF_INT16_MODIFIER
# define UINT_LEAST8_MAX UINT8_MAX
# define INT_LEAST8_MAX INT8_MAX
# define UINT_LEAST16_MAX UINT16_MAX
# define INT_LEAST16_MAX INT16_MAX
# define UINT_LEAST32_MAX UINT32_MAX
# define INT_LEAST32_MAX INT32_MAX
# define INT_LEAST8_MIN INT8_MIN
# define INT_LEAST16_MIN INT16_MIN
# define INT_LEAST32_MIN INT32_MIN
# ifdef stdint_int64_defined
typedef int64_t int_least64_t;
typedef uint64_t uint_least64_t;
# define PRINTF_LEAST64_MODIFIER PRINTF_INT64_MODIFIER
# define UINT_LEAST64_MAX UINT64_MAX
# define INT_LEAST64_MAX INT64_MAX
# define INT_LEAST64_MIN INT64_MIN
# endif
#endif
#undef stdint_least_defined
/*
* The ANSI C committee pretending to know or specify anything about
* performance is the epitome of misguided arrogance. The mandate of
* this file is to *ONLY* ever support that absolute minimum
* definition of the fast integer types, for compatibility purposes.
* No extensions, and no attempt to suggest what may or may not be a
* faster integer type will ever be made in this file. Developers are
* warned to stay away from these types when using this or any other
* stdint.h.
*/
typedef int_least8_t int_fast8_t;
typedef uint_least8_t uint_fast8_t;
typedef int_least16_t int_fast16_t;
typedef uint_least16_t uint_fast16_t;
typedef int_least32_t int_fast32_t;
typedef uint_least32_t uint_fast32_t;
#define UINT_FAST8_MAX UINT_LEAST8_MAX
#define INT_FAST8_MAX INT_LEAST8_MAX
#define UINT_FAST16_MAX UINT_LEAST16_MAX
#define INT_FAST16_MAX INT_LEAST16_MAX
#define UINT_FAST32_MAX UINT_LEAST32_MAX
#define INT_FAST32_MAX INT_LEAST32_MAX
#define INT_FAST8_MIN INT_LEAST8_MIN
#define INT_FAST16_MIN INT_LEAST16_MIN
#define INT_FAST32_MIN INT_LEAST32_MIN
#ifdef stdint_int64_defined
typedef int_least64_t int_fast64_t;
typedef uint_least64_t uint_fast64_t;
# define UINT_FAST64_MAX UINT_LEAST64_MAX
# define INT_FAST64_MAX INT_LEAST64_MAX
# define INT_FAST64_MIN INT_LEAST64_MIN
#endif
#undef stdint_int64_defined
/*
* Whatever piecemeal, per compiler thing we can do about the wchar_t
* type limits.
*/
#if defined(__WATCOMC__) || defined(_MSC_VER) || defined (__GNUC__)
# include <wchar.h>
# ifndef WCHAR_MIN
# define WCHAR_MIN 0
# endif
# ifndef WCHAR_MAX
# define WCHAR_MAX ((wchar_t)-1)
# endif
#endif
/*
* Whatever piecemeal, per compiler/platform thing we can do about the
* (u)intptr_t types and limits.
*/
#if (defined (_MSC_VER) && defined (_UINTPTR_T_DEFINED)) || defined (_UINTPTR_T)
# define STDINT_H_UINTPTR_T_DEFINED
#endif
#ifndef STDINT_H_UINTPTR_T_DEFINED
# if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) || defined (_WIN64) || defined (__ppc64__)
# define stdint_intptr_bits 64
# elif defined (__WATCOMC__) || defined (__TURBOC__)
# if defined(__TINY__) || defined(__SMALL__) || defined(__MEDIUM__)
# define stdint_intptr_bits 16
# else
# define stdint_intptr_bits 32
# endif
# elif defined (__i386__) || defined (_WIN32) || defined (WIN32) || defined (__ppc64__)
# define stdint_intptr_bits 32
# elif defined (__INTEL_COMPILER)
/* TODO -- what did Intel do about x86-64? */
# else
/* #error "This platform might not be supported yet" */
# endif
# ifdef stdint_intptr_bits
# define stdint_intptr_glue3_i(a,b,c) a##b##c
# define stdint_intptr_glue3(a,b,c) stdint_intptr_glue3_i(a,b,c)
# ifndef PRINTF_INTPTR_MODIFIER
# define PRINTF_INTPTR_MODIFIER stdint_intptr_glue3(PRINTF_INT,stdint_intptr_bits,_MODIFIER)
# endif
# ifndef PTRDIFF_MAX
# define PTRDIFF_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
# endif
# ifndef PTRDIFF_MIN
# define PTRDIFF_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
# endif
# ifndef UINTPTR_MAX
# define UINTPTR_MAX stdint_intptr_glue3(UINT,stdint_intptr_bits,_MAX)
# endif
# ifndef INTPTR_MAX
# define INTPTR_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
# endif
# ifndef INTPTR_MIN
# define INTPTR_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
# endif
# ifndef INTPTR_C
# define INTPTR_C(x) stdint_intptr_glue3(INT,stdint_intptr_bits,_C)(x)
# endif
# ifndef UINTPTR_C
# define UINTPTR_C(x) stdint_intptr_glue3(UINT,stdint_intptr_bits,_C)(x)
# endif
typedef stdint_intptr_glue3(uint,stdint_intptr_bits,_t) uintptr_t;
typedef stdint_intptr_glue3( int,stdint_intptr_bits,_t) intptr_t;
# else
/* TODO -- This following is likely wrong for some platforms, and does
nothing for the definition of uintptr_t. */
typedef ptrdiff_t intptr_t;
# endif
# define STDINT_H_UINTPTR_T_DEFINED
#endif
/*
* Assumes sig_atomic_t is signed and we have a 2s complement machine.
*/
#ifndef SIG_ATOMIC_MAX
# define SIG_ATOMIC_MAX ((((sig_atomic_t) 1) << (sizeof (sig_atomic_t)*CHAR_BIT-1)) - 1)
#endif
#endif
#if defined (__TEST_PSTDINT_FOR_CORRECTNESS)
/*
* Please compile with the maximum warning settings to make sure macros are not
* defined more than once.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define glue3_aux(x,y,z) x ## y ## z
#define glue3(x,y,z) glue3_aux(x,y,z)
#define DECLU(bits) glue3(uint,bits,_t) glue3(u,bits,) = glue3(UINT,bits,_C) (0);
#define DECLI(bits) glue3(int,bits,_t) glue3(i,bits,) = glue3(INT,bits,_C) (0);
#define DECL(us,bits) glue3(DECL,us,) (bits)
#define TESTUMAX(bits) glue3(u,bits,) = ~glue3(u,bits,); if (glue3(UINT,bits,_MAX) != glue3(u,bits,)) printf ("Something wrong with UINT%d_MAX\n", bits)
/* Self-test harness for pstdint.h, compiled only when
 * __TEST_PSTDINT_FOR_CORRECTNESS is defined. Declares one zero-initialized
 * variable of each fixed-width type via the DECL macros, then verifies that
 * each type prints identically to a plain int 0 with its PRINTF_*_MODIFIER,
 * and that the UINT*_MAX constants match the all-ones bit pattern. */
int main () {
	DECL(I,8)
	DECL(U,8)
	DECL(I,16)
	DECL(U,16)
	DECL(I,32)
	DECL(U,32)
#ifdef INT64_MAX
	DECL(I,64)
	DECL(U,64)
#endif
	intmax_t imax = INTMAX_C(0);
	uintmax_t umax = UINTMAX_C(0);
	char str0[256], str1[256];

	/* Reference rendering of "0 <all-ones-hex>" using plain int. */
	sprintf (str0, "%d %x\n", 0, ~0);

	/* Each sized type must format the same way (zero value, same hex). */
	sprintf (str1, "%d %x\n", i8, ~0);
	if (0 != strcmp (str0, str1)) printf ("Something wrong with i8 : %s\n", str1);
	sprintf (str1, "%u %x\n", u8, ~0);
	if (0 != strcmp (str0, str1)) printf ("Something wrong with u8 : %s\n", str1);
	sprintf (str1, "%d %x\n", i16, ~0);
	if (0 != strcmp (str0, str1)) printf ("Something wrong with i16 : %s\n", str1);
	sprintf (str1, "%u %x\n", u16, ~0);
	if (0 != strcmp (str0, str1)) printf ("Something wrong with u16 : %s\n", str1);
	sprintf (str1, "%" PRINTF_INT32_MODIFIER "d %x\n", i32, ~0);
	if (0 != strcmp (str0, str1)) printf ("Something wrong with i32 : %s\n", str1);
	sprintf (str1, "%" PRINTF_INT32_MODIFIER "u %x\n", u32, ~0);
	if (0 != strcmp (str0, str1)) printf ("Something wrong with u32 : %s\n", str1);
#ifdef INT64_MAX
	sprintf (str1, "%" PRINTF_INT64_MODIFIER "d %x\n", i64, ~0);
	if (0 != strcmp (str0, str1)) printf ("Something wrong with i64 : %s\n", str1);
#endif
	sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "d %x\n", imax, ~0);
	if (0 != strcmp (str0, str1)) printf ("Something wrong with imax : %s\n", str1);
	sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "u %x\n", umax, ~0);
	if (0 != strcmp (str0, str1)) printf ("Something wrong with umax : %s\n", str1);

	/* UINT<n>_MAX must equal the all-ones value of uint<n>_t. */
	TESTUMAX(8);
	TESTUMAX(16);
	TESTUMAX(32);
#ifdef INT64_MAX
	TESTUMAX(64);
#endif

	return EXIT_SUCCESS;
}
#endif

View File

@ -0,0 +1,15 @@
# Unit tests run by `make check`, plus a sorted-iteration micro-benchmark
# (built by `make check` but not executed as a test).
TESTS = check_ahtable check_hattrie
check_PROGRAMS = check_ahtable check_hattrie bench_sorted_iter

# Array-hash-table unit test; validated against the str_map reference map.
check_ahtable_SOURCES = check_ahtable.c str_map.c
check_ahtable_LDADD = $(top_builddir)/src/libhat-trie.la
check_ahtable_CPPFLAGS = -I$(top_builddir)/src

# hat-trie unit test; also validated against str_map.
check_hattrie_SOURCES = check_hattrie.c str_map.c
check_hattrie_LDADD = $(top_builddir)/src/libhat-trie.la
check_hattrie_CPPFLAGS = -I$(top_builddir)/src

# Benchmark comparing sorted vs. unsorted iteration over the trie.
bench_sorted_iter_SOURCES = bench_sorted_iter.c
bench_sorted_iter_LDADD = $(top_builddir)/src/libhat-trie.la
bench_sorted_iter_CPPFLAGS = -I$(top_builddir)/src

View File

@ -0,0 +1,69 @@
/* A quick test of the degree to which ordered iteration is slower than unordered. */
#include "../src/hat-trie.h"
#include <stdio.h>
#include <time.h>
/* Simple random string generation. */
/* Fill x[0..len-1] with random printable ASCII (0x20..0x7e) and
 * NUL-terminate at x[len]. The buffer must hold len + 1 bytes; output
 * depends on the global rand() seed. */
void randstr(char* x, size_t len)
{
    x[len] = '\0';
    for (; len > 0; --len) {
        x[len - 1] = '\x20' + (rand() % ('\x7e' - '\x20' + 1));
    }
}
/* Benchmark: populate a hat-trie with n random strings, then time 100
 * full iterations in unsorted order vs. sorted order and report both
 * wall-clock figures on stderr. */
int main()
{
    hattrie_t* T = hattrie_create();
    const size_t n = 1000000;  // how many strings
    const size_t m_low  = 50;  // minimum length of each string
    const size_t m_high = 500; // maximum length of each string
    char x[501];               // scratch key buffer (m_high + NUL)

    /* Populate the trie with n random keys, each mapped to 1. */
    size_t i, m;
    for (i = 0; i < n; ++i) {
        m = m_low + rand() % (m_high - m_low);
        randstr(x, m);
        *hattrie_get(T, x, m) = 1;
    }

    hattrie_iter_t* it;
    clock_t t0, t;
    const size_t repetitions = 100;
    size_t r;

    /* iterate in unsorted order */
    fprintf(stderr, "iterating out of order ... ");
    t0 = clock();
    for (r = 0; r < repetitions; ++r) {
        it = hattrie_iter_begin(T, false);
        while (!hattrie_iter_finished(it)) {
            hattrie_iter_next(it);
        }
        hattrie_iter_free(it);
    }
    t = clock();
    fprintf(stderr, "finished. (%0.2f seconds)\n", (double) (t - t0) / (double) CLOCKS_PER_SEC);

    /* iterate in sorted order */
    fprintf(stderr, "iterating in order ... ");
    t0 = clock();
    for (r = 0; r < repetitions; ++r) {
        it = hattrie_iter_begin(T, true);
        while (!hattrie_iter_finished(it)) {
            hattrie_iter_next(it);
        }
        hattrie_iter_free(it);
    }
    t = clock();
    fprintf(stderr, "finished. (%0.2f seconds)\n", (double) (t - t0) / (double) CLOCKS_PER_SEC);

    hattrie_free(T);
    return 0;
}

View File

@ -0,0 +1,220 @@
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "str_map.h"
#include "../src/ahtable.h"
/* Simple random string generation. */
/* Fill x[0..len-1] with random printable ASCII (0x20..0x7e) and
 * NUL-terminate at x[len]. The buffer must hold len + 1 bytes. */
void randstr(char* x, size_t len)
{
    x[len] = '\0';
    while (len > 0) {
        x[--len] = '\x20' + (rand() % ('\x7e' - '\x20' + 1));
    }
}
/* Test parameters. */
const size_t n = 100000;  // how many unique strings
const size_t m_low = 50;  // minimum length of each string
const size_t m_high = 500; // maximum length of each string
const size_t k = 200000;  // number of insertions

/* Fixtures shared by setup()/teardown() and every test case. */
char** xs;      /* n randomly generated keys */
ahtable_t* T;   /* table under test */
str_map* M;     /* reference map used to cross-check T */
/* Build the fixtures: generate n random keys of length m_low..m_high-1 and
 * create an empty table under test (T) plus an empty reference map (M). */
void setup()
{
    fprintf(stderr, "generating %zu keys ... ", n);
    xs = malloc(n * sizeof(char*));
    size_t i;
    size_t m;
    for (i = 0; i < n; ++i) {
        m = m_low + rand() % (m_high - m_low);
        xs[i] = malloc(m + 1);
        randstr(xs[i], m);
    }

    T = ahtable_create();
    M = str_map_create();
    fprintf(stderr, "done.\n");
}
/* Release everything built by setup(): the table under test, the reference
 * map, each generated key, and the key array itself. */
void teardown()
{
    size_t idx;

    ahtable_free(T);
    str_map_destroy(M);

    for (idx = 0; idx < n; ++idx) free(xs[idx]);
    free(xs);
}
/* Insert k random keys (with repetition) into both the ahtable and the
 * reference str_map, checking after every insert that the two tallies agree;
 * then delete k/100 randomly chosen keys from both and verify they are gone.
 *
 * Fix: the delete loop's condition previously read `i < k/100`, testing the
 * random key index `i` instead of the loop counter `j`, so the number of
 * deletions performed was effectively random. */
void test_ahtable_insert()
{
    fprintf(stderr, "inserting %zu keys ... \n", k);

    size_t i, j;
    value_t* u;
    value_t v;
    for (j = 0; j < k; ++j) {
        i = rand() % n;
        v = 1 + str_map_get(M, xs[i], strlen(xs[i]));
        str_map_set(M, xs[i], strlen(xs[i]), v);
        u = ahtable_get(T, xs[i], strlen(xs[i]));
        *u += 1;

        if (*u != v) {
            fprintf(stderr, "[error] tally mismatch (reported: %lu, correct: %lu)\n",
                    *u, v);
        }
    }

    /* delete some keys */
    for (j = 0; j < k/100; ++j) {
        i = rand() % n;
        ahtable_del(T, xs[i], strlen(xs[i]));
        str_map_del(M, xs[i], strlen(xs[i]));
        u = ahtable_tryget(T, xs[i], strlen(xs[i]));
        if (u) {
            fprintf(stderr, "[error] deleted node found in ahtable\n");
        }
    }

    fprintf(stderr, "done.\n");
}
/* Walk the table in unsorted order and check that every (key, value) pair
 * matches the reference map, that no key appears twice, and that the total
 * element count agrees with the map's. */
void test_ahtable_iteration()
{
    fprintf(stderr, "iterating through %zu keys ... \n", k);

    ahtable_iter_t* i = ahtable_iter_begin(T, false);

    size_t count = 0;
    value_t* u;
    value_t v;

    size_t len;
    const char* key;

    while (!ahtable_iter_finished(i)) {
        ++count;

        key = ahtable_iter_key(i, &len);
        u   = ahtable_iter_val(i);

        v = str_map_get(M, key, len);

        if (*u != v) {
            if (v == 0) {
                fprintf(stderr, "[error] incorrect iteration (%lu, %lu)\n", *u, v);
            }
            else {
                fprintf(stderr, "[error] incorrect iteration tally (%lu, %lu)\n", *u, v);
            }
        }

        // this way we will see an error if the same key is iterated through
        // twice
        str_map_set(M, key, len, 0);

        ahtable_iter_next(i);
    }

    if (count != M->m) {
        fprintf(stderr, "[error] iterated through %zu element, expected %zu\n",
                count, M->m);
    }

    ahtable_iter_free(i);

    fprintf(stderr, "done.\n");
}
/* Lexicographically compare two length-delimited keys; when one is a prefix
 * of the other, the shorter key orders first. memcmp-style result. */
int cmpkey(const char* a, size_t ka, const char* b, size_t kb)
{
    size_t shorter = ka < kb ? ka : kb;
    int order = memcmp(a, b, shorter);
    if (order != 0) {
        return order;
    }
    return (int) ka - (int) kb;
}
/* Walk the table in sorted order, checking the ordering with cmpkey() and
 * every value against the reference map.
 *
 * Fixes (mirroring test_hattrie_sorted_iteration in check_hattrie.c):
 *  - The pointer returned by ahtable_iter_key() was read again on the NEXT
 *    loop pass, after ahtable_iter_next() had already run; the key is now
 *    copied into a stable buffer (key_copy) before advancing. This also
 *    removes the memcpy from a NULL source on the first pass.
 *  - Adds the element-count check the other iteration tests perform. */
void test_ahtable_sorted_iteration()
{
    fprintf(stderr, "iterating in order through %zu keys ... \n", k);

    ahtable_iter_t* i = ahtable_iter_begin(T, true);

    size_t count = 0;
    value_t* u;
    value_t v;

    char* key_copy = malloc(m_high + 1);
    char* prev_key = malloc(m_high + 1);
    size_t prev_len = 0;

    const char *key = NULL;
    size_t len = 0;

    while (!ahtable_iter_finished(i)) {
        /* Save the previous key from our own copy, not the iterator's. */
        memcpy(prev_key, key_copy, len);
        prev_len = len;

        ++count;

        key = ahtable_iter_key(i, &len);
        /* Copy before advancing: the iterator may reuse this memory. */
        memcpy(key_copy, key, len);

        if (cmpkey(prev_key, prev_len, key, len) > 0) {
            fprintf(stderr, "[error] iteration is not correctly ordered.\n");
        }

        u = ahtable_iter_val(i);
        v = str_map_get(M, key, len);

        if (*u != v) {
            if (v == 0) {
                fprintf(stderr, "[error] incorrect iteration (%lu, %lu)\n", *u, v);
            }
            else {
                fprintf(stderr, "[error] incorrect iteration tally (%lu, %lu)\n", *u, v);
            }
        }

        // this way we will see an error if the same key is iterated through
        // twice
        str_map_set(M, key, len, 0);

        ahtable_iter_next(i);
    }

    if (count != M->m) {
        fprintf(stderr, "[error] iterated through %zu element, expected %zu\n",
                count, M->m);
    }

    ahtable_iter_free(i);
    free(prev_key);
    free(key_copy);

    fprintf(stderr, "done.\n");
}
/* Run each iteration test against a freshly built and populated table;
 * setup()/teardown() bracket each case so the fixtures are independent. */
int main()
{
    setup();
    test_ahtable_insert();
    test_ahtable_iteration();
    teardown();

    setup();
    test_ahtable_insert();
    test_ahtable_sorted_iteration();
    teardown();

    return 0;
}

View File

@ -0,0 +1,268 @@
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "str_map.h"
#include "../src/hat-trie.h"
/* Simple random string generation. */
/* Fill x[0..len-1] with random printable ASCII (0x20..0x7e) and
 * NUL-terminate at x[len]. The buffer must hold len + 1 bytes. */
void randstr(char* x, size_t len)
{
    x[len] = '\0';
    while (len > 0) {
        x[--len] = '\x20' + (rand() % ('\x7e' - '\x20' + 1));
    }
}
/* Test parameters. */
const size_t n = 100000;  // how many unique strings
const size_t m_low = 50;  // minimum length of each string
const size_t m_high = 500; // maximum length of each string
const size_t k = 200000;  // number of insertions
const size_t d = 50000;   // number of deletions

/* Fixtures shared by setup()/teardown() and every test case. */
char** xs;      /* n randomly generated keys */
char** ds;      /* d keys sampled from xs to delete (aliases into xs) */
hattrie_t* T;   /* trie under test */
str_map* M;     /* reference map used to cross-check T */
/* Build the fixtures: generate n random keys, sample d of them (with
 * repetition) as deletion targets, and create an empty trie plus an empty
 * reference map. */
void setup()
{
    fprintf(stderr, "generating %zu keys ... ", n);
    xs = malloc(n * sizeof(char*));
    ds = malloc(d * sizeof(char*));
    size_t i;
    size_t m;
    for (i = 0; i < n; ++i) {
        m = m_low + rand() % (m_high - m_low);
        xs[i] = malloc(m + 1);
        randstr(xs[i], m);
    }
    /* ds entries alias xs; they are NOT separately freed in teardown(). */
    for (i = 0; i < d; ++i) {
        m = rand()%n;
        ds[i] = xs[m];
    }

    T = hattrie_create();
    M = str_map_create();
    fprintf(stderr, "done.\n");
}
/* Release everything built by setup(): the trie, the reference map, every
 * generated key, and both pointer arrays (ds aliases xs, so only the array
 * itself is freed). */
void teardown()
{
    size_t idx;

    hattrie_free(T);
    str_map_destroy(M);

    for (idx = 0; idx < n; ++idx) {
        free(xs[idx]);
    }
    free(xs);
    free(ds);
}
/* Insert k random keys (with repetition) into both the trie and the
 * reference map, checking after every insert that the tallies agree; then
 * delete the d sampled keys from both and verify they are gone. */
void test_hattrie_insert()
{
    fprintf(stderr, "inserting %zu keys ... \n", k);

    size_t i, j;
    value_t* u;
    value_t v;

    for (j = 0; j < k; ++j) {
        i = rand() % n;
        v = 1 + str_map_get(M, xs[i], strlen(xs[i]));
        str_map_set(M, xs[i], strlen(xs[i]), v);
        u = hattrie_get(T, xs[i], strlen(xs[i]));
        *u += 1;

        if (*u != v) {
            fprintf(stderr, "[error] tally mismatch (reported: %lu, correct: %lu)\n",
                    *u, v);
        }
    }

    fprintf(stderr, "deleting %zu keys ... \n", d);
    for (j = 0; j < d; ++j) {
        str_map_del(M, ds[j], strlen(ds[j]));
        hattrie_del(T, ds[j], strlen(ds[j]));

        u = hattrie_tryget(T, ds[j], strlen(ds[j]));
        if (u) {
            fprintf(stderr, "[error] item %zu still found in trie after delete\n",
                    j);
        }
    }

    fprintf(stderr, "done.\n");
}
/* Walk the trie in unsorted order and check that every (key, value) pair
 * matches the reference map, that no key appears twice, and that the total
 * element count agrees with the map's. */
void test_hattrie_iteration()
{
    fprintf(stderr, "iterating through %zu keys ... \n", k);

    hattrie_iter_t* i = hattrie_iter_begin(T, false);

    size_t count = 0;
    value_t* u;
    value_t v;

    size_t len;
    const char* key;

    while (!hattrie_iter_finished(i)) {
        ++count;

        key = hattrie_iter_key(i, &len);
        u   = hattrie_iter_val(i);

        v = str_map_get(M, key, len);

        if (*u != v) {
            if (v == 0) {
                fprintf(stderr, "[error] incorrect iteration (%lu, %lu)\n", *u, v);
            }
            else {
                fprintf(stderr, "[error] incorrect iteration tally (%lu, %lu)\n", *u, v);
            }
        }

        // this way we will see an error if the same key is iterated through
        // twice
        str_map_set(M, key, len, 0);

        hattrie_iter_next(i);
    }

    if (count != M->m) {
        fprintf(stderr, "[error] iterated through %zu element, expected %zu\n",
                count, M->m);
    }

    hattrie_iter_free(i);

    fprintf(stderr, "done.\n");
}
/* Lexicographically compare two length-delimited keys; when one is a prefix
 * of the other, the shorter key orders first. memcmp-style result. */
int cmpkey(const char* a, size_t ka, const char* b, size_t kb)
{
    int c = memcmp(a, b, ka < kb ? ka : kb);
    return c == 0 ? (int) ka - (int) kb : c;
}
/* Walk the trie in sorted order, checking the ordering with cmpkey() and
 * every value against the reference map. Because the iterator may reuse the
 * key buffer on advance, the current key is copied into key_copy, and the
 * previous key is reconstructed from that copy at the top of each pass. */
void test_hattrie_sorted_iteration()
{
    fprintf(stderr, "iterating in order through %zu keys ... \n", k);

    hattrie_iter_t* i = hattrie_iter_begin(T, true);

    size_t count = 0;
    value_t* u;
    value_t v;

    char* key_copy = malloc(m_high + 1);
    char* prev_key = malloc(m_high + 1);
    memset(prev_key, 0, m_high + 1);
    size_t prev_len = 0;

    const char *key = NULL;
    size_t len = 0;

    while (!hattrie_iter_finished(i)) {
        /* prev_key <- last pass's key (from our stable copy); on the first
         * pass len is 0 so nothing is copied. */
        memcpy(prev_key, key_copy, len);
        prev_key[len] = '\0';
        prev_len = len;

        ++count;

        key = hattrie_iter_key(i, &len);

        /* memory for key may be changed on iter, copy it */
        strncpy(key_copy, key, len);

        if (prev_key != NULL && cmpkey(prev_key, prev_len, key, len) > 0) {
            fprintf(stderr, "[error] iteration is not correctly ordered.\n");
        }

        u = hattrie_iter_val(i);
        v = str_map_get(M, key, len);

        if (*u != v) {
            if (v == 0) {
                fprintf(stderr, "[error] incorrect iteration (%lu, %lu)\n", *u, v);
            }
            else {
                fprintf(stderr, "[error] incorrect iteration tally (%lu, %lu)\n", *u, v);
            }
        }

        // this way we will see an error if the same key is iterated through
        // twice
        str_map_set(M, key, len, 0);

        hattrie_iter_next(i);
    }

    if (count != M->m) {
        fprintf(stderr, "[error] iterated through %zu element, expected %zu\n",
                count, M->m);
    }

    hattrie_iter_free(i);
    free(prev_key);
    free(key_copy);

    fprintf(stderr, "done.\n");
}
/* Verify that keys containing bytes outside 7-bit ASCII round-trip through
 * the trie (store a value, read it back unchanged). */
void test_trie_non_ascii()
{
    fprintf(stderr, "checking non-ascii... \n");

    hattrie_t* trie = hattrie_create();
    char* txt = "\x81\x70";

    value_t* slot = hattrie_get(trie, txt, strlen(txt));
    *slot = 10;

    slot = hattrie_tryget(trie, txt, strlen(txt));
    if (*slot != 10) {
        fprintf(stderr, "can't store non-ascii strings\n");
    }

    hattrie_free(trie);
    fprintf(stderr, "done.\n");
}
/* Run the non-ASCII smoke test, then each iteration test against a freshly
 * built and populated trie; setup()/teardown() bracket each case. */
int main()
{
    test_trie_non_ascii();

    setup();
    test_hattrie_insert();
    test_hattrie_iteration();
    teardown();

    setup();
    test_hattrie_insert();
    test_hattrie_sorted_iteration();
    teardown();

    return 0;
}

View File

@ -0,0 +1,241 @@
/*
* This file is part of fastq-tools.
*
* Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
*
*/
#include "str_map.h"
#include "misc.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
/* Start with 16 buckets; rehash (doubling) once the item count exceeds
 * MAX_LOAD * table size. */
static const size_t INITIAL_TABLE_SIZE = 16;
static const double MAX_LOAD = 0.77;
/*
* Paul Hsieh's SuperFastHash
* http://www.azillionmonkeys.com/qed/hash.html
*/
#undef get16bits
#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
|| defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
#define get16bits(d) (*((const uint16_t *) (d)))
#endif
#if !defined (get16bits)
#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\
+(uint32_t)(((const uint8_t *)(d))[0]) )
#endif
/* Paul Hsieh's SuperFastHash over len bytes of data. Processes the input
 * 32 bits (two 16-bit reads) at a time, then mixes in the 1-3 remaining
 * bytes. Returns 0 for NULL or empty input. The exact bit-mixing sequence
 * is part of the algorithm; do not reorder. */
static uint32_t hash(const char * data, size_t len) {
    uint32_t hash = len, tmp;
    int rem;

    if (len <= 0 || data == NULL) return 0;

    rem = len & 3;   /* bytes left over after the 4-byte main loop */
    len >>= 2;       /* number of 4-byte groups */

    /* Main loop */
    for (;len > 0; len--) {
        hash  += get16bits (data);
        tmp    = (get16bits (data+2) << 11) ^ hash;
        hash   = (hash << 16) ^ tmp;
        data  += 2*sizeof (uint16_t);
        hash  += hash >> 11;
    }

    /* Handle end cases */
    switch (rem) {
        case 3: hash += get16bits (data);
                hash ^= hash << 16;
                hash ^= data[sizeof (uint16_t)] << 18;
                hash += hash >> 11;
                break;
        case 2: hash += get16bits (data);
                hash ^= hash << 11;
                hash += hash >> 17;
                break;
        case 1: hash += *data;
                hash ^= hash << 10;
                hash += hash >> 1;
    }

    /* Force "avalanching" of final 127 bits */
    hash ^= hash << 3;
    hash += hash >> 5;
    hash ^= hash << 4;
    hash += hash >> 17;
    hash ^= hash << 25;
    hash += hash >> 6;

    return hash;
}
static void rehash(str_map* T, size_t new_n);
static void clear(str_map*);
/* Allocate an empty map with INITIAL_TABLE_SIZE zeroed buckets.
 * Aborts (via malloc_or_die) on allocation failure. */
str_map* str_map_create()
{
    str_map* T = malloc_or_die(sizeof(str_map));
    T->A = malloc_or_die(INITIAL_TABLE_SIZE * sizeof(str_map_pair*));
    memset(T->A, 0, INITIAL_TABLE_SIZE * sizeof(str_map_pair*));
    T->n = INITIAL_TABLE_SIZE;
    T->m = 0;
    T->max_m = T->n * MAX_LOAD;
    return T;
}
/* Free a map created by str_map_create(), including every entry and its
 * key. Safe to call with NULL. */
void str_map_destroy(str_map* T)
{
    if (T == NULL) return;

    clear(T);
    free(T->A);
    free(T);
}
/* Remove every entry (freeing each node and its owned key) while keeping
 * the bucket array itself allocated; resets the item count to 0. */
void clear(str_map* T)
{
    str_map_pair* u;
    size_t i;
    for (i = 0; i < T->n; i++) {
        while (T->A[i]) {
            u = T->A[i]->next;
            free(T->A[i]->key);
            free(T->A[i]);
            T->A[i] = u;
        }
    }

    T->m = 0;
}
/* Link an existing node V into T's bucket chain (no key copy, no duplicate
 * check, no load-factor check). Used only by rehash() to move nodes. */
static void insert_without_copy(str_map* T, str_map_pair* V)
{
    uint32_t h = hash(V->key, V->keylen) % T->n;
    V->next = T->A[h];
    T->A[h] = V;
    T->m++;
}
/* Grow T to new_n buckets: build a temporary table, relink every existing
 * node into it (nodes are moved, not reallocated), then swap the new bucket
 * array into T. T->m is unchanged by the move. */
static void rehash(str_map* T, size_t new_n)
{
    str_map U;
    U.n = new_n;
    U.m = 0;
    U.max_m = U.n * MAX_LOAD;
    U.A = malloc_or_die(U.n * sizeof(str_map_pair*));
    memset(U.A, 0, U.n * sizeof(str_map_pair*));

    str_map_pair *j, *k;
    size_t i;
    for (i = 0; i < T->n; i++) {
        j = T->A[i];
        while (j) {
            k = j->next;                 /* save: insert_without_copy clobbers j->next */
            insert_without_copy(&U, j);
            j = k;
        }
        T->A[i] = NULL;
    }

    free(T->A);
    T->A = U.A;
    T->n = U.n;
    T->max_m = U.max_m;
}
/* Insert or overwrite the value for key (keylen bytes; keys may contain
 * NULs and are not NUL-terminated). The key bytes are copied into an owned
 * buffer on first insert. Grows the table first when at the load limit. */
void str_map_set(str_map* T, const char* key, size_t keylen, value_t value)
{
    if (T->m >= T->max_m) rehash(T, T->n * 2);

    uint32_t h = hash(key, keylen) % T->n;

    /* Overwrite in place if the key already exists. */
    str_map_pair* u = T->A[h];
    while (u) {
        if (u->keylen == keylen && memcmp(u->key, key, keylen) == 0) {
            u->value = value;
            return;
        }
        u = u->next;
    }

    /* Not found: prepend a new node with a copied key. */
    u = malloc_or_die(sizeof(str_map_pair));
    u->key = malloc_or_die(keylen);
    memcpy(u->key, key, keylen);
    u->keylen = keylen;
    u->value = value;

    u->next = T->A[h];
    T->A[h] = u;
    T->m++;
}
/* Return the value stored for key (keylen bytes), or 0 if absent. */
value_t str_map_get(const str_map* T, const char* key, size_t keylen)
{
    uint32_t slot = hash(key, keylen) % T->n;

    str_map_pair* p;
    for (p = T->A[slot]; p != NULL; p = p->next) {
        if (p->keylen == keylen && memcmp(p->key, key, keylen) == 0) {
            return p->value;
        }
    }
    return 0;
}
/* Remove key (keylen bytes) if present, freeing the node and its owned key
 * buffer and unlinking it from the bucket chain. No-op if absent. */
void str_map_del(str_map* T, const char* key, size_t keylen)
{
    uint32_t h = hash(key, keylen) % T->n;
    str_map_pair* u = T->A[h];
    str_map_pair* p = NULL;   /* predecessor of u in the chain */
    while (u) {
        if (u->keylen == keylen && memcmp(u->key, key, keylen) == 0) {
            if (p) {
                p->next = u->next;
            } else {
                T->A[h] = u->next;
            }
            free(u->key);
            free(u);
            --T->m;
            return;
        }
        p = u;
        u = u->next;
    }
}

View File

@ -0,0 +1,54 @@
/*
* Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
*
* hash :
* A quick and simple hash table mapping strings to things.
*
*/
#ifndef ISOLATOR_STR_MAP_H
#define ISOLATOR_STR_MAP_H
#if defined(__cplusplus)
extern "C" {
#endif
#include <stdlib.h>
#include <stdint.h>
#include "common.h"
/* One key/value entry in a bucket's singly linked collision chain. The key
 * is an owned byte buffer of exactly keylen bytes (NOT NUL-terminated). */
typedef struct str_map_pair_
{
    char* key;
    size_t keylen;
    value_t value;
    struct str_map_pair_* next;
} str_map_pair;

/* Chained hash table mapping byte strings to value_t. */
typedef struct
{
    str_map_pair** A; /* table proper */
    size_t n;         /* table size */
    size_t m;         /* hashed items */
    size_t max_m;     /* max hashed items before rehash */
} str_map;

/* Create an empty map; destroy frees the map, all entries, and their keys. */
str_map* str_map_create(void);
void str_map_destroy(str_map*);

/* Insert or overwrite; the key bytes are copied into the map. */
void str_map_set(str_map*, const char* key, size_t keylen, value_t value);

/* Return the stored value, or 0 when the key is absent. */
value_t str_map_get(const str_map*, const char* key, size_t keylen);

/* Remove the key if present; no-op otherwise. */
void str_map_del(str_map* T, const char* key, size_t keylen);
#if defined(__cplusplus)
}
#endif
#endif

View File

@ -0,0 +1,126 @@
package safebrowsing
/*
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "hat-trie/src/hat-trie.h"
#include "hat-trie/src/ahtable.c"
#include "hat-trie/src/hat-trie.c"
#include "hat-trie/src/misc.c"
#include "hat-trie/src/murmurhash3.c"
hattrie_t* start() {
hattrie_t* trie;
trie = hattrie_create();
return trie;
}
void set(hattrie_t* h, char* key, size_t len) {
value_t* val;
val = hattrie_get(h, key, len);
*val = 1;
}
int get(hattrie_t* h, char* key, size_t len) {
value_t* val;
val = hattrie_tryget(h, key, len);
if (val != 0) {
return *val;
}
return 0;
}
void delete(hattrie_t* h, char* key, size_t len) {
value_t* val;
val = hattrie_tryget(h, key, len);
if (val != 0) {
*val = 0;
}
}
char* hattrie_iter_key_string(hattrie_iter_t* i, size_t* len) {
const char* in_key;
char* out_key;
in_key = hattrie_iter_key(i, len);
out_key = malloc((*len) * sizeof(char));
memcpy(out_key, in_key, *len);
return out_key;
}
*/
import "C"
import (
"runtime"
"unsafe"
)
// HatTrie wraps a C hat-trie used as a set of string keys. The underlying
// C allocation is released by a finalizer, so values must be created via
// NewTrie.
type HatTrie struct {
	trie *C.hattrie_t
}
// finalizeHatTrie frees the C-side trie; registered by NewTrie as the
// runtime finalizer for HatTrie values.
func finalizeHatTrie(c *HatTrie) {
	C.hattrie_free(c.trie)
}
// NewTrie allocates an empty HAT-trie and registers a finalizer that frees
// the underlying C allocation when the wrapper is garbage collected.
func NewTrie() *HatTrie {
	h := &HatTrie{trie: C.start()}
	runtime.SetFinalizer(h, finalizeHatTrie)
	return h
}
// Delete marks key as absent. Note: the C helper `delete` does not remove
// the trie entry; it resets the stored value to 0, which Get reports as
// "not present". The key likely still shows up during iteration — confirm
// against the hat-trie iterator semantics before relying on it.
func (h *HatTrie) Delete(key string) {
	ckey := C.CString(key)
	defer C.free(unsafe.Pointer(ckey))
	C.delete(h.trie, ckey, C.size_t(len(key)))
}
// Set inserts key into the trie; its stored value becomes 1, the value Get
// treats as present.
func (h *HatTrie) Set(key string) {
	ckey := C.CString(key)
	defer C.free(unsafe.Pointer(ckey))
	C.set(h.trie, ckey, C.size_t(len(key)))
}
// Get reports whether key is present, i.e. it was Set and not Deleted
// (the C helper returns the stored value, and membership is value == 1).
func (h *HatTrie) Get(key string) bool {
	ckey := C.CString(key)
	defer C.free(unsafe.Pointer(ckey))
	return C.get(h.trie, ckey, C.size_t(len(key))) == 1
}
// HatTrieIterator walks a HatTrie's keys; created by (*HatTrie).Iterator.
// The C-side iterator is released by a finalizer.
type HatTrieIterator struct {
	iterator *C.hattrie_iter_t
}
// finalizeHatTrieIterator frees the C-side iterator; registered by
// (*HatTrie).Iterator as the runtime finalizer.
func finalizeHatTrieIterator(i *HatTrieIterator) {
	C.hattrie_iter_free(i.iterator)
}
// Iterator returns an iterator over the trie's keys in sorted order (the
// second argument to hattrie_iter_begin requests sorted iteration).
// NOTE(review): presumably only valid while the trie is alive and
// unmodified — confirm against the hat-trie library's iterator contract.
func (h *HatTrie) Iterator() *HatTrieIterator {
	out := C.hattrie_iter_begin(h.trie, true)
	hi := &HatTrieIterator{
		iterator: out,
	}
	runtime.SetFinalizer(hi, finalizeHatTrieIterator)
	return hi
}
// Next returns the current key and advances the iterator. When the
// iterator is exhausted it returns "" — indistinguishable from a stored
// empty key, so callers that may store "" need a separate exhaustion check.
// The C key buffer is copied into a Go string and freed before returning.
func (i *HatTrieIterator) Next() string {
	if C.hattrie_iter_finished(i.iterator) {
		return ""
	}
	keylen := C.size_t(0)
	ckey := C.hattrie_iter_key_string(i.iterator, &keylen)
	defer C.free(unsafe.Pointer(ckey))
	key := C.GoStringN(ckey, C.int(keylen))
	C.hattrie_iter_next(i.iterator)
	return key
}

View File

@ -0,0 +1,117 @@
/*
Copyright (c) 2013, Richard Johnson
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package safebrowsing
import (
"bytes"
"fmt"
"strconv"
"strings"
)
// buildChunkRanges converts a set of chunk numbers into the compact
// Safe Browsing range syntax, e.g. {1,2,3,5} -> "1-3,5".
// It returns "" for an empty set.
func buildChunkRanges(chunkIndexes map[ChunkNum]bool) string {
	// len of a nil map is 0, so this also covers the nil case (the
	// original additionally tested == nil, which was redundant)
	if len(chunkIndexes) == 0 {
		return ""
	}
	// find the highest and lowest chunk numbers present
	lowest := int64(-1)
	highest := int64(-1)
	for chunkNumUint := range chunkIndexes {
		chunkNum := int64(chunkNumUint)
		if lowest == -1 || lowest > chunkNum {
			lowest = chunkNum
		}
		if highest == -1 || highest < chunkNum {
			highest = chunkNum
		}
	}
	if len(chunkIndexes) == 1 {
		return strconv.FormatInt(lowest, 10)
	}
	output := &bytes.Buffer{}
	start := lowest
	end := lowest
	inRange := true
	// Sweep every value between lowest and highest; contiguous runs of
	// present numbers are emitted as "start-end", singletons as a bare
	// number, runs separated by commas.
	for end = lowest; end <= highest; end++ {
		if _, exists := chunkIndexes[ChunkNum(end)]; exists {
			if inRange {
				continue
			}
			// a new run begins here
			start = end
			inRange = true
			continue
		}
		if inRange {
			// the run [start, end-1] just ended -- emit it with a comma
			if start == end-1 {
				fmt.Fprintf(output, "%d,", start)
			} else {
				fmt.Fprintf(output, "%d-%d,", start, end-1)
			}
			inRange = false
			start = end
		}
	}
	// highest is always present, so the loop ends inside a run; emit the
	// final run without a trailing comma
	if start == end-1 {
		fmt.Fprintf(output, "%d", start)
	} else {
		fmt.Fprintf(output, "%d-%d", start, end-1)
	}
	return output.String()
}
// parseChunkRange expands a Safe Browsing range string such as "1-3,5"
// into a set of chunk numbers. Any malformed component yields an error.
func parseChunkRange(rangeString string) (out map[ChunkNum]bool, err error) {
	out = make(map[ChunkNum]bool)
	for _, piece := range strings.Split(strings.TrimSpace(rangeString), ",") {
		bounds := strings.Split(piece, "-")
		switch len(bounds) {
		case 1:
			// a single chunk number; an empty piece is malformed
			if len(piece) == 0 {
				return nil, fmt.Errorf("Invalid range")
			}
			single, convErr := strconv.Atoi(bounds[0])
			if convErr != nil {
				return nil, fmt.Errorf("Invalid range")
			}
			out[ChunkNum(single)] = true
		case 2:
			// an inclusive low-high span
			lo, loErr := strconv.Atoi(bounds[0])
			if loErr != nil {
				return nil, fmt.Errorf("Invalid range")
			}
			hi, hiErr := strconv.Atoi(bounds[1])
			if hiErr != nil {
				return nil, fmt.Errorf("Invalid range")
			}
			for n := lo; n <= hi; n++ {
				out[ChunkNum(n)] = true
			}
		default:
			// more than one '-' in a piece is malformed
			return nil, fmt.Errorf("Invalid range")
		}
	}
	return out, nil
}

View File

@ -0,0 +1,53 @@
/*
Copyright (c) 2013, Richard Johnson
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package safebrowsing
import (
"bytes"
"fmt"
"net/http"
"runtime"
)
// request performs a GET or, when isPost is set, a POST with data as the
// request body, using the package-level Transport so callers can
// configure proxies/TLS. The response body is NOT closed here; the
// caller owns and must close it.
//
// NOTE(review): no client timeout is configured, so a stalled server can
// block the caller indefinitely.
func request(url string, data string, isPost bool) (response *http.Response, err error) {
	buf := bytes.NewBufferString(data)
	client := &http.Client{Transport: Transport}
	if isPost {
		response, err = client.Post(url, "text/plain", buf)
	} else {
		response, err = client.Get(url)
	}
	if err != nil {
		// annotate the error with this call site and its caller so
		// network failures are easier to trace (locals renamed from the
		// non-idiomatic line_no/filename1 style)
		_, filename, lineNo, _ := runtime.Caller(0)
		_, callerFile, callerLine, _ := runtime.Caller(1)
		return response, fmt.Errorf(`Error getting %s:
Error: %s,
At: %s:%d
By: %s:%d
`, url, err, filename, lineNo, callerFile, callerLine)
	}
	return response, nil
}

View File

@ -0,0 +1,366 @@
/*
Copyright (c) 2013, Richard Johnson
Copyright (c) 2014, Kilian Gilonne
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package safebrowsing
import (
"bufio"
"bytes"
"fmt"
"io"
"math/rand"
"net/http"
"os"
// "runtime/debug"
"strconv"
"strings"
"time"
)
// HostHash keys the full-hash cache by host hash. NOTE(review): the hash
// derivation is not visible in this chunk -- confirm before relying on it.
type HostHash string

// LookupHash holds a URL hash (prefix or full-length) as a raw byte
// string. NOTE(review): assumption -- confirm against the chunk code.
type LookupHash string

// SafeBrowsing is the top-level client: API credentials, the tracked
// lists and their data, and a cache of full-hash responses.
type SafeBrowsing struct {
	DataDir         string // directory where list data files are persisted
	Key             string // Google Safe Browsing API key
	Client          string // client name reported to the API
	AppVersion      string // client version reported to the API
	ProtocolVersion string // Safe Browsing protocol version ("3.0")
	UpdateDelay     int    // seconds until the next update, as instructed by the server
	LastUpdated     time.Time // time of the last successful update pass
	Lists           map[string]*SafeBrowsingList
	Cache           map[HostHash]*FullHashCache
	// request is injectable for testing; defaults to the package request()
	request func(string, string, bool) (*http.Response, error)
	Logger  logger
}
// SupportedLists enumerates the Google lists this client will track.
var SupportedLists map[string]bool = map[string]bool{
	"goog-malware-shavar":  true,
	"googpub-phish-shavar": true,
}

// Logger is the default logger handed to newly created clients.
var Logger logger = new(DefaultLogger)

// Client, AppVersion and ProtocolVersion identify this client to the API.
var Client string = "api"
var AppVersion string = "1.5.2"
var ProtocolVersion string = "3.0"

// OfflineMode, when true, loads only on-disk data and never contacts the
// Safe Browsing service.
var OfflineMode bool = false

// Transport is used for all API requests; replace it to configure
// proxies or TLS settings.
var Transport *http.Transport = &http.Transport{}
// NewSafeBrowsing creates a client keyed with apiKey that persists list
// data under dataDirectory. In normal mode it immediately contacts the
// service, loads/updates all lists and starts the background update
// loop; with the package-level OfflineMode set it only loads whatever
// data already exists on disk.
func NewSafeBrowsing(apiKey string, dataDirectory string) (sb *SafeBrowsing, err error) {
	sb = &SafeBrowsing{
		Key:             apiKey,
		Client:          Client,
		AppVersion:      AppVersion,
		ProtocolVersion: ProtocolVersion,
		DataDir:         dataDirectory,
		Lists:           make(map[string]*SafeBrowsingList),
		Cache:           make(map[HostHash]*FullHashCache),
		request:         request,
		Logger:          Logger,
	}
	// if the dataDirectory does not currently exist, have a go at creating it:
	// NOTE(review): a failure here is only logged, not returned -- later
	// file writes will surface the problem; confirm this is intended.
	err = os.MkdirAll(dataDirectory, os.ModeDir|0700)
	if err != nil {
		sb.Logger.Error(
			"Directory \"%s\" does not exist, and I was unable to create it!",
			dataDirectory)
	}
	// if we are in offline mode we want to just load up the lists we
	// currently have and work with that
	if OfflineMode {
		for listName, _ := range SupportedLists {
			fileName := sb.DataDir + "/" + listName + ".dat"
			tmpList := newSafeBrowsingList(listName, fileName)
			tmpList.Logger = sb.Logger
			// a list that fails to load is skipped, not fatal
			err := tmpList.load(nil)
			if err != nil {
				sb.Logger.Warn("Error loading list %s: %s", listName, err)
				continue
			}
			sb.Lists[listName] = tmpList
		}
		// debug.FreeOSMemory()
		return sb, nil
	}
	// normal mode, contact the server for updates, etc.
	err = sb.UpdateProcess()
	return sb, err
}
// UpdateProcess performs the initial synchronisation: fetch the list of
// available lists, load any existing on-disk data, apply one update
// pass, then start the periodic reload goroutine. A 503 from the
// service is tolerated (the reload loop will keep retrying); any other
// update error aborts.
func (sb *SafeBrowsing) UpdateProcess() (err error) {
	sb.Logger.Info("Requesting list of lists from server...")
	err = sb.requestSafeBrowsingLists()
	if err != nil {
		return err
	}
	err = sb.loadExistingData()
	if err != nil {
		return err
	}
	err, status := sb.update()
	if (err != nil) && (status != 503) {
		return err
	} else if status == 503 {
		// service overloaded: proceed anyway, reloadLoop will retry
		sb.Logger.Warn("GSB service temporarily unavailable")
	}
	go sb.reloadLoop()
	return nil
}
// requestSafeBrowsingLists asks the service which lists it offers and
// registers the supported ones on sb.Lists. A 503 triggers a retry;
// other non-200 codes are errors.
func (sb *SafeBrowsing) requestSafeBrowsingLists() (err error) {
	// defer debug.FreeOSMemory()
	url := fmt.Sprintf(
		"https://safebrowsing.google.com/safebrowsing/list?"+
			"client=%s&key=%s&appver=%s&pver=%s",
		sb.Client, sb.Key, sb.AppVersion, sb.ProtocolVersion)
	listresp, err := sb.request(url, "", true)
	if err != nil {
		return err
	}
	// the response body was previously never closed; close it so the
	// transport can reuse the connection
	defer listresp.Body.Close()
	if listresp.StatusCode == 503 {
		// the original discarded this retry's result and fell through to
		// parse the 503 body; propagate the retry's outcome instead.
		// NOTE(review): recursion is unbounded if the service stays at 503.
		return sb.requestSafeBrowsingLists()
	}
	if listresp.StatusCode != 200 {
		return fmt.Errorf("Unexpected server response code: %d", listresp.StatusCode)
	}
	return sb.processSafeBrowsingLists(listresp.Body)
}
// processSafeBrowsingLists reads one list name per line from body and
// registers a SafeBrowsingList for each name found in SupportedLists.
func (sb *SafeBrowsing) processSafeBrowsingLists(body io.Reader) (err error) {
	var raw bytes.Buffer
	if _, err = raw.ReadFrom(body); err != nil {
		return fmt.Errorf("Unable to read list data: %s", err)
	}
	names := strings.Split(strings.TrimSpace(raw.String()), "\n")
	for _, name := range names {
		// silently skip lists we do not support
		if _, supported := SupportedLists[name]; !supported {
			continue
		}
		list := newSafeBrowsingList(name, sb.DataDir+"/"+name+".dat")
		list.Logger = sb.Logger
		sb.Lists[name] = list
	}
	return nil
}
// loadExistingData replays each registered list's on-disk data file into
// memory; the first failure aborts the whole load.
func (sb *SafeBrowsing) loadExistingData() error {
	sb.Logger.Info("Loading existing data....")
	for _, list := range sb.Lists {
		if err := list.load(nil); err != nil {
			return fmt.Errorf("Error loading list from %s: %s", sb.DataDir, err)
		}
		// debug.FreeOSMemory()
	}
	return nil
}
// update fetches the redirect URLs for every list, applies the data they
// point at, and records the time of a successful pass. The HTTP status
// of the download request is returned alongside any error (note the
// unconventional error-first return order is part of the interface).
func (sb *SafeBrowsing) update() (err error, status int) {
	sb.Logger.Info("Requesting updates...")
	err, status = sb.requestRedirectList()
	if err != nil {
		return fmt.Errorf("Unable to retrieve updates: %s", err.Error()), status
	}
	for name, list := range sb.Lists {
		if err = list.loadDataFromRedirectLists(); err != nil {
			return fmt.Errorf("Unable to process updates for %s: %s", name, err.Error()), status
		}
	}
	// remember when we last completed a full pass
	sb.LastUpdated = time.Now()
	return nil, status
}
// requestRedirectList POSTs the current chunk state of every list to the
// downloads endpoint and feeds the response to processRedirectList.
// The HTTP status code is returned so callers can special-case 503.
func (sb *SafeBrowsing) requestRedirectList() (err error, status int) {
	// defer debug.FreeOSMemory()
	url := fmt.Sprintf(
		"https://safebrowsing.google.com/safebrowsing/downloads?"+
			"client=%s&key=%s&appver=%s&pver=%s",
		sb.Client, sb.Key, sb.AppVersion, sb.ProtocolVersion)
	// request body: one line per list, "name;a:<add-ranges>:s:<sub-ranges>"
	listsStr := ""
	for name, sbl := range sb.Lists {
		// name is already a string (the original wrapped it in a
		// redundant string() conversion)
		listsStr += name + ";"
		addChunkRange := sbl.ChunkRanges[CHUNK_TYPE_ADD]
		if addChunkRange != "" {
			listsStr += "a:" + addChunkRange + ":"
		}
		subChunkRange := sbl.ChunkRanges[CHUNK_TYPE_SUB]
		if subChunkRange != "" {
			listsStr += "s:" + subChunkRange
		}
		listsStr += "\n"
	}
	redirects, err := sb.request(url, listsStr, true)
	if err != nil {
		return err, 0
	}
	defer redirects.Body.Close()
	if redirects.StatusCode != 200 {
		// best-effort read of the error body for the message; a read
		// failure here just yields a shorter error string
		tmp := &bytes.Buffer{}
		tmp.ReadFrom(redirects.Body)
		return fmt.Errorf("Unexpected server response code: %d\n%s", redirects.StatusCode, tmp), redirects.StatusCode
	}
	if err = sb.processRedirectList(redirects.Body); err != nil {
		return err, redirects.StatusCode
	}
	return nil, redirects.StatusCode
}
// processRedirectList parses the downloads response line by line.
// Recognised verbs:
//
//	n:<secs>  next-poll delay
//	r:...     full reset requested: wipe state and re-request
//	i:<name>  start of a per-list section
//	u:<url>   redirect URL holding chunk data for the current list
//	ad:<rng>  add-chunk numbers to delete
//	sd:<rng>  sub-chunk numbers to delete
//
// Unknown verbs are ignored.
//
// Fixes over the original: a line without a ":" payload no longer panics
// on bits[1], and the section flush no longer nil-pointer panics when the
// response contains no "i" line or names a list we never registered.
func (sb *SafeBrowsing) processRedirectList(buf io.Reader) error {
	// defer debug.FreeOSMemory()
	scanner := bufio.NewScanner(buf)
	// per-section accumulators
	var currentListName string
	var RedirectList []string = nil
	currentDeletes := make(map[ChunkData_ChunkType]map[ChunkNum]bool)
	currentDeletes[CHUNK_TYPE_ADD] = make(map[ChunkNum]bool)
	currentDeletes[CHUNK_TYPE_SUB] = make(map[ChunkNum]bool)
	// saveCurrent flushes the accumulated section onto its list, guarding
	// against unknown or empty list names
	saveCurrent := func() {
		if list, ok := sb.Lists[currentListName]; ok && list != nil {
			list.DataRedirects = RedirectList
			list.DeleteChunks = currentDeletes
		}
	}
	for scanner.Scan() {
		bits := strings.SplitN(scanner.Text(), ":", 2)
		payload := ""
		if len(bits) == 2 {
			payload = bits[1]
		}
		switch bits[0] {
		case "n":
			updateDelay, err := strconv.Atoi(payload)
			if err != nil {
				return fmt.Errorf("Unable to parse timeout: %s", err)
			}
			sb.UpdateDelay = updateDelay
		case "r":
			// we need to reset full!
			sb.reset()
			// the docs say to request again, so we do that...
			err, _ := sb.requestRedirectList()
			return err
		case "i":
			if RedirectList != nil {
				// save the previous section before starting a new one
				saveCurrent()
			}
			// reinitialize the per-section accumulators
			RedirectList = make([]string, 0)
			currentDeletes = make(map[ChunkData_ChunkType]map[ChunkNum]bool)
			currentDeletes[CHUNK_TYPE_ADD] = make(map[ChunkNum]bool)
			currentDeletes[CHUNK_TYPE_SUB] = make(map[ChunkNum]bool)
			currentListName = payload
		case "u":
			RedirectList = append(RedirectList, "https://"+payload)
		case "ad":
			addDeletes, err := parseChunkRange(payload)
			if err != nil {
				return fmt.Errorf("Error parsing delete add chunks range: %s", err)
			}
			currentDeletes[CHUNK_TYPE_ADD] = addDeletes
		case "sd":
			subDeletes, err := parseChunkRange(payload)
			if err != nil {
				return fmt.Errorf("Error parsing delete sub chunks range: %s", err)
			}
			currentDeletes[CHUNK_TYPE_SUB] = subDeletes
		default:
			continue
		}
		// debug.FreeOSMemory()
	}
	// flush the final section
	saveCurrent()
	if err := scanner.Err(); err != nil && err != io.EOF {
		return fmt.Errorf("Unable to parse list response: %s", err)
	}
	return nil
}
// reset discards all in-memory and on-disk state for every list, leaving
// each one empty as if freshly created.
func (sb *SafeBrowsing) reset() {
	for _, list := range sb.Lists {
		list.Lookup = NewTrie()
		list.FullHashes = NewTrie()
		list.FullHashRequested = NewTrie()
		list.DataRedirects = make([]string, 0)
		deletes := make(map[ChunkData_ChunkType]map[ChunkNum]bool)
		deletes[CHUNK_TYPE_ADD] = make(map[ChunkNum]bool)
		deletes[CHUNK_TYPE_SUB] = make(map[ChunkNum]bool)
		list.DeleteChunks = deletes
		// forget which chunks we hold so the next request starts clean
		list.ChunkRanges = map[ChunkData_ChunkType]string{
			CHUNK_TYPE_ADD: "",
			CHUNK_TYPE_SUB: "",
		}
		// remove any persisted data file for this list
		if list.FileName != "" {
			os.Remove(list.FileName)
		}
		// debug.FreeOSMemory()
	}
}
// reloadLoop periodically re-runs update(), backing off while the
// service answers 503. It runs forever and is started as a goroutine
// from UpdateProcess.
func (sb *SafeBrowsing) reloadLoop() {
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	randomFloat := r.Float64()
	for {
		// wait out the server-requested update delay
		duration := time.Duration(sb.UpdateDelay) * time.Second
		sb.Logger.Info("Next update in %d seconds", sb.UpdateDelay)
		time.Sleep(duration)
		err, status := sb.update()
		for x := 0; status == 503; x++ {
			// first we wait 1 min, then some time between 30-60 mins,
			// growing until we stop at 480 mins or succeed.
			// NOTE(review): growth is linear in x, not doubling as the
			// original comment claimed -- confirm intended behaviour.
			mins := (30 * (randomFloat + 1) * float64(x)) + 1
			if mins > 480 {
				mins = 480
			}
			// mins is a float64: use %.0f (the original's %d printed
			// "%!d(float64=...)" garbage in the log)
			sb.Logger.Warn(
				"Update failed, in back-off mode (waiting %.0f mins): %s",
				mins,
				err,
			)
			time.Sleep(time.Duration(mins) * time.Minute)
			err, status = sb.update()
		}
		// debug.FreeOSMemory()
	}
}

View File

@ -0,0 +1,315 @@
/*
Copyright (c) 2013, Richard Johnson
Copyright (c) 2014, Kilian Gilonne
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package safebrowsing
import (
"encoding/gob"
"fmt"
"io"
"io/ioutil"
"os"
// "runtime/debug"
"sync"
)
//import "encoding/hex"
// SafeBrowsingList holds all state for a single Safe Browsing list: its
// on-disk data file, pending redirect URLs, chunk bookkeeping, and the
// tries used for lookups.
type SafeBrowsingList struct {
	Name          string
	FileName      string   // path of the persisted gob-encoded chunk file
	DataRedirects []string // redirect URLs still to be downloaded
	// chunk numbers the server has instructed us to drop, per chunk type
	DeleteChunks map[ChunkData_ChunkType]map[ChunkNum]bool
	// compact ranges of the chunks we hold, reported back to the server
	ChunkRanges map[ChunkData_ChunkType]string
	// lookup map only contain prefix hash
	Lookup *HatTrie
	// prefixes for which a full-hash request has already been issued
	FullHashRequested *HatTrie
	// confirmed full-length hashes
	FullHashes *HatTrie
	Logger     logger
	// guards reload/update of the structures above
	updateLock *sync.RWMutex
}
// newSafeBrowsingList constructs an empty list with the given name that
// persists its data to filename.
func newSafeBrowsingList(name string, filename string) (sbl *SafeBrowsingList) {
	deletes := make(map[ChunkData_ChunkType]map[ChunkNum]bool)
	deletes[CHUNK_TYPE_ADD] = make(map[ChunkNum]bool)
	deletes[CHUNK_TYPE_SUB] = make(map[ChunkNum]bool)
	return &SafeBrowsingList{
		Name:              name,
		FileName:          filename,
		DataRedirects:     make([]string, 0),
		Lookup:            NewTrie(),
		FullHashRequested: NewTrie(),
		FullHashes:        NewTrie(),
		DeleteChunks:      deletes,
		Logger:            &DefaultLogger{},
		updateLock:        new(sync.RWMutex),
	}
}
// loadDataFromRedirectLists downloads every pending redirect URL, splits
// the payloads into chunks and merges them into the list via load().
//
// Fixes over the original: the ioutil.ReadAll error was ignored, response
// bodies were closed only by defers that all ran at function exit, the
// builtin len was shadowed by a local variable, and an empty download
// caused an index-out-of-range panic on newChunks[0].
func (sbl *SafeBrowsingList) loadDataFromRedirectLists() error {
	// defer debug.FreeOSMemory()
	if len(sbl.DataRedirects) < 1 {
		sbl.Logger.Info("No pending updates available")
		return nil
	}
	newChunks := make([]*ChunkData, 0)
	for _, url := range sbl.DataRedirects {
		data, err := sbl.fetchRedirect(url)
		if err != nil {
			return err
		}
		total := uint32(len(data))
		// ReadChunk consumes from the front of data and reports how many
		// bytes remain; loop until the payload is exhausted
		for remaining := total; remaining != 0; {
			chunk, newRemaining, err := ReadChunk(data[(total-remaining):], remaining)
			if err != nil {
				return err
			}
			remaining = newRemaining
			newChunks = append(newChunks, chunk)
		}
	}
	if len(newChunks) == 0 || newChunks[0] == nil {
		return fmt.Errorf("No chunk : empty redirect file")
	}
	return sbl.load(newChunks)
}

// fetchRedirect GETs a single redirect URL and returns its whole body,
// closing the response promptly rather than at the end of the caller's
// loop.
func (sbl *SafeBrowsingList) fetchRedirect(url string) ([]byte, error) {
	response, err := request(url, "", false)
	if err != nil {
		return nil, err
	}
	defer response.Body.Close()
	if response.StatusCode != 200 {
		return nil, fmt.Errorf("Unexpected server response code: %d",
			response.StatusCode)
	}
	data, err := ioutil.ReadAll(response.Body)
	if err != nil {
		return nil, fmt.Errorf("Unable to read chunk data: %s", err)
	}
	return data, nil
}
// load merges newChunks into the list's persisted chunk file and rebuilds
// the in-memory lookup structures.
//
// It streams the existing gob-encoded chunk file (if any) into a fresh
// <FileName>.tmp, skipping any chunk scheduled in DeleteChunks, appends
// the new chunks, renames the tmp file over the original, then
// recomputes ChunkRanges for the next server request. Pass newChunks ==
// nil to simply (re)load the on-disk data.
func (sbl *SafeBrowsingList) load(newChunks []*ChunkData) (err error) {
	// defer debug.FreeOSMemory()
	sbl.Logger.Info("Reloading %s", sbl.Name)
	sbl.updateLock.Lock()
	defer sbl.updateLock.Unlock()
	// get the input stream; a missing file is fine (first run)
	f, err := os.Open(sbl.FileName)
	if err != nil && !os.IsNotExist(err) {
		sbl.Logger.Warn("Error opening data file for reading, assuming empty: %s", err)
	}
	close_file := func(f *os.File) {
		if f != nil {
			f.Close()
		}
	}
	defer close_file(f)
	var dec *gob.Decoder = nil
	if f != nil {
		dec = gob.NewDecoder(f)
	}
	// open the file again for output
	fOut, err := os.Create(sbl.FileName + ".tmp")
	if err != nil {
		return fmt.Errorf("Error opening file: %s", err)
	}
	// the deferred cleanup also runs on success; by then the tmp file has
	// been renamed away, so the Remove is a harmless no-op.
	// NOTE(review): the rename below happens before this Close -- fine on
	// POSIX, would fail on Windows; confirm target platforms.
	close_tmp_file := func(fout *os.File, fileName string) {
		if fout != nil {
			fOut.Close()
			os.Remove(fileName + ".tmp")
		}
	}
	defer close_tmp_file(fOut, sbl.FileName)
	enc := gob.NewEncoder(fOut)
	// the chunks we loaded for the next request to the server
	addChunkIndexes := make(map[ChunkNum]bool)
	subChunkIndexes := make(map[ChunkNum]bool)
	// counters for the summary log line at the end
	newEntryCount := 0
	subEntryCount := 0
	deletedChunkCount := 0
	addedChunkCount := len(newChunks)
	// load existing chunks from disk, filtering out deleted ones and
	// classifying the rest by chunk type and prefix width
	if dec != nil {
		for {
			chunk := &ChunkData{}
			err = dec.Decode(&chunk)
			if err != nil {
				// io.EOF is the normal end of the file; checked below
				break
			}
			cast := ChunkNum(chunk.GetChunkNumber())
			if _, exists := sbl.DeleteChunks[chunk.GetChunkType()][cast]; exists {
				// skip this chunk, we've been instructed to delete it
				deletedChunkCount++
				continue
			} else if chunk.GetChunkType() == CHUNK_TYPE_ADD && chunk.GetPrefixType() == PREFIX_4B {
				addChunkIndexes[cast] = true
				newEntryCount += len(chunk.Hashes) / PREFIX_4B_SZ
			} else if chunk.GetChunkType() == CHUNK_TYPE_ADD && chunk.GetPrefixType() == PREFIX_32B {
				addChunkIndexes[cast] = true
				newEntryCount += len(chunk.Hashes) / PREFIX_32B_SZ
			} else if chunk.GetChunkType() == CHUNK_TYPE_SUB && chunk.GetPrefixType() == PREFIX_4B {
				subChunkIndexes[cast] = true
				subEntryCount += len(chunk.Hashes) / PREFIX_4B_SZ
			} else if chunk.GetChunkType() == CHUNK_TYPE_SUB && chunk.GetPrefixType() == PREFIX_32B {
				subChunkIndexes[cast] = true
				subEntryCount += len(chunk.Hashes) / PREFIX_32B_SZ
			} else {
				sbl.Logger.Warn("Chunk not decoded properly")
			}
			// persist the surviving chunk to the new file
			if enc != nil {
				err = enc.Encode(chunk)
				if err != nil {
					return err
				}
			}
			sbl.updateLookupMap(chunk)
		}
		if err != io.EOF {
			return err
		}
	}
	// add on any new chunks, with the same classification as above
	if newChunks != nil {
		for _, chunk := range newChunks {
			cast := ChunkNum(chunk.GetChunkNumber())
			if _, exists := sbl.DeleteChunks[chunk.GetChunkType()][cast]; exists {
				// skip this chunk, we've been instructed to delete it
				addedChunkCount--
				continue
			} else if chunk.GetChunkType() == CHUNK_TYPE_ADD && chunk.GetPrefixType() == PREFIX_4B {
				addChunkIndexes[cast] = true
				newEntryCount += len(chunk.Hashes) / PREFIX_4B_SZ
			} else if chunk.GetChunkType() == CHUNK_TYPE_ADD && chunk.GetPrefixType() == PREFIX_32B {
				addChunkIndexes[cast] = true
				newEntryCount += len(chunk.Hashes) / PREFIX_32B_SZ
			} else if chunk.GetChunkType() == CHUNK_TYPE_SUB && chunk.GetPrefixType() == PREFIX_4B {
				subChunkIndexes[cast] = true
				subEntryCount += len(chunk.Hashes) / PREFIX_4B_SZ
			} else if chunk.GetChunkType() == CHUNK_TYPE_SUB && chunk.GetPrefixType() == PREFIX_32B {
				subChunkIndexes[cast] = true
				subEntryCount += len(chunk.Hashes) / PREFIX_32B_SZ
			} else {
				sbl.Logger.Warn("Unknow chunk type")
				addedChunkCount--
				continue
			}
			if enc != nil {
				err = enc.Encode(chunk)
				if err != nil {
					return err
				}
			}
			sbl.updateLookupMap(chunk)
		}
	}
	// now close off our files, discard the old and keep the new
	if f != nil {
		err = os.Remove(sbl.FileName)
		if err != nil {
			return err
		}
	}
	err = os.Rename(sbl.FileName+".tmp", sbl.FileName)
	if err != nil {
		return err
	}
	// record the compact chunk ranges we now hold, for the next
	// downloads request
	sbl.ChunkRanges = map[ChunkData_ChunkType]string{
		CHUNK_TYPE_ADD: buildChunkRanges(addChunkIndexes),
		CHUNK_TYPE_SUB: buildChunkRanges(subChunkIndexes),
	}
	// the pending deletes have been applied; clear them
	sbl.DeleteChunks = make(map[ChunkData_ChunkType]map[ChunkNum]bool)
	sbl.Logger.Info("Loaded %d existing add chunks and %d sub chunks "+
		"(~ %d hashes added, ~ %d hashes removed), deleted %d chunks, added %d new chunks.",
		len(addChunkIndexes),
		len(subChunkIndexes),
		newEntryCount,
		subEntryCount,
		deletedChunkCount,
		addedChunkCount,
	)
	return nil
}
// updateLookupMap merges one chunk's hashes into the list's tries.
// 4-byte entries are prefixes (added to / removed from Lookup); 32-byte
// entries are full-length hashes (added to / removed from FullHashes).
// Removing a prefix also evicts cached full hashes that start with it.
func (sbl *SafeBrowsingList) updateLookupMap(chunk *ChunkData) {
	hashlen := 0
	hasheslen := len(chunk.Hashes)
	if chunk.GetPrefixType() == PREFIX_4B {
		hashlen = PREFIX_4B_SZ
	}
	if chunk.GetPrefixType() == PREFIX_32B {
		hashlen = PREFIX_32B_SZ
	}
	if hashlen == 0 {
		// unknown prefix type: previously the loop below spun forever
		// because its stride was zero
		sbl.Logger.Warn("Chunk with unknown prefix type, skipping")
		return
	}
	for i := 0; (i + hashlen) <= hasheslen; i += hashlen {
		hash := chunk.Hashes[i:(i + hashlen)]
		switch hashlen {
		case PREFIX_4B_SZ:
			// we are a hash-prefix
			prefix := string(hash)
			switch chunk.GetChunkType() {
			case CHUNK_TYPE_ADD:
				sbl.Lookup.Set(prefix)
			case CHUNK_TYPE_SUB:
				sbl.Lookup.Delete(prefix)
				// drop any cached full hashes under this prefix (the
				// iterator variable was previously named "i", shadowing
				// the loop index)
				iter := sbl.FullHashes.Iterator()
				for key := iter.Next(); key != ""; key = iter.Next() {
					if key[0:len(prefix)] == prefix {
						sbl.FullHashes.Delete(key)
					}
				}
			}
		case PREFIX_32B_SZ:
			// we are a full-length hash
			lookupHash := string(hash)
			switch chunk.GetChunkType() {
			case CHUNK_TYPE_ADD:
				sbl.FullHashes.Set(lookupHash)
			case CHUNK_TYPE_SUB:
				sbl.FullHashes.Delete(lookupHash)
			}
		}
	}
}

View File

@ -0,0 +1,4 @@
config.toml
data
data.old
webserver

View File

@ -0,0 +1,6 @@
# example config file for safe browsing server
address = "0.0.0.0:8080"
googleApiKey = ""
dataDir = "/tmp/safe-browsing-data"
# enable example usage page at /form
enableFormPage = true

View File

@ -0,0 +1,225 @@
/*
Copyright (c) 2014, Richard Johnson
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package main
import (
"encoding/json"
"flag"
"fmt"
toml "github.com/BurntSushi/toml"
safebrowsing "github.com/rjohnsondev/go-safe-browsing-api"
"net/http"
"os"
)
// Config mirrors the TOML configuration file for the example webserver.
type Config struct {
	Address        string // listen address, e.g. "0.0.0.0:8080"
	GoogleApiKey   string // Google Safe Browsing API key
	DataDir        string // directory where list data is persisted
	EnableFormPage bool   // expose the interactive /form demo page
}

// sb is the shared Safe Browsing client used by all request handlers.
var sb *safebrowsing.SafeBrowsing
// main loads the TOML config named on the command line, initialises the
// shared Safe Browsing client and serves HTTP until an error occurs.
func main() {
	flag.Parse()
	if len(flag.Args()) < 1 {
		fmt.Printf("Usage: webserver config-file.toml")
		os.Exit(1)
	}
	var conf Config
	if _, err := toml.DecodeFile(flag.Arg(0), &conf); err != nil {
		fmt.Printf(
			"Error reading config file %s: %s",
			flag.Arg(0),
			err,
		)
		os.Exit(1)
	}
	// initialising the client performs the first full list download, so
	// this can take a while on first run
	var err error
	sb, err = safebrowsing.NewSafeBrowsing(
		conf.GoogleApiKey,
		conf.DataDir,
	)
	if err != nil {
		panic(err)
	}
	if conf.EnableFormPage {
		http.HandleFunc("/form", handleHtml)
	}
	http.HandleFunc("/", handler)
	// ListenAndServe only returns on failure; the error was previously
	// discarded, which made bind failures (port in use, bad address) silent
	if err := http.ListenAndServe(conf.Address, nil); err != nil {
		fmt.Printf("Server error: %s\n", err)
		os.Exit(1)
	}
}
// UrlResponse is the per-URL verdict returned to JSON clients.
type UrlResponse struct {
	IsListed bool   `json:"isListed"`       // true only for a confirmed full-hash match
	List     string `json:"list,omitempty"` // which Safe Browsing list matched
	Error    string `json:"error,omitempty"`
	WarningTitle string `json:"warningTitle,omitempty"` // user-facing interstitial title
	WarningText  string `json:"warningText,omitempty"`  // user-facing interstitial body
	// set when only a prefix matched and a full-hash request was kicked
	// off in the background
	FullHashRequested bool `json:"fullHashRequested,omitempty"`
}
// warnings maps each supported list to the user-facing interstitial
// title and body text. The original had the two body texts swapped: the
// malware list carried the phishing explanation (antiphishing.org) and
// the phishing list carried the malware explanation (StopBadware.org).
var warnings map[string]map[string]string = map[string]map[string]string{
	"goog-malware-shavar": map[string]string{
		"title": "Warning - Visiting this web site may harm your computer.",
		"text": "This page appears to contain malicious code that could be " +
			"downloaded to your computer without your consent. You can " +
			"learn more about harmful web content including viruses and " +
			"other malicious code and how to protect your computer at " +
			"http://StopBadware.org/",
	},
	"googpub-phish-shavar": map[string]string{
		"title": "Warning - Suspected phishing page.",
		"text": "This page may be a forgery or imitation of another website, " +
			"designed to trick users into sharing personal or financial " +
			"information. Entering any personal information on this page " +
			"may result in identity theft or other abuse. You can find " +
			"out more about phishing from http://www.antiphishing.org/",
	},
}
// handleHtml serves the static /form demo page: a small jQuery UI that
// POSTs a JSON array of URLs to "/" and shows the JSON verdicts. The
// page is a single raw-string literal; its bytes (including whitespace)
// are sent verbatim to the client.
func handleHtml(w http.ResponseWriter, r *http.Request) {
	html := `<!DOCTYPE html>
<html>
<body>
<div style="margin: auto; width: 800px; font-family: sans-serif;">
<h2>Example JSON usage:</h2>
Request Object:<br />
<textarea id="txtJson" rows="6" style="width: 100%;">[
"http://www.google.com/",
"http://www.ianfette.org/",
"http://www.evil.com/"
]
</textarea><br />
<br />
<label><input type="checkbox" id="blocking" /> Have server block to confirm suspect URLs*</label><br />
<small>
* As the server contains only partial hash matches for URLs, the first time a URL
matches a bad hash the server needs to consult Google's Safe Browsing service
to fetch the full hash before it is able to confirm it is indeed a bad URL.<br />
<br />
By default, the server returns immediately and spawns a goroutine to fetch the
full hash in the background, meaning the first query on a bad URL will return:
<code>{ isListed: false, fullHashRequested: true }</code>. If however you wish
to wait for this request for full hashes to happen and not miss the first query
about a bad URL, check this box to pass through the blocking=1 parameter.
</small><br />
<br />
<input type="button" value="Submit" onclick="fireRequest();" /><br />
<br />
Output:<br />
<pre id="output" style="border: 1px solid #CCC; padding: 5px; overflow: auto;"></pre><br/>
<br />
JS code:<br />
<pre style="padding: 5px; border: 1px solid #CCC;">
var obj = {"urls": $("#txtJson").val(), "block": $("#blocking").prop("checked")};
$.post("/", obj, function(data, textStatus, jqXHR) {
$("#output").text(data);
});
</pre>
<script>
fireRequest = function() {
var obj = {"urls": $("#txtJson").val(), "block": $("#blocking").prop("checked")};
$.post("/", obj, function(data, textStatus, jqXHR) {
$("#output").text(data);
});
}
</script>
<script src="//ajax.googleapis.com/ajax/libs/jquery/1.11.1/jquery.min.js"></script>
</div>
</body>
</html>
`
	fmt.Fprint(w, html)
}
// queryUrl checks a single URL against the Safe Browsing client. In
// blocking mode unknown prefixes are resolved with a synchronous
// full-hash request (IsListed); otherwise MightBeListed answers from
// local data and a background full-hash fetch is started for suspects.
func queryUrl(url string, isBlocking bool) (response *UrlResponse) {
	response = new(UrlResponse)
	list := ""
	var err error
	fullHashMatch := false
	if isBlocking {
		list, err = sb.IsListed(url)
		fullHashMatch = true
	} else {
		list, fullHashMatch, err = sb.MightBeListed(url)
	}
	if err != nil {
		// the original called fmt.Sprintf(response.Error, ...), using the
		// empty Error field as the format string and discarding the
		// result, so lookup errors were never reported to the client
		response.Error = fmt.Sprintf("Error looking up url: %s", err.Error())
	}
	if list != "" {
		if fullHashMatch && sb.IsUpToDate() {
			// confirmed: full-length hash matched and our data is fresh
			response.IsListed = true
			response.List = list
			response.WarningTitle = warnings[list]["title"]
			response.WarningText = warnings[list]["text"]
		} else {
			// only a prefix matched (or our data is stale): report not
			// listed, but flag that confirmation is in flight
			response.IsListed = false
			response.List = list
			response.FullHashRequested = true
			// Requesting full hash in background...
			go sb.IsListed(url)
		}
	}
	return response
}
// handler answers POSTs whose "urls" form field holds a JSON array of
// URLs with a JSON map of per-URL verdicts. The "block" field selects
// blocking (synchronous full-hash) lookups.
func handler(w http.ResponseWriter, r *http.Request) {
	if err := r.ParseForm(); err != nil {
		fmt.Fprintf(w, "Error loading form: %s", err.Error())
		return
	}
	// any value other than empty/"false"/"0" enables blocking mode
	blockParam := r.FormValue("block")
	isBlocking := blockParam != "" && blockParam != "false" && blockParam != "0"
	urls := make([]string, 0)
	if err := json.Unmarshal([]byte(r.FormValue("urls")), &urls); err != nil {
		fmt.Fprintf(w, "Error reading json: %s", err.Error())
		return
	}
	results := make(map[string]*UrlResponse)
	for _, u := range urls {
		results[u] = queryUrl(u, isBlocking)
	}
	body, err := json.MarshalIndent(results, "", " ")
	if err != nil {
		fmt.Fprintf(w, "Error marshalling response: %s", err.Error())
		return
	}
	fmt.Fprint(w, string(body))
}

View File

@ -0,0 +1,27 @@
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -0,0 +1,22 @@
Additional IP Rights Grant (Patents)
"This implementation" means the copyrightable works distributed by
Google as part of the Go project.
Google hereby grants to You a perpetual, worldwide, non-exclusive,
no-charge, royalty-free, irrevocable (except as stated in this section)
patent license to make, have made, use, offer to sell, sell, import,
transfer and otherwise run, modify and propagate the contents of this
implementation of Go, where such license applies only to those patent
claims, both currently owned or controlled by Google and acquired in
the future, licensable by Google that are necessarily infringed by this
implementation of Go. This grant does not include claims that would be
infringed only as a consequence of further modification of this
implementation. If you or your agent or exclusive licensee institute or
order or agree to the institution of patent litigation against any
entity (including a cross-claim or counterclaim in a lawsuit) alleging
that this implementation of Go or any code incorporated within this
implementation of Go constitutes direct or contributory patent
infringement, or inducement of patent infringement, then any patent
rights granted to you under this License for this implementation of Go
shall terminate as of the date such litigation is filed.

27
Godeps/_workspace/src/golang.org/x/net/LICENSE generated vendored Normal file
View File

@ -0,0 +1,27 @@
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

22
Godeps/_workspace/src/golang.org/x/net/PATENTS generated vendored Normal file
View File

@ -0,0 +1,22 @@
Additional IP Rights Grant (Patents)
"This implementation" means the copyrightable works distributed by
Google as part of the Go project.
Google hereby grants to You a perpetual, worldwide, non-exclusive,
no-charge, royalty-free, irrevocable (except as stated in this section)
patent license to make, have made, use, offer to sell, sell, import,
transfer and otherwise run, modify and propagate the contents of this
implementation of Go, where such license applies only to those patent
claims, both currently owned or controlled by Google and acquired in
the future, licensable by Google that are necessarily infringed by this
implementation of Go. This grant does not include claims that would be
infringed only as a consequence of further modification of this
implementation. If you or your agent or exclusive licensee institute or
order or agree to the institution of patent litigation against any
entity (including a cross-claim or counterclaim in a lawsuit) alleging
that this implementation of Go or any code incorporated within this
implementation of Go constitutes direct or contributory patent
infringement, or inducement of patent infringement, then any patent
rights granted to you under this License for this implementation of Go
shall terminate as of the date such litigation is filed.

View File

@ -43,38 +43,11 @@ func (list) String() string {
// domains like foo.appspot.com can be found at
// https://wiki.mozilla.org/Public_Suffix_List/Use_Cases
func PublicSuffix(domain string) (publicSuffix string, icann bool) {
publicSuffix, icann = getSuffix(domain, false)
if publicSuffix == "" {
// If no rules match, the prevailing rule is "*", so return the rightmost
// label in the domain.
publicSuffix = domain[1+strings.LastIndex(domain, "."):]
}
return
}
// ICANNTLD returns the public suffix of the domain using only the ICANN
// section of the library's compiled-in database.
// If the domain does not end in an ICANN-managed domain, ICANNTLD returns an
// error.
func ICANNTLD(domain string) (string, error) {
tld, _ := getSuffix(domain, true)
if tld == "" {
return "", fmt.Errorf("publicsuffix: %s has no ICANN TLD.", domain)
}
return tld, nil
}
// getSuffix is a helper function underlying both PublicSuffix and ICANNTLD. It
// applies the public suffix algorithm to domain. If icannOnly is true, it
// considers only the ICANN section of the public suffix list.
// If no rules match, getSuffix returns the empty string.
func getSuffix(domain string, icannOnly bool) (publicSuffix string, icann bool) {
lo, hi := uint32(0), uint32(numTLD)
s, suffix, wildcard := domain, len(domain), false
var dot int
loop:
for ;; s = s[:dot] {
dot = strings.LastIndex(s, ".")
for {
dot := strings.LastIndex(s, ".")
if wildcard {
suffix = 1 + dot
}
@ -88,11 +61,6 @@ loop:
u := nodes[f] >> (nodesBitsTextOffset + nodesBitsTextLength)
icann = u&(1<<nodesBitsICANN-1) != 0
// If we're only interested in ICANN suffixes, ignore any matches that are
// not ICANN.
if icannOnly && !icann {
continue
}
u >>= nodesBitsICANN
u = children[u&(1<<nodesBitsChildren-1)]
lo = u & (1<<childrenBitsLo - 1)
@ -109,12 +77,14 @@ loop:
u >>= childrenBitsNodeType
wildcard = u&(1<<childrenBitsWildcard-1) != 0
if dot == -1 {
if dot == -1 {
break
}
s = s[:dot]
}
if icannOnly && suffix < len(domain) {
icann = true
if suffix == len(domain) {
// If no rules match, the prevailing rule is "*".
return domain[1+strings.LastIndex(domain, "."):], icann
}
return domain[suffix:], icann
}