Update test-infra (#21)

* Bring in the latest features and fixes.
* Use the default build/unit test runners.
* Remove the test-infra import hack from `Gopkg.lock` and `update-deps.sh`.
Adriano Cunha 2019-01-17 17:08:31 -08:00 committed by Knative Prow Robot
parent 06e9787157
commit 97110f4e0c
37 changed files with 6514 additions and 352 deletions

Gopkg.lock generated

@@ -79,6 +79,21 @@
   pruneopts = "NUT"
   revision = "24818f796faf91cd76ec7bddd72458fbced7a6c1"
 
+[[projects]]
+  branch = "master"
+  digest = "1:b6b3bd1c08338cb397623d1b9dedde711eccc2d3408fe9017a495d815065d869"
+  name = "github.com/google/licenseclassifier"
+  packages = [
+    ".",
+    "internal/sets",
+    "stringclassifier",
+    "stringclassifier/internal/pq",
+    "stringclassifier/searchset",
+    "stringclassifier/searchset/tokenizer",
+  ]
+  pruneopts = "NUT"
+  revision = "c2a262e3078ad90718f59866f1ec18601b2fee1b"
+
 [[projects]]
   digest = "1:06a7dadb7b760767341ffb6c8d377238d68a1226f2b21b5d497d2e3f6ecf6b4e"
   name = "github.com/googleapis/gnostic"

@@ -133,11 +148,14 @@
 [[projects]]
   branch = "master"
-  digest = "1:09521a823a008f7df66962ac2637c22e1cdc842ebfcc7a083c444d35258986f7"
+  digest = "1:da39b58557275d30a9340c2e1e13e16691461f9859d3230f59cceed411c04b49"
   name = "github.com/knative/test-infra"
-  packages = ["."]
-  pruneopts = "T"
-  revision = "7ed32409fa2c447a44a4281f0022ab25ce955f51"
+  packages = [
+    "scripts",
+    "tools/dep-collector",
+  ]
+  pruneopts = "UT"
+  revision = "89e4aae358be056ee70b595c20106a4a5c70fdc1"

@@ -171,6 +189,14 @@
   revision = "5f041e8faa004a95c88a202771f4cc3e991971e6"
   version = "v2.0.1"
 
+[[projects]]
+  digest = "1:d917313f309bda80d27274d53985bc65651f81a5b66b820749ac7f8ef061fd04"
+  name = "github.com/sergi/go-diff"
+  packages = ["diffmatchpatch"]
+  pruneopts = "NUT"
+  revision = "1744e2970ca51c86172c8190fadad617561ed6e7"
+  version = "v1.0.0"
+
 [[projects]]
   digest = "1:9d8420bbf131d1618bde6530af37c3799340d3762cc47210c1d9532a4c3a2779"
   name = "github.com/spf13/pflag"

@@ -452,7 +478,8 @@
     "github.com/google/go-cmp/cmp",
     "github.com/knative/pkg/apis",
     "github.com/knative/pkg/kmeta",
-    "github.com/knative/test-infra",
+    "github.com/knative/test-infra/scripts",
+    "github.com/knative/test-infra/tools/dep-collector",
     "k8s.io/api/core/v1",
     "k8s.io/apimachinery/pkg/api/equality",
     "k8s.io/apimachinery/pkg/api/errors",

Gopkg.toml

@@ -8,7 +8,8 @@ required = [
   "k8s.io/code-generator/cmd/client-gen",
   "k8s.io/code-generator/cmd/lister-gen",
   "k8s.io/code-generator/cmd/informer-gen",
-  "github.com/knative/test-infra",
+  "github.com/knative/test-infra/scripts",
+  "github.com/knative/test-infra/tools/dep-collector",
 ]
 
 [[override]]

@@ -45,5 +46,4 @@ required = [
 [[prune.project]]
   name = "github.com/knative/test-infra"
-  unused-packages = false
   non-go = false

update-deps.sh

@@ -27,7 +27,3 @@ dep ensure
 
 rm -rf $(find vendor/ -name 'OWNERS')
 rm -rf $(find vendor/ -name '*_test.go')
-
-# Keep the only dir in knative/test-infra we're interested in
-find vendor/github.com/knative/test-infra -mindepth 1 -maxdepth 1 ! -name scripts -exec rm -fr {} \;

presubmit-tests.sh

@@ -23,24 +23,8 @@
 source $(dirname $0)/../vendor/github.com/knative/test-infra/scripts/presubmit-tests.sh
 
-function build_tests() {
-  header "Running build tests"
-  local result=0
-  go build -v ./pkg/... || result=1
-
-  subheader "Checking autogenerated code is up-to-date"
-  ./hack/verify-codegen.sh || result=1
-
-  return ${result}
-}
-
-function unit_tests() {
-  header "Running unit tests"
-  report_go_test ./...
-}
-
-function integration_tests() {
-  header "TODO(mattmoor): integration tests"
-}
+# TODO(mattmoor): integration tests
+
+# We use the default build, unit and integration test runners.
 
 main $@
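
With the runner functions deleted, this script falls through to the default build_tests, unit_tests, and integration_tests implementations provided by the sourced test-infra library. A repository can still customize a single phase by redefining the corresponding hook before calling main, exactly as the removed code did. A minimal sketch (hook and helper names taken from the vendored scripts/presubmit-tests.sh and the removed lines above):

#!/usr/bin/env bash
# Source the shared presubmit library; it supplies main() and the default runners.
source $(dirname $0)/../vendor/github.com/knative/test-infra/scripts/presubmit-tests.sh

# Redefining a hook overrides only that phase; the other defaults still run.
function build_tests() {
  header "Running build tests"
  go build -v ./pkg/... || return 1
  subheader "Checking autogenerated code is up-to-date"
  ./hack/verify-codegen.sh
}

main $@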

vendor/github.com/google/licenseclassifier/LICENSE generated vendored Normal file

@@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.


@@ -0,0 +1,429 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package licenseclassifier provides methods to identify the open source
// license that most closely matches an unknown license.
package licenseclassifier
import (
"archive/tar"
"bytes"
"compress/gzip"
"fmt"
"html"
"io"
"math"
"regexp"
"sort"
"strings"
"sync"
"unicode"
"github.com/google/licenseclassifier/stringclassifier"
"github.com/google/licenseclassifier/stringclassifier/searchset"
)
// DefaultConfidenceThreshold is the minimum confidence percentage we're willing to accept in order
// to say that a match is good.
const DefaultConfidenceThreshold = 0.80
var (
// Normalizers is a list of functions that get applied to the strings
// before they are registered with the string classifier.
Normalizers = []stringclassifier.NormalizeFunc{
html.UnescapeString,
removeShebangLine,
RemoveNonWords,
NormalizeEquivalentWords,
NormalizePunctuation,
strings.ToLower,
removeIgnorableTexts,
stringclassifier.FlattenWhitespace,
strings.TrimSpace,
}
// commonLicenseWords are words that are common to all known licenses.
// If an unknown text doesn't have at least one of these, then we can
// ignore it.
commonLicenseWords = []*regexp.Regexp{
regexp.MustCompile(`(?i)\bcode\b`),
regexp.MustCompile(`(?i)\blicense\b`),
regexp.MustCompile(`(?i)\boriginal\b`),
regexp.MustCompile(`(?i)\brights\b`),
regexp.MustCompile(`(?i)\bsoftware\b`),
regexp.MustCompile(`(?i)\bterms\b`),
regexp.MustCompile(`(?i)\bversion\b`),
regexp.MustCompile(`(?i)\bwork\b`),
}
)
// License is a classifier pre-loaded with known open source licenses.
type License struct {
c *stringclassifier.Classifier
// Threshold is the lowest confidence percentage acceptable for the
// classifier.
Threshold float64
}
// New creates a license classifier and pre-loads it with known open source licenses.
func New(threshold float64) (*License, error) {
classifier := &License{
c: stringclassifier.New(threshold, Normalizers...),
Threshold: threshold,
}
if err := classifier.registerLicenses(LicenseArchive); err != nil {
return nil, fmt.Errorf("cannot register licenses: %v", err)
}
return classifier, nil
}
// NewWithForbiddenLicenses creates a license classifier and pre-loads it with
// known open source licenses which are forbidden.
func NewWithForbiddenLicenses(threshold float64) (*License, error) {
classifier := &License{
c: stringclassifier.New(threshold, Normalizers...),
Threshold: threshold,
}
if err := classifier.registerLicenses(ForbiddenLicenseArchive); err != nil {
return nil, fmt.Errorf("cannot register licenses: %v", err)
}
return classifier, nil
}
// WithinConfidenceThreshold returns true if the confidence value is above or
// equal to the confidence threshold.
func (c *License) WithinConfidenceThreshold(conf float64) bool {
return conf > c.Threshold || math.Abs(conf-c.Threshold) < math.SmallestNonzeroFloat64
}
// NearestMatch returns the "nearest" match to the given set of known licenses.
// Returned are the name of the license, and a confidence percentage indicating
// how confident the classifier is in the result.
func (c *License) NearestMatch(contents string) *stringclassifier.Match {
if !c.hasCommonLicenseWords(contents) {
return nil
}
m := c.c.NearestMatch(contents)
m.Name = strings.TrimSuffix(m.Name, ".header")
return m
}
// MultipleMatch matches all licenses within an unknown text.
func (c *License) MultipleMatch(contents string, includeHeaders bool) stringclassifier.Matches {
norm := normalizeText(contents)
if !c.hasCommonLicenseWords(norm) {
return nil
}
m := make(map[stringclassifier.Match]bool)
var matches stringclassifier.Matches
for _, v := range c.c.MultipleMatch(norm) {
if !c.WithinConfidenceThreshold(v.Confidence) {
continue
}
if !includeHeaders && strings.HasSuffix(v.Name, ".header") {
continue
}
v.Name = strings.TrimSuffix(v.Name, ".header")
if re, ok := forbiddenRegexps[v.Name]; ok && !re.MatchString(norm) {
continue
}
if _, ok := m[*v]; !ok {
m[*v] = true
matches = append(matches, v)
}
}
sort.Sort(matches)
return matches
}
func normalizeText(s string) string {
for _, n := range Normalizers {
s = n(s)
}
return s
}
// hasCommonLicenseWords returns true if the unknown text has at least one word
// that's common to all licenses.
func (c *License) hasCommonLicenseWords(s string) bool {
for _, re := range commonLicenseWords {
if re.MatchString(s) {
return true
}
}
return false
}
type archivedValue struct {
name string
normalized string
set *searchset.SearchSet
}
// registerLicenses loads all known licenses and adds them to c as known values
// for comparison. The allocated space after ingesting the 'licenses.db'
// archive is ~167M.
func (c *License) registerLicenses(archive string) error {
contents, err := ReadLicenseFile(archive)
if err != nil {
return err
}
reader := bytes.NewReader(contents)
gr, err := gzip.NewReader(reader)
if err != nil {
return err
}
defer gr.Close()
tr := tar.NewReader(gr)
var muVals sync.Mutex
var vals []archivedValue
for i := 0; ; i++ {
hdr, err := tr.Next()
if err == io.EOF {
break
}
if err != nil {
return err
}
name := strings.TrimSuffix(hdr.Name, ".txt")
// Read normalized value.
var b bytes.Buffer
if _, err := io.Copy(&b, tr); err != nil {
return err
}
normalized := b.String()
b.Reset()
// Read precomputed hashes.
hdr, err = tr.Next()
if err != nil {
return err
}
if _, err := io.Copy(&b, tr); err != nil {
return err
}
var set searchset.SearchSet
searchset.Deserialize(&b, &set)
muVals.Lock()
vals = append(vals, archivedValue{name, normalized, &set})
muVals.Unlock()
}
for _, v := range vals {
if err = c.c.AddPrecomputedValue(v.name, v.normalized, v.set); err != nil {
return err
}
}
return nil
}
// endOfLicenseText is text commonly associated with the end of a license. We
// can remove text that occurs after it.
var endOfLicenseText = []string{
"END OF TERMS AND CONDITIONS",
}
// TrimExtraneousTrailingText removes text that follows an obvious end of the
// license, since it does not contain substantive license text.
func TrimExtraneousTrailingText(s string) string {
for _, e := range endOfLicenseText {
if i := strings.LastIndex(s, e); i != -1 {
return s[:i+len(e)]
}
}
return s
}
var copyrightRE = regexp.MustCompile(`(?m)(?i:Copyright)\s+(?i:©\s+|\(c\)\s+)?(?:\d{2,4})(?:[-,]\s*\d{2,4})*,?\s*(?i:by)?\s*(.*?(?i:\s+Inc\.)?)[.,]?\s*(?i:All rights reserved\.?)?\s*$`)
// CopyrightHolder finds a copyright notification, if it exists, and returns
// the copyright holder.
func CopyrightHolder(contents string) string {
matches := copyrightRE.FindStringSubmatch(contents)
if len(matches) == 2 {
return matches[1]
}
return ""
}
var publicDomainRE = regexp.MustCompile("(?i)(this file )?is( in the)? public domain")
// HasPublicDomainNotice performs a simple regex over the contents to see if a
// public domain notice is in there. As you can imagine, this isn't 100%
// definitive, but can be useful if a license match isn't found.
func (c *License) HasPublicDomainNotice(contents string) bool {
return publicDomainRE.FindString(contents) != ""
}
// ignorableTexts is a list of lines at the start of the string we can remove
// to get a cleaner match.
var ignorableTexts = []*regexp.Regexp{
regexp.MustCompile(`(?i)^(?:the )?mit license(?: \(mit\))?$`),
regexp.MustCompile(`(?i)^(?:new )?bsd license$`),
regexp.MustCompile(`(?i)^copyright and permission notice$`),
regexp.MustCompile(`(?i)^copyright (\(c\) )?(\[yyyy\]|\d{4})[,.]? .*$`),
regexp.MustCompile(`(?i)^(all|some) rights reserved\.?$`),
regexp.MustCompile(`(?i)^@license$`),
regexp.MustCompile(`^\s*$`),
}
// removeIgnorableTexts removes common text, which is not important for
// classification, that shows up before the body of the license.
func removeIgnorableTexts(s string) string {
lines := strings.Split(strings.TrimRight(s, "\n"), "\n")
var start int
for ; start < len(lines); start++ {
line := strings.TrimSpace(lines[start])
var matches bool
for _, re := range ignorableTexts {
if re.MatchString(line) {
matches = true
break
}
}
if !matches {
break
}
}
end := len(lines)
if start > end {
return "\n"
}
return strings.Join(lines[start:end], "\n") + "\n"
}
// removeShebangLine removes the '#!...' line if it's the first line in the
// file. Note that if it's the only line in the file, it won't be removed.
func removeShebangLine(s string) string {
lines := strings.Split(s, "\n")
if len(lines) <= 1 || !strings.HasPrefix(lines[0], "#!") {
return s
}
return strings.Join(lines[1:], "\n")
}
// isDecorative returns true if the line is made up purely of non-letter and
// non-digit characters.
func isDecorative(s string) bool {
for _, c := range s {
if unicode.IsLetter(c) || unicode.IsDigit(c) {
return false
}
}
return true
}
var nonWords = regexp.MustCompile("[[:punct:]]+")
// RemoveNonWords removes non-words from the string.
func RemoveNonWords(s string) string {
return nonWords.ReplaceAllString(s, " ")
}
// interchangeablePunctuation is punctuation that can be normalized.
var interchangeablePunctuation = []struct {
interchangeable *regexp.Regexp
substitute string
}{
// Hyphen, Dash, En Dash, and Em Dash.
{regexp.MustCompile(`[-‒–—]`), "-"},
// Single, Double, Curly Single, and Curly Double.
{regexp.MustCompile("['\"`‘’“”]"), "'"},
// Copyright.
{regexp.MustCompile("©"), "(c)"},
// Hyphen-separated words.
{regexp.MustCompile(`(\S)-\s+(\S)`), "${1}-${2}"},
// Currency and Section. (Different copies of the CDDL use each marker.)
{regexp.MustCompile("[§¤]"), "(s)"},
// Middle Dot
{regexp.MustCompile("·"), "*"},
}
// NormalizePunctuation normalizes interchangeable punctuation marks (hyphens,
// quotes, copyright signs, etc.) to canonical forms.
func NormalizePunctuation(s string) string {
for _, iw := range interchangeablePunctuation {
s = iw.interchangeable.ReplaceAllString(s, iw.substitute)
}
return s
}
// interchangeableWords are words we can substitute for a normalized form
// without changing the meaning of the license. See
// https://spdx.org/spdx-license-list/matching-guidelines for the list.
var interchangeableWords = []struct {
interchangeable *regexp.Regexp
substitute string
}{
{regexp.MustCompile("(?i)Acknowledgment"), "Acknowledgement"},
{regexp.MustCompile("(?i)Analogue"), "Analog"},
{regexp.MustCompile("(?i)Analyse"), "Analyze"},
{regexp.MustCompile("(?i)Artefact"), "Artifact"},
{regexp.MustCompile("(?i)Authorisation"), "Authorization"},
{regexp.MustCompile("(?i)Authorised"), "Authorized"},
{regexp.MustCompile("(?i)Calibre"), "Caliber"},
{regexp.MustCompile("(?i)Cancelled"), "Canceled"},
{regexp.MustCompile("(?i)Capitalisations"), "Capitalizations"},
{regexp.MustCompile("(?i)Catalogue"), "Catalog"},
{regexp.MustCompile("(?i)Categorise"), "Categorize"},
{regexp.MustCompile("(?i)Centre"), "Center"},
{regexp.MustCompile("(?i)Emphasised"), "Emphasized"},
{regexp.MustCompile("(?i)Favour"), "Favor"},
{regexp.MustCompile("(?i)Favourite"), "Favorite"},
{regexp.MustCompile("(?i)Fulfil"), "Fulfill"},
{regexp.MustCompile("(?i)Fulfilment"), "Fulfillment"},
{regexp.MustCompile("(?i)Initialise"), "Initialize"},
{regexp.MustCompile("(?i)Judgment"), "Judgement"},
{regexp.MustCompile("(?i)Labelling"), "Labeling"},
{regexp.MustCompile("(?i)Labour"), "Labor"},
{regexp.MustCompile("(?i)Licence"), "License"},
{regexp.MustCompile("(?i)Maximise"), "Maximize"},
{regexp.MustCompile("(?i)Modelled"), "Modeled"},
{regexp.MustCompile("(?i)Modelling"), "Modeling"},
{regexp.MustCompile("(?i)Offence"), "Offense"},
{regexp.MustCompile("(?i)Optimise"), "Optimize"},
{regexp.MustCompile("(?i)Organisation"), "Organization"},
{regexp.MustCompile("(?i)Organise"), "Organize"},
{regexp.MustCompile("(?i)Practise"), "Practice"},
{regexp.MustCompile("(?i)Programme"), "Program"},
{regexp.MustCompile("(?i)Realise"), "Realize"},
{regexp.MustCompile("(?i)Recognise"), "Recognize"},
{regexp.MustCompile("(?i)Signalling"), "Signaling"},
{regexp.MustCompile("(?i)Sub[- ]license"), "Sublicense"},
{regexp.MustCompile("(?i)Utilisation"), "Utilization"},
{regexp.MustCompile("(?i)Whilst"), "While"},
{regexp.MustCompile("(?i)Wilful"), "Wilfull"},
{regexp.MustCompile("(?i)Non-commercial"), "Noncommercial"},
{regexp.MustCompile("(?i)Per cent"), "Percent"},
}
// NormalizeEquivalentWords normalizes equivalent words that are interchangeable.
func NormalizeEquivalentWords(s string) string {
for _, iw := range interchangeableWords {
s = iw.interchangeable.ReplaceAllString(s, iw.substitute)
}
return s
}
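
To make the API above concrete, a minimal usage sketch: build a classifier with New and ask for the nearest match. It assumes licenses.db is locatable on GOPATH (see the ReadLicenseFile helper in the next file); the "LICENSE" path is purely illustrative.

package main

import (
	"fmt"
	"io/ioutil"
	"log"

	"github.com/google/licenseclassifier"
)

func main() {
	// DefaultConfidenceThreshold is 0.80, per the constant above.
	c, err := licenseclassifier.New(licenseclassifier.DefaultConfidenceThreshold)
	if err != nil {
		log.Fatalf("cannot create classifier: %v", err)
	}
	contents, err := ioutil.ReadFile("LICENSE")
	if err != nil {
		log.Fatalf("cannot read unknown text: %v", err)
	}
	// NearestMatch returns nil when the text has no common license words.
	m := c.NearestMatch(string(contents))
	if m != nil && c.WithinConfidenceThreshold(m.Confidence) {
		fmt.Printf("nearest match: %s (confidence %v)\n", m.Name, m.Confidence)
	}
}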


@@ -0,0 +1,65 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package licenseclassifier
import (
"go/build"
"io/ioutil"
"os"
"path/filepath"
)
const (
// LicenseDirectory is the directory where the prototype licenses are kept.
LicenseDirectory = "src/github.com/google/licenseclassifier/licenses"
// LicenseArchive is the name of the archive containing preprocessed
// license texts.
LicenseArchive = "licenses.db"
// ForbiddenLicenseArchive is the name of the archive containing preprocessed
// forbidden license texts only.
ForbiddenLicenseArchive = "forbidden_licenses.db"
)
func findInGOPATH(rel string) (fullPath string, err error) {
for _, path := range filepath.SplitList(build.Default.GOPATH) {
fullPath := filepath.Join(path, rel)
if _, err := os.Stat(fullPath); err != nil {
if os.IsNotExist(err) {
continue
}
return "", err
}
return fullPath, nil
}
return "", nil
}
// ReadLicenseFile locates and reads the license file.
func ReadLicenseFile(filename string) ([]byte, error) {
archive, err := findInGOPATH(filepath.Join(LicenseDirectory, filename))
if err != nil || archive == "" {
return nil, err
}
return ioutil.ReadFile(archive)
}
// ReadLicenseDir reads the directory containing the license files.
func ReadLicenseDir() ([]os.FileInfo, error) {
filename, err := findInGOPATH(filepath.Join(LicenseDirectory, LicenseArchive))
if err != nil || filename == "" {
return nil, err
}
return ioutil.ReadDir(filepath.Dir(filename))
}


@@ -0,0 +1,48 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package licenseclassifier
import "regexp"
var (
reCCBYNC = regexp.MustCompile(`(?i).*\bAttribution NonCommercial\b.*`)
reCCBYNCND = regexp.MustCompile(`(?i).*\bAttribution NonCommercial NoDerivs\b.*`)
reCCBYNCSA = regexp.MustCompile(`(?i).*\bAttribution NonCommercial ShareAlike\b.*`)
// forbiddenRegexps are regular expressions we expect to find in
// forbidden licenses. If we think we have a forbidden license but
// don't find the equivalent phrase, then it's probably just a
// misclassification.
forbiddenRegexps = map[string]*regexp.Regexp{
AGPL10: regexp.MustCompile(`(?i).*\bAFFERO GENERAL PUBLIC LICENSE\b.*`),
AGPL30: regexp.MustCompile(`(?i).*\bGNU AFFERO GENERAL PUBLIC LICENSE\b.*`),
CCBYNC10: reCCBYNC,
CCBYNC20: reCCBYNC,
CCBYNC25: reCCBYNC,
CCBYNC30: reCCBYNC,
CCBYNC40: reCCBYNC,
CCBYNCND10: regexp.MustCompile(`(?i).*\bAttribution NoDerivs NonCommercial\b.*`),
CCBYNCND20: reCCBYNCND,
CCBYNCND25: reCCBYNCND,
CCBYNCND30: reCCBYNCND,
CCBYNCND40: regexp.MustCompile(`(?i).*\bAttribution NonCommercial NoDerivatives\b.*`),
CCBYNCSA10: reCCBYNCSA,
CCBYNCSA20: reCCBYNCSA,
CCBYNCSA25: reCCBYNCSA,
CCBYNCSA30: reCCBYNCSA,
CCBYNCSA40: reCCBYNCSA,
WTFPL: regexp.MustCompile(`(?i).*\bDO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE\b.*`),
}
)


@@ -0,0 +1,20 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package sets provides sets for storing collections of unique elements.
package sets
// present is an empty struct used as the "value" in the set maps, since
// empty structs consume zero bytes (unlike one unnecessary byte per bool).
type present struct{}
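
A standalone illustration of the zero-byte property described above (unsafe is used only to print the sizes; the set literal mirrors how StringSet stores elements):

package main

import (
	"fmt"
	"unsafe"
)

// present mirrors the package's empty struct: it occupies zero bytes.
type present struct{}

func main() {
	fmt.Println(unsafe.Sizeof(present{})) // 0
	fmt.Println(unsafe.Sizeof(true))      // 1

	// Membership testing is the usual comma-ok map lookup.
	set := map[string]present{"MIT": {}, "Apache-2.0": {}}
	_, ok := set["MIT"]
	fmt.Println(ok) // true
}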


@@ -0,0 +1,228 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package sets
import (
"fmt"
"sort"
"strings"
)
// StringSet stores a set of unique string elements.
type StringSet struct {
set map[string]present
}
// NewStringSet creates a StringSet containing the supplied initial string elements.
func NewStringSet(elements ...string) *StringSet {
s := &StringSet{}
s.set = make(map[string]present)
s.Insert(elements...)
return s
}
// Copy returns a newly allocated copy of the supplied StringSet.
func (s *StringSet) Copy() *StringSet {
c := NewStringSet()
if s != nil {
for e := range s.set {
c.set[e] = present{}
}
}
return c
}
// Insert zero or more string elements into the StringSet.
// As expected for a Set, elements already present in the StringSet are
// simply ignored.
func (s *StringSet) Insert(elements ...string) {
for _, e := range elements {
s.set[e] = present{}
}
}
// Delete zero or more string elements from the StringSet.
// Any elements not present in the StringSet are simply ignored.
func (s *StringSet) Delete(elements ...string) {
for _, e := range elements {
delete(s.set, e)
}
}
// Intersect returns a new StringSet containing the intersection of the
// receiver and argument StringSets. Returns an empty set if the argument is nil.
func (s *StringSet) Intersect(other *StringSet) *StringSet {
if other == nil {
return NewStringSet()
}
// Point a and b to the maps, setting a to the smaller of the two.
a, b := s.set, other.set
if len(b) < len(a) {
a, b = b, a
}
// Perform the intersection.
intersect := NewStringSet()
for e := range a {
if _, ok := b[e]; ok {
intersect.set[e] = present{}
}
}
return intersect
}
// Disjoint returns true if the intersection of the receiver and the argument
// StringSets is the empty set. Returns true if the argument is nil or either
// StringSet is the empty set.
func (s *StringSet) Disjoint(other *StringSet) bool {
if other == nil || len(other.set) == 0 || len(s.set) == 0 {
return true
}
// Point a and b to the maps, setting a to the smaller of the two.
a, b := s.set, other.set
if len(b) < len(a) {
a, b = b, a
}
// Check for non-empty intersection.
for e := range a {
if _, ok := b[e]; ok {
return false // Early-exit because intersecting.
}
}
return true
}
// Difference returns a new StringSet containing the elements in the receiver
// that are not present in the argument StringSet. Returns a copy of the
// receiver if the argument is nil.
func (s *StringSet) Difference(other *StringSet) *StringSet {
if other == nil {
return s.Copy()
}
// Insert only the elements in the receiver that are not present in the
// argument StringSet.
diff := NewStringSet()
for e := range s.set {
if _, ok := other.set[e]; !ok {
diff.set[e] = present{}
}
}
return diff
}
// Unique returns a new StringSet containing the elements in the receiver
// that are not present in the argument StringSet *and* the elements in the
// argument StringSet that are not in the receiver (which is the union of two
// disjoint sets). Returns a copy of the
// receiver if the argument is nil.
func (s *StringSet) Unique(other *StringSet) *StringSet {
if other == nil {
return s.Copy()
}
sNotInOther := s.Difference(other)
otherNotInS := other.Difference(s)
// Duplicate Union implementation here to avoid extra Copy, since both
// sNotInOther and otherNotInS are already copies.
unique := sNotInOther
for e := range otherNotInS.set {
unique.set[e] = present{}
}
return unique
}
// Equal returns true if the receiver and the argument StringSet contain
// exactly the same elements.
func (s *StringSet) Equal(other *StringSet) bool {
if s == nil || other == nil {
return s == nil && other == nil
}
// Two sets of different length cannot have the exact same unique elements.
if len(s.set) != len(other.set) {
return false
}
// Only one loop is needed. If the two sets are known to be of equal
// length, then the two sets are equal only if exactly all of the elements
// in the first set are found in the second.
for e := range s.set {
if _, ok := other.set[e]; !ok {
return false
}
}
return true
}
// Union returns a new StringSet containing the union of the receiver and
// argument StringSets. Returns a copy of the receiver if the argument is nil.
func (s *StringSet) Union(other *StringSet) *StringSet {
union := s.Copy()
if other != nil {
for e := range other.set {
union.set[e] = present{}
}
}
return union
}
// Contains returns true if element is in the StringSet.
func (s *StringSet) Contains(element string) bool {
_, in := s.set[element]
return in
}
// Len returns the number of unique elements in the StringSet.
func (s *StringSet) Len() int {
return len(s.set)
}
// Empty returns true if the receiver is the empty set.
func (s *StringSet) Empty() bool {
return len(s.set) == 0
}
// Elements returns a []string of the elements in the StringSet, in no
// particular (or consistent) order.
func (s *StringSet) Elements() []string {
elements := []string{} // Return at least an empty slice rather than nil.
for e := range s.set {
elements = append(elements, e)
}
return elements
}
// Sorted returns a sorted []string of the elements in the StringSet.
func (s *StringSet) Sorted() []string {
elements := s.Elements()
sort.Strings(elements)
return elements
}
// String formats the StringSet elements as sorted strings, representing them
// in "array initializer" syntax.
func (s *StringSet) String() string {
elements := s.Sorted()
var quoted []string
for _, e := range elements {
quoted = append(quoted, fmt.Sprintf("%q", e))
}
return fmt.Sprintf("{%s}", strings.Join(quoted, ", "))
}
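
A short sketch of the set algebra defined above. Note that internal/ packages are importable only from within the licenseclassifier module, so this illustrates semantics only; the license names are arbitrary examples.

package main

import (
	"fmt"

	"github.com/google/licenseclassifier/internal/sets"
)

func main() {
	a := sets.NewStringSet("MIT", "Apache-2.0", "GPL-2.0")
	b := sets.NewStringSet("GPL-2.0", "AGPL-3.0")

	// String() prints sorted, quoted elements in braces.
	fmt.Println(a.Intersect(b))  // {"GPL-2.0"}
	fmt.Println(a.Union(b))      // {"AGPL-3.0", "Apache-2.0", "GPL-2.0", "MIT"}
	fmt.Println(a.Difference(b)) // {"Apache-2.0", "MIT"}
	fmt.Println(a.Contains("MIT"), a.Disjoint(b)) // true false
}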


@@ -0,0 +1,376 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package licenseclassifier
// *** NOTE: Update this file when adding a new license. You need to:
//
// 1. Add the canonical name to the list, and
// 2. Categorize the license.
import "github.com/google/licenseclassifier/internal/sets"
// Canonical names of the licenses.
const (
// The names come from the https://spdx.org/licenses website, and are
// also the filenames of the licenses in licenseclassifier/licenses.
AFL11 = "AFL-1.1"
AFL12 = "AFL-1.2"
AFL20 = "AFL-2.0"
AFL21 = "AFL-2.1"
AFL30 = "AFL-3.0"
AGPL10 = "AGPL-1.0"
AGPL30 = "AGPL-3.0"
Apache10 = "Apache-1.0"
Apache11 = "Apache-1.1"
Apache20 = "Apache-2.0"
APSL10 = "APSL-1.0"
APSL11 = "APSL-1.1"
APSL12 = "APSL-1.2"
APSL20 = "APSL-2.0"
Artistic10cl8 = "Artistic-1.0-cl8"
Artistic10Perl = "Artistic-1.0-Perl"
Artistic10 = "Artistic-1.0"
Artistic20 = "Artistic-2.0"
BCL = "BCL"
Beerware = "Beerware"
BSD2ClauseFreeBSD = "BSD-2-Clause-FreeBSD"
BSD2ClauseNetBSD = "BSD-2-Clause-NetBSD"
BSD2Clause = "BSD-2-Clause"
BSD3ClauseAttribution = "BSD-3-Clause-Attribution"
BSD3ClauseClear = "BSD-3-Clause-Clear"
BSD3ClauseLBNL = "BSD-3-Clause-LBNL"
BSD3Clause = "BSD-3-Clause"
BSD4Clause = "BSD-4-Clause"
BSD4ClauseUC = "BSD-4-Clause-UC"
BSDProtection = "BSD-Protection"
BSL10 = "BSL-1.0"
CC010 = "CC0-1.0"
CCBY10 = "CC-BY-1.0"
CCBY20 = "CC-BY-2.0"
CCBY25 = "CC-BY-2.5"
CCBY30 = "CC-BY-3.0"
CCBY40 = "CC-BY-4.0"
CCBYNC10 = "CC-BY-NC-1.0"
CCBYNC20 = "CC-BY-NC-2.0"
CCBYNC25 = "CC-BY-NC-2.5"
CCBYNC30 = "CC-BY-NC-3.0"
CCBYNC40 = "CC-BY-NC-4.0"
CCBYNCND10 = "CC-BY-NC-ND-1.0"
CCBYNCND20 = "CC-BY-NC-ND-2.0"
CCBYNCND25 = "CC-BY-NC-ND-2.5"
CCBYNCND30 = "CC-BY-NC-ND-3.0"
CCBYNCND40 = "CC-BY-NC-ND-4.0"
CCBYNCSA10 = "CC-BY-NC-SA-1.0"
CCBYNCSA20 = "CC-BY-NC-SA-2.0"
CCBYNCSA25 = "CC-BY-NC-SA-2.5"
CCBYNCSA30 = "CC-BY-NC-SA-3.0"
CCBYNCSA40 = "CC-BY-NC-SA-4.0"
CCBYND10 = "CC-BY-ND-1.0"
CCBYND20 = "CC-BY-ND-2.0"
CCBYND25 = "CC-BY-ND-2.5"
CCBYND30 = "CC-BY-ND-3.0"
CCBYND40 = "CC-BY-ND-4.0"
CCBYSA10 = "CC-BY-SA-1.0"
CCBYSA20 = "CC-BY-SA-2.0"
CCBYSA25 = "CC-BY-SA-2.5"
CCBYSA30 = "CC-BY-SA-3.0"
CCBYSA40 = "CC-BY-SA-4.0"
CDDL10 = "CDDL-1.0"
CDDL11 = "CDDL-1.1"
CommonsClause = "Commons-Clause"
CPAL10 = "CPAL-1.0"
CPL10 = "CPL-1.0"
eGenix = "eGenix"
EPL10 = "EPL-1.0"
EUPL10 = "EUPL-1.0"
EUPL11 = "EUPL-1.1"
Facebook2Clause = "Facebook-2-Clause"
Facebook3Clause = "Facebook-3-Clause"
FacebookExamples = "Facebook-Examples"
FreeImage = "FreeImage"
FTL = "FTL"
GPL10 = "GPL-1.0"
GPL20 = "GPL-2.0"
GPL20withautoconfexception = "GPL-2.0-with-autoconf-exception"
GPL20withbisonexception = "GPL-2.0-with-bison-exception"
GPL20withclasspathexception = "GPL-2.0-with-classpath-exception"
GPL20withfontexception = "GPL-2.0-with-font-exception"
GPL20withGCCexception = "GPL-2.0-with-GCC-exception"
GPL30 = "GPL-3.0"
GPL30withautoconfexception = "GPL-3.0-with-autoconf-exception"
GPL30withGCCexception = "GPL-3.0-with-GCC-exception"
GUSTFont = "GUST-Font-License"
ImageMagick = "ImageMagick"
IPL10 = "IPL-1.0"
ISC = "ISC"
LGPL20 = "LGPL-2.0"
LGPL21 = "LGPL-2.1"
LGPL30 = "LGPL-3.0"
LGPLLR = "LGPLLR"
Libpng = "Libpng"
Lil10 = "Lil-1.0"
LPL102 = "LPL-1.02"
LPL10 = "LPL-1.0"
LPPL13c = "LPPL-1.3c"
MIT = "MIT"
MPL10 = "MPL-1.0"
MPL11 = "MPL-1.1"
MPL20 = "MPL-2.0"
MSPL = "MS-PL"
NCSA = "NCSA"
NPL10 = "NPL-1.0"
NPL11 = "NPL-1.1"
OFL = "OFL"
OpenSSL = "OpenSSL"
OSL10 = "OSL-1.0"
OSL11 = "OSL-1.1"
OSL20 = "OSL-2.0"
OSL21 = "OSL-2.1"
OSL30 = "OSL-3.0"
PHP301 = "PHP-3.01"
PHP30 = "PHP-3.0"
PIL = "PIL"
Python20complete = "Python-2.0-complete"
Python20 = "Python-2.0"
QPL10 = "QPL-1.0"
Ruby = "Ruby"
SGIB10 = "SGI-B-1.0"
SGIB11 = "SGI-B-1.1"
SGIB20 = "SGI-B-2.0"
SISSL12 = "SISSL-1.2"
SISSL = "SISSL"
Sleepycat = "Sleepycat"
UnicodeTOU = "Unicode-TOU"
Unlicense = "Unlicense"
W3C19980720 = "W3C-19980720"
W3C = "W3C"
WTFPL = "WTFPL"
X11 = "X11"
Xnet = "Xnet"
Zend20 = "Zend-2.0"
ZlibAcknowledgement = "zlib-acknowledgement"
Zlib = "Zlib"
ZPL11 = "ZPL-1.1"
ZPL20 = "ZPL-2.0"
ZPL21 = "ZPL-2.1"
)
var (
// Licenses Categorized by Type
// restricted - Licenses in this category require mandatory source
// distribution if we ship a product that includes third-party code
// protected by such a license.
restrictedType = sets.NewStringSet(
BCL,
CCBYND10,
CCBYND20,
CCBYND25,
CCBYND30,
CCBYND40,
CCBYSA10,
CCBYSA20,
CCBYSA25,
CCBYSA30,
CCBYSA40,
GPL10,
GPL20,
GPL20withautoconfexception,
GPL20withbisonexception,
GPL20withclasspathexception,
GPL20withfontexception,
GPL20withGCCexception,
GPL30,
GPL30withautoconfexception,
GPL30withGCCexception,
LGPL20,
LGPL21,
LGPL30,
NPL10,
NPL11,
OSL10,
OSL11,
OSL20,
OSL21,
OSL30,
QPL10,
Sleepycat,
)
// reciprocal - These licenses allow usage of software made available
// under such licenses freely in *unmodified* form. If the third-party
// source code is modified in any way these modifications to the
// original third-party source code must be made available.
reciprocalType = sets.NewStringSet(
APSL10,
APSL11,
APSL12,
APSL20,
CDDL10,
CDDL11,
CPL10,
EPL10,
FreeImage,
IPL10,
MPL10,
MPL11,
MPL20,
Ruby,
)
// notice - These licenses contain few restrictions, allowing original
// or modified third-party software to be shipped in any product
// without endangering or encumbering our source code. All of the
// licenses in this category do, however, have an "original Copyright
// notice" or "advertising clause", wherein any external distributions
// must include the notice or clause specified in the license.
noticeType = sets.NewStringSet(
AFL11,
AFL12,
AFL20,
AFL21,
AFL30,
Apache10,
Apache11,
Apache20,
Artistic10cl8,
Artistic10Perl,
Artistic10,
Artistic20,
BSL10,
BSD2ClauseFreeBSD,
BSD2ClauseNetBSD,
BSD2Clause,
BSD3ClauseAttribution,
BSD3ClauseClear,
BSD3ClauseLBNL,
BSD3Clause,
BSD4Clause,
BSD4ClauseUC,
BSDProtection,
CCBY10,
CCBY20,
CCBY25,
CCBY30,
CCBY40,
FTL,
ISC,
ImageMagick,
Libpng,
Lil10,
LPL102,
LPL10,
MSPL,
MIT,
NCSA,
OpenSSL,
PHP301,
PHP30,
PIL,
Python20,
Python20complete,
SGIB10,
SGIB11,
SGIB20,
UnicodeTOU,
W3C19980720,
W3C,
X11,
Xnet,
Zend20,
ZlibAcknowledgement,
Zlib,
ZPL11,
ZPL20,
ZPL21,
)
// permissive - These licenses can be used in (relatively rare) cases
// where third-party software is under a license (not "Public Domain"
// or "free for any use" like 'unencumbered') that is even more lenient
// than a 'notice' license. Use the 'permissive' license type when even
// a copyright notice is not required for license compliance.
permissiveType = sets.NewStringSet()
// unencumbered - Licenses that basically declare that the code is "free for any use".
unencumberedType = sets.NewStringSet(
CC010,
Unlicense,
)
// by_exception_only - Licenses that are incompatible with all (or most)
// uses in combination with our source code. Commercial third-party
// packages that are purchased and licensed only for a specific use
// fall into this category.
byExceptionOnlyType = sets.NewStringSet(
Beerware,
OFL,
)
// forbidden - Licenses that are forbidden to be used.
forbiddenType = sets.NewStringSet(
AGPL10,
AGPL30,
CCBYNC10,
CCBYNC20,
CCBYNC25,
CCBYNC30,
CCBYNC40,
CCBYNCND10,
CCBYNCND20,
CCBYNCND25,
CCBYNCND30,
CCBYNCND40,
CCBYNCSA10,
CCBYNCSA20,
CCBYNCSA25,
CCBYNCSA30,
CCBYNCSA40,
CommonsClause,
Facebook2Clause,
Facebook3Clause,
FacebookExamples,
WTFPL,
)
// LicenseTypes is a set of the types of licenses Google recognizes.
LicenseTypes = sets.NewStringSet(
"restricted",
"reciprocal",
"notice",
"permissive",
"unencumbered",
"by_exception_only",
)
)
// LicenseType returns the type the license has.
func LicenseType(name string) string {
switch {
case restrictedType.Contains(name):
return "restricted"
case reciprocalType.Contains(name):
return "reciprocal"
case noticeType.Contains(name):
return "notice"
case permissiveType.Contains(name):
return "permissive"
case unencumberedType.Contains(name):
return "unencumbered"
case forbiddenType.Contains(name):
return "FORBIDDEN"
}
return ""
}
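
A quick illustration of how the categorization above is consumed (constants and LicenseType as defined in this file):

package main

import (
	"fmt"

	"github.com/google/licenseclassifier"
)

func main() {
	// Each canonical name falls into exactly one category; unknown names yield "".
	for _, name := range []string{
		licenseclassifier.GPL20,     // restricted
		licenseclassifier.MPL20,     // reciprocal
		licenseclassifier.MIT,       // notice
		licenseclassifier.Unlicense, // unencumbered
		licenseclassifier.AGPL30,    // FORBIDDEN
	} {
		fmt.Printf("%-12s => %s\n", name, licenseclassifier.LicenseType(name))
	}
}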


@@ -0,0 +1,24 @@
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or distribute
this software, either in source code form or as a compiled binary, for any
purpose, commercial or non-commercial, and by any means.
In jurisdictions that recognize copyright laws, the author or authors of this
software dedicate any and all copyright interest in the software to the public
domain. We make this dedication for the benefit of the public at large and to
the detriment of our heirs and
successors. We intend this dedication to be an overt act of relinquishment in
perpetuity of all present and future rights to this software under copyright
law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
For more information, please refer to <http://unlicense.org/>

Binary file not shown.


@@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
@@ -0,0 +1,560 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package stringclassifier finds the nearest match between a string and a set of known values. It
// uses the Levenshtein Distance (LD) algorithm to determine this. A match with a large LD is less
// likely to be correct than one with a small LD. A confidence percentage is returned, which
// indicates how confident the algorithm is that the match is correct. The higher the percentage,
// the greater the confidence that the match is correct.
//
// Example Usage:
//
// type Text struct {
// Name string
// Text string
// }
//
// func NewClassifier(knownTexts []Text) (*stringclassifier.Classifier, error) {
// sc := stringclassifier.New(stringclassifier.DefaultConfidenceThreshold, stringclassifier.FlattenWhitespace)
// for _, known := range knownTexts {
// if err := sc.AddValue(known.Name, known.Text); err != nil {
// return nil, err
// }
// }
// return sc, nil
// }
//
// func IdentifyTexts(sc *stringclassifier.Classifier, unknownTexts []*Text) {
// for _, unknown := range unknownTexts {
// m := sc.NearestMatch(unknown.Text)
// log.Printf("The nearest match to %q is %q (confidence: %v)",
// unknown.Name, m.Name, m.Confidence)
// }
// }
package stringclassifier
import (
"fmt"
"log"
"math"
"regexp"
"sort"
"sync"
"github.com/google/licenseclassifier/stringclassifier/internal/pq"
"github.com/google/licenseclassifier/stringclassifier/searchset"
"github.com/sergi/go-diff/diffmatchpatch"
)
// The diff/match/patch algorithm.
var dmp = diffmatchpatch.New()
const (
// DefaultConfidenceThreshold is the minimum ratio threshold between
// the matching range and the full source range that we're willing to
// accept in order to say that the matching range will produce a
// sufficiently good edit distance. I.e., if the matching range is
// below this threshold we won't run the Levenshtein Distance algorithm
// on it.
DefaultConfidenceThreshold float64 = 0.80
defaultMinDiffRatio float64 = 0.75
)
// A Classifier matches a string to a set of known values.
type Classifier struct {
muValues sync.RWMutex
values map[string]*knownValue
normalizers []NormalizeFunc
threshold float64
// MinDiffRatio defines the minimum ratio of the length difference
// allowed to consider a known value a possible match. This is used as
// a performance optimization to eliminate values that are unlikely to
// be a match.
//
// For example, a value of 0.75 means that the shorter string must be
// at least 75% the length of the longer string to consider it a
// possible match.
//
// Setting this to 1.0 will require that strings are identical length.
// Setting this to 0 will consider all known values as possible
// matches.
MinDiffRatio float64
}
// NormalizeFunc is a function that is used to normalize a string prior to comparison.
type NormalizeFunc func(string) string
// New creates a new Classifier with the provided NormalizeFuncs. Each
// NormalizeFunc is applied in order to a string before comparison.
func New(threshold float64, funcs ...NormalizeFunc) *Classifier {
return &Classifier{
values: make(map[string]*knownValue),
normalizers: append([]NormalizeFunc(nil), funcs...),
threshold: threshold,
MinDiffRatio: defaultMinDiffRatio,
}
}
// knownValue identifies a value in the corpus to match against.
type knownValue struct {
key string
normalizedValue string
reValue *regexp.Regexp
set *searchset.SearchSet
}
// AddValue adds a known value to be matched against. If a value already exists
// for key, an error is returned.
func (c *Classifier) AddValue(key, value string) error {
c.muValues.Lock()
defer c.muValues.Unlock()
if _, ok := c.values[key]; ok {
return fmt.Errorf("value already registered with key %q", key)
}
norm := c.normalize(value)
c.values[key] = &knownValue{
key: key,
normalizedValue: norm,
reValue: regexp.MustCompile(norm),
}
return nil
}
// AddPrecomputedValue adds a known value to be matched against. The value has
// already been normalized and the SearchSet object deserialized, so no
// processing is necessary.
func (c *Classifier) AddPrecomputedValue(key, value string, set *searchset.SearchSet) error {
c.muValues.Lock()
defer c.muValues.Unlock()
if _, ok := c.values[key]; ok {
return fmt.Errorf("value already registered with key %q", key)
}
set.GenerateNodeList()
c.values[key] = &knownValue{
key: key,
normalizedValue: value,
reValue: regexp.MustCompile(value),
set: set,
}
return nil
}
// normalize a string by applying each of the registered NormalizeFuncs.
func (c *Classifier) normalize(s string) string {
for _, fn := range c.normalizers {
s = fn(s)
}
return s
}
// Match identifies the result of matching a string against a knownValue.
type Match struct {
Name string // Name of knownValue that was matched
Confidence float64 // Confidence percentage
Offset int // The offset into the unknown string the match was made
Extent int // The length from the offset into the unknown string
}
// Matches is a list of Match-es. This is here mainly so that the list can be
// sorted.
type Matches []*Match
func (m Matches) Len() int { return len(m) }
func (m Matches) Swap(i, j int) { m[i], m[j] = m[j], m[i] }
func (m Matches) Less(i, j int) bool {
if math.Abs(m[j].Confidence-m[i].Confidence) < math.SmallestNonzeroFloat64 {
if m[i].Name == m[j].Name {
if m[i].Offset > m[j].Offset {
return false
}
if m[i].Offset == m[j].Offset {
return m[i].Extent > m[j].Extent
}
return true
}
return m[i].Name < m[j].Name
}
return m[i].Confidence > m[j].Confidence
}
// Names returns an unsorted slice of the names of the matched values.
func (m Matches) Names() []string {
var names []string
for _, n := range m {
names = append(names, n.Name)
}
return names
}
// uniquify goes through the matches and removes any that are contained within
// one with a higher confidence. This assumes that Matches is sorted.
func (m Matches) uniquify() Matches {
type matchedRange struct {
offset, extent int
}
var matched []matchedRange
var matches Matches
OUTER:
for _, match := range m {
for _, mr := range matched {
if match.Offset >= mr.offset && match.Offset <= mr.offset+mr.extent {
continue OUTER
}
}
matched = append(matched, matchedRange{match.Offset, match.Extent})
matches = append(matches, match)
}
return matches
}
// NearestMatch returns the name of the known value that most closely matches
// the unknown string, along with a confidence percentage indicating how
// confident the classifier is in the result. A percentage of "1.0" indicates
// an exact match, while a percentage of "0.0" indicates a complete mismatch.
//
// If the string is equidistant from multiple known values, it is undefined
// which will be returned.
func (c *Classifier) NearestMatch(s string) *Match {
pq := c.nearestMatch(s)
if pq.Len() == 0 {
return &Match{}
}
return pq.Pop().(*Match)
}
// MultipleMatch tries to determine which known strings are found within an
// unknown string. This differs from "NearestMatch" in that it looks only at
// those areas within the unknown string that are likely to match. A list of
// potential matches is returned. It's up to the caller to determine which
// ones are acceptable.
func (c *Classifier) MultipleMatch(s string) (matches Matches) {
pq := c.multipleMatch(s)
if pq == nil {
return matches
}
// A map to remove duplicate entries.
m := make(map[Match]bool)
for pq.Len() != 0 {
v := pq.Pop().(*Match)
if _, ok := m[*v]; !ok {
m[*v] = true
matches = append(matches, v)
}
}
sort.Sort(matches)
return matches.uniquify()
}
// possibleMatch identifies a known value and its diffRatio to a given string.
type possibleMatch struct {
value *knownValue
diffRatio float64
}
// likelyMatches is a slice of possibleMatches that can be sorted by their
// diffRatio to a given string, such that the most likely matches (based on
// length) are at the beginning.
type likelyMatches []possibleMatch
func (m likelyMatches) Len() int { return len(m) }
func (m likelyMatches) Less(i, j int) bool { return m[i].diffRatio > m[j].diffRatio }
func (m likelyMatches) Swap(i, j int) { m[i], m[j] = m[j], m[i] }
// nearestMatch returns a Queue of values that the unknown string may be. The
// values are compared via their Levenshtein Distance and ranked with the
// nearest match at the beginning.
func (c *Classifier) nearestMatch(unknown string) *pq.Queue {
var mu sync.Mutex // Protect the priority queue.
pq := pq.NewQueue(func(x, y interface{}) bool {
return x.(*Match).Confidence > y.(*Match).Confidence
}, nil)
unknown = c.normalize(unknown)
if len(unknown) == 0 {
return pq
}
c.muValues.RLock()
var likely likelyMatches
for _, v := range c.values {
dr := diffRatio(unknown, v.normalizedValue)
if dr < c.MinDiffRatio {
continue
}
if unknown == v.normalizedValue {
// We found an exact match.
pq.Push(&Match{Name: v.key, Confidence: 1.0, Offset: 0, Extent: len(unknown)})
c.muValues.RUnlock()
return pq
}
likely = append(likely, possibleMatch{value: v, diffRatio: dr})
}
c.muValues.RUnlock()
sort.Sort(likely)
var wg sync.WaitGroup
classifyString := func(name, unknown, known string) {
defer wg.Done()
diffs := dmp.DiffMain(unknown, known, true)
distance := dmp.DiffLevenshtein(diffs)
confidence := confidencePercentage(len(unknown), len(known), distance)
if confidence > 0.0 {
mu.Lock()
pq.Push(&Match{Name: name, Confidence: confidence, Offset: 0, Extent: len(unknown)})
mu.Unlock()
}
}
wg.Add(len(likely))
for _, known := range likely {
go classifyString(known.value.key, unknown, known.value.normalizedValue)
}
wg.Wait()
return pq
}
// matcher finds all potential matches of "known" in "unknown". The results are
// placed in "queue".
type matcher struct {
unknown *searchset.SearchSet
normUnknown string
threshold float64
mu sync.Mutex
queue *pq.Queue
}
// newMatcher creates a "matcher" object.
func newMatcher(unknown string, threshold float64) *matcher {
return &matcher{
unknown: searchset.New(unknown, searchset.DefaultGranularity),
normUnknown: unknown,
threshold: threshold,
queue: pq.NewQueue(func(x, y interface{}) bool {
return x.(*Match).Confidence > y.(*Match).Confidence
}, nil),
}
}
// findMatches takes a known text and finds all potential instances of it in
// the unknown text. The resulting matches can then be filtered to determine
// which are the best matches.
func (m *matcher) findMatches(known *knownValue) {
var mrs []searchset.MatchRanges
if all := known.reValue.FindAllStringIndex(m.normUnknown, -1); all != nil {
// We found exact matches. Just use those!
for _, a := range all {
var start, end int
for i, tok := range m.unknown.Tokens {
if tok.Offset == a[0] {
start = i
} else if tok.Offset >= a[len(a)-1]-len(tok.Text) {
end = i
break
}
}
mrs = append(mrs, searchset.MatchRanges{{
SrcStart: 0,
SrcEnd: len(known.set.Tokens),
TargetStart: start,
TargetEnd: end + 1,
}})
}
} else {
// No exact match. Perform a more thorough match.
mrs = searchset.FindPotentialMatches(known.set, m.unknown)
}
var wg sync.WaitGroup
for _, mr := range mrs {
if !m.withinConfidenceThreshold(known.set, mr) {
continue
}
wg.Add(1)
go func(mr searchset.MatchRanges) {
start, end := mr.TargetRange(m.unknown)
conf := levDist(m.normUnknown[start:end], known.normalizedValue)
if conf > 0.0 {
m.mu.Lock()
m.queue.Push(&Match{Name: known.key, Confidence: conf, Offset: start, Extent: end - start})
m.mu.Unlock()
}
wg.Done()
}(mr)
}
wg.Wait()
}
// withinConfidenceThreshold returns true if the ratio of the potential
// match's size to the size of the original known text meets the confidence
// threshold.
func (m *matcher) withinConfidenceThreshold(known *searchset.SearchSet, mr searchset.MatchRanges) bool {
return float64(mr.Size())/float64(len(known.Tokens)) >= m.threshold
}
// multipleMatch returns a Queue of values that might be within the unknown
// string. The values are compared via their Levenshtein Distance and ranked
// with the nearest match at the beginning.
func (c *Classifier) multipleMatch(unknown string) *pq.Queue {
normUnknown := c.normalize(unknown)
if normUnknown == "" {
return nil
}
m := newMatcher(normUnknown, c.threshold)
c.muValues.RLock()
var kvals []*knownValue
for _, known := range c.values {
kvals = append(kvals, known)
}
c.muValues.RUnlock()
var wg sync.WaitGroup
wg.Add(len(kvals))
for _, known := range kvals {
go func(known *knownValue) {
if known.set == nil {
k := searchset.New(known.normalizedValue, searchset.DefaultGranularity)
c.muValues.Lock()
c.values[known.key].set = k
c.muValues.Unlock()
}
m.findMatches(known)
wg.Done()
}(known)
}
wg.Wait()
return m.queue
}
// levDist runs the Levenshtein Distance algorithm on the known and unknown
// texts to measure how well they match.
func levDist(unknown, known string) float64 {
if len(known) == 0 || len(unknown) == 0 {
log.Printf("Zero-sized texts in Levenshtein Distance algorithm: known==%d, unknown==%d", len(known), len(unknown))
return 0.0
}
// Calculate the differences between the potentially matching known
// text and the unknown text.
diffs := dmp.DiffMain(unknown, known, false)
end := diffRangeEnd(known, diffs)
// Now execute the Levenshtein Distance algorithm to see how much it
// does match.
distance := dmp.DiffLevenshtein(diffs[:end])
return confidencePercentage(unknownTextLength(unknown, diffs), len(known), distance)
}
// unknownTextLength returns the length of the unknown text based on the diff range.
func unknownTextLength(unknown string, diffs []diffmatchpatch.Diff) int {
last := len(diffs) - 1
for ; last >= 0; last-- {
if diffs[last].Type == diffmatchpatch.DiffEqual {
break
}
}
ulen := 0
for i := 0; i < last+1; i++ {
switch diffs[i].Type {
case diffmatchpatch.DiffEqual, diffmatchpatch.DiffDelete:
ulen += len(diffs[i].Text)
}
}
return ulen
}
// diffRangeEnd returns the end index for the "Diff" objects that construct
// (or nearly construct) the "known" value.
func diffRangeEnd(known string, diffs []diffmatchpatch.Diff) (end int) {
var seen string
for end = 0; end < len(diffs); end++ {
if seen == known {
// Once we've constructed the "known" value, then we've
// reached the point in the diff list where more
// "Diff"s would just make the Levenshtein Distance
// less valid. There shouldn't be further "DiffEqual"
// nodes, because there's nothing further to match in
// the "known" text.
break
}
switch diffs[end].Type {
case diffmatchpatch.DiffEqual, diffmatchpatch.DiffInsert:
seen += diffs[end].Text
}
}
return end
}
// confidencePercentage calculates how confident we are in the result of the
// match. A percentage of "1.0" means an identical match. A confidence of "0.0"
// means a complete mismatch.
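// For example, an unknown text of length 95 compared against a known text
// of length 100 with an edit distance of 10 yields a confidence of
// 1.0 - 10/100 = 0.90.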
func confidencePercentage(ulen, klen, distance int) float64 {
if ulen == 0 && klen == 0 {
return 1.0
}
if ulen == 0 || klen == 0 || (distance > ulen && distance > klen) {
return 0.0
}
return 1.0 - float64(distance)/float64(max(ulen, klen))
}
// diffRatio calculates the ratio of the lengths of s1 and s2, returned as a
// percentage of the length of the longer string. E.g., diffRatio("abcd", "e")
// would return 0.25 because "e" is 25% of the size of "abcd". Comparing
// strings of equal length will return 1.
func diffRatio(s1, s2 string) float64 {
x, y := len(s1), len(s2)
if x == 0 && y == 0 {
// Both strings are zero length
return 1.0
}
if x < y {
return float64(x) / float64(y)
}
return float64(y) / float64(x)
}
func max(a, b int) int {
if a > b {
return a
}
return b
}
func min(a, b int) int {
if a < b {
return a
}
return b
}
// wsRegexp is a regexp used to identify blocks of whitespace.
var wsRegexp = regexp.MustCompile(`\s+`)
// FlattenWhitespace will flatten contiguous blocks of whitespace down to a single space.
var FlattenWhitespace NormalizeFunc = func(s string) string {
return wsRegexp.ReplaceAllString(s, " ")
}
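For orientation, here is a minimal sketch of how this vendored package can be driven end to end. This is a hypothetical example, not part of the commit; the corpus names and texts are made up, and the import path is taken from the `Gopkg.lock` entry above.

```go
package main

import (
	"fmt"
	"log"

	"github.com/google/licenseclassifier/stringclassifier"
)

func main() {
	// Flatten runs of whitespace before comparison, and require a matching
	// range to cover at least the default 80% of the known text.
	sc := stringclassifier.New(stringclassifier.DefaultConfidenceThreshold,
		stringclassifier.FlattenWhitespace)

	// Hypothetical corpus of known texts.
	for name, text := range map[string]string{
		"greeting": "hello there world",
		"farewell": "goodbye cruel world",
	} {
		if err := sc.AddValue(name, text); err != nil {
			log.Fatal(err)
		}
	}

	// The extra comma costs one edit, so the confidence is high but not 1.0.
	m := sc.NearestMatch("hello there, world")
	fmt.Printf("nearest: %s (confidence %.2f)\n", m.Name, m.Confidence)
}
```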
@@ -0,0 +1,111 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package pq provides a priority queue.
package pq
import "container/heap"
// NewQueue returns an unbounded priority queue that compares elements using
// less; the minimal element is at the top of the queue.
//
// If setIndex is not nil, the queue calls setIndex to inform each element of
// its position in the queue. If an element's priority changes, its position in
// the queue may be incorrect. Call Fix on the element's index to update the
// queue. Call Remove on the element's index to remove it from the queue.
func NewQueue(less func(x, y interface{}) bool, setIndex func(x interface{}, idx int)) *Queue {
return &Queue{
heap: pqHeap{
less: less,
setIndex: setIndex,
},
}
}
// Queue is a priority queue that supports updating the priority of an element.
// A Queue must be created with NewQueue.
type Queue struct {
heap pqHeap
}
// Len returns the number of elements in the queue.
func (pq *Queue) Len() int {
return pq.heap.Len()
}
// Push adds x to the queue.
func (pq *Queue) Push(x interface{}) {
heap.Push(&pq.heap, x)
}
// Min returns the minimal element.
// Min panics if the queue is empty.
func (pq *Queue) Min() interface{} {
return pq.heap.a[0]
}
// Pop removes and returns the minimal element.
// Pop panics if the queue is empty.
func (pq *Queue) Pop() interface{} {
return heap.Pop(&pq.heap)
}
// Fix adjusts the heap to reflect that the element at index has changed priority.
func (pq *Queue) Fix(index int) {
heap.Fix(&pq.heap, index)
}
// Remove removes the element at index i from the heap.
func (pq *Queue) Remove(index int) {
heap.Remove(&pq.heap, index)
}
// pqHeap implements heap.Interface.
type pqHeap struct {
a []interface{}
less func(x, y interface{}) bool
setIndex func(x interface{}, idx int)
}
func (h pqHeap) Len() int {
return len(h.a)
}
func (h pqHeap) Less(i, j int) bool {
return h.less(h.a[i], h.a[j])
}
func (h pqHeap) Swap(i, j int) {
h.a[i], h.a[j] = h.a[j], h.a[i]
if h.setIndex != nil {
h.setIndex(h.a[i], i)
h.setIndex(h.a[j], j)
}
}
func (h *pqHeap) Push(x interface{}) {
n := len(h.a)
if h.setIndex != nil {
h.setIndex(x, n)
}
h.a = append(h.a, x)
}
func (h *pqHeap) Pop() interface{} {
old := h.a
n := len(old)
x := old[n-1]
h.a = old[:n-1]
return x
}
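The queue above is generic over interface{}. A minimal usage sketch follows; it is hypothetical, and since the package lives under internal/, the import below would be rejected by the Go toolchain outside this repository and is shown for illustration only.

```go
package main

import (
	"fmt"

	"github.com/google/licenseclassifier/stringclassifier/internal/pq"
)

func main() {
	// less reports whether x sorts before y, so this is a min-queue of ints.
	q := pq.NewQueue(func(x, y interface{}) bool {
		return x.(int) < y.(int)
	}, nil)

	for _, n := range []int{5, 1, 4, 2, 3} {
		q.Push(n)
	}

	fmt.Println(q.Min()) // 1 (peek; does not remove)
	for q.Len() > 0 {
		fmt.Print(q.Pop(), " ") // 1 2 3 4 5
	}
	fmt.Println()
}
```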
@@ -0,0 +1,491 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package searchset generates hashes for all substrings of a text. Potential
// matches between two SearchSet objects can then be determined quickly.
// Generating the hashes can be expensive, so it's best to perform it once. If
// the text is part of a known corpus, then the SearchSet can be serialized and
// kept in an archive.
//
// Matching occurs by "mapping" ranges from the source text into the target
// text but still retaining the source order:
//
// SOURCE: |-----------------------------|
//
// TARGET: |*****************************************|
//
// MAP SOURCE SECTIONS ONTO TARGET IN SOURCE ORDER:
//
// S: |-[--]-----[---]------[----]------|
// / | \
// |---| |---------| |-------------|
// T: |*****************************************|
//
// Note that a single source range may match many different ranges in the
// target. The matching algorithm untangles these so that all matched ranges
// are in order with respect to the source ranges. This is especially important
// since the source text may occur more than once in the target text. The
// algorithm finds each potential occurrence of S in T and returns all as
// potential matched ranges.
package searchset
import (
"encoding/gob"
"fmt"
"io"
"sort"
"github.com/google/licenseclassifier/stringclassifier/searchset/tokenizer"
)
// DefaultGranularity is the minimum size (in words) of the hash chunks.
const DefaultGranularity = 3
// SearchSet is a set of substrings that have hashes associated with them,
// making it fast to search for potential matches.
type SearchSet struct {
// Tokens is a tokenized list of the original input string.
Tokens tokenizer.Tokens
// Hashes is a map of checksums to a range of tokens.
Hashes tokenizer.Hash
// Checksums is a list of checksums ordered from longest range to
// shortest.
Checksums []uint32
// ChecksumRanges are the token ranges for the above checksums.
ChecksumRanges tokenizer.TokenRanges
nodes []*node
}
// node consists of a range of tokens along with the checksum for those tokens.
type node struct {
checksum uint32
tokens *tokenizer.TokenRange
}
func (n *node) String() string {
return fmt.Sprintf("[%d:%d]", n.tokens.Start, n.tokens.End)
}
// New creates a new SearchSet object. It generates a hash for each substring of "s".
func New(s string, granularity int) *SearchSet {
toks := tokenizer.Tokenize(s)
// Start generating hash values for all substrings within the text.
h := make(tokenizer.Hash)
checksums, tokenRanges := toks.GenerateHashes(h, func(a, b int) int {
if a < b {
return a
}
return b
}(len(toks), granularity))
sset := &SearchSet{
Tokens: toks,
Hashes: h,
Checksums: checksums,
ChecksumRanges: tokenRanges,
}
sset.GenerateNodeList()
return sset
}
// GenerateNodeList creates a node list out of the search set.
func (s *SearchSet) GenerateNodeList() {
if len(s.Tokens) == 0 {
return
}
for i := 0; i < len(s.Checksums); i++ {
s.nodes = append(s.nodes, &node{
checksum: s.Checksums[i],
tokens: s.ChecksumRanges[i],
})
}
}
// Serialize writes out the SearchSet so that it can be recreated at a later
// time.
func (s *SearchSet) Serialize(w io.Writer) error {
return gob.NewEncoder(w).Encode(s)
}
// Deserialize reads a serialized SearchSet from r and reconstructs it.
func Deserialize(r io.Reader, s *SearchSet) error {
if err := gob.NewDecoder(r).Decode(&s); err != nil {
return err
}
s.GenerateNodeList()
return nil
}
// MatchRange is the range within the source text that is a match to the range
// in the target text.
type MatchRange struct {
// Offsets into the source tokens.
SrcStart, SrcEnd int
// Offsets into the target tokens.
TargetStart, TargetEnd int
}
// in returns true if the start and end are enclosed in the match range.
func (m *MatchRange) in(start, end int) bool {
return start >= m.TargetStart && end <= m.TargetEnd
}
func (m *MatchRange) String() string {
return fmt.Sprintf("[%v, %v)->[%v, %v)", m.SrcStart, m.SrcEnd, m.TargetStart, m.TargetEnd)
}
// MatchRanges is a list of "MatchRange"s. The ranges are monotonically
// increasing in value and indicate a single potential occurrence of the source
// text in the target text.
type MatchRanges []*MatchRange
func (m MatchRanges) Len() int { return len(m) }
func (m MatchRanges) Swap(i, j int) { m[i], m[j] = m[j], m[i] }
func (m MatchRanges) Less(i, j int) bool {
if m[i].TargetStart < m[j].TargetStart {
return true
}
return m[i].TargetStart == m[j].TargetStart && m[i].SrcStart < m[j].SrcStart
}
// TargetRange is the start and stop token offsets into the target text.
func (m MatchRanges) TargetRange(target *SearchSet) (start, end int) {
start = target.Tokens[m[0].TargetStart].Offset
end = target.Tokens[m[len(m)-1].TargetEnd-1].Offset + len(target.Tokens[m[len(m)-1].TargetEnd-1].Text)
return start, end
}
// Size is the number of source tokens that were matched.
func (m MatchRanges) Size() int {
sum := 0
for _, mr := range m {
sum += mr.SrcEnd - mr.SrcStart
}
return sum
}
// FindPotentialMatches returns the ranges in the target (unknown) text that
// are best potential matches to the source (known) text.
func FindPotentialMatches(src, target *SearchSet) []MatchRanges {
matchedRanges := getMatchedRanges(src, target)
if len(matchedRanges) == 0 {
return nil
}
// Cleanup the matching ranges so that we get the longest contiguous ranges.
for i := 0; i < len(matchedRanges); i++ {
matchedRanges[i] = coalesceMatchRanges(matchedRanges[i])
}
return matchedRanges
}
// getMatchedRanges finds the ranges in the target text that match the source
// text. There can be multiple occurrences of the source text within the target
// text. Each separate occurrence is an entry in the returned slice.
func getMatchedRanges(src, target *SearchSet) []MatchRanges {
matched := targetMatchedRanges(src, target)
if len(matched) == 0 {
return nil
}
sort.Sort(matched)
matched = untangleSourceRanges(matched)
matchedRanges := splitRanges(matched)
return mergeConsecutiveRanges(matchedRanges)
}
func extendsAny(tr tokenizer.TokenRanges, mr []MatchRanges) bool {
if len(mr) == 0 {
return false
}
for _, tv := range tr {
for _, mv := range mr {
if tv.Start >= mv[0].TargetStart && tv.Start <= mv[len(mv)-1].TargetEnd {
return true
}
}
}
return false
}
// targetMatchedRanges finds matching sequences in target and src, ordered by target position.
func targetMatchedRanges(src, target *SearchSet) MatchRanges {
if src.nodes == nil {
return nil
}
var matched MatchRanges
var previous *node
var possible []MatchRanges
for _, tgtNode := range target.nodes {
sr, ok := src.Hashes[tgtNode.checksum]
if !ok || (previous != nil && tgtNode.tokens.Start > previous.tokens.End) || !extendsAny(sr, possible) {
for _, r := range possible {
matched = append(matched, r...)
}
possible = possible[:0]
previous = nil
}
if !ok {
// There isn't a match in the source.
continue
}
// Maps index within `possible` to the slice of ranges extended by a new range
extended := make(map[int]*MatchRanges)
// Go over the set of source ranges growing lists of `possible` match ranges.
tv := tgtNode.tokens
for _, sv := range sr {
r := &MatchRange{
SrcStart: sv.Start,
SrcEnd: sv.End,
TargetStart: tv.Start,
TargetEnd: tv.End,
}
found := false
// Grow or extend each abutting `possible` match range.
for i, p := range possible {
last := p[len(p)-1]
if sv.Start >= last.SrcStart && sv.Start <= last.SrcEnd && tv.Start >= last.TargetStart && tv.Start <= last.TargetEnd {
found = true
possible[i] = append(possible[i], r)
extended[i] = &possible[i]
}
}
if !found {
// Did not abut any existing ranges, start a new `possible` match range.
mrs := make(MatchRanges, 0, 2)
mrs = append(mrs, r)
possible = append(possible, mrs)
extended[len(possible)-1] = &possible[len(possible)-1]
}
}
if len(extended) < len(possible) {
// Ranges not extended--add to `matched` if not included in other range.
for i := 0; i < len(possible); {
_, updated := extended[i]
if updated {
i++ // Keep in `possible` and advance to next index.
continue
}
p1 := possible[i]
found := false // whether found as subrange of another `possible` match.
for _, p2 := range extended {
if p1[0].SrcStart >= (*p2)[0].SrcStart && p1[0].TargetStart >= (*p2)[0].TargetStart {
found = true
break
}
}
if !found {
matched = append(matched, p1...)
} // else included in other match.
// Finished -- delete from `possible` and continue from same index.
possible = append(possible[:i], possible[i+1:]...)
}
}
previous = tgtNode
}
// At end of file, terminate all `possible` match ranges.
for i := 0; i < len(possible); i++ {
p1 := possible[i]
found := false // whether found as subrange of another `possible` match.
for j := i + 1; j < len(possible); {
p2 := possible[j]
if p1[0].SrcStart <= p2[0].SrcStart && p1[0].TargetStart <= p2[0].TargetStart {
// Delete later sub-ranges included in this range.
possible = append(possible[:j], possible[j+1:]...)
continue
}
// Skip if subrange of a later range
if p1[0].SrcStart >= p2[0].SrcStart && p1[0].TargetStart >= p2[0].TargetStart {
found = true
}
j++
}
if !found {
matched = append(matched, p1...)
}
}
return matched
}
// untangleSourceRanges goes through the ranges and removes any whose source
// ranges are "out of order". A source range is "out of order" if the source
// range is out of sequence with the source ranges before and after it. This
// happens when more than one source range maps to the same target range.
// E.g.:
//
// SrcStart: 20, SrcEnd: 30, TargetStart: 127, TargetEnd: 137
// 1: SrcStart: 12, SrcEnd: 17, TargetStart: 138, TargetEnd: 143
// 2: SrcStart: 32, SrcEnd: 37, TargetStart: 138, TargetEnd: 143
// SrcStart: 38, SrcEnd: 40, TargetStart: 144, TargetEnd: 146
//
// Here (1) is out of order, because the source range [12, 17) is out of
// sequence with the surrounding source ranges, while (2), [32, 37), is in
// sequence.
func untangleSourceRanges(matched MatchRanges) MatchRanges {
mr := MatchRanges{matched[0]}
NEXT:
for i := 1; i < len(matched); i++ {
if mr[len(mr)-1].TargetStart == matched[i].TargetStart && mr[len(mr)-1].TargetEnd == matched[i].TargetEnd {
// The matched range has already been added.
continue
}
if i+1 < len(matched) && equalTargetRange(matched[i], matched[i+1]) {
// A sequence of ranges match the same target range.
// Find the first one that has a source range greater
// than the currently matched range. Omit all others.
if matched[i].SrcStart > mr[len(mr)-1].SrcStart {
mr = append(mr, matched[i])
continue
}
for j := i + 1; j < len(matched) && equalTargetRange(matched[i], matched[j]); j++ {
// Check subsequent ranges to see if we can
// find one that matches in the correct order.
if matched[j].SrcStart > mr[len(mr)-1].SrcStart {
mr = append(mr, matched[j])
i = j
continue NEXT
}
}
}
mr = append(mr, matched[i])
}
return mr
}
// equalTargetRange returns true if the two MatchRange's cover the same target range.
func equalTargetRange(this, that *MatchRange) bool {
return this.TargetStart == that.TargetStart && this.TargetEnd == that.TargetEnd
}
// splitRanges splits the matched ranges so that a single match range has a
// monotonically increasing source range (indicating a single, potential
// instance of the source in the target).
func splitRanges(matched MatchRanges) []MatchRanges {
var matchedRanges []MatchRanges
mr := MatchRanges{matched[0]}
for i := 1; i < len(matched); i++ {
if mr[len(mr)-1].SrcStart > matched[i].SrcStart {
matchedRanges = append(matchedRanges, mr)
mr = MatchRanges{matched[i]}
} else {
mr = append(mr, matched[i])
}
}
matchedRanges = append(matchedRanges, mr)
return matchedRanges
}
// mergeConsecutiveRanges goes through the matched ranges and merges
// consecutive ranges. Two ranges are consecutive if the end of the previous
// matched range and beginning of the next matched range overlap. "matched"
// should have 1 or more MatchRanges, each with one or more MatchRange objects.
func mergeConsecutiveRanges(matched []MatchRanges) []MatchRanges {
mr := []MatchRanges{matched[0]}
// Convenience functions.
prevMatchedRange := func() MatchRanges {
return mr[len(mr)-1]
}
prevMatchedRangeLastElem := func() *MatchRange {
return prevMatchedRange()[len(prevMatchedRange())-1]
}
// This algorithm compares the start of each MatchRanges object to the
// end of the previous MatchRanges object. If they overlap, then it
// tries to combine them. Note that a 0 offset into a MatchRanges
// object (e.g., matched[i][0]) is its first MatchRange, which
// indicates the start of the whole matched range.
NEXT:
for i := 1; i < len(matched); i++ {
if prevMatchedRangeLastElem().TargetEnd > matched[i][0].TargetStart {
// Consecutive matched ranges overlap. Merge them.
if prevMatchedRangeLastElem().TargetStart < matched[i][0].TargetStart {
// The last element of the previous matched
// range overlaps with the first element of the
// current matched range. Concatenate them.
if prevMatchedRangeLastElem().TargetEnd < matched[i][0].TargetEnd {
prevMatchedRangeLastElem().SrcEnd += matched[i][0].TargetEnd - prevMatchedRangeLastElem().TargetEnd
prevMatchedRangeLastElem().TargetEnd = matched[i][0].TargetEnd
}
mr[len(mr)-1] = append(prevMatchedRange(), matched[i][1:]...)
continue
}
for j := 1; j < len(matched[i]); j++ {
// Find the positions in the ranges where the
// tail end of the previous matched range
// overlaps with the start of the next matched
// range.
for k := len(prevMatchedRange()) - 1; k > 0; k-- {
if prevMatchedRange()[k].SrcStart < matched[i][j].SrcStart &&
prevMatchedRange()[k].TargetStart < matched[i][j].TargetStart {
// Append the next range to the previous range.
if prevMatchedRange()[k].TargetEnd < matched[i][j].TargetStart {
// Coalesce the ranges.
prevMatchedRange()[k].SrcEnd += matched[i][j-1].TargetEnd - prevMatchedRange()[k].TargetEnd
prevMatchedRange()[k].TargetEnd = matched[i][j-1].TargetEnd
}
mr[len(mr)-1] = append(prevMatchedRange()[:k+1], matched[i][j:]...)
continue NEXT
}
}
}
}
mr = append(mr, matched[i])
}
return mr
}
// coalesceMatchRanges coalesces overlapping match ranges into a single
// contiguous match range.
func coalesceMatchRanges(matchedRanges MatchRanges) MatchRanges {
coalesced := MatchRanges{matchedRanges[0]}
for i := 1; i < len(matchedRanges); i++ {
c := coalesced[len(coalesced)-1]
mr := matchedRanges[i]
if mr.SrcStart <= c.SrcEnd && mr.SrcStart >= c.SrcStart {
var se, ts, te int
if mr.SrcEnd > c.SrcEnd {
se = mr.SrcEnd
} else {
se = c.SrcEnd
}
if mr.TargetStart < c.TargetStart {
ts = mr.TargetStart
} else {
ts = c.TargetStart
}
if mr.TargetEnd > c.TargetEnd {
te = mr.TargetEnd
} else {
te = c.TargetEnd
}
coalesced[len(coalesced)-1] = &MatchRange{
SrcStart: c.SrcStart,
SrcEnd: se,
TargetStart: ts,
TargetEnd: te,
}
} else {
coalesced = append(coalesced, mr)
}
}
return coalesced
}
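As a quick illustration of the API above, the sketch below (hypothetical, with made-up texts) builds search sets for a known and an unknown text and prints each potential occurrence of the known text:

```go
package main

import (
	"fmt"

	"github.com/google/licenseclassifier/stringclassifier/searchset"
)

func main() {
	known := searchset.New("the quick brown fox jumps over the lazy dog",
		searchset.DefaultGranularity)
	unknown := searchset.New("prefix text the quick brown fox jumps over the lazy dog suffix",
		searchset.DefaultGranularity)

	// Each MatchRanges value is one potential occurrence of the known
	// text inside the unknown text.
	for _, mr := range searchset.FindPotentialMatches(known, unknown) {
		start, end := mr.TargetRange(unknown)
		fmt.Printf("matched %d source tokens at byte range [%d, %d)\n",
			mr.Size(), start, end)
	}
}
```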
@@ -0,0 +1,175 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package tokenizer converts a text into a stream of tokens.
package tokenizer
import (
"bytes"
"fmt"
"hash/crc32"
"sort"
"unicode"
"unicode/utf8"
)
// Token is a non-whitespace sequence (i.e., word or punctuation) in the
// original string. This is not meant for use outside of this package.
type token struct {
Text string
Offset int
}
// Tokens is a list of Token objects.
type Tokens []*token
// newToken creates a new token object with an invalid (negative) offset, which
// will be set before the token is used.
func newToken() *token {
return &token{Offset: -1}
}
// Tokenize converts a string into a stream of tokens.
func Tokenize(s string) (toks Tokens) {
tok := newToken()
for i := 0; i < len(s); {
r, size := utf8.DecodeRuneInString(s[i:])
switch {
case unicode.IsSpace(r):
if tok.Offset >= 0 {
toks = append(toks, tok)
tok = newToken()
}
case unicode.IsPunct(r):
if tok.Offset >= 0 {
toks = append(toks, tok)
tok = newToken()
}
toks = append(toks, &token{
Text: string(r),
Offset: i,
})
default:
if tok.Offset == -1 {
tok.Offset = i
}
tok.Text += string(r)
}
i += size
}
if tok.Offset != -1 {
// Add any remaining token that wasn't yet included in the list.
toks = append(toks, tok)
}
return toks
}
// GenerateHashes generates hashes for "size" length substrings. The
// "stringifyTokens" call takes a long time to run, so not all substrings have
// hashes, i.e. we skip some of the smaller substrings.
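// For example, with size == 4 the hashed windows cover the token ranges
// [0, 4), [2, 6), [4, 8), and so on, with each window overlapping the
// previous one by half.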
func (t Tokens) GenerateHashes(h Hash, size int) ([]uint32, TokenRanges) {
if size == 0 {
return nil, nil
}
var css []uint32
var tr TokenRanges
for offset := 0; offset+size <= len(t); offset += size / 2 {
var b bytes.Buffer
t.stringifyTokens(&b, offset, size)
cs := crc32.ChecksumIEEE(b.Bytes())
css = append(css, cs)
tr = append(tr, &TokenRange{offset, offset + size})
h.add(cs, offset, offset+size)
if size <= 1 {
break
}
}
return css, tr
}
// stringifyTokens serializes a sublist of tokens into a bytes buffer.
func (t Tokens) stringifyTokens(b *bytes.Buffer, offset, size int) {
for j := offset; j < offset+size; j++ {
if j != offset {
b.WriteRune(' ')
}
b.WriteString(t[j].Text)
}
}
// TokenRange indicates the range of tokens that map to a particular checksum.
type TokenRange struct {
Start int
End int
}
func (t *TokenRange) String() string {
return fmt.Sprintf("[%v, %v)", t.Start, t.End)
}
// TokenRanges is a list of TokenRange objects. The chance that two different
// strings map to the same checksum is very small, but unfortunately isn't
// zero, so we use this instead of making the assumption that they will all be
// unique.
type TokenRanges []*TokenRange
func (t TokenRanges) Len() int { return len(t) }
func (t TokenRanges) Swap(i, j int) { t[i], t[j] = t[j], t[i] }
func (t TokenRanges) Less(i, j int) bool { return t[i].Start < t[j].Start }
// CombineUnique returns the combination of both token ranges with no duplicates.
func (t TokenRanges) CombineUnique(other TokenRanges) TokenRanges {
if len(other) == 0 {
return t
}
if len(t) == 0 {
return other
}
cu := append(t, other...)
sort.Sort(cu)
if len(cu) == 0 {
return nil
}
res := TokenRanges{cu[0]}
for prev, i := cu[0], 1; i < len(cu); i++ {
if prev.Start != cu[i].Start || prev.End != cu[i].End {
res = append(res, cu[i])
prev = cu[i]
}
}
return res
}
// Hash is a map of the hashes of a section of text to the token range covering that text.
type Hash map[uint32]TokenRanges
// add associates a token range, [start, end], to a checksum.
func (h Hash) add(checksum uint32, start, end int) {
ntr := &TokenRange{Start: start, End: end}
if r, ok := h[checksum]; ok {
for _, tr := range r {
if tr.Start == ntr.Start && tr.End == ntr.End {
// The token range already exists at this
// checksum. No need to re-add it.
return
}
}
}
h[checksum] = append(h[checksum], ntr)
}
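A small sketch of the tokenizer's behavior on a hypothetical input: punctuation becomes its own token, whitespace is dropped, and each token remembers its byte offset.

```go
package main

import (
	"fmt"

	"github.com/google/licenseclassifier/stringclassifier/searchset/tokenizer"
)

func main() {
	for _, t := range tokenizer.Tokenize("Hello, world!") {
		fmt.Printf("%q at offset %d\n", t.Text, t.Offset)
	}
	// Output:
	// "Hello" at offset 0
	// "," at offset 5
	// "world" at offset 7
	// "!" at offset 12
}
```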
vendor/github.com/knative/test-infra/LICENSE (generated, vendored, new file)
@@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
@@ -9,21 +9,35 @@ This is a helper script to run the presubmit tests. To use it:

 1. Source this script.
-1. Define the functions `build_tests()` and `unit_tests()`. They should run all
-   tests (i.e., not fail fast), and return 0 if all passed, 1 if a failure
-   occurred. The environment variables `RUN_BUILD_TESTS`, `RUN_UNIT_TESTS` and
-   `RUN_INTEGRATION_TESTS` are set to 0 (false) or 1 (true) accordingly. If
-   `--emit-metrics` is passed, `EMIT_METRICS` will be set to 1.
-1. [optional] Define the function `integration_tests()`, just like the previous
-   ones. If you don't define this function, the default action for running the
-   integration tests is to call the `./test/e2e-tests.sh` script (passing the
-   `--emit-metrics` flag if necessary).
-1. [optional] Define the functions `pre_integration_tests()` or
-   `post_integration_tests()`. These functions will be called before or after the
-   integration tests (either your custom one or the default action) and will cause
-   the test to fail if they don't return success.
+1. [optional] Define the function `build_tests()`. If you don't define this
+   function, the default action for running the build tests is to:
+   - lint and link check markdown files
+   - run `go build` on the entire repo
+   - run `/hack/verify-codegen.sh` (if it exists)
+   - check licenses in `/cmd` (if it exists)
+1. [optional] Define the functions `pre_build_tests()` and/or
+   `post_build_tests()`. These functions will be called before or after the
+   build tests (either your custom one or the default action) and will cause
+   the test to fail if they don't return success.
+1. [optional] Define the function `unit_tests()`. If you don't define this
+   function, the default action for running the unit tests is to run all go tests
+   in the repo.
+1. [optional] Define the functions `pre_unit_tests()` and/or
+   `post_unit_tests()`. These functions will be called before or after the
+   unit tests (either your custom one or the default action) and will cause
+   the test to fail if they don't return success.
+1. [optional] Define the function `integration_tests()`. If you don't define
+   this function, the default action for running the integration tests is to run
+   all `./test/e2e-*tests.sh` scripts, in sequence.
+1. [optional] Define the functions `pre_integration_tests()` and/or
+   `post_integration_tests()`. These functions will be called before or after the
+   integration tests (either your custom one or the default action) and will cause
+   the test to fail if they don't return success.
 1. Call the `main()` function passing `$@` (without quotes).
@@ -33,20 +47,27 @@ integration tests).

 Use the flags `--build-tests`, `--unit-tests` and `--integration-tests` to run
 a specific set of tests. The flag `--emit-metrics` is used to emit metrics when
-running the tests, and is automatically handled by the default action (see
-above).
+running the tests, and is automatically handled by the default action for
+integration tests (see above).
+
+The script will automatically skip all presubmit tests for PRs where all changed
+files are exempt of tests (e.g., a PR changing only the `OWNERS` file).
+Also, for PRs touching only markdown files, the unit and integration tests are
+skipped.

 ### Sample presubmit test script

 ```bash
 source vendor/github.com/knative/test-infra/scripts/presubmit-tests.sh

-function build_tests() {
-  go build .
+function post_build_tests() {
+  echo "Cleaning up after build tests"
+  rm -fr ./build-cache
 }

 function unit_tests() {
-  report_go_test .
+  make -C tests test
 }

 function pre_integration_tests() {
@ -66,43 +87,44 @@ This is a helper script for Knative E2E test scripts. To use it:
1. Source the script.

1. [optional] Write the `teardown()` function, which will tear down your test
   resources.

1. [optional] Write the `dump_extra_cluster_state()` function. It will be
   called when a test fails, and can dump extra information about the current
   state of the cluster (typically using `kubectl`).

1. [optional] Write the `parse_flags()` function. It will be called whenever an
   unrecognized flag is passed to the script, allowing you to define your own
   flags. The function must return 0 if the flag is unrecognized, or the number
   of items to skip in the command line if the flag was parsed successfully.
   For example, return 1 for a simple flag, and 2 for a flag with a parameter
   (see the sketch after this list).

1. Call the `initialize()` function passing `$@` (without quotes).

1. Write logic for the end-to-end tests. Run all go tests using `go_test_e2e()`
   (or `report_go_test()` if you need more fine-grained control) and call
   `fail_test()` or `success()` if any of them failed. The environment variables
   `DOCKER_REPO_OVERRIDE`, `K8S_CLUSTER_OVERRIDE` and `K8S_USER_OVERRIDE` will be
   set according to the test cluster. You can also use the following boolean (0 is
   false, 1 is true) environment variables for the logic:

   - `EMIT_METRICS`: true if `--emit-metrics` was passed.
   - `USING_EXISTING_CLUSTER`: true if the test cluster is an already existing one,
     and not a temporary cluster created by `kubetest`.

   All environment variables above are marked read-only.
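For illustration, here is a `parse_flags()` sketch following the contract
above; the flag names and the variables they set are hypothetical:

```bash
function parse_flags() {
  case "$1" in
    --skip-teardown)
      # Hypothetical boolean flag: consume 1 item from the command line.
      SKIP_TEARDOWN=1
      return 1
      ;;
    --test-namespace)
      # Hypothetical flag with a parameter: consume 2 items.
      TEST_NAMESPACE="$2"
      return 2
      ;;
  esac
  # Flag not recognized here; report it back to the helper.
  return 0
}
```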
**Notes:**

1. Calling your script without arguments will create a new cluster in the GCP
   project `$PROJECT_ID` and run the tests against it.

1. Calling your script with `--run-tests` and the variables `K8S_CLUSTER_OVERRIDE`,
   `K8S_USER_OVERRIDE` and `DOCKER_REPO_OVERRIDE` set will immediately start the
   tests against the cluster.

1. You can force running the tests against a specific GKE cluster version by using
   the `--cluster-version` flag and passing an X.Y.Z version as the flag value.
### Sample end-to-end test script
@ -150,31 +172,38 @@ This is a helper script for Knative release scripts. To use it:
1. Call the `initialize()` function passing `$@` (without quotes).

1. Call the `run_validation_tests()` function passing the script or executable that
   runs the release validation tests. It will call the script to run the tests unless
   `--skip-tests` was passed.

1. Write logic for the release process. Call `publish_yaml()` to publish the manifest(s),
   `tag_images_in_yaml()` to tag the generated images, `branch_release()` to branch
   named releases. Use the following boolean (0 is false, 1 is true) and string environment
   variables for the logic:

   - `RELEASE_VERSION`: contains the release version if `--version` was passed. This
     also overrides the value of the `TAG` variable as `v<version>`.
   - `RELEASE_BRANCH`: contains the release branch if `--branch` was passed. Otherwise
     it's empty and `master` HEAD will be considered the release branch.
   - `RELEASE_NOTES`: contains the filename with the release notes if `--release-notes`
     was passed. The release notes file is a simple markdown file.
   - `RELEASE_GCS_BUCKET`: contains the GCS bucket name to store the manifests if
     `--release-gcs` was passed, otherwise the default value `knative-nightly/<repo>`
     will be used. It is empty if `--publish` was not passed.
   - `KO_DOCKER_REPO`: contains the GCR to store the images if `--release-gcr` was
     passed, otherwise the default value `gcr.io/knative-nightly` will be used. It
     is set to `ko.local` if `--publish` was not passed.
   - `SKIP_TESTS`: true if `--skip-tests` was passed. This is handled automatically
     by the `run_validation_tests()` function.
   - `TAG_RELEASE`: true if `--tag-release` was passed. In this case, the environment
     variable `TAG` will contain the release tag in the form `vYYYYMMDD-<commit_short_hash>`.
   - `PUBLISH_RELEASE`: true if `--publish` was passed. In this case, the environment
     variable `KO_FLAGS` will be updated with the `-L` option.
   - `BRANCH_RELEASE`: true if both `--version` and `--publish` were passed.

   All boolean environment variables default to false for safety.

   All environment variables above, except `KO_FLAGS`, are marked read-only once
   `initialize()` is called.
### Sample release script
@ -186,14 +215,12 @@ initialize $@
run_validation_tests ./test/presubmit-tests.sh

# config/ contains the manifests
ko resolve ${KO_FLAGS} -f config/ > release.yaml
tag_images_in_yaml release.yaml

if (( PUBLISH_RELEASE )); then
  publish_yaml release.yaml
fi

branch_release "Knative Foo" release.yaml
@ -36,14 +36,17 @@ function build_resource_name() {
}

# Test cluster parameters
readonly E2E_BASE_NAME="k${REPO_NAME}"
readonly E2E_CLUSTER_NAME=$(build_resource_name e2e-cls)
readonly E2E_NETWORK_NAME=$(build_resource_name e2e-net)
readonly E2E_CLUSTER_REGION=us-central1
readonly E2E_CLUSTER_MACHINE=n1-standard-4
readonly TEST_RESULT_FILE=/tmp/${E2E_BASE_NAME}-e2e-result

# Each knative repository may have a different cluster size requirement here,
# so we allow calling code to set these parameters. If they are not set we
# use some sane defaults.
readonly E2E_MIN_CLUSTER_NODES=${E2E_MIN_CLUSTER_NODES:-1}
readonly E2E_MAX_CLUSTER_NODES=${E2E_MAX_CLUSTER_NODES:-3}

# Flag whether test is using a boskos GCP project
IS_BOSKOS=0
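# For illustration, a consuming repo can size its test cluster by setting these
# variables before sourcing this script (the values below are hypothetical):
#   E2E_MIN_CLUSTER_NODES=4 E2E_MAX_CLUSTER_NODES=10
#   source vendor/github.com/knative/test-infra/scripts/e2e-tests.sh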
@ -60,14 +63,6 @@ function teardown_test_resources() {
  rm -fr kubernetes kubernetes.tar.gz
}
# Run the given E2E tests. Assume tests are tagged e2e, unless `-tags=XXX` is passed.
# Parameters: $1..$n - any go test flags, then directories containing the tests to run.
function go_test_e2e() {
@ -149,20 +144,21 @@ function create_test_cluster() {
  set -o pipefail

  header "Creating test cluster"
  echo "Cluster will have a minimum of ${E2E_MIN_CLUSTER_NODES} and a maximum of ${E2E_MAX_CLUSTER_NODES} nodes."
  # Smallest cluster required to run the end-to-end tests
  local CLUSTER_CREATION_ARGS=(
    --gke-create-args="--enable-autoscaling --min-nodes=${E2E_MIN_CLUSTER_NODES} --max-nodes=${E2E_MAX_CLUSTER_NODES} --scopes=cloud-platform --enable-basic-auth --no-issue-client-certificate"
    --gke-shape={\"default\":{\"Nodes\":${E2E_MIN_CLUSTER_NODES}\,\"MachineType\":\"${E2E_CLUSTER_MACHINE}\"}}
    --provider=gke
    --deployment=gke
    --cluster="${E2E_CLUSTER_NAME}"
    --gcp-region="${E2E_CLUSTER_REGION}"
    --gcp-network="${E2E_NETWORK_NAME}"
    --gke-environment=prod
  )
  if (( ! IS_BOSKOS )); then
    CLUSTER_CREATION_ARGS+=(--gcp-project=${GCP_PROJECT})
  fi
  # SSH keys are not used, but kubetest checks for their existence.
@ -175,8 +171,8 @@ function create_test_cluster() {
  # be a writeable docker repo.
  export K8S_USER_OVERRIDE=
  export K8S_CLUSTER_OVERRIDE=
  # Assume test failed (see details in set_test_return_code()).
  set_test_return_code 1
  local test_cmd_args="--run-tests"
  (( EMIT_METRICS )) && test_cmd_args+=" --emit-metrics"
  [[ -n "${GCP_PROJECT}" ]] && test_cmd_args+=" --gcp-project ${GCP_PROJECT}"
@ -241,7 +237,7 @@ function setup_test_cluster() {
  if [[ -z ${K8S_CLUSTER_OVERRIDE} ]]; then
    USING_EXISTING_CLUSTER=0
    export K8S_CLUSTER_OVERRIDE=$(kubectl config current-context)
    acquire_cluster_admin_role ${K8S_USER_OVERRIDE} ${E2E_CLUSTER_NAME} ${E2E_CLUSTER_REGION}
    # Make sure we're in the default namespace. Currently kubetest switches to
    # test-pods namespace when creating the cluster.
    kubectl config set-context $K8S_CLUSTER_OVERRIDE --namespace=default
@ -257,6 +253,7 @@ function setup_test_cluster() {
echo "- Docker is ${DOCKER_REPO_OVERRIDE}" echo "- Docker is ${DOCKER_REPO_OVERRIDE}"
export KO_DOCKER_REPO="${DOCKER_REPO_OVERRIDE}" export KO_DOCKER_REPO="${DOCKER_REPO_OVERRIDE}"
export KO_DATA_PATH="${REPO_ROOT_DIR}/.git"
trap teardown_test_resources EXIT trap teardown_test_resources EXIT
@ -274,19 +271,34 @@ function setup_test_cluster() {
  set +o pipefail
}

# Set the return code that the test script will return.
# Parameters: $1 - return code (0-255)
function set_test_return_code() {
  # kubetest teardown might fail and thus incorrectly report failure of the
  # script, even if the tests pass.
  # We store the real test result to return it later, ignoring any teardown
  # failure in kubetest.
  # TODO(adrcunha): Get rid of this workaround.
  echo -n "$1" > ${TEST_RESULT_FILE}
}

function success() {
  set_test_return_code 0
  echo "**************************************"
  echo "*** E2E TESTS PASSED ***"
  echo "**************************************"
  exit 0
}

# Exit test, dumping current state info.
# Parameters: $1 - error message (optional).
function fail_test() {
  set_test_return_code 1
  [[ -n $1 ]] && echo "ERROR: $1"
  dump_cluster_state
  exit 1
}
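# For illustration, a typical ending of an e2e test script built on these
# helpers (the test directory is hypothetical):
#   go_test_e2e ./test/e2e || fail_test "end-to-end tests failed"
#   success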
RUN_TESTS=0
EMIT_METRICS=0
USING_EXISTING_CLUSTER=1
@ -294,11 +306,6 @@ GCP_PROJECT=""
E2E_SCRIPT=""
E2E_CLUSTER_VERSION=""
# Parse flags and initialize the test cluster.
function initialize() {
  # Normalize calling script path; we can't use readlink because it's not available everywhere
@ -357,10 +364,8 @@ function initialize() {
(( IS_PROW )) && [[ -z "${GCP_PROJECT}" ]] && IS_BOSKOS=1 (( IS_PROW )) && [[ -z "${GCP_PROJECT}" ]] && IS_BOSKOS=1
# Safety checks # Safety checks
is_protected_gcr ${DOCKER_REPO_OVERRIDE} && \
if [[ "${DOCKER_REPO_OVERRIDE}" =~ ^gcr.io/knative-(releases|nightly)/?$ ]]; then abort "\$DOCKER_REPO_OVERRIDE set to ${DOCKER_REPO_OVERRIDE}, which is forbidden"
abort "\$DOCKER_REPO_OVERRIDE is set to ${DOCKER_REPO_OVERRIDE}, which is forbidden"
fi
  readonly RUN_TESTS
  readonly EMIT_METRICS
@ -23,11 +23,12 @@ readonly SERVING_GKE_VERSION=latest
readonly SERVING_GKE_IMAGE=cos

# Public latest stable nightly images and yaml files.
readonly KNATIVE_BASE_YAML_SOURCE=https://storage.googleapis.com/knative-nightly/@/latest
readonly KNATIVE_ISTIO_CRD_YAML=${KNATIVE_BASE_YAML_SOURCE/@/serving}/istio-crds.yaml
readonly KNATIVE_ISTIO_YAML=${KNATIVE_BASE_YAML_SOURCE/@/serving}/istio.yaml
readonly KNATIVE_SERVING_RELEASE=${KNATIVE_BASE_YAML_SOURCE/@/serving}/serving.yaml
readonly KNATIVE_BUILD_RELEASE=${KNATIVE_BASE_YAML_SOURCE/@/build}/release.yaml
readonly KNATIVE_EVENTING_RELEASE=${KNATIVE_BASE_YAML_SOURCE/@/eventing}/release.yaml
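# For illustration: the "@" above is a repo-name placeholder filled via bash
# pattern substitution, e.g. ${KNATIVE_BASE_YAML_SOURCE/@/eventing}/release.yaml
# expands to https://storage.googleapis.com/knative-nightly/eventing/latest/release.yaml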
# Conveniently set GOPATH if unset
if [[ -z "${GOPATH:-}" ]]; then
@ -41,13 +42,24 @@ fi
[[ -n "${PROW_JOB_ID:-}" ]] && IS_PROW=1 || IS_PROW=0 [[ -n "${PROW_JOB_ID:-}" ]] && IS_PROW=1 || IS_PROW=0
readonly IS_PROW readonly IS_PROW
readonly REPO_ROOT_DIR="$(git rev-parse --show-toplevel)" readonly REPO_ROOT_DIR="$(git rev-parse --show-toplevel)"
readonly REPO_NAME="$(basename ${REPO_ROOT_DIR})"
# On a Prow job, redirect stderr to stdout so it's synchronously added to log
(( IS_PROW )) && exec 2>&1
# Print error message and exit 1
# Parameters: $1..$n - error message to be displayed
function abort() {
echo "error: $@"
exit 1
}
# Display a box banner.
# Parameters: $1 - character to use for the box.
#             $2 - banner message.
function make_banner() {
  local msg="$1$1$1$1 $2 $1$1$1$1"
  local border="${msg//[-0-9A-Za-z _.,\/()]/$1}"
  echo -e "${border}\n${msg}\n${border}"
}
@ -72,20 +84,6 @@ function function_exists() {
[[ "$(type -t $1)" == "function" ]] [[ "$(type -t $1)" == "function" ]]
} }
# Waits until the given object doesn't exist.
# Parameters: $1 - the kind of the object.
#             $2 - object's name.
@ -100,8 +98,8 @@ function wait_until_object_does_not_exist() {
  fi
  echo -n "Waiting until ${DESCRIPTION} does not exist"
  for i in {1..150}; do  # timeout after 5 minutes
    if ! kubectl ${KUBECTL_ARGS} > /dev/null 2>&1; then
      echo -e "\n${DESCRIPTION} does not exist"
      return 0
    fi
    echo -n "."
@ -140,7 +138,6 @@ function wait_until_pods_running() {
    sleep 2
  done
  echo -e "\n\nERROR: timeout waiting for pods to come up\n${pods}"
  return 1
}
@ -201,12 +198,12 @@ function get_app_pods() {
# Sets the given user as cluster admin.
# Parameters: $1 - user
#             $2 - cluster name
#             $3 - cluster region
function acquire_cluster_admin_role() {
  # Get the password of the admin and use it, as the service account (or the user)
  # might not have the necessary permission.
  local password=$(gcloud --format="value(masterAuth.password)" \
    container clusters describe $2 --region=$3)
  if [[ -n "${password}" ]]; then
    # Cluster created with basic authentication
    kubectl config set-credentials cluster-admin \
@ -216,9 +213,9 @@ function acquire_cluster_admin_role() {
    local key=$(mktemp)
    echo "Certificate in ${cert}, key in ${key}"
    gcloud --format="value(masterAuth.clientCertificate)" \
      container clusters describe $2 --region=$3 | base64 -d > ${cert}
    gcloud --format="value(masterAuth.clientKey)" \
      container clusters describe $2 --region=$3 | base64 -d > ${key}
    kubectl config set-credentials cluster-admin \
      --client-certificate=${cert} --client-key=${key}
  fi
@ -229,10 +226,10 @@ function acquire_cluster_admin_role() {
    --user=$1
  # Reset back to the default account
  gcloud container clusters get-credentials \
    $2 --region=$3 --project $(gcloud config get-value project)
}
# Runs a go test and generates a junit summary.
# Parameters: $1... - parameters to go test
function report_go_test() {
  # Run tests in verbose mode to capture details.
@ -246,102 +243,18 @@ function report_go_test() {
  fi
  echo "Running tests with '${go_test}'"
  local report=$(mktemp)
  ${go_test} | tee ${report}
  local failed=( ${PIPESTATUS[@]} )
  [[ ${failed[0]} -eq 0 ]] && failed=${failed[1]} || failed=${failed[0]}
  echo "Finished run, return code is ${failed}"
  # Install go-junit-report if necessary.
  run_go_tool github.com/jstemmer/go-junit-report go-junit-report --help > /dev/null 2>&1
  local xml=$(mktemp ${ARTIFACTS}/junit_XXXXXXXX.xml)
  cat ${report} \
      | go-junit-report \
      | sed -e "s#\"github.com/knative/${REPO_NAME}/#\"#g" \
      > ${xml}
  echo "XML report written to ${xml}"
  return ${failed}
}
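# For illustration, the PIPESTATUS idiom above preserves the go test exit code
# even though its output is piped through tee, e.g.:
#   false | tee /dev/null
#   status=( "${PIPESTATUS[@]}" )  # -> (1 0): exit codes of false and tee
#   [[ ${status[0]} -eq 0 ]] && rc=${status[1]} || rc=${status[0]}  # rc=1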
@ -349,11 +262,16 @@ function report_go_test() {
function start_latest_knative_serving() {
  header "Starting Knative Serving"
  subheader "Installing Istio"
  echo "Installing Istio CRD from ${KNATIVE_ISTIO_CRD_YAML}"
  kubectl apply -f ${KNATIVE_ISTIO_CRD_YAML} || return 1
  echo "Installing Istio from ${KNATIVE_ISTIO_YAML}"
  kubectl apply -f ${KNATIVE_ISTIO_YAML} || return 1
  wait_until_pods_running istio-system || return 1
  kubectl label namespace default istio-injection=enabled || return 1
  subheader "Installing Knative Build"
  kubectl apply -f ${KNATIVE_BUILD_RELEASE} || return 1
  subheader "Installing Knative Serving"
  echo "Installing Serving from ${KNATIVE_SERVING_RELEASE}"
  kubectl apply -f ${KNATIVE_SERVING_RELEASE} || return 1
  wait_until_pods_running knative-serving || return 1
  wait_until_pods_running knative-build || return 1
@ -432,8 +350,22 @@ function check_links_in_markdown() {
}

# Check format of the given markdown files.
# Parameters: $1..$n - files to inspect
function lint_markdown() {
  # https://github.com/markdownlint/markdownlint
  run_lint_tool mdl "linting markdown files" "-r ~MD013" $@
}

# Return 0 if the given parameter is an integer, otherwise 1
# Parameters: $1 - an integer
function is_int() {
  [[ -n $1 && $1 =~ ^[0-9]+$ ]]
}

# Return 0 if the given parameter is the knative release/nightly gcr, 1
# otherwise
# Parameters: $1 - gcr name, e.g. gcr.io/knative-nightly
function is_protected_gcr() {
  [[ -n $1 && $1 =~ ^gcr.io/knative-(releases|nightly)/?$ ]]
}
@ -20,7 +20,199 @@
source $(dirname ${BASH_SOURCE})/library.sh

# Extensions or file patterns that don't require presubmit tests.
readonly NO_PRESUBMIT_FILES=(\.png \.gitignore \.gitattributes ^OWNERS ^OWNERS_ALIASES ^AUTHORS)
# Flag if this is a presubmit run or not.
(( IS_PROW )) && [[ -n "${PULL_PULL_SHA}" ]] && IS_PRESUBMIT=1 || IS_PRESUBMIT=0
readonly IS_PRESUBMIT

# List of changed files on presubmit, LF separated.
CHANGED_FILES=""

# Flags that this PR is exempt from presubmit tests.
IS_PRESUBMIT_EXEMPT_PR=0

# Flags that this PR contains only changes to documentation.
IS_DOCUMENTATION_PR=0

# Returns true if PR only contains the given file regexes.
# Parameters: $1 - file regexes, space separated.
function pr_only_contains() {
  [[ -z "$(echo "${CHANGED_FILES}" | grep -v "\(${1// /\\|}\)$")" ]]
}

# List changed files in the current PR.
# This is implemented as a function so it can be mocked in unit tests.
function list_changed_files() {
  /workspace/githubhelper -list-changed-files
}

# Initialize flags and context for presubmit tests:
# CHANGED_FILES, IS_PRESUBMIT_EXEMPT_PR and IS_DOCUMENTATION_PR.
function initialize_environment() {
  CHANGED_FILES=""
  IS_PRESUBMIT_EXEMPT_PR=0
  IS_DOCUMENTATION_PR=0
  (( ! IS_PRESUBMIT )) && return
  CHANGED_FILES="$(list_changed_files)"
  if [[ -n "${CHANGED_FILES}" ]]; then
    echo -e "Changed files in commit ${PULL_PULL_SHA}:\n${CHANGED_FILES}"
    local no_presubmit_files="${NO_PRESUBMIT_FILES[*]}"
    pr_only_contains "${no_presubmit_files}" && IS_PRESUBMIT_EXEMPT_PR=1
    pr_only_contains "\.md ${no_presubmit_files}" && IS_DOCUMENTATION_PR=1
  else
    header "NO CHANGED FILES REPORTED, ASSUMING IT'S AN ERROR AND RUNNING TESTS ANYWAY"
  fi
  readonly CHANGED_FILES
  readonly IS_DOCUMENTATION_PR
  readonly IS_PRESUBMIT_EXEMPT_PR
}
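# For illustration, a unit test can mock the changed-file list by redefining
# list_changed_files before calling initialize_environment (file names below
# are hypothetical):
#   function list_changed_files() { printf "README.md\nOWNERS\n"; }
#   initialize_environment  # on a presubmit, this sets IS_DOCUMENTATION_PR=1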
# Display a pass/fail banner for a test group.
# Parameters: $1 - test group name (e.g., build)
#             $2 - result (0=passed, 1=failed)
function results_banner() {
  local result
  [[ $2 -eq 0 ]] && result="PASSED" || result="FAILED"
  header "$1 tests ${result}"
}
# Run build tests. If there's no `build_tests` function, run the default
# build test runner.
function run_build_tests() {
  (( ! RUN_BUILD_TESTS )) && return 0
  header "Running build tests"
  local failed=0
  # Run pre-build tests, if any
  if function_exists pre_build_tests; then
    pre_build_tests || failed=1
  fi
  # Don't run build tests if pre-build tests failed
  if (( ! failed )); then
    if function_exists build_tests; then
      build_tests || failed=1
    else
      default_build_test_runner || failed=1
    fi
  fi
  # Run post-build tests, if any (they run even if earlier tests failed, e.g., for cleanup)
  if function_exists post_build_tests; then
    post_build_tests || failed=1
  fi
  results_banner "Build" ${failed}
  return ${failed}
}
# Default build test runner that:
# * lints and checks links in markdown files
# * runs `go build` on the entire repo
# * runs `/hack/verify-codegen.sh` (if it exists)
# * checks licenses in `/cmd` (if it exists)
function default_build_test_runner() {
  local failed=0
  # Ignore markdown files in /vendor
  local mdfiles="$(echo "${CHANGED_FILES}" | grep \.md$ | grep -v ^vendor/)"
  if [[ -n "${mdfiles}" ]]; then
    subheader "Linting the markdown files"
    lint_markdown ${mdfiles} || failed=1
    subheader "Checking links in the markdown files"
    check_links_in_markdown ${mdfiles} || failed=1
  fi
  # For documentation PRs, just check the md files
  (( IS_DOCUMENTATION_PR )) && return ${failed}
  # Ensure all the code builds
  subheader "Checking that go code builds"
  go build -v ./... || failed=1
  # Get all build tags in go code (ignore /vendor)
  local tags="$(grep -r '// +build' . \
      | grep -v '^./vendor/' | cut -f3 -d' ' | sort | uniq | tr '\n' ' ')"
  if [[ -n "${tags}" ]]; then
    go test -run=^$ -tags="${tags}" ./... || failed=1
  fi
  if [[ -f ./hack/verify-codegen.sh ]]; then
    subheader "Checking autogenerated code is up-to-date"
    ./hack/verify-codegen.sh || failed=1
  fi
  # Check that we don't have any forbidden licenses in our images.
  if [[ -d ./cmd ]]; then
    subheader "Checking for forbidden licenses"
    check_licenses ./cmd/* || failed=1
  fi
  return ${failed}
}
# Run unit tests. If there's no `unit_tests` function, run the default
# unit test runner.
function run_unit_tests() {
  (( ! RUN_UNIT_TESTS )) && return 0
  header "Running unit tests"
  local failed=0
  # Run pre-unit tests, if any
  if function_exists pre_unit_tests; then
    pre_unit_tests || failed=1
  fi
  # Don't run unit tests if pre-unit tests failed
  if (( ! failed )); then
    if function_exists unit_tests; then
      unit_tests || failed=1
    else
      default_unit_test_runner || failed=1
    fi
  fi
  # Run post-unit tests, if any (they run even if earlier tests failed, e.g., for cleanup)
  if function_exists post_unit_tests; then
    post_unit_tests || failed=1
  fi
  results_banner "Unit" ${failed}
  return ${failed}
}

# Default unit test runner that runs all go tests in the repo.
function default_unit_test_runner() {
  report_go_test ./...
}
# Run integration tests. If there's no `integration_tests` function, run the
# default integration test runner.
function run_integration_tests() {
  # Don't run integration tests if not requested OR on documentation PRs
  (( ! RUN_INTEGRATION_TESTS )) && return 0
  (( IS_DOCUMENTATION_PR )) && return 0
  header "Running integration tests"
  local failed=0
  # Run pre-integration tests, if any
  if function_exists pre_integration_tests; then
    pre_integration_tests || failed=1
  fi
  # Don't run integration tests if pre-integration tests failed
  if (( ! failed )); then
    if function_exists integration_tests; then
      integration_tests || failed=1
    else
      default_integration_test_runner || failed=1
    fi
  fi
  # Don't run post-integration tests if pre/integration tests failed
  if (( ! failed )) && function_exists post_integration_tests; then
    post_integration_tests || failed=1
  fi
  results_banner "Integration" ${failed}
  return ${failed}
}

# Default integration test runner that runs all `test/e2e-*tests.sh`.
function default_integration_test_runner() {
  local options=""
  local failed=0
  (( EMIT_METRICS )) && options="--emit-metrics"
  for e2e_test in $(find test/ -name "e2e-*tests.sh"); do
    echo "Running integration test ${e2e_test}"
    if ! ${e2e_test} ${options}; then
      failed=1
    fi
  done
  return ${failed}
}
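# For illustration, a minimal test/presubmit-tests.sh in a consuming repo that
# relies entirely on the default runners above:
#   source vendor/github.com/knative/test-infra/scripts/presubmit-tests.sh
#   main $@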
# Options set by command-line flags.
RUN_BUILD_TESTS=0
@ -28,34 +220,13 @@ RUN_UNIT_TESTS=0
RUN_INTEGRATION_TESTS=0
EMIT_METRICS=0
# Process flags and run tests accordingly.
function main() {
  initialize_environment
  if (( IS_PRESUBMIT_EXEMPT_PR )) && (( ! IS_DOCUMENTATION_PR )); then
    header "Commit only contains changes that don't require tests, skipping"
    exit 0
  fi
  # Show the version of the tools we're using
  if (( IS_PROW )); then
@ -70,6 +241,12 @@ function main() {
    go version
    echo ">> git version"
    git version
    echo ">> bazel version"
    bazel version 2> /dev/null
    if [[ "${DOCKER_IN_DOCKER_ENABLED}" == "true" ]]; then
      echo ">> docker version"
      docker version
    fi
  fi
  [[ -z $1 ]] && set -- "--all-tests"
@ -117,45 +294,9 @@ function main() {
    ${TEST_TO_RUN} || failed=1
  fi

  run_build_tests || failed=1
  run_unit_tests || failed=1
  run_integration_tests || failed=1

  exit ${failed}
}
@ -19,33 +19,46 @@
source $(dirname ${BASH_SOURCE})/library.sh

# GitHub upstream.
readonly KNATIVE_UPSTREAM="https://github.com/knative/${REPO_NAME}"

# Simple banner for logging purposes.
# Parameters: $1 - message to display.
function banner() {
  make_banner "@" "$1"
}
# Tag images in the yaml file if $TAG is not empty.
# $KO_DOCKER_REPO is the registry containing the images to tag with $TAG.
# Parameters: $1 - yaml file to parse for images.
function tag_images_in_yaml() {
  [[ -z ${TAG} ]] && return 0
  local SRC_DIR="${GOPATH}/src/"
  local DOCKER_BASE="${KO_DOCKER_REPO}/${REPO_ROOT_DIR/$SRC_DIR}"
  echo "Tagging images under '${DOCKER_BASE}' with ${TAG}"
  for image in $(grep -o "${DOCKER_BASE}/[a-z\./-]\+@sha256:[0-9a-f]\+" $1); do
    gcloud -q container images add-tag ${image} ${image%%@*}:${TAG}
    # Georeplicate to {us,eu,asia}.gcr.io
    gcloud -q container images add-tag ${image} us.${image%%@*}:${TAG}
    gcloud -q container images add-tag ${image} eu.${image%%@*}:${TAG}
    gcloud -q container images add-tag ${image} asia.${image%%@*}:${TAG}
  done
}
# Copy the given yaml file to the $RELEASE_GCS_BUCKET bucket's "latest" directory.
# If $TAG is not empty, also copy it to $RELEASE_GCS_BUCKET bucket's "previous" directory.
# Parameters: $1 - yaml file to copy.
function publish_yaml() {
  function verbose_gsutil_cp {
    local DEST="gs://${RELEASE_GCS_BUCKET}/$2/"
    echo "Publishing $1 to ${DEST}"
    gsutil cp $1 ${DEST}
  }
  verbose_gsutil_cp $1 latest
  if [[ -n ${TAG} ]]; then
    verbose_gsutil_cp $1 previous/${TAG}
  fi
}
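# For illustration, with RELEASE_GCS_BUCKET=knative-nightly/foo and
# TAG=v20190117-abc1234 (hypothetical values), publish_yaml release.yaml copies to:
#   gs://knative-nightly/foo/latest/release.yaml
#   gs://knative-nightly/foo/previous/v20190117-abc1234/release.yaml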
# These are global environment variables.
@ -57,11 +70,98 @@ TAG=""
RELEASE_VERSION=""
RELEASE_NOTES=""
RELEASE_BRANCH=""
RELEASE_GCS_BUCKET=""
KO_FLAGS=""
export KO_DOCKER_REPO=""
export GITHUB_TOKEN=""

# Convenience function to run the hub tool.
# Parameters: $1..$n - arguments to hub.
function hub_tool() {
  run_go_tool github.com/github/hub hub $@
}
# Return the master version of a release.
# For example, "v0.2.1" returns "0.2".
# Parameters: $1 - release version label.
function master_version() {
  local release="${1//v/}"
  local tokens=(${release//\./ })
  echo "${tokens[0]}.${tokens[1]}"
}

# Return the release build number of a release.
# For example, "v0.2.1" returns "1".
# Parameters: $1 - release version label.
function release_build_number() {
  local tokens=(${1//\./ })
  echo "${tokens[2]}"
}
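# For illustration:
#   master_version "v0.2.1"        # prints 0.2
#   release_build_number "v0.2.1"  # prints 1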
# Set up the repository upstream, if not set.
function setup_upstream() {
  # hub and checkout need the upstream URL to be set
  # TODO(adrcunha): Use "git remote get-url" once available on Prow.
  local upstream="$(git config --get remote.upstream.url)"
  echo "Remote upstream URL is '${upstream}'"
  if [[ -z "${upstream}" ]]; then
    echo "Setting remote upstream URL to '${KNATIVE_UPSTREAM}'"
    git remote add upstream ${KNATIVE_UPSTREAM}
  fi
}

# Fetch the release branch, so we can check it out.
function setup_branch() {
  [[ -z "${RELEASE_BRANCH}" ]] && return
  git fetch ${KNATIVE_UPSTREAM} ${RELEASE_BRANCH}:upstream/${RELEASE_BRANCH}
}
# Set up version, branch and release notes for a "dot" release.
function prepare_dot_release() {
  echo "Dot release requested"
  TAG_RELEASE=1
  PUBLISH_RELEASE=1
  # List latest release
  local releases  # don't combine with the line below, or $? will be 0
  releases="$(hub_tool release)"
  [[ $? -eq 0 ]] || abort "cannot list releases"
  # If --release-branch passed, restrict to that release
  if [[ -n "${RELEASE_BRANCH}" ]]; then
    local version_filter="v${RELEASE_BRANCH##release-}"
    echo "Dot release will be generated for ${version_filter}"
    releases="$(echo "${releases}" | grep ^${version_filter})"
  fi
  local last_version="$(echo "${releases}" | grep '^v[0-9]\+\.[0-9]\+\.[0-9]\+$' | sort -r | head -1)"
  [[ -n "${last_version}" ]] || abort "no previous release exists"
  local major_minor_version="$(master_version ${last_version})"
  if [[ -z "${RELEASE_BRANCH}" ]]; then
    echo "Last release is ${last_version}"
    # Determine branch
    RELEASE_BRANCH="release-${major_minor_version}"
    echo "Last release branch is ${RELEASE_BRANCH}"
  fi
  # Ensure there are new commits in the branch, otherwise we don't create a new release
  setup_branch
  local last_release_commit="$(git rev-list -n 1 ${last_version})"
  local release_branch_commit="$(git rev-list -n 1 upstream/${RELEASE_BRANCH})"
  [[ -n "${last_release_commit}" ]] || abort "cannot get last release commit"
  [[ -n "${release_branch_commit}" ]] || abort "cannot get release branch last commit"
  if [[ "${last_release_commit}" == "${release_branch_commit}" ]]; then
    echo "*** Branch ${RELEASE_BRANCH} is at commit ${release_branch_commit}"
    echo "*** Branch ${RELEASE_BRANCH} has no new cherry-picks since release ${last_version}"
    echo "*** No dot release will be generated, as no changes exist"
    exit 0
  fi
  # Create new release version number
  local last_build="$(release_build_number ${last_version})"
  RELEASE_VERSION="${major_minor_version}.$(( last_build + 1 ))"
  echo "Will create release ${RELEASE_VERSION} at commit ${release_branch_commit}"
  # If --release-notes not used, copy from the latest release
  if [[ -z "${RELEASE_NOTES}" ]]; then
    RELEASE_NOTES="$(mktemp)"
    hub_tool release show -f "%b" ${last_version} > ${RELEASE_NOTES}
    echo "Release notes from ${last_version} copied to ${RELEASE_NOTES}"
  fi
}
# Parses flags and sets environment variables accordingly.
@ -71,42 +171,71 @@ function parse_flags() {
  RELEASE_NOTES=""
  RELEASE_BRANCH=""
  KO_FLAGS="-P"
  KO_DOCKER_REPO="gcr.io/knative-nightly"
  RELEASE_GCS_BUCKET="knative-nightly/${REPO_NAME}"
  GITHUB_TOKEN=""
  local has_gcr_flag=0
  local has_gcs_flag=0
  local is_dot_release=0

  cd ${REPO_ROOT_DIR}
  while [[ $# -ne 0 ]]; do
    local parameter=$1
    case ${parameter} in
      --skip-tests) SKIP_TESTS=1 ;;
      --tag-release) TAG_RELEASE=1 ;;
      --notag-release) TAG_RELEASE=0 ;;
      --publish) PUBLISH_RELEASE=1 ;;
      --nopublish) PUBLISH_RELEASE=0 ;;
      --dot-release) is_dot_release=1 ;;
      *)
        [[ $# -ge 2 ]] || abort "missing parameter after $1"
        shift
        case ${parameter} in
          --github-token)
            [[ ! -f "$1" ]] && abort "file $1 doesn't exist"
            GITHUB_TOKEN="$(cat $1)"
            [[ -n "${GITHUB_TOKEN}" ]] || abort "file $1 is empty"
            ;;
          --release-gcr)
            KO_DOCKER_REPO=$1
            has_gcr_flag=1
            ;;
          --release-gcs)
            RELEASE_GCS_BUCKET=$1
            has_gcs_flag=1
            ;;
          --version)
            [[ $1 =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]] || abort "version format must be '[0-9].[0-9].[0-9]'"
            RELEASE_VERSION=$1
            ;;
          --branch)
            [[ $1 =~ ^release-[0-9]+\.[0-9]+$ ]] || abort "branch name must be 'release-[0-9].[0-9]'"
            RELEASE_BRANCH=$1
            ;;
          --release-notes)
            [[ ! -f "$1" ]] && abort "file $1 doesn't exist"
            RELEASE_NOTES=$1
            ;;
          *) abort "unknown option ${parameter}" ;;
        esac
        ;;
    esac
    shift
  done

  # Setup dot releases
  if (( is_dot_release )); then
    setup_upstream
    prepare_dot_release
  fi

  # Update KO_DOCKER_REPO and KO_FLAGS if we're not publishing.
  if (( ! PUBLISH_RELEASE )); then
    (( has_gcr_flag )) && echo "Not publishing the release, GCR flag is ignored"
    (( has_gcs_flag )) && echo "Not publishing the release, GCS flag is ignored"
    KO_DOCKER_REPO="ko.local"
    KO_FLAGS="-L ${KO_FLAGS}"
    RELEASE_GCS_BUCKET=""
  fi
  if (( TAG_RELEASE )); then
@ -131,6 +260,8 @@ function parse_flags() {
  readonly RELEASE_VERSION
  readonly RELEASE_NOTES
  readonly RELEASE_BRANCH
  readonly RELEASE_GCS_BUCKET
  readonly KO_DOCKER_REPO
}
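# For illustration, flag combinations a release script built on these helpers
# accepts (the script name is hypothetical):
#   ./hack/release.sh --skip-tests --nopublish
#   ./hack/release.sh --dot-release --github-token /path/to/token
#   ./hack/release.sh --version 0.3.1 --branch release-0.3 --publish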
# Run tests (unless --skip-tests was passed). Conveniently displays a banner indicating so.
@ -149,8 +280,29 @@ function run_validation_tests() {
# Initialize everything (flags, workspace, etc) for a release.
function initialize() {
  parse_flags $@

  # Log what will be done and where.
  banner "Release configuration"
  echo "- Destination GCR: ${KO_DOCKER_REPO}"
  (( SKIP_TESTS )) && echo "- Tests will NOT be run" || echo "- Tests will be run"
  if (( TAG_RELEASE )); then
    echo "- Artifacts will be tagged '${TAG}'"
  else
    echo "- Artifacts WILL NOT be tagged"
  fi
  if (( PUBLISH_RELEASE )); then
    echo "- Release WILL BE published to '${RELEASE_GCS_BUCKET}'"
  else
    echo "- Release will not be published"
  fi
  if (( BRANCH_RELEASE )); then
    echo "- Release WILL BE branched from '${RELEASE_BRANCH}'"
  fi
  [[ -n "${RELEASE_NOTES}" ]] && echo "- Release notes are generated from '${RELEASE_NOTES}'"

  # Checkout specific branch, if necessary
  if (( BRANCH_RELEASE )); then
    setup_upstream
    setup_branch
    git checkout upstream/${RELEASE_BRANCH} || abort "cannot checkout branch ${RELEASE_BRANCH}"
  fi
}
@ -174,8 +326,11 @@ function branch_release() {
    cat ${RELEASE_NOTES} >> ${description}
  fi
  git tag -a ${TAG} -m "${title}"
  local repo_url="${KNATIVE_UPSTREAM}"
  [[ -n "${GITHUB_TOKEN}" ]] && repo_url="${repo_url/:\/\//:\/\/${GITHUB_TOKEN}@}"
  hub_tool push ${repo_url} tag ${TAG}
  hub_tool release create \
    --prerelease \
    ${attachments[@]} \
    --file=${description} \
@ -0,0 +1,90 @@
# dep-collector
`dep-collector` is a tool for gathering the licenses of Go dependencies that
have been pulled into the idiomatic `vendor/` directory. The resulting file
from running `dep-collector` is intended for inclusion in container images, to
respect the licenses of the included software.
## Basic Usage
You can run `dep-collector` on one or more Go import paths as entrypoints,
and it will:
1. Walk the transitive dependencies to identify vendored software packages,
1. Search for licenses for each vendored dependency,
1. Dump a file containing the licenses for each vendored import.
For example (single import path):
```shell
$ dep-collector .
===========================================================
Import: github.com/mattmoor/dep-collector/vendor/github.com/google/licenseclassifier
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
...
```
For example (multiple import paths):
```shell
$ dep-collector ./cmd/controller ./cmd/sleeper
===========================================================
Import: github.com/mattmoor/warm-image/vendor/cloud.google.com/go
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
```
## CSV Usage
You can also run `dep-collector` in a mode that produces CSV output,
including basic classification of the license.
> In order to run dep-collector in this mode, you must first run:
> `go get github.com/google/licenseclassifier`
For example:
```shell
$ dep-collector -csv .
github.com/google/licenseclassifier,Static,,https://github.com/mattmoor/dep-collector/blob/master/vendor/github.com/google/licenseclassifier/LICENSE,Apache-2.0
github.com/google/licenseclassifier/stringclassifier,Static,,https://github.com/mattmoor/dep-collector/blob/master/vendor/github.com/google/licenseclassifier/stringclassifier/LICENSE,Apache-2.0
github.com/sergi/go-diff,Static,,https://github.com/mattmoor/dep-collector/blob/master/vendor/github.com/sergi/go-diff/LICENSE,MIT
```
The columns here are:
* Import Path,
* How the dependency is linked in (always reports "Static"),
* A column for whether any modifications have been made (always empty),
* The URL by which to access the license file (assumes `master`),
* A classification of what license this is ([using this](https://github.com/google/licenseclassifier)).
## Check mode
`dep-collector` also includes a mode that will check for "forbidden" licenses.
> In order to run dep-collector in this mode, you must first run:
> `go get github.com/google/licenseclassifier`
For example (failing):
```shell
$ dep-collector -check ./foo/bar/baz
2018/07/20 22:01:29 Error checking license collection: Errors validating licenses:
Found matching forbidden license in "foo.io/bar/vendor/github.com/BurntSushi/toml":WTFPL
```
For example (passing):
```shell
$ dep-collector -check .
2018/07/20 22:29:09 No errors found.
```
@ -0,0 +1,94 @@
/*
Copyright 2018 The Knative Authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main
import (
    "fmt"
    gb "go/build"
    "path/filepath"
    "sort"
    "strings"
)
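// CollectTransitiveImports walks the binaries' import graphs depth-first and
// returns a sorted list of the vendored packages they transitively depend on.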
func CollectTransitiveImports(binaries []string) ([]string, error) {
    // Perform a simple DFS to collect the binaries' transitive dependencies.
    visited := make(map[string]struct{})
    for _, importpath := range binaries {
        if gb.IsLocalImport(importpath) {
            ip, err := qualifyLocalImport(importpath)
            if err != nil {
                return nil, err
            }
            importpath = ip
        }
        pkg, err := gb.Import(importpath, WorkingDir, gb.ImportComment)
        if err != nil {
            return nil, err
        }
        if err := visit(pkg, visited); err != nil {
            return nil, err
        }
    }
    // Sort the dependencies deterministically.
    var list sort.StringSlice
    for ip := range visited {
        if !strings.Contains(ip, "/vendor/") {
            // Skip files outside of vendor
            continue
        }
        list = append(list, ip)
    }
    list.Sort()
    return list, nil
}
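// qualifyLocalImport resolves a relative import path (e.g. "./cmd/foo")
// against the working directory, which must be under ${GOPATH}/src.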
func qualifyLocalImport(ip string) (string, error) {
    gopathsrc := filepath.Join(gb.Default.GOPATH, "src")
    if !strings.HasPrefix(WorkingDir, gopathsrc) {
        return "", fmt.Errorf("working directory must be on ${GOPATH}/src = %s", gopathsrc)
    }
    return filepath.Join(strings.TrimPrefix(WorkingDir, gopathsrc+string(filepath.Separator)), ip), nil
}
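// visit records pkg as seen and recurses into its imports, skipping cgo ("C")
// and packages outside the workspace (i.e. the standard library).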
func visit(pkg *gb.Package, visited map[string]struct{}) error {
    if _, ok := visited[pkg.ImportPath]; ok {
        return nil
    }
    visited[pkg.ImportPath] = struct{}{}
    for _, ip := range pkg.Imports {
        if ip == "C" {
            // skip cgo
            continue
        }
        subpkg, err := gb.Import(ip, WorkingDir, gb.ImportComment)
        if err != nil {
            return fmt.Errorf("%v\n -> %v", pkg.ImportPath, err)
        }
        if !strings.HasPrefix(subpkg.Dir, WorkingDir) {
            // Skip import paths outside of our workspace (std library)
            continue
        }
        if err := visit(subpkg, visited); err != nil {
            return fmt.Errorf("%v (%v)\n -> %v", pkg.ImportPath, pkg.Dir, err)
        }
    }
    return nil
}
@ -0,0 +1,203 @@
/*
Copyright 2018 The Knative Authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main
import (
    "fmt"
    gb "go/build"
    "io/ioutil"
    "os"
    "path/filepath"
    "sort"
    "strings"

    "github.com/google/licenseclassifier"
)
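// LicenseNames lists the file names that are searched for license content.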
var LicenseNames = []string{
    "LICENCE",
    "LICENSE",
    "LICENSE.code",
    "LICENSE.md",
    "LICENSE.txt",
    "COPYING",
    "copyright",
}
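// MatchThreshold is the minimum confidence required for a license classification.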
const MatchThreshold = 0.9
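// LicenseFile points at the license file covering a vendored import path.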
type LicenseFile struct {
    EnclosingImportPath string
    LicensePath         string
}
func (lf *LicenseFile) Body() (string, error) {
body, err := ioutil.ReadFile(lf.LicensePath)
if err != nil {
return "", err
}
return string(body), nil
}
func (lt *LicenseFile) Classify(classifier *licenseclassifier.License) (string, error) {
body, err := lt.Body()
if err != nil {
return "", err
}
m := classifier.NearestMatch(body)
if m == nil {
return "", fmt.Errorf("unable to classify license: %v", lt.EnclosingImportPath)
}
return m.Name, nil
}
func (lt *LicenseFile) Check(classifier *licenseclassifier.License) error {
body, err := lt.Body()
if err != nil {
return err
}
ms := classifier.MultipleMatch(body, false)
for _, m := range ms {
return fmt.Errorf("Found matching forbidden license in %q: %v", lt.EnclosingImportPath, m.Name)
}
return nil
}
func (lt *LicenseFile) Entry() (string, error) {
body, err := lt.Body()
if err != nil {
return "", err
}
return fmt.Sprintf(`
===========================================================
Import: %s
%s
`, lt.EnclosingImportPath, body), nil
}
func (lt *LicenseFile) CSVRow(classifier *licenseclassifier.License) (string, error) {
classification, err := lt.Classify(classifier)
if err != nil {
return "", err
}
parts := strings.Split(lt.EnclosingImportPath, "/vendor/")
if len(parts) != 2 {
return "", fmt.Errorf("wrong number of parts splitting import path on %q : %q", "/vendor/", lt.EnclosingImportPath)
}
return strings.Join([]string{
parts[1],
"Static",
"", // TODO(mattmoor): Modifications?
"https://" + parts[0] + "/blob/master/vendor/" + parts[1] + "/" + filepath.Base(lt.LicensePath),
classification,
}, ","), nil
}
func findLicense(ip string) (*LicenseFile, error) {
pkg, err := gb.Import(ip, WorkingDir, gb.ImportComment)
if err != nil {
return nil, err
}
dir := pkg.Dir
for {
// When we reach the root of our workspace, stop searching.
if dir == WorkingDir {
return nil, fmt.Errorf("unable to find license for %q", pkg.ImportPath)
}
for _, name := range LicenseNames {
p := filepath.Join(dir, name)
if _, err := os.Stat(p); err != nil {
continue
}
return &LicenseFile{
EnclosingImportPath: ip,
LicensePath: p,
}, nil
}
// Walk to the parent directory / import path
dir = filepath.Dir(dir)
ip = filepath.Dir(ip)
}
}
type LicenseCollection []*LicenseFile
func (lc LicenseCollection) Entries() (string, error) {
sections := make([]string, 0, len(lc))
for _, key := range lc {
entry, err := key.Entry()
if err != nil {
return "", err
}
sections = append(sections, entry)
}
return strings.Join(sections, "\n"), nil
}
func (lc LicenseCollection) CSV(classifier *licenseclassifier.License) (string, error) {
sections := make([]string, 0, len(lc))
for _, entry := range lc {
row, err := entry.CSVRow(classifier)
if err != nil {
return "", err
}
sections = append(sections, row)
}
return strings.Join(sections, "\n"), nil
}
func (lc LicenseCollection) Check(classifier *licenseclassifier.License) error {
errors := []string{}
for _, entry := range lc {
if err := entry.Check(classifier); err != nil {
errors = append(errors, err.Error())
}
}
if len(errors) == 0 {
return nil
}
return fmt.Errorf("Errors validating licenses:\n%v", strings.Join(errors, "\n"))
}
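// CollectLicenses locates the license file for each of the given import paths.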
func CollectLicenses(imports []string) (LicenseCollection, error) {
// for each of the import paths, search for a license file.
licenseFiles := make(map[string]*LicenseFile)
for _, ip := range imports {
lf, err := findLicense(ip)
if err != nil {
return nil, err
}
licenseFiles[lf.EnclosingImportPath] = lf
}
order := sort.StringSlice{}
for key := range licenseFiles {
order = append(order, key)
}
order.Sort()
licenseTypes := LicenseCollection{}
for _, key := range order {
licenseTypes = append(licenseTypes, licenseFiles[key])
}
return licenseTypes, nil
}
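
A minimal sketch of how the pieces above compose, assuming it lives in the same package (the paths are hypothetical, and "log" would need to be added to the imports):

func exampleClassify() {
	lf := &LicenseFile{
		EnclosingImportPath: "github.com/example/project/vendor/github.com/sergi/go-diff",
		LicensePath:         "vendor/github.com/sergi/go-diff/LICENSE",
	}
	// Build a classifier at the same threshold the tool uses.
	classifier, err := licenseclassifier.New(MatchThreshold)
	if err != nil {
		log.Fatalf("Error creating license classifier: %v", err)
	}
	name, err := lf.Classify(classifier)
	if err != nil {
		log.Fatalf("Error classifying license: %v", err)
	}
	log.Printf("%s classified as %s", lf.LicensePath, name)
}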

@ -0,0 +1,81 @@
/*
Copyright 2018 The Knative Authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main
import (
"flag"
"log"
"os"
"github.com/google/licenseclassifier"
)
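// WorkingDir is the directory from which the tool was invoked; the license search stops at this root.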
var WorkingDir, _ = os.Getwd()
var (
csv = flag.Bool("csv", false, "Whether to print in CSV format (with slow classification).")
check = flag.Bool("check", false, "Whether to just check license files for forbidden licenses.")
)
func main() {
flag.Parse()
if flag.NArg() == 0 {
log.Fatalf("Expected a list of import paths, got: %v", flag.Args())
}
// Perform a simple DFS to collect the binaries' transitive dependencies.
transitiveImports, err := CollectTransitiveImports(flag.Args())
if err != nil {
log.Fatalf("Error collecting transitive dependencies: %v", err)
}
// Gather all of the license data from the imports.
collection, err := CollectLicenses(transitiveImports)
if err != nil {
log.Fatalf("Error identifying licenses for transitive dependencies: %v", err)
}
if *check {
classifier, err := licenseclassifier.NewWithForbiddenLicenses(MatchThreshold)
if err != nil {
log.Fatalf("Error creating license classifier: %v", err)
}
if err := collection.Check(classifier); err != nil {
log.Fatalf("Error checking license collection: %v", err)
}
log.Printf("No errors found.")
return
}
if *csv {
classifier, err := licenseclassifier.New(MatchThreshold)
if err != nil {
log.Fatalf("Error creating license classifier: %v", err)
}
output, err := collection.CSV(classifier)
if err != nil {
log.Fatalf("Error generating CSV: %v", err)
}
os.Stdout.Write([]byte(output))
} else {
entries, err := collection.Entries()
if err != nil {
log.Fatalf("Error generating entries: %v", err)
}
os.Stdout.Write([]byte(entries))
}
}
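
The -check flow above can also be exercised programmatically. A sketch under the same assumptions (same package; CollectTransitiveImports is referenced by main but defined in another file of this tool; the import path is hypothetical):

func exampleCheck() error {
	imports, err := CollectTransitiveImports([]string{"github.com/example/project/cmd/controller"})
	if err != nil {
		return err
	}
	collection, err := CollectLicenses(imports)
	if err != nil {
		return err
	}
	classifier, err := licenseclassifier.NewWithForbiddenLicenses(MatchThreshold)
	if err != nil {
		return err
	}
	return collection.Check(classifier)
}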

25 vendor/github.com/sergi/go-diff/AUTHORS generated vendored Normal file
@ -0,0 +1,25 @@
# This is the official list of go-diff authors for copyright purposes.
# This file is distinct from the CONTRIBUTORS files.
# See the latter for an explanation.
# Names should be added to this file as
# Name or Organization <email address>
# The email address is not required for organizations.
# Please keep the list sorted.
Danny Yoo <dannyyoo@google.com>
James Kolb <jkolb@google.com>
Jonathan Amsterdam <jba@google.com>
Markus Zimmermann <markus.zimmermann@nethead.at> <markus.zimmermann@symflower.com> <zimmski@gmail.com>
Matt Kovars <akaskik@gmail.com>
Örjan Persson <orjan@spotify.com>
Osman Masood <oamasood@gmail.com>
Robert Carlsen <rwcarlsen@gmail.com>
Rory Flynn <roryflynn@users.noreply.github.com>
Sergi Mansilla <sergi.mansilla@gmail.com>
Shatrugna Sadhu <ssadhu@apcera.com>
Shawn Smith <shawnpsmith@gmail.com>
Stas Maksimov <maksimov@gmail.com>
Tor Arvid Lund <torarvid@gmail.com>
Zac Bergquist <zbergquist99@gmail.com>

32 vendor/github.com/sergi/go-diff/CONTRIBUTORS generated vendored Normal file
@ -0,0 +1,32 @@
# This is the official list of people who can contribute
# (and typically have contributed) code to the go-diff
# repository.
#
# The AUTHORS file lists the copyright holders; this file
# lists people. For example, ACME Inc. employees would be listed here
# but not in AUTHORS, because ACME Inc. would hold the copyright.
#
# When adding J Random Contributor's name to this file,
# either J's name or J's organization's name should be
# added to the AUTHORS file.
#
# Names should be added to this file like so:
# Name <email address>
#
# Please keep the list sorted.
Danny Yoo <dannyyoo@google.com>
James Kolb <jkolb@google.com>
Jonathan Amsterdam <jba@google.com>
Markus Zimmermann <markus.zimmermann@nethead.at> <markus.zimmermann@symflower.com> <zimmski@gmail.com>
Matt Kovars <akaskik@gmail.com>
Örjan Persson <orjan@spotify.com>
Osman Masood <oamasood@gmail.com>
Robert Carlsen <rwcarlsen@gmail.com>
Rory Flynn <roryflynn@users.noreply.github.com>
Sergi Mansilla <sergi.mansilla@gmail.com>
Shatrugna Sadhu <ssadhu@apcera.com>
Shawn Smith <shawnpsmith@gmail.com>
Stas Maksimov <maksimov@gmail.com>
Tor Arvid Lund <torarvid@gmail.com>
Zac Bergquist <zbergquist99@gmail.com>

20 vendor/github.com/sergi/go-diff/LICENSE generated vendored Normal file
@ -0,0 +1,20 @@
Copyright (c) 2012-2016 The go-diff Authors. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

1344 vendor/github.com/sergi/go-diff/diffmatchpatch/diff.go generated vendored Normal file

File diff suppressed because it is too large

@ -0,0 +1,46 @@
// Copyright (c) 2012-2016 The go-diff authors. All rights reserved.
// https://github.com/sergi/go-diff
// See the included LICENSE file for license details.
//
// go-diff is a Go implementation of Google's Diff, Match, and Patch library
// Original library is Copyright (c) 2006 Google Inc.
// http://code.google.com/p/google-diff-match-patch/
// Package diffmatchpatch offers robust algorithms to perform the operations required for synchronizing plain text.
package diffmatchpatch
import (
"time"
)
// DiffMatchPatch holds the configuration for diff-match-patch operations.
type DiffMatchPatch struct {
	// Time to spend computing a diff before giving up (0 for infinity).
DiffTimeout time.Duration
// Cost of an empty edit operation in terms of edit characters.
DiffEditCost int
// How far to search for a match (0 = exact location, 1000+ = broad match). A match this many characters away from the expected location will add 1.0 to the score (0.0 is a perfect match).
MatchDistance int
// When deleting a large block of text (over ~64 characters), how close do the contents have to be to match the expected contents. (0.0 = perfection, 1.0 = very loose). Note that MatchThreshold controls how closely the end points of a delete need to match.
PatchDeleteThreshold float64
// Chunk size for context length.
PatchMargin int
// The number of bits in an int.
MatchMaxBits int
// At what point is no match declared (0.0 = perfection, 1.0 = very loose).
MatchThreshold float64
}
// New creates a new DiffMatchPatch object with default parameters.
func New() *DiffMatchPatch {
// Defaults.
return &DiffMatchPatch{
DiffTimeout: time.Second,
DiffEditCost: 4,
MatchThreshold: 0.5,
MatchDistance: 1000,
PatchDeleteThreshold: 0.5,
PatchMargin: 4,
MatchMaxBits: 32,
}
}
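
A self-contained sketch of typical client usage of this package (the texts are hypothetical):

package main

import (
	"fmt"

	"github.com/sergi/go-diff/diffmatchpatch"
)

func main() {
	dmp := diffmatchpatch.New()
	// checklines=false: run a character-level diff.
	diffs := dmp.DiffMain("the quick brown fox", "the quick red fox", false)
	fmt.Println(dmp.DiffPrettyText(diffs))
}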

160 vendor/github.com/sergi/go-diff/diffmatchpatch/match.go generated vendored Normal file
@ -0,0 +1,160 @@
// Copyright (c) 2012-2016 The go-diff authors. All rights reserved.
// https://github.com/sergi/go-diff
// See the included LICENSE file for license details.
//
// go-diff is a Go implementation of Google's Diff, Match, and Patch library
// Original library is Copyright (c) 2006 Google Inc.
// http://code.google.com/p/google-diff-match-patch/
package diffmatchpatch
import (
"math"
)
// MatchMain locates the best instance of 'pattern' in 'text' near 'loc'.
// Returns -1 if no match found.
func (dmp *DiffMatchPatch) MatchMain(text, pattern string, loc int) int {
	// No nil-input check is needed: strings cannot be nil in Go (a holdover note from the C# port).
loc = int(math.Max(0, math.Min(float64(loc), float64(len(text)))))
if text == pattern {
// Shortcut (potentially not guaranteed by the algorithm)
return 0
} else if len(text) == 0 {
// Nothing to match.
return -1
} else if loc+len(pattern) <= len(text) && text[loc:loc+len(pattern)] == pattern {
// Perfect match at the perfect spot! (Includes case of null pattern)
return loc
}
// Do a fuzzy compare.
return dmp.MatchBitap(text, pattern, loc)
}
// MatchBitap locates the best instance of 'pattern' in 'text' near 'loc' using the Bitap algorithm.
// Returns -1 if no match was found.
func (dmp *DiffMatchPatch) MatchBitap(text, pattern string, loc int) int {
// Initialise the alphabet.
s := dmp.MatchAlphabet(pattern)
// Highest score beyond which we give up.
scoreThreshold := dmp.MatchThreshold
// Is there a nearby exact match? (speedup)
bestLoc := indexOf(text, pattern, loc)
if bestLoc != -1 {
scoreThreshold = math.Min(dmp.matchBitapScore(0, bestLoc, loc,
pattern), scoreThreshold)
// What about in the other direction? (speedup)
bestLoc = lastIndexOf(text, pattern, loc+len(pattern))
if bestLoc != -1 {
scoreThreshold = math.Min(dmp.matchBitapScore(0, bestLoc, loc,
pattern), scoreThreshold)
}
}
// Initialise the bit arrays.
matchmask := 1 << uint((len(pattern) - 1))
bestLoc = -1
var binMin, binMid int
binMax := len(pattern) + len(text)
lastRd := []int{}
for d := 0; d < len(pattern); d++ {
// Scan for the best match; each iteration allows for one more error. Run a binary search to determine how far from 'loc' we can stray at this error level.
binMin = 0
binMid = binMax
for binMin < binMid {
if dmp.matchBitapScore(d, loc+binMid, loc, pattern) <= scoreThreshold {
binMin = binMid
} else {
binMax = binMid
}
binMid = (binMax-binMin)/2 + binMin
}
// Use the result from this iteration as the maximum for the next.
binMax = binMid
start := int(math.Max(1, float64(loc-binMid+1)))
finish := int(math.Min(float64(loc+binMid), float64(len(text))) + float64(len(pattern)))
rd := make([]int, finish+2)
rd[finish+1] = (1 << uint(d)) - 1
for j := finish; j >= start; j-- {
var charMatch int
if len(text) <= j-1 {
// Out of range.
charMatch = 0
} else if _, ok := s[text[j-1]]; !ok {
charMatch = 0
} else {
charMatch = s[text[j-1]]
}
if d == 0 {
// First pass: exact match.
rd[j] = ((rd[j+1] << 1) | 1) & charMatch
} else {
// Subsequent passes: fuzzy match.
rd[j] = ((rd[j+1]<<1)|1)&charMatch | (((lastRd[j+1] | lastRd[j]) << 1) | 1) | lastRd[j+1]
}
if (rd[j] & matchmask) != 0 {
score := dmp.matchBitapScore(d, j-1, loc, pattern)
// This match will almost certainly be better than any existing match. But check anyway.
if score <= scoreThreshold {
// Told you so.
scoreThreshold = score
bestLoc = j - 1
if bestLoc > loc {
// When passing loc, don't exceed our current distance from loc.
start = int(math.Max(1, float64(2*loc-bestLoc)))
} else {
// Already passed loc, downhill from here on in.
break
}
}
}
}
if dmp.matchBitapScore(d+1, loc, loc, pattern) > scoreThreshold {
// No hope for a (better) match at greater error levels.
break
}
lastRd = rd
}
return bestLoc
}
// matchBitapScore computes and returns the score for a match with e errors and x location.
func (dmp *DiffMatchPatch) matchBitapScore(e, x, loc int, pattern string) float64 {
accuracy := float64(e) / float64(len(pattern))
proximity := math.Abs(float64(loc - x))
if dmp.MatchDistance == 0 {
// Dodge divide by zero error.
if proximity == 0 {
return accuracy
}
return 1.0
}
return accuracy + (proximity / float64(dmp.MatchDistance))
}
// MatchAlphabet initialises the alphabet for the Bitap algorithm.
func (dmp *DiffMatchPatch) MatchAlphabet(pattern string) map[byte]int {
s := map[byte]int{}
charPattern := []byte(pattern)
for _, c := range charPattern {
_, ok := s[c]
if !ok {
s[c] = 0
}
}
i := 0
for _, c := range charPattern {
value := s[c] | int(uint(1)<<uint((len(pattern)-i-1)))
s[c] = value
i++
}
return s
}
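
A client-side sketch of the fuzzy matcher above, assuming the same imports as the earlier example (inputs are hypothetical):

func exampleMatch() {
	dmp := diffmatchpatch.New()
	// "brwn" is a misspelling of "brown"; the Bitap search still locates it near loc.
	loc := dmp.MatchMain("the quick brown fox", "brwn", 8)
	fmt.Println(loc) // index of the best match, or -1 if nothing clears MatchThreshold
}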

@ -0,0 +1,23 @@
// Copyright (c) 2012-2016 The go-diff authors. All rights reserved.
// https://github.com/sergi/go-diff
// See the included LICENSE file for license details.
//
// go-diff is a Go implementation of Google's Diff, Match, and Patch library
// Original library is Copyright (c) 2006 Google Inc.
// http://code.google.com/p/google-diff-match-patch/
package diffmatchpatch
func min(x, y int) int {
if x < y {
return x
}
return y
}
func max(x, y int) int {
if x > y {
return x
}
return y
}

556 vendor/github.com/sergi/go-diff/diffmatchpatch/patch.go generated vendored Normal file
@ -0,0 +1,556 @@
// Copyright (c) 2012-2016 The go-diff authors. All rights reserved.
// https://github.com/sergi/go-diff
// See the included LICENSE file for license details.
//
// go-diff is a Go implementation of Google's Diff, Match, and Patch library
// Original library is Copyright (c) 2006 Google Inc.
// http://code.google.com/p/google-diff-match-patch/
package diffmatchpatch
import (
"bytes"
"errors"
"math"
"net/url"
"regexp"
"strconv"
"strings"
)
// Patch represents one patch operation.
type Patch struct {
diffs []Diff
Start1 int
Start2 int
Length1 int
Length2 int
}
// String emulates GNU diff's format.
// Header: @@ -382,8 +481,9 @@
// Indices are printed as 1-based, not 0-based.
func (p *Patch) String() string {
var coords1, coords2 string
if p.Length1 == 0 {
coords1 = strconv.Itoa(p.Start1) + ",0"
} else if p.Length1 == 1 {
coords1 = strconv.Itoa(p.Start1 + 1)
} else {
coords1 = strconv.Itoa(p.Start1+1) + "," + strconv.Itoa(p.Length1)
}
if p.Length2 == 0 {
coords2 = strconv.Itoa(p.Start2) + ",0"
} else if p.Length2 == 1 {
coords2 = strconv.Itoa(p.Start2 + 1)
} else {
coords2 = strconv.Itoa(p.Start2+1) + "," + strconv.Itoa(p.Length2)
}
var text bytes.Buffer
_, _ = text.WriteString("@@ -" + coords1 + " +" + coords2 + " @@\n")
// Escape the body of the patch with %xx notation.
for _, aDiff := range p.diffs {
switch aDiff.Type {
case DiffInsert:
_, _ = text.WriteString("+")
case DiffDelete:
_, _ = text.WriteString("-")
case DiffEqual:
_, _ = text.WriteString(" ")
}
_, _ = text.WriteString(strings.Replace(url.QueryEscape(aDiff.Text), "+", " ", -1))
_, _ = text.WriteString("\n")
}
return unescaper.Replace(text.String())
}
// PatchAddContext increases the context until it is unique, but doesn't let the pattern expand beyond MatchMaxBits.
func (dmp *DiffMatchPatch) PatchAddContext(patch Patch, text string) Patch {
if len(text) == 0 {
return patch
}
pattern := text[patch.Start2 : patch.Start2+patch.Length1]
padding := 0
// Look for the first and last matches of pattern in text. If two different matches are found, increase the pattern length.
for strings.Index(text, pattern) != strings.LastIndex(text, pattern) &&
len(pattern) < dmp.MatchMaxBits-2*dmp.PatchMargin {
padding += dmp.PatchMargin
maxStart := max(0, patch.Start2-padding)
minEnd := min(len(text), patch.Start2+patch.Length1+padding)
pattern = text[maxStart:minEnd]
}
// Add one chunk for good luck.
padding += dmp.PatchMargin
// Add the prefix.
prefix := text[max(0, patch.Start2-padding):patch.Start2]
if len(prefix) != 0 {
patch.diffs = append([]Diff{Diff{DiffEqual, prefix}}, patch.diffs...)
}
// Add the suffix.
suffix := text[patch.Start2+patch.Length1 : min(len(text), patch.Start2+patch.Length1+padding)]
if len(suffix) != 0 {
patch.diffs = append(patch.diffs, Diff{DiffEqual, suffix})
}
// Roll back the start points.
patch.Start1 -= len(prefix)
patch.Start2 -= len(prefix)
// Extend the lengths.
patch.Length1 += len(prefix) + len(suffix)
patch.Length2 += len(prefix) + len(suffix)
return patch
}
// PatchMake computes a list of patches.
func (dmp *DiffMatchPatch) PatchMake(opt ...interface{}) []Patch {
if len(opt) == 1 {
diffs, _ := opt[0].([]Diff)
text1 := dmp.DiffText1(diffs)
return dmp.PatchMake(text1, diffs)
} else if len(opt) == 2 {
text1 := opt[0].(string)
switch t := opt[1].(type) {
case string:
diffs := dmp.DiffMain(text1, t, true)
if len(diffs) > 2 {
diffs = dmp.DiffCleanupSemantic(diffs)
diffs = dmp.DiffCleanupEfficiency(diffs)
}
return dmp.PatchMake(text1, diffs)
case []Diff:
return dmp.patchMake2(text1, t)
}
} else if len(opt) == 3 {
return dmp.PatchMake(opt[0], opt[2])
}
return []Patch{}
}
// patchMake2 computes a list of patches to turn text1 into text2.
// text2 is not provided, diffs are the delta between text1 and text2.
func (dmp *DiffMatchPatch) patchMake2(text1 string, diffs []Diff) []Patch {
	// No nil-input check is needed: strings cannot be nil in Go (a holdover note from the C# port).
patches := []Patch{}
if len(diffs) == 0 {
return patches // Get rid of the null case.
}
patch := Patch{}
charCount1 := 0 // Number of characters into the text1 string.
charCount2 := 0 // Number of characters into the text2 string.
// Start with text1 (prepatchText) and apply the diffs until we arrive at text2 (postpatchText). We recreate the patches one by one to determine context info.
prepatchText := text1
postpatchText := text1
for i, aDiff := range diffs {
if len(patch.diffs) == 0 && aDiff.Type != DiffEqual {
// A new patch starts here.
patch.Start1 = charCount1
patch.Start2 = charCount2
}
switch aDiff.Type {
case DiffInsert:
patch.diffs = append(patch.diffs, aDiff)
patch.Length2 += len(aDiff.Text)
postpatchText = postpatchText[:charCount2] +
aDiff.Text + postpatchText[charCount2:]
case DiffDelete:
patch.Length1 += len(aDiff.Text)
patch.diffs = append(patch.diffs, aDiff)
postpatchText = postpatchText[:charCount2] + postpatchText[charCount2+len(aDiff.Text):]
case DiffEqual:
if len(aDiff.Text) <= 2*dmp.PatchMargin &&
len(patch.diffs) != 0 && i != len(diffs)-1 {
// Small equality inside a patch.
patch.diffs = append(patch.diffs, aDiff)
patch.Length1 += len(aDiff.Text)
patch.Length2 += len(aDiff.Text)
}
if len(aDiff.Text) >= 2*dmp.PatchMargin {
// Time for a new patch.
if len(patch.diffs) != 0 {
patch = dmp.PatchAddContext(patch, prepatchText)
patches = append(patches, patch)
patch = Patch{}
				// Unlike Unidiff, our patch lists have a rolling context.
				// http://code.google.com/p/google-diff-match-patch/wiki/Unidiff
				// Update prepatch text & pos to reflect the application of the just completed patch.
prepatchText = postpatchText
charCount1 = charCount2
}
}
}
// Update the current character count.
if aDiff.Type != DiffInsert {
charCount1 += len(aDiff.Text)
}
if aDiff.Type != DiffDelete {
charCount2 += len(aDiff.Text)
}
}
// Pick up the leftover patch if not empty.
if len(patch.diffs) != 0 {
patch = dmp.PatchAddContext(patch, prepatchText)
patches = append(patches, patch)
}
return patches
}
// PatchDeepCopy returns an array that is identical to a given array of patches.
func (dmp *DiffMatchPatch) PatchDeepCopy(patches []Patch) []Patch {
patchesCopy := []Patch{}
for _, aPatch := range patches {
patchCopy := Patch{}
for _, aDiff := range aPatch.diffs {
patchCopy.diffs = append(patchCopy.diffs, Diff{
aDiff.Type,
aDiff.Text,
})
}
patchCopy.Start1 = aPatch.Start1
patchCopy.Start2 = aPatch.Start2
patchCopy.Length1 = aPatch.Length1
patchCopy.Length2 = aPatch.Length2
patchesCopy = append(patchesCopy, patchCopy)
}
return patchesCopy
}
// PatchApply merges a set of patches onto the text. Returns a patched text, as well as an array of true/false values indicating which patches were applied.
func (dmp *DiffMatchPatch) PatchApply(patches []Patch, text string) (string, []bool) {
if len(patches) == 0 {
return text, []bool{}
}
// Deep copy the patches so that no changes are made to originals.
patches = dmp.PatchDeepCopy(patches)
nullPadding := dmp.PatchAddPadding(patches)
text = nullPadding + text + nullPadding
patches = dmp.PatchSplitMax(patches)
x := 0
	// delta keeps track of the offset between the expected and actual location of the previous patch.
	// If there are patches expected at positions 10 and 20, but the first patch was found at 12,
	// delta is 2 and the second patch has an effective expected position of 22.
delta := 0
results := make([]bool, len(patches))
for _, aPatch := range patches {
expectedLoc := aPatch.Start2 + delta
text1 := dmp.DiffText1(aPatch.diffs)
var startLoc int
endLoc := -1
if len(text1) > dmp.MatchMaxBits {
// PatchSplitMax will only provide an oversized pattern in the case of a monster delete.
startLoc = dmp.MatchMain(text, text1[:dmp.MatchMaxBits], expectedLoc)
if startLoc != -1 {
endLoc = dmp.MatchMain(text,
text1[len(text1)-dmp.MatchMaxBits:], expectedLoc+len(text1)-dmp.MatchMaxBits)
if endLoc == -1 || startLoc >= endLoc {
// Can't find valid trailing context. Drop this patch.
startLoc = -1
}
}
} else {
startLoc = dmp.MatchMain(text, text1, expectedLoc)
}
if startLoc == -1 {
// No match found. :(
results[x] = false
// Subtract the delta for this failed patch from subsequent patches.
delta -= aPatch.Length2 - aPatch.Length1
} else {
// Found a match. :)
results[x] = true
delta = startLoc - expectedLoc
var text2 string
if endLoc == -1 {
text2 = text[startLoc:int(math.Min(float64(startLoc+len(text1)), float64(len(text))))]
} else {
text2 = text[startLoc:int(math.Min(float64(endLoc+dmp.MatchMaxBits), float64(len(text))))]
}
if text1 == text2 {
// Perfect match, just shove the Replacement text in.
text = text[:startLoc] + dmp.DiffText2(aPatch.diffs) + text[startLoc+len(text1):]
} else {
// Imperfect match. Run a diff to get a framework of equivalent indices.
diffs := dmp.DiffMain(text1, text2, false)
if len(text1) > dmp.MatchMaxBits && float64(dmp.DiffLevenshtein(diffs))/float64(len(text1)) > dmp.PatchDeleteThreshold {
// The end points match, but the content is unacceptably bad.
results[x] = false
} else {
diffs = dmp.DiffCleanupSemanticLossless(diffs)
index1 := 0
for _, aDiff := range aPatch.diffs {
if aDiff.Type != DiffEqual {
index2 := dmp.DiffXIndex(diffs, index1)
if aDiff.Type == DiffInsert {
// Insertion
text = text[:startLoc+index2] + aDiff.Text + text[startLoc+index2:]
} else if aDiff.Type == DiffDelete {
// Deletion
startIndex := startLoc + index2
text = text[:startIndex] +
text[startIndex+dmp.DiffXIndex(diffs, index1+len(aDiff.Text))-index2:]
}
}
if aDiff.Type != DiffDelete {
index1 += len(aDiff.Text)
}
}
}
}
}
x++
}
// Strip the padding off.
text = text[len(nullPadding) : len(nullPadding)+(len(text)-2*len(nullPadding))]
return text, results
}
// PatchAddPadding adds some padding on text start and end so that edges can match something.
// Intended to be called only from within PatchApply.
func (dmp *DiffMatchPatch) PatchAddPadding(patches []Patch) string {
paddingLength := dmp.PatchMargin
nullPadding := ""
for x := 1; x <= paddingLength; x++ {
		nullPadding += string(rune(x))
}
// Bump all the patches forward.
for i := range patches {
patches[i].Start1 += paddingLength
patches[i].Start2 += paddingLength
}
// Add some padding on start of first diff.
if len(patches[0].diffs) == 0 || patches[0].diffs[0].Type != DiffEqual {
// Add nullPadding equality.
patches[0].diffs = append([]Diff{Diff{DiffEqual, nullPadding}}, patches[0].diffs...)
patches[0].Start1 -= paddingLength // Should be 0.
patches[0].Start2 -= paddingLength // Should be 0.
patches[0].Length1 += paddingLength
patches[0].Length2 += paddingLength
} else if paddingLength > len(patches[0].diffs[0].Text) {
// Grow first equality.
extraLength := paddingLength - len(patches[0].diffs[0].Text)
patches[0].diffs[0].Text = nullPadding[len(patches[0].diffs[0].Text):] + patches[0].diffs[0].Text
patches[0].Start1 -= extraLength
patches[0].Start2 -= extraLength
patches[0].Length1 += extraLength
patches[0].Length2 += extraLength
}
// Add some padding on end of last diff.
last := len(patches) - 1
if len(patches[last].diffs) == 0 || patches[last].diffs[len(patches[last].diffs)-1].Type != DiffEqual {
// Add nullPadding equality.
patches[last].diffs = append(patches[last].diffs, Diff{DiffEqual, nullPadding})
patches[last].Length1 += paddingLength
patches[last].Length2 += paddingLength
} else if paddingLength > len(patches[last].diffs[len(patches[last].diffs)-1].Text) {
// Grow last equality.
lastDiff := patches[last].diffs[len(patches[last].diffs)-1]
extraLength := paddingLength - len(lastDiff.Text)
patches[last].diffs[len(patches[last].diffs)-1].Text += nullPadding[:extraLength]
patches[last].Length1 += extraLength
patches[last].Length2 += extraLength
}
return nullPadding
}
// PatchSplitMax looks through the patches and breaks up any which are longer than the maximum limit of the match algorithm.
// Intended to be called only from within PatchApply.
func (dmp *DiffMatchPatch) PatchSplitMax(patches []Patch) []Patch {
patchSize := dmp.MatchMaxBits
for x := 0; x < len(patches); x++ {
if patches[x].Length1 <= patchSize {
continue
}
bigpatch := patches[x]
// Remove the big old patch.
patches = append(patches[:x], patches[x+1:]...)
x--
Start1 := bigpatch.Start1
Start2 := bigpatch.Start2
precontext := ""
for len(bigpatch.diffs) != 0 {
// Create one of several smaller patches.
patch := Patch{}
empty := true
patch.Start1 = Start1 - len(precontext)
patch.Start2 = Start2 - len(precontext)
if len(precontext) != 0 {
patch.Length1 = len(precontext)
patch.Length2 = len(precontext)
patch.diffs = append(patch.diffs, Diff{DiffEqual, precontext})
}
for len(bigpatch.diffs) != 0 && patch.Length1 < patchSize-dmp.PatchMargin {
diffType := bigpatch.diffs[0].Type
diffText := bigpatch.diffs[0].Text
if diffType == DiffInsert {
// Insertions are harmless.
patch.Length2 += len(diffText)
Start2 += len(diffText)
patch.diffs = append(patch.diffs, bigpatch.diffs[0])
bigpatch.diffs = bigpatch.diffs[1:]
empty = false
} else if diffType == DiffDelete && len(patch.diffs) == 1 && patch.diffs[0].Type == DiffEqual && len(diffText) > 2*patchSize {
// This is a large deletion. Let it pass in one chunk.
patch.Length1 += len(diffText)
Start1 += len(diffText)
empty = false
patch.diffs = append(patch.diffs, Diff{diffType, diffText})
bigpatch.diffs = bigpatch.diffs[1:]
} else {
// Deletion or equality. Only take as much as we can stomach.
diffText = diffText[:min(len(diffText), patchSize-patch.Length1-dmp.PatchMargin)]
patch.Length1 += len(diffText)
Start1 += len(diffText)
if diffType == DiffEqual {
patch.Length2 += len(diffText)
Start2 += len(diffText)
} else {
empty = false
}
patch.diffs = append(patch.diffs, Diff{diffType, diffText})
if diffText == bigpatch.diffs[0].Text {
bigpatch.diffs = bigpatch.diffs[1:]
} else {
bigpatch.diffs[0].Text =
bigpatch.diffs[0].Text[len(diffText):]
}
}
}
// Compute the head context for the next patch.
precontext = dmp.DiffText2(patch.diffs)
precontext = precontext[max(0, len(precontext)-dmp.PatchMargin):]
postcontext := ""
// Append the end context for this patch.
if len(dmp.DiffText1(bigpatch.diffs)) > dmp.PatchMargin {
postcontext = dmp.DiffText1(bigpatch.diffs)[:dmp.PatchMargin]
} else {
postcontext = dmp.DiffText1(bigpatch.diffs)
}
if len(postcontext) != 0 {
patch.Length1 += len(postcontext)
patch.Length2 += len(postcontext)
if len(patch.diffs) != 0 && patch.diffs[len(patch.diffs)-1].Type == DiffEqual {
patch.diffs[len(patch.diffs)-1].Text += postcontext
} else {
patch.diffs = append(patch.diffs, Diff{DiffEqual, postcontext})
}
}
if !empty {
x++
patches = append(patches[:x], append([]Patch{patch}, patches[x:]...)...)
}
}
}
return patches
}
// PatchToText takes a list of patches and returns a textual representation.
func (dmp *DiffMatchPatch) PatchToText(patches []Patch) string {
var text bytes.Buffer
for _, aPatch := range patches {
_, _ = text.WriteString(aPatch.String())
}
return text.String()
}
// PatchFromText parses a textual representation of patches and returns a List of Patch objects.
func (dmp *DiffMatchPatch) PatchFromText(textline string) ([]Patch, error) {
patches := []Patch{}
if len(textline) == 0 {
return patches, nil
}
text := strings.Split(textline, "\n")
textPointer := 0
patchHeader := regexp.MustCompile("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$")
var patch Patch
var sign uint8
var line string
for textPointer < len(text) {
if !patchHeader.MatchString(text[textPointer]) {
return patches, errors.New("Invalid patch string: " + text[textPointer])
}
patch = Patch{}
m := patchHeader.FindStringSubmatch(text[textPointer])
patch.Start1, _ = strconv.Atoi(m[1])
if len(m[2]) == 0 {
patch.Start1--
patch.Length1 = 1
} else if m[2] == "0" {
patch.Length1 = 0
} else {
patch.Start1--
patch.Length1, _ = strconv.Atoi(m[2])
}
patch.Start2, _ = strconv.Atoi(m[3])
if len(m[4]) == 0 {
patch.Start2--
patch.Length2 = 1
} else if m[4] == "0" {
patch.Length2 = 0
} else {
patch.Start2--
patch.Length2, _ = strconv.Atoi(m[4])
}
textPointer++
for textPointer < len(text) {
if len(text[textPointer]) > 0 {
sign = text[textPointer][0]
} else {
textPointer++
continue
}
line = text[textPointer][1:]
line = strings.Replace(line, "+", "%2b", -1)
line, _ = url.QueryUnescape(line)
if sign == '-' {
// Deletion.
patch.diffs = append(patch.diffs, Diff{DiffDelete, line})
} else if sign == '+' {
// Insertion.
patch.diffs = append(patch.diffs, Diff{DiffInsert, line})
} else if sign == ' ' {
// Minor equality.
patch.diffs = append(patch.diffs, Diff{DiffEqual, line})
} else if sign == '@' {
// Start of next patch.
break
} else {
// WTF?
return patches, errors.New("Invalid patch mode '" + string(sign) + "' in: " + string(line))
}
textPointer++
}
patches = append(patches, patch)
}
return patches, nil
}
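
The patch pipeline above round-trips through its textual form. A client-side sketch under the same assumptions as the earlier examples (strings are hypothetical):

func examplePatch() {
	dmp := diffmatchpatch.New()
	patches := dmp.PatchMake("good dog", "bad dog")
	serialized := dmp.PatchToText(patches)
	parsed, err := dmp.PatchFromText(serialized)
	if err != nil {
		panic(err)
	}
	result, applied := dmp.PatchApply(parsed, "good dog")
	fmt.Println(result, applied) // "bad dog" [true]
}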

@ -0,0 +1,88 @@
// Copyright (c) 2012-2016 The go-diff authors. All rights reserved.
// https://github.com/sergi/go-diff
// See the included LICENSE file for license details.
//
// go-diff is a Go implementation of Google's Diff, Match, and Patch library
// Original library is Copyright (c) 2006 Google Inc.
// http://code.google.com/p/google-diff-match-patch/
package diffmatchpatch
import (
"strings"
"unicode/utf8"
)
// unescaper unescapes selected chars for compatibility with JavaScript's encodeURI.
// In speed critical applications this could be dropped since the receiving
// application will certainly decode these fine. Note that this function is
// case-sensitive. Thus "%3F" would not be unescaped. But this is OK because
// it is only called with the output of url.QueryEscape, which returns
// lowercase hex. Example: "%3f" -> "?", "%24" -> "$", etc.
var unescaper = strings.NewReplacer(
"%21", "!", "%7E", "~", "%27", "'",
"%28", "(", "%29", ")", "%3B", ";",
"%2F", "/", "%3F", "?", "%3A", ":",
"%40", "@", "%26", "&", "%3D", "=",
"%2B", "+", "%24", "$", "%2C", ",", "%23", "#", "%2A", "*")
// indexOf returns the first index of pattern in str, starting at str[i].
func indexOf(str string, pattern string, i int) int {
if i > len(str)-1 {
return -1
}
if i <= 0 {
return strings.Index(str, pattern)
}
ind := strings.Index(str[i:], pattern)
if ind == -1 {
return -1
}
return ind + i
}
// lastIndexOf returns the last index of pattern in str, starting at str[i].
func lastIndexOf(str string, pattern string, i int) int {
if i < 0 {
return -1
}
if i >= len(str) {
return strings.LastIndex(str, pattern)
}
_, size := utf8.DecodeRuneInString(str[i:])
return strings.LastIndex(str[:i+size], pattern)
}
// runesIndexOf returns the index of pattern in target, starting at target[i].
func runesIndexOf(target, pattern []rune, i int) int {
if i > len(target)-1 {
return -1
}
if i <= 0 {
return runesIndex(target, pattern)
}
ind := runesIndex(target[i:], pattern)
if ind == -1 {
return -1
}
return ind + i
}
func runesEqual(r1, r2 []rune) bool {
if len(r1) != len(r2) {
return false
}
for i, c := range r1 {
if c != r2[i] {
return false
}
}
return true
}
// runesIndex is the equivalent of strings.Index for rune slices.
func runesIndex(r1, r2 []rune) int {
last := len(r1) - len(r2)
for i := 0; i <= last; i++ {
if runesEqual(r1[i:i+len(r2)], r2) {
return i
}
}
return -1
}
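
A same-package sketch showing how these rune helpers differ from the byte-oriented strings functions (values are hypothetical):

func exampleRunesIndexOf() {
	target := []rune("héllo héllo")
	pattern := []rune("héllo")
	first := runesIndex(target, pattern)       // 0
	second := runesIndexOf(target, pattern, 1) // 6: a rune offset, not a byte offset
	_, _ = first, second
}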