mirror of https://github.com/knative/caching.git
Update test-infra (#21)
* Bring in the latest features and fixes.
* Use the default build/unit test runners.
* Remove the test-infra import hack from `Gopkg.lock` and `update-deps.sh`.
parent 06e9787157
commit 97110f4e0c
Gopkg.lock

@@ -79,6 +79,21 @@
   pruneopts = "NUT"
   revision = "24818f796faf91cd76ec7bddd72458fbced7a6c1"

+[[projects]]
+  branch = "master"
+  digest = "1:b6b3bd1c08338cb397623d1b9dedde711eccc2d3408fe9017a495d815065d869"
+  name = "github.com/google/licenseclassifier"
+  packages = [
+    ".",
+    "internal/sets",
+    "stringclassifier",
+    "stringclassifier/internal/pq",
+    "stringclassifier/searchset",
+    "stringclassifier/searchset/tokenizer",
+  ]
+  pruneopts = "NUT"
+  revision = "c2a262e3078ad90718f59866f1ec18601b2fee1b"
+
 [[projects]]
   digest = "1:06a7dadb7b760767341ffb6c8d377238d68a1226f2b21b5d497d2e3f6ecf6b4e"
   name = "github.com/googleapis/gnostic"
@@ -133,11 +148,14 @@

 [[projects]]
   branch = "master"
-  digest = "1:09521a823a008f7df66962ac2637c22e1cdc842ebfcc7a083c444d35258986f7"
+  digest = "1:da39b58557275d30a9340c2e1e13e16691461f9859d3230f59cceed411c04b49"
   name = "github.com/knative/test-infra"
-  packages = ["."]
-  pruneopts = "T"
-  revision = "7ed32409fa2c447a44a4281f0022ab25ce955f51"
+  packages = [
+    "scripts",
+    "tools/dep-collector",
+  ]
+  pruneopts = "UT"
+  revision = "89e4aae358be056ee70b595c20106a4a5c70fdc1"

 [[projects]]
   digest = "1:2f42fa12d6911c7b7659738758631bec870b7e9b4c6be5444f963cdcfccc191f"
@@ -171,6 +189,14 @@
   revision = "5f041e8faa004a95c88a202771f4cc3e991971e6"
   version = "v2.0.1"

+[[projects]]
+  digest = "1:d917313f309bda80d27274d53985bc65651f81a5b66b820749ac7f8ef061fd04"
+  name = "github.com/sergi/go-diff"
+  packages = ["diffmatchpatch"]
+  pruneopts = "NUT"
+  revision = "1744e2970ca51c86172c8190fadad617561ed6e7"
+  version = "v1.0.0"
+
 [[projects]]
   digest = "1:9d8420bbf131d1618bde6530af37c3799340d3762cc47210c1d9532a4c3a2779"
   name = "github.com/spf13/pflag"
@@ -452,7 +478,8 @@
     "github.com/google/go-cmp/cmp",
     "github.com/knative/pkg/apis",
     "github.com/knative/pkg/kmeta",
-    "github.com/knative/test-infra",
+    "github.com/knative/test-infra/scripts",
+    "github.com/knative/test-infra/tools/dep-collector",
     "k8s.io/api/core/v1",
     "k8s.io/apimachinery/pkg/api/equality",
     "k8s.io/apimachinery/pkg/api/errors",
Gopkg.toml

@@ -8,7 +8,8 @@ required = [
   "k8s.io/code-generator/cmd/client-gen",
   "k8s.io/code-generator/cmd/lister-gen",
   "k8s.io/code-generator/cmd/informer-gen",
-  "github.com/knative/test-infra",
+  "github.com/knative/test-infra/scripts",
+  "github.com/knative/test-infra/tools/dep-collector",
 ]

 [[override]]
@@ -45,5 +46,4 @@ required = [

 [[prune.project]]
   name = "github.com/knative/test-infra"
-  unused-packages = false
   non-go = false
hack/update-deps.sh

@@ -27,7 +27,3 @@ dep ensure

 rm -rf $(find vendor/ -name 'OWNERS')
 rm -rf $(find vendor/ -name '*_test.go')
-
-# Keep the only dir in knative/test-infra we're interested in
-find vendor/github.com/knative/test-infra -mindepth 1 -maxdepth 1 ! -name scripts -exec rm -fr {} \;
-
test/presubmit-tests.sh

@@ -23,24 +23,8 @@

 source $(dirname $0)/../vendor/github.com/knative/test-infra/scripts/presubmit-tests.sh

-function build_tests() {
-  header "Running build tests"
-  local result=0
-  go build -v ./pkg/... || result=1
-  # TODO(mattmoor): integration tests
-
-  subheader "Checking autogenerated code is up-to-date"
-  ./hack/verify-codegen.sh || result=1
-
-  return ${result}
-}
-
-function unit_tests() {
-  header "Running unit tests"
-  report_go_test ./...
-}
-
-function integration_tests() {
-  header "TODO(mattmoor): integration tests"
-}
+# We use the default build, unit and integration test runners.

 main $@
@@ -0,0 +1,202 @@
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
@@ -0,0 +1,429 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package licenseclassifier provides methods to identify the open source
// license that most closely matches an unknown license.
package licenseclassifier

import (
    "archive/tar"
    "bytes"
    "compress/gzip"
    "fmt"
    "html"
    "io"
    "math"
    "regexp"
    "sort"
    "strings"
    "sync"
    "unicode"

    "github.com/google/licenseclassifier/stringclassifier"
    "github.com/google/licenseclassifier/stringclassifier/searchset"
)

// DefaultConfidenceThreshold is the minimum confidence percentage we're willing to accept in order
// to say that a match is good.
const DefaultConfidenceThreshold = 0.80

var (
    // Normalizers is a list of functions that get applied to the strings
    // before they are registered with the string classifier.
    Normalizers = []stringclassifier.NormalizeFunc{
        html.UnescapeString,
        removeShebangLine,
        RemoveNonWords,
        NormalizeEquivalentWords,
        NormalizePunctuation,
        strings.ToLower,
        removeIgnorableTexts,
        stringclassifier.FlattenWhitespace,
        strings.TrimSpace,
    }

    // commonLicenseWords are words that are common to all known licenses.
    // If an unknown text doesn't have at least one of these, then we can
    // ignore it.
    commonLicenseWords = []*regexp.Regexp{
        regexp.MustCompile(`(?i)\bcode\b`),
        regexp.MustCompile(`(?i)\blicense\b`),
        regexp.MustCompile(`(?i)\boriginal\b`),
        regexp.MustCompile(`(?i)\brights\b`),
        regexp.MustCompile(`(?i)\bsoftware\b`),
        regexp.MustCompile(`(?i)\bterms\b`),
        regexp.MustCompile(`(?i)\bversion\b`),
        regexp.MustCompile(`(?i)\bwork\b`),
    }
)

// License is a classifier pre-loaded with known open source licenses.
type License struct {
    c *stringclassifier.Classifier

    // Threshold is the lowest confidence percentage acceptable for the
    // classifier.
    Threshold float64
}

// New creates a license classifier and pre-loads it with known open source licenses.
func New(threshold float64) (*License, error) {
    classifier := &License{
        c:         stringclassifier.New(threshold, Normalizers...),
        Threshold: threshold,
    }
    if err := classifier.registerLicenses(LicenseArchive); err != nil {
        return nil, fmt.Errorf("cannot register licenses: %v", err)
    }
    return classifier, nil
}

// NewWithForbiddenLicenses creates a license classifier and pre-loads it with
// known open source licenses which are forbidden.
func NewWithForbiddenLicenses(threshold float64) (*License, error) {
    classifier := &License{
        c:         stringclassifier.New(threshold, Normalizers...),
        Threshold: threshold,
    }
    if err := classifier.registerLicenses(ForbiddenLicenseArchive); err != nil {
        return nil, fmt.Errorf("cannot register licenses: %v", err)
    }
    return classifier, nil
}

// WithinConfidenceThreshold returns true if the confidence value is above or
// equal to the confidence threshold.
func (c *License) WithinConfidenceThreshold(conf float64) bool {
    return conf > c.Threshold || math.Abs(conf-c.Threshold) < math.SmallestNonzeroFloat64
}

// NearestMatch returns the "nearest" match to the given set of known licenses.
// Returned are the name of the license, and a confidence percentage indicating
// how confident the classifier is in the result.
func (c *License) NearestMatch(contents string) *stringclassifier.Match {
    if !c.hasCommonLicenseWords(contents) {
        return nil
    }
    m := c.c.NearestMatch(contents)
    m.Name = strings.TrimSuffix(m.Name, ".header")
    return m
}

// MultipleMatch matches all licenses within an unknown text.
func (c *License) MultipleMatch(contents string, includeHeaders bool) stringclassifier.Matches {
    norm := normalizeText(contents)
    if !c.hasCommonLicenseWords(norm) {
        return nil
    }

    m := make(map[stringclassifier.Match]bool)
    var matches stringclassifier.Matches
    for _, v := range c.c.MultipleMatch(norm) {
        if !c.WithinConfidenceThreshold(v.Confidence) {
            continue
        }

        if !includeHeaders && strings.HasSuffix(v.Name, ".header") {
            continue
        }

        v.Name = strings.TrimSuffix(v.Name, ".header")
        if re, ok := forbiddenRegexps[v.Name]; ok && !re.MatchString(norm) {
            continue
        }
        if _, ok := m[*v]; !ok {
            m[*v] = true
            matches = append(matches, v)
        }
    }
    sort.Sort(matches)
    return matches
}

func normalizeText(s string) string {
    for _, n := range Normalizers {
        s = n(s)
    }
    return s
}

// hasCommonLicenseWords returns true if the unknown text has at least one word
// that's common to all licenses.
func (c *License) hasCommonLicenseWords(s string) bool {
    for _, re := range commonLicenseWords {
        if re.MatchString(s) {
            return true
        }
    }
    return false
}

type archivedValue struct {
    name       string
    normalized string
    set        *searchset.SearchSet
}

// registerLicenses loads all known licenses and adds them to c as known values
// for comparison. The allocated space after ingesting the 'licenses.db'
// archive is ~167M.
func (c *License) registerLicenses(archive string) error {
    contents, err := ReadLicenseFile(archive)
    if err != nil {
        return err
    }

    reader := bytes.NewReader(contents)
    gr, err := gzip.NewReader(reader)
    if err != nil {
        return err
    }
    defer gr.Close()

    tr := tar.NewReader(gr)

    var muVals sync.Mutex
    var vals []archivedValue
    for i := 0; ; i++ {
        hdr, err := tr.Next()
        if err == io.EOF {
            break
        }
        if err != nil {
            return err
        }

        name := strings.TrimSuffix(hdr.Name, ".txt")

        // Read normalized value.
        var b bytes.Buffer
        if _, err := io.Copy(&b, tr); err != nil {
            return err
        }
        normalized := b.String()
        b.Reset()

        // Read precomputed hashes.
        hdr, err = tr.Next()
        if err != nil {
            return err
        }

        if _, err := io.Copy(&b, tr); err != nil {
            return err
        }

        var set searchset.SearchSet
        searchset.Deserialize(&b, &set)

        muVals.Lock()
        vals = append(vals, archivedValue{name, normalized, &set})
        muVals.Unlock()
    }

    for _, v := range vals {
        if err = c.c.AddPrecomputedValue(v.name, v.normalized, v.set); err != nil {
            return err
        }
    }
    return nil
}

// endOfLicenseText is text commonly associated with the end of a license. We
// can remove text that occurs after it.
var endOfLicenseText = []string{
    "END OF TERMS AND CONDITIONS",
}

// TrimExtraneousTrailingText removes text after an obvious end of the license
// and does not include substantive text of the license.
func TrimExtraneousTrailingText(s string) string {
    for _, e := range endOfLicenseText {
        if i := strings.LastIndex(s, e); i != -1 {
            return s[:i+len(e)]
        }
    }
    return s
}

var copyrightRE = regexp.MustCompile(`(?m)(?i:Copyright)\s+(?i:©\s+|\(c\)\s+)?(?:\d{2,4})(?:[-,]\s*\d{2,4})*,?\s*(?i:by)?\s*(.*?(?i:\s+Inc\.)?)[.,]?\s*(?i:All rights reserved\.?)?\s*$`)

// CopyrightHolder finds a copyright notification, if it exists, and returns
// the copyright holder.
func CopyrightHolder(contents string) string {
    matches := copyrightRE.FindStringSubmatch(contents)
    if len(matches) == 2 {
        return matches[1]
    }
    return ""
}

var publicDomainRE = regexp.MustCompile("(?i)(this file )?is( in the)? public domain")

// HasPublicDomainNotice performs a simple regex over the contents to see if a
// public domain notice is in there. As you can imagine, this isn't 100%
// definitive, but can be useful if a license match isn't found.
func (c *License) HasPublicDomainNotice(contents string) bool {
    return publicDomainRE.FindString(contents) != ""
}

// ignorableTexts is a list of lines at the start of the string we can remove
// to get a cleaner match.
var ignorableTexts = []*regexp.Regexp{
    regexp.MustCompile(`(?i)^(?:the )?mit license(?: \(mit\))?$`),
    regexp.MustCompile(`(?i)^(?:new )?bsd license$`),
    regexp.MustCompile(`(?i)^copyright and permission notice$`),
    regexp.MustCompile(`(?i)^copyright (\(c\) )?(\[yyyy\]|\d{4})[,.]? .*$`),
    regexp.MustCompile(`(?i)^(all|some) rights reserved\.?$`),
    regexp.MustCompile(`(?i)^@license$`),
    regexp.MustCompile(`^\s*$`),
}

// removeIgnorableTexts removes common text, which is not important for
// classification, that shows up before the body of the license.
func removeIgnorableTexts(s string) string {
    lines := strings.Split(strings.TrimRight(s, "\n"), "\n")
    var start int
    for ; start < len(lines); start++ {
        line := strings.TrimSpace(lines[start])
        var matches bool
        for _, re := range ignorableTexts {
            if re.MatchString(line) {
                matches = true
                break
            }
        }
        if !matches {
            break
        }
    }
    end := len(lines)
    if start > end {
        return "\n"
    }
    return strings.Join(lines[start:end], "\n") + "\n"
}

// removeShebangLine removes the '#!...' line if it's the first line in the
// file. Note that if it's the only line in a comment, it won't be removed.
func removeShebangLine(s string) string {
    lines := strings.Split(s, "\n")
    if len(lines) <= 1 || !strings.HasPrefix(lines[0], "#!") {
        return s
    }

    return strings.Join(lines[1:], "\n")
}

// isDecorative returns true if the line is made up purely of non-letter and
// non-digit characters.
func isDecorative(s string) bool {
    for _, c := range s {
        if unicode.IsLetter(c) || unicode.IsDigit(c) {
            return false
        }
    }
    return true
}

var nonWords = regexp.MustCompile("[[:punct:]]+")

// RemoveNonWords removes non-words from the string.
func RemoveNonWords(s string) string {
    return nonWords.ReplaceAllString(s, " ")
}

// interchangeablePunctuation is punctuation that can be normalized.
var interchangeablePunctuation = []struct {
    interchangeable *regexp.Regexp
    substitute      string
}{
    // Hyphen, Dash, En Dash, and Em Dash.
    {regexp.MustCompile(`[-‒–—]`), "-"},
    // Single, Double, Curly Single, and Curly Double.
    {regexp.MustCompile("['\"`‘’“”]"), "'"},
    // Copyright.
    {regexp.MustCompile("©"), "(c)"},
    // Hyphen-separated words.
    {regexp.MustCompile(`(\S)-\s+(\S)`), "${1}-${2}"},
    // Currency and Section. (Different copies of the CDDL use each marker.)
    {regexp.MustCompile("[§¤]"), "(s)"},
    // Middle Dot
    {regexp.MustCompile("·"), "*"},
}

// NormalizePunctuation takes all hyphens and quotes and normalizes them.
func NormalizePunctuation(s string) string {
    for _, iw := range interchangeablePunctuation {
        s = iw.interchangeable.ReplaceAllString(s, iw.substitute)
    }
    return s
}

// interchangeableWords are words we can substitute for a normalized form
// without changing the meaning of the license. See
// https://spdx.org/spdx-license-list/matching-guidelines for the list.
var interchangeableWords = []struct {
    interchangeable *regexp.Regexp
    substitute      string
}{
    {regexp.MustCompile("(?i)Acknowledgment"), "Acknowledgement"},
    {regexp.MustCompile("(?i)Analogue"), "Analog"},
    {regexp.MustCompile("(?i)Analyse"), "Analyze"},
    {regexp.MustCompile("(?i)Artefact"), "Artifact"},
    {regexp.MustCompile("(?i)Authorisation"), "Authorization"},
    {regexp.MustCompile("(?i)Authorised"), "Authorized"},
    {regexp.MustCompile("(?i)Calibre"), "Caliber"},
    {regexp.MustCompile("(?i)Cancelled"), "Canceled"},
    {regexp.MustCompile("(?i)Capitalisations"), "Capitalizations"},
    {regexp.MustCompile("(?i)Catalogue"), "Catalog"},
    {regexp.MustCompile("(?i)Categorise"), "Categorize"},
    {regexp.MustCompile("(?i)Centre"), "Center"},
    {regexp.MustCompile("(?i)Emphasised"), "Emphasized"},
    {regexp.MustCompile("(?i)Favour"), "Favor"},
    {regexp.MustCompile("(?i)Favourite"), "Favorite"},
    {regexp.MustCompile("(?i)Fulfil"), "Fulfill"},
    {regexp.MustCompile("(?i)Fulfilment"), "Fulfillment"},
    {regexp.MustCompile("(?i)Initialise"), "Initialize"},
    {regexp.MustCompile("(?i)Judgment"), "Judgement"},
    {regexp.MustCompile("(?i)Labelling"), "Labeling"},
    {regexp.MustCompile("(?i)Labour"), "Labor"},
    {regexp.MustCompile("(?i)Licence"), "License"},
    {regexp.MustCompile("(?i)Maximise"), "Maximize"},
    {regexp.MustCompile("(?i)Modelled"), "Modeled"},
    {regexp.MustCompile("(?i)Modelling"), "Modeling"},
    {regexp.MustCompile("(?i)Offence"), "Offense"},
    {regexp.MustCompile("(?i)Optimise"), "Optimize"},
    {regexp.MustCompile("(?i)Organisation"), "Organization"},
    {regexp.MustCompile("(?i)Organise"), "Organize"},
    {regexp.MustCompile("(?i)Practise"), "Practice"},
    {regexp.MustCompile("(?i)Programme"), "Program"},
    {regexp.MustCompile("(?i)Realise"), "Realize"},
    {regexp.MustCompile("(?i)Recognise"), "Recognize"},
    {regexp.MustCompile("(?i)Signalling"), "Signaling"},
    {regexp.MustCompile("(?i)Sub[- ]license"), "Sublicense"},
    {regexp.MustCompile("(?i)Utilisation"), "Utilization"},
    {regexp.MustCompile("(?i)Whilst"), "While"},
    {regexp.MustCompile("(?i)Wilful"), "Wilfull"},
    {regexp.MustCompile("(?i)Non-commercial"), "Noncommercial"},
    {regexp.MustCompile("(?i)Per cent"), "Percent"},
}

// NormalizeEquivalentWords normalizes equivalent words that are interchangeable.
func NormalizeEquivalentWords(s string) string {
    for _, iw := range interchangeableWords {
        s = iw.interchangeable.ReplaceAllString(s, iw.substitute)
    }
    return s
}
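For orientation: licenseclassifier enters Gopkg.lock above alongside test-infra's tools/dep-collector, which appears to be what pulls it in. A minimal, illustrative sketch of the vendored API — not code from this commit; the sample text is an assumption:

package main

import (
    "fmt"
    "log"

    "github.com/google/licenseclassifier"
)

// sampleText is a stand-in for an unknown license text.
const sampleText = `Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.`

func main() {
    // New loads licenses.db, which must be findable under a GOPATH entry.
    c, err := licenseclassifier.New(licenseclassifier.DefaultConfidenceThreshold)
    if err != nil {
        log.Fatalf("cannot create classifier: %v", err)
    }
    // NearestMatch returns nil when the text shares no common license
    // words, so check before dereferencing.
    if m := c.NearestMatch(sampleText); m != nil {
        fmt.Printf("best match: %s (confidence %.2f)\n", m.Name, m.Confidence)
    }
}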
vendor/github.com/google/licenseclassifier/file_system_resources.go (generated, vendored; new file, 65 lines)
@@ -0,0 +1,65 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package licenseclassifier

import (
    "go/build"
    "io/ioutil"
    "os"
    "path/filepath"
)

const (
    // LicenseDirectory is the directory where the prototype licenses are kept.
    LicenseDirectory = "src/github.com/google/licenseclassifier/licenses"
    // LicenseArchive is the name of the archive containing preprocessed
    // license texts.
    LicenseArchive = "licenses.db"
    // ForbiddenLicenseArchive is the name of the archive containing preprocessed
    // forbidden license texts only.
    ForbiddenLicenseArchive = "forbidden_licenses.db"
)

func findInGOPATH(rel string) (fullPath string, err error) {
    for _, path := range filepath.SplitList(build.Default.GOPATH) {
        fullPath := filepath.Join(path, rel)
        if _, err := os.Stat(fullPath); err != nil {
            if os.IsNotExist(err) {
                continue
            }
            return "", err
        }
        return fullPath, nil
    }
    return "", nil
}

// ReadLicenseFile locates and reads the license file.
func ReadLicenseFile(filename string) ([]byte, error) {
    archive, err := findInGOPATH(filepath.Join(LicenseDirectory, filename))
    if err != nil || archive == "" {
        return nil, err
    }
    return ioutil.ReadFile(archive)
}

// ReadLicenseDir reads directory containing the license files.
func ReadLicenseDir() ([]os.FileInfo, error) {
    filename, err := findInGOPATH(filepath.Join(LicenseDirectory, LicenseArchive))
    if err != nil || filename == "" {
        return nil, err
    }
    return ioutil.ReadDir(filepath.Dir(filename))
}
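A small sketch (not from this commit) of the lookup helpers above. Note the edge case: ReadLicenseFile reports "not found" as (nil, nil), because findInGOPATH returns an empty path without an error:

package main

import (
    "fmt"
    "log"

    "github.com/google/licenseclassifier"
)

func main() {
    data, err := licenseclassifier.ReadLicenseFile(licenseclassifier.LicenseArchive)
    if err != nil {
        log.Fatal(err)
    }
    if data == nil {
        log.Fatal("licenses.db not found under any GOPATH entry")
    }
    // registerLicenses (classifier.go) gunzips and untars these bytes.
    fmt.Printf("read %d bytes of license archive\n", len(data))
}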
@@ -0,0 +1,48 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package licenseclassifier

import "regexp"

var (
    reCCBYNC   = regexp.MustCompile(`(?i).*\bAttribution NonCommercial\b.*`)
    reCCBYNCND = regexp.MustCompile(`(?i).*\bAttribution NonCommercial NoDerivs\b.*`)
    reCCBYNCSA = regexp.MustCompile(`(?i).*\bAttribution NonCommercial ShareAlike\b.*`)

    // forbiddenRegexps are regular expressions we expect to find in
    // forbidden licenses. If we think we have a forbidden license but
    // don't find the equivalent phrase, then it's probably just a
    // misclassification.
    forbiddenRegexps = map[string]*regexp.Regexp{
        AGPL10:     regexp.MustCompile(`(?i).*\bAFFERO GENERAL PUBLIC LICENSE\b.*`),
        AGPL30:     regexp.MustCompile(`(?i).*\bGNU AFFERO GENERAL PUBLIC LICENSE\b.*`),
        CCBYNC10:   reCCBYNC,
        CCBYNC20:   reCCBYNC,
        CCBYNC25:   reCCBYNC,
        CCBYNC30:   reCCBYNC,
        CCBYNC40:   reCCBYNC,
        CCBYNCND10: regexp.MustCompile(`(?i).*\bAttribution NoDerivs NonCommercial\b.*`),
        CCBYNCND20: reCCBYNCND,
        CCBYNCND25: reCCBYNCND,
        CCBYNCND30: reCCBYNCND,
        CCBYNCND40: regexp.MustCompile(`(?i).*\bAttribution NonCommercial NoDerivatives\b.*`),
        CCBYNCSA10: reCCBYNCSA,
        CCBYNCSA20: reCCBYNCSA,
        CCBYNCSA25: reCCBYNCSA,
        CCBYNCSA30: reCCBYNCSA,
        CCBYNCSA40: reCCBYNCSA,
        WTFPL:      regexp.MustCompile(`(?i).*\bDO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE\b.*`),
    }
)
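The map above is consulted in MultipleMatch (classifier.go, earlier in this diff): a candidate match with a forbidden-regexp entry is dropped unless the phrase actually occurs in the normalized text. An illustrative sketch, with the CC-BY-NC pattern re-declared locally because the vendored variable is unexported:

package main

import (
    "fmt"
    "regexp"
)

func main() {
    // Same pattern as the unexported reCCBYNC above.
    reCCBYNC := regexp.MustCompile(`(?i).*\bAttribution NonCommercial\b.*`)

    // The classifier normalizes punctuation and case away before this
    // check, so the phrase appears with spaces rather than hyphens.
    fmt.Println(reCCBYNC.MatchString("creative commons attribution noncommercial 2 0")) // true
    fmt.Println(reCCBYNC.MatchString("creative commons attribution 2 0"))               // false
}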
@@ -0,0 +1,20 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package sets provides sets for storing collections of unique elements.
package sets

// present is an empty struct used as the "value" in the map[int], since
// empty structs consume zero bytes (unlike 1 unnecessary byte per bool).
type present struct{}
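A quick check of the zero-byte claim above; the `present` type is re-declared here since the vendored one is unexported:

package main

import (
    "fmt"
    "unsafe"
)

// present mirrors the unexported empty-struct type above.
type present struct{}

func main() {
    fmt.Println(unsafe.Sizeof(present{})) // 0 — the set stores only its keys
    fmt.Println(unsafe.Sizeof(true))      // 1 — what a map[string]bool would add per entry
}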
vendor/github.com/google/licenseclassifier/internal/sets/stringset.go (generated, vendored; new file, 228 lines)
@@ -0,0 +1,228 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package sets

import (
    "fmt"
    "sort"
    "strings"
)

// StringSet stores a set of unique string elements.
type StringSet struct {
    set map[string]present
}

// NewStringSet creates a StringSet containing the supplied initial string elements.
func NewStringSet(elements ...string) *StringSet {
    s := &StringSet{}
    s.set = make(map[string]present)
    s.Insert(elements...)
    return s
}

// Copy returns a newly allocated copy of the supplied StringSet.
func (s *StringSet) Copy() *StringSet {
    c := NewStringSet()
    if s != nil {
        for e := range s.set {
            c.set[e] = present{}
        }
    }
    return c
}

// Insert zero or more string elements into the StringSet.
// As expected for a Set, elements already present in the StringSet are
// simply ignored.
func (s *StringSet) Insert(elements ...string) {
    for _, e := range elements {
        s.set[e] = present{}
    }
}

// Delete zero or more string elements from the StringSet.
// Any elements not present in the StringSet are simply ignored.
func (s *StringSet) Delete(elements ...string) {
    for _, e := range elements {
        delete(s.set, e)
    }
}

// Intersect returns a new StringSet containing the intersection of the
// receiver and argument StringSets. Returns an empty set if the argument is nil.
func (s *StringSet) Intersect(other *StringSet) *StringSet {
    if other == nil {
        return NewStringSet()
    }

    // Point a and b to the maps, setting a to the smaller of the two.
    a, b := s.set, other.set
    if len(b) < len(a) {
        a, b = b, a
    }

    // Perform the intersection.
    intersect := NewStringSet()
    for e := range a {
        if _, ok := b[e]; ok {
            intersect.set[e] = present{}
        }
    }
    return intersect
}

// Disjoint returns true if the intersection of the receiver and the argument
// StringSets is the empty set. Returns true if the argument is nil or either
// StringSet is the empty set.
func (s *StringSet) Disjoint(other *StringSet) bool {
    if other == nil || len(other.set) == 0 || len(s.set) == 0 {
        return true
    }

    // Point a and b to the maps, setting a to the smaller of the two.
    a, b := s.set, other.set
    if len(b) < len(a) {
        a, b = b, a
    }

    // Check for non-empty intersection.
    for e := range a {
        if _, ok := b[e]; ok {
            return false // Early-exit because intersecting.
        }
    }
    return true
}

// Difference returns a new StringSet containing the elements in the receiver
// that are not present in the argument StringSet. Returns a copy of the
// receiver if the argument is nil.
func (s *StringSet) Difference(other *StringSet) *StringSet {
    if other == nil {
        return s.Copy()
    }

    // Insert only the elements in the receiver that are not present in the
    // argument StringSet.
    diff := NewStringSet()
    for e := range s.set {
        if _, ok := other.set[e]; !ok {
            diff.set[e] = present{}
        }
    }
    return diff
}

// Unique returns a new StringSet containing the elements in the receiver
// that are not present in the argument StringSet *and* the elements in the
// argument StringSet that are not in the receiver (which is the union of two
// disjoint sets). Returns a copy of the receiver if the argument is nil.
func (s *StringSet) Unique(other *StringSet) *StringSet {
    if other == nil {
        return s.Copy()
    }

    sNotInOther := s.Difference(other)
    otherNotInS := other.Difference(s)

    // Duplicate Union implementation here to avoid extra Copy, since both
    // sNotInOther and otherNotInS are already copies.
    unique := sNotInOther
    for e := range otherNotInS.set {
        unique.set[e] = present{}
    }
    return unique
}

// Equal returns true if the receiver and the argument StringSet contain
// exactly the same elements.
func (s *StringSet) Equal(other *StringSet) bool {
    if s == nil || other == nil {
        return s == nil && other == nil
    }

    // Two sets of different length cannot have the exact same unique elements.
    if len(s.set) != len(other.set) {
        return false
    }

    // Only one loop is needed. If the two sets are known to be of equal
    // length, then the two sets are equal only if exactly all of the elements
    // in the first set are found in the second.
    for e := range s.set {
        if _, ok := other.set[e]; !ok {
            return false
        }
    }

    return true
}

// Union returns a new StringSet containing the union of the receiver and
// argument StringSets. Returns a copy of the receiver if the argument is nil.
func (s *StringSet) Union(other *StringSet) *StringSet {
    union := s.Copy()
    if other != nil {
        for e := range other.set {
            union.set[e] = present{}
        }
    }
    return union
}

// Contains returns true if element is in the StringSet.
func (s *StringSet) Contains(element string) bool {
    _, in := s.set[element]
    return in
}

// Len returns the number of unique elements in the StringSet.
func (s *StringSet) Len() int {
    return len(s.set)
}

// Empty returns true if the receiver is the empty set.
func (s *StringSet) Empty() bool {
    return len(s.set) == 0
}

// Elements returns a []string of the elements in the StringSet, in no
// particular (or consistent) order.
func (s *StringSet) Elements() []string {
    elements := []string{} // Return at least an empty slice rather than nil.
    for e := range s.set {
        elements = append(elements, e)
    }
    return elements
}

// Sorted returns a sorted []string of the elements in the StringSet.
func (s *StringSet) Sorted() []string {
    elements := s.Elements()
    sort.Strings(elements)
    return elements
}

// String formats the StringSet elements as sorted strings, representing them
// in "array initializer" syntax.
func (s *StringSet) String() string {
    elements := s.Sorted()
    var quoted []string
    for _, e := range elements {
        quoted = append(quoted, fmt.Sprintf("%q", e))
    }
    return fmt.Sprintf("{%s}", strings.Join(quoted, ", "))
}
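An illustrative sketch of the StringSet API above. Being under internal/, the package is importable only from within licenseclassifier itself; the import is shown purely to exercise the methods:

package main

import (
    "fmt"

    "github.com/google/licenseclassifier/internal/sets"
)

func main() {
    a := sets.NewStringSet("GPL-2.0", "MIT", "Apache-2.0")
    b := sets.NewStringSet("MIT", "Apache-2.0", "BSD-3-Clause")

    // String() prints the elements sorted and quoted.
    fmt.Println(a.Intersect(b))    // {"Apache-2.0", "MIT"}
    fmt.Println(a.Difference(b))   // {"GPL-2.0"}
    fmt.Println(a.Contains("MIT")) // true
    fmt.Println(a.Disjoint(b))     // false
}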
@@ -0,0 +1,376 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package licenseclassifier

// *** NOTE: Update this file when adding a new license. You need to:
//
// 1. Add the canonical name to the list, and
// 2. Categorize the license.

import "github.com/google/licenseclassifier/internal/sets"

// Canonical names of the licenses.
const (
    // The names come from the https://spdx.org/licenses website, and are
    // also the filenames of the licenses in licenseclassifier/licenses.
    AFL11                       = "AFL-1.1"
    AFL12                       = "AFL-1.2"
    AFL20                       = "AFL-2.0"
    AFL21                       = "AFL-2.1"
    AFL30                       = "AFL-3.0"
    AGPL10                      = "AGPL-1.0"
    AGPL30                      = "AGPL-3.0"
    Apache10                    = "Apache-1.0"
    Apache11                    = "Apache-1.1"
    Apache20                    = "Apache-2.0"
    APSL10                      = "APSL-1.0"
    APSL11                      = "APSL-1.1"
    APSL12                      = "APSL-1.2"
    APSL20                      = "APSL-2.0"
    Artistic10cl8               = "Artistic-1.0-cl8"
    Artistic10Perl              = "Artistic-1.0-Perl"
    Artistic10                  = "Artistic-1.0"
    Artistic20                  = "Artistic-2.0"
    BCL                         = "BCL"
    Beerware                    = "Beerware"
    BSD2ClauseFreeBSD           = "BSD-2-Clause-FreeBSD"
    BSD2ClauseNetBSD            = "BSD-2-Clause-NetBSD"
    BSD2Clause                  = "BSD-2-Clause"
    BSD3ClauseAttribution       = "BSD-3-Clause-Attribution"
    BSD3ClauseClear             = "BSD-3-Clause-Clear"
    BSD3ClauseLBNL              = "BSD-3-Clause-LBNL"
    BSD3Clause                  = "BSD-3-Clause"
    BSD4Clause                  = "BSD-4-Clause"
    BSD4ClauseUC                = "BSD-4-Clause-UC"
    BSDProtection               = "BSD-Protection"
    BSL10                       = "BSL-1.0"
    CC010                       = "CC0-1.0"
    CCBY10                      = "CC-BY-1.0"
    CCBY20                      = "CC-BY-2.0"
    CCBY25                      = "CC-BY-2.5"
    CCBY30                      = "CC-BY-3.0"
    CCBY40                      = "CC-BY-4.0"
    CCBYNC10                    = "CC-BY-NC-1.0"
    CCBYNC20                    = "CC-BY-NC-2.0"
    CCBYNC25                    = "CC-BY-NC-2.5"
    CCBYNC30                    = "CC-BY-NC-3.0"
    CCBYNC40                    = "CC-BY-NC-4.0"
    CCBYNCND10                  = "CC-BY-NC-ND-1.0"
    CCBYNCND20                  = "CC-BY-NC-ND-2.0"
    CCBYNCND25                  = "CC-BY-NC-ND-2.5"
    CCBYNCND30                  = "CC-BY-NC-ND-3.0"
    CCBYNCND40                  = "CC-BY-NC-ND-4.0"
    CCBYNCSA10                  = "CC-BY-NC-SA-1.0"
    CCBYNCSA20                  = "CC-BY-NC-SA-2.0"
    CCBYNCSA25                  = "CC-BY-NC-SA-2.5"
    CCBYNCSA30                  = "CC-BY-NC-SA-3.0"
    CCBYNCSA40                  = "CC-BY-NC-SA-4.0"
    CCBYND10                    = "CC-BY-ND-1.0"
    CCBYND20                    = "CC-BY-ND-2.0"
    CCBYND25                    = "CC-BY-ND-2.5"
    CCBYND30                    = "CC-BY-ND-3.0"
    CCBYND40                    = "CC-BY-ND-4.0"
    CCBYSA10                    = "CC-BY-SA-1.0"
    CCBYSA20                    = "CC-BY-SA-2.0"
    CCBYSA25                    = "CC-BY-SA-2.5"
    CCBYSA30                    = "CC-BY-SA-3.0"
    CCBYSA40                    = "CC-BY-SA-4.0"
    CDDL10                      = "CDDL-1.0"
    CDDL11                      = "CDDL-1.1"
    CommonsClause               = "Commons-Clause"
    CPAL10                      = "CPAL-1.0"
    CPL10                       = "CPL-1.0"
    eGenix                      = "eGenix"
    EPL10                       = "EPL-1.0"
    EUPL10                      = "EUPL-1.0"
    EUPL11                      = "EUPL-1.1"
    Facebook2Clause             = "Facebook-2-Clause"
    Facebook3Clause             = "Facebook-3-Clause"
    FacebookExamples            = "Facebook-Examples"
    FreeImage                   = "FreeImage"
    FTL                         = "FTL"
    GPL10                       = "GPL-1.0"
    GPL20                       = "GPL-2.0"
    GPL20withautoconfexception  = "GPL-2.0-with-autoconf-exception"
    GPL20withbisonexception     = "GPL-2.0-with-bison-exception"
    GPL20withclasspathexception = "GPL-2.0-with-classpath-exception"
    GPL20withfontexception      = "GPL-2.0-with-font-exception"
    GPL20withGCCexception       = "GPL-2.0-with-GCC-exception"
    GPL30                       = "GPL-3.0"
    GPL30withautoconfexception  = "GPL-3.0-with-autoconf-exception"
    GPL30withGCCexception       = "GPL-3.0-with-GCC-exception"
    GUSTFont                    = "GUST-Font-License"
    ImageMagick                 = "ImageMagick"
    IPL10                       = "IPL-1.0"
    ISC                         = "ISC"
    LGPL20                      = "LGPL-2.0"
    LGPL21                      = "LGPL-2.1"
    LGPL30                      = "LGPL-3.0"
    LGPLLR                      = "LGPLLR"
    Libpng                      = "Libpng"
    Lil10                       = "Lil-1.0"
    LPL102                      = "LPL-1.02"
    LPL10                       = "LPL-1.0"
    LPPL13c                     = "LPPL-1.3c"
    MIT                         = "MIT"
    MPL10                       = "MPL-1.0"
    MPL11                       = "MPL-1.1"
    MPL20                       = "MPL-2.0"
    MSPL                        = "MS-PL"
    NCSA                        = "NCSA"
    NPL10                       = "NPL-1.0"
    NPL11                       = "NPL-1.1"
    OFL                         = "OFL"
    OpenSSL                     = "OpenSSL"
    OSL10                       = "OSL-1.0"
    OSL11                       = "OSL-1.1"
    OSL20                       = "OSL-2.0"
    OSL21                       = "OSL-2.1"
    OSL30                       = "OSL-3.0"
    PHP301                      = "PHP-3.01"
    PHP30                       = "PHP-3.0"
    PIL                         = "PIL"
    Python20complete            = "Python-2.0-complete"
    Python20                    = "Python-2.0"
    QPL10                       = "QPL-1.0"
    Ruby                        = "Ruby"
    SGIB10                      = "SGI-B-1.0"
    SGIB11                      = "SGI-B-1.1"
    SGIB20                      = "SGI-B-2.0"
    SISSL12                     = "SISSL-1.2"
    SISSL                       = "SISSL"
    Sleepycat                   = "Sleepycat"
    UnicodeTOU                  = "Unicode-TOU"
    Unlicense                   = "Unlicense"
    W3C19980720                 = "W3C-19980720"
    W3C                         = "W3C"
    WTFPL                       = "WTFPL"
    X11                         = "X11"
    Xnet                        = "Xnet"
    Zend20                      = "Zend-2.0"
    ZlibAcknowledgement         = "zlib-acknowledgement"
    Zlib                        = "Zlib"
    ZPL11                       = "ZPL-1.1"
    ZPL20                       = "ZPL-2.0"
    ZPL21                       = "ZPL-2.1"
)

var (
    // Licenses Categorized by Type

    // restricted - Licenses in this category require mandatory source
    // distribution if we ship a product that includes third-party code
    // protected by such a license.
    restrictedType = sets.NewStringSet(
        BCL,
        CCBYND10,
        CCBYND20,
        CCBYND25,
        CCBYND30,
        CCBYND40,
        CCBYSA10,
        CCBYSA20,
        CCBYSA25,
        CCBYSA30,
        CCBYSA40,
        GPL10,
        GPL20,
        GPL20withautoconfexception,
        GPL20withbisonexception,
        GPL20withclasspathexception,
        GPL20withfontexception,
        GPL20withGCCexception,
        GPL30,
        GPL30withautoconfexception,
        GPL30withGCCexception,
        LGPL20,
        LGPL21,
        LGPL30,
        NPL10,
        NPL11,
        OSL10,
        OSL11,
        OSL20,
        OSL21,
        OSL30,
        QPL10,
        Sleepycat,
    )

    // reciprocal - These licenses allow usage of software made available
    // under such licenses freely in *unmodified* form. If the third-party
    // source code is modified in any way these modifications to the
    // original third-party source code must be made available.
    reciprocalType = sets.NewStringSet(
        APSL10,
        APSL11,
        APSL12,
        APSL20,
        CDDL10,
        CDDL11,
        CPL10,
        EPL10,
        FreeImage,
        IPL10,
        MPL10,
        MPL11,
        MPL20,
        Ruby,
    )

    // notice - These licenses contain few restrictions, allowing original
    // or modified third-party software to be shipped in any product
    // without endangering or encumbering our source code. All of the
    // licenses in this category do, however, have an "original Copyright
    // notice" or "advertising clause", wherein any external distributions
    // must include the notice or clause specified in the license.
    noticeType = sets.NewStringSet(
        AFL11,
        AFL12,
        AFL20,
        AFL21,
        AFL30,
        Apache10,
        Apache11,
        Apache20,
        Artistic10cl8,
        Artistic10Perl,
        Artistic10,
        Artistic20,
        BSL10,
        BSD2ClauseFreeBSD,
        BSD2ClauseNetBSD,
        BSD2Clause,
        BSD3ClauseAttribution,
        BSD3ClauseClear,
        BSD3ClauseLBNL,
        BSD3Clause,
        BSD4Clause,
        BSD4ClauseUC,
        BSDProtection,
        CCBY10,
        CCBY20,
        CCBY25,
        CCBY30,
        CCBY40,
        FTL,
        ISC,
        ImageMagick,
        Libpng,
        Lil10,
        LPL102,
        LPL10,
        MSPL,
        MIT,
        NCSA,
        OpenSSL,
        PHP301,
        PHP30,
        PIL,
        Python20,
        Python20complete,
        SGIB10,
        SGIB11,
        SGIB20,
        UnicodeTOU,
        W3C19980720,
        W3C,
        X11,
        Xnet,
        Zend20,
        ZlibAcknowledgement,
        Zlib,
        ZPL11,
        ZPL20,
        ZPL21,
    )

    // permissive - These licenses can be used in (relatively rare) cases
    // where third-party software is under a license (not "Public Domain"
    // or "free for any use" like 'unencumbered') that is even more lenient
    // than a 'notice' license. Use the 'permissive' license type when even
    // a copyright notice is not required for license compliance.
    permissiveType = sets.NewStringSet()

    // unencumbered - Licenses that basically declare that the code is "free for any use".
    unencumberedType = sets.NewStringSet(
        CC010,
        Unlicense,
    )

    // byexceptiononly - Licenses that are incompatible with all (or most)
    // uses in combination with our source code. Commercial third-party
    // packages that are purchased and licensed only for a specific use
    // fall into this category.
    byExceptionOnlyType = sets.NewStringSet(
        Beerware,
        OFL,
    )

    // forbidden - Licenses that are forbidden to be used.
    forbiddenType = sets.NewStringSet(
        AGPL10,
        AGPL30,
        CCBYNC10,
        CCBYNC20,
        CCBYNC25,
        CCBYNC30,
        CCBYNC40,
        CCBYNCND10,
        CCBYNCND20,
        CCBYNCND25,
        CCBYNCND30,
        CCBYNCND40,
        CCBYNCSA10,
        CCBYNCSA20,
        CCBYNCSA25,
        CCBYNCSA30,
        CCBYNCSA40,
        CommonsClause,
        Facebook2Clause,
        Facebook3Clause,
        FacebookExamples,
        WTFPL,
    )

    // LicenseTypes is a set of the types of licenses Google recognizes.
    LicenseTypes = sets.NewStringSet(
        "restricted",
        "reciprocal",
        "notice",
        "permissive",
        "unencumbered",
        "by_exception_only",
    )
)

// LicenseType returns the type the license has.
func LicenseType(name string) string {
    switch {
    case restrictedType.Contains(name):
        return "restricted"
    case reciprocalType.Contains(name):
        return "reciprocal"
    case noticeType.Contains(name):
        return "notice"
    case permissiveType.Contains(name):
        return "permissive"
    case unencumberedType.Contains(name):
        return "unencumbered"
    case forbiddenType.Contains(name):
        return "FORBIDDEN"
    }
    return ""
}
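A small sketch (not part of this commit) exercising LicenseType with names from the sets above; note that by_exception_only names and unknown names both come back as the empty string:

package main

import (
    "fmt"

    "github.com/google/licenseclassifier"
)

func main() {
    fmt.Println(licenseclassifier.LicenseType("GPL-2.0"))    // restricted
    fmt.Println(licenseclassifier.LicenseType("MPL-2.0"))    // reciprocal
    fmt.Println(licenseclassifier.LicenseType("MIT"))        // notice
    fmt.Println(licenseclassifier.LicenseType("WTFPL"))      // FORBIDDEN
    fmt.Println(licenseclassifier.LicenseType("not-a-name")) // "" (unknown)
}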
@@ -0,0 +1,24 @@
This is free and unencumbered software released into the public domain.

Anyone is free to copy, modify, publish, use, compile, sell, or distribute
this software, either in source code form or as a compiled binary, for any
purpose, commercial or non-commercial, and by any means.

In jurisdictions that recognize copyright laws, the author or authors of this
software dedicate any and all copyright interest in the software to the public
domain. We make this dedication for the benefit of the public at large and to
the detriment of our heirs and
successors. We intend this dedication to be an overt act of relinquishment in
perpetuity of all present and future rights to this software under copyright
law.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

For more information, please refer to <http://unlicense.org/>
Binary file not shown.
202 vendor/github.com/google/licenseclassifier/stringclassifier/LICENSE generated vendored Normal file
@ -0,0 +1,202 @@
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
560 vendor/github.com/google/licenseclassifier/stringclassifier/classifier.go generated vendored Normal file
@ -0,0 +1,560 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package stringclassifier finds the nearest match between a string and a set of known values. It
// uses the Levenshtein Distance (LD) algorithm to determine this. A match with a large LD is less
// likely to be correct than one with a small LD. A confidence percentage is returned, which
// indicates how confident the algorithm is that the match is correct. The higher the percentage,
// the greater the confidence that the match is correct.
//
// Example Usage:
//
//     type Text struct {
//         Name string
//         Text string
//     }
//
//     func NewClassifier(knownTexts []Text) (*stringclassifier.Classifier, error) {
//         sc := stringclassifier.New(stringclassifier.DefaultConfidenceThreshold, stringclassifier.FlattenWhitespace)
//         for _, known := range knownTexts {
//             if err := sc.AddValue(known.Name, known.Text); err != nil {
//                 return nil, err
//             }
//         }
//         return sc, nil
//     }
//
//     func IdentifyTexts(sc *stringclassifier.Classifier, unknownTexts []*Text) {
//         for _, unknown := range unknownTexts {
//             m := sc.NearestMatch(unknown.Text)
//             log.Printf("The nearest match to %q is %q (confidence: %v)",
//                 unknown.Name, m.Name, m.Confidence)
//         }
//     }
package stringclassifier

import (
	"fmt"
	"log"
	"math"
	"regexp"
	"sort"
	"sync"

	"github.com/google/licenseclassifier/stringclassifier/internal/pq"
	"github.com/google/licenseclassifier/stringclassifier/searchset"
	"github.com/sergi/go-diff/diffmatchpatch"
)

// The diff/match/patch algorithm.
var dmp = diffmatchpatch.New()

const (
	// DefaultConfidenceThreshold is the minimum ratio threshold between
	// the matching range and the full source range that we're willing to
	// accept in order to say that the matching range will produce a
	// sufficiently good edit distance. I.e., if the matching range is
	// below this threshold we won't run the Levenshtein Distance algorithm
	// on it.
	DefaultConfidenceThreshold float64 = 0.80

	defaultMinDiffRatio float64 = 0.75
)

// A Classifier matches a string to a set of known values.
type Classifier struct {
	muValues    sync.RWMutex
	values      map[string]*knownValue
	normalizers []NormalizeFunc
	threshold   float64

	// MinDiffRatio defines the minimum ratio of the length difference
	// allowed to consider a known value a possible match. This is used as
	// a performance optimization to eliminate values that are unlikely to
	// be a match.
	//
	// For example, a value of 0.75 means that the shorter string must be
	// at least 75% the length of the longer string to consider it a
	// possible match.
	//
	// Setting this to 1.0 will require that strings are identical length.
	// Setting this to 0 will consider all known values as possible
	// matches.
	MinDiffRatio float64
}

// NormalizeFunc is a function that is used to normalize a string prior to comparison.
type NormalizeFunc func(string) string

// New creates a new Classifier with the provided NormalizeFuncs. Each
// NormalizeFunc is applied in order to a string before comparison.
func New(threshold float64, funcs ...NormalizeFunc) *Classifier {
	return &Classifier{
		values:       make(map[string]*knownValue),
		normalizers:  append([]NormalizeFunc(nil), funcs...),
		threshold:    threshold,
		MinDiffRatio: defaultMinDiffRatio,
	}
}

// knownValue identifies a value in the corpus to match against.
type knownValue struct {
	key             string
	normalizedValue string
	reValue         *regexp.Regexp
	set             *searchset.SearchSet
}

// AddValue adds a known value to be matched against. If a value already exists
// for key, an error is returned.
func (c *Classifier) AddValue(key, value string) error {
	c.muValues.Lock()
	defer c.muValues.Unlock()
	if _, ok := c.values[key]; ok {
		return fmt.Errorf("value already registered with key %q", key)
	}
	norm := c.normalize(value)
	c.values[key] = &knownValue{
		key:             key,
		normalizedValue: norm,
		reValue:         regexp.MustCompile(norm),
	}
	return nil
}

// AddPrecomputedValue adds a known value to be matched against. The value has
// already been normalized and the SearchSet object deserialized, so no
// processing is necessary.
func (c *Classifier) AddPrecomputedValue(key, value string, set *searchset.SearchSet) error {
	c.muValues.Lock()
	defer c.muValues.Unlock()
	if _, ok := c.values[key]; ok {
		return fmt.Errorf("value already registered with key %q", key)
	}
	set.GenerateNodeList()
	c.values[key] = &knownValue{
		key:             key,
		normalizedValue: value,
		reValue:         regexp.MustCompile(value),
		set:             set,
	}
	return nil
}

// normalize a string by applying each of the registered NormalizeFuncs.
func (c *Classifier) normalize(s string) string {
	for _, fn := range c.normalizers {
		s = fn(s)
	}
	return s
}

// Match identifies the result of matching a string against a knownValue.
type Match struct {
	Name       string  // Name of knownValue that was matched
	Confidence float64 // Confidence percentage
	Offset     int     // The offset into the unknown string the match was made
	Extent     int     // The length from the offset into the unknown string
}

// Matches is a list of Match-es. This is here mainly so that the list can be
// sorted.
type Matches []*Match

func (m Matches) Len() int      { return len(m) }
func (m Matches) Swap(i, j int) { m[i], m[j] = m[j], m[i] }
func (m Matches) Less(i, j int) bool {
	if math.Abs(m[j].Confidence-m[i].Confidence) < math.SmallestNonzeroFloat64 {
		if m[i].Name == m[j].Name {
			if m[i].Offset > m[j].Offset {
				return false
			}
			if m[i].Offset == m[j].Offset {
				return m[i].Extent > m[j].Extent
			}
			return true
		}
		return m[i].Name < m[j].Name
	}
	return m[i].Confidence > m[j].Confidence
}

// Names returns an unsorted slice of the names of the matched licenses.
func (m Matches) Names() []string {
	var names []string
	for _, n := range m {
		names = append(names, n.Name)
	}
	return names
}

// uniquify goes through the matches and removes any that are contained within
// one with a higher confidence. This assumes that Matches is sorted.
func (m Matches) uniquify() Matches {
	type matchedRange struct {
		offset, extent int
	}

	var matched []matchedRange
	var matches Matches
OUTER:
	for _, match := range m {
		for _, mr := range matched {
			if match.Offset >= mr.offset && match.Offset <= mr.offset+mr.extent {
				continue OUTER
			}
		}
		matched = append(matched, matchedRange{match.Offset, match.Extent})
		matches = append(matches, match)
	}

	return matches
}

// NearestMatch returns the name of the known value that most closely matches
// the unknown string, along with a confidence percentage indicating how
// confident the classifier is in the result. A percentage of "1.0" indicates
// an exact match, while a percentage of "0.0" indicates a complete mismatch.
//
// If the string is equidistant from multiple known values, it is undefined
// which will be returned.
func (c *Classifier) NearestMatch(s string) *Match {
	pq := c.nearestMatch(s)
	if pq.Len() == 0 {
		return &Match{}
	}
	return pq.Pop().(*Match)
}

// MultipleMatch tries to determine which known strings are found within an
// unknown string. This differs from "NearestMatch" in that it looks only at
// those areas within the unknown string that are likely to match. A list of
// potential matches is returned. It's up to the caller to determine which
// ones are acceptable.
func (c *Classifier) MultipleMatch(s string) (matches Matches) {
	pq := c.multipleMatch(s)
	if pq == nil {
		return matches
	}

	// A map to remove duplicate entries.
	m := make(map[Match]bool)

	for pq.Len() != 0 {
		v := pq.Pop().(*Match)
		if _, ok := m[*v]; !ok {
			m[*v] = true
			matches = append(matches, v)
		}
	}

	sort.Sort(matches)
	return matches.uniquify()
}

// possibleMatch identifies a known value and its diffRatio to a given string.
type possibleMatch struct {
	value     *knownValue
	diffRatio float64
}

// likelyMatches is a slice of possibleMatches that can be sorted by their
// diffRatio to a given string, such that the most likely matches (based on
// length) are at the beginning.
type likelyMatches []possibleMatch

func (m likelyMatches) Len() int           { return len(m) }
func (m likelyMatches) Less(i, j int) bool { return m[i].diffRatio > m[j].diffRatio }
func (m likelyMatches) Swap(i, j int)      { m[i], m[j] = m[j], m[i] }

// nearestMatch returns a Queue of values that the unknown string may be. The
// values are compared via their Levenshtein Distance and ranked with the
// nearest match at the beginning.
func (c *Classifier) nearestMatch(unknown string) *pq.Queue {
	var mu sync.Mutex // Protect the priority queue.
	pq := pq.NewQueue(func(x, y interface{}) bool {
		return x.(*Match).Confidence > y.(*Match).Confidence
	}, nil)

	unknown = c.normalize(unknown)
	if len(unknown) == 0 {
		return pq
	}

	c.muValues.RLock()
	var likely likelyMatches
	for _, v := range c.values {
		dr := diffRatio(unknown, v.normalizedValue)
		if dr < c.MinDiffRatio {
			continue
		}
		if unknown == v.normalizedValue {
			// We found an exact match.
			pq.Push(&Match{Name: v.key, Confidence: 1.0, Offset: 0, Extent: len(unknown)})
			c.muValues.RUnlock()
			return pq
		}
		likely = append(likely, possibleMatch{value: v, diffRatio: dr})
	}
	c.muValues.RUnlock()
	sort.Sort(likely)

	var wg sync.WaitGroup
	classifyString := func(name, unknown, known string) {
		defer wg.Done()

		diffs := dmp.DiffMain(unknown, known, true)
		distance := dmp.DiffLevenshtein(diffs)
		confidence := confidencePercentage(len(unknown), len(known), distance)
		if confidence > 0.0 {
			mu.Lock()
			pq.Push(&Match{Name: name, Confidence: confidence, Offset: 0, Extent: len(unknown)})
			mu.Unlock()
		}
	}

	wg.Add(len(likely))
	for _, known := range likely {
		go classifyString(known.value.key, unknown, known.value.normalizedValue)
	}
	wg.Wait()
	return pq
}

// matcher finds all potential matches of "known" in "unknown". The results are
// placed in "queue".
type matcher struct {
	unknown     *searchset.SearchSet
	normUnknown string
	threshold   float64

	mu    sync.Mutex
	queue *pq.Queue
}

// newMatcher creates a "matcher" object.
func newMatcher(unknown string, threshold float64) *matcher {
	return &matcher{
		unknown:     searchset.New(unknown, searchset.DefaultGranularity),
		normUnknown: unknown,
		threshold:   threshold,
		queue: pq.NewQueue(func(x, y interface{}) bool {
			return x.(*Match).Confidence > y.(*Match).Confidence
		}, nil),
	}
}

// findMatches takes a known text and finds all potential instances of it in
// the unknown text. The resulting matches can then be filtered to determine
// which are the best matches.
func (m *matcher) findMatches(known *knownValue) {
	var mrs []searchset.MatchRanges
	if all := known.reValue.FindAllStringIndex(m.normUnknown, -1); all != nil {
		// We found exact matches. Just use those!
		for _, a := range all {
			var start, end int
			for i, tok := range m.unknown.Tokens {
				if tok.Offset == a[0] {
					start = i
				} else if tok.Offset >= a[len(a)-1]-len(tok.Text) {
					end = i
					break
				}
			}

			mrs = append(mrs, searchset.MatchRanges{{
				SrcStart:    0,
				SrcEnd:      len(known.set.Tokens),
				TargetStart: start,
				TargetEnd:   end + 1,
			}})
		}
	} else {
		// No exact match. Perform a more thorough match.
		mrs = searchset.FindPotentialMatches(known.set, m.unknown)
	}

	var wg sync.WaitGroup
	for _, mr := range mrs {
		if !m.withinConfidenceThreshold(known.set, mr) {
			continue
		}

		wg.Add(1)
		go func(mr searchset.MatchRanges) {
			start, end := mr.TargetRange(m.unknown)
			conf := levDist(m.normUnknown[start:end], known.normalizedValue)
			if conf > 0.0 {
				m.mu.Lock()
				m.queue.Push(&Match{Name: known.key, Confidence: conf, Offset: start, Extent: end - start})
				m.mu.Unlock()
			}
			wg.Done()
		}(mr)
	}
	wg.Wait()
}

// withinConfidenceThreshold returns the Confidence we have in the potential
// match. It does this by calculating the ratio of what's matching to the
// original known text.
func (m *matcher) withinConfidenceThreshold(known *searchset.SearchSet, mr searchset.MatchRanges) bool {
	return float64(mr.Size())/float64(len(known.Tokens)) >= m.threshold
}

// multipleMatch returns a Queue of values that might be within the unknown
// string. The values are compared via their Levenshtein Distance and ranked
// with the nearest match at the beginning.
func (c *Classifier) multipleMatch(unknown string) *pq.Queue {
	normUnknown := c.normalize(unknown)
	if normUnknown == "" {
		return nil
	}

	m := newMatcher(normUnknown, c.threshold)

	c.muValues.RLock()
	var kvals []*knownValue
	for _, known := range c.values {
		kvals = append(kvals, known)
	}
	c.muValues.RUnlock()

	var wg sync.WaitGroup
	wg.Add(len(kvals))
	for _, known := range kvals {
		go func(known *knownValue) {
			if known.set == nil {
				k := searchset.New(known.normalizedValue, searchset.DefaultGranularity)
				c.muValues.Lock()
				c.values[known.key].set = k
				c.muValues.Unlock()
			}
			m.findMatches(known)
			wg.Done()
		}(known)
	}
	wg.Wait()
	return m.queue
}

// levDist runs the Levenshtein Distance algorithm on the known and unknown
// texts to measure how well they match.
func levDist(unknown, known string) float64 {
	if len(known) == 0 || len(unknown) == 0 {
		log.Printf("Zero-sized texts in Levenshtein Distance algorithm: known==%d, unknown==%d", len(known), len(unknown))
		return 0.0
	}

	// Calculate the differences between the potentially matching known
	// text and the unknown text.
	diffs := dmp.DiffMain(unknown, known, false)
	end := diffRangeEnd(known, diffs)

	// Now execute the Levenshtein Distance algorithm to see how much it
	// does match.
	distance := dmp.DiffLevenshtein(diffs[:end])
	return confidencePercentage(unknownTextLength(unknown, diffs), len(known), distance)
}

// unknownTextLength returns the length of the unknown text based on the diff range.
func unknownTextLength(unknown string, diffs []diffmatchpatch.Diff) int {
	last := len(diffs) - 1
	for ; last >= 0; last-- {
		if diffs[last].Type == diffmatchpatch.DiffEqual {
			break
		}
	}
	ulen := 0
	for i := 0; i < last+1; i++ {
		switch diffs[i].Type {
		case diffmatchpatch.DiffEqual, diffmatchpatch.DiffDelete:
			ulen += len(diffs[i].Text)
		}
	}
	return ulen
}

// diffRangeEnd returns the end index for the "Diff" objects that construct
// (or nearly construct) the "known" value.
func diffRangeEnd(known string, diffs []diffmatchpatch.Diff) (end int) {
	var seen string
	for end = 0; end < len(diffs); end++ {
		if seen == known {
			// Once we've constructed the "known" value, then we've
			// reached the point in the diff list where more
			// "Diff"s would just make the Levenshtein Distance
			// less valid. There shouldn't be further "DiffEqual"
			// nodes, because there's nothing further to match in
			// the "known" text.
			break
		}
		switch diffs[end].Type {
		case diffmatchpatch.DiffEqual, diffmatchpatch.DiffInsert:
			seen += diffs[end].Text
		}
	}
	return end
}

// confidencePercentage calculates how confident we are in the result of the
// match. A percentage of "1.0" means an identical match. A confidence of "0.0"
// means a complete mismatch.
func confidencePercentage(ulen, klen, distance int) float64 {
	if ulen == 0 && klen == 0 {
		return 1.0
	}
	if ulen == 0 || klen == 0 || (distance > ulen && distance > klen) {
		return 0.0
	}
	return 1.0 - float64(distance)/float64(max(ulen, klen))
}

// diffRatio calculates the ratio of the lengths of s1 and s2, returned as a
// percentage of the length of the longer string. E.g., diffRatio("abcd", "e")
// would return 0.25 because "e" is 25% of the size of "abcd". Comparing
// strings of equal length will return 1.
func diffRatio(s1, s2 string) float64 {
	x, y := len(s1), len(s2)
	if x == 0 && y == 0 {
		// Both strings are zero length
		return 1.0
	}
	if x < y {
		return float64(x) / float64(y)
	}
	return float64(y) / float64(x)
}

func max(a, b int) int {
	if a > b {
		return a
	}
	return b
}

func min(a, b int) int {
	if a < b {
		return a
	}
	return b
}

// wsRegexp is a regexp used to identify blocks of whitespace.
var wsRegexp = regexp.MustCompile(`\s+`)

// FlattenWhitespace will flatten contiguous blocks of whitespace down to a single space.
var FlattenWhitespace NormalizeFunc = func(s string) string {
	return wsRegexp.ReplaceAllString(s, " ")
}
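
New, AddValue, and NearestMatch above make up the whole public flow of this file. A small end-to-end sketch under made-up inputs (the corpus key and strings are illustrative only):

package main

import (
	"fmt"
	"log"

	"github.com/google/licenseclassifier/stringclassifier"
)

func main() {
	// Register a tiny corpus of known values.
	sc := stringclassifier.New(stringclassifier.DefaultConfidenceThreshold,
		stringclassifier.FlattenWhitespace)
	if err := sc.AddValue("greeting", "hello world"); err != nil {
		log.Fatal(err)
	}

	// NearestMatch ranks known values by Levenshtein-based confidence;
	// a near-miss still matches, just with confidence below 1.0.
	m := sc.NearestMatch("hello  wrld")
	fmt.Printf("%s (confidence %.2f)\n", m.Name, m.Confidence)
}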
111 vendor/github.com/google/licenseclassifier/stringclassifier/internal/pq/priority.go generated vendored Normal file
@ -0,0 +1,111 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package pq provides a priority queue.
package pq

import "container/heap"

// NewQueue returns an unbounded priority queue that compares elements using
// less; the minimal element is at the top of the queue.
//
// If setIndex is not nil, the queue calls setIndex to inform each element of
// its position in the queue. If an element's priority changes, its position in
// the queue may be incorrect. Call Fix on the element's index to update the
// queue. Call Remove on the element's index to remove it from the queue.
func NewQueue(less func(x, y interface{}) bool, setIndex func(x interface{}, idx int)) *Queue {
	return &Queue{
		heap: pqHeap{
			less:     less,
			setIndex: setIndex,
		},
	}
}

// Queue is a priority queue that supports updating the priority of an element.
// A Queue must be created with NewQueue.
type Queue struct {
	heap pqHeap
}

// Len returns the number of elements in the queue.
func (pq *Queue) Len() int {
	return pq.heap.Len()
}

// Push adds x to the queue.
func (pq *Queue) Push(x interface{}) {
	heap.Push(&pq.heap, x)
}

// Min returns the minimal element.
// Min panics if the queue is empty.
func (pq *Queue) Min() interface{} {
	return pq.heap.a[0]
}

// Pop removes and returns the minimal element.
// Pop panics if the queue is empty.
func (pq *Queue) Pop() interface{} {
	return heap.Pop(&pq.heap)
}

// Fix adjusts the heap to reflect that the element at index has changed priority.
func (pq *Queue) Fix(index int) {
	heap.Fix(&pq.heap, index)
}

// Remove removes the element at the given index from the heap.
func (pq *Queue) Remove(index int) {
	heap.Remove(&pq.heap, index)
}

// pqHeap implements heap.Interface.
type pqHeap struct {
	a        []interface{}
	less     func(x, y interface{}) bool
	setIndex func(x interface{}, idx int)
}

func (h pqHeap) Len() int {
	return len(h.a)
}

func (h pqHeap) Less(i, j int) bool {
	return h.less(h.a[i], h.a[j])
}

func (h pqHeap) Swap(i, j int) {
	h.a[i], h.a[j] = h.a[j], h.a[i]
	if h.setIndex != nil {
		h.setIndex(h.a[i], i)
		h.setIndex(h.a[j], j)
	}
}

func (h *pqHeap) Push(x interface{}) {
	n := len(h.a)
	if h.setIndex != nil {
		h.setIndex(x, n)
	}
	h.a = append(h.a, x)
}

func (h *pqHeap) Pop() interface{} {
	old := h.a
	n := len(old)
	x := old[n-1]
	h.a = old[:n-1]
	return x
}
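
Because the package lives under internal/, only code inside stringclassifier can import it; the sketch below just illustrates how the queue behaves with a simple less function, as if exercised from within that module:

package main

import (
	"fmt"

	"github.com/google/licenseclassifier/stringclassifier/internal/pq"
)

func main() {
	// Order ints ascending; the smallest element surfaces first.
	q := pq.NewQueue(func(x, y interface{}) bool {
		return x.(int) < y.(int)
	}, nil)
	for _, n := range []int{5, 1, 3} {
		q.Push(n)
	}
	for q.Len() > 0 {
		fmt.Println(q.Pop()) // 1, then 3, then 5
	}
}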
491 vendor/github.com/google/licenseclassifier/stringclassifier/searchset/searchset.go generated vendored Normal file
@ -0,0 +1,491 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package searchset generates hashes for all substrings of a text. Potential
// matches between two SearchSet objects can then be determined quickly.
// Generating the hashes can be expensive, so it's best to perform it once. If
// the text is part of a known corpus, then the SearchSet can be serialized and
// kept in an archive.
//
// Matching occurs by "mapping" ranges from the source text into the target
// text but still retaining the source order:
//
//   SOURCE: |-----------------------------|
//
//   TARGET: |*****************************************|
//
//   MAP SOURCE SECTIONS ONTO TARGET IN SOURCE ORDER:
//
//   S:    |-[--]-----[---]------[----]------|
//            /           |            \
//       |---|      |---------|    |-------------|
//   T:  |*****************************************|
//
// Note that a single source range may match many different ranges in the
// target. The matching algorithm untangles these so that all matched ranges
// are in order with respect to the source ranges. This is especially important
// since the source text may occur more than once in the target text. The
// algorithm finds each potential occurrence of S in T and returns all as
// potential matched ranges.
package searchset

import (
	"encoding/gob"
	"fmt"
	"io"
	"sort"

	"github.com/google/licenseclassifier/stringclassifier/searchset/tokenizer"
)

// DefaultGranularity is the minimum size (in words) of the hash chunks.
const DefaultGranularity = 3

// SearchSet is a set of substrings that have hashes associated with them,
// making it fast to search for potential matches.
type SearchSet struct {
	// Tokens is a tokenized list of the original input string.
	Tokens tokenizer.Tokens
	// Hashes is a map of checksums to a range of tokens.
	Hashes tokenizer.Hash
	// Checksums is a list of checksums ordered from longest range to
	// shortest.
	Checksums []uint32
	// ChecksumRanges are the token ranges for the above checksums.
	ChecksumRanges tokenizer.TokenRanges

	nodes []*node
}

// node consists of a range of tokens along with the checksum for those tokens.
type node struct {
	checksum uint32
	tokens   *tokenizer.TokenRange
}

func (n *node) String() string {
	return fmt.Sprintf("[%d:%d]", n.tokens.Start, n.tokens.End)
}

// New creates a new SearchSet object. It generates a hash for each substring of "s".
func New(s string, granularity int) *SearchSet {
	toks := tokenizer.Tokenize(s)

	// Start generating hash values for all substrings within the text.
	h := make(tokenizer.Hash)
	checksums, tokenRanges := toks.GenerateHashes(h, func(a, b int) int {
		if a < b {
			return a
		}
		return b
	}(len(toks), granularity))
	sset := &SearchSet{
		Tokens:         toks,
		Hashes:         h,
		Checksums:      checksums,
		ChecksumRanges: tokenRanges,
	}
	sset.GenerateNodeList()
	return sset
}

// GenerateNodeList creates a node list out of the search set.
func (s *SearchSet) GenerateNodeList() {
	if len(s.Tokens) == 0 {
		return
	}

	for i := 0; i < len(s.Checksums); i++ {
		s.nodes = append(s.nodes, &node{
			checksum: s.Checksums[i],
			tokens:   s.ChecksumRanges[i],
		})
	}
}

// Serialize emits the SearchSet out so that it can be recreated at a later
// time.
func (s *SearchSet) Serialize(w io.Writer) error {
	return gob.NewEncoder(w).Encode(s)
}

// Deserialize reads a file with a serialized SearchSet in it and reconstructs it.
func Deserialize(r io.Reader, s *SearchSet) error {
	if err := gob.NewDecoder(r).Decode(&s); err != nil {
		return err
	}
	s.GenerateNodeList()
	return nil
}

// MatchRange is the range within the source text that is a match to the range
// in the target text.
type MatchRange struct {
	// Offsets into the source tokens.
	SrcStart, SrcEnd int
	// Offsets into the target tokens.
	TargetStart, TargetEnd int
}

// in returns true if the start and end are enclosed in the match range.
func (m *MatchRange) in(start, end int) bool {
	return start >= m.TargetStart && end <= m.TargetEnd
}

func (m *MatchRange) String() string {
	return fmt.Sprintf("[%v, %v)->[%v, %v)", m.SrcStart, m.SrcEnd, m.TargetStart, m.TargetEnd)
}

// MatchRanges is a list of "MatchRange"s. The ranges are monotonically
// increasing in value and indicate a single potential occurrence of the source
// text in the target text.
type MatchRanges []*MatchRange

func (m MatchRanges) Len() int      { return len(m) }
func (m MatchRanges) Swap(i, j int) { m[i], m[j] = m[j], m[i] }
func (m MatchRanges) Less(i, j int) bool {
	if m[i].TargetStart < m[j].TargetStart {
		return true
	}
	return m[i].TargetStart == m[j].TargetStart && m[i].SrcStart < m[j].SrcStart
}

// TargetRange returns the start and stop token offsets into the target text.
func (m MatchRanges) TargetRange(target *SearchSet) (start, end int) {
	start = target.Tokens[m[0].TargetStart].Offset
	end = target.Tokens[m[len(m)-1].TargetEnd-1].Offset + len(target.Tokens[m[len(m)-1].TargetEnd-1].Text)
	return start, end
}

// Size is the number of source tokens that were matched.
func (m MatchRanges) Size() int {
	sum := 0
	for _, mr := range m {
		sum += mr.SrcEnd - mr.SrcStart
	}
	return sum
}

// FindPotentialMatches returns the ranges in the target (unknown) text that
// are the best potential matches to the source (known) text.
func FindPotentialMatches(src, target *SearchSet) []MatchRanges {
	matchedRanges := getMatchedRanges(src, target)
	if len(matchedRanges) == 0 {
		return nil
	}

	// Clean up the matching ranges so that we get the longest contiguous ranges.
	for i := 0; i < len(matchedRanges); i++ {
		matchedRanges[i] = coalesceMatchRanges(matchedRanges[i])
	}
	return matchedRanges
}

// getMatchedRanges finds the ranges in the target text that match the source
// text. There can be multiple occurrences of the source text within the target
// text. Each separate occurrence is an entry in the returned slice.
func getMatchedRanges(src, target *SearchSet) []MatchRanges {
	matched := targetMatchedRanges(src, target)
	if len(matched) == 0 {
		return nil
	}
	sort.Sort(matched)
	matched = untangleSourceRanges(matched)
	matchedRanges := splitRanges(matched)
	return mergeConsecutiveRanges(matchedRanges)
}

func extendsAny(tr tokenizer.TokenRanges, mr []MatchRanges) bool {
	if len(mr) == 0 {
		return false
	}
	for _, tv := range tr {
		for _, mv := range mr {
			if tv.Start >= mv[0].TargetStart && tv.Start <= mv[len(mv)-1].TargetEnd {
				return true
			}
		}
	}
	return false
}

// targetMatchedRanges finds matching sequences in target and src, ordered by
// target position.
func targetMatchedRanges(src, target *SearchSet) MatchRanges {
	if src.nodes == nil {
		return nil
	}

	var matched MatchRanges
	var previous *node
	var possible []MatchRanges
	for _, tgtNode := range target.nodes {
		sr, ok := src.Hashes[tgtNode.checksum]
		if !ok || (previous != nil && tgtNode.tokens.Start > previous.tokens.End) || !extendsAny(sr, possible) {
			for _, r := range possible {
				matched = append(matched, r...)
			}
			possible = possible[:0]
			previous = nil
		}
		if !ok {
			// There isn't a match in the source.
			continue
		}

		// Maps an index within `possible` to the slice of ranges extended by a new range.
		extended := make(map[int]*MatchRanges)
		// Go over the set of source ranges growing lists of `possible` match ranges.
		tv := tgtNode.tokens
		for _, sv := range sr {
			r := &MatchRange{
				SrcStart:    sv.Start,
				SrcEnd:      sv.End,
				TargetStart: tv.Start,
				TargetEnd:   tv.End,
			}
			found := false
			// Grow or extend each abutting `possible` match range.
			for i, p := range possible {
				last := p[len(p)-1]
				if sv.Start >= last.SrcStart && sv.Start <= last.SrcEnd && tv.Start >= last.TargetStart && tv.Start <= last.TargetEnd {
					found = true
					possible[i] = append(possible[i], r)
					extended[i] = &possible[i]
				}
			}
			if !found {
				// Did not abut any existing ranges; start a new `possible` match range.
				mrs := make(MatchRanges, 0, 2)
				mrs = append(mrs, r)
				possible = append(possible, mrs)
				extended[len(possible)-1] = &possible[len(possible)-1]
			}
		}
		if len(extended) < len(possible) {
			// Ranges not extended -- add to `matched` if not included in another range.
			for i := 0; i < len(possible); {
				_, updated := extended[i]
				if updated {
					i++ // Keep in `possible` and advance to the next index.
					continue
				}
				p1 := possible[i]
				found := false // whether found as a subrange of another `possible` match.
				for _, p2 := range extended {
					if p1[0].SrcStart >= (*p2)[0].SrcStart && p1[0].TargetStart >= (*p2)[0].TargetStart {
						found = true
						break
					}
				}
				if !found {
					matched = append(matched, p1...)
				} // else included in another match.
				// Finished -- delete from `possible` and continue from the same index.
				possible = append(possible[:i], possible[i+1:]...)
			}
		}
		previous = tgtNode
	}
	// At the end of the file, terminate all `possible` match ranges.
	for i := 0; i < len(possible); i++ {
		p1 := possible[i]
		found := false // whether found as a subrange of another `possible` match.
		for j := i + 1; j < len(possible); {
			p2 := possible[j]
			if p1[0].SrcStart <= p2[0].SrcStart && p1[0].TargetStart <= p2[0].TargetStart {
				// Delete later sub-ranges included in this range.
				possible = append(possible[:j], possible[j+1:]...)
				continue
			}
			// Skip if a subrange of a later range.
			if p1[0].SrcStart >= p2[0].SrcStart && p1[0].TargetStart >= p2[0].TargetStart {
				found = true
			}
			j++
		}
		if !found {
			matched = append(matched, p1...)
		}
	}
	return matched
}

// untangleSourceRanges goes through the ranges and removes any whose source
// ranges are "out of order". A source range is "out of order" if the source
// range is out of sequence with the source ranges before and after it. This
// happens when more than one source range maps to the same target range.
// E.g.:
//
//        SrcStart: 20, SrcEnd: 30, TargetStart: 127, TargetEnd: 137
//     1: SrcStart: 12, SrcEnd: 17, TargetStart: 138, TargetEnd: 143
//     2: SrcStart: 32, SrcEnd: 37, TargetStart: 138, TargetEnd: 143
//        SrcStart: 38, SrcEnd: 40, TargetStart: 144, TargetEnd: 146
//
// Here (1) is out of order, because its source range [12, 17) is out of
// sequence with the surrounding source ranges, while (2)'s range [32, 37) is
// in sequence.
func untangleSourceRanges(matched MatchRanges) MatchRanges {
	mr := MatchRanges{matched[0]}
NEXT:
	for i := 1; i < len(matched); i++ {
		if mr[len(mr)-1].TargetStart == matched[i].TargetStart && mr[len(mr)-1].TargetEnd == matched[i].TargetEnd {
			// The matched range has already been added.
			continue
		}

		if i+1 < len(matched) && equalTargetRange(matched[i], matched[i+1]) {
			// A sequence of ranges match the same target range.
			// Find the first one that has a source range greater
			// than the currently matched range. Omit all others.
			if matched[i].SrcStart > mr[len(mr)-1].SrcStart {
				mr = append(mr, matched[i])
				continue
			}

			for j := i + 1; j < len(matched) && equalTargetRange(matched[i], matched[j]); j++ {
				// Check subsequent ranges to see if we can
				// find one that matches in the correct order.
				if matched[j].SrcStart > mr[len(mr)-1].SrcStart {
					mr = append(mr, matched[j])
					i = j
					continue NEXT
				}
			}
		}

		mr = append(mr, matched[i])
	}
	return mr
}

// equalTargetRange returns true if the two MatchRange's cover the same target range.
func equalTargetRange(this, that *MatchRange) bool {
	return this.TargetStart == that.TargetStart && this.TargetEnd == that.TargetEnd
}

// splitRanges splits the matched ranges so that a single match range has a
// monotonically increasing source range (indicating a single, potential
// instance of the source in the target).
func splitRanges(matched MatchRanges) []MatchRanges {
	var matchedRanges []MatchRanges
	mr := MatchRanges{matched[0]}
	for i := 1; i < len(matched); i++ {
		if mr[len(mr)-1].SrcStart > matched[i].SrcStart {
			matchedRanges = append(matchedRanges, mr)
			mr = MatchRanges{matched[i]}
		} else {
			mr = append(mr, matched[i])
		}
	}
	matchedRanges = append(matchedRanges, mr)
	return matchedRanges
}

// mergeConsecutiveRanges goes through the matched ranges and merges
// consecutive ranges. Two ranges are consecutive if the end of the previous
// matched range and beginning of the next matched range overlap. "matched"
// should have 1 or more MatchRanges, each with one or more MatchRange objects.
func mergeConsecutiveRanges(matched []MatchRanges) []MatchRanges {
	mr := []MatchRanges{matched[0]}

	// Convenience functions.
	prevMatchedRange := func() MatchRanges {
		return mr[len(mr)-1]
	}
	prevMatchedRangeLastElem := func() *MatchRange {
		return prevMatchedRange()[len(prevMatchedRange())-1]
	}

	// This algorithm compares the start of each MatchRanges object to the
	// end of the previous MatchRanges object. If they overlap, then it
	// tries to combine them. Note that a 0 offset into a MatchRanges
	// object (e.g., matched[i][0]) is its first MatchRange, which
	// indicates the start of the whole matched range.
NEXT:
	for i := 1; i < len(matched); i++ {
		if prevMatchedRangeLastElem().TargetEnd > matched[i][0].TargetStart {
			// Consecutive matched ranges overlap. Merge them.
			if prevMatchedRangeLastElem().TargetStart < matched[i][0].TargetStart {
				// The last element of the previous matched
				// range overlaps with the first element of the
				// current matched range. Concatenate them.
				if prevMatchedRangeLastElem().TargetEnd < matched[i][0].TargetEnd {
					prevMatchedRangeLastElem().SrcEnd += matched[i][0].TargetEnd - prevMatchedRangeLastElem().TargetEnd
					prevMatchedRangeLastElem().TargetEnd = matched[i][0].TargetEnd
				}
				mr[len(mr)-1] = append(prevMatchedRange(), matched[i][1:]...)
				continue
			}

			for j := 1; j < len(matched[i]); j++ {
				// Find the positions in the ranges where the
				// tail end of the previous matched range
				// overlaps with the start of the next matched
				// range.
				for k := len(prevMatchedRange()) - 1; k > 0; k-- {
					if prevMatchedRange()[k].SrcStart < matched[i][j].SrcStart &&
						prevMatchedRange()[k].TargetStart < matched[i][j].TargetStart {
						// Append the next range to the previous range.
						if prevMatchedRange()[k].TargetEnd < matched[i][j].TargetStart {
							// Coalesce the ranges.
							prevMatchedRange()[k].SrcEnd += matched[i][j-1].TargetEnd - prevMatchedRange()[k].TargetEnd
							prevMatchedRange()[k].TargetEnd = matched[i][j-1].TargetEnd
						}
						mr[len(mr)-1] = append(prevMatchedRange()[:k+1], matched[i][j:]...)
						continue NEXT
					}
				}
			}
		}
		mr = append(mr, matched[i])
	}
	return mr
}

// coalesceMatchRanges coalesces overlapping match ranges into a single
// contiguous match range.
func coalesceMatchRanges(matchedRanges MatchRanges) MatchRanges {
	coalesced := MatchRanges{matchedRanges[0]}
	for i := 1; i < len(matchedRanges); i++ {
		c := coalesced[len(coalesced)-1]
		mr := matchedRanges[i]

		if mr.SrcStart <= c.SrcEnd && mr.SrcStart >= c.SrcStart {
			var se, ts, te int
			if mr.SrcEnd > c.SrcEnd {
				se = mr.SrcEnd
			} else {
				se = c.SrcEnd
			}
			if mr.TargetStart < c.TargetStart {
				ts = mr.TargetStart
			} else {
				ts = c.TargetStart
			}
			if mr.TargetEnd > c.TargetEnd {
				te = mr.TargetEnd
			} else {
				te = c.TargetEnd
			}
			coalesced[len(coalesced)-1] = &MatchRange{
				SrcStart:    c.SrcStart,
				SrcEnd:      se,
				TargetStart: ts,
				TargetEnd:   te,
			}
		} else {
			coalesced = append(coalesced, mr)
		}
	}
	return coalesced
}
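
A short sketch of the exported searchset flow above, with made-up source and target strings; under DefaultGranularity the embedded sentence should surface as a candidate range:

package main

import (
	"fmt"

	"github.com/google/licenseclassifier/stringclassifier/searchset"
)

func main() {
	// Known (source) text and a larger unknown (target) text that embeds it.
	src := searchset.New("permission is hereby granted free of charge",
		searchset.DefaultGranularity)
	target := searchset.New("header text. permission is hereby granted free of charge. footer text.",
		searchset.DefaultGranularity)

	// Each MatchRanges entry is one potential occurrence of src in target;
	// TargetRange converts token indices back to byte offsets.
	for _, mr := range searchset.FindPotentialMatches(src, target) {
		start, end := mr.TargetRange(target)
		fmt.Printf("candidate match at target offsets [%d, %d)\n", start, end)
	}
}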
175 vendor/github.com/google/licenseclassifier/stringclassifier/searchset/tokenizer/tokenizer.go generated vendored Normal file
@ -0,0 +1,175 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package tokenizer converts a text into a stream of tokens.
package tokenizer

import (
	"bytes"
	"fmt"
	"hash/crc32"
	"sort"
	"unicode"
	"unicode/utf8"
)

// token is a non-whitespace sequence (i.e., word or punctuation) in the
// original string. This is not meant for use outside of this package.
type token struct {
	Text   string
	Offset int
}

// Tokens is a list of token objects.
type Tokens []*token

// newToken creates a new token object with an invalid (negative) offset, which
// will be set before the token's used.
func newToken() *token {
	return &token{Offset: -1}
}

// Tokenize converts a string into a stream of tokens.
func Tokenize(s string) (toks Tokens) {
	tok := newToken()
	for i := 0; i < len(s); {
		r, size := utf8.DecodeRuneInString(s[i:])
		switch {
		case unicode.IsSpace(r):
			if tok.Offset >= 0 {
				toks = append(toks, tok)
				tok = newToken()
			}
		case unicode.IsPunct(r):
			if tok.Offset >= 0 {
				toks = append(toks, tok)
				tok = newToken()
			}
			toks = append(toks, &token{
				Text:   string(r),
				Offset: i,
			})
		default:
			if tok.Offset == -1 {
				tok.Offset = i
			}
			tok.Text += string(r)
		}
		i += size
	}
	if tok.Offset != -1 {
		// Add any remaining token that wasn't yet included in the list.
		toks = append(toks, tok)
	}
	return toks
}
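
// Illustrative only, not part of the vendored file: given the input
// "hello, world", Tokenize would produce tokens along the lines of
//
//	{Text: "hello", Offset: 0}
//	{Text: ",", Offset: 5}
//	{Text: "world", Offset: 7}
//
// i.e. words and punctuation become separate tokens, and whitespace only
// acts as a delimiter.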

// GenerateHashes generates hashes for "size" length substrings. The
// "stringifyTokens" call takes a long time to run, so not all substrings have
// hashes, i.e. we skip some of the smaller substrings.
func (t Tokens) GenerateHashes(h Hash, size int) ([]uint32, TokenRanges) {
	if size == 0 {
		return nil, nil
	}

	var css []uint32
	var tr TokenRanges
	for offset := 0; offset+size <= len(t); offset += size / 2 {
		var b bytes.Buffer
		t.stringifyTokens(&b, offset, size)
		cs := crc32.ChecksumIEEE(b.Bytes())
		css = append(css, cs)
		tr = append(tr, &TokenRange{offset, offset + size})
		h.add(cs, offset, offset+size)
		if size <= 1 {
			break
		}
	}

	return css, tr
}
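
// Illustrative only, not part of the vendored file: the loop above slides a
// window of "size" tokens in steps of size/2, so consecutive windows overlap
// by half. For len(t) == 10 and size == 4, the hashed token ranges are
// [0, 4), [2, 6), [4, 8) and [6, 10); a substring starting between window
// boundaries is therefore still largely covered by one of the windows.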

// stringifyTokens serializes a sublist of tokens into a bytes buffer.
func (t Tokens) stringifyTokens(b *bytes.Buffer, offset, size int) {
	for j := offset; j < offset+size; j++ {
		if j != offset {
			b.WriteRune(' ')
		}
		b.WriteString(t[j].Text)
	}
}

// TokenRange indicates the range of tokens that map to a particular checksum.
type TokenRange struct {
	Start int
	End   int
}

func (t *TokenRange) String() string {
	return fmt.Sprintf("[%v, %v)", t.Start, t.End)
}

// TokenRanges is a list of TokenRange objects. The chance that two different
// strings map to the same checksum is very small, but unfortunately isn't
// zero, so we use this instead of making the assumption that they will all be
// unique.
type TokenRanges []*TokenRange

func (t TokenRanges) Len() int           { return len(t) }
func (t TokenRanges) Swap(i, j int)      { t[i], t[j] = t[j], t[i] }
func (t TokenRanges) Less(i, j int) bool { return t[i].Start < t[j].Start }

// CombineUnique returns the combination of both token ranges with no duplicates.
func (t TokenRanges) CombineUnique(other TokenRanges) TokenRanges {
	if len(other) == 0 {
		return t
	}
	if len(t) == 0 {
		return other
	}

	cu := append(t, other...)
	sort.Sort(cu)

	if len(cu) == 0 {
		return nil
	}

	res := TokenRanges{cu[0]}
	for prev, i := cu[0], 1; i < len(cu); i++ {
		if prev.Start != cu[i].Start || prev.End != cu[i].End {
			res = append(res, cu[i])
			prev = cu[i]
		}
	}
	return res
}

// Hash is a map of the hashes of a section of text to the token range covering that text.
type Hash map[uint32]TokenRanges

// add associates a token range, [start, end], to a checksum.
func (h Hash) add(checksum uint32, start, end int) {
	ntr := &TokenRange{Start: start, End: end}
	if r, ok := h[checksum]; ok {
		for _, tr := range r {
			if tr.Start == ntr.Start && tr.End == ntr.End {
				// The token range already exists at this
				// checksum. No need to re-add it.
				return
			}
		}
	}
	h[checksum] = append(h[checksum], ntr)
}

@@ -0,0 +1,202 @@

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.

@@ -9,21 +9,35 @@ This is a helper script to run the presubmit tests. To use it:

1. Source this script.

1. Define the functions `build_tests()` and `unit_tests()`. They should run all
tests (i.e., not fail fast), and return 0 if all passed, 1 if a failure
occurred. The environment variables `RUN_BUILD_TESTS`, `RUN_UNIT_TESTS` and
`RUN_INTEGRATION_TESTS` are set to 0 (false) or 1 (true) accordingly. If
`--emit-metrics` is passed, `EMIT_METRICS` will be set to 1.
1. [optional] Define the function `build_tests()`. If you don't define this
function, the default action for running the build tests is to:
   - lint and link check markdown files
   - run `go build` on the entire repo
   - run `/hack/verify-codegen.sh` (if it exists)
   - check licenses in `/cmd` (if it exists)

1. [optional] Define the function `integration_tests()`, just like the previous
ones. If you don't define this function, the default action for running the
integration tests is to call the `./test/e2e-tests.sh` script (passing the
`--emit-metrics` flag if necessary).
1. [optional] Define the functions `pre_build_tests()` and/or
`post_build_tests()`. These functions will be called before or after the
build tests (either your custom one or the default action) and will cause
the test to fail if they don't return success.

1. [optional] Define the functions `pre_integration_tests()` or
`post_integration_tests()`. These functions will be called before or after the
integration tests (either your custom one or the default action) and will cause
the test to fail if they don't return success.
1. [optional] Define the function `unit_tests()`. If you don't define this
function, the default action for running the unit tests is to run all go tests
in the repo.

1. [optional] Define the functions `pre_unit_tests()` and/or
`post_unit_tests()`. These functions will be called before or after the
unit tests (either your custom one or the default action) and will cause
the test to fail if they don't return success.

1. [optional] Define the function `integration_tests()`. If you don't define
this function, the default action for running the integration tests is to run
all `./test/e2e-*tests.sh` scripts, in sequence.

1. [optional] Define the functions `pre_integration_tests()` and/or
`post_integration_tests()`. These functions will be called before or after the
integration tests (either your custom one or the default action) and will cause
the test to fail if they don't return success.

1. Call the `main()` function passing `$@` (without quotes).

@@ -33,20 +47,27 @@ integration tests).

Use the flags `--build-tests`, `--unit-tests` and `--integration-tests` to run
a specific set of tests. The flag `--emit-metrics` is used to emit metrics when
running the tests, and is automatically handled by the default action (see
above).
running the tests, and is automatically handled by the default action for
integration tests (see above).

The script will automatically skip all presubmit tests for PRs where all changed
files are exempt from tests (e.g., a PR changing only the `OWNERS` file).

Also, for PRs touching only markdown files, the unit and integration tests are
skipped.

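For local runs, the flags can be combined to exercise only a subset of the test
groups. A minimal sketch, assuming the calling script lives at
`./test/presubmit-tests.sh`:

```bash
# Run only the build and unit tests, skipping integration tests.
./test/presubmit-tests.sh --build-tests --unit-tests

# Run everything, emitting metrics during the integration tests.
./test/presubmit-tests.sh --all-tests --emit-metrics
```
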
### Sample presubmit test script

```bash
source vendor/github.com/knative/test-infra/scripts/presubmit-tests.sh

function build_tests() {
  go build .
function post_build_tests() {
  echo "Cleaning up after build tests"
  rm -fr ./build-cache
}

function unit_tests() {
  report_go_test .
  make -C tests test
}

function pre_integration_tests() {

@@ -66,43 +87,44 @@ This is a helper script for Knative E2E test scripts. To use it:

1. Source the script.

1. [optional] Write the `teardown()` function, which will tear down your test
   resources.

1. [optional] Write the `dump_extra_cluster_state()` function. It will be
   called when a test fails, and can dump extra information about the current state
   of the cluster (typically using `kubectl`).

1. [optional] Write the `parse_flags()` function. It will be called whenever an
   unrecognized flag is passed to the script, allowing you to define your own flags.
   The function must return 0 if the flag is unrecognized, or the number of items
   to skip in the command line if the flag was parsed successfully. For example,
   return 1 for a simple flag, and 2 for a flag with a parameter.

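   As a sketch, a `parse_flags()` implementation handling one hypothetical
   simple flag and one hypothetical flag with a parameter could look like:

   ```bash
   function parse_flags() {
     case "$1" in
       --skip-istio)        # hypothetical flag name
         SKIP_ISTIO=1
         return 1           # consumed 1 item
         ;;
       --test-namespace)    # hypothetical flag name
         TEST_NAMESPACE="$2"
         return 2           # consumed the flag and its parameter
         ;;
     esac
     return 0               # unrecognized flag
   }
   ```
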
1. Call the `initialize()` function passing `$@` (without quotes).

1. Write logic for the end-to-end tests. Run all go tests using `go_test_e2e()`
   (or `report_go_test()` if you need a more fine-grained control) and call
   `fail_test()` or `success()` if any of them failed. The environment variables
   `DOCKER_REPO_OVERRIDE`, `K8S_CLUSTER_OVERRIDE` and `K8S_USER_OVERRIDE` will be
   set according to the test cluster. You can also use the following boolean (0 is
   false, 1 is true) environment variables for the logic:
   - `EMIT_METRICS`: true if `--emit-metrics` was passed.
   - `USING_EXISTING_CLUSTER`: true if the test cluster is an already existing one,
     and not a temporary cluster created by `kubetest`.

   All environment variables above are marked read-only.

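Putting the steps together, a minimal test body could look like the sketch
below (assuming the repo keeps its e2e go tests under `./test/e2e`):

```bash
initialize $@

# Run the e2e go tests and fail the script if any of them fails.
go_test_e2e ./test/e2e || fail_test "end-to-end tests failed"

success
```
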
**Notes:**

1. Calling your script without arguments will create a new cluster in the GCP
   project `$PROJECT_ID` and run the tests against it.

1. Calling your script with `--run-tests` and the variables `K8S_CLUSTER_OVERRIDE`,
   `K8S_USER_OVERRIDE` and `DOCKER_REPO_OVERRIDE` set will immediately start the
   tests against the cluster (see the example after this list).

1. You can force running the tests against a specific GKE cluster version by using
   the `--cluster-version` flag and passing a X.Y.Z version as the flag value.

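For example (hypothetical values), an existing cluster and docker repo can be
targeted directly:

```bash
K8S_CLUSTER_OVERRIDE=gke_my-project_us-central1-a_my-cluster \
K8S_USER_OVERRIDE=tester@my-project.iam.gserviceaccount.com \
DOCKER_REPO_OVERRIDE=gcr.io/my-project/e2e \
./test/e2e-tests.sh --run-tests
```
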
### Sample end-to-end test script

@@ -150,31 +172,38 @@ This is a helper script for Knative release scripts. To use it:

1. Call the `initialize()` function passing `$@` (without quotes).

1. Call the `run_validation_tests()` function passing the script or executable that
   runs the release validation tests. It will call the script to run the tests unless
   `--skip-tests` was passed.

1. Write logic for the release process. Call `publish_yaml()` to publish the manifest(s),
   `tag_releases_in_yaml()` to tag the generated images, `branch_release()` to branch
   named releases. Use the following boolean (0 is false, 1 is true) and string environment
   variables for the logic:
   - `RELEASE_VERSION`: contains the release version if `--version` was passed. This
     also overrides the value of the `TAG` variable as `v<version>`.
   - `RELEASE_BRANCH`: contains the release branch if `--branch` was passed. Otherwise
     it's empty and `master` HEAD will be considered the release branch.
   - `RELEASE_NOTES`: contains the filename with the release notes if `--release-notes`
     was passed. The release notes are a simple markdown file.
   - `RELEASE_GCS_BUCKET`: contains the GCS bucket name to store the manifests if
     `--release-gcs` was passed, otherwise the default value `knative-nightly/<repo>`
     will be used. It is empty if `--publish` was not passed.
   - `KO_DOCKER_REPO`: contains the GCR to store the images if `--release-gcr` was
     passed, otherwise the default value `gcr.io/knative-nightly` will be used. It
     is set to `ko.local` if `--publish` was not passed.
   - `SKIP_TESTS`: true if `--skip-tests` was passed. This is handled automatically
     by the `run_validation_tests()` function.
   - `TAG_RELEASE`: true if `--tag-release` was passed. In this case, the environment
     variable `TAG` will contain the release tag in the form `vYYYYMMDD-<commit_short_hash>`.
   - `PUBLISH_RELEASE`: true if `--publish` was passed. In this case, the environment
     variable `KO_FLAGS` will be updated with the `-L` option.
   - `BRANCH_RELEASE`: true if both `--version` and `--publish-release` were passed.

   All boolean environment variables default to false for safety.

   All environment variables above, except `KO_FLAGS`, are marked read-only once
   `initialize()` is called.

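For instance, a nightly build and a versioned release could be cut with
invocations along these lines (hypothetical script path and values):

```bash
# Nightly: tag the images as vYYYYMMDD-<commit_short_hash> and publish.
./hack/release.sh --publish --tag-release

# Versioned release from a release branch, with release notes.
./hack/release.sh --version 0.2.1 --branch release-0.2 \
    --release-notes notes.md --publish --tag-release
```
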
### Sample release script

@@ -186,14 +215,12 @@ initialize $@

run_validation_tests ./test/presubmit-tests.sh

# config/ contains the manifests
KO_DOCKER_REPO=gcr.io/knative-foo
ko resolve ${KO_FLAGS} -f config/ > release.yaml

tag_images_in_yaml release.yaml $KO_DOCKER_REPO $TAG
tag_images_in_yaml release.yaml

if (( PUBLISH_RELEASE )); then
  # gs://knative-foo hosts the manifest
  publish_yaml release.yaml knative-foo $TAG
  publish_yaml release.yaml
fi

branch_release "Knative Foo" release.yaml
@@ -36,14 +36,17 @@ function build_resource_name() {
}

# Test cluster parameters
readonly E2E_BASE_NAME=k$(basename ${REPO_ROOT_DIR})
readonly E2E_BASE_NAME="k${REPO_NAME}"
readonly E2E_CLUSTER_NAME=$(build_resource_name e2e-cls)
readonly E2E_NETWORK_NAME=$(build_resource_name e2e-net)
readonly E2E_CLUSTER_REGION=us-central1
readonly E2E_CLUSTER_ZONE=${E2E_CLUSTER_REGION}-a
readonly E2E_CLUSTER_NODES=3
readonly E2E_CLUSTER_MACHINE=n1-standard-4
readonly TEST_RESULT_FILE=/tmp/${E2E_BASE_NAME}-e2e-result
# Each knative repository may have a different cluster size requirement here,
# so we allow calling code to set these parameters. If they are not set we
# use some sane defaults.
readonly E2E_MIN_CLUSTER_NODES=${E2E_MIN_CLUSTER_NODES:-1}
readonly E2E_MAX_CLUSTER_NODES=${E2E_MAX_CLUSTER_NODES:-3}
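
# Illustrative only, not part of this script: a repository that needs a larger
# cluster can set these before its e2e script sources this file, e.g.
#   E2E_MIN_CLUSTER_NODES=4 E2E_MAX_CLUSTER_NODES=10 ./test/e2e-tests.sh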

# Flag whether test is using a boskos GCP project
IS_BOSKOS=0
@@ -60,14 +63,6 @@ function teardown_test_resources() {
  rm -fr kubernetes kubernetes.tar.gz
}

# Exit test, dumping current state info.
# Parameters: $1 - error message (optional).
function fail_test() {
  [[ -n $1 ]] && echo "ERROR: $1"
  dump_cluster_state
  exit 1
}

# Run the given E2E tests. Assume tests are tagged e2e, unless `-tags=XXX` is passed.
# Parameters: $1..$n - any go test flags, then directories containing the tests to run.
function go_test_e2e() {
@@ -149,20 +144,21 @@ function create_test_cluster() {
  set -o pipefail

  header "Creating test cluster"

  echo "Cluster will have a minimum of ${E2E_MIN_CLUSTER_NODES} and a maximum of ${E2E_MAX_CLUSTER_NODES} nodes."

  # Smallest cluster required to run the end-to-end-tests
  local CLUSTER_CREATION_ARGS=(
    --gke-create-args="--enable-autoscaling --min-nodes=1 --max-nodes=${E2E_CLUSTER_NODES} --scopes=cloud-platform --enable-basic-auth --no-issue-client-certificate"
    --gke-shape={\"default\":{\"Nodes\":${E2E_CLUSTER_NODES}\,\"MachineType\":\"${E2E_CLUSTER_MACHINE}\"}}
    --gke-create-args="--enable-autoscaling --min-nodes=${E2E_MIN_CLUSTER_NODES} --max-nodes=${E2E_MAX_CLUSTER_NODES} --scopes=cloud-platform --enable-basic-auth --no-issue-client-certificate"
    --gke-shape={\"default\":{\"Nodes\":${E2E_MIN_CLUSTER_NODES}\,\"MachineType\":\"${E2E_CLUSTER_MACHINE}\"}}
    --provider=gke
    --deployment=gke
    --cluster="${E2E_CLUSTER_NAME}"
    --gcp-zone="${E2E_CLUSTER_ZONE}"
    --gcp-region="${E2E_CLUSTER_REGION}"
    --gcp-network="${E2E_NETWORK_NAME}"
    --gke-environment=prod
  )
  if (( IS_BOSKOS )); then
    CLUSTER_CREATION_ARGS+=(--gcp-service-account=/etc/service-account/service-account.json)
  else
  if (( ! IS_BOSKOS )); then
    CLUSTER_CREATION_ARGS+=(--gcp-project=${GCP_PROJECT})
  fi
  # SSH keys are not used, but kubetest checks for their existence.
@@ -175,8 +171,8 @@ function create_test_cluster() {
  # be a writeable docker repo.
  export K8S_USER_OVERRIDE=
  export K8S_CLUSTER_OVERRIDE=
  # Assume test failed (see more details at the end of this script).
  echo -n "1"> ${TEST_RESULT_FILE}
  # Assume test failed (see details in set_test_return_code()).
  set_test_return_code 1
  local test_cmd_args="--run-tests"
  (( EMIT_METRICS )) && test_cmd_args+=" --emit-metrics"
  [[ -n "${GCP_PROJECT}" ]] && test_cmd_args+=" --gcp-project ${GCP_PROJECT}"
@@ -241,7 +237,7 @@ function setup_test_cluster() {
  if [[ -z ${K8S_CLUSTER_OVERRIDE} ]]; then
    USING_EXISTING_CLUSTER=0
    export K8S_CLUSTER_OVERRIDE=$(kubectl config current-context)
    acquire_cluster_admin_role ${K8S_USER_OVERRIDE} ${E2E_CLUSTER_NAME} ${E2E_CLUSTER_ZONE}
    acquire_cluster_admin_role ${K8S_USER_OVERRIDE} ${E2E_CLUSTER_NAME} ${E2E_CLUSTER_REGION}
    # Make sure we're in the default namespace. Currently kubetest switches to
    # test-pods namespace when creating the cluster.
    kubectl config set-context $K8S_CLUSTER_OVERRIDE --namespace=default
@@ -257,6 +253,7 @@ function setup_test_cluster() {
  echo "- Docker is ${DOCKER_REPO_OVERRIDE}"

  export KO_DOCKER_REPO="${DOCKER_REPO_OVERRIDE}"
  export KO_DATA_PATH="${REPO_ROOT_DIR}/.git"

  trap teardown_test_resources EXIT
@@ -274,19 +271,34 @@ function setup_test_cluster() {
  set +o pipefail
}

function success() {
# Set the return code that the test script will return.
# Parameters: $1 - return code (0-255)
function set_test_return_code() {
  # kubetest teardown might fail and thus incorrectly report failure of the
  # script, even if the tests pass.
  # We store the real test result to return it later, ignoring any teardown
  # failure in kubetest.
  # TODO(adrcunha): Get rid of this workaround.
  echo -n "0"> ${TEST_RESULT_FILE}
  echo -n "$1"> ${TEST_RESULT_FILE}
}

function success() {
  set_test_return_code 0
  echo "**************************************"
  echo "***        E2E TESTS PASSED        ***"
  echo "**************************************"
  exit 0
}

# Exit test, dumping current state info.
# Parameters: $1 - error message (optional).
function fail_test() {
  set_test_return_code 1
  [[ -n $1 ]] && echo "ERROR: $1"
  dump_cluster_state
  exit 1
}

RUN_TESTS=0
EMIT_METRICS=0
USING_EXISTING_CLUSTER=1
@@ -294,11 +306,6 @@ GCP_PROJECT=""
E2E_SCRIPT=""
E2E_CLUSTER_VERSION=""

function abort() {
  echo "error: $@"
  exit 1
}

# Parse flags and initialize the test cluster.
function initialize() {
  # Normalize calling script path; we can't use readlink because it's not available everywhere
@@ -357,10 +364,8 @@ function initialize() {
  (( IS_PROW )) && [[ -z "${GCP_PROJECT}" ]] && IS_BOSKOS=1

  # Safety checks

  if [[ "${DOCKER_REPO_OVERRIDE}" =~ ^gcr.io/knative-(releases|nightly)/?$ ]]; then
    abort "\$DOCKER_REPO_OVERRIDE is set to ${DOCKER_REPO_OVERRIDE}, which is forbidden"
  fi
  is_protected_gcr ${DOCKER_REPO_OVERRIDE} && \
    abort "\$DOCKER_REPO_OVERRIDE set to ${DOCKER_REPO_OVERRIDE}, which is forbidden"

  readonly RUN_TESTS
  readonly EMIT_METRICS
@@ -23,11 +23,12 @@ readonly SERVING_GKE_VERSION=latest
readonly SERVING_GKE_IMAGE=cos

# Public latest stable nightly images and yaml files.
readonly KNATIVE_ISTIO_CRD_YAML=https://storage.googleapis.com/knative-nightly/serving/latest/istio-crds.yaml
readonly KNATIVE_ISTIO_YAML=https://storage.googleapis.com/knative-nightly/serving/latest/istio.yaml
readonly KNATIVE_SERVING_RELEASE=https://storage.googleapis.com/knative-nightly/serving/latest/release.yaml
readonly KNATIVE_BUILD_RELEASE=https://storage.googleapis.com/knative-nightly/build/latest/release.yaml
readonly KNATIVE_EVENTING_RELEASE=https://storage.googleapis.com/knative-nightly/eventing/latest/release.yaml
readonly KNATIVE_BASE_YAML_SOURCE=https://storage.googleapis.com/knative-nightly/@/latest
readonly KNATIVE_ISTIO_CRD_YAML=${KNATIVE_BASE_YAML_SOURCE/@/serving}/istio-crds.yaml
readonly KNATIVE_ISTIO_YAML=${KNATIVE_BASE_YAML_SOURCE/@/serving}/istio.yaml
readonly KNATIVE_SERVING_RELEASE=${KNATIVE_BASE_YAML_SOURCE/@/serving}/serving.yaml
readonly KNATIVE_BUILD_RELEASE=${KNATIVE_BASE_YAML_SOURCE/@/build}/release.yaml
readonly KNATIVE_EVENTING_RELEASE=${KNATIVE_BASE_YAML_SOURCE/@/eventing}/release.yaml

# Conveniently set GOPATH if unset
if [[ -z "${GOPATH:-}" ]]; then
@@ -41,13 +42,24 @@ fi
[[ -n "${PROW_JOB_ID:-}" ]] && IS_PROW=1 || IS_PROW=0
readonly IS_PROW
readonly REPO_ROOT_DIR="$(git rev-parse --show-toplevel)"
readonly REPO_NAME="$(basename ${REPO_ROOT_DIR})"

# On a Prow job, redirect stderr to stdout so it's synchronously added to log
(( IS_PROW )) && exec 2>&1

# Print error message and exit 1
# Parameters: $1..$n - error message to be displayed
function abort() {
  echo "error: $@"
  exit 1
}

# Display a box banner.
# Parameters: $1 - character to use for the box.
#             $2 - banner message.
function make_banner() {
  local msg="$1$1$1$1 $2 $1$1$1$1"
  local border="${msg//[-0-9A-Za-z _.,\/]/$1}"
  local border="${msg//[-0-9A-Za-z _.,\/()]/$1}"
  echo -e "${border}\n${msg}\n${border}"
}

@@ -72,20 +84,6 @@ function function_exists() {
  [[ "$(type -t $1)" == "function" ]]
}

# Remove ALL images in the given GCR repository.
# Parameters: $1 - GCR repository.
function delete_gcr_images() {
  for image in $(gcloud --format='value(name)' container images list --repository=$1); do
    echo "Checking ${image} for removal"
    delete_gcr_images ${image}
    for digest in $(gcloud --format='get(digest)' container images list-tags ${image} --limit=99999); do
      local full_image="${image}@${digest}"
      echo "Removing ${full_image}"
      gcloud container images delete -q --force-delete-tags ${full_image}
    done
  done
}

# Waits until the given object doesn't exist.
# Parameters: $1 - the kind of the object.
#             $2 - object's name.
@@ -100,8 +98,8 @@ function wait_until_object_does_not_exist() {
  fi
  echo -n "Waiting until ${DESCRIPTION} does not exist"
  for i in {1..150}; do  # timeout after 5 minutes
    if kubectl ${KUBECTL_ARGS} > /dev/null 2>&1; then
      echo "\n${DESCRIPTION} does not exist"
    if ! kubectl ${KUBECTL_ARGS} > /dev/null 2>&1; then
      echo -e "\n${DESCRIPTION} does not exist"
      return 0
    fi
    echo -n "."
@@ -140,7 +138,6 @@ function wait_until_pods_running() {
    sleep 2
  done
  echo -e "\n\nERROR: timeout waiting for pods to come up\n${pods}"
  kubectl get pods -n $1
  return 1
}

@@ -201,12 +198,12 @@ function get_app_pods() {
# Sets the given user as cluster admin.
# Parameters: $1 - user
#             $2 - cluster name
#             $3 - cluster zone
#             $3 - cluster region
function acquire_cluster_admin_role() {
  # Get the password of the admin and use it, as the service account (or the user)
  # might not have the necessary permission.
  local password=$(gcloud --format="value(masterAuth.password)" \
      container clusters describe $2 --zone=$3)
      container clusters describe $2 --region=$3)
  if [[ -n "${password}" ]]; then
    # Cluster created with basic authentication
    kubectl config set-credentials cluster-admin \

@@ -216,9 +213,9 @@ function acquire_cluster_admin_role() {
    local key=$(mktemp)
    echo "Certificate in ${cert}, key in ${key}"
    gcloud --format="value(masterAuth.clientCertificate)" \
        container clusters describe $2 --zone=$3 | base64 -d > ${cert}
        container clusters describe $2 --region=$3 | base64 -d > ${cert}
    gcloud --format="value(masterAuth.clientKey)" \
        container clusters describe $2 --zone=$3 | base64 -d > ${key}
        container clusters describe $2 --region=$3 | base64 -d > ${key}
    kubectl config set-credentials cluster-admin \
        --client-certificate=${cert} --client-key=${key}
  fi
@@ -229,10 +226,10 @@ function acquire_cluster_admin_role() {
      --user=$1
  # Reset back to the default account
  gcloud container clusters get-credentials \
      $2 --zone=$3 --project $(gcloud config get-value project)
      $2 --region=$3 --project $(gcloud config get-value project)
}

# Runs a go test and generates a junit summary through bazel.
# Runs a go test and generates a junit summary.
# Parameters: $1... - parameters to go test
function report_go_test() {
  # Run tests in verbose mode to capture details.
@@ -246,102 +243,18 @@ function report_go_test() {
  fi
  echo "Running tests with '${go_test}'"
  local report=$(mktemp)
  local failed=0
  local test_count=0
  local tests_failed=0
  ${go_test} > ${report} || failed=$?
  ${go_test} | tee ${report}
  local failed=( ${PIPESTATUS[@]} )
  [[ ${failed[0]} -eq 0 ]] && failed=${failed[1]} || failed=${failed[0]}
  echo "Finished run, return code is ${failed}"
  # Tests didn't run.
  [[ ! -s ${report} ]] && return 1
  # Create WORKSPACE file, required to use bazel, if necessary.
  touch WORKSPACE
  local targets=""
  local last_run=""
  local test_files=""
  # Parse the report and generate fake tests for each passing/failing test.
  echo "Start parsing results, summary:"
  while read line ; do
    local fields=(`echo -n ${line}`)
    local field0="${fields[0]}"
    local field1="${fields[1]}"
    local name="${fields[2]}"
    # Deal with a SIGQUIT log entry (usually a test timeout).
    # This is a fallback in case there's no kill signal log entry.
    # SIGQUIT: quit
    if [[ "${field0}" == "SIGQUIT:" ]]; then
      name="${last_run}"
      field1="FAIL:"
      error="${fields[@]}"
    fi
    # Ignore subtests (those containing slashes)
    if [[ -n "${name##*/*}" ]]; then
      local error=""
      # Deal with a kill signal log entry (usually a test timeout).
      # *** Test killed with quit: ran too long (10m0s).
      if [[ "${field0}" == "***" ]]; then
        name="${last_run}"
        field1="FAIL:"
        error="${fields[@]:1}"
      fi
      # Deal with a fatal log entry, which has a different format:
      # fatal   TestFoo   foo_test.go:275 Expected "foo" but got "bar"
      if [[ "${field0}" == "fatal" ]]; then
        name="${field1}"
        field1="FAIL:"
        error="${fields[@]:3}"
      fi
      # Keep track of the test currently running.
      if [[ "${field1}" == "RUN" ]]; then
        last_run="${name}"
      fi
      # Handle regular go test pass/fail entry for a test.
      if [[ "${field1}" == "PASS:" || "${field1}" == "FAIL:" ]]; then
        echo "- ${name} :${field1}"
        test_count=$(( test_count + 1 ))
        local src="${name}.sh"
        echo "exit 0" > ${src}
        if [[ "${field1}" == "FAIL:" ]]; then
          tests_failed=$(( tests_failed + 1 ))
          [[ -z "${error}" ]] && read error
          echo "cat <<ERROR-EOF" > ${src}
          echo "${error}" >> ${src}
          echo "ERROR-EOF" >> ${src}
          echo "exit 1" >> ${src}
        fi
        chmod +x ${src}
        test_files="${test_files} ${src}"
        # Populate BUILD.bazel
        echo "sh_test(name=\"${name}\", srcs=[\"${src}\"])" >> BUILD.bazel
      elif [[ "${field0}" == "FAIL" || "${field0}" == "ok" ]] && [[ -n "${field1}" ]]; then
        echo "- ${field0} ${field1}"
        # Create the package structure, move tests and BUILD file
        local package=${field1/github.com\//}
        local bazel_files="$(ls -1 ${test_files} BUILD.bazel 2> /dev/null)"
        if [[ -n "${bazel_files}" ]]; then
          mkdir -p ${package}
          targets="${targets} //${package}/..."
          mv ${bazel_files} ${package}
        else
          echo "*** INTERNAL ERROR: missing tests for ${package}, got [${bazel_files/$'\n'/, }]"
        fi
        test_files=""
      fi
    fi
  done < ${report}
  echo "Done parsing ${test_count} tests, ${tests_failed} tests failed"
  # If any test failed, show the detailed report.
  # Otherwise, we've already shown the summary.
  # Exception: when emitting metrics, dump the full report.
  if (( failed )) || [[ "$@" == *" -emitmetrics"* ]]; then
    if (( failed )); then
      echo "There were ${tests_failed} test failures, full log:"
    else
      echo "Dumping full log as metrics were requested:"
    fi
    cat ${report}
  fi
  # Always generate the junit summary.
  bazel test ${targets} > /dev/null 2>&1 || true
  # Install go-junit-report if necessary.
  run_go_tool github.com/jstemmer/go-junit-report go-junit-report --help > /dev/null 2>&1
  local xml=$(mktemp ${ARTIFACTS}/junit_XXXXXXXX.xml)
  cat ${report} \
      | go-junit-report \
      | sed -e "s#\"github.com/knative/${REPO_NAME}/#\"#g" \
      > ${xml}
  echo "XML report written to ${xml}"
  return ${failed}
}

@@ -349,11 +262,16 @@ function report_go_test() {
function start_latest_knative_serving() {
  header "Starting Knative Serving"
  subheader "Installing Istio"
  echo "Installing Istio CRD from ${KNATIVE_ISTIO_CRD_YAML}"
  kubectl apply -f ${KNATIVE_ISTIO_CRD_YAML} || return 1
  echo "Installing Istio from ${KNATIVE_ISTIO_YAML}"
  kubectl apply -f ${KNATIVE_ISTIO_YAML} || return 1
  wait_until_pods_running istio-system || return 1
  kubectl label namespace default istio-injection=enabled || return 1
  subheader "Installing Knative Build"
  kubectl apply -f ${KNATIVE_BUILD_RELEASE} || return 1
  subheader "Installing Knative Serving"
  echo "Installing Serving from ${KNATIVE_SERVING_RELEASE}"
  kubectl apply -f ${KNATIVE_SERVING_RELEASE} || return 1
  wait_until_pods_running knative-serving || return 1
  wait_until_pods_running knative-build || return 1
@@ -432,8 +350,22 @@ function check_links_in_markdown() {
}

# Check format of the given markdown files.
# Parameters: $1...$n - files to inspect
# Parameters: $1..$n - files to inspect
function lint_markdown() {
  # https://github.com/markdownlint/markdownlint
  run_lint_tool mdl "linting markdown files" "-r ~MD013" $@
}

# Return 0 if the given parameter is an integer, otherwise 1
# Parameters: $1 - an integer
function is_int() {
  [[ -n $1 && $1 =~ ^[0-9]+$ ]]
}

# Return 0 if the given parameter is the knative release/nightly gcr, 1
# otherwise
# Parameters: $1 - gcr name, e.g. gcr.io/knative-nightly
function is_protected_gcr() {
  [[ -n $1 && "$1" =~ "^gcr.io/knative-(releases|nightly)/?$" ]]
}

@@ -20,7 +20,199 @@

source $(dirname ${BASH_SOURCE})/library.sh

# Extensions or file patterns that don't require presubmit tests.
readonly NO_PRESUBMIT_FILES=(\.md \.png ^OWNERS ^OWNERS_ALIASES)
readonly NO_PRESUBMIT_FILES=(\.png \.gitignore \.gitattributes ^OWNERS ^OWNERS_ALIASES ^AUTHORS)

# Flag if this is a presubmit run or not.
[[ IS_PROW && -n "${PULL_PULL_SHA}" ]] && IS_PRESUBMIT=1 || IS_PRESUBMIT=0
readonly IS_PRESUBMIT

# List of changed files on presubmit, LF separated.
CHANGED_FILES=""

# Flags that this PR is exempt of presubmit tests.
IS_PRESUBMIT_EXEMPT_PR=0

# Flags that this PR contains only changes to documentation.
IS_DOCUMENTATION_PR=0

# Returns true if PR only contains the given file regexes.
# Parameters: $1 - file regexes, space separated.
function pr_only_contains() {
  [[ -z "$(echo "${CHANGED_FILES}" | grep -v \(${1// /\\|}\)$)" ]]
}
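
# Illustrative only, not part of this script: with CHANGED_FILES holding
# "README.md" and "OWNERS", the call
#   pr_only_contains "\.md ^OWNERS"
# succeeds, while it fails once CHANGED_FILES also lists e.g. "main.go".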

# List changed files in the current PR.
# This is implemented as a function so it can be mocked in unit tests.
function list_changed_files() {
  /workspace/githubhelper -list-changed-files
}

# Initialize flags and context for presubmit tests:
# CHANGED_FILES, IS_PRESUBMIT_EXEMPT_PR and IS_DOCUMENTATION_PR.
function initialize_environment() {
  CHANGED_FILES=""
  IS_PRESUBMIT_EXEMPT_PR=0
  IS_DOCUMENTATION_PR=0
  (( ! IS_PRESUBMIT )) && return
  CHANGED_FILES="$(list_changed_files)"
  if [[ -n "${CHANGED_FILES}" ]]; then
    echo -e "Changed files in commit ${PULL_PULL_SHA}:\n${CHANGED_FILES}"
    local no_presubmit_files="${NO_PRESUBMIT_FILES[*]}"
    pr_only_contains "${no_presubmit_files}" && IS_PRESUBMIT_EXEMPT_PR=1
    pr_only_contains "\.md ${no_presubmit_files}" && IS_DOCUMENTATION_PR=1
  else
    header "NO CHANGED FILES REPORTED, ASSUMING IT'S AN ERROR AND RUNNING TESTS ANYWAY"
  fi
  readonly CHANGED_FILES
  readonly IS_DOCUMENTATION_PR
  readonly IS_PRESUBMIT_EXEMPT_PR
}

# Display a pass/fail banner for a test group.
# Parameters: $1 - test group name (e.g., build)
#             $2 - result (0=passed, 1=failed)
function results_banner() {
  local result
  [[ $2 -eq 0 ]] && result="PASSED" || result="FAILED"
  header "$1 tests ${result}"
}

# Run build tests. If there's no `build_tests` function, run the default
# build test runner.
function run_build_tests() {
  (( ! RUN_BUILD_TESTS )) && return 0
  header "Running build tests"
  local failed=0
  # Run pre-build tests, if any
  if function_exists pre_build_tests; then
    pre_build_tests || failed=1
  fi
  # Don't run build tests if pre-build tests failed
  if (( ! failed )); then
    if function_exists build_tests; then
      build_tests || failed=1
    else
      default_build_test_runner || failed=1
    fi
  fi
  # Don't run post-build tests if pre/build tests failed
  if function_exists post_build_tests; then
    post_build_tests || failed=1
  fi
  results_banner "Build" ${failed}
  return ${failed}
}

# Default build test runner that:
# * lint and link check markdown files
# * `go build` on the entire repo
# * run `/hack/verify-codegen.sh` (if it exists)
# * check licenses in `/cmd` (if it exists)
function default_build_test_runner() {
  local failed=0
  # Ignore markdown files in /vendor
  local mdfiles="$(echo "${CHANGED_FILES}" | grep \.md$ | grep -v ^vendor/)"
  if [[ -n "${mdfiles}" ]]; then
    subheader "Linting the markdown files"
    lint_markdown ${mdfiles} || failed=1
    subheader "Checking links in the markdown files"
    check_links_in_markdown ${mdfiles} || failed=1
  fi
  # For documentation PRs, just check the md files
  (( IS_DOCUMENTATION_PR )) && return ${failed}
  # Ensure all the code builds
  subheader "Checking that go code builds"
  go build -v ./... || failed=1
  # Get all build tags in go code (ignore /vendor)
  local tags="$(grep -r '// +build' . \
      | grep -v '^./vendor/' | cut -f3 -d' ' | sort | uniq | tr '\n' ' ')"
  if [[ -n "${tags}" ]]; then
    go test -run=^$ -tags="${tags}" ./... || failed=1
  fi
  if [[ -f ./hack/verify-codegen.sh ]]; then
    subheader "Checking autogenerated code is up-to-date"
    ./hack/verify-codegen.sh || failed=1
  fi
  # Check that we don't have any forbidden licenses in our images.
  if [[ -d ./cmd ]]; then
    subheader "Checking for forbidden licenses"
    check_licenses ./cmd/* || failed=1
  fi
  return ${failed}
}

# Run unit tests. If there's no `unit_tests` function, run the default
# unit test runner.
function run_unit_tests() {
  (( ! RUN_UNIT_TESTS )) && return 0
  header "Running unit tests"
  local failed=0
  # Run pre-unit tests, if any
  if function_exists pre_unit_tests; then
    pre_unit_tests || failed=1
  fi
  # Don't run unit tests if pre-unit tests failed
  if (( ! failed )); then
    if function_exists unit_tests; then
      unit_tests || failed=1
    else
      default_unit_test_runner || failed=1
    fi
  fi
  # Don't run post-unit tests if pre/unit tests failed
  if function_exists post_unit_tests; then
    post_unit_tests || failed=1
  fi
  results_banner "Unit" ${failed}
  return ${failed}
}

# Default unit test runner that runs all go tests in the repo.
function default_unit_test_runner() {
  report_go_test ./...
}

# Run integration tests. If there's no `integration_tests` function, run the
# default integration test runner.
function run_integration_tests() {
  # Don't run integration tests if not requested OR on documentation PRs
  (( ! RUN_INTEGRATION_TESTS )) && return 0
  (( IS_DOCUMENTATION_PR )) && return 0
  header "Running integration tests"
  local failed=0
  # Run pre-integration tests, if any
  if function_exists pre_integration_tests; then
    pre_integration_tests || failed=1
  fi
  # Don't run integration tests if pre-integration tests failed
  if (( ! failed )); then
    if function_exists integration_tests; then
      integration_tests || failed=1
    else
      default_integration_test_runner || failed=1
    fi
  fi
  # Don't run post-integration tests if pre/integration tests failed
  if (( ! failed )) && function_exists post_integration_tests; then
    post_integration_tests || failed=1
  fi
  results_banner "Integration" ${failed}
  return ${failed}
}

# Default integration test runner that runs all `test/e2e-*tests.sh`.
function default_integration_test_runner() {
  local options=""
  local failed=0
  (( EMIT_METRICS )) && options="--emit-metrics"
  for e2e_test in $(find test/ -name e2e-*tests.sh); do
    echo "Running integration test ${e2e_test}"
    if ! ${e2e_test} ${options}; then
      failed=1
    fi
  done
  return ${failed}
}

# Options set by command-line flags.
RUN_BUILD_TESTS=0
@@ -28,34 +220,13 @@ RUN_UNIT_TESTS=0
RUN_INTEGRATION_TESTS=0
EMIT_METRICS=0

# Exit presubmit tests if only documentation files were changed.
function exit_if_presubmit_not_required() {
  if [[ -n "${PULL_PULL_SHA}" ]]; then
    # On a presubmit job
    local changes="$(/workspace/githubhelper -list-changed-files)"
    if [[ -z "${changes}" ]]; then
      header "NO CHANGED FILES REPORTED, ASSUMING IT'S AN ERROR AND RUNNING TESTS ANYWAY"
      return
    fi
    local no_presubmit_pattern="${NO_PRESUBMIT_FILES[*]}"
    local no_presubmit_pattern="\(${no_presubmit_pattern// /\\|}\)$"
    echo -e "Changed files in commit ${PULL_PULL_SHA}:\n${changes}"
    if [[ -z "$(echo "${changes}" | grep -v ${no_presubmit_pattern})" ]]; then
      # Nothing changed other than files that don't require presubmit tests
      header "Commit only contains changes that don't affect tests, skipping"
      exit 0
    fi
  fi
}

function abort() {
  echo "error: $@"
  exit 1
}

# Process flags and run tests accordingly.
function main() {
  exit_if_presubmit_not_required
  initialize_environment
  if (( IS_PRESUBMIT_EXEMPT_PR )) && (( ! IS_DOCUMENTATION_PR )); then
    header "Commit only contains changes that don't require tests, skipping"
    exit 0
  fi

  # Show the version of the tools we're using
  if (( IS_PROW )); then

@@ -70,6 +241,12 @@ function main() {
    go version
    echo ">> git version"
    git version
    echo ">> bazel version"
    bazel version 2> /dev/null
    if [[ "${DOCKER_IN_DOCKER_ENABLED}" == "true" ]]; then
      echo ">> docker version"
      docker version
    fi
  fi

  [[ -z $1 ]] && set -- "--all-tests"

@@ -117,45 +294,9 @@ function main() {
    ${TEST_TO_RUN} || failed=1
  fi

  if (( RUN_BUILD_TESTS )); then
    build_tests || failed=1
  fi
  if (( RUN_UNIT_TESTS )); then
    unit_tests || failed=1
  fi
  if (( RUN_INTEGRATION_TESTS )); then
    local e2e_failed=0
    # Run pre-integration tests, if any
    if function_exists pre_integration_tests; then
      if ! pre_integration_tests; then
        failed=1
        e2e_failed=1
      fi
    fi
    # Don't run integration tests if pre-integration tests failed
    if (( ! e2e_failed )); then
      if function_exists integration_tests; then
        if ! integration_tests; then
          failed=1
          e2e_failed=1
        fi
      else
        local options=""
        (( EMIT_METRICS )) && options="--emit-metrics"
        for e2e_test in ./test/e2e-*tests.sh; do
          echo "Running integration test ${e2e_test}"
          if ! ${e2e_test} ${options}; then
            failed=1
            e2e_failed=1
          fi
        done
      fi
    fi
    # Don't run post-integration tests if pre/integration tests failed
    if (( ! e2e_failed )) && function_exists post_integration_tests; then
      post_integration_tests || failed=1
    fi
  fi
  run_build_tests || failed=1
  run_unit_tests || failed=1
  run_integration_tests || failed=1

  exit ${failed}
}

@@ -19,33 +19,46 @@

source $(dirname ${BASH_SOURCE})/library.sh

# GitHub upstream.
readonly KNATIVE_UPSTREAM="https://github.com/knative/${REPO_NAME}"

# Simple banner for logging purposes.
# Parameters: $1 - message to display.
function banner() {
  make_banner "@" "$1"
}

# Tag images in the yaml file with a tag. If no tag is passed, does nothing.
# Tag images in the yaml file if $TAG is not empty.
# $KO_DOCKER_REPO is the registry containing the images to tag with $TAG.
# Parameters: $1 - yaml file to parse for images.
#             $2 - registry where the images are stored.
#             $3 - tag to apply (optional).
function tag_images_in_yaml() {
  [[ -z $3 ]] && return 0
  local src_dir="${GOPATH}/src/"
  local BASE_PATH="${REPO_ROOT_DIR/$src_dir}"
  echo "Tagging images under '${BASE_PATH}' with $3"
  for image in $(grep -o "$2/${BASE_PATH}/[a-z\./-]\+@sha256:[0-9a-f]\+" $1); do
    gcloud -q container images add-tag ${image} ${image%%@*}:$3
  [[ -z ${TAG} ]] && return 0
  local SRC_DIR="${GOPATH}/src/"
  local DOCKER_BASE="${KO_DOCKER_REPO}/${REPO_ROOT_DIR/$SRC_DIR}"
  echo "Tagging images under '${DOCKER_BASE}' with ${TAG}"
  for image in $(grep -o "${DOCKER_BASE}/[a-z\./-]\+@sha256:[0-9a-f]\+" $1); do
    gcloud -q container images add-tag ${image} ${image%%@*}:${TAG}

    # Georeplicate to {us,eu,asia}.gcr.io
    gcloud -q container images add-tag ${image} us.${image%%@*}:${TAG}
    gcloud -q container images add-tag ${image} eu.${image%%@*}:${TAG}
    gcloud -q container images add-tag ${image} asia.${image%%@*}:${TAG}
  done
}

# Copy the given yaml file to a GCS bucket. Image is tagged :latest, and optionally :$2.
# Copy the given yaml file to the $RELEASE_GCS_BUCKET bucket's "latest" directory.
# If $TAG is not empty, also copy it to $RELEASE_GCS_BUCKET bucket's "previous" directory.
# Parameters: $1 - yaml file to copy.
#             $2 - destination bucket name.
#             $3 - tag to apply (optional).
function publish_yaml() {
  gsutil cp $1 gs://$2/latest/
  [[ -n $3 ]] && gsutil cp $1 gs://$2/previous/$3/ || true
  function verbose_gsutil_cp {
    local DEST="gs://${RELEASE_GCS_BUCKET}/$2/"
    echo "Publishing $1 to ${DEST}"
    gsutil cp $1 ${DEST}
  }
  verbose_gsutil_cp $1 latest
  if [[ -n ${TAG} ]]; then
    verbose_gsutil_cp $1 previous/${TAG}
  fi
}

# These are global environment variables.

@@ -57,11 +70,98 @@ TAG=""
RELEASE_VERSION=""
RELEASE_NOTES=""
RELEASE_BRANCH=""
RELEASE_GCS_BUCKET=""
KO_FLAGS=""
export KO_DOCKER_REPO=""
export GITHUB_TOKEN=""

function abort() {
  echo "error: $@"
  exit 1
# Convenience function to run the hub tool.
# Parameters: $1..$n - arguments to hub.
function hub_tool() {
  run_go_tool github.com/github/hub hub $@
}

# Return the master version of a release.
# For example, "v0.2.1" returns "0.2".
# Parameters: $1 - release version label.
function master_version() {
  local release="${1//v/}"
  local tokens=(${release//\./ })
  echo "${tokens[0]}.${tokens[1]}"
}

# Return the release build number of a release.
# For example, "v0.2.1" returns "1".
# Parameters: $1 - release version label.
function release_build_number() {
  local tokens=(${1//\./ })
  echo "${tokens[2]}"
}

# Setup the repository upstream, if not set.
function setup_upstream() {
  # hub and checkout need the upstream URL to be set
  # TODO(adrcunha): Use "git remote get-url" once available on Prow.
  local upstream="$(git config --get remote.upstream.url)"
  echo "Remote upstream URL is '${upstream}'"
  if [[ -z "${upstream}" ]]; then
    echo "Setting remote upstream URL to '${KNATIVE_UPSTREAM}'"
    git remote add upstream ${KNATIVE_UPSTREAM}
  fi
}

# Fetch the release branch, so we can check it out.
function setup_branch() {
  [[ -z "${RELEASE_BRANCH}" ]] && return
  git fetch ${KNATIVE_UPSTREAM} ${RELEASE_BRANCH}:upstream/${RELEASE_BRANCH}
}

# Setup version, branch and release notes for a "dot" release.
function prepare_dot_release() {
  echo "Dot release requested"
  TAG_RELEASE=1
  PUBLISH_RELEASE=1
  # List latest release
  local releases # don't combine with the line below, or $? will be 0
  releases="$(hub_tool release)"
  [[ $? -eq 0 ]] || abort "cannot list releases"
  # If --release-branch passed, restrict to that release
  if [[ -n "${RELEASE_BRANCH}" ]]; then
    local version_filter="v${RELEASE_BRANCH##release-}"
    echo "Dot release will be generated for ${version_filter}"
    releases="$(echo "${releases}" | grep ^${version_filter})"
  fi
  local last_version="$(echo "${releases}" | grep '^v[0-9]\+\.[0-9]\+\.[0-9]\+$' | sort -r | head -1)"
  [[ -n "${last_version}" ]] || abort "no previous release exists"
  local major_minor_version="$(master_version ${last_version})"
  if [[ -z "${RELEASE_BRANCH}" ]]; then
    echo "Last release is ${last_version}"
    # Determine branch
    RELEASE_BRANCH="release-${major_minor_version}"
    echo "Last release branch is ${RELEASE_BRANCH}"
  fi
  # Ensure there are new commits in the branch, otherwise we don't create a new release
  setup_branch
  local last_release_commit="$(git rev-list -n 1 ${last_version})"
  local release_branch_commit="$(git rev-list -n 1 upstream/${RELEASE_BRANCH})"
  [[ -n "${last_release_commit}" ]] || abort "cannot get last release commit"
  [[ -n "${release_branch_commit}" ]] || abort "cannot get release branch last commit"
  if [[ "${last_release_commit}" == "${release_branch_commit}" ]]; then
    echo "*** Branch ${RELEASE_BRANCH} is at commit ${release_branch_commit}"
    echo "*** Branch ${RELEASE_BRANCH} has no new cherry-picks since release ${last_version}"
    echo "*** No dot release will be generated, as no changes exist"
    exit 0
  fi
  # Create new release version number
  local last_build="$(release_build_number ${last_version})"
  RELEASE_VERSION="${major_minor_version}.$(( last_build + 1 ))"
  echo "Will create release ${RELEASE_VERSION} at commit ${release_branch_commit}"
  # If --release-notes not used, copy from the latest release
  if [[ -z "${RELEASE_NOTES}" ]]; then
    RELEASE_NOTES="$(mktemp)"
    hub_tool release show -f "%b" ${last_version} > ${RELEASE_NOTES}
    echo "Release notes from ${last_version} copied to ${RELEASE_NOTES}"
  fi
}

# Parses flags and sets environment variables accordingly.

@@ -71,42 +171,71 @@ function parse_flags() {
  RELEASE_NOTES=""
  RELEASE_BRANCH=""
  KO_FLAGS="-P"
  KO_DOCKER_REPO="gcr.io/knative-nightly"
  RELEASE_GCS_BUCKET="knative-nightly/${REPO_NAME}"
  GITHUB_TOKEN=""
  local has_gcr_flag=0
  local has_gcs_flag=0
  local is_dot_release=0

  cd ${REPO_ROOT_DIR}
  while [[ $# -ne 0 ]]; do
    local parameter=$1
    case $parameter in
    case ${parameter} in
      --skip-tests) SKIP_TESTS=1 ;;
      --tag-release) TAG_RELEASE=1 ;;
      --notag-release) TAG_RELEASE=0 ;;
      --publish) PUBLISH_RELEASE=1 ;;
      --nopublish) PUBLISH_RELEASE=0 ;;
      --version)
      --dot-release) is_dot_release=1 ;;
      *)
        [[ $# -ge 2 ]] || abort "missing parameter after $1"
        shift
        [[ $# -ge 1 ]] || abort "missing version after --version"
        [[ $1 =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]] || abort "version format must be '[0-9].[0-9].[0-9]'"
        RELEASE_VERSION=$1
        ;;
      --branch)
        shift
        [[ $# -ge 1 ]] || abort "missing branch after --branch"
        [[ $1 =~ ^release-[0-9]+\.[0-9]+$ ]] || abort "branch name must be 'release-[0-9].[0-9]'"
        RELEASE_BRANCH=$1
        ;;
      --release-notes)
        shift
        [[ $# -ge 1 ]] || abort "missing release notes file after --release-notes"
        [[ ! -f "$1" ]] && abort "file $1 doesn't exist"
        RELEASE_NOTES=$1
        ;;
      *) abort "unknown option ${parameter}" ;;
        case ${parameter} in
          --github-token)
            [[ ! -f "$1" ]] && abort "file $1 doesn't exist"
            GITHUB_TOKEN="$(cat $1)"
            [[ -n "${GITHUB_TOKEN}" ]] || abort "file $1 is empty"
            ;;
          --release-gcr)
            KO_DOCKER_REPO=$1
            has_gcr_flag=1
            ;;
          --release-gcs)
            RELEASE_GCS_BUCKET=$1
            has_gcs_flag=1
            ;;
          --version)
            [[ $1 =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]] || abort "version format must be '[0-9].[0-9].[0-9]'"
            RELEASE_VERSION=$1
            ;;
          --branch)
            [[ $1 =~ ^release-[0-9]+\.[0-9]+$ ]] || abort "branch name must be 'release-[0-9].[0-9]'"
            RELEASE_BRANCH=$1
            ;;
          --release-notes)
            [[ ! -f "$1" ]] && abort "file $1 doesn't exist"
            RELEASE_NOTES=$1
            ;;
          *) abort "unknown option ${parameter}" ;;
        esac
    esac
    shift
  done

  # Setup dot releases
  if (( is_dot_release )); then
    setup_upstream
    prepare_dot_release
  fi

  # Update KO_DOCKER_REPO and KO_FLAGS if we're not publishing.
  if (( ! PUBLISH_RELEASE )); then
    (( has_gcr_flag )) && echo "Not publishing the release, GCR flag is ignored"
    (( has_gcs_flag )) && echo "Not publishing the release, GCS flag is ignored"
    KO_DOCKER_REPO="ko.local"
    KO_FLAGS="-L ${KO_FLAGS}"
    RELEASE_GCS_BUCKET=""
  fi

  if (( TAG_RELEASE )); then

@@ -131,6 +260,8 @@ function parse_flags() {
  readonly RELEASE_VERSION
  readonly RELEASE_NOTES
  readonly RELEASE_BRANCH
  readonly RELEASE_GCS_BUCKET
  readonly KO_DOCKER_REPO
}

# Run tests (unless --skip-tests was passed). Conveniently displays a banner indicating so.

@@ -149,8 +280,29 @@ function run_validation_tests() {
# Initialize everything (flags, workspace, etc) for a release.
function initialize() {
  parse_flags $@
  # Log what will be done and where.
  banner "Release configuration"
  echo "- Destination GCR: ${KO_DOCKER_REPO}"
  (( SKIP_TESTS )) && echo "- Tests will NOT be run" || echo "- Tests will be run"
  if (( TAG_RELEASE )); then
    echo "- Artifacts will be tagged '${TAG}'"
  else
    echo "- Artifacts WILL NOT be tagged"
  fi
  if (( PUBLISH_RELEASE )); then
    echo "- Release WILL BE published to '${RELEASE_GCS_BUCKET}'"
  else
    echo "- Release will not be published"
  fi
  if (( BRANCH_RELEASE )); then
    echo "- Release WILL BE branched from '${RELEASE_BRANCH}'"
  fi
  [[ -n "${RELEASE_NOTES}" ]] && echo "- Release notes are generated from '${RELEASE_NOTES}'"

  # Checkout specific branch, if necessary
  if (( BRANCH_RELEASE )); then
    setup_upstream
    setup_branch
    git checkout upstream/${RELEASE_BRANCH} || abort "cannot checkout branch ${RELEASE_BRANCH}"
  fi
}

@@ -174,8 +326,11 @@ function branch_release() {
    cat ${RELEASE_NOTES} >> ${description}
  fi
  git tag -a ${TAG} -m "${title}"
  git push $(git remote get-url upstream) tag ${TAG}
  run_go_tool github.com/github/hub hub release create \
  local repo_url="${KNATIVE_UPSTREAM}"
  [[ -n "${GITHUB_TOKEN}" ]] && repo_url="${repo_url/:\/\//:\/\/${GITHUB_TOKEN}@}"
  hub_tool push ${repo_url} tag ${TAG}

  hub_tool release create \
    --prerelease \
    ${attachments[@]} \
    --file=${description} \

90 vendor/github.com/knative/test-infra/tools/dep-collector/README.md generated vendored Normal file

@@ -0,0 +1,90 @@
# dep-collector

`dep-collector` is a tool for gathering up a collection of licenses for Go
dependencies that have been pulled into the idiomatic `vendor/` directory.
The resulting file from running `dep-collector` is intended for inclusion
in container images to respect the licenses of the included software.

## Basic Usage

You can run `dep-collector` on one or more Go import paths as entrypoints,
and it will:

1. Walk the transitive dependencies to identify vendored software packages,
1. Search for licenses for each vendored dependency,
1. Dump a file containing the licenses for each vendored import.

For example (single import path):

```shell
$ dep-collector .
===========================================================
Import: github.com/mattmoor/dep-collector/vendor/github.com/google/licenseclassifier

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/
...

```

For example (multiple import paths):

```shell
$ dep-collector ./cmd/controller ./cmd/sleeper

===========================================================
Import: github.com/mattmoor/warm-image/vendor/cloud.google.com/go

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/
```
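
Since the dump mode writes to stdout, the output can be captured and shipped
alongside the binaries it describes. A minimal sketch of that wiring (the
`third_party/VENDOR-LICENSE` path and the Dockerfile step are illustrative
assumptions, not something this README prescribes):

```shell
# Capture the license dump for the binaries we ship...
dep-collector ./cmd/controller > third_party/VENDOR-LICENSE

# ...then include it in the container image, e.g. with a Dockerfile line like:
# COPY third_party/VENDOR-LICENSE /usr/share/doc/VENDOR-LICENSE
```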

## CSV Usage

You can also run `dep-collector` in a mode that produces CSV output,
including basic classification of the license.

> In order to run dep-collector in this mode, you must first run:
> `go get github.com/google/licenseclassifier`

For example:

```shell
$ dep-collector -csv .
github.com/google/licenseclassifier,Static,,https://github.com/mattmoor/dep-collector/blob/master/vendor/github.com/google/licenseclassifier/LICENSE,Apache-2.0
github.com/google/licenseclassifier/stringclassifier,Static,,https://github.com/mattmoor/dep-collector/blob/master/vendor/github.com/google/licenseclassifier/stringclassifier/LICENSE,Apache-2.0
github.com/sergi/go-diff,Static,,https://github.com/mattmoor/dep-collector/blob/master/vendor/github.com/sergi/go-diff/LICENSE,MIT

```

The columns here are:

* Import Path,
* How the dependency is linked in (always reports "static"),
* A column for whether any modifications have been made (always empty),
* The URL by which to access the license file (assumes `master`),
* A classification of what license this is ([using this](https://github.com/google/licenseclassifier)).
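
Because the output is plain CSV, it composes with standard shell tooling.
For instance, one illustrative way (not a feature of the tool itself) to spot
any vendored dependency whose classification is not Apache-2.0:

```shell
# Column 5 is the license classification; print imports that differ.
dep-collector -csv . | awk -F',' '$5 != "Apache-2.0" { print $1 " -> " $5 }'
```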

## Check mode

`dep-collector` also includes a mode that will check for "forbidden" licenses.

> In order to run dep-collector in this mode, you must first run:
> `go get github.com/google/licenseclassifier`

For example (failing):

```shell
$ dep-collector -check ./foo/bar/baz
2018/07/20 22:01:29 Error checking license collection: Errors validating licenses:
Found matching forbidden license in "foo.io/bar/vendor/github.com/BurntSushi/toml":WTFPL
```

For example (passing):

```shell
$ dep-collector -check .
2018/07/20 22:29:09 No errors found.
```
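
Since `-check` exits non-zero when a forbidden license is found (it fails via
`log.Fatalf`, as the `main.go` later in this commit shows), it can gate a CI
job directly. A minimal sketch, assuming a hypothetical presubmit script:

```shell
#!/usr/bin/env bash
# Hypothetical presubmit step: fail fast on any forbidden vendored license.
set -o errexit
go get github.com/google/licenseclassifier
dep-collector -check .
```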

94 vendor/github.com/knative/test-infra/tools/dep-collector/imports.go generated vendored Normal file

@@ -0,0 +1,94 @@
/*
Copyright 2018 The Knative Authors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
	"fmt"
	gb "go/build"
	"path/filepath"
	"sort"
	"strings"
)

func CollectTransitiveImports(binaries []string) ([]string, error) {
	// Perform a simple DFS to collect the binaries' transitive dependencies.
	visited := make(map[string]struct{})
	for _, importpath := range binaries {
		if gb.IsLocalImport(importpath) {
			ip, err := qualifyLocalImport(importpath)
			if err != nil {
				return nil, err
			}
			importpath = ip
		}

		pkg, err := gb.Import(importpath, WorkingDir, gb.ImportComment)
		if err != nil {
			return nil, err
		}
		if err := visit(pkg, visited); err != nil {
			return nil, err
		}
	}

	// Sort the dependencies deterministically.
	var list sort.StringSlice
	for ip := range visited {
		if !strings.Contains(ip, "/vendor/") {
			// Skip files outside of vendor
			continue
		}
		list = append(list, ip)
	}
	list.Sort()

	return list, nil
}

func qualifyLocalImport(ip string) (string, error) {
	gopathsrc := filepath.Join(gb.Default.GOPATH, "src")
	if !strings.HasPrefix(WorkingDir, gopathsrc) {
		return "", fmt.Errorf("working directory must be on ${GOPATH}/src = %s", gopathsrc)
	}
	return filepath.Join(strings.TrimPrefix(WorkingDir, gopathsrc+string(filepath.Separator)), ip), nil
}

func visit(pkg *gb.Package, visited map[string]struct{}) error {
	if _, ok := visited[pkg.ImportPath]; ok {
		return nil
	}
	visited[pkg.ImportPath] = struct{}{}

	for _, ip := range pkg.Imports {
		if ip == "C" {
			// skip cgo
			continue
		}
		subpkg, err := gb.Import(ip, WorkingDir, gb.ImportComment)
		if err != nil {
			return fmt.Errorf("%v\n -> %v", pkg.ImportPath, err)
		}
		if !strings.HasPrefix(subpkg.Dir, WorkingDir) {
			// Skip import paths outside of our workspace (std library)
			continue
		}
		if err := visit(subpkg, visited); err != nil {
			return fmt.Errorf("%v (%v)\n -> %v", pkg.ImportPath, pkg.Dir, err)
		}
	}
	return nil
}

203 vendor/github.com/knative/test-infra/tools/dep-collector/licenses.go generated vendored Normal file

@@ -0,0 +1,203 @@
/*
Copyright 2018 The Knative Authors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
	"fmt"
	gb "go/build"
	"io/ioutil"
	"os"
	"path/filepath"
	"sort"
	"strings"

	"github.com/google/licenseclassifier"
)

var LicenseNames = []string{
	"LICENCE",
	"LICENSE",
	"LICENSE.code",
	"LICENSE.md",
	"LICENSE.txt",
	"COPYING",
	"copyright",
}

const MatchThreshold = 0.9

type LicenseFile struct {
	EnclosingImportPath string
	LicensePath         string
}

func (lf *LicenseFile) Body() (string, error) {
	body, err := ioutil.ReadFile(lf.LicensePath)
	if err != nil {
		return "", err
	}
	return string(body), nil
}

func (lt *LicenseFile) Classify(classifier *licenseclassifier.License) (string, error) {
	body, err := lt.Body()
	if err != nil {
		return "", err
	}
	m := classifier.NearestMatch(body)
	if m == nil {
		return "", fmt.Errorf("unable to classify license: %v", lt.EnclosingImportPath)
	}
	return m.Name, nil
}

func (lt *LicenseFile) Check(classifier *licenseclassifier.License) error {
	body, err := lt.Body()
	if err != nil {
		return err
	}
	ms := classifier.MultipleMatch(body, false)
	for _, m := range ms {
		return fmt.Errorf("Found matching forbidden license in %q: %v", lt.EnclosingImportPath, m.Name)
	}
	return nil
}

func (lt *LicenseFile) Entry() (string, error) {
	body, err := lt.Body()
	if err != nil {
		return "", err
	}
	return fmt.Sprintf(`
===========================================================
Import: %s

%s
`, lt.EnclosingImportPath, body), nil
}

func (lt *LicenseFile) CSVRow(classifier *licenseclassifier.License) (string, error) {
	classification, err := lt.Classify(classifier)
	if err != nil {
		return "", err
	}
	parts := strings.Split(lt.EnclosingImportPath, "/vendor/")
	if len(parts) != 2 {
		return "", fmt.Errorf("wrong number of parts splitting import path on %q : %q", "/vendor/", lt.EnclosingImportPath)
	}
	return strings.Join([]string{
		parts[1],
		"Static",
		"", // TODO(mattmoor): Modifications?
		"https://" + parts[0] + "/blob/master/vendor/" + parts[1] + "/" + filepath.Base(lt.LicensePath),
		classification,
	}, ","), nil
}

func findLicense(ip string) (*LicenseFile, error) {
	pkg, err := gb.Import(ip, WorkingDir, gb.ImportComment)
	if err != nil {
		return nil, err
	}
	dir := pkg.Dir

	for {
		// When we reach the root of our workspace, stop searching.
		if dir == WorkingDir {
			return nil, fmt.Errorf("unable to find license for %q", pkg.ImportPath)
		}

		for _, name := range LicenseNames {
			p := filepath.Join(dir, name)
			if _, err := os.Stat(p); err != nil {
				continue
			}

			return &LicenseFile{
				EnclosingImportPath: ip,
				LicensePath:         p,
			}, nil
		}

		// Walk to the parent directory / import path
		dir = filepath.Dir(dir)
		ip = filepath.Dir(ip)
	}
}

type LicenseCollection []*LicenseFile

func (lc LicenseCollection) Entries() (string, error) {
	sections := make([]string, 0, len(lc))
	for _, key := range lc {
		entry, err := key.Entry()
		if err != nil {
			return "", err
		}
		sections = append(sections, entry)
	}
	return strings.Join(sections, "\n"), nil
}

func (lc LicenseCollection) CSV(classifier *licenseclassifier.License) (string, error) {
	sections := make([]string, 0, len(lc))
	for _, entry := range lc {
		row, err := entry.CSVRow(classifier)
		if err != nil {
			return "", err
		}
		sections = append(sections, row)
	}
	return strings.Join(sections, "\n"), nil
}

func (lc LicenseCollection) Check(classifier *licenseclassifier.License) error {
	errors := []string{}
	for _, entry := range lc {
		if err := entry.Check(classifier); err != nil {
			errors = append(errors, err.Error())
		}
	}
	if len(errors) == 0 {
		return nil
	}
	return fmt.Errorf("Errors validating licenses:\n%v", strings.Join(errors, "\n"))
}

func CollectLicenses(imports []string) (LicenseCollection, error) {
	// for each of the import paths, search for a license file.
	licenseFiles := make(map[string]*LicenseFile)
	for _, ip := range imports {
		lf, err := findLicense(ip)
		if err != nil {
			return nil, err
		}
		licenseFiles[lf.EnclosingImportPath] = lf
	}

	order := sort.StringSlice{}
	for key := range licenseFiles {
		order = append(order, key)
	}
	order.Sort()

	licenseTypes := LicenseCollection{}
	for _, key := range order {
		licenseTypes = append(licenseTypes, licenseFiles[key])
	}
	return licenseTypes, nil
}

@@ -0,0 +1,81 @@
/*
Copyright 2018 The Knative Authors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
	"flag"
	"log"
	"os"

	"github.com/google/licenseclassifier"
)

var WorkingDir, _ = os.Getwd()

var (
	csv   = flag.Bool("csv", false, "Whether to print in CSV format (with slow classification).")
	check = flag.Bool("check", false, "Whether to just check license files for forbidden licenses.")
)

func main() {
	flag.Parse()
	if flag.NArg() == 0 {
		log.Fatalf("Expected a list of import paths, got: %v", flag.Args())
	}

	// Perform a simple DFS to collect the binaries' transitive dependencies.
	transitiveImports, err := CollectTransitiveImports(flag.Args())
	if err != nil {
		log.Fatalf("Error collecting transitive dependencies: %v", err)
	}

	// Gather all of the license data from the imports.
	collection, err := CollectLicenses(transitiveImports)
	if err != nil {
		log.Fatalf("Error identifying licenses for transitive dependencies: %v", err)
	}

	if *check {
		classifier, err := licenseclassifier.NewWithForbiddenLicenses(MatchThreshold)
		if err != nil {
			log.Fatalf("Error creating license classifier: %v", err)
		}
		if err := collection.Check(classifier); err != nil {
			log.Fatalf("Error checking license collection: %v", err)
		}
		log.Printf("No errors found.")
		return
	}

	if *csv {
		classifier, err := licenseclassifier.New(MatchThreshold)
		if err != nil {
			log.Fatalf("Error creating license classifier: %v", err)
		}
		output, err := collection.CSV(classifier)
		if err != nil {
			log.Fatalf("Error generating CSV: %v", err)
		}
		os.Stdout.Write([]byte(output))
	} else {
		entries, err := collection.Entries()
		if err != nil {
			log.Fatalf("Error generating entries: %v", err)
		}
		os.Stdout.Write([]byte(entries))
	}
}

@@ -0,0 +1,25 @@
# This is the official list of go-diff authors for copyright purposes.
# This file is distinct from the CONTRIBUTORS files.
# See the latter for an explanation.

# Names should be added to this file as
#   Name or Organization <email address>
# The email address is not required for organizations.

# Please keep the list sorted.

Danny Yoo <dannyyoo@google.com>
James Kolb <jkolb@google.com>
Jonathan Amsterdam <jba@google.com>
Markus Zimmermann <markus.zimmermann@nethead.at> <markus.zimmermann@symflower.com> <zimmski@gmail.com>
Matt Kovars <akaskik@gmail.com>
Örjan Persson <orjan@spotify.com>
Osman Masood <oamasood@gmail.com>
Robert Carlsen <rwcarlsen@gmail.com>
Rory Flynn <roryflynn@users.noreply.github.com>
Sergi Mansilla <sergi.mansilla@gmail.com>
Shatrugna Sadhu <ssadhu@apcera.com>
Shawn Smith <shawnpsmith@gmail.com>
Stas Maksimov <maksimov@gmail.com>
Tor Arvid Lund <torarvid@gmail.com>
Zac Bergquist <zbergquist99@gmail.com>

@@ -0,0 +1,32 @@
# This is the official list of people who can contribute
# (and typically have contributed) code to the go-diff
# repository.
#
# The AUTHORS file lists the copyright holders; this file
# lists people. For example, ACME Inc. employees would be listed here
# but not in AUTHORS, because ACME Inc. would hold the copyright.
#
# When adding J Random Contributor's name to this file,
# either J's name or J's organization's name should be
# added to the AUTHORS file.
#
# Names should be added to this file like so:
#   Name <email address>
#
# Please keep the list sorted.

Danny Yoo <dannyyoo@google.com>
James Kolb <jkolb@google.com>
Jonathan Amsterdam <jba@google.com>
Markus Zimmermann <markus.zimmermann@nethead.at> <markus.zimmermann@symflower.com> <zimmski@gmail.com>
Matt Kovars <akaskik@gmail.com>
Örjan Persson <orjan@spotify.com>
Osman Masood <oamasood@gmail.com>
Robert Carlsen <rwcarlsen@gmail.com>
Rory Flynn <roryflynn@users.noreply.github.com>
Sergi Mansilla <sergi.mansilla@gmail.com>
Shatrugna Sadhu <ssadhu@apcera.com>
Shawn Smith <shawnpsmith@gmail.com>
Stas Maksimov <maksimov@gmail.com>
Tor Arvid Lund <torarvid@gmail.com>
Zac Bergquist <zbergquist99@gmail.com>

@@ -0,0 +1,20 @@
Copyright (c) 2012-2016 The go-diff Authors. All rights reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

File diff suppressed because it is too large

@@ -0,0 +1,46 @@
// Copyright (c) 2012-2016 The go-diff authors. All rights reserved.
// https://github.com/sergi/go-diff
// See the included LICENSE file for license details.
//
// go-diff is a Go implementation of Google's Diff, Match, and Patch library
// Original library is Copyright (c) 2006 Google Inc.
// http://code.google.com/p/google-diff-match-patch/

// Package diffmatchpatch offers robust algorithms to perform the operations required for synchronizing plain text.
package diffmatchpatch

import (
	"time"
)

// DiffMatchPatch holds the configuration for diff-match-patch operations.
type DiffMatchPatch struct {
	// Number of seconds to map a diff before giving up (0 for infinity).
	DiffTimeout time.Duration
	// Cost of an empty edit operation in terms of edit characters.
	DiffEditCost int
	// How far to search for a match (0 = exact location, 1000+ = broad match). A match this many characters away from the expected location will add 1.0 to the score (0.0 is a perfect match).
	MatchDistance int
	// When deleting a large block of text (over ~64 characters), how close do the contents have to be to match the expected contents. (0.0 = perfection, 1.0 = very loose). Note that MatchThreshold controls how closely the end points of a delete need to match.
	PatchDeleteThreshold float64
	// Chunk size for context length.
	PatchMargin int
	// The number of bits in an int.
	MatchMaxBits int
	// At what point is no match declared (0.0 = perfection, 1.0 = very loose).
	MatchThreshold float64
}

// New creates a new DiffMatchPatch object with default parameters.
func New() *DiffMatchPatch {
	// Defaults.
	return &DiffMatchPatch{
		DiffTimeout:          time.Second,
		DiffEditCost:         4,
		MatchThreshold:       0.5,
		MatchDistance:        1000,
		PatchDeleteThreshold: 0.5,
		PatchMargin:          4,
		MatchMaxBits:         32,
	}
}

@@ -0,0 +1,160 @@
// Copyright (c) 2012-2016 The go-diff authors. All rights reserved.
// https://github.com/sergi/go-diff
// See the included LICENSE file for license details.
//
// go-diff is a Go implementation of Google's Diff, Match, and Patch library
// Original library is Copyright (c) 2006 Google Inc.
// http://code.google.com/p/google-diff-match-patch/

package diffmatchpatch

import (
	"math"
)

// MatchMain locates the best instance of 'pattern' in 'text' near 'loc'.
// Returns -1 if no match found.
func (dmp *DiffMatchPatch) MatchMain(text, pattern string, loc int) int {
	// Check for null inputs not needed since null can't be passed in C#.

	loc = int(math.Max(0, math.Min(float64(loc), float64(len(text)))))
	if text == pattern {
		// Shortcut (potentially not guaranteed by the algorithm)
		return 0
	} else if len(text) == 0 {
		// Nothing to match.
		return -1
	} else if loc+len(pattern) <= len(text) && text[loc:loc+len(pattern)] == pattern {
		// Perfect match at the perfect spot! (Includes case of null pattern)
		return loc
	}
	// Do a fuzzy compare.
	return dmp.MatchBitap(text, pattern, loc)
}

// MatchBitap locates the best instance of 'pattern' in 'text' near 'loc' using the Bitap algorithm.
// Returns -1 if no match was found.
func (dmp *DiffMatchPatch) MatchBitap(text, pattern string, loc int) int {
	// Initialise the alphabet.
	s := dmp.MatchAlphabet(pattern)

	// Highest score beyond which we give up.
	scoreThreshold := dmp.MatchThreshold
	// Is there a nearby exact match? (speedup)
	bestLoc := indexOf(text, pattern, loc)
	if bestLoc != -1 {
		scoreThreshold = math.Min(dmp.matchBitapScore(0, bestLoc, loc,
			pattern), scoreThreshold)
		// What about in the other direction? (speedup)
		bestLoc = lastIndexOf(text, pattern, loc+len(pattern))
		if bestLoc != -1 {
			scoreThreshold = math.Min(dmp.matchBitapScore(0, bestLoc, loc,
				pattern), scoreThreshold)
		}
	}

	// Initialise the bit arrays.
	matchmask := 1 << uint((len(pattern) - 1))
	bestLoc = -1

	var binMin, binMid int
	binMax := len(pattern) + len(text)
	lastRd := []int{}
	for d := 0; d < len(pattern); d++ {
		// Scan for the best match; each iteration allows for one more error. Run a binary search to determine how far from 'loc' we can stray at this error level.
		binMin = 0
		binMid = binMax
		for binMin < binMid {
			if dmp.matchBitapScore(d, loc+binMid, loc, pattern) <= scoreThreshold {
				binMin = binMid
			} else {
				binMax = binMid
			}
			binMid = (binMax-binMin)/2 + binMin
		}
		// Use the result from this iteration as the maximum for the next.
		binMax = binMid
		start := int(math.Max(1, float64(loc-binMid+1)))
		finish := int(math.Min(float64(loc+binMid), float64(len(text))) + float64(len(pattern)))

		rd := make([]int, finish+2)
		rd[finish+1] = (1 << uint(d)) - 1

		for j := finish; j >= start; j-- {
			var charMatch int
			if len(text) <= j-1 {
				// Out of range.
				charMatch = 0
			} else if _, ok := s[text[j-1]]; !ok {
				charMatch = 0
			} else {
				charMatch = s[text[j-1]]
			}

			if d == 0 {
				// First pass: exact match.
				rd[j] = ((rd[j+1] << 1) | 1) & charMatch
			} else {
				// Subsequent passes: fuzzy match.
				rd[j] = ((rd[j+1]<<1)|1)&charMatch | (((lastRd[j+1] | lastRd[j]) << 1) | 1) | lastRd[j+1]
			}
			if (rd[j] & matchmask) != 0 {
				score := dmp.matchBitapScore(d, j-1, loc, pattern)
				// This match will almost certainly be better than any existing match. But check anyway.
				if score <= scoreThreshold {
					// Told you so.
					scoreThreshold = score
					bestLoc = j - 1
					if bestLoc > loc {
						// When passing loc, don't exceed our current distance from loc.
						start = int(math.Max(1, float64(2*loc-bestLoc)))
					} else {
						// Already passed loc, downhill from here on in.
						break
					}
				}
			}
		}
		if dmp.matchBitapScore(d+1, loc, loc, pattern) > scoreThreshold {
			// No hope for a (better) match at greater error levels.
			break
		}
		lastRd = rd
	}
	return bestLoc
}

// matchBitapScore computes and returns the score for a match with e errors and x location.
func (dmp *DiffMatchPatch) matchBitapScore(e, x, loc int, pattern string) float64 {
	accuracy := float64(e) / float64(len(pattern))
	proximity := math.Abs(float64(loc - x))
	if dmp.MatchDistance == 0 {
		// Dodge divide by zero error.
		if proximity == 0 {
			return accuracy
		}

		return 1.0
	}
	return accuracy + (proximity / float64(dmp.MatchDistance))
}

// MatchAlphabet initialises the alphabet for the Bitap algorithm.
func (dmp *DiffMatchPatch) MatchAlphabet(pattern string) map[byte]int {
	s := map[byte]int{}
	charPattern := []byte(pattern)
	for _, c := range charPattern {
		_, ok := s[c]
		if !ok {
			s[c] = 0
		}
	}
	i := 0

	for _, c := range charPattern {
		value := s[c] | int(uint(1)<<uint((len(pattern)-i-1)))
		s[c] = value
		i++
	}
	return s
}

@@ -0,0 +1,23 @@
// Copyright (c) 2012-2016 The go-diff authors. All rights reserved.
// https://github.com/sergi/go-diff
// See the included LICENSE file for license details.
//
// go-diff is a Go implementation of Google's Diff, Match, and Patch library
// Original library is Copyright (c) 2006 Google Inc.
// http://code.google.com/p/google-diff-match-patch/

package diffmatchpatch

func min(x, y int) int {
	if x < y {
		return x
	}
	return y
}

func max(x, y int) int {
	if x > y {
		return x
	}
	return y
}

@@ -0,0 +1,556 @@
// Copyright (c) 2012-2016 The go-diff authors. All rights reserved.
// https://github.com/sergi/go-diff
// See the included LICENSE file for license details.
//
// go-diff is a Go implementation of Google's Diff, Match, and Patch library
// Original library is Copyright (c) 2006 Google Inc.
// http://code.google.com/p/google-diff-match-patch/

package diffmatchpatch

import (
	"bytes"
	"errors"
	"math"
	"net/url"
	"regexp"
	"strconv"
	"strings"
)

// Patch represents one patch operation.
type Patch struct {
	diffs   []Diff
	Start1  int
	Start2  int
	Length1 int
	Length2 int
}

// String emulates GNU diff's format.
// Header: @@ -382,8 +481,9 @@
// Indices are printed as 1-based, not 0-based.
func (p *Patch) String() string {
	var coords1, coords2 string

	if p.Length1 == 0 {
		coords1 = strconv.Itoa(p.Start1) + ",0"
	} else if p.Length1 == 1 {
		coords1 = strconv.Itoa(p.Start1 + 1)
	} else {
		coords1 = strconv.Itoa(p.Start1+1) + "," + strconv.Itoa(p.Length1)
	}

	if p.Length2 == 0 {
		coords2 = strconv.Itoa(p.Start2) + ",0"
	} else if p.Length2 == 1 {
		coords2 = strconv.Itoa(p.Start2 + 1)
	} else {
		coords2 = strconv.Itoa(p.Start2+1) + "," + strconv.Itoa(p.Length2)
	}

	var text bytes.Buffer
	_, _ = text.WriteString("@@ -" + coords1 + " +" + coords2 + " @@\n")

	// Escape the body of the patch with %xx notation.
	for _, aDiff := range p.diffs {
		switch aDiff.Type {
		case DiffInsert:
			_, _ = text.WriteString("+")
		case DiffDelete:
			_, _ = text.WriteString("-")
		case DiffEqual:
			_, _ = text.WriteString(" ")
		}

		_, _ = text.WriteString(strings.Replace(url.QueryEscape(aDiff.Text), "+", " ", -1))
		_, _ = text.WriteString("\n")
	}

	return unescaper.Replace(text.String())
}

// PatchAddContext increases the context until it is unique, but doesn't let the pattern expand beyond MatchMaxBits.
func (dmp *DiffMatchPatch) PatchAddContext(patch Patch, text string) Patch {
	if len(text) == 0 {
		return patch
	}

	pattern := text[patch.Start2 : patch.Start2+patch.Length1]
	padding := 0

	// Look for the first and last matches of pattern in text. If two different matches are found, increase the pattern length.
	for strings.Index(text, pattern) != strings.LastIndex(text, pattern) &&
		len(pattern) < dmp.MatchMaxBits-2*dmp.PatchMargin {
		padding += dmp.PatchMargin
		maxStart := max(0, patch.Start2-padding)
		minEnd := min(len(text), patch.Start2+patch.Length1+padding)
		pattern = text[maxStart:minEnd]
	}
	// Add one chunk for good luck.
	padding += dmp.PatchMargin

	// Add the prefix.
	prefix := text[max(0, patch.Start2-padding):patch.Start2]
	if len(prefix) != 0 {
		patch.diffs = append([]Diff{Diff{DiffEqual, prefix}}, patch.diffs...)
	}
	// Add the suffix.
	suffix := text[patch.Start2+patch.Length1 : min(len(text), patch.Start2+patch.Length1+padding)]
	if len(suffix) != 0 {
		patch.diffs = append(patch.diffs, Diff{DiffEqual, suffix})
	}

	// Roll back the start points.
	patch.Start1 -= len(prefix)
	patch.Start2 -= len(prefix)
	// Extend the lengths.
	patch.Length1 += len(prefix) + len(suffix)
	patch.Length2 += len(prefix) + len(suffix)

	return patch
}

// PatchMake computes a list of patches.
func (dmp *DiffMatchPatch) PatchMake(opt ...interface{}) []Patch {
	if len(opt) == 1 {
		diffs, _ := opt[0].([]Diff)
		text1 := dmp.DiffText1(diffs)
		return dmp.PatchMake(text1, diffs)
	} else if len(opt) == 2 {
		text1 := opt[0].(string)
		switch t := opt[1].(type) {
		case string:
			diffs := dmp.DiffMain(text1, t, true)
			if len(diffs) > 2 {
				diffs = dmp.DiffCleanupSemantic(diffs)
				diffs = dmp.DiffCleanupEfficiency(diffs)
			}
			return dmp.PatchMake(text1, diffs)
		case []Diff:
			return dmp.patchMake2(text1, t)
		}
	} else if len(opt) == 3 {
		return dmp.PatchMake(opt[0], opt[2])
	}
	return []Patch{}
}

// patchMake2 computes a list of patches to turn text1 into text2.
// text2 is not provided, diffs are the delta between text1 and text2.
func (dmp *DiffMatchPatch) patchMake2(text1 string, diffs []Diff) []Patch {
	// Check for null inputs not needed since null can't be passed in C#.
	patches := []Patch{}
	if len(diffs) == 0 {
		return patches // Get rid of the null case.
	}

	patch := Patch{}
	charCount1 := 0 // Number of characters into the text1 string.
	charCount2 := 0 // Number of characters into the text2 string.
	// Start with text1 (prepatchText) and apply the diffs until we arrive at text2 (postpatchText). We recreate the patches one by one to determine context info.
	prepatchText := text1
	postpatchText := text1

	for i, aDiff := range diffs {
		if len(patch.diffs) == 0 && aDiff.Type != DiffEqual {
			// A new patch starts here.
			patch.Start1 = charCount1
			patch.Start2 = charCount2
		}

		switch aDiff.Type {
		case DiffInsert:
			patch.diffs = append(patch.diffs, aDiff)
			patch.Length2 += len(aDiff.Text)
			postpatchText = postpatchText[:charCount2] +
				aDiff.Text + postpatchText[charCount2:]
		case DiffDelete:
			patch.Length1 += len(aDiff.Text)
			patch.diffs = append(patch.diffs, aDiff)
			postpatchText = postpatchText[:charCount2] + postpatchText[charCount2+len(aDiff.Text):]
		case DiffEqual:
			if len(aDiff.Text) <= 2*dmp.PatchMargin &&
				len(patch.diffs) != 0 && i != len(diffs)-1 {
				// Small equality inside a patch.
				patch.diffs = append(patch.diffs, aDiff)
				patch.Length1 += len(aDiff.Text)
				patch.Length2 += len(aDiff.Text)
			}
			if len(aDiff.Text) >= 2*dmp.PatchMargin {
				// Time for a new patch.
				if len(patch.diffs) != 0 {
					patch = dmp.PatchAddContext(patch, prepatchText)
					patches = append(patches, patch)
					patch = Patch{}
					// Unlike Unidiff, our patch lists have a rolling context. http://code.google.com/p/google-diff-match-patch/wiki/Unidiff Update prepatch text & pos to reflect the application of the just completed patch.
					prepatchText = postpatchText
					charCount1 = charCount2
				}
			}
		}

		// Update the current character count.
		if aDiff.Type != DiffInsert {
			charCount1 += len(aDiff.Text)
		}
		if aDiff.Type != DiffDelete {
			charCount2 += len(aDiff.Text)
		}
	}

	// Pick up the leftover patch if not empty.
	if len(patch.diffs) != 0 {
		patch = dmp.PatchAddContext(patch, prepatchText)
		patches = append(patches, patch)
	}

	return patches
}
// PatchDeepCopy returns an array that is identical to a given an array of patches.
|
||||
func (dmp *DiffMatchPatch) PatchDeepCopy(patches []Patch) []Patch {
|
||||
patchesCopy := []Patch{}
|
||||
for _, aPatch := range patches {
|
||||
patchCopy := Patch{}
|
||||
for _, aDiff := range aPatch.diffs {
|
||||
patchCopy.diffs = append(patchCopy.diffs, Diff{
|
||||
aDiff.Type,
|
||||
aDiff.Text,
|
||||
})
|
||||
}
|
||||
patchCopy.Start1 = aPatch.Start1
|
||||
patchCopy.Start2 = aPatch.Start2
|
||||
patchCopy.Length1 = aPatch.Length1
|
||||
patchCopy.Length2 = aPatch.Length2
|
||||
patchesCopy = append(patchesCopy, patchCopy)
|
||||
}
|
||||
return patchesCopy
|
||||
}
|
||||
|
||||
// PatchApply merges a set of patches onto the text. Returns a patched text, as well as an array of true/false values indicating which patches were applied.
|
||||
func (dmp *DiffMatchPatch) PatchApply(patches []Patch, text string) (string, []bool) {
|
||||
if len(patches) == 0 {
|
||||
return text, []bool{}
|
||||
}
|
||||
|
||||
// Deep copy the patches so that no changes are made to originals.
|
||||
patches = dmp.PatchDeepCopy(patches)
|
||||
|
||||
nullPadding := dmp.PatchAddPadding(patches)
|
||||
text = nullPadding + text + nullPadding
|
||||
patches = dmp.PatchSplitMax(patches)
|
||||
|
||||
x := 0
|
||||
// delta keeps track of the offset between the expected and actual location of the previous patch. If there are patches expected at positions 10 and 20, but the first patch was found at 12, delta is 2 and the second patch has an effective expected position of 22.
|
||||
delta := 0
|
||||
results := make([]bool, len(patches))
|
||||
for _, aPatch := range patches {
|
||||
expectedLoc := aPatch.Start2 + delta
|
||||
text1 := dmp.DiffText1(aPatch.diffs)
|
||||
var startLoc int
|
||||
endLoc := -1
|
||||
if len(text1) > dmp.MatchMaxBits {
|
||||
// PatchSplitMax will only provide an oversized pattern in the case of a monster delete.
|
||||
startLoc = dmp.MatchMain(text, text1[:dmp.MatchMaxBits], expectedLoc)
|
||||
if startLoc != -1 {
|
||||
endLoc = dmp.MatchMain(text,
|
||||
text1[len(text1)-dmp.MatchMaxBits:], expectedLoc+len(text1)-dmp.MatchMaxBits)
|
||||
if endLoc == -1 || startLoc >= endLoc {
|
||||
// Can't find valid trailing context. Drop this patch.
|
||||
startLoc = -1
|
||||
}
|
||||
}
|
||||
} else {
|
||||
startLoc = dmp.MatchMain(text, text1, expectedLoc)
|
||||
}
|
||||
if startLoc == -1 {
|
||||
// No match found. :(
|
||||
results[x] = false
|
||||
// Subtract the delta for this failed patch from subsequent patches.
|
||||
delta -= aPatch.Length2 - aPatch.Length1
|
||||
} else {
|
||||
// Found a match. :)
|
||||
results[x] = true
|
||||
delta = startLoc - expectedLoc
|
||||
var text2 string
|
||||
if endLoc == -1 {
|
||||
text2 = text[startLoc:int(math.Min(float64(startLoc+len(text1)), float64(len(text))))]
|
||||
} else {
|
||||
text2 = text[startLoc:int(math.Min(float64(endLoc+dmp.MatchMaxBits), float64(len(text))))]
|
||||
}
|
||||
if text1 == text2 {
|
||||
// Perfect match, just shove the Replacement text in.
|
||||
text = text[:startLoc] + dmp.DiffText2(aPatch.diffs) + text[startLoc+len(text1):]
|
||||
} else {
|
||||
// Imperfect match. Run a diff to get a framework of equivalent indices.
|
||||
diffs := dmp.DiffMain(text1, text2, false)
|
||||
if len(text1) > dmp.MatchMaxBits && float64(dmp.DiffLevenshtein(diffs))/float64(len(text1)) > dmp.PatchDeleteThreshold {
|
||||
// The end points match, but the content is unacceptably bad.
|
||||
results[x] = false
|
||||
} else {
|
||||
diffs = dmp.DiffCleanupSemanticLossless(diffs)
|
||||
index1 := 0
|
||||
for _, aDiff := range aPatch.diffs {
|
||||
if aDiff.Type != DiffEqual {
|
||||
index2 := dmp.DiffXIndex(diffs, index1)
|
||||
if aDiff.Type == DiffInsert {
|
||||
// Insertion
|
||||
text = text[:startLoc+index2] + aDiff.Text + text[startLoc+index2:]
|
||||
} else if aDiff.Type == DiffDelete {
|
||||
// Deletion
|
||||
startIndex := startLoc + index2
|
||||
text = text[:startIndex] +
|
||||
text[startIndex+dmp.DiffXIndex(diffs, index1+len(aDiff.Text))-index2:]
|
||||
}
|
||||
}
|
||||
if aDiff.Type != DiffDelete {
|
||||
index1 += len(aDiff.Text)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
x++
|
||||
}
|
||||
// Strip the padding off.
|
||||
text = text[len(nullPadding) : len(nullPadding)+(len(text)-2*len(nullPadding))]
|
||||
return text, results
|
||||
}
|
||||
|
||||
// PatchAddPadding adds some padding on text start and end so that edges can match something.
|
||||
// Intended to be called only from within patchApply.
|
||||
func (dmp *DiffMatchPatch) PatchAddPadding(patches []Patch) string {
|
||||
paddingLength := dmp.PatchMargin
|
||||
nullPadding := ""
|
||||
for x := 1; x <= paddingLength; x++ {
|
||||
nullPadding += string(x)
|
||||
}
|
||||
|
||||
// Bump all the patches forward.
|
||||
for i := range patches {
|
||||
patches[i].Start1 += paddingLength
|
||||
patches[i].Start2 += paddingLength
|
||||
}
|
||||
|
||||
// Add some padding on start of first diff.
|
||||
if len(patches[0].diffs) == 0 || patches[0].diffs[0].Type != DiffEqual {
|
||||
// Add nullPadding equality.
|
||||
patches[0].diffs = append([]Diff{Diff{DiffEqual, nullPadding}}, patches[0].diffs...)
|
||||
patches[0].Start1 -= paddingLength // Should be 0.
|
||||
patches[0].Start2 -= paddingLength // Should be 0.
|
||||
patches[0].Length1 += paddingLength
|
||||
patches[0].Length2 += paddingLength
|
||||
} else if paddingLength > len(patches[0].diffs[0].Text) {
|
||||
// Grow first equality.
|
||||
extraLength := paddingLength - len(patches[0].diffs[0].Text)
|
||||
patches[0].diffs[0].Text = nullPadding[len(patches[0].diffs[0].Text):] + patches[0].diffs[0].Text
|
||||
patches[0].Start1 -= extraLength
|
||||
patches[0].Start2 -= extraLength
|
||||
patches[0].Length1 += extraLength
|
||||
patches[0].Length2 += extraLength
|
||||
}
|
||||
|
||||
// Add some padding on end of last diff.
|
||||
last := len(patches) - 1
|
||||
if len(patches[last].diffs) == 0 || patches[last].diffs[len(patches[last].diffs)-1].Type != DiffEqual {
|
||||
// Add nullPadding equality.
|
||||
patches[last].diffs = append(patches[last].diffs, Diff{DiffEqual, nullPadding})
|
||||
patches[last].Length1 += paddingLength
|
||||
patches[last].Length2 += paddingLength
|
||||
} else if paddingLength > len(patches[last].diffs[len(patches[last].diffs)-1].Text) {
|
||||
// Grow last equality.
|
||||
lastDiff := patches[last].diffs[len(patches[last].diffs)-1]
|
||||
extraLength := paddingLength - len(lastDiff.Text)
|
||||
patches[last].diffs[len(patches[last].diffs)-1].Text += nullPadding[:extraLength]
|
||||
patches[last].Length1 += extraLength
|
||||
patches[last].Length2 += extraLength
|
||||
}
|
||||
|
||||
return nullPadding
|
||||
}

// PatchSplitMax looks through the patches and breaks up any which are longer than the maximum limit of the match algorithm.
// Intended to be called only from within patchApply.
func (dmp *DiffMatchPatch) PatchSplitMax(patches []Patch) []Patch {
	patchSize := dmp.MatchMaxBits
	for x := 0; x < len(patches); x++ {
		if patches[x].Length1 <= patchSize {
			continue
		}
		bigpatch := patches[x]
		// Remove the big old patch.
		patches = append(patches[:x], patches[x+1:]...)
		x--

		Start1 := bigpatch.Start1
		Start2 := bigpatch.Start2
		precontext := ""
		for len(bigpatch.diffs) != 0 {
			// Create one of several smaller patches.
			patch := Patch{}
			empty := true
			patch.Start1 = Start1 - len(precontext)
			patch.Start2 = Start2 - len(precontext)
			if len(precontext) != 0 {
				patch.Length1 = len(precontext)
				patch.Length2 = len(precontext)
				patch.diffs = append(patch.diffs, Diff{DiffEqual, precontext})
			}
			for len(bigpatch.diffs) != 0 && patch.Length1 < patchSize-dmp.PatchMargin {
				diffType := bigpatch.diffs[0].Type
				diffText := bigpatch.diffs[0].Text
				if diffType == DiffInsert {
					// Insertions are harmless.
					patch.Length2 += len(diffText)
					Start2 += len(diffText)
					patch.diffs = append(patch.diffs, bigpatch.diffs[0])
					bigpatch.diffs = bigpatch.diffs[1:]
					empty = false
				} else if diffType == DiffDelete && len(patch.diffs) == 1 && patch.diffs[0].Type == DiffEqual && len(diffText) > 2*patchSize {
					// This is a large deletion. Let it pass in one chunk.
					patch.Length1 += len(diffText)
					Start1 += len(diffText)
					empty = false
					patch.diffs = append(patch.diffs, Diff{diffType, diffText})
					bigpatch.diffs = bigpatch.diffs[1:]
				} else {
					// Deletion or equality. Only take as much as we can stomach.
					diffText = diffText[:min(len(diffText), patchSize-patch.Length1-dmp.PatchMargin)]

					patch.Length1 += len(diffText)
					Start1 += len(diffText)
					if diffType == DiffEqual {
						patch.Length2 += len(diffText)
						Start2 += len(diffText)
					} else {
						empty = false
					}
					patch.diffs = append(patch.diffs, Diff{diffType, diffText})
					if diffText == bigpatch.diffs[0].Text {
						bigpatch.diffs = bigpatch.diffs[1:]
					} else {
						bigpatch.diffs[0].Text =
							bigpatch.diffs[0].Text[len(diffText):]
					}
				}
			}
			// Compute the head context for the next patch.
			precontext = dmp.DiffText2(patch.diffs)
			precontext = precontext[max(0, len(precontext)-dmp.PatchMargin):]

			postcontext := ""
			// Append the end context for this patch.
			if len(dmp.DiffText1(bigpatch.diffs)) > dmp.PatchMargin {
				postcontext = dmp.DiffText1(bigpatch.diffs)[:dmp.PatchMargin]
			} else {
				postcontext = dmp.DiffText1(bigpatch.diffs)
			}

			if len(postcontext) != 0 {
				patch.Length1 += len(postcontext)
				patch.Length2 += len(postcontext)
				if len(patch.diffs) != 0 && patch.diffs[len(patch.diffs)-1].Type == DiffEqual {
					patch.diffs[len(patch.diffs)-1].Text += postcontext
				} else {
					patch.diffs = append(patch.diffs, Diff{DiffEqual, postcontext})
				}
			}
			if !empty {
				x++
				patches = append(patches[:x], append([]Patch{patch}, patches[x:]...)...)
			}
		}
	}
	return patches
}
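Splitting matters because the bitap match used by PatchApply can only search for patterns up to MatchMaxBits characters long (32 by default, one machine word). A hedged sketch of the effect using only exported methods; the exact patch counts depend on the diff, so they are printed rather than asserted:

package main

import (
	"fmt"
	"strings"

	"github.com/sergi/go-diff/diffmatchpatch"
)

func main() {
	dmp := diffmatchpatch.New()
	// One contiguous 100-character edit yields a single oversized patch.
	patches := dmp.PatchMake(strings.Repeat("a", 100), strings.Repeat("b", 100))
	fmt.Println("before:", len(patches), "patch(es)")
	// PatchSplitMax breaks it into pieces the bitap matcher can handle.
	patches = dmp.PatchSplitMax(patches)
	fmt.Println("after:", len(patches), "patch(es)")
}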

// PatchToText takes a list of patches and returns a textual representation.
func (dmp *DiffMatchPatch) PatchToText(patches []Patch) string {
	var text bytes.Buffer
	for _, aPatch := range patches {
		_, _ = text.WriteString(aPatch.String())
	}
	return text.String()
}
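Each Patch serializes as a unidiff-style hunk: an @@ -start,len +start,len @@ header followed by context, deletion, and insertion lines prefixed with ' ', '-', and '+', with payloads %-encoded so PatchFromText below can round-trip them. A quick way to see the format (the exact output shape is an assumption, not asserted):

package main

import (
	"fmt"

	"github.com/sergi/go-diff/diffmatchpatch"
)

func main() {
	dmp := diffmatchpatch.New()
	patches := dmp.PatchMake("The quick brown fox", "The quick red fox")
	// Should print one @@ ... @@ hunk with '-brown' and '+red' payload lines.
	fmt.Print(dmp.PatchToText(patches))
}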

// PatchFromText parses a textual representation of patches and returns a List of Patch objects.
func (dmp *DiffMatchPatch) PatchFromText(textline string) ([]Patch, error) {
	patches := []Patch{}
	if len(textline) == 0 {
		return patches, nil
	}
	text := strings.Split(textline, "\n")
	textPointer := 0
	patchHeader := regexp.MustCompile("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$")

	var patch Patch
	var sign uint8
	var line string
	for textPointer < len(text) {

		if !patchHeader.MatchString(text[textPointer]) {
			return patches, errors.New("Invalid patch string: " + text[textPointer])
		}

		patch = Patch{}
		m := patchHeader.FindStringSubmatch(text[textPointer])

		patch.Start1, _ = strconv.Atoi(m[1])
		if len(m[2]) == 0 {
			patch.Start1--
			patch.Length1 = 1
		} else if m[2] == "0" {
			patch.Length1 = 0
		} else {
			patch.Start1--
			patch.Length1, _ = strconv.Atoi(m[2])
		}

		patch.Start2, _ = strconv.Atoi(m[3])

		if len(m[4]) == 0 {
			patch.Start2--
			patch.Length2 = 1
		} else if m[4] == "0" {
			patch.Length2 = 0
		} else {
			patch.Start2--
			patch.Length2, _ = strconv.Atoi(m[4])
		}
		textPointer++

		for textPointer < len(text) {
			if len(text[textPointer]) > 0 {
				sign = text[textPointer][0]
			} else {
				textPointer++
				continue
			}

			line = text[textPointer][1:]
			// Protect literal '+' first: url.QueryUnescape would otherwise
			// decode it as a space.
			line = strings.Replace(line, "+", "%2b", -1)
			line, _ = url.QueryUnescape(line)
			if sign == '-' {
				// Deletion.
				patch.diffs = append(patch.diffs, Diff{DiffDelete, line})
			} else if sign == '+' {
				// Insertion.
				patch.diffs = append(patch.diffs, Diff{DiffInsert, line})
			} else if sign == ' ' {
				// Minor equality.
				patch.diffs = append(patch.diffs, Diff{DiffEqual, line})
			} else if sign == '@' {
				// Start of next patch.
				break
			} else {
				// WTF?
				return patches, errors.New("Invalid patch mode '" + string(sign) + "' in: " + string(line))
			}
			textPointer++
		}

		patches = append(patches, patch)
	}
	return patches, nil
}
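Together, PatchToText and PatchFromText make patches transportable as plain text. A minimal round-trip sketch using the exported API (expected output annotated as an assumption):

package main

import (
	"fmt"

	"github.com/sergi/go-diff/diffmatchpatch"
)

func main() {
	dmp := diffmatchpatch.New()
	patches := dmp.PatchMake("good boy", "bad boy")

	// Serialize, "ship", and parse back; the reparsed patches apply the
	// same edit the originals would.
	wire := dmp.PatchToText(patches)
	parsed, err := dmp.PatchFromText(wire)
	if err != nil {
		panic(err)
	}
	result, _ := dmp.PatchApply(parsed, "good boy")
	fmt.Println(result) // expected: bad boy
}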

@@ -0,0 +1,88 @@
// Copyright (c) 2012-2016 The go-diff authors. All rights reserved.
// https://github.com/sergi/go-diff
// See the included LICENSE file for license details.
//
// go-diff is a Go implementation of Google's Diff, Match, and Patch library
// Original library is Copyright (c) 2006 Google Inc.
// http://code.google.com/p/google-diff-match-patch/

package diffmatchpatch

import (
	"strings"
	"unicode/utf8"
)

// unescaper unescapes selected chars for compatibility with JavaScript's encodeURI.
// In speed critical applications this could be dropped since the receiving
// application will certainly decode these fine. Note that this function is
// case-sensitive. Thus "%3F" would not be unescaped. But this is ok because
// it is only called with the output of HttpUtility.UrlEncode which returns
// lowercase hex. Example: "%3f" -> "?", "%24" -> "$", etc.
var unescaper = strings.NewReplacer(
	"%21", "!", "%7E", "~", "%27", "'",
	"%28", "(", "%29", ")", "%3B", ";",
	"%2F", "/", "%3F", "?", "%3A", ":",
	"%40", "@", "%26", "&", "%3D", "=",
	"%2B", "+", "%24", "$", "%2C", ",", "%23", "#", "%2A", "*")

// indexOf returns the first index of pattern in str, starting at str[i].
func indexOf(str string, pattern string, i int) int {
	if i > len(str)-1 {
		return -1
	}
	if i <= 0 {
		return strings.Index(str, pattern)
	}
	ind := strings.Index(str[i:], pattern)
	if ind == -1 {
		return -1
	}
	return ind + i
}

// lastIndexOf returns the last index of pattern in str, starting at str[i].
func lastIndexOf(str string, pattern string, i int) int {
	if i < 0 {
		return -1
	}
	if i >= len(str) {
		return strings.LastIndex(str, pattern)
	}
	_, size := utf8.DecodeRuneInString(str[i:])
	return strings.LastIndex(str[:i+size], pattern)
}

// runesIndexOf returns the index of pattern in target, starting at target[i].
func runesIndexOf(target, pattern []rune, i int) int {
	if i > len(target)-1 {
		return -1
	}
	if i <= 0 {
		return runesIndex(target, pattern)
	}
	ind := runesIndex(target[i:], pattern)
	if ind == -1 {
		return -1
	}
	return ind + i
}

func runesEqual(r1, r2 []rune) bool {
	if len(r1) != len(r2) {
		return false
	}
	for i, c := range r1 {
		if c != r2[i] {
			return false
		}
	}
	return true
}

// runesIndex is the equivalent of strings.Index for rune slices.
func runesIndex(r1, r2 []rune) int {
	last := len(r1) - len(r2)
	for i := 0; i <= last; i++ {
		if runesEqual(r1[i:i+len(r2)], r2) {
			return i
		}
	}
	return -1
}
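These helpers are unexported, so the following sketch only compiles inside the diffmatchpatch package (e.g. from a hypothetical _test.go file); the expected values in the comments are assumptions worked out by hand:

package diffmatchpatch

// exampleHelpers is a hypothetical illustration of the helpers above.
func exampleHelpers() (int, int, bool) {
	s := "the quick brown fox"
	first := indexOf(s, "quick", 0)       // 4
	last := lastIndexOf(s, "o", len(s)-1) // 17, the 'o' in "fox"
	eq := runesEqual([]rune("héllo"), []rune("héllo")) // true
	_ = runesIndexOf([]rune(s), []rune("fox"), 5)      // 16
	return first, last, eq
}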