Merge pull request #3493 from cblecker/gomod
Switch over from dep to go modules
This commit is contained in:
commit
7f6b0c35f1
|
@ -1,24 +0,0 @@
|
|||
# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'.
|
||||
|
||||
|
||||
[[projects]]
|
||||
name = "github.com/client9/misspell"
|
||||
packages = [
|
||||
".",
|
||||
"cmd/misspell"
|
||||
]
|
||||
revision = "b90dc15cfd220ecf8bbc9043ecb928cef381f011"
|
||||
version = "v0.3.4"
|
||||
|
||||
[[projects]]
|
||||
name = "gopkg.in/yaml.v2"
|
||||
packages = ["."]
|
||||
revision = "5420a8b6744d3b0345ab293f6fcba19c978f1183"
|
||||
version = "v2.2.1"
|
||||
|
||||
[solve-meta]
|
||||
analyzer-name = "dep"
|
||||
analyzer-version = 1
|
||||
inputs-digest = "15876bcf5bae26a3d8ca6af5a0463be3842912808256ce8818c2f34e0b3859fd"
|
||||
solver-name = "gps-cdcl"
|
||||
solver-version = 1
|
|
@ -1,6 +0,0 @@
|
|||
required = ["github.com/client9/misspell/cmd/misspell"]
|
||||
|
||||
[prune]
|
||||
go-tests = true
|
||||
unused-packages = true
|
||||
non-go = true
|
|
@ -0,0 +1,8 @@
|
|||
module k8s.io/community
|
||||
|
||||
go 1.12
|
||||
|
||||
require (
|
||||
github.com/client9/misspell v0.3.4
|
||||
gopkg.in/yaml.v2 v2.2.2
|
||||
)
|
|
@ -0,0 +1,6 @@
|
|||
github.com/client9/misspell v0.3.4 h1:ta993UF76GwbvJcIo3Y68y/M3WxlpEHPWIGDkJYwzJI=
|
||||
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
|
||||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
|
@ -0,0 +1,22 @@
|
|||
// +build tools
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package tools
|
||||
|
||||
import (
|
||||
_ "github.com/client9/misspell/cmd/misspell"
|
||||
)
|
|
@ -0,0 +1,34 @@
|
|||
dist/
|
||||
bin/
|
||||
vendor/
|
||||
|
||||
# editor turds
|
||||
*~
|
||||
*.gz
|
||||
*.bz2
|
||||
*.csv
|
||||
|
||||
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
|
||||
# Folders
|
||||
_obj
|
||||
_test
|
||||
|
||||
# Architecture specific extensions/prefixes
|
||||
*.[568vq]
|
||||
[568vq].out
|
||||
|
||||
*.cgo1.go
|
||||
*.cgo2.c
|
||||
_cgo_defun.c
|
||||
_cgo_gotypes.go
|
||||
_cgo_export.*
|
||||
|
||||
_testmain.go
|
||||
|
||||
*.exe
|
||||
*.test
|
||||
*.prof
|
|
@ -0,0 +1,20 @@
|
|||
sudo: required
|
||||
dist: trusty
|
||||
group: edge
|
||||
language: go
|
||||
go:
|
||||
- "1.10"
|
||||
git:
|
||||
depth: 1
|
||||
|
||||
script:
|
||||
- ./scripts/travis.sh
|
||||
|
||||
# calls goreleaser when a new tag is pushed
|
||||
deploy:
|
||||
- provider: script
|
||||
skip_cleanup: true
|
||||
script: curl -sL http://git.io/goreleaser | bash
|
||||
on:
|
||||
tags: true
|
||||
condition: $TRAVIS_OS_NAME = linux
|
|
@ -0,0 +1,37 @@
|
|||
FROM golang:1.10.0-alpine
|
||||
|
||||
# cache buster
|
||||
RUN echo 4
|
||||
|
||||
# git is needed for "go get" below
|
||||
RUN apk add --no-cache git make
|
||||
|
||||
# these are my standard testing / linting tools
|
||||
RUN /bin/true \
|
||||
&& go get -u github.com/golang/dep/cmd/dep \
|
||||
&& go get -u github.com/alecthomas/gometalinter \
|
||||
&& gometalinter --install \
|
||||
&& rm -rf /go/src /go/pkg
|
||||
#
|
||||
# * SCOWL word list
|
||||
#
|
||||
# Downloads
|
||||
# http://wordlist.aspell.net/dicts/
|
||||
# --> http://app.aspell.net/create
|
||||
#
|
||||
|
||||
# use en_US large size
|
||||
# use regular size for others
|
||||
ENV SOURCE_US_BIG http://app.aspell.net/create?max_size=70&spelling=US&max_variant=2&diacritic=both&special=hacker&special=roman-numerals&download=wordlist&encoding=utf-8&format=inline
|
||||
|
||||
# should be able tell difference between English variations using this
|
||||
ENV SOURCE_US http://app.aspell.net/create?max_size=60&spelling=US&max_variant=1&diacritic=both&download=wordlist&encoding=utf-8&format=inline
|
||||
ENV SOURCE_GB_ISE http://app.aspell.net/create?max_size=60&spelling=GBs&max_variant=2&diacritic=both&download=wordlist&encoding=utf-8&format=inline
|
||||
ENV SOURCE_GB_IZE http://app.aspell.net/create?max_size=60&spelling=GBz&max_variant=2&diacritic=both&download=wordlist&encoding=utf-8&format=inline
|
||||
ENV SOURCE_CA http://app.aspell.net/create?max_size=60&spelling=CA&max_variant=2&diacritic=both&download=wordlist&encoding=utf-8&format=inline
|
||||
|
||||
RUN /bin/true \
|
||||
&& mkdir /scowl-wl \
|
||||
&& wget -O /scowl-wl/words-US-60.txt ${SOURCE_US} \
|
||||
&& wget -O /scowl-wl/words-GB-ise-60.txt ${SOURCE_GB_ISE}
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'.
|
||||
|
||||
|
||||
[[projects]]
|
||||
name = "github.com/gobwas/glob"
|
||||
packages = [
|
||||
".",
|
||||
"compiler",
|
||||
"match",
|
||||
"syntax",
|
||||
"syntax/ast",
|
||||
"syntax/lexer",
|
||||
"util/runes",
|
||||
"util/strings"
|
||||
]
|
||||
revision = "5ccd90ef52e1e632236f7326478d4faa74f99438"
|
||||
version = "v0.2.3"
|
||||
|
||||
[solve-meta]
|
||||
analyzer-name = "dep"
|
||||
analyzer-version = 1
|
||||
inputs-digest = "087ea4c49358ea8258ad9edfe514cd5ce9975c889c258e5ec7b5d2b720aae113"
|
||||
solver-name = "gps-cdcl"
|
||||
solver-version = 1
|
|
@ -0,0 +1,34 @@
|
|||
# Gopkg.toml example
|
||||
#
|
||||
# Refer to https://golang.github.io/dep/docs/Gopkg.toml.html
|
||||
# for detailed Gopkg.toml documentation.
|
||||
#
|
||||
# required = ["github.com/user/thing/cmd/thing"]
|
||||
# ignored = ["github.com/user/project/pkgX", "bitbucket.org/user/project/pkgA/pkgY"]
|
||||
#
|
||||
# [[constraint]]
|
||||
# name = "github.com/user/project"
|
||||
# version = "1.0.0"
|
||||
#
|
||||
# [[constraint]]
|
||||
# name = "github.com/user/project2"
|
||||
# branch = "dev"
|
||||
# source = "github.com/myfork/project2"
|
||||
#
|
||||
# [[override]]
|
||||
# name = "github.com/x/y"
|
||||
# version = "2.4.0"
|
||||
#
|
||||
# [prune]
|
||||
# non-go = false
|
||||
# go-tests = true
|
||||
# unused-packages = true
|
||||
|
||||
|
||||
[[constraint]]
|
||||
name = "github.com/gobwas/glob"
|
||||
version = "0.2.3"
|
||||
|
||||
[prune]
|
||||
go-tests = true
|
||||
unused-packages = true
|
|
@ -0,0 +1,74 @@
|
|||
CONTAINER=nickg/misspell
|
||||
|
||||
install: ## install misspell into GOPATH/bin
|
||||
go install ./cmd/misspell
|
||||
|
||||
build: hooks ## build and lint misspell
|
||||
./scripts/build.sh
|
||||
|
||||
test: ## run all tests
|
||||
go test .
|
||||
|
||||
# real publishing is done only by travis
|
||||
publish: ## test goreleaser
|
||||
./scripts/goreleaser-dryrun.sh
|
||||
|
||||
# the grep in line 2 is to remove misspellings in the spelling dictionary
|
||||
# that trigger false positives!!
|
||||
falsepositives: /scowl-wl
|
||||
cat /scowl-wl/words-US-60.txt | \
|
||||
grep -i -v -E "payed|Tyre|Euclidian|nonoccurence|dependancy|reenforced|accidently|surprize|dependance|idealogy|binominal|causalities|conquerer|withing|casette|analyse|analogue|dialogue|paralyse|catalogue|archaeolog|clarinettist|catalyses|cancell|chisell|ageing|cataloguing" | \
|
||||
misspell -debug -error
|
||||
cat /scowl-wl/words-GB-ise-60.txt | \
|
||||
grep -v -E "payed|nonoccurence|withing" | \
|
||||
misspell -locale=UK -debug -error
|
||||
# cat /scowl-wl/words-GB-ize-60.txt | \
|
||||
# grep -v -E "withing" | \
|
||||
# misspell -debug -error
|
||||
# cat /scowl-wl/words-CA-60.txt | \
|
||||
# grep -v -E "withing" | \
|
||||
# misspell -debug -error
|
||||
|
||||
bench: ## run benchmarks
|
||||
go test -bench '.*'
|
||||
|
||||
clean: ## clean up time
|
||||
rm -rf dist/ bin/
|
||||
go clean ./...
|
||||
git gc --aggressive
|
||||
|
||||
ci: ## run test like travis-ci does, requires docker
|
||||
docker run --rm \
|
||||
-v $(PWD):/go/src/github.com/client9/misspell \
|
||||
-w /go/src/github.com/client9/misspell \
|
||||
${CONTAINER} \
|
||||
make build falsepositives
|
||||
|
||||
docker-build: ## build a docker test image
|
||||
docker build -t ${CONTAINER} .
|
||||
|
||||
docker-pull: ## pull latest test image
|
||||
docker pull ${CONTAINER}
|
||||
|
||||
docker-console: ## log into the test image
|
||||
docker run --rm -it \
|
||||
-v $(PWD):/go/src/github.com/client9/misspell \
|
||||
-w /go/src/github.com/client9/misspell \
|
||||
${CONTAINER} sh
|
||||
|
||||
.git/hooks/pre-commit: scripts/pre-commit.sh
|
||||
cp -f scripts/pre-commit.sh .git/hooks/pre-commit
|
||||
.git/hooks/commit-msg: scripts/commit-msg.sh
|
||||
cp -f scripts/commit-msg.sh .git/hooks/commit-msg
|
||||
hooks: .git/hooks/pre-commit .git/hooks/commit-msg ## install git precommit hooks
|
||||
|
||||
.PHONY: help ci console docker-build bench
|
||||
|
||||
# https://www.client9.com/self-documenting-makefiles/
|
||||
help:
|
||||
@awk -F ':|##' '/^[^\t].+?:.*?##/ {\
|
||||
printf "\033[36m%-30s\033[0m %s\n", $$1, $$NF \
|
||||
}' $(MAKEFILE_LIST)
|
||||
.DEFAULT_GOAL=help
|
||||
.PHONY=help
|
||||
|
|
@ -0,0 +1,424 @@
|
|||
[](https://travis-ci.org/client9/misspell) [](https://goreportcard.com/report/github.com/client9/misspell) [](https://godoc.org/github.com/client9/misspell) [](http://gocover.io/github.com/client9/misspell) [](https://raw.githubusercontent.com/client9/misspell/master/LICENSE)
|
||||
|
||||
Correct commonly misspelled English words... quickly.
|
||||
|
||||
### Install
|
||||
|
||||
|
||||
If you just want a binary and to start using `misspell`:
|
||||
|
||||
```
|
||||
curl -L -o ./install-misspell.sh https://git.io/misspell
|
||||
sh ./install-misspell.sh
|
||||
```
|
||||
|
||||
|
||||
Both will install as `./bin/misspell`. You can adjust the download location using the `-b` flag. File a ticket if you want another platform supported.
|
||||
|
||||
|
||||
If you use [Go](https://golang.org/), the best way to run `misspell` is by using [gometalinter](#gometalinter). Otherwise, install `misspell` the old-fashioned way:
|
||||
|
||||
```
|
||||
go get -u github.com/client9/misspell/cmd/misspell
|
||||
```
|
||||
|
||||
and misspell will be in your `GOPATH`
|
||||
|
||||
|
||||
Also if you like to live dangerously, one could do
|
||||
|
||||
```bash
|
||||
curl -L https://git.io/misspell | bash
|
||||
```
|
||||
|
||||
### Usage
|
||||
|
||||
|
||||
```bash
|
||||
$ misspell all.html your.txt important.md files.go
|
||||
your.txt:42:10 found "langauge" a misspelling of "language"
|
||||
|
||||
# ^ file, line, column
|
||||
```
|
||||
|
||||
```
|
||||
$ misspell -help
|
||||
Usage of misspell:
|
||||
-debug
|
||||
Debug matching, very slow
|
||||
-error
|
||||
Exit with 2 if misspelling found
|
||||
-f string
|
||||
'csv', 'sqlite3' or custom Golang template for output
|
||||
-i string
|
||||
ignore the following corrections, comma separated
|
||||
-j int
|
||||
Number of workers, 0 = number of CPUs
|
||||
-legal
|
||||
Show legal information and exit
|
||||
-locale string
|
||||
Correct spellings using locale perferances for US or UK. Default is to use a neutral variety of English. Setting locale to US will correct the British spelling of 'colour' to 'color'
|
||||
-o string
|
||||
output file or [stderr|stdout|] (default "stdout")
|
||||
-q Do not emit misspelling output
|
||||
-source string
|
||||
Source mode: auto=guess, go=golang source, text=plain or markdown-like text (default "auto")
|
||||
-w Overwrite file with corrections (default is just to display)
|
||||
```
|
||||
|
||||
## FAQ
|
||||
|
||||
* [Automatic Corrections](#correct)
|
||||
* [Converting UK spellings to US](#locale)
|
||||
* [Using pipes and stdin](#stdin)
|
||||
* [Golang special support](#golang)
|
||||
* [gometalinter support](#gometalinter)
|
||||
* [CSV Output](#csv)
|
||||
* [Using SQLite3](#sqlite)
|
||||
* [Changing output format](#output)
|
||||
* [Checking a folder recursively](#recursive)
|
||||
* [Performance](#performance)
|
||||
* [Known Issues](#issues)
|
||||
* [Debugging](#debug)
|
||||
* [False Negatives and missing words](#missing)
|
||||
* [Origin of Word Lists](#words)
|
||||
* [Software License](#license)
|
||||
* [Problem statement](#problem)
|
||||
* [Other spelling correctors](#others)
|
||||
* [Other ideas](#otherideas)
|
||||
|
||||
<a name="correct"></a>
|
||||
### How can I make the corrections automatically?
|
||||
|
||||
Just add the `-w` flag!
|
||||
|
||||
```
|
||||
$ misspell -w all.html your.txt important.md files.go
|
||||
your.txt:9:21:corrected "langauge" to "language"
|
||||
|
||||
# ^ File is rewritten only if a misspelling is found
|
||||
```
|
||||
|
||||
<a name="locale"></a>
|
||||
### How do I convert British spellings to American (or vice-versa)?
|
||||
|
||||
Add the `-locale US` flag!
|
||||
|
||||
```bash
|
||||
$ misspell -locale US important.txt
|
||||
important.txt:10:20 found "colour" a misspelling of "color"
|
||||
```
|
||||
|
||||
Add the `-locale UK` flag!
|
||||
|
||||
```bash
|
||||
$ echo "My favorite color is blue" | misspell -locale UK
|
||||
stdin:1:3:found "favorite color" a misspelling of "favourite colour"
|
||||
```
|
||||
|
||||
Help is appreciated as I'm neither British nor an
|
||||
expert in the English language.
|
||||
|
||||
<a name="recursive"></a>
|
||||
### How do you check an entire folder recursively?
|
||||
|
||||
Just list a directory you'd like to check
|
||||
|
||||
```bash
|
||||
misspell .
|
||||
misspell aDirectory anotherDirectory aFile
|
||||
```
|
||||
|
||||
You can also run misspell recursively using the following shell tricks:
|
||||
|
||||
```bash
|
||||
misspell directory/**/*
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
```bash
|
||||
find . -type f | xargs misspell
|
||||
```
|
||||
|
||||
You can select a type of file as well. The following examples selects all `.txt` files that are *not* in the `vendor` directory:
|
||||
|
||||
```bash
|
||||
find . -type f -name '*.txt' | grep -v vendor/ | xargs misspell -error
|
||||
```
|
||||
|
||||
<a name="stdin"></a>
|
||||
### Can I use pipes or `stdin` for input?
|
||||
|
||||
Yes!
|
||||
|
||||
Print messages to `stderr` only:
|
||||
|
||||
```bash
|
||||
$ echo "zeebra" | misspell
|
||||
stdin:1:0:found "zeebra" a misspelling of "zebra"
|
||||
```
|
||||
|
||||
Print messages to `stderr`, and corrected text to `stdout`:
|
||||
|
||||
```bash
|
||||
$ echo "zeebra" | misspell -w
|
||||
stdin:1:0:corrected "zeebra" to "zebra"
|
||||
zebra
|
||||
```
|
||||
|
||||
Only print the corrected text to `stdout`:
|
||||
|
||||
```bash
|
||||
$ echo "zeebra" | misspell -w -q
|
||||
zebra
|
||||
```
|
||||
|
||||
<a name="golang"></a>
|
||||
### Are there special rules for golang source files?
|
||||
|
||||
Yes! If the file ends in `.go`, then misspell will only check spelling in
|
||||
comments.
|
||||
|
||||
If you want to force a file to be checked as a golang source, use `-source=go`
|
||||
on the command line. Conversely, you can check a golang source as if it were
|
||||
pure text by using `-source=text`. You might want to do this since many
|
||||
variable names have misspellings in them!
|
||||
|
||||
### Can I check only-comments in other other programming languages?
|
||||
|
||||
I'm told the using `-source=go` works well for ruby, javascript, java, c and
|
||||
c++.
|
||||
|
||||
It doesn't work well for python and bash.
|
||||
|
||||
<a name="gometalinter"></a>
|
||||
### Does this work with gometalinter?
|
||||
|
||||
[gometalinter](https://github.com/alecthomas/gometalinter) runs
|
||||
multiple golang linters. Starting on [2016-06-12](https://github.com/alecthomas/gometalinter/pull/134)
|
||||
gometalinter supports `misspell` natively but it is disabled by default.
|
||||
|
||||
```bash
|
||||
# update your copy of gometalinter
|
||||
go get -u github.com/alecthomas/gometalinter
|
||||
|
||||
# install updates and misspell
|
||||
gometalinter --install --update
|
||||
```
|
||||
|
||||
To use, just enable `misspell`
|
||||
|
||||
```
|
||||
gometalinter --enable misspell ./...
|
||||
```
|
||||
|
||||
Note that gometalinter only checks golang files, and uses the default options
|
||||
of `misspell`
|
||||
|
||||
You may wish to run this on your plaintext (.txt) and/or markdown files too.
|
||||
|
||||
|
||||
<a name="csv"></a>
|
||||
### How Can I Get CSV Output?
|
||||
|
||||
Using `-f csv`, the output is standard comma-seprated values with headers in the first row.
|
||||
|
||||
```
|
||||
misspell -f csv *
|
||||
file,line,column,typo,corrected
|
||||
"README.md",9,22,langauge,language
|
||||
"README.md",47,25,langauge,language
|
||||
```
|
||||
|
||||
<a name="sqlite"></a>
|
||||
### How can I export to SQLite3?
|
||||
|
||||
Using `-f sqlite`, the output is a [sqlite3](https://www.sqlite.org/index.html) dump-file.
|
||||
|
||||
```bash
|
||||
$ misspell -f sqlite * > /tmp/misspell.sql
|
||||
$ cat /tmp/misspell.sql
|
||||
|
||||
PRAGMA foreign_keys=OFF;
|
||||
BEGIN TRANSACTION;
|
||||
CREATE TABLE misspell(
|
||||
"file" TEXT,
|
||||
"line" INTEGER,i
|
||||
"column" INTEGER,i
|
||||
"typo" TEXT,
|
||||
"corrected" TEXT
|
||||
);
|
||||
INSERT INTO misspell VALUES("install.txt",202,31,"immediatly","immediately");
|
||||
# etc...
|
||||
COMMIT;
|
||||
```
|
||||
|
||||
```bash
|
||||
$ sqlite3 -init /tmp/misspell.sql :memory: 'select count(*) from misspell'
|
||||
1
|
||||
```
|
||||
|
||||
With some tricks you can directly pipe output to sqlite3 by using `-init /dev/stdin`:
|
||||
|
||||
```
|
||||
misspell -f sqlite * | sqlite3 -init /dev/stdin -column -cmd '.width 60 15' ':memory' \
|
||||
'select substr(file,35),typo,count(*) as count from misspell group by file, typo order by count desc;'
|
||||
```
|
||||
|
||||
<a name="ignore"></a>
|
||||
### How can I ignore rules?
|
||||
|
||||
Using the `-i "comma,separated,rules"` flag you can specify corrections to ignore.
|
||||
|
||||
For example, if you were to run `misspell -w -error -source=text` against document that contains the string `Guy Finkelshteyn Braswell`, misspell would change the text to `Guy Finkelstheyn Bras well`. You can then
|
||||
determine the rules to ignore by reverting the change and running the with the `-debug` flag. You can then see
|
||||
that the corrections were `htey -> they` and `aswell -> as well`. To ignore these two rules, you add `-i "htey,aswell"` to
|
||||
your command. With debug mode on, you can see it print the corrections, but it will no longer make them.
|
||||
|
||||
<a name="output"></a>
|
||||
### How can I change the output format?
|
||||
|
||||
Using the `-f template` flag you can pass in a
|
||||
[golang text template](https://golang.org/pkg/text/template/) to format the output.
|
||||
|
||||
One can use `printf "%q" VALUE` to safely quote a value.
|
||||
|
||||
The default template is compatible with [gometalinter](https://github.com/alecthomas/gometalinter)
|
||||
```
|
||||
{{ .Filename }}:{{ .Line }}:{{ .Column }}:corrected {{ printf "%q" .Original }} to "{{ printf "%q" .Corrected }}"
|
||||
```
|
||||
|
||||
To just print probable misspellings:
|
||||
|
||||
```
|
||||
-f '{{ .Original }}'
|
||||
```
|
||||
|
||||
<a name="problem"></a>
|
||||
### What problem does this solve?
|
||||
|
||||
This corrects commonly misspelled English words in computer source
|
||||
code, and other text-based formats (`.txt`, `.md`, etc).
|
||||
|
||||
It is designed to run quickly so it can be
|
||||
used as a [pre-commit hook](https://git-scm.com/book/en/v2/Customizing-Git-Git-Hooks)
|
||||
with minimal burden on the developer.
|
||||
|
||||
It does not work with binary formats (e.g. Word, etc).
|
||||
|
||||
It is not a complete spell-checking program nor a grammar checker.
|
||||
|
||||
<a name="others"></a>
|
||||
### What are other misspelling correctors and what's wrong with them?
|
||||
|
||||
Some other misspelling correctors:
|
||||
|
||||
* https://github.com/vlajos/misspell_fixer
|
||||
* https://github.com/lyda/misspell-check
|
||||
* https://github.com/lucasdemarchi/codespell
|
||||
|
||||
They all work but had problems that prevented me from using them at scale:
|
||||
|
||||
* slow, all of the above check one misspelling at a time (i.e. linear) using regexps
|
||||
* not MIT/Apache2 licensed (or equivalent)
|
||||
* have dependencies that don't work for me (python3, bash, linux sed, etc)
|
||||
* don't understand American vs. British English and sometimes makes unwelcome "corrections"
|
||||
|
||||
That said, they might be perfect for you and many have more features
|
||||
than this project!
|
||||
|
||||
<a name="performance"></a>
|
||||
### How fast is it?
|
||||
|
||||
Misspell is easily 100x to 1000x faster than other spelling correctors. You
|
||||
should be able to check and correct 1000 files in under 250ms.
|
||||
|
||||
This uses the mighty power of golang's
|
||||
[strings.Replacer](https://golang.org/pkg/strings/#Replacer) which is
|
||||
a implementation or variation of the
|
||||
[Aho–Corasick algorithm](https://en.wikipedia.org/wiki/Aho–Corasick_algorithm).
|
||||
This makes multiple substring matches *simultaneously*.
|
||||
|
||||
In addition this uses multiple CPU cores to work on multiple files.
|
||||
|
||||
<a name="issues"></a>
|
||||
### What problems does it have?
|
||||
|
||||
Unlike the other projects, this doesn't know what a "word" is. There may be
|
||||
more false positives and false negatives due to this. On the other hand, it
|
||||
sometimes catches things others don't.
|
||||
|
||||
Either way, please file bugs and we'll fix them!
|
||||
|
||||
Since it operates in parallel to make corrections, it can be non-obvious to
|
||||
determine exactly what word was corrected.
|
||||
|
||||
<a name="debug"></a>
|
||||
### It's making mistakes. How can I debug?
|
||||
|
||||
Run using `-debug` flag on the file you want. It should then print what word
|
||||
it is trying to correct. Then [file a
|
||||
bug](https://github.com/client9/misspell/issues) describing the problem.
|
||||
Thanks!
|
||||
|
||||
<a name="missing"></a>
|
||||
### Why is it making mistakes or missing items in golang files?
|
||||
|
||||
The matching function is *case-sensitive*, so variable names that are multiple
|
||||
worlds either in all-upper or all-lower case sometimes can cause false
|
||||
positives. For instance a variable named `bodyreader` could trigger a false
|
||||
positive since `yrea` is in the middle that could be corrected to `year`.
|
||||
Other problems happen if the variable name uses a English contraction that
|
||||
should use an apostrophe. The best way of fixing this is to use the
|
||||
[Effective Go naming
|
||||
conventions](https://golang.org/doc/effective_go.html#mixed-caps) and use
|
||||
[camelCase](https://en.wikipedia.org/wiki/CamelCase) for variable names. You
|
||||
can check your code using [golint](https://github.com/golang/lint)
|
||||
|
||||
<a name="license"></a>
|
||||
### What license is this?
|
||||
|
||||
The main code is [MIT](https://github.com/client9/misspell/blob/master/LICENSE).
|
||||
|
||||
Misspell also makes uses of the Golang standard library and contains a modified version of Golang's [strings.Replacer](https://golang.org/pkg/strings/#Replacer)
|
||||
which are covered under a [BSD License](https://github.com/golang/go/blob/master/LICENSE). Type `misspell -legal` for more details or see [legal.go](https://github.com/client9/misspell/blob/master/legal.go)
|
||||
|
||||
<a name="words"></a>
|
||||
### Where do the word lists come from?
|
||||
|
||||
It started with a word list from
|
||||
[Wikipedia](https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines).
|
||||
Unfortunately, this list had to be highly edited as many of the words are
|
||||
obsolete or based from mistakes on mechanical typewriters (I'm guessing).
|
||||
|
||||
Additional words were added based on actually mistakes seen in
|
||||
the wild (meaning self-generated).
|
||||
|
||||
Variations of UK and US spellings are based on many sources including:
|
||||
|
||||
* http://www.tysto.com/uk-us-spelling-list.html (with heavy editing, many are incorrect)
|
||||
* http://www.oxforddictionaries.com/us/words/american-and-british-spelling-american (excellent site but incomplete)
|
||||
* Diffing US and UK [scowl dictionaries](http://wordlist.aspell.net)
|
||||
|
||||
American English is more accepting of spelling variations than is British
|
||||
English, so "what is American or not" is subject to opinion. Corrections and help welcome.
|
||||
|
||||
<a name="otherideas"></a>
|
||||
### What are some other enhancements that could be done?
|
||||
|
||||
Here's some ideas for enhancements:
|
||||
|
||||
*Capitalization of proper nouns* could be done (e.g. weekday and month names, country names, language names)
|
||||
|
||||
*Opinionated US spellings* US English has a number of words with alternate
|
||||
spellings. Think [adviser vs.
|
||||
advisor](http://grammarist.com/spelling/adviser-advisor/). While "advisor" is not wrong, the opinionated US
|
||||
locale would correct "advisor" to "adviser".
|
||||
|
||||
*Versioning* Some type of versioning is needed so reporting mistakes and errors is easier.
|
||||
|
||||
*Feedback* Mistakes would be sent to some server for agregation and feedback review.
|
||||
|
||||
*Contractions and Apostrophes* This would optionally correct "isnt" to
|
||||
"isn't", etc.
|
|
@ -0,0 +1,38 @@
|
|||
# Release HOWTO
|
||||
|
||||
since I forget.
|
||||
|
||||
|
||||
1. Review existing tags and pick new release number
|
||||
|
||||
```sh
|
||||
git tag
|
||||
```
|
||||
|
||||
2. Tag locally
|
||||
|
||||
```sh
|
||||
git tag -a v0.1.0 -m "First release"
|
||||
```
|
||||
|
||||
If things get screwed up, delete the tag with
|
||||
|
||||
```sh
|
||||
git tag -d v0.1.0
|
||||
```
|
||||
|
||||
3. Test goreleaser
|
||||
|
||||
TODO: how to install goreleaser
|
||||
|
||||
```sh
|
||||
./scripts/goreleaser-dryrun.sh
|
||||
```
|
||||
|
||||
4. Push
|
||||
|
||||
```bash
|
||||
git push origin v0.1.0
|
||||
```
|
||||
|
||||
5. Verify release and edit notes. See https://github.com/client9/misspell/releases
|
|
@ -0,0 +1,38 @@
|
|||
# goreleaser.yml
|
||||
# https://github.com/goreleaser/goreleaser
|
||||
|
||||
project_name: misspell
|
||||
|
||||
builds:
|
||||
-
|
||||
main: cmd/misspell/main.go
|
||||
binary: misspell
|
||||
ldflags: -s -w -X main.version={{.Version}}
|
||||
goos:
|
||||
- darwin
|
||||
- linux
|
||||
- windows
|
||||
goarch:
|
||||
- amd64
|
||||
env:
|
||||
- CGO_ENABLED=0
|
||||
ignore:
|
||||
- goos: darwin
|
||||
goarch: 386
|
||||
- goos: windows
|
||||
goarch: 386
|
||||
|
||||
archive:
|
||||
name_template: "{{ .Binary }}_{{ .Version }}_{{ .Os }}_{{ .Arch }}"
|
||||
replacements:
|
||||
amd64: 64bit
|
||||
386: 32bit
|
||||
darwin: mac
|
||||
files:
|
||||
- none*
|
||||
|
||||
checksum:
|
||||
name_template: "{{ .ProjectName }}_{{ .Version }}_checksums.txt"
|
||||
|
||||
snapshot:
|
||||
name_template: "SNAPSHOT-{{.Commit}}"
|
|
@ -0,0 +1,318 @@
|
|||
#!/bin/sh
|
||||
set -e
|
||||
# Code generated by godownloader. DO NOT EDIT.
|
||||
#
|
||||
|
||||
usage() {
|
||||
this=$1
|
||||
cat <<EOF
|
||||
$this: download go binaries for client9/misspell
|
||||
|
||||
Usage: $this [-b] bindir [version]
|
||||
-b sets bindir or installation directory, default "./bin"
|
||||
[version] is a version number from
|
||||
https://github.com/client9/misspell/releases
|
||||
If version is missing, then an attempt to find the latest will be found.
|
||||
|
||||
Generated by godownloader
|
||||
https://github.com/goreleaser/godownloader
|
||||
|
||||
EOF
|
||||
exit 2
|
||||
}
|
||||
|
||||
parse_args() {
|
||||
#BINDIR is ./bin unless set be ENV
|
||||
# over-ridden by flag below
|
||||
|
||||
BINDIR=${BINDIR:-./bin}
|
||||
while getopts "b:h?" arg; do
|
||||
case "$arg" in
|
||||
b) BINDIR="$OPTARG" ;;
|
||||
h | \?) usage "$0" ;;
|
||||
esac
|
||||
done
|
||||
shift $((OPTIND - 1))
|
||||
VERSION=$1
|
||||
}
|
||||
# this function wraps all the destructive operations
|
||||
# if a curl|bash cuts off the end of the script due to
|
||||
# network, either nothing will happen or will syntax error
|
||||
# out preventing half-done work
|
||||
execute() {
|
||||
TMPDIR=$(mktmpdir)
|
||||
echo "$PREFIX: downloading ${TARBALL_URL}"
|
||||
http_download "${TMPDIR}/${TARBALL}" "${TARBALL_URL}"
|
||||
|
||||
echo "$PREFIX: verifying checksums"
|
||||
http_download "${TMPDIR}/${CHECKSUM}" "${CHECKSUM_URL}"
|
||||
hash_sha256_verify "${TMPDIR}/${TARBALL}" "${TMPDIR}/${CHECKSUM}"
|
||||
|
||||
(cd "${TMPDIR}" && untar "${TARBALL}")
|
||||
install -d "${BINDIR}"
|
||||
install "${TMPDIR}/${BINARY}" "${BINDIR}/"
|
||||
echo "$PREFIX: installed as ${BINDIR}/${BINARY}"
|
||||
}
|
||||
is_supported_platform() {
|
||||
platform=$1
|
||||
found=1
|
||||
case "$platform" in
|
||||
darwin/amd64) found=0 ;;
|
||||
linux/amd64) found=0 ;;
|
||||
esac
|
||||
case "$platform" in
|
||||
darwin/386) found=1 ;;
|
||||
esac
|
||||
return $found
|
||||
}
|
||||
check_platform() {
|
||||
if is_supported_platform "$PLATFORM"; then
|
||||
# optional logging goes here
|
||||
true
|
||||
else
|
||||
echo "${PREFIX}: platform $PLATFORM is not supported. Make sure this script is up-to-date and file request at https://github.com/${PREFIX}/issues/new"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
adjust_version() {
|
||||
if [ -z "${VERSION}" ]; then
|
||||
echo "$PREFIX: checking GitHub for latest version"
|
||||
VERSION=$(github_last_release "$OWNER/$REPO")
|
||||
fi
|
||||
# if version starts with 'v', remove it
|
||||
VERSION=${VERSION#v}
|
||||
}
|
||||
adjust_format() {
|
||||
# change format (tar.gz or zip) based on ARCH
|
||||
true
|
||||
}
|
||||
adjust_os() {
|
||||
# adjust archive name based on OS
|
||||
case ${OS} in
|
||||
386) OS=32bit ;;
|
||||
amd64) OS=64bit ;;
|
||||
darwin) OS=mac ;;
|
||||
esac
|
||||
true
|
||||
}
|
||||
adjust_arch() {
|
||||
# adjust archive name based on ARCH
|
||||
case ${ARCH} in
|
||||
386) ARCH=32bit ;;
|
||||
amd64) ARCH=64bit ;;
|
||||
darwin) ARCH=mac ;;
|
||||
esac
|
||||
true
|
||||
}
|
||||
|
||||
cat /dev/null <<EOF
|
||||
------------------------------------------------------------------------
|
||||
https://github.com/client9/shlib - portable posix shell functions
|
||||
Public domain - http://unlicense.org
|
||||
https://github.com/client9/shlib/blob/master/LICENSE.md
|
||||
but credit (and pull requests) appreciated.
|
||||
------------------------------------------------------------------------
|
||||
EOF
|
||||
is_command() {
|
||||
command -v "$1" >/dev/null
|
||||
}
|
||||
uname_os() {
|
||||
os=$(uname -s | tr '[:upper:]' '[:lower:]')
|
||||
echo "$os"
|
||||
}
|
||||
uname_arch() {
|
||||
arch=$(uname -m)
|
||||
case $arch in
|
||||
x86_64) arch="amd64" ;;
|
||||
x86) arch="386" ;;
|
||||
i686) arch="386" ;;
|
||||
i386) arch="386" ;;
|
||||
aarch64) arch="arm64" ;;
|
||||
armv5*) arch="arm5" ;;
|
||||
armv6*) arch="arm6" ;;
|
||||
armv7*) arch="arm7" ;;
|
||||
esac
|
||||
echo ${arch}
|
||||
}
|
||||
uname_os_check() {
|
||||
os=$(uname_os)
|
||||
case "$os" in
|
||||
darwin) return 0 ;;
|
||||
dragonfly) return 0 ;;
|
||||
freebsd) return 0 ;;
|
||||
linux) return 0 ;;
|
||||
android) return 0 ;;
|
||||
nacl) return 0 ;;
|
||||
netbsd) return 0 ;;
|
||||
openbsd) return 0 ;;
|
||||
plan9) return 0 ;;
|
||||
solaris) return 0 ;;
|
||||
windows) return 0 ;;
|
||||
esac
|
||||
echo "$0: uname_os_check: internal error '$(uname -s)' got converted to '$os' which is not a GOOS value. Please file bug at https://github.com/client9/shlib"
|
||||
return 1
|
||||
}
|
||||
uname_arch_check() {
|
||||
arch=$(uname_arch)
|
||||
case "$arch" in
|
||||
386) return 0 ;;
|
||||
amd64) return 0 ;;
|
||||
arm64) return 0 ;;
|
||||
armv5) return 0 ;;
|
||||
armv6) return 0 ;;
|
||||
armv7) return 0 ;;
|
||||
ppc64) return 0 ;;
|
||||
ppc64le) return 0 ;;
|
||||
mips) return 0 ;;
|
||||
mipsle) return 0 ;;
|
||||
mips64) return 0 ;;
|
||||
mips64le) return 0 ;;
|
||||
s390x) return 0 ;;
|
||||
amd64p32) return 0 ;;
|
||||
esac
|
||||
echo "$0: uname_arch_check: internal error '$(uname -m)' got converted to '$arch' which is not a GOARCH value. Please file bug report at https://github.com/client9/shlib"
|
||||
return 1
|
||||
}
|
||||
untar() {
|
||||
tarball=$1
|
||||
case "${tarball}" in
|
||||
*.tar.gz | *.tgz) tar -xzf "${tarball}" ;;
|
||||
*.tar) tar -xf "${tarball}" ;;
|
||||
*.zip) unzip "${tarball}" ;;
|
||||
*)
|
||||
echo "Unknown archive format for ${tarball}"
|
||||
return 1
|
||||
;;
|
||||
esac
|
||||
}
|
||||
mktmpdir() {
|
||||
test -z "$TMPDIR" && TMPDIR="$(mktemp -d)"
|
||||
mkdir -p "${TMPDIR}"
|
||||
echo "${TMPDIR}"
|
||||
}
|
||||
http_download() {
|
||||
local_file=$1
|
||||
source_url=$2
|
||||
header=$3
|
||||
headerflag=''
|
||||
destflag=''
|
||||
if is_command curl; then
|
||||
cmd='curl --fail -sSL'
|
||||
destflag='-o'
|
||||
headerflag='-H'
|
||||
elif is_command wget; then
|
||||
cmd='wget -q'
|
||||
destflag='-O'
|
||||
headerflag='--header'
|
||||
else
|
||||
echo "http_download: unable to find wget or curl"
|
||||
return 1
|
||||
fi
|
||||
if [ -z "$header" ]; then
|
||||
$cmd $destflag "$local_file" "$source_url"
|
||||
else
|
||||
$cmd $headerflag "$header" $destflag "$local_file" "$source_url"
|
||||
fi
|
||||
}
|
||||
github_api() {
|
||||
local_file=$1
|
||||
source_url=$2
|
||||
header=""
|
||||
case "$source_url" in
|
||||
https://api.github.com*)
|
||||
test -z "$GITHUB_TOKEN" || header="Authorization: token $GITHUB_TOKEN"
|
||||
;;
|
||||
esac
|
||||
http_download "$local_file" "$source_url" "$header"
|
||||
}
|
||||
github_last_release() {
|
||||
owner_repo=$1
|
||||
giturl="https://api.github.com/repos/${owner_repo}/releases/latest"
|
||||
html=$(github_api - "$giturl")
|
||||
version=$(echo "$html" | grep -m 1 "\"tag_name\":" | cut -f4 -d'"')
|
||||
test -z "$version" && return 1
|
||||
echo "$version"
|
||||
}
|
||||
hash_sha256() {
|
||||
TARGET=${1:-/dev/stdin}
|
||||
if is_command gsha256sum; then
|
||||
hash=$(gsha256sum "$TARGET") || return 1
|
||||
echo "$hash" | cut -d ' ' -f 1
|
||||
elif is_command sha256sum; then
|
||||
hash=$(sha256sum "$TARGET") || return 1
|
||||
echo "$hash" | cut -d ' ' -f 1
|
||||
elif is_command shasum; then
|
||||
hash=$(shasum -a 256 "$TARGET" 2>/dev/null) || return 1
|
||||
echo "$hash" | cut -d ' ' -f 1
|
||||
elif is_command openssl; then
|
||||
hash=$(openssl -dst openssl dgst -sha256 "$TARGET") || return 1
|
||||
echo "$hash" | cut -d ' ' -f a
|
||||
else
|
||||
echo "hash_sha256: unable to find command to compute sha-256 hash"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
hash_sha256_verify() {
|
||||
TARGET=$1
|
||||
checksums=$2
|
||||
if [ -z "$checksums" ]; then
|
||||
echo "hash_sha256_verify: checksum file not specified in arg2"
|
||||
return 1
|
||||
fi
|
||||
BASENAME=${TARGET##*/}
|
||||
want=$(grep "${BASENAME}" "${checksums}" 2>/dev/null | tr '\t' ' ' | cut -d ' ' -f 1)
|
||||
if [ -z "$want" ]; then
|
||||
echo "hash_sha256_verify: unable to find checksum for '${TARGET}' in '${checksums}'"
|
||||
return 1
|
||||
fi
|
||||
got=$(hash_sha256 "$TARGET")
|
||||
if [ "$want" != "$got" ]; then
|
||||
echo "hash_sha256_verify: checksum for '$TARGET' did not verify ${want} vs $got"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
cat /dev/null <<EOF
|
||||
------------------------------------------------------------------------
|
||||
End of functions from https://github.com/client9/shlib
|
||||
------------------------------------------------------------------------
|
||||
EOF
|
||||
|
||||
OWNER=client9
|
||||
REPO=misspell
|
||||
BINARY=misspell
|
||||
FORMAT=tar.gz
|
||||
OS=$(uname_os)
|
||||
ARCH=$(uname_arch)
|
||||
PREFIX="$OWNER/$REPO"
|
||||
PLATFORM="${OS}/${ARCH}"
|
||||
GITHUB_DOWNLOAD=https://github.com/${OWNER}/${REPO}/releases/download
|
||||
|
||||
uname_os_check "$OS"
|
||||
uname_arch_check "$ARCH"
|
||||
|
||||
parse_args "$@"
|
||||
|
||||
check_platform
|
||||
|
||||
adjust_version
|
||||
|
||||
adjust_format
|
||||
|
||||
adjust_os
|
||||
|
||||
adjust_arch
|
||||
|
||||
echo "$PREFIX: found version ${VERSION} for ${OS}/${ARCH}"
|
||||
|
||||
NAME=${BINARY}_${VERSION}_${OS}_${ARCH}
|
||||
TARBALL=${NAME}.${FORMAT}
|
||||
TARBALL_URL=${GITHUB_DOWNLOAD}/v${VERSION}/${TARBALL}
|
||||
CHECKSUM=${REPO}_checksums.txt
|
||||
CHECKSUM_URL=${GITHUB_DOWNLOAD}/v${VERSION}/${CHECKSUM}
|
||||
|
||||
# Adjust binary name if windows
|
||||
if [ "$OS" = "windows" ]; then
|
||||
BINARY="${BINARY}.exe"
|
||||
fi
|
||||
|
||||
execute
|
|
@ -0,0 +1,421 @@
|
|||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package misspell_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
. "github.com/client9/misspell"
|
||||
)
|
||||
|
||||
var htmlEscaper = NewStringReplacer(
|
||||
"&", "&",
|
||||
"<", "<",
|
||||
">", ">",
|
||||
`"`, """,
|
||||
"'", "'",
|
||||
)
|
||||
|
||||
var htmlUnescaper = NewStringReplacer(
|
||||
"&", "&",
|
||||
"<", "<",
|
||||
">", ">",
|
||||
""", `"`,
|
||||
"'", "'",
|
||||
)
|
||||
|
||||
// The http package's old HTML escaping function.
|
||||
func oldHTMLEscape(s string) string {
|
||||
s = strings.Replace(s, "&", "&", -1)
|
||||
s = strings.Replace(s, "<", "<", -1)
|
||||
s = strings.Replace(s, ">", ">", -1)
|
||||
s = strings.Replace(s, `"`, """, -1)
|
||||
s = strings.Replace(s, "'", "'", -1)
|
||||
return s
|
||||
}
|
||||
|
||||
var capitalLetters = NewStringReplacer("a", "A", "b", "B")
|
||||
|
||||
// TestReplacer tests the replacer implementations.
|
||||
func TestReplacer(t *testing.T) {
|
||||
type testCase struct {
|
||||
r *StringReplacer
|
||||
in, out string
|
||||
}
|
||||
var testCases []testCase
|
||||
|
||||
// str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8.
|
||||
str := func(b byte) string {
|
||||
return string([]byte{b})
|
||||
}
|
||||
var s []string
|
||||
|
||||
// inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00".
|
||||
for i := 0; i < 256; i++ {
|
||||
s = append(s, str(byte(i)), str(byte(i+1)))
|
||||
}
|
||||
inc := NewStringReplacer(s...)
|
||||
|
||||
// Test cases with 1-byte old strings, 1-byte new strings.
|
||||
testCases = append(testCases,
|
||||
testCase{capitalLetters, "brad", "BrAd"},
|
||||
testCase{capitalLetters, strings.Repeat("a", (32<<10)+123), strings.Repeat("A", (32<<10)+123)},
|
||||
testCase{capitalLetters, "", ""},
|
||||
|
||||
testCase{inc, "brad", "csbe"},
|
||||
testCase{inc, "\x00\xff", "\x01\x00"},
|
||||
testCase{inc, "", ""},
|
||||
|
||||
testCase{NewStringReplacer("a", "1", "a", "2"), "brad", "br1d"},
|
||||
)
|
||||
|
||||
// repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ...
|
||||
s = nil
|
||||
for i := 0; i < 256; i++ {
|
||||
n := i + 1 - 'a'
|
||||
if n < 1 {
|
||||
n = 1
|
||||
}
|
||||
s = append(s, str(byte(i)), strings.Repeat(str(byte(i)), n))
|
||||
}
|
||||
repeat := NewStringReplacer(s...)
|
||||
|
||||
// Test cases with 1-byte old strings, variable length new strings.
|
||||
testCases = append(testCases,
|
||||
testCase{htmlEscaper, "No changes", "No changes"},
|
||||
testCase{htmlEscaper, "I <3 escaping & stuff", "I <3 escaping & stuff"},
|
||||
testCase{htmlEscaper, "&&&", "&&&"},
|
||||
testCase{htmlEscaper, "", ""},
|
||||
|
||||
testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"},
|
||||
testCase{repeat, "abba", "abbbba"},
|
||||
testCase{repeat, "", ""},
|
||||
|
||||
testCase{NewStringReplacer("a", "11", "a", "22"), "brad", "br11d"},
|
||||
)
|
||||
|
||||
// The remaining test cases have variable length old strings.
|
||||
|
||||
testCases = append(testCases,
|
||||
testCase{htmlUnescaper, "&amp;", "&"},
|
||||
testCase{htmlUnescaper, "<b>HTML's neat</b>", "<b>HTML's neat</b>"},
|
||||
testCase{htmlUnescaper, "", ""},
|
||||
|
||||
testCase{NewStringReplacer("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"},
|
||||
|
||||
testCase{NewStringReplacer("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"},
|
||||
|
||||
testCase{NewStringReplacer("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"},
|
||||
)
|
||||
|
||||
// gen1 has multiple old strings of variable length. There is no
|
||||
// overall non-empty common prefix, but some pairwise common prefixes.
|
||||
gen1 := NewStringReplacer(
|
||||
"aaa", "3[aaa]",
|
||||
"aa", "2[aa]",
|
||||
"a", "1[a]",
|
||||
"i", "i",
|
||||
"longerst", "most long",
|
||||
"longer", "medium",
|
||||
"long", "short",
|
||||
"xx", "xx",
|
||||
"x", "X",
|
||||
"X", "Y",
|
||||
"Y", "Z",
|
||||
)
|
||||
testCases = append(testCases,
|
||||
testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"},
|
||||
testCase{gen1, "long, longerst, longer", "short, most long, medium"},
|
||||
testCase{gen1, "xxxxx", "xxxxX"},
|
||||
testCase{gen1, "XiX", "YiY"},
|
||||
testCase{gen1, "", ""},
|
||||
)
|
||||
|
||||
// gen2 has multiple old strings with no pairwise common prefix.
|
||||
gen2 := NewStringReplacer(
|
||||
"roses", "red",
|
||||
"violets", "blue",
|
||||
"sugar", "sweet",
|
||||
)
|
||||
testCases = append(testCases,
|
||||
testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."},
|
||||
testCase{gen2, "", ""},
|
||||
)
|
||||
|
||||
// gen3 has multiple old strings with an overall common prefix.
|
||||
gen3 := NewStringReplacer(
|
||||
"abracadabra", "poof",
|
||||
"abracadabrakazam", "splat",
|
||||
"abraham", "lincoln",
|
||||
"abrasion", "scrape",
|
||||
"abraham", "isaac",
|
||||
)
|
||||
testCases = append(testCases,
|
||||
testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"},
|
||||
testCase{gen3, "abrasion abracad", "scrape abracad"},
|
||||
testCase{gen3, "abba abram abrasive", "abba abram abrasive"},
|
||||
testCase{gen3, "", ""},
|
||||
)
|
||||
|
||||
// foo{1,2,3,4} have multiple old strings with an overall common prefix
|
||||
// and 1- or 2- byte extensions from the common prefix.
|
||||
foo1 := NewStringReplacer(
|
||||
"foo1", "A",
|
||||
"foo2", "B",
|
||||
"foo3", "C",
|
||||
)
|
||||
foo2 := NewStringReplacer(
|
||||
"foo1", "A",
|
||||
"foo2", "B",
|
||||
"foo31", "C",
|
||||
"foo32", "D",
|
||||
)
|
||||
foo3 := NewStringReplacer(
|
||||
"foo11", "A",
|
||||
"foo12", "B",
|
||||
"foo31", "C",
|
||||
"foo32", "D",
|
||||
)
|
||||
foo4 := NewStringReplacer(
|
||||
"foo12", "B",
|
||||
"foo32", "D",
|
||||
)
|
||||
testCases = append(testCases,
|
||||
testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"},
|
||||
testCase{foo1, "", ""},
|
||||
|
||||
testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"},
|
||||
testCase{foo2, "", ""},
|
||||
|
||||
testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"},
|
||||
testCase{foo3, "", ""},
|
||||
|
||||
testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"},
|
||||
testCase{foo4, "", ""},
|
||||
)
|
||||
|
||||
// genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things.
|
||||
allBytes := make([]byte, 256)
|
||||
for i := range allBytes {
|
||||
allBytes[i] = byte(i)
|
||||
}
|
||||
allString := string(allBytes)
|
||||
genAll := NewStringReplacer(
|
||||
allString, "[all]",
|
||||
"\xff", "[ff]",
|
||||
"\x00", "[00]",
|
||||
)
|
||||
testCases = append(testCases,
|
||||
testCase{genAll, allString, "[all]"},
|
||||
testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"},
|
||||
testCase{genAll, "", ""},
|
||||
)
|
||||
|
||||
// Test cases with empty old strings.
|
||||
|
||||
blankToX1 := NewStringReplacer("", "X")
|
||||
blankToX2 := NewStringReplacer("", "X", "", "")
|
||||
blankHighPriority := NewStringReplacer("", "X", "o", "O")
|
||||
blankLowPriority := NewStringReplacer("o", "O", "", "X")
|
||||
blankNoOp1 := NewStringReplacer("", "")
|
||||
blankNoOp2 := NewStringReplacer("", "", "", "A")
|
||||
blankFoo := NewStringReplacer("", "X", "foobar", "R", "foobaz", "Z")
|
||||
testCases = append(testCases,
|
||||
testCase{blankToX1, "foo", "XfXoXoX"},
|
||||
testCase{blankToX1, "", "X"},
|
||||
|
||||
testCase{blankToX2, "foo", "XfXoXoX"},
|
||||
testCase{blankToX2, "", "X"},
|
||||
|
||||
testCase{blankHighPriority, "oo", "XOXOX"},
|
||||
testCase{blankHighPriority, "ii", "XiXiX"},
|
||||
testCase{blankHighPriority, "oiio", "XOXiXiXOX"},
|
||||
testCase{blankHighPriority, "iooi", "XiXOXOXiX"},
|
||||
testCase{blankHighPriority, "", "X"},
|
||||
|
||||
testCase{blankLowPriority, "oo", "OOX"},
|
||||
testCase{blankLowPriority, "ii", "XiXiX"},
|
||||
testCase{blankLowPriority, "oiio", "OXiXiOX"},
|
||||
testCase{blankLowPriority, "iooi", "XiOOXiX"},
|
||||
testCase{blankLowPriority, "", "X"},
|
||||
|
||||
testCase{blankNoOp1, "foo", "foo"},
|
||||
testCase{blankNoOp1, "", ""},
|
||||
|
||||
testCase{blankNoOp2, "foo", "foo"},
|
||||
testCase{blankNoOp2, "", ""},
|
||||
|
||||
testCase{blankFoo, "foobarfoobaz", "XRXZX"},
|
||||
testCase{blankFoo, "foobar-foobaz", "XRX-XZX"},
|
||||
testCase{blankFoo, "", "X"},
|
||||
)
|
||||
|
||||
// single string replacer
|
||||
|
||||
abcMatcher := NewStringReplacer("abc", "[match]")
|
||||
|
||||
testCases = append(testCases,
|
||||
testCase{abcMatcher, "", ""},
|
||||
testCase{abcMatcher, "ab", "ab"},
|
||||
testCase{abcMatcher, "abc", "[match]"},
|
||||
testCase{abcMatcher, "abcd", "[match]d"},
|
||||
testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"},
|
||||
)
|
||||
|
||||
// Issue 6659 cases (more single string replacer)
|
||||
|
||||
noHello := NewStringReplacer("Hello", "")
|
||||
testCases = append(testCases,
|
||||
testCase{noHello, "Hello", ""},
|
||||
testCase{noHello, "Hellox", "x"},
|
||||
testCase{noHello, "xHello", "x"},
|
||||
testCase{noHello, "xHellox", "xx"},
|
||||
)
|
||||
|
||||
// No-arg test cases.
|
||||
|
||||
nop := NewStringReplacer()
|
||||
testCases = append(testCases,
|
||||
testCase{nop, "abc", "abc"},
|
||||
testCase{nop, "", ""},
|
||||
)
|
||||
|
||||
// Run the test cases.
|
||||
|
||||
for i, tc := range testCases {
|
||||
if s := tc.r.Replace(tc.in); s != tc.out {
|
||||
t.Errorf("%d. strings.Replace(%q) = %q, want %q", i, tc.in, s, tc.out)
|
||||
}
|
||||
var buf bytes.Buffer
|
||||
n, err := tc.r.WriteString(&buf, tc.in)
|
||||
if err != nil {
|
||||
t.Errorf("%d. WriteString: %v", i, err)
|
||||
continue
|
||||
}
|
||||
got := buf.String()
|
||||
if got != tc.out {
|
||||
t.Errorf("%d. WriteString(%q) wrote %q, want %q", i, tc.in, got, tc.out)
|
||||
continue
|
||||
}
|
||||
if n != len(tc.out) {
|
||||
t.Errorf("%d. WriteString(%q) wrote correct string but reported %d bytes; want %d (%q)",
|
||||
i, tc.in, n, len(tc.out), tc.out)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type errWriter struct{}
|
||||
|
||||
func (errWriter) Write(p []byte) (n int, err error) {
|
||||
return 0, fmt.Errorf("unwritable")
|
||||
}
|
||||
|
||||
func BenchmarkGenericNoMatch(b *testing.B) {
|
||||
str := strings.Repeat("A", 100) + strings.Repeat("B", 100)
|
||||
generic := NewStringReplacer("a", "A", "b", "B", "12", "123") // varying lengths forces generic
|
||||
for i := 0; i < b.N; i++ {
|
||||
generic.Replace(str)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkGenericMatch1(b *testing.B) {
|
||||
str := strings.Repeat("a", 100) + strings.Repeat("b", 100)
|
||||
generic := NewStringReplacer("a", "A", "b", "B", "12", "123")
|
||||
for i := 0; i < b.N; i++ {
|
||||
generic.Replace(str)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkGenericMatch2(b *testing.B) {
|
||||
str := strings.Repeat("It's <b>HTML</b>!", 100)
|
||||
for i := 0; i < b.N; i++ {
|
||||
htmlUnescaper.Replace(str)
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkSingleString(b *testing.B, pattern, text string) {
|
||||
r := NewStringReplacer(pattern, "[match]")
|
||||
b.SetBytes(int64(len(text)))
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
r.Replace(text)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkSingleMaxSkipping(b *testing.B) {
|
||||
benchmarkSingleString(b, strings.Repeat("b", 25), strings.Repeat("a", 10000))
|
||||
}
|
||||
|
||||
func BenchmarkSingleLongSuffixFail(b *testing.B) {
|
||||
benchmarkSingleString(b, "b"+strings.Repeat("a", 500), strings.Repeat("a", 1002))
|
||||
}
|
||||
|
||||
func BenchmarkSingleMatch(b *testing.B) {
|
||||
benchmarkSingleString(b, "abcdef", strings.Repeat("abcdefghijklmno", 1000))
|
||||
}
|
||||
|
||||
func BenchmarkByteByteNoMatch(b *testing.B) {
|
||||
str := strings.Repeat("A", 100) + strings.Repeat("B", 100)
|
||||
for i := 0; i < b.N; i++ {
|
||||
capitalLetters.Replace(str)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkByteByteMatch(b *testing.B) {
|
||||
str := strings.Repeat("a", 100) + strings.Repeat("b", 100)
|
||||
for i := 0; i < b.N; i++ {
|
||||
capitalLetters.Replace(str)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkByteStringMatch(b *testing.B) {
|
||||
str := "<" + strings.Repeat("a", 99) + strings.Repeat("b", 99) + ">"
|
||||
for i := 0; i < b.N; i++ {
|
||||
htmlEscaper.Replace(str)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkHTMLEscapeNew(b *testing.B) {
|
||||
str := "I <3 to escape HTML & other text too."
|
||||
for i := 0; i < b.N; i++ {
|
||||
htmlEscaper.Replace(str)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkHTMLEscapeOld(b *testing.B) {
|
||||
str := "I <3 to escape HTML & other text too."
|
||||
for i := 0; i < b.N; i++ {
|
||||
oldHTMLEscape(str)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkByteStringReplacerWriteString(b *testing.B) {
|
||||
str := strings.Repeat("I <3 to escape HTML & other text too.", 100)
|
||||
buf := new(bytes.Buffer)
|
||||
for i := 0; i < b.N; i++ {
|
||||
htmlEscaper.WriteString(buf, str)
|
||||
buf.Reset()
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkByteReplacerWriteString(b *testing.B) {
|
||||
str := strings.Repeat("abcdefghijklmnopqrstuvwxyz", 100)
|
||||
buf := new(bytes.Buffer)
|
||||
for i := 0; i < b.N; i++ {
|
||||
capitalLetters.WriteString(buf, str)
|
||||
buf.Reset()
|
||||
}
|
||||
}
|
||||
|
||||
// BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces.
|
||||
func BenchmarkByteByteReplaces(b *testing.B) {
|
||||
str := strings.Repeat("a", 100) + strings.Repeat("b", 100)
|
||||
for i := 0; i < b.N; i++ {
|
||||
strings.Replace(strings.Replace(str, "a", "A", -1), "b", "B", -1)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,12 @@
|
|||
language: go
|
||||
|
||||
go:
|
||||
- 1.4
|
||||
- 1.5
|
||||
- 1.6
|
||||
- 1.7
|
||||
- 1.8
|
||||
- 1.9
|
||||
- tip
|
||||
|
||||
go_import_path: gopkg.in/yaml.v2
|
|
@ -0,0 +1,133 @@
|
|||
# YAML support for the Go language
|
||||
|
||||
Introduction
|
||||
------------
|
||||
|
||||
The yaml package enables Go programs to comfortably encode and decode YAML
|
||||
values. It was developed within [Canonical](https://www.canonical.com) as
|
||||
part of the [juju](https://juju.ubuntu.com) project, and is based on a
|
||||
pure Go port of the well-known [libyaml](http://pyyaml.org/wiki/LibYAML)
|
||||
C library to parse and generate YAML data quickly and reliably.
|
||||
|
||||
Compatibility
|
||||
-------------
|
||||
|
||||
The yaml package supports most of YAML 1.1 and 1.2, including support for
|
||||
anchors, tags, map merging, etc. Multi-document unmarshalling is not yet
|
||||
implemented, and base-60 floats from YAML 1.1 are purposefully not
|
||||
supported since they're a poor design and are gone in YAML 1.2.
|
||||
|
||||
Installation and usage
|
||||
----------------------
|
||||
|
||||
The import path for the package is *gopkg.in/yaml.v2*.
|
||||
|
||||
To install it, run:
|
||||
|
||||
go get gopkg.in/yaml.v2
|
||||
|
||||
API documentation
|
||||
-----------------
|
||||
|
||||
If opened in a browser, the import path itself leads to the API documentation:
|
||||
|
||||
* [https://gopkg.in/yaml.v2](https://gopkg.in/yaml.v2)
|
||||
|
||||
API stability
|
||||
-------------
|
||||
|
||||
The package API for yaml v2 will remain stable as described in [gopkg.in](https://gopkg.in).
|
||||
|
||||
|
||||
License
|
||||
-------
|
||||
|
||||
The yaml package is licensed under the Apache License 2.0. Please see the LICENSE file for details.
|
||||
|
||||
|
||||
Example
|
||||
-------
|
||||
|
||||
```Go
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
|
||||
"gopkg.in/yaml.v2"
|
||||
)
|
||||
|
||||
var data = `
|
||||
a: Easy!
|
||||
b:
|
||||
c: 2
|
||||
d: [3, 4]
|
||||
`
|
||||
|
||||
// Note: struct fields must be public in order for unmarshal to
|
||||
// correctly populate the data.
|
||||
type T struct {
|
||||
A string
|
||||
B struct {
|
||||
RenamedC int `yaml:"c"`
|
||||
D []int `yaml:",flow"`
|
||||
}
|
||||
}
|
||||
|
||||
func main() {
|
||||
t := T{}
|
||||
|
||||
err := yaml.Unmarshal([]byte(data), &t)
|
||||
if err != nil {
|
||||
log.Fatalf("error: %v", err)
|
||||
}
|
||||
fmt.Printf("--- t:\n%v\n\n", t)
|
||||
|
||||
d, err := yaml.Marshal(&t)
|
||||
if err != nil {
|
||||
log.Fatalf("error: %v", err)
|
||||
}
|
||||
fmt.Printf("--- t dump:\n%s\n\n", string(d))
|
||||
|
||||
m := make(map[interface{}]interface{})
|
||||
|
||||
err = yaml.Unmarshal([]byte(data), &m)
|
||||
if err != nil {
|
||||
log.Fatalf("error: %v", err)
|
||||
}
|
||||
fmt.Printf("--- m:\n%v\n\n", m)
|
||||
|
||||
d, err = yaml.Marshal(&m)
|
||||
if err != nil {
|
||||
log.Fatalf("error: %v", err)
|
||||
}
|
||||
fmt.Printf("--- m dump:\n%s\n\n", string(d))
|
||||
}
|
||||
```
|
||||
|
||||
This example will generate the following output:
|
||||
|
||||
```
|
||||
--- t:
|
||||
{Easy! {2 [3 4]}}
|
||||
|
||||
--- t dump:
|
||||
a: Easy!
|
||||
b:
|
||||
c: 2
|
||||
d: [3, 4]
|
||||
|
||||
|
||||
--- m:
|
||||
map[a:Easy! b:map[c:2 d:[3 4]]]
|
||||
|
||||
--- m dump:
|
||||
a: Easy!
|
||||
b:
|
||||
c: 2
|
||||
d:
|
||||
- 3
|
||||
- 4
|
||||
```
|
||||
|
|
@ -13,6 +13,19 @@ import (
|
|||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// jsonNumber is the interface of the encoding/json.Number datatype.
|
||||
// Repeating the interface here avoids a dependency on encoding/json, and also
|
||||
// supports other libraries like jsoniter, which use a similar datatype with
|
||||
// the same interface. Detecting this interface is useful when dealing with
|
||||
// structures containing json.Number, which is a string under the hood. The
|
||||
// encoder should prefer the use of Int64(), Float64() and string(), in that
|
||||
// order, when encoding this type.
|
||||
type jsonNumber interface {
|
||||
Float64() (float64, error)
|
||||
Int64() (int64, error)
|
||||
String() string
|
||||
}
|
||||
|
||||
type encoder struct {
|
||||
emitter yaml_emitter_t
|
||||
event yaml_event_t
|
||||
|
@ -89,6 +102,21 @@ func (e *encoder) marshal(tag string, in reflect.Value) {
|
|||
}
|
||||
iface := in.Interface()
|
||||
switch m := iface.(type) {
|
||||
case jsonNumber:
|
||||
integer, err := m.Int64()
|
||||
if err == nil {
|
||||
// In this case the json.Number is a valid int64
|
||||
in = reflect.ValueOf(integer)
|
||||
break
|
||||
}
|
||||
float, err := m.Float64()
|
||||
if err == nil {
|
||||
// In this case the json.Number is a valid float64
|
||||
in = reflect.ValueOf(float)
|
||||
break
|
||||
}
|
||||
// fallback case - no number could be obtained
|
||||
in = reflect.ValueOf(m.String())
|
||||
case time.Time, *time.Time:
|
||||
// Although time.Time implements TextMarshaler,
|
||||
// we don't want to treat it as a string for YAML
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
module "gopkg.in/yaml.v2"
|
||||
|
||||
require (
|
||||
"gopkg.in/check.v1" v0.0.0-20161208181325-20d25e280405
|
||||
)
|
|
@ -0,0 +1,5 @@
|
|||
# github.com/client9/misspell v0.3.4
|
||||
github.com/client9/misspell/cmd/misspell
|
||||
github.com/client9/misspell
|
||||
# gopkg.in/yaml.v2 v2.2.2
|
||||
gopkg.in/yaml.v2
|
Loading…
Reference in New Issue