Exclude generated files from language statistics (#11653) (#11670)

* Update go-enry to v2.5.2
2025-12-07 13:28:25 +00:00 · 2020-05-29 10:02:00 +01:00
parent 0c40b0badd
commit 42f0769e30
28 changed files with 1402 additions and 1260 deletions
@@ -37,7 +37,7 @@ require (
 	github.com/facebookgo/subset v0.0.0-20150612182917-8dac2c3c4870 // indirect
 	github.com/gliderlabs/ssh v0.2.2
 	github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a // indirect
-	github.com/go-enry/go-enry/v2 v2.3.0
+	github.com/go-enry/go-enry/v2 v2.5.2
 	github.com/go-git/go-billy/v5 v5.0.0
 	github.com/go-git/go-git/v5 v5.0.0
 	github.com/go-openapi/jsonreference v0.19.3 // indirect
@@ -193,10 +193,10 @@ github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a h1:FQqo
 github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE=
 github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 h1:gclg6gY70GLy3PbkQ1AERPfmLMMagS60DKF78eWwLn8=
 github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24=
-github.com/go-enry/go-enry/v2 v2.3.0 h1:o8KwgY6uSplysrIpj+Y42J/xGPp90ogVpxE2Z3s8Unk=
+github.com/go-enry/go-enry/v2 v2.5.2 h1:3f3PFAO6JitWkPi1GQ5/m6Xu4gNL1U5soJ8QaYqJ0YQ=
-github.com/go-enry/go-enry/v2 v2.3.0/go.mod h1:+xFJwbqWi15bvqFHb2ELUWVRKFQtwB61+sDrkvvxxGI=
+github.com/go-enry/go-enry/v2 v2.5.2/go.mod h1:GVzIiAytiS5uT/QiuakK7TF1u4xDab87Y8V5EJRpsIQ=
-github.com/go-enry/go-oniguruma v1.2.0 h1:oBO9XC1IDT9+AoWW5oFsa/7gFeOPacEqDbyXZKWXuDs=
+github.com/go-enry/go-oniguruma v1.2.1 h1:k8aAMuJfMrqm/56SG2lV9Cfti6tC4x8673aHCcBk+eo=
-github.com/go-enry/go-oniguruma v1.2.0/go.mod h1:bWDhYP+S6xZQgiRL7wlTScFYBe023B6ilRZbCAD5Hf4=
+github.com/go-enry/go-oniguruma v1.2.1/go.mod h1:bWDhYP+S6xZQgiRL7wlTScFYBe023B6ilRZbCAD5Hf4=
 github.com/go-git/gcfg v1.5.0 h1:Q5ViNfGF8zFgyJWPqYwA7qGFoMTEiBmdlkcfRmpIMa4=
 github.com/go-git/gcfg v1.5.0/go.mod h1:5m20vg6GwYabIxaOonVkTdrILxQMpEShl1xiMF4ua+E=
 github.com/go-git/go-billy/v5 v5.0.0 h1:7NQHvd9FVid8VL4qVUMm8XifBK+2xCoZ2lSk0agRrHM=
@@ -616,8 +616,6 @@ github.com/tinylib/msgp v1.1.0/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDW
 github.com/tinylib/msgp v1.1.2 h1:gWmO7n0Ys2RBEb7GPYB9Ujq8Mk5p2U08lRnmMcGy6BQ=
 github.com/tinylib/msgp v1.1.2/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE=
 github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
 github.com/toqueteos/trie v1.0.0 h1:8i6pXxNUXNRAqP246iibb7w/pSFquNTQ+uNfriG7vlk=
 github.com/toqueteos/trie v1.0.0/go.mod h1:Ywk48QhEqhU1+DwhMkJ2x7eeGxDHiGkAdc9+0DYcbsM=
 github.com/toqueteos/webbrowser v1.2.0 h1:tVP/gpK69Fx+qMJKsLE7TD8LuGWPnEV71wBN9rrstGQ=
 github.com/toqueteos/webbrowser v1.2.0/go.mod h1:XWoZq4cyp9WeUeak7w7LXRUQf1F1ATJMir8RTqb4ayM=
 github.com/tstranex/u2f v1.0.0 h1:HhJkSzDDlVSVIVt7pDJwCHQj67k7A5EeBgPmeD+pVsQ=
@@ -876,8 +874,6 @@ gopkg.in/testfixtures.v2 v2.5.0 h1:N08B7l2GzFQenyYbzqthDnKAA+cmb17iAZhhFxr7JHw=
 gopkg.in/testfixtures.v2 v2.5.0/go.mod h1:vyAq+MYCgNpR29qitQdLZhdbLFf4mR/2MFJRFoQZZ2M=
 gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
 gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
 gopkg.in/toqueteos/substring.v1 v1.0.2 h1:urLqCeMm6x/eTuQa1oZerNw8N1KNOIp5hD5kGL7lFsE=
 gopkg.in/toqueteos/substring.v1 v1.0.2/go.mod h1:Eb2Z1UYehlVK8LYW2WBVR2rwbujsz3aX8XDrM1vbNew=
 gopkg.in/warnings.v0 v0.1.2 h1:wFXVbFY8DY5/xOe1ECiWdKCzZlxgshcYVNkBHstARME=
 gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI=
 gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74=
@@ -10,8 +10,8 @@ import (
 	"github.com/go-enry/go-enry/v2"
 )
-// GetCodeLanguageWithCallback detects code language based on file name and content using callback
+// GetCodeLanguage detects code language based on file name and content
-func GetCodeLanguageWithCallback(filename string, contentFunc func() ([]byte, error)) string {
+func GetCodeLanguage(filename string, content []byte) string {
 	if language, ok := enry.GetLanguageByExtension(filename); ok {
 		return language
 	}
@@ -20,17 +20,9 @@ func GetCodeLanguageWithCallback(filename string, contentFunc func() ([]byte, er
 		return language
 	}
-	content, err := contentFunc()
+	if len(content) == 0 {
 	if err != nil {
 		return enry.OtherLanguage
 	}
 	return enry.GetLanguage(filepath.Base(filename), content)
 }
 // GetCodeLanguage detects code language based on file name and content
 func GetCodeLanguage(filename string, content []byte) string {
 	return GetCodeLanguageWithCallback(filename, func() ([]byte, error) {
 		return content, nil
 	})
 }
@@ -50,11 +50,15 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, e
 			return nil
 		}
 		// If content can not be read just do detection by filename
 		content, _ := readFile(f, fileSizeLimit)
 		if enry.IsGenerated(f.Name, content) {
 			return nil
 		}
 		// TODO: Use .gitattributes file for linguist overrides
-		language := analyze.GetCodeLanguageWithCallback(f.Name, func() ([]byte, error) {
+		language := analyze.GetCodeLanguage(f.Name, content)
 			return readFile(f, fileSizeLimit)
 		})
 		if language == enry.OtherLanguage || language == "" {
 			return nil
 		}
@@ -1,26 +1,26 @@
 # go-enry [![GoDoc](https://godoc.org/github.com/go-enry/go-enry?status.svg)](https://pkg.go.dev/github.com/go-enry/go-enry/v2) [![Test](https://github.com/go-enry/go-enry/workflows/Test/badge.svg)](https://github.com/go-enry/go-enry/actions?query=workflow%3ATest+branch%3Amaster) [![codecov](https://codecov.io/gh/go-enry/go-enry/branch/master/graph/badge.svg)](https://codecov.io/gh/go-enry/go-enry)
-Programming language detector and toolbox to ignore binary or vendored files. *enry*, started as a port to _Go_ of the original [Linguist](https://github.com/github/linguist) _Ruby_ library, that has an improved *2x performance*.
+Programming language detector and toolbox to ignore binary or vendored files. _enry_, started as a port to _Go_ of the original [Linguist](https://github.com/github/linguist) _Ruby_ library, that has an improved _2x performance_.
-* [CLI](#cli)
+- [CLI](#cli)
-* [Library](#library)
+- [Library](#library)
-    * [Use cases](#use-cases)
+  - [Use cases](#use-cases)
-        * [By filename](#by-filename)
+    - [By filename](#by-filename)
-        * [By text](#by-text)
+    - [By text](#by-text)
-        * [By file](#by-file)
+    - [By file](#by-file)
-        * [Filtering](#filtering-vendoring-binaries-etc)
+    - [Filtering](#filtering-vendoring-binaries-etc)
-        * [Coloring](#language-colors-and-groups)
+    - [Coloring](#language-colors-and-groups)
-    * [Languages](#languages)
+  - [Languages](#languages)
-        * [Go](#go)
+    - [Go](#go)
-        * [Java bindings](#java-bindings)
+    - [Java bindings](#java-bindings)
-        * [Python bindings](#python-bindings)
+    - [Python bindings](#python-bindings)
-* [Divergences from linguist](#divergences-from-linguist)
+- [Divergences from linguist](#divergences-from-linguist)
-* [Benchmarks](#benchmarks)
+- [Benchmarks](#benchmarks)
-* [Why Enry?](#why-enry)
+- [Why Enry?](#why-enry)
-* [Development](#development)
+- [Development](#development)
-    * [Sync with github/linguist upstream](#sync-with-githublinguist-upstream)
+  - [Sync with github/linguist upstream](#sync-with-githublinguist-upstream)
-* [Misc](#misc)
+- [Misc](#misc)
-* [License](#license)
+- [License](#license)
 # CLI
@@ -28,50 +28,62 @@ The CLI binary is hosted in a separate repository [go-enry/enry](https://github.
 # Library
-*enry* is also a Go library for guessing a programming language that exposes API through FFI to multiple programming environments.
+_enry_ is also a Go library for guessing a programming language that exposes API through FFI to multiple programming environments.
 ## Use cases
-*enry* guesses a programming language using a sequence of matching *strategies* that are
+_enry_ guesses a programming language using a sequence of matching _strategies_ that are
-applied progressively to narrow down the possible options. Each *strategy* varies on the type
+applied progressively to narrow down the possible options. Each _strategy_ varies on the type
 of input data that it needs to make a decision: file name, extension, the first line of the file, the full content of the file, etc.
 Depending on available input data, enry API can be roughly divided into the next categories or use cases.
 ### By filename
 Next functions require only a name of the file to make a guess:
 - `GetLanguageByExtension` uses only file extension (which may be ambiguous)
 - `GetLanguageByFilename` useful for cases like `.gitignore`, `.bashrc`, etc
 - all [filtering helpers](#filtering)
- Please note that such guesses are expected not to be very accurate.
+Next functions require only a name of the file to make a guess:
 - `GetLanguageByExtension` uses only file extension (which may be ambiguous)
 - `GetLanguageByFilename` useful for cases like `.gitignore`, `.bashrc`, etc
 - all [filtering helpers](#filtering)
 Please note that such guesses are expected not to be very accurate.
 ### By text
 To make a guess only based on the content of the file or a text snippet, use
 - `GetLanguageByShebang` reads only the first line of text to identify the [shebang](https://en.wikipedia.org/wiki/Shebang_(Unix)).
 - `GetLanguageByModeline` for cases when Vim/Emacs modeline e.g. `/* vim: set ft=cpp: */` may be present at a head or a tail of the text.
 - `GetLanguageByClassifier` uses a Bayesian classifier trained on all the `./samples/` from Linguist.
-   It usually is a last-resort strategy that is used to disambiguate the guess of the previous strategies, and thus it requires a list of "candidate" guesses. One can provide a list of all known languages - keys from the `data.LanguagesLogProbabilities` as possible candidates if more intelligent hypotheses are not available, at the price of possibly suboptimal accuracy.
+To make a guess only based on the content of the file or a text snippet, use
 - `GetLanguageByShebang` reads only the first line of text to identify the [shebang](<https://en.wikipedia.org/wiki/Shebang_(Unix)>).
 - `GetLanguageByModeline` for cases when Vim/Emacs modeline e.g. `/* vim: set ft=cpp: */` may be present at a head or a tail of the text.
 - `GetLanguageByClassifier` uses a Bayesian classifier trained on all the `./samples/` from Linguist.
  It usually is a last-resort strategy that is used to disambiguate the guess of the previous strategies, and thus it requires a list of "candidate" guesses. One can provide a list of all known languages - keys from the `data.LanguagesLogProbabilities` as possible candidates if more intelligent hypotheses are not available, at the price of possibly suboptimal accuracy.
 ### By file
 The most accurate guess would be one when both, the file name and the content are available:
- - `GetLanguagesByContent` only uses file extension and a set of regexp-based content heuristics.
+
- - `GetLanguages` uses the full set of matching strategies and is expected to be most accurate.
+- `GetLanguagesByContent` only uses file extension and a set of regexp-based content heuristics.
 - `GetLanguages` uses the full set of matching strategies and is expected to be most accurate.
 ### Filtering: vendoring, binaries, etc
-*enry* expose a set of file-level helpers `Is*` to simplify filtering out the files that are less interesting for the purpose of source code analysis:
+
- - `IsBinary`
+_enry_ exposes a set of file-level helpers `Is*` to simplify filtering out the files that are less interesting for the purpose of source code analysis:
- - `IsVendor`
+
- - `IsConfiguration`
+- `IsBinary`
- - `IsDocumentation`
+- `IsVendor`
- - `IsDotFile`
+- `IsConfiguration`
- - `IsImage`
+- `IsDocumentation`
 - `IsDotFile`
 - `IsImage`
 - `IsTest`
 - `IsGenerated`
 ### Language colors and groups
-*enry* exposes function to get language color to use for example in presenting statistics in graphs:
+
- - `GetColor`
+_enry_ exposes function to get language color to use for example in presenting statistics in graphs:
- - `GetLanguageGroup` can be used to group similar languages together e.g. for `Less` this function will return `CSS`
+
 - `GetColor`
 - `GetLanguageGroup` can be used to group similar languages together e.g. for `Less` this function will return `CSS`
 ## Languages
@@ -136,39 +148,36 @@ Generated Python bindings using a C shared library and cffi are WIP under [src-d
 A library is going to be published on pypi as [enry](https://pypi.org/project/enry/) for
 macOS and linux platforms. Windows support is planned under [src-d/enry#150](https://github.com/src-d/enry/issues/150).
-Divergences from Linguist
+## Divergences from Linguist
 ------------
 The `enry` library is based on the data from `github/linguist` version **v7.9.0**.
 Parsing [linguist/samples](https://github.com/github/linguist/tree/master/samples) the following `enry` results are different from the Linguist:
-* [Heuristics for ".es" extension](https://github.com/github/linguist/blob/e761f9b013e5b61161481fcb898b59721ee40e3d/lib/linguist/heuristics.yml#L103) in JavaScript could not be parsed, due to unsupported backreference in RE2 regexp engine.
+- [Heuristics for ".es" extension](https://github.com/github/linguist/blob/e761f9b013e5b61161481fcb898b59721ee40e3d/lib/linguist/heuristics.yml#L103) in JavaScript could not be parsed, due to unsupported backreference in RE2 regexp engine.
-* [Heuristics for ".rno" extension](https://github.com/github/linguist/blob/3a1bd3c3d3e741a8aaec4704f782e06f5cd2a00d/lib/linguist/heuristics.yml#L365) in RUNOFF could not be parsed, due to unsupported lookahead in RE2 regexp engine.
+- [Heuristics for ".rno" extension](https://github.com/github/linguist/blob/3a1bd3c3d3e741a8aaec4704f782e06f5cd2a00d/lib/linguist/heuristics.yml#L365) in RUNOFF could not be parsed, due to unsupported lookahead in RE2 regexp engine.
-* [Heuristics for ".inc" extension](https://github.com/github/linguist/blob/f0e2d0d7f1ce600b2a5acccaef6b149c87d8b99c/lib/linguist/heuristics.yml#L222) in NASL could not be parsed, due to unsupported possessive quantifier in RE2 regexp engine.
+- [Heuristics for ".inc" extension](https://github.com/github/linguist/blob/f0e2d0d7f1ce600b2a5acccaef6b149c87d8b99c/lib/linguist/heuristics.yml#L222) in NASL could not be parsed, due to unsupported possessive quantifier in RE2 regexp engine.
-* As of [Linguist v5.3.2](https://github.com/github/linguist/releases/tag/v5.3.2) it is using [flex-based scanner in C for tokenization](https://github.com/github/linguist/pull/3846). Enry still uses [extract_token](https://github.com/github/linguist/pull/3846/files#diff-d5179df0b71620e3fac4535cd1368d15L60) regex-based algorithm. See [#193](https://github.com/src-d/enry/issues/193).
+- As of [Linguist v5.3.2](https://github.com/github/linguist/releases/tag/v5.3.2) it is using [flex-based scanner in C for tokenization](https://github.com/github/linguist/pull/3846). Enry still uses [extract_token](https://github.com/github/linguist/pull/3846/files#diff-d5179df0b71620e3fac4535cd1368d15L60) regex-based algorithm. See [#193](https://github.com/src-d/enry/issues/193).
-* Bayesian classifier can't distinguish "SQL" from "PLpgSQL. See [#194](https://github.com/src-d/enry/issues/194).
+- Bayesian classifier can't distinguish "SQL" from "PLpgSQL". See [#194](https://github.com/src-d/enry/issues/194).
-* Detection of [generated files](https://github.com/github/linguist/blob/bf95666fc15e49d556f2def4d0a85338423c25f3/lib/linguist/generated.rb#L53) is not supported yet.
+- Detection of [generated files](https://github.com/github/linguist/blob/bf95666fc15e49d556f2def4d0a85338423c25f3/lib/linguist/generated.rb#L53) is not supported yet.
- (Thus they are not excluded from CLI output). See [#213](https://github.com/src-d/enry/issues/213).
+  (Thus they are not excluded from CLI output). See [#213](https://github.com/src-d/enry/issues/213).
-* XML detection strategy is not implemented. See [#192](https://github.com/src-d/enry/issues/192).
+- XML detection strategy is not implemented. See [#192](https://github.com/src-d/enry/issues/192).
-* Overriding languages and types though `.gitattributes` is not yet supported. See [#18](https://github.com/src-d/enry/issues/18).
+- Overriding languages and types through `.gitattributes` is not yet supported. See [#18](https://github.com/src-d/enry/issues/18).
-* `enry` CLI output does NOT exclude `.gitignore`ed files and git submodules, as Linguist does
+- `enry` CLI output does NOT exclude `.gitignore`ed files and git submodules, as Linguist does
 In all the cases above that have an issue number - we plan to update enry to match Linguist behavior.
 ## Benchmarks
-Benchmarks
+Enry's language detection has been compared with Linguist's on [_linguist/samples_](https://github.com/github/linguist/tree/master/samples).
 ------------
 Enry's language detection has been compared with Linguist's on [*linguist/samples*](https://github.com/github/linguist/tree/master/samples).
 We got these results:
@@ -182,9 +191,7 @@ Go regexp engine being slower than Ruby's on, wich is based on [oniguruma](https
 See [instructions](#misc) for running enry with oniguruma.
-
+## Why Enry?
 Why Enry?
 ------------
 In the movie [My Fair Lady](https://en.wikipedia.org/wiki/My_Fair_Lady), [Professor Henry Higgins](http://www.imdb.com/character/ch0011719/) is a linguist who at the very beginning of the movie enjoys guessing the origin of people based on their accent.
@@ -199,10 +206,9 @@ To run the tests use:
 Setting `ENRY_TEST_REPO` to the path of an existing checkout of Linguist will avoid cloning it and speed tests up.
 Setting `ENRY_DEBUG=1` will provide insight in the Bayesian classifier building done by `make code-generate`.
 ### Sync with github/linguist upstream
-*enry* re-uses parts of the original [github/linguist](https://github.com/github/linguist) to generate internal data structures.
+_enry_ re-uses parts of the original [github/linguist](https://github.com/github/linguist) to generate internal data structures.
 In order to update to the latest release of linguist do:
 ```bash
@@ -217,10 +223,10 @@ $ make code-generate
 To stay in sync, enry needs to be updated when a new release of the linguist includes changes to any of the following files:
-* [languages.yml](https://github.com/github/linguist/blob/master/lib/linguist/languages.yml)
+- [languages.yml](https://github.com/github/linguist/blob/master/lib/linguist/languages.yml)
-* [heuristics.yml](https://github.com/github/linguist/blob/master/lib/linguist/heuristics.yml)
+- [heuristics.yml](https://github.com/github/linguist/blob/master/lib/linguist/heuristics.yml)
-* [vendor.yml](https://github.com/github/linguist/blob/master/lib/linguist/vendor.yml)
+- [vendor.yml](https://github.com/github/linguist/blob/master/lib/linguist/vendor.yml)
-* [documentation.yml](https://github.com/github/linguist/blob/master/lib/linguist/documentation.yml)
+- [documentation.yml](https://github.com/github/linguist/blob/master/lib/linguist/documentation.yml)
 There is no automation for detecting the changes in the linguist project, so this process above has to be done manually from time to time.
@@ -229,8 +235,6 @@ the generated files (in [data](https://github.com/go-enry/go-enry/blob/master/da
 Separating all the necessary "manual" code changes to a different PR that includes some background description and an update to the documentation on ["divergences from linguist"](#divergences-from-linguist) is very much appreciated as it simplifies the maintenance (review/release notes/etc).
 ## Misc
 <details>
@@ -238,19 +242,20 @@ Separating all the necessary "manual" code changes to a different PR that includ
 ### Benchmark
-All benchmark scripts are in [*benchmarks*](https://github.com/go-enry/go-enry/blob/master/benchmarks) directory.
+All benchmark scripts are in [_benchmarks_](https://github.com/go-enry/go-enry/blob/master/benchmarks) directory.
 #### Dependencies
 As benchmarks depend on Ruby and Github-Linguist gem make sure you have:
 - Ruby (e.g using [`rbenv`](https://github.com/rbenv/rbenv)), [`bundler`](https://bundler.io/) installed
 - Docker
 - [native dependencies](https://github.com/github/linguist/#dependencies) installed
 - Build the gem `cd .linguist && bundle install && rake build_gem && cd -`
 - Install it `gem install --no-rdoc --no-ri --local .linguist/github-linguist-*.gem`
 As benchmarks depend on Ruby and Github-Linguist gem make sure you have:
 - Ruby (e.g using [`rbenv`](https://github.com/rbenv/rbenv)), [`bundler`](https://bundler.io/) installed
 - Docker
 - [native dependencies](https://github.com/github/linguist/#dependencies) installed
 - Build the gem `cd .linguist && bundle install && rake build_gem && cd -`
 - Install it `gem install --no-rdoc --no-ri --local .linguist/github-linguist-*.gem`
 #### Quick benchmark
 To run quicker benchmarks
    make benchmarks
@@ -259,19 +264,20 @@ to get average times for the primary detection function and strategies for the w
    make benchmarks-samples
 #### Full benchmark
 If you want to reproduce the same benchmarks as reported above:
- - Make sure all [dependencies](#benchmark-dependencies) are installed
+
- - Install [gnuplot](http://gnuplot.info) (in order to plot the histogram)
+- Make sure all [dependencies](#benchmark-dependencies) are installed
- - Run `ENRY_TEST_REPO="$PWD/.linguist" benchmarks/run.sh` (takes ~15h)
+- Install [gnuplot](http://gnuplot.info) (in order to plot the histogram)
 - Run `ENRY_TEST_REPO="$PWD/.linguist" benchmarks/run.sh` (takes ~15h)
 It will run the benchmarks for enry and Linguist, parse the output, create csv files and plot the histogram.
 ### Faster regexp engine (optional)
 [Oniguruma](https://github.com/kkos/oniguruma) is CRuby's regular expression engine.
-It is very fast and performs better than the one built into Go runtime. *enry* supports swapping
+It is very fast and performs better than the one built into Go runtime. _enry_ supports swapping
 between those two engines thanks to [rubex](https://github.com/moovweb/rubex) project.
 The typical overall speedup from using Oniguruma is 1.5-2x. However, it requires CGo and the external shared library.
 On macOS with [Homebrew](https://brew.sh/), it is:
@@ -296,8 +302,6 @@ and then rebuild the project.
 </details>
-
+## License
 License
 ------------
 Apache License, Version 2.0. See [LICENSE](LICENSE)
@@ -328,15 +328,13 @@ func getInterpreter(data []byte) (interpreter string) {
 	return
 }
-func getFirstLine(data []byte) []byte {
+func getFirstLine(content []byte) []byte {
-	buf := bufio.NewScanner(bytes.NewReader(data))
+	nlpos := bytes.IndexByte(content, '\n')
-	buf.Scan()
+	if nlpos < 0 {
-	line := buf.Bytes()
+		return content
 	if err := buf.Err(); err != nil {
 		return nil
 	}
-	return line
+	return content[:nlpos]
 }
 func hasShebang(line []byte) bool {
@@ -3,24 +3,24 @@
 package data
-import "gopkg.in/toqueteos/substring.v1"
+import "github.com/go-enry/go-enry/v2/regex"
-var DocumentationMatchers = substring.Or(
+var DocumentationMatchers = []regex.EnryRegexp{
-	substring.Regexp(`^[Dd]ocs?/`),
+	regex.MustCompile(`^[Dd]ocs?/`),
-	substring.Regexp(`(^|/)[Dd]ocumentation/`),
+	regex.MustCompile(`(^|/)[Dd]ocumentation/`),
-	substring.Regexp(`(^|/)[Gg]roovydoc/`),
+	regex.MustCompile(`(^|/)[Gg]roovydoc/`),
-	substring.Regexp(`(^|/)[Jj]avadoc/`),
+	regex.MustCompile(`(^|/)[Jj]avadoc/`),
-	substring.Regexp(`^[Mm]an/`),
+	regex.MustCompile(`^[Mm]an/`),
-	substring.Regexp(`^[Ee]xamples/`),
+	regex.MustCompile(`^[Ee]xamples/`),
-	substring.Regexp(`^[Dd]emos?/`),
+	regex.MustCompile(`^[Dd]emos?/`),
-	substring.Regexp(`(^|/)inst/doc/`),
+	regex.MustCompile(`(^|/)inst/doc/`),
-	substring.Regexp(`(^|/)CHANGE(S|LOG)?(\.|$)`),
+	regex.MustCompile(`(^|/)CHANGE(S|LOG)?(\.|$)`),
-	substring.Regexp(`(^|/)CONTRIBUTING(\.|$)`),
+	regex.MustCompile(`(^|/)CONTRIBUTING(\.|$)`),
-	substring.Regexp(`(^|/)COPYING(\.|$)`),
+	regex.MustCompile(`(^|/)COPYING(\.|$)`),
-	substring.Regexp(`(^|/)INSTALL(\.|$)`),
+	regex.MustCompile(`(^|/)INSTALL(\.|$)`),
-	substring.Regexp(`(^|/)LICEN[CS]E(\.|$)`),
+	regex.MustCompile(`(^|/)LICEN[CS]E(\.|$)`),
-	substring.Regexp(`(^|/)[Ll]icen[cs]e(\.|$)`),
+	regex.MustCompile(`(^|/)[Ll]icen[cs]e(\.|$)`),
-	substring.Regexp(`(^|/)README(\.|$)`),
+	regex.MustCompile(`(^|/)README(\.|$)`),
-	substring.Regexp(`(^|/)[Rr]eadme(\.|$)`),
+	regex.MustCompile(`(^|/)[Rr]eadme(\.|$)`),
-	substring.Regexp(`^[Ss]amples?/`),
+	regex.MustCompile(`^[Ss]amples?/`),
-)
+}
@@ -0,0 +1,823 @@
 package data
 import (
 	"bytes"
 	"strings"
 	"github.com/go-enry/go-enry/v2/regex"
 )
 // GeneratedCodeExtensions is the set of file extensions that, on their own,
 // mark a file as generated. Keys include the leading dot; the empty struct
 // values carry no data (the map is used purely for membership tests).
 var GeneratedCodeExtensions = map[string]struct{}{
 	// Xcode files
 	".nib":             {},
 	".xcworkspacedata": {},
 	".xcuserstate":     {},
 }
 // GeneratedCodeNameMatcher is a predicate over a file name: it returns true
 // when a file with the given name is considered generated. Matchers of this
 // type never look at file content.
 type GeneratedCodeNameMatcher func(string) bool
 // nameMatches builds a matcher that reports whether a file name matches the
 // regular expression pattern. The pattern is compiled once, when the matcher
 // is created; regex.MustCompile is used, so an invalid pattern is a
 // programming error rather than a returned error.
 func nameMatches(pattern string) GeneratedCodeNameMatcher {
 	compiled := regex.MustCompile(pattern)
 	// The bound method value has exactly the matcher's signature.
 	return compiled.MatchString
 }
 // nameContains builds a matcher that reports whether pattern occurs anywhere
 // in the file name as a plain (non-regex) substring.
 func nameContains(pattern string) GeneratedCodeNameMatcher {
 	var matcher GeneratedCodeNameMatcher = func(candidate string) bool {
 		return strings.Index(candidate, pattern) >= 0
 	}
 	return matcher
 }
 // nameEndsWith builds a matcher that reports whether the file name ends with
 // the literal suffix given in pattern.
 func nameEndsWith(pattern string) GeneratedCodeNameMatcher {
 	var matcher GeneratedCodeNameMatcher = func(candidate string) bool {
 		return strings.HasSuffix(candidate, pattern)
 	}
 	return matcher
 }
 // GeneratedCodeNameMatchers lists every matcher that decides whether a file
 // is generated by looking at its name (path) alone, without reading content.
 // Each entry is built from one of the nameMatches (regex), nameContains
 // (substring) or nameEndsWith (suffix) constructors.
 var GeneratedCodeNameMatchers = []GeneratedCodeNameMatcher{
 	// CocoaPods: anything below a "Pods/" directory.
 	nameMatches(`(^Pods|\/Pods)\/`),
 	// Carthage build output: anything below "Carthage/Build/".
 	nameMatches(`(^|\/)Carthage\/Build\/`),
 	// .NET designer files (*.designer.cs / *.designer.vb, case-insensitive).
 	nameMatches(`(?i)\.designer\.(cs|vb)$`),
 	// Generated .NET SpecFlow feature files.
 	nameEndsWith(".feature.cs"),
 	// Node modules.
 	nameContains("node_modules/"),
 	// Go vendor directories: "vendor/" followed by a domain-like path.
 	nameMatches(`vendor\/([-0-9A-Za-z]+\.)+(com|edu|gov|in|me|net|org|fm|io)`),
 	// Go dependency lock files (dep and glide).
 	nameEndsWith("Gopkg.lock"),
 	nameEndsWith("glide.lock"),
 	// Esy lock.
 	// NOTE(review): the "." in "esy.lock" is unescaped, so it matches any
 	// character, not only a literal dot — confirm this is intended.
 	nameMatches(`(^|\/)(\w+\.)?esy.lock$`),
 	// npm shrinkwrap.
 	nameEndsWith("npm-shrinkwrap.json"),
 	// npm package lock.
 	nameEndsWith("package-lock.json"),
 	// Yarn Plug'n'Play loader (.pnp.js, .pnp.cjs, .pnp.mjs).
 	nameMatches(`(^|\/)\.pnp\.(c|m)?js$`),
 	// Godeps.
 	nameContains("Godeps/"),
 	// Composer lock.
 	nameEndsWith("composer.lock"),
 	// Files generated by Zephir (*.zep.c, *.zep.h, *.zep.php).
 	nameMatches(`.\.zep\.(?:c|h|php)$`),
 	// Cargo lock.
 	nameEndsWith("Cargo.lock"),
 	// Pipenv lock.
 	nameEndsWith("Pipfile.lock"),
 	// GraphQL Relay generated artifacts.
 	nameContains("__generated__/"),
 }
 // GeneratedCodeMatcher is a predicate that reports whether a file is
 // generated code. It receives the file path, its extension (the matchers in
 // this file compare it against dot-prefixed values such as ".js") and the
 // file content.
 type GeneratedCodeMatcher func(path, ext string, content []byte) bool
 // GeneratedCodeMatchers is the list of all generated code matchers that
 // rely on checking the content of the file to make the guess.
 // Every entry has the GeneratedCodeMatcher signature.
 // NOTE(review): the consumer of this list is not shown here; presumably a
 // file counts as generated as soon as any matcher returns true — confirm
 // against the caller.
 var GeneratedCodeMatchers = []GeneratedCodeMatcher{
 	isMinifiedFile,
 	hasSourceMapReference,
 	isSourceMap,
 	isCompiledCoffeeScript,
 	isGeneratedNetDocfile,
 	isGeneratedJavaScriptPEGParser,
 	isGeneratedPostScript,
 	isGeneratedGo,
 	isGeneratedProtobuf,
 	isGeneratedJavaScriptProtocolBuffer,
 	isGeneratedApacheThrift,
 	isGeneratedJNIHeader,
 	isVCRCassette,
 	isCompiledCythonFile,
 	isGeneratedModule,
 	isGeneratedUnity3DMeta,
 	isGeneratedRacc,
 	isGeneratedJFlex,
 	isGeneratedGrammarKit,
 	isGeneratedRoxygen2,
 	isGeneratedJison,
 	isGeneratedGRPCCpp,
 	isGeneratedDart,
 	isGeneratedPerlPPPortHeader,
 	isGeneratedGameMakerStudio,
 	isGeneratedGimp,
 	isGeneratedVisualStudio6,
 	isGeneratedHaxe,
 	isGeneratedHTML,
 	isGeneratedJooq,
 }
 // canBeMinified reports whether ext is one of the extensions subject to the
 // minification checks: ".js" or ".css" (exact, case-sensitive match).
 func canBeMinified(ext string) bool {
 	switch ext {
 	case ".js", ".css":
 		return true
 	default:
 		return false
 	}
 }
 // isMinifiedFile reports whether the file looks minified.
 // Only files accepted by canBeMinified (css or js) are considered; such a
 // file counts as minified when its average line length, computed with
 // integer division over the lines visited by forEachLine, exceeds 110.
 // A file with no lines is never minified. The path argument is unused.
 func isMinifiedFile(path, ext string, content []byte) bool {
 	if canBeMinified(ext) {
 		var totalChars, lineCount uint64
 		forEachLine(content, func(line []byte) {
 			lineCount++
 			totalChars += uint64(len(line))
 		})
 		// The lineCount test short-circuits first, so there is no division by zero.
 		return lineCount != 0 && totalChars/lineCount > 110
 	}
 	return false
 }
 // sourceMapRegex matches a source-map reference: a "//#", "//@", "/*#" or
 // "/*@" comment at the start of the line followed by " sourceURL" or
 // " sourceMappingURL", or the text "sourceURL=" anywhere in the line.
 var sourceMapRegex = regex.MustCompile(`^\/[*\/][\#@] source(?:Mapping)?URL|sourceURL=`)
 // hasSourceMapReference reports whether a css or js file carries a reference
 // to a source-map file. Only the lines returned by getLines(content, -2)
 // (presumably the last two lines of the file) are inspected.
 func hasSourceMapReference(_ string, ext string, content []byte) bool {
 	if canBeMinified(ext) {
 		tail := getLines(content, -2)
 		for i := range tail {
 			if sourceMapRegex.Match(tail[i]) {
 				return true
 			}
 		}
 	}
 	return false
 }
 // sourceMapRegexps match the first line of a source map: either a JSON
 // object opening with a numeric "version" field, or the
 // "/** Begin line maps. **/{" header.
 var sourceMapRegexps = []regex.EnryRegexp{
 	regex.MustCompile(`^{"version":\d+,`),
 	regex.MustCompile(`^\/\*\* Begin line maps\. \*\*\/{`),
 }
 // isSourceMap reports whether the file itself is a source map. A path ending
 // in ".js.map" or ".css.map" is enough; otherwise the first line of the
 // content must match one of sourceMapRegexps. The ext argument is ignored.
 func isSourceMap(path, _ string, content []byte) bool {
 	for _, suffix := range [...]string{".js.map", ".css.map"} {
 		if strings.HasSuffix(path, suffix) {
 			return true
 		}
 	}
 	head := getFirstLine(content)
 	if len(head) > 0 {
 		for _, re := range sourceMapRegexps {
 			if re.Match(head) {
 				return true
 			}
 		}
 	}
 	return false
 }
 // isCompiledCoffeeScript reports whether a ".js" file looks like compiler
 // output from CoffeeScript. The file must be wrapped as
 // "(function() {" ... "}).call(this);" and its body must reach a heuristic
 // score of at least 3, gathered from lines that contain "var ".
 // The path argument is unused.
 func isCompiledCoffeeScript(path, ext string, content []byte) bool {
 	if ext != ".js" {
 		return false
 	}
 	head := getFirstLine(content)
 	tail := getLines(content, -2)
 	if len(tail) < 2 {
 		return false
 	}
 	// NOTE(review): the index order of tail depends on how getLines treats a
 	// negative count — confirm tail[0] is the (empty) final line.
 	wrapped := string(head) == "(function() {" &&
 		string(tail[1]) == "}).call(this);" &&
 		len(tail[0]) == 0
 	if !wrapped {
 		return false
 	}
 	var score int
 	varKeyword := []byte("var ")
 	forEachLine(content, func(line []byte) {
 		if !bytes.Contains(line, varKeyword) {
 			return
 		}
 		// Underscored temporaries are a weak hint of Coffee output.
 		score += countAppearancesInLine(line, "_fn", "_i", "_len", "_ref", "_results")
 		// The bind/extend helpers are very Coffee specific, so weigh them more.
 		score += 3 * countAppearancesInLine(line, "__bind", "__extends", "__hasProp", "__indexOf", "__slice")
 	})
 	// The threshold of 3 is fairly arbitrary and may need tweaking.
 	// See: https://github.com/github/linguist/blob/master/lib/linguist/generated.rb#L176-L213
 	return score >= 3
 }
 // isGeneratedNetDocfile reports whether an ".xml" file is a generated .NET
 // documentation file. The content is split on '\n'; at least four pieces are
 // required, the second must contain "<doc>", the third "<assembly>", and the
 // second-to-last "</doc>". The path argument is ignored.
 func isGeneratedNetDocfile(_, ext string, content []byte) bool {
 	if ext != ".xml" {
 		return false
 	}
 	rows := bytes.Split(content, []byte("\n"))
 	total := len(rows)
 	if total < 4 {
 		return false
 	}
 	switch {
 	case !bytes.Contains(rows[1], []byte("<doc>")):
 		return false
 	case !bytes.Contains(rows[2], []byte("<assembly>")):
 		return false
 	default:
 		return bytes.Contains(rows[total-2], []byte("</doc>"))
 	}
 }
 // pegJavaScriptGeneratedRegex matches text whose first block comment
 // contains the phrase "Generated by PEG.js".
 var pegJavaScriptGeneratedRegex = regex.MustCompile(`^(?:[^\/]|\/[^\*])*\/\*(?:[^\*]|\*[^\/])*Generated by PEG.js`)
 // isGeneratedJavaScriptPEGParser reports whether a ".js" file is a parser
 // generated by PEG.js. Such parsers carry a marker comment near the top of
 // the file, so only the lines returned by getLines(content, 5) are checked,
 // concatenated without a separator. The path argument is ignored.
 func isGeneratedJavaScriptPEGParser(_, ext string, content []byte) bool {
 	if ext != ".js" {
 		return false
 	}
 	header := bytes.Join(getLines(content, 5), nil)
 	return pegJavaScriptGeneratedRegex.Match(header)
 }
 // postScriptType1And42Regex matches the "currentfile eexec" operator or the
 // "/sfnts [" key at the start of a line (after optional whitespace).
 var postScriptType1And42Regex = regex.MustCompile(`(\n|\r\n|\r)\s*(?:currentfile eexec\s+|\/sfnts\s+\[)`)
 // postScriptRegexes are applied to the "%%Creator:" line: a digit (most
 // likely a version number) or one of a few known generator names.
 var postScriptRegexes = []regex.EnryRegexp{
 	regex.MustCompile(`[0-9]|draw|mpage|ImageMagick|inkscape|MATLAB`),
 	regex.MustCompile(`PCBNEW|pnmtops|\(Unknown\)|Serif Affinity|Filterimage -tops`),
 }
 // isGeneratedPostScript reports whether a ".ps", ".eps" or ".pfa" file was
 // produced by a tool rather than written by hand. The path argument is
 // ignored.
 func isGeneratedPostScript(_, ext string, content []byte) bool {
 	switch ext {
 	case ".ps", ".eps", ".pfa":
 		// Supported PostScript extension: keep going.
 	default:
 		return false
 	}
 	// Type 1 and Type 42 fonts converted to PostScript are stored as
 	// hex-encoded byte streams, always preceded by the `eexec` operator
 	// (Type 1) or the `/sfnts` key (Type 42).
 	if postScriptType1And42Regex.Match(content) {
 		return true
 	}
 	// The "%%Creator:" comment names the author/generator of the file. When
 	// present it should sit within the first few lines.
 	var creator []byte
 	creatorPrefix := []byte("%%Creator: ")
 	for _, candidate := range getLines(content, 10) {
 		if bytes.HasPrefix(candidate, creatorPrefix) {
 			creator = candidate
 			break
 		}
 	}
 	if len(creator) == 0 {
 		return false
 	}
 	// EAGLE writes no version number into the creator line, but it does
 	// prepend its name to the document's "%%Title" field.
 	if bytes.Contains(creator, []byte("EAGLE")) {
 		titlePrefix := []byte("%%Title: EAGLE Drawing ")
 		for _, candidate := range getLines(content, 5) {
 			if bytes.HasPrefix(candidate, titlePrefix) {
 				return true
 			}
 		}
 	}
 	// Generators usually state a version number while human or company
 	// names rarely contain digits; a few digit-less generators are matched
 	// by name.
 	for _, re := range postScriptRegexes {
 		if re.Match(creator) {
 			return true
 		}
 	}
 	return false
 }
 // isGeneratedGo reports whether a .go file contains "Code generated by" in one
 // of its first 40 lines. Content with fewer than two lines never matches.
 func isGeneratedGo(_, ext string, content []byte) bool {
 	if ext != ".go" {
 		return false
 	}
 	head := getLines(content, 40)
 	if len(head) < 2 {
 		return false
 	}
 	marker := []byte("Code generated by")
 	for i := range head {
 		if bytes.Contains(head[i], marker) {
 			return true
 		}
 	}
 	return false
 }
 // protoExtensions is the set of extensions isGeneratedProtobuf looks at.
 var protoExtensions = map[string]struct{}{
 	".py":   {},
 	".java": {},
 	".h":    {},
 	".cc":   {},
 	".cpp":  {},
 	".m":    {},
 	".rb":   {},
 	".php":  {},
 }
 // isGeneratedProtobuf reports whether a file with one of the protoExtensions
 // carries the protocol buffer compiler banner in its first three lines.
 // Content with fewer than two lines never matches.
 func isGeneratedProtobuf(_, ext string, content []byte) bool {
 	_, known := protoExtensions[ext]
 	if !known {
 		return false
 	}
 	head := getLines(content, 3)
 	if len(head) < 2 {
 		return false
 	}
 	banner := []byte("Generated by the protocol buffer compiler.  DO NOT EDIT!")
 	for i := range head {
 		if bytes.Contains(head[i], banner) {
 			return true
 		}
 	}
 	return false
 }
 // isGeneratedJavaScriptProtocolBuffer reports whether the sixth line of a .js
 // file contains "GENERATED CODE -- DO NOT EDIT!".
 func isGeneratedJavaScriptProtocolBuffer(_, ext string, content []byte) bool {
 	if ext != ".js" {
 		return false
 	}
 	head := getLines(content, 6)
 	// The marker is only looked for on line six, so shorter files cannot match.
 	return len(head) >= 6 && bytes.Contains(head[5], []byte("GENERATED CODE -- DO NOT EDIT!"))
 }
 // apacheThriftExtensions is the set of extensions isGeneratedApacheThrift
 // looks at.
 var apacheThriftExtensions = map[string]struct{}{
 	".rb":   {},
 	".py":   {},
 	".go":   {},
 	".js":   {},
 	".m":    {},
 	".java": {},
 	".h":    {},
 	".cc":   {},
 	".cpp":  {},
 	".php":  {},
 }
 // isGeneratedApacheThrift reports whether a file with one of the
 // apacheThriftExtensions contains "Autogenerated by Thrift Compiler" in its
 // first six lines.
 func isGeneratedApacheThrift(_, ext string, content []byte) bool {
 	_, known := apacheThriftExtensions[ext]
 	if !known {
 		return false
 	}
 	banner := []byte("Autogenerated by Thrift Compiler")
 	head := getLines(content, 6)
 	for i := range head {
 		if bytes.Contains(head[i], banner) {
 			return true
 		}
 	}
 	return false
 }
 // isGeneratedJNIHeader reports whether a .h file opens with the
 // "machine generated" banner on line one and "#include <jni.h>" on line two.
 func isGeneratedJNIHeader(_, ext string, content []byte) bool {
 	if ext != ".h" {
 		return false
 	}
 	head := getLines(content, 2)
 	if len(head) < 2 {
 		return false
 	}
 	if !bytes.Contains(head[0], []byte("/* DO NOT EDIT THIS FILE - it is machine generated */")) {
 		return false
 	}
 	return bytes.Contains(head[1], []byte("#include <jni.h>"))
 }
 // isVCRCassette reports whether a .yml file has "recorded_with: VCR" on the
 // second entry of the (reversed) two-line tail returned by getLines.
 func isVCRCassette(_, ext string, content []byte) bool {
 	if ext != ".yml" {
 		return false
 	}
 	// getLines with a negative count yields the trailing lines in reverse
 	// order, so tail[0] is the last line it returns and tail[1] the one before.
 	// NOTE(review): getLines does not return the empty segment after a final
 	// newline, so tail[1] is the line *before* the last text line — confirm this
 	// is where VCR writes "recorded_with".
 	tail := getLines(content, -2)
 	return len(tail) >= 2 && bytes.Contains(tail[1], []byte("recorded_with: VCR"))
 }
 func isCompiledCythonFile(_, ext string, content []byte) bool {
 	if ext != ".c" && ext != ".cpp" {
 		return false
 	}
 	lines := getLines(content, 1)
 	if len(lines) < 1 {
 		return false
 	}
 	return bytes.Contains(lines[0], []byte("Generated by Cython"))
 }
 func isGeneratedModule(_, ext string, content []byte) bool {
 	if ext != ".mod" {
 		return false
 	}
 	lines := getLines(content, 1)
 	if len(lines) < 1 {
 		return false
 	}
 	return bytes.Contains(lines[0], []byte("PCBNEW-LibModule-V")) ||
 		bytes.Contains(lines[0], []byte("GFORTRAN module version '"))
 }
 // isGeneratedUnity3DMeta reports whether the first line of a .meta file
 // contains "fileFormatVersion: ".
 func isGeneratedUnity3DMeta(_, ext string, content []byte) bool {
 	if ext == ".meta" {
 		if head := getLines(content, 1); len(head) > 0 {
 			return bytes.Contains(head[0], []byte("fileFormatVersion: "))
 		}
 	}
 	return false
 }
 // isGeneratedRacc reports whether the third line of a .rb file starts with
 // "# This file is automatically generated by Racc".
 func isGeneratedRacc(_, ext string, content []byte) bool {
 	if ext != ".rb" {
 		return false
 	}
 	head := getLines(content, 3)
 	// Only line three is inspected; shorter files cannot match.
 	return len(head) >= 3 &&
 		bytes.HasPrefix(head[2], []byte("# This file is automatically generated by Racc"))
 }
 func isGeneratedJFlex(_, ext string, content []byte) bool {
 	if ext != ".java" {
 		return false
 	}
 	lines := getLines(content, 1)
 	if len(lines) < 1 {
 		return false
 	}
 	return bytes.HasPrefix(lines[0], []byte("/* The following code was generated by JFlex "))
 }
 // isGeneratedGrammarKit reports whether the first line of a .java file contains
 // "// This is a generated file. Not intended for manual editing.".
 func isGeneratedGrammarKit(_, ext string, content []byte) bool {
 	if ext == ".java" {
 		if head := getLines(content, 1); len(head) >= 1 {
 			notice := []byte("// This is a generated file. Not intended for manual editing.")
 			return bytes.Contains(head[0], notice)
 		}
 	}
 	return false
 }
 func isGeneratedRoxygen2(_, ext string, content []byte) bool {
 	if ext != ".rd" {
 		return false
 	}
 	lines := getLines(content, 1)
 	if len(lines) < 1 {
 		return false
 	}
 	return bytes.Contains(lines[0], []byte("% Generated by roxygen2: do not edit by hand"))
 }
 func isGeneratedJison(_, ext string, content []byte) bool {
 	if ext != ".js" {
 		return false
 	}
 	lines := getLines(content, 1)
 	if len(lines) < 1 {
 		return false
 	}
 	return bytes.Contains(lines[0], []byte("/* parser generated by jison ")) ||
 		bytes.Contains(lines[0], []byte("/* generated by jison-lex "))
 }
 // isGeneratedGRPCCpp reports whether the first line of a .cpp, .hpp, .h or .cc
 // file contains "// Generated by the gRPC".
 func isGeneratedGRPCCpp(_, ext string, content []byte) bool {
 	if ext != ".cpp" && ext != ".hpp" && ext != ".h" && ext != ".cc" {
 		return false
 	}
 	head := getLines(content, 1)
 	if len(head) == 0 {
 		return false
 	}
 	return bytes.Contains(head[0], []byte("// Generated by the gRPC"))
 }
 // dartRegex matches "generated code" and "do not modify" separated by two or
 // three non-word characters; it is applied to lower-cased text.
 var dartRegex = regex.MustCompile(`generated code\W{2,3}do not modify`)
 // isGeneratedDart reports whether the lower-cased first line of a .dart file
 // matches dartRegex.
 func isGeneratedDart(_, ext string, content []byte) bool {
 	if ext == ".dart" {
 		if head := getLines(content, 1); len(head) > 0 {
 			return dartRegex.Match(bytes.ToLower(head[0]))
 		}
 	}
 	return false
 }
 // isGeneratedPerlPPPortHeader reports whether a file whose name ends in
 // "ppport.h" has "Automatically created by Devel::PPPort" on its ninth line.
 // Files with fewer than ten lines never match.
 func isGeneratedPerlPPPortHeader(name, _ string, content []byte) bool {
 	if strings.HasSuffix(name, "ppport.h") {
 		head := getLines(content, 10)
 		if len(head) >= 10 {
 			return bytes.Contains(head[8], []byte("Automatically created by Devel::PPPort"))
 		}
 	}
 	return false
 }
 var (
 	// gameMakerStudioFirstLineRegex is applied to the first line of the file.
 	gameMakerStudioFirstLineRegex = regex.MustCompile(`^\d\.\d\.\d.+\|\{`)
 	// gameMakerStudioThirdLineRegex is applied to the third line of the file.
 	gameMakerStudioThirdLineRegex = regex.MustCompile(`\"modelName\"\:\s*\"GM`)
 )
 // isGeneratedGameMakerStudio reports whether a .yy or .yyp file with at least
 // three lines matches either the third-line or the first-line expression.
 func isGeneratedGameMakerStudio(_, ext string, content []byte) bool {
 	switch ext {
 	case ".yy", ".yyp":
 	default:
 		return false
 	}
 	head := getLines(content, 3)
 	if len(head) < 3 {
 		return false
 	}
 	// The third line is tried first; the first line is the fallback.
 	if gameMakerStudioThirdLineRegex.Match(head[2]) {
 		return true
 	}
 	return gameMakerStudioFirstLineRegex.Match(head[0])
 }
 // gimpRegexes match the comment banners looked for on the first line of GIMP
 // C-source (.c) and header (.h) image dumps.
 var gimpRegexes = []regex.EnryRegexp{
 	regex.MustCompile(`\/\* GIMP [a-zA-Z0-9\- ]+ C\-Source image dump \(.+?\.c\) \*\/`),
 	regex.MustCompile(`\/\*  GIMP header image file format \([a-zA-Z0-9\- ]+\)\: .+?\.h  \*\/`),
 }
 // isGeneratedGimp reports whether the first line of a .c or .h file matches
 // one of the gimpRegexes.
 func isGeneratedGimp(_, ext string, content []byte) bool {
 	switch ext {
 	case ".c", ".h":
 	default:
 		return false
 	}
 	head := getLines(content, 1)
 	if len(head) == 0 {
 		return false
 	}
 	first := head[0]
 	for i := range gimpRegexes {
 		if gimpRegexes[i].Match(first) {
 			return true
 		}
 	}
 	return false
 }
 // isGeneratedVisualStudio6 reports whether one of the first three lines of a
 // .dsp file contains "# Microsoft Developer Studio Generated Build File".
 func isGeneratedVisualStudio6(_, ext string, content []byte) bool {
 	if ext != ".dsp" {
 		return false
 	}
 	banner := []byte("# Microsoft Developer Studio Generated Build File")
 	head := getLines(content, 3)
 	for i := range head {
 		if bytes.Contains(head[i], banner) {
 			return true
 		}
 	}
 	return false
 }
 // haxeExtensions is the set of extensions isGeneratedHaxe looks at.
 var haxeExtensions = map[string]struct{}{
 	".js":   {},
 	".py":   {},
 	".lua":  {},
 	".cpp":  {},
 	".h":    {},
 	".java": {},
 	".cs":   {},
 	".php":  {},
 }
 // isGeneratedHaxe reports whether a file with one of the haxeExtensions
 // contains "Generated by Haxe" in its first three lines.
 func isGeneratedHaxe(_, ext string, content []byte) bool {
 	_, known := haxeExtensions[ext]
 	if !known {
 		return false
 	}
 	banner := []byte("Generated by Haxe")
 	head := getLines(content, 3)
 	for i := range head {
 		if bytes.Contains(head[i], banner) {
 			return true
 		}
 	}
 	return false
 }
 var (
 	// doxygenRegex matches the "Generated by Doxygen <version>" HTML comment.
 	doxygenRegex = regex.MustCompile(`<!--\s+Generated by Doxygen\s+[.0-9]+\s*-->`)
 	// htmlMetaRegex matches a whole <meta ...> tag.
 	htmlMetaRegex = regex.MustCompile(`<meta(\s+[^>]+)>`)
 	// htmlMetaContentRegex extracts name/content/value attributes and their
 	// (optionally quoted) values from a <meta> tag.
 	htmlMetaContentRegex = regex.MustCompile(`\s+(name|content|value)\s*=\s*("[^"]+"|'[^']+'|[^\s"']+)`)
 	// orgModeMetaRegex matches "org mode" with any amount of whitespace between
 	// the two words.
 	orgModeMetaRegex = regex.MustCompile(`org\s+mode`)
 )
 // isGeneratedHTML reports whether an .html, .htm or .xhtml file looks machine
 // generated. Only the first 30 lines are inspected: for a pkgdown banner (lines
 // one and two), a mandoc banner (line three), a Doxygen comment, or a
 // <meta name="generator"> tag naming one of a few known tools.
 func isGeneratedHTML(_, ext string, content []byte) bool {
 	switch ext {
 	case ".html", ".htm", ".xhtml":
 	default:
 		return false
 	}
 	head := getLines(content, 30)
 	// Pkgdown: only checked when at least two lines are available.
 	if len(head) >= 2 {
 		pkgdown := []byte("<!-- Generated by pkgdown: do not edit by hand -->")
 		if bytes.Contains(head[0], pkgdown) || bytes.Contains(head[1], pkgdown) {
 			return true
 		}
 	}
 	// Mandoc
 	if len(head) >= 3 &&
 		bytes.HasPrefix(head[2], []byte("<!-- This is an automatically generated file.")) {
 		return true
 	}
 	// Doxygen
 	for i := range head {
 		if doxygenRegex.Match(head[i]) {
 			return true
 		}
 	}
 	// HTML tag: <meta name="generator" content="" />
 	// The lines are joined with spaces and lower-cased so that a tag spread over
 	// several lines can still be matched case-insensitively.
 	flat := bytes.ToLower(bytes.Join(head, []byte{' '}))
 	flat = bytes.ReplaceAll(flat, []byte{'\n'}, []byte{})
 	flat = bytes.ReplaceAll(flat, []byte{'\r'}, []byte{})
 	knownTools := [...]string{"jlatex2html", "latex2html", "groff", "makeinfo", "texi2html", "ronn"}
 	for _, tag := range htmlMetaRegex.FindAll(flat, -1) {
 		// When an attribute is repeated, the last occurrence wins.
 		attrs := make(map[string]string, 3)
 		for _, sub := range htmlMetaContentRegex.FindAllStringSubmatch(string(tag), -1) {
 			attrs[sub[1]] = sub[2]
 		}
 		// "value" takes precedence over "content".
 		generator := attrs["value"]
 		if generator == "" {
 			generator = attrs["content"]
 		}
 		generator = strings.Trim(generator, `"'`)
 		if strings.Trim(attrs["name"], `"'`) != "generator" || generator == "" {
 			continue
 		}
 		for _, tool := range knownTools {
 			if strings.Contains(generator, tool) {
 				return true
 			}
 		}
 		if orgModeMetaRegex.MatchString(generator) {
 			return true
 		}
 	}
 	return false
 }
 // isGeneratedJooq reports whether one of the first two lines of a .java file
 // contains "This file is generated by jOOQ.".
 func isGeneratedJooq(_, ext string, content []byte) bool {
 	if ext != ".java" {
 		return false
 	}
 	banner := []byte("This file is generated by jOOQ.")
 	head := getLines(content, 2)
 	for i := range head {
 		if bytes.Contains(head[i], banner) {
 			return true
 		}
 	}
 	return false
 }
 // getFirstLine returns the first line of content as produced by getLines, or
 // nil when getLines yields no line at all.
 func getFirstLine(content []byte) []byte {
 	head := getLines(content, 1)
 	if len(head) == 0 {
 		return nil
 	}
 	return head[0]
 }
 // getLines returns up to the first n lines of content. A negative n returns up
 // to the last -n lines instead, in reverse order (last line first).
 //
 // Lines are delimited by '\n'; the delimiter is not part of the returned
 // slices, but a preceding '\r' is. The returned slices alias content. The
 // result is nil when no line is produced (empty content or n == 0).
 //
 // When scanning backwards, the empty segment that follows a final trailing
 // newline is not counted as a line.
 func getLines(content []byte, n int) [][]byte {
 	var result [][]byte
 	if n < 0 {
 		for pos := len(content); pos > 0 && len(result) < -n; {
 			nlpos := bytes.LastIndexByte(content[:pos], '\n')
 			// Skip only the segment starting at len(content), i.e. the empty one
 			// after a trailing newline. Comparing against len(content)-1 would
 			// also drop a one-byte unterminated last line.
 			if nlpos+1 < len(content) {
 				result = append(result, content[nlpos+1:pos])
 			}
 			pos = nlpos
 		}
 		return result
 	}
 	for pos := 0; pos < len(content) && len(result) < n; {
 		// Without a further newline the line runs to the end of content.
 		end := len(content)
 		if off := bytes.IndexByte(content[pos:], '\n'); off >= 0 {
 			end = pos + off
 		}
 		result = append(result, content[pos:end])
 		pos = end + 1
 	}
 	return result
 }
 // forEachLine calls cb once for every '\n'-delimited line of content, in order.
 // The newline itself is not passed to cb, and the empty segment after a final
 // trailing newline is not reported. Empty content results in no calls.
 func forEachLine(content []byte, cb func([]byte)) {
 	for start := 0; start < len(content); {
 		// Without a further newline the line runs to the end of content.
 		end := len(content)
 		if off := bytes.IndexByte(content[start:], '\n'); off >= 0 {
 			end = start + off
 		}
 		cb(content[start:end])
 		start = end + 1
 	}
 }
 // countAppearancesInLine returns the sum, over all targets, of the number of
 // non-overlapping occurrences of each target in line.
 func countAppearancesInLine(line []byte, targets ...string) int {
 	total := 0
 	for i := range targets {
 		total += bytes.Count(line, []byte(targets[i]))
 	}
 	return total
 }
@@ -0,0 +1,17 @@
 package data
 import "github.com/go-enry/go-enry/v2/regex"
 // TestMatchers is a hand-made collection of regular expressions used by the
 // function `enry.IsTest` to identify test files in different languages. Each
 // expression is matched against a file path.
 var TestMatchers = []regex.EnryRegexp{
 	regex.MustCompile(`(^|/)tests/.*Test\.php$`),
 	regex.MustCompile(`(^|/)test/.*Test(s?)\.java$`),
 	regex.MustCompile(`(^|/)test(/|/.*/)Test.*\.java$`),
 	regex.MustCompile(`(^|/)test/.*(Test(s?)|Spec(s?))\.scala$`),
 	regex.MustCompile(`(^|/)test_.*\.py$`),
 	regex.MustCompile(`(^|/).*_test\.go$`),
 	regex.MustCompile(`(^|/).*_(test|spec)\.rb$`),
 	regex.MustCompile(`(^|/).*Test(s?)\.cs$`),
 	regex.MustCompile(`(^|/).*\.(test|spec)\.(ts|tsx|js)$`),
 }
@@ -3,167 +3,167 @@
 package data
-import "gopkg.in/toqueteos/substring.v1"
+import "github.com/go-enry/go-enry/v2/regex"
-var VendorMatchers = substring.Or(
+var VendorMatchers = []regex.EnryRegexp{
-	substring.Regexp(`(^|/)cache/`),
+	regex.MustCompile(`(^|/)cache/`),
-	substring.Regexp(`^[Dd]ependencies/`),
+	regex.MustCompile(`^[Dd]ependencies/`),
-	substring.Regexp(`(^|/)dist/`),
+	regex.MustCompile(`(^|/)dist/`),
-	substring.Regexp(`^deps/`),
+	regex.MustCompile(`^deps/`),
-	substring.Regexp(`(^|/)configure$`),
+	regex.MustCompile(`(^|/)configure$`),
-	substring.Regexp(`(^|/)config.guess$`),
+	regex.MustCompile(`(^|/)config.guess$`),
-	substring.Regexp(`(^|/)config.sub$`),
+	regex.MustCompile(`(^|/)config.sub$`),
-	substring.Regexp(`(^|/)aclocal.m4`),
+	regex.MustCompile(`(^|/)aclocal.m4`),
-	substring.Regexp(`(^|/)libtool.m4`),
+	regex.MustCompile(`(^|/)libtool.m4`),
-	substring.Regexp(`(^|/)ltoptions.m4`),
+	regex.MustCompile(`(^|/)ltoptions.m4`),
-	substring.Regexp(`(^|/)ltsugar.m4`),
+	regex.MustCompile(`(^|/)ltsugar.m4`),
-	substring.Regexp(`(^|/)ltversion.m4`),
+	regex.MustCompile(`(^|/)ltversion.m4`),
-	substring.Regexp(`(^|/)lt~obsolete.m4`),
+	regex.MustCompile(`(^|/)lt~obsolete.m4`),
-	substring.Regexp(`dotnet-install\.(ps1|sh)$`),
+	regex.MustCompile(`dotnet-install\.(ps1|sh)$`),
-	substring.Regexp(`cpplint.py`),
+	regex.MustCompile(`cpplint.py`),
-	substring.Regexp(`node_modules/`),
+	regex.MustCompile(`node_modules/`),
-	substring.Regexp(`(^|/)\.yarn/releases/`),
+	regex.MustCompile(`(^|/)\.yarn/releases/`),
-	substring.Regexp(`(^|/)_esy$`),
+	regex.MustCompile(`(^|/)_esy$`),
-	substring.Regexp(`bower_components/`),
+	regex.MustCompile(`bower_components/`),
-	substring.Regexp(`^rebar$`),
+	regex.MustCompile(`^rebar$`),
-	substring.Regexp(`erlang.mk`),
+	regex.MustCompile(`erlang.mk`),
-	substring.Regexp(`Godeps/_workspace/`),
+	regex.MustCompile(`Godeps/_workspace/`),
-	substring.Regexp(`(^|/)testdata/`),
+	regex.MustCompile(`(^|/)testdata/`),
-	substring.Regexp(`.indent.pro`),
+	regex.MustCompile(`.indent.pro`),
-	substring.Regexp(`(\.|-)min\.(js|css)$`),
+	regex.MustCompile(`(\.|-)min\.(js|css)$`),
-	substring.Regexp(`([^\s]*)import\.(css|less|scss|styl)$`),
+	regex.MustCompile(`([^\s]*)import\.(css|less|scss|styl)$`),
-	substring.Regexp(`(^|/)bootstrap([^.]*)\.(js|css|less|scss|styl)$`),
+	regex.MustCompile(`(^|/)bootstrap([^.]*)\.(js|css|less|scss|styl)$`),
-	substring.Regexp(`(^|/)custom\.bootstrap([^\s]*)(js|css|less|scss|styl)$`),
+	regex.MustCompile(`(^|/)custom\.bootstrap([^\s]*)(js|css|less|scss|styl)$`),
-	substring.Regexp(`(^|/)font-?awesome\.(css|less|scss|styl)$`),
+	regex.MustCompile(`(^|/)font-?awesome\.(css|less|scss|styl)$`),
-	substring.Regexp(`(^|/)font-?awesome/.*\.(css|less|scss|styl)$`),
+	regex.MustCompile(`(^|/)font-?awesome/.*\.(css|less|scss|styl)$`),
-	substring.Regexp(`(^|/)foundation\.(css|less|scss|styl)$`),
+	regex.MustCompile(`(^|/)foundation\.(css|less|scss|styl)$`),
-	substring.Regexp(`(^|/)normalize\.(css|less|scss|styl)$`),
+	regex.MustCompile(`(^|/)normalize\.(css|less|scss|styl)$`),
-	substring.Regexp(`(^|/)skeleton\.(css|less|scss|styl)$`),
+	regex.MustCompile(`(^|/)skeleton\.(css|less|scss|styl)$`),
-	substring.Regexp(`(^|/)[Bb]ourbon/.*\.(css|less|scss|styl)$`),
+	regex.MustCompile(`(^|/)[Bb]ourbon/.*\.(css|less|scss|styl)$`),
-	substring.Regexp(`(^|/)animate\.(css|less|scss|styl)$`),
+	regex.MustCompile(`(^|/)animate\.(css|less|scss|styl)$`),
-	substring.Regexp(`(^|/)materialize\.(css|less|scss|styl|js)$`),
+	regex.MustCompile(`(^|/)materialize\.(css|less|scss|styl|js)$`),
-	substring.Regexp(`(^|/)select2/.*\.(css|scss|js)$`),
+	regex.MustCompile(`(^|/)select2/.*\.(css|scss|js)$`),
-	substring.Regexp(`(^|/)bulma\.(css|sass|scss)$`),
+	regex.MustCompile(`(^|/)bulma\.(css|sass|scss)$`),
-	substring.Regexp(`(3rd|[Tt]hird)[-_]?[Pp]arty/`),
+	regex.MustCompile(`(3rd|[Tt]hird)[-_]?[Pp]arty/`),
-	substring.Regexp(`vendors?/`),
+	regex.MustCompile(`vendors?/`),
-	substring.Regexp(`extern(al)?/`),
+	regex.MustCompile(`extern(al)?/`),
-	substring.Regexp(`(^|/)[Vv]+endor/`),
+	regex.MustCompile(`(^|/)[Vv]+endor/`),
-	substring.Regexp(`^debian/`),
+	regex.MustCompile(`^debian/`),
-	substring.Regexp(`run.n$`),
+	regex.MustCompile(`run.n$`),
-	substring.Regexp(`bootstrap-datepicker/`),
+	regex.MustCompile(`bootstrap-datepicker/`),
-	substring.Regexp(`(^|/)jquery([^.]*)\.js$`),
+	regex.MustCompile(`(^|/)jquery([^.]*)\.js$`),
-	substring.Regexp(`(^|/)jquery\-\d\.\d+(\.\d+)?\.js$`),
+	regex.MustCompile(`(^|/)jquery\-\d\.\d+(\.\d+)?\.js$`),
-	substring.Regexp(`(^|/)jquery\-ui(\-\d\.\d+(\.\d+)?)?(\.\w+)?\.(js|css)$`),
+	regex.MustCompile(`(^|/)jquery\-ui(\-\d\.\d+(\.\d+)?)?(\.\w+)?\.(js|css)$`),
-	substring.Regexp(`(^|/)jquery\.(ui|effects)\.([^.]*)\.(js|css)$`),
+	regex.MustCompile(`(^|/)jquery\.(ui|effects)\.([^.]*)\.(js|css)$`),
-	substring.Regexp(`jquery.fn.gantt.js`),
+	regex.MustCompile(`jquery.fn.gantt.js`),
-	substring.Regexp(`jquery.fancybox.(js|css)`),
+	regex.MustCompile(`jquery.fancybox.(js|css)`),
-	substring.Regexp(`fuelux.js`),
+	regex.MustCompile(`fuelux.js`),
-	substring.Regexp(`(^|/)jquery\.fileupload(-\w+)?\.js$`),
+	regex.MustCompile(`(^|/)jquery\.fileupload(-\w+)?\.js$`),
-	substring.Regexp(`jquery.dataTables.js`),
+	regex.MustCompile(`jquery.dataTables.js`),
-	substring.Regexp(`bootbox.js`),
+	regex.MustCompile(`bootbox.js`),
-	substring.Regexp(`pdf.worker.js`),
+	regex.MustCompile(`pdf.worker.js`),
-	substring.Regexp(`(^|/)slick\.\w+.js$`),
+	regex.MustCompile(`(^|/)slick\.\w+.js$`),
-	substring.Regexp(`(^|/)Leaflet\.Coordinates-\d+\.\d+\.\d+\.src\.js$`),
+	regex.MustCompile(`(^|/)Leaflet\.Coordinates-\d+\.\d+\.\d+\.src\.js$`),
-	substring.Regexp(`leaflet.draw-src.js`),
+	regex.MustCompile(`leaflet.draw-src.js`),
-	substring.Regexp(`leaflet.draw.css`),
+	regex.MustCompile(`leaflet.draw.css`),
-	substring.Regexp(`Control.FullScreen.css`),
+	regex.MustCompile(`Control.FullScreen.css`),
-	substring.Regexp(`Control.FullScreen.js`),
+	regex.MustCompile(`Control.FullScreen.js`),
-	substring.Regexp(`leaflet.spin.js`),
+	regex.MustCompile(`leaflet.spin.js`),
-	substring.Regexp(`wicket-leaflet.js`),
+	regex.MustCompile(`wicket-leaflet.js`),
-	substring.Regexp(`.sublime-project`),
+	regex.MustCompile(`.sublime-project`),
-	substring.Regexp(`.sublime-workspace`),
+	regex.MustCompile(`.sublime-workspace`),
-	substring.Regexp(`.vscode`),
+	regex.MustCompile(`.vscode`),
-	substring.Regexp(`(^|/)prototype(.*)\.js$`),
+	regex.MustCompile(`(^|/)prototype(.*)\.js$`),
-	substring.Regexp(`(^|/)effects\.js$`),
+	regex.MustCompile(`(^|/)effects\.js$`),
-	substring.Regexp(`(^|/)controls\.js$`),
+	regex.MustCompile(`(^|/)controls\.js$`),
-	substring.Regexp(`(^|/)dragdrop\.js$`),
+	regex.MustCompile(`(^|/)dragdrop\.js$`),
-	substring.Regexp(`(.*?)\.d\.ts$`),
+	regex.MustCompile(`(.*?)\.d\.ts$`),
-	substring.Regexp(`(^|/)mootools([^.]*)\d+\.\d+.\d+([^.]*)\.js$`),
+	regex.MustCompile(`(^|/)mootools([^.]*)\d+\.\d+.\d+([^.]*)\.js$`),
-	substring.Regexp(`(^|/)dojo\.js$`),
+	regex.MustCompile(`(^|/)dojo\.js$`),
-	substring.Regexp(`(^|/)MochiKit\.js$`),
+	regex.MustCompile(`(^|/)MochiKit\.js$`),
-	substring.Regexp(`(^|/)yahoo-([^.]*)\.js$`),
+	regex.MustCompile(`(^|/)yahoo-([^.]*)\.js$`),
-	substring.Regexp(`(^|/)yui([^.]*)\.js$`),
+	regex.MustCompile(`(^|/)yui([^.]*)\.js$`),
-	substring.Regexp(`(^|/)ckeditor\.js$`),
+	regex.MustCompile(`(^|/)ckeditor\.js$`),
-	substring.Regexp(`(^|/)tiny_mce([^.]*)\.js$`),
+	regex.MustCompile(`(^|/)tiny_mce([^.]*)\.js$`),
-	substring.Regexp(`(^|/)tiny_mce/(langs|plugins|themes|utils)`),
+	regex.MustCompile(`(^|/)tiny_mce/(langs|plugins|themes|utils)`),
-	substring.Regexp(`(^|/)ace-builds/`),
+	regex.MustCompile(`(^|/)ace-builds/`),
-	substring.Regexp(`(^|/)fontello(.*?)\.css$`),
+	regex.MustCompile(`(^|/)fontello(.*?)\.css$`),
-	substring.Regexp(`(^|/)MathJax/`),
+	regex.MustCompile(`(^|/)MathJax/`),
-	substring.Regexp(`(^|/)Chart\.js$`),
+	regex.MustCompile(`(^|/)Chart\.js$`),
-	substring.Regexp(`(^|/)[Cc]ode[Mm]irror/(\d+\.\d+/)?(lib|mode|theme|addon|keymap|demo)`),
+	regex.MustCompile(`(^|/)[Cc]ode[Mm]irror/(\d+\.\d+/)?(lib|mode|theme|addon|keymap|demo)`),
-	substring.Regexp(`(^|/)shBrush([^.]*)\.js$`),
+	regex.MustCompile(`(^|/)shBrush([^.]*)\.js$`),
-	substring.Regexp(`(^|/)shCore\.js$`),
+	regex.MustCompile(`(^|/)shCore\.js$`),
-	substring.Regexp(`(^|/)shLegacy\.js$`),
+	regex.MustCompile(`(^|/)shLegacy\.js$`),
-	substring.Regexp(`(^|/)angular([^.]*)\.js$`),
+	regex.MustCompile(`(^|/)angular([^.]*)\.js$`),
-	substring.Regexp(`(^|\/)d3(\.v\d+)?([^.]*)\.js$`),
+	regex.MustCompile(`(^|\/)d3(\.v\d+)?([^.]*)\.js$`),
-	substring.Regexp(`(^|/)react(-[^.]*)?\.js$`),
+	regex.MustCompile(`(^|/)react(-[^.]*)?\.js$`),
-	substring.Regexp(`(^|/)flow-typed/.*\.js$`),
+	regex.MustCompile(`(^|/)flow-typed/.*\.js$`),
-	substring.Regexp(`(^|/)modernizr\-\d\.\d+(\.\d+)?\.js$`),
+	regex.MustCompile(`(^|/)modernizr\-\d\.\d+(\.\d+)?\.js$`),
-	substring.Regexp(`(^|/)modernizr\.custom\.\d+\.js$`),
+	regex.MustCompile(`(^|/)modernizr\.custom\.\d+\.js$`),
-	substring.Regexp(`(^|/)knockout-(\d+\.){3}(debug\.)?js$`),
+	regex.MustCompile(`(^|/)knockout-(\d+\.){3}(debug\.)?js$`),
-	substring.Regexp(`(^|/)docs?/_?(build|themes?|templates?|static)/`),
+	regex.MustCompile(`(^|/)docs?/_?(build|themes?|templates?|static)/`),
-	substring.Regexp(`(^|/)admin_media/`),
+	regex.MustCompile(`(^|/)admin_media/`),
-	substring.Regexp(`(^|/)env/`),
+	regex.MustCompile(`(^|/)env/`),
-	substring.Regexp(`^fabfile\.py$`),
+	regex.MustCompile(`^fabfile\.py$`),
-	substring.Regexp(`^waf$`),
+	regex.MustCompile(`^waf$`),
-	substring.Regexp(`^.osx$`),
+	regex.MustCompile(`^.osx$`),
-	substring.Regexp(`\.xctemplate/`),
+	regex.MustCompile(`\.xctemplate/`),
-	substring.Regexp(`\.imageset/`),
+	regex.MustCompile(`\.imageset/`),
-	substring.Regexp(`(^|/)Carthage/`),
+	regex.MustCompile(`(^|/)Carthage/`),
-	substring.Regexp(`(^|/)Sparkle/`),
+	regex.MustCompile(`(^|/)Sparkle/`),
-	substring.Regexp(`Crashlytics.framework/`),
+	regex.MustCompile(`Crashlytics.framework/`),
-	substring.Regexp(`Fabric.framework/`),
+	regex.MustCompile(`Fabric.framework/`),
-	substring.Regexp(`BuddyBuildSDK.framework/`),
+	regex.MustCompile(`BuddyBuildSDK.framework/`),
-	substring.Regexp(`Realm.framework`),
+	regex.MustCompile(`Realm.framework`),
-	substring.Regexp(`RealmSwift.framework`),
+	regex.MustCompile(`RealmSwift.framework`),
-	substring.Regexp(`gitattributes$`),
+	regex.MustCompile(`gitattributes$`),
-	substring.Regexp(`gitignore$`),
+	regex.MustCompile(`gitignore$`),
-	substring.Regexp(`gitmodules$`),
+	regex.MustCompile(`gitmodules$`),
-	substring.Regexp(`(^|/)gradlew$`),
+	regex.MustCompile(`(^|/)gradlew$`),
-	substring.Regexp(`(^|/)gradlew\.bat$`),
+	regex.MustCompile(`(^|/)gradlew\.bat$`),
-	substring.Regexp(`(^|/)gradle/wrapper/`),
+	regex.MustCompile(`(^|/)gradle/wrapper/`),
-	substring.Regexp(`(^|/)mvnw$`),
+	regex.MustCompile(`(^|/)mvnw$`),
-	substring.Regexp(`(^|/)mvnw\.cmd$`),
+	regex.MustCompile(`(^|/)mvnw\.cmd$`),
-	substring.Regexp(`(^|/)\.mvn/wrapper/`),
+	regex.MustCompile(`(^|/)\.mvn/wrapper/`),
-	substring.Regexp(`-vsdoc\.js$`),
+	regex.MustCompile(`-vsdoc\.js$`),
-	substring.Regexp(`\.intellisense\.js$`),
+	regex.MustCompile(`\.intellisense\.js$`),
-	substring.Regexp(`(^|/)jquery([^.]*)\.validate(\.unobtrusive)?\.js$`),
+	regex.MustCompile(`(^|/)jquery([^.]*)\.validate(\.unobtrusive)?\.js$`),
-	substring.Regexp(`(^|/)jquery([^.]*)\.unobtrusive\-ajax\.js$`),
+	regex.MustCompile(`(^|/)jquery([^.]*)\.unobtrusive\-ajax\.js$`),
-	substring.Regexp(`(^|/)[Mm]icrosoft([Mm]vc)?([Aa]jax|[Vv]alidation)(\.debug)?\.js$`),
+	regex.MustCompile(`(^|/)[Mm]icrosoft([Mm]vc)?([Aa]jax|[Vv]alidation)(\.debug)?\.js$`),
-	substring.Regexp(`^[Pp]ackages\/.+\.\d+\/`),
+	regex.MustCompile(`^[Pp]ackages\/.+\.\d+\/`),
-	substring.Regexp(`(^|/)extjs/.*?\.js$`),
+	regex.MustCompile(`(^|/)extjs/.*?\.js$`),
-	substring.Regexp(`(^|/)extjs/.*?\.xml$`),
+	regex.MustCompile(`(^|/)extjs/.*?\.xml$`),
-	substring.Regexp(`(^|/)extjs/.*?\.txt$`),
+	regex.MustCompile(`(^|/)extjs/.*?\.txt$`),
-	substring.Regexp(`(^|/)extjs/.*?\.html$`),
+	regex.MustCompile(`(^|/)extjs/.*?\.html$`),
-	substring.Regexp(`(^|/)extjs/.*?\.properties$`),
+	regex.MustCompile(`(^|/)extjs/.*?\.properties$`),
-	substring.Regexp(`(^|/)extjs/.sencha/`),
+	regex.MustCompile(`(^|/)extjs/.sencha/`),
-	substring.Regexp(`(^|/)extjs/docs/`),
+	regex.MustCompile(`(^|/)extjs/docs/`),
-	substring.Regexp(`(^|/)extjs/builds/`),
+	regex.MustCompile(`(^|/)extjs/builds/`),
-	substring.Regexp(`(^|/)extjs/cmd/`),
+	regex.MustCompile(`(^|/)extjs/cmd/`),
-	substring.Regexp(`(^|/)extjs/examples/`),
+	regex.MustCompile(`(^|/)extjs/examples/`),
-	substring.Regexp(`(^|/)extjs/locale/`),
+	regex.MustCompile(`(^|/)extjs/locale/`),
-	substring.Regexp(`(^|/)extjs/packages/`),
+	regex.MustCompile(`(^|/)extjs/packages/`),
-	substring.Regexp(`(^|/)extjs/plugins/`),
+	regex.MustCompile(`(^|/)extjs/plugins/`),
-	substring.Regexp(`(^|/)extjs/resources/`),
+	regex.MustCompile(`(^|/)extjs/resources/`),
-	substring.Regexp(`(^|/)extjs/src/`),
+	regex.MustCompile(`(^|/)extjs/src/`),
-	substring.Regexp(`(^|/)extjs/welcome/`),
+	regex.MustCompile(`(^|/)extjs/welcome/`),
-	substring.Regexp(`(^|/)html5shiv\.js$`),
+	regex.MustCompile(`(^|/)html5shiv\.js$`),
-	substring.Regexp(`^[Tt]ests?/fixtures/`),
+	regex.MustCompile(`^[Tt]ests?/fixtures/`),
-	substring.Regexp(`^[Ss]pecs?/fixtures/`),
+	regex.MustCompile(`^[Ss]pecs?/fixtures/`),
-	substring.Regexp(`(^|/)cordova([^.]*)\.js$`),
+	regex.MustCompile(`(^|/)cordova([^.]*)\.js$`),
-	substring.Regexp(`(^|/)cordova\-\d\.\d(\.\d)?\.js$`),
+	regex.MustCompile(`(^|/)cordova\-\d\.\d(\.\d)?\.js$`),
-	substring.Regexp(`foundation(\..*)?\.js$`),
+	regex.MustCompile(`foundation(\..*)?\.js$`),
-	substring.Regexp(`^Vagrantfile$`),
+	regex.MustCompile(`^Vagrantfile$`),
-	substring.Regexp(`.[Dd][Ss]_[Ss]tore$`),
+	regex.MustCompile(`.[Dd][Ss]_[Ss]tore$`),
-	substring.Regexp(`^vignettes/`),
+	regex.MustCompile(`^vignettes/`),
-	substring.Regexp(`^inst/extdata/`),
+	regex.MustCompile(`^inst/extdata/`),
-	substring.Regexp(`octicons.css`),
+	regex.MustCompile(`octicons.css`),
-	substring.Regexp(`sprockets-octicons.scss`),
+	regex.MustCompile(`sprockets-octicons.scss`),
-	substring.Regexp(`(^|/)activator$`),
+	regex.MustCompile(`(^|/)activator$`),
-	substring.Regexp(`(^|/)activator\.bat$`),
+	regex.MustCompile(`(^|/)activator\.bat$`),
-	substring.Regexp(`proguard.pro`),
+	regex.MustCompile(`proguard.pro`),
-	substring.Regexp(`proguard-rules.pro`),
+	regex.MustCompile(`proguard-rules.pro`),
-	substring.Regexp(`^puphpet/`),
+	regex.MustCompile(`^puphpet/`),
-	substring.Regexp(`(^|/)\.google_apis/`),
+	regex.MustCompile(`(^|/)\.google_apis/`),
-	substring.Regexp(`^Jenkinsfile$`),
+	regex.MustCompile(`^Jenkinsfile$`),
-)
+}
@@ -3,9 +3,7 @@ module github.com/go-enry/go-enry/v2
 go 1.14
 require (
-	github.com/go-enry/go-oniguruma v1.2.0
+	github.com/go-enry/go-oniguruma v1.2.1
 	github.com/stretchr/testify v1.3.0
 	github.com/toqueteos/trie v1.0.0 // indirect
 	gopkg.in/toqueteos/substring.v1 v1.0.2
 	gopkg.in/yaml.v2 v2.2.8
 )
@@ -2,17 +2,15 @@ github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/go-enry/go-oniguruma v1.2.0 h1:oBO9XC1IDT9+AoWW5oFsa/7gFeOPacEqDbyXZKWXuDs=
 github.com/go-enry/go-oniguruma v1.2.0/go.mod h1:bWDhYP+S6xZQgiRL7wlTScFYBe023B6ilRZbCAD5Hf4=
 github.com/go-enry/go-oniguruma v1.2.1 h1:k8aAMuJfMrqm/56SG2lV9Cfti6tC4x8673aHCcBk+eo=
 github.com/go-enry/go-oniguruma v1.2.1/go.mod h1:bWDhYP+S6xZQgiRL7wlTScFYBe023B6ilRZbCAD5Hf4=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
 github.com/toqueteos/trie v1.0.0 h1:8i6pXxNUXNRAqP246iibb7w/pSFquNTQ+uNfriG7vlk=
 github.com/toqueteos/trie v1.0.0/go.mod h1:Ywk48QhEqhU1+DwhMkJ2x7eeGxDHiGkAdc9+0DYcbsM=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/toqueteos/substring.v1 v1.0.2 h1:urLqCeMm6x/eTuQa1oZerNw8N1KNOIp5hD5kGL7lFsE=
 gopkg.in/toqueteos/substring.v1 v1.0.2/go.mod h1:Eb2Z1UYehlVK8LYW2WBVR2rwbujsz3aX8XDrM1vbNew=
 gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
 gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
@@ -6,12 +6,18 @@ import (
 	"strings"
 	"github.com/go-enry/go-enry/v2/data"
 	"github.com/go-enry/go-enry/v2/regex"
 )
 const binSniffLen = 8000
-var configurationLanguages = map[string]bool{
+var configurationLanguages = map[string]struct{}{
-	"XML": true, "JSON": true, "TOML": true, "YAML": true, "INI": true, "SQL": true,
+	"XML":  {},
 	"JSON": {},
 	"TOML": {},
 	"YAML": {},
 	"INI":  {},
 	"SQL":  {},
 }
 // IsConfiguration tells if filename is in one of the configuration languages.
@@ -46,7 +52,7 @@ func GetMIMEType(path string, language string) string {
 // IsDocumentation returns whether or not path is a documentation path.
 func IsDocumentation(path string) bool {
-	return data.DocumentationMatchers.Match(path)
+	return matchRegexSlice(data.DocumentationMatchers, path)
 }
 // IsDotFile returns whether or not path has dot as a prefix.
@@ -57,7 +63,12 @@ func IsDotFile(path string) bool {
 // IsVendor returns whether or not path is a vendor path.
 func IsVendor(path string) bool {
-	return data.VendorMatchers.Match(path)
+	return matchRegexSlice(data.VendorMatchers, path)
 }
 // IsTest returns whether or not path is a test path, i.e. whether it matches
 // at least one of the expressions in data.TestMatchers.
 func IsTest(path string) bool {
 	return matchRegexSlice(data.TestMatchers, path)
 }
 // IsBinary detects if data is a binary value based on:
@@ -86,3 +97,37 @@ func GetColor(language string) string {
 	return "#cccccc"
 }
 // matchRegexSlice reports whether str matches at least one expression in
 // exprs. Expressions are tried in order and the search stops at the first
 // match; an empty slice never matches.
 func matchRegexSlice(exprs []regex.EnryRegexp, str string) bool {
 	for i := range exprs {
 		if exprs[i].MatchString(str) {
 			return true
 		}
 	}
 	return false
 }
 // IsGenerated returns whether the file with the given path and content is a
 // generated file.
 //
 // Three checks run in order and the first hit wins: the lower-cased extension
 // is looked up in data.GeneratedCodeExtensions, the path as given is passed to
 // each of data.GeneratedCodeNameMatchers, and finally the lower-cased path,
 // the lower-cased extension and the content are passed to each of
 // data.GeneratedCodeMatchers.
 func IsGenerated(path string, content []byte) bool {
 	ext := strings.ToLower(filepath.Ext(path))
 	if _, generated := data.GeneratedCodeExtensions[ext]; generated {
 		return true
 	}
 	for _, byName := range data.GeneratedCodeNameMatchers {
 		if byName(path) {
 			return true
 		}
 	}
 	// Content matchers receive the lower-cased path, unlike the name matchers.
 	lowered := strings.ToLower(path)
 	for _, byContent := range data.GeneratedCodeMatchers {
 		if byContent(lowered, ext, content) {
 			return true
 		}
 	}
 	return false
 }
@@ -7,7 +7,7 @@
 #include "chelper.h"
 int NewOnigRegex( char *pattern, int pattern_length, int option,
-                  OnigRegex *regex, OnigRegion **region, OnigEncoding *encoding, OnigErrorInfo **error_info, char **error_buffer) {
+                  OnigRegex *regex, OnigEncoding *encoding, OnigErrorInfo **error_info, char **error_buffer) {
    int ret = ONIG_NORMAL;
    int error_msg_len = 0;
@@ -23,8 +23,6 @@ int NewOnigRegex( char *pattern, int pattern_length, int option,
    memset(*error_buffer, 0, ONIG_MAX_ERROR_MESSAGE_LEN * sizeof(char));
    *region = onig_region_new();
    ret = onig_new(regex, pattern_start, pattern_end, (OnigOptionType)(option), *encoding, OnigDefaultSyntax, *error_info);
    if (ret != ONIG_NORMAL) {
@@ -38,9 +36,10 @@ int NewOnigRegex( char *pattern, int pattern_length, int option,
 }
 int SearchOnigRegex( void *str, int str_length, int offset, int option,
-                  OnigRegex regex, OnigRegion *region, OnigErrorInfo *error_info, char *error_buffer, int *captures, int *numCaptures) {
+                  OnigRegex regex, OnigErrorInfo *error_info, char *error_buffer, int *captures, int *numCaptures) {
    int ret = ONIG_MISMATCH;
    int error_msg_len = 0;
    OnigRegion *region;
 #ifdef BENCHMARK_CHELP
    struct timeval tim1, tim2;
    long t;
@@ -55,6 +54,8 @@ int SearchOnigRegex( void *str, int str_length, int offset, int option,
    gettimeofday(&tim1, NULL);
 #endif
    region = onig_region_new();
    ret = onig_search(regex, str_start, str_end, search_start, search_end, region, option);
    if (ret < 0 && error_buffer != NULL) {
        error_msg_len = onig_error_code_to_str((unsigned char*)(error_buffer), ret, error_info);
@@ -74,6 +75,8 @@ int SearchOnigRegex( void *str, int str_length, int offset, int option,
        *numCaptures = count;
    }
    onig_region_free(region, 1);
 #ifdef BENCHMARK_CHELP
    gettimeofday(&tim2, NULL);
    t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec;
@@ -83,9 +86,10 @@ int SearchOnigRegex( void *str, int str_length, int offset, int option,
 }
 int MatchOnigRegex(void *str, int str_length, int offset, int option,
-                  OnigRegex regex, OnigRegion *region) {
+                  OnigRegex regex) {
    int ret = ONIG_MISMATCH;
    int error_msg_len = 0;
    OnigRegion *region;
 #ifdef BENCHMARK_CHELP
    struct timeval tim1, tim2;
    long t;
@@ -98,7 +102,9 @@ int MatchOnigRegex(void *str, int str_length, int offset, int option,
 #ifdef BENCHMARK_CHELP
    gettimeofday(&tim1, NULL);
 #endif
    region = onig_region_new();
    ret = onig_match(regex, str_start, str_end, search_start, region, option);
    onig_region_free(region, 1);
 #ifdef BENCHMARK_CHELP
    gettimeofday(&tim2, NULL);
    t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec;
@@ -108,8 +114,9 @@ int MatchOnigRegex(void *str, int str_length, int offset, int option,
 }
 int LookupOnigCaptureByName(char *name, int name_length,
-                  OnigRegex regex, OnigRegion *region) {
+                  OnigRegex regex) {
    int ret = ONIGERR_UNDEFINED_NAME_REFERENCE;
    OnigRegion *region;
 #ifdef BENCHMARK_CHELP
    struct timeval tim1, tim2;
    long t;
@@ -119,7 +126,9 @@ int LookupOnigCaptureByName(char *name, int name_length,
 #ifdef BENCHMARK_CHELP
    gettimeofday(&tim1, NULL);
 #endif
    region = onig_region_new();
    ret = onig_name_to_backref_number(regex, name_start, name_end, region);
    onig_region_free(region, 1);
 #ifdef BENCHMARK_CHELP
    gettimeofday(&tim2, NULL);
    t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec;
@@ -181,4 +190,3 @@ int GetCaptureNames(OnigRegex reg, void *buffer, int bufferSize, int* groupNumbe
    onig_foreach_name(reg, name_callback, (void* )&groupInfo);
    return groupInfo.bufferOffset;
 }
@@ -1,14 +1,14 @@
 #include <oniguruma.h>
 extern int NewOnigRegex( char *pattern, int pattern_length, int option,
-                                  OnigRegex *regex, OnigRegion **region, OnigEncoding *encoding, OnigErrorInfo **error_info, char **error_buffer);
+                                  OnigRegex *regex, OnigEncoding *encoding, OnigErrorInfo **error_info, char **error_buffer);
 extern int SearchOnigRegex( void *str, int str_length, int offset, int option,
-                                  OnigRegex regex, OnigRegion *region, OnigErrorInfo *error_info, char *error_buffer, int *captures, int *numCaptures);
+                                  OnigRegex regex, OnigErrorInfo *error_info, char *error_buffer, int *captures, int *numCaptures);
 extern int MatchOnigRegex( void *str, int str_length, int offset, int option,
-                  OnigRegex regex, OnigRegion *region);
+                  OnigRegex regex);
-extern int LookupOnigCaptureByName(char *name, int name_length, OnigRegex regex, OnigRegion *region);
+extern int LookupOnigCaptureByName(char *name, int name_length, OnigRegex regex);
 extern int GetCaptureNames(OnigRegex regex, void *buffer, int bufferSize, int* groupNumbers);
@@ -14,7 +14,6 @@ import (
 	"errors"
 	"fmt"
 	"io"
 	"log"
 	"runtime"
 	"strconv"
 	"sync"
@@ -22,62 +21,52 @@ import (
 	"unsafe"
 )
 type strRange []int
 const numMatchStartSize = 4
 const numReadBufferStartSize = 256
 var mutex sync.Mutex
 type MatchData struct {
 	count   int
 	indexes [][]int32
 }
 type NamedGroupInfo map[string]int
 type Regexp struct {
-	pattern        string
+	pattern   string
-	regex          C.OnigRegex
+	regex     C.OnigRegex
-	region         *C.OnigRegion
+	encoding  C.OnigEncoding
-	encoding       C.OnigEncoding
+	errorInfo *C.OnigErrorInfo
-	errorInfo      *C.OnigErrorInfo
+	errorBuf  *C.char
-	errorBuf       *C.char
+
-	matchData      *MatchData
+	numCaptures    int32
 	namedGroupInfo NamedGroupInfo
 }
 // NewRegexp creates and initializes a new Regexp with the given pattern and option.
-func NewRegexp(pattern string, option int) (re *Regexp, err error) {
+func NewRegexp(pattern string, option int) (*Regexp, error) {
 	return initRegexp(&Regexp{pattern: pattern, encoding: C.ONIG_ENCODING_UTF8}, option)
 }
 // NewRegexpASCII is equivalent to NewRegexp, but with the encoding restricted to ASCII.
-func NewRegexpASCII(pattern string, option int) (re *Regexp, err error) {
+func NewRegexpASCII(pattern string, option int) (*Regexp, error) {
 	return initRegexp(&Regexp{pattern: pattern, encoding: C.ONIG_ENCODING_ASCII}, option)
 }
 func initRegexp(re *Regexp, option int) (*Regexp, error) {
 	var err error
 	patternCharPtr := C.CString(re.pattern)
 	defer C.free(unsafe.Pointer(patternCharPtr))
 	mutex.Lock()
 	defer mutex.Unlock()
-	errorCode := C.NewOnigRegex(patternCharPtr, C.int(len(re.pattern)), C.int(option), &re.regex, &re.region, &re.encoding, &re.errorInfo, &re.errorBuf)
+
 	errorCode := C.NewOnigRegex(patternCharPtr, C.int(len(re.pattern)), C.int(option), &re.regex, &re.encoding, &re.errorInfo, &re.errorBuf)
 	if errorCode != C.ONIG_NORMAL {
-		err = errors.New(C.GoString(re.errorBuf))
+		return re, errors.New(C.GoString(re.errorBuf))
 	} else {
 		err = nil
 		numCapturesInPattern := int(C.onig_number_of_captures(re.regex)) + 1
 		re.matchData = &MatchData{}
 		re.matchData.indexes = make([][]int32, numMatchStartSize)
 		for i := 0; i < numMatchStartSize; i++ {
 			re.matchData.indexes[i] = make([]int32, numCapturesInPattern*2)
 		}
 		re.namedGroupInfo = re.getNamedGroupInfo()
 		runtime.SetFinalizer(re, (*Regexp).Free)
 	}
-	return re, err
+
 	re.numCaptures = int32(C.onig_number_of_captures(re.regex)) + 1
 	re.namedGroupInfo = re.getNamedGroupInfo()
 	runtime.SetFinalizer(re, (*Regexp).Free)
 	return re, nil
 }
 func Compile(str string) (*Regexp, error) {
@@ -89,6 +78,7 @@ func MustCompile(str string) *Regexp {
 	if error != nil {
 		panic("regexp: compiling " + str + ": " + error.Error())
 	}
 	return regexp
 }
@@ -101,6 +91,7 @@ func MustCompileWithOption(str string, option int) *Regexp {
 	if error != nil {
 		panic("regexp: compiling " + str + ": " + error.Error())
 	}
 	return regexp
 }
@@ -110,6 +101,7 @@ func MustCompileASCII(str string) *Regexp {
 	if error != nil {
 		panic("regexp: compiling " + str + ": " + error.Error())
 	}
 	return regexp
 }
@@ -119,10 +111,6 @@ func (re *Regexp) Free() {
 		C.onig_free(re.regex)
 		re.regex = nil
 	}
 	if re.region != nil {
 		C.onig_region_free(re.region, 1)
 		re.region = nil
 	}
 	mutex.Unlock()
 	if re.errorInfo != nil {
 		C.free(unsafe.Pointer(re.errorInfo))
@@ -134,149 +122,149 @@ func (re *Regexp) Free() {
 	}
 }
-func (re *Regexp) getNamedGroupInfo() (namedGroupInfo NamedGroupInfo) {
+func (re *Regexp) getNamedGroupInfo() NamedGroupInfo {
 	numNamedGroups := int(C.onig_number_of_names(re.regex))
-	//when any named capture exisits, there is no numbered capture even if there are unnamed captures
+	// when any named capture exists, there is no numbered capture even if
-	if numNamedGroups > 0 {
+	// there are unnamed captures.
-		namedGroupInfo = make(map[string]int)
+	if numNamedGroups == 0 {
-		//try to get the names
+		return nil
 		bufferSize := len(re.pattern) * 2
 		nameBuffer := make([]byte, bufferSize)
 		groupNumbers := make([]int32, numNamedGroups)
 		bufferPtr := unsafe.Pointer(&nameBuffer[0])
 		numbersPtr := unsafe.Pointer(&groupNumbers[0])
 		length := int(C.GetCaptureNames(re.regex, bufferPtr, (C.int)(bufferSize), (*C.int)(numbersPtr)))
 		if length > 0 {
 			namesAsBytes := bytes.Split(nameBuffer[:length], ([]byte)(";"))
 			if len(namesAsBytes) != numNamedGroups {
 				log.Fatalf("the number of named groups (%d) does not match the number names found (%d)\n", numNamedGroups, len(namesAsBytes))
 			}
 			for i, nameAsBytes := range namesAsBytes {
 				name := string(nameAsBytes)
 				namedGroupInfo[name] = int(groupNumbers[i])
 			}
 		} else {
 			log.Fatalf("could not get the capture group names from %q", re.String())
 		}
 	}
 	return
 }
-func (re *Regexp) groupNameToId(name string) (id int) {
+	namedGroupInfo := make(map[string]int)
-	if re.namedGroupInfo == nil {
+
-		id = ONIGERR_UNDEFINED_NAME_REFERENCE
+	//try to get the names
-	} else {
+	bufferSize := len(re.pattern) * 2
-		id = re.namedGroupInfo[name]
+	nameBuffer := make([]byte, bufferSize)
 	groupNumbers := make([]int32, numNamedGroups)
 	bufferPtr := unsafe.Pointer(&nameBuffer[0])
 	numbersPtr := unsafe.Pointer(&groupNumbers[0])
 	length := int(C.GetCaptureNames(re.regex, bufferPtr, (C.int)(bufferSize), (*C.int)(numbersPtr)))
 	if length == 0 {
 		panic(fmt.Errorf("could not get the capture group names from %q", re.String()))
 	}
 	return
 }
-func (re *Regexp) processMatch(numCaptures int) (match []int32) {
+	namesAsBytes := bytes.Split(nameBuffer[:length], ([]byte)(";"))
-	if numCaptures <= 0 {
+	if len(namesAsBytes) != numNamedGroups {
-		panic("cannot have 0 captures when processing a match")
+		panic(fmt.Errorf(
 			"the number of named groups (%d) does not match the number names found (%d)",
 			numNamedGroups, len(namesAsBytes),
 		))
 	}
-	matchData := re.matchData
+
-	return matchData.indexes[matchData.count][:numCaptures*2]
+	for i, nameAsBytes := range namesAsBytes {
 		name := string(nameAsBytes)
 		namedGroupInfo[name] = int(groupNumbers[i])
 	}
 	return namedGroupInfo
 }
-func (re *Regexp) ClearMatchData() {
+func (re *Regexp) find(b []byte, n int, offset int) []int {
-	matchData := re.matchData
+	match := make([]int, re.numCaptures*2)
 	matchData.count = 0
 }
 func (re *Regexp) find(b []byte, n int, offset int) (match []int) {
 	if n == 0 {
 		b = []byte{0}
 	}
-	ptr := unsafe.Pointer(&b[0])
+
-	matchData := re.matchData
+	bytesPtr := unsafe.Pointer(&b[0])
-	capturesPtr := unsafe.Pointer(&(matchData.indexes[matchData.count][0]))
+
-	numCaptures := int32(0)
+	// captures contains two pairs of ints, start and end, so we need list
 	// twice the size of the capture groups.
 	captures := make([]C.int, re.numCaptures*2)
 	capturesPtr := unsafe.Pointer(&captures[0])
 	var numCaptures int32
 	numCapturesPtr := unsafe.Pointer(&numCaptures)
-	pos := int(C.SearchOnigRegex((ptr), C.int(n), C.int(offset), C.int(ONIG_OPTION_DEFAULT), re.regex, re.region, re.errorInfo, (*C.char)(nil), (*C.int)(capturesPtr), (*C.int)(numCapturesPtr)))
+
-	if pos >= 0 {
+	pos := int(C.SearchOnigRegex(
-		if numCaptures <= 0 {
+		bytesPtr, C.int(n), C.int(offset), C.int(ONIG_OPTION_DEFAULT),
-			panic("cannot have 0 captures when processing a match")
+		re.regex, re.errorInfo, (*C.char)(nil), (*C.int)(capturesPtr), (*C.int)(numCapturesPtr),
-		}
+	))
-		match2 := matchData.indexes[matchData.count][:numCaptures*2]
+
-		match = make([]int, len(match2))
+	if pos < 0 {
-		for i := range match2 {
+		return nil
 			match[i] = int(match2[i])
 		}
 		numCapturesInPattern := int32(C.onig_number_of_captures(re.regex)) + 1
 		if numCapturesInPattern != numCaptures {
 			log.Fatalf("expected %d captures but got %d\n", numCapturesInPattern, numCaptures)
 		}
 	}
-	return
+
 	if numCaptures <= 0 {
 		panic("cannot have 0 captures when processing a match")
 	}
 	if re.numCaptures != numCaptures {
 		panic(fmt.Errorf("expected %d captures but got %d", re.numCaptures, numCaptures))
 	}
 	for i := range captures {
 		match[i] = int(captures[i])
 	}
 	return match
 }
 // getCapture returns the sub-slice b[beg:end]. When either bound is negative
 // it returns nil instead (presumably a negative offset marks a capture group
 // that did not take part in the match — confirm against the caller).
 func getCapture(b []byte, beg int, end int) []byte {
 	if beg >= 0 && end >= 0 {
 		return b[beg:end]
 	}
 	return nil
 }
 func (re *Regexp) match(b []byte, n int, offset int) bool {
 	re.ClearMatchData()
 	if n == 0 {
 		b = []byte{0}
 	}
-	ptr := unsafe.Pointer(&b[0])
+
-	pos := int(C.SearchOnigRegex((ptr), C.int(n), C.int(offset), C.int(ONIG_OPTION_DEFAULT), re.regex, re.region, re.errorInfo, (*C.char)(nil), (*C.int)(nil), (*C.int)(nil)))
+	bytesPtr := unsafe.Pointer(&b[0])
 	pos := int(C.SearchOnigRegex(
 		bytesPtr, C.int(n), C.int(offset), C.int(ONIG_OPTION_DEFAULT),
 		re.regex, re.errorInfo, nil, nil, nil,
 	))
 	return pos >= 0
 }
-func (re *Regexp) findAll(b []byte, n int) (matches [][]int) {
+func (re *Regexp) findAll(b []byte, n int) [][]int {
 	re.ClearMatchData()
 	if n < 0 {
 		n = len(b)
 	}
-	matchData := re.matchData
+
-	offset := 0
+	capture := make([][]int, 0, numMatchStartSize)
 	var offset int
 	for offset <= n {
-		if matchData.count >= len(matchData.indexes) {
+		match := re.find(b, n, offset)
-			length := len(matchData.indexes[0])
+		if match == nil {
 			matchData.indexes = append(matchData.indexes, make([]int32, length))
 		}
 		if match := re.find(b, n, offset); len(match) > 0 {
 			matchData.count += 1
 			//move offset to the ending index of the current match and prepare to find the next non-overlapping match
 			offset = match[1]
 			//if match[0] == match[1], it means the current match does not advance the search. we need to exit the loop to avoid getting stuck here.
 			if match[0] == match[1] {
 				if offset < n && offset >= 0 {
 					//there are more bytes, so move offset by a word
 					_, width := utf8.DecodeRune(b[offset:])
 					offset += width
 				} else {
 					//search is over, exit loop
 					break
 				}
 			}
 		} else {
 			break
 		}
-	}
+
-	matches2 := matchData.indexes[:matchData.count]
+		capture = append(capture, match)
-	matches = make([][]int, len(matches2))
+
-	for i, v := range matches2 {
+		// move offset to the ending index of the current match and prepare to
-		matches[i] = make([]int, len(v))
+		// find the next non-overlapping match.
-		for j, v2 := range v {
+		offset = match[1]
-			matches[i][j] = int(v2)
+
 		// if match[0] == match[1], it means the current match does not advance
 		// the search. we need to exit the loop to avoid getting stuck here.
 		if match[0] == match[1] {
 			if offset < n && offset >= 0 {
 				//there are more bytes, so move offset by a word
 				_, width := utf8.DecodeRune(b[offset:])
 				offset += width
 			} else {
 				//search is over, exit loop
 				break
 			}
 		}
 	}
-	return
+
 	return capture
 }
 // FindIndex calls re.find over all of b starting at offset 0 and returns the
 // first two entries of the index slice it produces, or nil when find yields
 // an empty result.
 // NOTE(review): the two entries are presumably the start and end byte
 // offsets of the overall match — confirm against find.
 func (re *Regexp) FindIndex(b []byte) []int {
 	re.ClearMatchData()
 	match := re.find(b, len(b), 0)
 	if len(match) == 0 {
 		return nil
 	}
 	return match[:2]
 }
@@ -285,21 +273,21 @@ func (re *Regexp) Find(b []byte) []byte {
 	if loc == nil {
 		return nil
 	}
 	return getCapture(b, loc[0], loc[1])
 }
 func (re *Regexp) FindString(s string) string {
-	b := []byte(s)
+	mb := re.Find([]byte(s))
 	mb := re.Find(b)
 	if mb == nil {
 		return ""
 	}
 	return string(mb)
 }
 func (re *Regexp) FindStringIndex(s string) []int {
-	b := []byte(s)
+	return re.FindIndex([]byte(s))
 	return re.FindIndex(b)
 }
 func (re *Regexp) FindAllIndex(b []byte, n int) [][]int {
@@ -307,6 +295,7 @@ func (re *Regexp) FindAllIndex(b []byte, n int) [][]int {
 	if len(matches) == 0 {
 		return nil
 	}
 	return matches
 }
@@ -315,10 +304,12 @@ func (re *Regexp) FindAll(b []byte, n int) [][]byte {
 	if matches == nil {
 		return nil
 	}
 	matchBytes := make([][]byte, 0, len(matches))
 	for _, match := range matches {
 		matchBytes = append(matchBytes, getCapture(b, match[0], match[1]))
 	}
 	return matchBytes
 }
@@ -328,6 +319,7 @@ func (re *Regexp) FindAllString(s string, n int) []string {
 	if matches == nil {
 		return nil
 	}
 	matchStrings := make([]string, 0, len(matches))
 	for _, match := range matches {
 		m := getCapture(b, match[0], match[1])
@@ -337,51 +329,50 @@ func (re *Regexp) FindAllString(s string, n int) []string {
 			matchStrings = append(matchStrings, string(m))
 		}
 	}
 	return matchStrings
 }
 func (re *Regexp) FindAllStringIndex(s string, n int) [][]int {
-	b := []byte(s)
+	return re.FindAllIndex([]byte(s), n)
 	return re.FindAllIndex(b, n)
 }
 // findSubmatchIndex calls ClearMatchData and then returns whatever re.find
 // reports for a search over all of b starting at offset 0.
 func (re *Regexp) findSubmatchIndex(b []byte) (match []int) {
 	re.ClearMatchData()
 	return re.find(b, len(b), 0)
 }
 func (re *Regexp) FindSubmatchIndex(b []byte) []int {
-	match := re.findSubmatchIndex(b)
+	match := re.find(b, len(b), 0)
 	if len(match) == 0 {
 		return nil
 	}
 	return match
 }
 func (re *Regexp) FindSubmatch(b []byte) [][]byte {
-	match := re.findSubmatchIndex(b)
+	match := re.FindSubmatchIndex(b)
 	if match == nil {
 		return nil
 	}
 	length := len(match) / 2
 	if length == 0 {
 		return nil
 	}
 	results := make([][]byte, 0, length)
 	for i := 0; i < length; i++ {
 		results = append(results, getCapture(b, match[2*i], match[2*i+1]))
 	}
 	return results
 }
 func (re *Regexp) FindStringSubmatch(s string) []string {
 	b := []byte(s)
-	match := re.findSubmatchIndex(b)
+	match := re.FindSubmatchIndex(b)
 	if match == nil {
 		return nil
 	}
 	length := len(match) / 2
 	if length == 0 {
 		return nil
@@ -396,12 +387,12 @@ func (re *Regexp) FindStringSubmatch(s string) []string {
 			results = append(results, string(cap))
 		}
 	}
 	return results
 }
 func (re *Regexp) FindStringSubmatchIndex(s string) []int {
-	b := []byte(s)
+	return re.FindSubmatchIndex([]byte(s))
 	return re.FindSubmatchIndex(b)
 }
 func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int {
@@ -409,6 +400,7 @@ func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int {
 	if len(matches) == 0 {
 		return nil
 	}
 	return matches
 }
@@ -417,6 +409,7 @@ func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte {
 	if len(matches) == 0 {
 		return nil
 	}
 	allCapturedBytes := make([][][]byte, 0, len(matches))
 	for _, match := range matches {
 		length := len(match) / 2
@@ -424,6 +417,7 @@ func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte {
 		for i := 0; i < length; i++ {
 			capturedBytes = append(capturedBytes, getCapture(b, match[2*i], match[2*i+1]))
 		}
 		allCapturedBytes = append(allCapturedBytes, capturedBytes)
 	}
@@ -432,10 +426,12 @@ func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte {
 func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string {
 	b := []byte(s)
 	matches := re.findAll(b, n)
 	if len(matches) == 0 {
 		return nil
 	}
 	allCapturedStrings := make([][]string, 0, len(matches))
 	for _, match := range matches {
 		length := len(match) / 2
@@ -448,14 +444,15 @@ func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string {
 				capturedStrings = append(capturedStrings, string(cap))
 			}
 		}
 		allCapturedStrings = append(allCapturedStrings, capturedStrings)
 	}
 	return allCapturedStrings
 }
 func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int {
-	b := []byte(s)
+	return re.FindAllSubmatchIndex([]byte(s), n)
 	return re.FindAllSubmatchIndex(b, n)
 }
 func (re *Regexp) Match(b []byte) bool {
@@ -463,44 +460,25 @@ func (re *Regexp) Match(b []byte) bool {
 }
 func (re *Regexp) MatchString(s string) bool {
-	b := []byte(s)
+	return re.Match([]byte(s))
 	return re.Match(b)
 }
 // NumSubexp returns the number of capture groups that
 // onig_number_of_captures reports for the compiled regex.
 func (re *Regexp) NumSubexp() int {
 	return (int)(C.onig_number_of_captures(re.regex))
 }
 // getNamedCapture returns the entry of capturedBytes at the index that
 // re.groupNameToId reports for name. It panics when that index is negative
 // or not smaller than len(capturedBytes).
 func (re *Regexp) getNamedCapture(name []byte, capturedBytes [][]byte) []byte {
 	groupName := string(name)
 	idx := re.groupNameToId(groupName)
 	if idx >= 0 && idx < len(capturedBytes) {
 		return capturedBytes[idx]
 	}
 	panic(fmt.Sprintf("capture group name (%q) has error\n", groupName))
 }
 // getNumberedCapture returns capturedBytes[num] when re carries no named
 // group info and num is a valid index into capturedBytes. In every other
 // case it returns an empty byte slice.
 func (re *Regexp) getNumberedCapture(num int, capturedBytes [][]byte) []byte {
 	// When named capture groups exist, numbered capture groups yield "".
 	if re.namedGroupInfo != nil || num < 0 || num >= len(capturedBytes) {
 		return []byte("")
 	}
 	return capturedBytes[num]
 }
 func fillCapturedValues(repl []byte, _ []byte, capturedBytes map[string][]byte) []byte {
 	replLen := len(repl)
 	newRepl := make([]byte, 0, replLen*3)
 	inEscapeMode := false
 	inGroupNameMode := false
 	groupName := make([]byte, 0, replLen)
-	for index := 0; index < replLen; index += 1 {
+
 	var inGroupNameMode, inEscapeMode bool
 	for index := 0; index < replLen; index++ {
 		ch := repl[index]
 		if inGroupNameMode && ch == byte('<') {
 		} else if inGroupNameMode && ch == byte('>') {
 			inGroupNameMode = false
-			groupNameStr := string(groupName)
+			capBytes := capturedBytes[string(groupName)]
 			capBytes := capturedBytes[groupNameStr]
 			newRepl = append(newRepl, capBytes...)
 			groupName = groupName[:0] //reset the name
 		} else if inGroupNameMode {
@@ -512,7 +490,7 @@ func fillCapturedValues(repl []byte, _ []byte, capturedBytes map[string][]byte)
 		} else if inEscapeMode && ch == byte('k') && (index+1) < replLen && repl[index+1] == byte('<') {
 			inGroupNameMode = true
 			inEscapeMode = false
-			index += 1 //bypass the next char '<'
+			index++ //bypass the next char '<'
 		} else if inEscapeMode {
 			newRepl = append(newRepl, '\\')
 			newRepl = append(newRepl, ch)
@@ -523,6 +501,7 @@ func fillCapturedValues(repl []byte, _ []byte, capturedBytes map[string][]byte)
 			inEscapeMode = !inEscapeMode
 		}
 	}
 	return newRepl
 }
@@ -532,10 +511,12 @@ func (re *Regexp) replaceAll(src, repl []byte, replFunc func([]byte, []byte, map
 	if len(matches) == 0 {
 		return src
 	}
 	dest := make([]byte, 0, srcLen)
 	for i, match := range matches {
 		length := len(match) / 2
 		capturedBytes := make(map[string][]byte)
 		if re.namedGroupInfo == nil {
 			for j := 0; j < length; j++ {
 				capturedBytes[strconv.Itoa(j)] = getCapture(src, match[2*j], match[2*j+1])
@@ -545,6 +526,7 @@ func (re *Regexp) replaceAll(src, repl []byte, replFunc func([]byte, []byte, map
 				capturedBytes[name] = getCapture(src, match[2*j], match[2*j+1])
 			}
 		}
 		matchBytes := getCapture(src, match[0], match[1])
 		newRepl := replFunc(repl, matchBytes, capturedBytes)
 		prevEnd := 0
@@ -552,15 +534,19 @@ func (re *Regexp) replaceAll(src, repl []byte, replFunc func([]byte, []byte, map
 			prevMatch := matches[i-1][:2]
 			prevEnd = prevMatch[1]
 		}
 		if match[0] > prevEnd && prevEnd >= 0 && match[0] <= srcLen {
 			dest = append(dest, src[prevEnd:match[0]]...)
 		}
 		dest = append(dest, newRepl...)
 	}
 	lastEnd := matches[len(matches)-1][1]
 	if lastEnd < srcLen && lastEnd >= 0 {
 		dest = append(dest, src[lastEnd:]...)
 	}
 	return dest
 }
@@ -569,7 +555,7 @@ func (re *Regexp) ReplaceAll(src, repl []byte) []byte {
 }
 func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte {
-	return re.replaceAll(src, []byte(""), func(_ []byte, matchBytes []byte, _ map[string][]byte) []byte {
+	return re.replaceAll(src, nil, func(_ []byte, matchBytes []byte, _ map[string][]byte) []byte {
 		return repl(matchBytes)
 	})
 }
@@ -579,43 +565,44 @@ func (re *Regexp) ReplaceAllString(src, repl string) string {
 }
 func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string {
-	srcB := []byte(src)
+	return string(re.replaceAll([]byte(src), nil, func(_ []byte, matchBytes []byte, _ map[string][]byte) []byte {
 	destB := re.replaceAll(srcB, []byte(""), func(_ []byte, matchBytes []byte, _ map[string][]byte) []byte {
 		return []byte(repl(string(matchBytes)))
-	})
+	}))
 	return string(destB)
 }
 // String returns the pattern text stored in re.pattern.
 func (re *Regexp) String() string {
 	return re.pattern
 }
-func grow_buffer(b []byte, offset int, n int) []byte {
+func growBuffer(b []byte, offset int, n int) []byte {
 	if offset+n > cap(b) {
 		buf := make([]byte, 2*cap(b)+n)
 		copy(buf, b[:offset])
 		return buf
 	}
 	return b
 }
 func fromReader(r io.RuneReader) []byte {
 	b := make([]byte, numReadBufferStartSize)
-	offset := 0
+
-	var err error = nil
+	var offset int
-	for err == nil {
+	for {
 		rune, runeWidth, err := r.ReadRune()
-		if err == nil {
+		if err != nil {
 			b = grow_buffer(b, offset, runeWidth)
 			writeWidth := utf8.EncodeRune(b[offset:], rune)
 			if runeWidth != writeWidth {
 				panic("reading rune width not equal to the written rune width")
 			}
 			offset += writeWidth
 		} else {
 			break
 		}
 		b = growBuffer(b, offset, runeWidth)
 		writeWidth := utf8.EncodeRune(b[offset:], rune)
 		if runeWidth != writeWidth {
 			panic("reading rune width not equal to the written rune width")
 		}
 		offset += writeWidth
 	}
 	return b[:offset]
 }
@@ -644,25 +631,25 @@ func MatchString(pattern string, s string) (matched bool, error error) {
 	if err != nil {
 		return false, err
 	}
 	return re.MatchString(s), nil
 }
 func (re *Regexp) Gsub(src, repl string) string {
-	srcBytes := ([]byte)(src)
+	return string(re.replaceAll([]byte(src), []byte(repl), fillCapturedValues))
 	replBytes := ([]byte)(repl)
 	replaced := re.replaceAll(srcBytes, replBytes, fillCapturedValues)
 	return string(replaced)
 }
 func (re *Regexp) GsubFunc(src string, replFunc func(string, map[string]string) string) string {
-	srcBytes := ([]byte)(src)
+	replaced := re.replaceAll([]byte(src), nil,
-	replaced := re.replaceAll(srcBytes, nil, func(_ []byte, matchBytes []byte, capturedBytes map[string][]byte) []byte {
+		func(_ []byte, matchBytes []byte, capturedBytes map[string][]byte) []byte {
-		capturedStrings := make(map[string]string)
+			capturedStrings := make(map[string]string)
-		for name, capBytes := range capturedBytes {
+			for name, capBytes := range capturedBytes {
-			capturedStrings[name] = string(capBytes)
+				capturedStrings[name] = string(capBytes)
-		}
+			}
-		matchString := string(matchBytes)
+			matchString := string(matchBytes)
-		return ([]byte)(replFunc(matchString, capturedStrings))
+			return ([]byte)(replFunc(matchString, capturedStrings))
-	})
+		},
 	)
 	return string(replaced)
 }
@@ -1,22 +0,0 @@
 Copyright (c) 2013 Caleb Spare
 MIT License
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:
 The above copyright notice and this permission notice shall be
 included in all copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -1,7 +0,0 @@
 # Trie
 [![GoDoc](http://godoc.org/github.com/toqueteos/trie?status.png)](http://godoc.org/github.com/toqueteos/trie)
 This is a fork of https://github.com/cespare/go-trie that adds the `PrefixIndex` method.
 It's required for https://github.com/toqueteos/substring.
@@ -1 +0,0 @@
 module github.com/toqueteos/trie
@@ -1,102 +0,0 @@
 // Package trie is an implementation of a trie (prefix tree) data structure over byte slices. It provides a
 // small and simple API for usage as a set as well as a 'Node' API for walking the trie.
 package trie
 // A Trie is a prefix tree over byte slices.
 type Trie struct {
 	root *Node
 }
 // New constructs a new, empty Trie ready for use.
 func New() *Trie {
 	t := new(Trie)
 	t.root = new(Node)
 	return t
 }
 // Insert puts b into the Trie, creating any nodes missing along its path.
 // It returns true if the element was not previously in t.
 func (t *Trie) Insert(b []byte) bool {
 	cur := t.root
 	for i := 0; i < len(b); i++ {
 		child, found := cur.Walk(b[i])
 		if !found {
 			child = new(Node)
 			cur.branches[b[i]] = child
 			cur.hasChildren = true
 		}
 		cur = child
 	}
 	// The node reached by the last byte marks b as a member; it is already
 	// marked when b was inserted before.
 	added := !cur.terminal
 	cur.terminal = true
 	return added
 }
 // Contains checks t for membership of b.
 func (t *Trie) Contains(b []byte) bool {
 	cur := t.root
 	for i := range b {
 		child, found := cur.Walk(b[i])
 		if !found {
 			return false
 		}
 		cur = child
 	}
 	return cur.terminal
 }
 // PrefixIndex walks through b from the root and returns the index of the
 // byte at which the first terminal node is reached. It returns -1 when a
 // byte has no matching edge or when b is exhausted without reaching a
 // terminal node. For an empty b it returns 0 if the root is terminal and -1
 // otherwise.
 func (t *Trie) PrefixIndex(b []byte) int {
 	cur := t.root
 	for pos, c := range b {
 		child, found := cur.Walk(c)
 		switch {
 		case !found:
 			return -1
 		case child.terminal:
 			return pos
 		}
 		cur = child
 	}
 	if cur.terminal {
 		return len(b)
 	}
 	return -1
 }
 // Root returns the root node of a Trie. A valid Trie (i.e., constructed with
 // New) always has a non-nil root node.
 func (t *Trie) Root() *Node {
 	return t.root
 }
 // A Node represents a logical vertex in the trie structure.
 type Node struct {
 	branches    [256]*Node // outgoing edges indexed by byte value; nil means no edge
 	terminal    bool       // value reported by Terminal
 	hasChildren bool       // Leaf reports the negation of this flag
 }
 // Walk returns the node reached along edge c, if one exists. The ok value
 // indicates whether such a node exists; next is nil when it does not.
 func (n *Node) Walk(c byte) (next *Node, ok bool) {
 	if child := n.branches[c]; child != nil {
 		return child, true
 	}
 	return nil, false
 }
 // Terminal indicates whether n is terminal in the trie (that is, whether the
 // path from the root to n represents an element in the set). For instance,
 // if the root node is terminal, then []byte{} is in the trie.
 func (n *Node) Terminal() bool {
 	return n.terminal
 }
 // Leaf indicates whether n is a leaf node in the trie (that is, whether it
 // has no children). Logically, if n is a leaf node then the []byte
 // represented by the path from the root to n is not a proper prefix of any
 // element of the trie.
 func (n *Node) Leaf() bool {
 	return !n.hasChildren
 }
@@ -1,24 +0,0 @@
 # Compiled Object files, Static and Dynamic libs (Shared Objects)
 *.o
 *.a
 *.so
 # Folders
 _obj
 _test
 # Architecture specific extensions/prefixes
 *.[568vq]
 [568vq].out
 *.cgo1.go
 *.cgo2.c
 _cgo_defun.c
 _cgo_gotypes.go
 _cgo_export.*
 _testmain.go
 *.exe
 *.test
 *.prof
@@ -1,11 +0,0 @@
 language: go
 go:
  - 1.2
  - 1.3
  - 1.4
  - tip
 script:
  - go get launchpad.net/gocheck
  - go test
@@ -1,22 +0,0 @@
 The MIT License (MIT)
 Copyright (c) 2015 Carlos Cobo
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
@@ -1,80 +0,0 @@
 # substring [![Build Status](https://travis-ci.org/toqueteos/substring.png?branch=master)](https://travis-ci.org/toqueteos/substring) [![GoDoc](http://godoc.org/github.com/toqueteos/substring?status.png)](http://godoc.org/github.com/toqueteos/substring) [![GitHub release](https://img.shields.io/github/release/toqueteos/substring.svg)](https://github.com/toqueteos/substring/releases)
 Simple and composable alternative to [regexp](http://golang.org/pkg/regexp/) package for fast substring searches.
 ## Installation
 The recommended way to install substring is:
 ```
 go get -t gopkg.in/toqueteos/substring.v1
 ```
 The `-t` flag is for fetching [gocheck](https://gopkg.in/check.v1), required for tests and benchmarks.
 ## Examples
 A basic example with two matchers:
 ```go
 package main
 import (
    "fmt"
    "regexp"
    "gopkg.in/toqueteos/substring.v1"
 )
 func main() {
    m1 := substring.After("assets/", substring.Or(
        substring.Has("jquery"),
        substring.Has("angular"),
        substring.Suffixes(".js", ".css", ".html"),
    ))
    fmt.Println(m1.Match("assets/angular/foo/bar")) //Prints: true
    fmt.Println(m1.Match("assets/js/file.js"))      //Prints: true
    fmt.Println(m1.Match("assets/style/bar.css"))   //Prints: true
    fmt.Println(m1.Match("assets/foo/bar.html"))    //Prints: true
    fmt.Println(m1.Match("assets/js/qux.json"))     //Prints: false
    fmt.Println(m1.Match("core/file.html"))         //Prints: false
    fmt.Println(m1.Match("foobar/that.jsx"))        //Prints: false
    m2 := substring.After("vendor/", substring.Suffixes(".css", ".js", ".less"))
    fmt.Println(m2.Match("foo/vendor/bar/qux.css")) //Prints: true
    fmt.Println(m2.Match("foo/var/qux.less"))       //Prints: false
    re := regexp.MustCompile(`vendor\/.*\.(css|js|less)$`)
    fmt.Println(re.MatchString("foo/vendor/bar/qux.css")) //Prints: true
    fmt.Println(re.MatchString("foo/var/qux.less"))       //Prints: false
 }
 ```
 ## How fast?
 It may vary depending on your use case but 1~2 orders of magnitude faster than `regexp` is pretty common.
 Test it out for yourself by running `go test -check.b`!
 ```
 $ go test -check.b
 PASS: lib_test.go:18: LibSuite.BenchmarkExample1        10000000               221 ns/op
 PASS: lib_test.go:23: LibSuite.BenchmarkExample2        10000000               229 ns/op
 PASS: lib_test.go:28: LibSuite.BenchmarkExample3        10000000               216 ns/op
 PASS: lib_test.go:33: LibSuite.BenchmarkExample4        10000000               208 ns/op
 PASS: lib_test.go:38: LibSuite.BenchmarkExample5        20000000                82.1 ns/op
 PASS: lib_test.go:48: LibSuite.BenchmarkExampleRe1        500000              4136 ns/op
 PASS: lib_test.go:53: LibSuite.BenchmarkExampleRe2        500000              5222 ns/op
 PASS: lib_test.go:58: LibSuite.BenchmarkExampleRe3        500000              5116 ns/op
 PASS: lib_test.go:63: LibSuite.BenchmarkExampleRe4        500000              4020 ns/op
 PASS: lib_test.go:68: LibSuite.BenchmarkExampleRe5      10000000               226 ns/op
 OK: 10 passed
 PASS
 ok      gopkg.in/toqueteos/substring.v1 23.471s
 ```
 License
 -------
 MIT, see [LICENSE](LICENSE)
@@ -1,229 +0,0 @@
 package substring
 import (
 	"bytes"
 	"regexp"
 	"github.com/toqueteos/trie"
 )
 // BytesMatcher is the interface satisfied by the []byte matchers defined in this file.
 type BytesMatcher interface {
 	// Match reports whether b satisfies the matcher.
 	Match(b []byte) bool
 	// MatchIndex returns an index describing where b matched; the matchers in this file use -1 to signal
 	// "no match". What a non-negative index refers to is defined by each implementation.
 	MatchIndex(b []byte) int
 }
 // regexpBytes matches byte slices against a compiled regular expression.
 type regexpBytes struct{ re *regexp.Regexp }
 // BytesRegexp compiles pat with regexp.MustCompile (which panics on an invalid pattern) and wraps the
 // result in a matcher.
 func BytesRegexp(pat string) *regexpBytes {
 	compiled := regexp.MustCompile(pat)
 	return &regexpBytes{re: compiled}
 }
 // Match reports whether the expression matches anywhere in b.
 func (m *regexpBytes) Match(b []byte) bool {
 	return m.re.Match(b)
 }
 // MatchIndex returns the end offset of the leftmost match in b, or -1 when nothing matches.
 func (m *regexpBytes) MatchIndex(b []byte) int {
 	loc := m.re.FindIndex(b)
 	if loc == nil {
 		return -1
 	}
 	return loc[1]
 }
 // exactBytes matches only byte slices whose contents equal pat.
 type exactBytes struct{ pat []byte }
 // BytesExact returns a matcher that accepts exactly the bytes of pat.
 func BytesExact(pat string) *exactBytes { return &exactBytes{[]byte(pat)} }
 // Match reports whether b has the same length and contents as the pattern.
 func (m *exactBytes) Match(b []byte) bool {
 	// bytes.Equal replaces the hand-written length check and byte loop; like that loop, it treats a nil
 	// slice and an empty slice as equal.
 	return bytes.Equal(b, m.pat)
 }
 // MatchIndex returns len(b) when b equals the pattern and -1 otherwise.
 func (m *exactBytes) MatchIndex(b []byte) int {
 	if !m.Match(b) {
 		return -1
 	}
 	return len(b)
 }
 // anyBytes holds a fixed haystack; an input matches when it occurs somewhere inside that haystack.
 type anyBytes struct {
 	pat []byte
 }
 // BytesAny returns a matcher that accepts any byte slice contained in pat.
 func BytesAny(pat string) *anyBytes {
 	return &anyBytes{pat: []byte(pat)}
 }
 // Match reports whether b occurs within the stored pattern.
 func (m *anyBytes) Match(b []byte) bool {
 	return bytes.Contains(m.pat, b)
 }
 // MatchIndex returns the offset within the stored pattern just past the first occurrence of b, or -1 when
 // b does not occur in it.
 func (m *anyBytes) MatchIndex(b []byte) int {
 	start := bytes.Index(m.pat, b)
 	if start < 0 {
 		return -1
 	}
 	return start + len(b)
 }
 // hasBytes matches inputs that contain pat.
 type hasBytes struct {
 	pat []byte
 }
 // BytesHas returns a matcher that accepts any byte slice containing pat.
 func BytesHas(pat string) *hasBytes {
 	return &hasBytes{pat: []byte(pat)}
 }
 // Match reports whether the pattern occurs within b.
 func (m *hasBytes) Match(b []byte) bool {
 	return bytes.Contains(b, m.pat)
 }
 // MatchIndex returns the offset in b just past the first occurrence of the pattern, or -1 when the pattern
 // does not occur in b.
 func (m *hasBytes) MatchIndex(b []byte) int {
 	start := bytes.Index(b, m.pat)
 	if start < 0 {
 		return -1
 	}
 	return start + len(m.pat)
 }
 // prefixBytes matches inputs that begin with pat.
 type prefixBytes struct{ pat []byte }
 // BytesPrefix returns a matcher that accepts any byte slice starting with pat.
 func BytesPrefix(pat string) *prefixBytes {
 	return &prefixBytes{pat: []byte(pat)}
 }
 // Match reports whether b starts with the pattern.
 func (m *prefixBytes) Match(b []byte) bool {
 	return bytes.HasPrefix(b, m.pat)
 }
 // MatchIndex returns the length of the pattern when b starts with it and -1 otherwise.
 func (m *prefixBytes) MatchIndex(b []byte) int {
 	if !bytes.HasPrefix(b, m.pat) {
 		return -1
 	}
 	return len(m.pat)
 }
 // prefixesBytes matches byte slices against a set of prefixes stored in a trie.
 type prefixesBytes struct {
 	t *trie.Trie
 }
 // BytesPrefixes returns a matcher whose trie holds every pattern in pats.
 func BytesPrefixes(pats ...string) *prefixesBytes {
 	set := trie.New()
 	for i := range pats {
 		set.Insert([]byte(pats[i]))
 	}
 	return &prefixesBytes{t: set}
 }
 // Match reports whether the trie's PrefixIndex lookup for b is non-negative.
 func (m *prefixesBytes) Match(b []byte) bool {
 	found := m.t.PrefixIndex(b)
 	return found >= 0
 }
 // MatchIndex returns the trie's PrefixIndex for b when it is non-negative and -1 otherwise.
 func (m *prefixesBytes) MatchIndex(b []byte) int {
 	end := m.t.PrefixIndex(b)
 	if end < 0 {
 		return -1
 	}
 	return end
 }
 // suffixBytes matches inputs that end with pat.
 type suffixBytes struct{ pat []byte }
 // BytesSuffix returns a matcher that accepts any byte slice ending with pat.
 func BytesSuffix(pat string) *suffixBytes {
 	return &suffixBytes{pat: []byte(pat)}
 }
 // Match reports whether b ends with the pattern.
 func (m *suffixBytes) Match(b []byte) bool {
 	return bytes.HasSuffix(b, m.pat)
 }
 // MatchIndex returns the length of the pattern (not an offset into b) when b ends with it, and -1
 // otherwise.
 func (m *suffixBytes) MatchIndex(b []byte) int {
 	if !bytes.HasSuffix(b, m.pat) {
 		return -1
 	}
 	return len(m.pat)
 }
 // suffixesBytes matches byte slices against a set of suffixes. Each suffix is stored reversed in a trie, so
 // a suffix test on the input becomes a prefix lookup on the reversed input.
 type suffixesBytes struct {
 	t *trie.Trie
 }
 // BytesSuffixes returns a matcher whose trie holds the reversed form of every pattern in pats.
 func BytesSuffixes(pats ...string) *suffixesBytes {
 	t := trie.New()
 	for _, pat := range pats {
 		// []byte(pat) is a fresh copy, so reversing it in place touches nothing the caller owns.
 		t.Insert(reverse([]byte(pat)))
 	}
 	return &suffixesBytes{t}
 }
 // Match reports whether the trie's PrefixIndex lookup for the reversed input is non-negative. b itself is
 // left unmodified.
 func (m *suffixesBytes) Match(b []byte) bool {
 	return m.MatchIndex(b) >= 0
 }
 // MatchIndex returns the trie's PrefixIndex for the reversed input when it is non-negative and -1
 // otherwise. b itself is left unmodified.
 func (m *suffixesBytes) MatchIndex(b []byte) int {
 	// reverse swaps bytes in place. Run it on a private copy: reversing b directly would scramble the
 	// caller's slice on every call.
 	rev := reverse(append([]byte(nil), b...))
 	if idx := m.t.PrefixIndex(rev); idx >= 0 {
 		return idx
 	}
 	return -1
 }
 // afterBytes applies matcher to whatever follows the first occurrence of first in the input.
 type afterBytes struct {
 	first   []byte
 	matcher BytesMatcher
 }
 // BytesAfter returns a matcher that locates first in the input and hands the bytes after it to m.
 func BytesAfter(first string, m BytesMatcher) *afterBytes { return &afterBytes{[]byte(first), m} }
 // Match reports whether first occurs in b and the wrapped matcher accepts the bytes that follow its first
 // occurrence.
 func (a *afterBytes) Match(b []byte) bool {
 	if idx := bytes.Index(b, a.first); idx >= 0 {
 		return a.matcher.Match(b[idx+len(a.first):])
 	}
 	return false
 }
 // MatchIndex returns -1 when first does not occur in b or when the wrapped matcher reports a negative index
 // for the bytes after it. Otherwise it returns the wrapped matcher's index shifted by the offset at which
 // those bytes start in b.
 func (a *afterBytes) MatchIndex(b []byte) int {
 	idx := bytes.Index(b, a.first)
 	if idx < 0 {
 		return -1
 	}
 	// Inspect the same tail Match does (the bytes after first), so the two methods agree.
 	start := idx + len(a.first)
 	inner := a.matcher.MatchIndex(b[start:])
 	if inner < 0 {
 		// Propagate the miss: adding an offset to a negative index could produce a bogus non-negative result.
 		return -1
 	}
 	return start + inner
 }
 // andBytes combines matchers; it matches only when every one of them does.
 type andBytes struct{ matchers []BytesMatcher }
 // BytesAnd returns a matcher that requires all of m to match.
 func BytesAnd(m ...BytesMatcher) *andBytes {
 	return &andBytes{matchers: m}
 }
 // Match reports whether every wrapped matcher accepts b. With no matchers it returns true.
 func (a *andBytes) Match(b []byte) bool {
 	for i := range a.matchers {
 		if ok := a.matchers[i].Match(b); !ok {
 			return false
 		}
 	}
 	return true
 }
 // MatchIndex returns the largest index reported by the wrapped matchers, or -1 as soon as one of them
 // reports a negative index. With no matchers it returns 0.
 func (a *andBytes) MatchIndex(b []byte) int {
 	best := 0
 	for _, sub := range a.matchers {
 		idx := sub.MatchIndex(b)
 		if idx < 0 {
 			return -1
 		}
 		if idx > best {
 			best = idx
 		}
 	}
 	return best
 }
 // orBytes combines matchers; it matches as soon as any one of them does.
 type orBytes struct{ matchers []BytesMatcher }
 // BytesOr returns a matcher that requires at least one of m to match.
 func BytesOr(m ...BytesMatcher) *orBytes {
 	return &orBytes{matchers: m}
 }
 // Match reports whether any wrapped matcher accepts b, trying them in order. With no matchers it returns
 // false.
 func (o *orBytes) Match(b []byte) bool {
 	for i := range o.matchers {
 		if ok := o.matchers[i].Match(b); ok {
 			return true
 		}
 	}
 	return false
 }
 // MatchIndex returns the first non-negative index reported by the wrapped matchers, trying them in order,
 // or -1 when none reports one.
 func (o *orBytes) MatchIndex(b []byte) int {
 	for _, sub := range o.matchers {
 		idx := sub.MatchIndex(b)
 		if idx < 0 {
 			continue
 		}
 		return idx
 	}
 	return -1
 }
 // suffixGroupBytes gates a group of matchers behind a suffix check: the group is consulted only when the
 // input passes the suffix matcher.
 type suffixGroupBytes struct {
 	suffix   BytesMatcher
 	matchers []BytesMatcher
 }
 // BytesSuffixGroup returns a matcher that first requires the suffix s and then any one of m.
 func BytesSuffixGroup(s string, m ...BytesMatcher) *suffixGroupBytes {
 	return &suffixGroupBytes{suffix: BytesSuffix(s), matchers: m}
 }
 // Match reports whether b passes the suffix matcher and at least one of the grouped matchers.
 func (sg *suffixGroupBytes) Match(b []byte) bool {
 	if !sg.suffix.Match(b) {
 		return false
 	}
 	anyOf := BytesOr(sg.matchers...)
 	return anyOf.Match(b)
 }
 // MatchIndex returns -1 when the suffix matcher reports a negative index for b; otherwise it returns the
 // result of an Or over the grouped matchers.
 func (sg *suffixGroupBytes) MatchIndex(b []byte) int {
 	if sg.suffix.MatchIndex(b) < 0 {
 		return -1
 	}
 	anyOf := BytesOr(sg.matchers...)
 	return anyOf.MatchIndex(b)
 }
@@ -1,10 +0,0 @@
 package substring
 // reverse flips the bytes of b in place and returns the same slice. It is a helper for the Suffixes
 // matchers.
 func reverse(b []byte) []byte {
 	for lo, hi := 0, len(b)-1; lo < hi; lo, hi = lo+1, hi-1 {
 		b[lo], b[hi] = b[hi], b[lo]
 	}
 	return b
 }
@@ -1,216 +0,0 @@
 package substring
 import (
 	"regexp"
 	"strings"
 	"github.com/toqueteos/trie"
 )
 // StringsMatcher is the interface satisfied by the string matchers defined in this file.
 type StringsMatcher interface {
 	// Match reports whether s satisfies the matcher.
 	Match(s string) bool
 	// MatchIndex returns an index describing where s matched; the matchers in this file use -1 to signal
 	// "no match". What a non-negative index refers to is defined by each implementation.
 	MatchIndex(s string) int
 }
 // regexpString matches strings against a compiled regular expression.
 type regexpString struct{ re *regexp.Regexp }
 // Regexp compiles pat with regexp.MustCompile (which panics on an invalid pattern) and wraps the result in
 // a matcher.
 func Regexp(pat string) *regexpString {
 	compiled := regexp.MustCompile(pat)
 	return &regexpString{re: compiled}
 }
 // Match reports whether the expression matches anywhere in s.
 func (m *regexpString) Match(s string) bool {
 	return m.re.MatchString(s)
 }
 // MatchIndex returns the end offset of the leftmost match in s, or -1 when nothing matches.
 func (m *regexpString) MatchIndex(s string) int {
 	loc := m.re.FindStringIndex(s)
 	if loc == nil {
 		return -1
 	}
 	return loc[1]
 }
 // exactString matches only strings equal to pat.
 type exactString struct{ pat string }
 // Exact returns a matcher that accepts exactly pat.
 func Exact(pat string) *exactString {
 	return &exactString{pat: pat}
 }
 // Match reports whether s equals the pattern.
 func (m *exactString) Match(s string) bool {
 	return s == m.pat
 }
 // MatchIndex returns len(s) when s equals the pattern and -1 otherwise.
 func (m *exactString) MatchIndex(s string) int {
 	if s != m.pat {
 		return -1
 	}
 	return len(s)
 }
 // anyString holds a fixed haystack; an input matches when it occurs somewhere inside that haystack.
 type anyString struct{ pat string }
 // Any returns a matcher that accepts any string contained in pat.
 func Any(pat string) *anyString {
 	return &anyString{pat: pat}
 }
 // Match reports whether s occurs within the stored pattern.
 func (m *anyString) Match(s string) bool {
 	return strings.Contains(m.pat, s)
 }
 // MatchIndex returns the offset within the stored pattern just past the first occurrence of s, or -1 when
 // s does not occur in it.
 func (m *anyString) MatchIndex(s string) int {
 	start := strings.Index(m.pat, s)
 	if start < 0 {
 		return -1
 	}
 	return start + len(s)
 }
 // hasString matches inputs that contain pat.
 type hasString struct{ pat string }
 // Has returns a matcher that accepts any string containing pat.
 func Has(pat string) *hasString {
 	return &hasString{pat: pat}
 }
 // Match reports whether the pattern occurs within s.
 func (m *hasString) Match(s string) bool {
 	return strings.Contains(s, m.pat)
 }
 // MatchIndex returns the offset in s just past the first occurrence of the pattern, or -1 when the pattern
 // does not occur in s.
 func (m *hasString) MatchIndex(s string) int {
 	start := strings.Index(s, m.pat)
 	if start < 0 {
 		return -1
 	}
 	return start + len(m.pat)
 }
 // prefixString matches inputs that begin with pat.
 type prefixString struct{ pat string }
 // Prefix returns a matcher that accepts any string starting with pat.
 func Prefix(pat string) *prefixString {
 	return &prefixString{pat: pat}
 }
 // Match reports whether s starts with the pattern.
 func (m *prefixString) Match(s string) bool {
 	return strings.HasPrefix(s, m.pat)
 }
 // MatchIndex returns the length of the pattern when s starts with it and -1 otherwise.
 func (m *prefixString) MatchIndex(s string) int {
 	if !strings.HasPrefix(s, m.pat) {
 		return -1
 	}
 	return len(m.pat)
 }
 // prefixesString matches strings against a set of prefixes stored in a trie.
 type prefixesString struct{ t *trie.Trie }
 // Prefixes returns a matcher whose trie holds every pattern in pats.
 func Prefixes(pats ...string) *prefixesString {
 	set := trie.New()
 	for i := range pats {
 		set.Insert([]byte(pats[i]))
 	}
 	return &prefixesString{t: set}
 }
 // Match reports whether the trie's PrefixIndex lookup for the bytes of s is non-negative.
 func (m *prefixesString) Match(s string) bool {
 	found := m.t.PrefixIndex([]byte(s))
 	return found >= 0
 }
 // MatchIndex returns the trie's PrefixIndex for the bytes of s when it is non-negative and -1 otherwise.
 func (m *prefixesString) MatchIndex(s string) int {
 	end := m.t.PrefixIndex([]byte(s))
 	if end < 0 {
 		return -1
 	}
 	return end
 }
 // suffixString matches inputs that end with pat.
 type suffixString struct{ pat string }
 // Suffix returns a matcher that accepts any string ending with pat.
 func Suffix(pat string) *suffixString {
 	return &suffixString{pat: pat}
 }
 // Match reports whether s ends with the pattern.
 func (m *suffixString) Match(s string) bool {
 	return strings.HasSuffix(s, m.pat)
 }
 // MatchIndex returns the length of the pattern (not an offset into s) when s ends with it, and -1
 // otherwise.
 func (m *suffixString) MatchIndex(s string) int {
 	if !strings.HasSuffix(s, m.pat) {
 		return -1
 	}
 	return len(m.pat)
 }
 // suffixesString matches strings against a set of suffixes. Each suffix is stored reversed in a trie, so a
 // suffix test on the input becomes a prefix lookup on the reversed input.
 type suffixesString struct{ t *trie.Trie }
 // Suffixes returns a matcher whose trie holds the reversed form of every pattern in pats.
 func Suffixes(pats ...string) *suffixesString {
 	set := trie.New()
 	for i := range pats {
 		set.Insert(reverse([]byte(pats[i])))
 	}
 	return &suffixesString{t: set}
 }
 // Match reports whether the trie's PrefixIndex lookup for the reversed bytes of s is non-negative.
 func (m *suffixesString) Match(s string) bool {
 	reversed := reverse([]byte(s))
 	return m.t.PrefixIndex(reversed) >= 0
 }
 // MatchIndex returns the trie's PrefixIndex for the reversed bytes of s when it is non-negative and -1
 // otherwise.
 func (m *suffixesString) MatchIndex(s string) int {
 	reversed := reverse([]byte(s))
 	end := m.t.PrefixIndex(reversed)
 	if end < 0 {
 		return -1
 	}
 	return end
 }
 // afterString applies matcher to whatever follows the first occurrence of first in the input.
 type afterString struct {
 	first   string
 	matcher StringsMatcher
 }
 // After returns a matcher that locates first in the input and hands the text after it to m.
 func After(first string, m StringsMatcher) *afterString {
 	return &afterString{first, m}
 }
 // Match reports whether first occurs in s and the wrapped matcher accepts the text that follows its first
 // occurrence.
 func (a *afterString) Match(s string) bool {
 	if idx := strings.Index(s, a.first); idx >= 0 {
 		return a.matcher.Match(s[idx+len(a.first):])
 	}
 	return false
 }
 // MatchIndex returns -1 when first does not occur in s or when the wrapped matcher reports a negative index
 // for the text after it. Otherwise it returns the wrapped matcher's index shifted by the offset at which
 // that text starts in s.
 func (a *afterString) MatchIndex(s string) int {
 	idx := strings.Index(s, a.first)
 	if idx < 0 {
 		return -1
 	}
 	start := idx + len(a.first)
 	inner := a.matcher.MatchIndex(s[start:])
 	if inner < 0 {
 		// Propagate the miss: adding an offset to a negative index could produce a bogus non-negative result.
 		return -1
 	}
 	// inner is relative to s[start:], so shift by start (not just idx) to express it as an offset into s.
 	return start + inner
 }
 // andString combines matchers; it matches only when every one of them does.
 type andString struct{ matchers []StringsMatcher }
 // And returns a matcher that requires all of m to match.
 func And(m ...StringsMatcher) *andString {
 	return &andString{matchers: m}
 }
 // Match reports whether every wrapped matcher accepts s. With no matchers it returns true.
 func (a *andString) Match(s string) bool {
 	for i := range a.matchers {
 		if ok := a.matchers[i].Match(s); !ok {
 			return false
 		}
 	}
 	return true
 }
 // MatchIndex returns the largest index reported by the wrapped matchers, or -1 as soon as one of them
 // reports a negative index. With no matchers it returns 0.
 func (a *andString) MatchIndex(s string) int {
 	best := 0
 	for _, sub := range a.matchers {
 		idx := sub.MatchIndex(s)
 		if idx < 0 {
 			return -1
 		}
 		if idx > best {
 			best = idx
 		}
 	}
 	return best
 }
 // orString combines matchers; it matches as soon as any one of them does.
 type orString struct{ matchers []StringsMatcher }
 // Or returns a matcher that requires at least one of m to match.
 func Or(m ...StringsMatcher) *orString {
 	return &orString{matchers: m}
 }
 // Match reports whether any wrapped matcher accepts s, trying them in order. With no matchers it returns
 // false.
 func (o *orString) Match(s string) bool {
 	for i := range o.matchers {
 		if ok := o.matchers[i].Match(s); ok {
 			return true
 		}
 	}
 	return false
 }
 // MatchIndex returns the first non-negative index reported by the wrapped matchers, trying them in order,
 // or -1 when none reports one.
 func (o *orString) MatchIndex(s string) int {
 	for _, sub := range o.matchers {
 		idx := sub.MatchIndex(s)
 		if idx < 0 {
 			continue
 		}
 		return idx
 	}
 	return -1
 }
 // suffixGroupString gates a group of matchers behind a suffix check: the group is consulted only when the
 // input passes the suffix matcher.
 type suffixGroupString struct {
 	suffix   StringsMatcher
 	matchers []StringsMatcher
 }
 // SuffixGroup returns a matcher that first requires the suffix s and then any one of m.
 func SuffixGroup(s string, m ...StringsMatcher) *suffixGroupString {
 	return &suffixGroupString{suffix: Suffix(s), matchers: m}
 }
 // Match reports whether s passes the suffix matcher and at least one of the grouped matchers.
 func (sg *suffixGroupString) Match(s string) bool {
 	if !sg.suffix.Match(s) {
 		return false
 	}
 	anyOf := Or(sg.matchers...)
 	return anyOf.Match(s)
 }
 // MatchIndex returns -1 when the suffix matcher reports a negative index for s; otherwise it returns the
 // result of an Or over the grouped matchers.
 func (sg *suffixGroupString) MatchIndex(s string) int {
 	if sg.suffix.MatchIndex(s) < 0 {
 		return -1
 	}
 	anyOf := Or(sg.matchers...)
 	return anyOf.MatchIndex(s)
 }
@@ -202,7 +202,7 @@ github.com/gliderlabs/ssh
 # github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a
 ## explicit
 github.com/glycerine/go-unsnap-stream
-# github.com/go-enry/go-enry/v2 v2.3.0
+# github.com/go-enry/go-enry/v2 v2.5.2
 ## explicit
 github.com/go-enry/go-enry/v2
 github.com/go-enry/go-enry/v2/data
@@ -210,7 +210,7 @@ github.com/go-enry/go-enry/v2/data/rule
 github.com/go-enry/go-enry/v2/internal/tokenizer
 github.com/go-enry/go-enry/v2/internal/tokenizer/flex
 github.com/go-enry/go-enry/v2/regex
-# github.com/go-enry/go-oniguruma v1.2.0
+# github.com/go-enry/go-oniguruma v1.2.1
 github.com/go-enry/go-oniguruma
 # github.com/go-git/gcfg v1.5.0
 github.com/go-git/gcfg
@@ -614,8 +614,6 @@ github.com/syndtr/goleveldb/leveldb/util
 # github.com/tinylib/msgp v1.1.2
 ## explicit
 github.com/tinylib/msgp/msgp
 # github.com/toqueteos/trie v1.0.0
 github.com/toqueteos/trie
 # github.com/toqueteos/webbrowser v1.2.0
 github.com/toqueteos/webbrowser
 # github.com/tstranex/u2f v1.0.0
@@ -836,8 +834,6 @@ gopkg.in/ldap.v3
 # gopkg.in/testfixtures.v2 v2.5.0
 ## explicit
 gopkg.in/testfixtures.v2
 # gopkg.in/toqueteos/substring.v1 v1.0.2
 gopkg.in/toqueteos/substring.v1
 # gopkg.in/warnings.v0 v0.1.2
 gopkg.in/warnings.v0
 # gopkg.in/yaml.v2 v2.2.8