[bugfix] html escape special characters in text instead of totally removing them (#719)

* remove minify dependency

* tidy up some tests

* remove pre + postformat funcs

* rework sanitization + formatting

* update tests

* add some more markdown tests
This commit is contained in:
tobi 2022-07-19 15:21:17 +02:00 committed by GitHub
parent 098dbe6ff4
commit c84384e660
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
51 changed files with 129 additions and 7419 deletions

View file

@ -244,7 +244,6 @@ The following libraries and frameworks are used by GoToSocial, with gratitude
- [superseriousbusiness/activity](https://github.com/superseriousbusiness/activity) forked from [go-fed/activity](https://github.com/go-fed/activity); Golang ActivityPub/ActivityStreams library. [BSD-3-Clause License](https://spdx.org/licenses/BSD-3-Clause.html).
- [superseriousbusiness/oauth2](https://github.com/superseriousbusiness/oauth2) forked from [go-oauth2/oauth2](https://github.com/go-oauth2/oauth2); oauth server framework and token handling. [MIT License](https://spdx.org/licenses/MIT.html).
- [go-swagger/go-swagger](https://github.com/go-swagger/go-swagger); Swagger OpenAPI spec generation. [Apache-2.0 License](https://spdx.org/licenses/Apache-2.0.html).
- [tdewolff/minify](https://github.com/tdewolff/minify); HTML minification. [MIT License](https://spdx.org/licenses/MIT.html).
- [uptrace/bun](https://github.com/uptrace/bun); database ORM. [BSD-2-Clause License](https://spdx.org/licenses/BSD-2-Clause.html).
- [wagslane/go-password-validator](https://github.com/wagslane/go-password-validator); password strength validation. [MIT License](https://spdx.org/licenses/MIT.html).

2
go.mod
View file

@ -41,7 +41,6 @@ require (
github.com/superseriousbusiness/activity v1.1.0-gts
github.com/superseriousbusiness/exif-terminator v0.3.0
github.com/superseriousbusiness/oauth2/v4 v4.3.2-SSB
github.com/tdewolff/minify/v2 v2.11.2
github.com/uptrace/bun v1.1.3
github.com/uptrace/bun/dialect/pgdialect v1.1.3
github.com/uptrace/bun/dialect/sqlitedialect v1.1.3
@ -121,7 +120,6 @@ require (
github.com/spf13/pflag v1.0.5 // indirect
github.com/subosito/gotenv v1.2.0 // indirect
github.com/superseriousbusiness/go-jpeg-image-structure/v2 v2.0.0-20220321154430-d89a106fdabe // indirect
github.com/tdewolff/parse/v2 v2.5.29 // indirect
github.com/tmthrgd/go-hex v0.0.0-20190904060850-447a3041c3bc // indirect
github.com/ugorji/go/codec v1.2.7 // indirect
github.com/vmihailenco/msgpack/v5 v5.3.5 // indirect

10
go.sum
View file

@ -101,7 +101,6 @@ github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd3
github.com/buckket/go-blurhash v1.1.0 h1:X5M6r0LIvwdvKiUtiNcRL2YlmOfMzYobI3VCKCZc9Do=
github.com/buckket/go-blurhash v1.1.0/go.mod h1:aT2iqo5W9vu9GpyoLErKfTHwgODsZp3bQfXjXJUxNb8=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/cheekybits/is v0.0.0-20150225183255-68e9c0620927/go.mod h1:h/aW8ynjgkuj+NQRlZcDbAbM1ORAbXjXX77sX7T289U=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
@ -121,7 +120,6 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/djherbis/atime v1.1.0/go.mod h1:28OF6Y8s3NQWwacXc5eZTsEsiMzp7LF8MbXE+XJPdBE=
github.com/dsoprea/go-exif/v2 v2.0.0-20200321225314-640175a69fe4/go.mod h1:Lm2lMM2zx8p4a34ZemkaUV95AnMl4ZvLbCUbwOvLC2E=
github.com/dsoprea/go-exif/v3 v3.0.0-20200717053412-08f1b6708903/go.mod h1:0nsO1ce0mh5czxGeLo4+OCZ/C6Eo6ZlMWsz7rH/Gxv8=
github.com/dsoprea/go-exif/v3 v3.0.0-20210428042052-dca55bf8ca15/go.mod h1:cg5SNYKHMmzxsr9X6ZeLh/nfBRHHp5PngtEPcujONtk=
@ -154,7 +152,6 @@ github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga
github.com/felixge/httpsnoop v1.0.2/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
github.com/fsnotify/fsnotify v1.5.3/go.mod h1:T3375wBYaZdLLcVNkcVbzGHY7f1l/uK5T5Ai1i3InKU=
github.com/fsnotify/fsnotify v1.5.4 h1:jRbGcIw6P2Meqdwuo0H1p6JVLbL5DHKAKlYndzMwVZI=
github.com/fsnotify/fsnotify v1.5.4/go.mod h1:OVB6XrOHzAwXMpEM7uPOzcehqUV2UqJxmVXmkdnm1bU=
github.com/gavv/httpexpect v2.0.0+incompatible h1:1X9kcRshkSKEjNJJxX9Y9mQ5BRfbxU5kORdjhlA1yX8=
@ -398,7 +395,6 @@ github.com/lib/pq v1.10.2/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
github.com/lib/pq v1.10.3 h1:v9QZf2Sn6AmjXtQeFpdoq/eaNtYP6IN+7lcrygsIAtg=
github.com/magiconair/properties v1.8.6 h1:5ibWZ6iY0NctNGWo87LalDlEZ6R41TqbbDamhfG/Qzo=
github.com/magiconair/properties v1.8.6/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPKd5NZ3oSwXrF60=
github.com/matryer/try v0.0.0-20161228173917-9ac251b645a2/go.mod h1:0KeJpeMD6o+O4hW7qJOT7vyQPKrWmj26uf5wMc/IiIs=
github.com/mattn/go-colorable v0.1.1/go.mod h1:FuOcm+DKB9mbwrcAfNl7/TZVBZ6rcnceauSikq3lYCQ=
github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
github.com/mattn/go-colorable v0.1.7/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
@ -518,12 +514,6 @@ github.com/superseriousbusiness/go-jpeg-image-structure/v2 v2.0.0-20220321154430
github.com/superseriousbusiness/go-jpeg-image-structure/v2 v2.0.0-20220321154430-d89a106fdabe/go.mod h1:gH4P6gN1V+wmIw5o97KGaa1RgXB/tVpC2UNzijhg3E4=
github.com/superseriousbusiness/oauth2/v4 v4.3.2-SSB h1:PtW2w6budTvRV2J5QAoSvThTHBuvh8t/+BXIZFAaBSc=
github.com/superseriousbusiness/oauth2/v4 v4.3.2-SSB/go.mod h1:uYC/W92oVRJ49Vh1GcvTqpeFqHi+Ovrl2sMllQWRAEo=
github.com/tdewolff/minify/v2 v2.11.2 h1:PpaPWhNlMVjkAKaOj0bbPv6KCVnrm8jbVwG7OtSdAqw=
github.com/tdewolff/minify/v2 v2.11.2/go.mod h1:NxozhBtgUVypPLzQdV96wkIu9J9vAiVmBcKhfC2zMfg=
github.com/tdewolff/parse/v2 v2.5.29 h1:Uf0OtZL9YaUXTuHEOitdo9lD90P0XTwCjZi+KbGChuM=
github.com/tdewolff/parse/v2 v2.5.29/go.mod h1:WzaJpRSbwq++EIQHYIRTpbYKNA3gn9it1Ik++q4zyho=
github.com/tdewolff/test v1.0.6 h1:76mzYJQ83Op284kMT+63iCNCI7NEERsIN8dLM+RiKr4=
github.com/tdewolff/test v1.0.6/go.mod h1:6DAvZliBAAnD7rhVgwaM7DE5/d9NMOAJ09SqYqeK4QE=
github.com/tidwall/btree v0.0.0-20191029221954-400434d76274 h1:G6Z6HvJuPjG6XfNGi/feOATzeJrfgTNJY+rGrHbA04E=
github.com/tidwall/btree v0.0.0-20191029221954-400434d76274/go.mod h1:huei1BkDWJ3/sLXmO+bsCNELL+Bp2Kks9OLyQFkzvA8=
github.com/tidwall/buntdb v1.1.2 h1:noCrqQXL9EKMtcdwJcmuVKSEjqu1ua99RHHgbLTEHRo=

View file

@ -178,7 +178,7 @@ func (suite *StatusCreateTestSuite) TestPostAnotherNewStatus() {
err = json.Unmarshal(b, statusReply)
suite.NoError(err)
suite.Equal("<p><a href=\"http://localhost:8080/tags/test\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>test</span></a> alright, should be able to post <a href=\"http://localhost:8080/tags/links\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>links</span></a> with fragments in them now, let's see........<br><br><a href=\"https://docs.gotosocial.org/en/latest/user_guide/posts/#links\" rel=\"noopener nofollow noreferrer\" target=\"_blank\">docs.gotosocial.org/en/latest/user_guide/posts/#links</a><br><br><a href=\"http://localhost:8080/tags/gotosocial\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>gotosocial</span></a><br><br>(tobi remember to pull the docker image challenge)</p>", statusReply.Content)
suite.Equal("<p><a href=\"http://localhost:8080/tags/test\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>test</span></a> alright, should be able to post <a href=\"http://localhost:8080/tags/links\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>links</span></a> with fragments in them now, let&#39;s see........<br/><br/><a href=\"https://docs.gotosocial.org/en/latest/user_guide/posts/#links\" rel=\"noopener nofollow noreferrer\" target=\"_blank\">docs.gotosocial.org/en/latest/user_guide/posts/#links</a><br/><br/><a href=\"http://localhost:8080/tags/gotosocial\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>gotosocial</span></a><br/><br/>(tobi remember to pull the docker image challenge)</p>", statusReply.Content)
}
func (suite *StatusCreateTestSuite) TestPostNewStatusWithEmoji() {
@ -211,7 +211,7 @@ func (suite *StatusCreateTestSuite) TestPostNewStatusWithEmoji() {
suite.NoError(err)
suite.Equal("", statusReply.SpoilerText)
suite.Equal("<p>here is a rainbow emoji a few times! :rainbow: :rainbow: :rainbow:<br>here's an emoji that isn't in the db: :test_emoji:</p>", statusReply.Content)
suite.Equal("<p>here is a rainbow emoji a few times! :rainbow: :rainbow: :rainbow: <br/> here&#39;s an emoji that isn&#39;t in the db: :test_emoji:</p>", statusReply.Content)
suite.Len(statusReply.Emojis, 1)
apiEmoji := statusReply.Emojis[0]
@ -330,7 +330,7 @@ func (suite *StatusCreateTestSuite) TestAttachNewMediaSuccess() {
suite.NoError(err)
suite.Equal("", statusResponse.SpoilerText)
suite.Equal("<p>here's an image attachment</p>", statusResponse.Content)
suite.Equal("<p>here&#39;s an image attachment</p>", statusResponse.Content)
suite.False(statusResponse.Sensitive)
suite.Equal(model.VisibilityPublic, statusResponse.Visibility)

View file

@ -73,13 +73,12 @@ func (suite *AccountUpdateTestSuite) TestAccountUpdateSimple() {
func (suite *AccountUpdateTestSuite) TestAccountUpdateWithMention() {
testAccount := suite.testAccounts["local_account_1"]
locked := true
displayName := "new display name"
note := `#hello here i am!
go check out @1happyturtle, they have a cool account!
`
noteExpected := `<p><a href="http://localhost:8080/tags/hello" class="mention hashtag" rel="tag nofollow noreferrer noopener" target="_blank">#<span>hello</span></a> here i am!<br><br>go check out <span class="h-card"><a href="http://localhost:8080/@1happyturtle" class="u-url mention" rel="nofollow noreferrer noopener" target="_blank">@<span>1happyturtle</span></a></span>, they have a cool account!</p>`
var (
locked = true
displayName = "new display name"
note = "#hello here i am!\n\ngo check out @1happyturtle, they have a cool account!\n"
noteExpected = "<p><a href=\"http://localhost:8080/tags/hello\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hello</span></a> here i am!<br/><br/>go check out <span class=\"h-card\"><a href=\"http://localhost:8080/@1happyturtle\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>1happyturtle</span></a></span>, they have a cool account!</p>"
)
form := &apimodel.UpdateCredentialsRequest{
DisplayName: &displayName,

View file

@ -23,31 +23,20 @@ import (
"fmt"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/internal/api/model"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)
const statusText1 = `Another test @foss_satan@fossbros-anonymous.io
#Hashtag
Text`
const (
statusText1ExpectedFull = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br><br><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br><br>Text</p>"
statusText1ExpectedPartial = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br><br>#Hashtag<br><br>Text</p>"
statusText1 = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText"
statusText1ExpectedFull = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br/><br/><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br/><br/>Text</p>"
statusText1ExpectedPartial = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br/><br/>#Hashtag<br/><br/>Text</p>"
statusText2 = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\n#hashTAG"
status2TextExpectedFull = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br/><br/><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br/><br/><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashTAG</span></a></p>"
status2TextExpectedPartial = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br/><br/>#Hashtag<br/><br/>#hashTAG</p>"
)
const statusText2 = `Another test @foss_satan@fossbros-anonymous.io
#Hashtag
#hashTAG`
const status2TextExpectedFull = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br><br><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br><br><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashTAG</span></a></p>"
type UtilTestSuite struct {
StatusStandardTestSuite
}
@ -82,21 +71,21 @@ func (suite *UtilTestSuite) TestProcessMentions1() {
}
err := suite.status.ProcessMentions(context.Background(), form, creatingAccount.ID, status)
assert.NoError(suite.T(), err)
suite.NoError(err)
assert.Len(suite.T(), status.Mentions, 1)
suite.Len(status.Mentions, 1)
newMention := status.Mentions[0]
assert.Equal(suite.T(), mentionedAccount.ID, newMention.TargetAccountID)
assert.Equal(suite.T(), creatingAccount.ID, newMention.OriginAccountID)
assert.Equal(suite.T(), creatingAccount.URI, newMention.OriginAccountURI)
assert.Equal(suite.T(), status.ID, newMention.StatusID)
assert.Equal(suite.T(), fmt.Sprintf("@%s@%s", mentionedAccount.Username, mentionedAccount.Domain), newMention.NameString)
assert.Equal(suite.T(), mentionedAccount.URI, newMention.TargetAccountURI)
assert.Equal(suite.T(), mentionedAccount.URL, newMention.TargetAccountURL)
assert.NotNil(suite.T(), newMention.OriginAccount)
suite.Equal(mentionedAccount.ID, newMention.TargetAccountID)
suite.Equal(creatingAccount.ID, newMention.OriginAccountID)
suite.Equal(creatingAccount.URI, newMention.OriginAccountURI)
suite.Equal(status.ID, newMention.StatusID)
suite.Equal(fmt.Sprintf("@%s@%s", mentionedAccount.Username, mentionedAccount.Domain), newMention.NameString)
suite.Equal(mentionedAccount.URI, newMention.TargetAccountURI)
suite.Equal(mentionedAccount.URL, newMention.TargetAccountURL)
suite.NotNil(newMention.OriginAccount)
assert.Len(suite.T(), status.MentionIDs, 1)
assert.Equal(suite.T(), newMention.ID, status.MentionIDs[0])
suite.Len(status.MentionIDs, 1)
suite.Equal(newMention.ID, status.MentionIDs[0])
}
func (suite *UtilTestSuite) TestProcessContentFull1() {
@ -131,20 +120,20 @@ func (suite *UtilTestSuite) TestProcessContentFull1() {
}
err := suite.status.ProcessMentions(context.Background(), form, creatingAccount.ID, status)
assert.NoError(suite.T(), err)
assert.Empty(suite.T(), status.Content) // shouldn't be set yet
suite.NoError(err)
suite.Empty(status.Content) // shouldn't be set yet
err = suite.status.ProcessTags(context.Background(), form, creatingAccount.ID, status)
assert.NoError(suite.T(), err)
assert.Empty(suite.T(), status.Content) // shouldn't be set yet
suite.NoError(err)
suite.Empty(status.Content) // shouldn't be set yet
/*
ACTUAL TEST
*/
err = suite.status.ProcessContent(context.Background(), form, creatingAccount.ID, status)
assert.NoError(suite.T(), err)
assert.Equal(suite.T(), statusText1ExpectedFull, status.Content)
suite.NoError(err)
suite.Equal(statusText1ExpectedFull, status.Content)
}
func (suite *UtilTestSuite) TestProcessContentPartial1() {
@ -179,16 +168,16 @@ func (suite *UtilTestSuite) TestProcessContentPartial1() {
}
err := suite.status.ProcessMentions(context.Background(), form, creatingAccount.ID, status)
assert.NoError(suite.T(), err)
assert.Empty(suite.T(), status.Content) // shouldn't be set yet
suite.NoError(err)
suite.Empty(status.Content) // shouldn't be set yet
/*
ACTUAL TEST
*/
err = suite.status.ProcessContent(context.Background(), form, creatingAccount.ID, status)
assert.NoError(suite.T(), err)
assert.Equal(suite.T(), statusText1ExpectedPartial, status.Content)
suite.NoError(err)
suite.Equal(statusText1ExpectedPartial, status.Content)
}
func (suite *UtilTestSuite) TestProcessMentions2() {
@ -221,21 +210,21 @@ func (suite *UtilTestSuite) TestProcessMentions2() {
}
err := suite.status.ProcessMentions(context.Background(), form, creatingAccount.ID, status)
assert.NoError(suite.T(), err)
suite.NoError(err)
assert.Len(suite.T(), status.Mentions, 1)
suite.Len(status.Mentions, 1)
newMention := status.Mentions[0]
assert.Equal(suite.T(), mentionedAccount.ID, newMention.TargetAccountID)
assert.Equal(suite.T(), creatingAccount.ID, newMention.OriginAccountID)
assert.Equal(suite.T(), creatingAccount.URI, newMention.OriginAccountURI)
assert.Equal(suite.T(), status.ID, newMention.StatusID)
assert.Equal(suite.T(), fmt.Sprintf("@%s@%s", mentionedAccount.Username, mentionedAccount.Domain), newMention.NameString)
assert.Equal(suite.T(), mentionedAccount.URI, newMention.TargetAccountURI)
assert.Equal(suite.T(), mentionedAccount.URL, newMention.TargetAccountURL)
assert.NotNil(suite.T(), newMention.OriginAccount)
suite.Equal(mentionedAccount.ID, newMention.TargetAccountID)
suite.Equal(creatingAccount.ID, newMention.OriginAccountID)
suite.Equal(creatingAccount.URI, newMention.OriginAccountURI)
suite.Equal(status.ID, newMention.StatusID)
suite.Equal(fmt.Sprintf("@%s@%s", mentionedAccount.Username, mentionedAccount.Domain), newMention.NameString)
suite.Equal(mentionedAccount.URI, newMention.TargetAccountURI)
suite.Equal(mentionedAccount.URL, newMention.TargetAccountURL)
suite.NotNil(newMention.OriginAccount)
assert.Len(suite.T(), status.MentionIDs, 1)
assert.Equal(suite.T(), newMention.ID, status.MentionIDs[0])
suite.Len(status.MentionIDs, 1)
suite.Equal(newMention.ID, status.MentionIDs[0])
}
func (suite *UtilTestSuite) TestProcessContentFull2() {
@ -270,21 +259,21 @@ func (suite *UtilTestSuite) TestProcessContentFull2() {
}
err := suite.status.ProcessMentions(context.Background(), form, creatingAccount.ID, status)
assert.NoError(suite.T(), err)
assert.Empty(suite.T(), status.Content) // shouldn't be set yet
suite.NoError(err)
suite.Empty(status.Content) // shouldn't be set yet
err = suite.status.ProcessTags(context.Background(), form, creatingAccount.ID, status)
assert.NoError(suite.T(), err)
assert.Empty(suite.T(), status.Content) // shouldn't be set yet
suite.NoError(err)
suite.Empty(status.Content) // shouldn't be set yet
/*
ACTUAL TEST
*/
err = suite.status.ProcessContent(context.Background(), form, creatingAccount.ID, status)
assert.NoError(suite.T(), err)
suite.NoError(err)
assert.Equal(suite.T(), status2TextExpectedFull, status.Content)
suite.Equal(status2TextExpectedFull, status.Content)
}
func (suite *UtilTestSuite) TestProcessContentPartial2() {
@ -319,18 +308,13 @@ func (suite *UtilTestSuite) TestProcessContentPartial2() {
}
err := suite.status.ProcessMentions(context.Background(), form, creatingAccount.ID, status)
assert.NoError(suite.T(), err)
assert.Empty(suite.T(), status.Content) // shouldn't be set yet
/*
ACTUAL TEST
*/
suite.NoError(err)
suite.Empty(status.Content)
err = suite.status.ProcessContent(context.Background(), form, creatingAccount.ID, status)
assert.NoError(suite.T(), err)
suite.NoError(err)
fmt.Println(status.Content)
// assert.Equal(suite.T(), statusText2ExpectedPartial, status.Content)
suite.Equal(status2TextExpectedPartial, status.Content)
}
func TestUtilTestSuite(t *testing.T) {

View file

@ -21,7 +21,6 @@ package text
import (
"bytes"
"context"
"html"
"strings"
"unicode"
@ -30,38 +29,6 @@ import (
"github.com/superseriousbusiness/gotosocial/internal/regexes"
)
// preformat readies user-supplied text for formatting. HTML entities in the
// input are decoded first, and then any leading or trailing whitespace is
// stripped from the decoded result.
func preformat(in string) string {
	return strings.TrimSpace(html.UnescapeString(in))
}
// postformat applies the shared clean-up steps to already formatted text:
// the input is passed through SanitizeHTML, the sanitized output is HTML
// unescaped, and the result is handed to MinifyHTML. When minification
// reports an error, the unescaped (un-minified) text is returned instead.
//
// NOTE(review): unescaping after sanitizing turns any entities present in the
// sanitized output (e.g. "&lt;") back into raw characters -- confirm this is
// acceptable for every caller.
func postformat(in string) string {
	// sanitize, then undo the entity escaping the sanitizer leaves behind
	cleaned := html.UnescapeString(SanitizeHTML(in))

	// prefer the minified form whenever the minifier succeeds
	if minified, err := MinifyHTML(cleaned); err == nil {
		return minified
	}

	// minify failed: fall back to what we already have
	return cleaned
}
func (f *formatter) ReplaceTags(ctx context.Context, in string, tags []*gtsmodel.Tag) string {
return regexes.ReplaceAllStringFunc(regexes.HashtagFinder, in, func(match string, buf *bytes.Buffer) string {
// we have a match

View file

@ -28,44 +28,14 @@ import (
)
const (
replaceMentionsString = `Another test @foss_satan@fossbros-anonymous.io
#Hashtag
Text`
replaceMentionsExpected = `Another test <span class="h-card"><a href="http://fossbros-anonymous.io/@foss_satan" class="u-url mention">@<span>foss_satan</span></a></span>
#Hashtag
Text`
replaceHashtagsExpected = `Another test @foss_satan@fossbros-anonymous.io
<a href="http://localhost:8080/tags/Hashtag" class="mention hashtag" rel="tag">#<span>Hashtag</span></a>
Text`
replaceHashtagsAfterMentionsExpected = `Another test <span class="h-card"><a href="http://fossbros-anonymous.io/@foss_satan" class="u-url mention">@<span>foss_satan</span></a></span>
<a href="http://localhost:8080/tags/Hashtag" class="mention hashtag" rel="tag">#<span>Hashtag</span></a>
Text`
replaceMentionsWithLinkString = `Another test @foss_satan@fossbros-anonymous.io
http://fossbros-anonymous.io/@foss_satan/statuses/6675ee73-fccc-4562-a46a-3e8cd9798060`
replaceMentionsWithLinkStringExpected = `Another test <span class="h-card"><a href="http://fossbros-anonymous.io/@foss_satan" class="u-url mention">@<span>foss_satan</span></a></span>
http://fossbros-anonymous.io/@foss_satan/statuses/6675ee73-fccc-4562-a46a-3e8cd9798060`
replaceMentionsWithLinkSelfString = `Mentioning myself: @the_mighty_zork
and linking to my own status: https://localhost:8080/@the_mighty_zork/statuses/01FGXKJRX2PMERJQ9EQF8Y6HCR`
replaceMemtionsWithLinkSelfExpected = `Mentioning myself: <span class="h-card"><a href="http://localhost:8080/@the_mighty_zork" class="u-url mention">@<span>the_mighty_zork</span></a></span>
and linking to my own status: https://localhost:8080/@the_mighty_zork/statuses/01FGXKJRX2PMERJQ9EQF8Y6HCR`
replaceMentionsString = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText"
replaceMentionsExpected = "Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\">@<span>foss_satan</span></a></span>\n\n#Hashtag\n\nText"
replaceHashtagsExpected = "Another test @foss_satan@fossbros-anonymous.io\n\n<a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag\">#<span>Hashtag</span></a>\n\nText"
replaceHashtagsAfterMentionsExpected = "Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\">@<span>foss_satan</span></a></span>\n\n<a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag\">#<span>Hashtag</span></a>\n\nText"
replaceMentionsWithLinkString = "Another test @foss_satan@fossbros-anonymous.io\n\nhttp://fossbros-anonymous.io/@foss_satan/statuses/6675ee73-fccc-4562-a46a-3e8cd9798060"
replaceMentionsWithLinkStringExpected = "Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\">@<span>foss_satan</span></a></span>\n\nhttp://fossbros-anonymous.io/@foss_satan/statuses/6675ee73-fccc-4562-a46a-3e8cd9798060"
replaceMentionsWithLinkSelfString = "Mentioning myself: @the_mighty_zork\n\nand linking to my own status: https://localhost:8080/@the_mighty_zork/statuses/01FGXKJRX2PMERJQ9EQF8Y6HCR"
replaceMemtionsWithLinkSelfExpected = "Mentioning myself: <span class=\"h-card\"><a href=\"http://localhost:8080/@the_mighty_zork\" class=\"u-url mention\">@<span>the_mighty_zork</span></a></span>\n\nand linking to my own status: https://localhost:8080/@the_mighty_zork/statuses/01FGXKJRX2PMERJQ9EQF8Y6HCR"
)
type CommonTestSuite struct {

View file

@ -71,16 +71,16 @@ type LinkTestSuite struct {
func (suite *LinkTestSuite) TestParseSimple() {
f := suite.formatter.FromPlain(context.Background(), simple, nil, nil)
assert.Equal(suite.T(), simpleExpected, f)
suite.Equal(simpleExpected, f)
}
func (suite *LinkTestSuite) TestParseURLsFromText1() {
urls := text.FindLinks(text1)
assert.Equal(suite.T(), "https://example.org/link/to/something#fragment", urls[0].String())
assert.Equal(suite.T(), "http://test.example.org?q=bahhhhhhhhhhhh", urls[1].String())
assert.Equal(suite.T(), "https://another.link.example.org/with/a/pretty/long/path/at/the/end/of/it", urls[2].String())
assert.Equal(suite.T(), "https://example.orghttps://google.com", urls[3].String())
suite.Equal("https://example.org/link/to/something#fragment", urls[0].String())
suite.Equal("http://test.example.org?q=bahhhhhhhhhhhh", urls[1].String())
suite.Equal("https://another.link.example.org/with/a/pretty/long/path/at/the/end/of/it", urls[2].String())
suite.Equal("https://example.orghttps://google.com", urls[3].String())
}
func (suite *LinkTestSuite) TestParseURLsFromText2() {
@ -99,7 +99,7 @@ func (suite *LinkTestSuite) TestParseURLsFromText3() {
func (suite *LinkTestSuite) TestReplaceLinksFromText1() {
replaced := suite.formatter.ReplaceLinks(context.Background(), text1)
assert.Equal(suite.T(), `
suite.Equal(`
This is a text with some links in it. Here's link number one: <a href="https://example.org/link/to/something#fragment" rel="noopener">example.org/link/to/something#fragment</a>
Here's link number two: <a href="http://test.example.org?q=bahhhhhhhhhhhh" rel="noopener">test.example.org?q=bahhhhhhhhhhhh</a>
@ -114,7 +114,7 @@ really.cool.website <-- this one shouldn't be parsed as a link because it doesn'
func (suite *LinkTestSuite) TestReplaceLinksFromText2() {
replaced := suite.formatter.ReplaceLinks(context.Background(), text2)
assert.Equal(suite.T(), `
suite.Equal(`
this is one link: <a href="https://example.org" rel="noopener">example.org</a>
this is the same link again: <a href="https://example.org" rel="noopener">example.org</a>
@ -126,14 +126,14 @@ these should be deduplicated
func (suite *LinkTestSuite) TestReplaceLinksFromText3() {
// we know mailto links won't be replaced with hrefs -- we only accept https and http
replaced := suite.formatter.ReplaceLinks(context.Background(), text3)
assert.Equal(suite.T(), `
suite.Equal(`
here's a mailto link: mailto:whatever@test.org
`, replaced)
}
func (suite *LinkTestSuite) TestReplaceLinksFromText4() {
replaced := suite.formatter.ReplaceLinks(context.Background(), text4)
assert.Equal(suite.T(), `
suite.Equal(`
two similar links:
<a href="https://example.org" rel="noopener">example.org</a>
@ -145,7 +145,7 @@ two similar links:
func (suite *LinkTestSuite) TestReplaceLinksFromText5() {
// we know this one doesn't work properly, which is why html should always be sanitized before being passed into the ReplaceLinks function
replaced := suite.formatter.ReplaceLinks(context.Background(), text5)
assert.Equal(suite.T(), `
suite.Equal(`
what happens when we already have a link within an href?
<a href="<a href="https://example.org" rel="noopener">example.org</a>"><a href="https://example.org" rel="noopener">example.org</a></a>

View file

@ -26,13 +26,11 @@ import (
)
func (f *formatter) FromMarkdown(ctx context.Context, md string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string {
content := preformat(md)
// do the markdown parsing *first*
contentBytes := blackfriday.Run([]byte(content))
contentBytes := blackfriday.Run([]byte(md))
// format tags nicely
content = f.ReplaceTags(ctx, string(contentBytes), tags)
content := f.ReplaceTags(ctx, string(contentBytes), tags)
// format mentions nicely
content = f.ReplaceMentions(ctx, content, mentions)

View file

@ -53,6 +53,10 @@ const (
withInlineCode2Expected = "<p><code>Nobody tells you about the &lt;/code&gt;&lt;del&gt;SECRET CODE&lt;/del&gt;&lt;code&gt;, do they?</code></p>\n"
withHashtag = "# Title\n\nhere's a simple status that uses hashtag #Hashtag!"
withHashtagExpected = "<h1>Title</h1>\n\n<p>heres a simple status that uses hashtag <a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a>!</p>\n"
mdWithHTML = "# Title\n\nHere's a simple text in markdown.\n\nHere's a <a href=\"https://example.org\">link</a>.\n\nHere's an image: <img src=\"https://gts.superseriousbusiness.org/assets/logo.png\" alt=\"The GoToSocial sloth logo.\" width=\"500\" height=\"600\">"
mdWithHTMLExpected = "<h1>Title</h1>\n\n<p>Heres a simple text in markdown.</p>\n\n<p>Heres a <a href=\"https://example.org\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">link</a>.</p>\n\n<p>Heres an image: <img src=\"https://gts.superseriousbusiness.org/assets/logo.png\" alt=\"The GoToSocial sloth logo.\" width=\"500\" height=\"600\" crossorigin=\"anonymous\"></p>\n"
mdWithCheekyHTML = "# Title\n\nHere's a simple text in markdown.\n\nHere's a cheeky little script: <script>alert(ahhhh)</script>"
mdWithCheekyHTMLExpected = "<h1>Title</h1>\n\n<p>Heres a simple text in markdown.</p>\n\n<p>Heres a cheeky little script: </p>\n"
)
type MarkdownTestSuite struct {
@ -88,6 +92,16 @@ func (suite *MarkdownTestSuite) TestParseWithHashtag() {
suite.Equal(withHashtagExpected, s)
}
// TestParseWithHTML formats mdWithHTML (markdown containing an inline link
// and an image tag) with no mentions or tags, and expects the formatter to
// produce exactly mdWithHTMLExpected.
func (suite *MarkdownTestSuite) TestParseWithHTML() {
	ctx := context.Background()
	formatted := suite.formatter.FromMarkdown(ctx, mdWithHTML, nil, nil)
	suite.Equal(mdWithHTMLExpected, formatted)
}
// TestParseWithCheekyHTML formats mdWithCheekyHTML (markdown containing an
// inline script element) with no mentions or tags, and expects the formatter
// to produce exactly mdWithCheekyHTMLExpected, in which the script is absent.
func (suite *MarkdownTestSuite) TestParseWithCheekyHTML() {
	ctx := context.Background()
	formatted := suite.formatter.FromMarkdown(ctx, mdWithCheekyHTML, nil, nil)
	suite.Equal(mdWithCheekyHTMLExpected, formatted)
}
func TestMarkdownTestSuite(t *testing.T) {
suite.Run(t, new(MarkdownTestSuite))
}

View file

@ -1,39 +0,0 @@
/*
GoToSocial
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package text
import (
"github.com/tdewolff/minify/v2"
"github.com/tdewolff/minify/v2/html"
)
// m is the package-wide HTML minifier. It is constructed exactly once, during
// package initialisation, instead of lazily inside MinifyHTML: the previous
// `if m == nil { m = ... }` check wrote to this variable without any
// synchronisation, which is a data race when MinifyHTML is called from more
// than one goroutine.
var m = func() *minify.M {
	minifier := minify.New()
	minifier.Add("text/html", &html.Minifier{
		// Keep attribute quotes, closing tags and the html/head/body
		// tags rather than letting the minifier strip them.
		KeepQuotes:       true,
		KeepEndTags:      true,
		KeepDocumentTags: true,
	})
	return minifier
}()

// MinifyHTML runs html through a minifier, reducing it in size.
//
// The input is treated as "text/html". The minified string is returned
// together with any error reported by the minifier.
func MinifyHTML(in string) (string, error) {
	return m.String("text/html", in)
}

View file

@ -20,6 +20,7 @@ package text
import (
"context"
"html"
"strings"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
@ -32,10 +33,11 @@ var breakReplacer = strings.NewReplacer(
)
func (f *formatter) FromPlain(ctx context.Context, plain string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string {
content := preformat(plain)
// trim any crap
content := strings.TrimSpace(plain)
// sanitize any html elements
content = removeHTML(content)
// clean 'er up
content = html.EscapeString(content)
// format links nicely
content = f.ReplaceLinks(ctx, content)
@ -52,5 +54,5 @@ func (f *formatter) FromPlain(ctx context.Context, plain string, mentions []*gts
// wrap the whole thing in a pee
content = `<p>` + content + `</p>`
return postformat(content)
return SanitizeHTML(content)
}

View file

@ -20,10 +20,8 @@ package text_test
import (
"context"
"fmt"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)
@ -31,16 +29,12 @@ import (
const (
simple = "this is a plain and simple status"
simpleExpected = "<p>this is a plain and simple status</p>"
withTag = "here's a simple status that uses hashtag #welcome!"
withTagExpected = "<p>here's a simple status that uses hashtag <a href=\"http://localhost:8080/tags/welcome\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>welcome</span></a>!</p>"
moreComplex = `Another test @foss_satan@fossbros-anonymous.io
#Hashtag
Text`
moreComplexFull = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br><br><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br><br>Text</p>"
withTagExpected = "<p>here&#39;s a simple status that uses hashtag <a href=\"http://localhost:8080/tags/welcome\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>welcome</span></a>!</p>"
withHTML = "<div>blah this should just be html escaped blah</div>"
withHTMLExpected = "<p>&lt;div&gt;blah this should just be html escaped blah&lt;/div&gt;</p>"
moreComplex = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText"
moreComplexFull = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br/><br/><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br/><br/>Text</p>"
)
type PlainTestSuite struct {
@ -49,7 +43,7 @@ type PlainTestSuite struct {
func (suite *PlainTestSuite) TestParseSimple() {
f := suite.formatter.FromPlain(context.Background(), simple, nil, nil)
assert.Equal(suite.T(), simpleExpected, f)
suite.Equal(simpleExpected, f)
}
func (suite *PlainTestSuite) TestParseWithTag() {
@ -58,7 +52,12 @@ func (suite *PlainTestSuite) TestParseWithTag() {
}
f := suite.formatter.FromPlain(context.Background(), withTag, nil, foundTags)
assert.Equal(suite.T(), withTagExpected, f)
suite.Equal(withTagExpected, f)
}
// TestParseWithHTML passes plain text that contains a <div> element through
// FromPlain and checks the result against withHTMLExpected, where the angle
// brackets are HTML-escaped inside a single paragraph.
func (suite *PlainTestSuite) TestParseWithHTML() {
	ctx := context.Background()
	got := suite.formatter.FromPlain(ctx, withHTML, nil, nil)
	suite.Equal(withHTMLExpected, got)
}
func (suite *PlainTestSuite) TestParseMoreComplex() {
@ -71,10 +70,7 @@ func (suite *PlainTestSuite) TestParseMoreComplex() {
}
f := suite.formatter.FromPlain(context.Background(), moreComplex, foundMentions, foundTags)
fmt.Println(f)
assert.Equal(suite.T(), moreComplexFull, f)
suite.Equal(moreComplexFull, f)
}
func TestPlainTestSuite(t *testing.T) {

View file

@ -19,7 +19,9 @@
package text
import (
"html"
"regexp"
"strings"
"github.com/microcosm-cc/bluemonday"
)
@ -59,7 +61,8 @@ func SanitizeHTML(in string) string {
// SanitizePlaintext runs text through basic sanitization. This removes
// any html elements that were in the string, and returns clean plaintext.
func SanitizePlaintext(in string) string {
content := preformat(in)
content := html.UnescapeString(in)
content = removeHTML(content)
return postformat(content)
content = html.UnescapeString(content)
return strings.TrimSpace(content)
}

View file

@ -1,2 +0,0 @@
benchmarks/sample_* linguist-generated
tests/*/corpus/* linguist-generated

View file

@ -1,24 +0,0 @@
release.sh
dist/
benchmarks/*
!benchmarks/*.go
!benchmarks/sample_*
tests/*/fuzz-fuzz.zip
tests/*/crashers
tests/*/suppressions
tests/*/corpus/*
!tests/*/corpus/*.*
parse/tests/*/fuzz-fuzz.zip
parse/tests/*/crashers
parse/tests/*/suppressions
parse/tests/*/corpus/*
!parse/tests/*/corpus/*.*
bindings/js/build
bindings/js/minify.h
bindings/js/minify.a
bindings/js/test.min.html
bindings/js/node_modules
bindings/py/minify.h
bindings/py/minify.so
bindings/py/test.min.html
bindings/py/tdewolff_minify.egg-info

View file

@ -1,16 +0,0 @@
linters:
enable:
- depguard
- dogsled
- gofmt
- goimports
- golint
- gosec
- govet
- megacheck
- misspell
- nakedret
- prealloc
- unconvert
- unparam
- wastedassign

View file

@ -1,14 +0,0 @@
# Use this image to build the executable
FROM golang:1.16-alpine AS compiler

# bash is installed explicitly because the Makefile sets SHELL to
# `/usr/bin/env bash`.
# NOTE(review): presumably the Alpine-based golang image does not already
# provide bash - confirm.
RUN apk add --no-cache git ca-certificates make bash

WORKDIR $GOPATH/src/minify
COPY . .

# Invoke the install target directly. With `bash -c make install` only "make"
# is the command string; "install" is bound to $0 of that shell and never
# reaches make as a target.
RUN make install

# Final image containing the executable from the previous step
FROM alpine:3

# NOTE(review): `go install` normally writes to $GOPATH/bin (or $GOBIN);
# confirm that /bin/minify is where the compiler stage leaves the binary.
COPY --from=compiler /bin/minify /bin/minify

View file

@ -1,22 +0,0 @@
Copyright (c) 2015 Taco de Wolff
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.

View file

@ -1,58 +0,0 @@
# Run every recipe line with bash, located through env.
SHELL=/usr/bin/env bash
# Name of the produced binary and the package it is built from.
NAME=minify
CMD=./cmd/minify
# OS_ARCH pairs that the release target cross-compiles for.
TARGETS=linux_amd64 darwin_amd64 freebsd_amd64 netbsd_amd64 openbsd_amd64 windows_amd64
# The backticks are handed to the shell unchanged, so the version is resolved
# by `git describe` at the moment a recipe that uses it runs.
VERSION=`git describe --tags`
# Linker flags: -s -w, and set main.Version to VERSION; -trimpath is passed to
# the go tool as well.
FLAGS=-ldflags "-s -w -X 'main.Version=${VERSION}'" -trimpath
# Environment prefixed to go invocations. The variable the go command reads is
# GO111MODULE (singular); the former spelling GO111MODULES had no effect.
ENVS=GO111MODULE=on CGO_ENABLED=0
# Default target: a bare `make` runs install.
all: install
# install: print the version being installed, `go install` the CLI package
# ./cmd/minify with ENVS and FLAGS applied, then source the bash completion
# script from cmd/minify.
install:
echo "Installing ${VERSION}"
${ENVS} go install ${FLAGS} ./cmd/minify
. cmd/minify/bash_completion
# release: build distributable archives for every entry in TARGETS.
#   1. `git describe --tags --exact-match` is run through $(shell ...); if its
#      exit status (.SHELLSTATUS) is non-zero the commit is not tagged and the
#      recipe exits with status 1.
#   2. dist/ is removed and recreated.
#   3. For each OS_ARCH target the binary is built into dist/<target>/ together
#      with LICENSE and the CLI README. Windows builds get an .exe suffix and
#      are zipped; all other targets also receive bash_completion and are
#      packed as .tar.gz.
#   4. A sha256sum line for each archive is appended to dist/checksums.txt and
#      the per-target directory is removed.
release:
TAG=$(shell git describe --tags --exact-match 2> /dev/null);
if [ "${.SHELLSTATUS}" -eq 0 ]; then \
echo "Releasing ${VERSION}"; \
else \
echo "ERROR: commit is not tagged with a version"; \
echo ""; \
exit 1; \
fi
rm -rf dist
mkdir -p dist
for t in ${TARGETS}; do \
echo Building $$t...; \
mkdir dist/$$t; \
os=$$(echo $$t | cut -f1 -d_); \
arch=$$(echo $$t | cut -f2 -d_); \
${ENVS} GOOS=$$os GOARCH=$$arch go build ${FLAGS} -o dist/$$t/${NAME} ${CMD}; \
\
cp LICENSE dist/$$t/.; \
cp cmd/minify/README.md dist/$$t/.; \
if [ "$$os" == "windows" ]; then \
mv dist/$$t/${NAME} dist/$$t/${NAME}.exe; \
zip -jq dist/${NAME}_$$t.zip dist/$$t/*; \
cd dist; \
sha256sum ${NAME}_$$t.zip >> checksums.txt; \
cd ..; \
else \
cp cmd/minify/bash_completion dist/$$t/.; \
cd dist/$$t; \
tar -cf - * | gzip -9 > ../${NAME}_$$t.tar.gz; \
cd ..; \
sha256sum ${NAME}_$$t.tar.gz >> checksums.txt; \
cd ..; \
fi; \
rm -rf dist/$$t; \
done
# clean: announce the cleanup and delete the dist/ output directory.
clean:
echo "Cleaning dist/"
rm -rf dist
# None of the targets produce a file of the same name, so mark them phony;
# .SILENT stops make from echoing their recipe lines.
.PHONY: install release clean
.SILENT: install release clean

View file

@ -1,724 +0,0 @@
# Minify <a name="minify"></a> [![API reference](https://img.shields.io/badge/godoc-reference-5272B4)](https://pkg.go.dev/github.com/tdewolff/minify/v2?tab=doc) [![Go Report Card](https://goreportcard.com/badge/github.com/tdewolff/minify)](https://goreportcard.com/report/github.com/tdewolff/minify) [![codecov](https://codecov.io/gh/tdewolff/minify/branch/master/graph/badge.svg?token=Cr7r2EKPj2)](https://codecov.io/gh/tdewolff/minify) [![Donate](https://img.shields.io/badge/patreon-donate-DFB317)](https://www.patreon.com/tdewolff)
**[Online demo](https://go.tacodewolff.nl/minify)** if you need to minify files *now*.
**[Binaries](https://github.com/tdewolff/minify/releases) of CLI for various platforms.** See [CLI](https://github.com/tdewolff/minify/tree/master/cmd/minify) for more installation instructions.
**[Python bindings](https://pypi.org/project/tdewolff-minify/)** install with `pip install tdewolff-minify`
**[JavaScript bindings](https://www.npmjs.com/package/tdewolff-minify)**
---
*Did you know that the shortest valid piece of HTML5 is `<!doctype html><title>x</title>`? See for yourself at the [W3C Validator](http://validator.w3.org/)!*
Minify is a minifier package written in [Go][1]. It provides HTML5, CSS3, JS, JSON, SVG and XML minifiers and an interface to implement any other minifier. Minification is the process of removing bytes from a file (such as whitespace) without changing its output and therefore shrinking its size and speeding up transmission over the internet and possibly parsing. The implemented minifiers are designed for high performance.
The core functionality associates mimetypes with minification functions, allowing embedded resources (like CSS or JS within HTML files) to be minified as well. Users can add new implementations that are triggered based on a mimetype (or pattern), or redirect to an external command (like ClosureCompiler, UglifyCSS, ...).
### Sponsors
[![SiteGround](https://www.siteground.com/img/downloads/siteground-logo-black-transparent-vector.svg)](https://www.siteground.com/)
Please see https://www.patreon.com/tdewolff for ways to contribute, otherwise please contact me directly!
#### Table of Contents
- [Minify](#minify)
- [Prologue](#prologue)
- [Installation](#installation)
- [API stability](#api-stability)
- [Testing](#testing)
- [Performance](#performance)
- [HTML](#html)
- [Whitespace removal](#whitespace-removal)
- [CSS](#css)
- [JS](#js)
- [Comparison with other tools](#comparison-with-other-tools)
- [Compression ratio (lower is better)](#compression-ratio-lower-is-better)
- [Time (lower is better)](#time-lower-is-better)
- [JSON](#json)
- [SVG](#svg)
- [XML](#xml)
- [Usage](#usage)
- [New](#new)
- [From reader](#from-reader)
- [From bytes](#from-bytes)
- [From string](#from-string)
- [To reader](#to-reader)
- [To writer](#to-writer)
- [Middleware](#middleware)
- [Custom minifier](#custom-minifier)
- [Mediatypes](#mediatypes)
- [Examples](#examples)
- [Common minifiers](#common-minifiers)
- [External minifiers](#external-minifiers)
- [Closure Compiler](#closure-compiler)
- [UglifyJS](#uglifyjs)
- [esbuild](#esbuild)
- [Custom minifier](#custom-minifier-example)
- [ResponseWriter](#responsewriter)
- [Templates](#templates)
- [License](#license)
### Roadmap
- [ ] Use ASM/SSE to further speed-up core parts of the parsers/minifiers
- [x] Improve JS minifiers by shortening variables and proper semicolon omission
- [ ] Speed-up SVG minifier, it is very slow
- [x] Proper parser error reporting and line number + column information
- [ ] Generation of source maps (uncertain, might slow down parsers too much if it cannot run separately nicely)
- [ ] Create a cmd to pack webfiles (much like webpack), ie. merging CSS and JS files, inlining small external files, minification and gzipping. This would work on HTML files.
## Prologue
Minifiers or bindings to minifiers exist in almost all programming languages. Some implementations merely use several regular expressions to trim whitespace and comments (even though regex for parsing HTML/XML is ill-advised, for a good read see [Regular Expressions: Now You Have Two Problems](http://blog.codinghorror.com/regular-expressions-now-you-have-two-problems/)). Some implementations are much more profound, such as the [YUI Compressor](http://yui.github.io/yuicompressor/) and [Google Closure Compiler](https://github.com/google/closure-compiler) for JS. As most existing implementations either use JavaScript, use regexes, or don't focus on performance, they are pretty slow.
This minifier proves to be that fast and extensive minifier that can handle HTML and any other filetype it may contain (CSS, JS, ...). It is usually orders of magnitude faster than existing minifiers.
## Installation
Make sure you have [Git](https://git-scm.com/) and [Go](https://golang.org/dl/) (1.13 or higher) installed, run
```
mkdir Project
cd Project
go mod init
go get -u github.com/tdewolff/minify/v2
```
Then add the following imports to be able to use the various minifiers
``` go
import (
"github.com/tdewolff/minify/v2"
"github.com/tdewolff/minify/v2/css"
"github.com/tdewolff/minify/v2/html"
"github.com/tdewolff/minify/v2/js"
"github.com/tdewolff/minify/v2/json"
"github.com/tdewolff/minify/v2/svg"
"github.com/tdewolff/minify/v2/xml"
)
```
You can optionally run `go mod tidy` to clean up the `go.mod` and `go.sum` files.
See [CLI tool](https://github.com/tdewolff/minify/tree/master/cmd/minify) for installation instructions of the binary.
### Docker
If you want to use Docker, please see https://hub.docker.com/r/tdewolff/minify.
```bash
$ docker run -it tdewolff/minify
/ # minify --version
```
## API stability
There is no guarantee for absolute stability, but I take issues and bugs seriously and don't take API changes lightly. The library will be maintained in a compatible way unless vital bugs prevent me from doing so. There has been one API change after v1 which added options support and I took the opportunity to push through some more API clean up as well. There are no plans whatsoever for future API changes.
## Testing
For all subpackages and the imported `parse` package, test coverage of 100% is pursued. Besides full coverage, the minifiers are [fuzz tested](https://github.com/tdewolff/fuzz) using [github.com/dvyukov/go-fuzz](http://www.github.com/dvyukov/go-fuzz), see [the wiki](https://github.com/tdewolff/minify/wiki) for the most important bugs found by fuzz testing. These tests ensure that everything works as intended and that the code does not crash (whatever the input). If you still encounter a bug, please file a [bug report](https://github.com/tdewolff/minify/issues)!
## Performance
The benchmarks directory contains a number of standardized samples used to compare performance between changes. To give an indication of the speed of this library, I've run the tests on my Thinkpad T460 (i5-6300U quad-core 2.4GHz running Arch Linux) using Go 1.15.
```
name time/op
CSS/sample_bootstrap.css-4 2.70ms ± 0%
CSS/sample_gumby.css-4 3.57ms ± 0%
CSS/sample_fontawesome.css-4 767µs ± 0%
CSS/sample_normalize.css-4 85.5µs ± 0%
HTML/sample_amazon.html-4 15.2ms ± 0%
HTML/sample_bbc.html-4 3.90ms ± 0%
HTML/sample_blogpost.html-4 420µs ± 0%
HTML/sample_es6.html-4 15.6ms ± 0%
HTML/sample_stackoverflow.html-4 3.73ms ± 0%
HTML/sample_wikipedia.html-4 6.60ms ± 0%
JS/sample_ace.js-4 28.7ms ± 0%
JS/sample_dot.js-4 357µs ± 0%
JS/sample_jquery.js-4 10.0ms ± 0%
JS/sample_jqueryui.js-4 20.4ms ± 0%
JS/sample_moment.js-4 3.47ms ± 0%
JSON/sample_large.json-4 3.25ms ± 0%
JSON/sample_testsuite.json-4 1.74ms ± 0%
JSON/sample_twitter.json-4 24.2µs ± 0%
SVG/sample_arctic.svg-4 34.7ms ± 0%
SVG/sample_gopher.svg-4 307µs ± 0%
SVG/sample_usa.svg-4 57.4ms ± 0%
SVG/sample_car.svg-4 18.0ms ± 0%
SVG/sample_tiger.svg-4 5.61ms ± 0%
XML/sample_books.xml-4 54.7µs ± 0%
XML/sample_catalog.xml-4 33.0µs ± 0%
XML/sample_omg.xml-4 7.17ms ± 0%
name speed
CSS/sample_bootstrap.css-4 50.7MB/s ± 0%
CSS/sample_gumby.css-4 52.1MB/s ± 0%
CSS/sample_fontawesome.css-4 61.2MB/s ± 0%
CSS/sample_normalize.css-4 70.8MB/s ± 0%
HTML/sample_amazon.html-4 31.1MB/s ± 0%
HTML/sample_bbc.html-4 29.5MB/s ± 0%
HTML/sample_blogpost.html-4 49.8MB/s ± 0%
HTML/sample_es6.html-4 65.6MB/s ± 0%
HTML/sample_stackoverflow.html-4 55.0MB/s ± 0%
HTML/sample_wikipedia.html-4 67.5MB/s ± 0%
JS/sample_ace.js-4 22.4MB/s ± 0%
JS/sample_dot.js-4 14.5MB/s ± 0%
JS/sample_jquery.js-4 24.8MB/s ± 0%
JS/sample_jqueryui.js-4 23.0MB/s ± 0%
JS/sample_moment.js-4 28.6MB/s ± 0%
JSON/sample_large.json-4 234MB/s ± 0%
JSON/sample_testsuite.json-4 394MB/s ± 0%
JSON/sample_twitter.json-4 63.0MB/s ± 0%
SVG/sample_arctic.svg-4 42.4MB/s ± 0%
SVG/sample_gopher.svg-4 19.0MB/s ± 0%
SVG/sample_usa.svg-4 17.8MB/s ± 0%
SVG/sample_car.svg-4 29.3MB/s ± 0%
SVG/sample_tiger.svg-4 12.2MB/s ± 0%
XML/sample_books.xml-4 81.0MB/s ± 0%
XML/sample_catalog.xml-4 58.6MB/s ± 0%
XML/sample_omg.xml-4 159MB/s ± 0%
```
## HTML
HTML (with JS and CSS) minification typically shaves off about 10%.
The HTML5 minifier uses these minifications:
- strip unnecessary whitespace and otherwise collapse it to one space (or newline if it originally contained a newline)
- strip superfluous quotes, or uses single/double quotes whichever requires fewer escapes
- strip default attribute values and attribute boolean values
- strip some empty attributes
- strip unrequired tags (`html`, `head`, `body`, ...)
- strip unrequired end tags (`tr`, `td`, `li`, ... and often `p`)
- strip default protocols (`http:`, `https:` and `javascript:`)
- strip all comments (including conditional comments, old IE versions are not supported anymore by Microsoft)
- shorten `doctype` and `meta` charset
- lowercase tags, attributes and some values to enhance gzip compression
Options:
- `KeepConditionalComments` preserve all IE conditional comments such as `<!--[if IE 6]><![endif]-->` and `<![if IE 6]><![endif]>`, see https://msdn.microsoft.com/en-us/library/ms537512(v=vs.85).aspx#syntax
- `KeepDefaultAttrVals` preserve default attribute values such as `<script type="application/javascript">`
- `KeepDocumentTags` preserve `html`, `head` and `body` tags
- `KeepEndTags` preserve all end tags
- `KeepQuotes` preserve quotes around attribute values
- `KeepWhitespace` preserve whitespace between inline tags but still collapse multiple whitespace characters into one
After recent benchmarking and profiling it became really fast and minifies pages in the 10ms range, making it viable for on-the-fly minification.
However, be careful when doing on-the-fly minification. Minification typically trims off 10% and does this at worst around about 20MB/s. This means users have to download slower than 2MB/s to make on-the-fly minification worthwhile. This may or may not apply in your situation. Rather use caching!
### Whitespace removal
The whitespace removal mechanism collapses all sequences of whitespace (spaces, newlines, tabs) to a single space. If the sequence contained a newline or carriage return it will collapse into a newline character instead. It trims all text parts (in between tags) depending on whether it was preceded by a space from a previous piece of text and whether it is followed up by a block element or an inline element. In the former case we can omit spaces while for inline elements whitespace has significance.
Make sure your HTML doesn't depend on whitespace between `block` elements that have been changed to `inline` or `inline-block` elements using CSS. Your layout *should not* depend on that whitespace as the minifier will remove it. An example is a menu consisting of multiple `<li>` that have `display:inline-block` applied and have whitespace in between them. It is bad practice to rely on whitespace for element positioning anyway!
## CSS
Minification typically shaves off about 10%-15%. This CSS minifier will _not_ make structural changes to your stylesheets. Although this could result in smaller files, the complexity is quite high and the risk of breaking websites is high too.
The CSS minifier will only use safe minifications:
- remove comments and unnecessary whitespace (but keep `/*! ... */` which usually contains the license)
- remove trailing semicolons
- optimize `margin`, `padding` and `border-width` number of sides
- shorten numbers by removing unnecessary `+` and zeros and rewriting with/without exponent
- remove dimension and percentage for zero values
- remove quotes for URLs
- remove quotes for font families and make lowercase
- rewrite hex colors to/from color names, or to three digit hex
- rewrite `rgb(`, `rgba(`, `hsl(` and `hsla(` colors to hex or name
- use four digit hex for alpha values (`transparent` &#8594; `#0000`)
- replace `normal` and `bold` by numbers for `font-weight` and `font`
- replace `none` &#8594; `0` for `border`, `background` and `outline`
- lowercase all identifiers except classes, IDs and URLs to enhance gzip compression
- shorten MS alpha function
- rewrite data URIs with base64 or ASCII whichever is shorter
- calls minifier for data URI mediatypes, thus you can compress embedded SVG files if you have that minifier attached
- shorten aggregate declarations such as `background` and `font`
It does purposely not use the following techniques:
- (partially) merge rulesets
- (partially) split rulesets
- collapse multiple declarations when main declaration is defined within a ruleset (don't put `font-weight` within an already existing `font`, too complex)
- remove overwritten properties in ruleset (this not always overwrites it, for example with `!important`)
- rewrite properties into one ruleset if possible (like `margin-top`, `margin-right`, `margin-bottom` and `margin-left` &#8594; `margin`)
- put nested ID selector at the front (`body > div#elem p` &#8594; `#elem p`)
- rewrite attribute selectors for IDs and classes (`div[id=a]` &#8594; `div#a`)
- put space after pseudo-selectors (IE6 is old, move on!)
There are a couple of comparison tables online, such as [CSS Minifier Comparison](http://www.codenothing.com/benchmarks/css-compressor-3.0/full.html), [CSS minifiers comparison](http://www.phpied.com/css-minifiers-comparison/) and [CleanCSS tests](http://goalsmashers.github.io/css-minification-benchmark/). Comparing speed between each, this minifier will usually be between 10x-300x faster than existing implementations, and even rank among the top for minification ratios. It falls short with the purposely not implemented and often unsafe techniques.
Options:
- `KeepCSS2` prohibits using CSS3 syntax (such as exponents in numbers, or `rgba(` &#8594; `rgb(`), might be incomplete
- `Precision` number of significant digits to preserve for numbers, `0` means no trimming
## JS
The JS minifier typically shaves off about 35% -- 65% of filesize depending on the file, which is a compression close to many other minifiers. Common speeds of PHP and JS implementations are about 100-300kB/s (see [Uglify2](http://lisperator.net/uglifyjs/), [Adventures in PHP web asset minimization](https://www.happyassassin.net/2014/12/29/adventures-in-php-web-asset-minimization/)). This implementation is orders of magnitude faster at around ~25MB/s.
The following features are implemented:
- remove superfluous whitespace
- remove superfluous semicolons
- shorten `true`, `false`, and `undefined` to `!0`, `!1` and `void 0`
- rename variables and functions to shorter names (not in global scope)
- move `var` declarations to the top of the global/function scope (if more than one)
- collapse if/else statements to expressions
- minify conditional expressions to simpler ones
- merge sequential expression statements to one, including into `return` and `throw`
- remove superfluous grouping in expressions
- shorten or remove string escapes
- convert object key or index expression from string to identifier or decimal
- merge concatenated strings
- rewrite numbers (binary, octal, decimal, hexadecimal) to shorter representations
Options:
- `KeepVarNames` keeps variable names as they are and omits shortening variable names
- `Precision` number of significant digits to preserve for numbers, `0` means no trimming
### Comparison with other tools
Performance is measured with `time [command]`, run 10 times with the fastest result selected, on a Thinkpad T460 (i5-6300U quad-core 2.4GHz running Arch Linux) using Go 1.15.
- [minify](https://github.com/tdewolff/minify): `minify -o script.min.js script.js`
- [esbuild](https://github.com/evanw/esbuild): `esbuild --minify --outfile=script.min.js script.js`
- [terser](https://github.com/terser/terser): `terser script.js --compress --mangle -o script.min.js`
- [UglifyJS](https://github.com/Skalman/UglifyJS-online): `uglifyjs --compress --mangle -o script.min.js script.js`
- [Closure Compiler](https://github.com/google/closure-compiler): `closure-compiler -O SIMPLE --js script.js --js_output_file script.min.js --language_in ECMASCRIPT_NEXT -W QUIET --jscomp_off=checkVars` optimization level `SIMPLE` instead of `ADVANCED` to make similar assumptions as do the other tools (do not rename/assume anything of global level variables)
#### Compression ratio (lower is better)
All tools give very similar results, although UglifyJS compresses slightly better.
| Tool | ace.js | dot.js | jquery.js | jqueryui.js | moment.js |
| --- | --- | --- | --- | --- | --- |
| **minify** | 53.7% | 64.8% | 34.2% | 51.3% | 34.8% |
| esbuild | 53.8% | 66.3% | 34.4% | 53.1% | 34.8% |
| terser | 53.2% | 65.2% | 34.2% | 51.8% | 34.7% |
| UglifyJS | 53.1% | 64.7% | 33.8% | 50.7% | 34.2% |
| Closure Compiler | 53.4% | 64.0% | 35.7% | 53.6% | 34.3% |
#### Time (lower is better)
Most tools are extremely slow, with `minify` and `esbuild` being orders of magnitude faster.
| Tool | ace.js | dot.js | jquery.js | jqueryui.js | moment.js |
| --- | --- | --- | --- | --- | --- |
| **minify** | 49ms | 5ms | 22ms | 35ms | 13ms |
| esbuild | 64ms | 9ms | 31ms | 51ms | 17ms |
| terser | 2900ms | 180ms | 1400ms | 2200ms | 730ms |
| UglifyJS | 3900ms | 210ms | 2000ms | 3100ms | 910ms |
| Closure Compiler | 6100ms | 2500ms | 4400ms | 5300ms | 3500ms |
## JSON
Minification typically shaves off about 15% of filesize for common indented JSON such as generated by [JSON Generator](http://www.json-generator.com/).
The JSON minifier only removes whitespace, which is the only thing that can be left out, and minifies numbers (`1000` => `1e3`).
Options:
- `Precision` number of significant digits to preserve for numbers, `0` means no trimming
- `KeepNumbers` do not minify numbers if set to `true`, by default numbers will be minified
## SVG
The SVG minifier uses these minifications:
- trim and collapse whitespace between all tags
- strip comments, empty `doctype`, XML prelude, `metadata`
- strip SVG version
- strip CDATA sections wherever possible
- collapse tags with no content to a void tag
- minify style tag and attributes with the CSS minifier
- minify colors
- shorten lengths and numbers and remove default `px` unit
- shorten `path` data
- use relative or absolute positions in path data whichever is shorter
TODO:
- convert attributes to style attribute whenever shorter
- merge path data? (same style and no intersection -- the latter is difficult)
Options:
- `Precision` number of significant digits to preserve for numbers, `0` means no trimming
## XML
The XML minifier uses these minifications:
- strip unnecessary whitespace and otherwise collapse it to one space (or newline if it originally contained a newline)
- strip comments
- collapse tags with no content to a void tag
- strip CDATA sections wherever possible
Options:
- `KeepWhitespace` preserve whitespace between inline tags but still collapse multiple whitespace characters into one
## Usage
Any input stream is being buffered by the minification functions. This is how the underlying buffer package inherently works to ensure high performance. The output stream however is not buffered. It is wise to preallocate a buffer as big as the input to which the output is written, or otherwise use `bufio` to buffer to a streaming writer.
### New
Retrieve a minifier struct which holds a map of mediatype &#8594; minifier functions.
``` go
m := minify.New()
```
The following loads all provided minifiers.
``` go
m := minify.New()
m.AddFunc("text/css", css.Minify)
m.AddFunc("text/html", html.Minify)
m.AddFunc("image/svg+xml", svg.Minify)
m.AddFuncRegexp(regexp.MustCompile("^(application|text)/(x-)?(java|ecma)script$"), js.Minify)
m.AddFuncRegexp(regexp.MustCompile("[/+]json$"), json.Minify)
m.AddFuncRegexp(regexp.MustCompile("[/+]xml$"), xml.Minify)
```
You can set options to several minifiers.
``` go
m.Add("text/html", &html.Minifier{
KeepDefaultAttrVals: true,
KeepWhitespace: true,
})
```
### From reader
Minify from an `io.Reader` to an `io.Writer` for a specific mediatype.
``` go
if err := m.Minify(mediatype, w, r); err != nil {
panic(err)
}
```
### From bytes
Minify from and to a `[]byte` for a specific mediatype.
``` go
b, err = m.Bytes(mediatype, b)
if err != nil {
panic(err)
}
```
### From string
Minify from and to a `string` for a specific mediatype.
``` go
s, err = m.String(mediatype, s)
if err != nil {
panic(err)
}
```
### To reader
Get a minifying reader for a specific mediatype.
``` go
mr := m.Reader(mediatype, r)
if _, err := mr.Read(b); err != nil {
panic(err)
}
```
### To writer
Get a minifying writer for a specific mediatype. Must be explicitly closed because it uses an `io.Pipe` underneath.
``` go
mw := m.Writer(mediatype, w)
if mw.Write([]byte("input")); err != nil {
panic(err)
}
if err := mw.Close(); err != nil {
panic(err)
}
```
### Middleware
Minify resources on the fly using middleware. It passes a wrapped response writer to the handler that removes the Content-Length header. The minifier is chosen based on the Content-Type header or, if the header is empty, by the request URI file extension. This is on-the-fly processing, you should preferably cache the results though!
``` go
fs := http.FileServer(http.Dir("www/"))
http.Handle("/", m.Middleware(fs))
```
### Custom minifier
Add a minifier for a specific mimetype.
``` go
type CustomMinifier struct {
KeepLineBreaks bool
}
func (c *CustomMinifier) Minify(m *minify.M, w io.Writer, r io.Reader, params map[string]string) error {
// ...
return nil
}
m.Add(mimetype, &CustomMinifier{KeepLineBreaks: true})
// or
m.AddRegexp(regexp.MustCompile("/x-custom$"), &CustomMinifier{KeepLineBreaks: true})
```
Add a minify function for a specific mimetype.
``` go
m.AddFunc(mimetype, func(m *minify.M, w io.Writer, r io.Reader, params map[string]string) error {
// ...
return nil
})
m.AddFuncRegexp(regexp.MustCompile("/x-custom$"), func(m *minify.M, w io.Writer, r io.Reader, params map[string]string) error {
// ...
return nil
})
```
Add a command `cmd` with arguments `args` for a specific mimetype.
``` go
m.AddCmd(mimetype, exec.Command(cmd, args...))
m.AddCmdRegexp(regexp.MustCompile("/x-custom$"), exec.Command(cmd, args...))
```
### Mediatypes
Using the `params map[string]string` argument one can pass parameters to the minifier such as seen in mediatypes (`type/subtype; key1=val1; key2=val2`). Examples are the encoding or charset of the data. Calling `Minify` will split the mimetype and parameters for the minifiers for you, but `MinifyMimetype` can be used if you already have them split up.
Minifiers can also be added using a regular expression. For example a minifier with `image/.*` will match any image mime.
## Examples
### Common minifiers
Basic example that minifies from stdin to stdout and loads the default HTML, CSS and JS minifiers. Optionally, one can enable `java -jar build/compiler.jar` to run for JS (for example the [ClosureCompiler](https://code.google.com/p/closure-compiler/)). Note that reading the file into a buffer first and writing to a pre-allocated buffer would be faster (but would disable streaming).
``` go
package main
import (
"log"
"os"
"os/exec"
"github.com/tdewolff/minify/v2"
"github.com/tdewolff/minify/v2/css"
"github.com/tdewolff/minify/v2/html"
"github.com/tdewolff/minify/v2/js"
"github.com/tdewolff/minify/v2/json"
"github.com/tdewolff/minify/v2/svg"
"github.com/tdewolff/minify/v2/xml"
)
func main() {
m := minify.New()
m.AddFunc("text/css", css.Minify)
m.AddFunc("text/html", html.Minify)
m.AddFunc("image/svg+xml", svg.Minify)
m.AddFuncRegexp(regexp.MustCompile("^(application|text)/(x-)?(java|ecma)script$"), js.Minify)
m.AddFuncRegexp(regexp.MustCompile("[/+]json$"), json.Minify)
m.AddFuncRegexp(regexp.MustCompile("[/+]xml$"), xml.Minify)
if err := m.Minify("text/html", os.Stdout, os.Stdin); err != nil {
panic(err)
}
}
```
### External minifiers
Below are some examples of using common external minifiers.
#### Closure Compiler
See [Closure Compiler Application](https://developers.google.com/closure/compiler/docs/gettingstarted_app). Not tested.
``` go
m.AddCmdRegexp(regexp.MustCompile("^(application|text)/(x-)?(java|ecma)script$"),
exec.Command("java", "-jar", "build/compiler.jar"))
```
#### UglifyJS
See [UglifyJS](https://github.com/mishoo/UglifyJS2).
``` go
m.AddCmdRegexp(regexp.MustCompile("^(application|text)/(x-)?(java|ecma)script$"),
exec.Command("uglifyjs"))
```
#### esbuild
See [esbuild](https://github.com/evanw/esbuild).
``` go
m.AddCmdRegexp(regexp.MustCompile("^(application|text)/(x-)?(java|ecma)script$"),
exec.Command("esbuild", "$in.js", "--minify", "--outfile=$out.js"))
```
### <a name="custom-minifier-example"></a> Custom minifier
Custom minifier showing an example that implements the minifier function interface. Within a custom minifier, it is possible to call any minifier function (through `m *minify.M`) recursively when dealing with embedded resources.
``` go
package main
import (
"bufio"
"fmt"
"io"
"log"
"strings"
"github.com/tdewolff/minify/v2"
)
func main() {
m := minify.New()
m.AddFunc("text/plain", func(m *minify.M, w io.Writer, r io.Reader, _ map[string]string) error {
// remove newlines and spaces
rb := bufio.NewReader(r)
for {
line, err := rb.ReadString('\n')
if err != nil && err != io.EOF {
return err
}
if _, errws := io.WriteString(w, strings.Replace(line, " ", "", -1)); errws != nil {
return errws
}
if err == io.EOF {
break
}
}
return nil
})
in := "Because my coffee was too cold, I heated it in the microwave."
out, err := m.String("text/plain", in)
if err != nil {
panic(err)
}
fmt.Println(out)
// Output: Becausemycoffeewastoocold,Iheateditinthemicrowave.
}
```
### ResponseWriter
#### Middleware
``` go
func main() {
m := minify.New()
m.AddFunc("text/css", css.Minify)
m.AddFunc("text/html", html.Minify)
m.AddFunc("image/svg+xml", svg.Minify)
m.AddFuncRegexp(regexp.MustCompile("^(application|text)/(x-)?(java|ecma)script$"), js.Minify)
m.AddFuncRegexp(regexp.MustCompile("[/+]json$"), json.Minify)
m.AddFuncRegexp(regexp.MustCompile("[/+]xml$"), xml.Minify)
fs := http.FileServer(http.Dir("www/"))
http.Handle("/", m.MiddlewareWithError(fs, handleError))
}
func handleError(w http.ResponseWriter, r *http.Request, err error) {
http.Error(w, err.Error(), http.StatusInternalServerError)
}
```
In order to properly handle minify errors, it is necessary to close the response writer since all writes are concurrently handled. There is no need to check errors on writes since they will be returned on closing.
```go
func main() {
m := minify.New()
m.AddFunc("text/html", html.Minify)
m.AddFuncRegexp(regexp.MustCompile("^(application|text)/(x-)?(java|ecma)script$"), js.Minify)
input := `<script>const i = 1_000_</script>` // Faulty JS
req := httptest.NewRequest(http.MethodGet, "/", nil)
rec := httptest.NewRecorder()
m.Middleware(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/html")
_, _ = w.Write([]byte(input))
if err := w.(io.Closer).Close(); err != nil {
panic(err)
}
})).ServeHTTP(rec, req)
}
```
#### ResponseWriter
``` go
func Serve(w http.ResponseWriter, r *http.Request) {
mw := m.ResponseWriter(w, r)
defer mw.Close()
w = mw
http.ServeFile(w, r, path.Join("www", r.URL.Path))
}
```
#### Custom response writer
ResponseWriter example which returns a ResponseWriter that minifies the content and then writes to the original ResponseWriter. Any write after applying this filter will be minified.
``` go
type MinifyResponseWriter struct {
http.ResponseWriter
io.WriteCloser
}
func (m MinifyResponseWriter) Write(b []byte) (int, error) {
return m.WriteCloser.Write(b)
}
// MinifyResponseWriter must be closed explicitly by calling site.
func MinifyFilter(mediatype string, res http.ResponseWriter) MinifyResponseWriter {
m := minify.New()
// add minifiers
mw := m.Writer(mediatype, res)
return MinifyResponseWriter{res, mw}
}
```
``` go
// Usage
func(w http.ResponseWriter, req *http.Request) {
w = MinifyFilter("text/html", w)
if _, err := io.WriteString(w, "<p class=\"message\"> This HTTP response will be minified. </p>"); err != nil {
panic(err)
}
if err := w.Close(); err != nil {
panic(err)
}
// Output: <p class=message>This HTTP response will be minified.
}
```
### Templates
Here's an example of a replacement for `template.ParseFiles` from `html/template`, which automatically minifies each template before parsing it.
Be aware that minifying templates will work in most cases but not all. Because the HTML minifier only works for valid HTML5, your template must be valid HTML5 of itself. Template tags are parsed as regular text by the minifier.
``` go
func compileTemplates(filenames ...string) (*template.Template, error) {
m := minify.New()
m.AddFunc("text/html", html.Minify)
var tmpl *template.Template
for _, filename := range filenames {
name := filepath.Base(filename)
if tmpl == nil {
tmpl = template.New(name)
} else {
tmpl = tmpl.New(name)
}
b, err := ioutil.ReadFile(filename)
if err != nil {
return nil, err
}
mb, err := m.Bytes("text/html", b)
if err != nil {
return nil, err
}
tmpl.Parse(string(mb))
}
return tmpl, nil
}
```
Example usage:
``` go
templates := template.Must(compileTemplates("view.html", "home.html"))
```
## License
Released under the [MIT license](LICENSE.md).
[1]: http://golang.org/ "Go Language"

View file

@ -1,515 +0,0 @@
package minify
import (
"bytes"
"encoding/base64"
"github.com/tdewolff/parse/v2"
"github.com/tdewolff/parse/v2/strconv"
)
// Byte-slice literals used by DataURI when rewriting data URIs.
var (
// textMimeBytes is the mediatype prefix that DataURI strips from its output.
textMimeBytes = []byte("text/plain")
// charsetASCIIBytes is the mediatype parameter that DataURI strips from its output.
charsetASCIIBytes = []byte("charset=us-ascii")
// dataBytes is the scheme prefix of every URI that DataURI builds.
dataBytes = []byte("data:")
// base64Bytes is appended to the mediatype when DataURI chooses base64 encoding.
base64Bytes = []byte(";base64")
)
// Epsilon is the closest number to zero that is not considered to be zero.
var Epsilon = 0.00001
// Mediatype minifies a given mediatype by dropping every whitespace byte that
// is not enclosed in double quotes and lowercasing the result. The input slice
// is compacted in place and the returned slice shares its storage.
func Mediatype(b []byte) []byte {
	quoted := false
	w := 0 // write cursor; never runs ahead of the read position
	for _, c := range b {
		if !quoted && parse.IsWhitespace(c) {
			// unquoted whitespace is dropped
			continue
		}
		if c == '"' {
			quoted = !quoted
		}
		b[w] = c
		w++
	}
	return parse.ToLower(b[:w])
}
// DataURI minifies a data URI and calls a minifier by the specified mediatype. Specifications: https://www.ietf.org/rfc/rfc2397.txt.
//
// The payload is minified through m and then re-encoded with whichever of
// base64 or URL-escaping is estimated to be shorter. A leading "text/plain"
// mediatype and a ";charset=us-ascii" parameter are stripped from the output.
// If dataURI cannot be parsed it is returned as is; if the original is shorter
// than both estimated encodings a copy of the original is returned.
//
// The returned slice is always freshly allocated (or is the input/its copy);
// it never shares storage with package-level state.
func DataURI(m *M, dataURI []byte) []byte {
	// keep a pristine copy: parse.DataURI and the appends below may modify dataURI's storage
	// NOTE(review): presumably parse.DataURI decodes in place — confirm against the parse package
	origData := parse.Copy(dataURI)
	mediatype, data, err := parse.DataURI(dataURI)
	if err != nil {
		return dataURI
	}

	// the minification error is deliberately ignored, as before: this is best-effort
	// NOTE(review): assumes m.Bytes still returns usable data on failure — confirm
	data, _ = m.Bytes(string(mediatype), data)

	// estimate the encoded length for both encodings
	base64Len := len(";base64") + base64.StdEncoding.EncodedLen(len(data))
	asciiLen := len(data)
	for _, c := range data {
		if parse.DataURIEncodingTable[c] {
			asciiLen += 2 // an escaped byte takes three characters instead of one
		}
		if asciiLen > base64Len {
			break // base64 already wins, no need to count further
		}
	}
	if len(origData) < base64Len && len(origData) < asciiLen {
		return origData
	}

	if base64Len < asciiLen {
		encoded := make([]byte, base64Len-len(";base64"))
		base64.StdEncoding.Encode(encoded, data)
		data = encoded
		mediatype = append(mediatype, base64Bytes...)
	} else {
		data = parse.EncodeURL(data, parse.DataURIEncodingTable)
	}

	// text/plain is the default mediatype and can be left out
	if len("text/plain") <= len(mediatype) && parse.EqualFold(mediatype[:len("text/plain")], textMimeBytes) {
		mediatype = mediatype[len("text/plain"):]
	}
	for i := 0; i+len(";charset=us-ascii") <= len(mediatype); i++ {
		// must start with semicolon and be followed by end of mediatype or semicolon
		if mediatype[i] == ';' && parse.EqualFold(mediatype[i+1:i+len(";charset=us-ascii")], charsetASCIIBytes) && (i+len(";charset=us-ascii") >= len(mediatype) || mediatype[i+len(";charset=us-ascii")] == ';') {
			mediatype = append(mediatype[:i], mediatype[i+len(";charset=us-ascii"):]...)
			break
		}
	}

	// Build the result in its own buffer. Appending directly to the
	// package-level dataBytes could write into that slice's spare capacity,
	// making short results alias shared storage across calls.
	out := make([]byte, 0, len(dataBytes)+len(mediatype)+1+len(data))
	out = append(out, dataBytes...)
	out = append(out, mediatype...)
	out = append(out, ',')
	return append(out, data...)
}
// MaxInt is the maximum value of int: every bit of a uint set, shifted right once to clear the top bit.
const MaxInt = int(^uint(0) >> 1)
// MinInt is the minimum value of int, derived from MaxInt.
const MinInt = -MaxInt - 1
// Decimal minifies a given byte slice containing a decimal and removes superfluous characters.
// Unlike Number it neither parses nor outputs exponents. prec is the number of significant digits. When prec is zero it will keep all digits. Only digits after the dot can be removed to reach the number of significant digits. Very large numbers may thus have more significant digits.
// The input slice is modified in place and the result is a subslice of it.
func Decimal(num []byte, prec int) []byte {
if len(num) <= 1 {
return num
}
// omit first + and register mantissa start and end and whether it's negative
neg := false
start := 0
dot := -1
end := len(num)
if 0 < end && (num[0] == '+' || num[0] == '-') {
if num[0] == '-' {
neg = true
}
start++
}
for i, c := range num[start:] {
if c == '.' {
dot = start + i
break
}
}
// without a dot, treat the dot as sitting right after the last digit
if dot == -1 {
dot = end
}
// trim leading zeros but leave at least one digit
for start < end-1 && num[start] == '0' {
start++
}
// trim trailing zeros
i := end - 1
for ; dot < i; i-- {
if num[i] != '0' {
end = i + 1
break
}
}
if i == dot {
// nothing but zeros after the dot (or no fraction at all): drop the dot too
end = dot
if start == end {
// no digits left before the dot either, the value is zero
num[start] = '0'
return num[start : start+1]
}
} else if start == end-1 && num[start] == '0' {
return num[start:end]
}
// apply precision
if 0 < prec && dot <= start+prec {
precEnd := start + prec + 1 // include dot
if dot == start { // for numbers like .012
digit := start + 1
for digit < end && num[digit] == '0' {
digit++
}
precEnd = digit + prec
}
if precEnd < end {
end = precEnd
// process either an increase from a lesser significant decimal (>= 5)
// or remove trailing zeros after the dot, or both
i := end - 1
inc := '5' <= num[end]
for ; start < i; i-- {
if i == dot {
// no-op
} else if inc && num[i] != '9' {
num[i]++
inc = false
break
} else if inc && i < dot { // end inc for integer
num[i] = '0'
} else if !inc && (i < dot || num[i] != '0') {
break
}
}
if i < dot {
end = dot
} else {
end = i + 1
}
// the carry reached the first digit
if inc {
if dot == start && end == start+1 {
num[start] = '1'
} else if num[start] == '9' {
num[start] = '1'
num[start+1] = '0'
end++
} else {
num[start]++
}
}
}
}
// put the minus sign back in the byte just before the first kept digit
if neg {
start--
num[start] = '-'
}
return num[start:end]
}
// Number minifies a given byte slice containing a number and removes superfluous characters.
// prec is the number of significant digits to keep; precision is only reduced when prec is greater than zero.
// The input slice is modified in place and the result is a subslice of it. The input is returned unchanged when its exponent cannot be parsed or when the resulting exponent would overflow an int.
func Number(num []byte, prec int) []byte {
if len(num) <= 1 {
return num
}
// omit first + and register mantissa start and end, whether it's negative and the exponent
neg := false
start := 0
dot := -1
end := len(num)
origExp := 0
if num[0] == '+' || num[0] == '-' {
if num[0] == '-' {
neg = true
}
start++
}
for i, c := range num[start:] {
if c == '.' {
dot = start + i
} else if c == 'e' || c == 'E' {
// the mantissa ends at the exponent marker; parse what follows (skipping one '+') as the exponent
end = start + i
i += start + 1
if i < len(num) && num[i] == '+' {
i++
}
if tmpOrigExp, n := strconv.ParseInt(num[i:]); 0 < n && int64(MinInt) <= tmpOrigExp && tmpOrigExp <= int64(MaxInt) {
// range checks for when int is 32 bit
origExp = int(tmpOrigExp)
} else {
return num
}
break
}
}
// without a dot, treat the dot as sitting right after the last mantissa digit
if dot == -1 {
dot = end
}
// trim leading zeros but leave at least one digit
for start < end-1 && num[start] == '0' {
start++
}
// trim trailing zeros
i := end - 1
for ; dot < i; i-- {
if num[i] != '0' {
end = i + 1
break
}
}
if i == dot {
end = dot
if start == end {
num[start] = '0'
return num[start : start+1]
}
} else if start == end-1 && num[start] == '0' {
return num[start:end]
}
// apply precision
if 0 < prec { //&& (dot <= start+prec || start+prec+1 < dot || 0 < origExp) { // don't minify 9 to 10, but do 999 to 1e3 and 99e1 to 1e3
precEnd := start + prec
if dot == start { // for numbers like .012
digit := start + 1
for digit < end && num[digit] == '0' {
digit++
}
precEnd = digit + prec
} else if dot < precEnd { // for numbers where precision will include the dot
precEnd++
}
if precEnd < end && (dot < end || 1 < dot-precEnd+origExp) { // do not minify 9=>10 or 99=>100 or 9e1=>1e2 (but 90), but 999=>1e3 and 99e1=>1e3
end = precEnd
inc := '5' <= num[end]
if dot == end {
inc = end+1 < len(num) && '5' <= num[end+1]
}
if precEnd < dot {
origExp += dot - precEnd
dot = precEnd
}
// process either an increase from a lesser significant decimal (>= 5)
// and remove trailing zeros
i := end - 1
for ; start < i; i-- {
if i == dot {
// no-op
} else if inc && num[i] != '9' {
num[i]++
inc = false
break
} else if !inc && num[i] != '0' {
break
}
}
end = i + 1
if end < dot {
origExp += dot - end
dot = end
}
if inc { // single digit left
if dot == start {
num[start] = '1'
dot = start + 1
} else if num[start] == '9' {
num[start] = '1'
origExp++
} else {
num[start]++
}
}
}
}
// n is the number of significant digits
// normExp would be the exponent if it were normalised (0.1 <= f < 1)
n := 0
normExp := 0
if dot == start {
for i = dot + 1; i < end; i++ {
if num[i] != '0' {
n = end - i
normExp = dot - i + 1
break
}
}
} else if dot == end {
normExp = end - start
for i = end - 1; start <= i; i-- {
if num[i] != '0' {
n = i + 1 - start
end = i + 1
break
}
}
} else {
n = end - start - 1
normExp = dot - start
}
if origExp < 0 && (normExp < MinInt-origExp || normExp-n < MinInt-origExp) || 0 < origExp && (MaxInt-origExp < normExp || MaxInt-origExp < normExp-n) {
return num // exponent overflow
}
normExp += origExp
// intExp would be the exponent if it were an integer
intExp := normExp - n
lenIntExp := strconv.LenInt(int64(intExp))
lenNormExp := strconv.LenInt(int64(normExp))
// there are four cases to consider when printing the number
// case 1: without decimals and with a positive exponent (large numbers: 5e4)
// case 2: with decimals and with a negative exponent (small numbers with many digits: .123456e-4)
// case 3: with decimals and without an exponent (around zero: 5.6)
// case 4: without decimals and with a negative exponent (small numbers: 123456e-9)
if n <= normExp {
// case 1: print number with positive exponent
if dot < end {
// remove dot, either from the front or copy the smallest part
if dot == start {
start = end - n
} else if dot-start < end-dot-1 {
copy(num[start+1:], num[start:dot])
start++
} else {
copy(num[dot:], num[dot+1:end])
end--
}
}
if n+3 <= normExp {
num[end] = 'e'
end++
for i := end + lenIntExp - 1; end <= i; i-- {
num[i] = byte(intExp%10) + '0'
intExp /= 10
}
end += lenIntExp
} else if n+2 == normExp {
num[end] = '0'
num[end+1] = '0'
end += 2
} else if n+1 == normExp {
num[end] = '0'
end++
}
} else if normExp < -3 && lenNormExp < lenIntExp && dot < end {
// case 2: print normalized number (0.1 <= f < 1)
zeroes := -normExp + origExp
if 0 < zeroes {
copy(num[start+1:], num[start+1+zeroes:end])
end -= zeroes
} else if zeroes < 0 {
copy(num[start+1:], num[start:dot])
num[start] = '.'
}
num[end] = 'e'
num[end+1] = '-'
end += 2
for i := end + lenNormExp - 1; end <= i; i-- {
num[i] = -byte(normExp%10) + '0'
normExp /= 10
}
end += lenNormExp
} else if -lenIntExp-1 <= normExp {
// case 3: print number without exponent
zeroes := -normExp
if 0 < zeroes {
// dot placed at the front and negative exponent, adding zeroes
newDot := end - n - zeroes - 1
if newDot != dot {
d := start - newDot
if 0 < d {
if dot < end {
// copy original digits after the dot towards the end
copy(num[dot+1+d:], num[dot+1:end])
if start < dot {
// copy original digits before the dot towards the end
copy(num[start+d+1:], num[start:dot])
}
} else if start < dot {
// copy original digits before the dot towards the end
copy(num[start+d:], num[start:dot])
}
newDot = start
end += d
} else {
start += -d
}
num[newDot] = '.'
for i := 0; i < zeroes; i++ {
num[newDot+1+i] = '0'
}
}
} else {
// dot placed in the middle of the number
if dot == start {
// when there are zeroes after the dot
dot = end - n - 1
start = dot
} else if end <= dot {
// when input has no dot in it
dot = end
end++
}
newDot := start + normExp
// move digits between dot and newDot towards the end
if dot < newDot {
copy(num[dot:], num[dot+1:newDot+1])
} else if newDot < dot {
copy(num[newDot+1:], num[newDot:dot])
}
num[newDot] = '.'
}
} else {
// case 4: print number with negative exponent
// find new end, considering moving numbers to the front, removing the dot and increasing the length of the exponent
newEnd := end
if dot == start {
newEnd = start + n
} else {
newEnd--
}
newEnd += 2 + lenIntExp
exp := intExp
lenExp := lenIntExp
if newEnd < len(num) {
// it saves space to convert the decimal to an integer and decrease the exponent
if dot < end {
if dot == start {
copy(num[start:], num[end-n:end])
end = start + n
} else {
copy(num[dot:], num[dot+1:end])
end--
}
}
} else {
// it does not save space and will panic, so we revert to the original representation
exp = origExp
lenExp = 1
if origExp <= -10 || 10 <= origExp {
lenExp = strconv.LenInt(int64(origExp))
}
}
num[end] = 'e'
num[end+1] = '-'
end += 2
for i := end + lenExp - 1; end <= i; i-- {
num[i] = -byte(exp%10) + '0'
exp /= 10
}
end += lenExp
}
// put the minus sign back in the byte just before the first kept digit
if neg {
start--
num[start] = '-'
}
return num[start:end]
}
// UpdateErrorPosition shifts the line and column of a *parse.Error by the
// position that offset has within input, and returns that same error. Errors
// of any other type are returned untouched.
func UpdateErrorPosition(err error, input *parse.Input, offset int) error {
	perr, isParseErr := err.(*parse.Error)
	if !isParseErr {
		return err
	}

	// positions are 1-based, hence the -1 when adding them to the existing ones
	line, column, _ := parse.Position(bytes.NewBuffer(input.Bytes()), offset)
	perr.Line += line - 1
	perr.Column += column - 1
	return perr
}

View file

@ -1,137 +0,0 @@
package html
import (
"github.com/tdewolff/parse/v2"
"github.com/tdewolff/parse/v2/html"
)
// Token is a single token unit with an attribute value (if given) and hash of the data.
type Token struct {
// TokenType is the token kind returned by the lexer's Next.
html.TokenType
// Hash is ToHash(Text) for attribute and start/end tag tokens, zero for all others.
Hash Hash
// Data holds the bytes returned by the lexer's Next for this token.
Data []byte
// Text holds the lexer's Text for this token.
Text []byte
// AttrVal is the attribute value with one pair of surrounding quotes removed; nil for non-attribute tokens.
AttrVal []byte
// Traits is looked up by Hash in attrMap (attributes) or tagMap (tags); zero otherwise.
Traits traits
// Offset is the input offset taken before the token was lexed. For attributes it is
// advanced past the name (and an opening quote) — presumably to the value's position; confirm.
Offset int
}
// TokenBuffer is a buffer that allows for token look-ahead.
type TokenBuffer struct {
// r is the input whose Offset is recorded on every token read.
r *parse.Input
// l is the lexer that produces the tokens.
l *html.Lexer
// buf holds the buffered tokens; entries from pos onward have not been shifted out yet.
buf []Token
// pos is the index in buf of the next token Shift will return.
pos int
// attrBuffer is scratch storage reused by Attributes for its result.
attrBuffer []*Token
}
// NewTokenBuffer creates a TokenBuffer that takes tokens from l and token
// offsets from r. The buffer starts empty with room for eight tokens.
func NewTokenBuffer(r *parse.Input, l *html.Lexer) *TokenBuffer {
	z := new(TokenBuffer)
	z.r = r
	z.l = l
	z.buf = make([]Token, 0, 8)
	return z
}
// read lexes the next token into t, overwriting every field of t.
func (z *TokenBuffer) read(t *Token) {
	// the offset must be taken before the lexer advances
	t.Offset = z.r.Offset()
	t.TokenType, t.Data = z.l.Next()
	t.Text = z.l.Text()

	switch t.TokenType {
	case html.AttributeToken:
		// skip one byte, the attribute name and one more byte
		t.Offset += 1 + len(t.Text) + 1
		val := z.l.AttrVal()
		if len(val) > 1 && (val[0] == '"' || val[0] == '\'') {
			t.Offset++
			val = val[1 : len(val)-1] // quotes will be readded in attribute loop if necessary
		}
		t.AttrVal = val
		t.Hash = ToHash(t.Text)
		t.Traits = attrMap[t.Hash]
	case html.StartTagToken, html.EndTagToken:
		t.AttrVal = nil
		t.Hash = ToHash(t.Text)
		t.Traits = tagMap[t.Hash] // zero if not exist
	default:
		t.AttrVal = nil
		t.Hash = 0
		t.Traits = 0
	}
}
// Peek returns the ith element and possibly does an allocation.
// When the buffer already ends in an ErrorToken, or one is read while filling the buffer, that ErrorToken is returned instead of the requested element.
// NOTE(review): this comment previously said "Peeking past an error will panic" — confirm which behaviour is intended.
func (z *TokenBuffer) Peek(pos int) *Token {
pos += z.pos
if pos >= len(z.buf) {
// nothing can follow a buffered error, hand it out again
if len(z.buf) > 0 && z.buf[len(z.buf)-1].TokenType == html.ErrorToken {
return &z.buf[len(z.buf)-1]
}
c := cap(z.buf)
d := len(z.buf) - z.pos // number of buffered tokens not yet shifted out
p := pos - z.pos + 1 // required peek length
var buf []Token
// allocate a larger buffer when the required length exceeds half the capacity, otherwise reuse the existing storage
if 2*p > c {
buf = make([]Token, 0, 2*c+p)
} else {
buf = z.buf
}
// move the unread tokens to the front
copy(buf[:d], z.buf[z.pos:])
buf = buf[:p]
pos -= z.pos
// read the missing tokens, stopping early at an error
for i := d; i < p; i++ {
z.read(&buf[i])
if buf[i].TokenType == html.ErrorToken {
buf = buf[:i+1]
pos = i
break
}
}
z.pos, z.buf = 0, buf
}
return &z.buf[pos]
}
// Shift returns the first unread element and advances the position. When no
// buffered token is left, the next token is read into the buffer's first slot
// and returned without changing the buffer's length or position.
func (z *TokenBuffer) Shift() *Token {
	if z.pos < len(z.buf) {
		tok := &z.buf[z.pos]
		z.pos++
		return tok
	}

	// reslice so the first slot is addressable even when the buffer is empty
	tok := &z.buf[:1][0]
	z.read(tok)
	return tok
}
// Attributes extracts the given attribute hashes from a tag.
// The result has one entry per requested hash, in the same order, pointing to
// the matching attribute token or nil when the tag has no such attribute.
// The returned slice is reused by subsequent calls.
func (z *TokenBuffer) Attributes(hashes ...Hash) []*Token {
	// count the attribute tokens that follow the current position
	count := 0
	for z.Peek(count).TokenType == html.AttributeToken {
		count++
	}

	// size the result slice, clearing entries left over from a previous call
	if cap(z.attrBuffer) < len(hashes) {
		z.attrBuffer = make([]*Token, len(hashes))
	} else {
		z.attrBuffer = z.attrBuffer[:len(hashes)]
		for k := range z.attrBuffer {
			z.attrBuffer[k] = nil
		}
	}

	// when an attribute occurs more than once, the last occurrence wins
	for idx := z.pos; idx < z.pos+count; idx++ {
		attr := &z.buf[idx]
		for slot, want := range hashes {
			if attr.Hash == want {
				z.attrBuffer[slot] = attr
			}
		}
	}
	return z.attrBuffer
}

View file

@ -1,543 +0,0 @@
package html
// generated by hasher -type=Hash -file=hash.go; DO NOT EDIT, except for adding more constants to the list and rerun go generate
// uses github.com/tdewolff/hasher
//go:generate hasher -type=Hash -file=hash.go
// Hash defines perfect hashes for a predefined list of strings
type Hash uint32
// Unique hash definitions to be used instead of strings
const (
A Hash = 0x1 // a
Abbr Hash = 0x37a04 // abbr
About Hash = 0x5 // about
Accept Hash = 0x1106 // accept
Accept_Charset Hash = 0x110e // accept-charset
Action Hash = 0x23f06 // action
Address Hash = 0x5a07 // address
Align Hash = 0x32705 // align
Alink Hash = 0x7005 // alink
Allowfullscreen Hash = 0x2ad0f // allowfullscreen
Amp_Boilerplate Hash = 0x610f // amp-boilerplate
Area Hash = 0x1e304 // area
Article Hash = 0x2707 // article
Aside Hash = 0xb405 // aside
Async Hash = 0xac05 // async
Audio Hash = 0xd105 // audio
Autofocus Hash = 0xe409 // autofocus
Autoplay Hash = 0x10808 // autoplay
Axis Hash = 0x11004 // axis
B Hash = 0x101 // b
Background Hash = 0x300a // background
Base Hash = 0x19604 // base
Bb Hash = 0x37b02 // bb
Bdi Hash = 0x7503 // bdi
Bdo Hash = 0x31f03 // bdo
Bgcolor Hash = 0x12607 // bgcolor
Blockquote Hash = 0x13e0a // blockquote
Body Hash = 0xd04 // body
Br Hash = 0x37c02 // br
Button Hash = 0x14806 // button
Canvas Hash = 0xb006 // canvas
Caption Hash = 0x21f07 // caption
Charset Hash = 0x1807 // charset
Checked Hash = 0x1b307 // checked
Cite Hash = 0xfb04 // cite
Class Hash = 0x15905 // class
Classid Hash = 0x15907 // classid
Clear Hash = 0x2b05 // clear
Code Hash = 0x19204 // code
Codebase Hash = 0x19208 // codebase
Codetype Hash = 0x1a408 // codetype
Col Hash = 0x12803 // col
Colgroup Hash = 0x1bb08 // colgroup
Color Hash = 0x12805 // color
Cols Hash = 0x1cf04 // cols
Colspan Hash = 0x1cf07 // colspan
Compact Hash = 0x1ec07 // compact
Content Hash = 0x28407 // content
Controls Hash = 0x20108 // controls
Data Hash = 0x1f04 // data
Datalist Hash = 0x1f08 // datalist
Datatype Hash = 0x4d08 // datatype
Dd Hash = 0x5b02 // dd
Declare Hash = 0xb707 // declare
Default Hash = 0x7f07 // default
DefaultChecked Hash = 0x1730e // defaultChecked
DefaultMuted Hash = 0x7f0c // defaultMuted
DefaultSelected Hash = 0x8a0f // defaultSelected
Defer Hash = 0x9805 // defer
Del Hash = 0x10503 // del
Details Hash = 0x15f07 // details
Dfn Hash = 0x16c03 // dfn
Dialog Hash = 0xa606 // dialog
Dir Hash = 0x7603 // dir
Disabled Hash = 0x18008 // disabled
Div Hash = 0x18703 // div
Dl Hash = 0x1b902 // dl
Dt Hash = 0x23102 // dt
Em Hash = 0x4302 // em
Embed Hash = 0x4905 // embed
Enabled Hash = 0x26c07 // enabled
Enctype Hash = 0x1fa07 // enctype
Face Hash = 0x5604 // face
Fieldset Hash = 0x21408 // fieldset
Figcaption Hash = 0x21c0a // figcaption
Figure Hash = 0x22606 // figure
Footer Hash = 0xdb06 // footer
For Hash = 0x23b03 // for
Form Hash = 0x23b04 // form
Formaction Hash = 0x23b0a // formaction
Formnovalidate Hash = 0x2450e // formnovalidate
Frame Hash = 0x28c05 // frame
Frameborder Hash = 0x28c0b // frameborder
H1 Hash = 0x2e002 // h1
H2 Hash = 0x25302 // h2
H3 Hash = 0x25502 // h3
H4 Hash = 0x25702 // h4
H5 Hash = 0x25902 // h5
H6 Hash = 0x25b02 // h6
Head Hash = 0x2d204 // head
Header Hash = 0x2d206 // header
Hgroup Hash = 0x25d06 // hgroup
Hidden Hash = 0x26806 // hidden
Hr Hash = 0x32d02 // hr
Href Hash = 0x32d04 // href
Hreflang Hash = 0x32d08 // hreflang
Html Hash = 0x27304 // html
Http_Equiv Hash = 0x2770a // http-equiv
I Hash = 0x2401 // i
Icon Hash = 0x28304 // icon
Id Hash = 0xb602 // id
Iframe Hash = 0x28b06 // iframe
Img Hash = 0x29703 // img
Inert Hash = 0xf605 // inert
Inlist Hash = 0x29a06 // inlist
Input Hash = 0x2a405 // input
Ins Hash = 0x2a903 // ins
Ismap Hash = 0x11205 // ismap
Itemscope Hash = 0xfc09 // itemscope
Kbd Hash = 0x7403 // kbd
Keygen Hash = 0x1f606 // keygen
Label Hash = 0xbe05 // label
Lang Hash = 0x33104 // lang
Language Hash = 0x33108 // language
Legend Hash = 0x2c506 // legend
Li Hash = 0x2302 // li
Link Hash = 0x7104 // link
Longdesc Hash = 0xc208 // longdesc
Main Hash = 0xf404 // main
Manifest Hash = 0x2bc08 // manifest
Map Hash = 0xee03 // map
Mark Hash = 0x2cb04 // mark
Math Hash = 0x2cf04 // math
Max Hash = 0x2d803 // max
Maxlength Hash = 0x2d809 // maxlength
Media Hash = 0xa405 // media
Menu Hash = 0x12204 // menu
Meta Hash = 0x2e204 // meta
Meter Hash = 0x2f705 // meter
Method Hash = 0x2fc06 // method
Multiple Hash = 0x30208 // multiple
Muted Hash = 0x30a05 // muted
Name Hash = 0xa204 // name
Nav Hash = 0x32403 // nav
Nohref Hash = 0x32b06 // nohref
Noresize Hash = 0x13608 // noresize
Noscript Hash = 0x14d08 // noscript
Noshade Hash = 0x16e07 // noshade
Novalidate Hash = 0x2490a // novalidate
Nowrap Hash = 0x1d506 // nowrap
Object Hash = 0xd506 // object
Ol Hash = 0xcb02 // ol
Open Hash = 0x32104 // open
Optgroup Hash = 0x35608 // optgroup
Option Hash = 0x30f06 // option
Output Hash = 0x206 // output
P Hash = 0x501 // p
Param Hash = 0xf005 // param
Pauseonexit Hash = 0x1160b // pauseonexit
Picture Hash = 0x1c207 // picture
Plaintext Hash = 0x1da09 // plaintext
Poster Hash = 0x26206 // poster
Pre Hash = 0x35d03 // pre
Prefix Hash = 0x35d06 // prefix
Profile Hash = 0x36407 // profile
Progress Hash = 0x34208 // progress
Property Hash = 0x31508 // property
Q Hash = 0x14301 // q
Rb Hash = 0x2f02 // rb
Readonly Hash = 0x1e408 // readonly
Rel Hash = 0xbc03 // rel
Required Hash = 0x22a08 // required
Resource Hash = 0x1c708 // resource
Rev Hash = 0x7803 // rev
Reversed Hash = 0x7808 // reversed
Rows Hash = 0x9c04 // rows
Rowspan Hash = 0x9c07 // rowspan
Rp Hash = 0x6a02 // rp
Rt Hash = 0x2802 // rt
Rtc Hash = 0xf903 // rtc
Ruby Hash = 0xe004 // ruby
Rules Hash = 0x12c05 // rules
S Hash = 0x1c01 // s
Samp Hash = 0x6004 // samp
Scope Hash = 0x10005 // scope
Scoped Hash = 0x10006 // scoped
Script Hash = 0x14f06 // script
Scrolling Hash = 0xc809 // scrolling
Seamless Hash = 0x19808 // seamless
Section Hash = 0x13007 // section
Select Hash = 0x16506 // select
Selected Hash = 0x16508 // selected
Shape Hash = 0x19f05 // shape
Size Hash = 0x13a04 // size
Slot Hash = 0x20804 // slot
Small Hash = 0x2ab05 // small
Sortable Hash = 0x2ef08 // sortable
Source Hash = 0x1c906 // source
Span Hash = 0x9f04 // span
Src Hash = 0x34903 // src
Srcset Hash = 0x34906 // srcset
Start Hash = 0x2505 // start
Strong Hash = 0x29e06 // strong
Style Hash = 0x2c205 // style
Sub Hash = 0x31d03 // sub
Summary Hash = 0x33907 // summary
Sup Hash = 0x34003 // sup
Svg Hash = 0x34f03 // svg
Tabindex Hash = 0x2e408 // tabindex
Table Hash = 0x2f205 // table
Target Hash = 0x706 // target
Tbody Hash = 0xc05 // tbody
Td Hash = 0x1e02 // td
Template Hash = 0x4208 // template
Text Hash = 0x1df04 // text
Textarea Hash = 0x1df08 // textarea
Tfoot Hash = 0xda05 // tfoot
Th Hash = 0x2d102 // th
Thead Hash = 0x2d105 // thead
Time Hash = 0x12004 // time
Title Hash = 0x15405 // title
Tr Hash = 0x1f202 // tr
Track Hash = 0x1f205 // track
Translate Hash = 0x20b09 // translate
Truespeed Hash = 0x23209 // truespeed
Type Hash = 0x5104 // type
Typemustmatch Hash = 0x1a80d // typemustmatch
Typeof Hash = 0x5106 // typeof
U Hash = 0x301 // u
Ul Hash = 0x8302 // ul
Undeterminate Hash = 0x370d // undeterminate
Usemap Hash = 0xeb06 // usemap
Valign Hash = 0x32606 // valign
Value Hash = 0x18905 // value
Valuetype Hash = 0x18909 // valuetype
Var Hash = 0x28003 // var
Video Hash = 0x35205 // video
Visible Hash = 0x36b07 // visible
Vlink Hash = 0x37205 // vlink
Vocab Hash = 0x37705 // vocab
Wbr Hash = 0x37e03 // wbr
Xmlns Hash = 0x2eb05 // xmlns
Xmp Hash = 0x36203 // xmp
)
// String returns the hash's name. The upper bits of the hash are the offset of
// the name in _Hash_text and the low byte is its length; an empty string is
// returned when that range falls outside _Hash_text.
func (i Hash) String() string {
	offset, length := uint32(i>>8), uint32(i&0xff)
	if end := offset + length; end <= uint32(len(_Hash_text)) {
		return _Hash_text[offset:end]
	}
	return ""
}
// ToHash returns the hash whose name is s. It returns zero if there is no
// such hash. It is case sensitive.
//
// Two table slots are probed, one selected by the low bits and one by the
// bits above bit 16 of a multiplicative hash over s. A slot matches when its
// stored length equals len(s) and its text equals s.
func ToHash(s []byte) Hash {
	if len(s) == 0 || _Hash_maxLen < len(s) {
		return 0
	}

	sum := uint32(_Hash_hash0)
	for _, c := range s {
		sum ^= uint32(c)
		sum *= 16777619
	}

	mask := uint32(len(_Hash_table) - 1)
	// first probe; on any mismatch fall through to the second probe
	if cand := _Hash_table[sum&mask]; int(cand&0xff) == len(s) {
		if _Hash_text[cand>>8:cand>>8+cand&0xff] == string(s) {
			return cand
		}
	}
	// second probe; a mismatch here means s is not a known name
	if cand := _Hash_table[(sum>>16)&mask]; int(cand&0xff) == len(s) {
		if _Hash_text[cand>>8:cand>>8+cand&0xff] == string(s) {
			return cand
		}
	}
	return 0
}
const _Hash_hash0 = 0x9acb0442
const _Hash_maxLen = 15
const _Hash_text = "aboutputargetbodyaccept-charsetdatalistarticlearbackgroundet" +
"erminatemplatembedatatypeofaceaddressamp-boilerplatealinkbdi" +
"reversedefaultMutedefaultSelectedeferowspanamedialogasyncanv" +
"asideclarelabelongdescrollingaudiobjectfooterubyautofocusema" +
"paramainertcitemscopedelautoplayaxismapauseonexitimenubgcolo" +
"rulesectionoresizeblockquotebuttonoscriptitleclassidetailsel" +
"ectedfnoshadefaultCheckedisabledivaluetypecodebaseamlesshape" +
"codetypemustmatcheckedlcolgroupicturesourcecolspanowraplaint" +
"extareadonlycompactrackeygenctypecontrolslotranslatefieldset" +
"figcaptionfigurequiredtruespeedformactionformnovalidateh2h3h" +
"4h5h6hgrouposterhiddenabledhtmlhttp-equivaricontentiframebor" +
"derimginlistronginputinsmallowfullscreenmanifestylegendmarkm" +
"atheadermaxlength1metabindexmlnsortablemetermethodmultiplemu" +
"tedoptionpropertysubdopenavalignohreflanguagesummarysuprogre" +
"ssrcsetsvgvideoptgrouprefixmprofilevisiblevlinkvocabbrwbr"
// _Hash_table is the lookup table used by ToHash. It has 1<<9 slots; ToHash
// indexes it with the low bits of the computed hash and, as a second probe,
// with the bits from 16 upwards. Slots that are not listed are zero, which
// never matches because ToHash rejects empty input.
var _Hash_table = [1 << 9]Hash{
0x0: 0x1df08, // textarea
0x4: 0x32d02, // hr
0x8: 0x1c207, // picture
0xb: 0x18905, // value
0xf: 0x2e408, // tabindex
0x12: 0x15905, // class
0x15: 0x37e03, // wbr
0x18: 0x1a80d, // typemustmatch
0x1a: 0x1b902, // dl
0x1d: 0xf903, // rtc
0x1e: 0x25702, // h4
0x22: 0x2ef08, // sortable
0x24: 0x4208, // template
0x25: 0x28c0b, // frameborder
0x28: 0x37a04, // abbr
0x29: 0x28b06, // iframe
0x2a: 0x610f, // amp-boilerplate
0x2c: 0x1e408, // readonly
0x30: 0x23f06, // action
0x33: 0x28c05, // frame
0x35: 0x12c05, // rules
0x36: 0x30208, // multiple
0x38: 0x31f03, // bdo
0x39: 0x1d506, // nowrap
0x3e: 0x21408, // fieldset
0x3f: 0x7503, // bdi
0x46: 0x7f0c, // defaultMuted
0x49: 0x35205, // video
0x4c: 0x19808, // seamless
0x4d: 0x13608, // noresize
0x4f: 0xb602, // id
0x51: 0x25d06, // hgroup
0x52: 0x23102, // dt
0x55: 0x12805, // color
0x56: 0x34003, // sup
0x59: 0x370d, // undeterminate
0x5a: 0x35608, // optgroup
0x5b: 0x2d206, // header
0x5c: 0xb405, // aside
0x5f: 0x10005, // scope
0x60: 0x101, // b
0x61: 0xcb02, // ol
0x64: 0x32b06, // nohref
0x65: 0x1da09, // plaintext
0x66: 0x20804, // slot
0x67: 0x11004, // axis
0x68: 0x12803, // col
0x69: 0x32606, // valign
0x6c: 0x2d105, // thead
0x70: 0x34906, // srcset
0x71: 0x26806, // hidden
0x76: 0x1bb08, // colgroup
0x78: 0x34f03, // svg
0x7b: 0x2cb04, // mark
0x7e: 0x33104, // lang
0x81: 0x1cf04, // cols
0x86: 0x5a07, // address
0x8b: 0xf404, // main
0x8c: 0x4302, // em
0x8f: 0x32d08, // hreflang
0x93: 0x1b307, // checked
0x94: 0x25902, // h5
0x95: 0x301, // u
0x96: 0x32705, // align
0x97: 0x14301, // q
0x99: 0xd506, // object
0x9b: 0x28407, // content
0x9d: 0xc809, // scrolling
0x9f: 0x36407, // profile
0xa0: 0x34903, // src
0xa1: 0xda05, // tfoot
0xa3: 0x2f705, // meter
0xa4: 0x37705, // vocab
0xa6: 0xd04, // body
0xa8: 0x19204, // code
0xac: 0x20108, // controls
0xb0: 0x2ab05, // small
0xb1: 0x18008, // disabled
0xb5: 0x5604, // face
0xb6: 0x501, // p
0xb9: 0x2302, // li
0xbb: 0xe409, // autofocus
0xbf: 0x27304, // html
0xc2: 0x4d08, // datatype
0xc6: 0x35d06, // prefix
0xcb: 0x35d03, // pre
0xcc: 0x1106, // accept
0xd1: 0x23b03, // for
0xd5: 0x29e06, // strong
0xd6: 0x9c07, // rowspan
0xd7: 0x25502, // h3
0xd8: 0x2cf04, // math
0xde: 0x16e07, // noshade
0xdf: 0x19f05, // shape
0xe1: 0x10006, // scoped
0xe3: 0x706, // target
0xe6: 0x21c0a, // figcaption
0xe9: 0x1df04, // text
0xea: 0x1c708, // resource
0xec: 0xee03, // map
0xf0: 0x29a06, // inlist
0xf1: 0x16506, // select
0xf2: 0x1f606, // keygen
0xf3: 0x5106, // typeof
0xf6: 0xb006, // canvas
0xf7: 0x30f06, // option
0xf8: 0xbe05, // label
0xf9: 0xbc03, // rel
0xfb: 0x1f04, // data
0xfd: 0x6004, // samp
0x100: 0x110e, // accept-charset
0x101: 0xeb06, // usemap
0x103: 0x2bc08, // manifest
0x109: 0xa204, // name
0x10a: 0x14806, // button
0x10b: 0x2b05, // clear
0x10e: 0x33907, // summary
0x10f: 0x2e204, // meta
0x110: 0x33108, // language
0x112: 0x300a, // background
0x113: 0x2707, // article
0x116: 0x23b0a, // formaction
0x119: 0x1, // a
0x11b: 0x5, // about
0x11c: 0xfc09, // itemscope
0x11e: 0x14d08, // noscript
0x11f: 0x15907, // classid
0x120: 0x36203, // xmp
0x121: 0x19604, // base
0x123: 0x1c01, // s
0x124: 0x36b07, // visible
0x126: 0x37b02, // bb
0x127: 0x9c04, // rows
0x12d: 0x2450e, // formnovalidate
0x131: 0x1f205, // track
0x135: 0x18703, // div
0x136: 0xac05, // async
0x137: 0x31508, // property
0x13a: 0x16c03, // dfn
0x13e: 0xf605, // inert
0x142: 0x10503, // del
0x144: 0x25302, // h2
0x147: 0x2c205, // style
0x149: 0x29703, // img
0x14a: 0xc05, // tbody
0x14b: 0x7603, // dir
0x14c: 0x2eb05, // xmlns
0x14e: 0x1f08, // datalist
0x14f: 0x32d04, // href
0x150: 0x1f202, // tr
0x151: 0x13e0a, // blockquote
0x152: 0x18909, // valuetype
0x155: 0xdb06, // footer
0x157: 0x14f06, // script
0x158: 0x1cf07, // colspan
0x15d: 0x1730e, // defaultChecked
0x15f: 0x2490a, // novalidate
0x164: 0x1a408, // codetype
0x165: 0x2c506, // legend
0x16b: 0x1160b, // pauseonexit
0x16c: 0x21f07, // caption
0x16f: 0x26c07, // enabled
0x173: 0x26206, // poster
0x175: 0x30a05, // muted
0x176: 0x11205, // ismap
0x178: 0x2a903, // ins
0x17a: 0xe004, // ruby
0x17b: 0x37c02, // br
0x17c: 0x8a0f, // defaultSelected
0x17d: 0x7403, // kbd
0x17f: 0x1c906, // source
0x182: 0x9f04, // span
0x184: 0x2d803, // max
0x18a: 0x5b02, // dd
0x18b: 0x13a04, // size
0x18c: 0xa405, // media
0x18d: 0x19208, // codebase
0x18f: 0x4905, // embed
0x192: 0x5104, // type
0x193: 0xf005, // param
0x194: 0x25b02, // h6
0x197: 0x28304, // icon
0x198: 0x12607, // bgcolor
0x199: 0x2ad0f, // allowfullscreen
0x19a: 0x12004, // time
0x19b: 0x7803, // rev
0x19d: 0x34208, // progress
0x19e: 0x22606, // figure
0x1a0: 0x6a02, // rp
0x1a2: 0xa606, // dialog
0x1a4: 0x2802, // rt
0x1a7: 0x1e304, // area
0x1a8: 0x7808, // reversed
0x1aa: 0x32104, // open
0x1ac: 0x2d204, // head
0x1ad: 0x7005, // alink
0x1af: 0x28003, // var
0x1b0: 0x15f07, // details
0x1b1: 0x2401, // i
0x1b3: 0x1e02, // td
0x1b4: 0xb707, // declare
0x1b5: 0x8302, // ul
0x1ba: 0x2fc06, // method
0x1bd: 0x13007, // section
0x1be: 0x22a08, // required
0x1c2: 0x9805, // defer
0x1c3: 0x37205, // vlink
0x1c4: 0x15405, // title
0x1c5: 0x2770a, // http-equiv
0x1c6: 0x1fa07, // enctype
0x1c7: 0x1ec07, // compact
0x1c8: 0x2d809, // maxlength
0x1c9: 0x16508, // selected
0x1cc: 0xd105, // audio
0x1cd: 0xc208, // longdesc
0x1d1: 0xfb04, // cite
0x1da: 0x2505, // start
0x1de: 0x2d102, // th
0x1df: 0x10808, // autoplay
0x1e2: 0x7104, // link
0x1e3: 0x206, // output
0x1e5: 0x12204, // menu
0x1e6: 0x2a405, // input
0x1eb: 0x32403, // nav
0x1ec: 0x31d03, // sub
0x1ee: 0x1807, // charset
0x1ef: 0x7f07, // default
0x1f3: 0x2f205, // table
0x1f4: 0x23b04, // form
0x1f5: 0x23209, // truespeed
0x1f6: 0x2f02, // rb
0x1fb: 0x20b09, // translate
0x1fd: 0x2e002, // h1
}

View file

@ -1,511 +0,0 @@
// Package html minifies HTML5 following the specifications at http://www.w3.org/TR/html5/syntax.html.
package html
import (
"bytes"
"io"
"github.com/tdewolff/minify/v2"
"github.com/tdewolff/parse/v2"
"github.com/tdewolff/parse/v2/buffer"
"github.com/tdewolff/parse/v2/html"
)
// Byte-slice constants used by the HTML minifier, declared once at package
// level so they are not rebuilt on every use.
var (
// fragments written verbatim to the output
gtBytes = []byte(">")
isBytes = []byte("=")
spaceBytes = []byte(" ")
doctypeBytes = []byte("<!doctype html>")
// mimetypes used to select a minifier for embedded content and to
// recognise default attribute values
jsMimeBytes = []byte("application/javascript")
cssMimeBytes = []byte("text/css")
htmlMimeBytes = []byte("text/html")
svgMimeBytes = []byte("image/svg+xml")
formMimeBytes = []byte("application/x-www-form-urlencoded")
mathMimeBytes = []byte("application/mathml+xml")
// URL scheme prefixes checked on attribute values
dataSchemeBytes = []byte("data:")
jsSchemeBytes = []byte("javascript:")
httpBytes = []byte("http")
// attribute values compared against when deciding whether an attribute
// can be dropped
radioBytes = []byte("radio")
onBytes = []byte("on")
textBytes = []byte("text")
noneBytes = []byte("none")
submitBytes = []byte("submit")
allBytes = []byte("all")
rectBytes = []byte("rect")
dataBytes = []byte("data")
getBytes = []byte("get")
autoBytes = []byte("auto")
oneBytes = []byte("one")
// params passed to the CSS minifier when minifying a style attribute
inlineParams = map[string]string{"inline": "1"}
)
////////////////////////////////////////////////////////////////
// DefaultMinifier is the default minifier. It has every Keep* option turned
// off and is the minifier used by the package-level Minify function.
var DefaultMinifier = &Minifier{}
// Minifier is an HTML minifier. The zero value applies every minification;
// each Keep* field turns one of them off.
type Minifier struct {
// KeepComments writes comment tokens unchanged instead of dropping them.
KeepComments bool
// KeepConditionalComments keeps "[if ...]" / "[endif]" conditional
// comments; the content of downlevel-hidden ones is minified recursively.
KeepConditionalComments bool
// KeepDefaultAttrVals keeps attributes whose value equals the default,
// such as type=text on input or method=get.
KeepDefaultAttrVals bool
// KeepDocumentTags keeps attribute-less html, head and body tags.
KeepDocumentTags bool
// KeepEndTags keeps end tags that are otherwise omitted, such as </li>,
// </td> and, depending on what follows, </p> and </optgroup>.
KeepEndTags bool
// KeepQuotes is passed on to html.EscapeAttrVal when attribute values are
// written; presumably it forces values to stay quoted (TODO confirm).
KeepQuotes bool
// KeepWhitespace keeps the whitespace between text and adjacent tags that
// would otherwise be trimmed.
KeepWhitespace bool
}
// Minify minifies the HTML read from r and writes the result to w. It is a
// convenience wrapper that delegates to DefaultMinifier.
func Minify(m *minify.M, w io.Writer, r io.Reader, params map[string]string) error {
	minifier := DefaultMinifier
	return minifier.Minify(m, w, r, params)
}
// Minify minifies HTML data, it reads from r and writes to w.
//
// The input is tokenized by html.NewLexer and consumed through a TokenBuffer,
// which allows peeking at upcoming tokens. Every token is rewritten and
// written to w as it is processed. The content of style, script and iframe
// elements, SVG and MathML tokens, style attributes and on* attributes is
// handed to m.MinifyMimetype; when that reports minify.ErrNotExist the
// content is kept as it is, any other error is returned with its position
// updated.
//
// It returns nil once the lexer reports io.EOF and the lexer's error
// otherwise. The params argument is ignored. Errors from w.Write are not
// checked, except for the zero-length write issued when the lexer stops.
func (o *Minifier) Minify(m *minify.M, w io.Writer, r io.Reader, _ map[string]string) error {
// hash of the raw tag whose content comes next, zero when there is none
var rawTagHash Hash
// copy of the type attribute value seen on that raw tag, if any
var rawTagMediatype []byte
omitSpace := true // if true the next leading space is omitted
// true after a <pre> start tag until its end tag; text is then written untouched
inPre := false
// scratch buffers reused for every attribute
attrMinifyBuffer := buffer.NewWriter(make([]byte, 0, 64))
attrByteBuffer := make([]byte, 0, 64)
z := parse.NewInput(r)
defer z.Restore()
l := html.NewLexer(z)
tb := NewTokenBuffer(z, l)
for {
t := *tb.Shift()
switch t.TokenType {
case html.ErrorToken:
// zero-length write; presumably gives w a chance to report a pending error
if _, err := w.Write(nil); err != nil {
return err
}
if l.Err() == io.EOF {
return nil
}
return l.Err()
case html.DoctypeToken:
// every doctype is replaced by the short HTML5 doctype
w.Write(doctypeBytes)
case html.CommentToken:
if o.KeepComments {
w.Write(t.Data)
} else if o.KeepConditionalComments && 6 < len(t.Text) && (bytes.HasPrefix(t.Text, []byte("[if ")) || bytes.HasSuffix(t.Text, []byte("[endif]")) || bytes.HasSuffix(t.Text, []byte("[endif]--"))) {
// [if ...] is always 7 or more characters, [endif] is only encountered for downlevel-revealed
// see https://msdn.microsoft.com/en-us/library/ms537512(v=vs.85).aspx#syntax
if bytes.HasPrefix(t.Data, []byte("<!--[if ")) && bytes.HasSuffix(t.Data, []byte("<![endif]-->")) { // downlevel-hidden
// keep the opening and closing markers and minify the HTML in between recursively
begin := bytes.IndexByte(t.Data, '>') + 1
end := len(t.Data) - len("<![endif]-->")
w.Write(t.Data[:begin])
if err := o.Minify(m, w, buffer.NewReader(t.Data[begin:end]), nil); err != nil {
return minify.UpdateErrorPosition(err, z, t.Offset)
}
w.Write(t.Data[end:])
} else {
w.Write(t.Data) // downlevel-revealed or short downlevel-hidden
}
} else if 1 < len(t.Text) && t.Text[0] == '#' {
// SSI tags
w.Write(t.Data)
}
case html.SvgToken:
// minify with the registered SVG minifier, or write unchanged if there is none
if err := m.MinifyMimetype(svgMimeBytes, w, buffer.NewReader(t.Data), nil); err != nil {
if err != minify.ErrNotExist {
return minify.UpdateErrorPosition(err, z, t.Offset)
}
w.Write(t.Data)
}
case html.MathToken:
// minify with the registered MathML minifier, or write unchanged if there is none
if err := m.MinifyMimetype(mathMimeBytes, w, buffer.NewReader(t.Data), nil); err != nil {
if err != minify.ErrNotExist {
return minify.UpdateErrorPosition(err, z, t.Offset)
}
w.Write(t.Data)
}
case html.TextToken:
// CSS and JS minifiers for inline code
if rawTagHash != 0 {
if rawTagHash == Style || rawTagHash == Script || rawTagHash == Iframe {
// pick the mimetype: iframe content is HTML, otherwise the tag's type
// attribute wins over the default for script and style
var mimetype []byte
var params map[string]string
if rawTagHash == Iframe {
mimetype = htmlMimeBytes
} else if len(rawTagMediatype) > 0 {
mimetype, params = parse.Mediatype(rawTagMediatype)
} else if rawTagHash == Script {
mimetype = jsMimeBytes
} else if rawTagHash == Style {
mimetype = cssMimeBytes
}
if err := m.MinifyMimetype(mimetype, w, buffer.NewReader(t.Data), params); err != nil {
if err != minify.ErrNotExist {
return minify.UpdateErrorPosition(err, z, t.Offset)
}
w.Write(t.Data)
}
} else {
w.Write(t.Data)
}
} else if inPre {
w.Write(t.Data)
} else {
t.Data = parse.ReplaceMultipleWhitespaceAndEntities(t.Data, EntitiesMap, TextRevEntitiesMap)
// whitespace removal; trim left
if omitSpace && parse.IsWhitespace(t.Data[0]) {
t.Data = t.Data[1:]
}
// whitespace removal; trim right
omitSpace = false
if len(t.Data) == 0 {
omitSpace = true
} else if parse.IsWhitespace(t.Data[len(t.Data)-1]) {
omitSpace = true
// look ahead to decide whether the trailing whitespace can be dropped
i := 0
for {
next := tb.Peek(i)
// trim if EOF, text token with leading whitespace or block token
if next.TokenType == html.ErrorToken {
t.Data = t.Data[:len(t.Data)-1]
omitSpace = false
break
} else if next.TokenType == html.TextToken {
// this only happens when a comment, doctype or phrasing end tag (only for !o.KeepWhitespace) was in between
// remove if the text token starts with a whitespace
if len(next.Data) > 0 && parse.IsWhitespace(next.Data[0]) {
t.Data = t.Data[:len(t.Data)-1]
omitSpace = false
}
break
} else if next.TokenType == html.StartTagToken || next.TokenType == html.EndTagToken {
if o.KeepWhitespace {
break
}
// remove when followed up by a block tag
if next.Traits&nonPhrasingTag != 0 {
t.Data = t.Data[:len(t.Data)-1]
omitSpace = false
break
} else if next.TokenType == html.StartTagToken {
break
}
}
i++
}
}
w.Write(t.Data)
}
case html.StartTagToken, html.EndTagToken:
rawTagHash = 0
hasAttributes := false
if t.TokenType == html.StartTagToken {
if next := tb.Peek(0); next.TokenType == html.AttributeToken {
hasAttributes = true
}
if t.Traits&rawTag != 0 {
// ignore empty script and style tags
if !hasAttributes && (t.Hash == Script || t.Hash == Style) {
if next := tb.Peek(1); next.TokenType == html.EndTagToken {
// consume the two peeked tokens (presumably the start tag close and the end tag)
tb.Shift()
tb.Shift()
break
}
}
rawTagHash = t.Hash
rawTagMediatype = nil
// do not minify content of <style amp-boilerplate>
if hasAttributes && t.Hash == Style {
if attrs := tb.Attributes(Amp_Boilerplate); attrs[0] != nil {
rawTagHash = 0
}
}
}
} else if t.Hash == Template {
omitSpace = true // EndTagToken
}
if t.Hash == Pre {
inPre = t.TokenType == html.StartTagToken
}
// remove superfluous tags, except for html, head and body tags when KeepDocumentTags is set
if !hasAttributes && (!o.KeepDocumentTags && (t.Hash == Html || t.Hash == Head || t.Hash == Body) || t.Hash == Colgroup) {
break
} else if t.TokenType == html.EndTagToken {
omitEndTag := false
if !o.KeepEndTags {
if t.Hash == Thead || t.Hash == Tbody || t.Hash == Tfoot || t.Hash == Tr || t.Hash == Th ||
t.Hash == Td || t.Hash == Option || t.Hash == Dd || t.Hash == Dt || t.Hash == Li ||
t.Hash == Rb || t.Hash == Rt || t.Hash == Rtc || t.Hash == Rp {
omitEndTag = true // omit end tags
} else if t.Hash == P {
// </p> can only be dropped depending on the next non-whitespace token
i := 0
for {
next := tb.Peek(i)
i++
// continue if text token is empty or whitespace
if next.TokenType == html.TextToken && parse.IsAllWhitespace(next.Data) {
continue
}
if next.TokenType == html.ErrorToken || next.TokenType == html.EndTagToken && next.Traits&keepPTag == 0 || next.TokenType == html.StartTagToken && next.Traits&omitPTag != 0 {
omitEndTag = true // omit p end tag
}
break
}
} else if t.Hash == Optgroup {
// </optgroup> is dropped unless the next non-text token is an option tag
i := 0
for {
next := tb.Peek(i)
i++
// continue if text token
if next.TokenType == html.TextToken {
continue
}
if next.TokenType == html.ErrorToken || next.Hash != Option {
omitEndTag = true // omit optgroup end tag
}
break
}
}
}
if t.Traits&nonPhrasingTag != 0 {
omitSpace = true // omit spaces after block elements
} else if o.KeepWhitespace || t.Traits&objectTag != 0 {
omitSpace = false
}
if !omitEndTag {
// cut the token down to "</" + t.Text + ">" when it is longer than that
if len(t.Data) > 3+len(t.Text) {
t.Data[2+len(t.Text)] = '>'
t.Data = t.Data[:3+len(t.Text)]
}
w.Write(t.Data)
}
// skip text in select and optgroup tags
if t.Hash == Option || t.Hash == Optgroup {
if next := tb.Peek(0); next.TokenType == html.TextToken {
tb.Shift()
}
}
break
}
// from here on t is a start tag that is kept
if o.KeepWhitespace || t.Traits&objectTag != 0 {
omitSpace = false
} else if t.Traits&nonPhrasingTag != 0 {
omitSpace = true // omit spaces after block elements
}
w.Write(t.Data)
if hasAttributes {
// tag-specific rewrites; setting an attribute's Text to nil marks it as removed
if t.Hash == Meta {
attrs := tb.Attributes(Content, Http_Equiv, Charset, Name)
if content := attrs[0]; content != nil {
if httpEquiv := attrs[1]; httpEquiv != nil {
httpEquiv.AttrVal = parse.TrimWhitespace(httpEquiv.AttrVal)
if charset := attrs[2]; charset == nil && parse.EqualFold(httpEquiv.AttrVal, []byte("content-type")) {
content.AttrVal = minify.Mediatype(content.AttrVal)
// rewrite http-equiv=content-type with a UTF-8 HTML content into charset=utf-8
if bytes.Equal(content.AttrVal, []byte("text/html;charset=utf-8")) {
httpEquiv.Text = nil
content.Text = []byte("charset")
content.Hash = Charset
content.AttrVal = []byte("utf-8")
}
}
}
if name := attrs[3]; name != nil {
name.AttrVal = parse.TrimWhitespace(name.AttrVal)
if parse.EqualFold(name.AttrVal, []byte("keywords")) {
content.AttrVal = bytes.ReplaceAll(content.AttrVal, []byte(", "), []byte(","))
} else if parse.EqualFold(name.AttrVal, []byte("viewport")) {
// remove all spaces and shorten each number that follows a '='
content.AttrVal = bytes.ReplaceAll(content.AttrVal, []byte(" "), []byte(""))
for i := 0; i < len(content.AttrVal); i++ {
if content.AttrVal[i] == '=' && i+2 < len(content.AttrVal) {
i++
if n := parse.Number(content.AttrVal[i:]); n > 0 {
minNum := minify.Number(content.AttrVal[i:i+n], -1)
if len(minNum) < n {
// shift the remainder left over the shortened number
copy(content.AttrVal[i:i+len(minNum)], minNum)
copy(content.AttrVal[i+len(minNum):], content.AttrVal[i+n:])
content.AttrVal = content.AttrVal[:len(content.AttrVal)+len(minNum)-n]
}
i += len(minNum)
}
i-- // mitigate for-loop increase
}
}
}
}
}
} else if t.Hash == Script {
// charset is dropped from scripts that also have a src attribute
attrs := tb.Attributes(Src, Charset)
if attrs[0] != nil && attrs[1] != nil {
attrs[1].Text = nil
}
} else if t.Hash == Input {
attrs := tb.Attributes(Type, Value)
// note: t shadows the tag token inside this if and is the type attribute
if t, value := attrs[0], attrs[1]; t != nil && value != nil {
isRadio := parse.EqualFold(t.AttrVal, radioBytes)
if !isRadio && len(value.AttrVal) == 0 {
value.Text = nil
} else if isRadio && parse.EqualFold(value.AttrVal, onBytes) {
value.Text = nil
}
}
} else if t.Hash == A {
// name is dropped from anchors when it equals id
attrs := tb.Attributes(Id, Name)
if id, name := attrs[0], attrs[1]; id != nil && name != nil {
if bytes.Equal(id.AttrVal, name.AttrVal) {
name.Text = nil
}
}
}
// write attributes
for {
attr := *tb.Shift()
if attr.TokenType != html.AttributeToken {
break
} else if attr.Text == nil {
continue // removed attribute
}
val := attr.AttrVal
if attr.Traits&trimAttr != 0 {
val = parse.ReplaceMultipleWhitespaceAndEntities(val, EntitiesMap, nil)
val = parse.TrimWhitespace(val)
} else {
val = parse.ReplaceEntities(val, EntitiesMap, nil)
}
// the attribute-specific minification below is skipped for tags without traits
if t.Traits != 0 {
if len(val) == 0 && (attr.Hash == Class ||
attr.Hash == Dir ||
attr.Hash == Id ||
attr.Hash == Name ||
attr.Hash == Action && t.Hash == Form) {
continue // omit empty attribute values
}
if attr.Traits&caselessAttr != 0 {
val = parse.ToLower(val)
if attr.Hash == Enctype || attr.Hash == Codetype || attr.Hash == Accept || attr.Hash == Type && (t.Hash == A || t.Hash == Link || t.Hash == Embed || t.Hash == Object || t.Hash == Source || t.Hash == Script || t.Hash == Style) {
val = minify.Mediatype(val)
}
}
// remember the type of a raw tag so that its content gets the matching minifier
if rawTagHash != 0 && attr.Hash == Type {
rawTagMediatype = parse.Copy(val)
}
// default attribute values can be omitted
if !o.KeepDefaultAttrVals && (attr.Hash == Type && (t.Hash == Script && jsMimetypes[string(val)] ||
t.Hash == Style && bytes.Equal(val, cssMimeBytes) ||
t.Hash == Link && bytes.Equal(val, cssMimeBytes) ||
t.Hash == Input && bytes.Equal(val, textBytes) ||
t.Hash == Button && bytes.Equal(val, submitBytes)) ||
attr.Hash == Language && t.Hash == Script ||
attr.Hash == Method && bytes.Equal(val, getBytes) ||
attr.Hash == Enctype && bytes.Equal(val, formMimeBytes) ||
attr.Hash == Colspan && bytes.Equal(val, oneBytes) ||
attr.Hash == Rowspan && bytes.Equal(val, oneBytes) ||
attr.Hash == Shape && bytes.Equal(val, rectBytes) ||
attr.Hash == Span && bytes.Equal(val, oneBytes) ||
attr.Hash == Clear && bytes.Equal(val, noneBytes) ||
attr.Hash == Frameborder && bytes.Equal(val, oneBytes) ||
attr.Hash == Scrolling && bytes.Equal(val, autoBytes) ||
attr.Hash == Valuetype && bytes.Equal(val, dataBytes) ||
attr.Hash == Media && t.Hash == Style && bytes.Equal(val, allBytes)) {
continue
}
if attr.Hash == Style {
// CSS minifier for attribute inline code
val = parse.TrimWhitespace(val)
attrMinifyBuffer.Reset()
if err := m.MinifyMimetype(cssMimeBytes, attrMinifyBuffer, buffer.NewReader(val), inlineParams); err == nil {
val = attrMinifyBuffer.Bytes()
} else if err != minify.ErrNotExist {
return minify.UpdateErrorPosition(err, z, attr.Offset)
}
if len(val) == 0 {
continue
}
} else if len(attr.Text) > 2 && attr.Text[0] == 'o' && attr.Text[1] == 'n' {
// JS minifier for attribute inline code
val = parse.TrimWhitespace(val)
// strip a leading "javascript:" (11 bytes, compared case-insensitively)
if len(val) >= 11 && parse.EqualFold(val[:11], jsSchemeBytes) {
val = val[11:]
}
attrMinifyBuffer.Reset()
if err := m.MinifyMimetype(jsMimeBytes, attrMinifyBuffer, buffer.NewReader(val), nil); err == nil {
val = attrMinifyBuffer.Bytes()
} else if err != minify.ErrNotExist {
return minify.UpdateErrorPosition(err, z, attr.Offset)
}
if len(val) == 0 {
continue
}
} else if attr.Traits&urlAttr != 0 { // anchors are already handled
val = parse.TrimWhitespace(val)
if 5 < len(val) {
if parse.EqualFold(val[:4], httpBytes) {
// drop "http:" / "https:" when it equals the scheme of m.URL, otherwise only lowercase it
if val[4] == ':' {
if m.URL != nil && m.URL.Scheme == "http" {
val = val[5:]
} else {
// result is discarded; relies on ToLower changing val in place (TODO confirm)
parse.ToLower(val[:4])
}
} else if (val[4] == 's' || val[4] == 'S') && val[5] == ':' {
if m.URL != nil && m.URL.Scheme == "https" {
val = val[6:]
} else {
parse.ToLower(val[:5])
}
}
} else if parse.EqualFold(val[:5], dataSchemeBytes) {
val = minify.DataURI(m, val)
}
}
}
}
w.Write(spaceBytes)
w.Write(attr.Text)
// attributes with an empty value and boolean attributes are written by name only
if len(val) > 0 && attr.Traits&booleanAttr == 0 {
w.Write(isBytes)
// use double quotes for RDFa attributes
isXML := attr.Hash == Vocab || attr.Hash == Typeof || attr.Hash == Property || attr.Hash == Resource || attr.Hash == Prefix || attr.Hash == Content || attr.Hash == About || attr.Hash == Rev || attr.Hash == Datatype || attr.Hash == Inlist
// no quotes if possible, else prefer single or double depending on which occurs more often in value
val = html.EscapeAttrVal(&attrByteBuffer, attr.AttrVal, val, o.KeepQuotes || isXML)
w.Write(val)
}
}
} else {
_ = tb.Shift() // StartTagClose
}
w.Write(gtBytes)
// skip text in select and optgroup tags
if t.Hash == Select || t.Hash == Optgroup {
if next := tb.Peek(0); next.TokenType == html.TextToken {
tb.Shift()
}
}
// keep space after phrasing tags (<i>, <span>, ...) FontAwesome etc.
if t.TokenType == html.StartTagToken && t.Traits&nonPhrasingTag == 0 {
if next := tb.Peek(0); next.Hash == t.Hash && next.TokenType == html.EndTagToken {
omitSpace = false
}
}
}
}
}

File diff suppressed because it is too large Load diff

View file

@ -1,371 +0,0 @@
// Package minify relates MIME type to minifiers. Several minifiers are provided in the subpackages.
package minify
import (
"bytes"
"errors"
"fmt"
"io"
"io/ioutil"
"log"
"mime"
"net/http"
"net/url"
"os"
"os/exec"
"path"
"regexp"
"strings"
"sync"
"github.com/tdewolff/parse/v2"
"github.com/tdewolff/parse/v2/buffer"
)
var (
	// Warning is the logger on which usage warnings, such as the use of a
	// deprecated feature, are reported. It writes to standard error.
	Warning = log.New(os.Stderr, "WARNING: ", 0)

	// ErrNotExist is returned when no minifier exists for a given mimetype.
	ErrNotExist = errors.New("minifier does not exist for mimetype")

	// ErrClosedWriter is returned when writing to a closed writer.
	ErrClosedWriter = errors.New("write on closed writer")
)
////////////////////////////////////////////////////////////////
// MinifierFunc is a function type that implements Minifier, so that a plain
// function can be registered wherever a Minifier is expected.
type MinifierFunc func(*M, io.Writer, io.Reader, map[string]string) error

// Minify implements Minifier by calling fn with the arguments unchanged.
func (fn MinifierFunc) Minify(m *M, w io.Writer, r io.Reader, params map[string]string) error {
	err := fn(m, w, r, params)
	return err
}
// Minifier is the interface implemented by all minifiers.
type Minifier interface {
	// Minify reads the input from r and writes the minified result to w.
	// m is the registry the call came through; it is used for minifying
	// embedded resources, such as JS within HTML. params carries the
	// parameters of the mediatype.
	Minify(m *M, w io.Writer, r io.Reader, params map[string]string) error
}
////////////////////////////////////////////////////////////////
type (
	// patternMinifier couples a minifier with the regular expression that a
	// mimetype has to match for the minifier to be selected.
	patternMinifier struct {
		pattern *regexp.Regexp
		Minifier
	}

	// cmdMinifier is a Minifier that delegates the work to an external
	// command.
	cmdMinifier struct {
		cmd *exec.Cmd
	}
)

// cmdArgExtension matches a file extension (a dot followed by letters and
// digits) at the start of a string. It is applied to the text that directly
// follows an "$in" or "$out" placeholder in a command argument.
var cmdArgExtension = regexp.MustCompile(`^\.[0-9a-zA-Z]+`)
// Minify runs the external command to minify the data read from r and writes
// the command's output to w.
//
// An argument containing "$in" has that placeholder (together with a file
// extension such as ".css" directly behind it, if present) replaced by the
// name of a temporary file that holds the input; without such an argument the
// input is connected to the command's stdin. "$out" works the same way for
// the output, with stdout as the fallback. An argument containing both
// placeholders only has "$in" substituted.
//
// Temporary files are closed and removed before Minify returns. When the
// command exits unsuccessfully the returned error includes its stderr output,
// if there was any. A failure to copy the output file to w is reported when
// the command itself succeeded.
func (c *cmdMinifier) Minify(_ *M, w io.Writer, r io.Reader, _ map[string]string) error {
	cmd := &exec.Cmd{}
	*cmd = *c.cmd // concurrency safety
	// The struct copy still shares the Args backing array with c.cmd. Clone
	// it so that substituting the placeholders below neither races with
	// concurrent calls nor erases the placeholders for later calls.
	cmd.Args = append([]string(nil), c.cmd.Args...)

	// Close and delete every temporary file on all return paths.
	var temps []*os.File
	defer func() {
		for _, f := range temps {
			f.Close()
			os.Remove(f.Name())
		}
	}()

	var in, out *os.File
	for i, arg := range cmd.Args {
		if j := strings.Index(arg, "$in"); j != -1 {
			var err error
			ext := cmdArgExtension.FindString(arg[j+3:])
			if in, err = ioutil.TempFile("", "minify-in-*"+ext); err != nil {
				return err
			}
			temps = append(temps, in)
			cmd.Args[i] = arg[:j] + in.Name() + arg[j+3+len(ext):]
		} else if j := strings.Index(arg, "$out"); j != -1 {
			var err error
			ext := cmdArgExtension.FindString(arg[j+4:])
			if out, err = ioutil.TempFile("", "minify-out-*"+ext); err != nil {
				return err
			}
			temps = append(temps, out)
			cmd.Args[i] = arg[:j] + out.Name() + arg[j+4+len(ext):]
		}
	}

	if in == nil {
		cmd.Stdin = r
	} else if _, err := io.Copy(in, r); err != nil {
		return err
	}
	if out == nil {
		cmd.Stdout = w
	}

	stderr := &bytes.Buffer{}
	cmd.Stderr = stderr

	err := cmd.Run()
	if _, ok := err.(*exec.ExitError); ok {
		if stderr.Len() != 0 {
			err = fmt.Errorf("%s", stderr.String())
		}
		err = fmt.Errorf("command %s failed: %w", cmd.Path, err)
	}

	if out != nil {
		// As before, whatever the command wrote is forwarded even when it
		// failed; the command's own error takes precedence over a copy error.
		if _, copyErr := io.Copy(w, out); err == nil {
			err = copyErr
		}
	}
	return err
}
////////////////////////////////////////////////////////////////
// M holds a map of mimetype => function to allow recursive minifier calls of the minifier functions.
type M struct {
// mutex guards literal and pattern.
mutex sync.RWMutex
// literal holds the minifiers registered for an exact mimetype.
literal map[string]Minifier
// pattern holds the minifiers registered for a mimetype pattern; they are
// tried in registration order after literal had no match.
pattern []patternMinifier
// URL is nil unless set by the user. Minifiers may consult it, e.g. the
// HTML minifier compares its Scheme when shortening URL attributes.
URL *url.URL
}
// New returns a new M that has no minifiers registered and no URL set.
func New() *M {
	m := new(M)
	m.literal = map[string]Minifier{}
	m.pattern = []patternMinifier{}
	return m
}
// Add adds a minifier to the mimetype => function map, replacing any minifier
// that was registered for exactly that mimetype before.
//
// The map is updated under m's write lock, which is released by a deferred
// call so that a panic (for instance on a zero-value M, whose map is nil) does
// not leave m locked.
// NOTE(review): upstream documents Add as unsafe for concurrent use; confirm
// before registering minifiers while minification is in progress.
func (m *M) Add(mimetype string, minifier Minifier) {
	m.mutex.Lock()
	defer m.mutex.Unlock()
	m.literal[mimetype] = minifier
}
// AddFunc adds a minify function to the mimetype => function map (documented
// upstream as unsafe for concurrent use). It is Add for minifiers that are
// plain functions.
func (m *M) AddFunc(mimetype string, minifier MinifierFunc) {
	m.Add(mimetype, minifier)
}
// AddRegexp adds a minifier that is selected for every mimetype matching
// pattern. Pattern minifiers are appended, so earlier registrations take
// precedence over later ones.
//
// The list is updated under m's write lock, which is released by a deferred
// call so that it cannot stay locked if the update panics.
// NOTE(review): upstream documents AddRegexp as unsafe for concurrent use;
// confirm before registering minifiers while minification is in progress.
func (m *M) AddRegexp(pattern *regexp.Regexp, minifier Minifier) {
	m.mutex.Lock()
	defer m.mutex.Unlock()
	m.pattern = append(m.pattern, patternMinifier{pattern, minifier})
}
// AddFuncRegexp adds a minify function that is selected for every mimetype
// matching pattern (documented upstream as unsafe for concurrent use). It is
// AddRegexp for minifiers that are plain functions.
func (m *M) AddFuncRegexp(pattern *regexp.Regexp, minifier MinifierFunc) {
	m.AddRegexp(pattern, minifier)
}
// AddCmd registers an external command as the minifier for mimetype
// (documented upstream as unsafe for concurrent use). The command is executed
// for every minification of that mimetype.
// It allows the use of external tools like ClosureCompiler, UglifyCSS, etc. for a specific mimetype.
func (m *M) AddCmd(mimetype string, cmd *exec.Cmd) {
	m.Add(mimetype, &cmdMinifier{cmd})
}
// AddCmdRegexp registers an external command as the minifier for every
// mimetype matching pattern (documented upstream as unsafe for concurrent
// use). The command is executed for every minification of such a mimetype.
// It allows the use of external tools like ClosureCompiler, UglifyCSS, etc. for a specific mimetype regular expression.
func (m *M) AddCmdRegexp(pattern *regexp.Regexp, cmd *exec.Cmd) {
	m.AddRegexp(pattern, &cmdMinifier{cmd})
}
// Match reports which minifier would handle mediatype. It returns the key
// that matched (the mimetype itself for an exact registration, otherwise the
// text of the matching pattern), the parameters parsed from mediatype and the
// minify function. When nothing matches, the mimetype, the parameters and a
// nil function are returned.
// It has the same matching algorithm as Minify.
func (m *M) Match(mediatype string) (string, map[string]string, MinifierFunc) {
	mimetype, params := parse.Mediatype([]byte(mediatype))

	m.mutex.RLock()
	defer m.mutex.RUnlock()

	name := string(mimetype)
	if exact, found := m.literal[name]; found {
		return name, params, exact.Minify
	}
	for _, candidate := range m.pattern {
		if !candidate.pattern.Match(mimetype) {
			continue
		}
		return candidate.pattern.String(), params, candidate.Minify
	}
	return name, params, nil
}
// Minify minifies the content of a Reader and writes it to a Writer (safe for concurrent use).
// The mediatype is split into its mimetype and parameters, which are then
// passed to MinifyMimetype; the result of that call is returned, including
// ErrNotExist when no minifier is registered for the mimetype.
// Mediatype may take the form of 'text/plain', 'text/*', '*/*' or 'text/plain; charset=UTF-8; version=2.0'.
func (m *M) Minify(mediatype string, w io.Writer, r io.Reader) error {
	raw := []byte(mediatype)
	mimetype, params := parse.Mediatype(raw)
	return m.MinifyMimetype(mimetype, w, r, params)
}
// MinifyMimetype minifies the content of a Reader and writes it to a Writer (safe for concurrent use).
// It is a lower level version of Minify and requires the mediatype to be split up into mimetype and parameters.
// It is mostly used internally by minifiers because it is faster (no need to convert a byte-slice to string and vice versa).
//
// A minifier registered for the exact mimetype wins over pattern minifiers,
// which are tried in registration order. ErrNotExist is returned when none
// matches; otherwise the minifier's own result is returned.
//
// The read lock is held only while the minifier is looked up, not while it
// runs. Minifiers call back into MinifyMimetype for embedded content, and
// sync.RWMutex does not allow recursive read locking: with the lock held, a
// concurrent Add* call waiting for the write lock would deadlock the nested
// call.
func (m *M) MinifyMimetype(mimetype []byte, w io.Writer, r io.Reader, params map[string]string) error {
	minifier, ok := m.minifierFor(mimetype)
	if !ok {
		return ErrNotExist
	}
	return minifier.Minify(m, w, r, params)
}

// minifierFor returns the minifier registered for mimetype and whether one
// was found, taking the read lock for the duration of the lookup only.
func (m *M) minifierFor(mimetype []byte) (Minifier, bool) {
	m.mutex.RLock()
	defer m.mutex.RUnlock()

	if minifier, ok := m.literal[string(mimetype)]; ok { // string conversion is optimized away
		return minifier, true
	}
	for _, candidate := range m.pattern {
		if candidate.pattern.Match(mimetype) {
			return candidate.Minifier, true
		}
	}
	return nil, false
}
// Bytes minifies a byte slice (safe for concurrent use). On failure it
// returns v itself together with the error, which is ErrNotExist when no
// minifier is registered for the mimetype or otherwise the error of the
// minifier function.
func (m *M) Bytes(mediatype string, v []byte) ([]byte, error) {
	dst := buffer.NewWriter(make([]byte, 0, len(v)))
	err := m.Minify(mediatype, dst, buffer.NewReader(v))
	if err != nil {
		return v, err
	}
	return dst.Bytes(), nil
}
// String minifies a string (safe for concurrent use). On failure it returns
// v itself together with the error, which is ErrNotExist when no minifier is
// registered for the mimetype or otherwise the error of the minifier
// function.
func (m *M) String(mediatype string, v string) (string, error) {
	dst := buffer.NewWriter(make([]byte, 0, len(v)))
	err := m.Minify(mediatype, dst, buffer.NewReader([]byte(v)))
	if err != nil {
		return v, err
	}
	return string(dst.Bytes()), nil
}
// Reader returns a reader that yields the minified form of r. Minification
// runs in its own goroutine and feeds the returned reader through a pipe.
// Errors from the minifier are returned by the reader.
func (m *M) Reader(mediatype string, r io.Reader) io.Reader {
	pr, pw := io.Pipe()
	go func() {
		// With a nil error CloseWithError acts like Close: the read side
		// then sees io.EOF once the data is drained.
		pw.CloseWithError(m.Minify(mediatype, pw, r))
	}()
	return pr
}
// writer makes sure that errors from the minifier are passed down through Close (can be blocking).
type writer struct {
// pw is the write end of the pipe feeding the minifier goroutine.
pw *io.PipeWriter
// wg is waited on by Close until the minifier goroutine has finished.
wg sync.WaitGroup
// err is the error the minifier returned, if any; it is set by the
// minifier goroutine.
err error
// closed is set by Close; Write fails with ErrClosedWriter afterwards.
closed bool
}
// Write passes b on to the minifier through the pipe. After Close it fails
// with ErrClosedWriter. If the minifier has recorded an error, that error is
// returned in place of the error of the pipe write.
func (w *writer) Write(b []byte) (int, error) {
	if w.closed {
		return 0, ErrClosedWriter
	}
	n, err := w.pw.Write(b)
	if minifyErr := w.err; minifyErr != nil {
		return n, minifyErr
	}
	return n, err
}
// Close must be called when writing has finished. The first call closes the
// pipe and blocks until the minifier is done; every call returns the error
// from the minifier.
func (w *writer) Close() error {
	if w.closed {
		return w.err
	}
	w.pw.Close()
	w.wg.Wait()
	w.closed = true
	return w.err
}
// Writer returns a writer whose input is minified and then written to w.
// Minification runs in its own goroutine that is fed through a pipe.
// Errors from the minifier are returned by Close on the writer.
// The writer must be closed explicitly.
func (m *M) Writer(mediatype string, w io.Writer) *writer {
	pr, pw := io.Pipe()
	mw := &writer{pw: pw}
	mw.wg.Add(1)
	go func() {
		defer mw.wg.Done()
		err := m.Minify(mediatype, w, pr)
		if err != nil {
			mw.err = err
		}
		// closing the read end makes further writes to the pipe fail
		pr.Close()
	}()
	return mw
}
// responseWriter wraps an http.ResponseWriter and makes sure that errors from the minifier are passed down through Close (can be blocking).
// All writes to the response writer are intercepted and minified on the fly.
// http.ResponseWriter loses all functionality such as Pusher, Hijacker, Flusher, ...
type responseWriter struct {
http.ResponseWriter
// writer is the minifying writer; it is nil until the first Write.
writer *writer
// m is the registry used to create writer.
m *M
// mediatype is the fallback used when the response has no Content-Type
// header at the time of the first Write.
mediatype string
}
// WriteHeader removes the Content-Length header before forwarding the status
// code to the wrapped response writer.
func (w *responseWriter) WriteHeader(status int) {
	wrapped := w.ResponseWriter
	wrapped.Header().Del("Content-Length")
	wrapped.WriteHeader(status)
}
// Write intercepts any writes to the response writer and sends them through
// the minifier. On the first write the minifying writer is created, using the
// Content-Type header as the mediatype if it is set and the mediatype derived
// from the request otherwise.
func (w *responseWriter) Write(b []byte) (int, error) {
	if w.writer != nil {
		return w.writer.Write(b)
	}

	// first write
	if contentType := w.ResponseWriter.Header().Get("Content-Type"); contentType != "" {
		w.mediatype = contentType
	}
	w.writer = w.m.Writer(w.mediatype, w.ResponseWriter)
	return w.writer.Write(b)
}
// Close must be called when writing has finished. It returns the error from
// the minifier, or nil when nothing was ever written.
func (w *responseWriter) Close() error {
	if w.writer == nil {
		return nil
	}
	return w.writer.Close()
}
// ResponseWriter minifies any writes to the http.ResponseWriter.
// http.ResponseWriter loses all functionality such as Pusher, Hijacker, Flusher, ...
// Minification might be slower than just sending the original file! Caching is advised.
//
// The fallback mediatype, used when the handler sets no Content-Type, is
// derived from the file extension of the request path. The query string is
// excluded: "/style.css?v=2" is treated as ".css" rather than ".css?v=2",
// which no mimetype is registered for.
func (m *M) ResponseWriter(w http.ResponseWriter, r *http.Request) *responseWriter {
	uri := r.RequestURI
	if r.URL != nil && r.URL.Path != "" {
		uri = r.URL.Path
	} else if i := strings.IndexByte(uri, '?'); i >= 0 {
		uri = uri[:i]
	}
	mediatype := mime.TypeByExtension(path.Ext(uri))
	return &responseWriter{w, nil, m, mediatype}
}
// Middleware wraps next so that everything it writes to the
// http.ResponseWriter is minified on the fly. Errors from the minifier are
// discarded; use MiddlewareWithError to handle them.
// http.ResponseWriter loses all functionality such as Pusher, Hijacker, Flusher, ...
// Minification might be slower than just sending the original file! Caching is advised.
func (m *M) Middleware(next http.Handler) http.Handler {
	handle := func(w http.ResponseWriter, r *http.Request) {
		mw := m.ResponseWriter(w, r)
		next.ServeHTTP(mw, r)
		mw.Close()
	}
	return http.HandlerFunc(handle)
}
// MiddlewareWithError wraps next so that everything it writes to the
// http.ResponseWriter is minified on the fly. When the minifier reports an
// error after next has returned, errorFunc is called with the original
// response writer, the request and that error.
// http.ResponseWriter loses all functionality such as Pusher, Hijacker, Flusher, ...
// Minification might be slower than just sending the original file! Caching is advised.
func (m *M) MiddlewareWithError(next http.Handler, errorFunc func(w http.ResponseWriter, r *http.Request, err error)) http.Handler {
	handle := func(w http.ResponseWriter, r *http.Request) {
		mw := m.ResponseWriter(w, r)
		next.ServeHTTP(mw, r)
		err := mw.Close()
		if err != nil {
			errorFunc(w, r, err)
		}
	}
	return http.HandlerFunc(handle)
}

View file

@ -1 +0,0 @@
tests/*/corpus/* linguist-generated

View file

@ -1,5 +0,0 @@
tests/*/fuzz-fuzz.zip
tests/*/crashers
tests/*/suppressions
tests/*/corpus/*
!tests/*/corpus/*.*

View file

@ -1,16 +0,0 @@
linters:
enable:
- depguard
- dogsled
- gofmt
- goimports
- golint
- gosec
- govet
- megacheck
- misspell
- nakedret
- prealloc
- unconvert
- unparam
- wastedassign

View file

@ -1,22 +0,0 @@
Copyright (c) 2015 Taco de Wolff
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.

View file

@ -1,64 +0,0 @@
# Parse [![API reference](https://img.shields.io/badge/godoc-reference-5272B4)](https://pkg.go.dev/github.com/tdewolff/parse/v2?tab=doc) [![Go Report Card](https://goreportcard.com/badge/github.com/tdewolff/parse)](https://goreportcard.com/report/github.com/tdewolff/parse) [![Coverage Status](https://coveralls.io/repos/github/tdewolff/parse/badge.svg?branch=master)](https://coveralls.io/github/tdewolff/parse?branch=master) [![Donate](https://img.shields.io/badge/patreon-donate-DFB317)](https://www.patreon.com/tdewolff)
This package contains several lexers and parsers written in [Go][1]. All subpackages are built to be streaming, high performance and to be in accordance with the official (latest) specifications.
The lexers are implemented using `buffer.Lexer` in https://github.com/tdewolff/parse/buffer and the parsers work on top of the lexers. Some subpackages have hashes defined (using [Hasher](https://github.com/tdewolff/hasher)) that speed up common byte-slice comparisons.
## Buffer
### Reader
Reader is a wrapper around a `[]byte` that implements the `io.Reader` interface. It is comparable to `bytes.Reader` but has slightly different semantics (and a slightly smaller memory footprint).
### Writer
Writer is a buffer that implements the `io.Writer` interface and expands the buffer as needed. The reset functionality allows for better memory reuse. After calling `Reset`, it will overwrite the current buffer and thus reduce allocations.
### Lexer
Lexer is a read buffer specifically designed for building lexers. It keeps track of two positions: a start and end position. The start position is the beginning of the current token being parsed, the end position is being moved forward until a valid token is found. Calling `Shift` will collapse the positions to the end and return the parsed `[]byte`.
Moving the end position can go through `Move(int)` which also accepts negative integers. One can also use `Pos() int` to try and parse a token, and if it fails rewind with `Rewind(int)`, passing the previously saved position.
`Peek(int) byte` will peek forward (relative to the end position) and return the byte at that location. `PeekRune(int) (rune, int)` returns the UTF-8 rune and its length at the given **byte** position. Upon an error `Peek` will return `0`; the **user must peek at every character** and not skip any, otherwise it may skip a `0` and panic on out-of-bounds indexing.
`Lexeme() []byte` will return the currently selected bytes, `Skip()` will collapse the selection. `Shift() []byte` is a combination of `Lexeme() []byte` and `Skip()`.
When the passed `io.Reader` returned an error, `Err() error` will return that error even if not at the end of the buffer.
### StreamLexer
StreamLexer behaves like Lexer but uses a buffer pool to read in chunks from `io.Reader`, retaining old buffers in memory that are still in use, and re-using old buffers otherwise. Calling `Free(n int)` frees up `n` bytes from the internal buffer(s). It holds an array of buffers to accommodate for keeping everything in-memory. Calling `ShiftLen() int` returns the number of bytes that have been shifted since the previous call to `ShiftLen`, which can be used to specify how many bytes need to be freed up from the buffer. If you don't need to keep returned byte slices around, call `Free(ShiftLen())` after every `Shift` call.
## Strconv
This package contains string conversion functions much like the standard library's `strconv` package, but it is specifically tailored for the performance needs within the `minify` package.
For example, the floating-point to string conversion function is approximately twice as fast as the standard library, but it is not as precise.
## CSS
This package is a CSS3 lexer and parser. Both follow the specification at [CSS Syntax Module Level 3](http://www.w3.org/TR/css-syntax-3/). The lexer takes an io.Reader and converts it into tokens until the EOF. The parser returns a parse tree of the full io.Reader input stream, but the low-level `Next` function can be used for stream parsing to return grammar units until the EOF.
[See README here](https://github.com/tdewolff/parse/tree/master/css).
## HTML
This package is an HTML5 lexer. It follows the specification at [The HTML syntax](http://www.w3.org/TR/html5/syntax.html). The lexer takes an io.Reader and converts it into tokens until the EOF.
[See README here](https://github.com/tdewolff/parse/tree/master/html).
## JS
This package is a JS lexer (ECMA-262, edition 6.0). It follows the specification at [ECMAScript Language Specification](http://www.ecma-international.org/ecma-262/6.0/). The lexer takes an io.Reader and converts it into tokens until the EOF.
[See README here](https://github.com/tdewolff/parse/tree/master/js).
## JSON
This package is a JSON parser (ECMA-404). It follows the specification at [JSON](http://json.org/). The parser takes an io.Reader and converts it into tokens until the EOF.
[See README here](https://github.com/tdewolff/parse/tree/master/json).
## SVG
This package contains common hashes for SVG1.1 tags and attributes.
## XML
This package is an XML1.0 lexer. It follows the specification at [Extensible Markup Language (XML) 1.0 (Fifth Edition)](http://www.w3.org/TR/xml/). The lexer takes an io.Reader and converts it into tokens until the EOF.
[See README here](https://github.com/tdewolff/parse/tree/master/xml).
## License
Released under the [MIT license](LICENSE.md).
[1]: http://golang.org/ "Go Language"

View file

@ -1,12 +0,0 @@
// Package buffer contains buffer and wrapper types for byte slices. It is useful for writing lexers or other high-performance byte slice handling.
// The `Reader` and `Writer` types implement the `io.Reader` and `io.Writer` respectively and provide a thinner and faster interface than `bytes.Buffer`.
// The `Lexer` type is useful for building lexers because it keeps track of the start and end position of a byte selection, and shifts the bytes whenever a valid token is found.
// The `StreamLexer` does the same, but keeps a buffer pool so that it reads a limited amount at a time, allowing to parse from streaming sources.
package buffer
var (
	// defaultBufSize is the default initial length of internal buffers.
	defaultBufSize = 4096

	// MinBuf is initialised to defaultBufSize.
	// Solely here to support old versions of parse.
	MinBuf = defaultBufSize
)

View file

@ -1,164 +0,0 @@
package buffer
import (
"io"
"io/ioutil"
)
// nullBuffer is the buffer used for empty or unreadable input: a single NULL
// byte and nothing else.
var nullBuffer = []byte{0x00}
// Lexer is a read buffer over a complete in-memory input that allows peeking forward and shifting.
// The constructors in this file terminate buf with a NULL byte, so the last byte of buf is never part of the input.
// The current selection is buf[start:pos].
type Lexer struct {
// buf holds the input followed by the terminating NULL byte.
buf []byte
pos int // index in buf
start int // index in buf
// err is the error returned while reading the io.Reader in NewLexer, if any.
err error
// restore, when non-nil, puts back the byte that was overwritten by the NULL terminator.
restore func()
}
// NewLexer returns a new Lexer for the given io.Reader.
// If r has a Bytes() []byte method, that slice is used directly; otherwise the
// reader is drained with ioutil.ReadAll. A nil reader yields an empty Lexer.
// When reading fails, the returned Lexer holds only the NULL terminator and
// reports the read error.
func NewLexer(r io.Reader) *Lexer {
	if r == nil {
		return NewLexerBytes(nil)
	}

	type byteser interface {
		Bytes() []byte
	}
	if src, ok := r.(byteser); ok {
		return NewLexerBytes(src.Bytes())
	}

	data, err := ioutil.ReadAll(r)
	if err != nil {
		return &Lexer{
			buf: nullBuffer,
			err: err,
		}
	}
	return NewLexerBytes(data)
}
// NewLexerBytes returns a new Lexer over b with a NULL byte placed after the
// last input byte.
// When b has spare capacity, the byte just past len(b) is overwritten in place
// and can be put back with Restore; otherwise the NULL is appended, which
// reallocates. To avoid reallocation, make sure the capacity has room for one
// more byte.
func NewLexerBytes(b []byte) *Lexer {
	n := len(b)
	switch {
	case n == 0:
		return &Lexer{buf: nullBuffer}

	case cap(b) > n:
		// Overwrite the next byte, but remember it so it can be restored.
		extended := b[:n+1]
		saved := extended[n]
		extended[n] = 0
		return &Lexer{
			buf: extended,
			restore: func() {
				extended[n] = saved
			},
		}

	default:
		return &Lexer{buf: append(b, 0)}
	}
}
// Restore puts back the byte past the end of the input that was replaced by
// NULL. It runs at most once; further calls are no-ops.
func (z *Lexer) Restore() {
	undo := z.restore
	if undo == nil {
		return
	}
	undo()
	z.restore = nil
}
// Err returns the stored read error if there is one, io.EOF when the current
// position has reached the terminating NULL byte, and nil otherwise.
// It gives the same result as PeekErr(0).
func (z *Lexer) Err() error {
	if z.err != nil {
		return z.err
	}
	if z.pos >= len(z.buf)-1 {
		return io.EOF
	}
	return nil
}
// PeekErr returns the error at position pos relative to the end position: the
// stored read error if any, io.EOF when that position is at or past the
// terminating NULL byte, and nil otherwise.
// With pos equal to zero this is the same as calling Err().
func (z *Lexer) PeekErr(pos int) error {
	switch {
	case z.err != nil:
		return z.err
	case z.pos+pos >= len(z.buf)-1:
		return io.EOF
	default:
		return nil
	}
}
// Peek returns the byte at offset pos relative to the end position.
// Reading the terminating NULL byte yields 0; Err reports why. No bounds check
// is performed here beyond the slice index itself.
func (z *Lexer) Peek(pos int) byte {
	return z.buf[z.pos+pos]
}
// PeekRune decodes the rune starting at byte offset pos relative to the end
// position and returns it with its length in bytes.
// The length is chosen from the lead byte; a sequence is cut short as soon as
// the next byte it would need is 0.
func (z *Lexer) PeekRune(pos int) (rune, int) {
	// from unicode/utf8
	lead := z.Peek(pos)
	switch {
	case lead < 0xC0 || z.Peek(pos+1) == 0:
		return rune(lead), 1
	case lead < 0xE0 || z.Peek(pos+2) == 0:
		return rune(lead&0x1F)<<6 | rune(z.Peek(pos+1)&0x3F), 2
	case lead < 0xF0 || z.Peek(pos+3) == 0:
		return rune(lead&0x0F)<<12 | rune(z.Peek(pos+1)&0x3F)<<6 | rune(z.Peek(pos+2)&0x3F), 3
	}
	return rune(lead&0x07)<<18 | rune(z.Peek(pos+1)&0x3F)<<12 | rune(z.Peek(pos+2)&0x3F)<<6 | rune(z.Peek(pos+3)&0x3F), 4
}
// Move shifts the end position by n bytes; n may be negative.
func (z *Lexer) Move(n int) {
	z.pos = z.pos + n
}
// Pos returns the length of the current selection, which can later be passed
// to Rewind as a mark.
func (z *Lexer) Pos() int {
	mark := z.pos - z.start
	return mark
}
// Rewind sets the end position to pos bytes past the start of the current
// selection, where pos is a mark obtained from Pos.
func (z *Lexer) Rewind(pos int) {
	z.pos = pos + z.start
}
// Lexeme returns the bytes of the current selection. The slice's capacity is
// capped at its length, so appending to it cannot overwrite the buffer.
func (z *Lexer) Lexeme() []byte {
	from, to := z.start, z.pos
	return z.buf[from:to:to]
}
// Skip discards the current selection by moving its start up to the end
// position.
func (z *Lexer) Skip() {
	z.start = z.pos
}
// Shift returns the bytes of the current selection and then collapses the
// selection to the end position (Lexeme followed by Skip).
func (z *Lexer) Shift() []byte {
	selected := z.Lexeme()
	z.Skip()
	return selected
}
// Offset returns the end position as a byte index into the buffer.
func (z *Lexer) Offset() int {
	return z.pos
}
// Bytes returns the underlying buffer without its final (NULL) byte, with
// capacity capped at that length.
func (z *Lexer) Bytes() []byte {
	end := len(z.buf) - 1
	return z.buf[:end:end]
}
// Reset moves both the start and the end position back to the beginning of
// the buffer.
func (z *Lexer) Reset() {
	z.start, z.pos = 0, 0
}

View file

@ -1,44 +0,0 @@
package buffer
import "io"
// Reader implements an io.Reader over a byte slice.
type Reader struct {
// buf is the data served by Read.
buf []byte
// pos is the index in buf of the next byte to hand out.
pos int
}
// NewReader returns a Reader positioned at the start of buf. The slice is
// used as-is, not copied.
func NewReader(buf []byte) *Reader {
	r := new(Reader)
	r.buf = buf
	return r
}
// Read copies as many unread bytes as fit into b and advances the read
// position. It returns 0, nil for an empty b and 0, io.EOF once all data has
// been consumed.
func (r *Reader) Read(b []byte) (n int, err error) {
	switch {
	case len(b) == 0:
		return 0, nil
	case r.pos >= len(r.buf):
		return 0, io.EOF
	}
	n = copy(b, r.buf[r.pos:])
	r.pos += n
	return n, nil
}
// Bytes returns the whole underlying byte slice, regardless of how much of it
// has already been read.
func (r *Reader) Bytes() []byte {
	return r.buf
}
// Reset rewinds the read position to the start of the underlying byte slice.
func (r *Reader) Reset() {
	r.pos = 0
}
// Len returns the total length of the underlying byte slice; the read
// position is not taken into account.
func (r *Reader) Len() int {
	return len(r.buf)
}

View file

@ -1,223 +0,0 @@
package buffer
import (
"io"
)
// block is one entry of a bufferPool: a buffer plus its link to the next entry.
// Fields are set positionally elsewhere in this file, so their order matters.
type block struct {
buf []byte
next int // index in pool plus one
// active is cleared by bufferPool.free; swap only hands out blocks that are not active.
active bool
}
// bufferPool keeps buffers that are still in use chained from tail to head
// (via block.next) so that freed ones can be reused by swap.
// head and tail are stored as index+1, so the value 0 means "none".
type bufferPool struct {
pool []block
head int // index in pool plus one
tail int // index in pool plus one
pos int // byte pos in tail
}
// swap takes the buffer currently being filled (oldBuf) and returns an empty
// (length 0) buffer with capacity of at least size to continue with.
// The returned buffer is, in order of preference: an inactive pooled block
// that is large enough, oldBuf itself when the chain is empty and everything
// in oldBuf has been freed, or a newly allocated buffer.
// Unless oldBuf is reused directly, it is stored in the pool as an active
// block and linked in as the new head of the chain.
func (z *bufferPool) swap(oldBuf []byte, size int) []byte {
// find new buffer that can be reused
swap := -1
for i := 0; i < len(z.pool); i++ {
if !z.pool[i].active && size <= cap(z.pool[i].buf) {
swap = i
break
}
}
if swap == -1 { // no free buffer found for reuse
if z.tail == 0 && z.pos >= len(oldBuf) && size <= cap(oldBuf) { // but we can reuse the current buffer!
// keep pos relative to the (now emptied) buffer
z.pos -= len(oldBuf)
return oldBuf[:0]
}
// allocate new
z.pool = append(z.pool, block{make([]byte, 0, size), 0, true})
swap = len(z.pool) - 1
}
// take the slot's buffer out before the slot is overwritten with oldBuf
newBuf := z.pool[swap].buf
// put current buffer into pool
z.pool[swap] = block{oldBuf, 0, true}
if z.head != 0 {
z.pool[z.head-1].next = swap + 1
}
z.head = swap + 1
if z.tail == 0 {
z.tail = swap + 1
}
return newBuf[:0]
}
// free marks n more bytes as no longer needed. Every block at the tail of the
// chain that is thereby fully consumed is deactivated (making it available to
// swap) and the tail advances to the next block. When the chain becomes
// empty, head is cleared as well.
func (z *bufferPool) free(n int) {
	z.pos += n

	// move the tail over to next buffers
	for z.tail != 0 {
		cur := &z.pool[z.tail-1]
		if z.pos < len(cur.buf) {
			break
		}
		z.pos -= len(cur.buf)
		following := cur.next
		cur.active = false // after this, any thread may pick up the inactive buffer, so it can't be used anymore
		z.tail = following
	}

	if z.tail == 0 {
		z.head = 0
	}
}
// StreamLexer is a buffered reader that allows peeking forward and shifting, taking an io.Reader.
// It keeps data in-memory until Free, taking a byte length, is called to move beyond the data.
type StreamLexer struct {
// r is the source; it is left nil when the input was already available as a byte slice.
r io.Reader
// err is the last error from r (io.EOF from the start when r is unused).
err error
// pool holds the older buffers that are still referenced.
pool bufferPool
buf []byte
start int // index in buf
pos int // index in buf
// prevStart is the value of start at the previous ShiftLen call.
prevStart int
// free accumulates the byte counts passed to Free until the next read hands them to the pool.
free int
}
// NewStreamLexer returns a new StreamLexer for r using defaultBufSize as the
// estimated buffer size.
// If the io.Reader implements Bytes, that buffer is used instead.
func NewStreamLexer(r io.Reader) *StreamLexer {
	size := defaultBufSize
	return NewStreamLexerSize(r, size)
}
// NewStreamLexerSize returns a new StreamLexer for r with an initial buffer
// capacity of size bytes.
// If r has a Bytes() []byte method, that slice becomes the buffer, no reader
// is kept and the lexer starts out with err set to io.EOF.
func NewStreamLexerSize(r io.Reader, size int) *StreamLexer {
	type byteser interface {
		Bytes() []byte
	}

	// if reader has the bytes in memory already, use that instead
	if inMemory, ok := r.(byteser); ok {
		return &StreamLexer{err: io.EOF, buf: inMemory.Bytes()}
	}

	return &StreamLexer{r: r, buf: make([]byte, 0, size)}
}
// read makes the byte at buffer index pos available by moving to a new buffer
// and reading more data from the underlying reader, and returns that byte.
// It returns 0 when an error is already set or when the reader could not
// supply enough bytes.
// As a side effect the unfinished token is copied to the front of the new
// buffer, so start becomes 0 and z.pos is rebased accordingly.
func (z *StreamLexer) read(pos int) byte {
if z.err != nil {
return 0
}
// free unused bytes
z.pool.free(z.free)
z.free = 0
// get new buffer
c := cap(z.buf)
p := pos - z.start + 1
if 2*p > c { // if the token is larger than half the buffer, increase buffer size
c = 2*c + p
}
// d counts the valid bytes in the new buffer, starting with the left-overs
d := len(z.buf) - z.start
buf := z.pool.swap(z.buf[:z.start], c)
copy(buf[:d], z.buf[z.start:]) // copy the left-overs (unfinished token) from the old buffer
// read in new data for the rest of the buffer
var n int
for pos-z.start >= d && z.err == nil {
n, z.err = z.r.Read(buf[d:cap(buf)])
d += n
}
// rebase indices onto the new buffer, whose content starts at the old start
pos -= z.start
z.pos -= z.start
z.start, z.buf = 0, buf[:d]
if pos >= d {
return 0
}
return z.buf[pos]
}
// Err returns the error from the io.Reader. An io.EOF is held back (nil is
// returned) while there are still buffered bytes ahead of the end position.
func (z *StreamLexer) Err() error {
	unread := z.pos < len(z.buf)
	if unread && z.err == io.EOF {
		return nil
	}
	return z.err
}
// Free records that n bytes of previously shifted tokens are no longer needed.
// The count is only accumulated here and handed to the buffer pool on the next
// read.
// Each call to Shift should at one point be followed by a call to Free with a length returned by ShiftLen.
func (z *StreamLexer) Free(n int) {
	z.free = z.free + n
}
// Peek returns the byte at offset pos relative to the end position. When that
// byte is not buffered yet, more data is read, which may allocate.
// Peek returns zero when an error has occurred, Err returns the error.
// TODO: inline function
func (z *StreamLexer) Peek(pos int) byte {
	idx := z.pos + pos
	if uint(idx) >= uint(len(z.buf)) { // uint for BCE
		return z.read(idx)
	}
	return z.buf[idx]
}
// PeekRune decodes the rune starting at byte offset pos relative to the end
// position and returns it with its length in bytes. The length is chosen from
// the lead byte alone.
func (z *StreamLexer) PeekRune(pos int) (rune, int) {
	// from unicode/utf8
	lead := z.Peek(pos)
	switch {
	case lead < 0xC0:
		return rune(lead), 1
	case lead < 0xE0:
		return rune(lead&0x1F)<<6 | rune(z.Peek(pos+1)&0x3F), 2
	case lead < 0xF0:
		return rune(lead&0x0F)<<12 | rune(z.Peek(pos+1)&0x3F)<<6 | rune(z.Peek(pos+2)&0x3F), 3
	}
	return rune(lead&0x07)<<18 | rune(z.Peek(pos+1)&0x3F)<<12 | rune(z.Peek(pos+2)&0x3F)<<6 | rune(z.Peek(pos+3)&0x3F), 4
}
// Move shifts the end position by n bytes.
func (z *StreamLexer) Move(n int) {
	z.pos = z.pos + n
}
// Pos returns the length of the current selection, which can later be passed
// to Rewind as a mark.
func (z *StreamLexer) Pos() int {
	mark := z.pos - z.start
	return mark
}
// Rewind sets the end position to pos bytes past the start of the current
// selection, where pos is a mark obtained from Pos.
func (z *StreamLexer) Rewind(pos int) {
	z.pos = pos + z.start
}
// Lexeme returns the bytes of the current selection.
func (z *StreamLexer) Lexeme() []byte {
	from, to := z.start, z.pos
	return z.buf[from:to]
}
// Skip discards the current selection by moving its start up to the end
// position.
func (z *StreamLexer) Skip() {
	z.start = z.pos
}
// Shift returns the bytes of the current selection and collapses the
// selection to the end position. If the end position lies beyond the buffered
// data, the missing bytes are read first.
// The number of bytes shifted is not returned here; use ShiftLen to obtain it
// for calls to Free.
func (z *StreamLexer) Shift() []byte {
	if len(z.buf) < z.pos { // make sure we peeked at least as much as we shift
		z.read(z.pos - 1)
	}
	selected := z.Lexeme()
	z.Skip()
	return selected
}
// ShiftLen returns how far the start position has moved since the previous
// call to ShiftLen. Because it works on the start position it covers any
// number of Shifts or Skips, which makes the result suitable for Free.
func (z *StreamLexer) ShiftLen() int {
	moved := z.start - z.prevStart
	z.prevStart = z.start
	return moved
}

View file

@ -1,65 +0,0 @@
package buffer
import (
"io"
)
// Writer implements an io.Writer over a byte slice.
type Writer struct {
// buf holds everything written so far.
buf []byte
// err is the last write error; Close returns it.
err error
// expand controls whether Write may reallocate buf when it runs out of capacity.
expand bool
}
// NewWriter returns a Writer that appends to buf and grows the slice when it
// runs out of capacity.
func NewWriter(buf []byte) *Writer {
	w := new(Writer)
	w.buf = buf
	w.expand = true
	return w
}
// NewStaticWriter returns a Writer that appends to buf but never reallocates:
// writes that do not fit within the slice's capacity fail.
func NewStaticWriter(buf []byte) *Writer {
	w := new(Writer)
	w.buf = buf
	w.expand = false
	return w
}
// Write appends b to the buffer and returns the number of bytes written.
// When b does not fit in the remaining capacity, an expanding Writer moves to
// a larger buffer; a static Writer records and returns io.EOF without writing
// anything. When err != nil, n == 0.
func (w *Writer) Write(b []byte) (int, error) {
	end := len(w.buf)
	needed := end + len(b)

	if needed > cap(w.buf) {
		if !w.expand {
			w.err = io.EOF
			return 0, io.EOF
		}
		grown := make([]byte, end, 2*cap(w.buf)+len(b))
		copy(grown, w.buf)
		w.buf = grown
	}

	w.buf = w.buf[:needed]
	return copy(w.buf[end:], b), nil
}
// Len returns the number of bytes currently held in the buffer.
func (w *Writer) Len() int {
	return len(w.buf)
}
// Bytes returns the underlying byte slice itself, not a copy.
func (w *Writer) Bytes() []byte {
	return w.buf
}
// Reset truncates the buffer to length zero while keeping its capacity.
// Subsequent writes overwrite the old contents, so any reference to the
// underlying slice is invalidated after this call.
func (w *Writer) Reset() {
	w.buf = w.buf[:0]
}
// Close returns the last error recorded by Write, or nil if there was none.
func (w *Writer) Close() error {
	return w.err
}

View file

@ -1,237 +0,0 @@
// Package parse contains a collection of parsers for various formats in its subpackages.
package parse
import (
"bytes"
"encoding/base64"
"errors"
)
// dataSchemeBytes is the scheme prefix every data URI must start with.
var dataSchemeBytes = []byte("data:")

// base64Bytes is the token that marks a data URI payload as base64-encoded.
var base64Bytes = []byte("base64")

// textMimeBytes is the mediatype used when a data URI does not specify one.
var textMimeBytes = []byte("text/plain")

// ErrBadDataURI is returned by DataURI when the byte slice does not start with 'data:' or is too short.
var ErrBadDataURI = errors.New("not a data URI")
// Number returns the number of leading bytes of b that parse as a number of
// the regex format (+|-)?([0-9]+(\.[0-9]+)?|\.[0-9]+)((e|E)(+|-)?[0-9]+)?.
// It returns 0 when b does not start with a number. A trailing '.' or an
// exponent marker without digits is left out of the count, since it could
// belong to the next token.
func Number(b []byte) int {
	n := len(b)
	if n == 0 {
		return 0
	}
	isDigit := func(c byte) bool { return '0' <= c && c <= '9' }

	// optional sign
	i := 0
	if b[0] == '+' || b[0] == '-' {
		if n == 1 {
			return 0
		}
		i = 1
	}

	// integer part
	intStart := i
	for i < n && isDigit(b[i]) {
		i++
	}
	hasInt := i > intStart

	// fractional part
	if i < n && b[i] == '.' {
		fracStart := i + 1
		j := fracStart
		for j < n && isDigit(b[j]) {
			j++
		}
		switch {
		case j > fracStart:
			i = j
		case hasInt:
			// . could belong to the next token
			return i
		default:
			return 0
		}
	} else if !hasInt {
		return 0
	}

	// exponent
	if i < n && (b[i] == 'e' || b[i] == 'E') {
		j := i + 1
		if j < n && (b[j] == '+' || b[j] == '-') {
			j++
		}
		if j >= n || !isDigit(b[j]) {
			// e could belong to next token
			return i
		}
		for j < n && isDigit(b[j]) {
			j++
		}
		i = j
	}
	return i
}
// Dimension parses a byte-slice and returns the length of the leading number
// and the length of the unit that directly follows it. The unit is either a
// single '%' or a run of ASCII letters; its length is 0 when there is no
// number, nothing after the number, or something else after the number.
func Dimension(b []byte) (int, int) {
	num := Number(b)
	if num == 0 || num == len(b) {
		return num, 0
	}

	isLetter := func(c byte) bool {
		return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z'
	}

	switch first := b[num]; {
	case first == '%':
		return num, 1
	case isLetter(first):
		end := num + 1
		for end < len(b) && isLetter(b[end]) {
			end++
		}
		return num, end - num
	}
	return num, 0
}
// Mediatype parses a given mediatype and splits the mimetype from the parameters.
// Leading spaces are skipped. The mimetype ends at the first ';' or ' ' found
// at index 3 or later; parameters are only parsed when a ';' follows (spaces
// in between are allowed). Parameters are split on ';', with optional spaces
// around keys, '=' and values; a key without '=' maps to the empty string.
// The returned map is nil when no parameter section is present.
// It works similar to mime.ParseMediaType but is faster.
func Mediatype(b []byte) ([]byte, map[string]string) {
	lead := 0
	for lead < len(b) && b[lead] == ' ' {
		lead++
	}
	b = b[lead:]
	n := len(b)

	mimetype := b
	var params map[string]string

	for i := 3; i < n; i++ { // mimetype is at least three characters long
		if b[i] != ';' && b[i] != ' ' {
			continue
		}

		mimetype = b[:i]
		if b[i] == ' ' {
			i++ // space
			for i < n && b[i] == ' ' {
				i++
			}
			if n <= i || b[i] != ';' {
				break
			}
		}

		params = map[string]string{}
		s := string(b)
		for {
			i++ // semicolon
			for i < n && s[i] == ' ' {
				i++
			}

			// key
			keyStart := i
			for i < n && s[i] != '=' && s[i] != ';' && s[i] != ' ' {
				i++
			}
			key := s[keyStart:i]
			for i < n && s[i] == ' ' {
				i++
			}

			// value, if any
			valStart := i
			if i < n && s[i] == '=' {
				i++
				for i < n && s[i] == ' ' {
					i++
				}
				valStart = i
				for i < n && s[i] != ';' && s[i] != ' ' {
					i++
				}
			}
			params[key] = s[valStart:i]

			for i < n && s[i] == ' ' {
				i++
			}
			if i < n && s[i] == ';' {
				continue
			}
			break
		}
		break
	}
	return mimetype, params
}
// DataURI parses the given data URI and returns the mediatype, the decoded data and an error.
// The input must be longer than the "data:" prefix and start with it, and must
// contain a ',' separating the header from the payload; otherwise
// ErrBadDataURI is returned. A base64 decoding failure is returned as-is.
// The header is scanned segment by segment at '=', ';' and ','; a segment
// equal to "base64" (after trimming whitespace) switches payload decoding to
// base64, otherwise the payload goes through DecodeURL.
// When no mediatype is given (or it starts with ';'), text/plain is returned.
func DataURI(dataURI []byte) ([]byte, []byte, error) {
if len(dataURI) > 5 && bytes.Equal(dataURI[:5], dataSchemeBytes) {
dataURI = dataURI[5:]
inBase64 := false
var mediatype []byte
// i marks the start of the header segment currently being scanned
i := 0
for j := 0; j < len(dataURI); j++ {
c := dataURI[j]
if c == '=' || c == ';' || c == ',' {
if c != '=' && bytes.Equal(TrimWhitespace(dataURI[i:j]), base64Bytes) {
// drop the separator that was appended after the previous segment
if len(mediatype) > 0 {
mediatype = mediatype[:len(mediatype)-1]
}
inBase64 = true
i = j
} else if c != ',' {
mediatype = append(append(mediatype, TrimWhitespace(dataURI[i:j])...), c)
i = j + 1
} else {
mediatype = append(mediatype, TrimWhitespace(dataURI[i:j])...)
}
// a comma ends the header; everything after it is the payload
if c == ',' {
if len(mediatype) == 0 || mediatype[0] == ';' {
mediatype = textMimeBytes
}
data := dataURI[j+1:]
if inBase64 {
decoded := make([]byte, base64.StdEncoding.DecodedLen(len(data)))
n, err := base64.StdEncoding.Decode(decoded, data)
if err != nil {
return nil, nil, err
}
data = decoded[:n]
} else {
data = DecodeURL(data)
}
return mediatype, data, nil
}
}
}
}
return nil, nil, ErrBadDataURI
}
// QuoteEntity checks whether b starts with a character reference for a quote
// and returns the quote that got matched (' or ") and the reference's length.
// It recognises &quot; and &apos;, the decimal forms &#34; and &#39;, and the
// hexadecimal forms &#x22; and &#x27;; numeric forms may carry leading zeros.
// It returns 0, 0 when nothing matches.
// TODO: deprecated
func QuoteEntity(b []byte) (quote byte, n int) {
	if len(b) < 5 || b[0] != '&' {
		return 0, 0
	}

	// named references
	if b[1] != '#' {
		if len(b) >= 6 && b[5] == ';' {
			name := b[1:5]
			if bytes.Equal(name, []byte("quot")) {
				return '"', 6 // &quot;
			} else if bytes.Equal(name, []byte("apos")) {
				return '\'', 6 // &apos;
			}
		}
		return 0, 0
	}

	// numeric references: pick the digit pair for decimal or hexadecimal
	i := 2
	first, double, single := byte('3'), byte('4'), byte('9') // &#34; and &#39;
	if b[2] == 'x' {
		i = 3
		first, double, single = '2', '2', '7' // &#x22; and &#x27;
	}
	for i < len(b) && b[i] == '0' {
		i++
	}
	if i+2 < len(b) && b[i] == first && b[i+2] == ';' {
		switch b[i+1] {
		case double:
			return '"', i + 3
		case single:
			return '\'', i + 3
		}
	}
	return 0, 0
}

View file

@ -1,47 +0,0 @@
package parse
import (
"bytes"
"fmt"
"io"
)
// Error is a parsing error returned by parser. It contains a message and the position (line, column and context) at which the error occurred.
type Error struct {
// Message is the error message, already formatted.
Message string
// Line, Column and Context are filled in from Position in NewError.
Line int
Column int
Context string
}
// NewError creates a new Error for the given offset into r. The line, column
// and context come from Position(r, offset). The message is passed through
// fmt.Sprintf only when format arguments are supplied, so a message without
// arguments may contain literal '%' characters.
func NewError(r io.Reader, offset int, message string, a ...interface{}) *Error {
	e := &Error{Message: message}
	e.Line, e.Column, e.Context = Position(r, offset)
	if len(a) > 0 {
		e.Message = fmt.Sprintf(message, a...)
	}
	return e
}
// NewErrorLexer creates a new Error from an active Input, using the input's
// bytes as the source and its current offset as the error position.
func NewErrorLexer(l *Input, message string, a ...interface{}) *Error {
	return NewError(bytes.NewBuffer(l.Bytes()), l.Offset(), message, a...)
}
// Position returns the stored line, column and context of the error.
// Context is the entire line at which the error occurred.
func (e *Error) Position() (int, int, string) {
	line, column, context := e.Line, e.Column, e.Context
	return line, column, context
}
// Error returns the error string: the message with the line and column
// number, followed by the context on a new line.
func (e *Error) Error() string {
	const layout = "%s on line %d and column %d\n%s"
	return fmt.Sprintf(layout, e.Message, e.Line, e.Column, e.Context)
}

View file

@ -1,98 +0,0 @@
# HTML [![API reference](https://img.shields.io/badge/godoc-reference-5272B4)](https://pkg.go.dev/github.com/tdewolff/parse/v2/html?tab=doc)
This package is an HTML5 lexer written in [Go][1]. It follows the specification at [The HTML syntax](http://www.w3.org/TR/html5/syntax.html). The lexer takes an io.Reader and converts it into tokens until the EOF.
## Installation
Run the following command
go get -u github.com/tdewolff/parse/v2/html
or add the following import and run project with `go get`
import "github.com/tdewolff/parse/v2/html"
## Lexer
### Usage
The following initializes a new Lexer with io.Reader `r`:
``` go
l := html.NewLexer(parse.NewInput(r))
```
To tokenize until EOF or an error, use:
``` go
for {
tt, data := l.Next()
switch tt {
case html.ErrorToken:
// error or EOF set in l.Err()
return
case html.StartTagToken:
// ...
for {
ttAttr, dataAttr := l.Next()
if ttAttr != html.AttributeToken {
break
}
// ...
}
// ...
}
}
```
All tokens:
``` go
ErrorToken TokenType = iota // extra token when errors occur
CommentToken
DoctypeToken
StartTagToken
StartTagCloseToken
StartTagVoidToken
EndTagToken
AttributeToken
TextToken
```
### Examples
``` go
package main
import (
"fmt"
"io"
"os"
"github.com/tdewolff/parse/v2"
"github.com/tdewolff/parse/v2/html"
)
// Tokenize HTML from stdin.
func main() {
l := html.NewLexer(parse.NewInput(os.Stdin))
for {
tt, data := l.Next()
switch tt {
case html.ErrorToken:
if l.Err() != io.EOF {
fmt.Println("Error on line", l.Line(), ":", l.Err())
}
return
case html.StartTagToken:
fmt.Println("Tag", string(data))
for {
ttAttr, dataAttr := l.Next()
if ttAttr != html.AttributeToken {
break
}
key := dataAttr
val := l.AttrVal()
fmt.Println("Attribute", string(key), "=", string(val))
}
// ...
}
}
}
```
## License
Released under the [MIT license](https://github.com/tdewolff/parse/blob/master/LICENSE.md).
[1]: http://golang.org/ "Go Language"

View file

@ -1,81 +0,0 @@
package html
// generated by hasher -type=Hash -file=hash.go; DO NOT EDIT, except for adding more constants to the list and rerun go generate
// uses github.com/tdewolff/hasher
//go:generate hasher -type=Hash -file=hash.go
// Hash defines perfect hashes for a predefined list of strings
// Each value packs the name's offset into _Hash_text in the bits above the
// low byte and the name's length in the low byte (see String).
type Hash uint32
// Unique hash definitions to be used instead of strings
const (
Iframe Hash = 0x6 // iframe
Math Hash = 0x604 // math
Plaintext Hash = 0x1e09 // plaintext
Script Hash = 0xa06 // script
Style Hash = 0x1405 // style
Svg Hash = 0x1903 // svg
Textarea Hash = 0x2308 // textarea
Title Hash = 0xf05 // title
Xmp Hash = 0x1c03 // xmp
)
// String returns the hash' name.
// The name is the substring of _Hash_text that starts at the offset stored in
// the bits above the low byte and has the length stored in the low byte.
// It returns the empty string when that range does not fit in _Hash_text.
func (i Hash) String() string {
start := uint32(i >> 8)
n := uint32(i & 0xff)
if start+n > uint32(len(_Hash_text)) {
return ""
}
return _Hash_text[start : start+n]
}
// ToHash returns the hash whose name is s. It returns zero if there is no
// such hash. It is case sensitive.
// Inputs that are empty or longer than _Hash_maxLen are rejected up front.
// The lookup hashes s (XOR each byte, then multiply by 16777619, the 32-bit
// FNV prime, starting from _Hash_hash0) and probes _Hash_table twice: first
// with the low bits of the hash, then with the bits of the hash shifted right
// by 16. A probe matches when the entry's stored length equals len(s) and its
// name equals s byte for byte.
func ToHash(s []byte) Hash {
if len(s) == 0 || len(s) > _Hash_maxLen {
return 0
}
h := uint32(_Hash_hash0)
for i := 0; i < len(s); i++ {
h ^= uint32(s[i])
h *= 16777619
}
// first probe: low bits of h
if i := _Hash_table[h&uint32(len(_Hash_table)-1)]; int(i&0xff) == len(s) {
t := _Hash_text[i>>8 : i>>8+i&0xff]
for i := 0; i < len(s); i++ {
if t[i] != s[i] {
goto NEXT
}
}
return i
}
NEXT:
// second probe: bits of h above the low 16
if i := _Hash_table[(h>>16)&uint32(len(_Hash_table)-1)]; int(i&0xff) == len(s) {
t := _Hash_text[i>>8 : i>>8+i&0xff]
for i := 0; i < len(s); i++ {
if t[i] != s[i] {
return 0
}
}
return i
}
return 0
}
// _Hash_hash0 is the initial value of the hash computed in ToHash.
const _Hash_hash0 = 0x9acb0442
// _Hash_maxLen is the longest input ToHash will consider.
const _Hash_maxLen = 9
// _Hash_text holds all names back to back; names may overlap, and each Hash
// value refers to its name by offset and length.
const _Hash_text = "iframemathscriptitlestylesvgxmplaintextarea"
// _Hash_table is the lookup table probed by ToHash; unused slots are zero.
var _Hash_table = [1 << 4]Hash{
0x0: 0x2308, // textarea
0x2: 0x6, // iframe
0x4: 0xf05, // title
0x5: 0x1e09, // plaintext
0x7: 0x1405, // style
0x8: 0x604, // math
0x9: 0xa06, // script
0xa: 0x1903, // svg
0xb: 0x1c03, // xmp
}

View file

@ -1,494 +0,0 @@
// Package html is an HTML5 lexer following the specifications at http://www.w3.org/TR/html5/syntax.html.
package html
import (
"strconv"
"github.com/tdewolff/parse/v2"
)
// TokenType determines the type of token, eg. a start tag, an attribute or text.
type TokenType uint32
// TokenType values.
// The values are assigned consecutively with iota, so their order must not change.
const (
ErrorToken TokenType = iota // extra token when errors occur
CommentToken
DoctypeToken
StartTagToken
StartTagCloseToken
StartTagVoidToken
EndTagToken
AttributeToken
TextToken
SvgToken
MathToken
)
// String returns the string representation of a TokenType: the constant's
// name without the "Token" suffix, or "Invalid(n)" for any other value.
func (tt TokenType) String() string {
	names := [...]string{
		ErrorToken:         "Error",
		CommentToken:       "Comment",
		DoctypeToken:       "Doctype",
		StartTagToken:      "StartTag",
		StartTagCloseToken: "StartTagClose",
		StartTagVoidToken:  "StartTagVoid",
		EndTagToken:        "EndTag",
		AttributeToken:     "Attribute",
		TextToken:          "Text",
		SvgToken:           "Svg",
		MathToken:          "Math",
	}
	if tt < TokenType(len(names)) {
		return names[tt]
	}
	return "Invalid(" + strconv.Itoa(int(tt)) + ")"
}
////////////////////////////////////////////////////////////////
// Lexer is the state for the lexer.
type Lexer struct {
// r is the input being tokenized.
r *parse.Input
// err, when set, takes precedence over the input's own error in Err.
err error
// rawTag is non-zero when the next call to Next should first try to read raw text for that tag.
rawTag Hash
// inTag is true while attributes and the closing of a start tag are still to be returned.
inTag bool
// text backs Text and is reset at the start of every Next call.
text []byte
// attrVal backs AttrVal and is reset whenever Next continues inside a tag.
attrVal []byte
}
// NewLexer returns a new Lexer that tokenizes the given input.
func NewLexer(r *parse.Input) *Lexer {
	l := new(Lexer)
	l.r = r
	return l
}
// Err returns the error encountered during lexing: the lexer's own error if
// one is set, otherwise the error of the underlying input. This is often
// io.EOF but other errors can be returned too.
func (l *Lexer) Err() error {
	if own := l.err; own != nil {
		return own
	}
	return l.r.Err()
}
// Text returns the textual representation of the token most recently returned
// by Next, excluding delimiters and additional leading/trailing characters.
// It is nil for tokens that set no text.
func (l *Lexer) Text() []byte {
	return l.text
}
// AttrVal returns the attribute value that belongs to the AttributeToken most
// recently returned from Next.
func (l *Lexer) AttrVal() []byte {
	return l.attrVal
}
// Next returns the next Token. It returns ErrorToken when an error was encountered. Using Err() one can retrieve the error message.
// The work is done in three stages:
//   - inside a start tag, whitespace is skipped and either an AttributeToken
//     or the token closing the tag (StartTagCloseToken for '>',
//     StartTagVoidToken for '/>') is returned;
//   - after a raw-text tag, the raw text is returned as a TextToken if it is
//     non-empty;
//   - otherwise the input is scanned byte by byte for '<'. Text collected
//     before a tag is returned first as a TextToken; the tag itself is handled
//     on the following call.
func (l *Lexer) Next() (TokenType, []byte) {
l.text = nil
var c byte
if l.inTag {
l.attrVal = nil
for { // before attribute name state
if c = l.r.Peek(0); c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' {
l.r.Move(1)
continue
}
break
}
if c == 0 && l.r.Err() != nil {
return ErrorToken, nil
} else if c != '>' && (c != '/' || l.r.Peek(1) != '>') {
return AttributeToken, l.shiftAttribute()
}
// drop the skipped whitespace so the returned token holds only '>' or '/>'
l.r.Skip()
l.inTag = false
if c == '/' {
l.r.Move(2)
return StartTagVoidToken, l.r.Shift()
}
l.r.Move(1)
return StartTagCloseToken, l.r.Shift()
}
if l.rawTag != 0 {
if rawText := l.shiftRawText(); len(rawText) > 0 {
l.text = rawText
l.rawTag = 0
return TextToken, rawText
}
l.rawTag = 0
}
for {
c = l.r.Peek(0)
if c == '<' {
c = l.r.Peek(1)
// "</" counts as an end tag only when it is not followed by '>' or by the end of the input
isEndTag := c == '/' && l.r.Peek(2) != '>' && (l.r.Peek(2) != 0 || l.r.PeekErr(2) == nil)
if l.r.Pos() > 0 {
if isEndTag || 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '!' || c == '?' {
// return currently buffered texttoken so that we can return tag next iteration
l.text = l.r.Shift()
return TextToken, l.text
}
} else if isEndTag {
l.r.Move(2)
// only endtags that are not followed by > or EOF arrive here
if c = l.r.Peek(0); !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') {
return CommentToken, l.shiftBogusComment()
}
return EndTagToken, l.shiftEndTag()
} else if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
l.r.Move(1)
l.inTag = true
return l.shiftStartTag()
} else if c == '!' {
l.r.Move(2)
return l.readMarkup()
} else if c == '?' {
l.r.Move(1)
return CommentToken, l.shiftBogusComment()
}
} else if c == 0 && l.r.Err() != nil {
// end of input: flush pending text first, report the error on the next call
if l.r.Pos() > 0 {
l.text = l.r.Shift()
return TextToken, l.text
}
return ErrorToken, nil
}
// any other byte, including a '<' that starts no tag, is part of the text
l.r.Move(1)
}
}
////////////////////////////////////////////////////////////////
// The following functions follow the specifications at https://html.spec.whatwg.org/multipage/parsing.html
// shiftRawText consumes and returns the raw text content that follows a raw-text
// start tag (l.rawTag). For plaintext everything up to the end of input is taken.
// For the other tags the text runs up to, but not including, the matching end tag
// (compared case-insensitively), or up to the end of input when there is none.
func (l *Lexer) shiftRawText() []byte {
	if l.rawTag == Plaintext {
		for {
			if l.r.Peek(0) == 0 && l.r.Err() != nil {
				return l.r.Shift()
			}
			l.r.Move(1)
		}
	} else { // RCDATA, RAWTEXT and SCRIPT
		for {
			c := l.r.Peek(0)
			if c == '<' {
				if l.r.Peek(1) == '/' {
					mark := l.r.Pos() // position of the `<`, to rewind to when this is the closing tag
					l.r.Move(2)
					for {
						if c = l.r.Peek(0); !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') {
							break
						}
						l.r.Move(1)
					}
					if h := ToHash(parse.ToLower(parse.Copy(l.r.Lexeme()[mark+2:]))); h == l.rawTag { // copy so that ToLower doesn't change the case of the underlying slice
						l.r.Rewind(mark)
						return l.r.Shift()
					}
				} else if l.rawTag == Script && l.r.Peek(1) == '!' && l.r.Peek(2) == '-' && l.r.Peek(3) == '-' {
					// `<!--` inside a script: scan until `-->`, tracking nested <script>...</script> pairs
					l.r.Move(4)
					inScript := false
					for {
						c := l.r.Peek(0)
						if c == '-' && l.r.Peek(1) == '-' && l.r.Peek(2) == '>' {
							l.r.Move(3)
							break
						} else if c == '<' {
							isEnd := l.r.Peek(1) == '/'
							if isEnd {
								l.r.Move(2)
							} else {
								l.r.Move(1)
							}
							mark := l.r.Pos() // start of the tag name
							for {
								if c = l.r.Peek(0); !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') {
									break
								}
								l.r.Move(1)
							}
							if h := ToHash(parse.ToLower(parse.Copy(l.r.Lexeme()[mark:]))); h == Script { // copy so that ToLower doesn't change the case of the underlying slice
								if !isEnd {
									inScript = true
								} else {
									if !inScript {
										// `</script` without a nested `<script`: the raw text ends before its `</`
										l.r.Rewind(mark - 2)
										return l.r.Shift()
									}
									inScript = false
								}
							}
						} else if c == 0 && l.r.Err() != nil {
							return l.r.Shift()
						} else {
							l.r.Move(1)
						}
					}
				} else {
					l.r.Move(1)
				}
			} else if c == 0 && l.r.Err() != nil {
				return l.r.Shift()
			} else {
				l.r.Move(1)
			}
		}
	}
}
// readMarkup lexes a markup declaration; it is called by Next after `<!` has been consumed.
// It recognizes comments (`<!-- ... -->`, also closed by `--!>`), CDATA sections
// (`<![CDATA[ ... ]]>`, returned as TextToken) and doctypes (matched case-insensitively).
// Anything else is returned as a bogus comment. Unterminated constructs run to the end of input.
func (l *Lexer) readMarkup() (TokenType, []byte) {
	if l.at('-', '-') {
		l.r.Move(2)
		for {
			if l.r.Peek(0) == 0 && l.r.Err() != nil {
				l.text = l.r.Lexeme()[4:] // skip the 4-byte `<!--`
				return CommentToken, l.r.Shift()
			} else if l.at('-', '-', '>') {
				l.text = l.r.Lexeme()[4:]
				l.r.Move(3)
				return CommentToken, l.r.Shift()
			} else if l.at('-', '-', '!', '>') {
				l.text = l.r.Lexeme()[4:]
				l.r.Move(4)
				return CommentToken, l.r.Shift()
			}
			l.r.Move(1)
		}
	} else if l.at('[', 'C', 'D', 'A', 'T', 'A', '[') {
		l.r.Move(7)
		for {
			if l.r.Peek(0) == 0 && l.r.Err() != nil {
				l.text = l.r.Lexeme()[9:] // skip the 9-byte `<![CDATA[`
				return TextToken, l.r.Shift()
			} else if l.at(']', ']', '>') {
				l.text = l.r.Lexeme()[9:]
				l.r.Move(3)
				return TextToken, l.r.Shift()
			}
			l.r.Move(1)
		}
	} else {
		if l.atCaseInsensitive('d', 'o', 'c', 't', 'y', 'p', 'e') {
			l.r.Move(7)
			if l.r.Peek(0) == ' ' {
				l.r.Move(1)
			}
			for {
				if c := l.r.Peek(0); c == '>' || c == 0 && l.r.Err() != nil {
					// skip the 9-byte `<!doctype`; NOTE(review): a space skipped above is at
					// index 9 and therefore still part of the text - confirm that is intended
					l.text = l.r.Lexeme()[9:]
					if c == '>' {
						l.r.Move(1)
					}
					return DoctypeToken, l.r.Shift()
				}
				l.r.Move(1)
			}
		}
	}
	return CommentToken, l.shiftBogusComment()
}
// shiftBogusComment consumes input up to and including the next `>` (or up to the
// end of input) and returns it. l.text is set to the consumed bytes minus the
// first two bytes of the selection and minus the closing `>`.
func (l *Lexer) shiftBogusComment() []byte {
	for {
		next := l.r.Peek(0)
		closed := next == '>'
		if closed || next == 0 && l.r.Err() != nil {
			l.text = l.r.Lexeme()[2:]
			if closed {
				l.r.Move(1)
			}
			return l.r.Shift()
		}
		l.r.Move(1)
	}
}
// shiftStartTag reads the tag name of a start tag; the leading `<` is already part
// of the current selection. l.text becomes the lowercased name. For svg and math the
// whole element is consumed via shiftXML and returned as a single SvgToken or
// MathToken. For raw-text tags (textarea, title, style, xmp, iframe, script,
// plaintext) l.rawTag is set so that a later Next call reads the raw content.
func (l *Lexer) shiftStartTag() (TokenType, []byte) {
scan:
	for {
		switch c := l.r.Peek(0); c {
		case ' ', '>', '\t', '\n', '\r', '\f':
			break scan
		case '/':
			if l.r.Peek(1) == '>' {
				break scan
			}
		case 0:
			if l.r.Err() != nil {
				break scan
			}
		}
		l.r.Move(1)
	}

	l.text = parse.ToLower(l.r.Lexeme()[1:])
	h := ToHash(l.text)
	switch h {
	case Svg, Math:
		data := l.shiftXML(h)
		if l.err != nil {
			return ErrorToken, nil
		}
		l.inTag = false
		if h == Svg {
			return SvgToken, data
		}
		return MathToken, data
	case Textarea, Title, Style, Xmp, Iframe, Script, Plaintext:
		l.rawTag = h
	}
	return StartTagToken, l.r.Shift()
}
// shiftAttribute reads one attribute starting at the current position and returns its
// raw bytes. l.text is set to the lowercased attribute name. l.attrVal is set to the
// value including any surrounding quotes, or to nil when the name is not followed by `=`.
func (l *Lexer) shiftAttribute() []byte {
	nameStart := l.r.Pos()
	var c byte
	for { // attribute name state
		if c = l.r.Peek(0); c == ' ' || c == '=' || c == '>' || c == '/' && l.r.Peek(1) == '>' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == 0 && l.r.Err() != nil {
			break
		}
		l.r.Move(1)
	}
	nameEnd := l.r.Pos()
	for { // after attribute name state
		if c = l.r.Peek(0); c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' {
			l.r.Move(1)
			continue
		}
		break
	}
	if c == '=' {
		l.r.Move(1)
		for { // before attribute value state
			if c = l.r.Peek(0); c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' {
				l.r.Move(1)
				continue
			}
			break
		}
		attrPos := l.r.Pos()
		delim := c
		if delim == '"' || delim == '\'' { // attribute value single- and double-quoted state
			l.r.Move(1)
			for {
				c := l.r.Peek(0)
				if c == delim {
					l.r.Move(1)
					break
				} else if c == 0 && l.r.Err() != nil {
					// unterminated quote: the value runs to the end of input
					break
				}
				l.r.Move(1)
			}
		} else { // attribute value unquoted state
			for {
				if c := l.r.Peek(0); c == ' ' || c == '>' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == 0 && l.r.Err() != nil {
					break
				}
				l.r.Move(1)
			}
		}
		l.attrVal = l.r.Lexeme()[attrPos:]
	} else {
		// no value: give back the whitespace consumed after the name
		l.r.Rewind(nameEnd)
		l.attrVal = nil
	}
	l.text = parse.ToLower(l.r.Lexeme()[nameStart:nameEnd])
	return l.r.Shift()
}
// shiftEndTag consumes an end tag up to and including its `>` (or up to the end of
// input) and returns the lowercased raw bytes. The leading `</` is already part of
// the current selection. l.text is set to the content between `</` and `>` with
// trailing whitespace removed; it shares memory with the returned bytes and is
// therefore lowercased as well.
func (l *Lexer) shiftEndTag() []byte {
	for {
		c := l.r.Peek(0)
		if c == '>' {
			l.text = l.r.Lexeme()[2:]
			l.r.Move(1)
			break
		} else if c == 0 && l.r.Err() != nil {
			l.text = l.r.Lexeme()[2:]
			break
		}
		l.r.Move(1)
	}

	// Trim trailing HTML whitespace. Form feed is included so that this agrees with
	// the whitespace set used for start tags and attributes elsewhere in the lexer.
	end := len(l.text)
	for end > 0 {
		if c := l.text[end-1]; c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' {
			end--
			continue
		}
		break
	}
	l.text = l.text[:end]
	return parse.ToLower(l.r.Shift())
}
// shiftXML parses the content of a svg or math tag according to the XML 1.1 specifications, including the tag itself.
// So far we have already parsed `<svg` or `<math`.
//
// It scans for the matching end tag (compared case-insensitively) outside of
// double-quoted sections and then on to its closing `>`, and returns everything
// consumed. When a NULL byte is hit the bytes consumed so far are returned; if that
// NULL is not the end of input, l.err is set as well.
func (l *Lexer) shiftXML(rawTag Hash) []byte {
	inQuote := false
	for {
		c := l.r.Peek(0)
		if c == '"' {
			inQuote = !inQuote
			l.r.Move(1)
		} else if c == '<' && !inQuote && l.r.Peek(1) == '/' {
			mark := l.r.Pos()
			l.r.Move(2)
			for {
				if c = l.r.Peek(0); !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') {
					break
				}
				l.r.Move(1)
			}
			if h := ToHash(parse.ToLower(parse.Copy(l.r.Lexeme()[mark+2:]))); h == rawTag { // copy so that ToLower doesn't change the case of the underlying slice
				break
			}
		} else if c == 0 {
			if l.r.Err() == nil {
				// a NULL byte in the middle of the input
				l.err = parse.NewErrorLexer(l.r, "HTML parse error: unexpected NULL character")
			}
			return l.r.Shift()
		} else {
			l.r.Move(1)
		}
	}

	// consume the remainder of the end tag up to and including `>`
	for {
		c := l.r.Peek(0)
		if c == '>' {
			l.r.Move(1)
			break
		} else if c == 0 {
			if l.r.Err() == nil {
				l.err = parse.NewErrorLexer(l.r, "HTML parse error: unexpected NULL character")
			}
			return l.r.Shift()
		}
		l.r.Move(1)
	}
	return l.r.Shift()
}
////////////////////////////////////////////////////////////////
// at reports whether the upcoming input bytes equal b exactly.
func (l *Lexer) at(b ...byte) bool {
	for i := 0; i < len(b); i++ {
		if l.r.Peek(i) != b[i] {
			return false
		}
	}
	return true
}
// atCaseInsensitive reports whether the upcoming input bytes equal b, where each
// input byte may also be exactly 'a'-'A' below the expected byte. For lowercase
// ASCII letters in b this is a case-insensitive match.
func (l *Lexer) atCaseInsensitive(b ...byte) bool {
	for i, want := range b {
		got := l.r.Peek(i)
		if got == want || got+('a'-'A') == want {
			continue
		}
		return false
	}
	return true
}

View file

@ -1,103 +0,0 @@
package html
// Numeric character references that EscapeAttrVal substitutes for a quote
// character that equals the quote used to delimit the attribute value.
var (
	singleQuoteEntityBytes = []byte("&#39;")
	doubleQuoteEntityBytes = []byte("&#34;")
)
// EscapeAttrVal returns the escaped attribute value bytes with quotes. Either single or
// double quotes are used, whichever needs fewer escapes; XML always gets double quotes.
// If the value contains no character that requires quoting and the value is in HTML
// (not XML), it is returned without quotes.
//
// orig is the original attribute value including its quotes and b is the value without
// them. When orig already uses a quote character that does not occur in b, orig is
// returned unchanged. Otherwise the result is built in *buf, which is reallocated when
// its capacity is too small. The returned slice may therefore alias b, orig or *buf.
func EscapeAttrVal(buf *[]byte, orig, b []byte, isXML bool) []byte {
	singles := 0
	doubles := 0
	unquoted := true
	for _, c := range b {
		if charTable[c] {
			unquoted = false
			if c == '"' {
				doubles++
			} else if c == '\'' {
				singles++
			}
		}
	}
	if unquoted && !isXML {
		return b
	}
	// len(orig) == len(b)+2 guarantees orig is non-empty before orig[0] is read.
	if len(orig) == len(b)+2 && (singles == 0 && orig[0] == '\'' || doubles == 0 && orig[0] == '"') {
		return orig
	}

	// Each escaped quote grows from 1 to 5 bytes, hence the 4 extra bytes per quote.
	n := len(b) + 2
	var quote byte
	var escapedQuote []byte
	if singles >= doubles || isXML {
		n += doubles * 4
		quote = '"'
		escapedQuote = doubleQuoteEntityBytes
	} else {
		n += singles * 4
		quote = '\''
		escapedQuote = singleQuoteEntityBytes
	}
	if n > cap(*buf) {
		*buf = make([]byte, 0, n) // maximum size, not actual size
	}
	t := (*buf)[:n] // maximum size, not actual size
	t[0] = quote
	j := 1
	start := 0
	for i, c := range b {
		if c == quote {
			j += copy(t[j:], b[start:i])
			j += copy(t[j:], escapedQuote)
			start = i + 1
		}
	}
	j += copy(t[j:], b[start:])
	t[j] = quote
	return t[:j+1]
}
// charTable marks the bytes that prevent an attribute value from being written
// without quotes: whitespace, both quote characters, `<`, `=`, `>` and the backtick.
// All other entries are false.
var charTable = [256]bool{
	'\t': true,
	'\n': true,
	'\f': true,
	'\r': true,
	' ':  true,
	'"':  true,
	'\'': true,
	'<':  true,
	'=':  true,
	'>':  true,
	'`':  true,
}

View file

@ -1,173 +0,0 @@
package parse
import (
"io"
"io/ioutil"
)
// nullBuffer is the buffer used for empty or unreadable input: it holds only the
// terminating NULL byte.
var nullBuffer = []byte{0}
// Input is a buffered reader that allows peeking forward and shifting over data that is
// held completely in memory. The buffer always ends in a NULL byte that marks the end.
type Input struct {
	buf   []byte // data followed by a terminating NULL byte
	pos   int    // index in buf of the current read position
	start int    // index in buf where the current selection begins

	err     error  // error from reading the source in NewInput, if any
	restore func() // puts back the byte that was overwritten by the terminating NULL, if any
}
// NewInput returns a new Input for a given io.Reader and uses ioutil.ReadAll to read it into a byte slice.
// If the io.Reader implements Bytes, that is used instead. It will append a NULL at the end of the buffer.
// A nil reader yields an empty Input. When reading fails, the returned Input is empty
// and reports the read error from Err.
func NewInput(r io.Reader) *Input {
	if r == nil {
		return NewInputBytes(nil)
	}
	if src, ok := r.(interface {
		Bytes() []byte
	}); ok {
		return NewInputBytes(src.Bytes())
	}
	data, err := ioutil.ReadAll(r)
	if err != nil {
		return &Input{
			buf: nullBuffer,
			err: err,
		}
	}
	return NewInputBytes(data)
}
// NewInputString returns a new Input for a given string and appends NULL at the end.
// The string is converted to a new byte slice first.
func NewInputString(s string) *Input {
	return NewInputBytes([]byte(s))
}
// NewInputBytes returns a new Input for a given byte slice and appends NULL at the end.
// To avoid reallocation, make sure the capacity has room for one more byte: in that
// case the byte just past len(b) is overwritten with NULL in place, and Restore puts
// the original byte back. Without spare capacity the slice is grown with append.
func NewInputBytes(b []byte) *Input {
	n := len(b)
	switch {
	case n == 0:
		return &Input{buf: nullBuffer}
	case n < cap(b):
		// Overwrite next byte but restore when done
		b = b[:n+1]
		saved := b[n]
		b[n] = 0
		return &Input{
			buf: b,
			restore: func() {
				b[n] = saved
			},
		}
	default:
		return &Input{buf: append(b, 0)}
	}
}
// Restore puts back the byte past the end of the buffer that was replaced by NULL.
// It does nothing when no byte was replaced or when it was already restored.
func (z *Input) Restore() {
	if z.restore == nil {
		return
	}
	z.restore()
	z.restore = nil
}
// Err returns the error that occurred while reading the source, or io.EOF when the
// current position has reached the end of the buffer. It returns nil otherwise.
func (z *Input) Err() error {
	return z.PeekErr(0)
}
// PeekErr returns the error at position pos relative to the current position. When pos
// is zero, this is the same as calling Err(). A stored read error wins; otherwise
// io.EOF is returned once the position is at or past the terminating NULL.
func (z *Input) PeekErr(pos int) error {
	switch {
	case z.err != nil:
		return z.err
	case z.pos+pos >= len(z.buf)-1:
		return io.EOF
	default:
		return nil
	}
}
// Peek returns the ith byte relative to the end position.
// At the end of the buffer this is the terminating NULL byte; use Err to tell that
// apart from a NULL byte inside the data. No bounds check is done beyond the buffer.
func (z *Input) Peek(pos int) byte {
	return z.buf[z.pos+pos]
}
// PeekRune returns the rune and rune length of the ith byte relative to the end position.
// The length is derived from the lead byte and is cut short when a NULL byte appears
// where a following byte is expected. The bytes are not validated as UTF-8.
func (z *Input) PeekRune(pos int) (rune, int) {
	// from unicode/utf8
	b0 := z.Peek(pos)
	if b0 < 0xC0 {
		return rune(b0), 1
	}
	b1 := z.Peek(pos + 1)
	if b1 == 0 {
		return rune(b0), 1
	}
	if b0 < 0xE0 {
		return rune(b0&0x1F)<<6 | rune(b1&0x3F), 2
	}
	b2 := z.Peek(pos + 2)
	if b2 == 0 {
		return rune(b0&0x1F)<<6 | rune(b1&0x3F), 2
	}
	if b0 < 0xF0 {
		return rune(b0&0x0F)<<12 | rune(b1&0x3F)<<6 | rune(b2&0x3F), 3
	}
	b3 := z.Peek(pos + 3)
	if b3 == 0 {
		return rune(b0&0x0F)<<12 | rune(b1&0x3F)<<6 | rune(b2&0x3F), 3
	}
	return rune(b0&0x07)<<18 | rune(b1&0x3F)<<12 | rune(b2&0x3F)<<6 | rune(b3&0x3F), 4
}
// Move advances the position by n bytes, extending the current selection.
// No bounds check is performed.
func (z *Input) Move(n int) {
	z.pos += n
}
// Pos returns the length of the current selection, i.e. the position relative to the
// start of the selection. The value can be used as a mark to rewind to with Rewind.
func (z *Input) Pos() int {
	return z.pos - z.start
}
// Rewind sets the position to pos, measured from the start of the current selection
// (a value previously obtained from Pos).
func (z *Input) Rewind(pos int) {
	z.pos = z.start + pos
}
// Lexeme returns the bytes of the current selection without collapsing it. The
// slice's capacity is limited to its length, so appending to it cannot overwrite
// the underlying buffer.
func (z *Input) Lexeme() []byte {
	from, to := z.start, z.pos
	return z.buf[from:to:to]
}
// Skip collapses the position to the end of the selection, discarding the selected
// bytes without returning them.
func (z *Input) Skip() {
	z.start = z.pos
}
// Shift returns the bytes of the current selection and collapses the position to the
// end of the selection. The returned slice's capacity is limited to its length.
func (z *Input) Shift() []byte {
	from, to := z.start, z.pos
	z.start = to
	return z.buf[from:to:to]
}
// Offset returns the absolute position in the buffer, counted in bytes from its beginning.
func (z *Input) Offset() int {
	return z.pos
}
// Bytes returns the underlying buffer without the terminating NULL byte. The slice's
// capacity is limited to its length.
func (z *Input) Bytes() []byte {
	end := len(z.buf) - 1
	return z.buf[:end:end]
}
// Len returns the length of the underlying buffer, not counting the terminating NULL byte.
func (z *Input) Len() int {
	return len(z.buf) - 1
}
// Reset moves both the selection start and the position back to the beginning of the
// underlying buffer.
func (z *Input) Reset() {
	z.start, z.pos = 0, 0
}

View file

@ -1,95 +0,0 @@
package parse
import (
"fmt"
"io"
"strings"
"unicode"
)
// Position returns the line and column number for a certain position in a file. It is useful for recovering the position in a file that caused an error.
// It treats \n, \r, \r\n, \u2028 and \u2029 as newlines; \f is not treated as a newline, which might differ from some languages.
//
// offset is a byte offset into the data read from r. line and col are 1-based and col
// counts runes, not bytes. context is a two-line string showing the line and a caret
// below the column. Scanning stops early at the end of input, or when offset points
// into the middle of a multi-byte sequence.
func Position(r io.Reader, offset int) (line, col int, context string) {
	l := NewInput(r)
	line = 1
	for l.Pos() < offset {
		c := l.Peek(0)
		n := 1 // number of bytes consumed in this step
		newline := false
		if c == '\n' {
			newline = true
		} else if c == '\r' {
			if l.Peek(1) == '\n' {
				newline = true
				n = 2
			} else {
				newline = true
			}
		} else if c >= 0xC0 {
			// lead byte of a multi-byte sequence
			var r rune
			if r, n = l.PeekRune(0); r == '\u2028' || r == '\u2029' {
				newline = true
			}
		} else if c == 0 && l.Err() != nil {
			break
		}

		// do not step over the offset when it lies inside a multi-byte sequence
		if 1 < n && offset < l.Pos()+n {
			break
		}
		l.Move(n)

		if newline {
			line++
			// make offset relative to the new line and start a fresh selection there
			offset -= l.Pos()
			l.Skip()
		}
	}

	col = len([]rune(string(l.Lexeme()))) + 1
	context = positionContext(l, line, col)
	return
}
// positionContext returns a two-line snippet for an error message: the line that l's
// current selection is on, prefixed with the line number, and below it a caret (^)
// pointing at column col. l is advanced to the end of that line. Lines longer than
// 60 runes are shortened around the column and marked with "...".
func positionContext(l *Input, line, col int) (context string) {
	// extend the selection to the end of the line (or end of input)
	for {
		c := l.Peek(0)
		if c == 0 && l.Err() != nil || c == '\n' || c == '\r' {
			break
		}
		l.Move(1)
	}
	rs := []rune(string(l.Lexeme()))

	// cut off front or rear of context to stay between 60 characters
	limit := 60
	offset := 20
	ellipsisFront := ""
	ellipsisRear := ""
	if limit < len(rs) {
		if col <= limit-offset {
			// column is near the front: keep the front
			ellipsisRear = "..."
			rs = rs[:limit-3]
		} else if col >= len(rs)-offset-3 {
			// column is near the end: keep the rear
			ellipsisFront = "..."
			col -= len(rs) - offset - offset - 7
			rs = rs[len(rs)-offset-offset-4:]
		} else {
			// column is in the middle: keep a window around it
			ellipsisFront = "..."
			ellipsisRear = "..."
			rs = rs[col-offset-1 : col+offset]
			col = offset + 4
		}
	}

	// replace unprintable characters by a middle dot
	for i, r := range rs {
		if !unicode.IsGraphic(r) {
			rs[i] = '·'
		}
	}

	context += fmt.Sprintf("%5d: %s%s%s\n", line, ellipsisFront, string(rs), ellipsisRear)
	context += fmt.Sprintf("%s^", strings.Repeat(" ", 6+col))
	return
}

View file

@ -1,257 +0,0 @@
package strconv
import (
"math"
)
// float64pow10 holds the powers of ten 1e0 through 1e22, indexed by exponent. It is
// used by the fast paths of ParseFloat.
var float64pow10 = []float64{
	1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
	1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
	1e20, 1e21, 1e22,
}
// ParseFloat parses a byte-slice and returns the float it represents.
// If an invalid character is encountered, it will stop there.
//
// The accepted form is an optional sign, digits with at most one dot, and an optional
// exponent (`e` or `E` followed by an integer). The second return value is the number
// of bytes consumed; it is 0 when b does not start with a number. Digits that no
// longer fit in a uint64 mantissa are dropped and only adjust the exponent.
func ParseFloat(b []byte) (float64, int) {
	i := 0
	neg := false
	if i < len(b) && (b[i] == '+' || b[i] == '-') {
		neg = b[i] == '-'
		i++
	}

	start := i
	dot := -1   // index of the dot, or -1
	trunk := -1 // index of the first digit that was dropped from the mantissa, or -1
	n := uint64(0)
	for ; i < len(b); i++ {
		c := b[i]
		if c >= '0' && c <= '9' {
			if trunk == -1 {
				d := uint64(c - '0')
				// Stop accumulating as soon as n*10+d would exceed uint64. Comparing n
				// against MaxUint64/10 alone is not enough: at n == MaxUint64/10 a digit
				// of 6 or more would wrap the mantissa around.
				if n > (math.MaxUint64-d)/10 {
					trunk = i
				} else {
					n *= 10
					n += d
				}
			}
		} else if dot == -1 && c == '.' {
			dot = i
		} else {
			break
		}
	}
	// no digits at all, or only a dot
	if i == start || i == start+1 && dot == start {
		return 0.0, 0
	}

	f := float64(n)
	if neg {
		f = -f
	}

	// mantExp is the power of ten by which the integer mantissa must be divided
	mantExp := int64(0)
	if dot != -1 {
		if trunk == -1 {
			trunk = i
		}
		mantExp = int64(trunk - dot - 1)
	} else if trunk != -1 {
		mantExp = int64(trunk - i)
	}
	expExp := int64(0)
	if i < len(b) && (b[i] == 'e' || b[i] == 'E') {
		startExp := i
		i++
		if e, expLen := ParseInt(b[i:]); expLen > 0 {
			expExp = e
			i += expLen
		} else {
			// not a valid exponent: leave the `e` unconsumed
			i = startExp
		}
	}
	exp := expExp - mantExp

	// copied from strconv/atof.go
	if exp == 0 {
		return f, i
	} else if exp > 0 && exp <= 15+22 { // int * 10^k
		// If exponent is big but number of digits is not,
		// can move a few zeros into the integer part.
		if exp > 22 {
			f *= float64pow10[exp-22]
			exp = 22
		}
		if f <= 1e15 && f >= -1e15 {
			return f * float64pow10[exp], i
		}
	} else if exp < 0 && exp >= -22 { // int / 10^k
		return f / float64pow10[-exp], i
	}
	f *= math.Pow10(int(-mantExp))
	return f * math.Pow10(int(expExp)), i
}
// log2 is the base-10 logarithm of 2, used to convert a binary exponent to a decimal one.
const log2 = 0.3010299956639812

// float64exp returns an estimate of the decimal exponent of f. It takes the binary
// exponent from the bits of f, scales it by log2 and, for negative results, subtracts
// one before truncating. Zero yields 0.
func float64exp(f float64) int {
	var binExp int
	if f != 0.0 {
		biased := int(math.Float64bits(f)>>(64-11-1)) & 0x7FF
		binExp = biased - 1023 + 1
	}

	decExp := float64(binExp) * log2
	if decExp < 0 {
		decExp -= 1.0
	}
	return int(decExp)
}
// AppendFloat appends a float to `b` with precision `prec`. It returns the new slice and whether successful or not. Precision is the number of decimals to display, thus prec + 1 == number of significant digits.
//
// NaN and infinities are not written: b is returned unchanged together with false.
// The sign is written for negative values. A prec below 0 or above 17 is treated as 17.
// The output uses an exponent (`e`) or trailing zeros where the code below decides to,
// based on the position of the dot relative to the digits.
func AppendFloat(b []byte, f float64, prec int) ([]byte, bool) {
	if math.IsNaN(f) || math.IsInf(f, 0) {
		return b, false
	}

	neg := false
	if f < 0.0 {
		f = -f
		neg = true
	}
	if prec < 0 || 17 < prec {
		prec = 17 // maximum number of significant digits in double
	}
	prec -= float64exp(f) // number of digits in front of the dot
	f *= math.Pow10(prec)

	// calculate mantissa and exponent
	mant := int64(f)
	mantLen := LenInt(mant)
	mantExp := mantLen - prec - 1
	if mant == 0 {
		return append(b, '0'), true
	}

	// expLen is zero for positive exponents, because positive exponents are determined later on in the big conversion loop
	exp := 0
	expLen := 0
	if mantExp > 0 {
		// positive exponent is determined in the loop below
		// but if we initially decreased the exponent to fit in an integer, we can't set the new exponent in the loop alone,
		// since the number of zeros at the end determines the positive exponent in the loop, and we just artificially lost zeros
		if prec < 0 {
			exp = mantExp
		}
		expLen = 1 + LenInt(int64(exp)) // e + digits
	} else if mantExp < -3 {
		exp = mantExp
		expLen = 2 + LenInt(int64(exp)) // e + minus + digits
	} else if mantExp < -1 {
		mantLen += -mantExp - 1 // extra zero between dot and first digit
	}

	// reserve space in b
	i := len(b)
	maxLen := 1 + mantLen + expLen // dot + mantissa digits + exponent
	if neg {
		maxLen++
	}
	if i+maxLen > cap(b) {
		b = append(b, make([]byte, maxLen)...)
	} else {
		b = b[:i+maxLen]
	}

	// write to string representation
	if neg {
		b[i] = '-'
		i++
	}

	// big conversion loop, start at the end and move to the front
	// initially print trailing zeros and remove them later on
	// for example if the first non-zero digit is three positions in front of the dot, it will overwrite the zeros with a positive exponent
	zero := true
	last := i + mantLen      // right-most position of digit that is non-zero + dot
	dot := last - prec - exp // position of dot
	j := last
	for mant > 0 {
		if j == dot {
			b[j] = '.'
			j--
		}
		newMant := mant / 10
		digit := mant - 10*newMant
		if zero && digit > 0 {
			// first non-zero digit, if we are still behind the dot we can trim the end to this position
			// otherwise trim to the dot (including the dot)
			if j > dot {
				i = j + 1
				// decrease negative exponent further to get rid of dot
				if exp < 0 {
					newExp := exp - (j - dot)
					// getting rid of the dot shouldn't lower the exponent to more digits (e.g. -9 -> -10)
					if LenInt(int64(newExp)) == LenInt(int64(exp)) {
						exp = newExp
						dot = j
						j--
						i--
					}
				}
			} else {
				i = dot
			}
			last = j
			zero = false
		}
		b[j] = '0' + byte(digit)
		j--
		mant = newMant
	}

	if j > dot {
		// extra zeros behind the dot
		for j > dot {
			b[j] = '0'
			j--
		}
		b[j] = '.'
	} else if last+3 < dot {
		// add positive exponent because we have 3 or more zeros in front of the dot
		i = last + 1
		exp = dot - last - 1
	} else if j == dot {
		// handle 0.1
		b[j] = '.'
	}

	// exponent
	if exp != 0 {
		if exp == 1 {
			// one trailing zero is shorter than `e1`
			b[i] = '0'
			i++
		} else if exp == 2 {
			// two trailing zeros are as short as `e2`
			b[i] = '0'
			b[i+1] = '0'
			i += 2
		} else {
			b[i] = 'e'
			i++
			if exp < 0 {
				b[i] = '-'
				i++
				exp = -exp
			}
			// write the exponent digits from right to left
			i += LenInt(int64(exp))
			j := i
			for exp > 0 {
				newExp := exp / 10
				digit := exp - 10*newExp
				j--
				b[j] = '0' + byte(digit)
				exp = newExp
			}
		}
	}
	return b[:i], true
}

View file

@ -1,108 +0,0 @@
package strconv
import (
"math"
)
// ParseInt parses a byte-slice and returns the integer it represents.
// If an invalid character is encountered, it will stop there.
//
// An optional leading `+` or `-` is accepted. The second return value is the number
// of bytes consumed, including the sign. (0, 0) is returned when b does not start
// with a number or when the number does not fit in an int64.
func ParseInt(b []byte) (int64, int) {
	i := 0
	neg := false
	if len(b) > 0 && (b[0] == '+' || b[0] == '-') {
		neg = b[0] == '-'
		i++
	}

	// Largest magnitude that fits for the parsed sign: MaxInt64, or MaxInt64+1 for
	// negative numbers (the magnitude of MinInt64).
	limit := uint64(math.MaxInt64)
	if neg {
		limit++
	}

	start := i
	n := uint64(0)
	for ; i < len(b); i++ {
		c := b[i]
		// Test for a digit first so that a large but valid number followed by any
		// other character is still returned.
		if c < '0' || c > '9' {
			break
		}
		d := uint64(c - '0')
		// n*10+d <= limit, written so that it cannot overflow itself
		if n > (limit-d)/10 {
			return 0, 0
		}
		n = n*10 + d
	}
	if i == start {
		return 0, 0
	}
	if neg {
		// for n == MaxInt64+1 the conversion wraps to MinInt64, whose negation is MinInt64
		return -int64(n), i
	}
	return int64(n), i
}
// ParseUint parses a byte-slice and returns the integer it represents.
// If an invalid character is encountered, it will stop there.
//
// The second return value is the number of bytes consumed. (0, 0) is returned when b
// does not start with a digit or when the number does not fit in a uint64.
func ParseUint(b []byte) (uint64, int) {
	i := 0
	n := uint64(0)
	for ; i < len(b); i++ {
		c := b[i]
		// Test for a digit first so that a large but valid number followed by any
		// other character is still returned.
		if c < '0' || c > '9' {
			break
		}
		d := uint64(c - '0')
		// n*10+d <= MaxUint64, written so that it cannot overflow itself
		if n > (math.MaxUint64-d)/10 {
			return 0, 0
		}
		n = n*10 + d
	}
	return n, i
}
// LenInt returns the number of decimal digits needed to write i. The sign is not
// counted, so negative numbers give the same result as their absolute value.
func LenInt(i int64) int {
	if i == -9223372036854775808 {
		// cannot be negated; its magnitude has 19 digits
		return 19
	}
	if i < 0 {
		i = -i
	}
	digits := 1
	for i >= 10 {
		i /= 10
		digits++
	}
	return digits
}

View file

@ -1,83 +0,0 @@
package strconv
// AppendPrice will append an int64 formatted as a price, where the int64 is the price in cents.
// It does not display whether a price is negative or not.
//
// When dec is true the two cent digits are written after decSeparator. When dec is
// false only the whole amount is written, rounded up when the first cent digit is 5
// or more. milSeparator is inserted between every group of three digits of the whole
// amount. The extended slice is returned.
func AppendPrice(b []byte, price int64, dec bool, milSeparator byte, decSeparator byte) []byte {
	if price < 0 {
		if price == -9223372036854775808 {
			// this value cannot be negated, so its digits are written out from a template
			x := []byte("92 233 720 368 547 758 08")
			x[2] = milSeparator
			x[6] = milSeparator
			x[10] = milSeparator
			x[14] = milSeparator
			x[18] = milSeparator
			x[22] = decSeparator
			return append(b, x...)
		}
		price = -price
	}

	// rounding
	if !dec {
		firstDec := (price / 10) % 10
		if firstDec >= 5 {
			price += 100
		}
	}

	// calculate size
	n := LenInt(price) - 2 // digits of the whole amount
	if n > 0 {
		n += (n - 1) / 3 // mil separator
	} else {
		n = 1 // a single `0` for amounts below one whole unit
	}
	if dec {
		n += 2 + 1 // decimals + dec separator
	}

	// resize byte slice
	i := len(b)
	if i+n > cap(b) {
		b = append(b, make([]byte, n)...)
	} else {
		b = b[:i+n]
	}

	// print fractional-part
	i += n - 1 // i now points at the last byte; everything below is written right to left
	if dec {
		for j := 0; j < 2; j++ {
			c := byte(price%10) + '0'
			price /= 10
			b[i] = c
			i--
		}
		b[i] = decSeparator
		i--
	} else {
		price /= 100
	}

	if price == 0 {
		b[i] = '0'
		return b
	}

	// print integer-part
	j := 0 // digits written since the last separator
	for price > 0 {
		if j == 3 {
			b[i] = milSeparator
			i--
			j = 0
		}

		c := byte(price%10) + '0'
		price /= 10
		b[i] = c
		i--
		j++
	}
	return b
}

View file

@ -1,486 +0,0 @@
package parse
import (
"bytes"
"fmt"
"strconv"
"unicode"
)
// Copy returns a newly allocated slice holding the same bytes as src. The result
// never shares memory with src and is non-nil even for a nil src.
func Copy(src []byte) (dst []byte) {
	dst = append(make([]byte, 0, len(src)), src...)
	return dst
}
// ToLower converts all characters in the byte slice from A-Z to a-z. The conversion
// is done in place; the returned slice is src itself. Other bytes are left untouched.
func ToLower(src []byte) []byte {
	for i := range src {
		if c := src[i]; 'A' <= c && c <= 'Z' {
			src[i] = c + ('a' - 'A')
		}
	}
	return src
}
// EqualFold returns true when s matches case-insensitively the targetLower (which must be lowercase).
// Only the ASCII letters A-Z in s are folded; all other bytes must match exactly.
func EqualFold(s, targetLower []byte) bool {
	if len(s) != len(targetLower) {
		return false
	}
	for i := range targetLower {
		got, want := s[i], targetLower[i]
		if got == want {
			continue
		}
		isUpper := 'A' <= got && got <= 'Z'
		if !isUpper || got+('a'-'A') != want {
			return false
		}
	}
	return true
}
// Printable returns a printable string for the given rune: the rune itself when it is
// graphic, a two-digit hex value such as 0x0A for other runes below 128, and the
// U+XXXX notation for everything else.
func Printable(r rune) string {
	switch {
	case unicode.IsGraphic(r):
		return fmt.Sprintf("%c", r)
	case r < 128:
		return fmt.Sprintf("0x%02X", r)
	default:
		return fmt.Sprintf("%U", r)
	}
}
// whitespaceTable marks the bytes that count as whitespace: tab, new line, form feed,
// carriage return and space. All other entries are false.
var whitespaceTable = [256]bool{
	'\t': true,
	'\n': true,
	'\f': true,
	'\r': true,
	' ':  true,
}
// IsWhitespace returns true for space, \n, \r, \t, \f.
// It is a single lookup in whitespaceTable.
func IsWhitespace(c byte) bool {
	return whitespaceTable[c]
}
// newlineTable marks the bytes that count as a newline: new line and carriage return.
// All other entries are false.
var newlineTable = [256]bool{
	'\n': true,
	'\r': true,
}
// IsNewline returns true for \n, \r.
// It is a single lookup in newlineTable.
func IsNewline(c byte) bool {
	return newlineTable[c]
}
// IsAllWhitespace returns true when the entire byte slice consists of space, \n, \r, \t, \f.
// An empty slice also yields true.
func IsAllWhitespace(b []byte) bool {
	for i := 0; i < len(b); i++ {
		if !IsWhitespace(b[i]) {
			return false
		}
	}
	return true
}
// TrimWhitespace removes any leading and trailing whitespace characters. The result
// is a sub-slice of b; nothing is copied. A slice of only whitespace gives an empty
// slice positioned at the end of b.
func TrimWhitespace(b []byte) []byte {
	lo, hi := 0, len(b)
	for lo < hi && IsWhitespace(b[lo]) {
		lo++
	}
	for hi > lo && IsWhitespace(b[hi-1]) {
		hi--
	}
	return b[lo:hi]
}
// ReplaceMultipleWhitespace replaces character series of space, \n, \t, \f, \r into a single space or newline (when the series contained a \n or \r).
// The replacement is done in place: b is overwritten and the returned slice is a
// (possibly shorter) part of b. A single whitespace character is also normalized to
// a space or newline.
func ReplaceMultipleWhitespace(b []byte) []byte {
	j, k := 0, 0 // j is write position, k is start of next text section
	for i := 0; i < len(b); i++ {
		if IsWhitespace(b[i]) {
			start := i
			newline := IsNewline(b[i])
			i++
			for ; i < len(b) && IsWhitespace(b[i]); i++ {
				if IsNewline(b[i]) {
					newline = true
				}
			}
			// the first byte of the run becomes the single replacement character
			if newline {
				b[start] = '\n'
			} else {
				b[start] = ' '
			}
			if 1 < i-start { // more than one whitespace
				if j == 0 {
					// first collapsed run: everything up to and including start is already in place
					j = start + 1
				} else {
					// move the text since the previous collapsed run, plus the replacement character
					j += copy(b[j:], b[k:start+1])
				}
				k = i
			}
		}
	}
	if j == 0 {
		// nothing was collapsed
		return b
	} else if j == 1 { // only if starts with whitespace
		// put the replacement character directly in front of the remaining text instead of moving the text
		b[k-1] = b[0]
		return b[k-1:]
	} else if k < len(b) {
		// move the text after the last collapsed run
		j += copy(b[j:], b[k:])
	}
	return b[:j]
}
// replaceEntities rewrites the single character reference that starts at b[i]
// and returns the (possibly shortened) slice together with the index of the
// last byte it dealt with, so that the caller can safely do i++ to continue
// without missing any entities.
//
// It assumes that b[i] == '&' and that i+3 < len(b). Three forms are handled:
//   - hexadecimal references (&#x41;): a value below 128 becomes the raw byte, a
//     larger value below 10000 is rewritten in decimal form (&#NNN;), and
//     anything else is left untouched;
//   - decimal references (&#65;): a value below 128 becomes the raw byte,
//     anything else is left untouched;
//   - named references (&name;): replaced by entitiesMap[name] when present.
//
// When the replacement is a single byte that has an entry in revEntitiesMap,
// that entry is written instead (or the text is left alone when it already
// equals that entry). A reference that decodes to '&' stays encoded when the
// text after it could itself be read as an entity.
//
// The rewrite happens in place and the returned slice shares storage with b.
// NOTE: the in-place copy assumes a replacement is never longer than the
// reference it replaces.
func replaceEntities(b []byte, i int, entitiesMap map[string][]byte, revEntitiesMap map[byte][]byte) ([]byte, int) {
	// Longest HTML entity: CounterClockwiseContourIntegral.
	const MaxEntityLength = 31
	// Hexadecimal references at or above this value are left untouched.
	const maxHexValue = 10000

	var r []byte
	j := i + 1
	if b[j] == '#' {
		j++
		if b[j] == 'x' {
			j++
			c := 0
			for ; j < len(b) && (b[j] >= '0' && b[j] <= '9' || b[j] >= 'a' && b[j] <= 'f' || b[j] >= 'A' && b[j] <= 'F'); j++ {
				if maxHexValue <= c {
					// Already out of range: keep consuming digits so j ends up
					// behind the number, but stop accumulating. Without this a
					// long digit string wraps the int and comes out as a small
					// (or negative) value that would be decoded as a byte.
					continue
				}
				if b[j] <= '9' {
					c = c<<4 + int(b[j]-'0')
				} else if b[j] <= 'F' {
					c = c<<4 + int(b[j]-'A') + 10
				} else if b[j] <= 'f' {
					c = c<<4 + int(b[j]-'a') + 10
				}
			}
			if j <= i+3 || maxHexValue <= c {
				// No digits at all, or a value too large to rewrite.
				return b, j - 1
			}
			if c < 128 {
				r = []byte{byte(c)}
			} else {
				// Re-emit as a decimal reference.
				r = append(r, '&', '#')
				r = strconv.AppendInt(r, int64(c), 10)
				r = append(r, ';')
			}
		} else {
			c := 0
			// The c < 128 condition stops the scan as soon as the value is out
			// of range, which also keeps c from overflowing.
			for ; j < len(b) && c < 128 && b[j] >= '0' && b[j] <= '9'; j++ {
				c = c*10 + int(b[j]-'0')
			}
			if j <= i+2 || 128 <= c {
				// No digits at all, or not a single-byte value.
				return b, j - 1
			}
			r = []byte{byte(c)}
		}
	} else {
		// Named reference: scan up to the semicolon or the maximum name length.
		for ; j < len(b) && j-i-1 <= MaxEntityLength && b[j] != ';'; j++ {
		}
		if j <= i+1 || len(b) <= j {
			return b, j - 1
		}
		var ok bool
		r, ok = entitiesMap[string(b[i+1:j])]
		if !ok {
			return b, j
		}
	}

	// j is at the semicolon when the reference is properly terminated.
	n := j + 1 - i
	if j < len(b) && b[j] == ';' && 2 < n {
		if len(r) == 1 {
			if q, ok := revEntitiesMap[r[0]]; ok {
				if len(q) == len(b[i:j+1]) && bytes.Equal(q, b[i:j+1]) {
					// Already written in the preferred form.
					return b, j
				}
				r = q
			} else if r[0] == '&' {
				// check if for example &amp; is followed by something that could potentially be an entity
				k := j + 1
				if k < len(b) && b[k] == '#' {
					k++
				}
				for ; k < len(b) && k-j <= MaxEntityLength && (b[k] >= '0' && b[k] <= '9' || b[k] >= 'a' && b[k] <= 'z' || b[k] >= 'A' && b[k] <= 'Z'); k++ {
				}
				if k < len(b) && b[k] == ';' {
					// Decoding would create a new entity; leave everything as is.
					return b, k
				}
			}
		}
		// Overwrite the reference with r and close the gap behind it.
		copy(b[i:], r)
		copy(b[i+len(r):], b[j+1:])
		b = b[:len(b)-n+len(r)]
		return b, i + len(r) - 1
	}
	// Not terminated by a semicolon: leave the text untouched.
	return b, i
}
// ReplaceEntities walks b and hands every '&' that has at least three bytes
// after it to replaceEntities, which rewrites entities (such as &quot;) to
// their respective unencoded bytes. The slice is modified in place and the
// possibly shortened result is returned.
func ReplaceEntities(b []byte, entitiesMap map[string][]byte, revEntitiesMap map[byte][]byte) []byte {
	pos := 0
	for pos < len(b) {
		if pos+3 < len(b) && b[pos] == '&' {
			// replaceEntities reports the last byte it handled, so stepping
			// one further resumes right behind it.
			b, pos = replaceEntities(b, pos, entitiesMap, revEntitiesMap)
		}
		pos++
	}
	return b
}
// ReplaceMultipleWhitespaceAndEntities is a combination of ReplaceMultipleWhitespace and ReplaceEntities. It is faster than executing both sequentially.
//
// Every run of whitespace bytes is reduced to a single '\n' (when the run
// contains a newline byte) or ' ', and every '&' that has at least three bytes
// after it is handed to replaceEntities. The work is done in place on b and
// the returned slice shares storage with it.
func ReplaceMultipleWhitespaceAndEntities(b []byte, entitiesMap map[string][]byte, revEntitiesMap map[byte][]byte) []byte {
	j, k := 0, 0 // j is write position, k is start of next text section
	for i := 0; i < len(b); i++ {
		if IsWhitespace(b[i]) {
			start := i
			newline := IsNewline(b[i])
			i++
			// Consume the rest of the run; i ends on the first non-whitespace byte.
			for ; i < len(b) && IsWhitespace(b[i]); i++ {
				if IsNewline(b[i]) {
					newline = true
				}
			}
			// The first byte of the run becomes the single separator.
			if newline {
				b[start] = '\n'
			} else {
				b[start] = ' '
			}
			if 1 < i-start { // more than one whitespace
				if j == 0 {
					// First collapsed run: everything up to and including the
					// separator is already in its final position.
					j = start + 1
				} else {
					// Move the text since the previous collapsed run, plus the
					// separator, down to the write position.
					j += copy(b[j:], b[k:start+1])
				}
				k = i
			}
		}
		// Not an else-branch: after a whitespace run i already points at the
		// byte behind it, which may itself start an entity.
		if i+3 < len(b) && b[i] == '&' {
			b, i = replaceEntities(b, i, entitiesMap, revEntitiesMap)
		}
	}
	if j == 0 {
		// No run was longer than one byte, nothing to compact.
		return b
	} else if j == 1 { // only if starts with whitespace
		// Only a leading run was collapsed: put its separator right before the
		// remaining text instead of shifting the text.
		b[k-1] = b[0]
		return b[k-1:]
	} else if k < len(b) {
		// Move the text behind the last collapsed run into place.
		j += copy(b[j:], b[k:])
	}
	return b[:j]
}
// URLEncodingTable is a charmap for which characters need escaping in the URL encoding scheme.
// It is indexed by byte value; true means the byte must be escaped. Each row
// covers eight consecutive byte values. The only entries left false are the
// ASCII letters and digits and the characters ! ' ( ) * - . _ ~
var URLEncodingTable = [256]bool{
	// ASCII
	// 0x00-0x1F: control characters
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	// 0x20-0x3F: punctuation and digits
	true, false, true, true, true, true, true, false, // space, ", #, $, %, &
	false, false, false, true, true, false, false, true, // +, comma, /
	false, false, false, false, false, false, false, false,
	false, false, true, true, true, true, true, true, // :, ;, <, =, >, ?
	// 0x40-0x5F: @, upper-case letters and punctuation
	true, false, false, false, false, false, false, false, // @
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, true, true, true, true, false, // [, \, ], ^
	// 0x60-0x7F: `, lower-case letters and punctuation
	true, false, false, false, false, false, false, false, // `
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, true, true, true, false, true, // {, |, }, DEL
	// non-ASCII
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
}
// DataURIEncodingTable is a charmap for which characters need escaping in the Data URI encoding scheme.
// It is indexed by byte value; true means the byte must be escaped. Each row
// covers eight consecutive byte values.
// Escape only non-printable characters, unicode and %, #, &.
// IE11 additionally requires encoding of \, [, ], ", <, >, `, {, }, |, ^ which is not required by Chrome, Firefox, Opera, Edge, Safari, Yandex.
// To pass the HTML validator, restricted URL characters must be escaped: non-printable characters, space, <, >, #, %, ".
var DataURIEncodingTable = [256]bool{
	// ASCII
	// 0x00-0x1F: control characters
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	// 0x20-0x3F: punctuation and digits
	true, false, true, true, false, true, true, false, // space, ", #, %, &
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, true, false, true, false, // <, >
	// 0x40-0x5F: @, upper-case letters and punctuation
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, true, true, true, true, false, // [, \, ], ^
	// 0x60-0x7F: `, lower-case letters and punctuation
	true, false, false, false, false, false, false, false, // `
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, true, true, true, false, true, // {, |, }, DEL
	// non-ASCII
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
}
// EncodeURL encodes bytes using the URL encoding scheme: every byte c of b for
// which table[c] is true is replaced by '%' followed by its two-digit
// upper-case hexadecimal value. All other bytes are kept as they are.
//
// The result is built by appending to b, so it reuses b's storage when the
// capacity allows and b must be considered overwritten. When nothing needs
// escaping, b is returned unchanged.
func EncodeURL(b []byte, table [256]bool) []byte {
	const hexDigits = "0123456789ABCDEF"

	// First pass: find out how much the slice has to grow.
	escapes := 0
	for _, c := range b {
		if table[c] {
			escapes++
		}
	}
	if escapes == 0 {
		return b
	}

	// Grow once, then fill from the back. The write position never drops
	// below the read position, so unread input is not overwritten, and the
	// emitted hex digits are never looked up in table again.
	n := len(b)
	b = append(b, make([]byte, 2*escapes)...)
	w := len(b)
	for r := n - 1; r >= 0; r-- {
		c := b[r]
		if table[c] {
			w -= 3
			b[w+0] = '%'
			b[w+1] = hexDigits[c>>4]
			b[w+2] = hexDigits[c&15]
		} else {
			w--
			b[w] = c
		}
	}
	return b
}
// DecodeURL decodes a URL that was encoded using the URL encoding scheme.
//
// A '%' followed by two hexadecimal digits (either case) is replaced by the
// byte they denote, but only when that byte is below 128; other escapes and
// malformed sequences are kept as they are. Every literal '+' becomes a space.
// Decoded bytes are not interpreted again, so "%2B" yields '+' and "%2541"
// yields "%41".
//
// The slice is rewritten in place in a single pass and the result shares
// storage with b.
func DecodeURL(b []byte) []byte {
	// hexValue returns the numeric value of a hexadecimal digit.
	hexValue := func(c byte) (byte, bool) {
		switch {
		case '0' <= c && c <= '9':
			return c - '0', true
		case 'a' <= c && c <= 'f':
			return c - 'a' + 10, true
		case 'A' <= c && c <= 'F':
			return c - 'A' + 10, true
		}
		return 0, false
	}

	// w never passes r, so bytes that still have to be read are not
	// overwritten and no tail needs to be shifted per decoded escape.
	w := 0
	for r := 0; r < len(b); r++ {
		c := b[r]
		if c == '%' && r+2 < len(b) {
			hi, okHi := hexValue(b[r+1])
			lo, okLo := hexValue(b[r+2])
			// hi < 8 restricts decoding to values below 128.
			if okHi && okLo && hi < 8 {
				c = hi<<4 | lo
				r += 2
			}
		} else if c == '+' {
			c = ' '
		}
		b[w] = c
		w++
	}
	return b[:w]
}

10
vendor/modules.txt vendored
View file

@ -547,16 +547,6 @@ github.com/superseriousbusiness/oauth2/v4/generates
github.com/superseriousbusiness/oauth2/v4/manage
github.com/superseriousbusiness/oauth2/v4/models
github.com/superseriousbusiness/oauth2/v4/server
# github.com/tdewolff/minify/v2 v2.11.2
## explicit; go 1.13
github.com/tdewolff/minify/v2
github.com/tdewolff/minify/v2/html
# github.com/tdewolff/parse/v2 v2.5.29
## explicit; go 1.13
github.com/tdewolff/parse/v2
github.com/tdewolff/parse/v2/buffer
github.com/tdewolff/parse/v2/html
github.com/tdewolff/parse/v2/strconv
# github.com/tmthrgd/go-hex v0.0.0-20190904060850-447a3041c3bc
## explicit
github.com/tmthrgd/go-hex