diff --git a/internal/processing/account/account.go b/internal/processing/account/account.go
index 4432fd5f3..06caffaec 100644
--- a/internal/processing/account/account.go
+++ b/internal/processing/account/account.go
@@ -41,7 +41,7 @@ type Processor struct {
mediaManager *media.Manager
oauthServer oauth.Server
filter *visibility.Filter
- formatter text.Formatter
+ formatter *text.Formatter
federator federation.Federator
parseMention gtsmodel.ParseMentionFunc
}
diff --git a/internal/processing/status/create.go b/internal/processing/status/create.go
index d671ea8c4..4d4f7c574 100644
--- a/internal/processing/status/create.go
+++ b/internal/processing/status/create.go
@@ -277,7 +277,7 @@ func processLanguage(ctx context.Context, form *apimodel.AdvancedStatusCreateFor
return nil
}
-func processContent(ctx context.Context, dbService db.DB, formatter text.Formatter, parseMention gtsmodel.ParseMentionFunc, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error {
+func processContent(ctx context.Context, dbService db.DB, formatter *text.Formatter, parseMention gtsmodel.ParseMentionFunc, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error {
// if there's nothing in the status at all we can just return early
if form.Status == "" {
status.Content = ""
diff --git a/internal/processing/status/status.go b/internal/processing/status/status.go
index bd8457eb8..432f945fc 100644
--- a/internal/processing/status/status.go
+++ b/internal/processing/status/status.go
@@ -31,7 +31,7 @@ type Processor struct {
federator federation.Federator
converter *typeutils.Converter
filter *visibility.Filter
- formatter text.Formatter
+ formatter *text.Formatter
parseMention gtsmodel.ParseMentionFunc
}
diff --git a/internal/text/emojionly.go b/internal/text/emojionly.go
deleted file mode 100644
index f4f200b21..000000000
--- a/internal/text/emojionly.go
+++ /dev/null
@@ -1,70 +0,0 @@
-// GoToSocial
-// Copyright (C) GoToSocial Authors admin@gotosocial.org
-// SPDX-License-Identifier: AGPL-3.0-or-later
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-package text
-
-import (
- "bytes"
- "context"
-
- "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
- "github.com/superseriousbusiness/gotosocial/internal/log"
- "github.com/yuin/goldmark"
- "github.com/yuin/goldmark/parser"
- "github.com/yuin/goldmark/renderer/html"
- "github.com/yuin/goldmark/util"
-)
-
-func (f *formatter) FromPlainEmojiOnly(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult {
- result := &FormatResult{
- Mentions: []*gtsmodel.Mention{},
- Tags: []*gtsmodel.Tag{},
- Emojis: []*gtsmodel.Emoji{},
- }
- // parse markdown text into html, using custom renderer to add hashtag/mention links
- md := goldmark.New(
- goldmark.WithRendererOptions(
- html.WithXHTML(),
- html.WithHardWraps(),
- ),
- goldmark.WithParser(
- parser.NewParser(
- parser.WithBlockParsers(
- util.Prioritized(newPlaintextParser(), 500),
- ),
- ),
- ),
- goldmark.WithExtensions(
- &customRenderer{f, ctx, pmf, authorID, statusID, true, result},
- ),
- )
-
- var htmlContentBytes bytes.Buffer
- err := md.Convert([]byte(plain), &htmlContentBytes)
- if err != nil {
- log.Errorf(ctx, "error formatting plaintext to HTML: %s", err)
- }
- result.HTML = htmlContentBytes.String()
-
- // clean anything dangerous out of the HTML
- result.HTML = SanitizeToHTML(result.HTML)
-
- // shrink ray
- result.HTML = MinifyHTML(result.HTML)
-
- return result
-}
diff --git a/internal/text/formatter.go b/internal/text/formatter.go
index 0e5e0b554..8f7e6e1f6 100644
--- a/internal/text/formatter.go
+++ b/internal/text/formatter.go
@@ -24,29 +24,25 @@ import (
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)
-// Formatter wraps some logic and functions for parsing statuses and other text input into nice html.
-// Each of the member functions returns a struct containing the formatted HTML and any tags, mentions, and
-// emoji that were found in the text.
-type Formatter interface {
- // FromPlain parses an HTML text from a plaintext.
- FromPlain(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult
- // FromPlainNoParagraph parses an HTML text from a plaintext, without wrapping the resulting text in <p> tags.
- FromPlainNoParagraph(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult
- // FromMarkdown parses an HTML text from a markdown-formatted text.
- FromMarkdown(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, md string) *FormatResult
- // FromPlainEmojiOnly parses an HTML text from a plaintext, only parsing emojis and not mentions etc.
- FromPlainEmojiOnly(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult
-}
+// FormatFunc is fulfilled by FromPlain,
+// FromPlainNoParagraph, and FromMarkdown.
+type FormatFunc func(
+ ctx context.Context,
+ parseMention gtsmodel.ParseMentionFunc,
+ authorID string,
+ statusID string,
+ text string,
+) *FormatResult
-type FormatFunc func(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, text string) *FormatResult
-
-type formatter struct {
+// Formatter wraps logic and functions for parsing
+// statuses and other text input into nice html.
+type Formatter struct {
db db.DB
}
-// NewFormatter returns a new Formatter interface for parsing statuses and other text input into nice html.
-func NewFormatter(db db.DB) Formatter {
- return &formatter{
+// NewFormatter returns a new Formatter.
+func NewFormatter(db db.DB) *Formatter {
+ return &Formatter{
db: db,
}
}
diff --git a/internal/text/formatter_test.go b/internal/text/formatter_test.go
index 403ba8e8e..cce9970b2 100644
--- a/internal/text/formatter_test.go
+++ b/internal/text/formatter_test.go
@@ -48,7 +48,7 @@ type TextStandardTestSuite struct {
testEmojis map[string]*gtsmodel.Emoji
// module being tested
- formatter text.Formatter
+ formatter *text.Formatter
}
func (suite *TextStandardTestSuite) SetupSuite() {
@@ -85,14 +85,32 @@ func (suite *TextStandardTestSuite) TearDownTest() {
testrig.StandardDBTeardown(suite.db)
}
-func (suite *TextStandardTestSuite) FromMarkdown(text string) *text.FormatResult {
- return suite.formatter.FromMarkdown(context.Background(), suite.parseMention, suite.testAccounts["local_account_1"].ID, "status_ID", text)
+func (suite *TextStandardTestSuite) FromMarkdown(input string) *text.FormatResult {
+ return suite.formatter.FromMarkdown(
+ context.Background(),
+ suite.parseMention,
+ suite.testAccounts["local_account_1"].ID,
+ "dummy_status_ID",
+ input,
+ )
}
-func (suite *TextStandardTestSuite) FromPlain(text string) *text.FormatResult {
- return suite.formatter.FromPlain(context.Background(), suite.parseMention, suite.testAccounts["local_account_1"].ID, "status_ID", text)
+func (suite *TextStandardTestSuite) FromPlain(input string) *text.FormatResult {
+ return suite.formatter.FromPlain(
+ context.Background(),
+ suite.parseMention,
+ suite.testAccounts["local_account_1"].ID,
+ "dummy_status_ID",
+ input,
+ )
}
-func (suite *TextStandardTestSuite) FromPlainNoParagraph(text string) *text.FormatResult {
- return suite.formatter.FromPlainNoParagraph(context.Background(), suite.parseMention, suite.testAccounts["local_account_1"].ID, "status_ID", text)
+func (suite *TextStandardTestSuite) FromPlainNoParagraph(input string) *text.FormatResult {
+ return suite.formatter.FromPlainNoParagraph(
+ context.Background(),
+ suite.parseMention,
+ suite.testAccounts["local_account_1"].ID,
+ "dummy_status_ID",
+ input,
+ )
}
diff --git a/internal/text/goldmark_custom_renderer.go b/internal/text/goldmark_custom_renderer.go
new file mode 100644
index 000000000..438692577
--- /dev/null
+++ b/internal/text/goldmark_custom_renderer.go
@@ -0,0 +1,423 @@
+// GoToSocial
+// Copyright (C) GoToSocial Authors admin@gotosocial.org
+// SPDX-License-Identifier: AGPL-3.0-or-later
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package text
+
+import (
+ "context"
+ "errors"
+ "strings"
+
+ "github.com/superseriousbusiness/gotosocial/internal/db"
+ "github.com/superseriousbusiness/gotosocial/internal/gtscontext"
+ "github.com/superseriousbusiness/gotosocial/internal/gtserror"
+ "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+ "github.com/superseriousbusiness/gotosocial/internal/id"
+ "github.com/superseriousbusiness/gotosocial/internal/log"
+ "github.com/superseriousbusiness/gotosocial/internal/uris"
+ "github.com/yuin/goldmark"
+ "github.com/yuin/goldmark/ast"
+ "github.com/yuin/goldmark/parser"
+ "github.com/yuin/goldmark/renderer"
+ mdutil "github.com/yuin/goldmark/util"
+)
+
+// customRenderer fulfils the following goldmark interfaces:
+//
+// - renderer.NodeRenderer
+// - goldmark.Extender.
+//
+// It is used as a goldmark extension by FromMarkdown and
+// (variants of) FromPlain.
+//
+// The custom renderer extracts and re-renders mentions, hashtags,
+// and emojis that are encountered during parsing, writing out valid
+// HTML representations of these elements.
+//
+// The customRenderer has the following side effects:
+//
+// - May use its db connection to retrieve existing and/or
+// store new mentions, hashtags, and emojis.
+// - May update its *FormatResult to append discovered
+// mentions, hashtags, and emojis to it.
+type customRenderer struct {
+ ctx context.Context
+ db db.DB
+ parseMention gtsmodel.ParseMentionFunc
+ accountID string
+ statusID string
+ emojiOnly bool
+ result *FormatResult
+}
+
+func (cr *customRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
+ reg.Register(kindMention, cr.renderMention)
+ reg.Register(kindHashtag, cr.renderHashtag)
+ reg.Register(kindEmoji, cr.renderEmoji)
+}
+
+func (cr *customRenderer) Extend(markdown goldmark.Markdown) {
+ // 1000 is set as the lowest
+ // priority, but it's arbitrary.
+ const prio = 1000
+
+ if cr.emojiOnly {
+ // Parse + render only emojis.
+ markdown.Parser().AddOptions(
+ parser.WithInlineParsers(
+ mdutil.Prioritized(new(emojiParser), prio),
+ ),
+ )
+ } else {
+ // Parse + render emojis, mentions, hashtags.
+ markdown.Parser().AddOptions(parser.WithInlineParsers(
+ mdutil.Prioritized(new(emojiParser), prio),
+ mdutil.Prioritized(new(mentionParser), prio),
+ mdutil.Prioritized(new(hashtagParser), prio),
+ ))
+ }
+
+ // Add this custom renderer.
+ markdown.Renderer().AddOptions(
+ renderer.WithNodeRenderers(
+ mdutil.Prioritized(cr, prio),
+ ),
+ )
+}
+
+/*
+ MENTION RENDERING STUFF
+*/
+
+// renderMention takes a mention
+// ast.Node and renders it as HTML.
+func (cr *customRenderer) renderMention(
+ w mdutil.BufWriter,
+ source []byte,
+ node ast.Node,
+ entering bool,
+) (ast.WalkStatus, error) {
+ if !entering {
+ return ast.WalkSkipChildren, nil
+ }
+
+ // This function is registered
+ // only for kindMention, and
+ // should not be called for
+ // any other node type.
+ n, ok := node.(*mention)
+ if !ok {
+ log.Panic(cr.ctx, "type assertion failed")
+ }
+
+ // Get raw mention string eg., '@someone@domain.org'.
+ text := string(n.Segment.Value(source))
+
+ // Handle mention and get text to render.
+ text = cr.handleMention(text)
+
+ // Write returned text into HTML.
+ if _, err := w.WriteString(text); err != nil {
+ // We don't have much recourse if this fails.
+ log.Errorf(cr.ctx, "error writing HTML: %s", err)
+ }
+
+ return ast.WalkSkipChildren, nil
+}
+
+// handleMention takes a string in the form '@username@domain.com'
+// or '@localusername', and does the following:
+//
+// - Parse the mention string into a *gtsmodel.Mention.
+// - Insert mention into database if necessary.
+// - Add mention to cr.result.Mentions slice.
+// - Return mention rendered as nice HTML.
+//
+// If the mention is invalid or cannot be created,
+// the unaltered input text will be returned instead.
+func (cr *customRenderer) handleMention(text string) string {
+ mention, err := cr.parseMention(cr.ctx, text, cr.accountID, cr.statusID)
+ if err != nil {
+ log.Errorf(cr.ctx, "error parsing mention %s from status: %s", text, err)
+ return text
+ }
+
+ if cr.statusID != "" {
+ if err := cr.db.PutMention(cr.ctx, mention); err != nil {
+ log.Errorf(cr.ctx, "error putting mention in db: %s", err)
+ return text
+ }
+ }
+
+ // Append mention to result if not done already.
+ //
+ // This prevents multiple occurrences of mention
+ // in the same status generating multiple
+ // entries for the same mention in result.
+ func() {
+ for _, m := range cr.result.Mentions {
+ if mention.TargetAccountID == m.TargetAccountID {
+ // Already appended.
+ return
+ }
+ }
+
+ // Not appended yet.
+ cr.result.Mentions = append(cr.result.Mentions, mention)
+ }()
+
+ if mention.TargetAccount == nil {
+ // Fetch mention target account if not yet populated.
+ mention.TargetAccount, err = cr.db.GetAccountByID(
+ gtscontext.SetBarebones(cr.ctx),
+ mention.TargetAccountID,
+ )
+ if err != nil {
+ log.Errorf(cr.ctx, "error populating mention target account: %v", err)
+ return text
+ }
+ }
+
+ // Replace the mention with the formatted mention content,
+ // eg. `@someone@domain.org` becomes:
+ // `@someone`
+ var b strings.Builder
+ b.WriteString(`@`)
+ b.WriteString(mention.TargetAccount.Username)
+ b.WriteString(``)
+ return b.String()
+}
+
+/*
+ HASHTAG RENDERING STUFF
+*/
+
+// renderHashtag takes a hashtag
+// ast.Node and renders it as HTML.
+func (cr *customRenderer) renderHashtag(
+ w mdutil.BufWriter,
+ source []byte,
+ node ast.Node,
+ entering bool,
+) (ast.WalkStatus, error) {
+ if !entering {
+ return ast.WalkSkipChildren, nil
+ }
+
+ // This function is registered
+ // only for kindHashtag, and
+ // should not be called for
+ // any other node type.
+ n, ok := node.(*hashtag)
+ if !ok {
+ log.Panic(cr.ctx, "type assertion failed")
+ }
+
+ // Get raw hashtag string eg., '#SomeHashtag'.
+ text := string(n.Segment.Value(source))
+
+ // Handle hashtag and get text to render.
+ text = cr.handleHashtag(text)
+
+ // Write returned text into HTML.
+ if _, err := w.WriteString(text); err != nil {
+ // We don't have much recourse if this fails.
+ log.Errorf(cr.ctx, "error writing HTML: %s", err)
+ }
+
+ return ast.WalkSkipChildren, nil
+}
+
+// handleHashtag takes a string in the form '#SomeHashtag',
+// and does the following:
+//
+// - Normalize + validate the hashtag.
+// - Get or create hashtag in the db.
+// - Add hashtag to cr.result.Tags slice.
+// - Return hashtag rendered as nice HTML.
+//
+// If the hashtag is invalid or cannot be retrieved,
+// the unaltered input text will be returned instead.
+func (cr *customRenderer) handleHashtag(text string) string {
+ normalized, ok := NormalizeHashtag(text)
+ if !ok {
+ // Not a valid hashtag.
+ return text
+ }
+
+ getOrCreateHashtag := func(name string) (*gtsmodel.Tag, error) {
+ var (
+ tag *gtsmodel.Tag
+ err error
+ )
+
+ // Check if we have a tag with this name already.
+ tag, err = cr.db.GetTagByName(cr.ctx, name)
+ if err != nil && !errors.Is(err, db.ErrNoEntries) {
+ return nil, gtserror.Newf("db error getting tag %s: %w", name, err)
+ }
+
+ if tag != nil {
+ // We had it!
+ return tag, nil
+ }
+
+ // We didn't have a tag with
+ // this name, create one.
+ tag = &gtsmodel.Tag{
+ ID: id.NewULID(),
+ Name: name,
+ }
+
+ if err = cr.db.PutTag(cr.ctx, tag); err != nil {
+ return nil, gtserror.Newf("db error putting new tag %s: %w", name, err)
+ }
+
+ return tag, nil
+ }
+
+ tag, err := getOrCreateHashtag(normalized)
+ if err != nil {
+ log.Errorf(cr.ctx, "error generating hashtags from status: %s", err)
+ return text
+ }
+
+ // Append tag to result if not done already.
+ //
+ // This prevents multiple uses of a tag in
+ // the same status generating multiple
+ // entries for the same tag in result.
+ func() {
+ for _, t := range cr.result.Tags {
+ if tag.ID == t.ID {
+ // Already appended.
+ return
+ }
+ }
+
+ // Not appended yet.
+ cr.result.Tags = append(cr.result.Tags, tag)
+ }()
+
+ // Replace tag with the formatted tag content, eg. `#SomeHashtag` becomes:
+ // `#SomeHashtag`
+ var b strings.Builder
+ b.WriteString(`#`)
+ b.WriteString(normalized)
+ b.WriteString(``)
+
+ return b.String()
+}
+
+/*
+ EMOJI RENDERING STUFF
+*/
+
+// renderEmoji doesn't actually turn an emoji
+// ast.Node into HTML, but instead only adds it to
+// the custom renderer results for later processing.
+func (cr *customRenderer) renderEmoji(
+ w mdutil.BufWriter,
+ source []byte,
+ node ast.Node,
+ entering bool,
+) (ast.WalkStatus, error) {
+ if !entering {
+ return ast.WalkSkipChildren, nil
+ }
+
+ // This function is registered
+ // only for kindEmoji, and
+ // should not be called for
+ // any other node type.
+ n, ok := node.(*emoji)
+ if !ok {
+ log.Panic(cr.ctx, "type assertion failed")
+ }
+
+ // Get raw emoji string eg., ':boobs:'.
+ text := string(n.Segment.Value(source))
+
+ // Handle emoji and get text to render.
+ text = cr.handleEmoji(text)
+
+ // Write returned text into HTML.
+ if _, err := w.WriteString(text); err != nil {
+ // We don't have much recourse if this fails.
+ log.Errorf(cr.ctx, "error writing HTML: %s", err)
+ }
+
+ return ast.WalkSkipChildren, nil
+}
+
+// handleEmoji takes a string in the form ':some_emoji:',
+// and does the following:
+//
+// - Try to get emoji from the db.
+// - Add emoji to cr.result.Emojis slice if found and useable.
+//
+// This function will always return the unaltered input
+// text, since emojification is handled elsewhere.
+func (cr *customRenderer) handleEmoji(text string) string {
+ // Check if text points to a valid
+ // local emoji by using its shortcode.
+ //
+ // The shortcode is the text
+ // between enclosing ':' chars.
+ shortcode := strings.Trim(text, ":")
+
+ // Try to fetch emoji as a locally stored emoji.
+ emoji, err := cr.db.GetEmojiByShortcodeDomain(cr.ctx, shortcode, "")
+ if err != nil && !errors.Is(err, db.ErrNoEntries) {
+ log.Errorf(cr.ctx, "db error getting local emoji with shortcode %s: %s", shortcode, err)
+ }
+
+ if emoji == nil {
+ // No emoji found for this
+ // shortcode, oh well!
+ return text
+ }
+
+ if *emoji.Disabled || !*emoji.VisibleInPicker {
+ // Emoji was found but not useable.
+ return text
+ }
+
+ // Emoji was found and useable.
+ // Append to result if not done already.
+ //
+ // This prevents multiple uses of an emoji
+ // in the same status generating multiple
+ // entries for the same emoji in result.
+ func() {
+ for _, e := range cr.result.Emojis {
+ if emoji.Shortcode == e.Shortcode {
+ // Already appended.
+ return
+ }
+ }
+
+ // Not appended yet.
+ cr.result.Emojis = append(cr.result.Emojis, emoji)
+ }()
+
+ return text
+}
diff --git a/internal/text/goldmark_extension.go b/internal/text/goldmark_extension.go
deleted file mode 100644
index a12c618dc..000000000
--- a/internal/text/goldmark_extension.go
+++ /dev/null
@@ -1,313 +0,0 @@
-// GoToSocial
-// Copyright (C) GoToSocial Authors admin@gotosocial.org
-// SPDX-License-Identifier: AGPL-3.0-or-later
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-package text
-
-import (
- "context"
- "fmt"
- "strings"
-
- "github.com/superseriousbusiness/gotosocial/internal/db"
- "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
- "github.com/superseriousbusiness/gotosocial/internal/log"
- "github.com/superseriousbusiness/gotosocial/internal/regexes"
- "github.com/superseriousbusiness/gotosocial/internal/util"
- "github.com/yuin/goldmark"
- "github.com/yuin/goldmark/ast"
- "github.com/yuin/goldmark/parser"
- "github.com/yuin/goldmark/renderer"
- "github.com/yuin/goldmark/text"
- mdutil "github.com/yuin/goldmark/util"
-)
-
-// A goldmark extension that parses potential mentions and hashtags separately from regular
-// text, so that they stay as one contiguous text fragment in the AST, and then renders
-// them separately too, to avoid scanning normal text for mentions and tags.
-
-// mention and hashtag fulfil the goldmark ast.Node interface.
-type mention struct {
- ast.BaseInline
- Segment text.Segment
-}
-
-type hashtag struct {
- ast.BaseInline
- Segment text.Segment
-}
-
-type emoji struct {
- ast.BaseInline
- Segment text.Segment
-}
-
-var (
- kindMention = ast.NewNodeKind("Mention")
- kindHashtag = ast.NewNodeKind("Hashtag")
- kindEmoji = ast.NewNodeKind("Emoji")
-)
-
-func (n *mention) Kind() ast.NodeKind {
- return kindMention
-}
-
-func (n *hashtag) Kind() ast.NodeKind {
- return kindHashtag
-}
-
-func (n *emoji) Kind() ast.NodeKind {
- return kindEmoji
-}
-
-// Dump can be used for debugging.
-func (n *mention) Dump(source []byte, level int) {
- fmt.Printf("%sMention: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))
-}
-
-func (n *hashtag) Dump(source []byte, level int) {
- fmt.Printf("%sHashtag: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))
-}
-
-func (n *emoji) Dump(source []byte, level int) {
- fmt.Printf("%sEmoji: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))
-}
-
-// newMention and newHashtag create a goldmark ast.Node from a goldmark text.Segment.
-// The contained segment is used in rendering.
-func newMention(s text.Segment) *mention {
- return &mention{
- BaseInline: ast.BaseInline{},
- Segment: s,
- }
-}
-
-func newHashtag(s text.Segment) *hashtag {
- return &hashtag{
- BaseInline: ast.BaseInline{},
- Segment: s,
- }
-}
-
-func newEmoji(s text.Segment) *emoji {
- return &emoji{
- BaseInline: ast.BaseInline{},
- Segment: s,
- }
-}
-
-// mentionParser and hashtagParser fulfil the goldmark parser.InlineParser interface.
-type mentionParser struct{}
-
-type hashtagParser struct{}
-
-type emojiParser struct{}
-
-func (p *mentionParser) Trigger() []byte {
- return []byte{'@'}
-}
-
-func (p *hashtagParser) Trigger() []byte {
- return []byte{'#'}
-}
-
-func (p *emojiParser) Trigger() []byte {
- return []byte{':'}
-}
-
-func (p *mentionParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
- before := block.PrecendingCharacter()
- line, segment := block.PeekLine()
-
- if !util.IsMentionOrHashtagBoundary(before) {
- return nil
- }
-
- // unideal for performance but makes use of existing regex
- loc := regexes.MentionFinder.FindIndex(line)
- switch {
- case loc == nil:
- fallthrough
- case loc[0] != 0: // fail if not found at start
- return nil
- default:
- block.Advance(loc[1])
- return newMention(segment.WithStop(segment.Start + loc[1]))
- }
-}
-
-func (p *hashtagParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
- before := block.PrecendingCharacter()
- line, segment := block.PeekLine()
- s := string(line)
-
- if !util.IsMentionOrHashtagBoundary(before) || len(s) == 1 {
- return nil
- }
-
- for i, r := range s {
- switch {
- case r == '#' && i == 0:
- // ignore initial #
- continue
- case !util.IsPlausiblyInHashtag(r) && !util.IsMentionOrHashtagBoundary(r):
- // Fake hashtag, don't trust it
- return nil
- case util.IsMentionOrHashtagBoundary(r):
- if i <= 1 {
- // empty
- return nil
- }
- // End of hashtag
- block.Advance(i)
- return newHashtag(segment.WithStop(segment.Start + i))
- }
- }
- // If we don't find invalid characters before the end of the line then it's all hashtag, babey
- block.Advance(segment.Len())
- return newHashtag(segment)
-}
-
-func (p *emojiParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
- line, segment := block.PeekLine()
-
- // unideal for performance but makes use of existing regex
- loc := regexes.EmojiFinder.FindIndex(line)
- switch {
- case loc == nil:
- fallthrough
- case loc[0] != 0: // fail if not found at start
- return nil
- default:
- block.Advance(loc[1])
- return newEmoji(segment.WithStop(segment.Start + loc[1]))
- }
-}
-
-// customRenderer fulfils both the renderer.NodeRenderer and goldmark.Extender interfaces.
-// It is created in FromMarkdown and FromPlain to be used as a goldmark extension, and the
-// fields are used to report tags and mentions to the caller for use as metadata.
-type customRenderer struct {
- f *formatter
- ctx context.Context
- parseMention gtsmodel.ParseMentionFunc
- accountID string
- statusID string
- emojiOnly bool
- result *FormatResult
-}
-
-func (r *customRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
- reg.Register(kindMention, r.renderMention)
- reg.Register(kindHashtag, r.renderHashtag)
- reg.Register(kindEmoji, r.renderEmoji)
-}
-
-func (r *customRenderer) Extend(m goldmark.Markdown) {
- // 1000 is set as the lowest priority, but it's arbitrary
- m.Parser().AddOptions(parser.WithInlineParsers(
- mdutil.Prioritized(&emojiParser{}, 1000),
- ))
- if !r.emojiOnly {
- m.Parser().AddOptions(parser.WithInlineParsers(
- mdutil.Prioritized(&mentionParser{}, 1000),
- mdutil.Prioritized(&hashtagParser{}, 1000),
- ))
- }
- m.Renderer().AddOptions(renderer.WithNodeRenderers(
- mdutil.Prioritized(r, 1000),
- ))
-}
-
-// renderMention and renderHashtag take a mention or a hashtag ast.Node and render it as HTML.
-func (r *customRenderer) renderMention(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
- if !entering {
- return ast.WalkSkipChildren, nil
- }
-
- n, ok := node.(*mention) // this function is only registered for kindMention
- if !ok {
- log.Panic(r.ctx, "type assertion failed")
- }
- text := string(n.Segment.Value(source))
-
- html := r.replaceMention(text)
-
- // we don't have much recourse if this fails
- if _, err := w.WriteString(html); err != nil {
- log.Errorf(r.ctx, "error writing HTML: %s", err)
- }
- return ast.WalkSkipChildren, nil
-}
-
-func (r *customRenderer) renderHashtag(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
- if !entering {
- return ast.WalkSkipChildren, nil
- }
-
- n, ok := node.(*hashtag) // this function is only registered for kindHashtag
- if !ok {
- log.Panic(r.ctx, "type assertion failed")
- }
- text := string(n.Segment.Value(source))
-
- html := r.replaceHashtag(text)
-
- _, err := w.WriteString(html)
- // we don't have much recourse if this fails
- if err != nil {
- log.Errorf(r.ctx, "error writing HTML: %s", err)
- }
- return ast.WalkSkipChildren, nil
-}
-
-// renderEmoji doesn't turn an emoji into HTML, but adds it to the metadata.
-func (r *customRenderer) renderEmoji(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
- if !entering {
- return ast.WalkSkipChildren, nil
- }
-
- n, ok := node.(*emoji) // this function is only registered for kindEmoji
- if !ok {
- log.Panic(r.ctx, "type assertion failed")
- }
- text := string(n.Segment.Value(source))
- shortcode := text[1 : len(text)-1]
-
- emoji, err := r.f.db.GetEmojiByShortcodeDomain(r.ctx, shortcode, "")
- if err != nil {
- if err != db.ErrNoEntries {
- log.Errorf(nil, "error getting local emoji with shortcode %s: %s", shortcode, err)
- }
- } else if *emoji.VisibleInPicker && !*emoji.Disabled {
- listed := false
- for _, e := range r.result.Emojis {
- if e.Shortcode == emoji.Shortcode {
- listed = true
- break
- }
- }
- if !listed {
- r.result.Emojis = append(r.result.Emojis, emoji)
- }
- }
-
- // we don't have much recourse if this fails
- if _, err := w.WriteString(text); err != nil {
- log.Errorf(r.ctx, "error writing HTML: %s", err)
- }
- return ast.WalkSkipChildren, nil
-}
diff --git a/internal/text/goldmark_parsers.go b/internal/text/goldmark_parsers.go
new file mode 100644
index 000000000..b7cf4f9e9
--- /dev/null
+++ b/internal/text/goldmark_parsers.go
@@ -0,0 +1,281 @@
+// GoToSocial
+// Copyright (C) GoToSocial Authors admin@gotosocial.org
+// SPDX-License-Identifier: AGPL-3.0-or-later
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package text
+
+import (
+ "fmt"
+ "strings"
+
+ "github.com/superseriousbusiness/gotosocial/internal/regexes"
+ "github.com/yuin/goldmark/ast"
+ "github.com/yuin/goldmark/parser"
+ "github.com/yuin/goldmark/text"
+)
+
+/*
+ MENTION PARSER STUFF
+*/
+
+// mention fulfils the goldmark
+// ast.Node interface.
+type mention struct {
+ ast.BaseInline
+ Segment text.Segment
+}
+
+var kindMention = ast.NewNodeKind("Mention")
+
+func (n *mention) Kind() ast.NodeKind {
+ return kindMention
+}
+
+func (n *mention) Dump(source []byte, level int) {
+ fmt.Printf("%sMention: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))
+}
+
+// newMention creates a goldmark ast.Node
+// from a text.Segment. The contained segment
+// is used in rendering.
+func newMention(s text.Segment) *mention {
+ return &mention{
+ BaseInline: ast.BaseInline{},
+ Segment: s,
+ }
+}
+
+// mentionParser fulfils the goldmark
+// parser.InlineParser interface.
+type mentionParser struct{}
+
+// Mention parsing is triggered by the `@` symbol
+// which appears at the beginning of a mention.
+func (p *mentionParser) Trigger() []byte {
+ return []byte{'@'}
+}
+
+func (p *mentionParser) Parse(
+ _ ast.Node,
+ block text.Reader,
+ _ parser.Context,
+) ast.Node {
+ // If preceding character is not a valid boundary
+ // character, then this cannot be a valid mention.
+ if !isMentionBoundary(block.PrecendingCharacter()) {
+ return nil
+ }
+
+ line, segment := block.PeekLine()
+
+ // Ascertain location of mention in the line
+ // that starts with the trigger character.
+ loc := regexes.MentionFinder.FindIndex(line)
+ if loc == nil || loc[0] != 0 {
+ // Noop if not found or
+ // not found at start.
+ return nil
+ }
+
+ // Advance the block to
+ // the end of the mention.
+ block.Advance(loc[1])
+
+ // mention ast.Node spans from the
+ // beginning of this segment up to
+ // the last character of the mention.
+ return newMention(
+ segment.WithStop(
+ segment.Start + loc[1],
+ ),
+ )
+}
+
+/*
+ HASHTAG PARSER STUFF
+*/
+
+// hashtag fulfils the goldmark
+// ast.Node interface.
+type hashtag struct {
+ ast.BaseInline
+ Segment text.Segment
+}
+
+var kindHashtag = ast.NewNodeKind("Hashtag")
+
+func (n *hashtag) Kind() ast.NodeKind {
+ return kindHashtag
+}
+
+func (n *hashtag) Dump(source []byte, level int) {
+ fmt.Printf("%sHashtag: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))
+}
+
+// newHashtag creates a goldmark ast.Node
+// from a text.Segment. The contained segment
+// is used in rendering.
+func newHashtag(s text.Segment) *hashtag {
+ return &hashtag{
+ BaseInline: ast.BaseInline{},
+ Segment: s,
+ }
+}
+
+type hashtagParser struct{}
+
+// Hashtag parsing is triggered by a '#' symbol
+// which appears at the beginning of a hashtag.
+func (p *hashtagParser) Trigger() []byte {
+ return []byte{'#'}
+}
+
+func (p *hashtagParser) Parse(
+ _ ast.Node,
+ block text.Reader,
+ _ parser.Context,
+) ast.Node {
+ // If preceding character is not a valid boundary
+ // character, then this cannot be a valid hashtag.
+ if !isHashtagBoundary(block.PrecendingCharacter()) {
+ return nil
+ }
+
+ var (
+ line, segment = block.PeekLine()
+ lineStr = string(line)
+ lineStrLen = len(lineStr)
+ )
+
+ if lineStrLen <= 1 {
+ // This is probably just
+ // a lonely '#' char.
+ return nil
+ }
+
+ // Iterate through the runes in the detected
+ // hashtag string until we reach either:
+ // - A weird character (bad).
+ // - The end of the hashtag (ok).
+ // - The end of the string (also ok).
+ for i, r := range lineStr {
+ switch {
+ case r == '#' && i == 0:
+ // Ignore initial '#'.
+ continue
+
+ case !isPlausiblyInHashtag(r) &&
+ !isHashtagBoundary(r):
+ // Weird non-boundary character
+ // in the hashtag. Don't trust it.
+ return nil
+
+ case isHashtagBoundary(r):
+ // Reached closing hashtag
+ // boundary. Advance block
+ // to the end of the hashtag.
+ block.Advance(i)
+
+ // hashtag ast.Node spans from
+ // the beginning of this segment
+ // up to the boundary character.
+ return newHashtag(
+ segment.WithStop(
+ segment.Start + i,
+ ),
+ )
+ }
+ }
+
+ // No invalid or boundary characters before the
+ // end of the line: it's all hashtag, baby 😎
+ //
+ // Advance block to the end of the segment.
+ block.Advance(segment.Len())
+
+ // hashtag ast.Node spans
+ // the entire segment.
+ return newHashtag(segment)
+}
+
+/*
+ EMOJI PARSER STUFF
+*/
+
+// emoji fulfils the goldmark
+// ast.Node interface.
+type emoji struct {
+ ast.BaseInline
+ Segment text.Segment
+}
+
+var kindEmoji = ast.NewNodeKind("Emoji")
+
+func (n *emoji) Kind() ast.NodeKind {
+ return kindEmoji
+}
+
+func (n *emoji) Dump(source []byte, level int) {
+ fmt.Printf("%sEmoji: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))
+}
+
+// newEmoji creates a goldmark ast.Node
+// from a text.Segment. The contained
+// segment is used in rendering.
+func newEmoji(s text.Segment) *emoji {
+ return &emoji{
+ BaseInline: ast.BaseInline{},
+ Segment: s,
+ }
+}
+
+type emojiParser struct{}
+
+// Emoji parsing is triggered by a ':' char
+// which appears at the start of the emoji.
+func (p *emojiParser) Trigger() []byte {
+ return []byte{':'}
+}
+
+func (p *emojiParser) Parse(
+ _ ast.Node,
+ block text.Reader,
+ _ parser.Context,
+) ast.Node {
+ line, segment := block.PeekLine()
+
+ // Ascertain location of emoji in the line
+ // that starts with the trigger character.
+ loc := regexes.EmojiFinder.FindIndex(line)
+ if loc == nil || loc[0] != 0 {
+ // Noop if not found or
+ // not found at start.
+ return nil
+ }
+
+ // Advance the block to
+ // the end of the emoji.
+ block.Advance(loc[1])
+
+ // emoji ast.Node spans from the
+ // beginning of this segment up to
+ // the last character of the emoji.
+ return newEmoji(
+ segment.WithStop(
+ segment.Start + loc[1],
+ ),
+ )
+}
diff --git a/internal/text/goldmark_plaintext.go b/internal/text/goldmark_plaintext.go
index 635fdfc33..a27328317 100644
--- a/internal/text/goldmark_plaintext.go
+++ b/internal/text/goldmark_plaintext.go
@@ -26,7 +26,7 @@ import (
// plaintextParser implements goldmark.parser.BlockParser
type plaintextParser struct{}
-var defaultPlaintextParser = &plaintextParser{}
+var defaultPlaintextParser = new(plaintextParser)
func newPlaintextParser() parser.BlockParser {
return defaultPlaintextParser
@@ -64,7 +64,7 @@ func (b *plaintextParser) CanAcceptIndentedLine() bool {
// plaintextParserNoParagraph implements goldmark.parser.BlockParser
type plaintextParserNoParagraph struct{}
-var defaultPlaintextParserNoParagraph = &plaintextParserNoParagraph{}
+var defaultPlaintextParserNoParagraph = new(plaintextParserNoParagraph)
func newPlaintextParserNoParagraph() parser.BlockParser {
return defaultPlaintextParserNoParagraph
diff --git a/internal/text/markdown.go b/internal/text/markdown.go
index ecc49673b..6fc1bd2f0 100644
--- a/internal/text/markdown.go
+++ b/internal/text/markdown.go
@@ -28,38 +28,55 @@ import (
"github.com/yuin/goldmark/renderer/html"
)
-func (f *formatter) FromMarkdown(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, markdownText string) *FormatResult {
- result := &FormatResult{
- Mentions: []*gtsmodel.Mention{},
- Tags: []*gtsmodel.Tag{},
- Emojis: []*gtsmodel.Emoji{},
- }
+// FromMarkdown fulfils FormatFunc by parsing
+// the given markdown input into a FormatResult.
+func (f *Formatter) FromMarkdown(
+ ctx context.Context,
+ parseMention gtsmodel.ParseMentionFunc,
+ authorID string,
+ statusID string,
+ input string,
+) *FormatResult {
+ result := new(FormatResult)
- // parse markdown text into html, using custom renderer to add hashtag/mention links
+ // Instantiate goldmark parser for
+ // markdown, using custom renderer
+ // to add hashtag/mention links.
md := goldmark.New(
goldmark.WithRendererOptions(
html.WithXHTML(),
html.WithHardWraps(),
- html.WithUnsafe(), // allows raw HTML
+ // Allows raw HTML. We sanitize
+ // at the end so this is OK.
+ html.WithUnsafe(),
),
goldmark.WithExtensions(
- &customRenderer{f, ctx, pmf, authorID, statusID, false, result},
- extension.Linkify, // turns URLs into links
+ &customRenderer{
+ ctx,
+ f.db,
+ parseMention,
+ authorID,
+ statusID,
+ false, // emojiOnly = false.
+ result,
+ },
+ extension.Linkify, // Turns URLs into links.
extension.Strikethrough,
),
)
- var htmlContentBytes bytes.Buffer
- err := md.Convert([]byte(markdownText), &htmlContentBytes)
- if err != nil {
- log.Errorf(ctx, "error formatting markdown to HTML: %s", err)
+ // Parse input into HTML.
+ var htmlBytes bytes.Buffer
+ if err := md.Convert(
+ []byte(input),
+ &htmlBytes,
+ ); err != nil {
+ log.Errorf(ctx, "error formatting markdown input to HTML: %s", err)
}
- result.HTML = htmlContentBytes.String()
- // clean anything dangerous out of the HTML
+ // Clean and shrink HTML.
+ result.HTML = htmlBytes.String()
result.HTML = SanitizeToHTML(result.HTML)
-
- // shrink ray
result.HTML = MinifyHTML(result.HTML)
return result
diff --git a/internal/text/markdown_test.go b/internal/text/markdown_test.go
index cc466df6c..98ed3a96b 100644
--- a/internal/text/markdown_test.go
+++ b/internal/text/markdown_test.go
@@ -76,10 +76,16 @@ const (
mdWithLinkExpected = "
Check out this code, i heard it was written by a sloth https://github.com/superseriousbusiness/gotosocial
"
mdObjectInCodeBlock = "@foss_satan@fossbros-anonymous.io this is how to mention a user\n```\n@the_mighty_zork hey bud! nice #ObjectOrientedProgramming software you've been writing lately! :rainbow:\n```\nhope that helps"
mdObjectInCodeBlockExpected = "@foss_satan this is how to mention a user
@the_mighty_zork hey bud! nice #ObjectOrientedProgramming software you've been writing lately! :rainbow:\n
hope that helps
"
- mdItalicHashtag = "_#hashtag_"
- mdItalicHashtagExpected = "#hashtag
"
- mdItalicHashtags = "_#hashtag #hashtag #hashtag_"
- mdItalicHashtagsExpected = "#hashtag #hashtag #hashtag
"
+ // Hashtags can be italicized but only with *, not _.
+ mdItalicHashtag = "*#hashtag*"
+ mdItalicHashtagExpected = "#hashtag
"
+ mdItalicHashtags = "*#hashtag #hashtag #hashtag*"
+ mdItalicHashtagsExpected = "#hashtag #hashtag #hashtag
"
+ // Hashtags can end with or contain _ but not start with it.
+ mdUnderscorePrefixHashtag = "_#hashtag"
+ mdUnderscorePrefixHashtagExpected = "_#hashtag
"
+ mdUnderscoreSuffixHashtag = "#hashtag_"
+ mdUnderscoreSuffixHashtagExpected = "#hashtag_
"
// BEWARE: sneaky unicode business going on.
// the first ö is one rune, the second ö is an o with a combining diacritic.
mdUnnormalizedHashtag = "#hellöthere #hellöthere"
@@ -194,6 +200,19 @@ func (suite *MarkdownTestSuite) TestParseItalicHashtags() {
suite.Equal(mdItalicHashtagsExpected, formatted.HTML)
}
+func (suite *MarkdownTestSuite) TestParseHashtagUnderscorePrefix() {
+ formatted := suite.FromMarkdown(mdUnderscorePrefixHashtag)
+ suite.Equal(mdUnderscorePrefixHashtagExpected, formatted.HTML)
+ suite.Empty(formatted.Tags)
+}
+
+func (suite *MarkdownTestSuite) TestParseHashtagUnderscoreSuffix() {
+ formatted := suite.FromMarkdown(mdUnderscoreSuffixHashtag)
+ suite.Equal(mdUnderscoreSuffixHashtagExpected, formatted.HTML)
+ suite.NotEmpty(formatted.Tags)
+ suite.Equal("hashtag_", formatted.Tags[0].Name)
+}
+
func (suite *MarkdownTestSuite) TestParseUnnormalizedHashtag() {
formatted := suite.FromMarkdown(mdUnnormalizedHashtag)
suite.Equal(mdUnnormalizedHashtagExpected, formatted.HTML)
diff --git a/internal/text/normalize.go b/internal/text/normalize.go
index 14caf6311..d2e633d1e 100644
--- a/internal/text/normalize.go
+++ b/internal/text/normalize.go
@@ -20,7 +20,6 @@ package text
import (
"strings"
- "github.com/superseriousbusiness/gotosocial/internal/util"
"golang.org/x/text/unicode/norm"
)
@@ -36,8 +35,10 @@ const (
//
// Finally, it will do a check on the normalized string to
// ensure that it's below maximumHashtagLength chars, and
-// contains only unicode letters and numbers. If this passes,
-// returned bool will be true.
+// contains only letters, numbers, and underscores (and not
+// *JUST* underscores).
+//
+// If all this passes, returned bool will be true.
func NormalizeHashtag(text string) (string, bool) {
// This normalization is specifically to avoid cases
// where visually-identical hashtags are stored with
@@ -47,14 +48,31 @@ func NormalizeHashtag(text string) (string, bool) {
// with parent characters to form regular letter symbols.
normalized := norm.NFC.String(strings.TrimPrefix(text, "#"))
- // Validate normalized.
- ok := true
+ // Validate normalized result.
+ var (
+ notJustUnderscores = false
+ onlyPermittedChars = true
+ lengthOK = true
+ )
+
for i, r := range normalized {
- if i >= maximumHashtagLength || !util.IsPermittedInHashtag(r) {
- ok = false
+ if r != '_' {
+ // This isn't an underscore,
+ // so the whole hashtag isn't
+ // just underscores.
+ notJustUnderscores = true
+ }
+
+ if i >= maximumHashtagLength {
+ lengthOK = false
+ break
+ }
+
+ if !isPermittedInHashtag(r) {
+ onlyPermittedChars = false
break
}
}
- return normalized, ok
+ return normalized, (lengthOK && onlyPermittedChars && notJustUnderscores)
}
diff --git a/internal/text/plain.go b/internal/text/plain.go
index 330ebfb15..1456fd016 100644
--- a/internal/text/plain.go
+++ b/internal/text/plain.go
@@ -30,66 +30,150 @@ import (
"github.com/yuin/goldmark/util"
)
-func (f *formatter) fromPlain(
+// FromPlain fulfils FormatFunc by parsing
+// the given plaintext input into a FormatResult.
+func (f *Formatter) FromPlain(
ctx context.Context,
- ptParser parser.Parser,
- pmf gtsmodel.ParseMentionFunc,
+ parseMention gtsmodel.ParseMentionFunc,
authorID string,
statusID string,
- plain string,
+ input string,
) *FormatResult {
- result := &FormatResult{
- Mentions: []*gtsmodel.Mention{},
- Tags: []*gtsmodel.Tag{},
- Emojis: []*gtsmodel.Emoji{},
- }
-
- // Parse markdown into html, using custom renderer
- // to add hashtag/mention links and emoji images.
- md := goldmark.New(
- goldmark.WithRendererOptions(
- html.WithXHTML(),
- html.WithHardWraps(),
- ),
- goldmark.WithParser(ptParser), // use parser we were passed
- goldmark.WithExtensions(
- &customRenderer{f, ctx, pmf, authorID, statusID, false, result},
- extension.Linkify, // turns URLs into links
- ),
- )
-
- var htmlContentBytes bytes.Buffer
- if err := md.Convert([]byte(plain), &htmlContentBytes); err != nil {
- log.Errorf(ctx, "error formatting plaintext to HTML: %s", err)
- }
- result.HTML = htmlContentBytes.String()
-
- // Clean anything dangerous out of resulting HTML.
- result.HTML = SanitizeToHTML(result.HTML)
-
- // Shrink ray!
- result.HTML = MinifyHTML(result.HTML)
-
- return result
-}
-
-func (f *formatter) FromPlain(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult {
- ptParser := parser.NewParser(
+ // Initialize standard block parser
+ // that wraps result in <p> tags.
+ plainTextParser := parser.NewParser(
parser.WithBlockParsers(
util.Prioritized(newPlaintextParser(), 500),
),
)
- return f.fromPlain(ctx, ptParser, pmf, authorID, statusID, plain)
+ return f.fromPlain(
+ ctx,
+ plainTextParser,
+ false, // emojiOnly = false
+ parseMention,
+ authorID,
+ statusID,
+ input,
+ )
}
-func (f *formatter) FromPlainNoParagraph(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult {
- ptParser := parser.NewParser(
+// FromPlainNoParagraph fulfils FormatFunc by parsing
+// the given plaintext input into a FormatResult.
+//
+// Unlike FromPlain, it will not wrap the resulting
+// HTML in <p> tags, making it useful for parsing
+// short fragments of text that oughtn't be formally
+// wrapped as a paragraph.
+func (f *Formatter) FromPlainNoParagraph(
+ ctx context.Context,
+ parseMention gtsmodel.ParseMentionFunc,
+ authorID string,
+ statusID string,
+ input string,
+) *FormatResult {
+ // Initialize block parser that
+ // doesn't wrap result in <p> tags.
+ plainTextParser := parser.NewParser(
parser.WithBlockParsers(
- // Initialize block parser that doesn't wrap in <p> tags.
util.Prioritized(newPlaintextParserNoParagraph(), 500),
),
)
- return f.fromPlain(ctx, ptParser, pmf, authorID, statusID, plain)
+ return f.fromPlain(
+ ctx,
+ plainTextParser,
+ false, // emojiOnly = false
+ parseMention,
+ authorID,
+ statusID,
+ input,
+ )
+}
+
+// FromPlainEmojiOnly fulfils FormatFunc by parsing
+// the given plaintext input into a FormatResult.
+//
+// Unlike FromPlain, it will only parse emojis with
+// the custom renderer, leaving aside mentions and tags.
+func (f *Formatter) FromPlainEmojiOnly(
+ ctx context.Context,
+ parseMention gtsmodel.ParseMentionFunc,
+ authorID string,
+ statusID string,
+ input string,
+) *FormatResult {
+ // Initialize standard block parser
+ // that wraps result in <p> tags.
+ plainTextParser := parser.NewParser(
+ parser.WithBlockParsers(
+ util.Prioritized(newPlaintextParser(), 500),
+ ),
+ )
+
+ return f.fromPlain(
+ ctx,
+ plainTextParser,
+ true, // emojiOnly = true
+ parseMention,
+ authorID,
+ statusID,
+ input,
+ )
+}
+
+// fromPlain parses the given input text
+// using the given plainTextParser, and
+// returns the result.
+func (f *Formatter) fromPlain(
+ ctx context.Context,
+ plainTextParser parser.Parser,
+ emojiOnly bool,
+ parseMention gtsmodel.ParseMentionFunc,
+ authorID string,
+ statusID string,
+ input string,
+) *FormatResult {
+ result := new(FormatResult)
+
+ // Instantiate goldmark parser for
+ // plaintext, using custom renderer
+ // to add hashtag/mention links.
+ md := goldmark.New(
+ goldmark.WithRendererOptions(
+ html.WithXHTML(),
+ html.WithHardWraps(),
+ ),
+ // Use whichever plaintext
+ // parser we were passed.
+ goldmark.WithParser(plainTextParser),
+ goldmark.WithExtensions(
+ &customRenderer{
+ ctx,
+ f.db,
+ parseMention,
+ authorID,
+ statusID,
+ emojiOnly,
+ result,
+ },
+ extension.Linkify, // Turns URLs into links.
+ ),
+ )
+
+ // Parse input into HTML.
+ var htmlBytes bytes.Buffer
+ if err := md.Convert(
+ []byte(input),
+ &htmlBytes,
+ ); err != nil {
+ log.Errorf(ctx, "error formatting plaintext input to HTML: %s", err)
+ }
+
+ // Clean and shrink HTML.
+ result.HTML = htmlBytes.String()
+ result.HTML = SanitizeToHTML(result.HTML)
+ result.HTML = MinifyHTML(result.HTML)
+
+ return result
}
diff --git a/internal/text/plain_test.go b/internal/text/plain_test.go
index dfcf8b953..43cc588c5 100644
--- a/internal/text/plain_test.go
+++ b/internal/text/plain_test.go
@@ -20,7 +20,6 @@ package text_test
import (
"testing"
- "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
)
@@ -85,7 +84,7 @@ that link shouldn't come out formatted as a mention!`
func (suite *PlainTestSuite) TestDeriveMentionsEmpty() {
statusText := ``
menchies := suite.FromPlain(statusText).Mentions
- assert.Len(suite.T(), menchies, 0)
+ suite.Len(menchies, 0)
}
func (suite *PlainTestSuite) TestDeriveHashtagsOK() {
@@ -98,7 +97,9 @@ func (suite *PlainTestSuite) TestDeriveHashtagsOK() {
here's a link with a fragment: https://example.org/whatever#ahhh
here's another link with a fragment: https://example.org/whatever/#ahhh
-(#ThisShouldAlsoWork) #this_should_be_split
+(#ThisShouldAlsoWork) #this_should_not_be_split
+
+#__ <- just underscores, shouldn't work
#111111 thisalsoshouldn'twork#### ##
@@ -108,24 +109,24 @@ func (suite *PlainTestSuite) TestDeriveHashtagsOK() {
`
tags := suite.FromPlain(statusText).Tags
- assert.Len(suite.T(), tags, 13)
- assert.Equal(suite.T(), "testing123", tags[0].Name)
- assert.Equal(suite.T(), "also", tags[1].Name)
- assert.Equal(suite.T(), "thisshouldwork", tags[2].Name)
- assert.Equal(suite.T(), "dupe", tags[3].Name)
- assert.Equal(suite.T(), "ThisShouldAlsoWork", tags[4].Name)
- assert.Equal(suite.T(), "this", tags[5].Name)
- assert.Equal(suite.T(), "111111", tags[6].Name)
- assert.Equal(suite.T(), "alimentación", tags[7].Name)
- assert.Equal(suite.T(), "saúde", tags[8].Name)
- assert.Equal(suite.T(), "lävistää", tags[9].Name)
- assert.Equal(suite.T(), "ö", tags[10].Name)
- assert.Equal(suite.T(), "네", tags[11].Name)
- assert.Equal(suite.T(), "ThisOneIsThirteyCharactersLong", tags[12].Name)
+ suite.Len(tags, 13)
+ suite.Equal("testing123", tags[0].Name)
+ suite.Equal("also", tags[1].Name)
+ suite.Equal("thisshouldwork", tags[2].Name)
+ suite.Equal("dupe", tags[3].Name)
+ suite.Equal("ThisShouldAlsoWork", tags[4].Name)
+ suite.Equal("this_should_not_be_split", tags[5].Name)
+ suite.Equal("111111", tags[6].Name)
+ suite.Equal("alimentación", tags[7].Name)
+ suite.Equal("saúde", tags[8].Name)
+ suite.Equal("lävistää", tags[9].Name)
+ suite.Equal("ö", tags[10].Name)
+ suite.Equal("네", tags[11].Name)
+ suite.Equal("ThisOneIsThirteyCharactersLong", tags[12].Name)
statusText = `#올빼미 hej`
tags = suite.FromPlain(statusText).Tags
- assert.Equal(suite.T(), "올빼미", tags[0].Name)
+ suite.Equal("올빼미", tags[0].Name)
}
func (suite *PlainTestSuite) TestDeriveMultiple() {
@@ -137,20 +138,20 @@ func (suite *PlainTestSuite) TestDeriveMultiple() {
f := suite.FromPlain(statusText)
- assert.Len(suite.T(), f.Mentions, 1)
- assert.Equal(suite.T(), "@foss_satan@fossbros-anonymous.io", f.Mentions[0].NameString)
+ suite.Len(f.Mentions, 1)
+ suite.Equal("@foss_satan@fossbros-anonymous.io", f.Mentions[0].NameString)
- assert.Len(suite.T(), f.Tags, 1)
- assert.Equal(suite.T(), "hashtag", f.Tags[0].Name)
+ suite.Len(f.Tags, 1)
+ suite.Equal("hashtag", f.Tags[0].Name)
- assert.Len(suite.T(), f.Emojis, 0)
+ suite.Len(f.Emojis, 0)
}
func (suite *PlainTestSuite) TestZalgoHashtag() {
statusText := `yo who else loves #praying to #z̸͉̅a̸͚͋l̵͈̊g̸̫͌ỏ̷̪?`
f := suite.FromPlain(statusText)
- assert.Len(suite.T(), f.Tags, 1)
- assert.Equal(suite.T(), "praying", f.Tags[0].Name)
+ suite.Len(f.Tags, 1)
+ suite.Equal("praying", f.Tags[0].Name)
}
func TestPlainTestSuite(t *testing.T) {
diff --git a/internal/text/replace.go b/internal/text/replace.go
deleted file mode 100644
index db72aaf1d..000000000
--- a/internal/text/replace.go
+++ /dev/null
@@ -1,161 +0,0 @@
-// GoToSocial
-// Copyright (C) GoToSocial Authors admin@gotosocial.org
-// SPDX-License-Identifier: AGPL-3.0-or-later
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-package text
-
-import (
- "errors"
- "strings"
-
- "github.com/superseriousbusiness/gotosocial/internal/db"
- "github.com/superseriousbusiness/gotosocial/internal/gtscontext"
- "github.com/superseriousbusiness/gotosocial/internal/gtserror"
- "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
- "github.com/superseriousbusiness/gotosocial/internal/id"
- "github.com/superseriousbusiness/gotosocial/internal/log"
- "github.com/superseriousbusiness/gotosocial/internal/uris"
-)
-
-// replaceMention takes a string in the form @username@domain.com or @localusername
-func (r *customRenderer) replaceMention(text string) string {
- mention, err := r.parseMention(r.ctx, text, r.accountID, r.statusID)
- if err != nil {
- log.Errorf(r.ctx, "error parsing mention %s from status: %s", text, err)
- return text
- }
-
- if r.statusID != "" {
- if err := r.f.db.PutMention(r.ctx, mention); err != nil {
- log.Errorf(r.ctx, "error putting mention in db: %s", err)
- return text
- }
- }
-
- // only append if it's not been listed yet
- listed := false
- for _, m := range r.result.Mentions {
- if mention.ID == m.ID {
- listed = true
- break
- }
- }
- if !listed {
- r.result.Mentions = append(r.result.Mentions, mention)
- }
-
- if mention.TargetAccount == nil {
- // Fetch mention target account if not yet populated.
- mention.TargetAccount, err = r.f.db.GetAccountByID(
- gtscontext.SetBarebones(r.ctx),
- mention.TargetAccountID,
- )
- if err != nil {
- log.Errorf(r.ctx, "error populating mention target account: %v", err)
- return text
- }
- }
-
- // The mention's target is our target
- targetAccount := mention.TargetAccount
-
- var b strings.Builder
-
- // replace the mention with the formatted mention content
- // @targetAccount.Username
- b.WriteString(`@`)
- b.WriteString(targetAccount.Username)
- b.WriteString(``)
- return b.String()
-}
-
-// replaceHashtag takes a string in the form #SomeHashtag, and will normalize
-// it before adding it to the db (or just getting it from the db if it already
-// exists) and turning it into HTML.
-func (r *customRenderer) replaceHashtag(text string) string {
- normalized, ok := NormalizeHashtag(text)
- if !ok {
- // Not a valid hashtag.
- return text
- }
-
- tag, err := r.getOrCreateHashtag(normalized)
- if err != nil {
- log.Errorf(r.ctx, "error generating hashtags from status: %s", err)
- return text
- }
-
- // Append tag to result if not done already.
- //
- // This prevents multiple uses of a tag in
- // the same status generating multiple
- // entries for the same tag in result.
- func() {
- for _, t := range r.result.Tags {
- if tag.ID == t.ID {
- // Already appended.
- return
- }
- }
-
- // Not appended yet.
- r.result.Tags = append(r.result.Tags, tag)
- }()
-
- // Replace tag with the formatted tag content, eg. `#SomeHashtag` becomes:
- // `#SomeHashtag`
- var b strings.Builder
- b.WriteString(`#`)
- b.WriteString(normalized)
- b.WriteString(``)
-
- return b.String()
-}
-
-func (r *customRenderer) getOrCreateHashtag(name string) (*gtsmodel.Tag, error) {
- var (
- tag *gtsmodel.Tag
- err error
- )
-
- // Check if we have a tag with this name already.
- tag, err = r.f.db.GetTagByName(r.ctx, name)
- if err != nil && !errors.Is(err, db.ErrNoEntries) {
- return nil, gtserror.Newf("db error getting tag %s: %w", name, err)
- }
-
- if tag != nil {
- // We had it!
- return tag, nil
- }
-
- // We didn't have a tag with
- // this name, create one.
- tag = &gtsmodel.Tag{
- ID: id.NewULID(),
- Name: name,
- }
-
- if err = r.f.db.PutTag(r.ctx, tag); err != nil {
- return nil, gtserror.Newf("db error putting new tag %s: %w", name, err)
- }
-
- return tag, nil
-}
diff --git a/internal/text/util.go b/internal/text/util.go
new file mode 100644
index 000000000..204c64838
--- /dev/null
+++ b/internal/text/util.go
@@ -0,0 +1,51 @@
+// GoToSocial
+// Copyright (C) GoToSocial Authors admin@gotosocial.org
+// SPDX-License-Identifier: AGPL-3.0-or-later
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package text
+
+import "unicode"
+
+func isPlausiblyInHashtag(r rune) bool {
+ // Marks are allowed during parsing
+ // prior to normalization, but not after,
+ // since they may be combined into letters
+ // during normalization.
+ return unicode.IsMark(r) ||
+ isPermittedInHashtag(r)
+}
+
+func isPermittedInHashtag(r rune) bool {
+ return unicode.IsLetter(r) ||
+ unicode.IsNumber(r) ||
+ r == '_'
+}
+
+// isHashtagBoundary returns true if rune r
+// is a recognized break character for before
+// or after a #hashtag.
+func isHashtagBoundary(r rune) bool {
+ return unicode.IsSpace(r) ||
+ (unicode.IsPunct(r) && r != '_')
+}
+
+// isMentionBoundary returns true if rune r
+// is a recognized break character for before
+// or after a @mention.
+func isMentionBoundary(r rune) bool {
+ return unicode.IsSpace(r) ||
+ unicode.IsPunct(r)
+}
diff --git a/internal/util/statustools.go b/internal/util/statustools.go
deleted file mode 100644
index c56cf84ce..000000000
--- a/internal/util/statustools.go
+++ /dev/null
@@ -1,37 +0,0 @@
-// GoToSocial
-// Copyright (C) GoToSocial Authors admin@gotosocial.org
-// SPDX-License-Identifier: AGPL-3.0-or-later
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-package util
-
-import (
- "unicode"
-)
-
-func IsPlausiblyInHashtag(r rune) bool {
- // Marks are allowed during parsing, prior to normalization, but not after,
- // since they may be combined into letters during normalization.
- return unicode.IsLetter(r) || unicode.IsNumber(r) || unicode.IsMark(r)
-}
-
-func IsPermittedInHashtag(r rune) bool {
- return unicode.IsLetter(r) || unicode.IsNumber(r)
-}
-
-// Decides where to break before or after a #hashtag or @mention
-func IsMentionOrHashtagBoundary(r rune) bool {
- return unicode.IsSpace(r) || unicode.IsPunct(r)
-}