writefreely/postrender.go
Matt Baer 64772aa203 Loosen restrictions on rendered img alt attribute
Previously, certain characters weren't allowed, and they would cause the entire
alt attribute to be elided from the rendered page. Since we safely sanitize the
content of this attribute anyway, this is unnecessary, so we now allow all text
entered there.

Fixes #649
2023-03-14 15:13:03 -04:00

363 lines
12 KiB
Go

/*
* Copyright © 2018-2021 Musing Studio LLC.
*
* This file is part of WriteFreely.
*
* WriteFreely is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, included
* in the LICENSE file in this source code package.
*/
package writefreely
import (
"bytes"
"encoding/json"
"fmt"
"html"
"html/template"
"net/http"
"net/url"
"regexp"
"strings"
"unicode"
"unicode/utf8"
"github.com/microcosm-cc/bluemonday"
stripmd "github.com/writeas/go-strip-markdown/v2"
"github.com/writeas/impart"
blackfriday "github.com/writeas/saturday"
"github.com/writeas/web-core/log"
"github.com/writeas/web-core/stringmanip"
"github.com/writefreely/writefreely/config"
"github.com/writefreely/writefreely/parse"
)
var (
blockReg = regexp.MustCompile("<(ul|ol|blockquote)>\n")
endBlockReg = regexp.MustCompile("</([a-z]+)>\n</(ul|ol|blockquote)>")
youtubeReg = regexp.MustCompile("(https?://www.youtube.com/embed/[a-zA-Z0-9\\-_]+)(\\?[^\t\n\f\r \"']+)?")
titleElementReg = regexp.MustCompile("</?h[1-6]>")
hashtagReg = regexp.MustCompile(`{{\[\[\|\|([^|]+)\|\|\]\]}}`)
markeddownReg = regexp.MustCompile("<p>(.+)</p>")
mentionReg = regexp.MustCompile(`@([A-Za-z0-9._%+-]+)(@[A-Za-z0-9.-]+\.[A-Za-z]+)\b`)
)
func (p *Post) handlePremiumContent(c *Collection, isOwner, postPage bool, cfg *config.Config) {
if c.Monetization != "" {
// User has Web Monetization enabled, so split content if it exists
spl := strings.Index(p.Content, shortCodePaid)
p.IsPaid = spl > -1
if postPage {
// We're viewing the individual post
if isOwner {
p.Content = strings.Replace(p.Content, shortCodePaid, "\n\n"+`<p class="split">Your subscriber content begins here.</p>`+"\n\n", 1)
} else {
if spl > -1 {
p.Content = p.Content[:spl+len(shortCodePaid)]
p.Content = strings.Replace(p.Content, shortCodePaid, "\n\n"+`<p class="split">Continue reading with a <strong>Coil</strong> membership.</p>`+"\n\n", 1)
}
}
} else {
// We've viewing the post on the collection landing
if spl > -1 {
baseURL := c.CanonicalURL()
if isOwner {
baseURL = "/" + c.Alias + "/"
}
p.Content = p.Content[:spl+len(shortCodePaid)]
p.HTMLExcerpt = template.HTML(applyMarkdown([]byte(p.Content[:spl]), baseURL, cfg))
}
}
}
}
func (p *Post) formatContent(cfg *config.Config, c *Collection, isOwner bool, isPostPage bool) {
baseURL := c.CanonicalURL()
// TODO: redundant
if !isSingleUser {
baseURL = "/" + c.Alias + "/"
}
p.handlePremiumContent(c, isOwner, isPostPage, cfg)
p.Content = strings.Replace(p.Content, "&lt;!--paid-->", "<!--paid-->", 1)
p.HTMLTitle = template.HTML(applyBasicMarkdown([]byte(p.Title.String)))
p.HTMLContent = template.HTML(applyMarkdown([]byte(p.Content), baseURL, cfg))
if exc := strings.Index(string(p.Content), "<!--more-->"); exc > -1 {
p.HTMLExcerpt = template.HTML(applyMarkdown([]byte(p.Content[:exc]), baseURL, cfg))
}
}
func (p *PublicPost) formatContent(cfg *config.Config, isOwner bool, isPostPage bool) {
p.Post.formatContent(cfg, &p.Collection.Collection, isOwner, isPostPage)
}
func (p *Post) augmentContent(c *Collection) {
if p.PinnedPosition.Valid {
// Don't augment posts that are pinned
return
}
if strings.Index(p.Content, "<!--nosig-->") > -1 {
// Don't augment posts with the special "nosig" shortcode
return
}
// Add post signatures
if c.Signature != "" {
p.Content += "\n\n" + c.Signature
}
}
func (p *PublicPost) augmentContent() {
p.Post.augmentContent(&p.Collection.Collection)
}
func (p *PublicPost) augmentReadingDestination() {
if p.IsPaid {
p.HTMLContent += template.HTML("\n\n" + `<p><a class="read-more" href="` + p.Collection.CanonicalURL() + p.Slug.String + `">` + localStr("Read more...", p.Language.String) + `</a> ($)</p>`)
}
}
func applyMarkdown(data []byte, baseURL string, cfg *config.Config) string {
return applyMarkdownSpecial(data, false, baseURL, cfg)
}
func disableYoutubeAutoplay(outHTML string) string {
for _, match := range youtubeReg.FindAllString(outHTML, -1) {
u, err := url.Parse(match)
if err != nil {
continue
}
u.RawQuery = html.UnescapeString(u.RawQuery)
q := u.Query()
// Set Youtube autoplay url parameter, if any, to 0
if len(q["autoplay"]) == 1 {
q.Set("autoplay", "0")
}
u.RawQuery = q.Encode()
cleanURL := u.String()
outHTML = strings.Replace(outHTML, match, cleanURL, 1)
}
return outHTML
}
func applyMarkdownSpecial(data []byte, skipNoFollow bool, baseURL string, cfg *config.Config) string {
mdExtensions := 0 |
blackfriday.EXTENSION_TABLES |
blackfriday.EXTENSION_FENCED_CODE |
blackfriday.EXTENSION_AUTOLINK |
blackfriday.EXTENSION_STRIKETHROUGH |
blackfriday.EXTENSION_SPACE_HEADERS |
blackfriday.EXTENSION_AUTO_HEADER_IDS
htmlFlags := 0 |
blackfriday.HTML_USE_SMARTYPANTS |
blackfriday.HTML_SMARTYPANTS_DASHES
if baseURL != "" {
htmlFlags |= blackfriday.HTML_HASHTAGS
}
// Generate Markdown
md := blackfriday.Markdown([]byte(data), blackfriday.HtmlRenderer(htmlFlags, "", ""), mdExtensions)
if baseURL != "" {
// Replace special text generated by Markdown parser
tagPrefix := baseURL + "tag:"
if cfg.App.Chorus {
tagPrefix = "/read/t/"
}
md = []byte(hashtagReg.ReplaceAll(md, []byte("<a href=\""+tagPrefix+"$1\" class=\"hashtag\"><span>#</span><span class=\"p-category\">$1</span></a>")))
handlePrefix := cfg.App.Host + "/@/"
md = []byte(mentionReg.ReplaceAll(md, []byte("<a href=\""+handlePrefix+"$1$2\" class=\"u-url mention\">@<span>$1$2</span></a>")))
}
// Strip out bad HTML
policy := getSanitizationPolicy()
policy.RequireNoFollowOnLinks(!skipNoFollow)
outHTML := string(policy.SanitizeBytes(md))
// Strip newlines on certain block elements that render with them
outHTML = blockReg.ReplaceAllString(outHTML, "<$1>")
outHTML = endBlockReg.ReplaceAllString(outHTML, "</$1></$2>")
outHTML = disableYoutubeAutoplay(outHTML)
return outHTML
}
func applyBasicMarkdown(data []byte) string {
if len(bytes.TrimSpace(data)) == 0 {
return ""
}
mdExtensions := 0 |
blackfriday.EXTENSION_STRIKETHROUGH |
blackfriday.EXTENSION_SPACE_HEADERS |
blackfriday.EXTENSION_HEADER_IDS
htmlFlags := 0 |
blackfriday.HTML_SKIP_HTML |
blackfriday.HTML_USE_SMARTYPANTS |
blackfriday.HTML_SMARTYPANTS_DASHES
// Generate Markdown
// This passes the supplied title into blackfriday.Markdown() as an H1 header, so we only render HTML that
// belongs in an H1.
md := blackfriday.Markdown(append([]byte("# "), data...), blackfriday.HtmlRenderer(htmlFlags, "", ""), mdExtensions)
// Remove H1 markup
md = bytes.TrimSpace(md) // blackfriday.Markdown adds a newline at the end of the <h1>
if len(md) == 0 {
return ""
}
md = md[len("<h1>") : len(md)-len("</h1>")]
// Strip out bad HTML
policy := bluemonday.UGCPolicy()
policy.AllowAttrs("class", "id").Globally()
outHTML := string(policy.SanitizeBytes(md))
outHTML = markeddownReg.ReplaceAllString(outHTML, "$1")
outHTML = strings.TrimRightFunc(outHTML, unicode.IsSpace)
return outHTML
}
func postTitle(content, friendlyId string) string {
const maxTitleLen = 80
content = stripHTMLWithoutEscaping(content)
content = strings.TrimLeftFunc(stripmd.Strip(content), unicode.IsSpace)
eol := strings.IndexRune(content, '\n')
blankLine := strings.Index(content, "\n\n")
if blankLine != -1 && blankLine <= eol && blankLine <= assumedTitleLen {
return strings.TrimSpace(content[:blankLine])
} else if utf8.RuneCountInString(content) <= maxTitleLen {
return content
}
return friendlyId
}
// TODO: fix duplicated code from postTitle. postTitle is a widely used func we
// don't have time to investigate right now.
func friendlyPostTitle(content, friendlyId string) string {
const maxTitleLen = 80
content = stripHTMLWithoutEscaping(content)
content = strings.TrimLeftFunc(stripmd.Strip(content), unicode.IsSpace)
eol := strings.IndexRune(content, '\n')
blankLine := strings.Index(content, "\n\n")
if blankLine != -1 && blankLine <= eol && blankLine <= assumedTitleLen {
return strings.TrimSpace(content[:blankLine])
} else if eol == -1 && utf8.RuneCountInString(content) <= maxTitleLen {
return content
}
title, truncd := parse.TruncToWord(parse.PostLede(content, true), maxTitleLen)
if truncd {
title += "..."
}
return title
}
// Strip HTML tags with bluemonday's StrictPolicy, then unescape the HTML
// entities added in by sanitizing the content.
func stripHTMLWithoutEscaping(content string) string {
return html.UnescapeString(bluemonday.StrictPolicy().Sanitize(content))
}
func getSanitizationPolicy() *bluemonday.Policy {
policy := bluemonday.UGCPolicy()
policy.AllowAttrs("src", "style").OnElements("iframe", "video", "audio")
policy.AllowAttrs("src", "type").OnElements("source")
policy.AllowAttrs("frameborder", "width", "height").Matching(bluemonday.Integer).OnElements("iframe")
policy.AllowAttrs("allowfullscreen").OnElements("iframe")
policy.AllowAttrs("controls", "loop", "muted", "autoplay").OnElements("video")
policy.AllowAttrs("controls", "loop", "muted", "autoplay", "preload").OnElements("audio")
policy.AllowAttrs("target").OnElements("a")
policy.AllowAttrs("title").OnElements("abbr")
policy.AllowAttrs("style", "class", "id").Globally()
policy.AllowAttrs("alt").OnElements("img")
policy.AllowElements("header", "footer")
policy.AllowURLSchemes("http", "https", "mailto", "xmpp")
return policy
}
func sanitizePost(content string) string {
return strings.Replace(content, "<", "&lt;", -1)
}
// postDescription generates a description based on the given post content,
// title, and post ID. This doesn't consider a V2 post field, `title` when
// choosing what to generate. In case a post has a title, this function will
// fail, and logic should instead be implemented to skip this when there's no
// title, like so:
//
// var desc string
// if title == "" {
// desc = postDescription(content, title, friendlyId)
// } else {
// desc = shortPostDescription(content)
// }
func postDescription(content, title, friendlyId string) string {
maxLen := 140
if content == "" {
content = "WriteFreely is a painless, simple, federated blogging platform."
} else {
fmtStr := "%s"
truncation := 0
if utf8.RuneCountInString(content) > maxLen {
// Post is longer than the max description, so let's show a better description
fmtStr = "%s..."
truncation = 3
}
if title == friendlyId {
// No specific title was found; simply truncate the post, starting at the beginning
content = fmt.Sprintf(fmtStr, strings.Replace(stringmanip.Substring(content, 0, maxLen-truncation), "\n", " ", -1))
} else {
// There was a title, so return a real description
blankLine := strings.Index(content, "\n\n")
if blankLine < 0 {
blankLine = 0
}
truncd := stringmanip.Substring(content, blankLine, blankLine+maxLen-truncation)
contentNoNL := strings.Replace(truncd, "\n", " ", -1)
content = strings.TrimSpace(fmt.Sprintf(fmtStr, contentNoNL))
}
}
return content
}
func shortPostDescription(content string) string {
maxLen := 140
fmtStr := "%s"
truncation := 0
if utf8.RuneCountInString(content) > maxLen {
// Post is longer than the max description, so let's show a better description
fmtStr = "%s..."
truncation = 3
}
return strings.TrimSpace(fmt.Sprintf(fmtStr, strings.Replace(stringmanip.Substring(content, 0, maxLen-truncation), "\n", " ", -1)))
}
func handleRenderMarkdown(app *App, w http.ResponseWriter, r *http.Request) error {
if !IsJSON(r) {
return impart.HTTPError{Status: http.StatusUnsupportedMediaType, Message: "Markdown API only supports JSON requests"}
}
in := struct {
CollectionURL string `json:"collection_url"`
RawBody string `json:"raw_body"`
}{}
decoder := json.NewDecoder(r.Body)
err := decoder.Decode(&in)
if err != nil {
log.Error("Couldn't parse markdown JSON request: %v", err)
return ErrBadJSON
}
out := struct {
Body string `json:"body"`
}{
Body: applyMarkdown([]byte(in.RawBody), in.CollectionURL, app.cfg),
}
return impart.WriteSuccess(w, out, http.StatusOK)
}