aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--go.mod8
-rw-r--r--go.sum11
-rw-r--r--plugins/base/routes.go5
-rw-r--r--plugins/base/sanitize_html.go183
4 files changed, 198 insertions, 9 deletions
diff --git a/go.mod b/go.mod
index e00e9dc..2f63a62 100644
--- a/go.mod
+++ b/go.mod
@@ -3,16 +3,20 @@ module git.sr.ht/~emersion/koushin
go 1.13
require (
+ github.com/aymerick/douceur v0.2.0
+ github.com/chris-ramon/douceur v0.2.0
github.com/emersion/go-imap v1.0.3
github.com/emersion/go-imap-move v0.0.0-20190710073258-6e5a51a5b342
github.com/emersion/go-message v0.11.1
github.com/emersion/go-sasl v0.0.0-20191210011802-430746ea8b9b
github.com/emersion/go-smtp v0.12.1
+ github.com/gorilla/css v1.0.0 // indirect
github.com/labstack/echo/v4 v4.1.13
github.com/labstack/gommon v0.3.0
+ github.com/microcosm-cc/bluemonday v1.0.2
github.com/yuin/gopher-lua v0.0.0-20191220021717-ab39c6098bdb
golang.org/x/crypto v0.0.0-20200117160349-530e935923ad // indirect
- golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa // indirect
- golang.org/x/sys v0.0.0-20200117145432-59e60aa80a0c // indirect
+ golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa
+ golang.org/x/sys v0.0.0-20200120151820-655fe14d7479 // indirect
layeh.com/gopher-luar v1.0.7
)
diff --git a/go.sum b/go.sum
index e430330..3fb32fe 100644
--- a/go.sum
+++ b/go.sum
@@ -1,3 +1,7 @@
+github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk=
+github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4=
+github.com/chris-ramon/douceur v0.2.0 h1:IDMEdxlEUUBYBKE4z/mJnFyVXox+MjuEVDJNN27glkU=
+github.com/chris-ramon/douceur v0.2.0/go.mod h1:wDW5xjJdeoMm1mRt4sD4c/LbF/mWdEpRXQKjTR8nIBE=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
@@ -20,6 +24,8 @@ github.com/emersion/go-smtp v0.12.1 h1:1R8BDqrR2HhlGwgFYcOi+BVTvK1bMjAB65QcVpJ5s
github.com/emersion/go-smtp v0.12.1/go.mod h1:SD9V/xa4ndMw77lR3Mf7htkp8RBNYuPh9UeuBs9tpUQ=
github.com/emersion/go-textwrapper v0.0.0-20160606182133-d0e65e56babe h1:40SWqY0zE3qCi6ZrtTf5OUdNm5lDnGnjRSq9GgmeTrg=
github.com/emersion/go-textwrapper v0.0.0-20160606182133-d0e65e56babe/go.mod h1:aqO8z8wPrjkscevZJFVE1wXJrLpC5LtJG7fqLOsPb2U=
+github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY=
+github.com/gorilla/css v1.0.0/go.mod h1:Dn721qIggHpt4+EFCcTLTU/vk5ySda2ReITrtgBl60c=
github.com/labstack/echo v3.3.10+incompatible h1:pGRcYk231ExFAyoAjAfD85kQzRJCRI8bbnE7CX5OEgg=
github.com/labstack/echo v3.3.10+incompatible/go.mod h1:0INS7j/VjnFxD4E2wkz67b8cVwCLbBmJyDaka6Cmk1s=
github.com/labstack/echo/v4 v4.1.13 h1:JYgKq6NQQSaKbQcsOadAKX1kUVLCUzLGwu8sxN5tC34=
@@ -58,6 +64,7 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk
golang.org/x/crypto v0.0.0-20191227163750-53104e6ec876/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20200117160349-530e935923ad h1:Jh8cai0fqIK+f6nG0UgPW5wFk8wmiMhM3AyciDBdtQg=
golang.org/x/crypto v0.0.0-20200117160349-530e935923ad/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3 h1:0GoQqolDA55aaLxZyTzK/Y2ePZzZTUrRacwib7cNsYQ=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553 h1:efeOvDhwQ29Dj3SdAV/MJf8oukgn+8D8WgaCaRMchF8=
@@ -72,8 +79,8 @@ golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a h1:aYOabOQFp6Vj6W1F80affTUvO
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200117145432-59e60aa80a0c h1:gUYreENmqtjZb2brVfUas1sC6UivSY8XwKwPo8tloLs=
-golang.org/x/sys v0.0.0-20200117145432-59e60aa80a0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200120151820-655fe14d7479 h1:LhLiKguPgZL+Tglay4GhVtfF0kb8cvOJ0dHTCBO8YNI=
+golang.org/x/sys v0.0.0-20200120151820-655fe14d7479/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
diff --git a/plugins/base/routes.go b/plugins/base/routes.go
index 9232097..ad93c6f 100644
--- a/plugins/base/routes.go
+++ b/plugins/base/routes.go
@@ -244,7 +244,10 @@ func handleGetPart(ctx *koushin.Context, raw bool) error {
isHTML := false
if strings.EqualFold(mimeType, "text/html") {
- body = sanitizeHTML(body)
+ body, err = sanitizeHTML(body)
+ if err != nil {
+ return fmt.Errorf("failed to sanitize HTML part: %v", err)
+ }
isHTML = true
}
diff --git a/plugins/base/sanitize_html.go b/plugins/base/sanitize_html.go
index 830f7a7..aba47d6 100644
--- a/plugins/base/sanitize_html.go
+++ b/plugins/base/sanitize_html.go
@@ -1,18 +1,193 @@
package koushinbase
import (
+ "bytes"
+ "fmt"
+ "regexp"
+ "strings"
+
+ "golang.org/x/net/html"
"github.com/microcosm-cc/bluemonday"
+ "github.com/aymerick/douceur/css"
+ cssparser "github.com/chris-ramon/douceur/parser"
+)
+
+// TODO: this doesn't accomodate for quoting
+var (
+ cssURLRegexp = regexp.MustCompile(`url\([^)]*\)`)
+ cssExprRegexp = regexp.MustCompile(`expression\([^)]*\)`)
)
-func sanitizeHTML(b []byte) []byte {
+var allowedStyles = map[string]bool{
+ "direction": true,
+ "font": true,
+ "font-family": true,
+ "font-style": true,
+ "font-variant": true,
+ "font-size": true,
+ "font-weight": true,
+ "letter-spacing": true,
+ "line-height": true,
+ "text-align": true,
+ "text-decoration": true,
+ "text-indent": true,
+ "text-overflow": true,
+ "text-shadow": true,
+ "text-transform": true,
+ "white-space": true,
+ "word-spacing": true,
+ "word-wrap": true,
+ "vertical-align": true,
+
+ "color": true,
+ "background": true,
+ "background-color": true,
+ "background-image": true,
+ "background-repeat": true,
+
+ "border": true,
+ "border-color": true,
+ "border-radius": true,
+ "height": true,
+ "margin": true,
+ "padding": true,
+ "width": true,
+ "max-width": true,
+ "min-width": true,
+
+ "clear": true,
+ "float": true,
+
+ "border-collapse": true,
+ "border-spacing": true,
+ "caption-side": true,
+ "empty-cells": true,
+ "table-layout": true,
+
+ "list-style-type": true,
+ "list-style-position": true,
+}
+
+func sanitizeCSSDecls(decls []*css.Declaration) []*css.Declaration {
+ sanitized := make([]*css.Declaration, 0, len(decls))
+ for _, decl := range decls {
+ if !allowedStyles[decl.Property] {
+ continue
+ }
+ if cssExprRegexp.FindStringIndex(decl.Value) != nil {
+ continue
+ }
+
+ // TODO: more robust CSS declaration parsing
+ decl.Value = cssURLRegexp.ReplaceAllString(decl.Value, "url(about:blank)")
+
+ sanitized = append(sanitized, decl)
+ }
+ return sanitized
+}
+
+func sanitizeCSSRule(rule *css.Rule) {
+ // Disallow @import
+ if rule.Kind == css.AtRule && strings.EqualFold(rule.Name, "@import") {
+ rule.Prelude = "url(about:blank)"
+ }
+
+ rule.Declarations = sanitizeCSSDecls(rule.Declarations)
+
+ for _, child := range rule.Rules {
+ sanitizeCSSRule(child)
+ }
+}
+
+func sanitizeNode(n *html.Node) {
+ if n.Type == html.ElementNode {
+ if strings.EqualFold(n.Data, "img") {
+ for i := range n.Attr {
+ attr := &n.Attr[i]
+ if strings.EqualFold(attr.Key, "src") {
+ attr.Val = "about:blank"
+ }
+ }
+ } else if strings.EqualFold(n.Data, "style") {
+ var s string
+ c := n.FirstChild
+ for c != nil {
+ if c.Type == html.TextNode {
+ s += c.Data
+ }
+
+ next := c.NextSibling
+ n.RemoveChild(c)
+ c = next
+ }
+
+ stylesheet, err := cssparser.Parse(s)
+ if err != nil {
+ s = ""
+ } else {
+ for _, rule := range stylesheet.Rules {
+ sanitizeCSSRule(rule)
+ }
+
+ s = stylesheet.String()
+ }
+
+ n.AppendChild(&html.Node{
+ Type: html.TextNode,
+ Data: s,
+ })
+ }
+
+ for i := range n.Attr {
+ // Don't use `i, attr := range n.Attr` since `attr` would be a copy
+ attr := &n.Attr[i]
+
+ if strings.EqualFold(attr.Key, "style") {
+ decls, err := cssparser.ParseDeclarations(attr.Val)
+ if err != nil {
+ attr.Val = ""
+ continue
+ }
+
+ decls = sanitizeCSSDecls(decls)
+
+ attr.Val = ""
+ for _, d := range decls {
+ attr.Val += d.String()
+ }
+ }
+ }
+ }
+
+ for c := n.FirstChild; c != nil; c = c.NextSibling {
+ sanitizeNode(c)
+ }
+}
+
+func sanitizeHTML(b []byte) ([]byte, error) {
+ doc, err := html.Parse(bytes.NewReader(b))
+ if err != nil {
+ return nil, fmt.Errorf("failed to parse HTML: %v", err)
+ }
+
+ sanitizeNode(doc)
+
+ var buf bytes.Buffer
+ if err := html.Render(&buf, doc); err != nil {
+ return nil, fmt.Errorf("failed to render HTML: %v", err)
+ }
+ b = buf.Bytes()
+
+ // bluemonday must always be run last
p := bluemonday.UGCPolicy()
- // TODO: be more strict
+ // TODO: use bluemonday's AllowStyles once it's released and
+ // supports <style>
p.AllowElements("style")
- p.AllowAttrs("style")
+ p.AllowAttrs("style").Globally()
p.AddTargetBlankToFullyQualifiedLinks(true)
p.RequireNoFollowOnLinks(true)
- return p.SanitizeBytes(b)
+ return p.SanitizeBytes(b), nil
}