From 3bf17752ab5b810eaa01f48c316ba9333055b20b Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Mon, 20 Jan 2020 18:28:49 +0100 Subject: plugins/base: remove external resources URLs, sanitize CSS --- go.mod | 8 +- go.sum | 11 ++- plugins/base/routes.go | 5 +- plugins/base/sanitize_html.go | 183 +++++++++++++++++++++++++++++++++++++++++- 4 files changed, 198 insertions(+), 9 deletions(-) diff --git a/go.mod b/go.mod index e00e9dc..2f63a62 100644 --- a/go.mod +++ b/go.mod @@ -3,16 +3,20 @@ module git.sr.ht/~emersion/koushin go 1.13 require ( + github.com/aymerick/douceur v0.2.0 + github.com/chris-ramon/douceur v0.2.0 github.com/emersion/go-imap v1.0.3 github.com/emersion/go-imap-move v0.0.0-20190710073258-6e5a51a5b342 github.com/emersion/go-message v0.11.1 github.com/emersion/go-sasl v0.0.0-20191210011802-430746ea8b9b github.com/emersion/go-smtp v0.12.1 + github.com/gorilla/css v1.0.0 // indirect github.com/labstack/echo/v4 v4.1.13 github.com/labstack/gommon v0.3.0 + github.com/microcosm-cc/bluemonday v1.0.2 github.com/yuin/gopher-lua v0.0.0-20191220021717-ab39c6098bdb golang.org/x/crypto v0.0.0-20200117160349-530e935923ad // indirect - golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa // indirect - golang.org/x/sys v0.0.0-20200117145432-59e60aa80a0c // indirect + golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa + golang.org/x/sys v0.0.0-20200120151820-655fe14d7479 // indirect layeh.com/gopher-luar v1.0.7 ) diff --git a/go.sum b/go.sum index e430330..3fb32fe 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,7 @@ +github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk= +github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4= +github.com/chris-ramon/douceur v0.2.0 h1:IDMEdxlEUUBYBKE4z/mJnFyVXox+MjuEVDJNN27glkU= +github.com/chris-ramon/douceur v0.2.0/go.mod h1:wDW5xjJdeoMm1mRt4sD4c/LbF/mWdEpRXQKjTR8nIBE= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= @@ -20,6 +24,8 @@ github.com/emersion/go-smtp v0.12.1 h1:1R8BDqrR2HhlGwgFYcOi+BVTvK1bMjAB65QcVpJ5s github.com/emersion/go-smtp v0.12.1/go.mod h1:SD9V/xa4ndMw77lR3Mf7htkp8RBNYuPh9UeuBs9tpUQ= github.com/emersion/go-textwrapper v0.0.0-20160606182133-d0e65e56babe h1:40SWqY0zE3qCi6ZrtTf5OUdNm5lDnGnjRSq9GgmeTrg= github.com/emersion/go-textwrapper v0.0.0-20160606182133-d0e65e56babe/go.mod h1:aqO8z8wPrjkscevZJFVE1wXJrLpC5LtJG7fqLOsPb2U= +github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY= +github.com/gorilla/css v1.0.0/go.mod h1:Dn721qIggHpt4+EFCcTLTU/vk5ySda2ReITrtgBl60c= github.com/labstack/echo v3.3.10+incompatible h1:pGRcYk231ExFAyoAjAfD85kQzRJCRI8bbnE7CX5OEgg= github.com/labstack/echo v3.3.10+incompatible/go.mod h1:0INS7j/VjnFxD4E2wkz67b8cVwCLbBmJyDaka6Cmk1s= github.com/labstack/echo/v4 v4.1.13 h1:JYgKq6NQQSaKbQcsOadAKX1kUVLCUzLGwu8sxN5tC34= @@ -58,6 +64,7 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk golang.org/x/crypto v0.0.0-20191227163750-53104e6ec876/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200117160349-530e935923ad h1:Jh8cai0fqIK+f6nG0UgPW5wFk8wmiMhM3AyciDBdtQg= golang.org/x/crypto v0.0.0-20200117160349-530e935923ad/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3 h1:0GoQqolDA55aaLxZyTzK/Y2ePZzZTUrRacwib7cNsYQ= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553 h1:efeOvDhwQ29Dj3SdAV/MJf8oukgn+8D8WgaCaRMchF8= @@ -72,8 +79,8 @@ golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a h1:aYOabOQFp6Vj6W1F80affTUvO golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200117145432-59e60aa80a0c h1:gUYreENmqtjZb2brVfUas1sC6UivSY8XwKwPo8tloLs= -golang.org/x/sys v0.0.0-20200117145432-59e60aa80a0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200120151820-655fe14d7479 h1:LhLiKguPgZL+Tglay4GhVtfF0kb8cvOJ0dHTCBO8YNI= +golang.org/x/sys v0.0.0-20200120151820-655fe14d7479/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= diff --git a/plugins/base/routes.go b/plugins/base/routes.go index 9232097..ad93c6f 100644 --- a/plugins/base/routes.go +++ b/plugins/base/routes.go @@ -244,7 +244,10 @@ func handleGetPart(ctx *koushin.Context, raw bool) error { isHTML := false if strings.EqualFold(mimeType, "text/html") { - body = sanitizeHTML(body) + body, err = sanitizeHTML(body) + if err != nil { + return fmt.Errorf("failed to sanitize HTML part: %v", err) + } isHTML = true } diff --git a/plugins/base/sanitize_html.go b/plugins/base/sanitize_html.go index 830f7a7..aba47d6 100644 --- a/plugins/base/sanitize_html.go +++ b/plugins/base/sanitize_html.go @@ -1,18 +1,193 @@ package koushinbase import ( + "bytes" + "fmt" + "regexp" + "strings" + + "golang.org/x/net/html" "github.com/microcosm-cc/bluemonday" + "github.com/aymerick/douceur/css" + cssparser "github.com/chris-ramon/douceur/parser" +) + +// TODO: this doesn't accomodate for quoting +var ( + cssURLRegexp = regexp.MustCompile(`url\([^)]*\)`) + cssExprRegexp = regexp.MustCompile(`expression\([^)]*\)`) ) -func sanitizeHTML(b []byte) []byte { +var allowedStyles = map[string]bool{ + "direction": true, + "font": true, + "font-family": true, + "font-style": true, + "font-variant": true, + "font-size": true, + "font-weight": true, + "letter-spacing": true, + "line-height": true, + "text-align": true, + "text-decoration": true, + "text-indent": true, + "text-overflow": true, + "text-shadow": true, + "text-transform": true, + "white-space": true, + "word-spacing": true, + "word-wrap": true, + "vertical-align": true, + + "color": true, + "background": true, + "background-color": true, + "background-image": true, + "background-repeat": true, + + "border": true, + "border-color": true, + "border-radius": true, + "height": true, + "margin": true, + "padding": true, + "width": true, + "max-width": true, + "min-width": true, + + "clear": true, + "float": true, + + "border-collapse": true, + "border-spacing": true, + "caption-side": true, + "empty-cells": true, + "table-layout": true, + + "list-style-type": true, + "list-style-position": true, +} + +func sanitizeCSSDecls(decls []*css.Declaration) []*css.Declaration { + sanitized := make([]*css.Declaration, 0, len(decls)) + for _, decl := range decls { + if !allowedStyles[decl.Property] { + continue + } + if cssExprRegexp.FindStringIndex(decl.Value) != nil { + continue + } + + // TODO: more robust CSS declaration parsing + decl.Value = cssURLRegexp.ReplaceAllString(decl.Value, "url(about:blank)") + + sanitized = append(sanitized, decl) + } + return sanitized +} + +func sanitizeCSSRule(rule *css.Rule) { + // Disallow @import + if rule.Kind == css.AtRule && strings.EqualFold(rule.Name, "@import") { + rule.Prelude = "url(about:blank)" + } + + rule.Declarations = sanitizeCSSDecls(rule.Declarations) + + for _, child := range rule.Rules { + sanitizeCSSRule(child) + } +} + +func sanitizeNode(n *html.Node) { + if n.Type == html.ElementNode { + if strings.EqualFold(n.Data, "img") { + for i := range n.Attr { + attr := &n.Attr[i] + if strings.EqualFold(attr.Key, "src") { + attr.Val = "about:blank" + } + } + } else if strings.EqualFold(n.Data, "style") { + var s string + c := n.FirstChild + for c != nil { + if c.Type == html.TextNode { + s += c.Data + } + + next := c.NextSibling + n.RemoveChild(c) + c = next + } + + stylesheet, err := cssparser.Parse(s) + if err != nil { + s = "" + } else { + for _, rule := range stylesheet.Rules { + sanitizeCSSRule(rule) + } + + s = stylesheet.String() + } + + n.AppendChild(&html.Node{ + Type: html.TextNode, + Data: s, + }) + } + + for i := range n.Attr { + // Don't use `i, attr := range n.Attr` since `attr` would be a copy + attr := &n.Attr[i] + + if strings.EqualFold(attr.Key, "style") { + decls, err := cssparser.ParseDeclarations(attr.Val) + if err != nil { + attr.Val = "" + continue + } + + decls = sanitizeCSSDecls(decls) + + attr.Val = "" + for _, d := range decls { + attr.Val += d.String() + } + } + } + } + + for c := n.FirstChild; c != nil; c = c.NextSibling { + sanitizeNode(c) + } +} + +func sanitizeHTML(b []byte) ([]byte, error) { + doc, err := html.Parse(bytes.NewReader(b)) + if err != nil { + return nil, fmt.Errorf("failed to parse HTML: %v", err) + } + + sanitizeNode(doc) + + var buf bytes.Buffer + if err := html.Render(&buf, doc); err != nil { + return nil, fmt.Errorf("failed to render HTML: %v", err) + } + b = buf.Bytes() + + // bluemonday must always be run last p := bluemonday.UGCPolicy() - // TODO: be more strict + // TODO: use bluemonday's AllowStyles once it's released and + // supports