about summary refs log tree commit diff
path: root/plugins/base/sanitize_html.go
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/base/sanitize_html.go')
-rw-r--r-- plugins/base/sanitize_html.go 193
1 files changed, 0 insertions, 193 deletions
diff --git a/plugins/base/sanitize_html.go b/plugins/base/sanitize_html.go
deleted file mode 100644
index a82c852..0000000
--- a/plugins/base/sanitize_html.go
+++ /dev/null
@@ -1,193 +0,0 @@
-package koushinbase
-
-import (
- "bytes"
- "fmt"
- "regexp"
- "strings"
-
- "github.com/aymerick/douceur/css"
- cssparser "github.com/chris-ramon/douceur/parser"
- "github.com/microcosm-cc/bluemonday"
- "golang.org/x/net/html"
-)
-
-// TODO: this doesn't accomodate for quoting
-var (
- cssURLRegexp = regexp.MustCompile(`url\([^)]*\)`)
- cssExprRegexp = regexp.MustCompile(`expression\([^)]*\)`)
-)
-
-var allowedStyles = map[string]bool{
- "direction": true,
- "font": true,
- "font-family": true,
- "font-style": true,
- "font-variant": true,
- "font-size": true,
- "font-weight": true,
- "letter-spacing": true,
- "line-height": true,
- "text-align": true,
- "text-decoration": true,
- "text-indent": true,
- "text-overflow": true,
- "text-shadow": true,
- "text-transform": true,
- "white-space": true,
- "word-spacing": true,
- "word-wrap": true,
- "vertical-align": true,
-
- "color": true,
- "background": true,
- "background-color": true,
- "background-image": true,
- "background-repeat": true,
-
- "border": true,
- "border-color": true,
- "border-radius": true,
- "height": true,
- "margin": true,
- "padding": true,
- "width": true,
- "max-width": true,
- "min-width": true,
-
- "clear": true,
- "float": true,
-
- "border-collapse": true,
- "border-spacing": true,
- "caption-side": true,
- "empty-cells": true,
- "table-layout": true,
-
- "list-style-type": true,
- "list-style-position": true,
-}
-
-func sanitizeCSSDecls(decls []*css.Declaration) []*css.Declaration {
- sanitized := make([]*css.Declaration, 0, len(decls))
- for _, decl := range decls {
- if !allowedStyles[decl.Property] {
- continue
- }
- if cssExprRegexp.FindStringIndex(decl.Value) != nil {
- continue
- }
-
- // TODO: more robust CSS declaration parsing
- decl.Value = cssURLRegexp.ReplaceAllString(decl.Value, "url(about:blank)")
-
- sanitized = append(sanitized, decl)
- }
- return sanitized
-}
-
-func sanitizeCSSRule(rule *css.Rule) {
- // Disallow @import
- if rule.Kind == css.AtRule && strings.EqualFold(rule.Name, "@import") {
- rule.Prelude = "url(about:blank)"
- }
-
- rule.Declarations = sanitizeCSSDecls(rule.Declarations)
-
- for _, child := range rule.Rules {
- sanitizeCSSRule(child)
- }
-}
-
-func sanitizeNode(n *html.Node) {
- if n.Type == html.ElementNode {
- if strings.EqualFold(n.Data, "img") {
- for i := range n.Attr {
- attr := &n.Attr[i]
- if strings.EqualFold(attr.Key, "src") {
- attr.Val = "about:blank"
- }
- }
- } else if strings.EqualFold(n.Data, "style") {
- var s string
- c := n.FirstChild
- for c != nil {
- if c.Type == html.TextNode {
- s += c.Data
- }
-
- next := c.NextSibling
- n.RemoveChild(c)
- c = next
- }
-
- stylesheet, err := cssparser.Parse(s)
- if err != nil {
- s = ""
- } else {
- for _, rule := range stylesheet.Rules {
- sanitizeCSSRule(rule)
- }
-
- s = stylesheet.String()
- }
-
- n.AppendChild(&html.Node{
- Type: html.TextNode,
- Data: s,
- })
- }
-
- for i := range n.Attr {
- // Don't use `i, attr := range n.Attr` since `attr` would be a copy
- attr := &n.Attr[i]
-
- if strings.EqualFold(attr.Key, "style") {
- decls, err := cssparser.ParseDeclarations(attr.Val)
- if err != nil {
- attr.Val = ""
- continue
- }
-
- decls = sanitizeCSSDecls(decls)
-
- attr.Val = ""
- for _, d := range decls {
- attr.Val += d.String()
- }
- }
- }
- }
-
- for c := n.FirstChild; c != nil; c = c.NextSibling {
- sanitizeNode(c)
- }
-}
-
-func sanitizeHTML(b []byte) ([]byte, error) {
- doc, err := html.Parse(bytes.NewReader(b))
- if err != nil {
- return nil, fmt.Errorf("failed to parse HTML: %v", err)
- }
-
- sanitizeNode(doc)
-
- var buf bytes.Buffer
- if err := html.Render(&buf, doc); err != nil {
- return nil, fmt.Errorf("failed to render HTML: %v", err)
- }
- b = buf.Bytes()
-
- // bluemonday must always be run last
- p := bluemonday.UGCPolicy()
-
- // TODO: use bluemonday's AllowStyles once it's released and
- // supports <style>
- p.AllowElements("style")
- p.AllowAttrs("style").Globally()
-
- p.AddTargetBlankToFullyQualifiedLinks(true)
- p.RequireNoFollowOnLinks(true)
-
- return p.SanitizeBytes(b), nil
-}