Merge pull request #428 from calzoneman/sanitize-html

Merge sanitize-html into 3.0 #yolo
2024-09-13 04:32:09 +00:00 · 2015-01-14 13:23:01 -06:00 · 2015-01-14 13:23:01 -06:00 · d7ef0d1893
commit d7ef0d1893
parent 139825168f 991dc866b1
7 changed files with 75 additions and 276 deletions
--- a/lib/channel/channel.js
+++ b/lib/channel/channel.js
@ -163,10 +163,7 @@ Channel.prototype.loadState = function () {
    var errorLoad = function (msg) {
        if (self.modules.customization) {
            self.modules.customization.load({
-                motd: {
-                    motd: msg,
-                    html: msg
-                }
+                motd: msg
            });
        }

--- a/lib/channel/customization.js
+++ b/lib/channel/customization.js
@ -17,10 +17,7 @@ function CustomizationModule(channel) {
    ChannelModule.apply(this, arguments);
    this.css = "";
    this.js = "";
-    this.motd = {
-        motd: "",
-        html: ""
-    };
+    this.motd = "";
 }

 CustomizationModule.prototype = Object.create(ChannelModule.prototype);
@ -35,10 +32,15 @@ CustomizationModule.prototype.load = function (data) {
    }

    if ("motd" in data) {
-        this.motd = {
-            motd: data.motd.motd || "",
-            html: data.motd.html || ""
-        };
+        if (typeof data.motd === "object" && data.motd.motd) {
+            // Old style MOTD, convert to new
+            this.motd = XSS.sanitizeHTML(data.motd.motd).replace(
+                /\n/g, "<br>\n");
+        } else if (typeof data.motd === "string") {
+            // The MOTD is filtered before it is saved, however it is also
+            // re-filtered on load in case the filtering rules change
+            this.motd = XSS.sanitizeHTML(data.motd);
+        }
    }
 };

@ -49,12 +51,7 @@ CustomizationModule.prototype.save = function (data) {
 };

 CustomizationModule.prototype.setMotd = function (motd) {
-    motd = XSS.sanitizeHTML(motd);
-    var html = motd.replace(/\n/g, "<br>");
-    this.motd = {
-        motd: motd,
-        html: html
-    };
+    this.motd = XSS.sanitizeHTML(motd);
    this.sendMotd(this.channel.users);
 };

--- a/lib/xss.js
+++ b/lib/xss.js
@ -1,260 +1,63 @@
-/*
-    WARNING
+var sanitizeHTML = require("sanitize-html");

-    This file contains an XSS prevention module I wrote myself.  It has not
-    been verified by any external agency, and due to the nature of XSS I cannot
-    guarantee that it will filter correctly.  Feel free to send me bug reports
-    and I will do my best to fix them, but use at your own risk.
+// These tags are allowed in addition to the defaults
+// See https://github.com/punkave/sanitize-html
+const ALLOWED_TAGS = [
+    "button",
+    "center",
+    "details",
+    "font",
+    "h1",
+    "h2",
+    "img",
+    "marquee", // It pains me to do this, but a lot of people use it...
+    "section",
+    "span",
+    "summary"
+];

-*/
-
-/* Prototype for a basic XML tag parser */
-function TagParser(text) {
-    this.text = text;
-    this.i = 0;
-    this.tag = this.parse();
-}
-
-/* Moves the position marker past any whitespace characters */
-TagParser.prototype.skipWhitespace = function () {
-    while (this.i < this.text.length && this.text[this.i].match(/\s/)) {
-        this.i++;
-    }
-};
-
-/* Reads a literal value matching the given regexp.  Defaults
-   to /[^\s>]/; i.e. any string not containing whitespace or
-   the end of tag character '>'
-*/
-TagParser.prototype.readLiteral = function (regexp) {
-    if (regexp === void 0) {
-        regexp = /[^\s>]/;
-    }
-    var str = "";
-    while (this.i < this.text.length && this.text[this.i].match(regexp)) {
-        str += this.text[this.i];
-        this.i++;
-    }
-
-    str = str.replace(/&#([0-9]{2,7});?/g, function (m, p1) {
-        return String.fromCharCode(parseInt(p1));
-    });
-
-    str = str.replace(/&#x([0-9a-fA-F]{2,7});?/g, function (m, p1) {
-        return String.fromCharCode(parseInt(p1, 16));
-    });
-
-    str = str.replace(/[\x00-\x1f]/g, "");
-    return str;
-};
-
-/* If the character at the current position is a quote, read
-   a string.  Otherwise, read a literal
-*/
-TagParser.prototype.readLiteralOrString = function (regexp) {
-    if (this.text[this.i].match(/["'`]/)) {
-        return this.readString();
-    }
-    return this.readLiteral(regexp);
-};
-
-/* Read a string delimited by the character at the current
-   position.  For XML tags this means strings enclosed in
-   " or '.  Treats \" as a literal '"' symbol and not a
-   delimiter.
-*/
-TagParser.prototype.readString = function () {
-    var delim = this.text[this.i++];
-
-    var str = "";
-    while (this.i < this.text.length && this.text[this.i] !== delim) {
-        if (this.text[this.i] === "\\" && this.text[this.i+1] === delim) {
-            str += this.text[this.i+1];
-            this.i++;
-        } else {
-            str += this.text[this.i];
-        }
-        this.i++;
-    }
-    this.i++;
-
-    str = str.replace(/&#([0-9]{2,7});?/g, function (m, p1) {
-        return String.fromCharCode(parseInt(p1));
-    });
-
-    str = str.replace(/&#x([0-9a-fA-F]{2,7});?/g, function (m, p1) {
-        return String.fromCharCode(parseInt(p1, 16));
-    });
-
-    str = str.replace(/[\x00-\x1f]/g, "");
-    return str;
-};
-
-/* Attempts to parse a tagname and attributes from an
-   XML tag.
-   NOTE: Does not actually parse a DOM node, only parses
-   the tag between '<' and '>' because that's all I need
-   to do XSS filtering, I don't care what's between a tag
-   and its end tag (if it's another tag I handle that
-   separately)
-*/
-TagParser.prototype.parse = function () {
-    this.i = this.text.indexOf("<");
-    // Not a tag
-    if (this.i === -1) {
-        return null;
-    }
-
-    this.i++;
-    this.skipWhitespace();
-
-    // First non-whitespace string after the opening '<' is the tag name
-    var tname = this.readLiteral();
-
-    var attrs = {};
-    // Continue parsing attributes until the end of string is reached or
-    // the end of tag is reached
-    while (this.i < this.text.length && this.text[this.i] !== ">") {
-        // Read any string not containing equals, possibly delimited by
-        // " or '
-        var key = this.readLiteralOrString(/[^\s=>]/);
-        this.skipWhitespace();
-        // It's possible for tags to have attributes with no value, where
-        // the equals sign is not necessary
-        if (this.text[this.i] !== "=") {
-            if (key.trim().length > 0) {
-                attrs[key] = "";
-            }
-            continue;
-        }
-
-        this.i++;
-        //this.skipWhitespace();
-        var value = this.readLiteralOrString();
-        if (key.trim().length > 0) {
-            attrs[key] = value;
-        }
-        this.skipWhitespace();
-    }
-
-    // If end-of-string was not reached, consume the ending '>'
-    if (this.i < this.text.length) {
-        this.i++;
-    }
-
-    return {
-        tagName: tname,
-        attributes: attrs,
-        text: this.text.substring(0, this.i) // Original text (for replacement)
-    };
-};
-
-/* Some of these may not even be HTML tags, I borrowed them from the
-   [now deprecated] XSS module of node-validator
-*/
-const badTags = new RegExp([
-    "alert",
-    "applet",
-    "audio",
-    "basefont",
-    "base",
-    "behavior",
-    "bgsound",
-    "blink",
-    "body",
-    "embed",
-    "expression",
-    "form",
-    "frameset",
-    "frame",
-    "head",
-    "html",
-    "ilayer",
-    "iframe",
-    "input",
-    "layer",
-    "link",
-    "meta",
-    "object",
+const ALLOWED_ATTRIBUTES = [
+    "id",
+    "aria-*",
+    "border",
+    "class",
+    "color",
+    "data-*",
+    "height",
+    "role",
    "style",
-    "script",
-    "textarea",
    "title",
-    "video",
-    "xml",
-    "xss"
-].join("|"), "i");
+    "valign",
+    "width"
+];

-/* Nasty attributes.  Anything starting with "on" is probably a javascript
-   callback, and I hope you see why formaction is a bad idea.
-*/
-const badAttrs = new RegExp([
-    "\\bon\\S*",
-    "\\bformaction",
-    "\\baction"
-].join("|"), "i");
-
-function sanitizeHTML(str) {
-    var i = str.indexOf("<");
-    if (i === -1) {
-        // No HTML tags in the string
-        return str;
-    }
-
-    // Loop across all tag delimiters '<' in string, parse each one,
-    // and replace the results with sanitized tags
-    while (i !== -1) {
-        var t = new TagParser(str.substring(i)).tag;
-        if (t.tagName.replace("/", "").match(badTags)) {
-            // Note: Important that I replace the tag with a nonempty value,
-            // otherwise <scr<script>ipt> would possibly defeat the filter.
-            str = str.replace(t.text, "[tag removed]");
-            i = str.indexOf("<", i+1);
-            continue;
-        }
-        for (var k in t.attributes) {
-            // Keys should not contain non-word characters.
-            var k2 = k.replace(/[^\w]/g, "");
-            if (k2 !== k) {
-                t.attributes[k2] = t.attributes[k];
-                delete t.attributes[k];
-                k = k2;
-            }
-            // If it's an evil attribute, just nuke it entirely
-            if (k.match(badAttrs)) {
-                delete t.attributes[k];
-            } else {
-                if (t.attributes[k].replace(/\s/g, "").indexOf("javascript:") !== -1) {
-                    t.attributes[k] = "[removed]";
-                }
-
-            }
-        }
-        // Build the sanitized tag
-        var fmt = "<" + t.tagName;
-        for (var k in t.attributes) {
-            if (k.trim().length > 0) {
-                fmt += " " + k;
-                if (t.attributes[k].trim().length > 0) {
-                    var delim = '"';
-                    if (t.attributes[k].match(/[^\\]"/)) {
-                        delim = "'";
-                        if (t.attributes[k].match(/[^\\]'/)) {
-                            delim = "`";
-                        }
-                    }
-                    fmt += "=" + delim + t.attributes[k] + delim;
-                }
-            }
-        }
-        str = str.replace(t.text, fmt + ">");
-        i = str.indexOf("<", i + fmt.length + 1);
-    }
-
-    return str;
+var ATTRIBUTE_MAP = {
+    a: ["href", "name", "target"],
+    font: ["size"],
+    img: ["src"],
+    marquee: ["behavior", "behaviour", "direction", "scrollamount"],
+    table: ["cellpadding", "cellspacing"],
+    th: ["colspan", "rowspan"],
+    td: ["colspan", "rowspan"]
 }

-/* WIP: Sanitize a string where HTML is prohibited */
+for (var key in ATTRIBUTE_MAP) {
+    ALLOWED_ATTRIBUTES.forEach(function (attr) {
+        ATTRIBUTE_MAP[key].push(attr);
+    });
+}
+
+sanitizeHTML.defaults.allowedTags.concat(ALLOWED_TAGS).forEach(function (tag) {
+    if (!(tag in ATTRIBUTE_MAP)) {
+        ATTRIBUTE_MAP[tag] = ALLOWED_ATTRIBUTES;
+    }
+});
+
+const SETTINGS = {
+    allowedTags: sanitizeHTML.defaults.allowedTags.concat(ALLOWED_TAGS),
+    allowedAttributes: ATTRIBUTE_MAP
+};
+
 function sanitizeText(str) {
    str = str.replace(/&/g, "&amp;")
             .replace(/</g, "&lt;")
@ -280,6 +83,9 @@ function decodeText(str) {
    return str;
 }

-module.exports.sanitizeHTML = sanitizeHTML;
+module.exports.sanitizeHTML = function (html) {
+    return sanitizeHTML(html, SETTINGS);
+};
+
 module.exports.sanitizeText = sanitizeText;
 module.exports.decodeText = decodeText;
--- a/package.json
+++ b/package.json
@ -22,6 +22,7 @@
    "nodemailer": "^1.2.0",
    "oauth": "^0.9.12",
    "q": "^1.0.1",
+    "sanitize-html": "^1.5.0",
    "serve-static": "^1.5.3",
    "socket.io": "^1.2.1",
    "yamljs": "^0.1.5"
--- a/www/js/callbacks.js
+++ b/www/js/callbacks.js
@ -175,11 +175,10 @@ Callbacks = {
        }
    },

-    setMotd: function(data) {
-        CHANNEL.motd = data.html;
-        CHANNEL.motd_text = data.motd;
-        $("#motd").html(CHANNEL.motd);
-        $("#cs-motdtext").val(CHANNEL.motd_text);
+    setMotd: function(motd) {
+        CHANNEL.motd = motd;
+        $("#motd").html(motd);
+        $("#cs-motdtext").val(motd);
        if (data.motd != "") {
            $("#motdwrap").show();
            $("#motd").show();
--- a/www/js/data.js
+++ b/www/js/data.js
@ -19,7 +19,6 @@ var CHANNEL = {
    css: "",
    js: "",
    motd: "",
-    motd_text: "",
    name: false,
    usercount: 0,
    emotes: []
--- a/www/js/util.js
+++ b/www/js/util.js
@ -900,7 +900,7 @@ function handleModPermissions() {
    })();
    $("#cs-csstext").val(CHANNEL.css);
    $("#cs-jstext").val(CHANNEL.js);
-    $("#cs-motdtext").val(CHANNEL.motd_text);
+    $("#cs-motdtext").val(CHANNEL.motd);
    setParentVisible("a[href='#cs-motdeditor']", hasPermission("motdedit"));
    setParentVisible("a[href='#cs-permedit']", CLIENT.rank >= 3);
    setParentVisible("a[href='#cs-banlist']", hasPermission("ban"));