From 1c3a669279000490a7286fec1ecc331f161f5133 Mon Sep 17 00:00:00 2001 From: Calvin Montgomery Date: Tue, 6 Jan 2015 12:20:48 -0500 Subject: [PATCH] Replace XSS filter with sanitize-html --- lib/channel/customization.js | 6 +- lib/xss.js | 307 +++++++---------------------------- package.json | 1 + 3 files changed, 61 insertions(+), 253 deletions(-) diff --git a/lib/channel/customization.js b/lib/channel/customization.js index 627c9ae3..1ed7df8c 100644 --- a/lib/channel/customization.js +++ b/lib/channel/customization.js @@ -36,9 +36,11 @@ CustomizationModule.prototype.load = function (data) { if ("motd" in data) { this.motd = { - motd: data.motd.motd || "", - html: data.motd.html || "" + motd: data.motd.motd || "" }; + + this.motd.motd = XSS.sanitizeHTML(this.motd.motd); + this.motd.html = this.motd.motd.replace(/\n/g, "
"); } }; diff --git a/lib/xss.js b/lib/xss.js index 9b02ce2b..c6c0de2e 100644 --- a/lib/xss.js +++ b/lib/xss.js @@ -1,260 +1,62 @@ -/* - WARNING +var sanitizeHTML = require("sanitize-html"); - This file contains an XSS prevention module I wrote myself. It has not - been verified by any external agency, and due to the nature of XSS I cannot - guarantee that it will filter correctly. Feel free to send me bug reports - and I will do my best to fix them, but use at your own risk. +const ALLOWED_TAGS = [ + "button", + "center", + "details", + "font", + "h1", + "h2", + "img", + "marquee", // It pains me to do this, but a lot of people use it... + "section", + "span", + "summary" +]; -*/ - -/* Prototype for a basic XML tag parser */ -function TagParser(text) { - this.text = text; - this.i = 0; - this.tag = this.parse(); -} - -/* Moves the position marker past any whitespace characters */ -TagParser.prototype.skipWhitespace = function () { - while (this.i < this.text.length && this.text[this.i].match(/\s/)) { - this.i++; - } -}; - -/* Reads a literal value matching the given regexp. Defaults - to /[^\s>]/; i.e. any string not containing whitespace or - the end of tag character '>' -*/ -TagParser.prototype.readLiteral = function (regexp) { - if (regexp === void 0) { - regexp = /[^\s>]/; - } - var str = ""; - while (this.i < this.text.length && this.text[this.i].match(regexp)) { - str += this.text[this.i]; - this.i++; - } - - str = str.replace(/&#([0-9]{2,7});?/g, function (m, p1) { - return String.fromCharCode(parseInt(p1)); - }); - - str = str.replace(/&#x([0-9a-fA-F]{2,7});?/g, function (m, p1) { - return String.fromCharCode(parseInt(p1, 16)); - }); - - str = str.replace(/[\x00-\x1f]/g, ""); - return str; -}; - -/* If the character at the current position is a quote, read - a string. Otherwise, read a literal -*/ -TagParser.prototype.readLiteralOrString = function (regexp) { - if (this.text[this.i].match(/["'`]/)) { - return this.readString(); - } - return this.readLiteral(regexp); -}; - -/* Read a string delimited by the character at the current - position. For XML tags this means strings enclosed in - " or '. Treats \" as a literal '"' symbol and not a - delimiter. -*/ -TagParser.prototype.readString = function () { - var delim = this.text[this.i++]; - - var str = ""; - while (this.i < this.text.length && this.text[this.i] !== delim) { - if (this.text[this.i] === "\\" && this.text[this.i+1] === delim) { - str += this.text[this.i+1]; - this.i++; - } else { - str += this.text[this.i]; - } - this.i++; - } - this.i++; - - str = str.replace(/&#([0-9]{2,7});?/g, function (m, p1) { - return String.fromCharCode(parseInt(p1)); - }); - - str = str.replace(/&#x([0-9a-fA-F]{2,7});?/g, function (m, p1) { - return String.fromCharCode(parseInt(p1, 16)); - }); - - str = str.replace(/[\x00-\x1f]/g, ""); - return str; -}; - -/* Attempts to parse a tagname and attributes from an - XML tag. - NOTE: Does not actually parse a DOM node, only parses - the tag between '<' and '>' because that's all I need - to do XSS filtering, I don't care what's between a tag - and its end tag (if it's another tag I handle that - separately) -*/ -TagParser.prototype.parse = function () { - this.i = this.text.indexOf("<"); - // Not a tag - if (this.i === -1) { - return null; - } - - this.i++; - this.skipWhitespace(); - - // First non-whitespace string after the opening '<' is the tag name - var tname = this.readLiteral(); - - var attrs = {}; - // Continue parsing attributes until the end of string is reached or - // the end of tag is reached - while (this.i < this.text.length && this.text[this.i] !== ">") { - // Read any string not containing equals, possibly delimited by - // " or ' - var key = this.readLiteralOrString(/[^\s=>]/); - this.skipWhitespace(); - // It's possible for tags to have attributes with no value, where - // the equals sign is not necessary - if (this.text[this.i] !== "=") { - if (key.trim().length > 0) { - attrs[key] = ""; - } - continue; - } - - this.i++; - //this.skipWhitespace(); - var value = this.readLiteralOrString(); - if (key.trim().length > 0) { - attrs[key] = value; - } - this.skipWhitespace(); - } - - // If end-of-string was not reached, consume the ending '>' - if (this.i < this.text.length) { - this.i++; - } - - return { - tagName: tname, - attributes: attrs, - text: this.text.substring(0, this.i) // Original text (for replacement) - }; -}; - -/* Some of these may not even be HTML tags, I borrowed them from the - [now deprecated] XSS module of node-validator -*/ -const badTags = new RegExp([ - "alert", - "applet", - "audio", - "basefont", - "base", - "behavior", - "bgsound", - "blink", - "body", - "embed", - "expression", - "form", - "frameset", - "frame", - "head", - "html", - "ilayer", - "iframe", - "input", - "layer", - "link", - "meta", - "object", +const ALLOWED_ATTRIBUTES = [ + "id", + "aria-hidden", + "border", + "class", + "color", + "data-dismiss", + "data-target", + "height", + "role", "style", - "script", - "textarea", "title", - "video", - "xml", - "xss" -].join("|"), "i"); + "valign", + "width" +]; -/* Nasty attributes. Anything starting with "on" is probably a javascript - callback, and I hope you see why formaction is a bad idea. -*/ -const badAttrs = new RegExp([ - "\\bon\\S*", - "\\bformaction", - "\\baction" -].join("|"), "i"); - -function sanitizeHTML(str) { - var i = str.indexOf("<"); - if (i === -1) { - // No HTML tags in the string - return str; - } - - // Loop across all tag delimiters '<' in string, parse each one, - // and replace the results with sanitized tags - while (i !== -1) { - var t = new TagParser(str.substring(i)).tag; - if (t.tagName.replace("/", "").match(badTags)) { - // Note: Important that I replace the tag with a nonempty value, - // otherwise ipt> would possibly defeat the filter. - str = str.replace(t.text, "[tag removed]"); - i = str.indexOf("<", i+1); - continue; - } - for (var k in t.attributes) { - // Keys should not contain non-word characters. - var k2 = k.replace(/[^\w]/g, ""); - if (k2 !== k) { - t.attributes[k2] = t.attributes[k]; - delete t.attributes[k]; - k = k2; - } - // If it's an evil attribute, just nuke it entirely - if (k.match(badAttrs)) { - delete t.attributes[k]; - } else { - if (t.attributes[k].replace(/\s/g, "").indexOf("javascript:") !== -1) { - t.attributes[k] = "[removed]"; - } - - } - } - // Build the sanitized tag - var fmt = "<" + t.tagName; - for (var k in t.attributes) { - if (k.trim().length > 0) { - fmt += " " + k; - if (t.attributes[k].trim().length > 0) { - var delim = '"'; - if (t.attributes[k].match(/[^\\]"/)) { - delim = "'"; - if (t.attributes[k].match(/[^\\]'/)) { - delim = "`"; - } - } - fmt += "=" + delim + t.attributes[k] + delim; - } - } - } - str = str.replace(t.text, fmt + ">"); - i = str.indexOf("<", i + fmt.length + 1); - } - - return str; +var ATTRIBUTE_MAP = { + a: ["href", "name", "target"], + font: ["size"], + img: ["src"], + marquee: ["behavior", "behaviour", "direction", "scrollamount"], + table: ["cellpadding", "cellspacing"], + th: ["colspan", "rowspan"], + td: ["colspan", "rowspan"] } -/* WIP: Sanitize a string where HTML is prohibited */ +for (var key in ATTRIBUTE_MAP) { + ALLOWED_ATTRIBUTES.forEach(function (attr) { + ATTRIBUTE_MAP[key].push(attr); + }); +} + +sanitizeHTML.defaults.allowedTags.concat(ALLOWED_TAGS).forEach(function (tag) { + if (!(tag in ATTRIBUTE_MAP)) { + ATTRIBUTE_MAP[tag] = ALLOWED_ATTRIBUTES; + } +}); + +const SETTINGS = { + allowedTags: sanitizeHTML.defaults.allowedTags.concat(ALLOWED_TAGS), + allowedAttributes: ATTRIBUTE_MAP +}; + function sanitizeText(str) { str = str.replace(/&/g, "&") .replace(/