var http = require("http"); var https = require("https"); var cheerio = require('cheerio'); var Logger = require("./logger.js"); var Media = require("./media"); var CustomEmbedFilter = require("./customembed").filter; var Server = require("./server"); var Config = require("./config"); var ffmpeg = require("./ffmpeg"); /* * Preference map of quality => youtube formats. * see https://en.wikipedia.org/wiki/Youtube#Quality_and_codecs * * Prefer WebM over MP4, ignore other codecs (e.g. FLV) */ const GOOGLE_PREFERENCE = { "hd1080": [46, 37], "hd720": [45, 22], "large": [44, 59], "medium": [43, 18, 34] // 34 is 360p FLV as a last-ditch }; const CONTENT_TYPES = { 43: "webm", 44: "webm", 45: "webm", 46: "webm", 18: "mp4", 22: "mp4", 37: "mp4", 59: "mp4", 34: "flv" }; var urlRetrieve = function (transport, options, callback) { var req = transport.request(options, function (res) { res.on("error", function (err) { Logger.errlog.log("HTTP response " + options.host + options.path + " failed: "+ err); callback(503, ""); }); var buffer = ""; res.setEncoding("utf-8"); res.on("data", function (chunk) { buffer += chunk; }); res.on("end", function () { callback(res.statusCode, buffer); }); }); req.on("error", function (err) { Logger.errlog.log("HTTP request " + options.host + options.path + " failed: " + err); callback(503, ""); }); req.end(); }; var Getters = { /* youtube.com */ yt: function (id, callback) { var sv = Server.getServer(); var m = id.match(/([\w-]{11})/); if (m) { id = m[1]; } else { callback("Invalid ID", null); return; } var options = { host: "gdata.youtube.com", port: 443, path: "/feeds/api/videos/" + id + "?v=2&alt=json", method: "GET", dataType: "jsonp", timeout: 1000 }; if (Config.get("youtube-v2-key")) { options.headers = { "X-Gdata-Key": "key=" + Config.get("youtube-v2-key") }; } urlRetrieve(https, options, function (status, data) { switch (status) { case 200: break; /* Request is OK, skip to handling data */ case 400: return callback("Invalid request", null); case 403: return callback("Private video", null); case 404: return callback("Video not found", null); case 500: case 503: return callback("Service unavailable", null); default: return callback("HTTP " + status, null); } var buffer = data; try { data = JSON.parse(data); /* Check for embedding restrictions */ if (data.entry.yt$accessControl) { var ac = data.entry.yt$accessControl; for (var i = 0; i < ac.length; i++) { if (ac[i].action === "embed") { if (ac[i].permission === "denied") { callback("Embedding disabled", null); return; } break; } } } var seconds = data.entry.media$group.yt$duration.seconds; var title = data.entry.title.$t; var meta = {}; /* Check for country restrictions */ if (data.entry.media$group.media$restriction) { var rest = data.entry.media$group.media$restriction; if (rest.length > 0) { if (rest[0].relationship === "deny") { meta.restricted = rest[0].$t; } } } var media = new Media(id, title, seconds, "yt", meta); callback(false, media); } catch (e) { // Gdata version 2 has the rather silly habit of // returning error codes in XML when I explicitly asked // for JSON var m = buffer.match(/([^<]+)<\/internalReason>/); if (m === null) m = buffer.match(/([^<]+)<\/code>/); var err = e; if (m) { if(m[1] === "too_many_recent_calls") { err = "YouTube is throttling the server right "+ "now for making too many requests. "+ "Please try again in a moment."; } else { err = m[1]; } } callback(err, null); } }); }, /* youtube.com playlists */ yp: function (id, callback, url) { /** * NOTE: callback may be called multiple times, once for each <= 25 video * batch of videos in the list. It will be called in order. */ var m = id.match(/([\w-]+)/); if (m) { id = m[1]; } else { callback("Invalid ID", null); return; } var path = "/feeds/api/playlists/" + id + "?v=2&alt=json"; /** * NOTE: the third parameter, url, is used to chain this retriever * multiple times to get all the videos from a playlist, as each * request only returns 25 videos. */ if (url !== undefined) { path = "/" + url.split("gdata.youtube.com")[1]; } var options = { host: "gdata.youtube.com", port: 443, path: path, method: "GET", dataType: "jsonp", timeout: 1000 }; if (Config.get("youtube-v2-key")) { options.headers = { "X-Gdata-Key": "key=" + Config.get("youtube-v2-key") }; } urlRetrieve(https, options, function (status, data) { switch (status) { case 200: break; /* Request is OK, skip to handling data */ case 400: return callback("Invalid request", null); case 403: return callback("Private playlist", null); case 404: return callback("Playlist not found", null); case 500: case 503: return callback("Service unavailable", null); default: return callback("HTTP " + status, null); } try { data = JSON.parse(data); var vids = []; for(var i in data.feed.entry) { try { /** * FIXME: This should probably check for embed restrictions * and country restrictions on each video in the list */ var item = data.feed.entry[i]; var id = item.media$group.yt$videoid.$t; var title = item.title.$t; var seconds = item.media$group.yt$duration.seconds; var media = new Media(id, title, seconds, "yt"); vids.push(media); } catch(e) { } } callback(false, vids); var links = data.feed.link; for (var i in links) { if (links[i].rel === "next") { /* Look up the next batch of videos from the list */ Getters["yp"](id, callback, links[i].href); } } } catch (e) { callback(e, null); } }); }, /* youtube.com search */ ytSearch: function (terms, callback) { /** * terms is a list of words from the search query. Each word must be * encoded properly for use in the request URI */ for (var i in terms) { terms[i] = encodeURIComponent(terms[i]); } var query = terms.join("+"); var options = { host: "gdata.youtube.com", port: 443, path: "/feeds/api/videos/?q=" + query + "&v=2&alt=json", method: "GET", dataType: "jsonp", timeout: 1000 }; if (Config.get("youtube-v2-key")) { options.headers = { "X-Gdata-Key": "key=" + Config.get("youtube-v2-key") }; } urlRetrieve(https, options, function (status, data) { if (status !== 200) { callback("YouTube search: HTTP " + status, null); return; } try { data = JSON.parse(data); var vids = []; for(var i in data.feed.entry) { try { /** * FIXME: This should probably check for embed restrictions * and country restrictions on each video in the list */ var item = data.feed.entry[i]; var id = item.media$group.yt$videoid.$t; var title = item.title.$t; var seconds = item.media$group.yt$duration.seconds; var media = new Media(id, title, seconds, "yt"); media.thumb = item.media$group.media$thumbnail[0]; vids.push(media); } catch(e) { } } callback(false, vids); } catch(e) { callback(e, null); } }); }, /* vimeo.com */ vi: function (id, callback) { var m = id.match(/([\w-]+)/); if (m) { id = m[1]; } else { callback("Invalid ID", null); return; } if (Config.get("vimeo-oauth.enabled")) { return Getters.vi_oauth(id, callback); } var options = { host: "vimeo.com", port: 443, path: "/api/v2/video/" + id + ".json", method: "GET", dataType: "jsonp", timeout: 1000 }; urlRetrieve(https, options, function (status, data) { switch (status) { case 200: break; /* Request is OK, skip to handling data */ case 400: return callback("Invalid request", null); case 403: return callback("Private video", null); case 404: return callback("Video not found", null); case 500: case 503: return callback("Service unavailable", null); default: return callback("HTTP " + status, null); } try { data = JSON.parse(data); data = data[0]; var seconds = data.duration; var title = data.title; var media = new Media(id, title, seconds, "vi"); callback(false, media); } catch(e) { var err = e; /** * This should no longer be necessary as the outer handler * checks for HTTP 404 */ if (buffer.match(/not found/)) err = "Video not found"; callback(err, null); } }); }, vi_oauth: function (id, callback) { var OAuth = require("oauth"); var oa = new OAuth.OAuth( "https://vimeo.com/oauth/request_token", "https://vimeo.com/oauth/access_token", Config.get("vimeo-oauth.consumer-key"), Config.get("vimeo-oauth.secret"), "1.0", null, "HMAC-SHA1" ); oa.get("https://vimeo.com/api/rest/v2?format=json" + "&method=vimeo.videos.getInfo&video_id=" + id, null, null, function (err, data, res) { if (err) { return callback(err, null); } try { data = JSON.parse(data); if (data.stat !== "ok") { return callback(data.err.msg, null); } var video = data.video[0]; if (video.embed_privacy !== "anywhere") { return callback("Embedding disabled", null); } var id = video.id; var seconds = parseInt(video.duration); var title = video.title; callback(null, new Media(id, title, seconds, "vi")); } catch (e) { callback("Error handling Vimeo response", null); } }); }, /* dailymotion.com */ dm: function (id, callback) { var m = id.match(/([\w-]+)/); if (m) { id = m[1]; } else { callback("Invalid ID", null); return; } var options = { host: "api.dailymotion.com", port: 443, path: "/video/" + id + "?fields=duration,title", method: "GET", dataType: "jsonp", timeout: 1000 }; urlRetrieve(https, options, function (status, data) { switch (status) { case 200: break; /* Request is OK, skip to handling data */ case 400: return callback("Invalid request", null); case 403: return callback("Private video", null); case 404: return callback("Video not found", null); case 500: case 503: return callback("Service unavailable", null); default: return callback("HTTP " + status, null); } try { data = JSON.parse(data); var title = data.title; var seconds = data.duration; /** * This is a rather hacky way to indicate that a video has * been deleted... */ if (title === "Deleted video" && seconds === 10) { callback("Video not found", null); return; } var media = new Media(id, title, seconds, "dm"); callback(false, media); } catch(e) { callback(e, null); } }); }, /* soundcloud.com */ sc: function (id, callback) { /* TODO: require server owners to register their own API key, put in config */ const SC_CLIENT = "2e0c82ab5a020f3a7509318146128abd"; var m = id.match(/([\w-\/\.:]+)/); if (m) { id = m[1]; } else { callback("Invalid ID", null); return; } var options = { host: "api.soundcloud.com", port: 443, path: "/resolve.json?url=" + id + "&client_id=" + SC_CLIENT, method: "GET", dataType: "jsonp", timeout: 1000 }; urlRetrieve(https, options, function (status, data) { switch (status) { case 200: case 302: break; /* Request is OK, skip to handling data */ case 400: return callback("Invalid request", null); case 403: return callback("Private sound", null); case 404: return callback("Sound not found", null); case 500: case 503: return callback("Service unavailable", null); default: return callback("HTTP " + status, null); } var track = null; try { data = JSON.parse(data); track = data.location; } catch(e) { callback(e, null); return; } var options2 = { host: "api.soundcloud.com", port: 443, path: track, method: "GET", dataType: "jsonp", timeout: 1000 }; /** * There has got to be a way to directly get the data I want without * making two requests to Soundcloud...right? * ...right? */ urlRetrieve(https, options2, function (status, data) { switch (status) { case 200: break; /* Request is OK, skip to handling data */ case 400: return callback("Invalid request", null); case 403: return callback("Private sound", null); case 404: return callback("Sound not found", null); case 500: case 503: return callback("Service unavailable", null); default: return callback("HTTP " + status, null); } try { data = JSON.parse(data); var seconds = data.duration / 1000; var title = data.title; var meta = {}; if (data.sharing === "private" && data.embeddable_by === "all") { meta.scuri = data.uri; } var media = new Media(id, title, seconds, "sc", meta); callback(false, media); } catch(e) { callback(e, null); } }); }); }, /* livestream.com */ li: function (id, callback) { var m = id.match(/([\w-]+)/); if (m) { id = m[1]; } else { callback("Invalid ID", null); return; } var title = "Livestream.com - " + id; var media = new Media(id, title, "--:--", "li"); callback(false, media); }, /* twitch.tv */ tw: function (id, callback) { var m = id.match(/([\w-]+)/); if (m) { id = m[1]; } else { callback("Invalid ID", null); return; } var title = "Twitch.tv - " + id; var media = new Media(id, title, "--:--", "tw"); callback(false, media); }, /* ustream.tv */ us: function (id, callback) { /** *2013-09-17 * They couldn't fucking decide whether channels should * be at http://www.ustream.tv/channel/foo or just * http://www.ustream.tv/foo so they do both. * [](/cleese) */ var m = id.match(/([^\?&#]+)|(channel\/[^\?&#]+)/); if (m) { id = m[1]; } else { callback("Invalid ID", null); return; } var options = { host: "www.ustream.tv", port: 80, path: "/" + id, method: "GET", timeout: 1000 }; urlRetrieve(http, options, function (status, data) { if(status !== 200) { callback("Ustream HTTP " + status, null); return; } /** * Regexing the ID out of the HTML because * Ustream's API is so horribly documented * I literally could not figure out how to retrieve * this information. * * [](/eatadick) */ var m = data.match(/cid":([0-9]+)/); if(m) { var title = "Ustream.tv - " + id; var media = new Media(m[1], title, "--:--", "us"); callback(false, media); } else { callback(true, null); } }); }, /* JWPlayer */ jw: function (id, callback) { var title = "JWPlayer - " + id; var media = new Media(id, title, "--:--", "jw"); callback(false, media); }, /* rtmp stream */ rt: function (id, callback) { var title = "Livestream"; var media = new Media(id, title, "--:--", "rt"); callback(false, media); }, /* imgur.com albums */ im: function (id, callback) { /** * TODO: Consider deprecating this in favor of custom embeds */ var m = id.match(/([\w-]+)/); if (m) { id = m[1]; } else { callback("Invalid ID", null); return; } var title = "Imgur Album - " + id; var media = new Media(id, title, "--:--", "im"); callback(false, media); }, /* custom embed */ cu: function (id, callback) { id = CustomEmbedFilter(id); var media = new Media(id, "Custom Media", "--:--", "cu"); callback(false, media); }, /* google docs */ gd: function (id, callback) { /* WARNING: hacks inbound */ var options = { host: "docs.google.com", path: "/file/d/" + id + "/view?sle=true", port: 443 }; urlRetrieve(https, options, function (status, res) { switch (status) { case 200: break; /* Request is OK, skip to handling data */ case 400: return callback("Invalid request", null); case 403: return callback("Private video", null); case 404: return callback("Video not found", null); case 500: case 503: return callback("Service unavailable", null); default: return callback("HTTP " + status, null); } /* * Match [[anything]] and ignore whitespace * Fix 2014-08-07: [^] matches \r\n, . doesn't. */ var m = res.match(/\[(\[\s*\[[^\[][^]*?\]\s*\])/); try { var propertyList = JSON.parse(m[1]); var data = {}; propertyList.forEach(function (kv) { data[kv[0]] = kv[1]; }); if (data.hasOwnProperty("reason")) { var reason = data.reason; if (reason.indexOf("Unable to play this video at this time.") === 0) { reason = "There is currently a bug with Google Drive which prevents playback " + "of videos 1 hour long or longer."; } return callback(reason); } if (!data.hasOwnProperty("title")) { return callback("Returned HTML is missing the video title. Are you " + "sure the video is done processing?"); } if (!data.hasOwnProperty("length_seconds")) { return callback("Returned HTML is missing the video duration. Are you " + "sure the video is done processing?"); } var title = data.title; var seconds = parseInt(data.length_seconds); var videos = {}; data.fmt_stream_map.split(",").forEach(function (stream) { var parts = stream.split("|"); videos[parts[0]] = parts[1]; }); var direct = {}; for (var key in GOOGLE_PREFERENCE) { for (var i = 0; i < GOOGLE_PREFERENCE[key].length; i++) { var format = GOOGLE_PREFERENCE[key][i]; if (format in videos) { direct[key] = { url: videos[format], contentType: CONTENT_TYPES[format] }; break; } } } if (Object.keys(direct).length === 0) { return callback("No valid links could be extracted", null); } callback(null, new Media(id, title, seconds, "gd", { gpdirect: direct })); } catch (e) { return callback("Failed to parse Google Docs output", null); } }); }, /* ffmpeg for raw files */ fi: function (id, cb) { ffmpeg.query(id, function (err, data) { if (err) { return cb(err); } var m = new Media(id, data.title, data.duration, "fi", { bitrate: data.bitrate, codec: data.codec }); cb(null, m); }); }, /* * Google+ videos * * Also known as Picasa Web Albums. * */ gp: function (id, cb) { var idparts = id.split("_"); if (idparts.length !== 3) { return cb("Invalid Google+ video ID"); } var options = { host: "picasaweb.google.com", path: '/data/feed/api/user/'+idparts[0]+'/albumid/'+idparts[1]+'/photoid/'+idparts[2]+'?kind=tag', port: 443 }; urlRetrieve(https, options, function (status, res) { switch (status) { case 200: break; /* Request is OK, skip to handling data */ case 400: return cb("Invalid request", null); case 403: return cb("Private video", null); case 404: return cb("Video not found", null); case 500: case 503: return cb("Service unavailable", null); default: return cb("HTTP " + status, null); } try { var $ = cheerio.load(res, { xmlMode: true }); switch ($("gphoto\\:videostatus").text()) { case "final": break; /* Incoming Fun. */ case "pending": return cb("The video is still being processed.", null); case "failed": return cb("A processing error has occured and the video should be deleted.", null); case "ready": return cb("The video has been processed but still needs a thumbnail.", null); } var duration = parseInt($("gphoto\\:originalvideo").attr("duration"),10); var title = $("media\\:title").text(); var videos = {}; $('media\\:content[medium="video"]').each(function(index, element){ var url = $(this).attr("url") var type = url.match(/itag=(\d\d)/)[1] videos[type] = { format: type, link: url }; }); $ = null; var direct = {}; for (var key in GOOGLE_PREFERENCE) { for (var i = 0; i < GOOGLE_PREFERENCE[key].length; i++) { var format = GOOGLE_PREFERENCE[key][i]; if (format in videos) { direct[key] = { url: videos[format].link, contentType: CONTENT_TYPES[format] }; break; } } } if (Object.keys(direct).length === 0) { return cb("Unable to retrieve video data from Google+. Check that " + "the album exists and is shared publicly."); } else if (!title) { return cb("Unable to retrieve title from Google+. Check that " + "the album exists and is shared publicly."); } else if (!duration) { return cb("Unable to retreive duration from Google+. This might be " + "because the video is still processing."); } var media = new Media(id, title, duration, "gp", { gpdirect: direct }); cb(null, media); } catch (e) { cb("Unknown error"); Logger.errlog.log("Unknown error for Google+ ID " + id + ": " + e.stack); } }); }, /* hitbox.tv */ hb: function (id, callback) { var m = id.match(/([\w-]+)/); if (m) { id = m[1]; } else { callback("Invalid ID", null); return; } var title = "Hitbox.tv - " + id; var media = new Media(id, title, "--:--", "hb"); callback(false, media); }, }; /** * Function to workaround Vimeo being a dick and blocking my domain from embeds. * Retrieves the player page and extracts the direct links to the MP4 encoded videos. */ function vimeoWorkaround(id, cb) { if (typeof cb !== "function") { return; } var failcount = 0; var inner = function () { var options = { host: "player.vimeo.com", path: "/video/" + id, headers: { "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:29.0) Gecko/20100101 Firefox/29.0", "Referrer": "player.vimeo.com" } }; var parse = function (data) { var i = data.indexOf("{\"cdn_url\""); if (i === -1) { setImmediate(function () { cb({}); }); return; } var j = data.indexOf("};", i); var json = data.substring(i, j+1); try { json = JSON.parse(json); if (!json.request.files) { setImmediate(function () { cb({}); }); return; } var codec = json.request.files.codecs[0]; var files = json.request.files[codec]; setImmediate(function () { cb(files); }); } catch (e) { // This shouldn't happen due to the user-agent, but just in case if (data.indexOf("crawler") !== -1) { Logger.syslog.log("Warning: vimdeoWorkaround got crawler response"); failcount++; if (failcount > 4) { Logger.errlog.log("vimeoWorkaround got bad response 5 times!"+ " Giving up."); setImmediate(function () { cb({}); }); } else { setImmediate(function () { inner(); }); } return; } else if (data.indexOf("This video does not exist.") !== -1) { cb({}); return; } else if (data.indexOf("Because of its privacy settings, this video cannot be played here.") !== -1) { cb({}); } Logger.errlog.log("Vimeo workaround error: "); Logger.errlog.log(e); Logger.errlog.log("http://vimeo.com/" + id); setImmediate(function () { cb({}); }); } }; urlRetrieve(http, options, function (status, buffer) { if (status !== 200) { setImmediate(function () { cb({}); }); return; } parse(buffer); }); }; inner(); } module.exports = { Getters: Getters, getMedia: function (id, type, callback) { if(type in this.Getters) { this.Getters[type](id, callback); } else { callback("Unknown media type '" + type + "'", null); } }, vimeoWorkaround: vimeoWorkaround };