// sync/lib/get-info.js

var http = require("http");
var https = require("https");
var cheerio = require('cheerio');
2013-03-27 19:28:51 +00:00
var Logger = require("./logger.js");
2014-05-21 02:30:14 +00:00
var Media = require("./media");
var CustomEmbedFilter = require("./customembed").filter;
2013-10-11 21:31:40 +00:00
var Server = require("./server");
2014-01-22 23:11:26 +00:00
var Config = require("./config");
var ffmpeg = require("./ffmpeg");
require("cytube-mediaquery"); // Initialize sourcemaps
var YouTube = require("cytube-mediaquery/lib/provider/youtube");
2013-10-11 21:31:40 +00:00
/*
 * Preference map of quality => Google video format IDs (itags), used by the
 * Google Drive ("gd") and Google+ ("gp") getters.
 * see https://en.wikipedia.org/wiki/Youtube#Quality_and_codecs
 *
 * For each quality the candidates are tried left to right and the first one
 * present wins.  As listed, the MP4 itag comes before the WebM itag; other
 * codecs are ignored except FLV 360p, kept as a last resort for "medium".
 */
const GOOGLE_PREFERENCE = {
    hd1080: [37, 46],
    hd720: [22, 45],
    large: [59, 44],
    medium: [18, 43, 34] // 34 is 360p FLV as a last-ditch
};

/*
 * Container type for every itag referenced by GOOGLE_PREFERENCE.
 */
const CONTENT_TYPES = {
    // MP4
    18: "mp4",
    22: "mp4",
    37: "mp4",
    59: "mp4",
    // WebM
    43: "webm",
    44: "webm",
    45: "webm",
    46: "webm",
    // FLV
    34: "flv"
};
2013-10-11 21:31:40 +00:00
/**
 * Issues a request through `transport` and buffers the whole response body
 * as a UTF-8 string.
 *
 * @param {Object} transport object exposing `request(options, onResponse)`;
 *     callers in this file pass the `http` or `https` module
 * @param {Object} options forwarded unchanged to `transport.request`;
 *     `host` and `path` are also used in log messages
 * @param {Function} callback called at most once as `callback(status, body)`.
 *     When the request or the response emits "error", status is 503 and
 *     body is "".
 */
var urlRetrieve = function (transport, options, callback) {
    var delivered = false;

    // The request and the response are separate emitters, so more than one
    // terminal event can arrive (e.g. a response "error" followed by "end").
    // Only the first outcome is reported to the caller.
    var deliver = function (status, body) {
        if (delivered) {
            return;
        }
        delivered = true;
        callback(status, body);
    };

    var req = transport.request(options, function (res) {
        var buffer = "";

        res.on("error", function (err) {
            Logger.errlog.log("HTTP response " + options.host + options.path + " failed: "+
                err);
            deliver(503, "");
        });

        res.setEncoding("utf-8");
        res.on("data", function (chunk) {
            buffer += chunk;
        });
        res.on("end", function () {
            deliver(res.statusCode, buffer);
        });
    });

    req.on("error", function (err) {
        Logger.errlog.log("HTTP request " + options.host + options.path + " failed: " +
            err);
        deliver(503, "");
    });

    req.end();
};
var Getters = {
/* youtube.com */
yt: function (id, callback) {
if (!Config.get("youtube-v3-key")) {
return callback("The YouTube API now requires an API key. Please see the " +
"documentation for youtube-v3-key in config.template.yaml");
2013-10-11 21:31:40 +00:00
}
YouTube.lookup(id).then(function (video) {
var meta = {};
if (video.meta.blocked) {
meta.restricted = video.meta.blocked;
}
var media = new Media(video.id, video.title, video.duration, "yt", meta);
callback(false, media);
}).catch(function (err) {
callback(err.message || err, null);
2013-10-11 21:31:40 +00:00
});
},
/* youtube.com playlists */
yp: function (id, callback) {
if (!Config.get("youtube-v3-key")) {
return callback("The YouTube API now requires an API key. Please see the " +
"documentation for youtube-v3-key in config.template.yaml");
2013-10-11 21:31:40 +00:00
}
YouTube.lookupPlaylist(id).then(function (videos) {
videos = videos.map(function (video) {
var meta = {};
if (video.meta.blocked) {
meta.restricted = video.meta.blocked;
}
return new Media(video.id, video.title, video.duration, "yt", meta);
});
callback(null, videos);
}).catch(function (err) {
callback(err.message || err, null);
2013-10-11 21:31:40 +00:00
});
},
/* youtube.com search */
ytSearch: function (query, callback) {
if (!Config.get("youtube-v3-key")) {
return callback("The YouTube API now requires an API key. Please see the " +
"documentation for youtube-v3-key in config.template.yaml");
2014-05-21 02:30:14 +00:00
}
2013-10-11 21:31:40 +00:00
YouTube.search(query).then(function (res) {
var videos = res.results;
videos = videos.map(function (video) {
var meta = {};
if (video.meta.blocked) {
meta.restricted = video.meta.blocked;
}
var media = new Media(video.id, video.title, video.duration, "yt", meta);
media.thumb = { url: video.meta.thumbnail };
return media;
});
callback(null, videos);
}).catch(function (err) {
callback(err.message || err, null);
2013-10-11 21:31:40 +00:00
});
},
/* vimeo.com */
vi: function (id, callback) {
var m = id.match(/([\w-]+)/);
if (m) {
id = m[1];
} else {
callback("Invalid ID", null);
return;
}
if (Config.get("vimeo-oauth.enabled")) {
return Getters.vi_oauth(id, callback);
}
2013-10-11 21:31:40 +00:00
var options = {
host: "vimeo.com",
port: 443,
path: "/api/v2/video/" + id + ".json",
method: "GET",
dataType: "jsonp",
timeout: 1000
};
urlRetrieve(https, options, function (status, data) {
2014-05-21 02:30:14 +00:00
switch (status) {
case 200:
break; /* Request is OK, skip to handling data */
case 400:
return callback("Invalid request", null);
case 403:
return callback("Private video", null);
case 404:
return callback("Video not found", null);
case 500:
case 503:
return callback("Service unavailable", null);
default:
return callback("HTTP " + status, null);
}
2013-08-01 03:26:11 +00:00
2013-10-11 21:31:40 +00:00
try {
data = JSON.parse(data);
data = data[0];
var seconds = data.duration;
var title = data.title;
var media = new Media(id, title, seconds, "vi");
callback(false, media);
} catch(e) {
var err = e;
2014-05-21 02:30:14 +00:00
/**
* This should no longer be necessary as the outer handler
* checks for HTTP 404
*/
if (buffer.match(/not found/))
2013-10-11 21:31:40 +00:00
err = "Video not found";
callback(err, null);
}
});
},
vi_oauth: function (id, callback) {
var OAuth = require("oauth");
var oa = new OAuth.OAuth(
"https://vimeo.com/oauth/request_token",
"https://vimeo.com/oauth/access_token",
Config.get("vimeo-oauth.consumer-key"),
Config.get("vimeo-oauth.secret"),
"1.0",
null,
"HMAC-SHA1"
);
oa.get("https://vimeo.com/api/rest/v2?format=json" +
"&method=vimeo.videos.getInfo&video_id=" + id,
null,
null,
function (err, data, res) {
if (err) {
return callback(err, null);
}
try {
data = JSON.parse(data);
if (data.stat !== "ok") {
return callback(data.err.msg, null);
}
var video = data.video[0];
if (video.embed_privacy !== "anywhere") {
return callback("Embedding disabled", null);
}
var id = video.id;
var seconds = parseInt(video.duration);
var title = video.title;
callback(null, new Media(id, title, seconds, "vi"));
} catch (e) {
callback("Error handling Vimeo response", null);
}
});
},
2013-10-11 21:31:40 +00:00
/* dailymotion.com */
dm: function (id, callback) {
var m = id.match(/([\w-]+)/);
if (m) {
id = m[1].split("_")[0];
2013-10-11 21:31:40 +00:00
} else {
callback("Invalid ID", null);
return;
}
var options = {
host: "api.dailymotion.com",
port: 443,
path: "/video/" + id + "?fields=duration,title",
method: "GET",
dataType: "jsonp",
timeout: 1000
};
urlRetrieve(https, options, function (status, data) {
2014-05-21 02:30:14 +00:00
switch (status) {
case 200:
break; /* Request is OK, skip to handling data */
case 400:
return callback("Invalid request", null);
case 403:
return callback("Private video", null);
case 404:
return callback("Video not found", null);
case 500:
case 503:
return callback("Service unavailable", null);
default:
return callback("HTTP " + status, null);
}
2013-08-01 03:26:11 +00:00
2013-10-11 21:31:40 +00:00
try {
data = JSON.parse(data);
var title = data.title;
var seconds = data.duration;
2014-05-21 02:30:14 +00:00
/**
* This is a rather hacky way to indicate that a video has
* been deleted...
*/
if (title === "Deleted video" && seconds === 10) {
2013-10-07 05:10:16 +00:00
callback("Video not found", null);
return;
2013-08-01 03:26:11 +00:00
}
2013-10-11 21:31:40 +00:00
var media = new Media(id, title, seconds, "dm");
callback(false, media);
} catch(e) {
2014-06-25 20:13:54 +00:00
callback(e, null);
2013-10-11 21:31:40 +00:00
}
});
},
/* soundcloud.com */
sc: function (id, callback) {
2014-05-21 02:30:14 +00:00
/* TODO: require server owners to register their own API key, put in config */
2013-10-11 21:31:40 +00:00
const SC_CLIENT = "2e0c82ab5a020f3a7509318146128abd";
var m = id.match(/([\w-\/\.:]+)/);
if (m) {
id = m[1];
} else {
callback("Invalid ID", null);
return;
}
2013-08-01 03:26:11 +00:00
2013-10-11 21:31:40 +00:00
var options = {
host: "api.soundcloud.com",
port: 443,
path: "/resolve.json?url=" + id + "&client_id=" + SC_CLIENT,
method: "GET",
dataType: "jsonp",
timeout: 1000
};
urlRetrieve(https, options, function (status, data) {
2014-05-21 02:30:14 +00:00
switch (status) {
case 200:
case 302:
2014-05-21 02:30:14 +00:00
break; /* Request is OK, skip to handling data */
case 400:
return callback("Invalid request", null);
case 403:
return callback("Private sound", null);
case 404:
return callback("Sound not found", null);
case 500:
case 503:
return callback("Service unavailable", null);
default:
return callback("HTTP " + status, null);
2013-10-11 21:31:40 +00:00
}
2013-10-11 21:31:40 +00:00
var track = null;
try {
data = JSON.parse(data);
track = data.location;
} catch(e) {
callback(e, null);
return;
}
2013-10-11 21:31:40 +00:00
var options2 = {
host: "api.soundcloud.com",
port: 443,
2013-10-11 21:31:40 +00:00
path: track,
method: "GET",
dataType: "jsonp",
timeout: 1000
};
2014-05-21 02:30:14 +00:00
/**
* There has got to be a way to directly get the data I want without
* making two requests to Soundcloud...right?
* ...right?
*/
2013-10-11 21:31:40 +00:00
urlRetrieve(https, options2, function (status, data) {
2014-05-21 02:30:14 +00:00
switch (status) {
case 200:
break; /* Request is OK, skip to handling data */
case 400:
return callback("Invalid request", null);
case 403:
return callback("Private sound", null);
case 404:
return callback("Sound not found", null);
case 500:
case 503:
return callback("Service unavailable", null);
default:
return callback("HTTP " + status, null);
}
try {
data = JSON.parse(data);
2013-10-11 21:31:40 +00:00
var seconds = data.duration / 1000;
var title = data.title;
var meta = {};
if (data.sharing === "private" && data.embeddable_by === "all") {
meta.scuri = data.uri;
}
var media = new Media(id, title, seconds, "sc", meta);
2013-10-11 21:31:40 +00:00
callback(false, media);
} catch(e) {
2013-10-07 05:10:16 +00:00
callback(e, null);
}
});
2013-10-11 21:31:40 +00:00
});
},
/* livestream.com */
li: function (id, callback) {
var m = id.match(/([\w-]+)/);
if (m) {
id = m[1];
} else {
callback("Invalid ID", null);
return;
}
var title = "Livestream.com - " + id;
var media = new Media(id, title, "--:--", "li");
callback(false, media);
},
/* twitch.tv */
tw: function (id, callback) {
var m = id.match(/([\w-]+)/);
if (m) {
id = m[1];
} else {
callback("Invalid ID", null);
return;
}
var title = "Twitch.tv - " + id;
var media = new Media(id, title, "--:--", "tw");
callback(false, media);
},
/* ustream.tv */
us: function (id, callback) {
2014-05-21 02:30:14 +00:00
/**
*2013-09-17
* They couldn't fucking decide whether channels should
* be at http://www.ustream.tv/channel/foo or just
* http://www.ustream.tv/foo so they do both.
* [](/cleese)
*/
2013-10-11 21:31:40 +00:00
var m = id.match(/([^\?&#]+)|(channel\/[^\?&#]+)/);
if (m) {
id = m[1];
} else {
callback("Invalid ID", null);
return;
}
2014-05-21 02:30:14 +00:00
2013-10-11 21:31:40 +00:00
var options = {
host: "www.ustream.tv",
port: 80,
path: "/" + id,
method: "GET",
timeout: 1000
};
urlRetrieve(http, options, function (status, data) {
if(status !== 200) {
callback("Ustream HTTP " + status, null);
return;
}
2014-05-21 02:30:14 +00:00
/**
* Regexing the ID out of the HTML because
* Ustream's API is so horribly documented
* I literally could not figure out how to retrieve
* this information.
*
* [](/eatadick)
*/
2015-03-23 23:10:00 +00:00
var m = data.match(/https:\/\/www\.ustream\.tv\/embed\/(\d+)/);
if (m) {
2013-10-11 21:31:40 +00:00
var title = "Ustream.tv - " + id;
var media = new Media(m[1], title, "--:--", "us");
callback(false, media);
} else {
2015-03-23 23:10:00 +00:00
callback("Channel ID not found", null);
}
2013-10-11 21:31:40 +00:00
});
},
/* JWPlayer */
jw: function (id, callback) {
var title = "JWPlayer - " + id;
var media = new Media(id, title, "--:--", "jw");
callback(false, media);
},
/* rtmp stream */
rt: function (id, callback) {
var title = "Livestream";
var media = new Media(id, title, "--:--", "rt");
callback(false, media);
},
/* imgur.com albums */
im: function (id, callback) {
2014-05-21 02:30:14 +00:00
/**
* TODO: Consider deprecating this in favor of custom embeds
*/
2013-10-11 21:31:40 +00:00
var m = id.match(/([\w-]+)/);
if (m) {
id = m[1];
} else {
callback("Invalid ID", null);
return;
}
2013-10-11 21:31:40 +00:00
var title = "Imgur Album - " + id;
var media = new Media(id, title, "--:--", "im");
callback(false, media);
},
/* custom embed */
cu: function (id, callback) {
id = CustomEmbedFilter(id);
var media = new Media(id, "Custom Media", "--:--", "cu");
callback(false, media);
2013-11-07 23:19:36 +00:00
},
/* google docs */
gd: function (id, callback) {
2014-05-21 02:30:14 +00:00
/* WARNING: hacks inbound */
2013-11-07 23:19:36 +00:00
var options = {
host: "docs.google.com",
2015-02-21 04:59:11 +00:00
path: "/file/d/" + id + "/get_video_info?sle=true",
2013-11-07 23:19:36 +00:00
port: 443
};
urlRetrieve(https, options, function (status, res) {
2014-05-21 02:30:14 +00:00
switch (status) {
case 200:
break; /* Request is OK, skip to handling data */
case 400:
return callback("Invalid request", null);
case 403:
return callback("Private video", null);
case 404:
return callback("Video not found", null);
case 500:
case 503:
return callback("Service unavailable", null);
default:
return callback("HTTP " + status, null);
2013-11-07 23:19:36 +00:00
}
try {
2015-02-21 05:11:09 +00:00
var data = {};
2015-02-21 04:59:11 +00:00
res.split("&").forEach(function (urlparam) {
2015-02-21 05:11:09 +00:00
var pair = urlparam.split("=").map(decodeURIComponent).map(
function (s) { return s.replace(/\+/g, ' '); });
2015-02-21 04:59:11 +00:00
data[pair[0]] = pair[1];
});
2014-07-10 03:41:11 +00:00
if (data.hasOwnProperty("reason")) {
var reason = data.reason;
if (reason.indexOf("Unable to play this video at this time.") === 0) {
reason = "There is currently a bug with Google Drive which prevents playback " +
"of videos 1 hour long or longer.";
2015-02-21 05:11:09 +00:00
} else if (reason.indexOf(
"You must be signed in to access this video") >= 0) {
reason = "This video is not shared properly";
}
2015-02-21 05:11:09 +00:00
return callback(reason);
}
if (!data.hasOwnProperty("title")) {
return callback("Returned HTML is missing the video title. Are you " +
"sure the video is done processing?");
}
if (!data.hasOwnProperty("length_seconds")) {
return callback("Returned HTML is missing the video duration. Are you " +
"sure the video is done processing?");
}
var title = data.title;
var seconds = parseInt(data.length_seconds);
2013-11-07 23:19:36 +00:00
var videos = {};
data.fmt_stream_map.split(",").forEach(function (stream) {
var parts = stream.split("|");
videos[parts[0]] = parts[1];
});
2014-05-21 02:30:14 +00:00
var direct = {};
for (var key in GOOGLE_PREFERENCE) {
for (var i = 0; i < GOOGLE_PREFERENCE[key].length; i++) {
var format = GOOGLE_PREFERENCE[key][i];
if (format in videos) {
direct[key] = {
url: videos[format],
contentType: CONTENT_TYPES[format]
};
break;
}
}
2013-11-07 23:19:36 +00:00
}
if (Object.keys(direct).length === 0) {
return callback("No valid links could be extracted", null);
}
callback(null, new Media(id, title, seconds, "gd", { gpdirect: direct }));
} catch (e) {
return callback("Failed to parse Google Docs output", null);
2013-11-07 23:19:36 +00:00
}
});
},
/* ffmpeg for raw files */
fi: function (id, cb) {
ffmpeg.query(id, function (err, data) {
if (err) {
return cb(err);
}
var m = new Media(id, data.title, data.duration, "fi", {
bitrate: data.bitrate,
codec: data.codec
});
cb(null, m);
});
2014-07-10 03:41:11 +00:00
},
2014-07-11 06:03:13 +00:00
/*
* Google+ videos
*
* Also known as Picasa Web Albums.
*
2014-07-11 06:03:13 +00:00
*/
2014-07-10 03:41:11 +00:00
gp: function (id, cb) {
2014-07-11 06:03:13 +00:00
var idparts = id.split("_");
if (idparts.length !== 3) {
2014-07-10 03:41:11 +00:00
return cb("Invalid Google+ video ID");
}
var options = {
host: "picasaweb.google.com",
path: '/data/feed/api/user/'+idparts[0]+'/albumid/'+idparts[1]+'/photoid/'+idparts[2]+'?kind=tag',
2014-07-10 03:41:11 +00:00
port: 443
};
urlRetrieve(https, options, function (status, res) {
switch (status) {
case 200:
break; /* Request is OK, skip to handling data */
case 400:
2014-07-13 18:29:50 +00:00
return cb("Invalid request", null);
2014-07-10 03:41:11 +00:00
case 403:
2014-07-13 18:29:50 +00:00
return cb("Private video", null);
2014-07-10 03:41:11 +00:00
case 404:
2014-07-13 18:29:50 +00:00
return cb("Video not found", null);
2014-07-10 03:41:11 +00:00
case 500:
case 503:
2014-07-13 18:29:50 +00:00
return cb("Service unavailable", null);
2014-07-10 03:41:11 +00:00
default:
2014-07-13 18:29:50 +00:00
return cb("HTTP " + status, null);
2014-07-10 03:41:11 +00:00
}
2014-07-13 18:29:50 +00:00
try {
var $ = cheerio.load(res, { xmlMode: true });
switch ($("gphoto\\:videostatus").text()) {
case "final":
break; /* Incoming Fun. */
case "pending":
return cb("The video is still being processed.", null);
case "failed":
return cb("A processing error has occured and the video should be deleted.", null);
case "ready":
return cb("The video has been processed but still needs a thumbnail.", null);
}
var duration = parseInt($("gphoto\\:originalvideo").attr("duration"),10);
var title = $("media\\:title").text();
2014-07-13 18:29:50 +00:00
var videos = {};
$('media\\:content[medium="video"]').each(function(index, element){
2015-03-31 20:57:57 +00:00
var url = $(this).attr("url");
var match = url.match(/itag=(\d+)/)
if (!match) {
match = url.match(/googleusercontent.*=m(\d+)$/);
}
if (match && match[1]) {
var type = match[1];
videos[type] = {
format: type,
2015-03-31 20:57:57 +00:00
link: url
};
}
2014-07-13 18:29:50 +00:00
});
$ = null;
2014-07-10 03:41:11 +00:00
2014-07-13 18:29:50 +00:00
var direct = {};
2014-07-10 03:41:11 +00:00
for (var key in GOOGLE_PREFERENCE) {
for (var i = 0; i < GOOGLE_PREFERENCE[key].length; i++) {
var format = GOOGLE_PREFERENCE[key][i];
2014-07-10 03:41:11 +00:00
2014-07-13 18:29:50 +00:00
if (format in videos) {
direct[key] = {
2014-11-03 18:56:15 +00:00
url: videos[format].link,
contentType: CONTENT_TYPES[format]
};
2014-07-13 18:29:50 +00:00
break;
}
2014-07-10 03:41:11 +00:00
}
}
2014-07-13 18:29:50 +00:00
if (Object.keys(direct).length === 0) {
return cb("Unable to retrieve video data from Google+. The videos " +
"may have not finished processing yet.");
2014-07-13 18:29:50 +00:00
} else if (!title) {
return cb("Unable to retrieve title from Google+. Check that " +
"the album exists and is shared publicly.");
} else if (!duration) {
return cb("Unable to retreive duration from Google+. This might be " +
"because the video is still processing.");
}
var media = new Media(id, title, duration, "gp", { gpdirect: direct });
cb(null, media);
} catch (e) {
cb("Unknown error");
2014-07-13 18:31:30 +00:00
Logger.errlog.log("Unknown error for Google+ ID " + id + ": " + e.stack);
2014-07-13 18:29:50 +00:00
}
2014-07-10 03:41:11 +00:00
});
2015-01-23 05:21:31 +00:00
},
/* hitbox.tv */
hb: function (id, callback) {
var m = id.match(/([\w-]+)/);
if (m) {
id = m[1];
} else {
callback("Invalid ID", null);
return;
}
var title = "Hitbox.tv - " + id;
var media = new Media(id, title, "--:--", "hb");
callback(false, media);
},
2013-10-11 21:31:40 +00:00
};
/**
 * Workaround for Vimeo blocking embeds from the site's domain.
 * Retrieves the player page and extracts the direct links to the encoded
 * video files from the JSON configuration embedded in it.
 *
 * @param {string} id Vimeo video ID, used as the player page path
 * @param {Function} cb called once, on a later tick, with the files object
 *     for the first listed codec, or with {} when nothing could be
 *     extracted.  Nothing happens if `cb` is not a function.
 */
function vimeoWorkaround(id, cb) {
    if (typeof cb !== "function") {
        return;
    }

    var failcount = 0;

    // Every outcome is reported the same way: asynchronously, exactly once.
    var finish = function (files) {
        setImmediate(function () {
            cb(files);
        });
    };

    var inner = function () {
        var options = {
            host: "player.vimeo.com",
            path: "/video/" + id,
            headers: {
                "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:29.0) Gecko/20100101 Firefox/29.0",
                // NOTE(review): the standard HTTP header is spelled
                // "Referer"; left unchanged here - confirm what Vimeo expects.
                "Referrer": "player.vimeo.com"
            }
        };

        var parse = function (data) {
            var i = data.indexOf("{\"cdn_url\"");
            if (i === -1) {
                finish({});
                return;
            }

            // The embedded configuration object is assumed to end at the
            // first "};" following its start.
            var j = data.indexOf("};", i);
            var json = data.substring(i, j+1);

            try {
                json = JSON.parse(json);
                if (!json.request.files) {
                    finish({});
                    return;
                }
                var codec = json.request.files.codecs[0];
                finish(json.request.files[codec]);
            } catch (e) {
                // This shouldn't happen due to the user-agent, but just in case
                if (data.indexOf("crawler") !== -1) {
                    Logger.syslog.log("Warning: vimdeoWorkaround got crawler response");
                    failcount++;
                    if (failcount > 4) {
                        Logger.errlog.log("vimeoWorkaround got bad response 5 times!"+
                            " Giving up.");
                        finish({});
                    } else {
                        setImmediate(inner);
                    }
                    return;
                }

                // Expected failures: report an empty result without logging.
                // (The privacy case used to fall through and invoke the
                // callback a second time.)
                if (data.indexOf("This video does not exist.") !== -1 ||
                        data.indexOf("Because of its privacy settings, this video cannot be played here.") !== -1) {
                    finish({});
                    return;
                }

                Logger.errlog.log("Vimeo workaround error: ");
                Logger.errlog.log(e);
                Logger.errlog.log("http://vimeo.com/" + id);
                finish({});
            }
        };

        urlRetrieve(http, options, function (status, buffer) {
            if (status !== 200) {
                finish({});
                return;
            }

            parse(buffer);
        });
    };

    inner();
}
2013-10-11 21:31:40 +00:00
module.exports = {
Getters: Getters,
getMedia: function (id, type, callback) {
if(type in this.Getters) {
this.Getters[type](id, callback);
} else {
callback("Unknown media type '" + type + "'", null);
}
},
vimeoWorkaround: vimeoWorkaround
2013-10-11 21:31:40 +00:00
};