Media retrieval refactor; first pass

2013-07-31 23:26:11 -04:00 · 2013-07-31 23:26:11 -04:00 · 90437e5ac2
parent 7477c64ae2
commit 90437e5ac2
1 changed files with 265 additions and 0 deletions
--- a/get-info.js
+++ b/get-info.js
@ -14,6 +14,21 @@ var https = require("https");
 var Logger = require("./logger.js");
 var Media = require("./media.js").Media;

+/**
+ * Issues a request through `transport` and collects the complete response
+ * body as a UTF-8 string.
+ *
+ * @param {Object} transport - object exposing request(options, onResponse);
+ *     the callers in this file pass the `http` or `https` module
+ * @param {Object} options - handed unchanged to transport.request()
+ * @param {Function} callback - invoked exactly once as
+ *     callback(statusCode, body). If the request itself fails (an "error"
+ *     event on the request object) it is invoked as callback(503, "") so
+ *     that callers comparing the status against 200/302 treat it as a
+ *     failure.
+ */
+function urlRetrieve(transport, options, callback) {
+    // Guarantees the callback fires at most once, even if an "error" event
+    // arrives after the response has already ended (or vice versa)
+    var finished = false;
+    var finish = function (status, body) {
+        if(finished)
+            return;
+        finished = true;
+        callback(status, body);
+    };
+
+    var req = transport.request(options, function (res) {
+        var buffer = "";
+        res.setEncoding("utf-8");
+        res.on("data", function (chunk) {
+            buffer += chunk;
+        });
+        res.on("end", function () {
+            finish(res.statusCode, buffer);
+        });
+    });
+
+    // Without a listener, an "error" event on the request is thrown as an
+    // uncaught exception and the callback would never be invoked
+    req.on("error", function () {
+        finish(503, "");
+    });
+
+    req.end();
+}
+
 // Helper function for making an HTTP request and getting the result
 // as JSON
 function getJSONInternal(transport, options, callback) {
@ -63,6 +78,256 @@ function getJSONHTTPS(options, callback) {
    getJSONInternal(https, options, callback);
 }

+/**
+ * Media lookup functions keyed by provider code.
+ *
+ * Every getter has the signature (id, callback) and reports its result as
+ * callback(err, media):
+ *   - on success: err === false and media is a Media instance
+ *   - on failure: err is `true` or a message string, and media is null
+ *
+ * Getters that talk to a remote API build the Media object inside a
+ * try/catch but invoke the success callback outside of it, so an exception
+ * thrown by the callback itself is not reported back as a lookup failure
+ * through a second callback invocation.
+ */
+var Getters = {
+    /* youtube.com */
+    yt: function (id, callback) {
+        var options = {
+            host: "gdata.youtube.com",
+            port: 443,
+            path: "/feeds/api/videos/" + id + "?v=2&alt=json",
+            method: "GET",
+            dataType: "jsonp",
+            timeout: 1000
+        };
+
+        urlRetrieve(https, options, function (status, data) {
+            if(status !== 200) {
+                callback(true, null);
+                return;
+            }
+
+            var media;
+            try {
+                var parsed = JSON.parse(data);
+                var seconds = parsed.entry.media$group.yt$duration.seconds;
+                var title = parsed.entry.title.$t;
+                media = new Media(id, title, seconds, "yt");
+            } catch(e) {
+                // Gdata version 2 has the rather silly habit of
+                // returning error codes in XML when I explicitly asked
+                // for JSON.  `data` is still the raw response text here,
+                // so the error code can be pulled out of it directly.
+                var m = data.match(/<internalReason>([^<]+)<\/internalReason>/);
+                if(m === null)
+                    m = data.match(/<code>([^<]+)<\/code>/);
+
+                var err = true;
+                if(m) {
+                    if(m[1] === "too_many_recent_calls") {
+                        err = "YouTube is throttling the server right "+
+                               "now for making too many requests.  "+
+                               "Please try again in a moment.";
+                    } else {
+                        err = m[1];
+                    }
+                }
+
+                callback(err, null);
+                return;
+            }
+
+            callback(false, media);
+        });
+    },
+
+    // TODO Add youtube playlists
+
+    /* vimeo.com */
+    vi: function (id, callback) {
+        var options = {
+            host: "vimeo.com",
+            port: 443,
+            path: "/api/v2/video/" + id + ".json",
+            method: "GET",
+            dataType: "jsonp",
+            timeout: 1000
+        };
+
+        urlRetrieve(https, options, function (status, data) {
+            if(status !== 200) {
+                callback(true, null);
+                return;
+            }
+
+            var media;
+            try {
+                // The response is an array; the video is its first element
+                var video = JSON.parse(data)[0];
+                var seconds = video.duration;
+                var title = video.title;
+                media = new Media(id, title, seconds, "vi");
+            } catch(e) {
+                // `data` is still the raw response text here
+                var err = true;
+                if(data.match(/not found/))
+                    err = "Video not found";
+
+                callback(err, null);
+                return;
+            }
+
+            callback(false, media);
+        });
+    },
+
+    /* dailymotion.com */
+    dm: function (id, callback) {
+        // Dailymotion's API is an example of an API done right
+        // - Supports SSL
+        // - I can ask for exactly which fields I want
+        // - URL is simple
+        // - Field names are sensible
+        // Other media providers take notes, please
+        var options = {
+            host: "api.dailymotion.com",
+            port: 443,
+            path: "/video/" + id + "?fields=duration,title",
+            method: "GET",
+            dataType: "jsonp",
+            timeout: 1000
+        };
+
+        urlRetrieve(https, options, function (status, data) {
+            if(status !== 200) {
+                callback(true, null);
+                return;
+            }
+
+            var media;
+            try {
+                var parsed = JSON.parse(data);
+                var title = parsed.title;
+                var seconds = parsed.duration;
+                media = new Media(id, title, seconds, "dm");
+            } catch(e) {
+                // TODO See what kinds of errors DM returns
+                callback(true, null);
+                return;
+            }
+
+            callback(false, media);
+        });
+    },
+
+    /* soundcloud.com */
+    sc: function (id, callback) {
+        // Soundcloud's API is badly designed and badly documented
+        // In order to lookup track data from a URL, I have to first
+        // make a call to /resolve to get the track id, then make a second
+        // call to /tracks/{track.id} to actually get useful data
+        // This is a waste of bandwidth and a pain in the ass
+
+        const SC_CLIENT = "2e0c82ab5a020f3a7509318146128abd";
+
+        // NOTE(review): id is inserted into the query string as-is;
+        // confirm whether callers pass it already URL-encoded
+        var options = {
+            host: "api.soundcloud.com",
+            port: 443,
+            path: "/resolve.json?url=" + id + "&client_id=" + SC_CLIENT,
+            method: "GET",
+            dataType: "jsonp",
+            timeout: 1000
+        };
+
+        urlRetrieve(https, options, function (status, data) {
+            // A successful resolve is answered with a redirect
+            if(status !== 302) {
+                callback(true, null);
+                return;
+            }
+
+            var track = null;
+            try {
+                track = JSON.parse(data).location;
+            } catch(e) {
+                callback(true, null);
+                return;
+            }
+
+            // Valid JSON without a usable location cannot be followed
+            if(typeof track !== "string" || track === "") {
+                callback(true, null);
+                return;
+            }
+
+            var options2 = {
+                host: "api.soundcloud.com",
+                port: 443,
+                path: track,
+                method: "GET",
+                dataType: "jsonp",
+                timeout: 1000
+            };
+
+            // I want to get off async's wild ride
+            urlRetrieve(https, options2, function (status2, data2) {
+                if(status2 !== 200) {
+                    callback(true, null);
+                    return;
+                }
+
+                var media;
+                try {
+                    var parsed = JSON.parse(data2);
+                    // Duration is in ms, but I want s
+                    var seconds = parsed.duration / 1000;
+                    var title = parsed.title;
+                    media = new Media(id, title, seconds, "sc");
+                } catch(e) {
+                    callback(true, null);
+                    return;
+                }
+
+                callback(false, media);
+            });
+        });
+    },
+
+    /* livestream.com */
+    li: function (id, callback) {
+        // No remote lookup: the title is derived from the id and the
+        // duration is the placeholder "--:--"
+        var title = "Livestream.com - " + id;
+        var media = new Media(id, title, "--:--", "li");
+        callback(false, media);
+    },
+
+    /* twitch.tv */
+    tw: function (id, callback) {
+        var title = "Twitch.tv - " + id;
+        var media = new Media(id, title, "--:--", "tw");
+        callback(false, media);
+    },
+
+    /* justin.tv */
+    jt: function (id, callback) {
+        var title = "Justin.tv - " + id;
+        var media = new Media(id, title, "--:--", "jt");
+        callback(false, media);
+    },
+
+    /* ustream.tv */
+    us: function (id, callback) {
+        var options = {
+            host: "www.ustream.tv",
+            port: 80,
+            path: "/" + id,
+            method: "GET",
+            timeout: 1000
+        };
+
+        urlRetrieve(http, options, function (status, data) {
+            if(status !== 200) {
+                callback(true, null);
+                return;
+            }
+
+            // Regexing the ID out of the HTML because
+            // Ustream's API is so horribly documented
+            // I literally could not figure out how to retrieve
+            // this information.
+            //
+            // [](/eatadick)
+            var m = data.match(/cid":([0-9]+)/);
+            if(!m) {
+                callback(true, null);
+                return;
+            }
+
+            // The Media is keyed by the numeric cid found in the page,
+            // while the title keeps the id that was asked for
+            var title = "Ustream.tv - " + id;
+            var media = new Media(m[1], title, "--:--", "us");
+            callback(false, media);
+        });
+    },
+
+    /* JWPlayer */
+    jw: function (id, callback) {
+        var title = "JWPlayer - " + id;
+        var media = new Media(id, title, "--:--", "jw");
+        callback(false, media);
+    }
+};
+
+exports.Getters = Getters;
+
 // Look up YouTube metadata
 // Fairly straightforward
 exports.getYTInfo = function(id, callback) {