scrappy-youtube/main.ts

64 lines
2.3 KiB
TypeScript
Raw Normal View History

2023-07-27 01:39:16 +00:00
import { DB } from "https://deno.land/x/sqlite/mod.ts";
import { parse } from "https://deno.land/x/xml/mod.ts"
import { search } from "https://deno.land/x/jmespath/index.ts";
// Command-line entry point. The first CLI argument selects the command:
//   create                     create the `channel` table in scrappy.db
//   new <channel_id> <name>    register a channel to follow
//   get <dest_dir>             for every channel, download videos published
//                              after its stored `last_seen_at` into
//                              <dest_dir>/<name>/
// Anything else prints an error and exits with status 1.

/** Prefix of a channel's feed URL; the channel id is appended. */
const YT_FEED_PREFIX = "https://www.youtube.com/feeds/videos.xml?channel_id=";
/** Prefix of a video's watch URL; the video id is appended. */
const YT_VID_PREFIX = "https://www.youtube.com/watch?v=";
/** Only the first this-many feed entries are considered on each run. */
const MAX_ENTRIES_PER_RUN = 4;

/** Column order of `select * from channel`: id, channel_id, channel_name, last_seen_at. */
type ChannelRow = [number, string, string, number];

const db = new DB("scrappy.db");
const command = Deno.args[0];
// Deno.exit() terminates immediately and would skip the `finally` below, so
// the failure status is recorded here and applied after the database is closed.
let exitCode = 0;

try {
  if (command === "create") {
    db.execute(`create table channel (id INTEGER PRIMARY KEY AUTOINCREMENT, channel_id text not null unique, channel_name text not null, last_seen_at number not null default 0);`);
  } else if (command === "new" && Deno.args.length === 3) {
    const channelId = Deno.args[1];
    const name = Deno.args[2];
    console.debug("args:", channelId, name);
    db.query("insert into channel (channel_id, channel_name) values (?, ?)", [channelId, name]);
    console.log("Saved.");
  } else if (command === "get" && Deno.args.length === 2) {
    const dest = Deno.args[1];
    // A `for...of` binding cannot carry a type annotation, so the row shape is
    // supplied through the query's type parameter instead.
    const rows = db.query<ChannelRow>("select * from channel order by id");
    for (const [id, channelId, name, lastSeenAt] of rows) {
      await Deno.mkdir(`${dest}/${name}`, { recursive: true });

      const res = await fetch(YT_FEED_PREFIX + encodeURIComponent(channelId));
      if (!res.ok) {
        // Do not hand an error page to the XML parser; release the body and
        // move on to the next channel.
        await res.body?.cancel();
        console.error(`Failed to fetch feed for ${name}: HTTP ${res.status}`);
        continue;
      }
      const doc = parse(await res.text());

      // Each match is a [videoId, published] pair. A feed without an `entry`
      // list does not produce an array here, which is treated as "no entries".
      const found: unknown = search(doc, `feed.entry[].["yt:videoId", published]`);
      const entries: unknown[] = Array.isArray(found) ? found : [];

      let newLastSeenAt = lastSeenAt;
      // Reversed so the last of the considered entries is handled first
      // (presumably the feed lists newest first, making this oldest-first).
      for (const entry of entries.slice(0, MAX_ENTRIES_PER_RUN).reverse()) {
        if (!Array.isArray(entry)) continue;
        const [videoId, published] = entry;
        if (videoId == null || typeof published !== "string") continue;

        // An unparsable date yields NaN, which fails the comparison below and
        // so skips the entry.
        const publishedSeconds = Math.floor(new Date(published).getTime() / 1000);
        if (!(publishedSeconds > lastSeenAt)) continue;

        // NOTE(review): the timestamp advances even when yt-dlp fails below, so
        // a failed download is not retried on the next run (original behaviour,
        // kept as-is).
        newLastSeenAt = Math.max(publishedSeconds, newLastSeenAt);

        const p = Deno.run({
          cmd: [
            "yt-dlp",
            "-q",
            "-f22",
            `${YT_VID_PREFIX}${videoId}`,
            "-o",
            `${dest}/${name}/%(title)s.%(ext)s`,
          ],
        });
        try {
          const status = await p.status();
          if (status.code) console.error("Exited with status:", status.code);
        } finally {
          // The process handle is a resource that must be released explicitly.
          p.close();
        }
      }

      if (newLastSeenAt !== lastSeenAt) {
        db.query("update channel set last_seen_at = ? where id = ?", [newLastSeenAt, id]);
      }
    }
  } else {
    console.error("Unrecognized command.", Deno.args);
    exitCode = 1;
  }
} finally {
  // Close the database on every path, including thrown errors.
  db.close();
}

if (exitCode !== 0) Deno.exit(exitCode);