diff --git a/src/deps.ts b/src/deps.ts
index 52cd691..75fe3d8 100644
--- a/src/deps.ts
+++ b/src/deps.ts
@@ -29,3 +29,4 @@ export { default as linkifyStr } from 'npm:linkify-string@^4.1.0';
 import 'npm:linkify-plugin-hashtag@^4.1.0';
 // @deno-types="npm:@types/mime@3.0.0"
 export { default as mime } from 'npm:mime@^3.0.0';
+export { unfurl } from 'npm:unfurl.js@^6.3.1';
diff --git a/src/note.ts b/src/note.ts
index eee20cb..a778e0d 100644
--- a/src/note.ts
+++ b/src/note.ts
@@ -6,16 +6,6 @@ linkify.registerCustomProtocol('wss');
 
 const url = (path: string) => new URL(path, LOCAL_DOMAIN).toString();
 
-/** Get pubkey from decoded bech32 entity, or undefined if not applicable. */
-function getDecodedPubkey(decoded: nip19.DecodeResult): string | undefined {
-  switch (decoded.type) {
-    case 'npub':
-      return decoded.data;
-    case 'nprofile':
-      return decoded.data.pubkey;
-  }
-}
-
 const linkifyOpts: linkify.Opts = {
   render: {
     hashtag: ({ content }) => {
@@ -46,16 +36,8 @@ type Link = ReturnType[0];
 interface ParsedNoteContent {
   html: string;
   links: Link[];
-}
-
-/** Ensures the URL can be parsed. Why linkifyjs doesn't already guarantee this, idk... */
-function isValidLink(link: Link): boolean {
-  try {
-    new URL(link.href);
-    return true;
-  } catch (_e) {
-    return false;
-  }
+  /** First non-media URL - eligible for a preview card. */
+  firstUrl: string | undefined;
 }
 
 /** Convert Nostr content to Mastodon API HTML. Also return parsed data. */
@@ -63,10 +45,12 @@ function parseNoteContent(content: string): ParsedNoteContent {
   // Parsing twice is ineffecient, but I don't know how to do only once.
   const html = linkifyStr(content, linkifyOpts);
   const links = linkify.find(content).filter(isValidLink);
+  const firstUrl = links.find(isNonMediaLink)?.href;
 
   return {
     html,
     links,
+    firstUrl,
   };
 }
 
@@ -77,9 +61,7 @@ interface MediaLink {
 
 function getMediaLinks(links: Link[]): MediaLink[] {
   return links.reduce((acc, link) => {
-    const { pathname } = new URL(link.href);
-    const mimeType = mime.getType(pathname);
-
+    const mimeType = getUrlMimeType(link.href);
     if (!mimeType) return acc;
 
     const [baseType, _subType] = mimeType.split('/');
@@ -95,4 +77,40 @@ function getMediaLinks(links: Link[]): MediaLink[] {
   }, []);
 }
 
+/** Whether the link is an http(s) URL whose path has no MIME type known to `mime`. */
+function isNonMediaLink({ href }: Link): boolean {
+  return /^https?:\/\//.test(href) && !getUrlMimeType(href);
+}
+
+/** Ensures the URL can be parsed. Why linkifyjs doesn't already guarantee this, idk... */
+function isValidLink(link: Link): boolean {
+  try {
+    new URL(link.href);
+    return true;
+  } catch (_e) {
+    console.error(`Invalid link: ${link.href}`);
+    return false;
+  }
+}
+
+/** `npm:mime` treats `.com` as a file extension, so parse the full URL to get its path first. */
+function getUrlMimeType(url: string): string | undefined {
+  try {
+    const { pathname } = new URL(url);
+    return mime.getType(pathname) || undefined;
+  } catch (_e) {
+    return undefined;
+  }
+}
+
+/** Get pubkey from decoded bech32 entity, or undefined if not applicable. */
+function getDecodedPubkey(decoded: nip19.DecodeResult): string | undefined {
+  switch (decoded.type) {
+    case 'npub':
+      return decoded.data;
+    case 'nprofile':
+      return decoded.data.pubkey;
+  }
+}
+
 export { getMediaLinks, type MediaLink, parseNoteContent };
diff --git a/src/transmute.ts b/src/transmute.ts
index 70ecb20..e67ea9c 100644
--- a/src/transmute.ts
+++ b/src/transmute.ts
@@ -1,4 +1,4 @@
-import { findReplyTag, lodash, nip19, z } from '@/deps.ts';
+import { findReplyTag, lodash, nip19, unfurl, z } from '@/deps.ts';
 import { type Event } from '@/event.ts';
 import { type MetaContent, parseMetaContent } from '@/schema.ts';
 
@@ -101,12 +101,13 @@ async function toStatus(event: Event<1>) {
     ),
   ];
 
-  const { html, links } = parseNoteContent(event.content);
+  const { html, links, firstUrl } = parseNoteContent(event.content);
   const mediaLinks = getMediaLinks(links);
 
   return {
     id: event.id,
     account,
+    card: firstUrl ? await unfurlCard(firstUrl) : null,
     content: html,
     created_at: new Date(event.created_at * 1000).toISOString(),
     in_reply_to_id: replyTag ? replyTag[1] : null,
@@ -128,7 +129,6 @@ async function toStatus(event: Event<1>) {
     mentions: await Promise.all(mentionedPubkeys.map(toMention)),
     tags: [],
     emojis: [],
-    card: null,
     poll: null,
     uri: `${LOCAL_DOMAIN}/posts/${event.id}`,
     url: `${LOCAL_DOMAIN}/posts/${event.id}`,
@@ -153,4 +153,54 @@ function renderAttachment({ url, mimeType }: MediaLink) {
   };
 }
 
+/** Shape of the `card` field on a status, as produced by `unfurlCard`. */
+interface PreviewCard {
+  url: string;
+  title: string;
+  description: string;
+  type: 'link' | 'photo' | 'video' | 'rich';
+  author_name: string;
+  author_url: string;
+  provider_name: string;
+  provider_url: string;
+  html: string;
+  width: number;
+  height: number;
+  image: string | null;
+  embed_url: string;
+  blurhash: string | null;
+}
+
+/**
+ * Fetch metadata for `url` with unfurl.js and map it onto a `PreviewCard`.
+ * Best-effort: resolves to `null` (after logging the error) if unfurling throws.
+ */
+async function unfurlCard(url: string): Promise<PreviewCard | null> {
+  console.log(`Unfurling ${url}...`);
+  try {
+    const result = await unfurl(url, { fetch });
+    return {
+      type: result.oEmbed?.type || 'link',
+      url: result.canonical_url || url,
+      title: result.oEmbed?.title || result.title || '',
+      description: result.open_graph.description || result.description || '',
+      author_name: result.oEmbed?.author_name || '',
+      author_url: result.oEmbed?.author_url || '',
+      provider_name: result.oEmbed?.provider_name || '',
+      provider_url: result.oEmbed?.provider_url || '',
+      // @ts-expect-error `html` does in fact exist on oEmbed.
+      html: result.oEmbed?.html || '',
+      width: result.oEmbed?.width || 0,
+      height: result.oEmbed?.height || 0,
+      // Guard the element too: an empty `thumbnails` array would otherwise throw and drop the whole card.
+      image: result.oEmbed?.thumbnails?.[0]?.url || null,
+      embed_url: '',
+      blurhash: null,
+    };
+  } catch (e) {
+    console.error(`Failed to unfurl ${url}:`, e);
+    return null;
+  }
+}
+
 export { toAccount, toStatus };