diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b9889ba..9b2675e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -14,6 +14,6 @@ lint: stage: test script: deno lint -# test: -# stage: test -# script: deno task test \ No newline at end of file +test: + stage: test + script: deno task test \ No newline at end of file diff --git a/data/.gitignore b/data/.gitignore new file mode 100644 index 0000000..c96a04f --- /dev/null +++ b/data/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore \ No newline at end of file diff --git a/deno.json b/deno.json index 8b2a01d..f0c10e6 100644 --- a/deno.json +++ b/deno.json @@ -2,8 +2,8 @@ "$schema": "https://deno.land/x/deno@v1.32.3/cli/schemas/config-file.v1.json", "lock": false, "tasks": { - "dev": "deno run --allow-read --allow-env --allow-net --allow-ffi --unstable --watch src/server.ts", - "test": "deno test" + "dev": "deno run --allow-read --allow-write --allow-env --allow-net --allow-ffi --unstable --watch src/server.ts", + "test": "deno test -A --unstable src" }, "imports": { "@/": "./src/" diff --git a/src/app.ts b/src/app.ts index 8a90aba..af95656 100644 --- a/src/app.ts +++ b/src/app.ts @@ -1,5 +1,6 @@ import { type Context, cors, type Handler, Hono, type HonoEnv, logger, type MiddlewareHandler } from '@/deps.ts'; import { type Event } from '@/event.ts'; +import '@/loopback.ts'; import { accountController, @@ -26,6 +27,7 @@ import { statusController, } from './controllers/api/statuses.ts'; import { streamingController } from './controllers/api/streaming.ts'; +import { trendingTagsController } from './controllers/api/trends.ts'; import { indexController } from './controllers/site.ts'; import { hostMetaController } from './controllers/well-known/host-meta.ts'; import { nodeInfoController, nodeInfoSchemaController } from './controllers/well-known/nodeinfo.ts'; @@ -100,6 +102,9 @@ app.get('/api/v2/search', searchController); app.get('/api/pleroma/frontend_configurations', frontendConfigController); +app.get('/api/v1/trends/tags', trendingTagsController); +app.get('/api/v1/trends', trendingTagsController); + // Not (yet) implemented. app.get('/api/v1/notifications', emptyArrayController); app.get('/api/v1/bookmarks', emptyArrayController); diff --git a/src/client.ts b/src/client.ts index e922060..378dc9c 100644 --- a/src/client.ts +++ b/src/client.ts @@ -172,7 +172,7 @@ function getDescendants(eventId: string): Promise[]> { /** Publish an event to the Nostr relay. */ function publish(event: SignedEvent, relays = Conf.publishRelays): void { - console.log('Publishing event', event); + console.log('Publishing event', event, relays); try { getPool().publish(event, relays); } catch (e) { diff --git a/src/config.ts b/src/config.ts index aa55548..69d0057 100644 --- a/src/config.ts +++ b/src/config.ts @@ -29,7 +29,11 @@ const Conf = { ); }, get relay() { - return Deno.env.get('DITTO_RELAY'); + const value = Deno.env.get('DITTO_RELAY'); + if (!value) { + throw new Error('Missing DITTO_RELAY'); + } + return value; }, get localDomain() { return Deno.env.get('LOCAL_DOMAIN') || 'http://localhost:8000'; diff --git a/src/controllers/api/trends.ts b/src/controllers/api/trends.ts new file mode 100644 index 0000000..7fc98e3 --- /dev/null +++ b/src/controllers/api/trends.ts @@ -0,0 +1,52 @@ +import { type AppController } from '@/app.ts'; +import { Conf } from '@/config.ts'; +import { z } from '@/deps.ts'; +import { trends } from '@/trends.ts'; +import { Time } from '@/utils.ts'; +import { stripTime } from '@/utils/time.ts'; + +const limitSchema = z.coerce.number().catch(10).transform((value) => Math.min(Math.max(value, 0), 20)); + +const trendingTagsController: AppController = (c) => { + const limit = limitSchema.parse(c.req.query('limit')); + if (limit < 1) return c.json([]); + + const now = new Date(); + const yesterday = new Date(now.getTime() - Time.days(1)); + const lastWeek = new Date(now.getTime() - Time.days(7)); + + /** Most used hashtags within the past 24h. */ + const tags = trends.getTrendingTags({ + since: yesterday, + until: now, + limit, + }); + + return c.json(tags.map(({ name, uses, accounts }) => ({ + name, + url: Conf.local(`/tags/${name}`), + history: [ + // Use the full 24h query for the current day. Then use `offset: 1` to adjust for this below. + // This result is more accurate than what Mastodon returns. + { + day: String(Math.floor(stripTime(now).getTime() / 1000)), + accounts: String(accounts), + uses: String(uses), + }, + ...trends.getTagHistory({ + tag: name, + since: lastWeek, + until: now, + limit: 6, + offset: 1, + }).map((history) => ({ + // For some reason, Mastodon wants these to be strings... oh well. + day: String(Math.floor(history.day.getTime() / 1000)), + accounts: String(history.accounts), + uses: String(history.uses), + })), + ], + }))); +}; + +export { trendingTagsController }; diff --git a/src/db.ts b/src/db.ts index 5f13af7..8a22426 100644 --- a/src/db.ts +++ b/src/db.ts @@ -1,9 +1,10 @@ import { createPentagon, z } from '@/deps.ts'; +import { hexIdSchema } from '@/schema.ts'; const kv = await Deno.openKv(); const userSchema = z.object({ - pubkey: z.string().regex(/^[0-9a-f]{64}$/).describe('primary'), + pubkey: hexIdSchema.describe('primary'), username: z.string().regex(/^\w{1,30}$/).describe('unique'), createdAt: z.date(), }); diff --git a/src/deps-test.ts b/src/deps-test.ts new file mode 100644 index 0000000..e57b4ad --- /dev/null +++ b/src/deps-test.ts @@ -0,0 +1 @@ +export { assert, assertEquals, assertThrows } from 'https://deno.land/std@0.177.0/testing/asserts.ts'; diff --git a/src/deps.ts b/src/deps.ts index 9e8e9b7..d3091e1 100644 --- a/src/deps.ts +++ b/src/deps.ts @@ -49,3 +49,5 @@ export { export { generateSeededRsa } from 'https://gitlab.com/soapbox-pub/seeded-rsa/-/raw/v1.0.0/mod.ts'; export * as secp from 'npm:@noble/secp256k1@^1.7.1'; export { LRUCache } from 'npm:lru-cache@^10.0.0'; +export { DB as Sqlite } from 'https://deno.land/x/sqlite@v3.7.0/mod.ts'; +export { serve } from 'https://deno.land/std@0.177.0/http/server.ts'; diff --git a/src/loopback.ts b/src/loopback.ts new file mode 100644 index 0000000..e9f13fc --- /dev/null +++ b/src/loopback.ts @@ -0,0 +1,44 @@ +import { Conf } from '@/config.ts'; +import { RelayPool } from '@/deps.ts'; +import { trends } from '@/trends.ts'; +import { nostrDate, nostrNow } from '@/utils.ts'; + +import type { Event } from '@/event.ts'; + +const relay = new RelayPool([Conf.relay]); + +// This file watches all events on your Ditto relay and triggers +// side-effects based on them. This can be used for things like +// notifications, trending hashtag tracking, etc. +relay.subscribe( + [{ kinds: [1], since: nostrNow() }], + [Conf.relay], + handleEvent, + undefined, + undefined, +); + +/** Handle events through the loopback pipeline. */ +function handleEvent(event: Event): void { + console.info('loopback event:', event.id); + trackHashtags(event); +} + +/** Track whenever a hashtag is used, for processing trending tags. */ +function trackHashtags(event: Event): void { + const date = nostrDate(event.created_at); + + const tags = event.tags + .filter((tag) => tag[0] === 't') + .map((tag) => tag[1]) + .slice(0, 5); + + if (!tags.length) return; + + try { + console.info('tracking tags:', tags); + trends.addTagUsages(event.pubkey, tags, date); + } catch (_e) { + // do nothing + } +} diff --git a/src/schema.ts b/src/schema.ts index 8027023..635dff8 100644 --- a/src/schema.ts +++ b/src/schema.ts @@ -67,15 +67,15 @@ const relaySchema = z.custom((relay) => { } }); -const nostrIdSchema = z.string().regex(/^[0-9a-f]{64}$/); +const hexIdSchema = z.string().regex(/^[0-9a-f]{64}$/); const eventSchema = z.object({ - id: nostrIdSchema, + id: hexIdSchema, kind: z.number(), tags: z.array(z.array(z.string())), content: z.string(), created_at: z.number(), - pubkey: nostrIdSchema, + pubkey: hexIdSchema, sig: z.string(), }); @@ -95,10 +95,14 @@ const decode64Schema = z.string().transform((value, ctx) => { } }); +const hashtagSchema = z.string().regex(/^\w{1,30}$/); + export { decode64Schema, emojiTagSchema, filteredArray, + hashtagSchema, + hexIdSchema, jsonSchema, type MetaContent, metaContentSchema, diff --git a/src/server.ts b/src/server.ts index a7bd083..c9d3f4f 100644 --- a/src/server.ts +++ b/src/server.ts @@ -1,5 +1,5 @@ import 'https://deno.land/std@0.177.0/dotenv/load.ts'; -import { serve } from 'https://deno.land/std@0.177.0/http/server.ts'; +import { serve } from '@/deps.ts'; import app from './app.ts'; diff --git a/src/trends.test.ts b/src/trends.test.ts new file mode 100644 index 0000000..dbc6d7c --- /dev/null +++ b/src/trends.test.ts @@ -0,0 +1,32 @@ +import { assertEquals } from '@/deps-test.ts'; +import { Sqlite } from '@/deps.ts'; + +import { TrendsDB } from './trends.ts'; + +const db = new Sqlite(':memory:'); +const trends = new TrendsDB(db); + +const p8 = (pubkey8: string) => `${pubkey8}00000000000000000000000000000000000000000000000000000000`; + +Deno.test('getTrendingTags', () => { + trends.addTagUsages(p8('00000000'), ['ditto', 'hello', 'yolo']); + trends.addTagUsages(p8('00000000'), ['hello']); + trends.addTagUsages(p8('00000001'), ['Ditto', 'hello']); + trends.addTagUsages(p8('00000010'), ['DITTO']); + + const result = trends.getTrendingTags({ + since: new Date('1999-01-01T00:00:00'), + until: new Date('2999-01-01T00:00:00'), + threshold: 1, + }); + + const expected = [ + { name: 'ditto', accounts: 3, uses: 3 }, + { name: 'hello', accounts: 2, uses: 3 }, + { name: 'yolo', accounts: 1, uses: 1 }, + ]; + + assertEquals(result, expected); + + trends.cleanupTagUsages(new Date('2999-01-01T00:00:00')); +}); diff --git a/src/trends.ts b/src/trends.ts new file mode 100644 index 0000000..3bb19d4 --- /dev/null +++ b/src/trends.ts @@ -0,0 +1,124 @@ +import { Sqlite } from '@/deps.ts'; +import { hashtagSchema, hexIdSchema } from '@/schema.ts'; +import { Time } from '@/utils.ts'; +import { generateDateRange } from '@/utils/time.ts'; + +interface GetTrendingTagsOpts { + since: Date; + until: Date; + limit?: number; + threshold?: number; +} + +interface GetTagHistoryOpts { + tag: string; + since: Date; + until: Date; + limit?: number; + offset?: number; +} + +class TrendsDB { + #db: Sqlite; + + constructor(db: Sqlite) { + this.#db = db; + + this.#db.execute(` + CREATE TABLE IF NOT EXISTS tag_usages ( + tag TEXT NOT NULL COLLATE NOCASE, + pubkey8 TEXT NOT NULL, + inserted_at DATETIME NOT NULL + ); + + CREATE INDEX IF NOT EXISTS idx_time_tag ON tag_usages(inserted_at, tag); + `); + + const cleanup = () => { + console.info('Cleaning up old tag usages...'); + const lastWeek = new Date(new Date().getTime() - Time.days(7)); + this.cleanupTagUsages(lastWeek); + }; + + setInterval(cleanup, Time.hours(1)); + cleanup(); + } + + /** Gets the most used hashtags between the date range. */ + getTrendingTags({ since, until, limit = 10, threshold = 3 }: GetTrendingTagsOpts) { + return this.#db.query( + ` + SELECT tag, COUNT(DISTINCT pubkey8), COUNT(*) + FROM tag_usages + WHERE inserted_at >= ? AND inserted_at < ? + GROUP BY tag + HAVING COUNT(DISTINCT pubkey8) >= ? + ORDER BY COUNT(DISTINCT pubkey8) + DESC LIMIT ?; + `, + [since, until, threshold, limit], + ).map((row) => ({ + name: row[0], + accounts: Number(row[1]), + uses: Number(row[2]), + })); + } + + /** + * Gets the tag usage count for a specific tag. + * It returns an array with counts for each date between the range. + */ + getTagHistory({ tag, since, until, limit = 7, offset = 0 }: GetTagHistoryOpts) { + const result = this.#db.query( + ` + SELECT date(inserted_at), COUNT(DISTINCT pubkey8), COUNT(*) + FROM tag_usages + WHERE tag = ? AND inserted_at >= ? AND inserted_at < ? + GROUP BY date(inserted_at) + ORDER BY date(inserted_at) DESC + LIMIT ? + OFFSET ?; + `, + [tag, since, until, limit, offset], + ).map((row) => ({ + day: new Date(row[0]), + accounts: Number(row[1]), + uses: Number(row[2]), + })); + + /** Full date range between `since` and `until`. */ + const dateRange = generateDateRange( + new Date(since.getTime() + Time.days(1)), + new Date(until.getTime() - Time.days(offset)), + ).reverse(); + + // Fill in missing dates with 0 usages. + return dateRange.map((day) => { + const data = result.find((item) => item.day.getTime() === day.getTime()); + return data || { day, accounts: 0, uses: 0 }; + }); + } + + addTagUsages(pubkey: string, hashtags: string[], date = new Date()): void { + const pubkey8 = hexIdSchema.parse(pubkey).substring(0, 8); + const tags = hashtagSchema.array().min(1).parse(hashtags); + + this.#db.query( + 'INSERT INTO tag_usages (tag, pubkey8, inserted_at) VALUES ' + tags.map(() => '(?, ?, ?)').join(', '), + tags.map((tag) => [tag, pubkey8, date]).flat(), + ); + } + + cleanupTagUsages(until: Date): void { + this.#db.query( + 'DELETE FROM tag_usages WHERE inserted_at < ?', + [until], + ); + } +} + +const trends = new TrendsDB( + new Sqlite('data/trends.sqlite3'), +); + +export { trends, TrendsDB }; diff --git a/src/utils.ts b/src/utils.ts index 7a91fc2..a81c0fd 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -81,7 +81,7 @@ async function parseBody(req: Request): Promise { const paginationSchema = z.object({ since: z.coerce.number().optional().catch(undefined), until: z.lazy(() => z.coerce.number().catch(nostrNow())), - limit: z.coerce.number().min(0).max(40).catch(20), + limit: z.coerce.number().catch(20).transform((value) => Math.min(Math.max(value, 0), 40)), }); type PaginationParams = z.infer; diff --git a/src/utils/time.test.ts b/src/utils/time.test.ts new file mode 100644 index 0000000..c167caf --- /dev/null +++ b/src/utils/time.test.ts @@ -0,0 +1,23 @@ +import { assertEquals } from '@/deps-test.ts'; + +import { generateDateRange } from './time.ts'; + +Deno.test('generateDateRange', () => { + const since = new Date('2023-07-03T16:30:00.000Z'); + const until = new Date('2023-07-07T09:01:00.000Z'); + + const expected = [ + new Date('2023-07-03T00:00:00.000Z'), + new Date('2023-07-04T00:00:00.000Z'), + new Date('2023-07-05T00:00:00.000Z'), + new Date('2023-07-06T00:00:00.000Z'), + new Date('2023-07-07T00:00:00.000Z'), + ]; + + const result = generateDateRange(since, until); + + assertEquals( + result.map((d) => d.getTime()), + expected.map((d) => d.getTime()), + ); +}); diff --git a/src/utils/time.ts b/src/utils/time.ts index 1c2444e..2266eb4 100644 --- a/src/utils/time.ts +++ b/src/utils/time.ts @@ -9,4 +9,24 @@ const Time = { years: (y: number) => y * Time.days(365), }; -export { Time }; +/** Strips the time off the date, giving 12am UTC. */ +function stripTime(date: Date): Date { + return new Date(Date.UTC(date.getUTCFullYear(), date.getUTCMonth(), date.getUTCDate())); +} + +/** Strips times off the dates and generates all 24h intervals between them, inclusive of both inputs. */ +function generateDateRange(since: Date, until: Date): Date[] { + const dates = []; + + const sinceDate = stripTime(since); + const untilDate = stripTime(until); + + while (sinceDate <= untilDate) { + dates.push(new Date(sinceDate)); + sinceDate.setUTCDate(sinceDate.getUTCDate() + 1); + } + + return dates; +} + +export { generateDateRange, stripTime, Time };