Merge branch 'fts' into 'develop'

Add full-text search with SQLite FTS5

Closes #61 and #60

See merge request soapbox-pub/ditto!30
This commit is contained in:
Alex Gleason 2023-08-30 22:32:44 +00:00
commit 305c5035a2
8 changed files with 193 additions and 18 deletions

View File

@ -1,23 +1,143 @@
import { AppController } from '@/app.ts';
import { lookupAccount } from '@/utils.ts';
import { toAccount } from '@/transformers/nostr-to-mastoapi.ts';
import * as eventsDB from '@/db/events.ts';
import { type Event, type Filter, nip19, z } from '@/deps.ts';
import * as mixer from '@/mixer.ts';
import { lookupNip05Cached } from '@/nip05.ts';
import { booleanParamSchema } from '@/schema.ts';
import { nostrIdSchema } from '@/schemas/nostr.ts';
import { toAccount, toStatus } from '@/transformers/nostr-to-mastoapi.ts';
import { dedupeEvents, Time } from '@/utils.ts';
/**
 * Matches NIP-05 names with or without an @ in front.
 *
 * Capture group 1 is the optional part before the `@` separator (word characters, `.`, `+`, `-`);
 * capture group 2 is the remainder (word characters, `.`, `-`), which is the whole value when
 * no `@` separator is present.
 */
const ACCT_REGEX = /^@?(?:([\w.+-]+)@)?([\w.-]+)$/;
/**
 * Percent-decode a search term.
 *
 * `decodeURIComponent` throws a `URIError` on malformed escape sequences (for example a lone `%`,
 * as in a search for `100%`). An exception thrown inside a Zod transform is not turned into a
 * validation issue, so it would escape `safeParse`. Fall back to the raw value instead.
 */
function decodeSearchTerm(value: string): string {
  try {
    return decodeURIComponent(value);
  } catch (_e) {
    // Deliberate best-effort: an undecodable term is searched for verbatim.
    return value;
  }
}

/** Schema for the query parameters accepted by the search endpoint. */
const searchQuerySchema = z.object({
  /** Search term (percent-decoded when possible). */
  q: z.string().transform(decodeSearchTerm),
  /** Restrict results to one category; all categories when omitted. */
  type: z.enum(['accounts', 'statuses', 'hashtags']).optional(),
  /** Whether to try resolving `q` into a specific event. Defaults to `false`. */
  resolve: booleanParamSchema.optional().transform(Boolean),
  // Query string values arrive as strings, so a plain `z.boolean()` would reject every supplied
  // value. Parse it the same way as `resolve`; it still defaults to `false` when omitted.
  following: booleanParamSchema.optional().transform(Boolean),
  /** Only return events authored by this account. */
  account_id: nostrIdSchema.optional(),
  /** Maximum number of results: falls back to 20 when unparsable, clamped to the range 0-40. */
  limit: z.coerce.number().catch(20).transform((value) => Math.min(Math.max(value, 0), 40)),
});

/** Parsed and normalized search query parameters. */
type SearchQuery = z.infer<typeof searchQuerySchema>;
/**
 * Search endpoint controller.
 *
 * Validates the query string with `searchQuerySchema` (responding 422 on failure), runs
 * `lookupEvent` and `searchEvents` concurrently, merges and deduplicates the resulting events,
 * then renders kind 0 events with `toAccount` and kind 1 events with `toStatus`.
 * `hashtags` is always returned as an empty array.
 *
 * NOTE(review): this span appears to interleave removed and added lines of a diff (e.g. the
 * `q` / `lookupAccount` lines and the duplicated `accounts` / `statuses` keys) - confirm which
 * lines belong to the current version before relying on it.
 */
const searchController: AppController = async (c) => {
const q = c.req.query('q');
// Parse and normalize every query parameter in one pass.
const result = searchQuerySchema.safeParse(c.req.query());
if (!q) {
return c.json({ error: 'Missing `q` query parameter.' }, 422);
if (!result.success) {
return c.json({ error: 'Bad request', schema: result.error }, 422);
}
// For now, only support looking up accounts.
// TODO: Support searching statuses and hashtags.
const event = await lookupAccount(decodeURIComponent(q));
// Run the direct lookup and the search side by side.
const [event, events] = await Promise.all([
lookupEvent(result.data),
searchEvents(result.data),
]);
// Include the directly looked-up event (if any) alongside the search results.
if (event) {
events.push(event);
}
// The looked-up event may also be among the search results; keep one copy per ID.
const results = dedupeEvents(events);
// Render accounts (kind 0) and statuses (kind 1) concurrently.
const [accounts, statuses] = await Promise.all([
Promise.all(
results
.filter((event): event is Event<0> => event.kind === 0)
.map((event) => toAccount(event)),
),
Promise.all(
results
.filter((event): event is Event<1> => event.kind === 1)
.map((event) => toStatus(event, c.get('pubkey'))),
),
]);
return c.json({
accounts: event ? [await toAccount(event)] : [],
statuses: [],
// Drop any falsy entries produced by the transformers.
accounts: accounts.filter(Boolean),
statuses: statuses.filter(Boolean),
hashtags: [],
});
};
/**
 * Query the events database for events matching the search params.
 *
 * Resolves to an empty list for `type === 'hashtags'`. Otherwise builds a single filter from
 * the search term, the kinds implied by `type`, the limit and (when given) the author.
 */
function searchEvents({ q, type, limit, account_id }: SearchQuery): Promise<Event[]> {
  const hashtagsOnly = type === 'hashtags';
  if (hashtagsOnly) {
    return Promise.resolve([]);
  }

  const searchFilter: Filter = {
    kinds: typeToKinds(type),
    search: q,
    limit,
    // Only constrain by author when an account was requested.
    ...(account_id ? { authors: [account_id] } : {}),
  };

  return eventsDB.getFilters([searchFilter]);
}
/**
 * Map the `type` query param to the event kinds to search.
 *
 * `accounts` maps to kind 0, `statuses` to kind 1, and anything else to both.
 */
function typeToKinds(type: SearchQuery['type']): number[] {
  if (type === 'accounts') {
    return [0];
  }
  if (type === 'statuses') {
    return [1];
  }
  return [0, 1];
}
/**
 * Try to resolve the searched value into a single event.
 *
 * Builds lookup filters for the query and asks the mixer for at most one event, with a
 * `Time.seconds(1)` timeout. Resolves to `undefined` when nothing is found.
 */
async function lookupEvent(query: SearchQuery): Promise<Event | undefined> {
  const lookupFilters = await getLookupFilters(query);
  const matches = await mixer.getFilters(lookupFilters, { limit: 1, timeout: Time.seconds(1) });
  return matches[0];
}
/**
 * Build the filters needed to look up the searched value directly.
 *
 * Returns no filters unless `resolve` is set, and never for hashtag searches. Otherwise the
 * value is interpreted, in order, as:
 * 1. a bech32 NIP-19 entity (`npub`, `nprofile`, `note`, `nevent`),
 * 2. a 64-character lowercase hex string (tried as both a pubkey and an event ID),
 * 3. a NIP-05 name (accounts only), resolved through `lookupNip05Cached`.
 *
 * Account filters (kind 0) and status filters (kind 1) are only added when `type` allows them.
 */
async function getLookupFilters({ q, type, resolve }: SearchQuery): Promise<Filter[]> {
  if (!resolve || type === 'hashtags') {
    return [];
  }

  const wantAccounts = !type || type === 'accounts';
  const wantStatuses = !type || type === 'statuses';
  const lookups: Filter[] = [];

  const bech32Pattern = new RegExp(`^${nip19.BECH32_REGEX.source}$`);

  if (bech32Pattern.test(q)) {
    try {
      const decoded = nip19.decode(q);

      if (decoded.type === 'npub') {
        if (wantAccounts) lookups.push({ kinds: [0], authors: [decoded.data] });
      } else if (decoded.type === 'nprofile') {
        if (wantAccounts) lookups.push({ kinds: [0], authors: [decoded.data.pubkey] });
      } else if (decoded.type === 'note') {
        if (wantStatuses) lookups.push({ kinds: [1], ids: [decoded.data] });
      } else if (decoded.type === 'nevent') {
        if (wantStatuses) lookups.push({ kinds: [1], ids: [decoded.data.id] });
      }
    } catch (_e) {
      // do nothing
    }
    return lookups;
  }

  if (/^[0-9a-f]{64}$/.test(q)) {
    // A bare hex string is ambiguous, so try it as a pubkey and as an event ID.
    if (wantAccounts) lookups.push({ kinds: [0], authors: [q] });
    if (wantStatuses) lookups.push({ kinds: [1], ids: [q] });
    return lookups;
  }

  if (wantAccounts && ACCT_REGEX.test(q)) {
    const pubkey = await lookupNip05Cached(q);
    if (pubkey) {
      lookups.push({ kinds: [0], authors: [pubkey] });
    }
  }

  return lookups;
}
export { searchController };

View File

@ -6,6 +6,7 @@ import { Conf } from '@/config.ts';
interface DittoDB {
events: EventRow;
events_fts: EventFTSRow;
tags: TagRow;
users: UserRow;
relays: RelayRow;
@ -21,6 +22,11 @@ interface EventRow {
sig: string;
}
/** Row of the `events_fts` full-text search table. */
interface EventFTSRow {
/** ID of the indexed event. */
id: string;
/** Searchable text built from the event. */
content: string;
}
interface TagRow {
tag: string;
value_1: string | null;

View File

@ -2,6 +2,7 @@ import { db, type TagRow } from '@/db.ts';
import { type Event, type Insertable, SqliteError } from '@/deps.ts';
import type { DittoFilter, GetFiltersOpts } from '@/filter.ts';
import { jsonMetaContentSchema } from '@/schemas/nostr.ts';
type TagCondition = ({ event, count }: { event: Event; count: number }) => boolean;
@ -25,6 +26,13 @@ function insertEvent(event: Event): Promise<void> {
})
.execute();
const searchContent = buildSearchContent(event);
if (searchContent) {
await trx.insertInto('events_fts')
.values({ id: event.id, content: searchContent.substring(0, 1000) })
.execute();
}
const tagCounts: Record<string, number> = {};
const tags = event.tags.reduce<Insertable<TagRow>[]>((results, tag) => {
const tagName = tag[0];
@ -111,6 +119,12 @@ function getFilterQuery(filter: DittoFilter) {
query = query.innerJoin('users', 'users.pubkey', 'events.pubkey');
}
if (filter.search) {
query = query
.innerJoin('events_fts', 'events_fts.id', 'events.id')
.where('events_fts.content', 'match', JSON.stringify(filter.search));
}
return query;
}
@ -131,6 +145,7 @@ async function getFilters<K extends number>(
));
}
/** Get number of events that would be returned by filters. */
async function countFilters<K extends number>(filters: DittoFilter<K>[]): Promise<number> {
if (!filters.length) return Promise.resolve(0);
const query = filters.map(getFilterQuery).reduce((acc, curr) => acc.union(curr));
@ -143,4 +158,22 @@ async function countFilters<K extends number>(filters: DittoFilter<K>[]): Promis
return Number(count);
}
/**
 * Build the text to index for full-text search from an event.
 *
 * Kind 0 events are indexed by selected profile fields, kind 1 events by their content.
 * Every other kind yields an empty string.
 */
function buildSearchContent(event: Event): string {
  if (event.kind === 0) {
    return buildUserSearchContent(event as Event<0>);
  }
  if (event.kind === 1) {
    return event.content;
  }
  return '';
}
/**
 * Build search content for a user.
 *
 * Parses the event content with `jsonMetaContentSchema` and joins the non-empty `name`,
 * `nip05` and `about` fields with newlines.
 */
function buildUserSearchContent(event: Event<0>): string {
  const meta = jsonMetaContentSchema.parse(event.content);
  const fields = [meta.name, meta.nip05, meta.about];
  return fields.filter(Boolean).join('\n');
}
export { countFilters, getFilters, insertEvent };

View File

@ -0,0 +1,9 @@
import { Kysely, sql } from '@/deps.ts';
/** Create the `events_fts` FTS5 virtual table with `id` and `content` columns. */
export async function up(db: Kysely<any>): Promise<void> {
  const createFtsTable = sql`CREATE VIRTUAL TABLE events_fts USING fts5(id, content)`;
  await createFtsTable.execute(db);
}
/** Drop the `events_fts` table. */
export async function down(db: Kysely<any>): Promise<void> {
  const dropFtsTable = db.schema.dropTable('events_fts');
  await dropFtsTable.execute();
}

View File

@ -50,7 +50,10 @@ export {
export { generateSeededRsa } from 'https://gitlab.com/soapbox-pub/seeded-rsa/-/raw/v1.0.0/mod.ts';
export * as secp from 'npm:@noble/secp256k1@^2.0.0';
export { LRUCache } from 'npm:lru-cache@^10.0.0';
export { DB as Sqlite, SqliteError } from 'https://deno.land/x/sqlite@v3.7.3/mod.ts';
export {
DB as Sqlite,
SqliteError,
} from 'https://raw.githubusercontent.com/alexgleason/deno-sqlite/325f66d8c395e7f6f5ee78ebfa42a0eeea4a942b/mod.ts';
export * as dotenv from 'https://deno.land/std@0.198.0/dotenv/mod.ts';
export {
FileMigrationProvider,
@ -60,7 +63,7 @@ export {
type NullableInsertKeys,
sql,
} from 'npm:kysely@^0.25.0';
export { DenoSqliteDialect } from 'https://gitlab.com/soapbox-pub/kysely-deno-sqlite/-/raw/v1.0.0/mod.ts';
export { DenoSqliteDialect } from 'https://gitlab.com/soapbox-pub/kysely-deno-sqlite/-/raw/v1.0.1/mod.ts';
export { default as tldts } from 'npm:tldts@^6.0.14';
export type * as TypeFest from 'npm:type-fest@^4.3.0';

View File

@ -2,7 +2,7 @@ import { type Event, matchFilters } from '@/deps.ts';
import * as client from '@/client.ts';
import * as eventsDB from '@/db/events.ts';
import { eventDateComparator } from '@/utils.ts';
import { dedupeEvents, eventDateComparator } from '@/utils.ts';
import type { DittoFilter, GetFiltersOpts } from '@/filter.ts';
@ -11,6 +11,8 @@ async function getFilters<K extends number>(
filters: DittoFilter<K>[],
opts?: GetFiltersOpts,
): Promise<Event<K>[]> {
if (!filters.length) return Promise.resolve([]);
const results = await Promise.allSettled([
client.getFilters(filters.filter((filter) => !filter.local), opts),
eventsDB.getFilters(filters, opts),
@ -33,11 +35,6 @@ function unmixEvents<K extends number>(events: Event<K>[], filters: DittoFilter<
return events;
}
/**
 * Deduplicate events by ID.
 *
 * When several events share an ID, the last one is kept, at the position of the first.
 */
function dedupeEvents<K extends number>(events: Event<K>[]): Event<K>[] {
  const byId = new Map<Event<K>['id'], Event<K>>();
  for (const event of events) {
    byId.set(event.id, event);
  }
  return Array.from(byId.values());
}
/** Take the newest events among replaceable ones. */
function takeNewestEvents<K extends number>(events: Event<K>[]): Event<K>[] {
const isReplaceable = (kind: number) =>

View File

@ -29,6 +29,7 @@ const filterSchema = z.object({
since: z.number().int().nonnegative().optional(),
until: z.number().int().nonnegative().optional(),
limit: z.number().int().nonnegative().optional(),
search: z.string().optional(),
}).passthrough().and(
z.record(
z.custom<`#${string}`>((val) => typeof val === 'string' && val.startsWith('#')),

View File

@ -101,8 +101,14 @@ function isFollowing(source: Event<3>, targetPubkey: string): boolean {
);
}
/**
 * Deduplicate events by ID.
 *
 * When several events share an ID, the last one is kept, at the position of the first.
 */
function dedupeEvents<K extends number>(events: Event<K>[]): Event<K>[] {
  const byId = new Map<Event<K>['id'], Event<K>>();
  for (const event of events) {
    byId.set(event.id, event);
  }
  return Array.from(byId.values());
}
export {
bech32ToPubkey,
dedupeEvents,
eventAge,
eventDateComparator,
findTag,