Merge branch 'fts' into 'develop'
Add full-text search with SQLite FTS5. Closes #61 and #60. See merge request soapbox-pub/ditto!30.
This commit is contained in:
commit
305c5035a2
|
@ -1,23 +1,143 @@
|
|||
import { AppController } from '@/app.ts';
|
||||
import { lookupAccount } from '@/utils.ts';
|
||||
import { toAccount } from '@/transformers/nostr-to-mastoapi.ts';
|
||||
import * as eventsDB from '@/db/events.ts';
|
||||
import { type Event, type Filter, nip19, z } from '@/deps.ts';
|
||||
import * as mixer from '@/mixer.ts';
|
||||
import { lookupNip05Cached } from '@/nip05.ts';
|
||||
import { booleanParamSchema } from '@/schema.ts';
|
||||
import { nostrIdSchema } from '@/schemas/nostr.ts';
|
||||
import { toAccount, toStatus } from '@/transformers/nostr-to-mastoapi.ts';
|
||||
import { dedupeEvents, Time } from '@/utils.ts';
|
||||
|
||||
/** Matches NIP-05 names with or without an @ in front. */
|
||||
const ACCT_REGEX = /^@?(?:([\w.+-]+)@)?([\w.-]+)$/;
|
||||
|
||||
/**
 * Validates and normalizes the query-string params of the search endpoint.
 *
 * - `q`: the search text, URI-decoded when possible.
 * - `type`: optionally restricts results to `accounts`, `statuses` or `hashtags`.
 * - `resolve`, `following`: boolean flags sent as query-string values; absent means `false`.
 * - `account_id`: optionally restricts results to a single author.
 * - `limit`: coerced to a number, falls back to 20 when invalid, clamped to 0..40.
 */
const searchQuerySchema = z.object({
  q: z.string().transform((value) => {
    try {
      return decodeURIComponent(value);
    } catch (_e) {
      // `decodeURIComponent` throws a URIError on malformed escapes (e.g. a lone `%`).
      // An exception inside a transform is not turned into a validation issue, so
      // it would escape `safeParse`; fall back to the raw text instead.
      return value;
    }
  }),
  type: z.enum(['accounts', 'statuses', 'hashtags']).optional(),
  resolve: booleanParamSchema.optional().transform(Boolean),
  // Query-string values are always strings, so `z.boolean()` could never accept
  // a provided value; parse it the same way as `resolve`.
  following: booleanParamSchema.optional().transform(Boolean),
  account_id: nostrIdSchema.optional(),
  limit: z.coerce.number().catch(20).transform((value) => Math.min(Math.max(value, 0), 40)),
});
|
||||
|
||||
type SearchQuery = z.infer<typeof searchQuerySchema>;
|
||||
|
||||
const searchController: AppController = async (c) => {
|
||||
const q = c.req.query('q');
|
||||
const result = searchQuerySchema.safeParse(c.req.query());
|
||||
|
||||
if (!q) {
|
||||
return c.json({ error: 'Missing `q` query parameter.' }, 422);
|
||||
if (!result.success) {
|
||||
return c.json({ error: 'Bad request', schema: result.error }, 422);
|
||||
}
|
||||
|
||||
// For now, only support looking up accounts.
|
||||
// TODO: Support searching statuses and hashtags.
|
||||
const event = await lookupAccount(decodeURIComponent(q));
|
||||
const [event, events] = await Promise.all([
|
||||
lookupEvent(result.data),
|
||||
searchEvents(result.data),
|
||||
]);
|
||||
|
||||
if (event) {
|
||||
events.push(event);
|
||||
}
|
||||
|
||||
const results = dedupeEvents(events);
|
||||
|
||||
const [accounts, statuses] = await Promise.all([
|
||||
Promise.all(
|
||||
results
|
||||
.filter((event): event is Event<0> => event.kind === 0)
|
||||
.map((event) => toAccount(event)),
|
||||
),
|
||||
Promise.all(
|
||||
results
|
||||
.filter((event): event is Event<1> => event.kind === 1)
|
||||
.map((event) => toStatus(event, c.get('pubkey'))),
|
||||
),
|
||||
]);
|
||||
|
||||
return c.json({
|
||||
accounts: event ? [await toAccount(event)] : [],
|
||||
statuses: [],
|
||||
accounts: accounts.filter(Boolean),
|
||||
statuses: statuses.filter(Boolean),
|
||||
hashtags: [],
|
||||
});
|
||||
};
|
||||
|
||||
/**
 * Run the full-text search described by the search params against the local events database.
 *
 * Hashtag searches always yield no events. Otherwise a single filter is built
 * from the query text, the kinds implied by `type` and the `limit`, optionally
 * restricted to `account_id` as the only author.
 */
function searchEvents({ q, type, limit, account_id }: SearchQuery): Promise<Event[]> {
  if (type === 'hashtags') {
    return Promise.resolve([]);
  }

  // Only constrain authors when an account was requested.
  const authorConstraint = account_id ? { authors: [account_id] } : {};

  const filter: Filter = {
    kinds: typeToKinds(type),
    search: q,
    limit,
    ...authorConstraint,
  };

  return eventsDB.getFilters([filter]);
}
|
||||
|
||||
/**
 * Map the `type` query param to the event kinds to search.
 *
 * `accounts` maps to kind 0 and `statuses` to kind 1; any other value
 * (including `undefined`) maps to both kinds.
 */
function typeToKinds(type: SearchQuery['type']): number[] {
  if (type === 'accounts') return [0];
  if (type === 'statuses') return [1];
  return [0, 1];
}
|
||||
|
||||
/**
 * Resolve a searched value into a single event, if applicable.
 *
 * Builds the lookup filters for the query, then asks the mixer for at most one
 * event with a one-second timeout. Resolves to `undefined` when nothing is found.
 */
async function lookupEvent(query: SearchQuery): Promise<Event | undefined> {
  const lookupFilters = await getLookupFilters(query);
  const matches = await mixer.getFilters(lookupFilters, { limit: 1, timeout: Time.seconds(1) });
  return matches[0];
}
|
||||
|
||||
/**
 * Get filters to lookup the input value.
 *
 * Filters are only produced when `resolve` is set and `type` is not `hashtags`.
 * The query text is interpreted, in order, as:
 *
 * 1. a 64-character lowercase hex string, tried as a pubkey and as an event ID;
 * 2. a NIP-19 entity (`npub`, `nprofile`, `note` or `nevent`);
 * 3. a NIP-05 identifier, resolved to a pubkey with `lookupNip05Cached`.
 *
 * Account filters (kind 0) are only added when `type` allows accounts, and
 * status filters (kind 1) only when it allows statuses.
 *
 * @param query Parsed search params; only `q`, `type` and `resolve` are read.
 * @returns The filters identifying what `q` refers to, or an empty array.
 */
async function getLookupFilters({ q, type, resolve }: SearchQuery): Promise<Filter[]> {
  const filters: Filter[] = [];

  if (!resolve || type === 'hashtags') {
    return filters;
  }

  const accounts = !type || type === 'accounts';
  const statuses = !type || type === 'statuses';

  if (/^[0-9a-f]{64}$/.test(q)) {
    // Hex must be tested before bech32. The bech32 pattern (as defined by
    // nostr-tools) accepts any printable prefix, a `1`, then six or more
    // characters from an alphabet containing every hex digit except `1` and
    // `b`, so many hex ids match it too. Testing bech32 first sent those ids
    // into a decode that fails, and they were never looked up.
    if (accounts) filters.push({ kinds: [0], authors: [q] });
    if (statuses) filters.push({ kinds: [1], ids: [q] });
  } else if (new RegExp(`^${nip19.BECH32_REGEX.source}$`).test(q)) {
    try {
      const result = nip19.decode(q);
      switch (result.type) {
        case 'npub':
          if (accounts) filters.push({ kinds: [0], authors: [result.data] });
          break;
        case 'nprofile':
          if (accounts) filters.push({ kinds: [0], authors: [result.data.pubkey] });
          break;
        case 'note':
          if (statuses) filters.push({ kinds: [1], ids: [result.data] });
          break;
        case 'nevent':
          if (statuses) filters.push({ kinds: [1], ids: [result.data.id] });
          break;
      }
    } catch (_e) {
      // Best effort: text that looks like bech32 but does not decode resolves to nothing.
    }
  } else if (accounts && ACCT_REGEX.test(q)) {
    // ACCT_REGEX tolerates a leading `@` (as in `@name@domain`), which is not
    // part of a NIP-05 identifier, so drop it before the lookup.
    // NOTE(review): harmless if `lookupNip05Cached` already strips it; confirm.
    const pubkey = await lookupNip05Cached(q.replace(/^@/, ''));
    if (pubkey) {
      filters.push({ kinds: [0], authors: [pubkey] });
    }
  }

  return filters;
}
|
||||
|
||||
export { searchController };
|
||||
|
|
|
@ -6,6 +6,7 @@ import { Conf } from '@/config.ts';
|
|||
|
||||
interface DittoDB {
|
||||
events: EventRow;
|
||||
events_fts: EventFTSRow;
|
||||
tags: TagRow;
|
||||
users: UserRow;
|
||||
relays: RelayRow;
|
||||
|
@ -21,6 +22,11 @@ interface EventRow {
|
|||
sig: string;
|
||||
}
|
||||
|
||||
/** Row shape of the `events_fts` full-text search table. */
interface EventFTSRow {
|
||||
/** ID of the event this search entry belongs to. */
id: string;
|
||||
/** Text that search queries are matched against. */
content: string;
|
||||
}
|
||||
|
||||
interface TagRow {
|
||||
tag: string;
|
||||
value_1: string | null;
|
||||
|
|
|
@ -2,6 +2,7 @@ import { db, type TagRow } from '@/db.ts';
|
|||
import { type Event, type Insertable, SqliteError } from '@/deps.ts';
|
||||
|
||||
import type { DittoFilter, GetFiltersOpts } from '@/filter.ts';
|
||||
import { jsonMetaContentSchema } from '@/schemas/nostr.ts';
|
||||
|
||||
type TagCondition = ({ event, count }: { event: Event; count: number }) => boolean;
|
||||
|
||||
|
@ -25,6 +26,13 @@ function insertEvent(event: Event): Promise<void> {
|
|||
})
|
||||
.execute();
|
||||
|
||||
const searchContent = buildSearchContent(event);
|
||||
if (searchContent) {
|
||||
await trx.insertInto('events_fts')
|
||||
.values({ id: event.id, content: searchContent.substring(0, 1000) })
|
||||
.execute();
|
||||
}
|
||||
|
||||
const tagCounts: Record<string, number> = {};
|
||||
const tags = event.tags.reduce<Insertable<TagRow>[]>((results, tag) => {
|
||||
const tagName = tag[0];
|
||||
|
@ -111,6 +119,12 @@ function getFilterQuery(filter: DittoFilter) {
|
|||
query = query.innerJoin('users', 'users.pubkey', 'events.pubkey');
|
||||
}
|
||||
|
||||
if (filter.search) {
|
||||
query = query
|
||||
.innerJoin('events_fts', 'events_fts.id', 'events.id')
|
||||
.where('events_fts.content', 'match', JSON.stringify(filter.search));
|
||||
}
|
||||
|
||||
return query;
|
||||
}
|
||||
|
||||
|
@ -131,6 +145,7 @@ async function getFilters<K extends number>(
|
|||
));
|
||||
}
|
||||
|
||||
/** Get number of events that would be returned by filters. */
|
||||
async function countFilters<K extends number>(filters: DittoFilter<K>[]): Promise<number> {
|
||||
if (!filters.length) return Promise.resolve(0);
|
||||
const query = filters.map(getFilterQuery).reduce((acc, curr) => acc.union(curr));
|
||||
|
@ -143,4 +158,22 @@ async function countFilters<K extends number>(filters: DittoFilter<K>[]): Promis
|
|||
return Number(count);
|
||||
}
|
||||
|
||||
/**
 * Build the text to index for full-text search from an event.
 *
 * Kind 0 events are indexed by their user metadata, kind 1 events by their
 * raw content. Every other kind yields an empty string.
 */
function buildSearchContent(event: Event): string {
  if (event.kind === 0) {
    return buildUserSearchContent(event as Event<0>);
  }
  if (event.kind === 1) {
    return event.content;
  }
  return '';
}
|
||||
|
||||
/**
 * Build the searchable text for a user (kind 0) event.
 *
 * Parses the event content as user metadata and joins the non-empty `name`,
 * `nip05` and `about` fields, in that order, one per line.
 */
function buildUserSearchContent(event: Event<0>): string {
  const meta = jsonMetaContentSchema.parse(event.content);
  const fields = [meta.name, meta.nip05, meta.about];
  return fields.filter((field) => Boolean(field)).join('\n');
}
|
||||
|
||||
export { countFilters, getFilters, insertEvent };
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
import { Kysely, sql } from '@/deps.ts';
|
||||
|
||||
/** Apply the migration: create the `events_fts` FTS5 virtual table with `id` and `content` columns. */
export async function up(db: Kysely<any>): Promise<void> {
  const createEventsFts = sql`CREATE VIRTUAL TABLE events_fts USING fts5(id, content)`;
  await createEventsFts.execute(db);
}
|
||||
|
||||
/** Revert the migration: drop the `events_fts` table. */
export async function down(db: Kysely<any>): Promise<void> {
  const dropEventsFts = db.schema.dropTable('events_fts');
  await dropEventsFts.execute();
}
|
|
@ -50,7 +50,10 @@ export {
|
|||
export { generateSeededRsa } from 'https://gitlab.com/soapbox-pub/seeded-rsa/-/raw/v1.0.0/mod.ts';
|
||||
export * as secp from 'npm:@noble/secp256k1@^2.0.0';
|
||||
export { LRUCache } from 'npm:lru-cache@^10.0.0';
|
||||
export { DB as Sqlite, SqliteError } from 'https://deno.land/x/sqlite@v3.7.3/mod.ts';
|
||||
export {
|
||||
DB as Sqlite,
|
||||
SqliteError,
|
||||
} from 'https://raw.githubusercontent.com/alexgleason/deno-sqlite/325f66d8c395e7f6f5ee78ebfa42a0eeea4a942b/mod.ts';
|
||||
export * as dotenv from 'https://deno.land/std@0.198.0/dotenv/mod.ts';
|
||||
export {
|
||||
FileMigrationProvider,
|
||||
|
@ -60,7 +63,7 @@ export {
|
|||
type NullableInsertKeys,
|
||||
sql,
|
||||
} from 'npm:kysely@^0.25.0';
|
||||
export { DenoSqliteDialect } from 'https://gitlab.com/soapbox-pub/kysely-deno-sqlite/-/raw/v1.0.0/mod.ts';
|
||||
export { DenoSqliteDialect } from 'https://gitlab.com/soapbox-pub/kysely-deno-sqlite/-/raw/v1.0.1/mod.ts';
|
||||
export { default as tldts } from 'npm:tldts@^6.0.14';
|
||||
|
||||
export type * as TypeFest from 'npm:type-fest@^4.3.0';
|
||||
|
|
|
@ -2,7 +2,7 @@ import { type Event, matchFilters } from '@/deps.ts';
|
|||
|
||||
import * as client from '@/client.ts';
|
||||
import * as eventsDB from '@/db/events.ts';
|
||||
import { eventDateComparator } from '@/utils.ts';
|
||||
import { dedupeEvents, eventDateComparator } from '@/utils.ts';
|
||||
|
||||
import type { DittoFilter, GetFiltersOpts } from '@/filter.ts';
|
||||
|
||||
|
@ -11,6 +11,8 @@ async function getFilters<K extends number>(
|
|||
filters: DittoFilter<K>[],
|
||||
opts?: GetFiltersOpts,
|
||||
): Promise<Event<K>[]> {
|
||||
if (!filters.length) return Promise.resolve([]);
|
||||
|
||||
const results = await Promise.allSettled([
|
||||
client.getFilters(filters.filter((filter) => !filter.local), opts),
|
||||
eventsDB.getFilters(filters, opts),
|
||||
|
@ -33,11 +35,6 @@ function unmixEvents<K extends number>(events: Event<K>[], filters: DittoFilter<
|
|||
return events;
|
||||
}
|
||||
|
||||
/** Deduplicate events by ID. */
|
||||
function dedupeEvents<K extends number>(events: Event<K>[]): Event<K>[] {
|
||||
return [...new Map(events.map((event) => [event.id, event])).values()];
|
||||
}
|
||||
|
||||
/** Take the newest events among replaceable ones. */
|
||||
function takeNewestEvents<K extends number>(events: Event<K>[]): Event<K>[] {
|
||||
const isReplaceable = (kind: number) =>
|
||||
|
|
|
@ -29,6 +29,7 @@ const filterSchema = z.object({
|
|||
since: z.number().int().nonnegative().optional(),
|
||||
until: z.number().int().nonnegative().optional(),
|
||||
limit: z.number().int().nonnegative().optional(),
|
||||
search: z.string().optional(),
|
||||
}).passthrough().and(
|
||||
z.record(
|
||||
z.custom<`#${string}`>((val) => typeof val === 'string' && val.startsWith('#')),
|
||||
|
|
|
@ -101,8 +101,14 @@ function isFollowing(source: Event<3>, targetPubkey: string): boolean {
|
|||
);
|
||||
}
|
||||
|
||||
/**
 * Deduplicate events by ID.
 *
 * When an ID occurs more than once, the last occurrence is the one kept, at
 * the position where that ID first appeared.
 */
function dedupeEvents<K extends number>(events: Event<K>[]): Event<K>[] {
  const byId = new Map<string, Event<K>>();
  for (const event of events) {
    byId.set(event.id, event);
  }
  return Array.from(byId.values());
}
|
||||
|
||||
export {
|
||||
bech32ToPubkey,
|
||||
dedupeEvents,
|
||||
eventAge,
|
||||
eventDateComparator,
|
||||
findTag,
|
||||
|
|
Loading…
Reference in New Issue