Merge branch 'fts' into 'develop'
Add full-text search with SQLite FTS5. Closes #61 and #60. See merge request soapbox-pub/ditto!30.
This commit is contained in:
commit
305c5035a2
|
@ -1,23 +1,143 @@
|
||||||
import { AppController } from '@/app.ts';
|
import { AppController } from '@/app.ts';
|
||||||
import { lookupAccount } from '@/utils.ts';
|
import * as eventsDB from '@/db/events.ts';
|
||||||
import { toAccount } from '@/transformers/nostr-to-mastoapi.ts';
|
import { type Event, type Filter, nip19, z } from '@/deps.ts';
|
||||||
|
import * as mixer from '@/mixer.ts';
|
||||||
|
import { lookupNip05Cached } from '@/nip05.ts';
|
||||||
|
import { booleanParamSchema } from '@/schema.ts';
|
||||||
|
import { nostrIdSchema } from '@/schemas/nostr.ts';
|
||||||
|
import { toAccount, toStatus } from '@/transformers/nostr-to-mastoapi.ts';
|
||||||
|
import { dedupeEvents, Time } from '@/utils.ts';
|
||||||
|
|
||||||
|
/** Matches NIP-05 names with or without an @ in front. */
const ACCT_REGEX = /^@?(?:([\w.+-]+)@)?([\w.-]+)$/;

/**
 * Percent-decode a query value without ever throwing.
 *
 * `decodeURIComponent` raises a `URIError` on malformed escape sequences
 * (for example a lone `%`). Inside a zod `transform` that exception would
 * escape `safeParse` instead of becoming a validation issue, so malformed
 * input is passed through unchanged.
 */
function decodeQueryValue(value: string): string {
  try {
    return decodeURIComponent(value);
  } catch (_e) {
    return value;
  }
}

/**
 * Query parameters accepted by the search endpoint.
 *
 * The schema is fed the raw query-string record, so every incoming value is a
 * string and each field is responsible for its own coercion.
 */
const searchQuerySchema = z.object({
  /** Search text; percent-decoded when possible. */
  q: z.string().transform(decodeQueryValue),
  /** Restrict results to a single result type. */
  type: z.enum(['accounts', 'statuses', 'hashtags']).optional(),
  /** Whether identifiers in `q` should be resolved into events. Absent means `false`. */
  resolve: booleanParamSchema.optional().transform(Boolean),
  /**
   * Parsed the same way as `resolve`: `z.boolean()` only accepts real booleans
   * and would reject any value that arrives through a query string.
   * Absent means `false`.
   */
  following: booleanParamSchema.optional().transform(Boolean),
  /** Restrict results to events authored by this account. */
  account_id: nostrIdSchema.optional(),
  /** Maximum number of results; falls back to 20 and is clamped to 0..40. */
  limit: z.coerce.number().catch(20).transform((value) => Math.min(Math.max(value, 0), 40)),
});

/** Parsed (output) shape of `searchQuerySchema`. */
type SearchQuery = z.infer<typeof searchQuerySchema>;
|
||||||
|
|
||||||
const searchController: AppController = async (c) => {
|
const searchController: AppController = async (c) => {
|
||||||
const q = c.req.query('q');
|
const result = searchQuerySchema.safeParse(c.req.query());
|
||||||
|
|
||||||
if (!q) {
|
if (!result.success) {
|
||||||
return c.json({ error: 'Missing `q` query parameter.' }, 422);
|
return c.json({ error: 'Bad request', schema: result.error }, 422);
|
||||||
}
|
}
|
||||||
|
|
||||||
// For now, only support looking up accounts.
|
const [event, events] = await Promise.all([
|
||||||
// TODO: Support searching statuses and hashtags.
|
lookupEvent(result.data),
|
||||||
const event = await lookupAccount(decodeURIComponent(q));
|
searchEvents(result.data),
|
||||||
|
]);
|
||||||
|
|
||||||
|
if (event) {
|
||||||
|
events.push(event);
|
||||||
|
}
|
||||||
|
|
||||||
|
const results = dedupeEvents(events);
|
||||||
|
|
||||||
|
const [accounts, statuses] = await Promise.all([
|
||||||
|
Promise.all(
|
||||||
|
results
|
||||||
|
.filter((event): event is Event<0> => event.kind === 0)
|
||||||
|
.map((event) => toAccount(event)),
|
||||||
|
),
|
||||||
|
Promise.all(
|
||||||
|
results
|
||||||
|
.filter((event): event is Event<1> => event.kind === 1)
|
||||||
|
.map((event) => toStatus(event, c.get('pubkey'))),
|
||||||
|
),
|
||||||
|
]);
|
||||||
|
|
||||||
return c.json({
|
return c.json({
|
||||||
accounts: event ? [await toAccount(event)] : [],
|
accounts: accounts.filter(Boolean),
|
||||||
statuses: [],
|
statuses: statuses.filter(Boolean),
|
||||||
hashtags: [],
|
hashtags: [],
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Query the events database for events matching the search text.
 *
 * Hashtag searches yield no events. When `account_id` is given, the filter is
 * additionally restricted to that author.
 */
function searchEvents({ q, type, limit, account_id }: SearchQuery): Promise<Event[]> {
  if (type === 'hashtags') {
    return Promise.resolve([]);
  }

  const searchFilter: Filter = account_id
    ? { kinds: typeToKinds(type), search: q, limit, authors: [account_id] }
    : { kinds: typeToKinds(type), search: q, limit };

  return eventsDB.getFilters([searchFilter]);
}
|
||||||
|
|
||||||
|
/**
 * Map the `type` query param to the event kinds that should be searched.
 *
 * `accounts` maps to kind 0, `statuses` to kind 1; anything else (including an
 * omitted type) searches both.
 */
function typeToKinds(type: SearchQuery['type']): number[] {
  if (type === 'accounts') return [0];
  if (type === 'statuses') return [1];
  return [0, 1];
}
|
||||||
|
|
||||||
|
/**
 * Try to resolve the searched value directly into a single event.
 *
 * Builds the lookup filters for the query and asks the mixer for at most one
 * event, with a one-second timeout. Returns `undefined` when nothing matched.
 */
async function lookupEvent(query: SearchQuery): Promise<Event | undefined> {
  const lookupFilters = await getLookupFilters(query);
  const matches = await mixer.getFilters(lookupFilters, { limit: 1, timeout: Time.seconds(1) });
  return matches[0];
}
|
||||||
|
|
||||||
|
/**
 * Build the filters needed to look up the searched value as an identifier.
 *
 * Recognised inputs, checked in order:
 * 1. a bech32 entity (`npub`, `nprofile`, `note`, `nevent`),
 * 2. a 64-character lowercase hex string (tried as both pubkey and event ID),
 * 3. a NIP-05 style name (accounts only).
 *
 * Returns an empty list when `resolve` is off, when searching hashtags, or
 * when the input matches none of the above.
 */
async function getLookupFilters({ q, type, resolve }: SearchQuery): Promise<Filter[]> {
  if (!resolve || type === 'hashtags') {
    return [];
  }

  const wantAccounts = !type || type === 'accounts';
  const wantStatuses = !type || type === 'statuses';
  const found: Filter[] = [];

  const bech32Pattern = new RegExp(`^${nip19.BECH32_REGEX.source}$`);

  if (bech32Pattern.test(q)) {
    try {
      const decoded = nip19.decode(q);

      if (decoded.type === 'npub') {
        if (wantAccounts) found.push({ kinds: [0], authors: [decoded.data] });
      } else if (decoded.type === 'nprofile') {
        if (wantAccounts) found.push({ kinds: [0], authors: [decoded.data.pubkey] });
      } else if (decoded.type === 'note') {
        if (wantStatuses) found.push({ kinds: [1], ids: [decoded.data] });
      } else if (decoded.type === 'nevent') {
        if (wantStatuses) found.push({ kinds: [1], ids: [decoded.data.id] });
      }
    } catch (_e) {
      // A string can look like bech32 and still fail to decode; treat it as no match.
    }
    return found;
  }

  if (/^[0-9a-f]{64}$/.test(q)) {
    // A bare hex string is ambiguous, so look it up as both a pubkey and an event ID.
    if (wantAccounts) found.push({ kinds: [0], authors: [q] });
    if (wantStatuses) found.push({ kinds: [1], ids: [q] });
    return found;
  }

  if (wantAccounts && ACCT_REGEX.test(q)) {
    const pubkey = await lookupNip05Cached(q);
    if (pubkey) {
      found.push({ kinds: [0], authors: [pubkey] });
    }
  }

  return found;
}
|
||||||
|
|
||||||
export { searchController };
|
export { searchController };
|
||||||
|
|
|
@ -6,6 +6,7 @@ import { Conf } from '@/config.ts';
|
||||||
|
|
||||||
interface DittoDB {
|
interface DittoDB {
|
||||||
events: EventRow;
|
events: EventRow;
|
||||||
|
events_fts: EventFTSRow;
|
||||||
tags: TagRow;
|
tags: TagRow;
|
||||||
users: UserRow;
|
users: UserRow;
|
||||||
relays: RelayRow;
|
relays: RelayRow;
|
||||||
|
@ -21,6 +22,11 @@ interface EventRow {
|
||||||
sig: string;
|
sig: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Row shape of the `events_fts` full-text search table. */
interface EventFTSRow {
  /** ID of the event this search entry belongs to. */
  id: string;
  /** Searchable text stored for the event. */
  content: string;
}
|
||||||
|
|
||||||
interface TagRow {
|
interface TagRow {
|
||||||
tag: string;
|
tag: string;
|
||||||
value_1: string | null;
|
value_1: string | null;
|
||||||
|
|
|
@ -2,6 +2,7 @@ import { db, type TagRow } from '@/db.ts';
|
||||||
import { type Event, type Insertable, SqliteError } from '@/deps.ts';
|
import { type Event, type Insertable, SqliteError } from '@/deps.ts';
|
||||||
|
|
||||||
import type { DittoFilter, GetFiltersOpts } from '@/filter.ts';
|
import type { DittoFilter, GetFiltersOpts } from '@/filter.ts';
|
||||||
|
import { jsonMetaContentSchema } from '@/schemas/nostr.ts';
|
||||||
|
|
||||||
type TagCondition = ({ event, count }: { event: Event; count: number }) => boolean;
|
type TagCondition = ({ event, count }: { event: Event; count: number }) => boolean;
|
||||||
|
|
||||||
|
@ -25,6 +26,13 @@ function insertEvent(event: Event): Promise<void> {
|
||||||
})
|
})
|
||||||
.execute();
|
.execute();
|
||||||
|
|
||||||
|
const searchContent = buildSearchContent(event);
|
||||||
|
if (searchContent) {
|
||||||
|
await trx.insertInto('events_fts')
|
||||||
|
.values({ id: event.id, content: searchContent.substring(0, 1000) })
|
||||||
|
.execute();
|
||||||
|
}
|
||||||
|
|
||||||
const tagCounts: Record<string, number> = {};
|
const tagCounts: Record<string, number> = {};
|
||||||
const tags = event.tags.reduce<Insertable<TagRow>[]>((results, tag) => {
|
const tags = event.tags.reduce<Insertable<TagRow>[]>((results, tag) => {
|
||||||
const tagName = tag[0];
|
const tagName = tag[0];
|
||||||
|
@ -111,6 +119,12 @@ function getFilterQuery(filter: DittoFilter) {
|
||||||
query = query.innerJoin('users', 'users.pubkey', 'events.pubkey');
|
query = query.innerJoin('users', 'users.pubkey', 'events.pubkey');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (filter.search) {
|
||||||
|
query = query
|
||||||
|
.innerJoin('events_fts', 'events_fts.id', 'events.id')
|
||||||
|
.where('events_fts.content', 'match', JSON.stringify(filter.search));
|
||||||
|
}
|
||||||
|
|
||||||
return query;
|
return query;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -131,6 +145,7 @@ async function getFilters<K extends number>(
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Get number of events that would be returned by filters. */
|
||||||
async function countFilters<K extends number>(filters: DittoFilter<K>[]): Promise<number> {
|
async function countFilters<K extends number>(filters: DittoFilter<K>[]): Promise<number> {
|
||||||
if (!filters.length) return Promise.resolve(0);
|
if (!filters.length) return Promise.resolve(0);
|
||||||
const query = filters.map(getFilterQuery).reduce((acc, curr) => acc.union(curr));
|
const query = filters.map(getFilterQuery).reduce((acc, curr) => acc.union(curr));
|
||||||
|
@ -143,4 +158,22 @@ async function countFilters<K extends number>(filters: DittoFilter<K>[]): Promis
|
||||||
return Number(count);
|
return Number(count);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Produce the text to index for full-text search.
 *
 * Kind 0 events are indexed by their profile fields, kind 1 events by their
 * raw content. Every other kind returns an empty string.
 */
function buildSearchContent(event: Event): string {
  if (event.kind === 0) {
    return buildUserSearchContent(event as Event<0>);
  }
  return event.kind === 1 ? event.content : '';
}
|
||||||
|
|
||||||
|
/**
 * Build search content for a user (kind 0) event.
 *
 * Joins the non-empty `name`, `nip05` and `about` fields of the profile
 * metadata with newlines.
 *
 * Metadata content is user-supplied and may be malformed. `safeParse` is used
 * so that unparseable content yields an empty string rather than an exception;
 * the caller treats an empty result as "do not index this event".
 */
function buildUserSearchContent(event: Event<0>): string {
  const parsed = jsonMetaContentSchema.safeParse(event.content);
  if (!parsed.success) {
    return '';
  }

  const { name, nip05, about } = parsed.data;
  return [name, nip05, about].filter(Boolean).join('\n');
}
|
||||||
|
|
||||||
export { countFilters, getFilters, insertEvent };
|
export { countFilters, getFilters, insertEvent };
|
||||||
|
|
|
@ -0,0 +1,9 @@
|
||||||
|
import { Kysely, sql } from '@/deps.ts';
|
||||||
|
|
||||||
|
/** Apply the migration: create the `events_fts` FTS5 virtual table with `id` and `content` columns. */
export async function up(db: Kysely<any>): Promise<void> {
  const createFtsTable = sql`CREATE VIRTUAL TABLE events_fts USING fts5(id, content)`;
  await createFtsTable.execute(db);
}
|
||||||
|
|
||||||
|
/** Revert the migration: drop the `events_fts` table. */
export async function down(db: Kysely<any>): Promise<void> {
  const dropFtsTable = db.schema.dropTable('events_fts');
  await dropFtsTable.execute();
}
|
|
@ -50,7 +50,10 @@ export {
|
||||||
export { generateSeededRsa } from 'https://gitlab.com/soapbox-pub/seeded-rsa/-/raw/v1.0.0/mod.ts';
|
export { generateSeededRsa } from 'https://gitlab.com/soapbox-pub/seeded-rsa/-/raw/v1.0.0/mod.ts';
|
||||||
export * as secp from 'npm:@noble/secp256k1@^2.0.0';
|
export * as secp from 'npm:@noble/secp256k1@^2.0.0';
|
||||||
export { LRUCache } from 'npm:lru-cache@^10.0.0';
|
export { LRUCache } from 'npm:lru-cache@^10.0.0';
|
||||||
export { DB as Sqlite, SqliteError } from 'https://deno.land/x/sqlite@v3.7.3/mod.ts';
|
export {
|
||||||
|
DB as Sqlite,
|
||||||
|
SqliteError,
|
||||||
|
} from 'https://raw.githubusercontent.com/alexgleason/deno-sqlite/325f66d8c395e7f6f5ee78ebfa42a0eeea4a942b/mod.ts';
|
||||||
export * as dotenv from 'https://deno.land/std@0.198.0/dotenv/mod.ts';
|
export * as dotenv from 'https://deno.land/std@0.198.0/dotenv/mod.ts';
|
||||||
export {
|
export {
|
||||||
FileMigrationProvider,
|
FileMigrationProvider,
|
||||||
|
@ -60,7 +63,7 @@ export {
|
||||||
type NullableInsertKeys,
|
type NullableInsertKeys,
|
||||||
sql,
|
sql,
|
||||||
} from 'npm:kysely@^0.25.0';
|
} from 'npm:kysely@^0.25.0';
|
||||||
export { DenoSqliteDialect } from 'https://gitlab.com/soapbox-pub/kysely-deno-sqlite/-/raw/v1.0.0/mod.ts';
|
export { DenoSqliteDialect } from 'https://gitlab.com/soapbox-pub/kysely-deno-sqlite/-/raw/v1.0.1/mod.ts';
|
||||||
export { default as tldts } from 'npm:tldts@^6.0.14';
|
export { default as tldts } from 'npm:tldts@^6.0.14';
|
||||||
|
|
||||||
export type * as TypeFest from 'npm:type-fest@^4.3.0';
|
export type * as TypeFest from 'npm:type-fest@^4.3.0';
|
||||||
|
|
|
@ -2,7 +2,7 @@ import { type Event, matchFilters } from '@/deps.ts';
|
||||||
|
|
||||||
import * as client from '@/client.ts';
|
import * as client from '@/client.ts';
|
||||||
import * as eventsDB from '@/db/events.ts';
|
import * as eventsDB from '@/db/events.ts';
|
||||||
import { eventDateComparator } from '@/utils.ts';
|
import { dedupeEvents, eventDateComparator } from '@/utils.ts';
|
||||||
|
|
||||||
import type { DittoFilter, GetFiltersOpts } from '@/filter.ts';
|
import type { DittoFilter, GetFiltersOpts } from '@/filter.ts';
|
||||||
|
|
||||||
|
@ -11,6 +11,8 @@ async function getFilters<K extends number>(
|
||||||
filters: DittoFilter<K>[],
|
filters: DittoFilter<K>[],
|
||||||
opts?: GetFiltersOpts,
|
opts?: GetFiltersOpts,
|
||||||
): Promise<Event<K>[]> {
|
): Promise<Event<K>[]> {
|
||||||
|
if (!filters.length) return Promise.resolve([]);
|
||||||
|
|
||||||
const results = await Promise.allSettled([
|
const results = await Promise.allSettled([
|
||||||
client.getFilters(filters.filter((filter) => !filter.local), opts),
|
client.getFilters(filters.filter((filter) => !filter.local), opts),
|
||||||
eventsDB.getFilters(filters, opts),
|
eventsDB.getFilters(filters, opts),
|
||||||
|
@ -33,11 +35,6 @@ function unmixEvents<K extends number>(events: Event<K>[], filters: DittoFilter<
|
||||||
return events;
|
return events;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Deduplicate events by ID. */
|
|
||||||
function dedupeEvents<K extends number>(events: Event<K>[]): Event<K>[] {
|
|
||||||
return [...new Map(events.map((event) => [event.id, event])).values()];
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Take the newest events among replaceable ones. */
|
/** Take the newest events among replaceable ones. */
|
||||||
function takeNewestEvents<K extends number>(events: Event<K>[]): Event<K>[] {
|
function takeNewestEvents<K extends number>(events: Event<K>[]): Event<K>[] {
|
||||||
const isReplaceable = (kind: number) =>
|
const isReplaceable = (kind: number) =>
|
||||||
|
|
|
@ -29,6 +29,7 @@ const filterSchema = z.object({
|
||||||
since: z.number().int().nonnegative().optional(),
|
since: z.number().int().nonnegative().optional(),
|
||||||
until: z.number().int().nonnegative().optional(),
|
until: z.number().int().nonnegative().optional(),
|
||||||
limit: z.number().int().nonnegative().optional(),
|
limit: z.number().int().nonnegative().optional(),
|
||||||
|
search: z.string().optional(),
|
||||||
}).passthrough().and(
|
}).passthrough().and(
|
||||||
z.record(
|
z.record(
|
||||||
z.custom<`#${string}`>((val) => typeof val === 'string' && val.startsWith('#')),
|
z.custom<`#${string}`>((val) => typeof val === 'string' && val.startsWith('#')),
|
||||||
|
|
|
@ -101,8 +101,14 @@ function isFollowing(source: Event<3>, targetPubkey: string): boolean {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Remove events with duplicate IDs.
 *
 * When an ID occurs more than once, the last occurrence is kept, placed at the
 * position where that ID first appeared.
 */
function dedupeEvents<K extends number>(events: Event<K>[]): Event<K>[] {
  const byId = new Map<string, Event<K>>();
  for (const event of events) {
    byId.set(event.id, event);
  }
  return Array.from(byId.values());
}
|
||||||
|
|
||||||
export {
|
export {
|
||||||
bech32ToPubkey,
|
bech32ToPubkey,
|
||||||
|
dedupeEvents,
|
||||||
eventAge,
|
eventAge,
|
||||||
eventDateComparator,
|
eventDateComparator,
|
||||||
findTag,
|
findTag,
|
||||||
|
|
Loading…
Reference in New Issue