diff --git a/packages/bsky/src/api/app/bsky/actor/searchActorsTypeahead.ts b/packages/bsky/src/api/app/bsky/actor/searchActorsTypeahead.ts index 48996de8f47..9148634fa19 100644 --- a/packages/bsky/src/api/app/bsky/actor/searchActorsTypeahead.ts +++ b/packages/bsky/src/api/app/bsky/actor/searchActorsTypeahead.ts @@ -1,6 +1,9 @@ import AppContext from '../../../../context' import { Server } from '../../../../lexicon' -import { cleanTerm, getUserSearchQuery } from '../../../../services/util/search' +import { + cleanTerm, + getUserSearchQuerySimple, +} from '../../../../services/util/search' export default function (server: Server, ctx: AppContext) { server.app.bsky.actor.searchActorsTypeahead({ @@ -12,7 +15,7 @@ export default function (server: Server, ctx: AppContext) { const term = cleanTerm(rawTerm || '') const results = term - ? await getUserSearchQuery(db, { term, limit }) + ? await getUserSearchQuerySimple(db, { term, limit }) .selectAll('actor') .execute() : [] diff --git a/packages/bsky/src/api/app/bsky/feed/getPostThread.ts b/packages/bsky/src/api/app/bsky/feed/getPostThread.ts index 8fd538146e0..ce019bef047 100644 --- a/packages/bsky/src/api/app/bsky/feed/getPostThread.ts +++ b/packages/bsky/src/api/app/bsky/feed/getPostThread.ts @@ -127,9 +127,19 @@ const composeThread = ( let replies: (ThreadViewPost | NotFoundPost | BlockedPost)[] | undefined if (threadData.replies) { - replies = threadData.replies.map((reply) => - composeThread(reply, feedService, posts, actors, embeds, labels), - ) + replies = threadData.replies.flatMap((reply) => { + const thread = composeThread( + reply, + feedService, + posts, + actors, + embeds, + labels, + ) + // e.g. don't bother including #postNotFound reply placeholders for takedowns. either way matches api contract. + const skip = [] + return isNotFoundPost(thread) ? 
skip : thread + }) } return { diff --git a/packages/bsky/src/api/app/bsky/util/feed.ts b/packages/bsky/src/api/app/bsky/util/feed.ts index a170e7e7c94..9e85f4e974f 100644 --- a/packages/bsky/src/api/app/bsky/util/feed.ts +++ b/packages/bsky/src/api/app/bsky/util/feed.ts @@ -12,7 +12,7 @@ export class FeedKeyset extends TimeCidKeyset { } -// For users with sparse feeds, avoid scanning more than one week for a single page -export const getFeedDateThreshold = (from: string | undefined, days = 7) => { +// For users with sparse feeds, avoid scanning more than `days` days (3 by default) for a single page +export const getFeedDateThreshold = (from: string | undefined, days = 3) => { const timelineDateThreshold = from ? new Date(from) : new Date() timelineDateThreshold.setDate(timelineDateThreshold.getDate() - days) return timelineDateThreshold.toISOString() diff --git a/packages/bsky/src/db/pagination.ts b/packages/bsky/src/db/pagination.ts index a88853c17f9..bd498360ca2 100644 --- a/packages/bsky/src/db/pagination.ts +++ b/packages/bsky/src/db/pagination.ts @@ -1,6 +1,6 @@ -import { SelectQueryBuilder, sql } from 'kysely' +import { sql } from 'kysely' import { InvalidRequestError } from '@atproto/xrpc-server' -import { DbRef } from './util' +import { AnyQb, DbRef } from './util' export type Cursor = { primary: string; secondary: string } export type LabeledResult = { @@ -107,7 +107,7 @@ export class TimeCidKeyset< } export const paginate = < - QB extends SelectQueryBuilder, + QB extends AnyQb, K extends GenericKeyset, >( qb: QB, diff --git a/packages/bsky/src/db/util.ts b/packages/bsky/src/db/util.ts index ab8ef582be8..dfd93e66a17 100644 --- a/packages/bsky/src/db/util.ts +++ b/packages/bsky/src/db/util.ts @@ -2,6 +2,7 @@ import { DummyDriver, DynamicModule, RawBuilder, + SelectQueryBuilder, sql, SqliteAdapter, SqliteIntrospector, @@ -56,3 +57,5 @@ export const dummyDialect = { } export type DbRef = RawBuilder | ReturnType + +export type AnyQb = SelectQueryBuilder diff --git a/packages/bsky/src/feed-gen/best-of-follows.ts b/packages/bsky/src/feed-gen/best-of-follows.ts 
index 129e3d778c2..ba263b3ee6f 100644 --- a/packages/bsky/src/feed-gen/best-of-follows.ts +++ b/packages/bsky/src/feed-gen/best-of-follows.ts @@ -7,30 +7,33 @@ import AppContext from '../context' const handler: AlgoHandler = async ( ctx: AppContext, params: SkeletonParams, - requester: string, + viewer: string, ): Promise => { const { limit, cursor } = params const feedService = ctx.services.feed(ctx.db) + const graphService = ctx.services.graph(ctx.db) const { ref } = ctx.db.db.dynamic // candidates are ranked within a materialized view by like count, depreciated over time. - // @TODO apply blocks and mutes let builder = feedService .selectPostQb() .innerJoin('algo_whats_hot_view as candidate', 'candidate.uri', 'post.uri') - .leftJoin('post_embed_record', 'post_embed_record.postUri', 'post.uri') .where((qb) => qb - .where('post.creator', '=', requester) + .where('post.creator', '=', viewer) .orWhereExists((inner) => inner .selectFrom('follow') - .where('follow.creator', '=', requester) + .where('follow.creator', '=', viewer) .whereRef('follow.subjectDid', '=', 'post.creator'), ), ) + .where((qb) => + graphService.whereNotMuted(qb, viewer, [ref('post.creator')]), + ) + .whereNotExists(graphService.blockQb(viewer, [ref('post.creator')])) .select('candidate.score') .select('candidate.cid') diff --git a/packages/bsky/src/feed-gen/bsky-team.ts b/packages/bsky/src/feed-gen/bsky-team.ts index 36365af9548..8655ab5af2e 100644 --- a/packages/bsky/src/feed-gen/bsky-team.ts +++ b/packages/bsky/src/feed-gen/bsky-team.ts @@ -27,13 +27,17 @@ const handler: AlgoHandler = async ( ): Promise => { const { limit = 50, cursor } = params const feedService = ctx.services.feed(ctx.db) + const graphService = ctx.services.graph(ctx.db) const { ref } = ctx.db.db.dynamic - // @TODO apply blocks and mutes const postsQb = feedService .selectPostQb() .where('post.creator', 'in', BSKY_TEAM) + .where((qb) => + graphService.whereNotMuted(qb, viewer, [ref('post.creator')]), + ) + 
.whereNotExists(graphService.blockQb(viewer, [ref('post.creator')])) const keyset = new FeedKeyset(ref('sortAt'), ref('cid')) diff --git a/packages/bsky/src/feed-gen/hot-classic.ts b/packages/bsky/src/feed-gen/hot-classic.ts index d1559e5f60d..672aee24edb 100644 --- a/packages/bsky/src/feed-gen/hot-classic.ts +++ b/packages/bsky/src/feed-gen/hot-classic.ts @@ -23,10 +23,10 @@ const handler: AlgoHandler = async ( ): Promise => { const { limit = 50, cursor } = params const feedService = ctx.services.feed(ctx.db) + const graphService = ctx.services.graph(ctx.db) const { ref } = ctx.db.db.dynamic - // @TODO apply blocks and mutes const postsQb = feedService .selectPostQb() .leftJoin('post_agg', 'post_agg.uri', 'post.uri') @@ -46,6 +46,10 @@ const handler: AlgoHandler = async ( .orWhereRef('label.uri', '=', ref('post_embed_record.embedUri')), ), ) + .where((qb) => + graphService.whereNotMuted(qb, viewer, [ref('post.creator')]), + ) + .whereNotExists(graphService.blockQb(viewer, [ref('post.creator')])) const keyset = new FeedKeyset(ref('sortAt'), ref('cid')) diff --git a/packages/bsky/src/feed-gen/mutuals.ts b/packages/bsky/src/feed-gen/mutuals.ts index b71a09666ab..d81ee46dd2f 100644 --- a/packages/bsky/src/feed-gen/mutuals.ts +++ b/packages/bsky/src/feed-gen/mutuals.ts @@ -7,21 +7,22 @@ import { FeedKeyset, getFeedDateThreshold } from '../api/app/bsky/util/feed' const handler: AlgoHandler = async ( ctx: AppContext, params: SkeletonParams, - requester: string, + viewer: string, ): Promise => { const { limit = 50, cursor } = params const feedService = ctx.services.feed(ctx.db) + const graphService = ctx.services.graph(ctx.db) const { ref } = ctx.db.db.dynamic const mutualsSubquery = ctx.db.db .selectFrom('follow') - .where('follow.creator', '=', requester) + .where('follow.creator', '=', viewer) .whereExists((qb) => qb .selectFrom('follow as follow_inner') .whereRef('follow_inner.creator', '=', 'follow.subjectDid') - .where('follow_inner.subjectDid', '=', requester) + 
.where('follow_inner.subjectDid', '=', viewer) .selectAll(), ) .select('follow.subjectDid') @@ -29,16 +30,19 @@ const handler: AlgoHandler = async ( const keyset = new FeedKeyset(ref('feed_item.sortAt'), ref('feed_item.cid')) const sortFrom = keyset.unpack(cursor)?.primary - // @TODO apply blocks and mutes let feedQb = feedService .selectFeedItemQb() .where('feed_item.type', '=', 'post') // ensures originatorDid is post.creator .where((qb) => qb - .where('originatorDid', '=', requester) + .where('originatorDid', '=', viewer) .orWhere('originatorDid', 'in', mutualsSubquery), ) .where('feed_item.sortAt', '>', getFeedDateThreshold(sortFrom)) + .where((qb) => + graphService.whereNotMuted(qb, viewer, [ref('originatorDid')]), + ) + .whereNotExists(graphService.blockQb(viewer, [ref('originatorDid')])) feedQb = paginate(feedQb, { limit, cursor, keyset }) diff --git a/packages/bsky/src/feed-gen/whats-hot.ts b/packages/bsky/src/feed-gen/whats-hot.ts index 356e638d0ae..92771581865 100644 --- a/packages/bsky/src/feed-gen/whats-hot.ts +++ b/packages/bsky/src/feed-gen/whats-hot.ts @@ -4,7 +4,7 @@ import { QueryParams as SkeletonParams } from '../lexicon/types/app/bsky/feed/ge import { AlgoHandler, AlgoResponse } from './types' import { GenericKeyset, paginate } from '../db/pagination' import AppContext from '../context' -import { notSoftDeletedClause, valuesList } from '../db/util' +import { valuesList } from '../db/util' import { sql } from 'kysely' import { FeedItemType } from '../services/types' @@ -24,20 +24,16 @@ const handler: AlgoHandler = async ( viewer: string, ): Promise => { const { limit, cursor } = params + const graphService = ctx.services.graph(ctx.db) const { ref } = ctx.db.db.dynamic // candidates are ranked within a materialized view by like count, depreciated over time. 
- // @TODO apply blocks and mutes let builder = ctx.db.db .selectFrom('algo_whats_hot_view as candidate') .innerJoin('post', 'post.uri', 'candidate.uri') - .innerJoin('actor as author', 'author.did', 'post.creator') - .innerJoin('record', 'record.uri', 'post.uri') .leftJoin('post_embed_record', 'post_embed_record.postUri', 'candidate.uri') - .where(notSoftDeletedClause(ref('author'))) - .where(notSoftDeletedClause(ref('record'))) .whereNotExists((qb) => qb .selectFrom('label') @@ -51,6 +47,10 @@ const handler: AlgoHandler = async ( .orWhereRef('label.uri', '=', ref('post_embed_record.embedUri')), ), ) + .where((qb) => + graphService.whereNotMuted(qb, viewer, [ref('post.creator')]), + ) + .whereNotExists(graphService.blockQb(viewer, [ref('post.creator')])) .select([ sql`${'post'}`.as('type'), 'post.uri as uri', diff --git a/packages/bsky/src/feed-gen/with-friends.ts b/packages/bsky/src/feed-gen/with-friends.ts index 65f38bdaecc..6817116f40f 100644 --- a/packages/bsky/src/feed-gen/with-friends.ts +++ b/packages/bsky/src/feed-gen/with-friends.ts @@ -2,7 +2,7 @@ import AppContext from '../context' import { QueryParams as SkeletonParams } from '../lexicon/types/app/bsky/feed/getFeedSkeleton' import { paginate } from '../db/pagination' import { AlgoHandler, AlgoResponse } from './types' -import { FeedKeyset } from '../api/app/bsky/util/feed' +import { FeedKeyset, getFeedDateThreshold } from '../api/app/bsky/util/feed' const handler: AlgoHandler = async ( ctx: AppContext, @@ -11,26 +11,30 @@ const handler: AlgoHandler = async ( ): Promise => { const { cursor, limit = 50 } = params const feedService = ctx.services.feed(ctx.db) + const graphService = ctx.services.graph(ctx.db) const { ref } = ctx.db.db.dynamic - // @NOTE use of getFeedDateThreshold() not currently beneficial to this feed - const keyset = new FeedKeyset(ref('feed_item.sortAt'), ref('feed_item.cid')) + const keyset = new FeedKeyset(ref('post.indexedAt'), ref('post.cid')) + const sortFrom = 
keyset.unpack(cursor)?.primary - // @TODO apply blocks and mutes let postsQb = feedService - .selectFeedItemQb() - .innerJoin('post_agg', 'post_agg.uri', 'feed_item.uri') - .where('feed_item.type', '=', 'post') + .selectPostQb() + .innerJoin('post_agg', 'post_agg.uri', 'post.uri') .where('post_agg.likeCount', '>=', 5) .whereExists((qb) => qb .selectFrom('follow') .where('follow.creator', '=', requester) - .whereRef('follow.subjectDid', '=', 'originatorDid'), + .whereRef('follow.subjectDid', '=', 'post.creator'), ) + .where((qb) => + graphService.whereNotMuted(qb, requester, [ref('post.creator')]), + ) + .whereNotExists(graphService.blockQb(requester, [ref('post.creator')])) + .where('post.indexedAt', '>', getFeedDateThreshold(sortFrom)) - postsQb = paginate(postsQb, { limit, cursor, keyset }) + postsQb = paginate(postsQb, { limit, cursor, keyset, tryIndex: true }) const feedItems = await postsQb.execute() return { @@ -40,51 +44,3 @@ const handler: AlgoHandler = async ( } export default handler - -// Original algorithm, temporarily disabled because of performance issues -// -------------------------- - -// const postRate = sql`(10000 * ${ref('postsCount')} / extract(epoch from ${ref( -// 'user_account.createdAt', -// )}::timestamp))` -// const mostActiveMutuals = await ctx.db.db -// .selectFrom('follow') -// .select('subjectDid as did') -// .innerJoin('user_account', 'user_account.did', 'follow.subjectDid') -// .innerJoin('profile_agg', 'profile_agg.did', 'follow.subjectDid') -// .where('follow.creator', '=', requester) -// .whereExists((qb) => -// qb -// .selectFrom('follow as mutual') -// .where('mutual.subjectDid', '=', requester) -// .whereRef('mutual.creator', '=', 'follow.subjectDid'), -// ) -// .orderBy(postRate, 'desc') -// .limit(25) -// .execute() - -// if (!mostActiveMutuals.length) { -// return { feedItems: [] } -// } - -// // All posts that hit a certain threshold of likes and also have -// // at least one like by one of your most active mutuals. 
-// let postsQb = feedService -// .selectFeedItemQb() -// .innerJoin('post_agg', 'post_agg.uri', 'feed_item.uri') -// .where('feed_item.type', '=', 'post') -// .where('post_agg.likeCount', '>=', 5) -// .whereExists((qb) => { -// return qb -// .selectFrom('like') -// .whereRef('like.subject', '=', 'post.uri') -// .whereRef( -// 'like.creator', -// 'in', -// valuesList(mostActiveMutuals.map((follow) => follow.did)), -// ) -// }) -// .where((qb) => -// accountService.whereNotMuted(qb, requester, [ref('post.creator')]), -// ) -// .whereNotExists(graphService.blockQb(requester, [ref('post.creator')])) diff --git a/packages/bsky/src/services/feed/index.ts b/packages/bsky/src/services/feed/index.ts index 13a222d9338..cbfaaf01be4 100644 --- a/packages/bsky/src/services/feed/index.ts +++ b/packages/bsky/src/services/feed/index.ts @@ -43,13 +43,8 @@ export class FeedService { } selectPostQb() { - const { ref } = this.db.db.dynamic return this.db.db .selectFrom('post') - .innerJoin('actor as author', 'author.did', 'post.creator') - .innerJoin('record', 'record.uri', 'post.uri') - .where(notSoftDeletedClause(ref('author'))) - .where(notSoftDeletedClause(ref('record'))) .select([ sql`${'post'}`.as('type'), 'post.uri as uri', @@ -64,20 +59,9 @@ export class FeedService { } selectFeedItemQb() { - const { ref } = this.db.db.dynamic return this.db.db .selectFrom('feed_item') .innerJoin('post', 'post.uri', 'feed_item.postUri') - .innerJoin('actor as author', 'author.did', 'post.creator') - .innerJoin( - 'actor as originator', - 'originator.did', - 'feed_item.originatorDid', - ) - .innerJoin('record as post_record', 'post_record.uri', 'post.uri') - .where(notSoftDeletedClause(ref('author'))) - .where(notSoftDeletedClause(ref('originator'))) - .where(notSoftDeletedClause(ref('post_record'))) .selectAll('feed_item') .select([ 'post.replyRoot', @@ -126,8 +110,9 @@ export class FeedService { const [actors, labels, listMutes] = await Promise.all([ this.db.db .selectFrom('actor') - 
.where('actor.did', 'in', dids) .leftJoin('profile', 'profile.creator', 'actor.did') + .where('actor.did', 'in', dids) + .where(notSoftDeletedClause(ref('actor'))) .selectAll('actor') .select([ 'profile.uri as profileUri', diff --git a/packages/bsky/src/services/feed/views.ts b/packages/bsky/src/services/feed/views.ts index 1a683e28356..76f54ed127b 100644 --- a/packages/bsky/src/services/feed/views.ts +++ b/packages/bsky/src/services/feed/views.ts @@ -75,7 +75,10 @@ export class FeedViews { const feedPost = { post } if (item.type === 'repost') { const originator = actors[item.originatorDid] - if (originator) { + // skip over reposts where we don't have reposter profile + if (!originator) { + continue + } else { feedPost['reason'] = { $type: 'app.bsky.feed.defs#reasonRepost', by: { diff --git a/packages/bsky/src/services/util/search.ts b/packages/bsky/src/services/util/search.ts index 2b9a383ee62..826854941f7 100644 --- a/packages/bsky/src/services/util/search.ts +++ b/packages/bsky/src/services/util/search.ts @@ -1,7 +1,7 @@ import { sql } from 'kysely' import { InvalidRequestError } from '@atproto/xrpc-server' import Database from '../../db' -import { notSoftDeletedClause, DbRef } from '../../db/util' +import { notSoftDeletedClause, DbRef, AnyQb } from '../../db/util' import { GenericKeyset, paginate } from '../../db/pagination' export const getUserSearchQuery = ( @@ -15,47 +15,102 @@ export const getUserSearchQuery = ( ) => { const { ref } = db.db.dynamic const { term, limit, cursor, includeSoftDeleted } = opts - - // Performing matching by word using "strict word similarity" operator. - // The more characters the user gives us, the more we can ratchet down - // the distance threshold for matching. - const threshold = term.length < 3 ? 
0.9 : 0.8 - // Matching user accounts based on handle const distanceAccount = distance(term, ref('handle')) - let accountsQb = db.db - .selectFrom('actor') - .if(!includeSoftDeleted, (qb) => - qb.where(notSoftDeletedClause(ref('actor'))), - ) - .where(similar(term, ref('handle'))) // Coarse filter engaging trigram index - .where(distanceAccount, '<', threshold) // Refines results from trigram index - .select(['actor.did as did', distanceAccount.as('distance')]) + let accountsQb = getMatchingAccountsQb(db, { term, includeSoftDeleted }) accountsQb = paginate(accountsQb, { limit, cursor, direction: 'asc', keyset: new SearchKeyset(distanceAccount, ref('handle')), }) + // Matching profiles based on display name + const distanceProfile = distance(term, ref('displayName')) + let profilesQb = getMatchingProfilesQb(db, { term, includeSoftDeleted }) + profilesQb = paginate(profilesQb, { + limit, + cursor, + direction: 'asc', + keyset: new SearchKeyset(distanceProfile, ref('handle')), + }) + // Combine and paginate result set + return paginate(combineAccountsAndProfilesQb(db, accountsQb, profilesQb), { + limit, + cursor, + direction: 'asc', + keyset: new SearchKeyset(ref('distance'), ref('handle')), + }) +} +// Takes maximal advantage of trigram index at the expense of ability to paginate. 
+export const getUserSearchQuerySimple = ( + db: Database, + opts: { + term: string + limit: number + }, +) => { + const { ref } = db.db.dynamic + const { term, limit } = opts + // Matching user accounts based on handle + const accountsQb = getMatchingAccountsQb(db, { term }) + .orderBy('distance', 'asc') + .limit(limit) // Matching profiles based on display name + const profilesQb = getMatchingProfilesQb(db, { term }) + .orderBy('distance', 'asc') + .limit(limit) + // Combine and paginate result set + return paginate(combineAccountsAndProfilesQb(db, accountsQb, profilesQb), { + limit, + direction: 'asc', + keyset: new SearchKeyset(ref('distance'), ref('handle')), + }) +} + +// Matching user accounts based on handle +const getMatchingAccountsQb = ( + db: Database, + opts: { term: string; includeSoftDeleted?: boolean }, +) => { + const { ref } = db.db.dynamic + const { term, includeSoftDeleted } = opts + const distanceAccount = distance(term, ref('handle')) + return db.db + .selectFrom('actor') + .if(!includeSoftDeleted, (qb) => + qb.where(notSoftDeletedClause(ref('actor'))), + ) + .where(similar(term, ref('handle'))) // Coarse filter engaging trigram index + .where(distanceAccount, '<', getMatchThreshold(term)) // Refines results from trigram index + .select(['actor.did as did', distanceAccount.as('distance')]) +} + +// Matching profiles based on display name +const getMatchingProfilesQb = ( + db: Database, + opts: { term: string; includeSoftDeleted?: boolean }, +) => { + const { ref } = db.db.dynamic + const { term, includeSoftDeleted } = opts const distanceProfile = distance(term, ref('displayName')) - let profilesQb = db.db + return db.db .selectFrom('profile') .innerJoin('actor', 'actor.did', 'profile.creator') .if(!includeSoftDeleted, (qb) => qb.where(notSoftDeletedClause(ref('actor'))), ) .where(similar(term, ref('displayName'))) // Coarse filter engaging trigram index - .where(distanceProfile, '<', threshold) // Refines results from trigram index - 
.select(['actor.did as did', distanceProfile.as('distance')]) - profilesQb = paginate(profilesQb, { - limit, - cursor, - direction: 'asc', - keyset: new SearchKeyset(distanceProfile, ref('handle')), - }) + .where(distanceProfile, '<', getMatchThreshold(term)) // Refines results from trigram index + .select(['profile.creator as did', distanceProfile.as('distance')]) +} +// Combine profile and account result sets +const combineAccountsAndProfilesQb = ( + db: Database, + accountsQb: AnyQb, + profilesQb: AnyQb, +) => { // Combine user account and profile results, taking best matches from each const emptyQb = db.db .selectFrom('actor') @@ -72,17 +127,9 @@ export const getUserSearchQuery = ( .distinctOn('did') // Per did, take whichever of account and profile distance is best .orderBy('did') .orderBy('distance') - - // Sort and paginate all user results - const allQb = db.db + return db.db .selectFrom(resultsQb.as('results')) .innerJoin('actor', 'actor.did', 'results.did') - return paginate(allQb, { - limit, - cursor, - direction: 'asc', - keyset: new SearchKeyset(ref('distance'), ref('handle')), - }) } // Remove leading @ in case a handle is input that way @@ -95,6 +142,13 @@ const distance = (term: string, ref: DbRef) => // Can utilize trigram index to match on strict word similarity const similar = (term: string, ref: DbRef) => sql`(${term} <<% ${ref})` +const getMatchThreshold = (term: string) => { + // Performing matching by word using "strict word similarity" operator. + // The more characters the user gives us, the more we can ratchet down + // the distance threshold for matching. + return term.length < 3 ? 
0.9 : 0.8 +} + type Result = { distance: number; handle: string } type LabeledResult = { primary: number; secondary: string } export class SearchKeyset extends GenericKeyset { diff --git a/packages/pds/src/app-view/api/app/bsky/actor/searchActorsTypeahead.ts b/packages/pds/src/app-view/api/app/bsky/actor/searchActorsTypeahead.ts index fc34adba5a0..f0868a402ec 100644 --- a/packages/pds/src/app-view/api/app/bsky/actor/searchActorsTypeahead.ts +++ b/packages/pds/src/app-view/api/app/bsky/actor/searchActorsTypeahead.ts @@ -4,7 +4,7 @@ import { Server } from '../../../../../lexicon' import * as Method from '../../../../../lexicon/types/app/bsky/actor/searchActorsTypeahead' import { cleanTerm, - getUserSearchQueryPg, + getUserSearchQuerySimplePg, getUserSearchQuerySqlite, } from '../../../../../services/util/search' import { DidHandle } from '../../../../../db/tables/did-handle' @@ -65,7 +65,7 @@ export default function (server: Server, ctx: AppContext) { } const getResultsPg: GetResultsFn = async (db, { term, limit }) => { - return await getUserSearchQueryPg(db, { term: term || '', limit }) + return await getUserSearchQuerySimplePg(db, { term: term || '', limit }) .selectAll('did_handle') .execute() } diff --git a/packages/pds/src/db/pagination.ts b/packages/pds/src/db/pagination.ts index 73f7e4f0a25..bfc82da6dfa 100644 --- a/packages/pds/src/db/pagination.ts +++ b/packages/pds/src/db/pagination.ts @@ -1,6 +1,6 @@ -import { SelectQueryBuilder, sql } from 'kysely' +import { sql } from 'kysely' import { InvalidRequestError } from '@atproto/xrpc-server' -import { DbRef } from './util' +import { AnyQb, DbRef } from './util' export type Cursor = { primary: string; secondary: string } export type LabeledResult = { @@ -107,7 +107,7 @@ export class TimeCidKeyset< } export const paginate = < - QB extends SelectQueryBuilder, + QB extends AnyQb, K extends GenericKeyset, >( qb: QB, diff --git a/packages/pds/src/db/util.ts b/packages/pds/src/db/util.ts index 8b5b9677515..696ac7dee8b 
100644 --- a/packages/pds/src/db/util.ts +++ b/packages/pds/src/db/util.ts @@ -2,6 +2,7 @@ import { DummyDriver, DynamicModule, RawBuilder, + SelectQueryBuilder, sql, SqliteAdapter, SqliteIntrospector, @@ -33,10 +34,6 @@ export const excluded = (db: DatabaseSchema, col) => { return sql`${db.dynamic.ref(`excluded.${col}`)}` } -export const nullToZero = (ref: DbRef) => { - return sql`coalesce(${ref}, 0)` -} - // Can be useful for large where-in clauses, to get the db to use a hash lookup on the list export const valuesList = (vals: unknown[]) => { return sql`(values (${sql.join(vals, sql`), (`)}))` @@ -58,3 +55,5 @@ export const dummyDialect = { } export type DbRef = RawBuilder | ReturnType + +export type AnyQb = SelectQueryBuilder diff --git a/packages/pds/src/feed-gen/best-of-follows.ts b/packages/pds/src/feed-gen/best-of-follows.ts index 9f930b85545..a88e24fdb8f 100644 --- a/packages/pds/src/feed-gen/best-of-follows.ts +++ b/packages/pds/src/feed-gen/best-of-follows.ts @@ -25,7 +25,6 @@ const handler: AlgoHandler = async ( let builder = feedService .selectPostQb() .innerJoin('algo_whats_hot_view as candidate', 'candidate.uri', 'post.uri') - .leftJoin('post_embed_record', 'post_embed_record.postUri', 'post.uri') .where((qb) => qb .where('post.creator', '=', requester) diff --git a/packages/pds/src/feed-gen/with-friends.ts b/packages/pds/src/feed-gen/with-friends.ts index 3a1051842d2..a4a40364563 100644 --- a/packages/pds/src/feed-gen/with-friends.ts +++ b/packages/pds/src/feed-gen/with-friends.ts @@ -38,7 +38,7 @@ const handler: AlgoHandler = async ( .whereNotExists(graphService.blockQb(requester, [ref('post.creator')])) .where('post.indexedAt', '>', getFeedDateThreshold(sortFrom)) - postsQb = paginate(postsQb, { limit, cursor, keyset }) + postsQb = paginate(postsQb, { limit, cursor, keyset, tryIndex: true }) const feedItems = await postsQb.execute() return { @@ -48,51 +48,3 @@ const handler: AlgoHandler = async ( } export default handler - -// Original algorithm, 
temporarily disabled because of performance issues -// -------------------------- - -// const postRate = sql`(10000 * ${ref('postsCount')} / extract(epoch from ${ref( -// 'user_account.createdAt', -// )}::timestamp))` -// const mostActiveMutuals = await ctx.db.db -// .selectFrom('follow') -// .select('subjectDid as did') -// .innerJoin('user_account', 'user_account.did', 'follow.subjectDid') -// .innerJoin('profile_agg', 'profile_agg.did', 'follow.subjectDid') -// .where('follow.creator', '=', requester) -// .whereExists((qb) => -// qb -// .selectFrom('follow as mutual') -// .where('mutual.subjectDid', '=', requester) -// .whereRef('mutual.creator', '=', 'follow.subjectDid'), -// ) -// .orderBy(postRate, 'desc') -// .limit(25) -// .execute() - -// if (!mostActiveMutuals.length) { -// return { feedItems: [] } -// } - -// // All posts that hit a certain threshold of likes and also have -// // at least one like by one of your most active mutuals. -// let postsQb = feedService -// .selectFeedItemQb() -// .innerJoin('post_agg', 'post_agg.uri', 'feed_item.uri') -// .where('feed_item.type', '=', 'post') -// .where('post_agg.likeCount', '>=', 5) -// .whereExists((qb) => { -// return qb -// .selectFrom('like') -// .whereRef('like.subject', '=', 'post.uri') -// .whereRef( -// 'like.creator', -// 'in', -// valuesList(mostActiveMutuals.map((follow) => follow.did)), -// ) -// }) -// .where((qb) => -// accountService.whereNotMuted(qb, requester, [ref('post.creator')]), -// ) -// .whereNotExists(graphService.blockQb(requester, [ref('post.creator')])) diff --git a/packages/pds/src/services/account/index.ts b/packages/pds/src/services/account/index.ts index 719a4d3ed46..7756c0764c2 100644 --- a/packages/pds/src/services/account/index.ts +++ b/packages/pds/src/services/account/index.ts @@ -5,12 +5,7 @@ import * as scrypt from '../../db/scrypt' import { UserAccountEntry } from '../../db/tables/user-account' import { DidHandle } from '../../db/tables/did-handle' import { RepoRoot } 
from '../../db/tables/repo-root' -import { - DbRef, - countAll, - notSoftDeletedClause, - nullToZero, -} from '../../db/util' +import { DbRef, countAll, notSoftDeletedClause } from '../../db/util' import { getUserSearchQueryPg, getUserSearchQuerySqlite } from '../util/search' import { paginate, TimeCidKeyset } from '../../db/pagination' import * as sequencer from '../../sequencer' @@ -445,14 +440,7 @@ export class AccountService { selectInviteCodesQb() { const ref = this.db.db.dynamic.ref const builder = this.db.db - .with('use_count', (qb) => - qb - .selectFrom('invite_code_use') - .groupBy('code') - .select(['code', countAll.as('uses')]), - ) .selectFrom('invite_code') - .leftJoin('use_count', 'invite_code.code', 'use_count.code') .select([ 'invite_code.code as code', 'invite_code.availableUses as available', @@ -460,7 +448,11 @@ export class AccountService { 'invite_code.forUser as forAccount', 'invite_code.createdBy as createdBy', 'invite_code.createdAt as createdAt', - nullToZero(ref('use_count.uses')).as('uses'), + this.db.db + .selectFrom('invite_code_use') + .select(countAll.as('count')) + .whereRef('invite_code_use.code', '=', ref('invite_code.code')) + .as('uses'), ]) return this.db.db.selectFrom(builder.as('codes')).selectAll() } diff --git a/packages/pds/src/services/util/search.ts b/packages/pds/src/services/util/search.ts index 7bb41ce790f..fc735f14d9d 100644 --- a/packages/pds/src/services/util/search.ts +++ b/packages/pds/src/services/util/search.ts @@ -1,7 +1,7 @@ import { sql } from 'kysely' import { InvalidRequestError } from '@atproto/xrpc-server' import Database from '../../db' -import { notSoftDeletedClause, DbRef } from '../../db/util' +import { notSoftDeletedClause, DbRef, AnyQb } from '../../db/util' import { GenericKeyset, paginate } from '../../db/pagination' // @TODO utilized in both pds and app-view @@ -17,49 +17,106 @@ export const getUserSearchQueryPg = ( ) => { const { ref } = db.db.dynamic const { term, limit, cursor, 
includeSoftDeleted } = opts + // Matching user accounts based on handle + const distanceAccount = distance(term, ref('handle')) + let accountsQb = getMatchingAccountsQb(db, { term, includeSoftDeleted }) + accountsQb = paginate(accountsQb, { + limit, + cursor, + direction: 'asc', + keyset: new SearchKeyset(distanceAccount, ref('handle')), + }) + // Matching profiles based on display name + const distanceProfile = distance(term, ref('displayName')) + let profilesQb = getMatchingProfilesQb(db, { term, includeSoftDeleted }) + profilesQb = paginate( + profilesQb.innerJoin('did_handle', 'did_handle.did', 'profile.creator'), // for handle pagination + { + limit, + cursor, + direction: 'asc', + keyset: new SearchKeyset(distanceProfile, ref('handle')), + }, + ) + // Combine and paginate result set + return paginate(combineAccountsAndProfilesQb(db, accountsQb, profilesQb), { + limit, + cursor, + direction: 'asc', + keyset: new SearchKeyset(ref('distance'), ref('handle')), + }) +} - // Performing matching by word using "strict word similarity" operator. - // The more characters the user gives us, the more we can ratchet down - // the distance threshold for matching. - const threshold = term.length < 3 ? 0.9 : 0.8 - +// Takes maximal advantage of trigram index at the expense of ability to paginate. 
+export const getUserSearchQuerySimplePg = ( + db: Database, + opts: { + term: string + limit: number + }, +) => { + const { ref } = db.db.dynamic + const { term, limit } = opts // Matching user accounts based on handle + const accountsQb = getMatchingAccountsQb(db, { term }) + .orderBy('distance', 'asc') + .limit(limit) + // Matching profiles based on display name + const profilesQb = getMatchingProfilesQb(db, { term }) + .orderBy('distance', 'asc') + .limit(limit) + // Combine and paginate result set + return paginate(combineAccountsAndProfilesQb(db, accountsQb, profilesQb), { + limit, + direction: 'asc', + keyset: new SearchKeyset(ref('distance'), ref('handle')), + }) +} + +// Matching user accounts based on handle +const getMatchingAccountsQb = ( + db: Database, + opts: { term: string; includeSoftDeleted?: boolean }, +) => { + const { ref } = db.db.dynamic + const { term, includeSoftDeleted } = opts const distanceAccount = distance(term, ref('handle')) - let accountsQb = db.db + return db.db .selectFrom('did_handle') .innerJoin('repo_root', 'repo_root.did', 'did_handle.did') .if(!includeSoftDeleted, (qb) => qb.where(notSoftDeletedClause(ref('repo_root'))), ) .where(similar(term, ref('handle'))) // Coarse filter engaging trigram index - .where(distanceAccount, '<', threshold) // Refines results from trigram index + .where(distanceAccount, '<', getMatchThreshold(term)) // Refines results from trigram index .select(['did_handle.did as did', distanceAccount.as('distance')]) - accountsQb = paginate(accountsQb, { - limit, - cursor, - direction: 'asc', - keyset: new SearchKeyset(distanceAccount, ref('handle')), - }) +} - // Matching profiles based on display name +// Matching profiles based on display name +const getMatchingProfilesQb = ( + db: Database, + opts: { term: string; includeSoftDeleted?: boolean }, +) => { + const { ref } = db.db.dynamic + const { term, includeSoftDeleted } = opts const distanceProfile = distance(term, ref('displayName')) - let profilesQb = 
db.db + return db.db .selectFrom('profile') - .innerJoin('did_handle', 'did_handle.did', 'profile.creator') - .innerJoin('repo_root', 'repo_root.did', 'did_handle.did') + .innerJoin('repo_root', 'repo_root.did', 'profile.creator') .if(!includeSoftDeleted, (qb) => qb.where(notSoftDeletedClause(ref('repo_root'))), ) .where(similar(term, ref('displayName'))) // Coarse filter engaging trigram index - .where(distanceProfile, '<', threshold) // Refines results from trigram index - .select(['did_handle.did as did', distanceProfile.as('distance')]) - profilesQb = paginate(profilesQb, { - limit, - cursor, - direction: 'asc', - keyset: new SearchKeyset(distanceProfile, ref('handle')), - }) + .where(distanceProfile, '<', getMatchThreshold(term)) // Refines results from trigram index + .select(['profile.creator as did', distanceProfile.as('distance')]) +} +// Combine profile and account result sets +const combineAccountsAndProfilesQb = ( + db: Database, + accountsQb: AnyQb, + profilesQb: AnyQb, +) => { // Combine user account and profile results, taking best matches from each const emptyQb = db.db .selectFrom('user_account') @@ -76,17 +133,9 @@ export const getUserSearchQueryPg = ( .distinctOn('did') // Per did, take whichever of account and profile distance is best .orderBy('did') .orderBy('distance') - - // Sort and paginate all user results - const allQb = db.db + return db.db .selectFrom(resultsQb.as('results')) .innerJoin('did_handle', 'did_handle.did', 'results.did') - return paginate(allQb, { - limit, - cursor, - direction: 'asc', - keyset: new SearchKeyset(ref('distance'), ref('handle')), - }) } export const getUserSearchQuerySqlite = ( @@ -155,6 +204,13 @@ const distance = (term: string, ref: DbRef) => // Can utilize trigram index to match on strict word similarity const similar = (term: string, ref: DbRef) => sql`(${term} <<% ${ref})` +const getMatchThreshold = (term: string) => { + // Performing matching by word using "strict word similarity" operator. 
+ // The more characters the user gives us, the more we can ratchet down + // the distance threshold for matching. + return term.length < 3 ? 0.9 : 0.8 +} + type Result = { distance: number; handle: string } type LabeledResult = { primary: number; secondary: string } export class SearchKeyset extends GenericKeyset {