-
Notifications
You must be signed in to change notification settings - Fork 11.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[indexer-alt] Add obj_info pipeline #20436
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
-- Drops the obj_info table if it is present; does nothing otherwise. | ||
DROP TABLE IF EXISTS obj_info; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
-- A table that keeps track of all the updates to object type and owner information. | ||
-- In particular, whenever an object's presence or ownership changes, we insert a | ||
-- new row into this table. Each row should have a unique (object_id, cp_sequence_number) | ||
-- pair. | ||
-- When implementing consistency queries, we will use this table to find all | ||
-- object IDs that match the given filters bounded by the cursor checkpoint. | ||
-- These object IDs can then be used to look up the latest version of the objects | ||
-- bounded by the given checkpoint in the object_versions table. | ||
CREATE TABLE IF NOT EXISTS obj_info | ||
( | ||
-- The ID of the object this row describes. | ||
object_id BYTEA NOT NULL, | ||
-- The sequence number of the checkpoint in which this change was observed. | ||
cp_sequence_number BIGINT NOT NULL, | ||
-- An enum describing the object's ownership model: | ||
-- | ||
-- Immutable = 0, | ||
-- Address-owned = 1, | ||
-- Object-owned (dynamic field) = 2, | ||
-- Shared = 3. | ||
-- | ||
-- Note that there is a distinction between an object that is owned by | ||
-- another object (kind 2), which relates to dynamic fields, and an object | ||
-- that is owned by another object's address (kind 1), which relates to | ||
-- transfer-to-object. | ||
-- | ||
-- NULL when the row records that the object was deleted or wrapped in this | ||
-- checkpoint (the obj_info pipeline writes NULL to every nullable column of | ||
-- such rows). | ||
owner_kind SMALLINT, | ||
-- The address for address-owned objects, and the parent object for | ||
-- object-owned objects. NULL for shared and immutable objects, and for | ||
-- rows that record a deletion or wrapping. | ||
owner_id BYTEA, | ||
-- The following fields relate to the object's type. These only apply to | ||
-- Move Objects. For Move Packages they will all be NULL. They are also all | ||
-- NULL for rows that record a deletion or wrapping. | ||
-- | ||
-- The type's package ID. | ||
package BYTEA, | ||
-- The type's module name. | ||
module TEXT, | ||
-- The type's name. | ||
name TEXT, | ||
-- The type's type parameters, as a BCS-encoded array of TypeTags. | ||
instantiation BYTEA, | ||
-- At most one row per object per checkpoint. | ||
PRIMARY KEY (object_id, cp_sequence_number) | ||
); | ||
|
||
-- Secondary indexes on obj_info. There are three families: | ||
-- | ||
-- - obj_info_owner: keyed on (owner_kind, owner_id); | ||
-- - obj_info_{pkg,mod,name,inst}: keyed on successively longer prefixes of | ||
--   (package, module, name, instantiation); | ||
-- - obj_info_owner_{pkg,mod,name,inst}: the owner key followed by each of | ||
--   those type prefixes. | ||
-- | ||
-- Every index ends in (cp_sequence_number, object_id). Presumably each index | ||
-- serves one filter shape (by owner, by type, or by both) -- TODO confirm | ||
-- against the queries that read this table. | ||
CREATE INDEX IF NOT EXISTS obj_info_owner | ||
ON obj_info (owner_kind, owner_id, cp_sequence_number, object_id); | ||
|
||
CREATE INDEX IF NOT EXISTS obj_info_pkg | ||
ON obj_info (package, cp_sequence_number, object_id); | ||
|
||
CREATE INDEX IF NOT EXISTS obj_info_mod | ||
ON obj_info (package, module, cp_sequence_number, object_id); | ||
|
||
CREATE INDEX IF NOT EXISTS obj_info_name | ||
ON obj_info (package, module, name, cp_sequence_number, object_id); | ||
|
||
CREATE INDEX IF NOT EXISTS obj_info_inst | ||
ON obj_info (package, module, name, instantiation, cp_sequence_number, object_id); | ||
|
||
CREATE INDEX IF NOT EXISTS obj_info_owner_pkg | ||
ON obj_info (owner_kind, owner_id, package, cp_sequence_number, object_id); | ||
|
||
CREATE INDEX IF NOT EXISTS obj_info_owner_mod | ||
ON obj_info (owner_kind, owner_id, package, module, cp_sequence_number, object_id); | ||
|
||
CREATE INDEX IF NOT EXISTS obj_info_owner_name | ||
ON obj_info (owner_kind, owner_id, package, module, name, cp_sequence_number, object_id); | ||
|
||
CREATE INDEX IF NOT EXISTS obj_info_owner_inst | ||
ON obj_info (owner_kind, owner_id, package, module, name, instantiation, cp_sequence_number, object_id); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
// Copyright (c) Mysten Labs, Inc. | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
use std::{collections::BTreeMap, sync::Arc}; | ||
|
||
use anyhow::{anyhow, Result}; | ||
use diesel_async::RunQueryDsl; | ||
use sui_types::{base_types::ObjectID, full_checkpoint_content::CheckpointData, object::Owner}; | ||
|
||
use crate::{ | ||
db, | ||
models::objects::{StoredObjInfo, StoredOwnerKind}, | ||
pipeline::{concurrent::Handler, Processor}, | ||
schema::obj_info, | ||
}; | ||
|
||
/// Pipeline that derives `obj_info` rows from checkpoint data. It carries no state: all of | ||
/// its behaviour lives in its `Processor` and `Handler` implementations. | ||
pub struct ObjInfo; | ||
|
||
impl Processor for ObjInfo { | ||
const NAME: &'static str = "obj_info"; | ||
type Value = StoredObjInfo; | ||
|
||
/// Turns one checkpoint into the `obj_info` rows it gives rise to, at most one per object. | ||
/// | ||
/// A row is produced for: | ||
/// | ||
/// - every input object that is absent from the checkpoint's latest live output objects; | ||
///   all optional columns of such a row are `None`; | ||
/// - every latest live output object that either has no matching input object or whose | ||
///   owner differs from its input object's owner; such a row carries the owner and type. | ||
/// | ||
/// Objects that appear in both sets with an unchanged owner produce no row. Rows are | ||
/// returned in the key order of the intermediate `BTreeMap`, i.e. ordered by `ObjectID`. | ||
/// | ||
/// # Errors | ||
/// | ||
/// Fails if an object's type parameters cannot be BCS-serialized. | ||
/// | ||
/// # Panics | ||
/// | ||
/// Hits `todo!()` if an object that needs a row is owned via `Owner::ConsensusV2`. | ||
fn process(&self, checkpoint: &Arc<CheckpointData>) -> Result<Vec<Self::Value>> { | ||
// NOTE(review): assumes `sequence_number` is an unsigned 64-bit value, in which case | ||
// `as i64` wraps rather than fails for values above `i64::MAX` -- confirm this is | ||
// acceptable. | ||
let cp_sequence_number = checkpoint.checkpoint_summary.sequence_number as i64; | ||
let checkpoint_input_objects = checkpoint.checkpoint_input_objects(); | ||
// Index the live output objects by ID so they can be looked up from the input side. | ||
let latest_live_output_objects = checkpoint | ||
.latest_live_output_objects() | ||
.into_iter() | ||
.map(|o| (o.id(), o)) | ||
.collect::<BTreeMap<_, _>>(); | ||
// Keyed by object ID, so each object contributes at most one row. | ||
let mut values: BTreeMap<ObjectID, Self::Value> = BTreeMap::new(); | ||
for object_id in checkpoint_input_objects.keys() { | ||
if !latest_live_output_objects.contains_key(object_id) { | ||
// If an input object is not in the latest live output objects, it must have been deleted | ||
// or wrapped in this checkpoint. We keep an entry for it in the table. | ||
// This is necessary when we query objects and iterate over them, so that we don't | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not following here, can you elaborate on the query that needs to use deleted / wrapped entries? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
All queries need to track deleted / wrapped objects, because they will start by finding candidates that meet the ownership/type criteria, and they will follow that up by checking whether there is some later version of the object that supersedes the candidate (in which case we discard it), and an object should be considered superseded if it has been deleted or wrapped.
We have this problem today with
With
|
||
// include the object in the result if it was deleted. | ||
values.insert( | ||
*object_id, | ||
StoredObjInfo { | ||
object_id: object_id.to_vec(), | ||
cp_sequence_number, | ||
owner_kind: None, | ||
owner_id: None, | ||
package: None, | ||
module: None, | ||
name: None, | ||
instantiation: None, | ||
}, | ||
); | ||
} | ||
} | ||
for (object_id, object) in latest_live_output_objects.iter() { | ||
// If an object is newly created/unwrapped in this checkpoint, or if the owner changed, | ||
// we need to insert an entry for it in the table. | ||
let should_insert = match checkpoint_input_objects.get(object_id) { | ||
Some(input_object) => input_object.owner() != object.owner(), | ||
None => true, | ||
}; | ||
if should_insert { | ||
// The four type columns below are all `None` when `object.type_()` returns `None`. | ||
let type_ = object.type_(); | ||
values.insert( | ||
*object_id, | ||
StoredObjInfo { | ||
object_id: object_id.to_vec(), | ||
cp_sequence_number, | ||
owner_kind: Some(match object.owner() { | ||
Owner::AddressOwner(_) => StoredOwnerKind::Address, | ||
Owner::ObjectOwner(_) => StoredOwnerKind::Object, | ||
Owner::Shared { .. } => StoredOwnerKind::Shared, | ||
Owner::Immutable => StoredOwnerKind::Immutable, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (will need to add the case for |
||
Owner::ConsensusV2 { .. } => todo!(), | ||
}), | ||
|
||
// Only address-owned and object-owned objects have an owner ID to record. | ||
owner_id: match object.owner() { | ||
Owner::AddressOwner(a) => Some(a.to_vec()), | ||
Owner::ObjectOwner(o) => Some(o.to_vec()), | ||
Owner::Shared { .. } | Owner::Immutable { .. } => None, | ||
Owner::ConsensusV2 { .. } => todo!(), | ||
}, | ||
|
||
package: type_.map(|t| t.address().to_vec()), | ||
module: type_.map(|t| t.module().to_string()), | ||
name: type_.map(|t| t.name().to_string()), | ||
// `transpose` turns `Option<Result<_>>` into `Result<Option<_>>` so that a | ||
// serialization failure can be propagated with `?`. | ||
instantiation: type_ | ||
.map(|t| bcs::to_bytes(&t.type_params())) | ||
.transpose() | ||
.map_err(|e| { | ||
anyhow!( | ||
"Failed to serialize type parameters for {}: {e}", | ||
object.id().to_canonical_display(/* with_prefix */ true), | ||
) | ||
})?, | ||
}, | ||
); | ||
} | ||
} | ||
|
||
Ok(values.into_values().collect()) | ||
} | ||
} | ||
|
||
#[async_trait::async_trait] | ||
impl Handler for ObjInfo { | ||
/// Writes `values` to the `obj_info` table with a single insert statement. | ||
/// | ||
/// Rows that conflict with an existing row are skipped (`on_conflict_do_nothing`) instead of | ||
/// failing the statement -- presumably so that re-committing the same checkpoint is | ||
/// harmless; TODO confirm. Returns the count reported by `execute`, and propagates any | ||
/// error it raises. | ||
async fn commit(values: &[Self::Value], conn: &mut db::Connection<'_>) -> Result<usize> { | ||
Ok(diesel::insert_into(obj_info::table) | ||
.values(values) | ||
.on_conflict_do_nothing() | ||
.execute(conn) | ||
.await?) | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should the primary key be the other way around, to support the unfiltered query efficiently?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have been thinking about this, and I think that it actually should be `(object_id, cp_sequence_number)`, to support the unfiltered query in a way that is consistent with how we deal with filtering. When we do filter on owner and type, we filter down to a list of object ID entries that match the filtering condition, bounded by the view checkpoint, and join with another table where we find the max cp_sequence_number for each object ID. That second part, where we find the max cp_sequence_number for each object ID, is where we need the index to be `(object_id, cp_sequence_number)`. This should not change when the filter is empty.