forked from mrgnlabs/marginfi-v2
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Man0s/event pipeline 2 (mrgnlabs#137)
* feat(obs): write to BQ * feat(obs): setup flex template * chore(obs): structure for multiple job definitions * chore(obs): update gitignore * feat(obs): working batch tx process job * feat(obs): modularize for multiple jobs * feat(obs): create tx parsing stream job & add missing events to event generator script * fix(obs): parse JSON stringify data from pub/sub into same format as fed from BQ * feat(obs): containerize indexer bot * feat(obs): avoid reading file for each item for versioned IDL * fix(obs): allow seamless SA auth in indexer bots in GKE * feat(obs): use latest triton geyser protos * fix(obs): silently discard parsing transactions from non-supported programs * feat(obs): WIP account ETL * feat(obs): namespace IDLs by program * feat(obs): working account parsing * feat(obs): ORM for accounts * feat(obs): stream job for accounts ETL * fix(obs): wrong field parsing * feat(obs): setup logging for GCP * feat(obs): downgrade log severities * feat(obs): update BQ schema * feat(obs): add mainnet IDL * fix(obs): add missing pubkey in parsed accounts * feat(obs): ETL mainnet config + snapshot bot WIP * feat(obs): more metrics + lint * feat(obs): working snapshot bot * fix(obs): snapshot bugs * chore: bring indexer in main workspace and fix version conflicts * feat(obs): account snapshot improvements * feat(obs): add bank and mint to parsed liquidity change event * fix(obs): wrong IDL structs padding for ix/event parsing * feat: switchboard oracle support * fix: break down account metrics upload * fix: pin solana deps and update base image rust version for indexer image * chore: sync cli * chore: sync cli * chore: sync fuzz * chore: remove obsolete arg * fix: breaking solana dep update + wrapper struct * chore: format and pin dep * chore: format * chore: remove unused file --------- Co-authored-by: Jakob Povšič <[email protected]>
- Loading branch information
Showing
76 changed files
with
10,309 additions
and
6,151 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# Ignore everything except the packaging files (setup.py, MANIFEST.in) and the source directories listed below. | ||
* | ||
!setup.py | ||
!MANIFEST.in | ||
!dataflow_etls/ | ||
!jobs/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Ignore everything except the packaging files (setup.py, MANIFEST.in), the Dockerfile and the source directories listed below. | ||
* | ||
!setup.py | ||
!MANIFEST.in | ||
!Dockerfile | ||
!dataflow_etls/ | ||
!jobs/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
__pycache__ | ||
.mypy_cache | ||
.venv | ||
poetry.lock | ||
local_file* | ||
parsed_event_* | ||
.idea* | ||
beam-temp-* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,27 +1,24 @@ | ||
FROM gcr.io/dataflow-templates-base/python3-template-launcher-base | ||
FROM gcr.io/dataflow-templates-base/python39-template-launcher-base | ||
|
||
ARG JOB_DIRECTORY | ||
ARG WORKDIR=/dataflow/template | ||
RUN mkdir -p ${WORKDIR} | ||
WORKDIR ${WORKDIR} | ||
|
||
COPY requirements.txt . | ||
COPY job.py . | ||
COPY idls . | ||
COPY dataflow_etls . | ||
COPY setup.py . | ||
COPY MANIFEST.in . | ||
|
||
ENV FLEX_TEMPLATE_PYTHON_REQUIREMENTS_FILE="/${WORKDIR}/requirements.txt" | ||
ENV FLEX_TEMPLATE_PYTHON_PY_FILE="/${WORKDIR}/job.py" | ||
ENV FLEX_TEMPLATE_PYTHON_SETUP_FILE="/${WORKDIR}/setup.py" | ||
|
||
# We could get rid of installing libffi-dev and git, or we could leave them. | ||
RUN apt-get update \ | ||
&& apt-get install -y libffi-dev git \ | ||
&& rm -rf /var/lib/apt/lists/* \ | ||
# Upgrade pip and install the requirements. | ||
&& pip install --no-cache-dir --upgrade pip \ | ||
&& pip install -U apache-beam==2.44.0 \ | ||
&& pip install --no-cache-dir -r $FLEX_TEMPLATE_PYTHON_REQUIREMENTS_FILE \ | ||
# Upgrade pip and install the requirements. | ||
RUN pip install --no-cache-dir --upgrade pip \ | ||
&& pip install --no-cache-dir . \ | ||
# Download the requirements to speed up launching the Dataflow job. | ||
&& pip download --no-cache-dir --dest /tmp/dataflow-etls-requirements-cache -r $FLEX_TEMPLATE_PYTHON_REQUIREMENTS_FILE | ||
&& pip download --no-cache-dir --dest /tmp/dataflow-etls-requirements-cache . | ||
|
||
COPY dataflow_etls/ dataflow_etls/ | ||
COPY ${JOB_DIRECTORY}/job.py . | ||
|
||
# Since we already downloaded all the dependencies, there's no need to rebuild everything. | ||
ENV PIP_NO_DEPS=True |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
include dataflow_etls/idls/**/*.json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
76 changes: 76 additions & 0 deletions
76
observability/etl/dataflow-etls/dataflow_etls/account_parsing.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
import base64 | ||
from dataclasses import asdict | ||
from datetime import datetime | ||
from typing import List, TypedDict, Dict, Any, Tuple, Generator | ||
from decimal import Decimal | ||
|
||
from anchorpy_core.idl import Idl | ||
from based58 import based58 # type: ignore | ||
from solders.pubkey import Pubkey | ||
import apache_beam as beam # type: ignore | ||
from anchorpy.program.common import NamedInstruction as NamedAccountData | ||
|
||
from dataflow_etls.idl_versions import VersionedProgram, IdlPool, Cluster | ||
from dataflow_etls.orm.accounts import ACCOUNT_UPDATE_TO_RECORD_TYPE, AccountUpdateRecord | ||
|
||
class AccountUpdateRaw(TypedDict):
    """Shape of one raw account-update row as consumed by ``parse_account``."""

    # Identifier of the update; only used in diagnostic messages here.
    id: str
    created_at: datetime
    timestamp: datetime
    # Owner program id as a string; parsed with ``Pubkey.from_string``.
    owner: str
    # Converted with ``int()`` before being used for the IDL lookup.
    slot: Decimal
    pubkey: str
    txn_signature: str
    lamports: Decimal
    executable: bool
    rent_epoch: Decimal
    # Base64-encoded account data; decoded with ``base64.b64decode``.
    data: str
|
||
|
||
class OwnerProgramNotSupported(Exception):
    """Raised when no IDL is available for the program that owns an account."""
|
||
|
||
def parse_account(account_update: AccountUpdateRaw, min_idl_version: int, cluster: Cluster,
                  idl_pool: IdlPool) -> List[AccountUpdateRecord]:
    """Decode one raw account update into at most one typed record.

    Args:
        account_update: Raw update row; the ``owner``, ``slot``, ``data`` and
            ``id`` fields are read here.
        min_idl_version: Updates whose resolved IDL version is lower than this
            are dropped.
        cluster: Forwarded to ``VersionedProgram``.
        idl_pool: Provides the raw IDL and its version for a program id and slot.

    Returns:
        A one-element list holding the record built from the parsed account, or
        an empty list when the update is skipped (IDL version below
        ``min_idl_version``, account data that fails to parse, or an account
        type absent from ``ACCOUNT_UPDATE_TO_RECORD_TYPE``).

    Raises:
        OwnerProgramNotSupported: If ``idl_pool.get_idl_for_slot`` raises
            ``KeyError`` for the owner program.
    """
    owner_program_id_str = account_update["owner"]
    owner_program_id = Pubkey.from_string(owner_program_id_str)
    account_update_slot = int(account_update["slot"])

    try:
        idl_raw, idl_version = idl_pool.get_idl_for_slot(owner_program_id_str, account_update_slot)
    except KeyError as err:
        raise OwnerProgramNotSupported(f"Unsupported program {owner_program_id_str}") from err

    # Filter on the version first so that no IDL is parsed and no program is
    # built for updates that are going to be discarded anyway.
    if idl_version < min_idl_version:
        return []

    idl = Idl.from_json(idl_raw)
    program = VersionedProgram(cluster, idl_version, idl, owner_program_id)

    account_data_bytes = base64.b64decode(account_update["data"])

    try:
        parsed_account_data: NamedAccountData = program.coder.accounts.parse(account_data_bytes)
    except Exception as e:
        # Best effort: an undecodable account is reported and skipped rather
        # than failing the caller.
        print(f"failed to parse account data in update {account_update['id']}", e)
        return []

    if parsed_account_data.name not in ACCOUNT_UPDATE_TO_RECORD_TYPE:
        print(f"discarding unsupported account type {parsed_account_data.name} in update {account_update['id']}")
        return []

    # noinspection PyPep8Naming
    AccountUpdateRecordType = ACCOUNT_UPDATE_TO_RECORD_TYPE[parsed_account_data.name]
    return [AccountUpdateRecordType(parsed_account_data, account_update, idl_version)]
|
||
|
||
class DispatchEventsDoFn(beam.DoFn):  # type: ignore
    """Beam ``DoFn`` that emits each record on the tagged output named by ``record.get_tag()``."""

    def process(self, record: AccountUpdateRecord, *args: Any, **kwargs: Any) -> Generator[Any, None, None]:
        """Yield ``record`` wrapped in a ``TaggedOutput`` keyed by its own tag.

        Extra positional and keyword arguments are accepted and ignored.
        """
        yield beam.pvalue.TaggedOutput(record.get_tag(), record)
|
||
|
||
def dictionify_record(record: AccountUpdateRecord) -> Dict[str, Any]:
    """Return ``record`` converted to a plain dict via ``dataclasses.asdict``."""
    return asdict(record)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.