Skip to content

Commit

Permalink
Features/taint algo (Pometry#848)
Browse files Browse the repository at this point in the history
* init taint algo

* add vertex in read_vec_partitions

* impl generic taint convergence

* add test

* more tests

* more tests

* init taint notebook

* init taint notebook

* impl py test

* impl generic taint notebook

* impl load stablecoin rust

* add taint tracking

* fix lotr and modify generic taint apis

* impl loader for stablecoins

* revive this if need be

* simplify stable coin impl

* impl generic taint generic over string and u64

* impl stablecoin fetch and tests

* create default dir

* create default dir

* accept data dir as args

* create dirs always

* add err msg
  • Loading branch information
shivam-880 authored May 3, 2023
1 parent b52ae91 commit e212a34
Show file tree
Hide file tree
Showing 20 changed files with 1,062 additions and 284 deletions.
84 changes: 84 additions & 0 deletions examples/py/crypto/stable_coin_analysis.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "5e76e492-a0b9-4199-8c46-bb10d1b15db2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'5': [(6, 13, '2')], '1': [(0, 11, '1')], '2': [(2, 12, '1'), (2, 11, '1'), (0, 11, '2')], '4': [(5, 12, '2')]}Time taken to complete step 1 = 0\n",
"\n",
"Time taken to check convergence = 0\n",
"Completed 1 steps in 0 secs\n",
"Time taken to complete step 1 = 0\n",
"Time taken to check convergence = 0\n"
]
}
],
"source": [
"from raphtory import Graph\n",
"from raphtory import algorithms\n",
"\n",
"# actual data\n",
"g = Graph(1)\n",
"g.add_edge(10, 1, 3, {})\n",
"g.add_edge(11, 1, 2, {})\n",
"g.add_edge(12, 1, 2, {})\n",
"g.add_edge(9, 1, 2, {})\n",
"g.add_edge(12, 2, 4, {})\n",
"g.add_edge(13, 2, 5, {})\n",
"g.add_edge(14, 5, 5, {})\n",
"g.add_edge(14, 5, 4, {})\n",
"g.add_edge(5, 4, 6, {})\n",
"g.add_edge(15, 4, 7, {})\n",
"g.add_edge(10, 4, 7, {})\n",
"g.add_edge(10, 5, 8, {})\n",
"\n",
"actual = algorithms.generic_taint(g, 20, 11, [1, 2], [4, 5])\n",
"expected = {\n",
" '1': [(0, 11, '1')],\n",
" '2': [(2, 12, '1'), (2, 11, '1'), (0, 11, '2')],\n",
" '4': [(5, 12, '2')],\n",
" '5': [(6, 13, '2')],\n",
"}\n",
"\n",
"assert (actual == expected)\n",
"\n",
"print(actual)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "719e89be-da1f-4662-8055-1c1ee658116a",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:pyraphtory] *",
"language": "python",
"name": "conda-env-pyraphtory-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
3 changes: 3 additions & 0 deletions examples/rust/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ name = "healthcheck"
[[bin]]
name = "hulongbay"

[[bin]]
name = "crypto"

[target.x86_64-unknown-linux-gnu]
linker = "/usr/bin/clang"
rustflags = ["-Clink-arg=-fuse-ld=lld", "-Clink-arg=-Wl,--no-rosegment"]
Expand Down
54 changes: 54 additions & 0 deletions examples/rust/src/bin/crypto/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
use std::env;
use std::path::Path;
use raphtory::algorithms::generic_taint::generic_taint;
use raphtory::db::view_api::*;
use raphtory::graph_loader::example::stable_coins::stable_coin_graph;
use serde::Deserialize;
use std::time::Instant;
use raphtory::algorithms::pagerank::unweighted_page_rank;

/// A single deserializable stablecoin transfer record.
///
/// NOTE(review): presumably mirrors one row of the stablecoin CSV data set
/// consumed by the graph loader — confirm the column order against the loader.
/// Nothing in this binary constructs or reads the struct directly.
#[derive(Debug, Deserialize)]
pub struct StableCoin {
    /// Block number, carried as text rather than as an integer.
    block_number: String,
    /// Index of the transaction (presumably within its block — TODO confirm).
    transaction_index: u32,
    /// Address the transfer was sent from.
    from_address: String,
    /// Address the transfer was sent to.
    to_address: String,
    /// Time of the transfer (presumably Unix epoch seconds — TODO confirm).
    time_stamp: i64,
    /// Address of the token contract the transfer belongs to.
    contract_address: String,
    /// Transferred amount (units are not established here — verify upstream).
    value: f64,
}

/// Loads the stablecoin example graph and times PageRank and generic taint on it.
///
/// The first command-line argument, when present, is forwarded to
/// `stable_coin_graph` as the data directory; otherwise `None` is passed.
///
/// # Panics
///
/// Panics if the loaded graph does not have the expected vertex/edge counts,
/// or if the graph has no layer named after the contract address used below.
fn main() {
    // Contract address used as the layer name for the per-layer runs.
    const LAYER: &str = "0xdac17f958d2ee523a2206206994597c13d831ec7";

    // `nth(1)` skips the program name and yields `None` when no argument was given.
    let data_dir = env::args().nth(1);

    let g = stable_coin_graph(data_dir, 1);

    // Sanity-check that the whole data set was loaded.
    assert_eq!(g.num_vertices(), 1523333);
    assert_eq!(g.num_edges(), 2871269);

    println!("Pagerank");
    let now = Instant::now();
    let _ = unweighted_page_rank(&g, i64::MIN..i64::MAX, 20);
    println!("Time taken: {} secs", now.elapsed().as_secs());

    // Same algorithm restricted to a single layer; the layer lookup is
    // deliberately kept inside the timed region, as before.
    let now = Instant::now();
    let _ = unweighted_page_rank(
        &g.layer(LAYER)
            .expect("graph has no layer for the requested contract address"),
        i64::MIN..i64::MAX,
        20,
    );
    println!("Time taken: {} secs", now.elapsed().as_secs());

    println!("Generic taint");
    let now = Instant::now();
    let _ = generic_taint(
        &g.layer(LAYER)
            .expect("graph has no layer for the requested contract address"),
        20,
        1651105815,
        vec!["0xd30b438df65f4f788563b2b3611bd6059bff4ad9"],
        vec![],
    );
    // Report the unit, consistent with the timing lines above.
    println!("Time taken: {} secs", now.elapsed().as_secs());
}
183 changes: 29 additions & 154 deletions examples/rust/src/bin/lotr/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ use raphtory::graph_loader::source::csv_loader::CsvLoader;
use serde::Deserialize;
use std::path::PathBuf;
use std::{env, path::Path, time::Instant};
use itertools::Itertools;
use raphtory::algorithms::generic_taint::generic_taint;
use raphtory::db::view_api::internal::GraphViewInternalOps;
use raphtory::graph_loader::example::lotr_graph::lotr_graph;

#[derive(Deserialize, std::fmt::Debug)]
pub struct Lotr {
Expand Down Expand Up @@ -55,30 +59,32 @@ fn main() {

CsvLoader::new(data_dir)
.load_into_graph(&g, |lotr: Lotr, g: &Graph| {
let src_id = utils::calculate_hash(&lotr.src_id);
let dst_id = utils::calculate_hash(&lotr.dst_id);
let time = lotr.time;

g.add_vertex(
time,
src_id,
&vec![("name".to_string(), Prop::Str("Character".to_string()))],
);
lotr.time,
lotr.src_id.clone(),
&vec![
("type".to_string(), Prop::Str("Character".to_string()))
],
).expect("Failed to add vertex");

g.add_vertex(
time,
src_id,
&vec![("name".to_string(), Prop::Str("Character".to_string()))],
);
lotr.time,
lotr.dst_id.clone(),
&vec![
("type".to_string(), Prop::Str("Character".to_string()))
],
).expect("Failed to add vertex");

g.add_edge(
time,
src_id,
dst_id,
lotr.time,
lotr.src_id.clone(),
lotr.dst_id.clone(),
&vec![(
"name".to_string(),
"type".to_string(),
Prop::Str("Character Co-occurrence".to_string()),
)],
None,
);
).expect("Failed to add edge");
})
.expect("Failed to load graph from CSV data files");

Expand All @@ -90,8 +96,8 @@ fn main() {
now.elapsed().as_secs()
);

// g.save_to_file(encoded_data_dir)
// .expect("Failed to save graph");
g.save_to_file(encoded_data_dir)
.expect("Failed to save graph");

g
};
Expand All @@ -101,141 +107,10 @@ fn main() {

let gandalf = utils::calculate_hash(&"Gandalf");

assert_eq!(gandalf, 8703678510860200260);
assert_eq!(gandalf, 2760374808085341115);
assert!(graph.has_vertex(gandalf));
assert_eq!(graph.vertex(gandalf).unwrap().name(), "Gandalf");

let program_s1 = TriangleCountS1 {};
let program_s2 = TriangleCountS2 {};
let agg = state::def::sum::<u64>(1);

let mut gs = GlobalEvalState::new(graph.clone(), false);

program_s1.run_step(&graph, &mut gs);

program_s2.run_step(&graph, &mut gs);

let actual_tri_count = gs.read_global_state(&agg);

println!("Actual triangle count: {:?}", actual_tri_count);

let program = TriangleCountSlowS2 {};
let agg = state::def::sum::<usize>(0);

let mut gs = GlobalEvalState::new(graph.clone(), false);

program.run_step(&graph, &mut gs);

let actual_tri_count = gs.read_global_state(&agg).map(|v| v / 3);

println!("Actual triangle count: {:?}", actual_tri_count);

// assert_eq!(v.in_degree().unwrap(), 24);
// assert_eq!(v.out_degree().unwrap(), 35);
// assert_eq!(v.degree().unwrap(), 49);
//
// let windowed_graph = graph.window(0, i64::MAX);
// let v = windowed_graph.vertex(gandalf).unwrap().unwrap();
//
// assert_eq!(v.in_degree().unwrap(), 24);
// assert_eq!(v.out_degree().unwrap(), 35);
// assert_eq!(v.degree().unwrap(), 49);
//
// let windowed_graph = graph.window(100, 9000);
// let v = windowed_graph.vertex(gandalf).unwrap().unwrap();

// let windowed_graph = graph.window(100, 9000);
// let v = windowed_graph.vertex(gandalf).unwrap();

// let actual = v
// .out_edges()
// .map(|e| (e.src().id(), e.dst().id()))
// .collect::<Vec<_>>();

// let expected = vec![
// (13840129630991083248, 6768237561757024290),
// (13840129630991083248, 2582862946330553552),
// (13840129630991083248, 13415634039873497660),
// (13840129630991083248, 357812470600089148),
// (13840129630991083248, 17764752901005380738),
// (13840129630991083248, 6484040860173734298),
// (0, 2914346725110218071),
// (0, 5956895584314169235),
// (0, 12936471037316398897),
// (0, 13050559475682228465),
// (0, 13789593425373656861),
// (0, 14223985880962197705),
// ];

// let windowed_graph = graph.window(i64::MIN, i64::MAX);
// let v = windowed_graph.vertex(gandalf).unwrap().unwrap();
// let actual = v
// .out_edges()
// .take(10)
// .map(|e| (e.src().id(), e.dst().id()))
// .collect::<Vec<_>>();

// let windowed_graph = graph.window(i64::MIN, i64::MAX);
// let v = windowed_graph.vertex(gandalf).unwrap();
// let actual = v
// .out_edges()
// .take(10)
// .map(|e| (e.src().id(), e.dst().id()))
// .collect::<Vec<_>>();

// let expected: Vec<(u64, u64)> = vec![
// (13840129630991083248, 12772980705568717046),
// (13840129630991083248, 6768237561757024290),
// (13840129630991083248, 11214194356141027632),
// (13840129630991083248, 2582862946330553552),
// (13840129630991083248, 13415634039873497660),
// (13840129630991083248, 6514938325906662882),
// (13840129630991083248, 13854913496482509346),
// (13840129630991083248, 357812470600089148),
// (13840129630991083248, 17764752901005380738),
// (13840129630991083248, 15044750458947305290),
// ];

// assert_eq!(actual, expected);

// let windowed_graph = graph.window(i64::MIN, i64::MAX);
// let actual = windowed_graph
// .vertices()
// .take(10)
// .map(|tv| tv.id())
// .collect::<Vec<u64>>();

// let expected: Vec<u64> = vec![
// 13840129630991083248,
// 12772980705568717046,
// 8366058037510783370,
// 11638942476191275730,
// 6768237561757024290,
// 13652678879212650868,
// 10620258110842154986,
// 12687378031997996522,
// 11214194356141027632,
// 2582862946330553552,
// ];

// assert_eq!(actual, expected);

// let windowed_graph = graph.window(0, 300);
// let actual = windowed_graph
// .vertices()
// .map(|v| v.id())
// .collect::<Vec<u64>>();

// let expected = vec![
// 13840129630991083248,
// 12772980705568717046,
// 8366058037510783370,
// 11638942476191275730,
// 12936471037316398897,
// 5956895584314169235,
// 5402476312775412883,
// 7320164159843417887,
// ];
// assert_eq!(actual, expected);

// triangle count global
let r = generic_taint(&graph, 20, 31930, vec!["Gandalf"], vec![]);
assert_eq!(r.keys().sorted().collect_vec(), vec!["Gandalf", "Saruman", "Wormtongue"])
}
Loading

0 comments on commit e212a34

Please sign in to comment.