Skip to content

Commit

Permalink
DEV-1387 / Timestamps and date ranges (Pometry#155)
Browse files Browse the repository at this point in the history
* initial implementation for timestamps and date ranges

* simplify time API

* port changes to python API

* fix tests

* add missing tests

* fix python tests

* refactor rust test in graph.rs

* fix notebook notebook.ipynb

* fix notebook demo.ipynb

* rename WindowIterator to WindowSet

* clean up core/time.rs

* add test for timestamp parsing at ingestion

* execute notebooks

* fix history tests

* refactor random_attachment.rs

* change back kernelspec names

* remove installation output from companies_house_example.ipynb

* address comments
  • Loading branch information
ricopinazo authored Apr 21, 2023
1 parent 344b395 commit 25240b7
Show file tree
Hide file tree
Showing 21 changed files with 726 additions and 676 deletions.
4 changes: 2 additions & 2 deletions docbrown/src/algorithms/triangle_count.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ pub fn local_triangle_count<G: GraphViewOps>(graph: &G, v: u64) -> Result<usize,
let vertex = graph.vertex(v).unwrap();

let count = if vertex.degree() >= 2 {
let r: Result<Vec<_>, _> = vertex
let r: Result<Vec<_>, GraphError> = vertex
.neighbours()
.id()
.into_iter()
Expand All @@ -35,7 +35,7 @@ pub fn local_triangle_count<G: GraphViewOps>(graph: &G, v: u64) -> Result<usize,
}

pub fn global_triangle_count<G: GraphViewOps>(graph: &G) -> Result<usize, GraphError> {
let r: Result<Vec<_>, _> = graph
let r: Result<Vec<_>, GraphError> = graph
.vertices()
.into_iter()
.par_bridge()
Expand Down
1 change: 1 addition & 0 deletions docbrown/src/core/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ pub mod tadjset;
mod tcell;
pub mod tgraph;
pub mod tgraph_shard;
pub mod time;
mod tprop;
pub mod utils;
pub mod vertex;
Expand Down
2 changes: 1 addition & 1 deletion docbrown/src/core/tgraph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ impl TemporalGraph {
}) // TODO: use the name here if exists
}

// TODO: remove this ???
// TODO: remove this??? it's only used for tests, we can use the other one instead
pub fn add_edge<T: InputVertex>(&mut self, t: i64, src: T, dst: T, layer: usize) {
self.add_edge_with_props(t, src, dst, &vec![], layer)
}
Expand Down
6 changes: 6 additions & 0 deletions docbrown/src/core/tgraph_shard.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ mod lock {

pub mod errors {
use crate::core::tgraph::errors::MutateGraphError;
use crate::core::time::error::ParseTimeError;

#[derive(thiserror::Error, Debug, PartialEq)]
pub enum GraphError {
Expand All @@ -89,6 +90,11 @@ pub mod errors {
IncorrectPropertyType,
#[error("Failed to mutate graph")]
FailedToMutateGraph { source: MutateGraphError },
#[error("Failed to parse time strings")]
ParseTime {
#[from]
source: ParseTimeError,
},
}
}

Expand Down
251 changes: 251 additions & 0 deletions docbrown/src/core/time.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
use chrono::{DateTime, Duration, NaiveDateTime};
use itertools::{Either, Itertools};
use regex::Regex;

use crate::core::time::error::*;
use std::ops::{Add, Sub};

/// Error types produced while parsing timestamps and interval strings.
pub mod error {
use chrono::ParseError;
use std::num::ParseIntError;

/// Everything that can go wrong when turning text into a timestamp or an interval.
#[derive(thiserror::Error, Debug, Clone, PartialEq)]
pub enum ParseTimeError {
/// The interval string did not split into a non-empty, even number of tokens, so it
/// cannot be read as a sequence of `<number> <unit>` pairs.
#[error("the interval string doesn't contain a complete number of number-unit pairs")]
InvalidPairs,
/// A token in a number position failed integer parsing; wraps the underlying
/// `ParseIntError`, converted automatically through `#[from]`.
#[error(
"one of the tokens in the interval string supposed to be a number couldn't be parsed"
)]
ParseInt {
#[from]
source: ParseIntError,
},
/// A token in a unit position is not one of the recognised unit names; carries the
/// offending token.
#[error("'{0}' is not a valid unit")]
InvalidUnit(String),
/// A timestamp string was rejected by chrono; the chrono error is forwarded unchanged
/// (`transparent`), including its message.
#[error(transparent)]
ParseError(#[from] ParseError),
}
}

/// Conversion of a value into an `i64` timestamp.
///
/// This file implements it for `i64` (returned unchanged) and for `&str` (parsed as a
/// date-time and converted with `timestamp_millis`).
pub trait IntoTime {
/// Returns the timestamp for `self`, or a `ParseTimeError` when the value cannot be
/// interpreted as a point in time.
fn into_time(&self) -> Result<i64, ParseTimeError>;
}

impl IntoTime for i64 {
    /// An `i64` is already a timestamp, so it is handed back untouched; this never fails.
    fn into_time(&self) -> Result<i64, ParseTimeError> {
        let timestamp = *self;
        Ok(timestamp)
    }
}

impl IntoTime for &str {
    /// Parses a timestamp string into milliseconds (via `timestamp_millis`).
    ///
    /// The string is first tried as RFC 3339. If that fails, it is tried as a naive
    /// (offset-less) date-time where every field is mandatory except the milliseconds,
    /// and where the `T` separating date and time may be replaced by a space:
    ///
    /// * `YYYY-MM-DDTHH:MM:SS.mmm`
    /// * `YYYY-MM-DDTHH:MM:SS`
    /// * `YYYY-MM-DD HH:MM:SS.mmm`
    /// * `YYYY-MM-DD HH:MM:SS`
    ///
    /// # Errors
    ///
    /// When no format matches, the error reported is the one produced by the RFC 3339
    /// attempt, wrapped in `ParseTimeError::ParseError`.
    fn into_time(&self) -> Result<i64, ParseTimeError> {
        // Fallback patterns, tried in order after RFC 3339. The fraction-less patterns
        // previously ended in a dangling `%`, which is not a valid specifier, so those
        // attempts could never succeed.
        // NOTE(review): chrono appears to treat the `%.3f` fraction as optional when
        // parsing, which would make the fraction-less patterns redundant — confirm
        // before removing them.
        const NAIVE_FORMATS: [&str; 4] = [
            "%Y-%m-%dT%H:%M:%S%.3f",
            "%Y-%m-%dT%H:%M:%S",
            "%Y-%m-%d %H:%M:%S%.3f",
            "%Y-%m-%d %H:%M:%S",
        ];

        // Keep the RFC 3339 error around: it is what callers get if everything fails.
        let rfc_error = match DateTime::parse_from_rfc3339(self) {
            Ok(datetime) => return Ok(datetime.timestamp_millis()),
            Err(error) => error,
        };

        NAIVE_FORMATS
            .iter()
            .find_map(|format| NaiveDateTime::parse_from_str(self, format).ok())
            .map(|datetime| datetime.timestamp_millis())
            .ok_or_else(|| rfc_error.into())
    }
}

/// Conversion of a value into an `i64` timestamp using a caller-supplied format string.
pub(crate) trait IntoTimeWithFormat {
/// Parses `self` according to `fmt` and returns the resulting timestamp, or a
/// `ParseTimeError` when `self` does not match `fmt`.
fn parse_time(&self, fmt: &str) -> Result<i64, ParseTimeError>;
}

impl IntoTimeWithFormat for &str {
fn parse_time(&self, fmt: &str) -> Result<i64, ParseTimeError> {
Ok(NaiveDateTime::parse_from_str(self, fmt)?.timestamp_millis())
}
}

/// The magnitude of an `Interval`.
#[derive(Clone, Copy, Debug, PartialEq)]
pub(crate) enum IntervalSize {
/// A plain number, added to or subtracted from timestamps as-is.
Discrete(u64),
/// A chrono `Duration`, converted to milliseconds whenever it is applied to a timestamp.
Temporal(Duration),
// Calendar(Duration, Months, Years), // TODO
}

/// A step or window length, built from a `u64` or from a string such as
/// `"1 week and 2 days"`.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Interval {
// Set to `true` when the interval is parsed from a string and to `false` when it is built
// from a `u64` or through `Default`.
// NOTE(review): presumably consumers use this to decide whether windows are aligned to
// the epoch — confirm at the use sites.
pub(crate) epoch_alignment: bool,
// Magnitude of the interval.
size: IntervalSize,
}

impl Default for Interval {
    /// The default interval is a discrete step of size 1 without epoch alignment.
    fn default() -> Self {
        let size = IntervalSize::Discrete(1);
        Self {
            size,
            epoch_alignment: false,
        }
    }
}

impl TryFrom<&str> for Interval {
type Error = ParseTimeError;
fn try_from(value: &str) -> Result<Self, Self::Error> {
let trimmed = value.trim();
let no_and = trimmed.replace("and", "");
let cleaned = {
let re = Regex::new(r"[\s&,]+").unwrap();
re.replace_all(&no_and, " ")
};

let tokens = cleaned.split(" ").collect_vec();

if tokens.len() < 2 || tokens.len() % 2 != 0 {
return Err(ParseTimeError::InvalidPairs);
}

let (durations, errors): (Vec<Duration>, Vec<ParseTimeError>) = tokens
.chunks(2)
.map(|chunk| Self::parse_duration(chunk[0], chunk[1]))
.partition_map(|d| match d {
Ok(d) => Either::Left(d),
Err(e) => Either::Right(e),
});

if errors.is_empty() {
Ok(Self {
epoch_alignment: true,
size: IntervalSize::Temporal(durations.into_iter().reduce(|a, b| a + b).unwrap()),
})
} else {
Err(errors.get(0).unwrap().clone())
}
}
}

impl TryFrom<u64> for Interval {
    type Error = ParseTimeError;

    /// Wraps a plain number as a discrete interval without epoch alignment; never fails.
    fn try_from(value: u64) -> Result<Self, Self::Error> {
        let size = IntervalSize::Discrete(value);
        let interval = Self {
            epoch_alignment: false,
            size,
        };
        Ok(interval)
    }
}

impl Interval {
    /// Returns the size of the interval as a number of milliseconds.
    ///
    /// Return an option because there might be no exact translation to millis for some
    /// intervals. Discrete sizes are returned as-is; temporal sizes are converted with
    /// `num_milliseconds`, yielding `None` if that value is negative instead of letting
    /// it wrap around to a huge unsigned number.
    pub(crate) fn to_millis(&self) -> Option<u64> {
        match self.size {
            IntervalSize::Discrete(millis) => Some(millis),
            IntervalSize::Temporal(duration) => u64::try_from(duration.num_milliseconds()).ok(),
        }
    }

    /// Builds the duration described by one `<number> <unit>` pair.
    ///
    /// Accepted units, in singular or plural form: `week`, `day`, `hour`, `minute`,
    /// `second`, `millisecond`.
    ///
    /// # Errors
    ///
    /// * `ParseTimeError::ParseInt` when `number` is not a non-negative integer that fits
    ///   in an `i64`.
    /// * `ParseTimeError::InvalidUnit` when `unit` is not one of the accepted names.
    ///
    /// NOTE(review): chrono's `Duration` constructors may still panic for amounts that are
    /// valid `i64`s but out of range for the unit — confirm and guard if needed.
    fn parse_duration(number: &str, unit: &str) -> Result<Duration, ParseTimeError> {
        // Validate as unsigned first so that signed amounts such as "-1" keep being
        // rejected exactly as before...
        let _unsigned: u64 = number.parse()?;
        // ...then parse as `i64`, so that amounts above `i64::MAX` are reported as an
        // overflow error instead of silently wrapping into a negative duration.
        let number: i64 = number.parse()?;

        let duration = match unit {
            "week" | "weeks" => Duration::weeks(number),
            "day" | "days" => Duration::days(number),
            "hour" | "hours" => Duration::hours(number),
            "minute" | "minutes" => Duration::minutes(number),
            "second" | "seconds" => Duration::seconds(number),
            "millisecond" | "milliseconds" => Duration::milliseconds(number),
            unit => return Err(ParseTimeError::InvalidUnit(unit.to_string())),
        };
        Ok(duration)
    }
}

impl Sub<Interval> for i64 {
    type Output = i64;

    /// Moves a timestamp backwards by `rhs`: discrete sizes are subtracted as-is,
    /// temporal sizes are subtracted as their number of milliseconds.
    fn sub(self, rhs: Interval) -> Self::Output {
        let step = match rhs.size {
            IntervalSize::Discrete(number) => number as i64,
            IntervalSize::Temporal(duration) => duration.num_milliseconds(),
        };
        self - step
    }
}

impl Add<Interval> for i64 {
    type Output = i64;

    /// Moves a timestamp forwards by `rhs`: discrete sizes are added as-is, temporal
    /// sizes are added as their number of milliseconds.
    fn add(self, rhs: Interval) -> Self::Output {
        let step = match rhs.size {
            IntervalSize::Discrete(number) => number as i64,
            IntervalSize::Temporal(duration) => duration.num_milliseconds(),
        };
        self + step
    }
}

#[cfg(test)]
mod time_tests {
    use crate::core::time::{Interval, ParseTimeError};

    /// Well-formed interval strings must translate to the expected number of milliseconds,
    /// whatever mix of separators (`and`, `&`, `,`, extra spaces) is used.
    #[test]
    fn interval_parsing() {
        let second: u64 = 1000;
        let minute = 60 * second;
        let hour = 60 * minute;
        let day = 24 * hour;
        let week = 7 * day;

        let cases = [
            ("1 day", day),
            ("1 week", week),
            ("4 weeks and 1 day", 4 * week + day),
            ("2 days & 1 millisecond", 2 * day + 1),
            ("2 days, 1 hour, and 2 minutes", 2 * day + hour + 2 * minute),
            ("1 weeks , 1 minute", week + minute),
            (
                "23 seconds and 34 millisecond and 1 minute",
                23 * second + 34 + minute,
            ),
        ];

        for (text, expected) in cases {
            let interval: Interval = text.try_into().unwrap();
            assert_eq!(
                interval.to_millis().unwrap(),
                expected,
                "interval string: {:?}",
                text
            );
        }
    }

    /// Malformed interval strings must be rejected with the matching error variant.
    #[test]
    fn invalid_intervals() {
        // Empty input, a lone number, and a dangling number are all incomplete pairs.
        for text in ["", "1", "1 day and 5"] {
            let result: Result<Interval, ParseTimeError> = text.try_into();
            assert_eq!(
                result,
                Err(ParseTimeError::InvalidPairs),
                "interval string: {:?}",
                text
            );
        }

        let result: Result<Interval, ParseTimeError> = "1 daay".try_into();
        assert_eq!(result, Err(ParseTimeError::InvalidUnit("daay".to_string())));

        // Number and unit swapped: the unit lands in the number position.
        let result: Result<Interval, ParseTimeError> = "day 1".try_into();
        assert!(
            matches!(result, Err(ParseTimeError::ParseInt { .. })),
            "expected a ParseInt error, got {:?}",
            result
        );
    }
}
Loading

0 comments on commit 25240b7

Please sign in to comment.