forked from sinaptik-ai/pandas-ai
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdf_validator.py
126 lines (98 loc) · 3.17 KB
/
df_validator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
from typing import Dict, List
from pandasai.helpers.df_info import DataFrameType, df_type
from pandasai.pydantic import BaseModel, ValidationError
class DfValidationResult:
    """
    Validation results for a dataframe.

    Attributes:
        passed: Whether the validation passed or not (read-only property).
        errors: Method returning the list of errors collected so far.
    """

    _passed: bool
    _errors: List[Dict]

    def __init__(self, passed: bool = True, errors: List[Dict] = None):
        """
        Args:
            passed: Whether the validation passed or not.
            errors: List of errors if the validation failed. ``None`` (the
                default) is treated as an empty list.
        """
        self._passed = passed
        # Take a copy so that add_error() never mutates the list object the
        # caller handed in (and so two results never share one list).
        self._errors = [] if errors is None else list(errors)

    @property
    def passed(self):
        """Whether the validation passed."""
        return self._passed

    def errors(self) -> List[Dict]:
        """
        Return the errors recorded on this result.

        Note that this is a regular method, not a property: call it as
        ``result.errors()``.

        Returns:
            The internal list of errors (empty when nothing was recorded).
        """
        return self._errors

    def add_error(self, error_message: str):
        """
        Add an error message to the validation results.

        Recording an error also marks the result as failed. The message is
        appended to the error list as-is.

        Args:
            error_message: Error message to add.
        """
        self._passed = False
        self._errors.append(error_message)

    def __bool__(self) -> bool:
        """
        Define the truthiness of ValidationResults.

        Returns:
            True when the validation passed, False otherwise.
        """
        # __bool__ must return a genuine bool; coerce in case a truthy/falsy
        # non-bool value was supplied as ``passed``.
        return bool(self._passed)
class DfValidator:
    """
    Check the rows of a dataframe against a Pydantic schema.

    Attributes:
        df: dataframe whose rows are validated
    """

    _df: DataFrameType

    def __init__(self, df: DataFrameType):
        """
        Args:
            df: dataframe whose rows are validated
        """
        self._df = df

    def _validate_batch(self, schema, df_json: List[Dict]):
        """
        Validate a list of row dicts against the schema in one pass.

        Args:
            schema: Pydantic schema describing a single row
            df_json: dataframe rows as a list of dicts

        Returns:
            list of errors reported by Pydantic; empty when every row is valid
        """
        try:
            # Wrap the row schema in a throwaway model so that the whole list
            # of rows is validated by a single model instantiation.
            class PdVal(BaseModel):
                df: List[schema]

            PdVal(df=df_json)
        except ValidationError as exc:
            return exc.errors()
        else:
            return []

    def _df_to_list_of_dict(self, df: DataFrameType, dataframe_type: str) -> List[Dict]:
        """
        Convert the dataframe rows to a list of dicts, one dict per row.

        The conversion call depends on the dataframe type: "pandas" and
        "modin" use ``to_dict(orient="records")``, "polars" uses
        ``to_dicts()``. Any other type yields an empty list.

        Args:
            df: dataframe to be converted
            dataframe_type: type of dataframe

        Returns:
            list of dict of dataframe rows
        """
        if dataframe_type in {"pandas", "modin"}:
            return df.to_dict(orient="records")

        if dataframe_type == "polars":
            return df.to_dicts()

        return []

    def validate(self, schema: BaseModel) -> DfValidationResult:
        """
        Validate every row of the wrapped dataframe against the schema.

        Args:
            schema: Pydantic schema to be validated for the dataframe row

        Returns:
            Validation results

        Raises:
            ValueError: if the dataframe type cannot be determined
        """
        kind = df_type(self._df)
        if kind is None:
            raise ValueError("Unsupported DataFrame")

        rows: List[Dict] = self._df_to_list_of_dict(self._df, kind)
        found_errors = self._validate_batch(schema, rows)

        # The result passes only when no errors were reported.
        return DfValidationResult(not found_errors, found_errors)