Skip to content

Commit

Permalink
fix: datetime.date in series (apache#20618)
Browse files Browse the repository at this point in the history
  • Loading branch information
zhaoyongjie authored Jul 7, 2022
1 parent c992ff3 commit 414cc99
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 7 deletions.
10 changes: 4 additions & 6 deletions superset/common/query_context_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
from superset.common.chart_data import ChartDataResultFormat
from superset.common.db_query_status import QueryStatus
from superset.common.query_actions import get_query_results
from superset.common.utils import dataframe_utils as df_utils
from superset.common.utils import dataframe_utils
from superset.common.utils.query_cache_manager import QueryCacheManager
from superset.connectors.base.models import BaseDatasource
from superset.constants import CacheRegion
Expand Down Expand Up @@ -231,7 +231,7 @@ def normalize_df(self, df: pd.DataFrame, query_object: QueryObject) -> pd.DataFr
)

if self.enforce_numerical_metrics:
df_utils.df_metrics_to_num(df, query_object)
dataframe_utils.df_metrics_to_num(df, query_object)

df.replace([np.inf, -np.inf], np.nan, inplace=True)

Expand Down Expand Up @@ -322,9 +322,7 @@ def processing_time_offsets( # pylint: disable=too-many-locals
# multi-dimensional charts
granularity = query_object.granularity
index = granularity if granularity in df.columns else DTTM_ALIAS
if not pd.api.types.is_datetime64_any_dtype(
offset_metrics_df.get(index)
):
if not dataframe_utils.is_datetime_series(offset_metrics_df.get(index)):
raise QueryObjectValidationError(
_(
"A time column must be specified "
Expand All @@ -337,7 +335,7 @@ def processing_time_offsets( # pylint: disable=too-many-locals
)

# df left join `offset_metrics_df`
offset_df = df_utils.left_join_df(
offset_df = dataframe_utils.left_join_df(
left_df=df,
right_df=offset_metrics_df,
join_keys=join_keys,
Expand Down
15 changes: 14 additions & 1 deletion superset/common/utils/dataframe_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
# under the License.
from __future__ import annotations

from typing import List, TYPE_CHECKING
import datetime
from typing import Any, List, TYPE_CHECKING

import numpy as np
import pandas as pd
Expand All @@ -42,3 +43,15 @@ def df_metrics_to_num(df: pd.DataFrame, query_object: QueryObject) -> None:
# soft-convert a metric column to numeric
# will stay as strings if conversion fails
df[col] = df[col].infer_objects()


def is_datetime_series(series: Any) -> bool:
    """Return whether ``series`` is a pandas Series holding date-like values.

    A series qualifies when it has at least one non-missing value and either

    * its dtype is a ``datetime64`` dtype, or
    * every non-missing element is a ``datetime.date`` instance
      (``datetime.datetime`` is a subclass of ``datetime.date``).

    Missing values are identified with the pandas notion of "null"
    (``None``, ``NaN``, ``NaT``), so this agrees with the all-missing guard:
    an object column of dates with ``NaN`` gaps is still date-like.

    :param series: the object to inspect; anything that is not a
        ``pd.Series`` (including ``None``) yields ``False``
    :returns: ``True`` if the series is date-like, ``False`` otherwise
    """
    # isinstance() already rejects None, so no separate None check is needed.
    if not isinstance(series, pd.Series):
        return False

    # An empty or entirely-missing series carries no evidence of being dates.
    present = series.dropna()
    if present.empty:
        return False

    if pd.api.types.is_datetime64_any_dtype(series):
        return True

    # Object-dtype fallback: plain Python date/datetime objects.
    return all(isinstance(value, datetime.date) for value in present)
50 changes: 50 additions & 0 deletions tests/unit_tests/common/test_dataframe_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import datetime

import pandas as pd

from superset.common.utils import dataframe_utils


def test_is_datetime_series():
    """Check ``is_datetime_series`` against non-date and date-like inputs."""
    first_day = datetime.date(2018, 1, 1)
    second_day = datetime.date(2018, 1, 2)
    first_moment = datetime.datetime(2018, 1, 1)
    second_moment = datetime.datetime(2018, 1, 2)

    # Inputs that must be rejected: non-series, non-date values, empty and
    # all-missing series.
    rejected = [
        None,
        pd.DataFrame({"foo": [1]}),
        pd.Series([1, 2, 3]),
        pd.Series(["1", "2", "3"]),
        pd.Series(),
        pd.Series([None, None]),
    ]
    for candidate in rejected:
        assert not dataframe_utils.is_datetime_series(candidate)

    # Inputs that must be accepted: date/datetime objects (with or without a
    # trailing None) and datetime64 series built from a date range.
    accepted = [
        pd.Series([first_day, second_day, None]),
        pd.Series([first_day, second_day]),
        pd.Series([first_moment, second_moment, None]),
        pd.Series([first_moment, second_moment]),
        pd.date_range(first_day, datetime.date(2018, 2, 1)).to_series(),
        pd.date_range(first_moment, datetime.datetime(2018, 2, 1)).to_series(),
    ]
    for candidate in accepted:
        assert dataframe_utils.is_datetime_series(candidate)

0 comments on commit 414cc99

Please sign in to comment.