Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RFC: Upgrade sqlalchemy #5

Merged
merged 40 commits into from
Oct 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
3cf8de7
migrate workflows
CrossNox Oct 6, 2022
8c2cdc4
fix error?
CrossNox Oct 6, 2022
3c88ea6
(wip) chore: fix actions syntax errors, multiline scripts, env vars
JDSanto Oct 17, 2022
e380a90
(wip) chore: specify python container
JDSanto Oct 18, 2022
0544300
(wip) chore: specify poetry version, python container for version.yml
JDSanto Oct 18, 2022
3eef593
(wip) chore: remove container in favor of setup-python
JDSanto Oct 18, 2022
52e8bc3
debug(CI): sample workflow
MartinCura Oct 18, 2022
6e3cf16
test action runs on
gvillafanetapia Oct 26, 2022
5e44e3a
Update .github/workflows/test.yml
MartinCura Oct 26, 2022
a496da5
Update .github/workflows/test.yml
MartinCura Oct 26, 2022
a1c9dca
(wip) chore: add container image for run_tests
JDSanto Nov 1, 2022
8d4c42b
(wip) chore: remove sudo for run_tests
JDSanto Nov 1, 2022
1829004
(wip) chore: add container image for all workflows
JDSanto Nov 1, 2022
fc32a68
(wip) fix: add MIN_LINE_RATE var
JDSanto Nov 3, 2022
08bad96
(wip) fix: replace gitlab vars with github vars
JDSanto Nov 3, 2022
10fb8e7
(wip) chore: delete sample workflow
JDSanto Nov 3, 2022
9b6b526
(wip) add echo for testing purposes
JDSanto Nov 3, 2022
2f6874a
(wip) set id for tag output
JDSanto Nov 3, 2022
db448bf
(wip) bump version
JDSanto Nov 3, 2022
7da6ec8
add libraries for sqlalchemy 1.4
leo-mansini Dec 29, 2022
43a7d42
rename numpy object dtype
leo-mansini Dec 29, 2022
7907c18
change ibis import path
leo-mansini Dec 29, 2022
7496753
support immutable sqlalchemy url
leo-mansini Dec 29, 2022
2b88c4d
Merge remote-tracking branch 'origin/migration-from-gitlab' into upgr…
leo-mansini Dec 29, 2022
d761b8b
add implicit optionals
leo-mansini Dec 29, 2022
68b99f6
upgrade ipython
leo-mansini Jan 4, 2023
8db59e4
ipython version was 8.0.1
leo-mansini Jan 4, 2023
4b6b4ea
Revert "ipython version was 8.0.1"
leo-mansini Jan 4, 2023
0bd37be
Revert "upgrade ipython"
leo-mansini Jan 4, 2023
37a0cc4
upgrade ipython
leo-mansini Jan 4, 2023
95d2a30
Remove outdated wrapt
leo-mansini Jan 23, 2023
6a1782d
allow sqlalchemy ver 1.3
leo-mansini Mar 27, 2023
b45e52a
avoid snowflake-sqlalchemy 1.2.5
leo-mansini Mar 27, 2023
9121b11
isort
leo-mansini Mar 27, 2023
375728a
Merge branch 'master' into upgrade-sqlalchemy
leo-mansini Jun 9, 2023
11f76fb
bump pyproject.toml version
leo-mansini Jun 9, 2023
da078a2
nosec on read_pickle
leo-mansini Sep 29, 2023
2ea3644
note on pickle vulnerability
leo-mansini Sep 29, 2023
a5ce98a
support thrift 0.16 and above
leo-mansini Sep 29, 2023
04dbd23
further clarification on unpickling docstring
leo-mansini Oct 2, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 1.4.22
current_version = 1.4.23
commit = False
tag = False

Expand Down
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [1.4.23] - 2023-06-09

### Added

- Support for SQLAlchemy >=1.3,<2.0.

## [1.4.22] - 2023-04-05

### Fixed
Expand Down
7 changes: 6 additions & 1 deletion muttlib/dbconn/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
from contextlib import closing

import pandas as pd
from sqlalchemy import create_engine
from packaging import version
from sqlalchemy.engine.url import URL
from sqlalchemy import __version__ as sqlalchemy_version, create_engine

import muttlib.utils as utils

Expand Down Expand Up @@ -126,6 +127,10 @@ def __setattr__(self, attr, value):
elif attr == 'driver':
value = format_drivername(self.dialect, value)
attr = 'drivername'

if version.parse(sqlalchemy_version) >= version.parse("1.4"):
self.conn_url = self.conn_url.set(**{attr: value})
return self.conn_url
MartinCura marked this conversation as resolved.
Show resolved Hide resolved
else:
_cls = object
target = self
Expand Down
4 changes: 2 additions & 2 deletions muttlib/dbconn/ibis.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,11 +115,11 @@ def _connect(self):
and self.hdfs_port is not None
and self.hdfs_username is not None
):
self.hdfs_client = ibis.backends.impala.hdfs_connect(
self.hdfs_client = ibis.impala.hdfs_connect(
host=self.hdfs_host, port=self.hdfs_port, user=self.hdfs_username
)

client = ibis.backends.impala.connect(
client = ibis.impala.connect(
host=self.host,
port=self.port,
database=self.database,
Expand Down
14 changes: 10 additions & 4 deletions muttlib/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,13 @@ def wrapped_f(*args, **kwargs):


def df_read_multi(fn, index_col=False, quoting=0):
"""Read multiple table disk-formats into a pandas DataFrame."""
"""
Read multiple table disk-formats into a pandas DataFrame.

If the file has a '.pickle' or '.pkl' extension, keep in mind that
unpickling untrusted data is not secure.
https://docs.python.org/3/library/pickle.html
"""
ext = Path(fn).suffix[1:]
if ext == 'csv':
df = pd.read_csv(fn, index_col=index_col, quoting=quoting)
Expand All @@ -183,7 +189,7 @@ def clean_quotes(s):
elif ext == 'feather':
return pd.read_feather(fn)
elif ext in ['pickle', 'pkl']:
return pd.read_pickle(fn)
return pd.read_pickle(fn) # nosec
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For bandit to pass it's either this or removing the feature.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe add a comment to entrypoint function cautioning about passing unsafe pickle files because it's a security issue; apart from that i can't think of anything else you can do?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added to docstring (if by entrypoint you mean the df_read_multi util).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and maybe local_df_cache()

else:
raise ValueError(f"File format '{ext}' not supported!")

Expand Down Expand Up @@ -522,7 +528,7 @@ def str_normalize_pandas(data, str_replace_kws=None):
"""

if isinstance(data, pd.DataFrame):
obj_cols = data.select_dtypes(include=[np.object]).columns
obj_cols = data.select_dtypes(include=[np.object_]).columns
for col in obj_cols:
data[col] = (
data[col]
Expand All @@ -534,7 +540,7 @@ def str_normalize_pandas(data, str_replace_kws=None):
if str_replace_kws:
data[col] = data[col].str.replace(**str_replace_kws)
return data
elif isinstance(data, pd.Series) and data.dtype == np.object:
elif isinstance(data, pd.Series) and data.dtype == np.object_:
data = (
data.str.lower()
.str.lower()
Expand Down
3,823 changes: 2,172 additions & 1,651 deletions poetry.lock

Large diffs are not rendered by default.

15 changes: 7 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "muttlib"
version = "1.4.21"
version = "1.4.23"
description = "Collection of helper modules by Mutt Data."
readme = "README.md"
authors = [ "Mutt Data <[email protected]>",]
Expand Down Expand Up @@ -59,28 +59,27 @@ progressbar2 = "^3.55.0"
PyYAML = ">=5.1,<7.0"
scikit-learn = "^1.0.1"
scipy = "^1.7.2"
SQLAlchemy = "<1.4.0,>=1.3.0"
SQLAlchemy = ">=1.3,<2.0"
numpy = ">=1.22.0,<2.0"
jinjasql = "^0.1.8"
ipython = "^7.29.0"
ipython = ">=8.0.1"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

matplotlib = "^3.5.0"
pyarrow = { version = "6.0.0", optional = true }
cx-Oracle = { version = "^8.3.0", optional = true }
PyHive = { version = ">=0.6.1", optional = true }
thrift = { version = "^0.15.0", optional = true }
thrift = {version = "^0.16.0", optional = true}
psycopg2-binary = { version = "^2.9.2", optional = true }
PyMySQL = { version = "^1.0.2", optional = true }
pymssql = { version = "^2.2.2", optional = true }
pymongo = { version = "^3.12.1", optional = true }
snowflake-connector-python = { version = "^2.7.1", optional = true }
cryptography = { version = "3.4.7", optional = true }
snowflake-sqlalchemy = { version = "1.2.4", optional = true }
snowflake-sqlalchemy = { version = ">=1.2.4, <=1.4.4, !=1.2.5", optional = true } # https://github.com/snowflakedb/snowflake-sqlalchemy/issues/234
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Avoiding 1.2.5 because of incompatibility with sqlalchemy 1.3 (source).

teradatasql = { version = "17.0.0.8", optional = true }
sqlalchemy-trino = { version = "0.4.0", optional = true }
sqlalchemy-redshift = { version = "0.8.6", optional = true }
pyparsing = { version = "<3", optional = true }
ibis-framework = { version = "3.2.0", extras = [ "impala",], optional = true }
google-cloud-bigquery = { version = "~2.34.4", optional = true }
ibis-framework = { version = "1.4.0", extras = [ "impala",], optional = true }
oauth2client = { version = "^4.1.3", optional = true }
requests = { version = "^2.26.0", optional = true }
freezegun = { version = "^1.1.0", optional = true }
Expand All @@ -95,7 +94,6 @@ betamax = { version = "^0.8.1", optional = true }
betamax-serializers = { version = "^0.2.1", optional = true }
holidays = { version = ">=0.10.2", optional = true }
gspread-pandas = { version = "^2.3.0", optional = true }
wrapt = "1.11.2"
types-requests = "^2.27.7"
types-PyMySQL = "^1.0.11"
bandit = { version = "^1.7.2", optional = true }
Expand All @@ -106,6 +104,7 @@ Pillow = ">=9.1.1"
ujson = ">=5.4.0"
PyJWT = ">=2.4.0"
lxml = ">=4.9.1"
trino = {extras = ["sqlalchemy"], version = "0.320.0"}

[tool.poetry.extras]
pyarrow = [ "pyarrow",]
Expand Down
24 changes: 12 additions & 12 deletions tests/dbconn/test_ibis.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def test_temp_hdfs_path_changes_config():


def test_execute_with_no_client():
with patch("ibis.backends.impala") as impala:
with patch("ibis.impala") as impala:
ibis_cli = IbisClient("host")
q = "SELECT *"
ibis_cli.execute(q, return_cursor=False)
Expand All @@ -57,7 +57,7 @@ def test_execute_with_no_client():


def test_execute_with_client():
with patch("ibis.backends.impala") as impala:
with patch("ibis.impala") as impala:
ibis_cli = IbisClient("host")
client = MagicMock()
q = "SELECT *"
Expand All @@ -67,7 +67,7 @@ def test_execute_with_client():


def test_execute_returns_cursor():
with patch("ibis.backends.impala") as impala:
with patch("ibis.impala") as impala:
ibis_cli = IbisClient("host")
client = MagicMock()
q = "SELECT *"
Expand All @@ -76,7 +76,7 @@ def test_execute_returns_cursor():


def test_to_frame_via_hdfs_and_no_hdfs_client_fails():
with patch("ibis.backends.impala") as impala:
with patch("ibis.impala") as impala:
ibis_cli = IbisClient("host")
q = "SELECT *"
with pytest.raises(ValueError, match=r".*hdfs\Wclient.*"):
Expand All @@ -85,7 +85,7 @@ def test_to_frame_via_hdfs_and_no_hdfs_client_fails():


def test_to_frame_via_hdfs_and_no_cache_dir_fails():
with patch("ibis.backends.impala") as impala:
with patch("ibis.impala") as impala:
ibis_cli = IbisClient(
"host",
hdfs_host="",
Expand All @@ -99,7 +99,7 @@ def test_to_frame_via_hdfs_and_no_cache_dir_fails():


def test_to_frame_via_hdfs_and_refresh_cache():
with patch("ibis.backends.impala") as impala, patch("shutil.rmtree") as rm, patch(
with patch("ibis.impala") as impala, patch("shutil.rmtree") as rm, patch(
"pyarrow.parquet.read_table"
) as pq:
ibis_cli = IbisClient("host", hdfs_host="", hdfs_port="", hdfs_username="")
Expand All @@ -112,7 +112,7 @@ def test_to_frame_via_hdfs_and_refresh_cache():


def test_to_frame_via_hdfs_creates_tmp_table():
with patch("ibis.backends.impala") as impala, patch(
with patch("ibis.impala") as impala, patch(
"muttlib.dbconn.ibis.urlparse"
) as parse, patch("muttlib.utils.make_dirs") as make_dirs, patch(
"muttlib.dbconn.ibis.pq"
Expand Down Expand Up @@ -153,7 +153,7 @@ def test_to_frame_via_hdfs_creates_tmp_table():


def test_to_frame_via_hdfs_create_tmp_table_fails():
with patch("ibis.backends.impala") as impala, patch(
with patch("ibis.impala") as impala, patch(
"muttlib.dbconn.ibis.sleep"
) as sleep, patch("muttlib.dbconn.ibis.urlparse") as parse, patch(
"muttlib.utils.make_dirs"
Expand All @@ -170,7 +170,7 @@ def test_to_frame_via_hdfs_create_tmp_table_fails():


def test_to_frame_via_hdfs_and_erase_cache_dir():
with patch("ibis.backends.impala") as impala, patch("shutil.rmtree") as rm, patch(
with patch("ibis.impala") as impala, patch("shutil.rmtree") as rm, patch(
"pyarrow.parquet.read_table"
) as pq:
local_tmp_table_dir = MagicMock()
Expand All @@ -184,7 +184,7 @@ def test_to_frame_via_hdfs_and_erase_cache_dir():


def test_to_frame_no_hdfs_returns_empty_df():
with patch("ibis.backends.impala") as impala:
with patch("ibis.impala") as impala:
ibis_cli = IbisClient("host")
q = "example query"
impala.connect.return_value.raw_sql.return_value.fetchall.return_value = None
Expand All @@ -197,7 +197,7 @@ def test_to_frame_no_hdfs_returns_empty_df():


def test_to_frame_no_hdfs_returns_non_empty_df():
with patch("ibis.backends.impala") as impala:
with patch("ibis.impala") as impala:
ibis_cli = IbisClient("host")
q = "example query"
values = [[1, 2], [3, 4]]
Expand All @@ -214,7 +214,7 @@ def test_to_frame_no_hdfs_returns_non_empty_df():


def test_to_frame_no_hdfs_fails_and_closes_connection():
with patch("ibis.backends.impala") as impala:
with patch("ibis.impala") as impala:
ibis_cli = IbisClient("host")
q = "example query"
impala.connect.return_value.raw_sql.side_effect = ValueError("test error")
Expand Down