From f53f9128939bf178f6dbd0f294d4cbfbdc1481dc Mon Sep 17 00:00:00 2001 From: "Benjamin A. Beasley" Date: Tue, 26 Sep 2023 14:08:21 -0400 Subject: [PATCH 1/8] Remove executable bit from filesystem permissions of README.md An executable readme makes no sense, of course. --- README.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 README.md diff --git a/README.md b/README.md old mode 100755 new mode 100644 From c9c6d0cc6af7444122e87e0c4c61d994b012a3ae Mon Sep 17 00:00:00 2001 From: "Benjamin A. Beasley" Date: Tue, 26 Sep 2023 23:24:25 -0400 Subject: [PATCH 2/8] Replace deprecated license_file with license_files in setup.cfg --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 0522f13..cadb074 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] description_file = README.md -license_file = LICENSE +license_files = LICENSE [flake8] max-line-length=120 From 3b692bb4a07b2cedb90337eb46ea087274e4cc76 Mon Sep 17 00:00:00 2001 From: gram Date: Wed, 27 Sep 2023 08:42:10 +0200 Subject: [PATCH 3/8] sort imports --- tests/test_edit/test_damerau_levenshtein.py | 1 + textdistance/algorithms/base.py | 1 + textdistance/algorithms/compression_based.py | 2 ++ textdistance/algorithms/edit_based.py | 4 +++- textdistance/algorithms/phonetic.py | 7 ++++--- textdistance/algorithms/sequence_based.py | 4 ++++ textdistance/algorithms/simple.py | 2 ++ textdistance/algorithms/token_based.py | 1 + textdistance/algorithms/types.py | 1 + textdistance/algorithms/vector_based.py | 3 ++- textdistance/benchmark.py | 3 ++- textdistance/libraries.py | 1 + textdistance/utils.py | 1 + 13 files changed, 25 insertions(+), 6 deletions(-) diff --git a/tests/test_edit/test_damerau_levenshtein.py b/tests/test_edit/test_damerau_levenshtein.py index 27f6abf..10856ac 100644 --- a/tests/test_edit/test_damerau_levenshtein.py +++ b/tests/test_edit/test_damerau_levenshtein.py @@ -4,6 +4,7 @@ # project import textdistance + ALG = textdistance.DamerauLevenshtein COMMON = [ diff --git a/textdistance/algorithms/base.py b/textdistance/algorithms/base.py index e82902a..3555884 100644 --- a/textdistance/algorithms/base.py +++ b/textdistance/algorithms/base.py @@ -1,4 +1,5 @@ from __future__ import annotations + # built-in from collections import Counter from contextlib import suppress diff --git a/textdistance/algorithms/compression_based.py b/textdistance/algorithms/compression_based.py index bc8aecb..9b16e4d 100644 --- a/textdistance/algorithms/compression_based.py +++ b/textdistance/algorithms/compression_based.py @@ -1,4 +1,5 @@ from __future__ import annotations + # built-in import codecs import math @@ -12,6 +13,7 @@ try: + # built-in import lzma except ImportError: lzma = None # type: ignore[assignment] diff --git a/textdistance/algorithms/edit_based.py b/textdistance/algorithms/edit_based.py index 9230352..bc75c0a 100644 --- a/textdistance/algorithms/edit_based.py +++ b/textdistance/algorithms/edit_based.py @@ -1,4 +1,5 @@ from __future__ import annotations + # built-in from collections import defaultdict from itertools import zip_longest @@ -6,10 +7,11 @@ # app from .base import Base as _Base, BaseSimilarity as _BaseSimilarity -from .types import TestFunc, SimFunc +from .types import SimFunc, TestFunc try: + # external import numpy except ImportError: numpy = None # type: ignore[assignment] diff --git a/textdistance/algorithms/phonetic.py b/textdistance/algorithms/phonetic.py index cf3849d..53cbc0d 100644 --- a/textdistance/algorithms/phonetic.py +++ b/textdistance/algorithms/phonetic.py @@ -1,15 +1,16 @@ from __future__ import annotations + # built-in from collections import defaultdict -from itertools import groupby +from itertools import groupby, zip_longest +from typing import Any, Iterator, Sequence, TypeVar # app from .base import Base as _Base, BaseSimilarity as _BaseSimilarity -from itertools import zip_longest -from typing import Any, Iterator, Sequence, TypeVar try: + # external import numpy except ImportError: numpy = None # type: ignore[assignment] diff --git a/textdistance/algorithms/sequence_based.py b/textdistance/algorithms/sequence_based.py index 9fcb69d..b1fd7af 100644 --- a/textdistance/algorithms/sequence_based.py +++ b/textdistance/algorithms/sequence_based.py @@ -1,4 +1,5 @@ from __future__ import annotations + # built-in from difflib import SequenceMatcher as _SequenceMatcher from typing import Any @@ -8,9 +9,12 @@ from .base import BaseSimilarity as _BaseSimilarity from .types import TestFunc + try: + # external import numpy except ImportError: + # built-in from array import array numpy = None # type: ignore[assignment] diff --git a/textdistance/algorithms/simple.py b/textdistance/algorithms/simple.py index eccde0c..28f8948 100644 --- a/textdistance/algorithms/simple.py +++ b/textdistance/algorithms/simple.py @@ -1,4 +1,5 @@ from __future__ import annotations + # built-in from itertools import takewhile from typing import Sequence @@ -7,6 +8,7 @@ from .base import Base as _Base, BaseSimilarity as _BaseSimilarity from .types import SimFunc + __all__ = [ 'Prefix', 'Postfix', 'Length', 'Identity', 'Matrix', 'prefix', 'postfix', 'length', 'identity', 'matrix', diff --git a/textdistance/algorithms/token_based.py b/textdistance/algorithms/token_based.py index fb0466a..82f5dc9 100644 --- a/textdistance/algorithms/token_based.py +++ b/textdistance/algorithms/token_based.py @@ -1,4 +1,5 @@ from __future__ import annotations + # built-in from functools import reduce from itertools import islice, permutations, repeat diff --git a/textdistance/algorithms/types.py b/textdistance/algorithms/types.py index a4b8835..c6bd195 100644 --- a/textdistance/algorithms/types.py +++ b/textdistance/algorithms/types.py @@ -1,4 +1,5 @@ +# built-in from typing import Callable, Optional, TypeVar diff --git a/textdistance/algorithms/vector_based.py b/textdistance/algorithms/vector_based.py index 0aa4724..ce78544 100644 --- a/textdistance/algorithms/vector_based.py +++ b/textdistance/algorithms/vector_based.py @@ -3,13 +3,14 @@ """ # built-in from functools import reduce +from typing import Any # app from .base import Base as _Base, BaseSimilarity as _BaseSimilarity -from typing import Any try: + # external import numpy except ImportError: numpy = None # type: ignore[assignment] diff --git a/textdistance/benchmark.py b/textdistance/benchmark.py index b6f92cd..3252569 100644 --- a/textdistance/benchmark.py +++ b/textdistance/benchmark.py @@ -1,8 +1,9 @@ from __future__ import annotations + # built-in import json -from collections import defaultdict import math +from collections import defaultdict from timeit import timeit from typing import Iterable, Iterator, NamedTuple diff --git a/textdistance/libraries.py b/textdistance/libraries.py index cf88e93..39045aa 100644 --- a/textdistance/libraries.py +++ b/textdistance/libraries.py @@ -1,4 +1,5 @@ from __future__ import annotations + # built-in import json from collections import defaultdict diff --git a/textdistance/utils.py b/textdistance/utils.py index 995b524..e37aa63 100644 --- a/textdistance/utils.py +++ b/textdistance/utils.py @@ -1,4 +1,5 @@ from __future__ import annotations + # built-in from itertools import permutations, product from typing import Sequence From 1aaeac777866ab77e84edfd5c581935567add399 Mon Sep 17 00:00:00 2001 From: gram Date: Wed, 27 Sep 2023 08:42:45 +0200 Subject: [PATCH 4/8] better tasks --- Taskfile.yml | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/Taskfile.yml b/Taskfile.yml index 19fa8ea..89d0611 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -48,19 +48,18 @@ tasks: cmds: - "{{.LINT_ENV}}/bin/twine upload dist/textdistance-*" - flake8:run: + flake8: deps: - lint:install cmds: - "{{.LINT_ENV}}/bin/flake8 ." - mypy:run: + mypy: deps: - lint:install cmds: - "{{.LINT_ENV}}/bin/mypy" - - pytest-pure:run: + pytest-pure: deps: - task: pip:install vars: @@ -69,7 +68,7 @@ tasks: cmds: - "{{.TEST_PURE_ENV}}/bin/pytest -m 'not external' {{.CLI_ARGS}}" - pytest-external:run: + pytest-external: deps: - task: pip:install vars: @@ -78,12 +77,18 @@ tasks: cmds: - "{{.TEST_EXT_ENV}}/bin/pytest {{.CLI_ARGS}}" - isort:run: + isort: deps: - lint:install cmds: - "{{.LINT_ENV}}/bin/isort ." + isort:check: + deps: + - lint:install + cmds: + - "{{.LINT_ENV}}/bin/isort --check ." + benchmark: deps: - task: pip:install @@ -92,3 +97,26 @@ tasks: EXTRA: benchmark cmds: - "{{.BENCHMARK_ENV}}/bin/python3 -m textdistance.benchmark" + + # groups + format: + desc: "run all code formatters" + cmds: + - task: isort + lint: + desc: "run all linters" + cmds: + - task: flake8 + # - task: mypy + - task: isort:check + test: + desc: "run all tests" + cmds: + - task: pytest-pure + - task: pytest-external + all: + desc: "run all code formatters, linters, and tests" + cmds: + - task: format + - task: lint + - task: test From 29285755c6f531706a865ecb43247a524adc5128 Mon Sep 17 00:00:00 2001 From: gram Date: Wed, 27 Sep 2023 08:45:03 +0200 Subject: [PATCH 5/8] Github Actions --- .drone.star | 53 -------------------------- .github/workflows/main.yml | 77 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 53 deletions(-) delete mode 100644 .drone.star create mode 100644 .github/workflows/main.yml diff --git a/.drone.star b/.drone.star deleted file mode 100644 index 4668ed8..0000000 --- a/.drone.star +++ /dev/null @@ -1,53 +0,0 @@ -def main(ctx): - return dict( - kind="pipeline", - type="docker", - name="default", - trigger=dict(branch="master"), - steps=[ - dict( - name="install task", - image="debian:latest", - commands=[ - "apt update", - "apt install -y wget", - "wget https://taskfile.dev/install.sh", - "sh install.sh", - "rm install.sh", - ], - ), - - step(env="pytest-pure", python="3.7"), - step(env="pytest-pure", python="3.8"), - step(env="pytest-pure", python="3.9"), - step(env="pytest-pure", python="3.10"), - - step(env="pytest-external", python="3.7"), - step(env="pytest-external", python="3.8"), - step(env="pytest-external", python="3.9"), - # step(env="pytest-external", python="3.10"), - - step(env="flake8", python="3.9"), - ], - ) - - -def step(env, python): - result = dict( - name="{} (py{})".format(env, python), - image="python:{}-buster".format(python), - depends_on=["install task"], - environment=dict( - # set coverage database file name to avoid conflicts between steps - COVERAGE_FILE=".coverage.{}.{}".format(env, python), - ), - commands=[ - "apt update", - "apt install -y curl git gcc libc-dev build-essential", - "./bin/task PYTHON_BIN=python3 VENVS=/opt/py{python}/ -f {env}:run".format( - python=python, - env=env, - ), - ], - ) - return result diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..cd5181c --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,77 @@ +name: main + +on: + push: + branches: + - master + pull_request: + workflow_dispatch: + +concurrency: + group: ${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.8" + - uses: arduino/setup-task@v1 + with: + repo-token: ${{ github.token }} + - run: task lint + + pytest-pure: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: + - "3.8" + - "3.9" + - "3.10" + - "3.11" + # - "3.12.0-rc.1" + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - uses: arduino/setup-task@v1 + with: + repo-token: ${{ github.token }} + - run: task pytest-pure + + pytest-external: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: + - "3.8" + - "3.9" + - "3.10" + - "3.11" + # - "3.12.0-rc.1" + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - uses: arduino/setup-task@v1 + with: + repo-token: ${{ github.token }} + - run: task pytest-external + + markdownlint-cli: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: nosborn/github-action-markdown-cli@v3.2.0 + with: + files: . + config_file: .markdownlint.yaml + dot: true From adde1744e8a2100b6a6308c9f6921b276b2385c5 Mon Sep 17 00:00:00 2001 From: gram Date: Wed, 27 Sep 2023 08:46:12 +0200 Subject: [PATCH 6/8] trigger CI From c8dc2f628f0a2952f19975714db5a1a71f12dcd9 Mon Sep 17 00:00:00 2001 From: gram Date: Wed, 27 Sep 2023 12:26:54 +0200 Subject: [PATCH 7/8] markdownlint config, disable external tests --- .github/workflows/main.yml | 40 +++++++++++++++++++------------------- .markdownlint.yaml | 8 ++++++++ 2 files changed, 28 insertions(+), 20 deletions(-) create mode 100644 .markdownlint.yaml diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index cd5181c..6ec3e95 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -45,26 +45,26 @@ jobs: repo-token: ${{ github.token }} - run: task pytest-pure - pytest-external: - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: - - "3.8" - - "3.9" - - "3.10" - - "3.11" - # - "3.12.0-rc.1" - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - uses: arduino/setup-task@v1 - with: - repo-token: ${{ github.token }} - - run: task pytest-external + # pytest-external: + # runs-on: ubuntu-latest + # strategy: + # fail-fast: false + # matrix: + # python-version: + # - "3.8" + # - "3.9" + # - "3.10" + # - "3.11" + # # - "3.12.0-rc.1" + # steps: + # - uses: actions/checkout@v3 + # - uses: actions/setup-python@v4 + # with: + # python-version: ${{ matrix.python-version }} + # - uses: arduino/setup-task@v1 + # with: + # repo-token: ${{ github.token }} + # - run: task pytest-external markdownlint-cli: runs-on: ubuntu-latest diff --git a/.markdownlint.yaml b/.markdownlint.yaml new file mode 100644 index 0000000..808d2a6 --- /dev/null +++ b/.markdownlint.yaml @@ -0,0 +1,8 @@ +# https://github.com/DavidAnson/markdownlint/blob/main/schema/.markdownlint.yaml +default: true # enable all by default +MD007: # unordered list indentation + indent: 2 +MD013: false # do not validate line length +MD014: false # allow $ before command output +MD029: # ordered list prefix + style: "one" From c0323f27bd34fcf33a8136b3732fcd6e8d30d8a2 Mon Sep 17 00:00:00 2001 From: gram Date: Wed, 27 Sep 2023 13:09:29 +0200 Subject: [PATCH 8/8] change ordered list numbering --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 67c07e3..6fd96f2 100644 --- a/README.md +++ b/README.md @@ -148,15 +148,15 @@ pip install -e ".[benchmark]" All algorithms have 2 interfaces: 1. Class with algorithm-specific params for customizing. -2. Class instance with default params for quick and simple usage. +1. Class instance with default params for quick and simple usage. All algorithms have some common methods: 1. `.distance(*sequences)` -- calculate distance between sequences. -2. `.similarity(*sequences)` -- calculate similarity for sequences. -3. `.maximum(*sequences)` -- maximum possible value for distance and similarity. For any sequence: `distance + similarity == maximum`. -4. `.normalized_distance(*sequences)` -- normalized distance between sequences. The return value is a float between 0 and 1, where 0 means equal, and 1 totally different. -5. `.normalized_similarity(*sequences)` -- normalized similarity for sequences. The return value is a float between 0 and 1, where 0 means totally different, and 1 equal. +1. `.similarity(*sequences)` -- calculate similarity for sequences. +1. `.maximum(*sequences)` -- maximum possible value for distance and similarity. For any sequence: `distance + similarity == maximum`. +1. `.normalized_distance(*sequences)` -- normalized distance between sequences. The return value is a float between 0 and 1, where 0 means equal, and 1 totally different. +1. `.normalized_similarity(*sequences)` -- normalized similarity for sequences. The return value is a float between 0 and 1, where 0 means totally different, and 1 equal. Most common init arguments: @@ -164,7 +164,7 @@ Most common init arguments: - 1 (default) -- compare sequences by chars. - 2 or more -- transform sequences to q-grams. - None -- split sequences by words. -2. `as_set` -- for token-based algorithms: +1. `as_set` -- for token-based algorithms: - True -- `t` and `ttt` is equal. - False (default) -- `t` and `ttt` is different.