v1.11.0 (#355)

* v1.11.0-rc0 * Don't auto-update-conda * Disable auto-update-conda everywhere --------- Co-authored-by: rtosholdings-bot <[email protected]>
rtosholdings · Aug 1, 2023 · 00d962f · 00d962f
1 parent b1b50bb
commit 00d962f
Show file tree

Hide file tree

Showing 31 changed files with 3,372 additions and 334 deletions.
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
@@ -35,7 +35,7 @@ jobs:
           activate-environment: "flake8_env"
           python-version: ${{ matrix.python-version }}
           mamba-version: "*"
-          auto-update-conda: true
+          auto-update-conda: false
           channels: conda-forge
           channel-priority: flexible
           show-channel-urls: true
@@ -74,7 +74,7 @@ jobs:
           activate-environment: "conda_build"
           python-version: ${{ env.python_version }}
           mamba-version: "*"
-          auto-update-conda: true
+          auto-update-conda: false
           channels: conda-forge
           channel-priority: flexible
           show-channel-urls: true
@@ -136,7 +136,7 @@ jobs:
         shell: bash -l {0}
     strategy:
       matrix:
-        os: ["ubuntu-latest", "windows-2019"]
+        os: ["ubuntu-latest", "windows-2022"]
         python-version: [3.9, "3.10", "3.11"]
         numpy-version: [1.23, 1.24]
     env:
@@ -156,7 +156,7 @@ jobs:
           activate-environment: "conda_test"
           python-version: ${{ matrix.python-version }}
           mamba-version: "*"
-          auto-update-conda: true
+          auto-update-conda: false
           channels: conda-forge
           channel-priority: flexible
           show-channel-urls: true
@@ -199,7 +199,7 @@ jobs:
         shell: bash -l {0}
     strategy:
       matrix:
-        os: [ubuntu-latest, windows-2019]
+        os: [ubuntu-latest, windows-2022]
         python-version: [3.9, "3.10", "3.11"]
         numpy-version: [1.23, 1.24]
     steps:
@@ -216,7 +216,7 @@ jobs:
           activate-environment: "pypi_test"
           python-version: ${{ matrix.python-version }}
           mamba-version: "*"
-          auto-update-conda: true
+          auto-update-conda: false
           channels: conda-forge
           channel-priority: flexible
           show-channel-urls: true
@@ -275,7 +275,7 @@ jobs:
         with:
           activate-environment: "conda_deploy"
           mamba-version: "*"
-          auto-update-conda: true
+          auto-update-conda: false
           channels: conda-forge
           channel-priority: flexible
           show-channel-urls: true

diff --git a/conda_recipe/meta.yaml b/conda_recipe/meta.yaml
@@ -22,7 +22,7 @@ requirements:
     - numba >=0.56.2
     - pandas >=1.0,<3.0
     - python-dateutil
-    - riptide_cpp >=1.12.2,<2 # run with any (compatible) version in this range
+    - riptide_cpp >=1.14.0,<2 # run with any (compatible) version in this range
 
 test:
   source_files:

diff --git a/dev_tools/gen_requirements.py b/dev_tools/gen_requirements.py
@@ -33,8 +33,8 @@ def is_python(major: int, minor: int) -> bool:
     toolchain_reqs += [
         "binutils",
         "binutils_linux-64",
-        "gcc==8.*",
-        "gxx==8.*",
+        "gcc==10.*",
+        "gxx==10.*",
         "ninja",
     ]
 

diff --git a/docs/source/_autoapi_templates/python/package.rst b/docs/source/_autoapi_templates/python/package.rst
@@ -0,0 +1,54 @@
+{% if not obj.display %}
+:orphan:
+{% endif %}
+
+:py:mod:`{{ obj.name }}`
+=========={{ "=" * obj.name|length }}
+
+.. py:module:: {{ obj.name }}
+
+
+{% block subpackages %}
+{% set visible_subpackages = obj.subpackages|selectattr("display")|list %}
+{% if visible_subpackages %}
+Subpackages
+-----------
+.. toctree::
+   :titlesonly:
+   :maxdepth: 3
+
+{% for subpackage in visible_subpackages %}
+   {{ subpackage.short_name }}/index.rst
+{% endfor %}
+
+
+{% endif %}
+{% endblock %}
+{% block submodules %}
+{% set visible_submodules = obj.submodules|selectattr("display")|list %}
+{% if visible_submodules %}
+Submodules
+----------
+.. toctree::
+   :titlesonly:
+   :maxdepth: 1
+
+{% for submodule in visible_submodules %}
+   {{ submodule.short_name }}/index.rst
+{% endfor %}
+
+
+{% endif %}
+{% endblock %}
+{% block content %}
+{% if obj.all is not none %}
+{% set visible_children = obj.children|selectattr("short_name", "in", obj.all)|list %}
+{% elif obj.type is equalto("package") %}
+{% set visible_children = obj.children|selectattr("display")|list %}
+{% else %}
+{% set visible_children = obj.children|selectattr("display")|rejectattr("imported")|list %}
+{% endif %}
+
+{% endblock %}
+
+
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -59,8 +59,6 @@
 napoleon_use_rtype = True
 napoleon_type_aliases = None
 
-# Add any paths that contain templates here, relative to this directory.
-templates_path = ["_templates"]
 
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
@@ -88,6 +86,21 @@
 autoapi_dirs = ["../../riptable"]
 autoapi_ignore = ["*test*", "*benchmarks*"]
 
+# When using AutoAPI, use autoapi_template_dir (see below).
+# templates_path = ["_templates"]
+
+# https://sphinx-autoapi.readthedocs.io/en/latest/how_to.html#customise-templates
+autoapi_template_dir = "_autoapi_templates"
+
+# Setting autoapi_add_toctree_entry = False prevents a table of contents entry for the
+# API Reference (/riptable/riptable/autoapi/index.html) from being automatically created.
+# See https://sphinx-autoapi.readthedocs.io/en/latest/how_to.html#how-to-remove-the-index-page.
+# That API Reference page doesn't seem to allow any sidebar TOC entries.
+# Instead, manually add a link to /autoapi/riptable/index in /docs/source/index.rst as
+# suggested at the sphinx-autoapi link above. This link goes to /autoapi/riptable/index.html
+# -- a page that is generated using an AutoAPI template, which can be customized (see above).
+autoapi_add_toctree_entry = False
+
 # Remove typehints from html
 autodoc_typehints = "none"
 

diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -11,6 +11,7 @@ Welcome to Riptable's documentation!
    :caption: Contents:
 
    tutorial/tutorial
+   API Reference <autoapi/riptable/index>
 
 
 

diff --git a/docs/source/tutorial/categoricals_user_guide.rst b/docs/source/tutorial/categoricals_user_guide.rst
@@ -0,0 +1,97 @@
+
+Riptable Categoricals User Guide
+********************************
+
+.. currentmodule:: riptable
+
+This guide covers a few topics in more depth than the
+:doc:`Categoricals </tutorial/tutorial_categoricals>` section of the :doc:`/tutorial/tutorial`
+and the API reference docs for the `~rt_categorical.Categorical` class.
+
+
+.. toctree::
+   :maxdepth: 1
+
+   Constructing <categoricals_user_guide_construct>
+   Accessing Data <categoricals_user_guide_access_data>
+   Indexing <categoricals_user_guide_indexing>
+   Filters <categoricals_user_guide_filters>
+   Base Index <categoricals_user_guide_base_index>
+   Sorting and Display Order <categoricals_user_guide_order>
+   Comparisons <categoricals_user_guide_comparisons>
+   Final dtype of Integer Code Array <categoricals_user_guide_dtype>
+   Invalid Categories <categoricals_user_guide_invalid_categories>
+   Bins and Categories <categoricals_user_guide_bins_categories>
+
+
+Riptable Categoricals have two related uses:
+
+-  They efficiently store string (or other large dtype) arrays that have
+   repeated values. The repeated values are partitioned into groups (a.k.a.
+   categories), and each group is mapped to an integer. For example, in a 
+   Categorical that contains three "AAPL" symbols and four "MSFT" symbols, 
+   the data is partitioned into an "AAPL" group that's mapped to 1 and a 
+   "MSFT" group that's mapped to 2. This integer mapping allows the data to 
+   be stored and operated on more efficiently.
+-  They're Riptable's class for doing group operations. A method applied
+   to a Categorical is applied to each group separately.
+
+A Categorical is typically created from a list of strings::
+
+    >>> c = rt.Categorical(["a", "a", "b", "a", "c", "c", "b"])
+    >>> c
+    Categorical([a, a, b, a, c, c, b]) Length: 7
+      FastArray([1, 1, 2, 1, 3, 3, 2], dtype=int8) Base Index: 1
+      FastArray([b'a', b'b', b'c'], dtype='|S1') Unique count: 3
+
+The output shows:
+
+- The Categorical values. These are made unique to form the categories.
+- The integer mapping codes that correspond to the unique categories. Because the integers can be used to index into the Categorical, they're
+  also referred to as the indices. Notice that the base index of the array is also shown. 
+  By default, the integer index is 1-based; 0 is reserved for Filtered categories. The 
+  integer array is an `int8`, `int16`, `int32`, or `int64` array, depending on the number 
+  of unique categories or the maximum value provided in a mapping. 
+- The unique categories. (Both the categories and their associated integer codes are 
+  sometimes called bins.) Each represents a group for groupby operations. The
+  categories are held in an array (sorted by default), a dictionary that maps
+  integers to strings or strings to integers, or a multi-key dictionary.
+
+Use Categorical objects to perform aggregations over arbitrary arrays of the same
+length as the Categorical::
+
+    >>> c = rt.Categorical(["a", "a", "b", "a", "c", "c", "b"])
+    >>> ints = rt.FA([3, 10, 2, 5, 4, 1, 1])
+    >>> flts = rt.FA([1.2, 3.4, 5.6, 4.0, 2.1, 0.6, 11.3])
+    >>> c.sum([ints, flts])
+    *key_0   col_0   col_1
+    ------   -----   -----
+    a           18    8.60
+    b            3   16.90
+    c            5    2.70
+
+Multi-key Categoricals let you create and operate on groupings based on multiple related 
+categories::
+    
+    >>> strs1 = rt.FastArray(["a", "b", "b", "a", "b", "a"])
+    >>> strs2 = rt.FastArray(["x", "y", "y", "z", "x", "x"])
+    >>> c = rt.Categorical([strs1, strs2])
+    >>> c
+    Categorical([(a, x), (b, y), (b, y), (a, z), (b, x), (a, x)]) Length: 6
+      FastArray([1, 2, 2, 3, 4, 1], dtype=int8) Base Index: 1
+      {'key_0': FastArray([b'a', b'b', b'a', b'b'], dtype='|S1'), 'key_1': FastArray([b'x', b'y', b'z', b'x'], dtype='|S1')} Unique count: 4
+
+    >>> c.count()
+    *key_0   *key_1   Count
+    ------   ------   -----
+    a        x            2
+    b        y            2
+    a        z            1
+    b        x            1
+
+
+To see more ways to create a Categorical, go to 
+:doc:`Constructing Categoricals <categoricals_user_guide_construct>`. To see more 
+operations on Categoricals, see the
+:doc:`Categoricals </tutorial/tutorial_categoricals>` section of the 
+:doc:`/tutorial/tutorial`.
diff --git a/docs/source/tutorial/categoricals_user_guide_access_data.rst b/docs/source/tutorial/categoricals_user_guide_access_data.rst
@@ -0,0 +1,104 @@
+Riptable Categoricals -- Accessing Parts of the Categorical 
+***********************************************************
+
+.. currentmodule:: riptable
+
+Use Categorical methods and properties to access the stored data.
+
+Get the array of Categorical values with `~riptable.rt_categorical.Categorical.expand_array`.
+Note that because the expansion constructs the complete list of values from the list of 
+unique categories, it is an expensive operation::
+
+    >>> c = rt.Categorical(["b", "a", "b", "c", "a", "c", "b"])
+    >>> c
+    Categorical([b, a, b, c, a, c, b]) Length: 7
+      FastArray([2, 1, 2, 3, 1, 3, 2], dtype=int8) Base Index: 1
+      FastArray([b'a', b'b', b'c'], dtype='|S1') Unique count: 3
+
+    >>> c.expand_array
+    FastArray([b'b', b'a', b'b', b'c', b'a', b'c', b'b'], dtype='|S8')
+
+
+    >>> c2 = rt.Categorical([10, 0, 0, 5, 5, 10, 0, 0, 5, 0])
+    >>> c2
+    Categorical([10, 0, 0, 5, 5, 10, 0, 0, 5, 0]) Length: 10
+      FastArray([3, 1, 1, 2, 2, 3, 1, 1, 2, 1], dtype=int8) Base Index: 1
+      FastArray([ 0,  5, 10]) Unique count: 3
+
+    >>> c2.expand_array
+    FastArray([10,  0,  0,  5,  5, 10,  0,  0,  5,  0])
+
+Note that in the following base-1 Categorical, created from an integer mapping array and a list of
+unique categories, 0 is mapped to Filtered, 1 is mapped to "b", and 2 is mapped to "a"; there is
+no 3 to be mapped to "c", so "c" doesn't appear in the expanded array::
+
+    >>> c3 = rt.Categorical([0, 1, 1, 2, 2, 0, 1, 1, 2, 1], categories=["b", "a", "c"])
+    >>> c3
+    Categorical([Filtered, b, b, a, a, Filtered, b, b, a, b]) Length: 10
+      FastArray([0, 1, 1, 2, 2, 0, 1, 1, 2, 1]) Base Index: 1
+      FastArray([b'b', b'a', b'c'], dtype='|S1') Unique count: 3
+
+    >>> c3.expand_array
+    FastArray([b'Filtered', b'b', b'b', b'a', b'a', b'Filtered', b'b', b'b', b'a', b'b'], dtype='|S8')
+
+
+Get the integer mapping array with `~riptable.rt_categorical.Categorical._fa`::
+
+    >>> c._fa
+    FastArray([2, 1, 2, 3, 1, 3, 2], dtype=int8)
+
+    >>> c2._fa
+    FastArray([3, 1, 1, 2, 2, 3, 1, 1, 2, 1], dtype=int8)
+
+    >>> c3._fa
+    FastArray([0, 1, 1, 2, 2, 0, 1, 1, 2, 1])
+
+
+Get the array of unique categories with `~riptable.rt_categorical.Categorical.category_array`::
+
+    >>> c.category_array
+    FastArray([b'a', b'b', b'c'], dtype='|S1')
+
+    >>> c2.category_array
+    FastArray([ 0,  5, 10])
+
+    >>> c3.category_array
+    FastArray([b'b', b'a', b'c'], dtype='|S1')
+
+
+Note that if you want to use `~riptable.rt_categorical.Categorical._fa` to index into `~riptable.rt_categorical.Categorical.category_array`, you'll need
+to subtract 1 because the integer codes are 1-based::
+
+    >>> c.category_array[c._fa[0]-1]
+    b'b'
+
+For multi-key Categoricals, use `~riptable.rt_categorical.Categorical.category_dict` to get a dictionary of the two category 
+arrays::
+
+    >>> strs = rt.FastArray(["a", "b", "b", "a", "b", "a"])
+    >>> ints = rt.FastArray([2, 1, 1, 2, 1, 3])
+    >>> c = rt.Categorical([strs, ints]) 
+    >>> c
+    Categorical([(a, 2), (b, 1), (b, 1), (a, 2), (b, 1), (a, 3)]) Length: 6
+      FastArray([1, 2, 2, 1, 2, 3], dtype=int8) Base Index: 1
+      {'key_0': FastArray([b'a', b'b', b'a'], dtype='|S1'), 'key_1': FastArray([2, 1, 3])} Unique count: 3
+
+    >>> c.category_dict
+    {'key_0': FastArray([b'a', b'b', b'a'], dtype='|S1'),
+    'key_1': FastArray([2, 1, 3])}
+
+Use `~riptable.rt_categorical.Categorical.category_mapping` to get the mapping dictionary from a Categorical created with
+an :py:class:`~enum.IntEnum` or mapping dictionary::
+
+    >>> d = {"StronglyAgree": 44, "Agree": 133, "Disagree": 75, "StronglyDisagree": 1, "NeitherAgreeNorDisagree": 144 }
+    >>> codes = [1, 44, 44, 133, 75]
+    >>> c = rt.Categorical(codes, categories=d)
+    >>> c
+    Categorical([StronglyDisagree, StronglyAgree, StronglyAgree, Agree, Disagree]) Length: 5
+      FastArray([  1,  44,  44, 133,  75]) Base Index: None
+      {44:'StronglyAgree', 133:'Agree', 75:'Disagree', 1:'StronglyDisagree', 144:'NeitherAgreeNorDisagree'} Unique count: 4
+    >>> c.category_mapping
+    {44: 'StronglyAgree',
+     133: 'Agree',
+     75: 'Disagree',
+     1: 'StronglyDisagree',
+     144: 'NeitherAgreeNorDisagree'}
Original file line number	Diff line number	Diff line change
Expand Up		@@ -11,6 +11,7 @@ Welcome to Riptable's documentation!
		:caption: Contents:

		tutorial/tutorial
		API Reference <autoapi/riptable/index>



Expand Down