Merge pull request spotify#72 from spotify/chris/bar_color

Better control over bar color
mojerro · Mar 8, 2019 · e188526 · e188526
2 parents 55a0783 + 8281ff2
commit e188526
Show file tree

Hide file tree

Showing 5 changed files with 113 additions and 30 deletions.
diff --git a/HISTORY.rst b/HISTORY.rst
@@ -2,6 +2,21 @@
 History
 =======
 
+2.6.0 (2019-03-08)
+------------------
+
+Improvements:
+
+* Allows users to color bar charts by a column that isn't part of the
+  categorical axis.
+
+
+Bugfixes:
+
+* Fixed a bug that caused float values to break categorical text plots
+  (thanks to @danela for finding it!).
+* Fixed broken readme links.
+
 2.5.0 (2019-02-17)
 ------------------
 

diff --git a/README.rst b/README.rst
@@ -4,7 +4,7 @@ Chartify
 |status|  |release|  |python|
 
 .. |status| image:: https://img.shields.io/badge/Status-Beta-blue.svg
-.. |release| image:: https://img.shields.io/badge/Release-2.5.0-blue.svg
+.. |release| image:: https://img.shields.io/badge/Release-2.6.0-blue.svg
 .. |python| image:: https://img.shields.io/badge/Python-3.6-blue.svg
 
 Chartify is a Python library that makes it easy for data scientists to create charts.
@@ -46,9 +46,9 @@ Installation
 Getting started
 ---------------
 
-This `tutorial notebook </examples/Chartify%20Tutorial.ipynb>`_ is the best place to get started with a guided tour of the core concepts of Chartify.
+This `tutorial notebook <https://github.com/spotify/chartify/blob/master/examples/Chartify%20Tutorial.ipynb>`_ is the best place to get started with a guided tour of the core concepts of Chartify.
 
-From there, check out the `example notebook </examples/Examples.ipynb>`_ for a list of all the available plots.
+From there, check out the `example notebook <https://github.com/spotify/chartify/blob/master/examples/Examples.ipynb>`_ for a list of all the available plots.
 
 Getting support
 ---------------

diff --git a/chartify/__init__.py b/chartify/__init__.py
@@ -22,7 +22,7 @@
 
 __author__ = """Chris Halpert"""
 __email__ = '[email protected]'
-__version__ = '2.5.0'
+__version__ = '2.6.0'
 
 _IPYTHON_INSTANCE = False
 

diff --git a/chartify/_core/plot.py b/chartify/_core/plot.py
@@ -90,17 +90,22 @@ def _get_color_and_order(self,
             if categorical_columns is None:  # Numeric data
                 colors = next_colors
             else:
-                # Color column must be in the categorical_columns
-                try:
-                    color_index = categorical_columns.index(color_column)
-                except ValueError:
-                    raise ValueError(
-                        '''`color_column` must be present
-                         in the `categorical_columns`'''
-                    )
+                # # Color column must be in the categorical_columns
+                # try:
+                #     color_index = categorical_columns.index(color_column)
+                #     color_label = 'factors'
+                # except ValueError:
+                #     color_label = 'color_column'
+                #     color_index = 0
+                #     raise ValueError(
+                #         '''`color_column` must be present
+                #          in the `categorical_columns`'''
+                #     )
+                color_label = 'color_column'
+                color_index = 0
                 color_order = [str(factor) for factor in color_order]
                 colors = bokeh.transform.factor_cmap(
-                    'factors',
+                    color_label,
                     palette=next_colors,
                     factors=color_order,
                     start=color_index,
@@ -964,7 +969,8 @@ def _construct_source(self,
                           stack_column=None,
                           normalize=False,
                           categorical_order_by=None,
-                          categorical_order_ascending=False):
+                          categorical_order_ascending=False,
+                          color_column=None):
         """Constructs ColumnDataSource
 
         Returns:
@@ -1003,8 +1009,18 @@ def _construct_source(self,
                 columns=stack_column,
                 index=categorical_columns,
                 values=numeric_column,
-                aggfunc='sum').fillna(0)  # NA columns break the stacks
+                aggfunc='sum')
         )
+        # NA columns break the stacks
+        # Might want to make this conditional in the future for parallel plots.
+        source = source.fillna(0)
+
+        if color_column:
+            # Merge color column
+            color_df = data_frame.astype(type_map)
+            color_df['color_column'] = color_df[color_column].astype(str)
+            color_df = color_df.set_index(categorical_columns)['color_column']
+            source = source.join(color_df)
 
         # Normalize values at the grouped levels.
         # Only relevant for stacked objects
@@ -1152,14 +1168,15 @@ def text(self,
                 numeric_column,
                 categorical_order_by=categorical_order_by,
                 categorical_order_ascending=categorical_order_ascending)
-            sliced_data = (sliced_data.set_index(categorical_columns)
+            sliced_data = (sliced_data.astype(str)
+                           .set_index(categorical_columns)
                            .reindex(source.data['factors']).reset_index())
             # Text column isn't in the source so it needs to be added.
-            if text_column != numeric_column:
-                source.add(sliced_data[text_column], name=text_column)
+            sliced_data['text_column'] = sliced_data[text_column]
+            source.add(sliced_data['text_column'], name='text_column')
 
             self._chart.figure.text(
-                text=text_column,
+                text='text_column',
                 x=x_value,
                 y=y_value,
                 text_font_size=font_size,
@@ -1285,7 +1302,7 @@ def text_stacked(self,
                            .reindex(index=factors).reset_index())
 
             text_values = np.where(sliced_data[text_column].isna(), '',
-                                   sliced_data[text_column])
+                                   sliced_data[text_column].astype(str))
 
             if cumulative_numeric_value is not None:
                 cumulative_numeric_value = (
@@ -1363,17 +1380,25 @@ def bar(self,
             categorical_columns,
             numeric_column,
             categorical_order_by=categorical_order_by,
-            categorical_order_ascending=categorical_order_ascending)
+            categorical_order_ascending=categorical_order_ascending,
+            color_column=color_column)
+
+        colors, color_values = self._get_color_and_order(
+            data_frame, color_column, color_order, categorical_columns)
 
-        colors, _ = self._get_color_and_order(data_frame, color_column,
-                                              color_order, categorical_columns)
         if color_column is None:
             colors = colors[0]
 
         self._set_categorical_axis_default_factors(vertical, factors)
         self._set_categorical_axis_default_range(vertical, data_frame,
                                                  numeric_column)
         bar_width = self._get_bar_width(factors)
+
+        if color_column:
+            legend = bokeh.core.properties.field('color_column')
+        else:
+            legend = None
+
         if vertical:
             self._chart.figure.vbar(
                 x='factors',
@@ -1382,7 +1407,8 @@ def bar(self,
                 bottom=0,
                 line_color='white',
                 source=source,
-                fill_color=colors)
+                fill_color=colors,
+                legend=legend)
         else:
             self._chart.figure.hbar(
                 y='factors',
@@ -1391,7 +1417,11 @@ def bar(self,
                 left=0,
                 line_color='white',
                 source=source,
-                fill_color=colors)
+                fill_color=colors,
+                legend=legend)
+        # Set legend defaults if there are multiple series.
+        if color_column is not None:
+            self._chart.style._apply_settings('legend')
         return self._chart
 
     def interval(self,
@@ -1708,17 +1738,23 @@ def lollipop(self,
             categorical_columns,
             numeric_column,
             categorical_order_by=categorical_order_by,
-            categorical_order_ascending=categorical_order_ascending)
+            categorical_order_ascending=categorical_order_ascending,
+            color_column=color_column)
 
-        colors, _ = self._get_color_and_order(data_frame, color_column,
-                                              color_order, categorical_columns)
+        colors, color_values = self._get_color_and_order(
+            data_frame, color_column, color_order, categorical_columns)
         if color_column is None:
             colors = colors[0]
 
         self._set_categorical_axis_default_factors(vertical, factors)
         self._set_categorical_axis_default_range(vertical, data_frame,
                                                  numeric_column)
 
+        if color_column:
+            legend = bokeh.core.properties.field('color_column')
+        else:
+            legend = None
+
         if vertical:
             self._chart.figure.segment(
                 'factors',
@@ -1735,7 +1771,8 @@ def lollipop(self,
                 fill_color=colors,
                 line_color=colors,
                 line_width=3,
-                source=source)
+                source=source,
+                legend=legend)
         else:
             self._chart.figure.segment(
                 0,
@@ -1752,7 +1789,13 @@ def lollipop(self,
                 fill_color=colors,
                 line_color=colors,
                 line_width=3,
-                source=source)
+                source=source,
+                legend=legend)
+
+        # Set legend defaults if there are multiple series.
+        if color_column is not None:
+            self._chart.style._apply_settings('legend')
+
         return self._chart
 
     def parallel(self,

diff --git a/tests/test_plots.py b/tests/test_plots.py
@@ -220,6 +220,20 @@ def test_single_datetime_text(self):
             chart_data(ch, '')['text'], ['a', 'b', 'a', 'b']))
 
 
+class TestCategoricalText:  # "Test" prefix is required for pytest collection
+    def test_float_labels(self):
+        """Float text values show up in the source as their string form."""
+        label_test = pd.DataFrame(
+            {'value': [.20, .40, .05, .6, .2, .8],
+             'bucket': [1, 2, 3, 1, 2, 3],
+             'platform': ['android', 'android', 'android', 'ios', 'ios', 'ios'],
+             'value2': [1.0, 2.0, 3, 6., 8., 10.]})
+        ch = chartify.Chart(x_axis_type='categorical')
+        ch.plot.text(label_test, ['bucket', 'platform'], 'value', 'value')
+        expected = ['0.8', '0.05', '0.6', '0.2', '0.4', '0.2']
+        assert np.array_equal(ch.data[0]['text_column'].values, expected)
+
+
 class TestAreaPlot:
     """Area plot tests.
 
@@ -341,6 +355,7 @@ def setup(self):
         self.data = pd.DataFrame({
             'category1': ['a', 'b', 'a', 'b', 'a'],
             'category2': [1, 1, 2, 2, 3],
+            'color': ['c', 'd', 'e', 'f', 'g'],
             'number': [5, 4, 10, -3, 0],
         })
         self.plot_methods = ['bar', 'lollipop', 'parallel']
@@ -453,6 +468,16 @@ def test_bar_parallel_color_column(self):
         assert (np.array_equal(chart_data(ch, '')['2'], [10, -3]))
         assert (np.array_equal(chart_data(ch, '')['3'], [0, 0]))
 
+    def test_bar_color(self):
+        """Bars can be colored by a column outside the categorical columns."""
+        chart = chartify.Chart(blank_labels=True, x_axis_type='categorical')
+        chart.plot.bar(data_frame=self.data,
+                       categorical_columns=['category1', 'category2'],
+                       numeric_column='number',
+                       color_column='color')
+        expected_colors = ['e', 'c', 'g', 'd', 'f']
+        assert np.array_equal(chart.data[0]['color_column'], expected_colors)
+
 
 class TestBarNumericSort:
     def setup(self):