Skip to content

Commit

Permalink
Merge pull request pandas-dev#4150 from hayd/melt_multi
Browse files Browse the repository at this point in the history
ENH: Melt with MultiIndex columns
  • Loading branch information
hayd committed Jul 12, 2013
2 parents 56009bd + 92fdeff commit f4246fb
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 33 deletions.
1 change: 1 addition & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ pandas 0.12
to specify custom column names of the returned DataFrame (:issue:`3649`),
thanks @hoechenberger. If ``var_name`` is not specified and ``dataframe.columns.name``
is not None, then this will be used as the ``var_name`` (:issue:`4144`).
``melt`` now also supports MultiIndex columns.
- clipboard functions use pyperclip (no dependencies on Windows, alternative
dependencies offered for Linux) (:issue:`3837`).
- Plotting functions now raise a ``TypeError`` before trying to plot anything
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1653,7 +1653,9 @@ def get_level_values(self, level):
num = self._get_level_number(level)
unique_vals = self.levels[num] # .values
labels = self.labels[num]
return unique_vals.take(labels)
values = unique_vals.take(labels)
values.name = self.names[num]
return values

def format(self, space=2, sparsify=None, adjoin=True, names=False,
na_rep='NaN', formatter=None):
Expand Down
108 changes: 76 additions & 32 deletions pandas/core/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,20 @@

import numpy as np

import six

from pandas.core.series import Series
from pandas.core.frame import DataFrame

from pandas.core.categorical import Categorical
from pandas.core.common import (notnull, _ensure_platform_int, _maybe_promote,
_maybe_upcast, isnull)
isnull)
from pandas.core.groupby import (get_group_index, _compress_group_index,
decons_group_index)
import pandas.core.common as com
import pandas.algos as algos
from pandas import lib

from pandas.core.index import MultiIndex, Index
from pandas.core.index import MultiIndex


class ReshapeError(Exception):
Expand All @@ -35,21 +36,26 @@ class _Unstacker(object):
Examples
--------
>>> import pandas as pd
>>> index = pd.MultiIndex.from_tuples([('one', 'a'), ('one', 'b'),
... ('two', 'a'), ('two', 'b')])
>>> s = pd.Series(np.arange(1.0, 5.0), index=index)
>>> s
one a 1.
one b 2.
two a 3.
two b 4.
one a 1
b 2
two a 3
b 4
dtype: float64
>>> s.unstack(level=-1)
a b
one 1. 2.
two 3. 4.
one 1 2
two 3 4
>>> s.unstack(level=0)
one two
a 1. 3.
b 2. 4.
a 1 3
b 2 4
Returns
-------
Expand Down Expand Up @@ -159,7 +165,7 @@ def get_result(self):
values[j] = orig_values[i]
else:
index = index.take(self.unique_groups)

return DataFrame(values, index=index, columns=columns)

def get_new_values(self):
Expand Down Expand Up @@ -601,7 +607,7 @@ def _stack_multi_columns(frame, level=-1, dropna=True):


def melt(frame, id_vars=None, value_vars=None,
var_name=None, value_name='value'):
var_name=None, value_name='value', col_level=None):
"""
"Unpivots" a DataFrame from wide format to long format, optionally leaving
id variables set
Expand All @@ -613,27 +619,47 @@ def melt(frame, id_vars=None, value_vars=None,
value_vars : tuple, list, or ndarray
var_name : scalar, if None uses frame.columns.name or 'variable'
value_name : scalar, default 'value'
col_level : scalar, if columns are a MultiIndex then use this level to melt
Examples
--------
>>> import pandas as pd
>>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'},
... 'B': {0: 1, 1: 3, 2: 5},
... 'C': {0: 2, 1: 4, 2: 6}})
>>> df
A B C
a 1 2
b 3 4
c 5 6
A B C
0 a 1 2
1 b 3 4
2 c 5 6
>>> melt(df, id_vars=['A'], value_vars=['B'])
A variable value
a B 1
b B 3
c B 5
A variable value
0 a B 1
1 b B 3
2 c B 5
>>> melt(df, id_vars=['A'], value_vars=['B'],
... var_name='myVarname', value_name='myValname')
A myVarname myValname
a B 1
b B 3
c B 5
A myVarname myValname
0 a B 1
1 b B 3
2 c B 5
>>> df.columns = [list('ABC'), list('DEF')]
>>> melt(df, col_level=0, id_vars=['A'], value_vars=['B'])
A variable value
0 a B 1
1 b B 3
2 c B 5
>>> melt(df, id_vars=[('A', 'D')], value_vars=[('B', 'E')])
(A, D) variable_0 variable_1 value
0 a B E 1
1 b B E 3
2 c B E 5
"""
# TODO: what about the existing index?
Expand All @@ -652,8 +678,21 @@ def melt(frame, id_vars=None, value_vars=None,
else:
frame = frame.copy()

if col_level is not None: # allow list or other?
frame.columns = frame.columns.get_level_values(col_level) # frame is a copy

if var_name is None:
var_name = frame.columns.name if frame.columns.name is not None else 'variable'
if isinstance(frame.columns, MultiIndex):
if len(frame.columns.names) == len(set(frame.columns.names)):
var_name = frame.columns.names
else:
var_name = ['variable_%s' % i for i in
xrange(len(frame.columns.names))]
else:
var_name = [frame.columns.name if frame.columns.name is not None
else 'variable']
if isinstance(var_name, six.string_types):
var_name = [var_name]

N, K = frame.shape
K -= len(id_vars)
Expand All @@ -662,11 +701,13 @@ def melt(frame, id_vars=None, value_vars=None,
for col in id_vars:
mdata[col] = np.tile(frame.pop(col).values, K)

mcolumns = id_vars + [var_name, value_name]
mcolumns = id_vars + var_name + [value_name]

mdata[value_name] = frame.values.ravel('F')
mdata[var_name] = np.asarray(frame.columns).repeat(N)

for i, col in enumerate(var_name):
# asanyarray will keep the columns as an Index
mdata[col] = np.asanyarray(frame.columns.get_level_values(i)).repeat(N)

return DataFrame(mdata, columns=mcolumns)


Expand All @@ -683,13 +724,16 @@ def lreshape(data, groups, dropna=True, label=None):
Examples
--------
>>> import pandas as pd
>>> data = pd.DataFrame({'hr1': [514, 573], 'hr2': [545, 526],
... 'team': ['Red Sox', 'Yankees'],
... 'year1': [2007, 2007], 'year2': [2008, 2008]})
>>> data
hr1 hr2 team year1 year2
0 514 545 Red Sox 2007 2008
1 573 526 Yankees 2007 2008
>>> pd.lreshape(data, {'year': ['year1', 'year2'],
'hr': ['hr1', 'hr2']})
>>> pd.lreshape(data, {'year': ['year1', 'year2'], 'hr': ['hr1', 'hr2']})
team hr year
0 Red Sox 514 2007
1 Yankees 573 2007
Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1029,6 +1029,8 @@ def test_get_level_values(self):
expected = ['foo', 'foo', 'bar', 'baz', 'qux', 'qux']
self.assert_(np.array_equal(result, expected))

self.assertEquals(result.name, 'first')

result = self.index.get_level_values('first')
expected = self.index.get_level_values(0)
self.assert_(np.array_equal(result, expected))
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/test_reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import nose

from pandas import DataFrame
import pandas as pd

from numpy import nan
import numpy as np
Expand All @@ -30,6 +31,12 @@ def setUp(self):
self.var_name = 'var'
self.value_name = 'val'

self.df1 = pd.DataFrame([[ 1.067683, -1.110463, 0.20867 ],
[-1.321405, 0.368915, -1.055342],
[-0.807333, 0.08298 , -0.873361]])
self.df1.columns = [list('ABC'), list('abc')]
self.df1.columns.names = ['CAP', 'low']

def test_default_col_names(self):
result = melt(self.df)
self.assertEqual(result.columns.tolist(), ['variable', 'value'])
Expand Down Expand Up @@ -128,6 +135,17 @@ def test_custom_var_and_value_name(self):
result20 = melt(self.df)
self.assertEqual(result20.columns.tolist(), ['foo', 'value'])

def test_col_level(self):
    """Melting on one column level works both by position and by name.

    ``self.df1`` carries two column levels named 'CAP' and 'low'; picking
    level ``0`` or level ``'CAP'`` must yield the same melted column labels.
    """
    res1 = melt(self.df1, col_level=0)
    res2 = melt(self.df1, col_level='CAP')
    self.assertEqual(res1.columns.tolist(), ['CAP', 'value'])
    # Check the by-name result too; res1 used to be asserted twice, which
    # left the col_level='CAP' code path unverified.
    self.assertEqual(res2.columns.tolist(), ['CAP', 'value'])

def test_multiindex(self):
    """Melting without ``col_level`` yields one variable column per level."""
    expected_columns = ['CAP', 'low', 'value']
    melted = pd.melt(self.df1)
    self.assertEqual(melted.columns.tolist(), expected_columns)


class TestConvertDummies(unittest.TestCase):
def test_convert_dummies(self):
df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
Expand Down

0 comments on commit f4246fb

Please sign in to comment.