Skip to content

Commit

Permalink
ENH: Better handling of MultiIndex with Excel
Browse files Browse the repository at this point in the history
Added merged cell formatting for MultiIndex and Hierarchical Rows.
Issue pandas-dev#5254.
  • Loading branch information
jmcnamara committed Nov 5, 2013
1 parent 4d8cd16 commit ae37d22
Show file tree
Hide file tree
Showing 9 changed files with 256 additions and 144 deletions.
2 changes: 1 addition & 1 deletion ci/requirements-2.7.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ numexpr==2.1
tables==2.3.1
matplotlib==1.1.1
openpyxl==1.6.2
xlsxwriter==0.4.3
xlsxwriter==0.4.6
xlrd==0.9.2
patsy==0.1.0
html5lib==1.0b2
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements-2.7_LOCALE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ python-dateutil
pytz==2013b
xlwt==0.7.5
openpyxl==1.6.2
xlsxwriter==0.4.3
xlsxwriter==0.4.6
xlrd==0.9.2
numpy==1.6.1
cython==0.19.1
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements-3.2.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
python-dateutil==2.1
pytz==2013b
openpyxl==1.6.2
xlsxwriter==0.4.3
xlsxwriter==0.4.6
xlrd==0.9.2
numpy==1.7.1
cython==0.19.1
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements-3.3.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
python-dateutil==2.2
pytz==2013b
openpyxl==1.6.2
xlsxwriter==0.4.3
xlsxwriter==0.4.6
xlrd==0.9.2
html5lib==1.0b2
numpy==1.8.0
Expand Down
5 changes: 5 additions & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,11 @@ Improvements to existing features
by color as expected.
- ``read_excel()`` now tries to convert integral floats (like ``1.0``) to int
by default. (:issue:`5394`)
- ``to_excel()`` now accepts a ``merge_cells`` option, enabled by default,
that writes MultiIndex columns and Hierarchical Rows as merged cells. Note:
with this option enabled it is no longer possible to round trip Excel files
that contain merged MultiIndex and Hierarchical Rows. Set ``merge_cells`` to
``False`` to restore the previous behaviour. (:issue:`5254`)

API Changes
~~~~~~~~~~~
Expand Down
144 changes: 98 additions & 46 deletions pandas/core/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1213,7 +1213,7 @@ def __init__(self, row, col, val,
"right": "thin",
"bottom": "thin",
"left": "thin"},
"alignment": {"horizontal": "center"}}
"alignment": {"horizontal": "center", "vertical": "top"}}


class ExcelFormatter(object):
Expand All @@ -1237,10 +1237,12 @@ class ExcelFormatter(object):
Column label for index column(s) if desired. If None is given, and
`header` and `index` are True, then the index names are used. A
sequence should be given if the DataFrame uses MultiIndex.
merge_cells : boolean, default False
Format MultiIndex and Hierarchical Rows as merged cells.
"""

def __init__(self, df, na_rep='', float_format=None, cols=None,
header=True, index=True, index_label=None):
header=True, index=True, index_label=None, merge_cells=False):
self.df = df
self.rowcounter = 0
self.na_rep = na_rep
Expand All @@ -1251,6 +1253,7 @@ def __init__(self, df, na_rep='', float_format=None, cols=None,
self.index = index
self.index_label = index_label
self.header = header
self.merge_cells = merge_cells

def _format_value(self, val):
if lib.checknull(val):
Expand All @@ -1264,29 +1267,44 @@ def _format_header_mi(self):
if not(has_aliases or self.header):
return

levels = self.columns.format(sparsify=True, adjoin=False,
names=False)
# level_lenghts = _get_level_lengths(levels)
coloffset = 1
if isinstance(self.df.index, MultiIndex):
coloffset = len(self.df.index[0])

# for lnum, (records, values) in enumerate(zip(level_lenghts,
# levels)):
# name = self.columns.names[lnum]
# yield ExcelCell(lnum, coloffset, name, header_style)
# for i in records:
# if records[i] > 1:
# yield ExcelCell(lnum,coloffset + i + 1, values[i],
# header_style, lnum, coloffset + i + records[i])
# else:
# yield ExcelCell(lnum, coloffset + i + 1, values[i], header_style)

# self.rowcounter = lnum
columns = self.columns
level_strs = columns.format(sparsify=True, adjoin=False, names=False)
level_lengths = _get_level_lengths(level_strs)
coloffset = 0
lnum = 0
for i, values in enumerate(zip(*levels)):
v = ".".join(map(com.pprint_thing, values))
yield ExcelCell(lnum, coloffset + i, v, header_style)

if isinstance(self.df.index, MultiIndex):
coloffset = len(self.df.index[0]) - 1

if self.merge_cells:
# Format multi-index as merged cells.
for lnum in range(len(level_lengths)):
name = columns.names[lnum]
yield ExcelCell(lnum, coloffset, name, header_style)

for lnum, (spans, levels, labels) in enumerate(zip(level_lengths,
columns.levels,
columns.labels)
):
values = levels.take(labels)
for i in spans:
if spans[i] > 1:
yield ExcelCell(lnum,
coloffset + i + 1,
values[i],
header_style,
lnum,
coloffset + i + spans[i])
else:
yield ExcelCell(lnum,
coloffset + i + 1,
values[i],
header_style)
else:
# Format in legacy format with dots to indicate levels.
for i, values in enumerate(zip(*level_strs)):
v = ".".join(map(com.pprint_thing, values))
yield ExcelCell(lnum, coloffset + i + 1, v, header_style)

self.rowcounter = lnum

Expand Down Expand Up @@ -1354,14 +1372,17 @@ def _format_regular_rows(self):
index_label = self.df.index.names[0]

if index_label and self.header is not False:
# add to same level as column names
# if isinstance(self.df.columns, MultiIndex):
# yield ExcelCell(self.rowcounter, 0,
# index_label, header_style)
# self.rowcounter += 1
# else:
yield ExcelCell(self.rowcounter - 1, 0,
index_label, header_style)
if self.merge_cells:
yield ExcelCell(self.rowcounter,
0,
index_label,
header_style)
self.rowcounter += 1
else:
yield ExcelCell(self.rowcounter - 1,
0,
index_label,
header_style)

# write index_values
index_values = self.df.index
Expand All @@ -1383,7 +1404,7 @@ def _format_hierarchical_rows(self):
self.rowcounter += 1

gcolidx = 0
# output index and index_label?

if self.index:
index_labels = self.df.index.names
# check for aliases
Expand All @@ -1394,29 +1415,60 @@ def _format_hierarchical_rows(self):
# if index labels are not empty go ahead and dump
if (any(x is not None for x in index_labels)
and self.header is not False):
# if isinstance(self.df.columns, MultiIndex):
# self.rowcounter += 1
# else:
self.rowcounter -= 1

if not self.merge_cells:
self.rowcounter -= 1

for cidx, name in enumerate(index_labels):
yield ExcelCell(self.rowcounter, cidx,
name, header_style)
yield ExcelCell(self.rowcounter,
cidx,
name,
header_style)
self.rowcounter += 1

for indexcolvals in zip(*self.df.index):
for idx, indexcolval in enumerate(indexcolvals):
yield ExcelCell(self.rowcounter + idx, gcolidx,
indexcolval, header_style)
gcolidx += 1
if self.merge_cells:
# Format hierarchical rows as merged cells.
level_strs = self.df.index.format(sparsify=True, adjoin=False,
names=False)
level_lengths = _get_level_lengths(level_strs)

for spans, levels, labels in zip(level_lengths,
self.df.index.levels,
self.df.index.labels):
values = levels.take(labels)
for i in spans:
if spans[i] > 1:
yield ExcelCell(self.rowcounter + i,
gcolidx,
values[i],
header_style,
self.rowcounter + i + spans[i] - 1,
gcolidx)
else:
yield ExcelCell(self.rowcounter + i,
gcolidx,
values[i],
header_style)
gcolidx += 1

else:
# Format hierarchical rows with non-merged values.
for indexcolvals in zip(*self.df.index):
for idx, indexcolval in enumerate(indexcolvals):
yield ExcelCell(self.rowcounter + idx,
gcolidx,
indexcolval,
header_style)
gcolidx += 1

for colidx in range(len(self.columns)):
series = self.df.iloc[:, colidx]
for i, val in enumerate(series):
yield ExcelCell(self.rowcounter + i, gcolidx + colidx, val)

def get_formatted_cells(self):
for cell in itertools.chain(self._format_header(), self._format_body()
):
for cell in itertools.chain(self._format_header(),
self._format_body()):
cell.val = self._format_value(cell.val)
yield cell

Expand Down
10 changes: 7 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1130,7 +1130,8 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,

def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
float_format=None, cols=None, header=True, index=True,
index_label=None, startrow=0, startcol=0, engine=None):
index_label=None, startrow=0, startcol=0, engine=None,
merge_cells=True):
"""
Write DataFrame to an Excel sheet
Expand Down Expand Up @@ -1161,13 +1162,15 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
write engine to use - you can also set this via the options
``io.excel.xlsx.writer``, ``io.excel.xls.writer``, and
``io.excel.xlsm.writer``.
merge_cells : boolean, default True
Write MultiIndex and Hierarchical Rows as merged cells.
Notes
-----
If passing an existing ExcelWriter object, then the sheet will be added
to the existing workbook. This can be used to save different
DataFrames to one workbook
>>> writer = ExcelWriter('output.xlsx')
>>> df1.to_excel(writer,'Sheet1')
>>> df2.to_excel(writer,'Sheet2')
Expand All @@ -1185,7 +1188,8 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
header=header,
float_format=float_format,
index=index,
index_label=index_label)
index_label=index_label,
merge_cells=merge_cells)
formatted_cells = formatter.get_formatted_cells()
excel_writer.write_cells(formatted_cells, sheet_name,
startrow=startrow, startcol=startcol)
Expand Down
Loading

0 comments on commit ae37d22

Please sign in to comment.