forked from paceaux/xlsx-to-json
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathxlsx_reader.py
92 lines (82 loc) · 3.4 KB
/
xlsx_reader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
'''Module that reads an xlsx spreadsheet and can produce json data from it'''
import pylightxl as xl
def get_column_names(sheet):
'''Takes a single worksheet, returns the strings in the top row of each column'''
column_lists = sheet.cols
column_names = []
for column_list in column_lists:
column_names.append(column_list[00])
return column_names
def get_row_data(row, column_names, column_type):
'''takes a single row of a worksheet and an array of rows,
returns an object with column_name:rowvalue
'''
row_data = {}
counter = 0
d_column_type = {"int": int, "float": float, "list": eval, "number": int,
"array": eval, "string": str, "boolean": eval, "bool": eval, 'object': eval}
for cell in row:
column_name = column_names[counter]
if cell == "":
if column_type[counter] == "array" or column_type[counter] == "list":
cell = '[]'
elif column_type[counter] == "number" or column_type[counter] == "int":
cell = '0'
elif column_type[counter] == "float":
cell = '0.0'
elif column_type[counter] == "object":
cell = '{}'
if column_type[counter] == "boolean" or column_type[counter] == "bool":
if cell is False or cell is True:
cell = str(cell)
else:
if cell.lower() == 'true':
cell = 'true'
elif cell.lower() == 'false':
cell = 'False'
else:
raise 'The cell value must be True or False, but actually is ' + str(cell)
try:
row_data[column_name] = d_column_type.get(column_type[counter])(cell)
except:
print('cell=', cell, "\ttype=", column_type[counter], 'cell_type=', type(cell))
counter = counter + 1
return row_data
def get_sheet_data(sheet, column_names):
'''Takes a single worksheet, returns an object with row data'''
max_rows = sheet.size[0]
sheet_data = {}
column_type = sheet.row(2)
for idx in range(4, max_rows):
row = sheet.row(idx)
row_data = get_row_data(row, column_names, column_type)
sheet_data[row_data.get(column_names[0])] = row_data
return sheet_data
def get_workbook_data(workbook):
'''Takes a workbook and returns all worksheet data'''
workbook_sheet_names = workbook.ws_names
sn_no = 1
output_file_suffix = ""
if len(workbook_sheet_names) > 1:
print("当前Excel有多个表格,请选择需要导出的表格")
for sn in workbook_sheet_names:
print(f'{sn_no}.{sn}\t', end='')
sn_no += 1
print(f'{sn_no}.全部')
sheet_num = input("请输入要导出的表格序号:")
print()
workbook_data = {}
if int(sheet_num) == sn_no:
export_sheet = workbook_sheet_names
else:
export_sheet = [workbook_sheet_names[int(sheet_num) - 1]]
output_file_suffix = '_'+export_sheet[0]
for sheet_name in export_sheet:
worksheet = workbook.ws(ws=sheet_name)
column_names = get_column_names(worksheet)
sheet_data = get_sheet_data(worksheet, column_names)
workbook_data[sheet_name.lower().replace(' ', '_')] = sheet_data
return [output_file_suffix, workbook_data]
def get_workbook(filename):
'''opens a workbook for reading'''
return xl.readxl(filename)