Skip to content

Commit

Permalink
添加输出器:csv表格格式
Browse files (browse the repository at this point in the history)
  • Loading branch information
hiroi-sora committed Sep 28, 2023
1 parent a49a78d commit ec5ae82
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 1 deletion.
47 changes: 47 additions & 0 deletions UmiOCR-data/pyapp/ocr/output/output_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# 输出到csv表格文件

from .output import Output

import csv


class OutputCsv(Output):
    """Output writer that stores OCR results as rows of a CSV file.

    The file is created (overwriting any existing one) with a header row when
    the object is constructed; every call to ``print`` then appends one row
    of the form ``[image name, recognized text, image path]``.
    """

    def __init__(self, argd):
        """Resolve the output path and create the CSV file with its header.

        Args:
            argd: Mapping read for the keys ``"outputDir"`` (output folder),
                ``"outputFileName"`` (file name without extension, may
                contain the ``%name`` placeholder), ``"outputDirName"``
                (value substituted for ``%name``) and ``"ingoreBlank"``
                (whether results whose code is not 100 are skipped).

        Raises:
            Exception: If the CSV file cannot be created or the header row
                cannot be written. The underlying error is chained as the
                cause.
        """
        self.dir = argd["outputDir"]  # Output directory
        # File name, with the "%name" placeholder replaced by the folder name
        self.fileName = argd["outputFileName"].replace(
            "%name", argd["outputDirName"]
        )
        self.outputPath = f"{self.dir}/{self.fileName}.csv"  # Output file path
        # NOTE: the key/attribute spelling "ingoreBlank" is kept as-is because
        # it is part of the interface used by callers.
        self.ingoreBlank = argd["ingoreBlank"]  # Skip images without text
        self.headers = ["Image Name", "OCR", "Image Path"]  # Header row
        self.lineBreak = "\n"  # Separator appended after each text block
        # Create (overwrite) the output file and write the CSV header.
        # newline="" lets the csv module control line endings itself.
        try:
            with open(
                self.outputPath, "w", encoding="utf-8", newline=""
            ) as f:
                csv.writer(f).writerow(self.headers)
        except Exception as e:
            # Chain the original error so the real cause stays visible.
            raise Exception(
                f"Failed to create csv file. {e}\n创建csv文件失败。"
            ) from e

    def print(self, res):
        """Append the OCR result of one image as a row of the CSV file.

        Args:
            res: Result mapping read for the keys ``"code"``, ``"data"``,
                ``"fileName"`` and ``"path"``. For code 100, ``"data"`` is
                iterated as a sequence of mappings with a ``"text"`` entry;
                for code 101 the text column is left empty; for any other
                code ``"data"`` is embedded in an error message.
        """
        code = res["code"]
        if code != 100 and self.ingoreBlank:
            return  # Skip images that produced no recognized text
        name = res["fileName"]
        path = res["path"]
        if code == 100:
            # Concatenate all non-empty text blocks, each followed by the
            # line break (including the last one).
            textOut = "".join(
                tb["text"] + self.lineBreak for tb in res["data"] if tb["text"]
            )
        elif code == 101:
            textOut = ""  # Presumably "no text found" - row keeps empty text
        else:
            textOut = f'[Error] OCR failed. Code: {res["code"]}, Msg: {res["data"]} \n【异常】OCR识别失败。'

        # Append to the file created in __init__.
        with open(self.outputPath, "a", encoding="utf-8", newline="") as f:
            csv.writer(f).writerow([name, textOut, path])
3 changes: 3 additions & 0 deletions UmiOCR-data/pyapp/tag_pages/BatchOCR.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from ..ocr.output.output_txt_individual import OutputTxtIndividual
from ..ocr.output.output_md import OutputMD
from ..ocr.output.output_jsonl import OutputJsonl
from ..ocr.output.output_csv import OutputCsv

import os
import time
Expand Down Expand Up @@ -120,6 +121,8 @@ def __initOutputList(self, argd): # 初始化输出器列表,无异常返回T
self.outputList.append(OutputMD(outputArgd))
if argd["mission.filesType.jsonl"]: # jsonl
self.outputList.append(OutputJsonl(outputArgd))
if argd["mission.filesType.csv"]: # csv
self.outputList.append(OutputCsv(outputArgd))
except Exception as e:
self.__onEnd(
None,
Expand Down
7 changes: 6 additions & 1 deletion UmiOCR-data/res/qml/TabPages/BatchOCR/BatchOCRConfigs.qml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ Configs {
"datetimeFormat": {
"title": qsTr("日期时间格式"),
"toolTip": qsTr("文件名中 %date 的日期格式。支持插入以下占位符:\n%Y 年、 %m 月、 %d 日、 %H 小时、 \n%M 分钟、 %S 秒 、 %unix 时间戳 \n举例:%Y年%m月%d日_%H-%M\n生成:2023年09月01日_12-13.txt"),
"default": "%Y-%m-%d_%H-%M",
"default": "%Y%m%d_%H%M",
"advanced": true, // 高级选项
},

Expand Down Expand Up @@ -74,6 +74,11 @@ Configs {
"toolTip": qsTr("Markdown图文混排格式,可用Markdown阅读器浏览文件"),
"default": false,
},
"csv": {
"title": qsTr("csv 表格文件(Excel)"),
"toolTip": qsTr("将图片信息和识别内容写入csv表格文件。可用Excel打开,另存为xlsx格式。"),
"default": false,
},
"jsonl": {
"title": qsTr("jsonl 原始信息"),
"toolTip": qsTr("每行为一条json数据,便于第三方程序读取操作"),
Expand Down

0 comments on commit ec5ae82

Please sign in to comment.