1. 修复 slice bounds out of range 错误（感谢 xiaoxinmiao 的代码合并）

2. 开放了小语种的识别 3. 修复 lrc 歌词文件生成的格式问题 4. 增加了尝试多次查询语音识别结果的机制，避免由于偶然的网络错误导致语音识别时长的浪费
tv0001 · Jul 21, 2021 · f88155e · f88155e
1 parent 6b68392
commit f88155e
Show file tree

Hide file tree

Showing 6 changed files with 74 additions and 51 deletions.
diff --git a/README.md b/README.md
@@ -11,7 +11,7 @@
 
 这是一个可以识别视频语音自动生成字幕SRT文件的开源软件工具。<br />适用于快速、批量的为媒体（视频/音频）生成中/英文字幕、文本文件的业务场景。
 
-0.3.2 版本将会使用以下接口：
+0.3.3 版本将会使用以下接口：
 - 阿里云 [OSS对象存储](https://www.aliyun.com/product/oss?spm=5176.12825654.eofdhaal5.13.e9392c4aGfj5vj&aly_as=K11FcpO8)
 - 阿里云 [录音文件识别](https://ai.aliyun.com/nls/filetrans?spm=5176.12061031.1228726.1.47fe3cb43I34mn) 
 - 百度翻译开放平台 [翻译API](http://api.fanyi.baidu.com/api/trans/product/index) 
@@ -55,8 +55,8 @@ B站Up主自制教程：[https://search.bilibili.com/all?keyword=videosrt](https
 
 <a name="e66a66f1"></a>
 ##### 下载地址:
-- (v0.3.2)（含ffmpeg依赖） [点我下载](http://file.viggo.site/video-srt/0.3.2/video-srt-gui-ffmpeg-0.3.2-x64.zip)
-- (v0.3.2)（不含ffmpeg依赖） [点我下载](http://file.viggo.site/video-srt/0.3.2/video-srt-gui-0.3.2-x64.zip)
+- (v0.3.3)（含ffmpeg依赖） [点我下载](http://file.viggo.site/video-srt/0.3.3/video-srt-gui-ffmpeg-0.3.3-x64.zip)
+- (v0.3.3)（不含ffmpeg依赖） [点我下载](http://file.viggo.site/video-srt/0.3.3/video-srt-gui-0.3.3-x64.zip)
 - (v0.2.6)（含ffmpeg依赖） [点我下载](http://file.viggo.site/video-srt/0.2.6/video-srt-gui-ffmpeg-0.2.6-x64.zip)
 - (v0.2.6)（不含ffmpeg依赖） [点我下载](http://file.viggo.site/video-srt/0.2.6/video-srt-gui-0.2.6-x64.zip)
 
@@ -75,6 +75,16 @@ B站Up主自制教程：[https://search.bilibili.com/all?keyword=videosrt](https
 - 然后用旧版本软件的 `data` 文件夹覆盖新版软件的 `data` 文件夹
 - 0.2.6 升级至 0.2.9 以上的版本时，由于翻译设置无法直接兼容低版本，可能需要重新在软件创建翻译引擎才能继续使用翻译功能
 
+## 其他
+近日，有小伙伴在淘宝、闲鱼上发现一些界面相似的盗版且额外收费的软件。由于`VideoSrt` 是开源软件，所以不排除个别不良用户通过微调软件源代码，然后进行二次售卖甚至传播病毒。 
+
+目前已知的有：`大象字幕`
+
+因此，软件的安装包不要在第三方平台渠道下载，尽量在 github/码云/帮助文档 等官方渠道的页面进行下载。
+
+小伙伴们要擦亮眼睛，打击盗版，望周知！
+
+
 ## FAQ
 ##### 1.为什么Linux和Mac不能用？
 因为`VideoSrt`的GUI是使用[lxn/walk](https://github.com/lxn/walk)开发的，仅支持Windows的GUI，如果您想在Linux上使用，可以体验[CLI版本](https://github.com/wxbool/video-srt)

diff --git a/app/aliyun/engine.go b/app/aliyun/engine.go
@@ -3,9 +3,9 @@ package aliyun
 import (
 	"encoding/json"
 	"errors"
-	"fmt"
 	"github.com/aliyun/alibaba-cloud-sdk-go/sdk"
 	"github.com/aliyun/alibaba-cloud-sdk-go/sdk/requests"
+	"github.com/aliyun/alibaba-cloud-sdk-go/sdk/responses"
 	"strconv"
 	"time"
 )
@@ -66,14 +66,14 @@ const KEY_APP_KEY string = "appkey"
 const KEY_FILE_LINK string = "file_link"
 const KEY_VERSION string = "version"
 const KEY_ENABLE_WORDS string = "enable_words"
-//是否启用统一后处理，默认值为 false
-const ENABLE_UNIFY_POST string = "enable_unify_post"
-//是否打开ITN，中文数字将转为阿拉伯数字输出，默认值为 false
-//开启时需要设置version为”4.0”， enable_unify_post 必须为 true
-const ENABLE_INVERSE_TEXT_NORMALIZATION string = "enable_inverse_text_normalization"
-//如需启用后处理模型，默认值为 chinese，开启时需要设置version为”4.0”，
-//enable_unify_post 必须为 true，可选值为 english
-const UNIFY_POST_MODEL_NAME string = "unify_post_model_name"
+
+//是否打开ITN，中文数字将转为阿拉伯数字输出，默认值为false
+const KEY_ENABLE_INVERSE_TEXT_NORMAL = "enable_inverse_text_normalization"
+//是否启用语义断句，取值：true/false，默认值false
+const KEY_ENABLE_SEMANTIC_SENTENCE_DETECTION = "enable_semantic_sentence_detection"
+//是否启用时间戳校准功能，取值：true/false，默认值false
+const KEY_ENABLE_TIMESTAMP_ALIGNMENT = "enable_timestamp_alignment"
+
 // 响应参数key
 const KEY_TASK string = "Task"
 const KEY_TASK_ID string = "TaskId"
@@ -90,8 +90,6 @@ const STATUS_QUEUEING string = "QUEUEING"
 func (c AliyunClound) NewAudioFile(fileLink string) (string , *sdk.Client , error) {
 	regionId, domain, apiVersion, product := c.GetApiVariable()
 
-	fmt.Println( regionId, domain, apiVersion, product , c.Region )
-
 	client, err := sdk.NewClientWithAccessKey(regionId, c.AccessKeyId, c.AccessKeySecret)
 	if err != nil {
 		return "" , client , err
@@ -114,10 +112,10 @@ func (c AliyunClound) NewAudioFile(fileLink string) (string , *sdk.Client , erro
 	// 设置是否输出词信息，默认为false，开启时需要设置version为4.0
 	mapTask[KEY_ENABLE_WORDS] = "true"
 
-	//启用统一后处理
-	//mapTask[ENABLE_UNIFY_POST] = "true"
-	//mapTask[ENABLE_INVERSE_TEXT_NORMALIZATION] = "true"
-	//mapTask[UNIFY_POST_MODEL_NAME] = "chinese"
+	//统一后处理
+	mapTask[KEY_ENABLE_INVERSE_TEXT_NORMAL] = "true"
+	mapTask[KEY_ENABLE_SEMANTIC_SENTENCE_DETECTION] = "true"
+	mapTask[KEY_ENABLE_TIMESTAMP_ALIGNMENT] = "true"
 
 	// to json
 	task, err := json.Marshal(mapTask)
@@ -158,7 +156,7 @@ func (c AliyunClound) NewAudioFile(fileLink string) (string , *sdk.Client , erro
 
 //获取录音文件识别结果
 //接口文档 https://help.aliyun.com/document_detail/90727.html?spm=a2c4g.11186623.6.581.691af6ebYsUkd1
-func (c AliyunClound) GetAudioFileResult(taskId string , client *sdk.Client , callback func(result []byte)) error {
+func (c AliyunClound) GetAudioFileResult(taskId string , client *sdk.Client , logOutput func(text string) , callback func(result []byte)) (err error) {
 	_, domain, apiVersion, product := c.GetApiVariable()
 
 	getRequest := requests.NewCommonRequest()
@@ -170,27 +168,44 @@ func (c AliyunClound) GetAudioFileResult(taskId string , client *sdk.Client , ca
 	getRequest.QueryParams[KEY_TASK_ID] = taskId
 	statusText := ""
 
+
+	var (
+		trys = 0
+		getResponse *responses.CommonResponse
+		getResponseContent string
+	)
+
 	//遍历获取识别结果
-	for true {
-		getResponse, err := client.ProcessCommonRequest(getRequest)
+	for trys < 10 {
+
+		if trys != 0 {
+			logOutput("尝试重新查询识别结果，第" + strconv.Itoa(trys) + "次")
+		}
+
+		getResponse, err = client.ProcessCommonRequest(getRequest)
 		if err != nil {
-			return err
+			logOutput("查询识别结果失败：" + err.Error())
+			trys++
+			time.Sleep(time.Second * time.Duration(trys))
+			continue
 		}
-		getResponseContent := getResponse.GetHttpContentString()
 
+		getResponseContent = getResponse.GetHttpContentString()
 		if (getResponse.GetHttpStatus() != 200) {
-			return errors.New("识别结果查询请求失败 , Http错误码 : " + strconv.Itoa(getResponse.GetHttpStatus()))
+			logOutput("查询识别结果失败，Http错误码：" + strconv.Itoa(getResponse.GetHttpStatus()))
+			trys++
+			time.Sleep(time.Second * time.Duration(trys))
+			continue
 		}
 
 		var getMapResult map[string]interface{}
 		err = json.Unmarshal([]byte(getResponseContent), &getMapResult)
 		if err != nil {
-			return err
+			trys++
+			logOutput("查询识别结果失败，解析结果失败：" + err.Error())
+			continue
 		}
 
-		//调用回调函数
-		callback(getResponse.GetHttpContentBytes())
-
 		//校验遍历条件
 		statusText = getMapResult[KEY_STATUS_TEXT].(string)
 		if statusText == STATUS_RUNNING || statusText == STATUS_QUEUEING {
@@ -200,11 +215,14 @@ func (c AliyunClound) GetAudioFileResult(taskId string , client *sdk.Client , ca
 		}
 	}
 
-	if statusText != STATUS_SUCCESS {
-		return errors.New("录音文件识别失败 , (" + c.GetErrorStatusTextMessage(statusText) + ")")
+	if statusText == STATUS_SUCCESS && getResponse != nil {
+		//调用回调函数
+		callback(getResponse.GetHttpContentBytes())
+	} else {
+		err = errors.New("录音文件识别失败 , (" + c.GetErrorStatusTextMessage(statusText) + ")")
+		return
 	}
-
-	return nil
+	return
 }
 
 

diff --git a/app/ffmpeg/ffmpeg.go b/app/ffmpeg/ffmpeg.go
@@ -44,7 +44,6 @@ func VailFfmpegLibrary() error {
 //校验ffmpeg并加入临时环境变量
 func VailTempFfmpegLibrary(rootDir string)  {
 	ffmpegDir := tool.WinDir(rootDir + "/ffmpeg")
-	//fmt.Println("VailTempFfmpegLibrary " , ffmpegDir)
 
 	if tool.DirExists(ffmpegDir) {
 		//临时加入用户环境变量

diff --git a/app/tool/tool.go b/app/tool/tool.go
@@ -167,7 +167,7 @@ func MusicLrcTextMillisecond(time int64) string {
 	var minText = RepeatStr(strconv.FormatInt(min , 10) , "0" , 2 , true)
 	var millisecondText = RepeatStr(strconv.FormatInt(millisecond , 10) , "0" , 2 , true)
 
-	return minText + ":" + miaoText + ":" + millisecondText
+	return minText + ":" + miaoText + "." + millisecondText
 }
 
 

diff --git a/app/video.go b/app/video.go
@@ -234,7 +234,7 @@ func (app *VideoSrt) Run(video string) {
 	}
 
 	//阿里云录音文件识别
-	AudioResult , IntelligentBlockResult := AliyunAudioRecognition(app , app.AliyunClound, filelink)
+	AudioResult , IntelligentBlockResult := AliyunAudioRecognition(app , video , app.AliyunClound, filelink)
 
 	app.Log("文件识别成功 , 字幕处理中 ..." , video)
 
@@ -398,7 +398,7 @@ func CheckEmptyResult(AudioResult map[int64][] *aliyun.AliyunAudioRecognitionRes
 
 
 //阿里云录音文件识别
-func AliyunAudioRecognition(app *VideoSrt , engine aliyun.AliyunClound , filelink string) (AudioResult map[int64][] *aliyun.AliyunAudioRecognitionResult , IntelligentBlockResult map[int64][] *aliyun.AliyunAudioRecognitionResult) {
+func AliyunAudioRecognition(app *VideoSrt , video string , engine aliyun.AliyunClound , filelink string) (AudioResult map[int64][] *aliyun.AliyunAudioRecognitionResult , IntelligentBlockResult map[int64][] *aliyun.AliyunAudioRecognitionResult) {
 	//创建识别请求
 	taskid, client, e := engine.NewAudioFile(filelink)
 	if e != nil {
@@ -409,9 +409,10 @@ func AliyunAudioRecognition(app *VideoSrt , engine aliyun.AliyunClound , filelin
 	IntelligentBlockResult = make(map[int64][] *aliyun.AliyunAudioRecognitionResult)
 
 	//遍历获取识别结果
-	resultError := engine.GetAudioFileResult(taskid , client , func(result []byte) {
-		//mylog.WriteLog(string(result))
-
+	resultError := engine.GetAudioFileResult(taskid , client , func(text string) {
+		//日志输出
+		app.Log(text , video)
+	} , func(result []byte) {
 		//结果处理
 		statusText, _ := jsonparser.GetString(result, "StatusText") //结果状态
 

diff --git a/main.go b/main.go
@@ -13,7 +13,7 @@ import (
 )
 
 //应用版本号
-const APP_VERSION = "0.3.2"
+const APP_VERSION = "0.3.3"
 
 var AppRootDir string
 var mw *MyMainWindow
@@ -310,6 +310,12 @@ func main() {
 						_ = tool.OpenUrl("https://www.mu-yan.net/")
 					},
 				},
+				Menu{
+					Text:  "实时字幕小工具",
+					OnTriggered: func() {
+						_ = tool.OpenUrl("https://gitee.com/641453620/livecaption")
+					},
+				},
 			},
 		},
 		Size: Size{800, 650},
@@ -769,17 +775,6 @@ func main() {
 								mw.NewErrormationTips("错误" , "请先设置Oss对象配置")
 								return
 							}
-							//校验输入语言
-							if tempAppSetting.InputLanguage != LANGUAGE_ZH &&
-								tempAppSetting.InputLanguage != LANGUAGE_EN &&
-								tempAppSetting.InputLanguage != LANGUAGE_JP &&
-								tempAppSetting.InputLanguage != LANGUAGE_KOR &&
-								tempAppSetting.InputLanguage != LANGUAGE_RU &&
-								tempAppSetting.InputLanguage != LANGUAGE_SPA {
-								mw.NewErrormationTips("错误" , "由于语音提供商的限制，生成字幕仅支持中文、英文、日语、韩语、俄语、西班牙语")
-								return
-							}
-
 							//查询选择的语音引擎
 							if tempAppSetting.CurrentEngineId == 0 {
 								mw.NewErrormationTips("错误" , "请先新建/选择语音引擎")