Skip to content

Commit

Permalink
1. 修复 slice bounds out of range 错误(感谢 xiaoxinmiao 的代码合并)
Browse files Browse the repository at this point in the history
2. 开放了小语种的识别
3. 修复 lrc 歌词文件生成的格式问题
4. 增加了尝试多次查询语音识别结果的机制,避免由于偶然的网络错误导致语音识别时长的浪费
  • Loading branch information
Viggo-Local committed Jul 21, 2021
1 parent 6b68392 commit f88155e
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 51 deletions.
16 changes: 13 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

这是一个可以识别视频语音自动生成字幕SRT文件的开源软件工具。<br />适用于快速、批量地为媒体(视频/音频)生成中/英文字幕、文本文件的业务场景。

0.3.2 版本将会使用以下接口:
0.3.3 版本将会使用以下接口:
- 阿里云 [OSS对象存储](https://www.aliyun.com/product/oss?spm=5176.12825654.eofdhaal5.13.e9392c4aGfj5vj&aly_as=K11FcpO8)
- 阿里云 [录音文件识别](https://ai.aliyun.com/nls/filetrans?spm=5176.12061031.1228726.1.47fe3cb43I34mn)
- 百度翻译开放平台 [翻译API](http://api.fanyi.baidu.com/api/trans/product/index)
Expand Down Expand Up @@ -55,8 +55,8 @@ B站Up主自制教程:[https://search.bilibili.com/all?keyword=videosrt](https

<a name="e66a66f1"></a>
##### 下载地址:
- (v0.3.2)(含ffmpeg依赖) [点我下载](http://file.viggo.site/video-srt/0.3.2/video-srt-gui-ffmpeg-0.3.2-x64.zip)
- (v0.3.2)(不含ffmpeg依赖) [点我下载](http://file.viggo.site/video-srt/0.3.2/video-srt-gui-0.3.2-x64.zip)
- (v0.3.3)(含ffmpeg依赖) [点我下载](http://file.viggo.site/video-srt/0.3.3/video-srt-gui-ffmpeg-0.3.3-x64.zip)
- (v0.3.3)(不含ffmpeg依赖) [点我下载](http://file.viggo.site/video-srt/0.3.3/video-srt-gui-0.3.3-x64.zip)
- (v0.2.6)(含ffmpeg依赖) [点我下载](http://file.viggo.site/video-srt/0.2.6/video-srt-gui-ffmpeg-0.2.6-x64.zip)
- (v0.2.6)(不含ffmpeg依赖) [点我下载](http://file.viggo.site/video-srt/0.2.6/video-srt-gui-0.2.6-x64.zip)

Expand All @@ -75,6 +75,16 @@ B站Up主自制教程:[https://search.bilibili.com/all?keyword=videosrt](https
- 然后用旧版本软件的 `data` 文件夹覆盖新版软件的 `data` 文件夹
- 0.2.6 升级至 0.2.9 以上的版本时,由于翻译设置无法直接兼容低版本,可能需要重新在软件创建翻译引擎才能继续使用翻译功能

## 其他
近日,有小伙伴在淘宝、闲鱼上发现一些界面相似的盗版且额外收费的软件。由于`VideoSrt` 是开源软件,所以不排除个别不良用户通过微调软件源代码,然后进行二次售卖甚至传播病毒。

目前已知的有:`大象字幕`

因此,软件的安装包不要在第三方平台渠道下载,尽量在 GitHub/码云/帮助文档 等官方渠道的页面进行下载。

小伙伴们要擦亮眼睛,打击盗版,望周知!


## FAQ
##### 1.为什么Linux和Mac不能用?
因为`VideoSrt`的GUI是使用[lxn/walk](https://github.com/lxn/walk)开发的,仅支持Windows的GUI,如果您想在Linux上使用,可以体验[CLI版本](https://github.com/wxbool/video-srt)
Expand Down
76 changes: 47 additions & 29 deletions app/aliyun/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ package aliyun
import (
"encoding/json"
"errors"
"fmt"
"github.com/aliyun/alibaba-cloud-sdk-go/sdk"
"github.com/aliyun/alibaba-cloud-sdk-go/sdk/requests"
"github.com/aliyun/alibaba-cloud-sdk-go/sdk/responses"
"strconv"
"time"
)
Expand Down Expand Up @@ -66,14 +66,14 @@ const KEY_APP_KEY string = "appkey"
const KEY_FILE_LINK string = "file_link"
const KEY_VERSION string = "version"
const KEY_ENABLE_WORDS string = "enable_words"
//是否启用统一后处理,默认值为 false
const ENABLE_UNIFY_POST string = "enable_unify_post"
//是否打开ITN,中文数字将转为阿拉伯数字输出,默认值为 false
//开启时需要设置version为”4.0”, enable_unify_post 必须为 true
const ENABLE_INVERSE_TEXT_NORMALIZATION string = "enable_inverse_text_normalization"
//如需启用后处理模型,默认值为 chinese,开启时需要设置version为”4.0”,
//enable_unify_post 必须为 true,可选值为 english
const UNIFY_POST_MODEL_NAME string = "unify_post_model_name"

//是否打开ITN,中文数字将转为阿拉伯数字输出,默认值为false
const KEY_ENABLE_INVERSE_TEXT_NORMAL = "enable_inverse_text_normalization"
//是否启用语义断句,取值:true/false,默认值false
const KEY_ENABLE_SEMANTIC_SENTENCE_DETECTION = "enable_semantic_sentence_detection"
//是否启用时间戳校准功能,取值:true/false,默认值false
const KEY_ENABLE_TIMESTAMP_ALIGNMENT = "enable_timestamp_alignment"

// 响应参数key
const KEY_TASK string = "Task"
const KEY_TASK_ID string = "TaskId"
Expand All @@ -90,8 +90,6 @@ const STATUS_QUEUEING string = "QUEUEING"
func (c AliyunClound) NewAudioFile(fileLink string) (string , *sdk.Client , error) {
regionId, domain, apiVersion, product := c.GetApiVariable()

fmt.Println( regionId, domain, apiVersion, product , c.Region )

client, err := sdk.NewClientWithAccessKey(regionId, c.AccessKeyId, c.AccessKeySecret)
if err != nil {
return "" , client , err
Expand All @@ -114,10 +112,10 @@ func (c AliyunClound) NewAudioFile(fileLink string) (string , *sdk.Client , erro
// 设置是否输出词信息,默认为false,开启时需要设置version为4.0
mapTask[KEY_ENABLE_WORDS] = "true"

//启用统一后处理
//mapTask[ENABLE_UNIFY_POST] = "true"
//mapTask[ENABLE_INVERSE_TEXT_NORMALIZATION] = "true"
//mapTask[UNIFY_POST_MODEL_NAME] = "chinese"
//统一后处理
mapTask[KEY_ENABLE_INVERSE_TEXT_NORMAL] = "true"
mapTask[KEY_ENABLE_SEMANTIC_SENTENCE_DETECTION] = "true"
mapTask[KEY_ENABLE_TIMESTAMP_ALIGNMENT] = "true"

// to json
task, err := json.Marshal(mapTask)
Expand Down Expand Up @@ -158,7 +156,7 @@ func (c AliyunClound) NewAudioFile(fileLink string) (string , *sdk.Client , erro

//获取录音文件识别结果
//接口文档 https://help.aliyun.com/document_detail/90727.html?spm=a2c4g.11186623.6.581.691af6ebYsUkd1
func (c AliyunClound) GetAudioFileResult(taskId string , client *sdk.Client , callback func(result []byte)) error {
func (c AliyunClound) GetAudioFileResult(taskId string , client *sdk.Client , logOutput func(text string) , callback func(result []byte)) (err error) {
_, domain, apiVersion, product := c.GetApiVariable()

getRequest := requests.NewCommonRequest()
Expand All @@ -170,27 +168,44 @@ func (c AliyunClound) GetAudioFileResult(taskId string , client *sdk.Client , ca
getRequest.QueryParams[KEY_TASK_ID] = taskId
statusText := ""


var (
trys = 0
getResponse *responses.CommonResponse
getResponseContent string
)

//遍历获取识别结果
for true {
getResponse, err := client.ProcessCommonRequest(getRequest)
for trys < 10 {

if trys != 0 {
logOutput("尝试重新查询识别结果,第" + strconv.Itoa(trys) + "次")
}

getResponse, err = client.ProcessCommonRequest(getRequest)
if err != nil {
return err
logOutput("查询识别结果失败:" + err.Error())
trys++
time.Sleep(time.Second * time.Duration(trys))
continue
}
getResponseContent := getResponse.GetHttpContentString()

getResponseContent = getResponse.GetHttpContentString()
if (getResponse.GetHttpStatus() != 200) {
return errors.New("识别结果查询请求失败 , Http错误码 : " + strconv.Itoa(getResponse.GetHttpStatus()))
logOutput("查询识别结果失败,Http错误码:" + strconv.Itoa(getResponse.GetHttpStatus()))
trys++
time.Sleep(time.Second * time.Duration(trys))
continue
}

var getMapResult map[string]interface{}
err = json.Unmarshal([]byte(getResponseContent), &getMapResult)
if err != nil {
return err
trys++
logOutput("查询识别结果失败,解析结果失败:" + err.Error())
continue
}

//调用回调函数
callback(getResponse.GetHttpContentBytes())

//校验遍历条件
statusText = getMapResult[KEY_STATUS_TEXT].(string)
if statusText == STATUS_RUNNING || statusText == STATUS_QUEUEING {
Expand All @@ -200,11 +215,14 @@ func (c AliyunClound) GetAudioFileResult(taskId string , client *sdk.Client , ca
}
}

if statusText != STATUS_SUCCESS {
return errors.New("录音文件识别失败 , (" + c.GetErrorStatusTextMessage(statusText) + ")")
if statusText == STATUS_SUCCESS && getResponse != nil {
//调用回调函数
callback(getResponse.GetHttpContentBytes())
} else {
err = errors.New("录音文件识别失败 , (" + c.GetErrorStatusTextMessage(statusText) + ")")
return
}

return nil
return
}


Expand Down
1 change: 0 additions & 1 deletion app/ffmpeg/ffmpeg.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ func VailFfmpegLibrary() error {
//校验ffmpeg并加入临时环境变量
func VailTempFfmpegLibrary(rootDir string) {
ffmpegDir := tool.WinDir(rootDir + "/ffmpeg")
//fmt.Println("VailTempFfmpegLibrary " , ffmpegDir)

if tool.DirExists(ffmpegDir) {
//临时加入用户环境变量
Expand Down
2 changes: 1 addition & 1 deletion app/tool/tool.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ func MusicLrcTextMillisecond(time int64) string {
var minText = RepeatStr(strconv.FormatInt(min , 10) , "0" , 2 , true)
var millisecondText = RepeatStr(strconv.FormatInt(millisecond , 10) , "0" , 2 , true)

return minText + ":" + miaoText + ":" + millisecondText
return minText + ":" + miaoText + "." + millisecondText
}


Expand Down
11 changes: 6 additions & 5 deletions app/video.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ func (app *VideoSrt) Run(video string) {
}

//阿里云录音文件识别
AudioResult , IntelligentBlockResult := AliyunAudioRecognition(app , app.AliyunClound, filelink)
AudioResult , IntelligentBlockResult := AliyunAudioRecognition(app , video , app.AliyunClound, filelink)

app.Log("文件识别成功 , 字幕处理中 ..." , video)

Expand Down Expand Up @@ -398,7 +398,7 @@ func CheckEmptyResult(AudioResult map[int64][] *aliyun.AliyunAudioRecognitionRes


//阿里云录音文件识别
func AliyunAudioRecognition(app *VideoSrt , engine aliyun.AliyunClound , filelink string) (AudioResult map[int64][] *aliyun.AliyunAudioRecognitionResult , IntelligentBlockResult map[int64][] *aliyun.AliyunAudioRecognitionResult) {
func AliyunAudioRecognition(app *VideoSrt , video string , engine aliyun.AliyunClound , filelink string) (AudioResult map[int64][] *aliyun.AliyunAudioRecognitionResult , IntelligentBlockResult map[int64][] *aliyun.AliyunAudioRecognitionResult) {
//创建识别请求
taskid, client, e := engine.NewAudioFile(filelink)
if e != nil {
Expand All @@ -409,9 +409,10 @@ func AliyunAudioRecognition(app *VideoSrt , engine aliyun.AliyunClound , filelin
IntelligentBlockResult = make(map[int64][] *aliyun.AliyunAudioRecognitionResult)

//遍历获取识别结果
resultError := engine.GetAudioFileResult(taskid , client , func(result []byte) {
//mylog.WriteLog(string(result))

resultError := engine.GetAudioFileResult(taskid , client , func(text string) {
//日志输出
app.Log(text , video)
} , func(result []byte) {
//结果处理
statusText, _ := jsonparser.GetString(result, "StatusText") //结果状态

Expand Down
19 changes: 7 additions & 12 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import (
)

//应用版本号
const APP_VERSION = "0.3.2"
const APP_VERSION = "0.3.3"

var AppRootDir string
var mw *MyMainWindow
Expand Down Expand Up @@ -310,6 +310,12 @@ func main() {
_ = tool.OpenUrl("https://www.mu-yan.net/")
},
},
Menu{
Text: "实时字幕小工具",
OnTriggered: func() {
_ = tool.OpenUrl("https://gitee.com/641453620/livecaption")
},
},
},
},
Size: Size{800, 650},
Expand Down Expand Up @@ -769,17 +775,6 @@ func main() {
mw.NewErrormationTips("错误" , "请先设置Oss对象配置")
return
}
//校验输入语言
if tempAppSetting.InputLanguage != LANGUAGE_ZH &&
tempAppSetting.InputLanguage != LANGUAGE_EN &&
tempAppSetting.InputLanguage != LANGUAGE_JP &&
tempAppSetting.InputLanguage != LANGUAGE_KOR &&
tempAppSetting.InputLanguage != LANGUAGE_RU &&
tempAppSetting.InputLanguage != LANGUAGE_SPA {
mw.NewErrormationTips("错误" , "由于语音提供商的限制,生成字幕仅支持中文、英文、日语、韩语、俄语、西班牙语")
return
}

//查询选择的语音引擎
if tempAppSetting.CurrentEngineId == 0 {
mw.NewErrormationTips("错误" , "请先新建/选择语音引擎")
Expand Down

0 comments on commit f88155e

Please sign in to comment.