diff --git a/models/aliyun_ftasr_api_zh/SBI b/models/aliyun_ftasr_api_zh/SBI index 0bdb982..cf6f19d 100755 --- a/models/aliyun_ftasr_api_zh/SBI +++ b/models/aliyun_ftasr_api_zh/SBI @@ -8,7 +8,9 @@ fi scp=$1 dir=$2 oss_scp=${dir}/wav_oss.scp +oss_loss_scp=${dir}/wav_oss_loss.scp part_nums=10 +loss_base_nums=10 test_time=$(date +"%Y-%m-%d-%H-%M-%S") echo "Test starts at "${test_time} @@ -16,8 +18,8 @@ echo "Test starts at "${test_time} # 可通过命令(wget https://gosspublic.alicdn.com/ossutil/1.7.13/ossutil64) 获取相同版本ossutil64(版本1.7.13) 进行md5sum一致性比较 # compare ossutil64 md5sum md5sum ossutil64 > ${dir}/ossutil64.md5.compare -diff -r ${dir}/ossutil64.md5.compare ./ossutil64.md5.txt -if [ $? -ne 0 ];then +diff -r ${dir}/ossutil64.md5.compare ./ossutil64.md5.txt > ${dir}/ossutil64.md5.compare.diff +if [ -s ${dir}/ossutil64.md5.compare.diff ];then echo "md5sum of ossutil64 is different! Exit!" exit 1 else @@ -26,7 +28,17 @@ fi # upload wav to oss -./upload_wav.py ${scp} ${oss_scp} ${dir} ${test_time} +./upload_wav.py ${scp} ${dir} speechiotest-${test_time} + +# check oss wav +./check_wav.py speechiotest-${test_time} ${scp} ${oss_loss_scp} ${oss_scp} +loss_wav_num=$(cat $oss_loss_scp | wc -l) +if [ ${loss_wav_num} -gt ${loss_base_nums} ];then + echo "Not found some audio in oss bucket! Please check" ${oss_loss_scp} + exit 1 +else + echo "Upload audio successfully. Continue..." +fi # split test set awk -vf=${part_nums} -vl="`wc -l $oss_scp`" 'BEGIN{p=int(l/f);q=(l%f);for(n=1;n<=f;n++)a[n]=n*p+((n<=q)?++x:x)}{if(NR>a[i])i++;print > "'$oss_scp'."i}' $oss_scp @@ -44,4 +56,4 @@ rm -rf ${dir}/raw_rec.txt.* rm -rf ${oss_scp}.* # delete oss bucket -./del_oss_bucket.py ${dir} ${test_time} +./del_oss_bucket.py ${dir} speechiotest-${test_time} diff --git a/models/aliyun_ftasr_api_zh/check_wav.py b/models/aliyun_ftasr_api_zh/check_wav.py new file mode 100755 index 0000000..e974208 --- /dev/null +++ b/models/aliyun_ftasr_api_zh/check_wav.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +''' +录音文件识别 接口说明 +https://help.aliyun.com/document_detail/90727.html +通过ossutil命令获取文件URL +egs: 为目标存储空间examplebucket下的文件exampleobject.png生成文件URL,并指定超时时间为3600秒。 + ./ossutil64 sign oss://examplebucket/exampleobject.png --timeout 3600 +''' +import sys +import os +import time +import codecs +import oss2 + + +access_key_id = "" +with open('ACCESS_KEY_ID', 'r') as f: + access_key_id = f.readline().strip() + +access_key_secret = "" +with open('ACCESS_KEY_SECRET', 'r') as f: + access_key_secret = f.readline().strip() + +def retry_upload(wavId, wavPath, ossPath, times=10): + upload_command = "./ossutil64 cp {wav} {osspath} -e oss-cn-hangzhou.aliyuncs.com -i {access_key_id} -k {access_key_secret}".format(wav=wavPath, osspath=ossPath, access_key_id=access_key_id, access_key_secret=access_key_secret) + upload_res = os.popen(upload_command) + for r in upload_res: + print(wavId + "\t" + r.strip()) + re_exist = ossbucket.object_exists('wav/{wavId}.wav'.format(wavId=wavId)) + retry_flag = False + if re_exist: + print('{wavId} {wavPath} object exist, retry {time} times successfully.'.format(wavId=wavId, wavPath=wavPath, time=10-times)) + sign_command = "./ossutil64 sign {osspath}{idx}.wav --timeout {times} -e oss-cn-hangzhou.aliyuncs.com -i {access_key_id} -k {access_key_secret}".format(osspath=ossPath, idx=wavId, times=32400, access_key_id=access_key_id, access_key_secret=access_key_secret) + res = os.popen(sign_command) + for audio in res: + wavOssFile.write(wavId + "\t" + audio) + break + retry_flag = True + return retry_flag + elif times > 0: + time.sleep(5) + print('{wavId} {wavPath} object not exist, will retry upload {time} times.'.format(wavId=wavId, wavPath=wavPath, time=times)) + retry_flag = retry_upload(wavId, wavPath, ossPath, times=times-1) + return retry_flag + + +if __name__ == "__main__": + if len(sys.argv) != 5: + sys.stderr.write("check_wav.py \n") + exit(-1) + + bucketName = sys.argv[1] + wavFile = codecs.open(sys.argv[2], 'r', 'utf8') + wavLossFile = codecs.open(sys.argv[3], 'w+', 'utf8') + wavOssFile = codecs.open(sys.argv[4], 'w+', 'utf8') + + auth = oss2.Auth(access_key_id, access_key_secret) + ossbucket = oss2.Bucket(auth, 'https://oss-cn-hangzhou.aliyuncs.com', bucketName) + + bucket = "oss://{bucketName}".format(bucketName=bucketName) + ossPath = bucket + "/wav/" + for meta in wavFile: + wavmeta = meta.split() + wavId = wavmeta[0] + wavPath = wavmeta[1] + + exist = ossbucket.object_exists('wav/{wavId}.wav'.format(wavId=wavId)) + if exist: + print('{wavId} {wavPath} object exist'.format(wavId=wavId, wavPath=wavPath)) + # 生成签名URL + sign_command = "./ossutil64 sign {osspath}{idx}.wav --timeout {times} -e oss-cn-hangzhou.aliyuncs.com -i {access_key_id} -k {access_key_secret}".format(osspath=ossPath, idx=wavId, times=32400, access_key_id=access_key_id, access_key_secret=access_key_secret) + res = os.popen(sign_command) + for audio in res: + wavOssFile.write(wavId + "\t" + audio) + break + else: + print('{wavId} {wavPath} object not exist'.format(wavId=wavId, wavPath=wavPath)) + retry_flag = retry_upload(wavId, wavPath, ossPath, times=10) + if not retry_flag: + wavLossFile.write(meta) + wavLossFile.flush() + + wavFile.close() + wavLossFile.close() diff --git a/models/aliyun_ftasr_api_zh/del_oss_bucket.py b/models/aliyun_ftasr_api_zh/del_oss_bucket.py index 5eb93b7..1320149 100755 --- a/models/aliyun_ftasr_api_zh/del_oss_bucket.py +++ b/models/aliyun_ftasr_api_zh/del_oss_bucket.py @@ -15,12 +15,15 @@ if __name__ == "__main__": if len(sys.argv) != 3: - sys.stderr.write("del_oss_bucket.py \n") + sys.stderr.write("del_oss_bucket.py \n") exit(-1) print("decoding finish") oss_config_path = sys.argv[1] - test_time = sys.argv[2] + bucketName = sys.argv[2] - bucket = "oss://speechiotest-{test_time}".format(test_time=test_time) + bucket = "oss://{bucketName}".format(bucketName=bucketName) del_command = "./ossutil64 rm {bucket} -b -a -r -f -e oss-cn-hangzhou.aliyuncs.com -i {access_key_id} -k {access_key_secret}".format(bucket=bucket, access_key_id=access_key_id, access_key_secret=access_key_secret) del_res = os.popen(del_command) + for u in del_res: + print(u) + time.sleep(20) diff --git a/models/aliyun_ftasr_api_zh/docker/Dockerfile b/models/aliyun_ftasr_api_zh/docker/Dockerfile index 01593f2..900a158 100644 --- a/models/aliyun_ftasr_api_zh/docker/Dockerfile +++ b/models/aliyun_ftasr_api_zh/docker/Dockerfile @@ -7,7 +7,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ python3-pip && \ rm -rf /var/lib/apt/lists/* -RUN pip3 install aliyun-python-sdk-core==2.13.3 +RUN pip3 install oss2 aliyun-python-sdk-core==2.13.3 # Use C.UTF-8 locale to avoid issues with ASCII encoding ENV LC_ALL=C.UTF-8 diff --git a/models/aliyun_ftasr_api_zh/upload_wav.py b/models/aliyun_ftasr_api_zh/upload_wav.py index 28f876c..95b4f42 100755 --- a/models/aliyun_ftasr_api_zh/upload_wav.py +++ b/models/aliyun_ftasr_api_zh/upload_wav.py @@ -21,17 +21,17 @@ access_key_secret = f.readline().strip() if __name__ == "__main__": - if len(sys.argv) != 5: - sys.stderr.write("upload_wav.py \n") + if len(sys.argv) != 4: + sys.stderr.write("upload_wav.py \n") exit(-1) wavDict = {} wavFile = codecs.open(sys.argv[1], 'r', 'utf8') - oss_config_path = sys.argv[3] - test_time = sys.argv[4] + oss_config_path = sys.argv[2] + bucketName = sys.argv[3] # OSS bucket - bucket = "oss://speechiotest-{test_time}/".format(test_time=test_time) + bucket = "oss://{bucketName}".format(bucketName=bucketName) for meta in wavFile: meta = meta.split() wavId = meta[0] @@ -40,17 +40,14 @@ # 创建bucket md_command = "./ossutil64 mb {bucket} -e oss-cn-hangzhou.aliyuncs.com -i {access_key_id} -k {access_key_secret}".format(bucket=bucket, access_key_id=access_key_id, access_key_secret=access_key_secret) md_res = os.popen(md_command) + for r in md_res: + print(r) time.sleep(5) - ossPath = bucket + "wav/" - ossWavFile = codecs.open(sys.argv[2], 'w+', 'utf8') - for idx, wav in wavDict.items(): + ossPath = bucket + "/wav/" + for wavIdx, wav in wavDict.items(): # 上传文件 upload_command = "./ossutil64 cp {wav} {osspath} -e oss-cn-hangzhou.aliyuncs.com -i {access_key_id} -k {access_key_secret}".format(wav=wav, osspath=ossPath, access_key_id=access_key_id, access_key_secret=access_key_secret) upload_res = os.popen(upload_command) - # 生成签名URL - sign_command = "./ossutil64 sign {osspath}{idx}.wav --timeout {times} -e oss-cn-hangzhou.aliyuncs.com -i {access_key_id} -k {access_key_secret}".format(osspath=ossPath, idx=idx, times=32400, access_key_id=access_key_id, access_key_secret=access_key_secret) - res = os.popen(sign_command) - for audio in res: - ossWavFile.write(idx + "\t" + audio) - break + for r in upload_res: + print(wavIdx + "\t" + r.strip())