Skip to content

Commit

Permalink
refactor: Optimize the way to obtain GPU computing power.
Browse files Browse the repository at this point in the history
  • Loading branch information
xixihahaliu committed Apr 9, 2024
1 parent bf66629 commit 8a051e5
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 15 deletions.
20 changes: 20 additions & 0 deletions scripts/get_cuda_capability.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import pycuda.driver as cuda
import pycuda.autoinit
import sys # 导入sys模块来读取命令行参数

def get_cuda_device_major_minor(device_id=0):
    """Return a CUDA device's compute capability as a "major.minor" string.

    Args:
        device_id: Ordinal of the CUDA device to query. Defaults to 0.

    Returns:
        The device's COMPUTE_CAPABILITY_MAJOR and COMPUTE_CAPABILITY_MINOR
        attribute values joined by a dot.
    """
    cuda.init()
    attrs = cuda.Device(device_id).get_attributes()
    attr_keys = cuda.device_attribute
    return "{}.{}".format(
        attrs[attr_keys.COMPUTE_CAPABILITY_MAJOR],
        attrs[attr_keys.COMPUTE_CAPABILITY_MINOR],
    )

# The device ordinal is the first command-line argument; fall back to 0.
device_id = int(sys.argv[1]) if len(sys.argv) > 1 else 0

# Print the result on stdout so the calling shell script can capture it.
cmp_ver = get_cuda_device_major_minor(device_id)
print(cmp_ver)
10 changes: 6 additions & 4 deletions scripts/run_for_cloud_option.sh
Original file line number Diff line number Diff line change
Expand Up @@ -123,10 +123,12 @@ echo "GPU ID: $gpu_id1, $gpu_id2"
# Check whether the hardware matches the launch parameters
# Query the GPU model name
gpu_model=$(nvidia-smi --query-gpu=gpu_name --format=csv,noheader,nounits -i $gpu_id1)
compute_capability=$(jq -r ".[\"$gpu_model\"]" /workspace/qanything_local/scripts/gpu_capabilities.json)
# If compute_capability is empty, the GPU model is not listed in gpu_capabilities.json
if [ -z "$compute_capability" ]; then
echo "您的显卡型号 $gpu_model 不在支持列表中,请联系技术支持。"
# compute_capability=$(jq -r ".[\"$gpu_model\"]" /workspace/qanything_local/scripts/gpu_capabilities.json)
# Run the Python helper stored alongside this script, pass it the GPU index,
# and capture the "major.minor" compute capability it prints on stdout.
# The helper path is resolved from $0 so the call does not depend on the
# current working directory; an unset GPU index falls back to the helper's
# own default of 0.
compute_capability=$(python3 "$(dirname -- "$0")/get_cuda_capability.py" "${gpu_id1:-0}")
status=$?  # exit status of the Python helper
# A non-zero exit status or empty output both mean the capability is unknown.
if [ "$status" -ne 0 ] || [ -z "$compute_capability" ]; then
    echo "您的显卡型号 $gpu_model 获取算力时出错,请联系技术支持。"
    exit 1
fi
echo "GPU1 Model: $gpu_model"
Expand Down
21 changes: 10 additions & 11 deletions scripts/run_for_local_option.sh
Original file line number Diff line number Diff line change
Expand Up @@ -150,14 +150,16 @@ gpu_model=$(nvidia-smi --query-gpu=gpu_name --format=csv,noheader,nounits -i $gp
# Keep only the part of the model name that precedes the first '-'
base_gpu_model=$(echo $gpu_model | grep -o '^[^-]*')
# NVIDIA RTX 30-series, RTX 40-series, or A-series cards, e.g. A10, A30, A40, A100, A800
gpu_series=$(echo $gpu_model | grep -oP '(RTX\s*(30|40)|A(10|30|40|100|800))')
if ! command -v jq &> /dev/null; then
echo "Error: jq 命令不存在,请使用 sudo apt update && sudo apt-get install jq 安装,再重新启动。"
exit 1
fi
compute_capability=$(jq -r ".[\"$base_gpu_model\"]" /workspace/qanything_local/scripts/gpu_capabilities.json)
# jq -r prints "null" for a missing key, i.e. the GPU model is not listed in gpu_capabilities.json
if [ "$compute_capability" == "null" ]; then
echo "您的显卡型号 $gpu_model 不在支持列表中,请联系技术支持。"
#if ! command -v jq &> /dev/null; then
# echo "Error: jq 命令不存在,请使用 sudo apt update && sudo apt-get install jq 安装,再重新启动。"
# exit 1
#fi
# compute_capability=$(jq -r ".[\"$base_gpu_model\"]" /workspace/qanything_local/scripts/gpu_capabilities.json)
# Run the Python helper stored alongside this script, pass it the GPU index,
# and capture the "major.minor" compute capability it prints on stdout.
# The helper path is resolved from $0 so the call does not depend on the
# current working directory; an unset GPU index falls back to the helper's
# own default of 0.
compute_capability=$(python3 "$(dirname -- "$0")/get_cuda_capability.py" "${gpu_id1:-0}")
status=$?  # exit status of the Python helper
# A non-zero exit status or empty output both mean the capability is unknown.
if [ "$status" -ne 0 ] || [ -z "$compute_capability" ]; then
    echo "您的显卡型号 $gpu_model 获取算力时出错,请联系技术支持。"
    exit 1
fi
echo "GPU1 Model: $gpu_model"
Expand Down Expand Up @@ -376,9 +378,6 @@ CUDA_VISIBLE_DEVICES=$gpu_id2 nohup python3 -u qanything_kernel/dependent_server
# Report that the OCR service is up (step 3 of 8), in English and Chinese
echo "The ocr service is ready! (3/8)"
echo "OCR服务已就绪! (3/8)"

# Install the Elasticsearch client wheels shipped under third_party/es/whl
pip install third_party/es/whl/elastic_transport-8.12.0-py3-none-any.whl
pip install third_party/es/whl/elasticsearch-8.12.1-py3-none-any.whl

# Start the Sanic API server in the background in "local" mode; stdout and
# stderr are both redirected to sanic_api.log
nohup python3 -u qanything_kernel/qanything_server/sanic_api.py --mode "local" > /workspace/qanything_local/logs/debug_logs/sanic_api.log 2>&1 &

# Wait for the backend service to start
Expand Down

0 comments on commit 8a051e5

Please sign in to comment.