[XPU] Update XPU L3 Cache setting docs (PaddlePaddle#2001)
* [patchelf] fix patchelf error for inference xpu

* [serving] add xpu dockerfile and support fd server

* [Serving] support XPU + Triton

* [Dockerfile] update xpu triton docker file -> paddle 0.0.0

* [Dockerfile] add comments for xpu triton dockerfile

* [Runtime] fix xpu infer error

* [XPU] update xpu dockerfile

* add xpu triton server docs

* update xpu triton server docs

* [XPU] Update XPU L3 Cache setting docs
DefTruth authored May 30, 2023
1 parent 434b48d commit 387c569
Showing 4 changed files with 14 additions and 0 deletions.
1 change: 1 addition & 0 deletions benchmark/cpp/option.h
@@ -34,6 +34,7 @@ static void UpdateBaseCustomFlags(
  if (FLAGS_xpu_l3_cache >= 0) {
    config_info["xpu_l3_cache"] = std::to_string(FLAGS_xpu_l3_cache);
  }
  // update custom options for paddle backend
  if (FLAGS_enable_log_info) {
    config_info["enable_log_info"] = "true";
  } else {
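As a rough illustration of what this hunk does, the flag-to-config mapping can be mimicked in shell (a sketch only, not part of the diff; the variable names are illustrative, and the L3 value is the one quoted later in the serving docs for an XPU R200):

```shell
# Illustrative sketch: fold FLAGS_xpu_l3_cache and FLAGS_enable_log_info
# into config_info-style entries, mirroring UpdateBaseCustomFlags above.
xpu_l3_cache=67104768   # example value for an XPU R200
enable_log_info=true

config_entries=""
if [ "$xpu_l3_cache" -ge 0 ]; then
  config_entries="xpu_l3_cache=$xpu_l3_cache"
fi
if [ "$enable_log_info" = "true" ]; then
  config_entries="$config_entries enable_log_info=true"
else
  config_entries="$config_entries enable_log_info=false"
fi
echo "$config_entries"
```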
2 changes: 2 additions & 0 deletions fastdeploy/runtime/backends/paddle/option.h
@@ -85,6 +85,8 @@ struct PaddleBackendOption {
  bool enable_memory_optimize = true;
  /// Whether to enable IR debug, default false
  bool switch_ir_debug = false;
  /// Whether to enable IR optimization, default true
  bool switch_ir_optimize = true;

  /*
   * @brief IPU option, this will configure the IPU hardware, if inference model in IPU
6 changes: 6 additions & 0 deletions fastdeploy/runtime/backends/paddle/paddle_backend.cc
@@ -84,6 +84,9 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
#endif
  } else if (option.device == Device::KUNLUNXIN) {
#ifdef WITH_KUNLUNXIN
    // Note(qiuyanjun): For Paddle XPU L3 Cache, please set
    // export XPU_PADDLE_L3_SIZE=67104768 (XPU R200)
    // export FLAGS_fuse_multi_transformer_quant_type="float"
    config_.EnableXpu(option.xpu_option.kunlunxin_l3_workspace_size,
                      option.xpu_option.kunlunxin_locked,
                      option.xpu_option.kunlunxin_autotune,
Expand Down Expand Up @@ -117,6 +120,9 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
} else {
config_.SetCpuMathLibraryNumThreads(option.cpu_thread_num);
}
// Note: SwitchIrOptim is enabled by default for paddle inference
// backend. So, we don't need to set it manually.
// config_.SwitchIrOptim(option.switch_ir_optimize);
}

bool PaddleBackend::Init(const RuntimeOption& runtime_option) {
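The environment variables named in the code comment above can be exported before launching the server or benchmark. A minimal sketch, using the exact values quoted in that comment for an XPU R200 card (adjust for other hardware):

```shell
# Values taken verbatim from the source comment above (XPU R200).
export XPU_PADDLE_L3_SIZE=67104768
export FLAGS_fuse_multi_transformer_quant_type="float"
echo "$XPU_PADDLE_L3_SIZE"
```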
5 changes: 5 additions & 0 deletions serving/docs/zh_CN/xpu.md
@@ -36,7 +36,12 @@ docker run -itd --name fd_xpu_server -v `pwd`/:/serving --net=host --privileged
```bash
docker exec -it fd_xpu_server /bin/bash
cd /opt/fastdeploy/benchmark/cpp/build

# Set the XPU L3 cache size (the R200 has 63 MB of L3)
export XPU_PADDLE_L3_SIZE=67104768
# Run the benchmark to verify
./benchmark --model ResNet50_infer --config_path ../config/config.xpu.paddle.fp32.txt --enable_log_info

cd /serving
```
The output is: