Skip to content

Commit

Permalink
Add Cpp Doc Generate tools (PaddlePaddle#5900)
Browse files Browse the repository at this point in the history
* CAPItools

* add note
  • Loading branch information
Liyulingyue authored Jun 13, 2023
1 parent 9dca23d commit 9d695ac
Show file tree
Hide file tree
Showing 5 changed files with 877 additions and 0 deletions.
55 changes: 55 additions & 0 deletions ci_scripts/CAPItools/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# CAPI tools
CAPI tools 用于一键生成 C++ 的 rst 文档。

## 调用方式
```python
python main.py <source dir> <target dir>
```

若不设置 `source dir` 和 `target dir`,则默认先查找已安装的 `paddlepaddle` 包环境。

其中:
- source dir 是安装后的 Paddle C++ API 声明路径。 例如`venv/Lib/site-packages/paddle/include/paddle`
- target dir 目标文件保存路径。

最终生成结果如下所示:
```python
target dir
| -cn
|- index.rst
|- Paddle
|- fluid
|- phi
|- ...
| -en
|- index.rst
|- Paddle
|- fluid
|- phi
|- ...
```

## 获取最新 PaddlePaddle
```bash
python -m pip install paddlepaddle==0.0.0 -f https://www.paddlepaddle.org.cn/whl/windows/cpu-mkl-avx/develop.html
```

## 代码结构

### `main.py`文件主要用于处理和筛选包文件, 并调用`utils_helper.py`中的函数进行文件生成
```python
def analysis_file() # 用于解析文件内容(多线程不安全)

def generate_docs() # 用于创建目录并传值给 utils_helper.py 中的函数进行文件生成

def cpp2py() # 用于筛选出 cpp api 和 py api 相对应的函数名称
```

### `utils_helper.py`文件主要存放函数生成、解析, 以及文件写入的工作
```python

class func_helper(object) # 用于生成和解析方法
decode() # 用于解析输入输出参数、函数名称、返回值、函数注释信息
class class_helper(object) # 用于生成和解析类
decode() # 同 func_helper()

def generate_overview() # 用于生成 overview.rst 文件
```
150 changes: 150 additions & 0 deletions ci_scripts/CAPItools/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
# python main.py [source dir] [target dir]
# python main.py ../paddle .


import CppHeaderParser
import json
import os
import traceback
import sys
import re

from utils_helper import func_helper, class_helper, generate_overview
from utils import get_PADDLE_API_class, get_PADDLE_API_func


# 解析所有的函数, 类, 枚举, 返回一个字典
# 多线程使用并不安全, 请不要使用多线程
def analysis_file(path):
    """Parse one C++ source/header file into a plain dictionary.

    The file at ``path`` is handed to ``CppHeaderParser.CppHeader`` (read as
    UTF-8); the parser's JSON dump is then decoded with ``json.loads`` and
    returned.

    NOTE: per the original author's warning this is not safe to call from
    multiple threads.
    """
    parsed_header = CppHeaderParser.CppHeader(path, encoding='utf8')
    return json.loads(parsed_header.toJSON())


# 生成文件
# 根据给定的list内容,生成对应的文档信息
def _output_dir_for(filename, save_dir, language):
    """Create (if needed) and return the output directory for one source file."""
    # Drop "../" markers and the ".h" suffix so the header path becomes a
    # sub-directory name under <save_dir>/<language>/.
    # NOTE(review): ``replace(".h", "")`` removes every ".h" occurrence, not
    # only the extension, and an absolute ``filename`` makes ``os.path.join``
    # discard ``save_dir``/``language`` -- confirm what callers pass in.
    sub_path = filename.replace("../", "").replace(".h", "")
    dir_path = os.path.join(save_dir, language, sub_path)
    os.makedirs(dir_path, exist_ok=True)
    return dir_path


def _render_rst(helper_cls, helper_args, rst_path, language):
    """Build a doc helper and let it write ``rst_path``; log failures and go on."""
    try:
        helper_cls(*helper_args).create_and_write_file(rst_path, language)
    except Exception:
        # Best effort: one broken API must not abort the whole run, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        print(traceback.format_exc())
        print('FAULT GENERATE:' + rst_path)


def generate_docs(
    all_funcs, all_class, cpp2py_api_list, save_dir, LANGUAGE="cn"
):
    """Generate one ``.rst`` file per parsed function and class.

    Args:
        all_funcs: parsed function records; each must provide ``"filename"``
            and ``"name"``.
        all_class: parsed class records with the same two keys.
        cpp2py_api_list: names passed through to ``func_helper``.
        save_dir: root directory for the generated files.
        LANGUAGE: sub-directory name under ``save_dir`` (``"cn"`` by default);
            also forwarded to the helpers' ``create_and_write_file``.

    Files are written to ``<save_dir>/<LANGUAGE>/<header path>/<name>.rst``.
    A failure while generating a single file is printed and skipped.
    """
    for item in all_funcs:
        dir_path = _output_dir_for(item["filename"], save_dir, LANGUAGE)

        # "/" cannot appear in a file name (matters on Linux).
        func_name = item["name"].replace("/", "")

        # Pure operator overloads (e.g. "operator==") get no rst file; per the
        # original note they are still listed in the overview.
        if func_name.startswith('operator'):
            operator_symbol = func_name.replace('operator', '', 1)
            if re.search(r"\w", operator_symbol) is None:
                continue

        rst_path = os.path.join(dir_path, func_name + ".rst")
        _render_rst(func_helper, (item, cpp2py_api_list), rst_path, LANGUAGE)

    for item in all_class:
        dir_path = _output_dir_for(item["filename"], save_dir, LANGUAGE)

        # The parser glues the export macro onto the class name; strip it.
        class_name = item["name"].replace("PADDLE_API", "")
        rst_path = os.path.join(dir_path, class_name + ".rst")
        _render_rst(class_helper, (item,), rst_path, LANGUAGE)


# cpp 对应 python api
# 用于存储 api 的名称, 用于后续生成对应python api文档链接
def cpp2py(data: dict):
    """Collect the C++ API names that have a matching Python API.

    Iterates over ``data["using"]`` and returns a new list in which every
    ``"paddle::"`` occurrence has been removed from each entry. The result is
    later used to build links to the corresponding Python API docs.
    """
    return [alias.replace("paddle::", "") for alias in data["using"]]


# 运行主函数,主要流程如下
# 1. 确定生成的目录
# 2. 提取待生成文档的PADDLE_API list
# 3. 生成文档
if __name__ == "__main__":
    # Script entry point. Steps:
    #   1. decide the source and output directories
    #   2. collect every PADDLE_API function/class to document
    #   3. generate the per-API docs and the overview
    root_dir = ''
    save_dir = '.'  # default: write into the current directory
    if len(sys.argv) == 3:
        root_dir = sys.argv[1]
        save_dir = sys.argv[2]

    if root_dir == '':
        # No source dir given: use the installed paddle package location.
        try:
            import paddle
            import inspect

            root_dir = os.path.dirname(inspect.getsourcefile(paddle))
        except Exception:
            # paddle is missing or cannot be located: fall back to a sibling
            # checkout so the script can still be run standalone.
            root_dir = '../paddle'

    all_funcs = []
    all_class = []
    cpp2py_api_list = []
    overview_list = []
    for home, dirs, files in os.walk(root_dir):
        for file_name in files:
            # Only C++/CUDA sources and headers are of interest.
            if file_name.split(".")[-1] not in ["cc", "cu", "h"]:
                continue

            file_path = os.path.join(home, file_name)
            # tensor_compat.h is currently the only file whose C++ APIs map
            # one-to-one onto Python APIs.
            if file_name == "tensor_compat.h":
                cpp2py_data = analysis_file(file_path)
                cpp2py_api_list = cpp2py(cpp2py_data)

            # Cheap pre-filter: skip files that never mention PADDLE_API.
            with open(file_path, encoding='utf-8') as f:
                if 'PADDLE_API ' not in f.read():
                    continue

            print("Parsing: ", file_path)
            data = analysis_file(file_path)

            # Extract the exported functions and classes.
            current_func = get_PADDLE_API_func(data)
            current_class = get_PADDLE_API_class(data)

            # Record them, both globally and per header for the overview.
            all_funcs.extend(current_func)
            all_class.extend(current_class)
            overview_list.append(
                {
                    'h_file': file_path,
                    'class': current_class,
                    'function': current_func,
                }
            )

    # Generate the per-API documents.
    generate_docs(all_funcs, all_class, cpp2py_api_list, save_dir, "cn")
    generate_docs(all_funcs, all_class, cpp2py_api_list, save_dir, "en")

    # Generate the overview pages.
    generate_overview(overview_list, save_dir, "cn")
    generate_overview(overview_list, save_dir, "en")

    # Summary statistics.
    print("PADDLE_API func count: ", len(all_funcs))
    print("PADDLE_API class count: ", len(all_class))
    print("cpp2py api count: ", len(cpp2py_api_list))
2 changes: 2 additions & 0 deletions ci_scripts/CAPItools/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
robotpy-cppheaderparser==5.1.0
# paddle
94 changes: 94 additions & 0 deletions ci_scripts/CAPItools/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# 获取存在 PADDLE_API func 数组的名称
# CppHeaderParser 解析后以字典形式保存数据,'debug' 字段中保存了原始信息
# 如果 PADDLE_API 在字段中,则表明该 API 是外部暴露的函数
def get_PADDLE_API_func(data: dict):
    """Return the parsed functions that are exported with ``PADDLE_API``.

    Every entry of ``data["functions"]`` whose ``'debug'`` field contains the
    text ``PADDLE_API`` is kept, in its original order. Per the original note,
    ``'debug'`` holds the raw declaration text produced by CppHeaderParser.
    """
    return [func for func in data["functions"] if 'PADDLE_API' in func['debug']]


# 获取存在 PADDLE_API class 数组的名称
# CppHeaderParser 解析后以字典形式保存数据
# 如果 PADDLE_API 在字段中,则表明该 API 是外部暴露的类
def get_PADDLE_API_class(data: dict):
    """Return the parsed classes that are exported with ``PADDLE_API``.

    Walks ``data["classes"]`` (a mapping of class name to class record) and
    keeps the records that are not declared as ``struct`` and whose name
    contains ``PADDLE_API``. The parser glues the macro onto the class name,
    e.g. ``PADDLE_APIDeviceContextPool``, which is why the name is searched.
    """
    return [
        record
        for class_name, record in data["classes"].items()
        # Per the original note, no PADDLE_API struct exists at the moment.
        if record["declaration_method"] != "struct"
        and "PADDLE_API" in class_name
    ]


# 获取方法中的参数parameters
# 根据解析的参数字典,添加对应的参数名、参数类型、说明
# 有时候会将“&”解析为参数名,需要特殊处理
def get_parameters(parameters):
    """Build a ``{name: {'type': ..., 'intro': ...}}`` dict from parsed parameters.

    Each entry of ``parameters`` must provide the keys ``'type'``,
    ``'reference'``, ``'pointer'``, ``'constant'`` and ``'name'``; ``'desc'``
    is optional. The type string is normalised by removing any " &" / " *"
    and re-appending "&" / "*" directly after the base type, with a leading
    "const " added for constant parameters that lack it.

    Entries whose name is "&" (the parser sometimes mistakes the reference
    sign for the name) are left out of the result.
    """
    collected = {}
    for param in parameters:
        base_type = param['type'].replace(" &", "").replace(" *", "")

        # Re-attach the reference / pointer markers without the inner space.
        markers = "&" * param["reference"]
        if param["pointer"] == 1:
            markers += "*"
        full_type = base_type + markers

        needs_const = param["constant"] == 1 and not full_type.startswith('const')
        if needs_const:
            full_type = "const " + full_type

        # NOTE(review): this strips every space from the description --
        # confirm that is intended.
        intro = param.get('desc', '').replace(' ', '')

        if param['name'] != '&':
            collected[param['name']] = {
                'type': full_type,
                'intro': intro,
            }
    return collected


# 将注释内容解析为说明字典
# 解析前: @brief Construct a Tensor from a buffer pointed to by `data` @note `from_blob` doesn’t copy or move data, Modifying the constructed tensor is equivalent to modifying the original data. @param data The pointer to the memory buffer. @param shape The dims of the tensor. @param dtype The data type of the tensor, should correspond to data type of`data`. See PD_FOR_EACH_DATA_TYPE in `phi/common/data_type.h` @param layout The data layout of the tensor. @param place The place where the tensor is located.If `place` is default value, it will be inferred from `data`,However, the feature is only supported on CPU or GPU.If `place` is not default value, make sure that `place` is equalto the place of `data` @param deleter A function or function object that will be called to free thememory buffer. @return A Tensor object constructed from the buffer
# 以@作为分隔符,索引关键字包括'brief'、'note'、'return'、'param'
# 解析后分别将对应关键字后的内容放入字典对应关键字后
def parse_doxygen(doxygen):
    """Split a flattened doxygen comment into its tagged sections.

    The text is cut at every ``@``; anything before the first ``@`` is
    ignored. Recognised tags are ``brief``, ``return``, ``note`` and
    ``param`` (each must be followed by a space); other tags are skipped.

    Args:
        doxygen: the comment text, e.g.
            ``"@brief Do X @param data The buffer @return A Tensor"``.

    Returns:
        A dict with the keys ``'intro'`` (brief), ``'returns'``, ``'note'``
        (strings, empty when absent) and ``'param_intro'`` (a dict mapping
        parameter name to its description). Section text is stored as found,
        including any trailing whitespace.
    """
    doxygen_dict = {
        'intro': '',
        'returns': '',
        'param_intro': {},
        'note': '',
    }
    # (tag prefix, result key) for the tags that map straight onto one field.
    simple_tags = (
        ('brief ', 'intro'),
        ('return ', 'returns'),
        ('note ', 'note'),
    )

    # The first split element is the text before the first '@' (or the whole
    # string when there is no '@' at all); it never carries a tag.
    for section in doxygen.split('@')[1:]:
        if section.startswith('param '):
            # partition keeps the full name when no description follows;
            # slicing at find(' ') == -1 used to chop the last character.
            name, _, description = section[len('param '):].partition(' ')
            doxygen_dict['param_intro'][name] = description
            continue
        for prefix, key in simple_tags:
            if section.startswith(prefix):
                doxygen_dict[key] = section[len(prefix):]
                break

    return doxygen_dict
Loading

0 comments on commit 9d695ac

Please sign in to comment.