forked from PaddlePaddle/docs
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add Cpp Doc Generate tools (PaddlePaddle#5900)
* CAPItools * add note
- Loading branch information
1 parent
9dca23d
commit 9d695ac
Showing
5 changed files
with
877 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
# CAPI tools | ||
CAPI tools 用于一键生成 C++ 的 rst 文档。 | ||
|
||
## 调用方式 | ||
```bash | ||
python main.py <source dir> <target dir> | ||
``` | ||
|
||
若不设置`source dir`和`target dir`,则默认先查找已安装的`paddlepaddle`包环境。 | ||
|
||
其中: | ||
- source dir 是安装后的 Paddle C++ API 声明路径。 例如`venv/Lib/site-packages/paddle/include/paddle`。 | ||
- target dir 目标文件保存路径。 | ||
|
||
最终生成结果如下所示: | ||
```python | ||
target dir | ||
| -cn | ||
|- index.rst | ||
|- Paddle | ||
|- fluid | ||
|- phi | ||
|- ... | ||
| -en | ||
|- index.rst | ||
|- Paddle | ||
|- fluid | ||
|- phi | ||
|- ... | ||
``` | ||
|
||
## 获取最新 PaddlePaddle | ||
python -m pip install paddlepaddle==0.0.0 -f https://www.paddlepaddle.org.cn/whl/windows/cpu-mkl-avx/develop.html | ||
|
||
## 代码结构 | ||
|
||
### `main.py`文件主要用于处理和筛选包文件, 并调用`utils_helper.py`中的函数进行文件生成 | ||
```python | ||
def analysis_file() # 用于解析文件内容(多线程不安全) | ||
|
||
def generate_docs() # 用于创建目录并传值给 utils_helper.py 中的函数进行文件生成 | ||
|
||
def cpp2py() # 用于筛选出 cpp api 和 py api 相对应的函数名称 | ||
``` | ||
|
||
### `utils_helper.py`文件主要存放函数生成、解析, 以及文件写入的工作 | ||
```python | ||
|
||
class func_helper(object) # 用于生成和解析方法 | ||
decode() # 用于解析输入输出参数、函数名称、返回值、函数注释信息 | ||
class class_helper(object) # 用于生成和解析类 | ||
decode() # 同 func_helper() | ||
|
||
def generate_overview() # 用于生成 overview.rst 文件 | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,150 @@ | ||
# python main.py [source dir] [target dir] | ||
# python main.py ../paddle . | ||
|
||
|
||
import CppHeaderParser | ||
import json | ||
import os | ||
import traceback | ||
import sys | ||
import re | ||
|
||
from utils_helper import func_helper, class_helper, generate_overview | ||
from utils import get_PADDLE_API_class, get_PADDLE_API_func | ||
|
||
|
||
# 解析所有的函数, 类, 枚举, 返回一个字典 | ||
# 多线程使用并不安全, 请不要使用多线程 | ||
def analysis_file(path):
    """Parse one C++ source/header file into plain Python data.

    The file at ``path`` is read as UTF-8 by ``CppHeaderParser.CppHeader``;
    the parser's JSON dump is then decoded and returned.

    The file's author warns that this is not safe to use from multiple
    threads, so call it sequentially.
    """
    parsed_json = CppHeaderParser.CppHeader(path, encoding='utf8').toJSON()
    return json.loads(parsed_json)
|
||
|
||
# 生成文件 | ||
# 根据给定的list内容,生成对应的文档信息 | ||
def generate_docs(
    all_funcs, all_class, cpp2py_api_list, save_dir, LANGUAGE="cn"
):
    """Write one ``.rst`` file per collected function and class.

    Args:
        all_funcs: iterable of parsed function dicts; each must provide the
            ``"filename"`` and ``"name"`` keys read below.
        all_class: iterable of parsed class dicts with the same two keys.
        cpp2py_api_list: names forwarded unchanged to ``func_helper``.
        save_dir: root directory for the generated documentation.
        LANGUAGE: sub-directory name under ``save_dir``; also forwarded to
            ``create_and_write_file``.

    Each file is written to ``<save_dir>/<LANGUAGE>/<header path>/<name>.rst``.
    A failure while generating a single file is printed and skipped so that
    one bad declaration does not abort the whole run.
    """
    for item in all_funcs:
        out_dir = _ensure_doc_dir(save_dir, LANGUAGE, item["filename"])

        # '/' cannot be part of a file name (it comes from e.g. `operator/`).
        func_name = item["name"].replace("/", "")

        # Pure operator overloads (nothing but symbols after `operator`) get
        # no rst file; symbols such as `*` would also be invalid in Windows
        # file names. Per the original note they are still expected to show
        # up in the overview list.
        if func_name.startswith('operator'):
            checkwords = func_name.replace('operator', '', 1)
            if re.search(r"\w", checkwords) is None:
                continue

        rst_dir = os.path.join(out_dir, func_name + ".rst")
        try:
            helper = func_helper(item, cpp2py_api_list)
            helper.create_and_write_file(rst_dir, LANGUAGE)
        except Exception:
            # Best effort: report and continue. `Exception` (not a bare
            # except) keeps Ctrl-C / SystemExit able to stop the run.
            print(traceback.format_exc())
            print('FAULT GENERATE:' + rst_dir)

    for item in all_class:
        out_dir = _ensure_doc_dir(save_dir, LANGUAGE, item["filename"])

        # The parser glues the export macro onto the class name
        # (e.g. `PADDLE_APIDeviceContextPool`); strip it for the file name.
        func_name = item["name"].replace("PADDLE_API", "")
        rst_dir = os.path.join(out_dir, func_name + ".rst")
        try:
            helper = class_helper(item)
            helper.create_and_write_file(rst_dir, LANGUAGE)
        except Exception:
            print(traceback.format_exc())
            print('FAULT GENERATE:' + rst_dir)


def _ensure_doc_dir(save_dir, language, header_path):
    """Create (if needed) and return the output directory for one header."""
    # NOTE(review): `.replace(".h", "")` removes every ".h" substring, not
    # only a trailing extension, and an absolute `header_path` makes
    # os.path.join discard `save_dir`/`language`. Kept as-is because other
    # modules may rebuild the same path - confirm before changing.
    path = header_path.replace("../", "").replace(".h", "")
    dir_path = os.path.join(save_dir, language, path)
    os.makedirs(dir_path, exist_ok=True)
    return dir_path
|
||
|
||
# cpp 对应 python api | ||
# 用于存储 api 的名称, 用于后续生成对应python api文档链接 | ||
def cpp2py(data: dict):
    """Return the names under ``data["using"]`` without the ``paddle::`` prefix.

    The result is a new list, in iteration order of ``data["using"]``; it is
    used later to link C++ APIs to their Python counterparts.
    """
    return [alias.replace("paddle::", "") for alias in data["using"]]
|
||
|
||
# 运行主函数,主要流程如下 | ||
# 1. 确定生成的目录 | ||
# 2. 提取待生成文档的PADDLE_API list | ||
# 3. 生成文档 | ||
if __name__ == "__main__":
    # Main flow:
    #   1. decide the source and target directories
    #   2. collect every PADDLE_API function/class to document
    #   3. generate the rst files and the overview
    root_dir = ''
    save_dir = '.'  # default: write into the current directory
    if len(sys.argv) == 3:
        root_dir = sys.argv[1]
        save_dir = sys.argv[2]
    elif len(sys.argv) != 1:
        # Only "no arguments" or "both arguments" are supported; say so
        # instead of silently ignoring what was passed.
        print(
            "usage: python main.py [source dir] [target dir]; "
            "ignoring arguments and using defaults",
            file=sys.stderr,
        )

    if root_dir == '':
        try:
            import paddle
            import inspect

            root_dir = os.path.dirname(inspect.getsourcefile(paddle))
        except Exception:
            # paddle is missing or could not be located: fall back to a
            # sibling checkout for simple local runs. `Exception` rather
            # than a bare except so Ctrl-C / SystemExit still propagate.
            root_dir = '../paddle'

    all_funcs = []
    all_class = []
    cpp2py_api_list = []
    overview_list = []
    for home, dirs, files in os.walk(root_dir):
        for file_name in files:
            # Skip files that are not C++/CUDA sources or headers.
            if file_name.split(".")[-1] not in ["cc", "cu", "h"]:
                continue

            file_path = os.path.join(home, file_name)
            # Currently this is the only file whose C++ APIs map onto
            # Python APIs. cpp2py() already returns a fresh list.
            if file_name == "tensor_compat.h":
                cpp2py_api_list = cpp2py(analysis_file(file_path))

            # Cheap text check first: skip files that never mention PADDLE_API.
            with open(file_path, encoding='utf-8') as f:
                if 'PADDLE_API ' not in f.read():
                    continue

            print("Parsing: ", file_path)
            data = analysis_file(file_path)

            # Extract the exported declarations.
            current_func = get_PADDLE_API_func(data)
            current_class = get_PADDLE_API_class(data)

            # Record them for doc generation and for the overview.
            all_funcs.extend(current_func)
            all_class.extend(current_class)
            overview_list.append(
                {
                    'h_file': file_path,
                    'class': current_class,
                    'function': current_func,
                }
            )

    # Generate the per-API documents.
    generate_docs(all_funcs, all_class, cpp2py_api_list, save_dir, "cn")
    generate_docs(all_funcs, all_class, cpp2py_api_list, save_dir, "en")

    # Generate the overview.
    generate_overview(overview_list, save_dir, "cn")
    generate_overview(overview_list, save_dir, "en")

    # Summary statistics.
    print("PADDLE_API func count: ", len(all_funcs))
    print("PADDLE_API class count: ", len(all_class))
    print("cpp2py api count: ", len(cpp2py_api_list))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
robotpy-cppheaderparser==5.1.0 | ||
# paddle |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
# 获取存在 PADDLE_API func 数组的名称 | ||
# CppHeaderParser 解析后以字典形式保存数据,'debug' 字段中保存了原始信息 | ||
# 如果 PADDLE_API 在字段中,则表明该 API 是外部暴露的函数 | ||
def get_PADDLE_API_func(data: dict):
    """Return the function entries that are marked with ``PADDLE_API``.

    An entry of ``data["functions"]`` is kept when the text ``PADDLE_API``
    occurs in its ``'debug'`` field; per the file's header comment that field
    holds the original declaration text produced by CppHeaderParser.
    """
    return [func for func in data["functions"] if 'PADDLE_API' in func['debug']]
|
||
|
||
# 获取存在 PADDLE_API class 数组的名称 | ||
# CppHeaderParser 解析后以字典形式保存数据 | ||
# 如果 PADDLE_API 在字段中,则表明该 API 是外部暴露的类 | ||
def get_PADDLE_API_class(data: dict):
    """Return the class entries that are marked with ``PADDLE_API``.

    Entries declared as ``struct`` are ignored (the original note says no
    PADDLE_API struct exists at present). The macro is detected in the class
    *name* because the parser glues it onto the name, e.g.
    ``PADDLE_APIDeviceContextPool``.
    """
    return [
        class_info
        for class_name, class_info in data["classes"].items()
        if class_info["declaration_method"] != "struct"
        and "PADDLE_API" in class_name
    ]
|
||
|
||
# 获取方法中的参数parameters | ||
# 根据解析的参数字典,添加对应的参数名、参数类型、说明 | ||
# 有时候会将“&”解析为参数名,需要特殊处理 | ||
def get_parameters(parameters):
    """Build a ``{name: {'type': ..., 'intro': ...}}`` dict for a parameter list.

    Each element of ``parameters`` must provide the ``'type'``,
    ``'reference'``, ``'pointer'``, ``'constant'`` and ``'name'`` keys;
    ``'desc'`` is optional. Entries whose name is ``'&'`` are dropped: the
    parser sometimes reports a bare ``&`` as the name of an unnamed parameter.
    """
    described = {}
    for param in parameters:
        # Remove the spaced reference/pointer markers from the raw type, then
        # re-attach them compactly ("Tensor &" -> "Tensor&").
        base_type = param['type'].replace(" &", "").replace(" *", "")
        suffix = "&" * param["reference"]
        if param["pointer"] == 1:
            suffix += "*"
        full_type = base_type + suffix

        # Make sure a constant parameter is shown with a leading `const`.
        if param["constant"] == 1 and not full_type.startswith('const'):
            full_type = "const " + full_type

        # NOTE(review): as written this removes every space from the
        # description - confirm whether collapsing double spaces was intended.
        description = param.get('desc', '').replace(' ', '')

        # Special handling for the "no parameter name" case.
        if param['name'] == '&':
            continue
        described[param['name']] = {'type': full_type, 'intro': description}
    return described
|
||
|
||
# 将注释内容解析为说明字典 | ||
# 解析前: @brief Construct a Tensor from a buffer pointed to by `data` @note `from_blob` doesn’t copy or move data, Modifying the constructed tensor is equivalent to modifying the original data. @param data The pointer to the memory buffer. @param shape The dims of the tensor. @param dtype The data type of the tensor, should correspond to data type of`data`. See PD_FOR_EACH_DATA_TYPE in `phi/common/data_type.h` @param layout The data layout of the tensor. @param place The place where the tensor is located.If `place` is default value, it will be inferred from `data`,However, the feature is only supported on CPU or GPU.If `place` is not default value, make sure that `place` is equalto the place of `data` @param deleter A function or function object that will be called to free thememory buffer. @return A Tensor object constructed from the buffer | ||
# 以@作为分隔符,索引关键字包括'brief'、'note'、'return'、'param' | ||
# 解析后分别将对应关键字后的内容放入字典对应关键字后 | ||
def parse_doxygen(doxygen):
    """Split a flattened doxygen comment into its tagged sections.

    The text is cut at every ``@``; the parts starting with ``brief``,
    ``return``, ``param`` and ``note`` (each followed by a space) are stored,
    everything else is ignored. Text before the first ``@`` is discarded.

    Args:
        doxygen: the comment text, e.g.
            ``"@brief Adds. @param x Left operand. @return The sum."``.

    Returns:
        A dict with the keys ``'intro'`` (from ``@brief``), ``'returns'``
        (from ``@return``), ``'note'`` (from ``@note``) and ``'param_intro'``
        (``{parameter name: description}`` from every ``@param``). Sections
        that do not occur keep their empty default. Section text is stored
        as-is, including any trailing whitespace.
    """
    doxygen_dict = {
        'intro': '',
        'returns': '',
        'param_intro': {},
        'note': '',
    }

    if '@' not in doxygen:
        return doxygen_dict

    # Drop whatever precedes the first tag, then walk the tagged parts.
    doxygen = doxygen[doxygen.find('@'):]
    for doxygen_part in doxygen.split('@'):
        if doxygen_part.startswith('brief '):
            doxygen_dict['intro'] = doxygen_part.replace('brief ', '', 1)
        elif doxygen_part.startswith('return '):
            doxygen_dict['returns'] = doxygen_part.replace('return ', '', 1)
        elif doxygen_part.startswith('param '):
            param_intro = doxygen_part.replace('param ', '', 1)
            # partition() keeps the whole name when no description follows;
            # slicing with find(' ') == -1 used to cut off its last character
            # and store the name as the description.
            param_name, _, param_desc = param_intro.partition(' ')
            doxygen_dict['param_intro'][param_name] = param_desc
        elif doxygen_part.startswith('note '):
            doxygen_dict['note'] = doxygen_part.replace('note ', '', 1)

    return doxygen_dict
Oops, something went wrong.