Skip to content

Commit

Permalink
[Change]修改目前 Paddle docs 的代码检查方式 (PaddlePaddle#5962)
Browse files Browse the repository at this point in the history
  • Loading branch information
megemini authored Jul 1, 2023
1 parent dd6138a commit 63b2bfa
Show file tree
Hide file tree
Showing 2 changed files with 159 additions and 2 deletions.
81 changes: 80 additions & 1 deletion ci_scripts/chinese_samplecode_processor.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
import math
import os
import pickle
Expand Down Expand Up @@ -83,6 +84,82 @@ def find_all(src_str, substr):
return indices


def _hasprefix(line, prefixes):
"""helper prefix test"""
return any(line == p or line.startswith(p + ' ') for p in prefixes)


def strip_ps_from_codeblock(codeblock):
"""strip PS1(>>> ) and PS2(... ) from codeblock
Note:
These two functions should be updated simultaneously:
`docs/docs/api/gen_doc.py :: strip_ps_from_codeblock`
`docs/ci_scripts/chinese_samplecode_processor.py :: strip_ps_from_codeblock`
"""
match_obj = re.search(r"\n>>>\s?", "\n" + codeblock)
if match_obj is None:
return codeblock

TEXT = 'text' # text of codeblock
CODE_PS1 = 'code_ps1' # PS1(>>> ) of codeblock
CODE_PS2 = 'code_ps2' # PS2(... ) of codeblock

previous_state = TEXT
current_state = None

codeblock_clean = []
for line in codeblock.splitlines():
strip_line = line.strip()

if previous_state == TEXT:
# text transitions to source whenever a PS1 line is encountered
if _hasprefix(strip_line, ('>>>',)):
current_state = CODE_PS1
else:
current_state = TEXT

elif previous_state in {CODE_PS1, CODE_PS2}:
if len(strip_line) == 0:
current_state = TEXT

# allow source to continue with either PS1 or PS2
elif _hasprefix(strip_line, ('>>>', '...')):
if strip_line == '...':
if previous_state == CODE_PS2:
current_state = CODE_PS2
else:
current_state = TEXT
else:
if _hasprefix(strip_line, ('...',)):
current_state = CODE_PS2
else:
current_state = CODE_PS1
else:
current_state = TEXT
else:
raise AssertionError(
'Unknown state previous_state={}'.format(previous_state)
)

lstrip_line = line.lstrip()
if current_state in {CODE_PS1, CODE_PS2}:
match_obj = re.match(r"^\.\.\.\s?|^>>>\s?", lstrip_line)
codeblock_clean.append(lstrip_line[match_obj.end() :])

elif current_state == TEXT:
codeblock_clean.append("# {}".format(line))

else:
raise AssertionError(
'Unknown state current_state={}'.format(current_state)
)

previous_state = current_state

return "\n".join(codeblock_clean)


def extract_sample_code(srcfile, status_all):
filename = srcfile.name
srcc = srcfile.read()
Expand Down Expand Up @@ -129,7 +206,9 @@ def extract_sample_code(srcfile, status_all):
if srcls[j].find(" code-block:: python") != -1:
break
content += srcls[j].replace(startindent, "", 1)
status.append(run_sample_code(content, filename))
status.append(
run_sample_code(strip_ps_from_codeblock(content), filename)
)

status_all[filename] = status
return status_all
Expand Down
80 changes: 79 additions & 1 deletion docs/api/gen_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1025,6 +1025,82 @@ def filter_out_object_of_api_info_dict():
del api_info_dict[id_api]['object']


def _hasprefix(line, prefixes):
"""helper prefix test"""
return any(line == p or line.startswith(p + ' ') for p in prefixes)


def strip_ps_from_codeblock(codeblock):
"""strip PS1(>>> ) and PS2(... ) from codeblock
Note:
These two functions should be updated simultaneously:
`docs/docs/api/gen_doc.py :: strip_ps_from_codeblock`
`docs/ci_scripts/chinese_samplecode_processor.py :: strip_ps_from_codeblock`
"""
match_obj = re.search(r"\n>>>\s?", "\n" + codeblock)
if match_obj is None:
return codeblock

TEXT = 'text' # text of codeblock
CODE_PS1 = 'code_ps1' # PS1(>>> ) of codeblock
CODE_PS2 = 'code_ps2' # PS2(... ) of codeblock

previous_state = TEXT
current_state = None

codeblock_clean = []
for line in codeblock.splitlines():
strip_line = line.strip()

if previous_state == TEXT:
# text transitions to source whenever a PS1 line is encountered
if _hasprefix(strip_line, ('>>>',)):
current_state = CODE_PS1
else:
current_state = TEXT

elif previous_state in {CODE_PS1, CODE_PS2}:
if len(strip_line) == 0:
current_state = TEXT

# allow source to continue with either PS1 or PS2
elif _hasprefix(strip_line, ('>>>', '...')):
if strip_line == '...':
if previous_state == CODE_PS2:
current_state = CODE_PS2
else:
current_state = TEXT
else:
if _hasprefix(strip_line, ('...',)):
current_state = CODE_PS2
else:
current_state = CODE_PS1
else:
current_state = TEXT
else:
raise AssertionError(
'Unknown state previous_state={}'.format(previous_state)
)

lstrip_line = line.lstrip()
if current_state in {CODE_PS1, CODE_PS2}:
match_obj = re.match(r"^\.\.\.\s?|^>>>\s?", lstrip_line)
codeblock_clean.append(lstrip_line[match_obj.end() :])

elif current_state == TEXT:
codeblock_clean.append("# {}".format(line))

else:
raise AssertionError(
'Unknown state current_state={}'.format(current_state)
)

previous_state = current_state

return "\n".join(codeblock_clean)


def extract_code_blocks_from_docstr(docstr, google_style=True):
"""
extract code-blocks from the given docstring.
Expand Down Expand Up @@ -1082,7 +1158,9 @@ def _append_code_block(in_examples):
# nonlocal code_blocks, cb_cur, cb_cur_name, cb_cur_seq_id, cb_required
code_blocks.append(
{
'codes': inspect.cleandoc("\n".join(cb_info['cb_cur'])),
'codes': strip_ps_from_codeblock(
inspect.cleandoc("\n" + "\n".join(cb_info['cb_cur']))
),
'name': cb_info['cb_cur_name'],
'id': cb_info['cb_cur_seq_id'],
'required': cb_info['cb_required'],
Expand Down

0 comments on commit 63b2bfa

Please sign in to comment.