mirror of
https://github.com/fastapi/fastapi.git
synced 2026-02-24 02:38:13 -05:00
Handle code blocks, fix some bugs, add fix-all command
This commit is contained in:
@@ -20,10 +20,15 @@ MARKDOWN_LINK_RE = re.compile(
|
||||
)
|
||||
|
||||
HTML_LINK_RE = re.compile(r"<a\s+[^>]*>.*?</a>")
|
||||
HTML_LINK_TEXT = re.compile(r"<a\b([^>]*)>(.*?)</a>")
|
||||
HTML_LINK_TEXT_RE = re.compile(r"<a\b([^>]*)>(.*?)</a>")
|
||||
HTML_LINK_OPEN_TAG_RE = re.compile(r"<a\b([^>]*)>")
|
||||
HTML_ATTR_RE = re.compile(r'(\w+)\s*=\s*([\'"])(.*?)\2')
|
||||
|
||||
CODE_BLOCK_LANG_RE = re.compile(r"^```([\w-]*)", re.MULTILINE)
|
||||
|
||||
SLASHES_COMMENT_RE = re.compile(r"^(?P<code>.*?)(?P<comment>\s*// .*)?$")
|
||||
HASH_COMMENT_RE = re.compile(r"^(?P<code>.*?)(?P<comment>\s*# .*)?$")
|
||||
|
||||
|
||||
class CodeIncludeInfo(TypedDict):
|
||||
line_no: int
|
||||
@@ -57,6 +62,12 @@ class HtmlLinkInfo(TypedDict):
|
||||
text: str
|
||||
|
||||
|
||||
class MultilineCodeBlockInfo(TypedDict):
    """Description of one fenced (multiline) code block found in a document."""

    # Language tag read from the opening fence line ("" when there is none).
    lang: str
    # 1-based line number of the opening fence within the document.
    start_line_no: int
    # Every line of the block, including the opening and closing fence lines.
    content: list[str]
|
||||
|
||||
|
||||
# Code includes
|
||||
# -----------------------------------------------------------------------------------------
|
||||
|
||||
@@ -82,10 +93,11 @@ def replace_code_includes_with_placeholders(text: list[str]) -> list[str]:
|
||||
Replace code includes with placeholders.
|
||||
"""
|
||||
|
||||
modified_text = text.copy()
|
||||
includes = extract_code_includes(text)
|
||||
for include in includes:
|
||||
text[include["line_no"] - 1] = CODE_INCLUDE_PLACEHOLDER
|
||||
return text
|
||||
modified_text[include["line_no"] - 1] = CODE_INCLUDE_PLACEHOLDER
|
||||
return modified_text
|
||||
|
||||
|
||||
def replace_placeholders_with_code_includes(
|
||||
@@ -274,7 +286,7 @@ def _construct_markdown_link(
|
||||
link = f"[{text}]({url})"
|
||||
|
||||
if attributes:
|
||||
link += f" {{{attributes}}}"
|
||||
link += f"{{{attributes}}}"
|
||||
|
||||
return link
|
||||
|
||||
@@ -345,7 +357,7 @@ def extract_html_links(lines: list[str]) -> list[HtmlLinkInfo]:
|
||||
for html_link in HTML_LINK_RE.finditer(line):
|
||||
link_str = html_link.group(0)
|
||||
|
||||
link_text_match = HTML_LINK_TEXT.match(link_str)
|
||||
link_text_match = HTML_LINK_TEXT_RE.match(link_str)
|
||||
assert link_text_match is not None
|
||||
link_text = link_text_match.group(2)
|
||||
assert isinstance(link_text, str)
|
||||
@@ -442,3 +454,188 @@ def replace_html_links(
|
||||
)
|
||||
|
||||
return modified_text
|
||||
|
||||
|
||||
# Multiline code blocks
|
||||
# -----------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
def get_code_block_lang(line: str) -> str:
    """
    Return the language tag of a code fence line, or "" if there is none.

    `line` must start with the fence itself (no leading indentation). Fences
    of three or more backticks are supported, so both "```python" and
    "````python" give "python". A line that does not start with at least three
    backticks gives "".
    """

    fence_length = len(line) - len(line.lstrip("`"))
    if fence_length < 3:
        return ""
    # The tag is the run of word characters / dashes right after the fence.
    # The pattern can match the empty string, so a bare fence yields "".
    tag_match = re.match(r"[\w-]*", line[fence_length:])
    return tag_match.group(0) if tag_match else ""
|
||||
|
||||
|
||||
def extract_multiline_code_blocks(text: list[str]) -> list[MultilineCodeBlockInfo]:
    """
    Collect every closed fenced code block in `text`.

    Two fence kinds are tracked: blocks opened with exactly three backticks,
    which are closed by a line starting with exactly three backticks, and
    blocks opened with four or more backticks, which are closed by a line
    starting with four or more backticks. Leading whitespace before a fence is
    ignored. A block that is still open when the text ends is not returned.
    """
    found: list[MultilineCodeBlockInfo] = []

    # 0 = outside any block, 3 = inside a ``` block, 4 = inside a ````+ block
    open_fence = 0
    lang = ""
    start_line_no = -1
    body: list[str] = []

    for line_no, line in enumerate(text, start=1):
        unindented = line.lstrip()
        ticks = len(unindented) - len(unindented.lstrip("`"))

        if open_fence == 0:
            # Only an opening fence matters while outside a block.
            if ticks >= 3:
                open_fence = 3 if ticks == 3 else 4
                lang = get_code_block_lang(unindented)
                start_line_no = line_no
                body = [line]
            continue

        # Inside a block every line is content, the closing fence included.
        body.append(line)
        if open_fence == 3:
            is_closing = ticks == 3
        else:
            is_closing = ticks >= 4
        if is_closing:
            found.append(
                MultilineCodeBlockInfo(
                    lang=lang,
                    start_line_no=start_line_no,
                    content=body,
                )
            )
            open_fence, lang, start_line_no, body = 0, "", -1, []

    return found
|
||||
|
||||
|
||||
def _split_hash_comment(line: str) -> tuple[str, str | None]:
    """
    Split `line` into its code part and its trailing `# ` comment.

    Returns `(code, comment)`: `code` is right-stripped, and `comment` is the
    text captured by the `comment` group of HASH_COMMENT_RE, or None when the
    line has no such comment.
    """
    found = HASH_COMMENT_RE.match(line)
    if found is None:
        return line.rstrip(), None
    return found.group("code").rstrip(), found.group("comment")
|
||||
|
||||
|
||||
def _split_slashes_comment(line: str) -> tuple[str, str | None]:
    """
    Split `line` into its code part and its trailing `// ` comment.

    Returns `(code, comment)`: `code` is right-stripped, and `comment` is the
    text captured by the `comment` group of SLASHES_COMMENT_RE, or None when
    the line has no such comment.
    """
    match = SLASHES_COMMENT_RE.match(line)
    if match:
        code = match.group("code").rstrip()
        comment = match.group("comment")
        return code, comment
    # Right-strip here too, so the code part is normalized the same way on
    # both paths (and the same way as in `_split_hash_comment`).
    return line.rstrip(), None
|
||||
|
||||
|
||||
def replace_multiline_code_block(
    block_a: MultilineCodeBlockInfo, block_b: MultilineCodeBlockInfo
) -> list[str]:
    """
    Replace multiline code block a with block b leaving comments intact.

    The result is the lines of block b where, for every line on which both
    blocks carry a comment, the comment of block b is replaced by the comment
    of block a. Lines where either block has no comment are taken from block b
    unchanged.

    Syntax of comments depends on the language of the code block: `# ` for
    Python, shell, Dockerfile, requirements, gitignore, TOML and YAML blocks,
    `// ` for console and JSON blocks. Blocks in any other language are taken
    from block b as is. Mermaid blocks are not handled: the content of block a
    is returned unchanged.

    Raises ValueError if the blocks are not compatible (different languages or different number of lines).
    """

    if block_a["lang"] != block_b["lang"]:
        raise ValueError("Code blocks have different languages")
    if len(block_a["content"]) != len(block_b["content"]):
        raise ValueError("Code blocks have different number of lines")

    block_language = block_a["lang"].lower()
    if block_language in {"mermaid"}:
        return block_a["content"].copy()  # We don't handle mermaid code blocks for now

    # Pick the comment splitter once; it only depends on the block language.
    if block_language in {
        "python",
        "py",
        "sh",
        "bash",
        "dockerfile",
        "requirements",
        "gitignore",
        "toml",
        "yaml",
        "yml",
    }:
        split_comment = _split_hash_comment
    elif block_language in {"console", "json"}:
        split_comment = _split_slashes_comment
    else:
        # No known comment syntax: block b is used verbatim.
        return list(block_b["content"])

    code_block: list[str] = []
    for line_a, line_b in zip(block_a["content"], block_b["content"]):
        _, line_a_comment = split_comment(line_a)
        _, line_b_comment = split_comment(line_b)

        res_line = line_b
        # Swap comments only when both lines have one: `str.replace` raises
        # TypeError if the replacement is None.
        if line_a_comment and line_b_comment:
            res_line = res_line.replace(line_b_comment, line_a_comment, 1)
        code_block.append(res_line)

    return code_block
|
||||
|
||||
|
||||
def replace_multiline_code_blocks_in_text(
    text: list[str],
    code_blocks: list[MultilineCodeBlockInfo],
    original_code_blocks: list[MultilineCodeBlockInfo],
) -> list[str]:
    """
    Update each code block in `text` with the corresponding code block from
    `original_code_blocks` with comments taken from `code_blocks`.

    `code_blocks` are the blocks found in `text`; their `start_line_no` values
    decide where the updated lines are written. `text` itself is not modified:
    a modified copy is returned.

    Raises ValueError if the number, language, or shape of code blocks do not match.
    """

    if len(code_blocks) != len(original_code_blocks):
        raise ValueError(
            "Number of code blocks does not match the number of original code blocks"
        )

    modified_text = text.copy()
    for block, original_block in zip(code_blocks, original_code_blocks):
        updated_content = replace_multiline_code_block(block, original_block)

        # `start_line_no` is 1-based; list indexes are 0-based.
        first_index = block["start_line_no"] - 1
        for index, updated_line in enumerate(updated_content, start=first_index):
            modified_text[index] = updated_line

    return modified_text
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
import difflib
|
||||
import os
|
||||
from collections.abc import Iterable
|
||||
from pathlib import Path
|
||||
from typing import Annotated
|
||||
|
||||
@@ -8,13 +11,27 @@ from scripts.doc_parsing_utils import (
|
||||
extract_header_permalinks,
|
||||
extract_html_links,
|
||||
extract_markdown_links,
|
||||
extract_multiline_code_blocks,
|
||||
replace_code_includes_with_placeholders,
|
||||
replace_header_permalinks,
|
||||
replace_html_links,
|
||||
replace_markdown_links,
|
||||
replace_multiline_code_blocks_in_text,
|
||||
replace_placeholders_with_code_includes,
|
||||
)
|
||||
|
||||
# Relative-path prefixes that `get_all_paths` excludes from its result.
# Passed as a tuple to `str.startswith`.
non_translated_sections = (
    "reference" + os.sep,
    "release-notes.md",
    "fastapi-people.md",
    "external-links.md",
    "newsletter.md",
    "management-tasks.md",
    "management.md",
    "contributing.md",
)
|
||||
|
||||
|
||||
cli = typer.Typer()
|
||||
|
||||
|
||||
@@ -23,6 +40,53 @@ def callback():
|
||||
pass
|
||||
|
||||
|
||||
def iter_all_lang_paths(lang_path_root: Path) -> Iterable[Path]:
    """
    Iterate on the markdown files to translate in order of priority.

    The order is: the `*.md` files directly in `lang_path_root`, then every
    `*.md` file under the priority directories (`learn`, `tutorial`,
    `advanced`, `about`, `how-to`, in that order), then every remaining `*.md`
    file under `lang_path_root`. Each file is yielded once.
    """

    first_dirs = [
        lang_path_root / "learn",
        lang_path_root / "tutorial",
        lang_path_root / "advanced",
        lang_path_root / "about",
        lang_path_root / "how-to",
    ]
    first_parent = lang_path_root
    yield from first_parent.glob("*.md")
    for dir_path in first_dirs:
        yield from dir_path.rglob("*.md")

    priority_dirs = set(first_dirs)
    for path in lang_path_root.rglob("*.md"):
        if path.parent == first_parent:
            continue
        # Compare real ancestors, not string prefixes: a sibling such as
        # "about-us" must not be mistaken for the priority directory "about".
        if not priority_dirs.isdisjoint(path.parents):
            continue
        yield path
|
||||
|
||||
|
||||
def get_all_paths(lang: str):
    """
    List the markdown files of language `lang`, relative to `docs/<lang>/docs`.

    Files are returned as strings in the order given by `iter_all_lang_paths`;
    any path starting with one of `non_translated_sections` is left out.
    """
    docs_root = Path("docs") / lang / "docs"
    relative_paths = (
        str(md_path.relative_to(docs_root))
        for md_path in iter_all_lang_paths(docs_root)
    )
    return [
        rel for rel in relative_paths if not rel.startswith(non_translated_sections)
    ]
|
||||
|
||||
|
||||
@cli.command()
def fix_all(ctx: typer.Context, language: str):
    # Run `fix_pages` on every page returned by `get_all_paths(language)`,
    # one page at a time. A ValueError raised for a page is printed and the
    # loop moves on to the next page.
    docs_root = Path("docs") / language / "docs"

    for page in get_all_paths(language):
        doc_path = docs_root / page
        try:
            fix_pages(doc_paths=[doc_path])
        except ValueError as e:
            print(f"Error processing {doc_path}: {e}")
|
||||
|
||||
|
||||
@cli.command()
|
||||
def fix_pages(
|
||||
doc_paths: Annotated[
|
||||
@@ -49,6 +113,11 @@ def fix_pages(
|
||||
)
|
||||
if fixed_doc_lines != doc_lines:
|
||||
print(f"Fixing code includes in: {path}")
|
||||
diff = difflib.unified_diff(
|
||||
doc_lines, fixed_doc_lines, fromfile="translation", tofile="fixed"
|
||||
)
|
||||
print("\n".join(diff))
|
||||
|
||||
doc_lines = fixed_doc_lines
|
||||
|
||||
# Fix permalinks
|
||||
@@ -75,7 +144,14 @@ def fix_pages(
|
||||
doc_lines = fixed_doc_lines
|
||||
|
||||
# Fix multiline code blocks
|
||||
# TODO: Implement
|
||||
en_code_blocks = extract_multiline_code_blocks(en_doc_lines)
|
||||
doc_code_blocks = extract_multiline_code_blocks(doc_lines)
|
||||
fixed_doc_lines = replace_multiline_code_blocks_in_text(
|
||||
doc_lines, doc_code_blocks, en_code_blocks
|
||||
)
|
||||
if fixed_doc_lines != doc_lines:
|
||||
print(f"Fixing multiline code blocks in: {path}")
|
||||
doc_lines = fixed_doc_lines
|
||||
|
||||
# Write back the fixed document
|
||||
doc_lines.append("") # Ensure file ends with a newline
|
||||
|
||||
Reference in New Issue
Block a user