import json
import logging
import os
import re
import shutil
import subprocess
from html.parser import HTMLParser
from http.server import HTTPServer, SimpleHTTPRequestHandler
from multiprocessing import Pool
from pathlib import Path
from typing import Any
import typer
import yaml
from jinja2 import Template
from ruff.__main__ import find_ruff_bin
from slugify import slugify as py_slugify
# Configure root logging once at import time so INFO messages are shown.
logging.basicConfig(level=logging.INFO)

# Set of (lowercase) language codes treated as supported.
SUPPORTED_LANGS = {
    "de",
    "en",
    "es",
    "fr",
    "ja",
    "ko",
    "pt",
    "ru",
    "tr",
    "uk",
    "zh",
    "zh-hant",
}

# Typer application object; commands register themselves with @app.command().
app = typer.Typer()

# File name of the docs configuration, both for the English source config
# and for the per-language config written into the staging directory.
mkdocs_name = "mkdocs.yml"

# Path prefixes (relative to a language's docs directory) that
# is_non_translated_path() checks against; matching English pages are staged
# without the "missing translation" notice.
non_translated_sections = (
    f"reference{os.sep}",
    "release-notes.md",
    "fastapi-people.md",
    "external-links.md",
    "newsletter.md",
    "management-tasks.md",
    "management.md",
    "contributing.md",
    "translations.md",
)

# Source locations, relative to the current working directory.
docs_path = Path("docs")
en_docs_path = Path("docs/en")
en_config_path: Path = en_docs_path / mkdocs_name

# Output and staging directories, resolved to absolute paths at import time.
site_path = Path("site").absolute()
zensical_src_path = Path("site_zensical_src").absolute()

# Markdown header line. Groups: (1) the 1-6 leading hashes, (2) the title,
# (3) the optional "#anchor" found inside a trailing "{ ... }".
header_pattern = re.compile(r"^(#{1,6}) (.+?)(?:\s*\{\s*(#.*)\s*\})?\s*$")
# Same as above, but the trailing "{ #anchor }" is required and group 3 is the
# whole braced suffix (including the whitespace in front of it).
header_with_permalink_pattern = re.compile(r"^(#{1,6}) (.+?)(\s*\{\s*#.*\s*\})\s*$")
# Lines that start (after optional whitespace) with three / four backticks.
code_block3_pattern = re.compile(r"^\s*```")
code_block4_pattern = re.compile(r"^\s*````")
# Pattern to match markdown links: [text](url) → text
md_link_pattern = re.compile(r"\[([^\]]+)\]\([^)]+\)")


def strip_markdown_links(text: str) -> str:
    """Return *text* with every ``[label](target)`` link reduced to its label."""
    return md_link_pattern.sub(lambda link: link.group(1), text)
class VisibleTextExtractor(HTMLParser):
    """Extract visible text from a string with HTML tags.

    The parser collects every chunk of character data it is handed and
    ignores the tags themselves. One instance can be reused for many inputs
    through :meth:`extract_visible_text`.
    """

    def __init__(self) -> None:
        super().__init__()
        # Text chunks gathered for the input currently being parsed.
        self.text_parts: list[str] = []

    def handle_data(self, data: str) -> None:
        """Record one chunk of character data reported by the parser."""
        self.text_parts.append(data)

    def extract_visible_text(self, html: str) -> str:
        """Return the text content of *html* with tags removed.

        Leading and trailing whitespace of the combined text is stripped.
        """
        # Start from a clean parser state so the instance can be reused.
        self.reset()
        self.text_parts = []
        self.feed(html)
        # feed() holds back trailing text that contains a bare "&" (it might
        # be a character reference cut in half). close() tells the parser the
        # input is complete so that text is flushed instead of being lost.
        self.close()
        return "".join(self.text_parts).strip()
def slugify(text: str) -> str:
    """Build a slug for *text* with ``py_slugify`` and custom replacements."""
    # Substitutions handed to py_slugify so inline markup and apostrophes
    # disappear instead of influencing the slug.
    markup_replacements = [
        ("`", ""),  # `dict`s -> dicts
        ("'s", "s"),  # it's -> its
        ("'t", "t"),  # don't -> dont
        ("**", ""),  # **FastAPI**s -> FastAPIs
    ]
    return py_slugify(text, replacements=markup_replacements)
def get_en_config() -> dict[str, Any]:
    """Read and parse the English docs configuration at ``en_config_path``."""
    raw_config = en_config_path.read_text(encoding="utf-8")
    # NOTE(review): unsafe_load can construct arbitrary Python objects; this is
    # only acceptable while the config file is trusted repository content.
    return yaml.unsafe_load(raw_config)
def get_lang_paths() -> list[Path]:
    """Return every entry directly inside ``docs_path``, in sorted order."""
    entries = list(docs_path.iterdir())
    entries.sort()
    return entries
def lang_callback(lang: str | None) -> str | None:
if lang is None:
return None
lang = lang.lower()
return lang
def complete_existing_lang(incomplete: str):
    """Yield names of directories under the docs path starting with *incomplete*."""
    yield from (
        candidate.name
        for candidate in get_lang_paths()
        if candidate.is_dir() and candidate.name.startswith(incomplete)
    )
@app.callback()
def callback() -> None:
    # For MacOS with Cairo
    homebrew_lib_dir = "/opt/homebrew/lib"
    os.environ["DYLD_FALLBACK_LIBRARY_PATH"] = homebrew_lib_dir
@app.command()
def new_lang(lang: str = typer.Argument(..., callback=lang_callback)):
    """
    Generate a new docs translation directory for the language LANG.
    """
    lang_dir: Path = Path("docs") / lang
    # Refuse to touch a language directory that is already there.
    if lang_dir.exists():
        typer.echo(f"The language was already created: {lang}")
        raise typer.Abort()
    lang_dir.mkdir()
    # Every new language starts with an empty LLM prompt file.
    (lang_dir / "llm-prompt.md").write_text("", encoding="utf-8")
    print(f"Successfully initialized: {lang_dir}")
    update_languages()
@app.command()
def build_lang(
    lang: str = typer.Argument(
        ..., callback=lang_callback, autocompletion=complete_existing_lang
    ),
) -> None:
    """
    Build the docs for a language.
    """
    # Build into the staging area first, then publish the result to the site
    # directory.
    build_zensical_lang_to_stage(lang)
    copy_zensical_stage_to_site(lang)
    # `fg` selects the text color. `color` is only the boolean switch that
    # forces ANSI output on or off, so passing the color name there never
    # produced green text.
    typer.secho(f"Successfully built docs for: {lang}", fg=typer.colors.GREEN)
def split_markdown_header(markdown: str) -> tuple[str, str]:
    """Split *markdown* into a leading header part and the remaining body.

    The header part is made of an optional ``---`` delimited front matter
    block followed by an optional first ``#`` heading paragraph (everything
    up to the first blank line). The returned header is ``""`` when neither
    is found.
    """
    fence = "\n---\n"
    front_matter = ""
    rest = markdown
    if markdown.startswith("---\n"):
        closing_at = markdown.find(fence, 4)
        if closing_at >= 0:
            cut = closing_at + len(fence)
            front_matter, rest = markdown[:cut], markdown[cut:]

    if rest.startswith("#"):
        heading, blank_line, remainder = rest.partition("\n\n")
        # Only treat the heading as a header when a blank line ends it.
        if blank_line:
            return front_matter + heading, remainder

    # No usable heading: the header is the front matter alone (possibly empty).
    return front_matter.rstrip("\n"), rest
def add_markdown_notice(markdown: str, notice: str) -> str:
    """Insert *notice* after the header of *markdown*, or at the top if none."""
    header, body = split_markdown_header(markdown)
    sections = [notice, body]
    if header:
        sections.insert(0, header)
    # Sections are separated by one blank line.
    return "\n\n".join(sections)
def is_non_translated_path(path: Path) -> bool:
    """Tell whether *path* falls under one of ``non_translated_sections``.

    *path* is expected to be relative to a docs directory; the check is a
    plain prefix match on its POSIX form.
    """
    src_path = path.as_posix()
    # Sections may be spelled with the native separator (e.g. "reference" +
    # os.sep) while as_posix() always uses "/". Normalize the sections so the
    # prefix match also works where os.sep is not "/".
    prefixes = tuple(
        section.replace(os.sep, "/") for section in non_translated_sections
    )
    return src_path.startswith(prefixes)
def get_en_url(path: Path) -> str:
    """Map a docs-relative Markdown path to its English page URL.

    ``index`` pages resolve to their directory URL; any other page gets its
    own trailing-slash URL.
    """
    page = path.with_suffix("").as_posix()
    if page == "index":
        # Root index page: the site root itself.
        slug = ""
    elif page.endswith("/index"):
        # Section index: keep the directory part including its trailing "/".
        slug = page[: -len("index")]
    else:
        slug = page + "/"
    return "https://fastapi.tiangolo.com/" + slug
def get_zensical_theme_language(lang: str) -> str:
    """Return the theme language value for *lang*.

    Only ``zh-hant`` is rewritten (to ``zh-Hant``); every other code is
    returned unchanged.
    """
    theme_language_overrides = {"zh-hant": "zh-Hant"}
    return theme_language_overrides.get(lang, lang)
def stage_zensical_docs(lang: str) -> Path:
    # Prepare the staging directory for one language and return the path of
    # the config file written there.
    #
    # The staged content starts as a copy of the English docs. For languages
    # other than "en", pages that have a translation are replaced by it (plus
    # a translation banner), and untranslated pages get the "missing
    # translation" notice unless they are in a non-translated section.
    #
    # Raises typer.Abort when docs/<lang>/docs does not exist.
    lang_docs_path = docs_path / lang / "docs"
    if not lang_docs_path.is_dir():
        typer.echo(f"The language translation doesn't seem to exist yet: {lang}")
        raise typer.Abort()
    en_docs_source_path = en_docs_path / "docs"
    staged_docs_src_path = zensical_src_path / "docs_src"
    # docs_src is copied into the staging root only when it is not there yet.
    if not staged_docs_src_path.exists():
        shutil.copytree(Path("docs_src"), staged_docs_src_path, dirs_exist_ok=True)
    lang_stage_path = zensical_src_path / lang
    staged_docs_path = lang_stage_path / "content"
    # Always rebuild the language stage from scratch, using English as base.
    shutil.rmtree(lang_stage_path, ignore_errors=True)
    shutil.copytree(en_docs_source_path, staged_docs_path)
    missing_translation = (docs_path / "missing-translation.md").read_text(
        encoding="utf-8"
    )
    # Prefer the language's own banner; fall back to the English one.
    translation_banner_path = lang_docs_path / "translation-banner.md"
    if not translation_banner_path.is_file():
        translation_banner_path = en_docs_source_path / "translation-banner.md"
    translation_banner = translation_banner_path.read_text(encoding="utf-8")
    if lang != "en":
        for staged_file in staged_docs_path.rglob("*.md"):
            relative_path = staged_file.relative_to(staged_docs_path)
            translated_file = lang_docs_path / relative_path
            if translated_file.is_file():
                markdown = translated_file.read_text(encoding="utf-8")
                if relative_path.name == "translation-banner.md":
                    # The banner file itself is staged verbatim, without a
                    # banner inserted into it.
                    staged_file.write_text(markdown, encoding="utf-8")
                    continue
                # Point the banner's placeholder at the English page.
                en_url = get_en_url(relative_path)
                banner = translation_banner.replace("ENGLISH_VERSION_URL", en_url)
                staged_file.write_text(
                    add_markdown_notice(markdown, banner), encoding="utf-8"
                )
            elif not is_non_translated_path(relative_path):
                # No translation: keep the English text and add the notice.
                markdown = staged_file.read_text(encoding="utf-8")
                staged_file.write_text(
                    add_markdown_notice(markdown, missing_translation),
                    encoding="utf-8",
                )
    # Data and overrides always come from the English docs.
    shutil.copytree(en_docs_path / "data", lang_stage_path / "data")
    shutil.copytree(en_docs_path / "overrides", lang_stage_path / "overrides")
    # get_updated_config_content() is defined elsewhere in this file.
    config = get_updated_config_content()
    # Directory names relative to the staged config file.
    config["docs_dir"] = "content"
    config["site_dir"] = "site"
    if lang == "en":
        config["site_url"] = "https://fastapi.tiangolo.com/"
    else:
        config["site_url"] = f"https://fastapi.tiangolo.com/{lang}/"
    config.setdefault("theme", {})
    config["theme"]["language"] = get_zensical_theme_language(lang)
    if lang != "en":
        # The root English build owns shared static assets; translated builds should
        # reference those root paths instead of emitting language-local copies.
        if "logo" in config["theme"]:
            config["theme"]["logo"] = "/" + config["theme"]["logo"].lstrip("/")
        if "favicon" in config["theme"]:
            config["theme"]["favicon"] = "/" + config["theme"]["favicon"].lstrip("/")
        # NOTE(review): unlike logo/favicon these keys are read unconditionally,
        # so the config is assumed to always define them - confirm.
        config["extra_css"] = ["/" + path.lstrip("/") for path in config["extra_css"]]
        config["extra_javascript"] = [
            "/" + path.lstrip("/") for path in config["extra_javascript"]
        ]
    config_path = lang_stage_path / mkdocs_name
    config_path.write_text(
        yaml.dump(config, sort_keys=False, width=200, allow_unicode=True),
        encoding="utf-8",
    )
    return config_path
def build_zensical_config(config_path: Path) -> None:
    """Run ``zensical build`` for *config_path* from the config's directory.

    Raises ``subprocess.CalledProcessError`` when the command exits non-zero.
    """
    # The config is passed by file name because the command runs with the
    # config's parent directory as its working directory.
    command = ["zensical", "build", "--config-file", config_path.name]
    subprocess.run(command, cwd=config_path.parent, check=True)
def build_zensical_lang_to_stage(lang: str) -> Path:
    """Stage and build the docs for *lang*; return the built site directory."""
    typer.echo(f"Building Zensical docs for: {lang}")
    staged_config_path = stage_zensical_docs(lang)
    staged_config_text = staged_config_path.read_text(encoding="utf-8")
    staged_config = yaml.unsafe_load(staged_config_text)
    # The output directory is resolved relative to the staged config file.
    output_dir = staged_config_path.parent / staged_config["site_dir"]
    # Drop any earlier build output before building again.
    shutil.rmtree(output_dir, ignore_errors=True)
    build_zensical_config(staged_config_path)
    return output_dir
def copy_zensical_stage_to_site(lang: str) -> None:
    """Copy the staged build of *lang* into its place under ``site_path``.

    English is copied to the site root, any other language to a
    sub-directory named after it. The destination is removed first.
    """
    built_site = zensical_src_path / lang / "site"
    destination = site_path if lang == "en" else site_path / lang
    shutil.rmtree(destination, ignore_errors=True)
    shutil.copytree(built_site, destination, dirs_exist_ok=True)
# Template text for the sponsors section, with one heading and one loop per
# tier (sponsors.keystone, sponsors.gold, sponsors.silver). Presumably rendered
# with jinja2.Template - confirm at the call site.
# NOTE(review): the loop bodies are empty in this view; verify against the
# repository that no per-sponsor markup has been lost.
index_sponsors_template = """
### Keystone Sponsor
{% for sponsor in sponsors.keystone -%}
{% endfor %}
### Gold Sponsors
{% for sponsor in sponsors.gold -%}
{% endfor %}
### Silver Sponsors
{% for sponsor in sponsors.silver -%}
{% endfor %}
"""
def remove_header_permalinks(content: str) -> str:
    """Return *content* with the trailing ``{ #anchor }`` removed from headers.

    Lines that are not headers carrying a permalink are kept untouched.
    """

    def _without_permalink(line: str) -> str:
        found = header_with_permalink_pattern.match(line)
        if found is None:
            return line
        # Rebuild the header from its hashes and title only.
        return f"{found.group(1)} {found.group(2)}"

    return "\n".join(_without_permalink(line) for line in content.split("\n"))
def generate_readme_content() -> str:
en_index = en_docs_path / "docs" / "index.md"
content = en_index.read_text("utf-8")
content = remove_header_permalinks(content) # remove permalinks from headers
match_pre = re.search(r"\n\n", content)
match_start = re.search(r"", content)
match_end = re.search(r"", content)
sponsors_data_path = en_docs_path / "data" / "sponsors.yml"
sponsors = yaml.safe_load(sponsors_data_path.read_text(encoding="utf-8"))
if not (match_start and match_end):
raise RuntimeError("Couldn't auto-generate sponsors section")
if not match_pre:
raise RuntimeError("Couldn't find pre section (