import json import logging import os import re import shutil import subprocess from html.parser import HTMLParser from http.server import HTTPServer, SimpleHTTPRequestHandler from multiprocessing import Pool from pathlib import Path from typing import Any import typer import yaml from jinja2 import Template from ruff.__main__ import find_ruff_bin from slugify import slugify as py_slugify logging.basicConfig(level=logging.INFO) SUPPORTED_LANGS = { "de", "en", "es", "fr", "ja", "ko", "pt", "ru", "tr", "uk", "zh", "zh-hant", } app = typer.Typer() mkdocs_name = "mkdocs.yml" non_translated_sections = ( f"reference{os.sep}", "release-notes.md", "fastapi-people.md", "external-links.md", "newsletter.md", "management-tasks.md", "management.md", "contributing.md", "translations.md", ) docs_path = Path("docs") en_docs_path = Path("docs/en") en_config_path: Path = en_docs_path / mkdocs_name site_path = Path("site").absolute() zensical_src_path = Path("site_zensical_src").absolute() header_pattern = re.compile(r"^(#{1,6}) (.+?)(?:\s*\{\s*(#.*)\s*\})?\s*$") header_with_permalink_pattern = re.compile(r"^(#{1,6}) (.+?)(\s*\{\s*#.*\s*\})\s*$") code_block3_pattern = re.compile(r"^\s*```") code_block4_pattern = re.compile(r"^\s*````") # Pattern to match markdown links: [text](url) → text md_link_pattern = re.compile(r"\[([^\]]+)\]\([^)]+\)") def strip_markdown_links(text: str) -> str: """Replace markdown links with just their visible text.""" return md_link_pattern.sub(r"\1", text) class VisibleTextExtractor(HTMLParser): """Extract visible text from a string with HTML tags.""" def __init__(self): super().__init__() self.text_parts = [] def handle_data(self, data): self.text_parts.append(data) def extract_visible_text(self, html: str) -> str: self.reset() self.text_parts = [] self.feed(html) return "".join(self.text_parts).strip() def slugify(text: str) -> str: return py_slugify( text, replacements=[ ("`", ""), # `dict`s -> dicts ("'s", "s"), # it's -> its ("'t", "t"), # don't -> dont ("**", ""), # **FastAPI**s -> FastAPIs ], ) def get_en_config() -> dict[str, Any]: return yaml.unsafe_load(en_config_path.read_text(encoding="utf-8")) def get_lang_paths() -> list[Path]: return sorted(docs_path.iterdir()) def lang_callback(lang: str | None) -> str | None: if lang is None: return None lang = lang.lower() return lang def complete_existing_lang(incomplete: str): lang_path: Path for lang_path in get_lang_paths(): if lang_path.is_dir() and lang_path.name.startswith(incomplete): yield lang_path.name @app.callback() def callback() -> None: # For MacOS with Cairo os.environ["DYLD_FALLBACK_LIBRARY_PATH"] = "/opt/homebrew/lib" @app.command() def new_lang(lang: str = typer.Argument(..., callback=lang_callback)): """ Generate a new docs translation directory for the language LANG. """ new_path: Path = Path("docs") / lang if new_path.exists(): typer.echo(f"The language was already created: {lang}") raise typer.Abort() new_path.mkdir() new_llm_prompt_path: Path = new_path / "llm-prompt.md" new_llm_prompt_path.write_text("", encoding="utf-8") print(f"Successfully initialized: {new_path}") update_languages() @app.command() def build_lang( lang: str = typer.Argument( ..., callback=lang_callback, autocompletion=complete_existing_lang ), ) -> None: """ Build the docs for a language. """ build_zensical_lang_to_stage(lang) copy_zensical_stage_to_site(lang) typer.secho(f"Successfully built docs for: {lang}", color=typer.colors.GREEN) def split_markdown_header(markdown: str) -> tuple[str, str]: prefix = "" if markdown.startswith("---\n"): front_matter_end = markdown.find("\n---\n", 4) if front_matter_end != -1: front_matter_end += len("\n---\n") prefix = markdown[:front_matter_end] markdown = markdown[front_matter_end:] if markdown.startswith("#"): header, separator, body = markdown.partition("\n\n") if separator: return f"{prefix}{header}", body if prefix: return prefix.rstrip("\n"), markdown return "", markdown def add_markdown_notice(markdown: str, notice: str) -> str: header, body = split_markdown_header(markdown) if header: return f"{header}\n\n{notice}\n\n{body}" return f"{notice}\n\n{body}" def is_non_translated_path(path: Path) -> bool: src_path = path.as_posix() return any(src_path.startswith(section) for section in non_translated_sections) def get_en_url(path: Path) -> str: url_path = path.with_suffix("").as_posix() if url_path.endswith("/index"): url_path = url_path.removesuffix("index") elif url_path != "index": url_path = f"{url_path}/" else: url_path = "" return f"https://fastapi.tiangolo.com/{url_path}" def get_zensical_theme_language(lang: str) -> str: if lang == "zh-hant": return "zh-Hant" return lang def stage_zensical_docs(lang: str) -> Path: lang_docs_path = docs_path / lang / "docs" if not lang_docs_path.is_dir(): typer.echo(f"The language translation doesn't seem to exist yet: {lang}") raise typer.Abort() en_docs_source_path = en_docs_path / "docs" staged_docs_src_path = zensical_src_path / "docs_src" if not staged_docs_src_path.exists(): shutil.copytree(Path("docs_src"), staged_docs_src_path, dirs_exist_ok=True) lang_stage_path = zensical_src_path / lang staged_docs_path = lang_stage_path / "content" shutil.rmtree(lang_stage_path, ignore_errors=True) shutil.copytree(en_docs_source_path, staged_docs_path) missing_translation = (docs_path / "missing-translation.md").read_text( encoding="utf-8" ) translation_banner_path = lang_docs_path / "translation-banner.md" if not translation_banner_path.is_file(): translation_banner_path = en_docs_source_path / "translation-banner.md" translation_banner = translation_banner_path.read_text(encoding="utf-8") if lang != "en": for staged_file in staged_docs_path.rglob("*.md"): relative_path = staged_file.relative_to(staged_docs_path) translated_file = lang_docs_path / relative_path if translated_file.is_file(): markdown = translated_file.read_text(encoding="utf-8") if relative_path.name == "translation-banner.md": staged_file.write_text(markdown, encoding="utf-8") continue en_url = get_en_url(relative_path) banner = translation_banner.replace("ENGLISH_VERSION_URL", en_url) staged_file.write_text( add_markdown_notice(markdown, banner), encoding="utf-8" ) elif not is_non_translated_path(relative_path): markdown = staged_file.read_text(encoding="utf-8") staged_file.write_text( add_markdown_notice(markdown, missing_translation), encoding="utf-8", ) shutil.copytree(en_docs_path / "data", lang_stage_path / "data") shutil.copytree(en_docs_path / "overrides", lang_stage_path / "overrides") config = get_updated_config_content() config["docs_dir"] = "content" config["site_dir"] = "site" if lang == "en": config["site_url"] = "https://fastapi.tiangolo.com/" else: config["site_url"] = f"https://fastapi.tiangolo.com/{lang}/" config.setdefault("theme", {}) config["theme"]["language"] = get_zensical_theme_language(lang) if lang != "en": # The root English build owns shared static assets; translated builds should # reference those root paths instead of emitting language-local copies. if "logo" in config["theme"]: config["theme"]["logo"] = "/" + config["theme"]["logo"].lstrip("/") if "favicon" in config["theme"]: config["theme"]["favicon"] = "/" + config["theme"]["favicon"].lstrip("/") config["extra_css"] = ["/" + path.lstrip("/") for path in config["extra_css"]] config["extra_javascript"] = [ "/" + path.lstrip("/") for path in config["extra_javascript"] ] config_path = lang_stage_path / mkdocs_name config_path.write_text( yaml.dump(config, sort_keys=False, width=200, allow_unicode=True), encoding="utf-8", ) return config_path def build_zensical_config(config_path: Path) -> None: subprocess.run( ["zensical", "build", "--config-file", config_path.name], check=True, cwd=config_path.parent, ) def build_zensical_lang_to_stage(lang: str) -> Path: typer.echo(f"Building Zensical docs for: {lang}") config_path = stage_zensical_docs(lang) config = yaml.unsafe_load(config_path.read_text(encoding="utf-8")) build_site_dist_path = config_path.parent / config["site_dir"] shutil.rmtree(build_site_dist_path, ignore_errors=True) build_zensical_config(config_path) return build_site_dist_path def copy_zensical_stage_to_site(lang: str) -> None: build_site_dist_path = zensical_src_path / lang / "site" if lang == "en": dist_path = site_path else: dist_path = site_path / lang shutil.rmtree(dist_path, ignore_errors=True) shutil.copytree(build_site_dist_path, dist_path, dirs_exist_ok=True) index_sponsors_template = """ ### Keystone Sponsor {% for sponsor in sponsors.keystone -%} {% endfor %} ### Gold Sponsors {% for sponsor in sponsors.gold -%} {% endfor %} ### Silver Sponsors {% for sponsor in sponsors.silver -%} {% endfor %} """ def remove_header_permalinks(content: str): lines: list[str] = [] for line in content.split("\n"): match = header_with_permalink_pattern.match(line) if match: hashes, title, *_ = match.groups() line = f"{hashes} {title}" lines.append(line) return "\n".join(lines) def generate_readme_content() -> str: en_index = en_docs_path / "docs" / "index.md" content = en_index.read_text("utf-8") content = remove_header_permalinks(content) # remove permalinks from headers match_pre = re.search(r"\n\n", content) match_start = re.search(r"", content) match_end = re.search(r"", content) sponsors_data_path = en_docs_path / "data" / "sponsors.yml" sponsors = yaml.safe_load(sponsors_data_path.read_text(encoding="utf-8")) if not (match_start and match_end): raise RuntimeError("Couldn't auto-generate sponsors section") if not match_pre: raise RuntimeError("Couldn't find pre section (