From 3a7431ce041e34fc9018e32f33e2bea996815328 Mon Sep 17 00:00:00 2001 From: polybjorn Date: Sat, 2 May 2026 21:43:19 +0000 Subject: [PATCH] fix(i18n): validate language directory names against gen.lang.* keys (#8767) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Detect when an `app/i18n/<lang>/` directory has no matching `gen.lang.<lang>` key in the reference language (or vice versa), and refuse to regenerate the README from that invalid state. This catches a class of silent corruption where the README translation table renders literal i18n keys instead of localised language names. The trigger is most often a case-folded directory on macOS APFS - git tracks `zh-TW`, the local FS reads back `zh-tw`, the script's `_t('gen.lang.zh-tw')` lookup misses, and the README ends up with `gen.lang.zh-tw (zh-tw)` instead of `正體中文 (zh-TW)`. The same check also flags orphan directories (no display-name key) and orphan keys (no directory). The new validateLanguageNames() method on I18nData performs a bidirectional set comparison and returns human-readable issues. cli/check.translation.php prints them to STDERR and gates --generate-readme on the result, leaving routine completeness validation behaviour unchanged. Adds four PHPUnit tests covering: clean state, case mismatch, orphan directory, orphan key. Co-authored-by: Bjørn A. 
Andersen --- cli/check.translation.php | 13 ++++++++++ cli/i18n/I18nData.php | 39 ++++++++++++++++++++++++++++ tests/cli/i18n/I18nDataTest.php | 46 +++++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+) diff --git a/cli/check.translation.php b/cli/check.translation.php index 1ec9ead28..82a95e063 100755 --- a/cli/check.translation.php +++ b/cli/check.translation.php @@ -43,6 +43,12 @@ if (isset($cliOptions->language)) { } $isValidated = true; +$languageNameIssues = $i18nData->validateLanguageNames(); +foreach ($languageNameIssues as $issue) { + fwrite(STDERR, "Error: {$issue}\n"); + $isValidated = false; +} + $result = []; $report = []; $percentage = []; @@ -116,6 +122,13 @@ function writeToReadme(string $readmePath, string $markdownTable): void { } if ($cliOptions->generateReadme) { + if ($languageNameIssues !== []) { + // Refuse to regenerate the README when language directory names and + // `gen.lang.*` keys disagree, otherwise we would silently produce a + // corrupt translation table (e.g. literal `gen.lang.*` keys instead of + // localised language names). Routine incomplete translations are fine. + exit(1); + } $markdownTable = <<` keys in the reference language, case-sensitively. + * + * Catches two classes of mismatch: + * - A language directory whose name has no matching `gen.lang.<lang>` key + * (case-folding on case-insensitive filesystems such as macOS APFS, typo, + * or a new language added without its display name). + * - A `gen.lang.<lang>` key with no matching directory (orphan after a + * language was removed). + * + * @return list<string> Human-readable mismatches; empty when consistent. + */ + public function validateLanguageNames(): array { + $prefix = 'gen.lang.'; + $declared = []; + foreach (array_keys($this->data[static::REFERENCE_LANGUAGE]['gen.php'] ?? 
[]) as $key) { + if (str_starts_with((string)$key, $prefix)) { + $declared[] = substr((string)$key, strlen($prefix)); + } + } + sort($declared); + + $available = $this->getAvailableLanguages(); + $issues = []; + foreach (array_diff($available, $declared) as $orphanDir) { + $issues[] = "Language directory `app/i18n/{$orphanDir}/` has no matching " + . "`gen.lang.{$orphanDir}` key in the reference language. Possible causes: " + . 'case mismatch (e.g. on macOS APFS), typo, or missing display-name key.'; + } + foreach (array_diff($declared, $available) as $orphanKey) { + $issues[] = "Reference key `gen.lang.{$orphanKey}` has no matching " + . "`app/i18n/{$orphanKey}/` directory. Possible cause: orphan key after " + . 'a language was removed.'; + } + + return $issues; + } + /** * Return all available languages without the reference language * @return list diff --git a/tests/cli/i18n/I18nDataTest.php b/tests/cli/i18n/I18nDataTest.php index 1d1907f7c..c22dff70a 100644 --- a/tests/cli/i18n/I18nDataTest.php +++ b/tests/cli/i18n/I18nDataTest.php @@ -869,4 +869,50 @@ final class I18nDataTest extends \PHPUnit\Framework\TestCase { $data = new I18nData($rawData); self::assertSame($this->referenceData['en'], $data->getReferenceLanguage()); } + + /** @return array>> */ + private function dataWithLangKeys(string ...$langCodes): array { + $genFile = []; + foreach ($langCodes as $code) { + $genFile['gen.lang.' . 
$code] = $this->value; + } + return [ + 'en' => ['gen.php' => $genFile], + ]; + } + + public function testValidateLanguageNamesPassesWhenDirsAndKeysMatch(): void { + $rawData = $this->dataWithLangKeys('en', 'fr', 'zh-TW'); + $rawData['fr'] = []; + $rawData['zh-TW'] = []; + $data = new I18nData($rawData); + self::assertSame([], $data->validateLanguageNames()); + } + + public function testValidateLanguageNamesFlagsCaseMismatch(): void { + $rawData = $this->dataWithLangKeys('en', 'zh-TW'); + $rawData['zh-tw'] = []; + $data = new I18nData($rawData); + $issues = $data->validateLanguageNames(); + self::assertCount(2, $issues); + self::assertStringContainsString('app/i18n/zh-tw/', $issues[0]); + self::assertStringContainsString('gen.lang.zh-TW', $issues[1]); + } + + public function testValidateLanguageNamesFlagsOrphanDirectory(): void { + $rawData = $this->dataWithLangKeys('en'); + $rawData['fr'] = []; + $data = new I18nData($rawData); + $issues = $data->validateLanguageNames(); + self::assertCount(1, $issues); + self::assertStringContainsString('app/i18n/fr/', $issues[0]); + } + + public function testValidateLanguageNamesFlagsOrphanKey(): void { + $rawData = $this->dataWithLangKeys('en', 'fr'); + $data = new I18nData($rawData); + $issues = $data->validateLanguageNames(); + self::assertCount(1, $issues); + self::assertStringContainsString('gen.lang.fr', $issues[0]); + } }