From 5dcdac6680adf01bc560302a86a374dc60a1e906 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Pablo=20Villaf=C3=A1=C3=B1ez?= Date: Thu, 11 Nov 2021 14:13:41 +0100 Subject: [PATCH] Refactor to simplify the code --- thumbnails/pkg/preprocessor/textanalyzer.go | 178 ++++++++++-------- .../pkg/preprocessor/textanalyzer_test.go | 159 ++++++++++------ 2 files changed, 200 insertions(+), 137 deletions(-) diff --git a/thumbnails/pkg/preprocessor/textanalyzer.go b/thumbnails/pkg/preprocessor/textanalyzer.go index 846fa2f225..eb139e9f2a 100644 --- a/thumbnails/pkg/preprocessor/textanalyzer.go +++ b/thumbnails/pkg/preprocessor/textanalyzer.go @@ -112,18 +112,7 @@ func (ta *TextAnalyzer) AnalyzeString(word string, opts AnalysisOpts) TextAnalys runeCount := 0 for wordIndex, char := range word { - script := "_unknown" - for scriptIndex, scriptFound := range ta.scriptListCache { - // if we can't match with a known script, do nothing and jump to the next char - if unicode.Is(ta.scripts[scriptFound], char) { - if scriptIndex > 3 { - // we might expect more chars with the same script - // so move the script first to match it faster next time - ta.reorderScriptList(scriptFound) - } - script = scriptFound - } - } + script := ta.chooseScriptFor(char) isWhiteSpace := unicode.Is(unicode.White_Space, char) if lastRange == nil { @@ -135,23 +124,16 @@ func (ta *TextAnalyzer) AnalyzeString(word string, opts AnalysisOpts) TextAnalys } } else { if script != lastRange.TargetScript { - if opts.UseMergeMap { - // This option mainly target japanese chars; multiple scripts can be used - // in the same piece of text (Han, Hiragana and Katakana) - // Instead of starting a new range, adjust the target script of the last range - if expCurrent, currentOk := opts.MergeMap[lastRange.TargetScript]; currentOk { - if expFinal, finalOk := expCurrent[script]; finalOk { - lastRange.TargetScript = expFinal - if isWhiteSpace { - // TODO: Check if this is dead code. 
- // whitespace should be part of the "Common" script, and the Common - // script shouldn't be part of a mergeMap - lastRange.Spaces = append(lastRange.Spaces, wordIndex) - } - runeCount++ - continue - } + if mapScript, isOk := ta.getMergeMapValue(opts, lastRange.TargetScript, script); isOk { + lastRange.TargetScript = mapScript + if isWhiteSpace { + // TODO: Check if this is dead code. + // whitespace should be part of the "Common" script, and the Common + // script shouldn't be part of a mergeMap + lastRange.Spaces = append(lastRange.Spaces, wordIndex) } + runeCount++ + continue } lastRange.High = wordIndex - 1 @@ -166,10 +148,9 @@ func (ta *TextAnalyzer) AnalyzeString(word string, opts AnalysisOpts) TextAnalys Spaces: make([]int, 0), TargetScript: script, } - runeCount = 1 - } else { - runeCount++ + runeCount = 0 } + runeCount++ } if isWhiteSpace { lastRange.Spaces = append(lastRange.Spaces, wordIndex) @@ -186,6 +167,22 @@ func (ta *TextAnalyzer) AnalyzeString(word string, opts AnalysisOpts) TextAnalys return analysis } +func (ta *TextAnalyzer) chooseScriptFor(char rune) string { + script := "_unknown" + for scriptIndex, scriptFound := range ta.scriptListCache { + // if we can't match with a known script, do nothing and jump to the next char + if unicode.Is(ta.scripts[scriptFound], char) { + if scriptIndex > 3 { + // we might expect more chars with the same script + // so move the script first to match it faster next time + ta.reorderScriptList(scriptFound) + } + return scriptFound + } + } + return script +} + // Reorder the scriptListCache in the TextAnalyzer in order to speed up // the next script searches. A "Latin" script is expected to be surrounded // by "Latin" chars, although "Common" script chars might be present too @@ -203,6 +200,23 @@ func (ta *TextAnalyzer) reorderScriptList(matchedScript string) { } } +// Get the value from the merge map based on the previous and current scripts. 
+// The information about using the merge map and the actual merge map are +// taken from the AnalysisOpts passed as a parameter +func (ta *TextAnalyzer) getMergeMapValue(opts AnalysisOpts, previous, current string) (string, bool) { + if opts.UseMergeMap { + // This option mainly targets Japanese chars; multiple scripts can be used + // in the same piece of text (Han, Hiragana and Katakana) + // Instead of starting a new range, adjust the target script of the last range + if expCurrent, currentOk := opts.MergeMap[previous]; currentOk { + if expFinal, finalOk := expCurrent[current]; finalOk { + return expFinal, finalOk + } + } + } + return "", false +} + // Change the "Common" script to the one used in the previous script range. // The ranges will be readjusted and merged if they're adjacent. // This naive approach should be good enough for normal use cases @@ -213,52 +227,52 @@ func (ta *TextAnalyzer) reorderScriptList(matchedScript string) { // If the MergeMap isn't needed, use an empty one func (tr *TextAnalysis) MergeCommon(mergeMap MergeMap) { var finalRanges []ScriptRange - var previousRange *ScriptRange - for _, sRange := range tr.ScriptRanges { - if previousRange != nil { - if previousRange.TargetScript == sRange.TargetScript { - previousRange.High = sRange.High - previousRange.Spaces = append(previousRange.Spaces, sRange.Spaces...) 
- previousRange.RuneCount += sRange.RuneCount - tr.RuneCount[previousRange.TargetScript] += sRange.RuneCount - tr.RuneCount[sRange.TargetScript] -= sRange.RuneCount - } else if previousRange.TargetScript == "Common" || previousRange.TargetScript == "Inherited" { - // might happen if the text starts with a Common script - previousRange.High = sRange.High - previousRange.Spaces = append(previousRange.Spaces, sRange.Spaces...) - tr.RuneCount[sRange.TargetScript] += previousRange.RuneCount - tr.RuneCount[previousRange.TargetScript] -= previousRange.RuneCount - previousRange.RuneCount += sRange.RuneCount - previousRange.TargetScript = sRange.TargetScript - } else { - if expCurrent, currentOk := mergeMap[previousRange.TargetScript]; currentOk { - if expFinal, finalOk := expCurrent[sRange.TargetScript]; finalOk { - if sRange.TargetScript == expFinal { - // the previous range has changed the target script - tr.RuneCount[previousRange.TargetScript] -= previousRange.RuneCount - tr.RuneCount[sRange.TargetScript] += previousRange.RuneCount - } else { - // new range has been absorbed - tr.RuneCount[sRange.TargetScript] -= sRange.RuneCount - tr.RuneCount[previousRange.TargetScript] += sRange.RuneCount - } - previousRange.TargetScript = expFinal - previousRange.High = sRange.High - previousRange.Spaces = append(previousRange.Spaces, sRange.Spaces...) - previousRange.RuneCount += sRange.RuneCount - continue - } - } - finalRanges = append(finalRanges, *previousRange) - *previousRange = sRange - } + var previousRange *ScriptRange = &ScriptRange{} + + if len(tr.ScriptRanges) < 1 { + // no ranges -> nothing to do + return + } + + *previousRange = tr.ScriptRanges[0] + for _, sRange := range tr.ScriptRanges[1:] { + if previousRange.TargetScript == sRange.TargetScript { + previousRange.High = sRange.High + previousRange.Spaces = append(previousRange.Spaces, sRange.Spaces...) 
+ previousRange.RuneCount += sRange.RuneCount + } else if sRange.TargetScript == "Common" || sRange.TargetScript == "Inherited" { + // new range will be absorbed into the previous one + previousRange.High = sRange.High + previousRange.Spaces = append(previousRange.Spaces, sRange.Spaces...) + previousRange.RuneCount += sRange.RuneCount + tr.RuneCount[previousRange.TargetScript] += sRange.RuneCount + tr.RuneCount[sRange.TargetScript] -= sRange.RuneCount + } else if previousRange.TargetScript == "Common" || previousRange.TargetScript == "Inherited" { + // might happen if the text starts with a Common script + previousRange.High = sRange.High + previousRange.Spaces = append(previousRange.Spaces, sRange.Spaces...) + tr.RuneCount[sRange.TargetScript] += previousRange.RuneCount + tr.RuneCount[previousRange.TargetScript] -= previousRange.RuneCount + previousRange.RuneCount += sRange.RuneCount + previousRange.TargetScript = sRange.TargetScript } else { - previousRange = &ScriptRange{} + if mapScript, isOk := tr.getMergeMapValue(mergeMap, previousRange.TargetScript, sRange.TargetScript); isOk { + if sRange.TargetScript == mapScript { + // the previous range has changed the target script + tr.RuneCount[previousRange.TargetScript] -= previousRange.RuneCount + tr.RuneCount[sRange.TargetScript] += previousRange.RuneCount + } else { + // new range has been absorbed + tr.RuneCount[sRange.TargetScript] -= sRange.RuneCount + tr.RuneCount[previousRange.TargetScript] += sRange.RuneCount + } + previousRange.TargetScript = mapScript + previousRange.High = sRange.High + previousRange.Spaces = append(previousRange.Spaces, sRange.Spaces...) 
+ previousRange.RuneCount += sRange.RuneCount + continue + } + finalRanges = append(finalRanges, *previousRange) *previousRange = sRange } } @@ -273,3 +287,15 @@ func (tr *TextAnalysis) MergeCommon(mergeMap MergeMap) { } } } + +func (tr *TextAnalysis) getMergeMapValue(mMap MergeMap, previous, current string) (string, bool) { + // This option mainly targets Japanese chars; multiple scripts can be used + // in the same piece of text (Han, Hiragana and Katakana) + // Instead of starting a new range, adjust the target script of the last range + if expCurrent, currentOk := mMap[previous]; currentOk { + if expFinal, finalOk := expCurrent[current]; finalOk { + return expFinal, finalOk + } + } + return "", false +} diff --git a/thumbnails/pkg/preprocessor/textanalyzer_test.go b/thumbnails/pkg/preprocessor/textanalyzer_test.go index c34529db6b..02ccfd1262 100644 --- a/thumbnails/pkg/preprocessor/textanalyzer_test.go +++ b/thumbnails/pkg/preprocessor/textanalyzer_test.go @@ -7,6 +7,28 @@ import ( "github.com/stretchr/testify/assert" ) +var ( + inputs = [16]string{ + "basic latin", + "trailing tab ", + "Small text. 
\"$\", \"£\" and \"¥\" are currencies.", + "latin with 🖖", + "기본 한국어", + "基本的な日本語", + "ウーロン茶", + "私はエンジニアです", + "ティー私はエンジニアです", + "私はエンジニアです ティー", + "आधारभूत देवनागरी", + "mixed 언어 传入 🚀!", + "/k͜p/", + // ä and a + ¨ + "ä ä", + "базовый русский", // cyrillic script isn't part of our default + "latin русский", // latin + cyrillic (cyrillic not supported) + } +) + func TestAnalyzeString(t *testing.T) { defaultOpts := AnalysisOpts{ UseMergeMap: true, @@ -19,7 +41,7 @@ func TestAnalyzeString(t *testing.T) { eOut TextAnalysis }{ { - input: "basic latin", + input: inputs[0], opts: defaultOpts, eOut: TextAnalysis{ ScriptRanges: []ScriptRange{ @@ -28,12 +50,12 @@ func TestAnalyzeString(t *testing.T) { RuneCount: map[string]int{ "Latin": 11, }, - Text: "basic latin", + Text: inputs[0], }, }, { - input: "trailing tab ", - opts: defaultOpts, + input: inputs[1], + opts: defaultOpts, eOut: TextAnalysis{ ScriptRanges: []ScriptRange{ ScriptRange{Low: 0, High: 12, Spaces: []int{8, 12}, TargetScript: "Latin", RuneCount: 13}, @@ -41,11 +63,11 @@ func TestAnalyzeString(t *testing.T) { RuneCount: map[string]int{ "Latin": 13, }, - Text: "trailing tab ", + Text: inputs[1], }, }, { - input: "Small text. \"$\", \"£\" and \"¥\" are currencies.", + input: inputs[2], opts: defaultOpts, eOut: TextAnalysis{ ScriptRanges: []ScriptRange{ @@ -54,11 +76,11 @@ func TestAnalyzeString(t *testing.T) { RuneCount: map[string]int{ "Latin": 44, }, - Text: "Small text. 
\"$\", \"£\" and \"¥\" are currencies.", + Text: inputs[2], }, }, { - input: "latin with 🖖", + input: inputs[3], opts: defaultOpts, eOut: TextAnalysis{ ScriptRanges: []ScriptRange{ @@ -67,11 +89,11 @@ func TestAnalyzeString(t *testing.T) { RuneCount: map[string]int{ "Latin": 12, }, - Text: "latin with 🖖", + Text: inputs[3], }, }, { - input: "기본 한국어", + input: inputs[4], opts: defaultOpts, eOut: TextAnalysis{ ScriptRanges: []ScriptRange{ @@ -80,11 +102,11 @@ func TestAnalyzeString(t *testing.T) { RuneCount: map[string]int{ "Hangul": 6, }, - Text: "기본 한국어", + Text: inputs[4], }, }, { - input: "基本的な日本語", + input: inputs[5], opts: defaultOpts, eOut: TextAnalysis{ ScriptRanges: []ScriptRange{ @@ -93,11 +115,11 @@ func TestAnalyzeString(t *testing.T) { RuneCount: map[string]int{ "Hiragana": 7, }, - Text: "基本的な日本語", + Text: inputs[5], }, }, { - input: "ウーロン茶", + input: inputs[6], opts: defaultOpts, eOut: TextAnalysis{ ScriptRanges: []ScriptRange{ @@ -106,11 +128,11 @@ func TestAnalyzeString(t *testing.T) { RuneCount: map[string]int{ "Katakana": 5, }, - Text: "ウーロン茶", + Text: inputs[6], }, }, { - input: "私はエンジニアです", + input: inputs[7], opts: defaultOpts, eOut: TextAnalysis{ ScriptRanges: []ScriptRange{ @@ -119,11 +141,11 @@ func TestAnalyzeString(t *testing.T) { RuneCount: map[string]int{ "Hiragana": 9, }, - Text: "私はエンジニアです", + Text: inputs[7], }, }, { - input: "ティー私はエンジニアです", + input: inputs[8], opts: defaultOpts, eOut: TextAnalysis{ ScriptRanges: []ScriptRange{ @@ -132,11 +154,11 @@ func TestAnalyzeString(t *testing.T) { RuneCount: map[string]int{ "Hiragana": 12, }, - Text: "ティー私はエンジニアです", + Text: inputs[8], }, }, { - input: "私はエンジニアです ティー", + input: inputs[9], opts: defaultOpts, eOut: TextAnalysis{ ScriptRanges: []ScriptRange{ @@ -145,11 +167,11 @@ func TestAnalyzeString(t *testing.T) { RuneCount: map[string]int{ "Hiragana": 13, }, - Text: "私はエンジニアです ティー", + Text: inputs[9], }, }, { - input: "आधारभूत देवनागरी", + input: inputs[10], opts: defaultOpts, eOut: 
TextAnalysis{ ScriptRanges: []ScriptRange{ @@ -158,11 +180,11 @@ func TestAnalyzeString(t *testing.T) { RuneCount: map[string]int{ "Devanagari": 16, }, - Text: "आधारभूत देवनागरी", + Text: inputs[10], }, }, { - input: "mixed 언어 传入 🚀!", + input: inputs[11], opts: defaultOpts, eOut: TextAnalysis{ ScriptRanges: []ScriptRange{ @@ -175,11 +197,11 @@ func TestAnalyzeString(t *testing.T) { "Hangul": 3, "Han": 5, }, - Text: "mixed 언어 传入 🚀!", + Text: inputs[11], }, }, { - input: "/k͜p/", + input: inputs[12], opts: defaultOpts, eOut: TextAnalysis{ ScriptRanges: []ScriptRange{ @@ -188,11 +210,11 @@ func TestAnalyzeString(t *testing.T) { RuneCount: map[string]int{ "Latin": 5, }, - Text: "/k͜p/", + Text: inputs[12], }, }, { - input: "ä ä", // ä and a + ¨ + input: inputs[13], // ä and a + ¨ opts: defaultOpts, eOut: TextAnalysis{ ScriptRanges: []ScriptRange{ @@ -201,11 +223,11 @@ func TestAnalyzeString(t *testing.T) { RuneCount: map[string]int{ "Latin": 4, }, - Text: "ä ä", + Text: inputs[13], }, }, { - input: "базовый русский", // cyrillic script isn't part of our default + input: inputs[14], // cyrillic script isn't part of our default opts: defaultOpts, eOut: TextAnalysis{ ScriptRanges: []ScriptRange{ @@ -214,7 +236,22 @@ func TestAnalyzeString(t *testing.T) { RuneCount: map[string]int{ "_unknown": 15, }, - Text: "базовый русский", + Text: inputs[14], + }, + }, + { + input: inputs[15], // latin + cyrillic (cyrillic script isn't part of our default) + opts: defaultOpts, + eOut: TextAnalysis{ + ScriptRanges: []ScriptRange{ + ScriptRange{Low: 0, High: 5, Spaces: []int{5}, TargetScript: "Latin", RuneCount: 6}, + ScriptRange{Low: 6, High: 19, Spaces: []int{}, TargetScript: "_unknown", RuneCount: 7}, + }, + RuneCount: map[string]int{ + "Latin": 6, + "_unknown": 7, + }, + Text: inputs[15], }, }, } @@ -240,7 +277,7 @@ func TestAnalyzeStringRaw(t *testing.T) { eOut TextAnalysis }{ { - input: "basic latin", + input: inputs[0], eOut: TextAnalysis{ ScriptRanges: []ScriptRange{ 
ScriptRange{Low: 0, High: 4, Spaces: []int{}, TargetScript: "Latin", RuneCount: 5}, @@ -251,11 +288,11 @@ func TestAnalyzeStringRaw(t *testing.T) { "Latin": 10, "Common": 1, }, - Text: "basic latin", + Text: inputs[0], }, }, { - input: "trailing tab ", + input: inputs[1], eOut: TextAnalysis{ ScriptRanges: []ScriptRange{ ScriptRange{Low: 0, High: 7, Spaces: []int{}, TargetScript: "Latin", RuneCount: 8}, @@ -267,11 +304,11 @@ func TestAnalyzeStringRaw(t *testing.T) { "Latin": 11, "Common": 2, }, - Text: "trailing tab ", + Text: inputs[1], }, }, { - input: "Small text. \"$\", \"£\" and \"¥\" are currencies.", + input: inputs[2], eOut: TextAnalysis{ ScriptRanges: []ScriptRange{ ScriptRange{Low: 0, High: 4, Spaces: []int{}, TargetScript: "Latin", RuneCount: 5}, @@ -289,11 +326,11 @@ func TestAnalyzeStringRaw(t *testing.T) { "Latin": 25, "Common": 19, }, - Text: "Small text. \"$\", \"£\" and \"¥\" are currencies.", + Text: inputs[2], }, }, { - input: "latin with 🖖", + input: inputs[3], eOut: TextAnalysis{ ScriptRanges: []ScriptRange{ ScriptRange{Low: 0, High: 4, Spaces: []int{}, TargetScript: "Latin", RuneCount: 5}, @@ -305,11 +342,11 @@ func TestAnalyzeStringRaw(t *testing.T) { "Latin": 9, "Common": 3, }, - Text: "latin with 🖖", + Text: inputs[3], }, }, { - input: "기본 한국어", + input: inputs[4], eOut: TextAnalysis{ ScriptRanges: []ScriptRange{ ScriptRange{Low: 0, High: 5, Spaces: []int{}, TargetScript: "Hangul", RuneCount: 2}, @@ -320,11 +357,11 @@ func TestAnalyzeStringRaw(t *testing.T) { "Hangul": 5, "Common": 1, }, - Text: "기본 한국어", + Text: inputs[4], }, }, { - input: "基本的な日本語", + input: inputs[5], eOut: TextAnalysis{ ScriptRanges: []ScriptRange{ ScriptRange{Low: 0, High: 8, Spaces: []int{}, TargetScript: "Han", RuneCount: 3}, @@ -335,11 +372,11 @@ func TestAnalyzeStringRaw(t *testing.T) { "Hiragana": 1, "Han": 6, }, - Text: "基本的な日本語", + Text: inputs[5], }, }, { - input: "ウーロン茶", + input: inputs[6], eOut: TextAnalysis{ ScriptRanges: []ScriptRange{ ScriptRange{Low: 0, 
High: 2, Spaces: []int{}, TargetScript: "Katakana", RuneCount: 1}, @@ -352,11 +389,11 @@ func TestAnalyzeStringRaw(t *testing.T) { "Common": 1, "Han": 1, }, - Text: "ウーロン茶", + Text: inputs[6], }, }, { - input: "私はエンジニアです", + input: inputs[7], eOut: TextAnalysis{ ScriptRanges: []ScriptRange{ ScriptRange{Low: 0, High: 2, Spaces: []int{}, TargetScript: "Han", RuneCount: 1}, @@ -369,11 +406,11 @@ func TestAnalyzeStringRaw(t *testing.T) { "Hiragana": 3, "Katakana": 5, }, - Text: "私はエンジニアです", + Text: inputs[7], }, }, { - input: "ティー私はエンジニアです", + input: inputs[8], eOut: TextAnalysis{ ScriptRanges: []ScriptRange{ ScriptRange{Low: 0, High: 5, Spaces: []int{}, TargetScript: "Katakana", RuneCount: 2}, @@ -389,11 +426,11 @@ func TestAnalyzeStringRaw(t *testing.T) { "Katakana": 7, "Common": 1, }, - Text: "ティー私はエンジニアです", + Text: inputs[8], }, }, { - input: "私はエンジニアです ティー", + input: inputs[9], eOut: TextAnalysis{ ScriptRanges: []ScriptRange{ ScriptRange{Low: 0, High: 2, Spaces: []int{}, TargetScript: "Han", RuneCount: 1}, @@ -410,11 +447,11 @@ func TestAnalyzeStringRaw(t *testing.T) { "Katakana": 7, "Common": 2, }, - Text: "私はエンジニアです ティー", + Text: inputs[9], }, }, { - input: "आधारभूत देवनागरी", + input: inputs[10], eOut: TextAnalysis{ ScriptRanges: []ScriptRange{ ScriptRange{Low: 0, High: 20, Spaces: []int{}, TargetScript: "Devanagari", RuneCount: 7}, @@ -425,11 +462,11 @@ func TestAnalyzeStringRaw(t *testing.T) { "Devanagari": 15, "Common": 1, }, - Text: "आधारभूत देवनागरी", + Text: inputs[10], }, }, { - input: "mixed 언어 传入 🚀!", + input: inputs[11], eOut: TextAnalysis{ ScriptRanges: []ScriptRange{ ScriptRange{Low: 0, High: 4, Spaces: []int{}, TargetScript: "Latin", RuneCount: 5}, @@ -445,11 +482,11 @@ func TestAnalyzeStringRaw(t *testing.T) { "Han": 2, "Common": 5, }, - Text: "mixed 언어 传入 🚀!", + Text: inputs[11], }, }, { - input: "/k͜p/", + input: inputs[12], eOut: TextAnalysis{ ScriptRanges: []ScriptRange{ ScriptRange{Low: 0, High: 0, Spaces: []int{}, TargetScript: "Common", 
RuneCount: 1}, @@ -463,11 +500,11 @@ func TestAnalyzeStringRaw(t *testing.T) { "Common": 2, "Inherited": 1, }, - Text: "/k͜p/", + Text: inputs[12], }, }, { - input: "ä ä", // ä and a + ¨ + input: inputs[13], // ä and a + ¨ eOut: TextAnalysis{ ScriptRanges: []ScriptRange{ ScriptRange{Low: 0, High: 1, Spaces: []int{}, TargetScript: "Latin", RuneCount: 1}, @@ -480,11 +517,11 @@ func TestAnalyzeStringRaw(t *testing.T) { "Common": 1, "Inherited": 1, }, - Text: "ä ä", + Text: inputs[13], }, }, { - input: "базовый русский", // cyrillic script isn't part of our default + input: inputs[14], // cyrillic script isn't part of our default eOut: TextAnalysis{ ScriptRanges: []ScriptRange{ ScriptRange{Low: 0, High: 13, Spaces: []int{}, TargetScript: "_unknown", RuneCount: 7}, @@ -495,7 +532,7 @@ func TestAnalyzeStringRaw(t *testing.T) { "_unknown": 14, "Common": 1, }, - Text: "базовый русский", + Text: inputs[14], }, }, }