From 02b9b4fa1ca3db30598a2e996a6c660702863fec Mon Sep 17 00:00:00 2001 From: Jo-Be-Co Date: Sun, 22 Mar 2026 19:18:55 +0100 Subject: [PATCH] QS updates: - fixed documentation - regexp-checks running with timeout and culture-invariant matching - changed check-building in ConditionalTagCollection to use NonNull parameters. So no warnings occure. - add tests for and escaped chars --- .../ConditionalTagCollection[TClass].cs | 103 +++++++++++------- .../TemplatesTests.cs | 33 +++++- docs/features/naming-templates.md | 24 ++-- 3 files changed, 103 insertions(+), 57 deletions(-) diff --git a/Source/FileManager/NamingTemplate/ConditionalTagCollection[TClass].cs b/Source/FileManager/NamingTemplate/ConditionalTagCollection[TClass].cs index 9756732e..b474358d 100644 --- a/Source/FileManager/NamingTemplate/ConditionalTagCollection[TClass].cs +++ b/Source/FileManager/NamingTemplate/ConditionalTagCollection[TClass].cs @@ -83,7 +83,7 @@ public partial class ConditionalTagCollection(bool caseSensitive = true) public ConditionalTag(ITemplateTag templateTag, RegexOptions options, ParameterExpression parameter, ValueProvider valueProvider, ConditionEvaluator conditionEvaluator) : base(templateTag, Expression.Constant(false)) { - // needs to match on at least one character which is not a space + // needs to match on at least one character, which is not a space NameMatcher = new Regex($""" (?x) # option x: ignore all unescaped whitespace in pattern and allow comments starting with # ^<(?!)? # tags start with a '<'. Condtionals allow an optional ! 
captured in to negate the condition @@ -103,8 +103,8 @@ public partial class ConditionalTagCollection(bool caseSensitive = true) public ConditionalTag(ITemplateTag templateTag, RegexOptions options, ParameterExpression parameter, ValueProvider valueProvider) : base(templateTag, Expression.Constant(false)) { - // needs to match on at least one character which is not a space - // though we will capture check enclosed in [] at the end of the tag the property itself migth also have a [] part for formatting purposes + // needs to match on at least one character, which is not a space. + // though we will capture the group named `check` enclosed in [] at the end of the tag, the property itself might also have a [] part for formatting purposes NameMatcher = new Regex($""" (?x) # option x: ignore all unescaped whitespace in pattern and allow comments starting with # ^<(?!)? # tags start with a '<'. Condtionals allow an optional ! captured in to negate the condition @@ -155,28 +155,33 @@ public partial class ConditionalTagCollection(bool caseSensitive = true) private static ConditionEvaluator GetPredicate(string? checkString) { if (checkString == null) - return DefaultPredicate; + return (v, _) => v switch + { + null => false, + IEnumerable e => e.Any(), + _ => !string.IsNullOrWhiteSpace(v.ToString()) + }; var match = CheckRegex().Match(checkString); var valStr = match.Groups["val"].Value; - var ival = -1; - var isNumop = match.Groups["numop"].Success && int.TryParse(valStr, out ival); + var iVal = -1; + var isNumericalOperator = match.Groups["num_op"].Success && int.TryParse(valStr, out iVal); - var checkItem = match.Groups["op"].ValueSpan switch + Func checkItem = match.Groups["op"].ValueSpan switch { "=" or "" => (v, culture) => VComparedToStr(v, culture, valStr) == 0, "!=" or "!" 
=> (v, culture) => VComparedToStr(v, culture, valStr) != 0, "~" => GetRegExpCheck(valStr), - "#=" => (v, _) => VAsInt(v) == ival, - "#!=" => (v, _) => VAsInt(v) != ival, - "#>=" or ">=" => (v, _) => VAsInt(v) >= ival, - "#>" or ">" => (v, _) => VAsInt(v) > ival, - "#<=" or "<=" => (v, _) => VAsInt(v) <= ival, - "#<" or "<" => (v, _) => VAsInt(v) < ival, - _ => DefaultPredicate, + "#=" => (v, _) => VAsInt(v) == iVal, + "#!=" => (v, _) => VAsInt(v) != iVal, + "#>=" or ">=" => (v, _) => VAsInt(v) >= iVal, + "#>" or ">" => (v, _) => VAsInt(v) > iVal, + "#<=" or "<=" => (v, _) => VAsInt(v) <= iVal, + "#<" or "<" => (v, _) => VAsInt(v) < iVal, + _ => (v, _) => !string.IsNullOrWhiteSpace(v.ToString()) }; - return isNumop + return isNumericalOperator ? (v, culture) => v switch { null => false, @@ -201,34 +206,54 @@ public partial class ConditionalTagCollection(bool caseSensitive = true) } /// - /// build a regular expression check which take the into account. + /// Build a regular expression check. Uses culture-invariant matching for thread-safety and consistency. + /// Applies a timeout to prevent regex patterns from causing excessive backtracking and blocking. /// - /// + /// The regex pattern to match /// check function to validate an object - private static ConditionEvaluator GetRegExpCheck(string valStr) + private static Func GetRegExpCheck(string valStr) { - return (v, culture) => + try { - var old = CultureInfo.CurrentCulture; - try + // Compile regex with timeout to prevent catastrophic backtracking + var regex = new Regex(valStr, + RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Compiled, + TimeSpan.FromMilliseconds(100)); + + return (v, _) => { - CultureInfo.CurrentCulture = culture ?? CultureInfo.CurrentCulture; - return Regex.IsMatch(v?.ToString().Trim() ?? 
"", valStr, RegexOptions.IgnoreCase | RegexOptions.Compiled); - } - finally - { - CultureInfo.CurrentCulture = old; - } - }; + try + { + // CultureInfo parameter is intentionally ignored (discarded with _). + // RegexOptions.CultureInvariant ensures culture-independent matching for predictable behavior. + // This is preferred for template conditions because: + // 1. Thread-safety: Regex operations are isolated and don't depend on thread-local culture + // 2. Consistency: Template matches produce identical results regardless of system locale + // 3. Predictability: Rules don't unexpectedly change based on user's OS settings + // + // Culture-sensitive matching would be problematic in cases like: + // - Turkish locale: 'I' has different case folding (I ↔ ı vs. I ↔ i). Pattern "[i-z]" might match Turkish 'ı'. + // - German locale: ß might be treated as equivalent to 'ss' during case-insensitive matching. + // - Lithuanian locale: 'i' after 'ž' has an accent that affects sorting/matching. + // + // For naming templates, culture-invariant is the safer default. + return regex.IsMatch(v.ToString()?.Trim() ?? ""); + } + catch (RegexMatchTimeoutException) + { + // Return false if regex evaluation times out + return false; + } + }; + } + catch + { + // If regex compilation fails, return a predicate that always returns false + return (_, _) => false; + } } - // without any special check only the existance of the property is checked. Strings need to be non empty. - private static readonly ConditionEvaluator DefaultPredicate = (v, _) => v switch - { - null => false, - IEnumerable e => e.Any(), - _ => !string.IsNullOrWhiteSpace(v.ToString()) - }; + // without any special check, only the existence of the property is checked. Strings need to be non-empty. public bool StartsWithClosing(string templateString, [NotNullWhen(true)] out string? exactName, [NotNullWhen(true)] out IClosingPropertyTag? 
propertyTag) { @@ -256,11 +281,11 @@ public partial class ConditionalTagCollection(bool caseSensitive = true) [GeneratedRegex(""" (?x) # option x: ignore all unescaped whitespace in pattern and allow comments starting with # ^\s* # anchor at start of line trimming leading whitespace - (? # capture operator in and - (?\#=|\#!=|\#?>=|\#?>|\#?<=|\#?<) # - numerical operators start with a # and might be omitted if unique + (? # capture operator in and + (?\#=|\#!=|\#?>=|\#?>|\#?<=|\#?<) # - numerical operators start with a # and might be omitted if unique | ~|!=?|=? # - string comparison operators including ~ for regexp. No operator is like = ) \s* # ignore space between operator and value - (?(?(numop) # capture value in + (?(?(num_op) # capture value in \d+ # - numerical operators have to be followed by a number | .*? ) # - string for comparison. May be empty. Non-greedy capture resulting in no whitespace at the end )\s*$ # trimming up to the end diff --git a/Source/_Tests/LibationFileManager.Tests/TemplatesTests.cs b/Source/_Tests/LibationFileManager.Tests/TemplatesTests.cs index e867b433..84b1945b 100644 --- a/Source/_Tests/LibationFileManager.Tests/TemplatesTests.cs +++ b/Source/_Tests/LibationFileManager.Tests/TemplatesTests.cs @@ -54,7 +54,7 @@ namespace TemplatesTests Language = "English", Subtitle = "An Audible Original Drama", TitleWithSubtitle = "A Study in Scarlet: An Audible Original Drama", - Codec = "AAC-LC", + Codec = @"AAC[LC]\MP3", // special chars added FileVersion = null, // explicitly null LibationVersion = "", // explicitly empty string LengthInMinutes = 100, @@ -167,7 +167,7 @@ namespace TemplatesTests [DataRow("Kbps ", "128Kbps A STUDY IN SCARLET")] [DataRow("Kbps ", "128Kbps A Study In Scarlet")] [DataRow("Kbps ", "0128Kbps a study in scarlet")] - [DataRow(" Hz", "Aac-Lc 044100Hz")] + [DataRow(" Hz", "Aac[Lc]Mp3 044100Hz")] [DataRow(" ", "AAC A STU")] [DataRow("Kbps Hz", "0128Kbps 044100Hz")] public void FormatTags(string template, string 
expected) @@ -433,27 +433,36 @@ namespace TemplatesTests [TestMethod] [DataRow("empty-string<-has>", "")] + [DataRow("empty-string<-has>", "empty-string")] [DataRow("empty-string<-has>", "")] + [DataRow("empty-string<-has>", "empty-string")] [DataRow("empty-string<-has>", "empty-string")] [DataRow("empty-string<-has>", "empty-string")] [DataRow("empty-string<-has>", "empty-string")] [DataRow("null-string<-has>", "")] - [DataRow("null-string<-has>", "")] - [DataRow("null-string<-has>", "")] + [DataRow("null-string<-has>", "null-string")] + [DataRow("null-string<-has>", "")] + [DataRow("null-string<-has>", "")] + [DataRow("null-string<-has>", "null-string")] [DataRow("null-string<-has>", "")] - [DataRow("null-string<-has>", "")] + [DataRow("null-string<-has>", "")] [DataRow("null-int<-has>", "")] [DataRow("null-int<-has>", "")] [DataRow("null-int<-has>", "")] [DataRow("null-int<-has>", "")] + [DataRow("null-int<-has>", "null-int")] [DataRow("null-int<-has>", "")] [DataRow("unknown-tag<-has>", "")] [DataRow("unknown-tag<-has>", "")] + [DataRow("unknown-tag<-has>", "unknown-tag")] [DataRow("unknown-tag<-has>", "")] [DataRow("unknown-tag<-has>", "")] [DataRow("unknown-tag<-has>", "")] [DataRow("empty-list<-has>", "")] [DataRow("empty-list<-has>", "")] + [DataRow("empty-list<-has>", "empty-list")] + [DataRow("empty-list<-has>", "")] + [DataRow("empty-list<-has>", "empty-list")] [DataRow("empty-list<-has>", "")] [DataRow("empty-list<-has>", "")] [DataRow("empty-list<-has>", "empty-list")] @@ -484,11 +493,13 @@ namespace TemplatesTests [TestMethod] [DataRow("true<-has>", "true")] + [DataRow("false<-has>", "")] [DataRow("true<-has>", "true")] [DataRow("true<-has>", "true")] [DataRow("true<-has>", "true")] [DataRow("true<-has>", "true")] [DataRow("true<-has>", "true")] + [DataRow("false<-has>", "")] [DataRow("true<-has>", "true")] [DataRow("true<-has>", "true")] [DataRow("true<-has>", "true")] @@ -497,6 +508,8 @@ namespace TemplatesTests [DataRow("true<-has>", "true")] 
[DataRow("true<-has>", "true")] [DataRow("true<-has>", "true")] + [DataRow(@"true<-has>", "true")] + [DataRow(@"true<-has>", "")] [DataRow("true<-has>", "true")] [DataRow("true<-has>", "true")] [DataRow("true<-has>", "true")] @@ -510,9 +523,11 @@ namespace TemplatesTests [DataRow("true<-has>", "true")] [DataRow("true<-has>", "true")] [DataRow("true<-has>", "true")] + [DataRow("false<-has>", "")] [DataRow("true<-has>", "true")] [DataRow("true<-has>", "true")] [DataRow("true<-has>", "true")] + [DataRow("false<-has>", "")] [DataRow("true<-has>", "true")] [DataRow("true<-has>", "")] [DataRow("=1]->true<-has>", "true")] @@ -524,15 +539,21 @@ namespace TemplatesTests [DataRow("true<-has>", "true")] [DataRow("true<-has>", "true")] [DataRow("true<-has>", "true")] + [DataRow("false<-has>", "")] + [DataRow("true<-has>", "true")] + [DataRow("false<-has>", "")] [DataRow("true<-has>", "true")] [DataRow("=3]->true<-has>", "")] - [DataRow("true<-has>", "")] + [DataRow(@"true<-has>", "")] [DataRow("true<-has>", "")] [DataRow("true<-has>", "")] [DataRow("true<-has>", "")] [DataRow("true<-has>", "")] [DataRow("true<-has>", "")] [DataRow("true<-has>", "")] + [DataRow("false<-has>", "false")] + [DataRow("true<-has>", "true")] + [DataRow("false<-has>", "")] [DataRow("true<-has>", "true")] [DataRow("true<-has>", "true")] public void HasValue_test(string template, string expected) diff --git a/docs/features/naming-templates.md b/docs/features/naming-templates.md index 5542dbd1..448d09b9 100644 --- a/docs/features/naming-templates.md +++ b/docs/features/naming-templates.md @@ -123,10 +123,10 @@ Text formatting can change length and case of the text. Use <#>, <#> or multiple entries.

Default is ", " | `` | Tag1_Tag2_Tag3_Tag4_Tag5 | +| ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |--------------------------------------------- | ---------------------------------------------| +| separator() | Specify the text used to join
multiple entries.

Default is ", " | `` | Tag1_Tag2_Tag3_Tag4_Tag5 | | format(\{S\}) | Formats the entries by placing their values into the specified template.
Use {S:[Text_Formatter](#text-formatters)} to place the entry and optionally apply a format. | ``separator(;)]>` | Tag=tag1;Tag=tag2;Tag=tag3;Tag=tag4;Tag=tag5 | -| sort(S) | Sorts the elements by their value.

*Sorting direction:*
uppercase = ascending
lowercase = descending

Default is unsorted | ``separator(; )]>` | Tag5;Tag4;Tag4;Tag2;Tag1 | +| sort(S) | Sorts the elements by their value.

*Sorting direction:*
uppercase = ascending
lowercase = descending

Default is unsorted | ``separator(;)]>` | Tag5;Tag4;Tag3;Tag2;Tag1 | | max(#) | Only use the first # of entries | `` | Tag1 | | slice(#) | Only use the nth entry of the list | `` | Tag2 | | slice(#..) | Only use entries of the list starting from # | `` | Tag2, Tag3, Tag4, Tag5 | @@ -136,15 +136,15 @@ Text formatting can change length and case of the text. Use <#>, <#> or the series part tags.
\{N:[Text_Formatter](#text-formatters)\} = Series Name
\{#:[Number_Formatter](#number-formatters)\} = Number order in series
\{ID:[Text_Formatter](#text-formatters)\} = Audible Series ID

Formatter parts are optional and introduced by the colon. If specified the string will be used to format the part using the correspoing formatter.

Default is \{N\} | ``
``
``
`` | Sherlock Holmes
sherlock holmes
Sherlock Holmes, 1-6, B08376S3R2
SHERLOCK H, B08376S3R2, 01.0-06.0 | +| Formatter | Description | Example Usage | Example Result | +| ---------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------- | +| \{N \| # \| ID\} | Formats the series using
the series part tags.
\{N:[Text_Formatter](#text-formatters)\} = Series Name
\{#:[Number_Formatter](#number-formatters)\} = Number order in series
\{ID:[Text_Formatter](#text-formatters)\} = Audible Series ID

Formatter parts are optional and introduced by the colon. If specified the string will be used to format the part using the corresponding formatter.

Default is \{N\} | ``
``
``
`` | Sherlock Holmes
sherlock holmes
Sherlock Holmes, 1-6, B08376S3R2
SHERLOCK H, B08376S3R2, 01.0-06.0 | ### Series List Formatters | Formatter | Description | Example Usage | Example Result | | ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------- | -| separator() | Speficy the text used to join
multiple series names.

Default is ", " | `` | Sherlock Holmes; Some Other Series | +| separator() | Specify the text used to join
multiple series names.

Default is ", " | `` | Sherlock Holmes; Some Other Series | | format(\{N \| # \| ID\}) | Formats the series properties
using the name series tags.
See [Series Formatter Usage](#series-formatters) above. | ``separator(; )]>`
`` | Sherlock Holmes, 1-6; Book Collection, 1
B08376S3R2-Sherlock Holmes, 01.0-06.0, B000000000-Book Collection, 01.0 | | sort(N \| # \| ID) | Sorts the series by name, number or ID.

These terms define the primary, secondary, tertiary, … sorting order.
You may combine multiple terms in sequence to specify multi‑level sorting.

*Sorting direction:*
uppercase = ascending
lowercase = descending

Default is unsorted | ``separator(; )]>` | Book Collection, 1; Sherlock Holmes, 1-6 | | max(#) | Only use the first # of series | `` | Sherlock Holmes | @@ -160,7 +160,7 @@ Text formatting can change length and case of the text. Use <#>, <#> or multiple people's names.

Default is ", " | `` | Arthur Conan Doyle; Stephen Fry | +| separator() | Specify the text used to join
multiple people's names.

Default is ", " | `` | Arthur Conan Doyle; Stephen Fry | | format(\{T \| F \| M \| L \| S \| ID\}) | Formats the human name using
the name part tags.
See [Name Formatter Usage](#name-formatters) above. | ``separator(; )]>`
``_{ID}_) separator(; )]>` | DOYLE, Arthur; FRY, Stephen
Doyle, A. \_B000AQ43GQ\_;
Fry, S. \_B000APAGVS\_ | | sort(T \| F \| M \| L \| S \| ID) | Sorts the names by title,
first, middle, or last name,
suffix or Audible Contributor ID

These terms define the primary, secondary, tertiary, … sorting order.
You may combine multiple terms in sequence to specify multi‑level sorting.

*Sorting direction:*
uppercase = ascending
lowercase = descending

Default is unsorted | ``
``
| Stephen Fry, Arthur Conan Doyle
Stephen King, Stephen Fry
John P. Smith \_B000TTTBBB\_, John P. Smith \_B000TTTCCC\_, John S. Smith \_B000HHHVVV\_ | | max(#) | Only use the first # of names

Default is all names | `` | Arthur Conan Doyle | @@ -168,9 +168,9 @@ Text formatting can change length and case of the text. Use <#>, <#> or {D:[Number_Formatter](#number-formatter) = Number of full days
{H:[Number_Formatter](#number-formatter) = Number of full (remaining) hours
{M:[Number_Formatter](#number-formatter) = Number of (remaining) minutes

Default is {M} | ``
``
`` | 03000minutes
02d 120m
2-2-0 | +| Formatter | Description | Example Usage | Example Result | +| ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- | --------------------------------- | +| {M \| H \| D} | Format the minutes value in terms of minutes, hours and days.
{D:[Number_Formatter](#number-formatters)} = Number of full days
{H:[Number_Formatter](#number-formatters)} = Number of full (remaining) hours
{M:[Number_Formatter](#number-formatters)} = Number of (remaining) minutes

Default is {M} | ``
``
`` | 03000minutes
02d 120m
2-2-0 | ### Number Formatters @@ -183,7 +183,7 @@ For more custom formatters and examples, [see this guide from Microsoft](https:/ ### Date Formatters -Form more standard formatters, [see this guide from Microsoft](https://learn.microsoft.com/en-us/dotnet/standard/base-types/standard-date-and-time-format-strings). +For more standard formatters, [see this guide from Microsoft](https://learn.microsoft.com/en-us/dotnet/standard/base-types/standard-date-and-time-format-strings). #### Standard DateTime Formatters