using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text.RegularExpressions;

namespace FileManager.NamingTemplate;

/// <summary>
/// Parses the check part of a conditional tag (an operator optionally followed by a value) and
/// builds the <see cref="ConditionEvaluator"/> that performs the comparison.
/// </summary>
/// <remarks>
/// The operator decides how both operands are interpreted:
/// numeric operators compare values converted by <see cref="ToIntObject"/>,
/// list operators compare values converted by <see cref="ToEnumerable"/>,
/// and all remaining operators compare strings or evaluate a regular expression.
/// </remarks>
public static partial class CompareCondition
{
    /// <summary>Regular expressions built from check values, keyed by their pattern text.</summary>
    private static readonly ConcurrentDictionary<string, Regex> RegexCache = new();

    /// <summary>Match timeout applied to every regular expression built from a check value.</summary>
    private static readonly TimeSpan RegexpCheckTimeout = TimeSpan.FromMilliseconds(100);

    /// <summary>
    /// Builds the evaluator for a conditional tag together with the value it has to be compared against.
    /// </summary>
    /// <param name="exactName">The tag text; only used in error messages.</param>
    /// <param name="checkString">Operator and value, or <c>null</c> for a plain "has a value" check.</param>
    /// <returns>
    /// The comparison value and the evaluator. The value is an <see cref="int"/> for numeric operators,
    /// a single-element string array for list operators and a string otherwise.
    /// For a <c>null</c> <paramref name="checkString"/> the value is <see cref="string.Empty"/>.
    /// </returns>
    /// <exception cref="ArgumentException">
    /// The check string does not match the expected format, or a numeric operator is not followed by
    /// a number that fits into an <see cref="int"/>.
    /// </exception>
    public static (object, ConditionEvaluator) GetPredicateAndValue(string exactName, string? checkString)
    {
        if (checkString is null)
        {
            // No operator at all: the condition holds when the value is a non-empty collection
            // or its string form is not blank.
            return (string.Empty, (v1, _, _) => v1 switch
            {
                null => false,
                IEnumerable<object> e => e.Any(),
                _ => !string.IsNullOrWhiteSpace(v1.ToString())
            });
        }

        var match = GetMatch(exactName, checkString);
        var valStr = match.UnescapeValue("val");
        var (evaluator, opGroup) = GetPredicate(exactName, match);

        return (opGroup.Name switch
        {
            "num_op" => ParseNumber(exactName, checkString, valStr),
            "list_op" => new[] { valStr },
            _ => valStr
        }, evaluator);
    }

    /// <summary>Builds only the evaluator for the operator contained in <paramref name="checkString"/>.</summary>
    /// <param name="exactName">The tag text; only used in error messages.</param>
    /// <param name="checkString">Operator and value.</param>
    /// <returns>The evaluator that belongs to the operator.</returns>
    /// <exception cref="ArgumentException">The check string does not match the expected format.</exception>
    public static ConditionEvaluator GetPredicate(string exactName, string? checkString)
    {
        return GetPredicate(exactName, GetMatch(exactName, checkString)).Item1;
    }

    /// <summary>
    /// Converts the value of a numeric check into an <see cref="int"/>.
    /// </summary>
    /// <remarks>
    /// The value group of <see cref="CheckRegex"/> is optional and <c>\d</c> also accepts non-ASCII digits,
    /// so a successful match does not guarantee a parsable number. Parsing is culture-invariant and
    /// accepts digits only.
    /// </remarks>
    /// <exception cref="ArgumentException">The value is missing, is not a plain number or does not fit into an <see cref="int"/>.</exception>
    private static int ParseNumber(string exactName, string checkString, string? digits)
    {
        return int.TryParse(digits, NumberStyles.None, CultureInfo.InvariantCulture, out var number)
            ? number
            : throw new ArgumentException($"Invalid, missing or too large number in conditional tag '{exactName}'. Check string: '{checkString}'");
    }

    /// <summary>Matches <paramref name="checkString"/> against <see cref="CheckRegex"/>.</summary>
    /// <exception cref="ArgumentException">The check string does not match.</exception>
    private static Match GetMatch(string exactName, string? checkString)
    {
        return CheckRegex().TryMatch(checkString, out var match)
            ? match
            : throw new ArgumentException($"Invalid check or operator format in conditional tag '{exactName}'. Check string: '{checkString}'");
    }

    /// <summary>
    /// Selects the evaluator for the operator family that matched and returns it with the matching group.
    /// The group name ("num_op", "list_op" or "op") tells the caller how to interpret the value.
    /// </summary>
    private static (ConditionEvaluator, Group) GetPredicate(string exactName, Match match)
    {
        var group = match.Groups["num_op"];
        if (group.Success)
        {
            return (GetPredicateForNumOp(exactName, group.ValueSpan), group);
        }

        group = match.Groups["list_op"];
        if (group.Success)
        {
            return (GetPredicateForListOp(exactName, group.ValueSpan), group);
        }

        group = match.Groups["op"];
        return (GetPredicateForStringOp(exactName, group.ValueSpan), group);
    }

    /// <summary>
    /// Builds the evaluator for a numeric operator. The evaluator yields <c>false</c> when either operand
    /// cannot be converted by <see cref="ToIntObject"/>.
    /// </summary>
    private static ConditionEvaluator GetPredicateForNumOp(string _, ReadOnlySpan<char> opString)
    {
        Func<int, int, CultureInfo?, bool> checkInt = opString switch
        {
            "#=" => (v1, v2, _) => v1 == v2,
            // "=\u0338" is the decomposed spelling of "≠" ('=' followed by COMBINING LONG SOLIDUS OVERLAY).
            "#!=" or "≠" or "=\u0338" => (v1, v2, _) => v1 != v2,
            "#>=" or ">=" or "≥" => (v1, v2, _) => v1 >= v2,
            "#>" or ">" => (v1, v2, _) => v1 > v2,
            "#<=" or "<=" or "≤" => (v1, v2, _) => v1 <= v2,
            "#<" or "<" => (v1, v2, _) => v1 < v2,
            _ => throw new ArgumentOutOfRangeException() // this should never happen because the regex only allows these values
        };

        return (v1, v2, culture) =>
            ToIntObject(v1) is { } i1 && ToIntObject(v2) is { } i2 && checkInt(i1, i2, culture);
    }

    /// <summary>
    /// Maps a value to the number used by numeric comparisons: item count for collections, whole minutes
    /// for <see cref="TimeSpan"/>, the truncated OLE Automation date for <see cref="DateTime"/>, the length
    /// for strings and the value itself for <see cref="int"/>. Everything else yields <c>null</c>.
    /// </summary>
    private static int? ToIntObject(object? value)
    {
        return value switch
        {
            null => null,
            IEnumerable<object> e => e.Count(),
            TimeSpan ts => (int)ts.TotalMinutes,
            DateTime dt => (int)dt.ToOADate(),
            string s => s.Length,
            int i => i,
            _ => null // language and such shall never match
        };
    }

    /// <summary>
    /// Builds the evaluator for a list operator. Both operands are converted by <see cref="ToEnumerable"/>;
    /// the evaluator yields <c>false</c> when either operand is <c>null</c>.
    /// </summary>
    private static ConditionEvaluator GetPredicateForListOp(string _, ReadOnlySpan<char> opString)
    {
        // The "superset"-style operators are the "subset"-style checks with swapped operands.
        // "\u220B\u0338" and "\u2208\u0338" are the decomposed spellings of "∌" and "∉".
        Func<IEnumerable<string>, IEnumerable<string>, CultureInfo?, bool> checklist = opString switch
        {
            "∋" or ">>" or ":contains:" => Swap<IEnumerable<string>>(IsSubset),
            "⊇" or ">=>" or ":superset:" => Swap<IEnumerable<string>>(IsSubset),
            "⊃" or ">->" or ":proper_superset:" => Swap<IEnumerable<string>>(IsProperSubset),
            "∌" or "!>>" or "\u220B\u0338" or ":not_contains:" => Invert(Swap<IEnumerable<string>>(IsSubset)),
            "∈" or "<<" or ":in:" => IsSubset,
            "⊆" or "<=<" or ":subset:" => IsSubset,
            "⊂" or "<-<" or ":proper_subset:" => IsProperSubset,
            "∉" or "!<<" or "\u2208\u0338" or ":not_in:" => Invert<IEnumerable<string>>(IsSubset),
            "⋂" or "&&" or ":overlaps:" => Overlaps,
            "⋂̸" or "&&!" or "⋂!" or ":disjoint:" => Invert<IEnumerable<string>>(Overlaps),
            "≡" or "==" or ":equals:" => HasSameItems,
            _ => throw new ArgumentOutOfRangeException() // this should never happen because the regex only allows these values
        };

        return (v1, v2, culture) =>
            v1 is not null && v2 is not null && checklist(ToEnumerable(v1), ToEnumerable(v2), culture);
    }

    /// <summary>
    /// Presents a value as a sequence of strings: string sequences are passed through, other sequences
    /// are converted item by item (a <c>null</c> item becomes an empty string) and a single value becomes
    /// a one-element sequence.
    /// </summary>
    private static IEnumerable<string> ToEnumerable(object value)
    {
        return value switch
        {
            IEnumerable<string> e => e,
            IEnumerable<object> e => e.Select(o => o?.ToString() ?? ""),
            string s => [s],
            _ => [value.ToString() ?? ""]
        };
    }

    /// <summary>
    /// Builds the evaluator for a string or regular expression operator. When one operand is a collection
    /// the condition holds as soon as any of its items satisfies the check; a <c>null</c> operand or
    /// <c>null</c> item never satisfies it.
    /// </summary>
    private static ConditionEvaluator GetPredicateForStringOp(string exactName, ReadOnlySpan<char> opString)
    {
        Func<object, object, CultureInfo?, bool> checkItem = opString switch
        {
            "=" or "" => GetStringEqCheck(),
            "!=" or "!" => Invert(GetStringEqCheck()),
            "=~" or "~" => GetRegExpCheck(exactName),
            "!~" => Invert(GetRegExpCheck(exactName)),
            _ => throw new ArgumentOutOfRangeException() // this should never happen because the regex only allows these values
        };

        return (v1, v2, culture) => (v1, v2) switch
        {
            (null, _) => false,
            (_, null) => false,
            (IEnumerable<object> e1, _) => e1.Any(l => l is not null && checkItem(l, v2, culture)),
            (_, IEnumerable<object> e2) => e2.Any(r => r is not null && checkItem(v1, r, culture)),
            _ => checkItem(v1, v2, culture)
        };
    }

    /// <summary>True when at least one item of <paramref name="e1"/> occurs in <paramref name="e2"/>.</summary>
    private static bool Overlaps(IEnumerable<string> e1, IEnumerable<string> e2, CultureInfo? culture)
    {
        var comparer = GetStringComparer(culture);
        return e1.Any(l => e2.Contains(l, comparer));
    }

    /// <summary>True when every item of <paramref name="e1"/> occurs in <paramref name="e2"/>.</summary>
    private static bool IsSubset(IEnumerable<string> e1, IEnumerable<string> e2, CultureInfo? culture)
    {
        var comparer = GetStringComparer(culture);
        return e1.All(l => e2.Contains(l, comparer));
    }

    /// <summary>
    /// True when every item of <paramref name="e1"/> occurs in <paramref name="e2"/> and
    /// <paramref name="e2"/> has at least one item that <paramref name="e1"/> lacks.
    /// </summary>
    private static bool IsProperSubset(IEnumerable<string> e1, IEnumerable<string> e2, CultureInfo? culture)
    {
        var comparer = GetStringComparer(culture);
        // ReSharper disable PossibleMultipleEnumeration
        return e1.All(l => e2.Contains(l, comparer)) && e2.Any(r => !e1.Contains(r, comparer));
        // ReSharper restore PossibleMultipleEnumeration
    }

    /// <summary>True when both sequences hold equal items once sorted with the culture's comparer.</summary>
    private static bool HasSameItems(IEnumerable<string> e1, IEnumerable<string> e2, CultureInfo? culture)
    {
        var cmp = GetStringComparer(culture);
        return e1.OrderBy(e => e, cmp).SequenceEqual(e2.OrderBy(e => e, cmp), cmp);
    }

    /// <summary>Wraps a condition so that its result is negated.</summary>
    private static Func<T, T, CultureInfo?, bool> Invert<T>(Func<T, T, CultureInfo?, bool> condition) =>
        (v1, v2, culture) => !condition(v1, v2, culture);

    /// <summary>Wraps a condition so that its two operands are exchanged.</summary>
    private static Func<T, T, CultureInfo?, bool> Swap<T>(Func<T, T, CultureInfo?, bool> condition) =>
        (v1, v2, culture) => condition(v2, v1, culture);

    /// <summary>Builds a case-insensitive equality check on the string forms of both operands.</summary>
    private static Func<object, object, CultureInfo?, bool> GetStringEqCheck()
    {
        return (v1, v2, culture) =>
            GetStringComparer(culture).Equals(ValueToString(v1, culture), ValueToString(v2, culture));
    }

    /// <summary>
    /// Formats a value for string comparison: a <see cref="TimeSpan"/> as its total minutes without
    /// decimals, any other <see cref="IFormattable"/> with the given culture, everything else via
    /// <see cref="object.ToString"/>.
    /// </summary>
    private static string? ValueToString(object value, CultureInfo? culture)
    {
        return value switch
        {
            TimeSpan ts => ts.TotalMinutes.ToString("0", culture),
            IFormattable f => f.ToString(null, culture),
            _ => value.ToString()
        };
    }

    /// <summary>
    /// Creates a case-insensitive comparer for the given culture, falling back to the current culture.
    /// </summary>
    private static StringComparer GetStringComparer(CultureInfo? culture)
    {
        return StringComparer.Create(culture ?? CultureInfo.CurrentCulture, ignoreCase: true);
    }

    /// <summary>
    /// Build a regular expression check. Matching is case-insensitive and culture-invariant, and every
    /// pattern is built with a match timeout so that excessive backtracking cannot block the caller.
    /// </summary>
    /// <param name="exactName">The full tag string for context in error messages</param>
    /// <returns>check function to validate an object</returns>
    /// <exception cref="InvalidOperationException">
    /// Thrown by the returned function when regex parsing fails or when regex matching times out,
    /// indicating faulty user input
    /// </exception>
    private static Func<object, object, CultureInfo?, bool> GetRegExpCheck(string exactName)
    {
        return (v1, v2, _) =>
        {
            var pattern = v2.ToString() ?? "";
            var regex = RegexCache.GetOrAdd(pattern, p =>
            {
                try
                {
                    // The timeout guards against catastrophic backtracking.
                    return new Regex(
                        p,
                        RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Compiled,
                        RegexpCheckTimeout);
                }
                catch (ArgumentException ex)
                {
                    // A pattern that cannot be parsed is faulty user input.
                    var errorMessage = BuildErrorMessage(exactName, p,
                        "Invalid regular expression pattern. Correct the pattern and escaping or remove that condition");
                    throw new InvalidOperationException(errorMessage, ex);
                }
            });

            try
            {
                // The CultureInfo parameter is intentionally discarded. RegexOptions.CultureInvariant keeps
                // case-insensitive matching independent of the thread's or the user's culture, so the same
                // template condition gives the same result on every system. Culture-sensitive case folding
                // would, for example, treat 'I' and 'i' differently under a Turkish culture.
                return regex.IsMatch(v1.ToString() ?? "");
            }
            catch (RegexMatchTimeoutException ex)
            {
                // A timeout indicates faulty user input (e.g., catastrophic backtracking).
                var errorMessage = BuildErrorMessage(exactName, pattern,
                    "Regular expression pattern evaluation timed out. Use a simpler pattern or remove that condition");
                throw new InvalidOperationException(errorMessage, ex);
            }
        };
    }

    /// <summary>
    /// Composes an error message of limited length. The full form names the tag and the pattern; when that
    /// exceeds the limit the tag is dropped and the pattern is shortened with a trailing "...".
    /// </summary>
    private static string BuildErrorMessage(string exactName, string pattern, string errorType)
    {
        const int maxMessageLen = 200;
        const int ellipsisLen = 3;
        const int prefixLen = 23; // length of ". Pattern starts with: "

        var fullMsg = $"{errorType}: {exactName} -> Pattern: {pattern}";
        if (fullMsg.Length <= maxMessageLen)
        {
            return fullMsg;
        }

        // Keep the error type and as much of the pattern as fits. The lower bound keeps the slice
        // below from receiving a negative length when the error type alone nearly fills the limit.
        var maxPatternLen = Math.Max(ellipsisLen, maxMessageLen - errorType.Length - prefixLen);
        var trimmedPattern = pattern.Length > maxPatternLen
            ? pattern[..(maxPatternLen - ellipsisLen)] + "..."
            : pattern;

        return $"{errorType}. Pattern starts with: {trimmedPattern}";
    }

    // The "(?x)" line must start in the same column as the closing quotes of the raw string: any extra
    // whitespace in front of it would become part of the pattern before option x is switched on.
    // Negated operators are listed before their positive counterparts so that the longer spelling wins.
    [GeneratedRegex("""
        (?x) # option x: ignore all unescaped whitespace in pattern and allow comments starting with #
        ^(?>(?<op>(?<list_op>                      # anchor at start of line. Capture operator in <op>, <list_op> and <num_op>
              ≡ | == | :equals:                    # - list operators: ≡ for checking if two lists contain the same items regardless of order
            | ∌ | !>> | ∋\u0338 | :not_contains:   # - list operators: ∌ for checking if the first list does not contain all items of the second list
            | ∋ | >> | :contains:                  # - list operators: ∋ for checking if the first list contains all items of the second list
            | ∉ | !<< | ∈\u0338 | :not_in:         # - list operators: ∉ for checking if the first list is not contained in the second list
            | ∈ | << | :in:                        # - list operators: ∈ for checking if the first list is contained in the second list
            | ⋂̸ | &&! | ⋂! | :disjoint:            # - list operators: ⋂̸ for checking if the two lists are disjoint
            | ⋂ | && | :overlaps:                  # - list operators: ⋂ for checking if the two lists overlap in at least one item
            | ⊆ | <=< | :subset:                   # - list operators: ⊆ for checking if the first list is a subset of the second list (may be equal)
            | ⊇ | >=> | :superset:                 # - list operators: ⊇ for checking if the first list is a superset of the second list (may be equal)
            | ⊂ | <-< | :proper_subset:            # - list operators: ⊂ for checking if the first list is a proper subset of the second list (not equal)
            | ⊃ | >-> | :proper_superset:          # - list operators: ⊃ for checking if the first list is a proper superset of the second list (not equal)
          ) | (?<num_op>
              \#!?= | ≠ | =\u0338                  # - numerical operators: #= #!= ≠
            | \#[<>]=?                             # - numerical operators: #<= #>= #< #>
            | [<>]=? | ≤ | ≥                       # - numerical operators: <= >= < > ≤ ≥
          ) | [=!]?~ | !=? | =?                    # - string comparison operators including ~ for regexp, = and !=. No operator is like =
        )) \s*?                                    # ignore space between operator and value
        (?<val>(?(num_op)                          # capture value in <val>
              (?:\d)+                              # - numerical operators have to be followed by a number
            | (?:\\.|[^\\])+ )                     # - string for comparison. May be empty. Capturing also all whitespace up to the end as this must have been escaped.
        )?$                                        # match to the end
        """)]
    private static partial Regex CheckRegex();
}