diff --git a/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj b/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj index 469d9f6d3d2cf..3d7bcdc5f1cfc 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj +++ b/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj @@ -2,7 +2,6 @@ System.Text.RegularExpressions $(DefineConstants);FEATURE_COMPILED - $(DefineConstants);FEATURE_COMPILEAPIS true $(NetCoreAppCurrent)-Debug;$(NetCoreAppCurrent)-Release enable diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/CompiledRegexRunnerFactory.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/CompiledRegexRunnerFactory.cs index 257300e6e3f52..e39d80f3a68e1 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/CompiledRegexRunnerFactory.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/CompiledRegexRunnerFactory.cs @@ -2,24 +2,32 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -// This is the only concrete implementation of RegexRunnerFactory, -// but we cannot combine them due to RegexRunnerFactory having shipped public. 
+using System.Reflection.Emit; namespace System.Text.RegularExpressions { internal sealed class CompiledRegexRunnerFactory : RegexRunnerFactory { - private readonly Action _go; - private readonly Func _findFirstChar; - private readonly Action _initTrackCount; + private readonly DynamicMethod _goMethod; + private readonly DynamicMethod _findFirstCharMethod; + private readonly DynamicMethod _initTrackCountMethod; - public CompiledRegexRunnerFactory(Action go, Func findFirstChar, Action initTrackCount) + // Delegates are lazily created to avoid forcing JIT'ing until the regex is actually executed. + private Action? _go; + private Func? _findFirstChar; + private Action? _initTrackCount; + + public CompiledRegexRunnerFactory(DynamicMethod goMethod, DynamicMethod findFirstCharMethod, DynamicMethod initTrackCountMethod) { - _go = go; - _findFirstChar = findFirstChar; - _initTrackCount = initTrackCount; + _goMethod = goMethod; + _findFirstCharMethod = findFirstCharMethod; + _initTrackCountMethod = initTrackCountMethod; } - protected internal override RegexRunner CreateInstance() => new CompiledRegexRunner(_go, _findFirstChar, _initTrackCount); + protected internal override RegexRunner CreateInstance() => + new CompiledRegexRunner( + _go ??= (Action)_goMethod.CreateDelegate(typeof(Action)), + _findFirstChar ??= (Func)_findFirstCharMethod.CreateDelegate(typeof(Func)), + _initTrackCount ??= (Action)_initTrackCountMethod.CreateDelegate(typeof(Action))); } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Match.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Match.cs index a29209beced54..a16fd0ec90a17 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Match.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Match.cs @@ -26,6 +26,7 @@ // using System.Collections; +using System.Diagnostics.CodeAnalysis; using 
System.Globalization; namespace System.Text.RegularExpressions @@ -331,6 +332,7 @@ internal void Tidy(int textpos) } #if DEBUG + [ExcludeFromCodeCoverage] internal bool Debug => _regex != null && _regex.Debug; internal virtual void Dump() @@ -372,6 +374,7 @@ internal MatchSparse(Regex regex, Hashtable caps, int capcount, string text, int public override GroupCollection Groups => _groupcoll ??= new GroupCollection(this, _caps); #if DEBUG + [ExcludeFromCodeCoverage] internal override void Dump() { if (_caps != null) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs index 13a741dfddbd1..cc505492c164a 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs @@ -207,7 +207,6 @@ private static RegexRunnerFactory Compile(RegexCode code, RegexOptions options, } #endif -#if FEATURE_COMPILEAPIS public static void CompileToAssembly(RegexCompilationInfo[] regexinfos, AssemblyName assemblyname) { throw new PlatformNotSupportedException(SR.PlatformNotSupported_CompileToAssembly); @@ -222,7 +221,6 @@ public static void CompileToAssembly(RegexCompilationInfo[] regexinfos, Assembly { throw new PlatformNotSupportedException(SR.PlatformNotSupported_CompileToAssembly); } -#endif // FEATURE_COMPILEAPIS /// /// Escapes a minimal set of metacharacters (\, *, +, ?, |, {, [, (, ), ^, $, ., #, and @@ -461,6 +459,7 @@ protected void InitializeReferences() /// /// True if the regex has debugging enabled /// + [ExcludeFromCodeCoverage] internal bool Debug => (roptions & RegexOptions.Debug) != 0; #endif } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexBoyerMoore.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexBoyerMoore.cs 
index c142f5d5a6274..12b7c24cf8210 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexBoyerMoore.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexBoyerMoore.cs @@ -12,6 +12,7 @@ // need to be examined. using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; using System.Globalization; namespace System.Text.RegularExpressions @@ -338,8 +339,10 @@ public int Scan(string text, int index, int beglimit, int endlimit) /// /// Used when dumping for debugging. /// + [ExcludeFromCodeCoverage] public override string ToString() => Pattern; + [ExcludeFromCodeCoverage] public string Dump(string indent) { StringBuilder sb = new StringBuilder(); diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs index 2c66ad68366f8..d85ad3cb1fb21 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs @@ -4,6 +4,7 @@ using System.Collections.Generic; using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; using System.Globalization; using System.Threading; @@ -733,7 +734,7 @@ public static string ConvertOldStringsToClass(string set, string category) /// public static char SingletonChar(string set) { - Debug.Assert(IsSingletonInverse(set), "Tried to get the singleton char out of a non singleton character class"); + Debug.Assert(IsSingleton(set) || IsSingletonInverse(set), "Tried to get the singleton char out of a non singleton character class"); return set[SetStartIndex]; } @@ -748,6 +749,20 @@ public static bool IsEmpty(string charClass) => !IsNegated(charClass) && !IsSubtraction(charClass); + /// true if the set contains a single character only + /// + /// This will happen 
not only from character classes manually written to contain a single character, + /// but much more frequently by the implementation/parser itself, e.g. when looking for \n as part of + /// finding the end of a line, when processing an alternation like "hello|hithere" where the first + /// character of both options is the same, etc. + /// + public static bool IsSingleton(string set) => + set[CategoryLengthIndex] == 0 && + set[SetLengthIndex] == 2 && + !IsNegated(set) && + !IsSubtraction(set) && + (set[SetStartIndex] == LastChar || set[SetStartIndex] + 1 == set[SetStartIndex + 1]); + public static bool IsSingletonInverse(string set) => set[CategoryLengthIndex] == 0 && set[SetLengthIndex] == 2 && @@ -755,6 +770,117 @@ public static bool IsSingletonInverse(string set) => !IsSubtraction(set) && (set[SetStartIndex] == LastChar || set[SetStartIndex] + 1 == set[SetStartIndex + 1]); + /// Gets all of the characters in the specified set, storing them into the provided span. + /// The character class. + /// The span into which the chars should be stored. + /// + /// The number of stored chars. If they won't all fit, 0 is returned. + /// + /// + /// Only considers character classes that only contain sets (no categories), no negation, + /// and no subtraction... just simple sets containing starting/ending pairs. + /// + public static int GetSetChars(string set, Span chars) + { + if (!CanEasilyEnumerateSetContents(set)) + { + return 0; + } + + int setLength = set[SetLengthIndex]; + int count = 0; + for (int i = SetStartIndex; i < SetStartIndex + setLength; i += 2) + { + int curSetEnd = set[i + 1]; + for (int c = set[i]; c < curSetEnd; c++) + { + if (count >= chars.Length) + { + return 0; + } + + chars[count++] = (char)c; + } + } + + return count; + } + + /// + /// Determines whether two sets may overlap. + /// + /// false if the two sets do not overlap; true if they may. + /// + /// If the method returns false, the caller can be sure the sets do not overlap. 
+ /// If the method returns true, it's still possible the sets don't overlap. + /// + public static bool MayOverlap(string set1, string set2) + { + // If either set is all-inclusive, there's overlap. + if (set1 == AnyClass || set2 == AnyClass) + { + return true; + } + + // If the sets are identical other than one being the negation of the other, they don't overlap. + if (IsNegated(set1) != IsNegated(set2) && set1.AsSpan(1).SequenceEqual(set2.AsSpan(1))) + { + return false; + } + + // Special-case some known, common classes that don't overlap. + if (KnownDistinctSets(set1, set2) || + KnownDistinctSets(set2, set1)) + { + return false; + } + + // If set2 can be easily enumerated (e.g. no unicode categories), then enumerate it and + // check if any of its members are in set1. Otherwise, the same for set1. + if (CanEasilyEnumerateSetContents(set2)) + { + return MayOverlapByEnumeration(set1, set2); + } + else if (CanEasilyEnumerateSetContents(set1)) + { + return MayOverlapByEnumeration(set2, set1); + } + + // Assume that everything else might overlap. In the future if it proved impactful, we could be more accurate here, + // at the expense of more computation time. + return true; + + static bool KnownDistinctSets(string set1, string set2) => + (set1 == SpaceClass || set1 == ECMASpaceClass) && + (set2 == DigitClass || set2 == WordClass || set2 == ECMADigitClass || set2 == ECMAWordClass); + + static bool MayOverlapByEnumeration(string set1, string set2) + { + for (int i = SetStartIndex; i < SetStartIndex + set2[SetLengthIndex]; i += 2) + { + int curSetEnd = set2[i + 1]; + for (int c = set2[i]; c < curSetEnd; c++) + { + if (CharInClass((char)c, set1)) + { + return true; + } + } + } + + return false; + } + } + + /// Gets whether we can iterate through the set list pairs in order to completely enumerate the set's contents. 
+ internal static bool CanEasilyEnumerateSetContents(string set) => + set.Length > SetStartIndex && + set[SetLengthIndex] > 0 && + set[SetLengthIndex] % 2 == 0 && + set[CategoryLengthIndex] == 0 && + !IsNegated(set) && + !IsSubtraction(set); + internal static bool IsSubtraction(string charClass) => charClass.Length > SetStartIndex + charClass[CategoryLengthIndex] + @@ -1249,6 +1375,7 @@ private static ReadOnlySpan SetFromProperty(string capname, bool invert, s /// /// Produces a human-readable description for a set string. /// + [ExcludeFromCodeCoverage] public static string SetDescription(string set) { int setLength = set[SetLengthIndex]; @@ -1347,6 +1474,7 @@ public static string SetDescription(string set) /// /// Produces a human-readable description for a single character. /// + [ExcludeFromCodeCoverage] public static string CharDescription(char ch) { if (ch == '\\') @@ -1382,6 +1510,7 @@ public static string CharDescription(char ch) return sb.ToString(); } + [ExcludeFromCodeCoverage] private static string CategoryDescription(char ch) { if (ch == SpaceConst) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs index 33ef2f0772815..8c3da31d7c743 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs @@ -18,6 +18,7 @@ using System.Collections; using System.Collections.Generic; using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; namespace System.Text.RegularExpressions { @@ -75,12 +76,18 @@ internal sealed class RegexCode public const int Testref = 37; // backtrack if ref undefined public const int Goto = 38; // jump just go - public const int Prune = 39; // prune it baby public const int Stop = 40; // done! 
public const int ECMABoundary = 41; // \b public const int NonECMABoundary = 42; // \B + // Manufactured primitive operations, derived from the tree that comes from the parser. + // These exist to reduce backtracking (both actually performing it and spitting code for it). + + public const int Oneloopatomic = 43; // lef,back char,min,max (?> a {,n} ) + public const int Notoneloopatomic = 44; // lef,back set,min,max (?> . {,n} ) + public const int Setloopatomic = 45; // lef,back set,min,max (?> [\d]{,n} ) + // Modifiers for alternate modes public const int Mask = 63; // Mask to get unmodified ordinary operator public const int Rtl = 64; // bit to indicate that we're reverse scanning. @@ -88,6 +95,7 @@ internal sealed class RegexCode public const int Back2 = 256; // bit to indicate that we're backtracking on a second branch. public const int Ci = 512; // bit to indicate that we're case-insensitive. + public readonly RegexTree Tree; // the optimized parse tree public readonly int[] Codes; // the code public readonly string[] Strings; // the string/set table public readonly int[]?[] StringsAsciiLookup; // the ASCII lookup table optimization for the sets in Strings @@ -100,17 +108,15 @@ internal sealed class RegexCode public readonly int Anchors; // the set of zero-length start anchors (RegexFCD.Bol, etc) public readonly bool RightToLeft; // true if right to left - public RegexCode(int[] codes, List stringlist, int trackcount, - Hashtable? caps, int capsize, - RegexBoyerMoore? bmPrefix, RegexPrefix? fcPrefix, - int anchors, bool rightToLeft) + public RegexCode(RegexTree tree, int[] codes, string[] strings, int trackcount, + Hashtable? caps, int capsize, + RegexBoyerMoore? bmPrefix, RegexPrefix? 
fcPrefix, + int anchors, bool rightToLeft) { - Debug.Assert(codes != null, "codes cannot be null."); - Debug.Assert(stringlist != null, "stringlist cannot be null."); - + Tree = tree; Codes = codes; - Strings = stringlist.ToArray(); - StringsAsciiLookup = new int[Strings.Length][]; + Strings = strings; + StringsAsciiLookup = new int[strings.Length][]; TrackCount = trackcount; Caps = caps; CapSize = capsize; @@ -190,7 +196,6 @@ public static int OpcodeSize(int opcode) case Lazybranch: case Branchmark: case Lazybranchmark: - case Prune: case Set: return 2; @@ -200,12 +205,15 @@ public static int OpcodeSize(int opcode) case Onerep: case Notonerep: case Oneloop: + case Oneloopatomic: case Notoneloop: + case Notoneloopatomic: case Onelazy: case Notonelazy: case Setlazy: case Setrep: case Setloop: + case Setloopatomic: return 3; default: @@ -214,36 +222,68 @@ public static int OpcodeSize(int opcode) } #if DEBUG - private static readonly string[] s_codeStr = new string[] - { - "Onerep", "Notonerep", "Setrep", - "Oneloop", "Notoneloop", "Setloop", - "Onelazy", "Notonelazy", "Setlazy", - "One", "Notone", "Set", - "Multi", "Ref", - "Bol", "Eol", "Boundary", "Nonboundary", "Beginning", "Start", "EndZ", "End", - "Nothing", - "Lazybranch", "Branchmark", "Lazybranchmark", - "Nullcount", "Setcount", "Branchcount", "Lazybranchcount", - "Nullmark", "Setmark", "Capturemark", "Getmark", - "Setjump", "Backjump", "Forejump", "Testref", "Goto", - "Prune", "Stop", -#if ECMA - "ECMABoundary", "NonECMABoundary", -#endif - }; - + [ExcludeFromCodeCoverage] private static string OperatorDescription(int Opcode) { - bool isCi = ((Opcode & Ci) != 0); - bool isRtl = ((Opcode & Rtl) != 0); - bool isBack = ((Opcode & Back) != 0); - bool isBack2 = ((Opcode & Back2) != 0); - - return s_codeStr[Opcode & Mask] + - (isCi ? "-Ci" : "") + (isRtl ? "-Rtl" : "") + (isBack ? "-Back" : "") + (isBack2 ? 
"-Back2" : ""); + string codeStr = (Opcode & Mask) switch + { + Onerep => nameof(Onerep), + Notonerep => nameof(Notonerep), + Setrep => nameof(Setrep), + Oneloop => nameof(Oneloop), + Notoneloop => nameof(Notoneloop), + Setloop => nameof(Setloop), + Onelazy => nameof(Onelazy), + Notonelazy => nameof(Notonelazy), + Setlazy => nameof(Setlazy), + One => nameof(One), + Notone => nameof(Notone), + Set => nameof(Set), + Multi => nameof(Multi), + Ref => nameof(Ref), + Bol => nameof(Bol), + Eol => nameof(Eol), + Boundary => nameof(Boundary), + Nonboundary => nameof(Nonboundary), + Beginning => nameof(Beginning), + Start => nameof(Start), + EndZ => nameof(EndZ), + End => nameof(End), + Nothing => nameof(Nothing), + Lazybranch => nameof(Lazybranch), + Branchmark => nameof(Branchmark), + Lazybranchmark => nameof(Lazybranchmark), + Nullcount => nameof(Nullcount), + Setcount => nameof(Setcount), + Branchcount => nameof(Branchcount), + Lazybranchcount => nameof(Lazybranchcount), + Nullmark => nameof(Nullmark), + Setmark => nameof(Setmark), + Capturemark => nameof(Capturemark), + Getmark => nameof(Getmark), + Setjump => nameof(Setjump), + Backjump => nameof(Backjump), + Forejump => nameof(Forejump), + Testref => nameof(Testref), + Goto => nameof(Goto), + Stop => nameof(Stop), + ECMABoundary => nameof(ECMABoundary), + NonECMABoundary => nameof(NonECMABoundary), + Oneloopatomic => nameof(Oneloopatomic), + Notoneloopatomic => nameof(Notoneloopatomic), + Setloopatomic => nameof(Setloopatomic), + _ => "(unknown)" + }; + + return + codeStr + + ((Opcode & Ci) != 0 ? "-Ci" : "") + + ((Opcode & Rtl) != 0 ? "-Rtl" : "") + + ((Opcode & Back) != 0 ? "-Back" : "") + + ((Opcode & Back2) != 0 ? 
"-Back2" : ""); } + [ExcludeFromCodeCoverage] public string OpcodeDescription(int offset) { StringBuilder sb = new StringBuilder(); @@ -263,7 +303,9 @@ public string OpcodeDescription(int offset) case Onerep: case Notonerep: case Oneloop: + case Oneloopatomic: case Notoneloop: + case Notoneloopatomic: case Onelazy: case Notonelazy: sb.Append("Ch = "); @@ -273,6 +315,7 @@ public string OpcodeDescription(int offset) case Set: case Setrep: case Setloop: + case Setloopatomic: case Setlazy: sb.Append("Set = "); sb.Append(RegexCharClass.SetDescription(Strings[Codes[offset + 1]])); @@ -321,11 +364,14 @@ public string OpcodeDescription(int offset) case Onerep: case Notonerep: case Oneloop: + case Oneloopatomic: case Notoneloop: + case Notoneloopatomic: case Onelazy: case Notonelazy: case Setrep: case Setloop: + case Setloopatomic: case Setlazy: sb.Append(", Rep = "); if (Codes[offset + 2] == int.MaxValue) @@ -349,6 +395,7 @@ public string OpcodeDescription(int offset) return sb.ToString(); } + [ExcludeFromCodeCoverage] public void Dump() { int i; diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs index e97c25a37a8db..222c68f6c57d6 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs @@ -2,7 +2,9 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
+using System.Collections.Generic; using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; using System.Globalization; using System.Reflection; using System.Reflection.Emit; @@ -26,7 +28,8 @@ internal abstract class RegexCompiler private static readonly FieldInfo s_runstackField = RegexRunnerField("runstack"); private static readonly FieldInfo s_runtrackcountField = RegexRunnerField("runtrackcount"); - private static readonly MethodInfo s_ensureStorageMethod = RegexRunnerMethod("EnsureStorage"); + private static readonly MethodInfo s_doubleStackMethod = RegexRunnerMethod("DoubleStack"); + private static readonly MethodInfo s_doubleTrackMethod = RegexRunnerMethod("DoubleTrack"); private static readonly MethodInfo s_captureMethod = RegexRunnerMethod("Capture"); private static readonly MethodInfo s_transferCaptureMethod = RegexRunnerMethod("TransferCapture"); private static readonly MethodInfo s_uncaptureMethod = RegexRunnerMethod("Uncapture"); @@ -48,14 +51,22 @@ internal abstract class RegexCompiler private static readonly MethodInfo s_charIsWhiteSpaceMethod = typeof(char).GetMethod("IsWhiteSpace", new Type[] { typeof(char) })!; private static readonly MethodInfo s_stringGetCharsMethod = typeof(string).GetMethod("get_Chars", new Type[] { typeof(int) })!; private static readonly MethodInfo s_stringAsSpanMethod = typeof(MemoryExtensions).GetMethod("AsSpan", new Type[] { typeof(string), typeof(int), typeof(int) })!; + private static readonly MethodInfo s_stringIndexOf = typeof(string).GetMethod("IndexOf", new Type[] { typeof(char), typeof(int), typeof(int) })!; + private static readonly MethodInfo s_spanIndexOf = typeof(MemoryExtensions).GetMethod("IndexOf", new Type[] { typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), Type.MakeGenericMethodParameter(0) })!.MakeGenericMethod(typeof(char)); + private static readonly MethodInfo s_spanIndexOfAnyCharChar = typeof(MemoryExtensions).GetMethod("IndexOfAny", new Type[] { 
typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), Type.MakeGenericMethodParameter(0), Type.MakeGenericMethodParameter(0) })!.MakeGenericMethod(typeof(char)); + private static readonly MethodInfo s_spanIndexOfAnyCharCharChar = typeof(MemoryExtensions).GetMethod("IndexOfAny", new Type[] { typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), Type.MakeGenericMethodParameter(0), Type.MakeGenericMethodParameter(0), Type.MakeGenericMethodParameter(0) })!.MakeGenericMethod(typeof(char)); private static readonly MethodInfo s_spanGetItemMethod = typeof(ReadOnlySpan).GetMethod("get_Item", new Type[] { typeof(int) })!; private static readonly MethodInfo s_spanGetLengthMethod = typeof(ReadOnlySpan).GetMethod("get_Length")!; + private static readonly MethodInfo s_spanSliceIntMethod = typeof(ReadOnlySpan).GetMethod("Slice", new Type[] { typeof(int) })!; + private static readonly MethodInfo s_spanSliceIntIntMethod = typeof(ReadOnlySpan).GetMethod("Slice", new Type[] { typeof(int), typeof(int) })!; private static readonly MethodInfo s_cultureInfoGetCurrentCultureMethod = typeof(CultureInfo).GetMethod("get_CurrentCulture")!; +#if DEBUG + private static readonly MethodInfo s_debugWriteLine = typeof(Debug).GetMethod("WriteLine", new Type[] { typeof(string) })!; +#endif protected ILGenerator? _ilg; // tokens representing local variables - private LocalBuilder? _runtextstartLocal; private LocalBuilder? _runtextbegLocal; private LocalBuilder? _runtextendLocal; private LocalBuilder? _runtextposLocal; @@ -70,7 +81,8 @@ internal abstract class RegexCompiler private LocalBuilder? _cultureLocal; // current culture is cached in local variable to prevent many thread local storage accesses for CultureInfo.CurrentCulture private LocalBuilder? _loopTimeoutCounterLocal; // timeout counter for setrep and setloop - protected RegexCode? 
_code; // the RegexCode object (used for debugging only) + protected RegexOptions _options; // options + protected RegexCode? _code; // the RegexCode object protected int[]? _codes; // the RegexCodes being translated protected string[]? _strings; // the stringtable associated with the RegexCodes protected RegexPrefix? _fcPrefix; // the possible first chars computed by RegexFCD @@ -82,16 +94,12 @@ internal abstract class RegexCompiler private BacktrackNote[]? _notes; // a list of the backtracking states to be generated private int _notecount; // true count of _notes (allocation grows exponentially) protected int _trackcount; // count of backtracking states (used to reduce allocations) - private Label _backtrack; // label for backtracking - private int _regexopcode; // the current opcode being processed private int _codepos; // the current code being translated private int _backpos; // the current backtrack-note being translated - protected RegexOptions _options; // options - // special code fragments private int[]? _uniquenote; // _notes indices for code that should be emitted <= once private int[]? _goto; // indices for forward-jumps-through-switch (for allocations) @@ -289,6 +297,9 @@ private void Ldc(int i) /// A macro for _ilg.Emit(OpCodes.Sub) or _ilg.Emit(OpCodes.Add). private void Sub(bool negate) => _ilg!.Emit(negate ? OpCodes.Add : OpCodes.Sub); + /// A macro for _ilg.Emit(OpCodes.Mul). + private void Mul() => _ilg!.Emit(OpCodes.Mul); + /// A macro for _ilg.Emit(OpCodes.And). private void And() => _ilg!.Emit(OpCodes.And); @@ -307,7 +318,7 @@ private void Ldc(int i) /// A macro for _ilg.Emit(OpCodes.Ldind_U2). private void LdindU2() => _ilg!.Emit(OpCodes.Ldind_U2); - /// A macro for _ilg.Emit(OpCodes.Stloc). + /// A macro for _ilg.Emit(OpCodes.Stloc_S). private void Stloc(LocalBuilder lt) => _ilg!.Emit(OpCodes.Stloc_S, lt); /// A macro for _ilg.Emit(OpCodes.Ldarg_0). 
@@ -317,7 +328,7 @@ private void Ldc(int i) private void Ldthisfld(FieldInfo ft) { Ldthis(); - _ilg!.Emit(OpCodes.Ldfld, ft); + Ldfld(ft); } /// A macro for Ldthis(); Ldfld(); Stloc(); @@ -335,6 +346,9 @@ private void Mvlocfld(LocalBuilder lt, FieldInfo ft) Stfld(ft); } + /// A macro for _ilg.Emit(OpCodes.Ldfld). + private void Ldfld(FieldInfo ft) => _ilg!.Emit(OpCodes.Ldfld, ft); + /// A macro for _ilg.Emit(OpCodes.Stfld). private void Stfld(FieldInfo ft) => _ilg!.Emit(OpCodes.Stfld, ft); @@ -359,9 +373,15 @@ private void Mvlocfld(LocalBuilder lt, FieldInfo ft) /// A macro for _ilg.Emit(OpCodes.Blt) (long form). private void BltFar(Label l) => _ilg!.Emit(OpCodes.Blt, l); + /// A macro for _ilg.Emit(OpCodes.Blt_Un) (long form). + private void BltUnFar(Label l) => _ilg!.Emit(OpCodes.Blt_Un, l); + /// A macro for _ilg.Emit(OpCodes.Bge) (long form). private void BgeFar(Label l) => _ilg!.Emit(OpCodes.Bge, l); + /// A macro for _ilg.Emit(OpCodes.Bge_Un) (long form). + private void BgeUnFar(Label l) => _ilg!.Emit(OpCodes.Bge_Un, l); + /// A macro for _ilg.Emit(OpCodes.Bgt) (long form). private void BgtFar(Label l) => _ilg!.Emit(OpCodes.Bgt, l); @@ -401,9 +421,31 @@ private void Mvlocfld(LocalBuilder lt, FieldInfo ft) /// A macro for _ilg.Emit(OpCodes.Beq_S) (short jump). private void Beq(Label l) => _ilg!.Emit(OpCodes.Beq_S, l); - /// A macro for the Ldlen instruction). + /// A macro for the Ldlen instruction. private void Ldlen() => _ilg!.Emit(OpCodes.Ldlen); + /// A macro for the Ldelem_I4 instruction. + private void LdelemI4() => _ilg!.Emit(OpCodes.Ldelem_I4); + + /// A macro for the Stelem_I4 instruction. + private void StelemI4() => _ilg!.Emit(OpCodes.Stelem_I4); + + private void Switch(Label[] table) => _ilg!.Emit(OpCodes.Switch, table); + + /// Declares a local int. + private LocalBuilder DeclareInt32() => _ilg!.DeclareLocal(typeof(int)); + + /// Declares a local CultureInfo. + private LocalBuilder? 
DeclareCultureInfo() => _ilg!.DeclareLocal(typeof(CultureInfo)); // cache local variable to avoid unnecessary TLS + + /// Declares a local int[]. + private LocalBuilder DeclareInt32Array() => _ilg!.DeclareLocal(typeof(int[])); + + /// Declares a local string. + private LocalBuilder DeclareString() => _ilg!.DeclareLocal(typeof(string)); + + private LocalBuilder DeclareReadOnlySpanChar() => _ilg!.DeclareLocal(typeof(ReadOnlySpan)); + /// Loads the char to the right of the current position. private void Rightchar() { @@ -417,11 +459,11 @@ private void Rightcharnext() { Ldloc(_runtextLocal!); Ldloc(_runtextposLocal!); - Dup(); + Callvirt(s_stringGetCharsMethod); + Ldloc(_runtextposLocal!); Ldc(1); Add(); Stloc(_runtextposLocal!); - Callvirt(s_stringGetCharsMethod); } /// Loads the char to the left of the current position. @@ -499,83 +541,83 @@ private void TrackUnique2(int i) /// Prologue to code that will push an element on the tracking stack. private void ReadyPushTrack() { - _ilg!.Emit(OpCodes.Ldloc_S, _runtrackLocal!); - _ilg.Emit(OpCodes.Ldloc_S, _runtrackposLocal!); - _ilg.Emit(OpCodes.Ldc_I4_1); - _ilg.Emit(OpCodes.Sub); - _ilg.Emit(OpCodes.Dup); - _ilg.Emit(OpCodes.Stloc_S, _runtrackposLocal!); + Ldloc(_runtrackLocal!); + Ldloc(_runtrackposLocal!); + Ldc(1); + Sub(); + Dup(); + Stloc(_runtrackposLocal!); } /// Pops an element off the tracking stack (leave it on the operand stack). private void PopTrack() { - _ilg!.Emit(OpCodes.Ldloc_S, _runtrackLocal!); - _ilg.Emit(OpCodes.Ldloc_S, _runtrackposLocal!); - _ilg.Emit(OpCodes.Dup); - _ilg.Emit(OpCodes.Ldc_I4_1); - _ilg.Emit(OpCodes.Add); - _ilg.Emit(OpCodes.Stloc_S, _runtrackposLocal!); - _ilg.Emit(OpCodes.Ldelem_I4); + Ldloc(_runtrackLocal!); + Ldloc(_runtrackposLocal!); + LdelemI4(); + Ldloc(_runtrackposLocal!); + Ldc(1); + Add(); + Stloc(_runtrackposLocal!); } /// Retrieves the top entry on the tracking stack without popping. 
private void TopTrack() { - _ilg!.Emit(OpCodes.Ldloc_S, _runtrackLocal!); - _ilg.Emit(OpCodes.Ldloc_S, _runtrackposLocal!); - _ilg.Emit(OpCodes.Ldelem_I4); + Ldloc(_runtrackLocal!); + Ldloc(_runtrackposLocal!); + LdelemI4(); } /// Saves the value of a local variable on the grouping stack. private void PushStack(LocalBuilder lt) { ReadyPushStack(); - _ilg!.Emit(OpCodes.Ldloc_S, lt); + Ldloc(lt); DoPush(); } /// Prologue to code that will replace the ith element on the grouping stack. internal void ReadyReplaceStack(int i) { - _ilg!.Emit(OpCodes.Ldloc_S, _runstackLocal!); - _ilg.Emit(OpCodes.Ldloc_S, _runstackposLocal!); + Ldloc(_runstackLocal!); + Ldloc(_runstackposLocal!); if (i != 0) { Ldc(i); - _ilg.Emit(OpCodes.Add); + Add(); } } /// Prologue to code that will push an element on the grouping stack. private void ReadyPushStack() { - _ilg!.Emit(OpCodes.Ldloc_S, _runstackLocal!); - _ilg.Emit(OpCodes.Ldloc_S, _runstackposLocal!); - _ilg.Emit(OpCodes.Ldc_I4_1); - _ilg.Emit(OpCodes.Sub); - _ilg.Emit(OpCodes.Dup); - _ilg.Emit(OpCodes.Stloc_S, _runstackposLocal!); + Ldloc(_runstackLocal!); + Ldloc(_runstackposLocal!); + Ldc(1); + Sub(); + Dup(); + Stloc(_runstackposLocal!); } /// Retrieves the top entry on the stack without popping. private void TopStack() { - _ilg!.Emit(OpCodes.Ldloc_S, _runstackLocal!); - _ilg.Emit(OpCodes.Ldloc_S, _runstackposLocal!); - _ilg.Emit(OpCodes.Ldelem_I4); + Ldloc(_runstackLocal!); + Ldloc(_runstackposLocal!); + LdelemI4(); } /// Pops an element off the grouping stack (leave it on the operand stack). 
private void PopStack() { - _ilg!.Emit(OpCodes.Ldloc_S, _runstackLocal!); - _ilg.Emit(OpCodes.Ldloc_S, _runstackposLocal!); - _ilg.Emit(OpCodes.Dup); - _ilg.Emit(OpCodes.Ldc_I4_1); - _ilg.Emit(OpCodes.Add); - _ilg.Emit(OpCodes.Stloc_S, _runstackposLocal!); - _ilg.Emit(OpCodes.Ldelem_I4); + Ldloc(_runstackLocal!); + Ldloc(_runstackposLocal!); + LdelemI4(); + Ldloc(_runstackposLocal!); + Ldc(1); + Add(); + Stloc(_runstackposLocal!); } /// Pops 1 element off the grouping stack and discards it. @@ -584,20 +626,20 @@ private void PopStack() /// Pops i elements off the grouping stack and discards them. private void PopDiscardStack(int i) { - _ilg!.Emit(OpCodes.Ldloc_S, _runstackposLocal!); + Ldloc(_runstackposLocal!); Ldc(i); - _ilg.Emit(OpCodes.Add); - _ilg.Emit(OpCodes.Stloc_S, _runstackposLocal!); + Add(); + Stloc(_runstackposLocal!); } /// Epilogue to code that will replace an element on a stack (use Ld* in between). - private void DoReplace() => _ilg!.Emit(OpCodes.Stelem_I4); + private void DoReplace() => StelemI4(); /// Epilogue to code that will push an element on a stack (use Ld* in between). - private void DoPush() => _ilg!.Emit(OpCodes.Stelem_I4); + private void DoPush() => StelemI4(); /// Jump to the backtracking switch. - private void Back() => _ilg!.Emit(OpCodes.Br, _backtrack); + private void Back() => BrFar(_backtrack); /// /// Branch to the MSIL corresponding to the regex code at i @@ -650,7 +692,7 @@ private void Goto(int i) private Label AdvanceLabel() => _labels![NextCodepos()]; /// Goto the next (forward) operation. - private void Advance() => _ilg!.Emit(OpCodes.Br, AdvanceLabel()); + private void Advance() => BrFar(AdvanceLabel()); /// Sets the culture local to CultureInfo.CurrentCulture. 
private void InitLocalCultureInfo() @@ -684,25 +726,22 @@ private void CallToLower() /// private void GenerateForwardSection() { + _uniquenote = new int[Uniquecount]; _labels = new Label[_codes!.Length]; _goto = new int[_codes.Length]; // initialize - int codepos; - for (codepos = 0; codepos < _codes.Length; codepos += RegexCode.OpcodeSize(_codes[codepos])) + Array.Fill(_uniquenote, -1); + for (int codepos = 0; codepos < _codes.Length; codepos += RegexCode.OpcodeSize(_codes[codepos])) { _goto[codepos] = -1; - _labels[codepos] = _ilg!.DefineLabel(); + _labels[codepos] = DefineLabel(); } - _uniquenote = new int[Uniquecount]; - Array.Fill(_uniquenote, -1); - // emit variable initializers Mvfldloc(s_runtextField, _runtextLocal!); - Mvfldloc(s_runtextstartField, _runtextstartLocal!); Mvfldloc(s_runtextbegField, _runtextbegLocal!); Mvfldloc(s_runtextendField, _runtextendLocal!); Mvfldloc(s_runtextposField, _runtextposLocal!); @@ -713,7 +752,7 @@ private void GenerateForwardSection() _backpos = -1; - for (codepos = 0; codepos < _codes.Length; codepos += RegexCode.OpcodeSize(_codes[codepos])) + for (int codepos = 0; codepos < _codes.Length; codepos += RegexCode.OpcodeSize(_codes[codepos])) { MarkLabel(_labels[codepos]); _codepos = codepos; @@ -730,28 +769,65 @@ private void GenerateForwardSection() /// private void GenerateMiddleSection() { - // Backtrack switch + LocalBuilder limitLocal = _temp1Local!; + Label afterDoubleStack = DefineLabel(); + Label afterDoubleTrack = DefineLabel(); + + // Backtrack: MarkLabel(_backtrack); - // first call EnsureStorage - Mvlocfld(_runtrackposLocal!, s_runtrackposField); + // (Equivalent of EnsureStorage, but written to avoid unnecessary local spilling.) 
+ + // int limitLocal = runtrackcount * 4; + Ldthisfld(s_runtrackcountField); + Ldc(4); + Mul(); + Stloc(limitLocal); + + // if (runstackpos < limit) + // { + // this.runstackpos = runstackpos; + // DoubleStack(); // might change runstackpos and runstack + // runstackpos = this.runstackpos; + // runstack = this.runstack; + // } + Ldloc(_runstackposLocal!); + Ldloc(limitLocal); + Bge(afterDoubleStack); Mvlocfld(_runstackposLocal!, s_runstackposField); Ldthis(); - Callvirt(s_ensureStorageMethod); - Mvfldloc(s_runtrackposField, _runtrackposLocal!); + Callvirt(s_doubleStackMethod); Mvfldloc(s_runstackposField, _runstackposLocal!); - Mvfldloc(s_runtrackField, _runtrackLocal!); Mvfldloc(s_runstackField, _runstackLocal!); + MarkLabel(afterDoubleStack); + + // if (runtrackpos < limit) + // { + // this.runtrackpos = runtrackpos; + // DoubleTrack(); // might change runtrackpos and runtrack + // runtrackpos = this.runtrackpos; + // runtrack = this.runtrack; + // } + Ldloc(_runtrackposLocal!); + Ldloc(limitLocal); + Bge(afterDoubleTrack); + Mvlocfld(_runtrackposLocal!, s_runtrackposField); + Ldthis(); + Callvirt(s_doubleTrackMethod); + Mvfldloc(s_runtrackposField, _runtrackposLocal!); + Mvfldloc(s_runtrackField, _runtrackLocal!); + MarkLabel(afterDoubleTrack); + // runtrack[runtrackpos++] PopTrack(); + // Backtracking jump table var table = new Label[_notecount]; for (int i = 0; i < _notecount; i++) { table[i] = _notes![i]._label; } - - _ilg!.Emit(OpCodes.Switch, table); + Switch(table); } /// @@ -765,7 +841,7 @@ private void GenerateBacktrackSection() BacktrackNote n = _notes![i]; if (n._flags != 0) { - _ilg!.MarkLabel(n._label); + MarkLabel(n._label); _codepos = n._codepos; _backpos = i; _regexopcode = _codes![n._codepos] | n._flags; @@ -783,10 +859,11 @@ private void GenerateBacktrackSection() /// protected void GenerateFindFirstChar() { - _runtextposLocal = DeclareInt(); + _runtextposLocal = DeclareInt32(); + _runtextendLocal = DeclareInt32(); _runtextLocal = 
DeclareString(); - _temp1Local = DeclareInt(); - _temp2Local = DeclareInt(); + _temp1Local = DeclareInt32(); + _temp2Local = DeclareInt32(); _cultureLocal = null; if (!_options.HasFlag(RegexOptions.CultureInvariant)) { @@ -1029,7 +1106,7 @@ protected void GenerateFindFirstChar() } Ldloc(chLocal); - _ilg!.Emit(OpCodes.Switch, table); + Switch(table); for (int i = _bmPrefix.LowASCII; i <= _bmPrefix.HighASCII; i++) { @@ -1123,190 +1200,1398 @@ protected void GenerateFindFirstChar() Ldc(0); BleFar(l4); - MarkLabel(l1); + MarkLabel(l1); + + Ldloc(cLocal); + Ldc(1); + Sub(); + Stloc(cLocal); + + Leftcharnext(); + + if (!RegexCharClass.IsSingleton(_fcPrefix.GetValueOrDefault().Prefix)) + { + EmitCallCharInClass(_fcPrefix.GetValueOrDefault().Prefix, _fcPrefix.GetValueOrDefault().CaseInsensitive, charInClassLocal); + BrtrueFar(l2); + } + else + { + Ldc(RegexCharClass.SingletonChar(_fcPrefix.GetValueOrDefault().Prefix)); + Beq(l2); + } + + MarkLabel(l5); + + Ldloc(cLocal); + Ldc(0); + if (!RegexCharClass.IsSingleton(_fcPrefix.GetValueOrDefault().Prefix)) + { + BgtFar(l1); + } + else + { + Bgt(l1); + } + + Ldc(0); + BrFar(l3); + + MarkLabel(l2); + + Ldloc(_runtextposLocal); + Ldc(1); + Sub(_code.RightToLeft); + Stloc(_runtextposLocal); + Ldc(1); + + MarkLabel(l3); + + Mvlocfld(_runtextposLocal, s_runtextposField); + Ret(); + + MarkLabel(l4); + Ldc(0); + Ret(); + } + else // for left-to-right, we can take advantage of vectorization and JIT optimizations + { + LocalBuilder iLocal = _temp2Local; + Label returnFalseLabel = DefineLabel(); + Label updatePosAndReturnFalse = DefineLabel(); + + Mvfldloc(s_runtextposField, _runtextposLocal); + Mvfldloc(s_runtextendField, _runtextendLocal); + + // if (runtextend > runtextpos) + Ldloc(_runtextendLocal); + Ldloc(_runtextposLocal); + BleFar(returnFalseLabel); + + Span setChars = stackalloc char[3]; + int setCharsCount; + if (!_fcPrefix.GetValueOrDefault().CaseInsensitive && + (setCharsCount = 
RegexCharClass.GetSetChars(_fcPrefix.GetValueOrDefault().Prefix, setChars)) > 0) + { + // This is a case-sensitive class with a small number of characters in the class, small enough + // that we can generate an IndexOf{Any} call. That takes advantage of optimizations in + // IndexOf{Any}, such as vectorization, which our open-coded loop through the span doesn't have. + switch (setCharsCount) + { + case 1: + // int i = this.runtext.IndexOf(setChars[0], runtextpos, runtextend - runtextpos); + Ldthisfld(s_runtextField); + Ldc(setChars[0]); + Ldloc(_runtextposLocal); + Ldloc(_runtextendLocal); + Ldloc(_runtextposLocal); + Sub(); + Call(s_stringIndexOf); + Stloc(iLocal); + + // if (i >= 0) + Ldloc(iLocal); + Ldc(0); + BltFar(updatePosAndReturnFalse); + + // runtextpos = i; return true; + Mvlocfld(iLocal, s_runtextposField); + Ldc(1); + Ret(); + break; + + case 2: + case 3: + // int i = this.runtext.AsSpan(runtextpos, runtextend - runtextpos).IndexOfAny(setChars[0], setChars[1]{, setChars[2]}); + Ldthisfld(s_runtextField); + Ldloc(_runtextposLocal); + Ldloc(_runtextendLocal); + Ldloc(_runtextposLocal); + Sub(); + Call(s_stringAsSpanMethod); + Ldc(setChars[0]); + Ldc(setChars[1]); + if (setCharsCount == 3) + { + Ldc(setChars[2]); + Call(s_spanIndexOfAnyCharCharChar); + } + else + { + Call(s_spanIndexOfAnyCharChar); + } + Stloc(iLocal); + + // if (i >= 0) + Ldloc(iLocal); + Ldc(0); + BltFar(updatePosAndReturnFalse); + + // this.runtextpos = runtextpos + i; return true; + Ldthis(); + Ldloc(_runtextposLocal); + Ldloc(iLocal); + Add(); + Stfld(s_runtextposField); + Ldc(1); + Ret(); + break; + + default: + Debug.Fail("Unexpected setCharsCount: " + setCharsCount); + break; + } + } + else + { + // Either this isn't a class with just a few characters in it, or this is case insensitive. + // Either way, create a span and iterate through it rather than the original string in order + // to avoid bounds checks on each access. 
+ + LocalBuilder charInClassLocal = _temp1Local; + _temp3Local = DeclareReadOnlySpanChar(); + LocalBuilder textSpanLocal = _temp3Local; + + Label checkSpanLengthLabel = DefineLabel(); + Label charNotInClassLabel = DefineLabel(); + Label loopBody = DefineLabel(); + + // ReadOnlySpan<char> span = this.runtext.AsSpan(runtextpos, runtextend - runtextpos); + Ldthisfld(s_runtextField); + Ldloc(_runtextposLocal); + Ldloc(_runtextendLocal); + Ldloc(_runtextposLocal); + Sub(); + Call(s_stringAsSpanMethod); + Stloc(textSpanLocal); + + // for (int i = 0; + Ldc(0); + Stloc(iLocal); + BrFar(checkSpanLengthLabel); + + // if (CharInClass(span[i], "...")) + MarkLabel(loopBody); + Ldloca(textSpanLocal); + Ldloc(iLocal); + Call(s_spanGetItemMethod); + LdindU2(); + EmitCallCharInClass(_fcPrefix.GetValueOrDefault().Prefix, _fcPrefix.GetValueOrDefault().CaseInsensitive, charInClassLocal); + BrfalseFar(charNotInClassLabel); + + // this.runtextpos = runtextpos + i; return true; + Ldthis(); + Ldloc(_runtextposLocal); + Ldloc(iLocal); + Add(); + Stfld(s_runtextposField); + Ldc(1); + Ret(); + + // for (...; ...; i++) + MarkLabel(charNotInClassLabel); + Ldloc(iLocal); + Ldc(1); + Add(); + Stloc(iLocal); + + // for (...; i < span.Length; ...); + MarkLabel(checkSpanLengthLabel); + Ldloc(iLocal); + Ldloca(textSpanLocal); + Call(s_spanGetLengthMethod); + BltFar(loopBody); + } + + // this.runtextpos = runtextend; + MarkLabel(updatePosAndReturnFalse); + Ldthis(); + Ldloc(_runtextendLocal); + Stfld(s_runtextposField); + + // return false; + MarkLabel(returnFalseLabel); + Ldc(0); + Ret(); + } + } + + /// Generates a very simple method that sets the _trackcount field.
+ protected void GenerateInitTrackCount() + { + Ldthis(); + Ldc(_trackcount); + Stfld(s_runtrackcountField); + Ret(); + } + + private bool TryGenerateNonBacktrackingGo(RegexNode node) + { + Debug.Assert(node.Type == RegexNode.Capture && node.ChildCount() == 1, + "Every generated tree should begin with a capture node that has a single child."); + + // RightToLeft is rare and not worth adding a lot of custom code to handle in this path. + if ((node.Options & RegexOptions.RightToLeft) != 0) + { + return false; + } + + // Skip the Capture node. This path only supports the implicit capture of the whole match, + // which we handle implicitly at the end of the generated code in one location. + node = node.Child(0); + if (!NodeSupportsNonBacktrackingImplementation(node, level: 0)) + { + return false; + } + + // We've determined that the RegexNode can be handled with this optimized path. Generate the code. +#if DEBUG + if ((_options & RegexOptions.Debug) != 0) + { + Debug.WriteLine("Using optimized non-backtracking code gen."); + } +#endif + + // Declare some locals. + LocalBuilder runtextLocal = DeclareString(); + LocalBuilder originalruntextposLocal = DeclareInt32(); + LocalBuilder runtextposLocal = DeclareInt32(); + LocalBuilder textSpanLocal = DeclareReadOnlySpanChar(); + Stack? iterationLocals = null; + Stack? 
spanLocals = null; + Label stopSuccessLabel = DefineLabel(); + Label doneLabel = DefineLabel(); + if (_hasTimeout) + { + _loopTimeoutCounterLocal = DeclareInt32(); + } + + // CultureInfo culture = CultureInfo.CurrentCulture; // only if the whole expression or any subportion is ignoring case, and we're not using invariant + InitializeCultureForGoIfNecessary(); + + // string runtext = this.runtext; + Ldthisfld(s_runtextField); + Stloc(runtextLocal); + + // int originalruntextpos, runtextpos; + // runtextpos = originalruntextpos = this.runtextpos; + Ldthisfld(s_runtextposField); + Dup(); + Stloc(originalruntextposLocal); + Stloc(runtextposLocal); + + // The implementation tries to use const indexes into the span wherever possible, which we can do + // in all places except for variable-length loops. For everything else, we know at any point in + // the regex exactly how far into it we are, and we can use that to index into the span created + // at the beginning of the routine to begin at exactly where we're starting in the input. For + // variable-length loops, we index at this textSpanPos + i, and then after the loop we slice the input + // by i so that this position is still accurate for everything after it. + int textSpanPos = 0; + LoadTextSpanLocal(); + + // Emit the code for all nodes in the tree. + EmitNode(node); + + // Success: + // this.runtextpos = runtextpos + textSpanPos; + MarkLabel(stopSuccessLabel); + Ldthis(); + Ldloc(runtextposLocal); + if (textSpanPos > 0) + { + Ldc(textSpanPos); + Add(); + } + Stfld(s_runtextposField); + + // Capture(0, originalruntextposLocal, this.runtextpos); + Ldthis(); + Ldc(0); + Ldloc(originalruntextposLocal); + Ldthisfld(s_runtextposField); + Callvirt(s_captureMethod); + + // Done: + // return; + MarkLabel(doneLabel); + Ret(); + + // Generated code successfully with non-backtracking implementation. + return true; + + // Determines whether the node supports an optimized implementation that doesn't allow for backtracking. 
+ static bool NodeSupportsNonBacktrackingImplementation(RegexNode node, int level) + { + bool supported = false; + + // We only support the default left-to-right, not right-to-left, which requires more complication in the generated code. + // (Right-to-left is only employed when explicitly asked for by the developer or by lookbehind assertions.) + // We also limit the recursion involved to prevent stack dives; this limitation can be removed by switching + // away from a recursive implementation (done for convenience) to an iterative one that's more complicated + // but without the same problems. + if ((node.Options & RegexOptions.RightToLeft) == 0 && + level < 20) // arbitrary cut-off to limit stack dives + { + int childCount = node.ChildCount(); + + switch (node.Type) + { + // One/Notone/Set/Multi don't involve any repetition and are easily supported. + case RegexNode.One: + case RegexNode.Notone: + case RegexNode.Set: + case RegexNode.Multi: + // Boundaries are like set checks and don't involve repetition, either. + case RegexNode.Boundary: + case RegexNode.Nonboundary: + case RegexNode.ECMABoundary: + case RegexNode.NonECMABoundary: + // Anchors are also trivial. + case RegexNode.Beginning: + case RegexNode.Start: + case RegexNode.Bol: + case RegexNode.Eol: + case RegexNode.End: + case RegexNode.EndZ: + // {Set/One/Notone}loopatomic are optimized nodes that represent non-backtracking variable-length loops. + // These consume their {Set/One} inputs as long as they match, and don't give up anything they + // matched, which means we can support them without backtracking. + case RegexNode.Oneloopatomic: + case RegexNode.Notoneloopatomic: + case RegexNode.Setloopatomic: + // "Empty" is easy: nothing is emitted for it. + // "Nothing" is also easy: it doesn't match anything. + case RegexNode.Empty: + case RegexNode.Nothing: + supported = true; + break; + + // Repeaters don't require backtracking as long as their min and max are equal.
+ // At that point they're just a shorthand for writing out the One/Notone/Set + // that number of times. + case RegexNode.Oneloop: + case RegexNode.Notoneloop: + case RegexNode.Setloop: + Debug.Assert(node.Next == null || node.Next.Type != RegexNode.Atomic, "Loop should have been transformed into an atomic type."); + goto case RegexNode.Onelazy; + case RegexNode.Onelazy: + case RegexNode.Notonelazy: + case RegexNode.Setlazy: + supported = node.M == node.N || (node.Next != null && node.Next.Type == RegexNode.Atomic); + break; + + // {Lazy}Loop repeaters are the same, except their child also needs to be supported. + // We also support such loops being atomic. + case RegexNode.Loop: + case RegexNode.Lazyloop: + supported = + (node.M == node.N || (node.Next != null && node.Next.Type == RegexNode.Atomic)) && + NodeSupportsNonBacktrackingImplementation(node.Child(0), level + 1); + break; + + // We can handle atomic as long as we can handle making its child atomic, or + // its child doesn't have that concept. + case RegexNode.Atomic: + // Lookahead assertions also only require that the child node be supported. + // The RightToLeft check earlier is important to differentiate lookbehind, + // which is not supported. + case RegexNode.Require: + case RegexNode.Prevent: + supported = NodeSupportsNonBacktrackingImplementation(node.Child(0), level + 1); + break; + + // We can handle alternates as long as they're atomic (a root / global alternate is + // effectively atomic, as nothing will try to backtrack into it as it's the last thing). + // Its children must all also be supported. + case RegexNode.Alternate: + if (node.Next != null && + (node.Next.Type == RegexNode.Atomic || // atomic alternate + (node.Next.Type == RegexNode.Capture && node.Next.Next is null))) // root alternate + { + goto case RegexNode.Concatenate; + } + break; + + // Concatenation doesn't require backtracking as long as its children don't. 
+ case RegexNode.Concatenate: + supported = true; + for (int i = 0; i < childCount; i++) + { + if (supported && !NodeSupportsNonBacktrackingImplementation(node.Child(i), level + 1)) + { + supported = false; + break; + } + } + break; + } + } +#if DEBUG + if (!supported && (node.Options & RegexOptions.Debug) != 0) + { + Debug.WriteLine($"Unable to use non-backtracking code gen: node {node.Description()} isn't supported."); + } +#endif + return supported; + } + + static bool IsCaseInsensitive(RegexNode node) => (node.Options & RegexOptions.IgnoreCase) != 0; + + // Creates a span for runtext starting at runtextpos until this.runtextend. + void LoadTextSpanLocal() + { + // textSpan = runtext.AsSpan(runtextpos, this.runtextend - runtextpos); + Ldloc(runtextLocal); + Ldloc(runtextposLocal); + Ldthisfld(s_runtextendField); + Ldloc(runtextposLocal); + Sub(); + Call(s_stringAsSpanMethod); + Stloc(textSpanLocal); + } + + // Rents an Int32 local. We want to minimize the number of locals we create, so we maintain + // a pool of them, only adding when needing, and nested constructs that each need their own + // independent local can use this to get one. + LocalBuilder RentInt32Local() + { + iterationLocals ??= new Stack(1); + return iterationLocals.TryPop(out LocalBuilder? iterationLocal) ? iterationLocal : DeclareInt32(); + } + + // Returns a rented Int32 local. + void ReturnInt32Local(LocalBuilder int32Local) + { + Debug.Assert(iterationLocals != null); + Debug.Assert(int32Local.LocalType == typeof(int)); + iterationLocals.Push(int32Local); + } + + LocalBuilder RentReadOnlySpanCharLocal() + { + spanLocals ??= new Stack(1); + return spanLocals.TryPop(out LocalBuilder? iterationLocal) ? iterationLocal : DeclareReadOnlySpanChar(); + } + + void ReturnReadOnlySpanCharLocal(LocalBuilder spanLocal) + { + Debug.Assert(spanLocals != null); + Debug.Assert(spanLocal.LocalType == typeof(ReadOnlySpan)); + spanLocals.Push(spanLocal); + } + + void EmitSum(int constant, LocalBuilder? 
local) + { + if (local == null) + { + Ldc(constant); + } + else if (constant == 0) + { + Ldloc(local); + } + else + { + Ldloc(local); + Ldc(constant); + Add(); + } + } + + // Emits a check that the span is large enough at the currently known static position to handle the required additional length. + void EmitSpanLengthCheck(int requiredLength, LocalBuilder? dynamicRequiredLength = null) + { + // if ((uint)(textSpanPos + requiredLength + dynamicRequiredLength - 1) >= (uint)textSpan.Length) goto Done; + Debug.Assert(requiredLength > 0); + EmitSum(textSpanPos + requiredLength - 1, dynamicRequiredLength); + Ldloca(textSpanLocal); + Call(s_spanGetLengthMethod); + BgeUnFar(doneLabel); + } + + void TransferTextSpanPosToRunTextPos() + { + if (textSpanPos > 0) + { + // runtextpos += textSpanPos; + Ldloc(runtextposLocal); + Ldc(textSpanPos); + Add(); + Stloc(runtextposLocal); + + // textSpan = textSpan.Slice(textSpanPos); + Ldloca(textSpanLocal); + Ldc(textSpanPos); + Call(s_spanSliceIntMethod); + Stloc(textSpanLocal); + + // textSpanPos = 0; + textSpanPos = 0; + } + } + + // Emits the code for an atomic alternate, one that once a branch successfully matches is non-backtracking into it. + // This amounts to generating the code for each branch, with failures in a branch resetting state to what it was initially + // and then jumping to the next branch. We don't need to worry about uncapturing, because capturing is only allowed for the + // implicit capture that happens for the whole match at the end. + void EmitAtomicAlternate(RegexNode node) + { + // int startingTextSpanPos = textSpanPos; + // int startingRunTextPos = runtextpos; + // + // Branch0(); // jumps to NextBranch1 on failure + // goto Success; + // + // NextBranch1: + // runtextpos = originalruntextpos; + // textSpan = originalTextSpan; + // Branch1(); // jumps to NextBranch2 on failure + // goto Success; + // + // ... 
+ // + // NextBranchN: + // runtextpos = startingRunTextPos; + // textSpan = this.runtext.AsSpan(runtextpos, this.runtextend - runtextpos); + // textSpanPos = startingTextSpanPos; + // BranchN(); // jumps to Done on failure + + // Save off runtextpos. We'll need to reset this each time a branch fails. + LocalBuilder startingRunTextPos = RentInt32Local(); + Ldloc(runtextposLocal); + Stloc(startingRunTextPos); + int startingTextSpanPos = textSpanPos; + + // Label to jump to when any branch completes successfully. + Label doneAlternate = DefineLabel(); + + // A failure in a branch other than the last should jump to the next + // branch, not to the final done. + Label postAlternateDone = doneLabel; + + int childCount = node.ChildCount(); + for (int i = 0; i < childCount - 1; i++) + { + Label nextBranch = DefineLabel(); + doneLabel = nextBranch; + + // Emit the code for each branch. + EmitNode(node.Child(i)); + + // If we get here in the generated code, the branch completed successfully. + // Before jumping to the end, we need to zero out textSpanPos, so that no + // matter what the value is after the branch, whatever follows the alternate + // will see the same textSpanPos. + TransferTextSpanPosToRunTextPos(); + BrFar(doneAlternate); + + // Reset state for next branch and loop around to generate it. + MarkLabel(nextBranch); + Ldloc(startingRunTextPos); + Stloc(runtextposLocal); + LoadTextSpanLocal(); + textSpanPos = startingTextSpanPos; + } + + // If the final branch fails, that's like any other failure, and we jump to done. + doneLabel = postAlternateDone; + EmitNode(node.Child(childCount - 1)); + TransferTextSpanPosToRunTextPos(); + + // Successfully completed the alternate. + MarkLabel(doneAlternate); + ReturnInt32Local(startingRunTextPos); + + Debug.Assert(textSpanPos == 0); + } + + void EmitPositiveLookaheadAssertion(RegexNode node) + { + // Save off runtextpos. We'll need to reset this upon successful completion of the lookahead. 
+ LocalBuilder startingRunTextPos = RentInt32Local(); + Ldloc(runtextposLocal); + Stloc(startingRunTextPos); + int startingTextSpanPos = textSpanPos; + + // Emit the child. + EmitNode(node.Child(0)); + + // After the child completes successfully, reset the text positions. + Ldloc(startingRunTextPos); + Stloc(runtextposLocal); + LoadTextSpanLocal(); + textSpanPos = startingTextSpanPos; + + ReturnInt32Local(startingRunTextPos); + } + + void EmitNegativeLookaheadAssertion(RegexNode node) + { + // Save off runtextpos. We'll need to reset this upon successful completion of the lookahead. + LocalBuilder startingRunTextPos = RentInt32Local(); + Ldloc(runtextposLocal); + Stloc(startingRunTextPos); + int startingTextSpanPos = textSpanPos; + + Label originalDoneLabel = doneLabel; + doneLabel = DefineLabel(); + + // Emit the child. + EmitNode(node.Child(0)); + + // If the generated code ends up here, it matched the lookahead, which actually + // means failure for a _negative_ lookahead, so we need to jump to the original done. + BrFar(originalDoneLabel); + + // Failures (success for a negative lookahead) jump here. + MarkLabel(doneLabel); + doneLabel = originalDoneLabel; + + // After the child completes in failure (success for negative lookahead), reset the text positions. + Ldloc(startingRunTextPos); + Stloc(runtextposLocal); + LoadTextSpanLocal(); + textSpanPos = startingTextSpanPos; + + ReturnInt32Local(startingRunTextPos); + } + + // Emits the code for the node. 
+ void EmitNode(RegexNode node) + { + switch (node.Type) + { + case RegexNode.One: + case RegexNode.Notone: + case RegexNode.Set: + EmitSingleChar(node); + break; + + case RegexNode.Boundary: + case RegexNode.Nonboundary: + case RegexNode.ECMABoundary: + case RegexNode.NonECMABoundary: + EmitBoundary(node); + break; + + case RegexNode.Beginning: + case RegexNode.Start: + case RegexNode.Bol: + case RegexNode.Eol: + case RegexNode.End: + case RegexNode.EndZ: + EmitAnchors(node); + break; + + case RegexNode.Multi: + EmitMultiChar(node); + break; + + case RegexNode.Oneloopatomic: + case RegexNode.Notoneloopatomic: + case RegexNode.Setloopatomic: + EmitAtomicSingleCharLoop(node); + break; + + case RegexNode.Loop: + EmitAtomicNodeLoop(node); + break; + + case RegexNode.Lazyloop: + // An atomic lazy loop amounts to doing the minimum amount of work possible. + // That means iterating as little as is required, which means a repeater + // for the min, and if min is 0, doing nothing. + Debug.Assert(node.M == node.N || (node.Next != null && node.Next.Type == RegexNode.Atomic)); + if (node.M > 0) + { + EmitNodeRepeater(node); + } + break; + + case RegexNode.Atomic: + EmitNode(node.Child(0)); + break; + + case RegexNode.Alternate: + EmitAtomicAlternate(node); + break; + + case RegexNode.Oneloop: + case RegexNode.Onelazy: + case RegexNode.Notoneloop: + case RegexNode.Notonelazy: + case RegexNode.Setloop: + case RegexNode.Setlazy: + EmitSingleCharRepeater(node); + break; + + case RegexNode.Concatenate: + int childCount = node.ChildCount(); + for (int i = 0; i < childCount; i++) + { + EmitNode(node.Child(i)); + } + break; + + case RegexNode.Require: + EmitPositiveLookaheadAssertion(node); + break; + + case RegexNode.Prevent: + EmitNegativeLookaheadAssertion(node); + break; + + case RegexNode.Nothing: + BrFar(doneLabel); + break; + + case RegexNode.Empty: + // Emit nothing. 
+ break; + + default: + Debug.Fail($"Unexpected node type: {node.Type}"); + break; + } + } + + // Emits the code to handle a single-character match. + void EmitSingleChar(RegexNode node, bool emitLengthCheck = true, LocalBuilder? offset = null) + { + // if ((uint)(textSpanPos + offset) >= textSpan.Length || textSpan[textSpanPos + offset] != ch) goto Done; + if (emitLengthCheck) + { + EmitSpanLengthCheck(1, offset); + } + Ldloca(textSpanLocal); + EmitSum(textSpanPos, offset); + Call(s_spanGetItemMethod); + LdindU2(); + switch (node.Type) + { + // This only emits a single check, but it's called from the looping constructs in a loop + // to generate the code for a single check, so we map those looping constructs to the + // appropriate single check. + + case RegexNode.Set: + case RegexNode.Setlazy: + case RegexNode.Setloop: + case RegexNode.Setloopatomic: + LocalBuilder setScratchLocal = RentInt32Local(); + EmitCallCharInClass(node.Str!, IsCaseInsensitive(node), setScratchLocal); + ReturnInt32Local(setScratchLocal); + BrfalseFar(doneLabel); + break; + + case RegexNode.One: + case RegexNode.Onelazy: + case RegexNode.Oneloop: + case RegexNode.Oneloopatomic: + if (IsCaseInsensitive(node)) CallToLower(); + Ldc(node.Ch); + BneFar(doneLabel); + break; + + default: + Debug.Assert(node.Type == RegexNode.Notone || node.Type == RegexNode.Notonelazy || node.Type == RegexNode.Notoneloop || node.Type == RegexNode.Notoneloopatomic); + if (IsCaseInsensitive(node)) CallToLower(); + Ldc(node.Ch); + BeqFar(doneLabel); + break; + } + + textSpanPos++; + } + + // Emits the code to handle a boundary check on a character. 
+ void EmitBoundary(RegexNode node) + { + // if (!IsBoundary(runtextpos + textSpanPos, this.runtextbeg, this.runtextend)) goto doneLabel; + Ldthis(); + Ldloc(runtextposLocal); + if (textSpanPos > 0) + { + Ldc(textSpanPos); + Add(); + } + Ldthisfld(s_runtextbegField!); + Ldthisfld(s_runtextendField!); + switch (node.Type) + { + case RegexNode.Boundary: + Callvirt(s_isBoundaryMethod); + BrfalseFar(doneLabel); + break; + + case RegexNode.Nonboundary: + Callvirt(s_isBoundaryMethod); + BrtrueFar(doneLabel); + break; + + case RegexNode.ECMABoundary: + Callvirt(s_isECMABoundaryMethod); + BrfalseFar(doneLabel); + break; + + default: + Debug.Assert(node.Type == RegexNode.NonECMABoundary); + Callvirt(s_isECMABoundaryMethod); + BrtrueFar(doneLabel); + break; + } + } + + // Emits the code to handle various anchors. + void EmitAnchors(RegexNode node) + { + Debug.Assert(textSpanPos >= 0); + switch (node.Type) + { + case RegexNode.Beginning: + case RegexNode.Start: + if (textSpanPos > 0) + { + // If we statically know we've already matched part of the regex, there's no way we're at the + // beginning or start, as we've already progressed past it. + BrFar(doneLabel); + } + else + { + // if (runtextpos > this.runtextbeg/start) goto doneLabel; + Ldloc(runtextposLocal); + Ldthisfld(node.Type == RegexNode.Beginning ? 
s_runtextbegField : s_runtextstartField); + BneFar(doneLabel); + } + break; + + case RegexNode.Bol: + if (textSpanPos > 0) + { + // if (textSpan[textSpanPos - 1] != '\n') goto doneLabel; + Ldloca(textSpanLocal); + Ldc(textSpanPos - 1); + Call(s_spanGetItemMethod); + LdindU2(); + Ldc('\n'); + BneFar(doneLabel); + } + else + { + // We can't use our textSpan in this case, because we'd need to access textSpan[-1], so we access the runtext field directly: + // if (runtextpos > this.runtextbeg && this.runtext[runtextpos - 1] != '\n') goto doneLabel; + Label success = DefineLabel(); + Ldloc(runtextposLocal); + Ldthisfld(s_runtextbegField); + Ble(success); + Ldthisfld(s_runtextField); + Ldloc(runtextposLocal); + Ldc(1); + Sub(); + Callvirt(s_stringGetCharsMethod); + Ldc('\n'); + BneFar(doneLabel); + MarkLabel(success); + } + break; + + case RegexNode.End: + // if (textSpanPos < textSpan.Length) goto doneLabel; + Ldc(textSpanPos); + Ldloca(textSpanLocal); + Call(s_spanGetLengthMethod); + BltUnFar(doneLabel); + break; + + case RegexNode.EndZ: + // if (textSpanPos < textSpan.Length - 1) goto doneLabel; + Ldc(textSpanPos); + Ldloca(textSpanLocal); + Call(s_spanGetLengthMethod); + Ldc(1); + Sub(); + BltFar(doneLabel); + goto case RegexNode.Eol; + + case RegexNode.Eol: + // if (textSpanPos < textSpan.Length && textSpan[textSpanPos] != '\n') goto doneLabel; + { + Label success = DefineLabel(); + Ldc(textSpanPos); + Ldloca(textSpanLocal); + Call(s_spanGetLengthMethod); + BgeUnFar(success); + Ldloca(textSpanLocal); + Ldc(textSpanPos); + Call(s_spanGetItemMethod); + LdindU2(); + Ldc('\n'); + BneFar(doneLabel); + MarkLabel(success); + } + break; + } + } + + // Emits the code to handle a multiple-character match. + void EmitMultiChar(RegexNode node) + { + // if (textSpanPos + node.Str.Length >= textSpan.Length) goto doneLabel; + // if (node.Str[0] != textSpan[textSpanPos]) goto doneLabel; + // if (node.Str[1] != textSpan[textSpanPos+1]) goto doneLabel; + // ... 
+ EmitSpanLengthCheck(node.Str!.Length); + for (int i = 0; i < node.Str!.Length; i++) + { + Ldloca(textSpanLocal); + Ldc(textSpanPos + i); + Call(s_spanGetItemMethod); + LdindU2(); + if (IsCaseInsensitive(node)) CallToLower(); + Ldc(node.Str[i]); + BneFar(doneLabel); + } + + textSpanPos += node.Str.Length; + } + + // Emits the code to handle a loop (repeater) with a fixed number of iterations. + // RegexNode.M is used for the number of iterations; RegexNode.N is ignored. + void EmitSingleCharRepeater(RegexNode node) + { + int iterations = node.M; + + if (iterations == 0) + { + // No iterations, nothing to do. + return; + } + + // if ((uint)(textSpanPos + iterations - 1) >= (uint)textSpan.Length) goto doneLabel; + EmitSpanLengthCheck(iterations); + + // Arbitrary limit for unrolling vs creating a loop. We want to balance size in the generated + // code with other costs, like the (small) overhead of slicing to create the temp span to iterate. + const int MaxUnrollSize = 16; + + if (iterations <= MaxUnrollSize) + { + // if (textSpan[textSpanPos] != c1 || + // textSpan[textSpanPos + 1] != c2 || + // ...) 
+ // goto doneLabel; + for (int i = 0; i < iterations; i++) + { + EmitSingleChar(node, emitLengthCheck: false); + } + } + else + { + // ReadOnlySpan tmp = textSpan.Slice(textSpanPos, iterations); + // for (int i = 0; i < tmp.Length; i++) + // { + // TimeoutCheck(); + // if (tmp[i] != ch) goto Done; + // } + // textSpanPos += iterations; + + Label conditionLabel = DefineLabel(); + Label bodyLabel = DefineLabel(); + LocalBuilder iterationLocal = RentInt32Local(); + LocalBuilder spanLocal = RentReadOnlySpanCharLocal(); + + Ldloca(textSpanLocal); + Ldc(textSpanPos); + Ldc(iterations); + Call(s_spanSliceIntIntMethod); + Stloc(spanLocal); + + Ldc(0); + Stloc(iterationLocal); + BrFar(conditionLabel); + + MarkLabel(bodyLabel); + EmitTimeoutCheck(); + + LocalBuilder tmpTextSpanLocal = textSpanLocal; // we want EmitSingleChar to refer to this temporary + int tmpTextSpanPos = textSpanPos; + textSpanLocal = spanLocal; + textSpanPos = 0; + EmitSingleChar(node, emitLengthCheck: false, offset: iterationLocal); + textSpanLocal = tmpTextSpanLocal; + textSpanPos = tmpTextSpanPos; + + Ldloc(iterationLocal); + Ldc(1); + Add(); + Stloc(iterationLocal); + + MarkLabel(conditionLabel); + Ldloc(iterationLocal); + Ldloca(spanLocal); + Call(s_spanGetLengthMethod); + BltFar(bodyLabel); + + ReturnReadOnlySpanCharLocal(spanLocal); + ReturnInt32Local(iterationLocal); + + textSpanPos += iterations; + } + } + + // Emits the code to handle a loop (repeater) with a fixed number of iterations. + // This is used both to handle the case of A{5, 5} where the min and max are equal, + // and also to handle part of the case of A{3, 5}, where this method is called to + // handle the A{3, 3} portion, and then remaining A{0, 2} is handled separately. 
+ void EmitNodeRepeater(RegexNode node) + { + int iterations = node.M; + Debug.Assert(iterations > 0); + + if (iterations == 1) + { + Debug.Assert(node.ChildCount() == 1); + EmitNode(node.Child(0)); + return; + } + + // Ensure textSpanPos is 0 prior to emitting the child. + TransferTextSpanPosToRunTextPos(); + + // for (int i = 0; i < iterations; i++) + // { + // TimeoutCheck(); + // if (!match(child)) goto Done; + // } + + Label conditionLabel = DefineLabel(); + Label bodyLabel = DefineLabel(); + LocalBuilder iterationLocal = RentInt32Local(); + + Ldc(0); + Stloc(iterationLocal); + BrFar(conditionLabel); + + MarkLabel(bodyLabel); + EmitTimeoutCheck(); + + Debug.Assert(node.ChildCount() == 1); + Debug.Assert(textSpanPos == 0); + EmitNode(node.Child(0)); + TransferTextSpanPosToRunTextPos(); + + Ldloc(iterationLocal); + Ldc(1); + Add(); + Stloc(iterationLocal); + + MarkLabel(conditionLabel); + Ldloc(iterationLocal); + Ldc(iterations); + BltFar(bodyLabel); + + ReturnInt32Local(iterationLocal); + } + + // Emits the code to handle a non-backtracking, variable-length loop around a single character comparison. + void EmitAtomicSingleCharLoop(RegexNode node) + { + Debug.Assert( + node.Type == RegexNode.Oneloopatomic || + node.Type == RegexNode.Notoneloopatomic || + node.Type == RegexNode.Setloopatomic); + Debug.Assert(node.M < int.MaxValue); + + // First generate the code to handle the required number of iterations. + if (node.M == node.N) + { + EmitSingleCharRepeater(node); + return; + } + + Debug.Assert(node.N > node.M); + int minIterations = node.M; + int maxIterations = node.N; + + LocalBuilder iterationLocal = RentInt32Local(); + + Label originalDoneLabel = doneLabel; + doneLabel = DefineLabel(); + + if (node.Type == RegexNode.Notoneloopatomic && maxIterations == int.MaxValue && !IsCaseInsensitive(node)) + { + // For Notoneloopatomic, we're looking for a specific character, as everything until we find + // it is consumed by the loop.
If we're unbounded, such as with ".*" and if we're case-sensitive, + // we can use the vectorized IndexOf to do the search, rather than open-coding it. (In the future, + // we could consider using IndexOf with StringComparison for case insensitivity.) + + // int i = textSpan.Slice(textSpanPos).IndexOf(char); + if (textSpanPos > 0) + { + Ldloca(textSpanLocal); + Ldc(textSpanPos); + Call(s_spanSliceIntMethod); + } + else + { + Ldloc(textSpanLocal); + } + Ldc(node.Ch); + Call(s_spanIndexOf); + Stloc(iterationLocal); + + // if (i != -1) goto doneLabel; + Ldloc(iterationLocal); + Ldc(-1); + BneFar(doneLabel); - Ldloc(cLocal); - Ldc(1); - Sub(); - Stloc(cLocal); + // i = textSpan.Length - textSpanPos; + Ldloca(textSpanLocal); + Call(s_spanGetLengthMethod); + if (textSpanPos > 0) + { + Ldc(textSpanPos); + Sub(); + } + Stloc(iterationLocal); + } + else + { + // For everything else, do a normal loop. - Leftcharnext(); + // Transfer text pos to runtextpos to help with bounds check elimination on the loop. 
+ TransferTextSpanPosToRunTextPos(); - EmitCallCharInClass(_fcPrefix.GetValueOrDefault().Prefix, _fcPrefix.GetValueOrDefault().CaseInsensitive, charInClassLocal); - BrtrueFar(l2); + Label conditionLabel = DefineLabel(); + Label bodyLabel = DefineLabel(); - MarkLabel(l5); + // int i = 0; + Ldc(0); + Stloc(iterationLocal); + BrFar(conditionLabel); + + // Body: + // TimeoutCheck(); + MarkLabel(bodyLabel); + EmitTimeoutCheck(); + + // if ((uint)i >= (uint)textSpan.Length) goto doneLabel; + Ldloc(iterationLocal); + Ldloca(textSpanLocal); + Call(s_spanGetLengthMethod); + BgeUnFar(doneLabel); + + // if (textSpan[i] != ch) goto Done; + Ldloca(textSpanLocal); + Ldloc(iterationLocal); + Call(s_spanGetItemMethod); + LdindU2(); + switch (node.Type) + { + case RegexNode.Oneloopatomic: + if (IsCaseInsensitive(node)) CallToLower(); + Ldc(node.Ch); + BneFar(doneLabel); + break; + case RegexNode.Notoneloopatomic: + if (IsCaseInsensitive(node)) CallToLower(); + Ldc(node.Ch); + BeqFar(doneLabel); + break; + case RegexNode.Setloopatomic: + LocalBuilder setScratchLocal = RentInt32Local(); + EmitCallCharInClass(node.Str!, IsCaseInsensitive(node), setScratchLocal); + ReturnInt32Local(setScratchLocal); + BrfalseFar(doneLabel); + break; + } - Ldloc(cLocal); - Ldc(0); - BgtFar(l1); + // i++; + Ldloc(iterationLocal); + Ldc(1); + Add(); + Stloc(iterationLocal); - Ldc(0); - BrFar(l3); + // if (i >= maxIterations) goto doneLabel; + MarkLabel(conditionLabel); + if (maxIterations != int.MaxValue) + { + Ldloc(iterationLocal); + Ldc(maxIterations); + BltFar(bodyLabel); + } + else + { + BrFar(bodyLabel); + } + } - MarkLabel(l2); + // Done: + MarkLabel(doneLabel); + doneLabel = originalDoneLabel; // Restore the original done label - Ldloc(_runtextposLocal); - Ldc(1); - Sub(_code.RightToLeft); - Stloc(_runtextposLocal); - Ldc(1); + // Check to ensure we've found at least min iterations. 
+ if (minIterations > 0) + { + Ldloc(iterationLocal); + Ldc(minIterations); + BltFar(doneLabel); + } - MarkLabel(l3); + // Now that we've completed our optional iterations, advance the text span + // and runtextpos by the number of iterations completed. - Mvlocfld(_runtextposLocal, s_runtextposField); - Ret(); + // textSpan = textSpan.Slice(i); + Ldloca(textSpanLocal); + Ldloc(iterationLocal); + Call(s_spanSliceIntMethod); + Stloc(textSpanLocal); - MarkLabel(l4); - Ldc(0); - Ret(); + // runtextpos += i; + Ldloc(runtextposLocal); + Ldloc(iterationLocal); + Add(); + Stloc(runtextposLocal); + + ReturnInt32Local(iterationLocal); } - else // for left-to-right, use span to avoid bounds checks when doing normal forward iteration recognized by the JIT - { - LocalBuilder charInClassLocal = _temp1Local; - LocalBuilder iLocal = _temp2Local; - _temp3Local = DeclareReadOnlySpanChar(); - LocalBuilder textSpanLocal = _temp3Local; - Label returnFalseLabel = DefineLabel(); - Label checkSpanLengthLabel = DefineLabel(); - Label loopBody = DefineLabel(); - Label charNotInClassLabel = DefineLabel(); + // Emits the code to handle a non-backtracking, variable-length loop around another node. 
+ void EmitAtomicNodeLoop(RegexNode node) + { + Debug.Assert(node.Type == RegexNode.Loop); + Debug.Assert(node.M == node.N || (node.Next != null && node.Next.Type == RegexNode.Atomic)); + Debug.Assert(node.M < int.MaxValue); - // string runtext = this.runtext - Mvfldloc(s_runtextField, _runtextLocal); + if (node.M == node.N) + { + EmitNodeRepeater(node); + return; + } - // if (runtextend - runtextpos > 0) - Ldthisfld(s_runtextendField); - Ldthisfld(s_runtextposField); - Sub(); - Ldc(0); - BleFar(returnFalseLabel); + LocalBuilder iterationLocal = RentInt32Local(); - // ReadOnlySpan span = runtext.AsSpan(runtextpos, runtextend - runtextpos); - Ldloc(_runtextLocal); - Ldthisfld(s_runtextposField); - Ldthisfld(s_runtextendField); - Ldthisfld(s_runtextposField); - Sub(); - Call(s_stringAsSpanMethod); - Stloc(textSpanLocal); + Label originalDoneLabel = doneLabel; + doneLabel = DefineLabel(); - // for (int i = 0; - Ldc(0); - Stloc(iLocal); - BrFar(checkSpanLengthLabel); + // We might loop any number of times. In order to ensure this loop + // and subsequent code sees textSpanPos the same regardless, we always need it to contain + // the same value, and the easiest such value is 0. So, we transfer + // textSpanPos to runtextpos, and ensure that any path out of here has + // textSpanPos as 0. 
+ TransferTextSpanPosToRunTextPos(); - // if (CharInClass(span[i], "...")) - MarkLabel(loopBody); - Ldloca(textSpanLocal); - Ldloc(iLocal); - Call(s_spanGetItemMethod); - LdindU2(); - EmitCallCharInClass(_fcPrefix.GetValueOrDefault().Prefix, _fcPrefix.GetValueOrDefault().CaseInsensitive, charInClassLocal); - BrfalseFar(charNotInClassLabel); + Label conditionLabel = DefineLabel(); + Label bodyLabel = DefineLabel(); - // runtextpos += i; return true; - Ldthis(); - Ldthisfld(s_runtextposField); - Ldloc(iLocal); - Add(); - Stfld(s_runtextposField); - Ldc(1); - Ret(); + Debug.Assert(node.N > node.M); + int minIterations = node.M; + int maxIterations = node.N; - // for (...; ...; i++) - MarkLabel(charNotInClassLabel); - Ldloc(iLocal); + // int i = 0; + Ldc(0); + Stloc(iterationLocal); + BrFar(conditionLabel); + + // Body: + // TimeoutCheck(); + // if (!match) goto Done; + MarkLabel(bodyLabel); + EmitTimeoutCheck(); + + // Iteration body + Label successfulIterationLabel = DefineLabel(); + + Label prevDone = doneLabel; + doneLabel = DefineLabel(); + + // Save off runtextpos. + LocalBuilder startingRunTextPosLocal = RentInt32Local(); + Ldloc(runtextposLocal); + Stloc(startingRunTextPosLocal); + + // Emit the child. + Debug.Assert(textSpanPos == 0); + EmitNode(node.Child(0)); + TransferTextSpanPosToRunTextPos(); // ensure textSpanPos remains 0 + Br(successfulIterationLabel); // iteration succeeded + + // If the generated code gets here, the iteration failed. + // Reset state, branch to done. + MarkLabel(doneLabel); + doneLabel = prevDone; // reset done label + Ldloc(startingRunTextPosLocal); + Stloc(runtextposLocal); + ReturnInt32Local(startingRunTextPosLocal); + BrFar(doneLabel); + + // Successful iteration. 
+ MarkLabel(successfulIterationLabel); + + // i++; + Ldloc(iterationLocal); Ldc(1); Add(); - Stloc(iLocal); + Stloc(iterationLocal); - // for (...; i < span.Length; ...); - MarkLabel(checkSpanLengthLabel); - Ldloc(iLocal); - Ldloca(textSpanLocal); - Call(s_spanGetLengthMethod); - BltFar(loopBody); + // if (i >= maxIterations) goto doneLabel; + MarkLabel(conditionLabel); + if (maxIterations != int.MaxValue) + { + Ldloc(iterationLocal); + Ldc(maxIterations); + BltFar(bodyLabel); + } + else + { + BrFar(bodyLabel); + } - // runtextpos = runtextend; - Ldthis(); - Ldthisfld(s_runtextendField); - Stfld(s_runtextposField); + // Done: + MarkLabel(doneLabel); + doneLabel = originalDoneLabel; // Restore the original done label - // return false; - MarkLabel(returnFalseLabel); - Ldc(0); - Ret(); - } - } + // Check to ensure we've found at least min iterations. + if (minIterations > 0) + { + Ldloc(iterationLocal); + Ldc(minIterations); + BltFar(doneLabel); + } - /// Generates a very simple method that sets the _trackcount field. - protected void GenerateInitTrackCount() - { - Ldthis(); - Ldc(_trackcount); - Stfld(s_runtrackcountField); - Ret(); + ReturnInt32Local(iterationLocal); + } } - /// Declares a local int. - private LocalBuilder DeclareInt() => _ilg!.DeclareLocal(typeof(int)); - - /// Declares a local CultureInfo. - private LocalBuilder? DeclareCultureInfo() => _ilg!.DeclareLocal(typeof(CultureInfo)); // cache local variable to avoid unnecessary TLS - - /// Declares a local int[]. - private LocalBuilder DeclareIntArray() => _ilg!.DeclareLocal(typeof(int[])); - - /// Declares a local string. - private LocalBuilder DeclareString() => _ilg!.DeclareLocal(typeof(string)); - - private LocalBuilder DeclareReadOnlySpanChar() => _ilg!.DeclareLocal(typeof(ReadOnlySpan)); - /// Generates the code for "RegexRunner.Go". protected void GenerateGo() { + // Generate backtrack-free code when we're dealing with simpler regexes. 
+ if (TryGenerateNonBacktrackingGo(_code!.Tree.Root)) + { + return; + } + + // We're dealing with a regex more complicated than the fast-path non-backtracking + // implementation can handle. Do the full-fledged thing. + // declare some locals - _runtextposLocal = DeclareInt(); + _runtextposLocal = DeclareInt32(); _runtextLocal = DeclareString(); - _runtrackposLocal = DeclareInt(); - _runtrackLocal = DeclareIntArray(); - _runstackposLocal = DeclareInt(); - _runstackLocal = DeclareIntArray(); - _temp1Local = DeclareInt(); - _temp2Local = DeclareInt(); - _temp3Local = DeclareInt(); + _runtrackposLocal = DeclareInt32(); + _runtrackLocal = DeclareInt32Array(); + _runstackposLocal = DeclareInt32(); + _runstackLocal = DeclareInt32Array(); + _temp1Local = DeclareInt32(); + _temp2Local = DeclareInt32(); + _temp3Local = DeclareInt32(); if (_hasTimeout) { - _loopTimeoutCounterLocal = DeclareInt(); + _loopTimeoutCounterLocal = DeclareInt32(); } - _runtextbegLocal = DeclareInt(); - _runtextendLocal = DeclareInt(); - _runtextstartLocal = DeclareInt(); - - _cultureLocal = null; - if (!_options.HasFlag(RegexOptions.CultureInvariant)) - { - bool needsCulture = _options.HasFlag(RegexOptions.IgnoreCase); - if (!needsCulture) - { - for (int codepos = 0; codepos < _codes!.Length; codepos += RegexCode.OpcodeSize(_codes[codepos])) - { - if ((_codes[codepos] & RegexCode.Ci) == RegexCode.Ci) - { - needsCulture = true; - break; - } - } - } + _runtextbegLocal = DeclareInt32(); + _runtextendLocal = DeclareInt32(); - if (needsCulture) - { - _cultureLocal = DeclareCultureInfo(); - } - } + InitializeCultureForGoIfNecessary(); // clear some tables @@ -1320,29 +2605,37 @@ protected void GenerateGo() // emit the code!
- // cache CultureInfo in local variable which saves excessive thread local storage accesses - if (_cultureLocal != null) - { - InitLocalCultureInfo(); - } - GenerateForwardSection(); GenerateMiddleSection(); GenerateBacktrackSection(); } -#if DEBUG - /// Debug.WriteLine - private static readonly MethodInfo? s_debugWriteLine = typeof(Debug).GetMethod("WriteLine", new Type[] { typeof(string) }); - - /// Debug only: emit code to print out a message. - private void Message(string str) + private void InitializeCultureForGoIfNecessary() { - Ldstr(str); - Call(s_debugWriteLine!); - } + _cultureLocal = null; + if ((_options & RegexOptions.CultureInvariant) == 0) + { + bool needsCulture = (_options & RegexOptions.IgnoreCase) != 0; + if (!needsCulture) + { + for (int codepos = 0; codepos < _codes!.Length; codepos += RegexCode.OpcodeSize(_codes[codepos])) + { + if ((_codes[codepos] & RegexCode.Ci) == RegexCode.Ci) + { + needsCulture = true; + break; + } + } + } -#endif + if (needsCulture) + { + // cache CultureInfo in local variable which saves excessive thread local storage accesses + _cultureLocal = DeclareCultureInfo(); + InitLocalCultureInfo(); + } + } + } /// /// The main translation function. 
It translates the logic for a single opcode at @@ -1361,37 +2654,9 @@ private void GenerateOneCode() { #if DEBUG if ((_options & RegexOptions.Debug) != 0) - { - Mvlocfld(_runtextposLocal!, s_runtextposField); - Mvlocfld(_runtrackposLocal!, s_runtrackposField); - Mvlocfld(_runstackposLocal!, s_runstackposField); - Ldthis(); - Callvirt(s_dumpStateM); - - var sb = new StringBuilder(); - if (_backpos > 0) - { - sb.AppendFormat("{0:D6} ", _backpos); - } - else - { - sb.Append(" "); - } - sb.Append(_code!.OpcodeDescription(_codepos)); - - if ((_regexopcode & RegexCode.Back) != 0) - { - sb.Append(" Back"); - } - - if ((_regexopcode & RegexCode.Back2) != 0) - { - sb.Append(" Back2"); - } - - Message(sb.ToString()); - } + DumpBacktracking(); #endif + LocalBuilder charInClassLocal; // Before executing any RegEx code in the unrolled loop, @@ -2241,10 +3506,7 @@ private void GenerateOneCode() //: if (Str[i] != Rightcharnext()) //: break Backward; { - int i; - string str; - - str = _strings![Operand(0)]; + string str = _strings![Operand(0)]; Ldc(str.Length); Ldloc(_runtextendLocal!); @@ -2253,7 +3515,7 @@ private void GenerateOneCode() BgtFar(_backtrack); // unroll the string - for (i = 0; i < str.Length; i++) + for (int i = 0; i < str.Length; i++) { Ldloc(_runtextLocal!); Ldloc(_runtextposLocal!); @@ -2289,10 +3551,7 @@ private void GenerateOneCode() //: if (Str[--c] != Leftcharnext()) //: break Backward; { - int i; - string str; - - str = _strings![Operand(0)]; + string str = _strings![Operand(0)]; Ldc(str.Length); Ldloc(_runtextposLocal!); @@ -2301,7 +3560,7 @@ private void GenerateOneCode() BgtFar(_backtrack); // unroll the string - for (i = str.Length; i > 0;) + for (int i = str.Length; i > 0;) { i--; Ldloc(_runtextLocal!); @@ -2509,10 +3768,7 @@ private void GenerateOneCode() if (Code() == RegexCode.Setrep) { - if (_hasTimeout) - { - EmitTimeoutCheck(); - } + EmitTimeoutCheck(); EmitCallCharInClass(_strings![Operand(0)], IsCaseInsensitive(), charInClassLocal); 
BrfalseFar(_backtrack); } @@ -2558,6 +3814,18 @@ private void GenerateOneCode() case RegexCode.Oneloop | RegexCode.Ci | RegexCode.Rtl: case RegexCode.Notoneloop | RegexCode.Ci | RegexCode.Rtl: case RegexCode.Setloop | RegexCode.Ci | RegexCode.Rtl: + case RegexCode.Oneloopatomic: + case RegexCode.Notoneloopatomic: + case RegexCode.Setloopatomic: + case RegexCode.Oneloopatomic | RegexCode.Rtl: + case RegexCode.Notoneloopatomic | RegexCode.Rtl: + case RegexCode.Setloopatomic | RegexCode.Rtl: + case RegexCode.Oneloopatomic | RegexCode.Ci: + case RegexCode.Notoneloopatomic | RegexCode.Ci: + case RegexCode.Setloopatomic | RegexCode.Ci: + case RegexCode.Oneloopatomic | RegexCode.Ci | RegexCode.Rtl: + case RegexCode.Notoneloopatomic | RegexCode.Ci | RegexCode.Rtl: + case RegexCode.Setloopatomic | RegexCode.Ci | RegexCode.Rtl: //: int c = Operand(1); //: if (c > Rightchars()) //: c = Rightchars(); @@ -2620,7 +3888,7 @@ private void GenerateOneCode() Dup(); Stloc(cLocal); Ldc(0); - if (Code() == RegexCode.Setloop) + if (Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic) { BleFar(l2); } @@ -2638,12 +3906,9 @@ private void GenerateOneCode() Rightcharnext(); } - if (Code() == RegexCode.Setloop) + if (Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic) { - if (_hasTimeout) - { - EmitTimeoutCheck(); - } + EmitTimeoutCheck(); EmitCallCharInClass(_strings![Operand(0)], IsCaseInsensitive(), charInClassLocal); BrtrueFar(l1); } @@ -2655,12 +3920,13 @@ private void GenerateOneCode() } Ldc(Operand(0)); - if (Code() == RegexCode.Oneloop) + if (Code() == RegexCode.Oneloop || Code() == RegexCode.Oneloopatomic) { Beq(l1); } else { + Debug.Assert(Code() == RegexCode.Notoneloop || Code() == RegexCode.Notoneloopatomic); Bne(l1); } } @@ -2671,25 +3937,29 @@ private void GenerateOneCode() Stloc(_runtextposLocal!); MarkLabel(l2); - Ldloc(lenLocal); - Ldloc(cLocal); - Ble(AdvanceLabel()); - ReadyPushTrack(); - Ldloc(lenLocal); - Ldloc(cLocal); - Sub(); - Ldc(1); - 
Sub(); - DoPush(); + if (Code() != RegexCode.Oneloopatomic && Code() != RegexCode.Notoneloopatomic && Code() != RegexCode.Setloopatomic) + { + Ldloc(lenLocal); + Ldloc(cLocal); + Ble(AdvanceLabel()); - ReadyPushTrack(); - Ldloc(_runtextposLocal!); - Ldc(1); - Sub(IsRightToLeft()); - DoPush(); + ReadyPushTrack(); + Ldloc(lenLocal); + Ldloc(cLocal); + Sub(); + Ldc(1); + Sub(); + DoPush(); - Track(); + ReadyPushTrack(); + Ldloc(_runtextposLocal!); + Ldc(1); + Sub(IsRightToLeft()); + DoPush(); + + Track(); + } break; } @@ -2937,11 +4207,20 @@ private void EmitCallCharInClass(string charClass, bool caseInsensitive, LocalBu charClass[RegexCharClass.CategoryLengthIndex] == 0 && // must not have any categories charClass[RegexCharClass.SetStartIndex] < charClass[RegexCharClass.SetStartIndex + 1]) // valid range { - // (uint)ch - charClass[3] < charClass[4] - charClass[3] - Ldc(charClass[RegexCharClass.SetStartIndex]); - Sub(); - Ldc(charClass[RegexCharClass.SetStartIndex + 1] - charClass[RegexCharClass.SetStartIndex]); - CltUn(); + if (RegexCharClass.IsSingleton(charClass) || RegexCharClass.IsSingletonInverse(charClass)) + { + // ch == charClass[3] + Ldc(charClass[3]); + Ceq(); + } + else + { + // (uint)ch - charClass[3] < charClass[4] - charClass[3] + Ldc(charClass[RegexCharClass.SetStartIndex]); + Sub(); + Ldc(charClass[RegexCharClass.SetStartIndex + 1] - charClass[RegexCharClass.SetStartIndex]); + CltUn(); + } // Negate the answer if the negation flag was set if (RegexCharClass.IsNegated(charClass)) @@ -2967,13 +4246,15 @@ private void EmitCallCharInClass(string charClass, bool caseInsensitive, LocalBu // endianness perspective because the compilation happens on the same machine // that runs the compiled code. If that were to ever change, this would need // to be revisited. String length is 8 chars == 16 bytes == 128 bits. 
- string bitVectorString = string.Create(8, charClass, (dest, charClass) => + string bitVectorString = string.Create(8, (charClass, invariant), (dest, state) => { for (int i = 0; i < 128; i++) { char c = (char)i; - if (RegexCharClass.CharInClass(c, charClass) || - (invariant && char.IsUpper(c) && RegexCharClass.CharInClass(char.ToLowerInvariant(c), charClass))) + bool isSet = state.invariant ? + RegexCharClass.CharInClass(char.ToLowerInvariant(c), state.charClass) : + RegexCharClass.CharInClass(c, state.charClass); + if (isSet) { dest[i >> 4] |= (char)(1 << (i & 0xF)); } @@ -2988,12 +4269,7 @@ private void EmitCallCharInClass(string charClass, bool caseInsensitive, LocalBu // 3. Evaluate CharInClass on all ~65K inputs. This is relatively expensive, impacting startup costs. // We currently go with (2). We may sometimes generate a fallback when we don't need one, but the cost of // doing so once in a while is minimal. - bool asciiOnly = - charClass.Length > RegexCharClass.SetStartIndex && - charClass[RegexCharClass.CategoryLengthIndex] == 0 && // if there are any categories, assume there's unicode - charClass[RegexCharClass.SetLengthIndex] % 2 == 0 && // range limits must come in pairs - !RegexCharClass.IsNegated(charClass) && // if there's negation, assume there's unicode - !RegexCharClass.IsSubtraction(charClass); // if it's subtraction, assume there's unicode + bool asciiOnly = RegexCharClass.CanEasilyEnumerateSetContents(charClass); if (asciiOnly) { for (int i = RegexCharClass.SetStartIndex; i < charClass.Length; i++) @@ -3062,7 +4338,12 @@ private void EmitCallCharInClass(string charClass, bool caseInsensitive, LocalBu /// Emits a timeout check. private void EmitTimeoutCheck() { - Debug.Assert(_hasTimeout && _loopTimeoutCounterLocal != null); + if (!_hasTimeout) + { + return; + } + + Debug.Assert(_loopTimeoutCounterLocal != null); // Increment counter for each loop iteration. 
Ldloc(_loopTimeoutCounterLocal); @@ -3080,5 +4361,42 @@ private void EmitTimeoutCheck() Callvirt(s_checkTimeoutMethod); MarkLabel(label); } + +#if DEBUG + /// Emit code to print out the current state of the runner. + [ExcludeFromCodeCoverage] + private void DumpBacktracking() + { + Mvlocfld(_runtextposLocal!, s_runtextposField); + Mvlocfld(_runtrackposLocal!, s_runtrackposField); + Mvlocfld(_runstackposLocal!, s_runstackposField); + Ldthis(); + Callvirt(s_dumpStateM); + + var sb = new StringBuilder(); + if (_backpos > 0) + { + sb.AppendFormat("{0:D6} ", _backpos); + } + else + { + sb.Append(" "); + } + sb.Append(_code!.OpcodeDescription(_codepos)); + + if ((_regexopcode & RegexCode.Back) != 0) + { + sb.Append(" Back"); + } + + if ((_regexopcode & RegexCode.Back2) != 0) + { + sb.Append(" Back2"); + } + + Ldstr(sb.ToString()); + Call(s_debugWriteLine!); + } +#endif } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFCD.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFCD.cs index b69a6e8c41934..ff958a4e151ad 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFCD.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFCD.cs @@ -13,6 +13,7 @@ using System.Collections.Generic; using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; using System.Globalization; namespace System.Text.RegularExpressions @@ -84,7 +85,7 @@ public static RegexPrefix Prefix(RegexTree tree) while (true) { - switch (curNode.NType) + switch (curNode.Type) { case RegexNode.Concatenate: if (curNode.ChildCount() > 0) @@ -94,13 +95,14 @@ public static RegexPrefix Prefix(RegexTree tree) } break; - case RegexNode.Greedy: + case RegexNode.Atomic: case RegexNode.Capture: curNode = curNode.Child(0); concatNode = null; continue; case RegexNode.Oneloop: + case RegexNode.Oneloopatomic: case RegexNode.Onelazy: // In release, 
cutoff at a length to which we can still reasonably construct a string @@ -165,7 +167,7 @@ public static int Anchors(RegexTree tree) while (true) { - switch (curNode.NType) + switch (curNode.Type) { case RegexNode.Concatenate: if (curNode.ChildCount() > 0) @@ -175,7 +177,7 @@ public static int Anchors(RegexTree tree) } break; - case RegexNode.Greedy: + case RegexNode.Atomic: case RegexNode.Capture: curNode = curNode.Child(0); concatNode = null; @@ -189,7 +191,7 @@ public static int Anchors(RegexTree tree) case RegexNode.Start: case RegexNode.EndZ: case RegexNode.End: - return result | AnchorFromType(curNode.NType); + return result | AnchorFromType(curNode.Type); case RegexNode.Empty: case RegexNode.Require: @@ -225,6 +227,7 @@ private static int AnchorFromType(int type) => }; #if DEBUG + [ExcludeFromCodeCoverage] public static string AnchorDescription(int anchors) { StringBuilder sb = new StringBuilder(); @@ -295,19 +298,20 @@ private RegexFC PopFC() while (true) { - if (curNode.Children == null) + int curNodeChildCount = curNode.ChildCount(); + if (curNodeChildCount == 0) { // This is a leaf node - CalculateFC(curNode.NType, curNode, 0); + CalculateFC(curNode.Type, curNode, 0); } - else if (curChild < curNode.Children.Count && !_skipAllChildren) + else if (curChild < curNodeChildCount && !_skipAllChildren) { // This is an interior node, and we have more children to analyze - CalculateFC(curNode.NType | BeforeChild, curNode, curChild); + CalculateFC(curNode.Type | BeforeChild, curNode, curChild); if (!_skipchild) { - curNode = curNode.Children[curChild]; + curNode = curNode.Child(curChild); // this stack is how we get a depth first walk of the tree. 
PushInt(curChild); curChild = 0; @@ -330,7 +334,7 @@ private RegexFC PopFC() curChild = PopInt(); curNode = curNode.Next; - CalculateFC(curNode!.NType | AfterChild, curNode, curChild); + CalculateFC(curNode!.Type | AfterChild, curNode, curChild); if (_failed) return null; @@ -353,16 +357,8 @@ private RegexFC PopFC() /// private void CalculateFC(int NodeType, RegexNode node, int CurIndex) { - bool ci = false; - bool rtl = false; - - if (NodeType <= RegexNode.Ref) - { - if ((node.Options & RegexOptions.IgnoreCase) != 0) - ci = true; - if ((node.Options & RegexOptions.RightToLeft) != 0) - rtl = true; - } + bool ci = (node.Options & RegexOptions.IgnoreCase) != 0; + bool rtl = (node.Options & RegexOptions.RightToLeft) != 0; switch (NodeType) { @@ -426,8 +422,8 @@ private void CalculateFC(int NodeType, RegexNode node, int CurIndex) case RegexNode.Group | AfterChild: case RegexNode.Capture | BeforeChild: case RegexNode.Capture | AfterChild: - case RegexNode.Greedy | BeforeChild: - case RegexNode.Greedy | AfterChild: + case RegexNode.Atomic | BeforeChild: + case RegexNode.Atomic | AfterChild: break; case RegexNode.Require | BeforeChild: @@ -446,11 +442,13 @@ private void CalculateFC(int NodeType, RegexNode node, int CurIndex) break; case RegexNode.Oneloop: + case RegexNode.Oneloopatomic: case RegexNode.Onelazy: PushFC(new RegexFC(node.Ch, false, node.M == 0, ci)); break; case RegexNode.Notoneloop: + case RegexNode.Notoneloopatomic: case RegexNode.Notonelazy: PushFC(new RegexFC(node.Ch, true, node.M == 0, ci)); break; @@ -469,6 +467,7 @@ private void CalculateFC(int NodeType, RegexNode node, int CurIndex) break; case RegexNode.Setloop: + case RegexNode.Setloopatomic: case RegexNode.Setlazy: PushFC(new RegexFC(node.Str!, node.M == 0, ci)); break; diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs index 
bcbd0ad812f3d..a02da76c76121 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs @@ -6,6 +6,7 @@ // while consuming input. using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; using System.Globalization; using System.Runtime.CompilerServices; @@ -451,71 +452,138 @@ protected override bool FindFirstChar() // We now loop through looking for the first matching character. This is a hot loop, so we lift out as many // branches as we can. Each operation requires knowing whether this is a) right-to-left vs left-to-right, and - // b) case-sensitive vs case-insensitive. So, we split it all out into 4 loops, for each combination of these. - // It's duplicated code, but it allows the inner loop to be much tighter than if everything were combined with - // multiple branches on each operation. We can also then use spans to avoid bounds checks in at least the forward - // iteration direction where the JIT is able to detect the pattern. + // b) case-sensitive vs case-insensitive, and c) a singleton or not. So, we split it all out into 8 loops, for + // each combination of these. It's duplicated code, but it allows the inner loop to be much tighter than if + // everything were combined with multiple branches on each operation. We can also then use spans to avoid bounds + // checks in at least the forward iteration direction where the JIT is able to detect the pattern. 
- if (!_rightToLeft) + if (RegexCharClass.IsSingleton(set)) { - ReadOnlySpan span = runtext.AsSpan(runtextpos, runtextend - runtextpos); - if (!_caseInsensitive) + char ch = RegexCharClass.SingletonChar(set); + + if (!_rightToLeft) { - // left-to-right, case-sensitive - for (int i = 0; i < span.Length; i++) + ReadOnlySpan span = runtext.AsSpan(runtextpos, runtextend - runtextpos); + if (!_caseInsensitive) { - if (RegexCharClass.CharInClass(span[i], set, ref _code.FCPrefixAsciiLookup)) + // singleton, left-to-right, case-sensitive + int i = runtext.AsSpan(runtextpos, runtextend - runtextpos).IndexOf(ch); + if (i >= 0) { runtextpos += i; return true; } } + else + { + // singleton, left-to-right, case-insensitive + TextInfo ti = _culture.TextInfo; + for (int i = 0; i < span.Length; i++) + { + if (ch == ti.ToLower(span[i])) + { + runtextpos += i; + return true; + } + } + } + + runtextpos = runtextend; } else { - // left-to-right, case-insensitive - TextInfo ti = _culture.TextInfo; - for (int i = 0; i < span.Length; i++) + if (!_caseInsensitive) { - if (RegexCharClass.CharInClass(ti.ToLower(span[i]), set, ref _code.FCPrefixAsciiLookup)) + // singleton, right-to-left, case-sensitive + for (int i = runtextpos - 1; i >= runtextbeg; i--) { - runtextpos += i; - return true; + if (ch == runtext![i]) + { + runtextpos = i + 1; + return true; + } + } + } + else + { + // singleton, right-to-left, case-insensitive + TextInfo ti = _culture.TextInfo; + for (int i = runtextpos - 1; i >= runtextbeg; i--) + { + if (ch == ti.ToLower(runtext![i])) + { + runtextpos = i + 1; + return true; + } } } - } - runtextpos = runtextend; + runtextpos = runtextbeg; + } } else { - if (!_caseInsensitive) + if (!_rightToLeft) { - // right-to-left, case-sensitive - for (int i = runtextpos - 1; i >= runtextbeg; i--) + ReadOnlySpan span = runtext.AsSpan(runtextpos, runtextend - runtextpos); + if (!_caseInsensitive) { - if (RegexCharClass.CharInClass(runtext![i], set, ref _code.FCPrefixAsciiLookup)) + // 
set, left-to-right, case-sensitive + for (int i = 0; i < span.Length; i++) { - runtextpos = i + 1; - return true; + if (RegexCharClass.CharInClass(span[i], set, ref _code.FCPrefixAsciiLookup)) + { + runtextpos += i; + return true; + } } } + else + { + // set, left-to-right, case-insensitive + TextInfo ti = _culture.TextInfo; + for (int i = 0; i < span.Length; i++) + { + if (RegexCharClass.CharInClass(ti.ToLower(span[i]), set, ref _code.FCPrefixAsciiLookup)) + { + runtextpos += i; + return true; + } + } + } + + runtextpos = runtextend; } else { - // right-to-left, case-insensitive - TextInfo ti = _culture.TextInfo; - for (int i = runtextpos - 1; i >= runtextbeg; i--) + if (!_caseInsensitive) { - if (RegexCharClass.CharInClass(ti.ToLower(runtext![i]), set, ref _code.FCPrefixAsciiLookup)) + // set, right-to-left, case-sensitive + for (int i = runtextpos - 1; i >= runtextbeg; i--) { - runtextpos = i + 1; - return true; + if (RegexCharClass.CharInClass(runtext![i], set, ref _code.FCPrefixAsciiLookup)) + { + runtextpos = i + 1; + return true; + } + } + } + else + { + // set, right-to-left, case-insensitive + TextInfo ti = _culture.TextInfo; + for (int i = runtextpos - 1; i >= runtextbeg; i--) + { + if (RegexCharClass.CharInClass(ti.ToLower(runtext![i]), set, ref _code.FCPrefixAsciiLookup)) + { + runtextpos = i + 1; + return true; + } } } - } - runtextpos = runtextbeg; + runtextpos = runtextbeg; + } } return false; @@ -1066,6 +1134,7 @@ protected override void Go() } case RegexCode.Oneloop: + case RegexCode.Oneloopatomic: { int c = Operand(1); @@ -1085,14 +1154,17 @@ protected override void Go() } } - if (c > i) + if (c > i && Operator() == RegexCode.Oneloop) + { TrackPush(c - i - 1, Textpos() - Bump()); + } advance = 2; continue; } case RegexCode.Notoneloop: + case RegexCode.Notoneloopatomic: { int c = Operand(1); @@ -1112,14 +1184,17 @@ protected override void Go() } } - if (c > i) + if (c > i && Operator() == RegexCode.Notoneloop) + { TrackPush(c - i - 1, Textpos() - 
Bump()); + } advance = 2; continue; } case RegexCode.Setloop: + case RegexCode.Setloopatomic: { int c = Operand(1); @@ -1149,8 +1224,10 @@ protected override void Go() } } - if (c > i) + if (c > i && Operator() == RegexCode.Setloop) + { TrackPush(c - i - 1, Textpos() - Bump()); + } advance = 2; continue; @@ -1286,6 +1363,7 @@ protected override void Go() } #if DEBUG + [ExcludeFromCodeCoverage] internal override void DumpState() { base.DumpState(); diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs index 3963cbc9a1f9d..a11d25f6147ca 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs @@ -10,8 +10,8 @@ namespace System.Text.RegularExpressions { internal sealed class RegexLWCGCompiler : RegexCompiler { - private static int s_regexCount = 0; private static readonly Type[] s_paramTypes = new Type[] { typeof(RegexRunner) }; + private static int s_regexCount = 0; /// The top-level driver. Initializes everything then calls the Generate* methods. 
public RegexRunnerFactory FactoryInstanceFromCode(RegexCode code, RegexOptions options, bool hasTimeout) @@ -32,16 +32,13 @@ public RegexRunnerFactory FactoryInstanceFromCode(RegexCode code, RegexOptions o DynamicMethod goMethod = DefineDynamicMethod("Go" + regexnumString, null, typeof(CompiledRegexRunner)); GenerateGo(); - DynamicMethod firstCharMethod = DefineDynamicMethod("FindFirstChar" + regexnumString, typeof(bool), typeof(CompiledRegexRunner)); + DynamicMethod findFirstCharMethod = DefineDynamicMethod("FindFirstChar" + regexnumString, typeof(bool), typeof(CompiledRegexRunner)); GenerateFindFirstChar(); - DynamicMethod trackCountMethod = DefineDynamicMethod("InitTrackCount" + regexnumString, null, typeof(CompiledRegexRunner)); + DynamicMethod initTrackCountMethod = DefineDynamicMethod("InitTrackCount" + regexnumString, null, typeof(CompiledRegexRunner)); GenerateInitTrackCount(); - return new CompiledRegexRunnerFactory( - (Action)goMethod.CreateDelegate(typeof(Action)), - (Func)firstCharMethod.CreateDelegate(typeof(Func)), - (Action)trackCountMethod.CreateDelegate(typeof(Action))); + return new CompiledRegexRunnerFactory(goMethod, findFirstCharMethod, initTrackCountMethod); } /// Begins the definition of a new method (no args) with a specified return value. 
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs index e9e48724d05f3..dbd3339b5fe5c 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs @@ -41,6 +41,7 @@ using System.Collections.Generic; using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; using System.Globalization; namespace System.Text.RegularExpressions @@ -77,6 +78,10 @@ internal sealed class RegexNode public const int EndZ = RegexCode.EndZ; // \Z public const int End = RegexCode.End; // \z + public const int Oneloopatomic = RegexCode.Oneloopatomic; // c,n (?> a*) + public const int Notoneloopatomic = RegexCode.Notoneloopatomic; // c,n (?> .*) + public const int Setloopatomic = RegexCode.Setloopatomic; // set,n (?> \d*) + // Interior nodes do not correspond to primitive operations, but // control structures compositing other operations @@ -95,49 +100,49 @@ internal sealed class RegexNode public const int Group = 29; // (?:) - noncapturing group public const int Require = 30; // (?=) (?<=) - lookahead and lookbehind assertions public const int Prevent = 31; // (?!) (?) - greedy subexpression + public const int Atomic = 32; // (?>) - atomic subexpression public const int Testref = 33; // (?(n) | ) - alternation, reference public const int Testgroup = 34; // (?(...) | )- alternation, expression - public int NType; - public List? Children; - public string? Str; - public char Ch; - public int M; - public int N; + private object? Children; + public int Type { get; private set; } + public string? Str { get; private set; } + public char Ch { get; private set; } + public int M { get; private set; } + public int N { get; private set; } public readonly RegexOptions Options; public RegexNode? 
Next; public RegexNode(int type, RegexOptions options) { - NType = type; + Type = type; Options = options; } public RegexNode(int type, RegexOptions options, char ch) { - NType = type; + Type = type; Options = options; Ch = ch; } public RegexNode(int type, RegexOptions options, string str) { - NType = type; + Type = type; Options = options; Str = str; } public RegexNode(int type, RegexOptions options, int m) { - NType = type; + Type = type; Options = options; M = m; } public RegexNode(int type, RegexOptions options, int m, int n) { - NType = type; + Type = type; Options = options; M = m; N = n; @@ -150,9 +155,9 @@ public bool UseOptionR() public RegexNode ReverseLeft() { - if (UseOptionR() && NType == Concatenate && Children != null) + if (UseOptionR() && Type == Concatenate && ChildCount() > 1) { - Children.Reverse(0, Children.Count); + ((List)Children!).Reverse(); } return this; @@ -163,11 +168,71 @@ public RegexNode ReverseLeft() /// private void MakeRep(int type, int min, int max) { - NType += (type - One); + Type += (type - One); M = min; N = max; } + /// Performs additional optimizations on an entire tree prior to being used. + internal RegexNode FinalOptimize() + { + RegexNode rootNode = this; + + // If we find backtracking construct at the end of the regex, we can instead make it non-backtracking, + // since nothing would ever backtrack into it anyway. Doing this then makes the construct available + // to implementations that don't support backtracking. 
+ if ((Options & RegexOptions.RightToLeft) == 0 && // only apply optimization when LTR to avoid needing additional code for the rarer RTL case + (Options & RegexOptions.Compiled) != 0) // only apply when we're compiling, as that's the only time it would make a meaningful difference + { + RegexNode node = rootNode; + while (true) + { + switch (node.Type) + { + case Oneloop: + node.Type = Oneloopatomic; + break; + + case Notoneloop: + node.Type = Notoneloopatomic; + break; + + case Setloop: + node.Type = Setloopatomic; + break; + + case Capture: + case Concatenate: + RegexNode existingChild = node.Child(node.ChildCount() - 1); + switch (existingChild.Type) + { + default: + node = existingChild; + break; + + case Alternate: + case Loop: + case Lazyloop: + var atomic = new RegexNode(Atomic, Options); + atomic.AddChild(existingChild); + node.ReplaceChild(node.ChildCount() - 1, atomic); + break; + } + continue; + + case Atomic: + node = node.Child(0); + continue; + } + + break; + } + } + + // Done optimizing. Return the final tree. + return rootNode; + } + /// /// Removes redundant nodes from the subtree, and returns a reduced subtree. /// @@ -175,7 +240,7 @@ private RegexNode Reduce() { RegexNode n; - switch (Type()) + switch (Type) { case Alternate: n = ReduceAlternation(); @@ -187,7 +252,11 @@ private RegexNode Reduce() case Loop: case Lazyloop: - n = ReduceRep(); + n = ReduceLoops(); + break; + + case Atomic: + n = ReduceAtomic(); break; case Group: @@ -226,77 +295,176 @@ private RegexNode StripEnation(int emptyType) => /// private RegexNode ReduceGroup() { - RegexNode u; + RegexNode u = this; - for (u = this; u.Type() == Group;) + while (u.Type == Group) + { + Debug.Assert(u.ChildCount() == 1); u = u.Child(0); + } return u; } /// - /// Nested repeaters just get multiplied with each other if they're not - /// too lumpy + /// Simple optimization. 
If an atomic subexpression contains only a one/notone/set loop, + /// change it to be an atomic one/notone/set loop and remove the atomic node. /// - private RegexNode ReduceRep() + private RegexNode ReduceAtomic() + { + Debug.Assert(Type == Atomic); + Debug.Assert(ChildCount() == 1); + + RegexNode child = Child(0); + switch (child.Type) + { + case Oneloop: + child.Type = Oneloopatomic; + return child; + + case Notoneloop: + child.Type = Notoneloopatomic; + return child; + + case Setloop: + child.Type = Setloopatomic; + return child; + + case Oneloopatomic: + case Notoneloopatomic: + case Setloopatomic: + return child; + } + + return this; + } + + /// + /// Nested repeaters just get multiplied with each other if they're not too lumpy. + /// Other optimizations may have also resulted in {Lazy}loops directly containing + /// sets, ones, and notones, in which case they can be transformed into the corresponding + /// individual looping constructs. + /// + private RegexNode ReduceLoops() { RegexNode u = this; - RegexNode child; - int type = Type(); + int type = Type; + Debug.Assert(type == Loop || type == Lazyloop); + int min = M; int max = N; - while (true) + while (u.ChildCount() > 0) { - if (u.ChildCount() == 0) - break; - - child = u.Child(0); + RegexNode child = u.Child(0); // multiply reps of the same type only - if (child.Type() != type) + if (child.Type != type) { - int childType = child.Type(); + bool valid = false; + if (type == Loop) + { + switch (child.Type) + { + case Oneloop: + case Oneloopatomic: + case Notoneloop: + case Notoneloopatomic: + case Setloop: + case Setloopatomic: + valid = true; + break; + } + } + else // type == Lazyloop + { + switch (child.Type) + { + case Onelazy: + case Notonelazy: + case Setlazy: + valid = true; + break; + } + } - if (!(childType >= Oneloop && childType <= Setloop && type == Loop || - childType >= Onelazy && childType <= Setlazy && type == Lazyloop)) + if (!valid) + { break; + } } // child can be too lumpy to blur, 
e.g., (a {100,105}) {3} or (a {2,})? // [but things like (a {2,})+ are not too lumpy...] if (u.M == 0 && child.M > 1 || child.N < child.M * 2) + { break; + } u = child; + if (u.M > 0) + { u.M = min = ((int.MaxValue - 1) / u.M < min) ? int.MaxValue : u.M * min; + } + if (u.N > 0) + { u.N = max = ((int.MaxValue - 1) / u.N < max) ? int.MaxValue : u.N * max; + } + } + + if (min == int.MaxValue) + { + return new RegexNode(Nothing, Options); + } + + // If the Loop or Lazyloop now only has one child node and its a Set, One, or Notone, + // reduce to just Setloop/lazy, Oneloop/lazy, or Notoneloop/lazy. The parser will + // generally have only produced the latter, but other reductions could have exposed + // this. + if (u.ChildCount() == 1) + { + RegexNode child = u.Child(0); + switch (child.Type) + { + case One: + case Notone: + case Set: + child.MakeRep(u.Type == Lazyloop ? Onelazy : Oneloop, u.M, u.N); + u = child; + break; + } } - return min == int.MaxValue ? new RegexNode(Nothing, Options) : u; + return u; } /// - /// Simple optimization. If a set is an inverse singleton or empty, it's transformed accordingly. + /// Simple optimization. If a set is a singleton, an inverse singleton, or empty, it's transformed accordingly. 
/// private RegexNode ReduceSet() { - // Extract empty-set, one and not-one case as special + // Extract empty-set, one, and not-one case as special Debug.Assert(Str != null); if (RegexCharClass.IsEmpty(Str)) { - NType = Nothing; + Type = Nothing; Str = null; } + else if (RegexCharClass.IsSingleton(Str)) + { + Ch = RegexCharClass.SingletonChar(Str); + Str = null; + Type += (One - Set); + } else if (RegexCharClass.IsSingletonInverse(Str)) { Ch = RegexCharClass.SingletonChar(Str); Str = null; - NType += (Notone - Set); + Type += (Notone - Set); } return this; @@ -313,8 +481,16 @@ private RegexNode ReduceSet() /// private RegexNode ReduceAlternation() { - if (Children == null) + int childCount = ChildCount(); + if (childCount == 0) + { return new RegexNode(Nothing, Options); + } + + if (childCount == 1) + { + return Child(0); + } bool wasLastSet = false; bool lastNodeCannotMerge = false; @@ -325,29 +501,40 @@ private RegexNode ReduceAlternation() RegexNode at; RegexNode prev; - for (i = 0, j = 0; i < Children.Count; i++, j++) + List children = (List)Children!; + for (i = 0, j = 0; i < children.Count; i++, j++) { - at = Children[i]; + at = children[i]; if (j < i) - Children[j] = at; + children[j] = at; while (true) { - if (at.NType == Alternate) + if (at.Type == Alternate) { - for (int k = 0; k < at.Children!.Count; k++) - at.Children[k].Next = this; - - Children.InsertRange(i + 1, at.Children); + if (at.Children is List atChildren) + { + for (int k = 0; k < atChildren.Count; k++) + { + atChildren[k].Next = this; + } + children.InsertRange(i + 1, atChildren); + } + else + { + RegexNode atChild = (RegexNode)at.Children!; + atChild.Next = this; + children.Insert(i + 1, atChild); + } j--; } - else if (at.NType == Set || at.NType == One) + else if (at.Type == Set || at.Type == One) { // Cannot merge sets if L or I options differ, or if either are negated. 
optionsAt = at.Options & (RegexOptions.RightToLeft | RegexOptions.IgnoreCase); - if (at.NType == Set) + if (at.Type == Set) { if (!wasLastSet || optionsLast != optionsAt || lastNodeCannotMerge || !RegexCharClass.IsMergeable(at.Str)) { @@ -369,10 +556,10 @@ private RegexNode ReduceAlternation() // The last node was a Set or a One, we're a Set or One and our options are the same. // Merge the two nodes. j--; - prev = Children[j]; + prev = children[j]; RegexCharClass prevCharClass; - if (prev.NType == One) + if (prev.Type == One) { prevCharClass = new RegexCharClass(); prevCharClass.AddChar(prev.Ch); @@ -382,7 +569,7 @@ private RegexNode ReduceAlternation() prevCharClass = RegexCharClass.Parse(prev.Str!); } - if (at.NType == One) + if (at.Type == One) { prevCharClass.AddChar(at.Ch); } @@ -392,10 +579,10 @@ private RegexNode ReduceAlternation() prevCharClass.AddCharClass(atCharClass); } - prev.NType = Set; + prev.Type = Set; prev.Str = prevCharClass.ToStringClass(); } - else if (at.NType == Nothing) + else if (at.Type == Nothing) { j--; } @@ -409,7 +596,9 @@ private RegexNode ReduceAlternation() } if (j < i) - Children.RemoveRange(j, i - j); + { + children.RemoveRange(j, i - j); + } return StripEnation(Nothing); } @@ -422,39 +611,54 @@ private RegexNode ReduceAlternation() /// private RegexNode ReduceConcatenation() { - if (Children == null) + int childCount = ChildCount(); + if (childCount == 0) + { return new RegexNode(Empty, Options); + } + + if (childCount == 1) + { + return Child(0); + } bool wasLastString = false; RegexOptions optionsLast = 0; - RegexOptions optionsAt; - int i; - int j; + int i, j; - for (i = 0, j = 0; i < Children.Count; i++, j++) + List children = (List)Children!; + for (i = 0, j = 0; i < children.Count; i++, j++) { - RegexNode at; - RegexNode prev; - - at = Children[i]; + RegexNode at = children[i]; if (j < i) - Children[j] = at; + { + children[j] = at; + } - if (at.NType == Concatenate && + if (at.Type == Concatenate && ((at.Options & 
RegexOptions.RightToLeft) == (Options & RegexOptions.RightToLeft))) { - for (int k = 0; k < at.Children!.Count; k++) - at.Children[k].Next = this; - - Children.InsertRange(i + 1, at.Children); + if (at.Children is List atChildren) + { + for (int k = 0; k < atChildren.Count; k++) + { + atChildren[k].Next = this; + } + children.InsertRange(i + 1, atChildren); + } + else + { + RegexNode atChild = (RegexNode)at.Children!; + atChild.Next = this; + children.Insert(i + 1, atChild); + } j--; } - else if (at.NType == Multi || - at.NType == One) + else if (at.Type == Multi || at.Type == One) { // Cannot merge strings if L or I options differ - optionsAt = at.Options & (RegexOptions.RightToLeft | RegexOptions.IgnoreCase); + RegexOptions optionsAt = at.Options & (RegexOptions.RightToLeft | RegexOptions.IgnoreCase); if (!wasLastString || optionsLast != optionsAt) { @@ -463,30 +667,24 @@ private RegexNode ReduceConcatenation() continue; } - prev = Children[--j]; + RegexNode prev = children[--j]; - if (prev.NType == One) + if (prev.Type == One) { - prev.NType = Multi; + prev.Type = Multi; prev.Str = prev.Ch.ToString(); } if ((optionsAt & RegexOptions.RightToLeft) == 0) { - if (at.NType == One) - prev.Str += at.Ch.ToString(); - else - prev.Str += at.Str; + prev.Str += (at.Type == One) ? at.Ch.ToString() : at.Str; } else { - if (at.NType == One) - prev.Str = at.Ch.ToString() + prev.Str; - else - prev.Str = at.Str + prev.Str; + prev.Str = (at.Type == One) ? at.Ch.ToString() + prev.Str : at.Str + prev.Str; } } - else if (at.NType == Empty) + else if (at.Type == Empty) { j--; } @@ -497,11 +695,152 @@ private RegexNode ReduceConcatenation() } if (j < i) - Children.RemoveRange(j, i - j); + { + children.RemoveRange(j, i - j); + } + + // Now try to convert as many loops as possible to be atomic to avoid unnecessary backtracking. 
+ if ((Options & RegexOptions.RightToLeft) == 0) + { + ReduceConcatenateWithAutoAtomic(); + } + // If the concatenation is now empty, return an empty node, or if it's got a single child, return that child. + // Otherwise, return this. return StripEnation(Empty); } + /// + /// Finds oneloop and setloop nodes in the concatenation that can be automatically upgraded + /// to oneloopatomic and setloopatomic nodes. Such changes avoid potential useless backtracking. + /// This looks for cases like A*B, where A and B are known to not overlap: in such cases, + /// we can effectively convert this to (?>A*)B. + /// + private void ReduceConcatenateWithAutoAtomic() + { + Debug.Assert(Type == Concatenate); + Debug.Assert((Options & RegexOptions.RightToLeft) == 0); + Debug.Assert(Children is List); + + List children = (List)Children; + for (int i = 0; i < children.Count - 1; i++) + { + RegexNode node = children[i], subsequent = children[i + 1]; + + // Skip down the node past irrelevant capturing groups. We don't need to + // skip Groups, as they should have already been reduced away. + while (node.Type == Capture) + { + Debug.Assert(node.ChildCount() == 1); + node = node.Child(0); + } + Debug.Assert(node.Type != Group); + + // Skip the successor down to the guaranteed next node. + while (subsequent.ChildCount() > 0) + { + Debug.Assert(subsequent.Type != Group); + switch (subsequent.Type) + { + case Capture: + case Atomic: + case Require: + case Concatenate: + case Loop when subsequent.M > 0: + case Lazyloop when subsequent.M > 0: + subsequent = subsequent.Child(0); + continue; + } + + break; + } + + // If the two nodes don't agree on case-insensitivity, don't try to optimize. + // If they're both case sensitive or both case insensitive, then their tokens + // will be comparable. 
+ if ((node.Options & RegexOptions.IgnoreCase) != (subsequent.Options & RegexOptions.IgnoreCase)) + { + continue; + } + + // If this node is a one/notone/setloop, see if it overlaps with its successor in the concatenation. + // If it doesn't, then we can upgrade it to being a one/notone/setloopatomic. + // Doing so avoids unnecessary backtracking. + switch (node.Type) + { + case Oneloop: + switch (subsequent.Type) + { + case One when node.Ch != subsequent.Ch: + case Onelazy when subsequent.M > 0 && node.Ch != subsequent.Ch: + case Oneloop when subsequent.M > 0 && node.Ch != subsequent.Ch: + case Oneloopatomic when subsequent.M > 0 && node.Ch != subsequent.Ch: + case Notone when node.Ch == subsequent.Ch: + case Notonelazy when subsequent.M > 0 && node.Ch == subsequent.Ch: + case Notoneloop when subsequent.M > 0 && node.Ch == subsequent.Ch: + case Notoneloopatomic when subsequent.M > 0 && node.Ch == subsequent.Ch: + case Multi when node.Ch != subsequent.Str![0]: + case Set when !RegexCharClass.CharInClass(node.Ch, subsequent.Str!): + case Setlazy when subsequent.M > 0 && !RegexCharClass.CharInClass(node.Ch, subsequent.Str!): + case Setloop when subsequent.M > 0 && !RegexCharClass.CharInClass(node.Ch, subsequent.Str!): + case Setloopatomic when subsequent.M > 0 && !RegexCharClass.CharInClass(node.Ch, subsequent.Str!): + case End: + case EndZ when node.Ch != '\n': + case Eol when node.Ch != '\n': + case Boundary when RegexCharClass.IsWordChar(node.Ch): + case Nonboundary when !RegexCharClass.IsWordChar(node.Ch): + case ECMABoundary when RegexCharClass.IsECMAWordChar(node.Ch): + case NonECMABoundary when !RegexCharClass.IsECMAWordChar(node.Ch): + node.Type = Oneloopatomic; + break; + } + break; + + case Notoneloop: + switch (subsequent.Type) + { + case One when node.Ch == subsequent.Ch: + case Onelazy when subsequent.M > 0 && node.Ch == subsequent.Ch: + case Oneloop when subsequent.M > 0 && node.Ch == subsequent.Ch: + case Oneloopatomic when subsequent.M > 0 && node.Ch 
== subsequent.Ch: + case Multi when node.Ch == subsequent.Str![0]: + case End: + node.Type = Notoneloopatomic; + break; + } + break; + + case Setloop: + switch (subsequent.Type) + { + case One when !RegexCharClass.CharInClass(subsequent.Ch, node.Str!): + case Onelazy when subsequent.M > 0 && !RegexCharClass.CharInClass(subsequent.Ch, node.Str!): + case Oneloop when subsequent.M > 0 && !RegexCharClass.CharInClass(subsequent.Ch, node.Str!): + case Oneloopatomic when subsequent.M > 0 && !RegexCharClass.CharInClass(subsequent.Ch, node.Str!): + case Notone when RegexCharClass.CharInClass(subsequent.Ch, node.Str!): + case Notonelazy when subsequent.M > 0 && RegexCharClass.CharInClass(subsequent.Ch, node.Str!): + case Notoneloop when subsequent.M > 0 && RegexCharClass.CharInClass(subsequent.Ch, node.Str!): + case Notoneloopatomic when subsequent.M > 0 && RegexCharClass.CharInClass(subsequent.Ch, node.Str!): + case Multi when !RegexCharClass.CharInClass(subsequent.Str![0], node.Str!): + case Set when !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!): + case Setlazy when subsequent.M > 0 && !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!): + case Setloop when subsequent.M > 0 && !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!): + case Setloopatomic when subsequent.M > 0 && !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!): + case End: + case EndZ when !RegexCharClass.CharInClass('\n', node.Str!): + case Eol when !RegexCharClass.CharInClass('\n', node.Str!): + case Boundary when node.Str == RegexCharClass.WordClass || node.Str == RegexCharClass.DigitClass: // TODO: Expand these with a more inclusive overlap check that considers categories + case Nonboundary when node.Str == RegexCharClass.NotWordClass || node.Str == RegexCharClass.NotDigitClass: + case ECMABoundary when node.Str == RegexCharClass.ECMAWordClass || node.Str == RegexCharClass.ECMADigitClass: + case NonECMABoundary when node.Str == RegexCharClass.NotECMAWordClass || node.Str == 
RegexCharClass.NotDigitClass: + node.Type = Setloopatomic; + break; + } + break; + } + } + } + public RegexNode MakeQuantifier(bool lazy, int min, int max) { if (min == 0 && max == 0) @@ -510,7 +849,7 @@ public RegexNode MakeQuantifier(bool lazy, int min, int max) if (min == 1 && max == 1) return this; - switch (NType) + switch (Type) { case One: case Notone: @@ -527,147 +866,201 @@ public RegexNode MakeQuantifier(bool lazy, int min, int max) public void AddChild(RegexNode newChild) { - if (Children == null) - Children = new List(4); - RegexNode reducedChild = newChild.Reduce(); - Children.Add(reducedChild); reducedChild.Next = this; + + if (Children is null) + { + Children = reducedChild; + } + else if (Children is RegexNode currentChild) + { + Children = new List() { currentChild, reducedChild }; + } + else + { + ((List)Children).Add(reducedChild); + } } - public RegexNode Child(int i) + public void ReplaceChild(int index, RegexNode newChild) { - return Children![i]; + Debug.Assert(Children != null); + Debug.Assert(index < ChildCount()); + + newChild.Next = this; + if (Children is RegexNode) + { + Children = newChild; + } + else + { + ((List)Children)[index] = newChild; + } } - public int ChildCount() + public RegexNode Child(int i) { - return Children == null ? 
0 : Children.Count; + if (Children is RegexNode child) + { + return child; + } + + return ((List)Children!)[i]; } - public int Type() + public int ChildCount() { - return NType; + if (Children is null) + { + return 0; + } + + if (Children is List children) + { + return children.Count; + } + + Debug.Assert(Children is RegexNode); + return 1; } #if DEBUG - private const string Space = " "; - private static readonly string[] s_typeStr = new string[] { - "Onerep", "Notonerep", "Setrep", - "Oneloop", "Notoneloop", "Setloop", - "Onelazy", "Notonelazy", "Setlazy", - "One", "Notone", "Set", - "Multi", "Ref", - "Bol", "Eol", "Boundary", "Nonboundary", - "ECMABoundary", "NonECMABoundary", - "Beginning", "Start", "EndZ", "End", - "Nothing", "Empty", - "Alternate", "Concatenate", - "Loop", "Lazyloop", - "Capture", "Group", "Require", "Prevent", "Greedy", - "Testref", "Testgroup"}; - - private string Description() + [ExcludeFromCodeCoverage] + public string Description() { - StringBuilder ArgSb = new StringBuilder(); - - ArgSb.Append(s_typeStr[NType]); - - if ((Options & RegexOptions.ExplicitCapture) != 0) - ArgSb.Append("-C"); - if ((Options & RegexOptions.IgnoreCase) != 0) - ArgSb.Append("-I"); - if ((Options & RegexOptions.RightToLeft) != 0) - ArgSb.Append("-L"); - if ((Options & RegexOptions.Multiline) != 0) - ArgSb.Append("-M"); - if ((Options & RegexOptions.Singleline) != 0) - ArgSb.Append("-S"); - if ((Options & RegexOptions.IgnorePatternWhitespace) != 0) - ArgSb.Append("-X"); - if ((Options & RegexOptions.ECMAScript) != 0) - ArgSb.Append("-E"); - - switch (NType) + + string typeStr = Type switch + { + Oneloop => nameof(Oneloop), + Notoneloop => nameof(Notoneloop), + Setloop => nameof(Setloop), + Onelazy => nameof(Onelazy), + Notonelazy => nameof(Notonelazy), + Setlazy => nameof(Setlazy), + One => nameof(One), + Notone => nameof(Notone), + Set => nameof(Set), + Multi => nameof(Multi), + Ref => nameof(Ref), + Bol => nameof(Bol), + Eol => nameof(Eol), + Boundary => 
nameof(Boundary), + Nonboundary => nameof(Nonboundary), + ECMABoundary => nameof(ECMABoundary), + NonECMABoundary => nameof(NonECMABoundary), + Beginning => nameof(Beginning), + Start => nameof(Start), + EndZ => nameof(EndZ), + End => nameof(End), + Oneloopatomic => nameof(Oneloopatomic), + Notoneloopatomic => nameof(Notoneloopatomic), + Setloopatomic => nameof(Setloopatomic), + Nothing => nameof(Nothing), + Empty => nameof(Empty), + Lazyloop => nameof(Lazyloop), + Capture => nameof(Capture), + Group => nameof(Group), + Require => nameof(Require), + Prevent => nameof(Prevent), + Atomic => nameof(Atomic), + Testref => nameof(Testref), + Testgroup => nameof(Testgroup), + _ => "(unknown)" + }; + + var argSb = new StringBuilder().Append(typeStr); + + if ((Options & RegexOptions.ExplicitCapture) != 0) argSb.Append("-C"); + if ((Options & RegexOptions.IgnoreCase) != 0) argSb.Append("-I"); + if ((Options & RegexOptions.RightToLeft) != 0) argSb.Append("-L"); + if ((Options & RegexOptions.Multiline) != 0) argSb.Append("-M"); + if ((Options & RegexOptions.Singleline) != 0) argSb.Append("-S"); + if ((Options & RegexOptions.IgnorePatternWhitespace) != 0) argSb.Append("-X"); + if ((Options & RegexOptions.ECMAScript) != 0) argSb.Append("-E"); + + switch (Type) { case Oneloop: + case Oneloopatomic: case Notoneloop: + case Notoneloopatomic: case Onelazy: case Notonelazy: case One: case Notone: - ArgSb.Append("(Ch = " + RegexCharClass.CharDescription(Ch) + ")"); + argSb.Append("(Ch = " + RegexCharClass.CharDescription(Ch) + ")"); break; case Capture: - ArgSb.Append("(index = " + M.ToString(CultureInfo.InvariantCulture) + ", unindex = " + N.ToString(CultureInfo.InvariantCulture) + ")"); + argSb.Append("(index = " + M.ToString(CultureInfo.InvariantCulture) + ", unindex = " + N.ToString(CultureInfo.InvariantCulture) + ")"); break; case Ref: case Testref: - ArgSb.Append("(index = " + M.ToString(CultureInfo.InvariantCulture) + ")"); + argSb.Append("(index = " + 
M.ToString(CultureInfo.InvariantCulture) + ")"); break; case Multi: - ArgSb.Append("(String = " + Str + ")"); + argSb.Append("(String = " + Str + ")"); break; case Set: case Setloop: + case Setloopatomic: case Setlazy: - ArgSb.Append("(Set = " + RegexCharClass.SetDescription(Str!) + ")"); + argSb.Append("(Set = " + RegexCharClass.SetDescription(Str!) + ")"); break; } - switch (NType) + switch (Type) { case Oneloop: + case Oneloopatomic: case Notoneloop: + case Notoneloopatomic: case Onelazy: case Notonelazy: case Setloop: + case Setloopatomic: case Setlazy: case Loop: case Lazyloop: - ArgSb.Append("(Min = " + M.ToString(CultureInfo.InvariantCulture) + ", Max = " + (N == int.MaxValue ? "inf" : Convert.ToString(N, CultureInfo.InvariantCulture)) + ")"); + argSb.Append("(Min = " + M.ToString(CultureInfo.InvariantCulture) + ", Max = " + (N == int.MaxValue ? "inf" : Convert.ToString(N, CultureInfo.InvariantCulture)) + ")"); break; } - return ArgSb.ToString(); + return argSb.ToString(); } + [ExcludeFromCodeCoverage] public void Dump() { - List Stack = new List(); - RegexNode? CurNode; - int CurChild; - - CurNode = this; - CurChild = 0; + List stack = new List(); + RegexNode? 
curNode = this; + int curChild = 0; - Debug.WriteLine(CurNode.Description()); + Debug.WriteLine(curNode.Description()); while (true) { - if (CurNode!.Children != null && CurChild < CurNode.Children.Count) + if (curChild < curNode!.ChildCount()) { - Stack.Add(CurChild + 1); - CurNode = CurNode.Children[CurChild]; - CurChild = 0; - - int Depth = Stack.Count; - if (Depth > 32) - Depth = 32; + stack.Add(curChild + 1); + curNode = curNode.Child(curChild); + curChild = 0; - Debug.WriteLine(Space.Substring(0, Depth) + CurNode.Description()); + Debug.WriteLine(new string(' ', stack.Count) + curNode.Description()); } else { - if (Stack.Count == 0) + if (stack.Count == 0) + { break; + } - CurChild = Stack[Stack.Count - 1]; - Stack.RemoveAt(Stack.Count - 1); - CurNode = CurNode.Next; + curChild = stack[stack.Count - 1]; + stack.RemoveAt(stack.Count - 1); + curNode = curNode.Next; } } } #endif - } + } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParseException.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParseException.cs index 0a723cd2bfdb7..6304b27d8e821 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParseException.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParseException.cs @@ -9,7 +9,7 @@ namespace System.Text.RegularExpressions [Serializable] internal sealed class RegexParseException : ArgumentException { - private readonly RegexParseError _error; + private readonly RegexParseError _error; // tests access this via private reflection /// /// The error that happened during parsing. 
@@ -27,28 +27,14 @@ public RegexParseException(RegexParseError error, int offset, string message) : Offset = offset; } - public RegexParseException() : base() - { - } - - public RegexParseException(string message) : base(message) - { - } - - public RegexParseException(string message, Exception inner) : base(message, inner) - { - } - - private RegexParseException(SerializationInfo info, StreamingContext context) - : base(info, context) + private RegexParseException(SerializationInfo info, StreamingContext context) : base(info, context) { } public override void GetObjectData(SerializationInfo info, StreamingContext context) { base.GetObjectData(info, context); - // To maintain serialization support with netfx. - info.SetType(typeof(ArgumentException)); + info.SetType(typeof(ArgumentException)); // To maintain serialization support with netfx. } } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs index edfe8bcfc442c..393a468e7b095 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs @@ -499,7 +499,7 @@ private RegexNode ScanRegex() AddGroup(); - return Unit()!; + return Unit()!.FinalOptimize(); } /* @@ -802,8 +802,8 @@ private RegexNode ScanReplacement() break; case '>': - // greedy subexpression - nodeType = RegexNode.Greedy; + // atomic subexpression + nodeType = RegexNode.Atomic; break; case '\'': @@ -1020,7 +1020,7 @@ private RegexNode ScanReplacement() nodeType = RegexNode.Group; // Disallow options in the children of a testgroup node - if (_group!.NType != RegexNode.Testgroup) + if (_group!.Type != RegexNode.Testgroup) { ScanOptions(); } @@ -2155,7 +2155,7 @@ private void PopGroup() _stack = _group!.Next; // The first () inside a Testgroup group goes 
directly to the group - if (_group.Type() == RegexNode.Testgroup && _group.ChildCount() == 0) + if (_group.Type == RegexNode.Testgroup && _group.ChildCount() == 0) { if (_unit == null) { @@ -2183,7 +2183,7 @@ private void AddAlternate() { // The | parts inside a Testgroup group go directly to the group - if (_group!.Type() == RegexNode.Testgroup || _group.Type() == RegexNode.Testref) + if (_group!.Type == RegexNode.Testgroup || _group.Type == RegexNode.Testref) { _group.AddChild(_concatenation!.ReverseLeft()); } @@ -2257,11 +2257,11 @@ private void AddUnitType(int type) /// Finish the current group (in response to a ')' or end) private void AddGroup() { - if (_group!.Type() == RegexNode.Testgroup || _group.Type() == RegexNode.Testref) + if (_group!.Type == RegexNode.Testgroup || _group.Type == RegexNode.Testref) { _group.AddChild(_concatenation!.ReverseLeft()); - if (_group.Type() == RegexNode.Testref && _group.ChildCount() > 2 || _group.ChildCount() > 3) + if (_group.Type == RegexNode.Testref && _group.ChildCount() > 2 || _group.ChildCount() > 3) { throw MakeException(RegexParseError.TooManyAlternates, SR.TooManyAlternates); } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexReplacement.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexReplacement.cs index e299bc9642734..88013dd6335b5 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexReplacement.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexReplacement.cs @@ -30,7 +30,7 @@ internal sealed class RegexReplacement /// public RegexReplacement(string rep, RegexNode concat, Hashtable _caps) { - if (concat.Type() != RegexNode.Concatenate) + if (concat.Type != RegexNode.Concatenate) throw new ArgumentException(SR.ReplacementError); Span vsbStack = stackalloc char[256]; @@ -42,7 +42,7 @@ public RegexReplacement(string rep, RegexNode concat, 
Hashtable _caps) { RegexNode child = concat.Child(i); - switch (child.Type()) + switch (child.Type) { case RegexNode.Multi: vsb.Append(child.Str!); diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexRunner.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexRunner.cs index db3d8b4c82b43..6f2bb75f97b69 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexRunner.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexRunner.cs @@ -15,6 +15,7 @@ // backtracked results from) the Match instance. using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; using System.Globalization; namespace System.Text.RegularExpressions @@ -547,6 +548,7 @@ protected int MatchLength(int cap) /// /// Dump the current state /// + [ExcludeFromCodeCoverage] internal virtual void DumpState() { Debug.WriteLine("Text: " + TextposDescription()); @@ -554,6 +556,7 @@ internal virtual void DumpState() Debug.WriteLine("Stack: " + StackDescription(runstack!, runstackpos)); } + [ExcludeFromCodeCoverage] private static string StackDescription(int[] a, int index) { var sb = new StringBuilder(); @@ -579,6 +582,7 @@ private static string StackDescription(int[] a, int index) return sb.ToString(); } + [ExcludeFromCodeCoverage] internal virtual string TextposDescription() { var sb = new StringBuilder(); diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTree.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTree.cs index 09c29b91cc7b6..e0ddabe1afc6f 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTree.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTree.cs @@ -6,6 +6,7 @@ // global information attached. 
using System.Collections; +using System.Diagnostics.CodeAnalysis; namespace System.Text.RegularExpressions { @@ -31,18 +32,11 @@ internal RegexTree(RegexNode root, Hashtable caps, int[] capNumList, int capTop, } #if DEBUG - public void Dump() - { - Root.Dump(); - } + [ExcludeFromCodeCoverage] + public void Dump() => Root.Dump(); - public bool Debug - { - get - { - return (Options & RegexOptions.Debug) != 0; - } - } + [ExcludeFromCodeCoverage] + public bool Debug => (Options & RegexOptions.Debug) != 0; #endif } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs index 917379bfed9e4..7c71cf5ef9378 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs @@ -28,8 +28,7 @@ internal ref struct RegexWriter private ValueListBuilder _emitted; private ValueListBuilder _intStack; - private readonly Dictionary _stringHash; - private readonly List _stringTable; + private readonly Dictionary _stringTable; private Hashtable? 
_caps; private int _trackCount; @@ -37,8 +36,7 @@ private RegexWriter(Span emittedSpan, Span intStackSpan) { _emitted = new ValueListBuilder(emittedSpan); _intStack = new ValueListBuilder(intStackSpan); - _stringHash = new Dictionary(); - _stringTable = new List(); + _stringTable = new Dictionary(); _caps = null; _trackCount = 0; } @@ -102,15 +100,16 @@ public RegexCode RegexCodeFromRegexTree(RegexTree tree) while (true) { - if (curNode.Children == null) + int curNodeChildCount = curNode!.ChildCount(); + if (curNodeChildCount == 0) { - EmitFragment(curNode.NType, curNode, 0); + EmitFragment(curNode.Type, curNode, 0); } - else if (curChild < curNode.Children.Count) + else if (curChild < curNodeChildCount) { - EmitFragment(curNode.NType | BeforeChild, curNode, curChild); + EmitFragment(curNode.Type | BeforeChild, curNode, curChild); - curNode = curNode.Children[curChild]; + curNode = curNode.Child(curChild); _intStack.Append(curChild); curChild = 0; continue; @@ -122,7 +121,7 @@ public RegexCode RegexCodeFromRegexTree(RegexTree tree) curChild = _intStack.Pop(); curNode = curNode.Next; - EmitFragment(curNode!.NType | AfterChild, curNode, curChild); + EmitFragment(curNode!.Type | AfterChild, curNode, curChild); curChild++; } @@ -131,20 +130,26 @@ public RegexCode RegexCodeFromRegexTree(RegexTree tree) RegexPrefix? fcPrefix = RegexFCD.FirstChars(tree); RegexPrefix prefix = RegexFCD.Prefix(tree); - bool rtl = ((tree.Options & RegexOptions.RightToLeft) != 0); + bool rtl = (tree.Options & RegexOptions.RightToLeft) != 0; CultureInfo culture = (tree.Options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture; - RegexBoyerMoore? bmPrefix; - if (prefix.Prefix.Length > 0) + RegexBoyerMoore? 
bmPrefix = null; + if (prefix.Prefix.Length > 1) // if it's == 1, we're better off using fcPrefix + { bmPrefix = new RegexBoyerMoore(prefix.Prefix, prefix.CaseInsensitive, rtl, culture); - else - bmPrefix = null; + } int anchors = RegexFCD.Anchors(tree); int[] emitted = _emitted.AsSpan().ToArray(); - return new RegexCode(emitted, _stringTable, _trackCount, _caps, capsize, bmPrefix, fcPrefix, anchors, rtl); + var strings = new string[_stringTable.Count]; + foreach (KeyValuePair stringEntry in _stringTable) + { + strings[stringEntry.Value] = stringEntry.Key; + } + + return new RegexCode(tree, emitted, strings, _trackCount, _caps, capsize, bmPrefix, fcPrefix, anchors, rtl); } /// @@ -198,17 +203,12 @@ private void Emit(int op, int opd1, int opd2) /// Returns an index in the string table for a string; /// uses a hashtable to eliminate duplicates. /// - private int StringCode(string? str) + private int StringCode(string str) { - if (str == null) - str = string.Empty; - - int i; - if (!_stringHash.TryGetValue(str, out i)) + if (!_stringTable.TryGetValue(str, out int i)) { i = _stringTable.Count; - _stringHash[str] = i; - _stringTable.Add(str); + _stringTable.Add(str, i); } return i; @@ -239,13 +239,13 @@ private int MapCapnum(int capnum) private void EmitFragment(int nodetype, RegexNode node, int curIndex) { int bits = 0; - - if (nodetype <= RegexNode.Ref) + if (node.UseOptionR()) { - if (node.UseOptionR()) - bits |= RegexCode.Rtl; - if ((node.Options & RegexOptions.IgnoreCase) != 0) - bits |= RegexCode.Ci; + bits |= RegexCode.Rtl; + } + if ((node.Options & RegexOptions.IgnoreCase) != 0) + { + bits |= RegexCode.Ci; } switch (nodetype) @@ -256,7 +256,7 @@ private void EmitFragment(int nodetype, RegexNode node, int curIndex) break; case RegexNode.Alternate | BeforeChild: - if (curIndex < node.Children!.Count - 1) + if (curIndex < node.ChildCount() - 1) { _intStack.Append(_emitted.Length); Emit(RegexCode.Lazybranch, 0); @@ -265,7 +265,7 @@ private void EmitFragment(int 
nodetype, RegexNode node, int curIndex) case RegexNode.Alternate | AfterChild: { - if (curIndex < node.Children!.Count - 1) + if (curIndex < node.ChildCount() - 1) { int LBPos = _intStack.Pop(); _intStack.Append(_emitted.Length); @@ -306,7 +306,7 @@ private void EmitFragment(int nodetype, RegexNode node, int curIndex) Emit(RegexCode.Goto, 0); PatchJump(Branchpos, _emitted.Length); Emit(RegexCode.Forejump); - if (node.Children!.Count > 1) + if (node.ChildCount() > 1) break; // else fallthrough goto case 1; @@ -344,7 +344,7 @@ private void EmitFragment(int nodetype, RegexNode node, int curIndex) Emit(RegexCode.Getmark); Emit(RegexCode.Forejump); - if (node.Children!.Count > 2) + if (node.ChildCount() > 2) break; // else fallthrough goto case 2; @@ -428,50 +428,62 @@ private void EmitFragment(int nodetype, RegexNode node, int curIndex) Emit(RegexCode.Forejump); break; - case RegexNode.Greedy | BeforeChild: + case RegexNode.Atomic | BeforeChild: Emit(RegexCode.Setjump); break; - case RegexNode.Greedy | AfterChild: + case RegexNode.Atomic | AfterChild: Emit(RegexCode.Forejump); break; case RegexNode.One: case RegexNode.Notone: - Emit(node.NType | bits, node.Ch); + Emit(node.Type | bits, node.Ch); break; case RegexNode.Notoneloop: + case RegexNode.Notoneloopatomic: case RegexNode.Notonelazy: case RegexNode.Oneloop: + case RegexNode.Oneloopatomic: case RegexNode.Onelazy: if (node.M > 0) - Emit(((node.NType == RegexNode.Oneloop || node.NType == RegexNode.Onelazy) ? + { + Emit(((node.Type == RegexNode.Oneloop || node.Type == RegexNode.Oneloopatomic || node.Type == RegexNode.Onelazy) ? RegexCode.Onerep : RegexCode.Notonerep) | bits, node.Ch, node.M); + } if (node.N > node.M) - Emit(node.NType | bits, node.Ch, node.N == int.MaxValue ? - int.MaxValue : node.N - node.M); + { + Emit(node.Type | bits, node.Ch, node.N == int.MaxValue ? 
int.MaxValue : node.N - node.M); + } break; case RegexNode.Setloop: + case RegexNode.Setloopatomic: case RegexNode.Setlazy: - if (node.M > 0) - Emit(RegexCode.Setrep | bits, StringCode(node.Str), node.M); - if (node.N > node.M) - Emit(node.NType | bits, StringCode(node.Str), - (node.N == int.MaxValue) ? int.MaxValue : node.N - node.M); + { + int stringCode = StringCode(node.Str!); + if (node.M > 0) + { + Emit(RegexCode.Setrep | bits, stringCode, node.M); + } + if (node.N > node.M) + { + Emit(node.Type | bits, stringCode, (node.N == int.MaxValue) ? int.MaxValue : node.N - node.M); + } + } break; case RegexNode.Multi: - Emit(node.NType | bits, StringCode(node.Str)); + Emit(node.Type | bits, StringCode(node.Str!)); break; case RegexNode.Set: - Emit(node.NType | bits, StringCode(node.Str)); + Emit(node.Type | bits, StringCode(node.Str!)); break; case RegexNode.Ref: - Emit(node.NType | bits, MapCapnum(node.M)); + Emit(node.Type | bits, MapCapnum(node.M)); break; case RegexNode.Nothing: @@ -485,7 +497,7 @@ private void EmitFragment(int nodetype, RegexNode node, int curIndex) case RegexNode.Start: case RegexNode.EndZ: case RegexNode.End: - Emit(node.NType); + Emit(node.Type); break; default: diff --git a/src/libraries/System.Text.RegularExpressions/tests/Configurations.props b/src/libraries/System.Text.RegularExpressions/tests/Configurations.props index b6e0d3ce6383b..3e0106dbfb5ca 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Configurations.props +++ b/src/libraries/System.Text.RegularExpressions/tests/Configurations.props @@ -2,6 +2,7 @@ $(NetCoreAppCurrent); + $(NetFrameworkCurrent); \ No newline at end of file diff --git a/src/libraries/System.Text.RegularExpressions/tests/MonoRegexTests.cs b/src/libraries/System.Text.RegularExpressions/tests/MonoRegexTests.cs new file mode 100644 index 0000000000000..76b508ca87965 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/tests/MonoRegexTests.cs @@ -0,0 +1,1141 @@ +// Licensed to the .NET 
Foundation under one or more agreements.
+// See the LICENSE file in the project root for more information.
+
+// assembly: System_test
+// namespace: MonoTests.System.Text.RegularExpressions
+// file: PerlTrials.cs
+//
+// author: Dan Lewis (dlewis@gmx.co.uk)
+// (c) 2002
+
+using System.Collections.Generic;
+using System.Diagnostics;
+using Xunit;
+
+namespace System.Text.RegularExpressions.Tests
+{
+    public class MonoTests
+    {
+        // Ported from https://github.com/mono/mono/blob/0f2995e95e98e082c7c7039e17175cf2c6a00034/mcs/class/System/Test/System.Text.RegularExpressions/PerlTrials.cs
+        // Which in turn ported from perl-5.6.1/t/op/re_tests
+
+        [Theory]
+        [MemberData(nameof(RegexTestCasesWithOptions))]
+        public void ValidateRegex(string pattern, RegexOptions options, string input, string expected) // expected is "Pass. Group[n]=(index,length)...", "Fail." or "Error."
+        {
+            string result;
+            try
+            {
+                var re = new Regex(pattern, options);
+                int[] groupNums = re.GetGroupNumbers();
+                Match m = re.Match(input);
+
+                if (m.Success)
+                {
+                    result = "Pass.";
+                    for (int i = 0; i < m.Groups.Count; ++i)
+                    {
+                        int gid = groupNums[i]; // look the group up by its number, not by its ordinal position
+                        Group group = m.Groups[gid];
+
+                        result += $" Group[{gid}]=";
+                        foreach (Capture cap in group.Captures)
+                        {
+                            result += $"({cap.Index},{cap.Length})";
+                        }
+                    }
+                }
+                else
+                {
+                    result = "Fail.";
+                }
+            }
+            catch (ArgumentException) // only an invalid pattern counts as "Error."; any other exception is a real failure and must surface
+            {
+                result = "Error.";
+            }
+
+            Assert.Equal(expected, result);
+        }
+
+        public static IEnumerable<object[]> RegexTestCasesWithOptions()
+        {
+            foreach (object[] obj in RegexTestCases()) // each base case is run four ways: as given, +CultureInvariant, +Compiled, +both
+            {
+                yield return new object[] { obj[0], (RegexOptions)obj[1], obj[2], obj[3] };
+                yield return new object[] { obj[0], RegexOptions.CultureInvariant | (RegexOptions)obj[1], obj[2], obj[3] };
+                yield return new object[] { obj[0], RegexOptions.Compiled | (RegexOptions)obj[1], obj[2], obj[3] };
+                yield return new object[] { obj[0], RegexOptions.Compiled | RegexOptions.CultureInvariant | (RegexOptions)obj[1], obj[2], obj[3] };
+            }
+        }
+
+        public static IEnumerable<object[]> RegexTestCases()
+        {
+            yield return new object[] { @"abc", RegexOptions.None,
"abc", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"abc", RegexOptions.None, "xbc", "Fail." }; + yield return new object[] { @"abc", RegexOptions.None, "axc", "Fail." }; + yield return new object[] { @"abc", RegexOptions.None, "abx", "Fail." }; + yield return new object[] { @"abc", RegexOptions.None, "xabcy", "Pass. Group[0]=(1,3)" }; + yield return new object[] { @"abc", RegexOptions.None, "ababc", "Pass. Group[0]=(2,3)" }; + yield return new object[] { @"ab*c", RegexOptions.None, "abc", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"ab*bc", RegexOptions.None, "abc", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"ab*bc", RegexOptions.None, "abbc", "Pass. Group[0]=(0,4)" }; + yield return new object[] { @"ab*bc", RegexOptions.None, "abbbbc", "Pass. Group[0]=(0,6)" }; + yield return new object[] { @".{1}", RegexOptions.None, "abbbbc", "Pass. Group[0]=(0,1)" }; + yield return new object[] { @".{3,4}", RegexOptions.None, "abbbbc", "Pass. Group[0]=(0,4)" }; + yield return new object[] { @"ab{0,}bc", RegexOptions.None, "abbbbc", "Pass. Group[0]=(0,6)" }; + yield return new object[] { @"ab+bc", RegexOptions.None, "abbc", "Pass. Group[0]=(0,4)" }; + yield return new object[] { @"ab+bc", RegexOptions.None, "abc", "Fail." }; + yield return new object[] { @"ab+bc", RegexOptions.None, "abq", "Fail." }; + yield return new object[] { @"ab{1,}bc", RegexOptions.None, "abq", "Fail." }; + yield return new object[] { @"ab+bc", RegexOptions.None, "abbbbc", "Pass. Group[0]=(0,6)" }; + yield return new object[] { @"ab{1,}bc", RegexOptions.None, "abbbbc", "Pass. Group[0]=(0,6)" }; + yield return new object[] { @"ab{1,3}bc", RegexOptions.None, "abbbbc", "Pass. Group[0]=(0,6)" }; + yield return new object[] { @"ab{3,4}bc", RegexOptions.None, "abbbbc", "Pass. Group[0]=(0,6)" }; + yield return new object[] { @"ab{4,5}bc", RegexOptions.None, "abbbbc", "Fail." }; + yield return new object[] { @"ab?bc", RegexOptions.None, "abbc", "Pass. 
Group[0]=(0,4)" }; + yield return new object[] { @"ab?bc", RegexOptions.None, "abc", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"ab{0,1}bc", RegexOptions.None, "abc", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"ab?bc", RegexOptions.None, "abbbbc", "Fail." }; + yield return new object[] { @"ab?c", RegexOptions.None, "abc", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"ab{0,1}c", RegexOptions.None, "abc", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"^abc$", RegexOptions.None, "abc", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"^abc$", RegexOptions.None, "abcc", "Fail." }; + yield return new object[] { @"^abc", RegexOptions.None, "abcc", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"^abc$", RegexOptions.None, "aabc", "Fail." }; + yield return new object[] { @"abc$", RegexOptions.None, "aabc", "Pass. Group[0]=(1,3)" }; + yield return new object[] { @"abc$", RegexOptions.None, "aabcd", "Fail." }; + yield return new object[] { @"^", RegexOptions.None, "abc", "Pass. Group[0]=(0,0)" }; + yield return new object[] { @"$", RegexOptions.None, "abc", "Pass. Group[0]=(3,0)" }; + yield return new object[] { @"a.c", RegexOptions.None, "abc", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"a.c", RegexOptions.None, "axc", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"a.*c", RegexOptions.None, "axyzc", "Pass. Group[0]=(0,5)" }; + yield return new object[] { @"a.*c", RegexOptions.None, "axyzd", "Fail." }; + yield return new object[] { @"a[bc]d", RegexOptions.None, "abc", "Fail." }; + yield return new object[] { @"a[bc]d", RegexOptions.None, "abd", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"a[b-d]e", RegexOptions.None, "abd", "Fail." }; + yield return new object[] { @"a[b-d]e", RegexOptions.None, "ace", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"a[b-d]", RegexOptions.None, "aac", "Pass. 
Group[0]=(1,2)" }; + yield return new object[] { @"a[-b]", RegexOptions.None, "a-", "Pass. Group[0]=(0,2)" }; + yield return new object[] { @"a[b-]", RegexOptions.None, "a-", "Pass. Group[0]=(0,2)" }; + yield return new object[] { @"a[b-a]", RegexOptions.None, "-", "Error." }; + yield return new object[] { @"a[]b", RegexOptions.None, "-", "Error." }; + yield return new object[] { @"a[", RegexOptions.None, "-", "Error." }; + yield return new object[] { @"a]", RegexOptions.None, "a]", "Pass. Group[0]=(0,2)" }; + yield return new object[] { @"a[]]b", RegexOptions.None, "a]b", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"a[^bc]d", RegexOptions.None, "aed", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"a[^bc]d", RegexOptions.None, "abd", "Fail." }; + yield return new object[] { @"a[^-b]c", RegexOptions.None, "adc", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"a[^-b]c", RegexOptions.None, "a-c", "Fail." }; + yield return new object[] { @"a[^]b]c", RegexOptions.None, "a]c", "Fail." }; + yield return new object[] { @"a[^]b]c", RegexOptions.None, "adc", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"\ba\b", RegexOptions.None, "a-", "Pass. Group[0]=(0,1)" }; + yield return new object[] { @"\ba\b", RegexOptions.None, "-a", "Pass. Group[0]=(1,1)" }; + yield return new object[] { @"\ba\b", RegexOptions.None, "-a-", "Pass. Group[0]=(1,1)" }; + yield return new object[] { @"\by\b", RegexOptions.None, "xy", "Fail." }; + yield return new object[] { @"\by\b", RegexOptions.None, "yz", "Fail." }; + yield return new object[] { @"\by\b", RegexOptions.None, "xyz", "Fail." }; + yield return new object[] { @"\Ba\B", RegexOptions.None, "a-", "Fail." }; + yield return new object[] { @"\Ba\B", RegexOptions.None, "-a", "Fail." }; + yield return new object[] { @"\Ba\B", RegexOptions.None, "-a-", "Fail." }; + yield return new object[] { @"\By\b", RegexOptions.None, "xy", "Pass. 
Group[0]=(1,1)" }; + yield return new object[] { @"\by\B", RegexOptions.None, "yz", "Pass. Group[0]=(0,1)" }; + yield return new object[] { @"\By\B", RegexOptions.None, "xyz", "Pass. Group[0]=(1,1)" }; + yield return new object[] { @"\w", RegexOptions.None, "a", "Pass. Group[0]=(0,1)" }; + yield return new object[] { @"\w", RegexOptions.None, "-", "Fail." }; + yield return new object[] { @"\W", RegexOptions.None, "a", "Fail." }; + yield return new object[] { @"\W", RegexOptions.None, "-", "Pass. Group[0]=(0,1)" }; + yield return new object[] { @"a\sb", RegexOptions.None, "a b", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"a\sb", RegexOptions.None, "a-b", "Fail." }; + yield return new object[] { @"a\Sb", RegexOptions.None, "a b", "Fail." }; + yield return new object[] { @"a\Sb", RegexOptions.None, "a-b", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"\d", RegexOptions.None, "1", "Pass. Group[0]=(0,1)" }; + yield return new object[] { @"\d", RegexOptions.None, "-", "Fail." }; + yield return new object[] { @"\D", RegexOptions.None, "1", "Fail." }; + yield return new object[] { @"\D", RegexOptions.None, "-", "Pass. Group[0]=(0,1)" }; + yield return new object[] { @"[\w]", RegexOptions.None, "a", "Pass. Group[0]=(0,1)" }; + yield return new object[] { @"[\w]", RegexOptions.None, "-", "Fail." }; + yield return new object[] { @"[\W]", RegexOptions.None, "a", "Fail." }; + yield return new object[] { @"[\W]", RegexOptions.None, "-", "Pass. Group[0]=(0,1)" }; + yield return new object[] { @"a[\s]b", RegexOptions.None, "a b", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"a[\s]b", RegexOptions.None, "a-b", "Fail." }; + yield return new object[] { @"a[\S]b", RegexOptions.None, "a b", "Fail." }; + yield return new object[] { @"a[\S]b", RegexOptions.None, "a-b", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"[\d]", RegexOptions.None, "1", "Pass. Group[0]=(0,1)" }; + yield return new object[] { @"[\d]", RegexOptions.None, "-", "Fail." 
}; + yield return new object[] { @"[\D]", RegexOptions.None, "1", "Fail." }; + yield return new object[] { @"[\D]", RegexOptions.None, "-", "Pass. Group[0]=(0,1)" }; + yield return new object[] { @"ab|cd", RegexOptions.None, "abc", "Pass. Group[0]=(0,2)" }; + yield return new object[] { @"ab|cd", RegexOptions.None, "abcd", "Pass. Group[0]=(0,2)" }; + yield return new object[] { @"()ef", RegexOptions.None, "def", "Pass. Group[0]=(1,2) Group[1]=(1,0)" }; + yield return new object[] { @"*a", RegexOptions.None, "-", "Error." }; + yield return new object[] { @"(*)b", RegexOptions.None, "-", "Error." }; + yield return new object[] { @"$b", RegexOptions.None, "b", "Fail." }; + yield return new object[] { @"a\", RegexOptions.None, "-", "Error." }; + yield return new object[] { @"a\(b", RegexOptions.None, "a(b", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"a\(*b", RegexOptions.None, "ab", "Pass. Group[0]=(0,2)" }; + yield return new object[] { @"a\(*b", RegexOptions.None, "a((b", "Pass. Group[0]=(0,4)" }; + yield return new object[] { @"a\\b", RegexOptions.None, "a\\b", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"abc)", RegexOptions.None, "-", "Error." }; + yield return new object[] { @"(abc", RegexOptions.None, "-", "Error." }; + yield return new object[] { @"((a))", RegexOptions.None, "abc", "Pass. Group[0]=(0,1) Group[1]=(0,1) Group[2]=(0,1)" }; + yield return new object[] { @"(a)b(c)", RegexOptions.None, "abc", "Pass. Group[0]=(0,3) Group[1]=(0,1) Group[2]=(2,1)" }; + yield return new object[] { @"a+b+c", RegexOptions.None, "aabbabc", "Pass. Group[0]=(4,3)" }; + yield return new object[] { @"a{1,}b{1,}c", RegexOptions.None, "aabbabc", "Pass. Group[0]=(4,3)" }; + yield return new object[] { @"a**", RegexOptions.None, "-", "Error." }; + yield return new object[] { @"a.+?c", RegexOptions.None, "abcabc", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"(a+|b)*", RegexOptions.None, "ab", "Pass. 
Group[0]=(0,2) Group[1]=(0,1)(1,1)" }; + yield return new object[] { @"(a+|b){0,}", RegexOptions.None, "ab", "Pass. Group[0]=(0,2) Group[1]=(0,1)(1,1)" }; + yield return new object[] { @"(a+|b)+", RegexOptions.None, "ab", "Pass. Group[0]=(0,2) Group[1]=(0,1)(1,1)" }; + yield return new object[] { @"(a+|b){1,}", RegexOptions.None, "ab", "Pass. Group[0]=(0,2) Group[1]=(0,1)(1,1)" }; + yield return new object[] { @"(a+|b)?", RegexOptions.None, "ab", "Pass. Group[0]=(0,1) Group[1]=(0,1)" }; + yield return new object[] { @"(a+|b){0,1}", RegexOptions.None, "ab", "Pass. Group[0]=(0,1) Group[1]=(0,1)" }; + yield return new object[] { @")(", RegexOptions.None, "-", "Error." }; + yield return new object[] { @"[^ab]*", RegexOptions.None, "cde", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"abc", RegexOptions.None, "", "Fail." }; + yield return new object[] { @"a*", RegexOptions.None, "", "Pass. Group[0]=(0,0)" }; + yield return new object[] { @"([abc])*d", RegexOptions.None, "abbbcd", "Pass. Group[0]=(0,6) Group[1]=(0,1)(1,1)(2,1)(3,1)(4,1)" }; + yield return new object[] { @"([abc])*bcd", RegexOptions.None, "abcd", "Pass. Group[0]=(0,4) Group[1]=(0,1)" }; + yield return new object[] { @"a|b|c|d|e", RegexOptions.None, "e", "Pass. Group[0]=(0,1)" }; + yield return new object[] { @"(a|b|c|d|e)f", RegexOptions.None, "ef", "Pass. Group[0]=(0,2) Group[1]=(0,1)" }; + yield return new object[] { @"abcd*efg", RegexOptions.None, "abcdefg", "Pass. Group[0]=(0,7)" }; + yield return new object[] { @"ab*", RegexOptions.None, "xabyabbbz", "Pass. Group[0]=(1,2)" }; + yield return new object[] { @"ab*", RegexOptions.None, "xayabbbz", "Pass. Group[0]=(1,1)" }; + yield return new object[] { @"(ab|cd)e", RegexOptions.None, "abcde", "Pass. Group[0]=(2,3) Group[1]=(2,2)" }; + yield return new object[] { @"[abhgefdc]ij", RegexOptions.None, "hij", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"^(ab|cd)e", RegexOptions.None, "abcde", "Fail." 
}; + yield return new object[] { @"(abc|)ef", RegexOptions.None, "abcdef", "Pass. Group[0]=(4,2) Group[1]=(4,0)" }; + yield return new object[] { @"(a|b)c*d", RegexOptions.None, "abcd", "Pass. Group[0]=(1,3) Group[1]=(1,1)" }; + yield return new object[] { @"(ab|ab*)bc", RegexOptions.None, "abc", "Pass. Group[0]=(0,3) Group[1]=(0,1)" }; + yield return new object[] { @"a([bc]*)c*", RegexOptions.None, "abc", "Pass. Group[0]=(0,3) Group[1]=(1,2)" }; + yield return new object[] { @"a([bc]*)(c*d)", RegexOptions.None, "abcd", "Pass. Group[0]=(0,4) Group[1]=(1,2) Group[2]=(3,1)" }; + yield return new object[] { @"a([bc]+)(c*d)", RegexOptions.None, "abcd", "Pass. Group[0]=(0,4) Group[1]=(1,2) Group[2]=(3,1)" }; + yield return new object[] { @"a([bc]*)(c+d)", RegexOptions.None, "abcd", "Pass. Group[0]=(0,4) Group[1]=(1,1) Group[2]=(2,2)" }; + yield return new object[] { @"a[bcd]*dcdcde", RegexOptions.None, "adcdcde", "Pass. Group[0]=(0,7)" }; + yield return new object[] { @"a[bcd]+dcdcde", RegexOptions.None, "adcdcde", "Fail." }; + yield return new object[] { @"(ab|a)b*c", RegexOptions.None, "abc", "Pass. Group[0]=(0,3) Group[1]=(0,2)" }; + yield return new object[] { @"((a)(b)c)(d)", RegexOptions.None, "abcd", "Pass. Group[0]=(0,4) Group[1]=(0,3) Group[2]=(0,1) Group[3]=(1,1) Group[4]=(3,1)" }; + yield return new object[] { @"[a-zA-Z_][a-zA-Z0-9_]*", RegexOptions.None, "alpha", "Pass. Group[0]=(0,5)" }; + yield return new object[] { @"^a(bc+|b[eh])g|.h$", RegexOptions.None, "abh", "Pass. Group[0]=(1,2) Group[1]=" }; + yield return new object[] { @"(bc+d$|ef*g.|h?i(j|k))", RegexOptions.None, "effgz", "Pass. Group[0]=(0,5) Group[1]=(0,5) Group[2]=" }; + yield return new object[] { @"(bc+d$|ef*g.|h?i(j|k))", RegexOptions.None, "ij", "Pass. Group[0]=(0,2) Group[1]=(0,2) Group[2]=(1,1)" }; + yield return new object[] { @"(bc+d$|ef*g.|h?i(j|k))", RegexOptions.None, "effg", "Fail." }; + yield return new object[] { @"(bc+d$|ef*g.|h?i(j|k))", RegexOptions.None, "bcdd", "Fail." 
}; + yield return new object[] { @"(bc+d$|ef*g.|h?i(j|k))", RegexOptions.None, "reffgz", "Pass. Group[0]=(1,5) Group[1]=(1,5) Group[2]=" }; + yield return new object[] { @"((((((((((a))))))))))", RegexOptions.None, "a", "Pass. Group[0]=(0,1) Group[1]=(0,1) Group[2]=(0,1) Group[3]=(0,1) Group[4]=(0,1) Group[5]=(0,1) Group[6]=(0,1) Group[7]=(0,1) Group[8]=(0,1) Group[9]=(0,1) Group[10]=(0,1)" }; + yield return new object[] { @"((((((((((a))))))))))\10", RegexOptions.None, "aa", "Pass. Group[0]=(0,2) Group[1]=(0,1) Group[2]=(0,1) Group[3]=(0,1) Group[4]=(0,1) Group[5]=(0,1) Group[6]=(0,1) Group[7]=(0,1) Group[8]=(0,1) Group[9]=(0,1) Group[10]=(0,1)" }; + yield return new object[] { @"((((((((((a))))))))))!", RegexOptions.None, "aa", "Fail." }; + yield return new object[] { @"((((((((((a))))))))))!", RegexOptions.None, "a!", "Pass. Group[0]=(0,2) Group[1]=(0,1) Group[2]=(0,1) Group[3]=(0,1) Group[4]=(0,1) Group[5]=(0,1) Group[6]=(0,1) Group[7]=(0,1) Group[8]=(0,1) Group[9]=(0,1) Group[10]=(0,1)" }; + yield return new object[] { @"(((((((((a)))))))))", RegexOptions.None, "a", "Pass. Group[0]=(0,1) Group[1]=(0,1) Group[2]=(0,1) Group[3]=(0,1) Group[4]=(0,1) Group[5]=(0,1) Group[6]=(0,1) Group[7]=(0,1) Group[8]=(0,1) Group[9]=(0,1)" }; + yield return new object[] { @"multiple words of text", RegexOptions.None, "uh-uh", "Fail." }; + yield return new object[] { @"multiple words", RegexOptions.None, "multiple words, yeah", "Pass. Group[0]=(0,14)" }; + yield return new object[] { @"(.*)c(.*)", RegexOptions.None, "abcde", "Pass. Group[0]=(0,5) Group[1]=(0,2) Group[2]=(3,2)" }; + yield return new object[] { @"\((.*), (.*)\)", RegexOptions.None, "(a, b)", "Pass. Group[0]=(0,6) Group[1]=(1,1) Group[2]=(4,1)" }; + yield return new object[] { @"[k]", RegexOptions.None, "ab", "Fail." }; + yield return new object[] { @"abcd", RegexOptions.None, "abcd", "Pass. Group[0]=(0,4)" }; + yield return new object[] { @"a(bc)d", RegexOptions.None, "abcd", "Pass. 
Group[0]=(0,4) Group[1]=(1,2)" }; + yield return new object[] { @"a[-]?c", RegexOptions.None, "ac", "Pass. Group[0]=(0,2)" }; + yield return new object[] { @"(abc)\1", RegexOptions.None, "abcabc", "Pass. Group[0]=(0,6) Group[1]=(0,3)" }; + yield return new object[] { @"([a-c]*)\1", RegexOptions.None, "abcabc", "Pass. Group[0]=(0,6) Group[1]=(0,3)" }; + yield return new object[] { @"\1", RegexOptions.None, "-", "Error." }; + yield return new object[] { @"\2", RegexOptions.None, "-", "Error." }; + yield return new object[] { @"(a)|\1", RegexOptions.None, "a", "Pass. Group[0]=(0,1) Group[1]=(0,1)" }; + yield return new object[] { @"(a)|\1", RegexOptions.None, "x", "Fail." }; + yield return new object[] { @"(a)|\2", RegexOptions.None, "-", "Error." }; + yield return new object[] { @"(([a-c])b*?\2)*", RegexOptions.None, "ababbbcbc", "Pass. Group[0]=(0,5) Group[1]=(0,3)(3,2) Group[2]=(0,1)(3,1)" }; + yield return new object[] { @"(([a-c])b*?\2){3}", RegexOptions.None, "ababbbcbc", "Pass. Group[0]=(0,9) Group[1]=(0,3)(3,3)(6,3) Group[2]=(0,1)(3,1)(6,1)" }; + yield return new object[] { @"((\3|b)\2(a)x)+", RegexOptions.None, "aaxabxbaxbbx", "Fail." }; + yield return new object[] { @"((\3|b)\2(a)x)+", RegexOptions.None, "aaaxabaxbaaxbbax", "Pass. Group[0]=(12,4) Group[1]=(12,4) Group[2]=(12,1) Group[3]=(14,1)" }; + yield return new object[] { @"((\3|b)\2(a)){2,}", RegexOptions.None, "bbaababbabaaaaabbaaaabba", "Pass. Group[0]=(15,9) Group[1]=(15,3)(18,3)(21,3) Group[2]=(15,1)(18,1)(21,1) Group[3]=(17,1)(20,1)(23,1)" }; + yield return new object[] { @"abc", RegexOptions.IgnoreCase, "ABC", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"abc", RegexOptions.IgnoreCase, "XBC", "Fail." }; + yield return new object[] { @"abc", RegexOptions.IgnoreCase, "AXC", "Fail." }; + yield return new object[] { @"abc", RegexOptions.IgnoreCase, "ABX", "Fail." }; + yield return new object[] { @"abc", RegexOptions.IgnoreCase, "XABCY", "Pass. 
Group[0]=(1,3)" }; + yield return new object[] { @"abc", RegexOptions.IgnoreCase, "ABABC", "Pass. Group[0]=(2,3)" }; + yield return new object[] { @"ab*c", RegexOptions.IgnoreCase, "ABC", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"ab*bc", RegexOptions.IgnoreCase, "ABC", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"ab*bc", RegexOptions.IgnoreCase, "ABBC", "Pass. Group[0]=(0,4)" }; + yield return new object[] { @"ab*?bc", RegexOptions.IgnoreCase, "ABBBBC", "Pass. Group[0]=(0,6)" }; + yield return new object[] { @"ab{0,}?bc", RegexOptions.IgnoreCase, "ABBBBC", "Pass. Group[0]=(0,6)" }; + yield return new object[] { @"ab+?bc", RegexOptions.IgnoreCase, "ABBC", "Pass. Group[0]=(0,4)" }; + yield return new object[] { @"ab+bc", RegexOptions.IgnoreCase, "ABC", "Fail." }; + yield return new object[] { @"ab+bc", RegexOptions.IgnoreCase, "ABQ", "Fail." }; + yield return new object[] { @"ab{1,}bc", RegexOptions.IgnoreCase, "ABQ", "Fail." }; + yield return new object[] { @"ab+bc", RegexOptions.IgnoreCase, "ABBBBC", "Pass. Group[0]=(0,6)" }; + yield return new object[] { @"ab{1,}?bc", RegexOptions.IgnoreCase, "ABBBBC", "Pass. Group[0]=(0,6)" }; + yield return new object[] { @"ab{1,3}?bc", RegexOptions.IgnoreCase, "ABBBBC", "Pass. Group[0]=(0,6)" }; + yield return new object[] { @"ab{3,4}?bc", RegexOptions.IgnoreCase, "ABBBBC", "Pass. Group[0]=(0,6)" }; + yield return new object[] { @"ab{4,5}?bc", RegexOptions.IgnoreCase, "ABBBBC", "Fail." }; + yield return new object[] { @"ab??bc", RegexOptions.IgnoreCase, "ABBC", "Pass. Group[0]=(0,4)" }; + yield return new object[] { @"ab??bc", RegexOptions.IgnoreCase, "ABC", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"ab{0,1}?bc", RegexOptions.IgnoreCase, "ABC", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"ab??bc", RegexOptions.IgnoreCase, "ABBBBC", "Fail." }; + yield return new object[] { @"ab??c", RegexOptions.IgnoreCase, "ABC", "Pass. 
Group[0]=(0,3)" }; + yield return new object[] { @"ab{0,1}?c", RegexOptions.IgnoreCase, "ABC", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"^abc$", RegexOptions.IgnoreCase, "ABC", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"^abc$", RegexOptions.IgnoreCase, "ABCC", "Fail." }; + yield return new object[] { @"^abc", RegexOptions.IgnoreCase, "ABCC", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"^abc$", RegexOptions.IgnoreCase, "AABC", "Fail." }; + yield return new object[] { @"abc$", RegexOptions.IgnoreCase, "AABC", "Pass. Group[0]=(1,3)" }; + yield return new object[] { @"^", RegexOptions.IgnoreCase, "ABC", "Pass. Group[0]=(0,0)" }; + yield return new object[] { @"$", RegexOptions.IgnoreCase, "ABC", "Pass. Group[0]=(3,0)" }; + yield return new object[] { @"a.c", RegexOptions.IgnoreCase, "ABC", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"a.c", RegexOptions.IgnoreCase, "AXC", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"a.*?c", RegexOptions.IgnoreCase, "AXYZC", "Pass. Group[0]=(0,5)" }; + yield return new object[] { @"a.*c", RegexOptions.IgnoreCase, "AXYZD", "Fail." }; + yield return new object[] { @"a[bc]d", RegexOptions.IgnoreCase, "ABC", "Fail." }; + yield return new object[] { @"a[bc]d", RegexOptions.IgnoreCase, "ABD", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"a[b-d]e", RegexOptions.IgnoreCase, "ABD", "Fail." }; + yield return new object[] { @"a[b-d]e", RegexOptions.IgnoreCase, "ACE", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"a[b-d]", RegexOptions.IgnoreCase, "AAC", "Pass. Group[0]=(1,2)" }; + yield return new object[] { @"a[-b]", RegexOptions.IgnoreCase, "A-", "Pass. Group[0]=(0,2)" }; + yield return new object[] { @"a[b-]", RegexOptions.IgnoreCase, "A-", "Pass. Group[0]=(0,2)" }; + yield return new object[] { @"a[b-a]", RegexOptions.IgnoreCase, "-", "Error." }; + yield return new object[] { @"a[]b", RegexOptions.IgnoreCase, "-", "Error." 
}; + yield return new object[] { @"a[", RegexOptions.IgnoreCase, "-", "Error." }; + yield return new object[] { @"a]", RegexOptions.IgnoreCase, "A]", "Pass. Group[0]=(0,2)" }; + yield return new object[] { @"a[]]b", RegexOptions.IgnoreCase, "A]B", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"a[^bc]d", RegexOptions.IgnoreCase, "AED", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"a[^bc]d", RegexOptions.IgnoreCase, "ABD", "Fail." }; + yield return new object[] { @"a[^-b]c", RegexOptions.IgnoreCase, "ADC", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"a[^-b]c", RegexOptions.IgnoreCase, "A-C", "Fail." }; + yield return new object[] { @"a[^]b]c", RegexOptions.IgnoreCase, "A]C", "Fail." }; + yield return new object[] { @"a[^]b]c", RegexOptions.IgnoreCase, "ADC", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"ab|cd", RegexOptions.IgnoreCase, "ABC", "Pass. Group[0]=(0,2)" }; + yield return new object[] { @"ab|cd", RegexOptions.IgnoreCase, "ABCD", "Pass. Group[0]=(0,2)" }; + yield return new object[] { @"()ef", RegexOptions.IgnoreCase, "DEF", "Pass. Group[0]=(1,2) Group[1]=(1,0)" }; + yield return new object[] { @"*a", RegexOptions.IgnoreCase, "-", "Error." }; + yield return new object[] { @"(*)b", RegexOptions.IgnoreCase, "-", "Error." }; + yield return new object[] { @"$b", RegexOptions.IgnoreCase, "B", "Fail." }; + yield return new object[] { @"a\", RegexOptions.IgnoreCase, "-", "Error." }; + yield return new object[] { @"a\(b", RegexOptions.IgnoreCase, "A(B", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"a\(*b", RegexOptions.IgnoreCase, "AB", "Pass. Group[0]=(0,2)" }; + yield return new object[] { @"a\(*b", RegexOptions.IgnoreCase, "A((B", "Pass. Group[0]=(0,4)" }; + yield return new object[] { @"a\\b", RegexOptions.IgnoreCase, "A\\B", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"abc)", RegexOptions.IgnoreCase, "-", "Error." 
}; + yield return new object[] { @"(abc", RegexOptions.IgnoreCase, "-", "Error." }; + yield return new object[] { @"((a))", RegexOptions.IgnoreCase, "ABC", "Pass. Group[0]=(0,1) Group[1]=(0,1) Group[2]=(0,1)" }; + yield return new object[] { @"(a)b(c)", RegexOptions.IgnoreCase, "ABC", "Pass. Group[0]=(0,3) Group[1]=(0,1) Group[2]=(2,1)" }; + yield return new object[] { @"a+b+c", RegexOptions.IgnoreCase, "AABBABC", "Pass. Group[0]=(4,3)" }; + yield return new object[] { @"a{1,}b{1,}c", RegexOptions.IgnoreCase, "AABBABC", "Pass. Group[0]=(4,3)" }; + yield return new object[] { @"a**", RegexOptions.IgnoreCase, "-", "Error." }; + yield return new object[] { @"a.+?c", RegexOptions.IgnoreCase, "ABCABC", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"a.*?c", RegexOptions.IgnoreCase, "ABCABC", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"a.{0,5}?c", RegexOptions.IgnoreCase, "ABCABC", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"(a+|b)*", RegexOptions.IgnoreCase, "AB", "Pass. Group[0]=(0,2) Group[1]=(0,1)(1,1)" }; + yield return new object[] { @"(a+|b){0,}", RegexOptions.IgnoreCase, "AB", "Pass. Group[0]=(0,2) Group[1]=(0,1)(1,1)" }; + yield return new object[] { @"(a+|b)+", RegexOptions.IgnoreCase, "AB", "Pass. Group[0]=(0,2) Group[1]=(0,1)(1,1)" }; + yield return new object[] { @"(a+|b){1,}", RegexOptions.IgnoreCase, "AB", "Pass. Group[0]=(0,2) Group[1]=(0,1)(1,1)" }; + yield return new object[] { @"(a+|b)?", RegexOptions.IgnoreCase, "AB", "Pass. Group[0]=(0,1) Group[1]=(0,1)" }; + yield return new object[] { @"(a+|b){0,1}", RegexOptions.IgnoreCase, "AB", "Pass. Group[0]=(0,1) Group[1]=(0,1)" }; + yield return new object[] { @"(a+|b){0,1}?", RegexOptions.IgnoreCase, "AB", "Pass. Group[0]=(0,0) Group[1]=" }; + yield return new object[] { @")(", RegexOptions.IgnoreCase, "-", "Error." }; + yield return new object[] { @"[^ab]*", RegexOptions.IgnoreCase, "CDE", "Pass. 
Group[0]=(0,3)" }; + yield return new object[] { @"abc", RegexOptions.IgnoreCase, "", "Fail." }; + yield return new object[] { @"a*", RegexOptions.IgnoreCase, "", "Pass. Group[0]=(0,0)" }; + yield return new object[] { @"([abc])*d", RegexOptions.IgnoreCase, "ABBBCD", "Pass. Group[0]=(0,6) Group[1]=(0,1)(1,1)(2,1)(3,1)(4,1)" }; + yield return new object[] { @"([abc])*bcd", RegexOptions.IgnoreCase, "ABCD", "Pass. Group[0]=(0,4) Group[1]=(0,1)" }; + yield return new object[] { @"a|b|c|d|e", RegexOptions.IgnoreCase, "E", "Pass. Group[0]=(0,1)" }; + yield return new object[] { @"(a|b|c|d|e)f", RegexOptions.IgnoreCase, "EF", "Pass. Group[0]=(0,2) Group[1]=(0,1)" }; + yield return new object[] { @"abcd*efg", RegexOptions.IgnoreCase, "ABCDEFG", "Pass. Group[0]=(0,7)" }; + yield return new object[] { @"ab*", RegexOptions.IgnoreCase, "XABYABBBZ", "Pass. Group[0]=(1,2)" }; + yield return new object[] { @"ab*", RegexOptions.IgnoreCase, "XAYABBBZ", "Pass. Group[0]=(1,1)" }; + yield return new object[] { @"(ab|cd)e", RegexOptions.IgnoreCase, "ABCDE", "Pass. Group[0]=(2,3) Group[1]=(2,2)" }; + yield return new object[] { @"[abhgefdc]ij", RegexOptions.IgnoreCase, "HIJ", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"^(ab|cd)e", RegexOptions.IgnoreCase, "ABCDE", "Fail." }; + yield return new object[] { @"(abc|)ef", RegexOptions.IgnoreCase, "ABCDEF", "Pass. Group[0]=(4,2) Group[1]=(4,0)" }; + yield return new object[] { @"(a|b)c*d", RegexOptions.IgnoreCase, "ABCD", "Pass. Group[0]=(1,3) Group[1]=(1,1)" }; + yield return new object[] { @"(ab|ab*)bc", RegexOptions.IgnoreCase, "ABC", "Pass. Group[0]=(0,3) Group[1]=(0,1)" }; + yield return new object[] { @"a([bc]*)c*", RegexOptions.IgnoreCase, "ABC", "Pass. Group[0]=(0,3) Group[1]=(1,2)" }; + yield return new object[] { @"a([bc]*)(c*d)", RegexOptions.IgnoreCase, "ABCD", "Pass. Group[0]=(0,4) Group[1]=(1,2) Group[2]=(3,1)" }; + yield return new object[] { @"a([bc]+)(c*d)", RegexOptions.IgnoreCase, "ABCD", "Pass. 
Group[0]=(0,4) Group[1]=(1,2) Group[2]=(3,1)" }; + yield return new object[] { @"a([bc]*)(c+d)", RegexOptions.IgnoreCase, "ABCD", "Pass. Group[0]=(0,4) Group[1]=(1,1) Group[2]=(2,2)" }; + yield return new object[] { @"a[bcd]*dcdcde", RegexOptions.IgnoreCase, "ADCDCDE", "Pass. Group[0]=(0,7)" }; + yield return new object[] { @"a[bcd]+dcdcde", RegexOptions.IgnoreCase, "ADCDCDE", "Fail." }; + yield return new object[] { @"(ab|a)b*c", RegexOptions.IgnoreCase, "ABC", "Pass. Group[0]=(0,3) Group[1]=(0,2)" }; + yield return new object[] { @"((a)(b)c)(d)", RegexOptions.IgnoreCase, "ABCD", "Pass. Group[0]=(0,4) Group[1]=(0,3) Group[2]=(0,1) Group[3]=(1,1) Group[4]=(3,1)" }; + yield return new object[] { @"[a-zA-Z_][a-zA-Z0-9_]*", RegexOptions.IgnoreCase, "ALPHA", "Pass. Group[0]=(0,5)" }; + yield return new object[] { @"^a(bc+|b[eh])g|.h$", RegexOptions.IgnoreCase, "ABH", "Pass. Group[0]=(1,2) Group[1]=" }; + yield return new object[] { @"(bc+d$|ef*g.|h?i(j|k))", RegexOptions.IgnoreCase, "EFFGZ", "Pass. Group[0]=(0,5) Group[1]=(0,5) Group[2]=" }; + yield return new object[] { @"(bc+d$|ef*g.|h?i(j|k))", RegexOptions.IgnoreCase, "IJ", "Pass. Group[0]=(0,2) Group[1]=(0,2) Group[2]=(1,1)" }; + yield return new object[] { @"(bc+d$|ef*g.|h?i(j|k))", RegexOptions.IgnoreCase, "EFFG", "Fail." }; + yield return new object[] { @"(bc+d$|ef*g.|h?i(j|k))", RegexOptions.IgnoreCase, "BCDD", "Fail." }; + yield return new object[] { @"(bc+d$|ef*g.|h?i(j|k))", RegexOptions.IgnoreCase, "REFFGZ", "Pass. Group[0]=(1,5) Group[1]=(1,5) Group[2]=" }; + yield return new object[] { @"((((((((((a))))))))))", RegexOptions.IgnoreCase, "A", "Pass. Group[0]=(0,1) Group[1]=(0,1) Group[2]=(0,1) Group[3]=(0,1) Group[4]=(0,1) Group[5]=(0,1) Group[6]=(0,1) Group[7]=(0,1) Group[8]=(0,1) Group[9]=(0,1) Group[10]=(0,1)" }; + yield return new object[] { @"((((((((((a))))))))))\10", RegexOptions.IgnoreCase, "AA", "Pass. 
Group[0]=(0,2) Group[1]=(0,1) Group[2]=(0,1) Group[3]=(0,1) Group[4]=(0,1) Group[5]=(0,1) Group[6]=(0,1) Group[7]=(0,1) Group[8]=(0,1) Group[9]=(0,1) Group[10]=(0,1)" }; + yield return new object[] { @"((((((((((a))))))))))!", RegexOptions.IgnoreCase, "AA", "Fail." }; + yield return new object[] { @"((((((((((a))))))))))!", RegexOptions.IgnoreCase, "A!", "Pass. Group[0]=(0,2) Group[1]=(0,1) Group[2]=(0,1) Group[3]=(0,1) Group[4]=(0,1) Group[5]=(0,1) Group[6]=(0,1) Group[7]=(0,1) Group[8]=(0,1) Group[9]=(0,1) Group[10]=(0,1)" }; + yield return new object[] { @"(((((((((a)))))))))", RegexOptions.IgnoreCase, "A", "Pass. Group[0]=(0,1) Group[1]=(0,1) Group[2]=(0,1) Group[3]=(0,1) Group[4]=(0,1) Group[5]=(0,1) Group[6]=(0,1) Group[7]=(0,1) Group[8]=(0,1) Group[9]=(0,1)" }; + yield return new object[] { @"(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))", RegexOptions.IgnoreCase, "A", "Pass. Group[0]=(0,1) Group[1]=(0,1)" }; + yield return new object[] { @"(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))", RegexOptions.IgnoreCase, "C", "Pass. Group[0]=(0,1) Group[1]=(0,1)" }; + yield return new object[] { @"multiple words of text", RegexOptions.IgnoreCase, "UH-UH", "Fail." }; + yield return new object[] { @"multiple words", RegexOptions.IgnoreCase, "MULTIPLE WORDS, YEAH", "Pass. Group[0]=(0,14)" }; + yield return new object[] { @"(.*)c(.*)", RegexOptions.IgnoreCase, "ABCDE", "Pass. Group[0]=(0,5) Group[1]=(0,2) Group[2]=(3,2)" }; + yield return new object[] { @"\((.*), (.*)\)", RegexOptions.IgnoreCase, "(A, B)", "Pass. Group[0]=(0,6) Group[1]=(1,1) Group[2]=(4,1)" }; + yield return new object[] { @"[k]", RegexOptions.IgnoreCase, "AB", "Fail." }; + yield return new object[] { @"abcd", RegexOptions.IgnoreCase, "ABCD", "Pass. Group[0]=(0,4)" }; + yield return new object[] { @"a(bc)d", RegexOptions.IgnoreCase, "ABCD", "Pass. Group[0]=(0,4) Group[1]=(1,2)" }; + yield return new object[] { @"a[-]?c", RegexOptions.IgnoreCase, "AC", "Pass. 
Group[0]=(0,2)" }; + yield return new object[] { @"(abc)\1", RegexOptions.IgnoreCase, "ABCABC", "Pass. Group[0]=(0,6) Group[1]=(0,3)" }; + yield return new object[] { @"([a-c]*)\1", RegexOptions.IgnoreCase, "ABCABC", "Pass. Group[0]=(0,6) Group[1]=(0,3)" }; + yield return new object[] { @"a(?!b).", RegexOptions.None, "abad", "Pass. Group[0]=(2,2)" }; + yield return new object[] { @"a(?=d).", RegexOptions.None, "abad", "Pass. Group[0]=(2,2)" }; + yield return new object[] { @"a(?=c|d).", RegexOptions.None, "abad", "Pass. Group[0]=(2,2)" }; + yield return new object[] { @"a(?:b|c|d)(.)", RegexOptions.None, "ace", "Pass. Group[0]=(0,3) Group[1]=(2,1)" }; + yield return new object[] { @"a(?:b|c|d)*(.)", RegexOptions.None, "ace", "Pass. Group[0]=(0,3) Group[1]=(2,1)" }; + yield return new object[] { @"a(?:b|c|d)+?(.)", RegexOptions.None, "ace", "Pass. Group[0]=(0,3) Group[1]=(2,1)" }; + yield return new object[] { @"a(?:b|c|d)+?(.)", RegexOptions.None, "acdbcdbe", "Pass. Group[0]=(0,3) Group[1]=(2,1)" }; + yield return new object[] { @"a(?:b|c|d)+(.)", RegexOptions.None, "acdbcdbe", "Pass. Group[0]=(0,8) Group[1]=(7,1)" }; + yield return new object[] { @"a(?:b|c|d){2}(.)", RegexOptions.None, "acdbcdbe", "Pass. Group[0]=(0,4) Group[1]=(3,1)" }; + yield return new object[] { @"a(?:b|c|d){4,5}(.)", RegexOptions.None, "acdbcdbe", "Pass. Group[0]=(0,7) Group[1]=(6,1)" }; + yield return new object[] { @"a(?:b|c|d){4,5}?(.)", RegexOptions.None, "acdbcdbe", "Pass. Group[0]=(0,6) Group[1]=(5,1)" }; + yield return new object[] { @"((foo)|(bar))*", RegexOptions.None, "foobar", "Pass. Group[0]=(0,6) Group[1]=(0,3)(3,3) Group[2]=(0,3) Group[3]=(3,3)" }; + yield return new object[] { @":(?:", RegexOptions.None, "-", "Error." }; + yield return new object[] { @"a(?:b|c|d){6,7}(.)", RegexOptions.None, "acdbcdbe", "Pass. Group[0]=(0,8) Group[1]=(7,1)" }; + yield return new object[] { @"a(?:b|c|d){6,7}?(.)", RegexOptions.None, "acdbcdbe", "Pass. 
Group[0]=(0,8) Group[1]=(7,1)" }; + yield return new object[] { @"a(?:b|c|d){5,6}(.)", RegexOptions.None, "acdbcdbe", "Pass. Group[0]=(0,8) Group[1]=(7,1)" }; + yield return new object[] { @"a(?:b|c|d){5,6}?(.)", RegexOptions.None, "acdbcdbe", "Pass. Group[0]=(0,7) Group[1]=(6,1)" }; + yield return new object[] { @"a(?:b|c|d){5,7}(.)", RegexOptions.None, "acdbcdbe", "Pass. Group[0]=(0,8) Group[1]=(7,1)" }; + yield return new object[] { @"a(?:b|c|d){5,7}?(.)", RegexOptions.None, "acdbcdbe", "Pass. Group[0]=(0,7) Group[1]=(6,1)" }; + yield return new object[] { @"a(?:b|(c|e){1,2}?|d)+?(.)", RegexOptions.None, "ace", "Pass. Group[0]=(0,3) Group[1]=(1,1) Group[2]=(2,1)" }; + yield return new object[] { @"^(.+)?B", RegexOptions.None, "AB", "Pass. Group[0]=(0,2) Group[1]=(0,1)" }; + yield return new object[] { @"^([^a-z])|(\^)$", RegexOptions.None, ".", "Pass. Group[0]=(0,1) Group[1]=(0,1) Group[2]=" }; + yield return new object[] { @"^[<>]&", RegexOptions.None, "<&OUT", "Pass. Group[0]=(0,2)" }; + yield return new object[] { @"^(a\1?){4}$", RegexOptions.None, "aaaaaaaaaa", "Pass. Group[0]=(0,10) Group[1]=(0,1)(1,2)(3,3)(6,4)" }; + yield return new object[] { @"^(a\1?){4}$", RegexOptions.None, "aaaaaaaaa", "Fail." }; + yield return new object[] { @"^(a\1?){4}$", RegexOptions.None, "aaaaaaaaaaa", "Fail." }; + yield return new object[] { @"^(a(?(1)\1)){4}$", RegexOptions.None, "aaaaaaaaaa", "Pass. Group[0]=(0,10) Group[1]=(0,1)(1,2)(3,3)(6,4)" }; + yield return new object[] { @"^(a(?(1)\1)){4}$", RegexOptions.None, "aaaaaaaaa", "Fail." }; + yield return new object[] { @"^(a(?(1)\1)){4}$", RegexOptions.None, "aaaaaaaaaaa", "Fail." }; + yield return new object[] { @"((a{4})+)", RegexOptions.None, "aaaaaaaaa", "Pass. Group[0]=(0,8) Group[1]=(0,8) Group[2]=(0,4)(4,4)" }; + yield return new object[] { @"(((aa){2})+)", RegexOptions.None, "aaaaaaaaaa", "Pass. 
Group[0]=(0,8) Group[1]=(0,8) Group[2]=(0,4)(4,4) Group[3]=(0,2)(2,2)(4,2)(6,2)" }; + yield return new object[] { @"(((a{2}){2})+)", RegexOptions.None, "aaaaaaaaaa", "Pass. Group[0]=(0,8) Group[1]=(0,8) Group[2]=(0,4)(4,4) Group[3]=(0,2)(2,2)(4,2)(6,2)" }; + yield return new object[] { @"(?:(f)(o)(o)|(b)(a)(r))*", RegexOptions.None, "foobar", "Pass. Group[0]=(0,6) Group[1]=(0,1) Group[2]=(1,1) Group[3]=(2,1) Group[4]=(3,1) Group[5]=(4,1) Group[6]=(5,1)" }; + yield return new object[] { @"(?<=a)b", RegexOptions.None, "ab", "Pass. Group[0]=(1,1)" }; + yield return new object[] { @"(?<=a)b", RegexOptions.None, "cb", "Fail." }; + yield return new object[] { @"(?<=a)b", RegexOptions.None, "b", "Fail." }; + yield return new object[] { @"(?a+)ab", RegexOptions.None, "aaab", "Fail." }; + yield return new object[] { @"(?>a+)b", RegexOptions.None, "aaab", "Pass. Group[0]=(0,4)" }; + yield return new object[] { @"([[:]+)", RegexOptions.None, "a:[b]:", "Pass. Group[0]=(1,2) Group[1]=(1,2)" }; + yield return new object[] { @"([[=]+)", RegexOptions.None, "a=[b]=", "Pass. Group[0]=(1,2) Group[1]=(1,2)" }; + yield return new object[] { @"([[.]+)", RegexOptions.None, "a.[b].", "Pass. Group[0]=(1,2) Group[1]=(1,2)" }; + yield return new object[] { @"[a[:]b[:c]", RegexOptions.None, "abc", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"[a[:]b[:c]", RegexOptions.None, "abc", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"((?>a+)b)", RegexOptions.None, "aaab", "Pass. Group[0]=(0,4) Group[1]=(0,4)" }; + yield return new object[] { @"(?>(a+))b", RegexOptions.None, "aaab", "Pass. Group[0]=(0,4) Group[1]=(0,3)" }; + yield return new object[] { @"((?>[^()]+)|\([^()]*\))+", RegexOptions.None, "((abc(ade)ufh()()x", "Pass. Group[0]=(2,16) Group[1]=(2,3)(5,5)(10,3)(13,2)(15,2)(17,1)" }; + yield return new object[] { @"(?<=x+)", RegexOptions.None, "xxxxy", "Pass. Group[0]=(1,0)" }; + yield return new object[] { @"a{37,17}", RegexOptions.None, "-", "Error." 
}; + yield return new object[] { @"\Z", RegexOptions.None, "a\nb\n", "Pass. Group[0]=(3,0)" }; + yield return new object[] { @"\z", RegexOptions.None, "a\nb\n", "Pass. Group[0]=(4,0)" }; + yield return new object[] { @"$", RegexOptions.None, "a\nb\n", "Pass. Group[0]=(3,0)" }; + yield return new object[] { @"\Z", RegexOptions.None, "b\na\n", "Pass. Group[0]=(3,0)" }; + yield return new object[] { @"\z", RegexOptions.None, "b\na\n", "Pass. Group[0]=(4,0)" }; + yield return new object[] { @"$", RegexOptions.None, "b\na\n", "Pass. Group[0]=(3,0)" }; + yield return new object[] { @"\Z", RegexOptions.None, "b\na", "Pass. Group[0]=(3,0)" }; + yield return new object[] { @"\z", RegexOptions.None, "b\na", "Pass. Group[0]=(3,0)" }; + yield return new object[] { @"$", RegexOptions.None, "b\na", "Pass. Group[0]=(3,0)" }; + yield return new object[] { @"\Z", RegexOptions.Multiline, "a\nb\n", "Pass. Group[0]=(3,0)" }; + yield return new object[] { @"\z", RegexOptions.Multiline, "a\nb\n", "Pass. Group[0]=(4,0)" }; + yield return new object[] { @"$", RegexOptions.Multiline, "a\nb\n", "Pass. Group[0]=(1,0)" }; + yield return new object[] { @"\Z", RegexOptions.Multiline, "b\na\n", "Pass. Group[0]=(3,0)" }; + yield return new object[] { @"\z", RegexOptions.Multiline, "b\na\n", "Pass. Group[0]=(4,0)" }; + yield return new object[] { @"$", RegexOptions.Multiline, "b\na\n", "Pass. Group[0]=(1,0)" }; + yield return new object[] { @"\Z", RegexOptions.Multiline, "b\na", "Pass. Group[0]=(3,0)" }; + yield return new object[] { @"\z", RegexOptions.Multiline, "b\na", "Pass. Group[0]=(3,0)" }; + yield return new object[] { @"$", RegexOptions.Multiline, "b\na", "Pass. Group[0]=(1,0)" }; + yield return new object[] { @"a\Z", RegexOptions.None, "a\nb\n", "Fail." }; + yield return new object[] { @"a\z", RegexOptions.None, "a\nb\n", "Fail." }; + yield return new object[] { @"a$", RegexOptions.None, "a\nb\n", "Fail." }; + yield return new object[] { @"a\Z", RegexOptions.None, "b\na\n", "Pass. 
Group[0]=(2,1)" }; + yield return new object[] { @"a\z", RegexOptions.None, "b\na\n", "Fail." }; + yield return new object[] { @"a$", RegexOptions.None, "b\na\n", "Pass. Group[0]=(2,1)" }; + yield return new object[] { @"a\Z", RegexOptions.None, "b\na", "Pass. Group[0]=(2,1)" }; + yield return new object[] { @"a\z", RegexOptions.None, "b\na", "Pass. Group[0]=(2,1)" }; + yield return new object[] { @"a$", RegexOptions.None, "b\na", "Pass. Group[0]=(2,1)" }; + yield return new object[] { @"a\z", RegexOptions.Multiline, "a\nb\n", "Fail." }; + yield return new object[] { @"a$", RegexOptions.Multiline, "a\nb\n", "Pass. Group[0]=(0,1)" }; + yield return new object[] { @"a\Z", RegexOptions.Multiline, "b\na\n", "Pass. Group[0]=(2,1)" }; + yield return new object[] { @"a\z", RegexOptions.Multiline, "b\na\n", "Fail." }; + yield return new object[] { @"a$", RegexOptions.Multiline, "b\na\n", "Pass. Group[0]=(2,1)" }; + yield return new object[] { @"a\Z", RegexOptions.Multiline, "b\na", "Pass. Group[0]=(2,1)" }; + yield return new object[] { @"a\z", RegexOptions.Multiline, "b\na", "Pass. Group[0]=(2,1)" }; + yield return new object[] { @"a$", RegexOptions.Multiline, "b\na", "Pass. Group[0]=(2,1)" }; + yield return new object[] { @"aa\Z", RegexOptions.None, "aa\nb\n", "Fail." }; + yield return new object[] { @"aa\z", RegexOptions.None, "aa\nb\n", "Fail." }; + yield return new object[] { @"aa$", RegexOptions.None, "aa\nb\n", "Fail." }; + yield return new object[] { @"aa\Z", RegexOptions.None, "b\naa\n", "Pass. Group[0]=(2,2)" }; + yield return new object[] { @"aa\z", RegexOptions.None, "b\naa\n", "Fail." }; + yield return new object[] { @"aa$", RegexOptions.None, "b\naa\n", "Pass. Group[0]=(2,2)" }; + yield return new object[] { @"aa\Z", RegexOptions.None, "b\naa", "Pass. Group[0]=(2,2)" }; + yield return new object[] { @"aa\z", RegexOptions.None, "b\naa", "Pass. Group[0]=(2,2)" }; + yield return new object[] { @"aa$", RegexOptions.None, "b\naa", "Pass. 
Group[0]=(2,2)" }; + yield return new object[] { @"aa\z", RegexOptions.Multiline, "aa\nb\n", "Fail." }; + yield return new object[] { @"aa$", RegexOptions.Multiline, "aa\nb\n", "Pass. Group[0]=(0,2)" }; + yield return new object[] { @"aa\Z", RegexOptions.Multiline, "b\naa\n", "Pass. Group[0]=(2,2)" }; + yield return new object[] { @"aa\z", RegexOptions.Multiline, "b\naa\n", "Fail." }; + yield return new object[] { @"aa$", RegexOptions.Multiline, "b\naa\n", "Pass. Group[0]=(2,2)" }; + yield return new object[] { @"aa\Z", RegexOptions.Multiline, "b\naa", "Pass. Group[0]=(2,2)" }; + yield return new object[] { @"aa\z", RegexOptions.Multiline, "b\naa", "Pass. Group[0]=(2,2)" }; + yield return new object[] { @"aa$", RegexOptions.Multiline, "b\naa", "Pass. Group[0]=(2,2)" }; + yield return new object[] { @"aa\Z", RegexOptions.None, "ac\nb\n", "Fail." }; + yield return new object[] { @"aa\z", RegexOptions.None, "ac\nb\n", "Fail." }; + yield return new object[] { @"aa$", RegexOptions.None, "ac\nb\n", "Fail." }; + yield return new object[] { @"aa\Z", RegexOptions.None, "b\nac\n", "Fail." }; + yield return new object[] { @"aa\z", RegexOptions.None, "b\nac\n", "Fail." }; + yield return new object[] { @"aa$", RegexOptions.None, "b\nac\n", "Fail." }; + yield return new object[] { @"aa\Z", RegexOptions.None, "b\nac", "Fail." }; + yield return new object[] { @"aa\z", RegexOptions.None, "b\nac", "Fail." }; + yield return new object[] { @"aa$", RegexOptions.None, "b\nac", "Fail." }; + yield return new object[] { @"aa\Z", RegexOptions.Multiline, "ac\nb\n", "Fail." }; + yield return new object[] { @"aa\z", RegexOptions.Multiline, "ac\nb\n", "Fail." }; + yield return new object[] { @"aa$", RegexOptions.Multiline, "ac\nb\n", "Fail." }; + yield return new object[] { @"aa\Z", RegexOptions.Multiline, "b\nac\n", "Fail." }; + yield return new object[] { @"aa\z", RegexOptions.Multiline, "b\nac\n", "Fail." }; + yield return new object[] { @"aa$", RegexOptions.Multiline, "b\nac\n", "Fail." 
}; + yield return new object[] { @"aa\Z", RegexOptions.Multiline, "b\nac", "Fail." }; + yield return new object[] { @"aa\z", RegexOptions.Multiline, "b\nac", "Fail." }; + yield return new object[] { @"aa$", RegexOptions.Multiline, "b\nac", "Fail." }; + yield return new object[] { @"aa\Z", RegexOptions.None, "ca\nb\n", "Fail." }; + yield return new object[] { @"aa\z", RegexOptions.None, "ca\nb\n", "Fail." }; + yield return new object[] { @"aa$", RegexOptions.None, "ca\nb\n", "Fail." }; + yield return new object[] { @"aa\Z", RegexOptions.None, "b\nca\n", "Fail." }; + yield return new object[] { @"aa\z", RegexOptions.None, "b\nca\n", "Fail." }; + yield return new object[] { @"aa$", RegexOptions.None, "b\nca\n", "Fail." }; + yield return new object[] { @"aa\Z", RegexOptions.None, "b\nca", "Fail." }; + yield return new object[] { @"aa\z", RegexOptions.None, "b\nca", "Fail." }; + yield return new object[] { @"aa$", RegexOptions.None, "b\nca", "Fail." }; + yield return new object[] { @"aa\Z", RegexOptions.Multiline, "ca\nb\n", "Fail." }; + yield return new object[] { @"aa\z", RegexOptions.Multiline, "ca\nb\n", "Fail." }; + yield return new object[] { @"aa$", RegexOptions.Multiline, "ca\nb\n", "Fail." }; + yield return new object[] { @"aa\Z", RegexOptions.Multiline, "b\nca\n", "Fail." }; + yield return new object[] { @"aa\z", RegexOptions.Multiline, "b\nca\n", "Fail." }; + yield return new object[] { @"aa$", RegexOptions.Multiline, "b\nca\n", "Fail." }; + yield return new object[] { @"aa\Z", RegexOptions.Multiline, "b\nca", "Fail." }; + yield return new object[] { @"aa\z", RegexOptions.Multiline, "b\nca", "Fail." }; + yield return new object[] { @"aa$", RegexOptions.Multiline, "b\nca", "Fail." }; + yield return new object[] { @"ab\Z", RegexOptions.None, "ab\nb\n", "Fail." }; + yield return new object[] { @"ab\z", RegexOptions.None, "ab\nb\n", "Fail." }; + yield return new object[] { @"ab$", RegexOptions.None, "ab\nb\n", "Fail." 
}; + yield return new object[] { @"ab\Z", RegexOptions.None, "b\nab\n", "Pass. Group[0]=(2,2)" }; + yield return new object[] { @"ab\z", RegexOptions.None, "b\nab\n", "Fail." }; + yield return new object[] { @"ab$", RegexOptions.None, "b\nab\n", "Pass. Group[0]=(2,2)" }; + yield return new object[] { @"ab\Z", RegexOptions.None, "b\nab", "Pass. Group[0]=(2,2)" }; + yield return new object[] { @"ab\z", RegexOptions.None, "b\nab", "Pass. Group[0]=(2,2)" }; + yield return new object[] { @"ab$", RegexOptions.None, "b\nab", "Pass. Group[0]=(2,2)" }; + yield return new object[] { @"ab\z", RegexOptions.Multiline, "ab\nb\n", "Fail." }; + yield return new object[] { @"ab$", RegexOptions.Multiline, "ab\nb\n", "Pass. Group[0]=(0,2)" }; + yield return new object[] { @"ab\Z", RegexOptions.Multiline, "b\nab\n", "Pass. Group[0]=(2,2)" }; + yield return new object[] { @"ab\z", RegexOptions.Multiline, "b\nab\n", "Fail." }; + yield return new object[] { @"ab$", RegexOptions.Multiline, "b\nab\n", "Pass. Group[0]=(2,2)" }; + yield return new object[] { @"ab\Z", RegexOptions.Multiline, "b\nab", "Pass. Group[0]=(2,2)" }; + yield return new object[] { @"ab\z", RegexOptions.Multiline, "b\nab", "Pass. Group[0]=(2,2)" }; + yield return new object[] { @"ab$", RegexOptions.Multiline, "b\nab", "Pass. Group[0]=(2,2)" }; + yield return new object[] { @"ab\Z", RegexOptions.None, "ac\nb\n", "Fail." }; + yield return new object[] { @"ab\z", RegexOptions.None, "ac\nb\n", "Fail." }; + yield return new object[] { @"ab$", RegexOptions.None, "ac\nb\n", "Fail." }; + yield return new object[] { @"ab\Z", RegexOptions.None, "b\nac\n", "Fail." }; + yield return new object[] { @"ab\z", RegexOptions.None, "b\nac\n", "Fail." }; + yield return new object[] { @"ab$", RegexOptions.None, "b\nac\n", "Fail." }; + yield return new object[] { @"ab\Z", RegexOptions.None, "b\nac", "Fail." }; + yield return new object[] { @"ab\z", RegexOptions.None, "b\nac", "Fail." 
}; + yield return new object[] { @"ab$", RegexOptions.None, "b\nac", "Fail." }; + yield return new object[] { @"ab\Z", RegexOptions.Multiline, "ac\nb\n", "Fail." }; + yield return new object[] { @"ab\z", RegexOptions.Multiline, "ac\nb\n", "Fail." }; + yield return new object[] { @"ab$", RegexOptions.Multiline, "ac\nb\n", "Fail." }; + yield return new object[] { @"ab\Z", RegexOptions.Multiline, "b\nac\n", "Fail." }; + yield return new object[] { @"ab\z", RegexOptions.Multiline, "b\nac\n", "Fail." }; + yield return new object[] { @"ab$", RegexOptions.Multiline, "b\nac\n", "Fail." }; + yield return new object[] { @"ab\Z", RegexOptions.Multiline, "b\nac", "Fail." }; + yield return new object[] { @"ab\z", RegexOptions.Multiline, "b\nac", "Fail." }; + yield return new object[] { @"ab$", RegexOptions.Multiline, "b\nac", "Fail." }; + yield return new object[] { @"ab\Z", RegexOptions.None, "ca\nb\n", "Fail." }; + yield return new object[] { @"ab\z", RegexOptions.None, "ca\nb\n", "Fail." }; + yield return new object[] { @"ab$", RegexOptions.None, "ca\nb\n", "Fail." }; + yield return new object[] { @"ab\Z", RegexOptions.None, "b\nca\n", "Fail." }; + yield return new object[] { @"ab\z", RegexOptions.None, "b\nca\n", "Fail." }; + yield return new object[] { @"ab$", RegexOptions.None, "b\nca\n", "Fail." }; + yield return new object[] { @"ab\Z", RegexOptions.None, "b\nca", "Fail." }; + yield return new object[] { @"ab\z", RegexOptions.None, "b\nca", "Fail." }; + yield return new object[] { @"ab$", RegexOptions.None, "b\nca", "Fail." }; + yield return new object[] { @"ab\Z", RegexOptions.Multiline, "ca\nb\n", "Fail." }; + yield return new object[] { @"ab\z", RegexOptions.Multiline, "ca\nb\n", "Fail." }; + yield return new object[] { @"ab$", RegexOptions.Multiline, "ca\nb\n", "Fail." }; + yield return new object[] { @"ab\Z", RegexOptions.Multiline, "b\nca\n", "Fail." }; + yield return new object[] { @"ab\z", RegexOptions.Multiline, "b\nca\n", "Fail." 
}; + yield return new object[] { @"ab$", RegexOptions.Multiline, "b\nca\n", "Fail." }; + yield return new object[] { @"ab\Z", RegexOptions.Multiline, "b\nca", "Fail." }; + yield return new object[] { @"ab\z", RegexOptions.Multiline, "b\nca", "Fail." }; + yield return new object[] { @"ab$", RegexOptions.Multiline, "b\nca", "Fail." }; + yield return new object[] { @"abb\Z", RegexOptions.None, "abb\nb\n", "Fail." }; + yield return new object[] { @"abb\z", RegexOptions.None, "abb\nb\n", "Fail." }; + yield return new object[] { @"abb$", RegexOptions.None, "abb\nb\n", "Fail." }; + yield return new object[] { @"abb\Z", RegexOptions.None, "b\nabb\n", "Pass. Group[0]=(2,3)" }; + yield return new object[] { @"abb\z", RegexOptions.None, "b\nabb\n", "Fail." }; + yield return new object[] { @"abb$", RegexOptions.None, "b\nabb\n", "Pass. Group[0]=(2,3)" }; + yield return new object[] { @"abb\Z", RegexOptions.None, "b\nabb", "Pass. Group[0]=(2,3)" }; + yield return new object[] { @"abb\z", RegexOptions.None, "b\nabb", "Pass. Group[0]=(2,3)" }; + yield return new object[] { @"abb$", RegexOptions.None, "b\nabb", "Pass. Group[0]=(2,3)" }; + yield return new object[] { @"abb\z", RegexOptions.Multiline, "abb\nb\n", "Fail." }; + yield return new object[] { @"abb$", RegexOptions.Multiline, "abb\nb\n", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"abb\Z", RegexOptions.Multiline, "b\nabb\n", "Pass. Group[0]=(2,3)" }; + yield return new object[] { @"abb\z", RegexOptions.Multiline, "b\nabb\n", "Fail." }; + yield return new object[] { @"abb$", RegexOptions.Multiline, "b\nabb\n", "Pass. Group[0]=(2,3)" }; + yield return new object[] { @"abb\Z", RegexOptions.Multiline, "b\nabb", "Pass. Group[0]=(2,3)" }; + yield return new object[] { @"abb\z", RegexOptions.Multiline, "b\nabb", "Pass. Group[0]=(2,3)" }; + yield return new object[] { @"abb$", RegexOptions.Multiline, "b\nabb", "Pass. Group[0]=(2,3)" }; + yield return new object[] { @"abb\Z", RegexOptions.None, "ac\nb\n", "Fail." 
}; + yield return new object[] { @"abb\z", RegexOptions.None, "ac\nb\n", "Fail." }; + yield return new object[] { @"abb$", RegexOptions.None, "ac\nb\n", "Fail." }; + yield return new object[] { @"abb\Z", RegexOptions.None, "b\nac\n", "Fail." }; + yield return new object[] { @"abb\z", RegexOptions.None, "b\nac\n", "Fail." }; + yield return new object[] { @"abb$", RegexOptions.None, "b\nac\n", "Fail." }; + yield return new object[] { @"abb\Z", RegexOptions.None, "b\nac", "Fail." }; + yield return new object[] { @"abb\z", RegexOptions.None, "b\nac", "Fail." }; + yield return new object[] { @"abb$", RegexOptions.None, "b\nac", "Fail." }; + yield return new object[] { @"abb\Z", RegexOptions.Multiline, "ac\nb\n", "Fail." }; + yield return new object[] { @"abb\z", RegexOptions.Multiline, "ac\nb\n", "Fail." }; + yield return new object[] { @"abb$", RegexOptions.Multiline, "ac\nb\n", "Fail." }; + yield return new object[] { @"abb\Z", RegexOptions.Multiline, "b\nac\n", "Fail." }; + yield return new object[] { @"abb\z", RegexOptions.Multiline, "b\nac\n", "Fail." }; + yield return new object[] { @"abb$", RegexOptions.Multiline, "b\nac\n", "Fail." }; + yield return new object[] { @"abb\Z", RegexOptions.Multiline, "b\nac", "Fail." }; + yield return new object[] { @"abb\z", RegexOptions.Multiline, "b\nac", "Fail." }; + yield return new object[] { @"abb$", RegexOptions.Multiline, "b\nac", "Fail." }; + yield return new object[] { @"abb\Z", RegexOptions.None, "ca\nb\n", "Fail." }; + yield return new object[] { @"abb\z", RegexOptions.None, "ca\nb\n", "Fail." }; + yield return new object[] { @"abb$", RegexOptions.None, "ca\nb\n", "Fail." }; + yield return new object[] { @"abb\Z", RegexOptions.None, "b\nca\n", "Fail." }; + yield return new object[] { @"abb\z", RegexOptions.None, "b\nca\n", "Fail." }; + yield return new object[] { @"abb$", RegexOptions.None, "b\nca\n", "Fail." }; + yield return new object[] { @"abb\Z", RegexOptions.None, "b\nca", "Fail." 
}; + yield return new object[] { @"abb\z", RegexOptions.None, "b\nca", "Fail." }; + yield return new object[] { @"abb$", RegexOptions.None, "b\nca", "Fail." }; + yield return new object[] { @"abb\Z", RegexOptions.Multiline, "ca\nb\n", "Fail." }; + yield return new object[] { @"abb\z", RegexOptions.Multiline, "ca\nb\n", "Fail." }; + yield return new object[] { @"abb$", RegexOptions.Multiline, "ca\nb\n", "Fail." }; + yield return new object[] { @"abb\Z", RegexOptions.Multiline, "b\nca\n", "Fail." }; + yield return new object[] { @"abb\z", RegexOptions.Multiline, "b\nca\n", "Fail." }; + yield return new object[] { @"abb$", RegexOptions.Multiline, "b\nca\n", "Fail." }; + yield return new object[] { @"abb\Z", RegexOptions.Multiline, "b\nca", "Fail." }; + yield return new object[] { @"abb\z", RegexOptions.Multiline, "b\nca", "Fail." }; + yield return new object[] { @"abb$", RegexOptions.Multiline, "b\nca", "Fail." }; + yield return new object[] { @"(^|x)(c)", RegexOptions.None, "ca", "Pass. Group[0]=(0,1) Group[1]=(0,0) Group[2]=(0,1)" }; + yield return new object[] { @"a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz", RegexOptions.None, "x", "Fail." }; + yield return new object[] { @"round\(((?>[^()]+))\)", RegexOptions.None, "_I(round(xs * sz),1)", "Pass. Group[0]=(3,14) Group[1]=(9,7)" }; + yield return new object[] { @"foo.bart", RegexOptions.None, "foo.bart", "Pass. Group[0]=(0,8)" }; + yield return new object[] { @"^d[x][x][x]", RegexOptions.Multiline, "abcd\ndxxx", "Pass. Group[0]=(5,4)" }; + yield return new object[] { @".X(.+)+X", RegexOptions.None, "bbbbXcXaaaaaaaa", "Pass. Group[0]=(3,4) Group[1]=(5,1)" }; + yield return new object[] { @".X(.+)+XX", RegexOptions.None, "bbbbXcXXaaaaaaaa", "Pass. Group[0]=(3,5) Group[1]=(5,1)" }; + yield return new object[] { @".XX(.+)+X", RegexOptions.None, "bbbbXXcXaaaaaaaa", "Pass. Group[0]=(3,5) Group[1]=(6,1)" }; + yield return new object[] { @".X(.+)+X", RegexOptions.None, "bbbbXXaaaaaaaaa", "Fail." 
}; + yield return new object[] { @".X(.+)+XX", RegexOptions.None, "bbbbXXXaaaaaaaaa", "Fail." }; + yield return new object[] { @".XX(.+)+X", RegexOptions.None, "bbbbXXXaaaaaaaaa", "Fail." }; + yield return new object[] { @".X(.+)+[X]", RegexOptions.None, "bbbbXcXaaaaaaaa", "Pass. Group[0]=(3,4) Group[1]=(5,1)" }; + yield return new object[] { @".X(.+)+[X][X]", RegexOptions.None, "bbbbXcXXaaaaaaaa", "Pass. Group[0]=(3,5) Group[1]=(5,1)" }; + yield return new object[] { @".XX(.+)+[X]", RegexOptions.None, "bbbbXXcXaaaaaaaa", "Pass. Group[0]=(3,5) Group[1]=(6,1)" }; + yield return new object[] { @".X(.+)+[X]", RegexOptions.None, "bbbbXXaaaaaaaaa", "Fail." }; + yield return new object[] { @".X(.+)+[X][X]", RegexOptions.None, "bbbbXXXaaaaaaaaa", "Fail." }; + yield return new object[] { @".XX(.+)+[X]", RegexOptions.None, "bbbbXXXaaaaaaaaa", "Fail." }; + yield return new object[] { @".[X](.+)+[X]", RegexOptions.None, "bbbbXcXaaaaaaaa", "Pass. Group[0]=(3,4) Group[1]=(5,1)" }; + yield return new object[] { @".[X](.+)+[X][X]", RegexOptions.None, "bbbbXcXXaaaaaaaa", "Pass. Group[0]=(3,5) Group[1]=(5,1)" }; + yield return new object[] { @".[X][X](.+)+[X]", RegexOptions.None, "bbbbXXcXaaaaaaaa", "Pass. Group[0]=(3,5) Group[1]=(6,1)" }; + yield return new object[] { @".[X](.+)+[X]", RegexOptions.None, "bbbbXXaaaaaaaaa", "Fail." }; + yield return new object[] { @".[X](.+)+[X][X]", RegexOptions.None, "bbbbXXXaaaaaaaaa", "Fail." }; + yield return new object[] { @".[X][X](.+)+[X]", RegexOptions.None, "bbbbXXXaaaaaaaaa", "Fail." }; + yield return new object[] { @"tt+$", RegexOptions.None, "xxxtt", "Pass. Group[0]=(3,2)" }; + yield return new object[] { @"([\d-z]+)", RegexOptions.None, "a0-za", "Pass. Group[0]=(1,3) Group[1]=(1,3)" }; + yield return new object[] { @"([\d-\s]+)", RegexOptions.None, "a0- z", "Pass. Group[0]=(1,3) Group[1]=(1,3)" }; + yield return new object[] { @"\GX.*X", RegexOptions.None, "aaaXbX", "Fail." 
}; + yield return new object[] { @"(\d+\.\d+)", RegexOptions.None, "3.1415926", "Pass. Group[0]=(0,9) Group[1]=(0,9)" }; + yield return new object[] { @"(\ba.{0,10}br)", RegexOptions.None, "have a web browser", "Pass. Group[0]=(5,8) Group[1]=(5,8)" }; + yield return new object[] { @"\.c(pp|xx|c)?$", RegexOptions.IgnoreCase, "Changes", "Fail." }; + yield return new object[] { @"\.c(pp|xx|c)?$", RegexOptions.IgnoreCase, "IO.c", "Pass. Group[0]=(2,2) Group[1]=" }; + yield return new object[] { @"(\.c(pp|xx|c)?$)", RegexOptions.IgnoreCase, "IO.c", "Pass. Group[0]=(2,2) Group[1]=(2,2) Group[2]=" }; + yield return new object[] { @"^([a-z]:)", RegexOptions.None, "C:/", "Fail." }; + yield return new object[] { @"^\S\s+aa$", RegexOptions.Multiline, "\nx aa", "Pass. Group[0]=(1,4)" }; + yield return new object[] { @"(^|a)b", RegexOptions.None, "ab", "Pass. Group[0]=(0,2) Group[1]=(0,1)" }; + yield return new object[] { @"^([ab]*?)(b)?(c)$", RegexOptions.None, "abac", "Pass. Group[0]=(0,4) Group[1]=(0,3) Group[2]= Group[3]=(3,1)" }; + yield return new object[] { @"(\w)?(abc)\1b", RegexOptions.None, "abcab", "Fail." }; + yield return new object[] { @"^(?:.,){2}c", RegexOptions.None, "a,b,c", "Pass. Group[0]=(0,5)" }; + yield return new object[] { @"^(.,){2}c", RegexOptions.None, "a,b,c", "Pass. Group[0]=(0,5) Group[1]=(0,2)(2,2)" }; + yield return new object[] { @"^(?:[^,]*,){2}c", RegexOptions.None, "a,b,c", "Pass. Group[0]=(0,5)" }; + yield return new object[] { @"^([^,]*,){2}c", RegexOptions.None, "a,b,c", "Pass. Group[0]=(0,5) Group[1]=(0,2)(2,2)" }; + yield return new object[] { @"^([^,]*,){3}d", RegexOptions.None, "aaa,b,c,d", "Pass. Group[0]=(0,9) Group[1]=(0,4)(4,2)(6,2)" }; + yield return new object[] { @"^([^,]*,){3,}d", RegexOptions.None, "aaa,b,c,d", "Pass. Group[0]=(0,9) Group[1]=(0,4)(4,2)(6,2)" }; + yield return new object[] { @"^([^,]*,){0,3}d", RegexOptions.None, "aaa,b,c,d", "Pass. 
Group[0]=(0,9) Group[1]=(0,4)(4,2)(6,2)" }; + yield return new object[] { @"^([^,]{1,3},){3}d", RegexOptions.None, "aaa,b,c,d", "Pass. Group[0]=(0,9) Group[1]=(0,4)(4,2)(6,2)" }; + yield return new object[] { @"^([^,]{1,3},){3,}d", RegexOptions.None, "aaa,b,c,d", "Pass. Group[0]=(0,9) Group[1]=(0,4)(4,2)(6,2)" }; + yield return new object[] { @"^([^,]{1,3},){0,3}d", RegexOptions.None, "aaa,b,c,d", "Pass. Group[0]=(0,9) Group[1]=(0,4)(4,2)(6,2)" }; + yield return new object[] { @"^([^,]{1,},){3}d", RegexOptions.None, "aaa,b,c,d", "Pass. Group[0]=(0,9) Group[1]=(0,4)(4,2)(6,2)" }; + yield return new object[] { @"^([^,]{1,},){3,}d", RegexOptions.None, "aaa,b,c,d", "Pass. Group[0]=(0,9) Group[1]=(0,4)(4,2)(6,2)" }; + yield return new object[] { @"^([^,]{1,},){0,3}d", RegexOptions.None, "aaa,b,c,d", "Pass. Group[0]=(0,9) Group[1]=(0,4)(4,2)(6,2)" }; + yield return new object[] { @"^([^,]{0,3},){3}d", RegexOptions.None, "aaa,b,c,d", "Pass. Group[0]=(0,9) Group[1]=(0,4)(4,2)(6,2)" }; + yield return new object[] { @"^([^,]{0,3},){3,}d", RegexOptions.None, "aaa,b,c,d", "Pass. Group[0]=(0,9) Group[1]=(0,4)(4,2)(6,2)" }; + yield return new object[] { @"^([^,]{0,3},){0,3}d", RegexOptions.None, "aaa,b,c,d", "Pass. Group[0]=(0,9) Group[1]=(0,4)(4,2)(6,2)" }; + yield return new object[] { @"(?i)", RegexOptions.None, "", "Pass. Group[0]=(0,0)" }; + yield return new object[] { @"(?!\A)x", RegexOptions.Multiline, "a\nxb\n", "Pass. Group[0]=(2,1)" }; + yield return new object[] { @"^(a(b)?)+$", RegexOptions.None, "aba", "Pass. Group[0]=(0,3) Group[1]=(0,2)(2,1) Group[2]=(1,1)" }; + yield return new object[] { @"^(aa(bb)?)+$", RegexOptions.None, "aabbaa", "Pass. Group[0]=(0,6) Group[1]=(0,4)(4,2) Group[2]=(2,2)" }; + yield return new object[] { @"^.{9}abc.*\n", RegexOptions.Multiline, "123\nabcabcabcabc\n", "Pass. Group[0]=(4,13)" }; + yield return new object[] { @"^(a)?a$", RegexOptions.None, "a", "Pass. 
Group[0]=(0,1) Group[1]=" }; + yield return new object[] { @"^(a)?(?(1)a|b)+$", RegexOptions.None, "a", "Fail." }; + yield return new object[] { @"^(a\1?)(a\1?)(a\2?)(a\3?)$", RegexOptions.None, "aaaaaa", "Pass. Group[0]=(0,6) Group[1]=(0,1) Group[2]=(1,2) Group[3]=(3,1) Group[4]=(4,2)" }; + yield return new object[] { @"^(a\1?){4}$", RegexOptions.None, "aaaaaa", "Pass. Group[0]=(0,6) Group[1]=(0,1)(1,2)(3,1)(4,2)" }; + yield return new object[] { @"^(0+)?(?:x(1))?", RegexOptions.None, "x1", "Pass. Group[0]=(0,2) Group[1]= Group[2]=(1,1)" }; + yield return new object[] { @"^([0-9a-fA-F]+)(?:x([0-9a-fA-F]+)?)(?:x([0-9a-fA-F]+))?", RegexOptions.None, "012cxx0190", "Pass. Group[0]=(0,10) Group[1]=(0,4) Group[2]= Group[3]=(6,4)" }; + yield return new object[] { @"^(b+?|a){1,2}c", RegexOptions.None, "bbbac", "Pass. Group[0]=(0,5) Group[1]=(0,3)(3,1)" }; + yield return new object[] { @"^(b+?|a){1,2}c", RegexOptions.None, "bbbbac", "Pass. Group[0]=(0,6) Group[1]=(0,4)(4,1)" }; + yield return new object[] { @"\((\w\. \w+)\)", RegexOptions.None, "cd. (A. Tw)", "Pass. Group[0]=(4,7) Group[1]=(5,5)" }; + yield return new object[] { @"((?:aaaa|bbbb)cccc)?", RegexOptions.None, "aaaacccc", "Pass. Group[0]=(0,8) Group[1]=(0,8)" }; + yield return new object[] { @"((?:aaaa|bbbb)cccc)?", RegexOptions.None, "bbbbcccc", "Pass. Group[0]=(0,8) Group[1]=(0,8)" }; + yield return new object[] { @"^(foo)|(bar)$", RegexOptions.None, "foobar", "Pass. Group[0]=(0,3) Group[1]=(0,3) Group[2]=" }; + yield return new object[] { @"^(foo)|(bar)$", RegexOptions.RightToLeft, "foobar", "Pass. Group[0]=(3,3) Group[1]= Group[2]=(3,3)" }; + yield return new object[] { @"b", RegexOptions.RightToLeft, "babaaa", "Pass. Group[0]=(2,1)" }; + yield return new object[] { @"bab", RegexOptions.RightToLeft, "babababaa", "Pass. Group[0]=(4,3)" }; + yield return new object[] { @"abb", RegexOptions.RightToLeft , "abb", "Pass. 
Group[0]=(0,3)" }; + yield return new object[] { @"b$", RegexOptions.RightToLeft | RegexOptions.Multiline, "aab\naab", "Pass. Group[0]=(6,1)" }; + yield return new object[] { @"^a", RegexOptions.RightToLeft | RegexOptions.Multiline, "aab\naab", "Pass. Group[0]=(4,1)" }; + yield return new object[] { @"^aaab", RegexOptions.RightToLeft | RegexOptions.Multiline, "aaab\naab", "Pass. Group[0]=(0,4)" }; + yield return new object[] { @"abb{2}", RegexOptions.RightToLeft , "abbb", "Pass. Group[0]=(0,4)" }; + yield return new object[] { @"abb{1,2}", RegexOptions.RightToLeft , "abbb", "Pass. Group[0]=(0,4)" }; + yield return new object[] { @"abb{1,2}", RegexOptions.RightToLeft , "abbbbbaaaa", "Pass. Group[0]=(0,4)" }; + yield return new object[] { @"\Ab", RegexOptions.RightToLeft, "bab\naaa", "Pass. Group[0]=(0,1)" }; + yield return new object[] { @"\Abab$", RegexOptions.RightToLeft, "bab", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"b\Z", RegexOptions.RightToLeft, "bab\naaa", "Fail." }; + yield return new object[] { @"b\Z", RegexOptions.RightToLeft, "babaaab", "Pass. Group[0]=(6,1)" }; + yield return new object[] { @"b\z", RegexOptions.RightToLeft, "babaaa", "Fail." }; + yield return new object[] { @"b\z", RegexOptions.RightToLeft, "babaaab", "Pass. Group[0]=(6,1)" }; + yield return new object[] { @"a\G", RegexOptions.RightToLeft, "babaaa", "Pass. Group[0]=(5,1)" }; + yield return new object[] { @"\Abaaa\G", RegexOptions.RightToLeft, "baaa", "Pass. Group[0]=(0,4)" }; + yield return new object[] { @"\bc", RegexOptions.RightToLeft, "aaa c aaa c a", "Pass. Group[0]=(10,1)" }; + yield return new object[] { @"\bc", RegexOptions.RightToLeft, "c aaa c", "Pass. Group[0]=(6,1)" }; + yield return new object[] { @"\bc", RegexOptions.RightToLeft, "aaa ac", "Fail." }; + yield return new object[] { @"\bc", RegexOptions.RightToLeft, "c aaa", "Pass. Group[0]=(0,1)" }; + yield return new object[] { @"\bc", RegexOptions.RightToLeft, "aaacaaa", "Fail." 
}; + yield return new object[] { @"\bc", RegexOptions.RightToLeft, "aaac aaa", "Fail." }; + yield return new object[] { @"\bc", RegexOptions.RightToLeft, "aaa ca caaa", "Pass. Group[0]=(7,1)" }; + yield return new object[] { @"\Bc", RegexOptions.RightToLeft, "ac aaa ac", "Pass. Group[0]=(8,1)" }; + yield return new object[] { @"\Bc", RegexOptions.RightToLeft, "aaa c", "Fail." }; + yield return new object[] { @"\Bc", RegexOptions.RightToLeft, "ca aaa", "Fail." }; + yield return new object[] { @"\Bc", RegexOptions.RightToLeft, "aaa c aaa", "Fail." }; + yield return new object[] { @"\Bc", RegexOptions.RightToLeft, " acaca ", "Pass. Group[0]=(4,1)" }; + yield return new object[] { @"\Bc", RegexOptions.RightToLeft, "aaac aaac", "Pass. Group[0]=(8,1)" }; + yield return new object[] { @"\Bc", RegexOptions.RightToLeft, "aaa caaa", "Fail." }; + yield return new object[] { @"b(a?)b", RegexOptions.RightToLeft, "aabababbaaababa", "Pass. Group[0]=(11,3) Group[1]=(12,1)" }; + yield return new object[] { @"b{4}", RegexOptions.RightToLeft, "abbbbaabbbbaabbb", "Pass. Group[0]=(7,4)" }; + yield return new object[] { @"b\1aa(.)", RegexOptions.RightToLeft, "bBaaB", "Pass. Group[0]=(0,5) Group[1]=(4,1)" }; + yield return new object[] { @"b(.)aa\1", RegexOptions.RightToLeft, "bBaaB", "Fail." }; + yield return new object[] { @"^(a\1?){4}$", RegexOptions.RightToLeft, "aaaaaa", "Pass. Group[0]=(0,6) Group[1]=(5,1)(3,2)(2,1)(0,2)" }; + yield return new object[] { @"^([0-9a-fA-F]+)(?:x([0-9a-fA-F]+)?)(?:x([0-9a-fA-F]+))?", RegexOptions.RightToLeft, "012cxx0190", "Pass. Group[0]=(0,10) Group[1]=(0,4) Group[2]= Group[3]=(6,4)" }; + yield return new object[] { @"^(b+?|a){1,2}c", RegexOptions.RightToLeft, "bbbac", "Pass. Group[0]=(0,5) Group[1]=(3,1)(0,3)" }; + yield return new object[] { @"\((\w\. \w+)\)", RegexOptions.RightToLeft, "cd. (A. Tw)", "Pass. Group[0]=(4,7) Group[1]=(5,5)" }; + yield return new object[] { @"((?:aaaa|bbbb)cccc)?", RegexOptions.RightToLeft, "aaaacccc", "Pass. 
Group[0]=(0,8) Group[1]=(0,8)" }; + yield return new object[] { @"((?:aaaa|bbbb)cccc)?", RegexOptions.RightToLeft, "bbbbcccc", "Pass. Group[0]=(0,8) Group[1]=(0,8)" }; + yield return new object[] { @"(?<=a)b", RegexOptions.RightToLeft, "ab", "Pass. Group[0]=(1,1)" }; + yield return new object[] { @"(?<=a)b", RegexOptions.RightToLeft, "cb", "Fail." }; + yield return new object[] { @"(?<=a)b", RegexOptions.RightToLeft, "b", "Fail." }; + yield return new object[] { @"(?[^()]+|\((?)|\)(?<-depth>))*(?(depth)(?!))\)", RegexOptions.None, "((a(b))c)", "Pass. Group[0]=(0,9) Group[1]=" }; + yield return new object[] { @"^\((?>[^()]+|\((?)|\)(?<-depth>))*(?(depth)(?!))\)$", RegexOptions.None, "((a(b))c)", "Pass. Group[0]=(0,9) Group[1]=" }; + yield return new object[] { @"^\((?>[^()]+|\((?)|\)(?<-depth>))*(?(depth)(?!))\)$", RegexOptions.None, "((a(b))c", "Fail." }; + yield return new object[] { @"^\((?>[^()]+|\((?)|\)(?<-depth>))*(?(depth)(?!))\)$", RegexOptions.None, "())", "Fail." }; + yield return new object[] { @"(((?\()[^()]*)+((?\))[^()]*)+)+(?(foo)(?!))", RegexOptions.None, "((a(b))c)", "Pass. Group[0]=(0,9) Group[1]=(0,9) Group[2]=(0,1)(1,2)(3,2) Group[3]=(5,1)(6,2)(8,1) Group[4]= Group[5]=(4,1)(2,4)(1,7)" }; + yield return new object[] { @"^(((?\()[^()]*)+((?\))[^()]*)+)+(?(foo)(?!))$", RegexOptions.None, "((a(b))c)", "Pass. Group[0]=(0,9) Group[1]=(0,9) Group[2]=(0,1)(1,2)(3,2) Group[3]=(5,1)(6,2)(8,1) Group[4]= Group[5]=(4,1)(2,4)(1,7)" }; + yield return new object[] { @"(((?\()[^()]*)+((?\))[^()]*)+)+(?(foo)(?!))", RegexOptions.None, "x(a((b)))b)x", "Pass. Group[0]=(1,9) Group[1]=(1,9) Group[2]=(1,2)(3,1)(4,2) Group[3]=(6,1)(7,1)(8,2) Group[4]= Group[5]=(5,1)(4,3)(2,6)" }; + yield return new object[] { @"(((?\()[^()]*)+((?\))[^()]*)+)+(?(foo)(?!))", RegexOptions.None, "x((a((b)))x", "Pass. 
Group[0]=(2,9) Group[1]=(2,9) Group[2]=(2,2)(4,1)(5,2) Group[3]=(7,1)(8,1)(9,2) Group[4]= Group[5]=(6,1)(5,3)(3,6)" }; + yield return new object[] { @"^(((?\()[^()]*)+((?\))[^()]*)+)+(?(foo)(?!))$", RegexOptions.None, "((a(b))c","Fail." }; + yield return new object[] { @"^(((?\()[^()]*)+((?\))[^()]*)+)+(?(foo)(?!))$", RegexOptions.None, "((a(b))c))","Fail." }; + yield return new object[] { @"^(((?\()[^()]*)+((?\))[^()]*)+)+(?(foo)(?!))$", RegexOptions.None, ")(","Fail." }; + yield return new object[] { @"^(((?\()[^()]*)+((?\))[^()]*)+)+(?(foo)(?!))$", RegexOptions.None, "((a((b))c)","Fail." }; + yield return new object[] { @"b", RegexOptions.RightToLeft, "babaaa", "Pass. Group[0]=(2,1)" }; + yield return new object[] { @"^((\[(?[^\]]+)\])|(?[^\.\[\]]+))$", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture, "[n]", "Pass. Group[0]=(0,3) Group[1]=(1,1)" }; + yield return new object[] { @"^((\[(?[^\]]+)\])|(?[^\.\[\]]+))$", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture, "n", "Pass. Group[0]=(0,1) Group[1]=(0,1)" }; + yield return new object[] { @"^((\[(?[^\]]+)\])|(?[^\.\[\]]+))$", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture, "n[i]e", "Fail." }; + yield return new object[] { @"^((\[(?[^\]]+)\])|(?[^\.\[\]]+))$", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture, "[n", "Fail." }; + yield return new object[] { @"^((\[(?[^\]]+)\])|(?[^\.\[\]]+))$", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture, "]n]", "Fail." }; + yield return new object[] { @"^((\[(?[^\]]+)\])|(?[^\.\[\]]+))$", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture, @"\[n\]", "Fail." }; + yield return new object[] { @"^((\[(?[^\]]+)\])|(?[^\.\[\]]+))$", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture, @"[n\]", "Pass. Group[0]=(0,4) Group[1]=(1,2)" }; + yield return new object[] { @"^((\[(?[^\]]+)\])|(?[^\.\[\]]+))$", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture, @"[n\[]", "Pass. 
Group[0]=(0,5) Group[1]=(1,3)" }; + yield return new object[] { @"^((\[(?[^\]]+)\])|(?[^\.\[\]]+))$", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture, @"[[n]", "Pass. Group[0]=(0,4) Group[1]=(1,2)" }; + yield return new object[] { @"^((\[(?[^\]]+)\])|(?[^\.\[\]]+))\s*\.\s*((\[(?[^\]]+)\])|(?[^\.\[\]]+))$", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture, "[s] . [n]", "Pass. Group[0]=(0,9) Group[1]=(1,1) Group[2]=(7,1)" }; + yield return new object[] { @"^((\[(?[^\]]+)\])|(?[^\.\[\]]+))\s*\.\s*((\[(?[^\]]+)\])|(?[^\.\[\]]+))$", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture, "[s] . n", "Pass. Group[0]=(0,7) Group[1]=(1,1) Group[2]=(6,1)" }; + yield return new object[] { @"^((\[(?[^\]]+)\])|(?[^\.\[\]]+))\s*\.\s*((\[(?[^\]]+)\])|(?[^\.\[\]]+))$", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture, "s.[ n ]", "Pass. Group[0]=(0,7) Group[1]=(0,1) Group[2]=(3,3)" }; + yield return new object[] { @"^((\[(?[^\]]+)\])|(?[^\.\[\]]+))\s*\.\s*((\[(?[^\]]+)\])|(?[^\.\[\]]+))$", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture, " . n", "Pass. Group[0]=(0,4) Group[1]=(0,1) Group[2]=(3,1)" }; + yield return new object[] { @"^((\[(?[^\]]+)\])|(?[^\.\[\]]+))\s*\.\s*((\[(?[^\]]+)\])|(?[^\.\[\]]+))$", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture, "s. ", "Pass. Group[0]=(0,3) Group[1]=(0,1) Group[2]=(2,1)" }; + yield return new object[] { @"^((\[(?[^\]]+)\])|(?[^\.\[\]]+))\s*\.\s*((\[(?[^\]]+)\])|(?[^\.\[\]]+))$", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture, "[.]. ", "Pass. Group[0]=(0,5) Group[1]=(1,1) Group[2]=(4,1)" }; + yield return new object[] { @"^((\[(?[^\]]+)\])|(?[^\.\[\]]+))\s*\.\s*((\[(?[^\]]+)\])|(?[^\.\[\]]+))\s*\.\s*((\[(?[^\]]+)\])|(?[^\.\[\]]+))$", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture, "[c].[s].[n]", "Pass. 
Group[0]=(0,11) Group[1]=(1,1) Group[2]=(5,1) Group[3]=(9,1)" }; + yield return new object[] { @"^((\[(?[^\]]+)\])|(?[^\.\[\]]+))\s*\.\s*((\[(?[^\]]+)\])|(?[^\.\[\]]+))\s*\.\s*((\[(?[^\]]+)\])|(?[^\.\[\]]+))$", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture, " c . s . n ", "Pass. Group[0]=(0,11) Group[1]=(0,3) Group[2]=(5,2) Group[3]=(9,2)" }; + yield return new object[] { @"^((\[(?[^\]]+)\])|(?[^\.\[\]]+))\s*\.\s*((\[(?[^\]]+)\])|(?[^\.\[\]]+))\s*\.\s*((\[(?[^\]]+)\])|(?[^\.\[\]]+))$", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture, " . [.] . [ ]", "Pass. Group[0]=(0,12) Group[1]=(0,1) Group[2]=(4,1) Group[3]=(10,1)" }; + yield return new object[] { @"^((\[(?[^\]]+)\])|(?[^\.\[\]]+))\s*\.\s*((\[(?[^\]]+)\])|(?[^\.\[\]]+))$", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture, "c.n", "Pass. Group[0]=(0,3) Group[1]=(0,1) Group[2]=(2,1)" }; + yield return new object[] { @"^((\[(?[^\]]+)\])|(?[^\.\[\]]+))\s*\.\s*((\[(?[^\]]+)\])|(?[^\.\[\]]+))$", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture, "[c] .[n]", "Pass. Group[0]=(0,8) Group[1]=(1,1) Group[2]=(6,1)" }; + yield return new object[] { @"^((\[(?[^\]]+)\])|(?[^\.\[\]]+))\s*\.\s*((\[(?[^\]]+)\])|(?[^\.\[\]]+))$", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture, "c.n.", "Fail." }; + yield return new object[] { @"^((\[(?[^\]]+)\])|(?[^\.\[\]]+))\s*\.\s*((\[(?[^\]]+)\])|(?[^\.\[\]]+))\s*\.\s*((\[(?[^\]]+)\])|(?[^\.\[\]]+))$", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture, "s.c.n", "Pass. Group[0]=(0,5) Group[1]=(0,1) Group[2]=(2,1) Group[3]=(4,1)" }; + yield return new object[] { @"^((\[(?[^\]]+)\])|(?[^\.\[\]]+))\s*\.\s*((\[(?[^\]]+)\])|(?[^\.\[\]]+))\s*\.\s*((\[(?[^\]]+)\])|(?[^\.\[\]]+))$", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture, "[s].[c].[n]", "Pass. 
Group[0]=(0,11) Group[1]=(1,1) Group[2]=(5,1) Group[3]=(9,1)" }; + yield return new object[] { @"^((\[(?[^\]]+)\])|(?[^\.\[\]]+))\s*\.\s*((\[(?[^\]]+)\])|(?[^\.\[\]]+))\s*\.\s*((\[(?[^\]]+)\])|(?[^\.\[\]]+))$", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture, "[s].[c].", "Fail." }; + yield return new object[] { @"^((\[(?.+)\])|(?\S+))([ ]+(?ASC|DESC))?$", RegexOptions.IgnoreCase|RegexOptions.ExplicitCapture, "[id]]", "Pass. Group[0]=(0,5) Group[1]=(1,3) Group[2]=" }; + yield return new object[] { @"a{1,2147483647}", RegexOptions.None, "a", "Pass. Group[0]=(0,1)" }; + yield return new object[] { @"^((\[(?[^\]]+)\])|(?[^\.\[\]]+))$", RegexOptions.None, "[a]", "Pass. Group[0]=(0,3) Group[1]=(0,3) Group[2]=(0,3) Group[3]=(1,1)" }; + + //// Ported from https://github.com/mono/mono/blob/0f2995e95e98e082c7c7039e17175cf2c6a00034/mcs/class/System/Test/System.Text.RegularExpressions/RegexMatchTests.cs + yield return new object[] { @"(a)(b)(c)", RegexOptions.ExplicitCapture, "abc", "Pass. Group[0]=(0,3)" }; + yield return new object[] { @"(a)(?<1>b)(c)", RegexOptions.ExplicitCapture, "abc", "Pass. Group[0]=(0,3) Group[1]=(1,1)" }; + yield return new object[] { @"(a)(?<2>b)(c)", RegexOptions.None, "abc", "Pass. Group[0]=(0,3) Group[1]=(0,1) Group[2]=(1,1)(2,1)" }; + yield return new object[] { @"(a)(?b)(c)", RegexOptions.ExplicitCapture, "abc", "Pass. Group[0]=(0,3) Group[1]=(1,1)" }; + yield return new object[] { @"(F)(2)(3)(4)(5)(6)(7)(8)(9)(10)(L)\11", RegexOptions.None, "F2345678910LL", "Pass. Group[0]=(0,13) Group[1]=(0,1) Group[2]=(1,1) Group[3]=(2,1) Group[4]=(3,1) Group[5]=(4,1) Group[6]=(5,1) Group[7]=(6,1) Group[8]=(7,1) Group[9]=(8,1) Group[10]=(9,2) Group[11]=(11,1)" }; + yield return new object[] { @"(F)(2)(3)(4)(5)(6)(7)(8)(9)(10)(L)\11", RegexOptions.ExplicitCapture, "F2345678910LL", "Fail." }; + yield return new object[] { @"(F)(2)(3)(4)(5)(6)(?7)(8)(9)(10)(L)\1", RegexOptions.None, "F2345678910L71", "Fail." 
}; + yield return new object[] { @"(F)(2)(3)(4)(5)(6)(7)(8)(9)(10)(L)\11", RegexOptions.None, "F2345678910LF1", "Fail." }; + yield return new object[] { @"(F)(2)(3)(4)(5)(6)(?7)(8)(9)(10)(L)\11", RegexOptions.None, "F2345678910L71", "Pass. Group[0]=(0,13) Group[1]=(0,1) Group[2]=(1,1) Group[3]=(2,1) Group[4]=(3,1) Group[5]=(4,1) Group[6]=(5,1) Group[7]=(7,1) Group[8]=(8,1) Group[9]=(9,2) Group[10]=(11,1) Group[11]=(6,1)" }; + yield return new object[] { @"(F)(2)(3)(?4)(5)(6)(?'S'7)(8)(9)(10)(L)\10", RegexOptions.None, "F2345678910L71", "Pass. Group[0]=(0,13) Group[1]=(0,1) Group[2]=(1,1) Group[3]=(2,1) Group[4]=(4,1) Group[5]=(5,1) Group[6]=(7,1) Group[7]=(8,1) Group[8]=(9,2) Group[9]=(11,1) Group[10]=(3,1)(6,1)" }; + yield return new object[] { @"(F)(2)(3)(?4)(5)(6)(?'S'7)(8)(9)(10)(L)\10", RegexOptions.ExplicitCapture, "F2345678910L70", "Fail." }; + yield return new object[] { @"(F)(2)(3)(?4)(5)(6)(?'S'7)(8)(9)(10)(L)\1", RegexOptions.ExplicitCapture, "F2345678910L70", "Pass. Group[0]=(0,13) Group[1]=(3,1)(6,1)" }; + yield return new object[] { @"(?n:(F)(2)(3)(?4)(5)(6)(?'S'7)(8)(9)(10)(L)\1)", RegexOptions.None, "F2345678910L70", "Pass. Group[0]=(0,13) Group[1]=(3,1)(6,1)" }; + yield return new object[] { @"(F)(2)(3)(?4)(5)(6)(?'S'7)(8)(9)(10)(L)(?(10)\10)", RegexOptions.None, "F2345678910L70","Pass. Group[0]=(0,13) Group[1]=(0,1) Group[2]=(1,1) Group[3]=(2,1) Group[4]=(4,1) Group[5]=(5,1) Group[6]=(7,1) Group[7]=(8,1) Group[8]=(9,2) Group[9]=(11,1) Group[10]=(3,1)(6,1)" }; + yield return new object[] { @"(F)(2)(3)(?4)(5)(6)(?'S'7)(8)(9)(10)(L)(?(S)|\10)", RegexOptions.None, "F2345678910L70","Pass. Group[0]=(0,12) Group[1]=(0,1) Group[2]=(1,1) Group[3]=(2,1) Group[4]=(4,1) Group[5]=(5,1) Group[6]=(7,1) Group[7]=(8,1) Group[8]=(9,2) Group[9]=(11,1) Group[10]=(3,1)(6,1)" }; + yield return new object[] { @"(F)(2)(3)(?4)(5)(6)(?'S'7)(8)(9)(10)(L)(?(7)|\10)", RegexOptions.None, "F2345678910L70","Pass. 
Group[0]=(0,12) Group[1]=(0,1) Group[2]=(1,1) Group[3]=(2,1) Group[4]=(4,1) Group[5]=(5,1) Group[6]=(7,1) Group[7]=(8,1) Group[8]=(9,2) Group[9]=(11,1) Group[10]=(3,1)(6,1)" }; + yield return new object[] { @"(F)(2)(3)(?4)(5)(6)(?'S'7)(8)(9)(10)(L)(?(K)|\10)", RegexOptions.None, "F2345678910L70","Pass. Group[0]=(0,13) Group[1]=(0,1) Group[2]=(1,1) Group[3]=(2,1) Group[4]=(4,1) Group[5]=(5,1) Group[6]=(7,1) Group[7]=(8,1) Group[8]=(9,2) Group[9]=(11,1) Group[10]=(3,1)(6,1)" }; + yield return new object[] { @"\P{IsHebrew}", RegexOptions.None, "\u05D0a", "Pass. Group[0]=(1,1)" }; + yield return new object[] { @"\p{IsHebrew}", RegexOptions.None, "abc\u05D0def", "Pass. Group[0]=(3,1)" }; + yield return new object[] { @"(?<=a+)(?:a)*bc", RegexOptions.None, "aabc", "Pass. Group[0]=(1,3)" }; + yield return new object[] { @"(?<=a*)(?:a)*bc", RegexOptions.None, "aabc", "Pass. Group[0]=(0,4)" }; + yield return new object[] { @"(?<=a{1,5})(?:a)*bc", RegexOptions.None, "aabc", "Pass. Group[0]=(1,3)" }; + yield return new object[] { @"(?<=a{1})(?:a)*bc", RegexOptions.None, "aabc", "Pass. Group[0]=(1,3)" }; + yield return new object[] { @"(?<=a{1,})(?:a)*bc", RegexOptions.None, "aabc", "Pass. Group[0]=(1,3)" }; + yield return new object[] { @"(?<=a+?)(?:a)*bc", RegexOptions.None, "aabc", "Pass. Group[0]=(1,3)" }; + yield return new object[] { @"(?<=a*?)(?:a)*bc", RegexOptions.None, "aabc", "Pass. Group[0]=(0,4)" }; + yield return new object[] { @"(?<=a{1,5}?)(?:a)*bc", RegexOptions.None, "aabc", "Pass. Group[0]=(1,3)" }; + yield return new object[] { @"(?<=a{1}?)(?:a)*bc", RegexOptions.None, "aabc", "Pass. Group[0]=(1,3)" }; + yield return new object[] { @"(?<=a{1}?)(?:a)*bc", RegexOptions.None, "aabc", "Pass. Group[0]=(1,3)" }; + yield return new object[] { @"(?b)(?'1'c)", RegexOptions.ExplicitCapture, "abc", "Pass. Group[0]=(0,3) Group[1]=(1,1)(2,1)" }; + yield return new object[] { @"(?>a*).", RegexOptions.ExplicitCapture, "aaaa", "Fail." 
}; + yield return new object[] { @"(?ab)c\1", RegexOptions.None, "abcabc", "Pass. Group[0]=(0,5) Group[1]=(0,2)" }; + yield return new object[] { @"\1", RegexOptions.ECMAScript, "-", "Fail." }; + yield return new object[] { @"\2", RegexOptions.ECMAScript, "-", "Fail." }; + yield return new object[] { @"(a)|\2", RegexOptions.ECMAScript, "-", "Fail." }; + yield return new object[] { @"\4400", RegexOptions.None, "asdf 012", "Pass. Group[0]=(4,2)" }; + yield return new object[] { @"\4400", RegexOptions.ECMAScript, "asdf 012", "Fail." }; + yield return new object[] { @"\4400", RegexOptions.None, "asdf$0012", "Fail." }; + yield return new object[] { @"\4400", RegexOptions.ECMAScript, "asdf$0012", "Pass. Group[0]=(4,3)" }; + yield return new object[] { @"(?<2>ab)(?c)(?d)", RegexOptions.None, "abcd", "Pass. Group[0]=(0,4) Group[1]=(2,1) Group[2]=(0,2) Group[3]=(3,1)" };// 61 + yield return new object[] { @"(?<1>ab)(c)", RegexOptions.None, "abc", "Pass. Group[0]=(0,3) Group[1]=(0,2)(2,1)" }; + yield return new object[] { @"(?<44>a)", RegexOptions.None, "a", "Pass. Group[0]=(0,1) Group[44]=(0,1)" }; + yield return new object[] { @"(?<44>a)(?<8>b)", RegexOptions.None, "ab", "Pass. Group[0]=(0,2) Group[8]=(1,1) Group[44]=(0,1)" }; + yield return new object[] { @"(?<44>a)(?<8>b)(?<1>c)(d)", RegexOptions.None, "abcd", "Pass. Group[0]=(0,4) Group[1]=(2,1)(3,1) Group[8]=(1,1) Group[44]=(0,1)" }; + yield return new object[] { @"(?<44>a)(?<44>b)", RegexOptions.None, "ab", "Pass. Group[0]=(0,2) Group[44]=(0,1)(1,1)" }; + yield return new object[] { @"(?<44>a)\440", RegexOptions.None, "a ", "Pass. Group[0]=(0,2) Group[44]=(0,1)" }; + yield return new object[] { @"(?<44>a)\440", RegexOptions.ECMAScript, "a ", "Fail." }; + yield return new object[] { @"(?<44>a)\440", RegexOptions.None, "aa0", "Fail." }; + yield return new object[] { @"(?<44>a)\440", RegexOptions.ECMAScript, "aa0", "Pass. 
Group[0]=(0,3) Group[44]=(0,1)" }; + } + } +} diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Cache.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Cache.Tests.cs index db10fe8f512fc..eee4af3a38e3a 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Cache.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Cache.Tests.cs @@ -10,6 +10,7 @@ namespace System.Text.RegularExpressions.Tests { + [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework)] public class RegexCacheTests { [Theory] diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Ctor.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Ctor.Tests.cs index fccd8fc6a4298..45986784625a2 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Ctor.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Ctor.Tests.cs @@ -51,6 +51,16 @@ public static void Ctor(string pattern, RegexOptions options, TimeSpan matchTime Assert.Equal(matchTimeout, regex3.MatchTimeout); } + /* Builds a Regex with the 0x80 (Debug) option bit OR'ed into the supplied options, once with None and once with Compiled, then checks that "a" does not match and "adefghi" does. */ [Theory] + [InlineData(RegexOptions.None)] + [InlineData(RegexOptions.Compiled)] + public void CtorDebugInvoke(RegexOptions options) + { + var r = new Regex("[abc]def(ghi|jkl)", options | (RegexOptions)0x80 /*RegexOptions.Debug*/); + Assert.False(r.Match("a").Success); + Assert.True(r.Match("adefghi").Success); + } + [Fact] public static void Ctor_Invalid() { @@ -96,5 +106,18 @@ public static void StaticCtor_InvalidTimeoutRange_ExceptionThrown() Assert.Throws(() => Regex.InfiniteMatchTimeout); }).Dispose(); } + + /* The first InitializeReferences call on an instance must succeed; a second call on the same instance must throw NotSupportedException. */ [Fact] + public void InitializeReferences_OnlyInvokedOnce() + { + var r = new DerivedRegex(); + r.InitializeReferences(); + Assert.Throws<NotSupportedException>(() => r.InitializeReferences()); + } + + /* Test-only subclass: hides the inherited InitializeReferences with a public method that forwards to the base implementation, so the test can call it directly. */ private sealed class DerivedRegex : Regex + { + public new void InitializeReferences() => base.InitializeReferences(); + } } } diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Groups.Tests.cs
b/src/libraries/System.Text.RegularExpressions/tests/Regex.Groups.Tests.cs index 5b3fe7254436e..a58ca56562648 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Groups.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Groups.Tests.cs @@ -3,11 +3,8 @@ // See the LICENSE file in the project root for more information. using System.Collections.Generic; -using System.Diagnostics; using System.Globalization; -using System.Linq; using System.Tests; -using Microsoft.DotNet.RemoteExecutor; using Xunit; namespace System.Text.RegularExpressions.Tests @@ -105,6 +102,15 @@ public static IEnumerable Groups_Basic_TestData() yield return new object[] { null, @"([0-9-[02468]]|[0-9-[13579]])+", "az1234567890za", RegexOptions.None, new string[] { "1234567890", "0" } }; yield return new object[] { null, @"([^0-9-[a-zAE-Z]]|[\w-[a-zAF-Z]])+", "azBCDE1234567890BCDEFza", RegexOptions.None, new string[] { "BCDE1234567890BCDE", "E" } }; yield return new object[] { null, @"([\p{Ll}-[aeiou]]|[^\w-[\s]])+", "aeiobcdxyz!@#aeio", RegexOptions.None, new string[] { "bcdxyz!@#", "#" } }; + yield return new object[] { null, @"(?:hello|hi){1,3}", "hello", RegexOptions.None, new string[] { "hello" } }; + yield return new object[] { null, @"(hello|hi){1,3}", "hellohihey", RegexOptions.None, new string[] { "hellohi", "hi" } }; + yield return new object[] { null, @"(?:hello|hi){1,3}", "hellohihey", RegexOptions.None, new string[] { "hellohi" } }; + yield return new object[] { null, @"(?:hello|hi){2,2}", "hellohihey", RegexOptions.None, new string[] { "hellohi" } }; + yield return new object[] { null, @"(?:hello|hi){2,2}?", "hellohihihello", RegexOptions.None, new string[] { "hellohi" } }; + yield return new object[] { null, @"(?:abc|def|ghi|hij|klm|no){1,4}", "this is a test nonoabcxyz this is only a test", RegexOptions.None, new string[] { "nonoabc" } }; + yield return new object[] { null, @"xyz(abc|def)xyz", "abcxyzdefxyzabc", RegexOptions.None, new string[] { 
"xyzdefxyz", "def" } }; + yield return new object[] { null, @"abc|(?:def|ghi)", "ghi", RegexOptions.None, new string[] { "ghi" } }; + yield return new object[] { null, @"abc|(def|ghi)", "def", RegexOptions.None, new string[] { "def", "def" } }; // Multiple character classes using character class subtraction yield return new object[] { null, @"98[\d-[9]][\d-[8]][\d-[0]]", "98911 98881 98870 98871", RegexOptions.None, new string[] { "98871" } }; @@ -390,16 +396,44 @@ public static IEnumerable Groups_Basic_TestData() yield return new object[] { null, @"(cat)(\cZ*)(dog)", "asdlkcat\u001adogiwod", RegexOptions.None, new string[] { "cat\u001adog", "cat", "\u001a", "dog" } }; yield return new object[] { null, @"(cat)(\cz*)(dog)", "asdlkcat\u001adogiwod", RegexOptions.None, new string[] { "cat\u001adog", "cat", "\u001a", "dog" } }; - yield return new object[] { null, @"(cat)(\c[*)(dog)", "asdlkcat\u001bdogiwod", RegexOptions.None, new string[] { "cat\u001bdog", "cat", "\u001b", "dog" } }; - yield return new object[] { null, @"(cat)(\c[*)(dog)", "asdlkcat\u001Bdogiwod", RegexOptions.None, new string[] { "cat\u001Bdog", "cat", "\u001B", "dog" } }; + if (!PlatformDetection.IsFullFramework) // missing fix for #26501 + { + yield return new object[] { null, @"(cat)(\c[*)(dog)", "asdlkcat\u001bdogiwod", RegexOptions.None, new string[] { "cat\u001bdog", "cat", "\u001b", "dog" } }; + yield return new object[] { null, @"(cat)(\c[*)(dog)", "asdlkcat\u001Bdogiwod", RegexOptions.None, new string[] { "cat\u001Bdog", "cat", "\u001B", "dog" } }; + } - // Atomic Zero-Width Assertions \A \Z \z \G \b \B + // Atomic Zero-Width Assertions \A \G ^ \Z \z \b \B //\A + yield return new object[] { null, @"\Acat\s+dog", "cat \n\n\n dog", RegexOptions.None, new string[] { "cat \n\n\n dog" } }; + yield return new object[] { null, @"\Acat\s+dog", "cat \n\n\n dog", RegexOptions.Multiline, new string[] { "cat \n\n\n dog" } }; yield return new object[] { null, @"\A(cat)\s+(dog)", "cat \n\n\n dog", 
RegexOptions.None, new string[] { "cat \n\n\n dog", "cat", "dog" } }; yield return new object[] { null, @"\A(cat)\s+(dog)", "cat \n\n\n dog", RegexOptions.Multiline, new string[] { "cat \n\n\n dog", "cat", "dog" } }; - yield return new object[] { null, @"\A(cat)\s+(dog)", "cat \n\n\n dog", RegexOptions.ECMAScript, new string[] { "cat \n\n\n dog", "cat", "dog" } }; + + //\G + yield return new object[] { null, @"\Gcat\s+dog", "cat \n\n\n dog", RegexOptions.None, new string[] { "cat \n\n\n dog" } }; + yield return new object[] { null, @"\Gcat\s+dog", "cat \n\n\n dog", RegexOptions.Multiline, new string[] { "cat \n\n\n dog" } }; + yield return new object[] { null, @"\Gcat\s+dog", "cat \n\n\n dog", RegexOptions.ECMAScript, new string[] { "cat \n\n\n dog" } }; + yield return new object[] { null, @"\G(cat)\s+(dog)", "cat \n\n\n dog", RegexOptions.None, new string[] { "cat \n\n\n dog", "cat", "dog" } }; + yield return new object[] { null, @"\G(cat)\s+(dog)", "cat \n\n\n dog", RegexOptions.Multiline, new string[] { "cat \n\n\n dog", "cat", "dog" } }; + yield return new object[] { null, @"\G(cat)\s+(dog)", "cat \n\n\n dog", RegexOptions.ECMAScript, new string[] { "cat \n\n\n dog", "cat", "dog" } }; + + //^ + yield return new object[] { null, @"^cat\s+dog", "cat \n\n\n dog", RegexOptions.None, new string[] { "cat \n\n\n dog" } }; + yield return new object[] { null, @"^cat\s+dog", "cat \n\n\n dog", RegexOptions.Multiline, new string[] { "cat \n\n\n dog" } }; + yield return new object[] { null, @"mouse\s\n^cat\s+dog", "mouse\n\ncat \n\n\n dog", RegexOptions.Multiline, new string[] { "mouse\n\ncat \n\n\n dog" } }; + yield return new object[] { null, @"^cat\s+dog", "cat \n\n\n dog", RegexOptions.ECMAScript, new string[] { "cat \n\n\n dog" } }; + yield return new object[] { null, @"^(cat)\s+(dog)", "cat \n\n\n dog", RegexOptions.None, new string[] { "cat \n\n\n dog", "cat", "dog" } }; + yield return new object[] { null, @"^(cat)\s+(dog)", "cat \n\n\n dog", RegexOptions.Multiline, 
new string[] { "cat \n\n\n dog", "cat", "dog" } }; + yield return new object[] { null, @"(mouse)\s\n^(cat)\s+(dog)", "mouse\n\ncat \n\n\n dog", RegexOptions.Multiline, new string[] { "mouse\n\ncat \n\n\n dog", "mouse", "cat", "dog" } }; + yield return new object[] { null, @"^(cat)\s+(dog)", "cat \n\n\n dog", RegexOptions.ECMAScript, new string[] { "cat \n\n\n dog", "cat", "dog" } }; //\Z + yield return new object[] { null, @"cat\s+dog\Z", "cat \n\n\n dog", RegexOptions.None, new string[] { "cat \n\n\n dog" } }; + yield return new object[] { null, @"cat\s+dog\Z", "cat \n\n\n dog", RegexOptions.Multiline, new string[] { "cat \n\n\n dog" } }; + yield return new object[] { null, @"cat\s+dog\Z", "cat \n\n\n dog", RegexOptions.ECMAScript, new string[] { "cat \n\n\n dog" } }; + yield return new object[] { null, @"cat\s+dog\Z", "cat \n\n\n dog\n", RegexOptions.None, new string[] { "cat \n\n\n dog" } }; + yield return new object[] { null, @"cat\s+dog\Z", "cat \n\n\n dog\n", RegexOptions.Multiline, new string[] { "cat \n\n\n dog" } }; + yield return new object[] { null, @"cat\s+dog\Z", "cat \n\n\n dog\n", RegexOptions.ECMAScript, new string[] { "cat \n\n\n dog" } }; yield return new object[] { null, @"(cat)\s+(dog)\Z", "cat \n\n\n dog", RegexOptions.None, new string[] { "cat \n\n\n dog", "cat", "dog" } }; yield return new object[] { null, @"(cat)\s+(dog)\Z", "cat \n\n\n dog", RegexOptions.Multiline, new string[] { "cat \n\n\n dog", "cat", "dog" } }; yield return new object[] { null, @"(cat)\s+(dog)\Z", "cat \n\n\n dog", RegexOptions.ECMAScript, new string[] { "cat \n\n\n dog", "cat", "dog" } }; @@ -408,23 +442,43 @@ public static IEnumerable Groups_Basic_TestData() yield return new object[] { null, @"(cat)\s+(dog)\Z", "cat \n\n\n dog\n", RegexOptions.ECMAScript, new string[] { "cat \n\n\n dog", "cat", "dog" } }; //\z + yield return new object[] { null, @"cat\s+dog\z", "cat \n\n\n dog", RegexOptions.None, new string[] { "cat \n\n\n dog" } }; + yield return new object[] { 
null, @"cat\s+dog\z", "cat \n\n\n dog", RegexOptions.Multiline, new string[] { "cat \n\n\n dog" } }; + yield return new object[] { null, @"cat\s+dog\z", "cat \n\n\n dog", RegexOptions.ECMAScript, new string[] { "cat \n\n\n dog" } }; yield return new object[] { null, @"(cat)\s+(dog)\z", "cat \n\n\n dog", RegexOptions.None, new string[] { "cat \n\n\n dog", "cat", "dog" } }; yield return new object[] { null, @"(cat)\s+(dog)\z", "cat \n\n\n dog", RegexOptions.Multiline, new string[] { "cat \n\n\n dog", "cat", "dog" } }; yield return new object[] { null, @"(cat)\s+(dog)\z", "cat \n\n\n dog", RegexOptions.ECMAScript, new string[] { "cat \n\n\n dog", "cat", "dog" } }; //\b + yield return new object[] { null, @"\bcat\b", "cat", RegexOptions.None, new string[] { "cat" } }; + yield return new object[] { null, @"\bcat\b", "dog cat mouse", RegexOptions.None, new string[] { "cat" } }; + yield return new object[] { null, @"\bcat\b", "cat", RegexOptions.ECMAScript, new string[] { "cat" } }; + yield return new object[] { null, @"\bcat\b", "dog cat mouse", RegexOptions.ECMAScript, new string[] { "cat" } }; + yield return new object[] { null, @".*\bcat\b", "cat", RegexOptions.None, new string[] { "cat" } }; + yield return new object[] { null, @".*\bcat\b", "dog cat mouse", RegexOptions.None, new string[] { "dog cat" } }; + yield return new object[] { null, @".*\bcat\b", "cat", RegexOptions.ECMAScript, new string[] { "cat" } }; + yield return new object[] { null, @".*\bcat\b", "dog cat mouse", RegexOptions.ECMAScript, new string[] { "dog cat" } }; yield return new object[] { null, @"\b@cat", "123START123@catEND", RegexOptions.None, new string[] { "@cat" } }; yield return new object[] { null, @"\b\ Groups_Basic_TestData() yield return new object[] { null, @"^([a-z]*)([\w])$", "cat", RegexOptions.IgnoreCase, new string[] { "cat", "ca", "t" } }; // Quantifiers + yield return new object[] { null, @"a*", "", RegexOptions.None, new string[] { "" } }; + yield return new object[] { null, 
@"a*", "a", RegexOptions.None, new string[] { "a" } }; + yield return new object[] { null, @"a*", "aa", RegexOptions.None, new string[] { "aa" } }; + yield return new object[] { null, @"a*", "aaa", RegexOptions.None, new string[] { "aaa" } }; + yield return new object[] { null, @"a*?", "", RegexOptions.None, new string[] { "" } }; + yield return new object[] { null, @"a*?", "a", RegexOptions.None, new string[] { "" } }; + yield return new object[] { null, @"a*?", "aa", RegexOptions.None, new string[] { "" } }; + yield return new object[] { null, @"a+?", "aa", RegexOptions.None, new string[] { "a" } }; + yield return new object[] { null, @"a{1,", "a{1,", RegexOptions.None, new string[] { "a{1," } }; + yield return new object[] { null, @"a{1,3}", "aaaaa", RegexOptions.None, new string[] { "aaa" } }; + yield return new object[] { null, @"a{1,3}?", "aaaaa", RegexOptions.None, new string[] { "a" } }; + yield return new object[] { null, @"a{2,2}", "aaaaa", RegexOptions.None, new string[] { "aa" } }; + yield return new object[] { null, @"a{2,2}?", "aaaaa", RegexOptions.None, new string[] { "aa" } }; + yield return new object[] { null, @".{1,3}", "bb\nba", RegexOptions.None, new string[] { "bb" } }; + yield return new object[] { null, @".{1,3}?", "bb\nba", RegexOptions.None, new string[] { "b" } }; + yield return new object[] { null, @".{2,2}", "bbb\nba", RegexOptions.None, new string[] { "bb" } }; + yield return new object[] { null, @".{2,2}?", "bbb\nba", RegexOptions.None, new string[] { "bb" } }; + yield return new object[] { null, @"[abc]{1,3}", "ccaba", RegexOptions.None, new string[] { "cca" } }; + yield return new object[] { null, @"[abc]{1,3}?", "ccaba", RegexOptions.None, new string[] { "c" } }; + yield return new object[] { null, @"[abc]{2,2}", "ccaba", RegexOptions.None, new string[] { "cc" } }; + yield return new object[] { null, @"[abc]{2,2}?", "ccaba", RegexOptions.None, new string[] { "cc" } }; + yield return new object[] { null, @"(?:[abc]def){1,3}xyz", 
"cdefxyz", RegexOptions.None, new string[] { "cdefxyz" } }; + yield return new object[] { null, @"(?:[abc]def){1,3}xyz", "adefbdefcdefxyz", RegexOptions.None, new string[] { "adefbdefcdefxyz" } }; + yield return new object[] { null, @"(?:[abc]def){1,3}?xyz", "cdefxyz", RegexOptions.None, new string[] { "cdefxyz" } }; + yield return new object[] { null, @"(?:[abc]def){1,3}?xyz", "adefbdefcdefxyz", RegexOptions.None, new string[] { "adefbdefcdefxyz" } }; + yield return new object[] { null, @"(?:[abc]def){2,2}xyz", "adefbdefcdefxyz", RegexOptions.None, new string[] { "bdefcdefxyz" } }; + yield return new object[] { null, @"(?:[abc]def){2,2}?xyz", "adefbdefcdefxyz", RegexOptions.None, new string[] { "bdefcdefxyz" } }; + foreach (string prefix in new[] { "", "xyz" }) + { + yield return new object[] { null, prefix + @"(?:[abc]def){1,3}", prefix + "cdef", RegexOptions.None, new string[] { prefix + "cdef" } }; + yield return new object[] { null, prefix + @"(?:[abc]def){1,3}", prefix + "cdefadefbdef", RegexOptions.None, new string[] { prefix + "cdefadefbdef" } }; + yield return new object[] { null, prefix + @"(?:[abc]def){1,3}", prefix + "cdefadefbdefadef", RegexOptions.None, new string[] { prefix + "cdefadefbdef" } }; + yield return new object[] { null, prefix + @"(?:[abc]def){1,3}?", prefix + "cdef", RegexOptions.None, new string[] { prefix + "cdef" } }; + yield return new object[] { null, prefix + @"(?:[abc]def){1,3}?", prefix + "cdefadefbdef", RegexOptions.None, new string[] { prefix + "cdef" } }; + yield return new object[] { null, prefix + @"(?:[abc]def){2,2}", prefix + "cdefadefbdefadef", RegexOptions.None, new string[] { prefix + "cdefadef" } }; + yield return new object[] { null, prefix + @"(?:[abc]def){2,2}?", prefix + "cdefadefbdefadef", RegexOptions.None, new string[] { prefix + "cdefadef" } }; + } yield return new object[] { null, @"(cat){", "cat{", RegexOptions.None, new string[] { "cat{", "cat" } }; yield return new object[] { null, @"(cat){}", "cat{}", 
RegexOptions.None, new string[] { "cat{}", "cat" } }; yield return new object[] { null, @"(cat){,", "cat{,", RegexOptions.None, new string[] { "cat{,", "cat" } }; @@ -553,6 +644,74 @@ public static IEnumerable Groups_Basic_TestData() yield return new object[] { null, @"(cat){5,dog}?", "cat{5,dog}?", RegexOptions.None, new string[] { "cat{5,dog}", "cat" } }; yield return new object[] { null, @"(cat){cat,dog}?", "cat{cat,dog}?", RegexOptions.None, new string[] { "cat{cat,dog}", "cat" } }; + // Atomic subexpressions + // Implicitly upgrading oneloop to be atomic + yield return new object[] { null, @"a*", "aaa", RegexOptions.None, new string[] { "aaa" } }; + yield return new object[] { null, @"a*b", "aaab", RegexOptions.None, new string[] { "aaab" } }; + yield return new object[] { null, @"a*b+", "aaab", RegexOptions.None, new string[] { "aaab" } }; + yield return new object[] { null, @"a*b+?", "aaab", RegexOptions.None, new string[] { "aaab" } }; + yield return new object[] { null, @"a*(?>b+)", "aaab", RegexOptions.None, new string[] { "aaab" } }; + yield return new object[] { null, @"a*[^a]", "aaab", RegexOptions.None, new string[] { "aaab" } }; + yield return new object[] { null, @"a*[^a]+", "aaab", RegexOptions.None, new string[] { "aaab" } }; + yield return new object[] { null, @"a*[^a]+?", "aaab", RegexOptions.None, new string[] { "aaab" } }; + yield return new object[] { null, @"a*(?>[^a]+)", "aaab", RegexOptions.None, new string[] { "aaab" } }; + yield return new object[] { null, @"a*bcd", "aaabcd", RegexOptions.None, new string[] { "aaabcd" } }; + yield return new object[] { null, @"a*[bcd]", "aaab", RegexOptions.None, new string[] { "aaab" } }; + yield return new object[] { null, @"a*[bcd]+", "aaab", RegexOptions.None, new string[] { "aaab" } }; + yield return new object[] { null, @"a*[bcd]+?", "aaab", RegexOptions.None, new string[] { "aaab" } }; + yield return new object[] { null, @"a*(?>[bcd]+)", "aaab", RegexOptions.None, new string[] { "aaab" } }; + 
yield return new object[] { null, @"a*[bcd]{1,3}", "aaab", RegexOptions.None, new string[] { "aaab" } }; + yield return new object[] { null, @"a*$", "aaa", RegexOptions.None, new string[] { "aaa" } }; + yield return new object[] { null, @"a*$", "aaa", RegexOptions.Multiline, new string[] { "aaa" } }; + yield return new object[] { null, @"a*\b", "aaa bbb", RegexOptions.None, new string[] { "aaa" } }; + yield return new object[] { null, @"a*\b", "aaa bbb", RegexOptions.ECMAScript, new string[] { "aaa" } }; + yield return new object[] { null, @"@*\B", "@@@", RegexOptions.None, new string[] { "@@@" } }; + yield return new object[] { null, @"@*\B", "@@@", RegexOptions.ECMAScript, new string[] { "@@@" } }; + // Implicitly upgrading notoneloop to be atomic + yield return new object[] { null, @"[^b]*b", "aaab", RegexOptions.None, new string[] { "aaab" } }; + yield return new object[] { null, @"[^b]*b+", "aaab", RegexOptions.None, new string[] { "aaab" } }; + yield return new object[] { null, @"[^b]*b+?", "aaab", RegexOptions.None, new string[] { "aaab" } }; + yield return new object[] { null, @"[^b]*(?>b+)", "aaab", RegexOptions.None, new string[] { "aaab" } }; + yield return new object[] { null, @"[^b]*bac", "aaabac", RegexOptions.None, new string[] { "aaabac" } }; + yield return new object[] { null, @"[^b]*", "aaa", RegexOptions.None, new string[] { "aaa" } }; + // Implicitly upgrading setloop to be atomic + yield return new object[] { null, @"[ac]*", "aaa", RegexOptions.None, new string[] { "aaa" } }; + yield return new object[] { null, @"[ac]*b", "aaab", RegexOptions.None, new string[] { "aaab" } }; + yield return new object[] { null, @"[ac]*b+", "aaab", RegexOptions.None, new string[] { "aaab" } }; + yield return new object[] { null, @"[ac]*b+?", "aaab", RegexOptions.None, new string[] { "aaab" } }; + yield return new object[] { null, @"[ac]*(?>b+)", "aaab", RegexOptions.None, new string[] { "aaab" } }; + yield return new object[] { null, @"[ac]*[^a]", "aaab", 
RegexOptions.None, new string[] { "aaab" } }; + yield return new object[] { null, @"[ac]*[^a]+", "aaab", RegexOptions.None, new string[] { "aaab" } }; + yield return new object[] { null, @"[ac]*[^a]+?", "aaab", RegexOptions.None, new string[] { "aaab" } }; + yield return new object[] { null, @"[ac]*(?>[^a]+)", "aaab", RegexOptions.None, new string[] { "aaab" } }; + yield return new object[] { null, @"[ac]*bcd", "aaabcd", RegexOptions.None, new string[] { "aaabcd" } }; + yield return new object[] { null, @"[ac]*[bd]", "aaab", RegexOptions.None, new string[] { "aaab" } }; + yield return new object[] { null, @"[ac]*[bd]+", "aaab", RegexOptions.None, new string[] { "aaab" } }; + yield return new object[] { null, @"[ac]*[bd]+?", "aaab", RegexOptions.None, new string[] { "aaab" } }; + yield return new object[] { null, @"[ac]*(?>[bd]+)", "aaab", RegexOptions.None, new string[] { "aaab" } }; + yield return new object[] { null, @"[ac]*[bd]{1,3}", "aaab", RegexOptions.None, new string[] { "aaab" } }; + yield return new object[] { null, @"[ac]*$", "aaa", RegexOptions.None, new string[] { "aaa" } }; + yield return new object[] { null, @"[ac]*$", "aaa", RegexOptions.Multiline, new string[] { "aaa" } }; + yield return new object[] { null, @"[ac]*\b", "aaa bbb", RegexOptions.None, new string[] { "aaa" } }; + yield return new object[] { null, @"[ac]*\b", "aaa bbb", RegexOptions.ECMAScript, new string[] { "aaa" } }; + yield return new object[] { null, @"[@']*\B", "@@@", RegexOptions.None, new string[] { "@@@" } }; + yield return new object[] { null, @"[@']*\B", "@@@", RegexOptions.ECMAScript, new string[] { "@@@" } }; + yield return new object[] { null, @".*.", "@@@", RegexOptions.Singleline, new string[] { "@@@" } }; + // Implicitly upgrading concat loops to be atomic + yield return new object[] { null, @"(?:[ab]c[de]f)*", "", RegexOptions.None, new string[] { "" } }; + yield return new object[] { null, @"(?:[ab]c[de]f)*", "acdf", RegexOptions.None, new string[] { "acdf" } }; + 
yield return new object[] { null, @"(?:[ab]c[de]f)*", "acdfbcef", RegexOptions.None, new string[] { "acdfbcef" } }; + yield return new object[] { null, @"(?:[ab]c[de]f)*", "cdfbcef", RegexOptions.None, new string[] { "" } }; + yield return new object[] { null, @"(?:[ab]c[de]f)+", "cdfbcef", RegexOptions.None, new string[] { "bcef" } }; + yield return new object[] { null, @"(?:[ab]c[de]f)*", "bcefbcdfacfe", RegexOptions.None, new string[] { "bcefbcdf" } }; + // Implicitly upgrading nested loops to be atomic + yield return new object[] { null, @"(?:a){3}", "aaaaaaaaa", RegexOptions.None, new string[] { "aaa" } }; + yield return new object[] { null, @"(?:a){3}?", "aaaaaaaaa", RegexOptions.None, new string[] { "aaa" } }; + yield return new object[] { null, @"(?:a{2}){3}", "aaaaaaaaa", RegexOptions.None, new string[] { "aaaaaa" } }; + yield return new object[] { null, @"(?:a{2}?){3}?", "aaaaaaaaa", RegexOptions.None, new string[] { "aaaaaa" } }; + yield return new object[] { null, @"(?:(?:[ab]c[de]f){3}){2}", "acdfbcdfacefbcefbcefbcdfacdef", RegexOptions.None, new string[] { "acdfbcdfacefbcefbcefbcdf" } }; + yield return new object[] { null, @"(?:(?:[ab]c[de]f){3}hello){2}", "aaaaaacdfbcdfacefhellobcefbcefbcdfhellooooo", RegexOptions.None, new string[] { "acdfbcdfacefhellobcefbcefbcdfhello" } }; + // Grouping Constructs Invalid Regular Expressions yield return new object[] { null, @"()", "cat", RegexOptions.None, new string[] { string.Empty, string.Empty } }; yield return new object[] { null, @"(?)", "cat", RegexOptions.None, new string[] { string.Empty, string.Empty } }; diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.KnownPattern.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.KnownPattern.Tests.cs new file mode 100644 index 0000000000000..7c3f65486d585 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.KnownPattern.Tests.cs @@ -0,0 +1,140 @@ +// Licensed to the .NET Foundation under one or more 
agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Globalization; +using Xunit; + +namespace System.Text.RegularExpressions.Tests +{ + public class RegexKnownPatternTests + { + // These come from the regex docs: + // https://docs.microsoft.com/en-us/dotnet/standard/base-types/regular-expression-examples + + [Theory] + [InlineData(RegexOptions.None)] + [InlineData(RegexOptions.Compiled)] + public void ScanningHrefs(RegexOptions options) + { + const string HrefPattern = + @"href\s*=\s*(?:[""'](?<1>[^""']*)[""']|(?<1>\S+))"; + + const string InputString = + "My favorite web sites include:</P>" + + "<A HREF=\"http://msdn2.microsoft.com\">" + + "MSDN Home Page</A></P>" + + "<A HREF=\"http://www.microsoft.com\">" + + "Microsoft Corporation Home Page</A></P>" + + "<A HREF=\"http://blogs.msdn.com/bclteam\">" + + ".NET Base Class Library blog</A></P>"; + + Match m = Regex.Match(InputString, HrefPattern, options | RegexOptions.IgnoreCase); + Assert.True(m.Success); + Assert.Equal("http://msdn2.microsoft.com", m.Groups[1].ToString()); + Assert.Equal(43, m.Groups[1].Index); + + m = m.NextMatch(); + Assert.True(m.Success); + Assert.Equal("http://www.microsoft.com", m.Groups[1].ToString()); + Assert.Equal(102, m.Groups[1].Index); + + m = m.NextMatch(); + Assert.True(m.Success); + Assert.Equal("http://blogs.msdn.com/bclteam", m.Groups[1].ToString()); + Assert.Equal(176, m.Groups[1].Index); + + m = m.NextMatch(); + Assert.False(m.Success); + } + + [Theory] + [InlineData(RegexOptions.None)] + [InlineData(RegexOptions.Compiled)] + public void MDYtoDMY(RegexOptions options) + { + string dt = new DateTime(2020, 1, 8, 0, 0, 0, DateTimeKind.Utc).ToString("d", DateTimeFormatInfo.InvariantInfo); + string result = Regex.Replace(dt, @"\b(?<month>\d{1,2})/(?<day>\d{1,2})/(?<year>\d{2,4})\b", "${day}-${month}-${year}", options); + Assert.Equal("08-01-2020", result); + } + + [Theory] + [InlineData(RegexOptions.None)] + [InlineData(RegexOptions.Compiled)] + public void ExtractProtocolPort(RegexOptions options) + { + string url = "http://www.contoso.com:8080/letters/readme.html"; + Regex r = new Regex(@"^(?<proto>\w+)://[^/]+?(?<port>:\d+)?/", options); + Match m = r.Match(url); + Assert.True(m.Success); + Assert.Equal("http:8080", m.Result("${proto}${port}")); + } + + [Theory] + [InlineData("david.jones@proseware.com", true)] + [InlineData("d.j@server1.proseware.com", true)] + [InlineData("jones@ms1.proseware.com", true)] + [InlineData("j.@server1.proseware.com", false)] + [InlineData("j@proseware.com9", true)] + [InlineData("js#internal@proseware.com", true)] + [InlineData("j_9@[129.126.118.1]", true)] + [InlineData("j..s@proseware.com", false)] + [InlineData("js*@proseware.com", false)] + [InlineData("js@proseware..com", false)] + [InlineData("js@proseware.com9", true)] + [InlineData("j.s@server1.proseware.com", true)] +
[InlineData("\"j\\\"s\\\"\"@proseware.com", true)] + [InlineData("js@contoso.\u4E2D\u56FD", true)] + public void ValidateEmail(string email, bool expectedIsValid) + { + Assert.Equal(expectedIsValid, IsValidEmail(email, RegexOptions.None)); + Assert.Equal(expectedIsValid, IsValidEmail(email, RegexOptions.Compiled)); + + bool IsValidEmail(string email, RegexOptions options) + { + if (string.IsNullOrWhiteSpace(email)) + { + return false; + } + + try + { + // Normalize the domain + email = Regex.Replace(email, @"(@)(.+)$", DomainMapper, options, TimeSpan.FromMilliseconds(200)); + + // Examines the domain part of the email and normalizes it. + string DomainMapper(Match match) + { + // Use IdnMapping class to convert Unicode domain names. + var idn = new IdnMapping(); + + // Pull out and process domain name (throws ArgumentException on invalid) + string domainName = idn.GetAscii(match.Groups[2].Value); + + return match.Groups[1].Value + domainName; + } + } + catch (RegexMatchTimeoutException) + { + return false; + } + catch (ArgumentException) + { + return false; + } + + try + { + return Regex.IsMatch(email, + @"^(?("")("".+?(? Match_Basic_TestData() // The last 3 causes the match to fail, since the non backtracking subexpression does not give up the last digit it matched // for it to be a success. For a correct match, remove the last character, '3' from the pattern yield return new object[] { "[^0-9]+(?>[0-9]+)3", "abc123", RegexOptions.None, 0, 6, false, string.Empty }; + yield return new object[] { "[^0-9]+(?>[0-9]+)", "abc123", RegexOptions.None, 0, 6, true, "abc123" }; + + // More nonbacktracking expressions + foreach (RegexOptions options in new[] { RegexOptions.None, RegexOptions.IgnoreCase }) + { + string Case(string s) => (options & RegexOptions.IgnoreCase) != 0 ? 
s.ToUpper() : s; + + yield return new object[] { Case("(?>[0-9]+)abc"), "abc12345abc", options, 3, 8, true, "12345abc" }; + yield return new object[] { Case("(?>(?>[0-9]+))abc"), "abc12345abc", options, 3, 8, true, "12345abc" }; + yield return new object[] { Case("(?>[0-9]*)abc"), "abc12345abc", options, 3, 8, true, "12345abc" }; + yield return new object[] { Case("(?>[^z]+)z"), "zzzzxyxyxyz123", options, 4, 9, true, "xyxyxyz" }; + yield return new object[] { Case("(?>(?>[^z]+))z"), "zzzzxyxyxyz123", options, 4, 9, true, "xyxyxyz" }; + yield return new object[] { Case("(?>[^z]*)z123"), "zzzzxyxyxyz123", options, 4, 10, true, "xyxyxyz123" }; + yield return new object[] { Case("(?>a+)123"), "aa1234", options, 0, 5, true, "aa123" }; + yield return new object[] { Case("(?>a*)123"), "aa1234", options, 0, 5, true, "aa123" }; + yield return new object[] { Case("(?>(?>a*))123"), "aa1234", options, 0, 5, true, "aa123" }; + yield return new object[] { Case("(?>a+?)a"), "aaaaa", options, 0, 2, true, "aa" }; + yield return new object[] { Case("(?>a*?)a"), "aaaaa", options, 0, 1, true, "a" }; + yield return new object[] { Case("(?>hi|hello|hey)hi"), "hellohi", options, 0, 0, false, string.Empty }; + yield return new object[] { Case("(?:hi|hello|hey)hi"), "hellohi", options, 0, 7, true, "hellohi" }; // allow backtracking and it succeeds + yield return new object[] { Case("(?>hi|hello|hey)hi"), "hihi", options, 0, 4, true, "hihi" }; + } // Using beginning/end of string chars \A, \Z: Actual - "\\Aaaa\\w+zzz\\Z" yield return new object[] { @"\Aaaa\w+zzz\Z", "aaaasdfajsdlfjzzz", RegexOptions.IgnoreCase, 0, 17, true, "aaaasdfajsdlfjzzz" }; @@ -83,6 +105,12 @@ public static IEnumerable Match_Basic_TestData() yield return new object[] { @"\Aaaaaa\w+zzz\Z", "aaaa", RegexOptions.RightToLeft, 0, 4, false, string.Empty }; yield return new object[] { @"\Aaaaaa\w+zzzzz\Z", "aaaa", RegexOptions.RightToLeft, 0, 4, false, string.Empty }; yield return new object[] { @"\Aaaaaa\w+zzz\Z", "aaaa", 
RegexOptions.RightToLeft | RegexOptions.IgnoreCase, 0, 4, false, string.Empty }; + yield return new object[] { @"abc\Adef", "abcdef", RegexOptions.None, 0, 0, false, string.Empty }; + yield return new object[] { @"abc\adef", "abcdef", RegexOptions.None, 0, 0, false, string.Empty }; + yield return new object[] { @"abc\Gdef", "abcdef", RegexOptions.None, 0, 0, false, string.Empty }; + yield return new object[] { @"abc^def", "abcdef", RegexOptions.None, 0, 0, false, string.Empty }; + yield return new object[] { @"abc\Zef", "abcdef", RegexOptions.None, 0, 0, false, string.Empty }; + yield return new object[] { @"abc\zef", "abcdef", RegexOptions.None, 0, 0, false, string.Empty }; // Using beginning/end of string chars \A, \Z: Actual - "\\Aaaa\\w+zzz\\Z" yield return new object[] { @"\Aaaa\w+zzz\Z", "aaaasdfajsdlfjzzza", RegexOptions.None, 0, 18, false, string.Empty }; @@ -295,7 +323,10 @@ public static IEnumerable Match_Basic_TestData() yield return new object[] { @"[a-[a-f]]", "abcdefghijklmnopqrstuvwxyz", RegexOptions.None, 0, 26, false, string.Empty }; // \c - yield return new object[] { @"(cat)(\c[*)(dog)", "asdlkcat\u00FFdogiwod", RegexOptions.None, 0, 15, false, string.Empty }; + if (!PlatformDetection.IsFullFramework) // missing fix for #26501 + { + yield return new object[] { @"(cat)(\c[*)(dog)", "asdlkcat\u00FFdogiwod", RegexOptions.None, 0, 15, false, string.Empty }; + } // Surrogate pairs splitted up into UTF-16 code units. yield return new object[] { @"(\uD82F[\uDCA0-\uDCA3])", "\uD82F\uDCA2", RegexOptions.CultureInvariant, 0, 2, true, "\uD82F\uDCA2" }; @@ -381,13 +412,14 @@ public void Match_Timeout_Throws() // On 32-bit we can't test these high inputs as they cause OutOfMemoryExceptions. 
[ConditionalTheory(typeof(Environment), nameof(Environment.Is64BitProcess))] - [InlineData(RegexOptions.Compiled)] - [InlineData(RegexOptions.None)] - public void Match_Timeout_Loop_Throws(RegexOptions options) + [InlineData(@"a\s+", RegexOptions.None)] + [InlineData(@"a\s+", RegexOptions.Compiled)] + [InlineData(@"a\s+ ", RegexOptions.None)] + [InlineData(@"a\s+ ", RegexOptions.Compiled)] + public void Match_Timeout_Loop_Throws(string pattern, RegexOptions options) { - var regex = new Regex(@"a\s+", options, TimeSpan.FromSeconds(1)); - string input = @"a" + new string(' ', 800_000_000) + @"b"; - + var regex = new Regex(pattern, options, TimeSpan.FromSeconds(1)); + string input = "a" + new string(' ', 800_000_000) + " "; Assert.Throws(() => regex.Match(input)); } @@ -400,7 +432,6 @@ public void Match_Timeout_Repetition_Throws(RegexOptions options) int repetitionCount = 800_000_000; var regex = new Regex(@"a\s{" + repetitionCount+ "}", options, TimeSpan.FromSeconds(1)); string input = @"a" + new string(' ', repetitionCount) + @"b"; - Assert.Throws(() => regex.Match(input)); } @@ -808,6 +839,7 @@ public void Match_SpecialUnicodeCharacters_Invariant() } [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotArmProcess))] // times out on ARM + [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework, "Full framework needs fix for #26484")] [SkipOnCoreClr("Long running tests: https://github.com/dotnet/coreclr/issues/18912", RuntimeStressTestModes.JitMinOpts)] public void Match_ExcessPrefix() { @@ -869,5 +901,20 @@ public void IsMatch_Invalid() Assert.Throws(() => new Regex("pattern").IsMatch("input", -1)); Assert.Throws(() => new Regex("pattern").IsMatch("input", 6)); } + + [Fact] + public void Synchronized() + { + var m = new Regex("abc").Match("abc"); + Assert.True(m.Success); + Assert.Equal("abc", m.Value); + + var m2 = System.Text.RegularExpressions.Match.Synchronized(m); + Assert.Same(m, m2); + Assert.True(m2.Success); + Assert.Equal("abc", 
m2.Value); + + AssertExtensions.Throws("inner", () => System.Text.RegularExpressions.Match.Synchronized(null)); + } } } diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.MultipleMatches.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.MultipleMatches.Tests.cs index 321ffe52732cb..783c149e6acd4 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.MultipleMatches.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.MultipleMatches.Tests.cs @@ -90,6 +90,44 @@ public static IEnumerable Matches_TestData() } }; + yield return new object[] + { + @"\b\w*\b", "handling words of various lengths", RegexOptions.None, + new CaptureData[] + { + new CaptureData("handling", 0, 8), + new CaptureData("", 8, 0), + new CaptureData("words", 9, 5), + new CaptureData("", 14, 0), + new CaptureData("of", 15, 2), + new CaptureData("", 17, 0), + new CaptureData("various", 18, 7), + new CaptureData("", 25, 0), + new CaptureData("lengths", 26, 7), + new CaptureData("", 33, 0), + } + }; + + yield return new object[] + { + @"\b\w{2}\b", "handling words of various lengths", RegexOptions.None, + new CaptureData[] + { + new CaptureData("of", 15, 2), + } + }; + + yield return new object[] + { + @"\w{6,}", "handling words of various lengths", RegexOptions.None, + new CaptureData[] + { + new CaptureData("handling", 0, 8), + new CaptureData("various", 18, 7), + new CaptureData("lengths", 26, 7), + } + }; + yield return new object[] { @"foo\d+", "0123456789foo4567890foo1foo 0987", RegexOptions.RightToLeft, @@ -141,18 +179,21 @@ public static IEnumerable Matches_TestData() } }; - yield return new object[] + if (!PlatformDetection.IsFullFramework) // missing fix in https://github.com/dotnet/runtime/pull/993 { - "[^]", "every", RegexOptions.ECMAScript, - new CaptureData[] + yield return new object[] { - new CaptureData("e", 0, 1), - new CaptureData("v", 1, 1), - new CaptureData("e", 2, 1), - new CaptureData("r", 3, 1), - new 
CaptureData("y", 4, 1), - } - }; + "[^]", "every", RegexOptions.ECMAScript, + new CaptureData[] + { + new CaptureData("e", 0, 1), + new CaptureData("v", 1, 1), + new CaptureData("e", 2, 1), + new CaptureData("r", 3, 1), + new CaptureData("y", 4, 1), + } + }; + } } [Theory] diff --git a/src/libraries/System.Text.RegularExpressions/tests/RegexCharacterSetTests.cs b/src/libraries/System.Text.RegularExpressions/tests/RegexCharacterSetTests.cs new file mode 100644 index 0000000000000..7d3896880c60b --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/tests/RegexCharacterSetTests.cs @@ -0,0 +1,157 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Collections.Generic; +using System.Globalization; +using Xunit; +using Xunit.Sdk; + +namespace System.Text.RegularExpressions.Tests +{ + public class RegexCharacterSetTests + { + [Theory] + [InlineData(@"a", RegexOptions.None, new[] { 'a' })] + [InlineData(@"a", RegexOptions.IgnoreCase, new[] { 'a', 'A' })] + [InlineData(@"\u00A9", RegexOptions.None, new[] { '\u00A9' })] + [InlineData(@"\u00A9", RegexOptions.IgnoreCase, new[] { '\u00A9' })] + [InlineData(@"az", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, new[] { 'a', 'A', 'z', 'Z' })] + [InlineData(@"azY", RegexOptions.IgnoreCase, new[] { 'a', 'A', 'z', 'Z', 'y', 'Y' })] + [InlineData(@"azY", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, new[] { 'a', 'A', 'z', 'Z', 'y', 'Y' })] + [InlineData(@"azY\u00A9", RegexOptions.IgnoreCase, new[] { 'a', 'A', 'z', 'Z', 'y', 'Y', '\u00A9' })] + [InlineData(@"azY\u00A9", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, new[] { 'a', 'A', 'z', 'Z', 'y', 'Y', '\u00A9' })] + [InlineData(@"azY\u00A9\u05D0", RegexOptions.IgnoreCase, new[] { 'a', 'A', 'z', 'Z', 'y', 'Y', '\u00A9', '\u05D0' })] + [InlineData(@"azY\u00A9\u05D0", 
RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, new[] { 'a', 'A', 'z', 'Z', 'y', 'Y', '\u00A9', '\u05D0' })] + [InlineData(@"a ", RegexOptions.None, new[] { 'a', ' ' })] + [InlineData(@"a \t\r", RegexOptions.None, new[] { 'a', ' ', '\t', '\r' })] + [InlineData(@"aeiou", RegexOptions.None, new[] { 'a', 'e', 'i', 'o', 'u' })] + [InlineData(@"a-a", RegexOptions.None, new[] { 'a' })] + [InlineData(@"ab", RegexOptions.None, new[] { 'a', 'b' })] + [InlineData(@"a-b", RegexOptions.None, new[] { 'a', 'b' })] + [InlineData(@"abc", RegexOptions.None, new[] { 'a', 'b', 'c' })] + [InlineData(@"1369", RegexOptions.None, new[] { '1', '3', '6', '9' })] + [InlineData(@"ACEGIKMOQSUWY", RegexOptions.None, new[] { 'A', 'C', 'E', 'G', 'I', 'K', 'M', 'O', 'Q', 'S', 'U', 'W', 'Y' })] + [InlineData(@"abcAB", RegexOptions.None, new[] { 'A', 'B', 'a', 'b', 'c' })] + [InlineData(@"a-c", RegexOptions.None, new[] { 'a', 'b', 'c' })] + [InlineData(@"X-b", RegexOptions.None, new[] { 'X', 'Y', 'Z', '[', '\\', ']', '^', '_', '`', 'a', 'b' })] + [InlineData(@"\u0083\u00DE-\u00E1", RegexOptions.None, new[] { '\u0083', '\u00DE', '\u00DF', '\u00E0', '\u00E1' })] + [InlineData(@"\u007A-\u0083\u00DE-\u00E1", RegexOptions.None, new[] { '\u007A', '\u007B', '\u007C', '\u007D', '\u007E', '\u007F', '\u0080', '\u0081', '\u0082', '\u0083', '\u00DE', '\u00DF', '\u00E0', '\u00E1' })] + [InlineData(@"\u05D0", RegexOptions.None, new[] { '\u05D0' })] + [InlineData(@"a\u05D0", RegexOptions.None, new[] { 'a', '\u05D0' })] + [InlineData(@"\uFFFC-\uFFFF", RegexOptions.None, new[] { '\uFFFC', '\uFFFD', '\uFFFE', '\uFFFF' })] + [InlineData(@"a-z-[d-w-[m-o]]", RegexOptions.None, new[] { 'a', 'b', 'c', 'm', 'n', 'n', 'o', 'x', 'y', 'z' })] + [InlineData(@"\p{IsBasicLatin}-[\x00-\x7F]", RegexOptions.None, new char[0])] + [InlineData(@"0-9-[2468]", RegexOptions.None, new[] { '0', '1', '3', '5', '7', '9' })] + public void SetInclusionsExpected(string set, RegexOptions options, char[] expectedIncluded) + { + 
ValidateSet($"[{set}]", options, new HashSet(expectedIncluded), null); + if (!set.Contains("[")) + { + ValidateSet($"[^{set}]", options, null, new HashSet(expectedIncluded)); + } + } + + [Fact] + public void DotInclusionsExpected() + { + ValidateSet(".", RegexOptions.None, null, new HashSet() { '\n' }); + ValidateSet(".", RegexOptions.IgnoreCase, null, new HashSet() { '\n' }); + ValidateSet(".", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, null, new HashSet() { '\n' }); + + ValidateSet(".", RegexOptions.Singleline, null, new HashSet()); + ValidateSet(".", RegexOptions.Singleline | RegexOptions.IgnoreCase, null, new HashSet()); + ValidateSet(".", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, null, new HashSet()); + } + + [Fact] + public void WhitespaceInclusionsExpected() + { + var whitespaceInclusions = ComputeIncludedSet(char.IsWhiteSpace); + ValidateSet(@"[\s]", RegexOptions.None, whitespaceInclusions, null); + ValidateSet(@"[^\s]", RegexOptions.None, null, whitespaceInclusions); + ValidateSet(@"[\S]", RegexOptions.None, null, whitespaceInclusions); + } + + [Fact] + public void DigitInclusionsExpected() + { + var digitInclusions = ComputeIncludedSet(char.IsDigit); + ValidateSet(@"[\d]", RegexOptions.None, digitInclusions, null); + ValidateSet(@"[^\d]", RegexOptions.None, null, digitInclusions); + ValidateSet(@"[\D]", RegexOptions.None, null, digitInclusions); + } + + [Theory] + [InlineData(@"\p{Lu}", new[] { UnicodeCategory.UppercaseLetter })] + [InlineData(@"\p{S}", new[] { UnicodeCategory.CurrencySymbol, UnicodeCategory.MathSymbol, UnicodeCategory.ModifierSymbol, UnicodeCategory.OtherSymbol })] + [InlineData(@"\p{Lu}\p{Zl}", new[] { UnicodeCategory.UppercaseLetter, UnicodeCategory.LineSeparator })] + [InlineData(@"\w", new[] { UnicodeCategory.LowercaseLetter, UnicodeCategory.UppercaseLetter, UnicodeCategory.TitlecaseLetter, UnicodeCategory.OtherLetter, UnicodeCategory.ModifierLetter, 
UnicodeCategory.NonSpacingMark, UnicodeCategory.DecimalDigitNumber, UnicodeCategory.ConnectorPunctuation })] + + public void UnicodeCategoryInclusionsExpected(string set, UnicodeCategory[] categories) + { + var categoryInclusions = ComputeIncludedSet(c => Array.IndexOf(categories, char.GetUnicodeCategory(c)) >= 0); + ValidateSet($"[{set}]", RegexOptions.None, categoryInclusions, null); + ValidateSet($"[^{set}]", RegexOptions.None, null, categoryInclusions); + } + + [Theory] + [InlineData(@"\p{IsGreek}", new[] { 0x0370, 0x03FF })] + [InlineData(@"\p{IsRunic}\p{IsHebrew}", new[] { 0x0590, 0x05FF, 0x16A0, 0x16FF })] + [InlineData(@"abx-z\p{IsRunic}\p{IsHebrew}", new[] { 0x0590, 0x05FF, 0x16A0, 0x16FF, 'a', 'a', 'b', 'b', 'x', 'x', 'y', 'z' })] + public void NamedBlocksInclusionsExpected(string set, int[] ranges) + { + var included = new HashSet(); + for (int i = 0; i < ranges.Length - 1; i += 2) + { + ComputeIncludedSet(c => c >= ranges[i] && c <= ranges[i + 1], included); + } + + ValidateSet($"[{set}]", RegexOptions.None, included, null); + ValidateSet($"[^{set}]", RegexOptions.None, null, included); + } + + private static HashSet ComputeIncludedSet(Func func) + { + var included = new HashSet(); + ComputeIncludedSet(func, included); + return included; + } + + private static void ComputeIncludedSet(Func func, HashSet included) + { + for (int i = 0; i <= char.MaxValue; i++) + { + if (func((char)i)) + { + included.Add((char)i); + } + } + } + + [Fact] + public void ValidateValidateSet() + { + Assert.Throws(() => ValidateSet("[a]", RegexOptions.None, new HashSet() { 'b' }, null)); + Assert.Throws(() => ValidateSet("[b]", RegexOptions.None, null, new HashSet() { 'b' })); + } + + private static void ValidateSet(string regex, RegexOptions options, HashSet included, HashSet excluded) + { + Assert.True((included != null) ^ (excluded != null)); + foreach (RegexOptions compiled in new[] { RegexOptions.None, RegexOptions.Compiled }) + { + var r = new Regex(regex, options | 
compiled); + for (int i = 0; i <= char.MaxValue; i++) + { + bool actual = r.IsMatch(((char)i).ToString()); + bool expected = included != null ? included.Contains((char)i) : !excluded.Contains((char)i); + if (actual != expected) + { + throw new XunitException($"Set=\"{regex}\", Options=\"{options}\", {i.ToString("X4")} => '{(char)i}' returned {actual}"); + } + } + } + } + } +} diff --git a/src/libraries/System.Text.RegularExpressions/tests/RegexCompilationInfoTests.cs b/src/libraries/System.Text.RegularExpressions/tests/RegexCompilationInfoTests.cs index 45832389ee0e9..53d7e13fcd32d 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/RegexCompilationInfoTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/RegexCompilationInfoTests.cs @@ -24,7 +24,17 @@ public static IEnumerable Ctor_MemberData() [MemberData(nameof(Ctor_MemberData))] public void Ctor_ValidArguments_CheckProperties(string pattern, RegexOptions options, string name, string fullnamespace, bool ispublic, TimeSpan matchTimeout) { - var regexCompilationInfo = new RegexCompilationInfo(pattern, options, name, fullnamespace, ispublic, matchTimeout); + RegexCompilationInfo regexCompilationInfo; + + regexCompilationInfo = new RegexCompilationInfo(pattern, options, name, fullnamespace, ispublic); + Assert.Equal(pattern, regexCompilationInfo.Pattern); + Assert.Equal(options, regexCompilationInfo.Options); + Assert.Equal(name, regexCompilationInfo.Name); + Assert.Equal(fullnamespace, regexCompilationInfo.Namespace); + Assert.Equal(ispublic, regexCompilationInfo.IsPublic); + Assert.Equal(Regex.InfiniteMatchTimeout, regexCompilationInfo.MatchTimeout); + + regexCompilationInfo = new RegexCompilationInfo(pattern, options, name, fullnamespace, ispublic, matchTimeout); Assert.Equal(pattern, regexCompilationInfo.Pattern); Assert.Equal(options, regexCompilationInfo.Options); Assert.Equal(name, regexCompilationInfo.Name); diff --git 
a/src/libraries/System.Text.RegularExpressions/tests/RegexMatchTimeoutExceptionTests.cs b/src/libraries/System.Text.RegularExpressions/tests/RegexMatchTimeoutExceptionTests.cs new file mode 100644 index 0000000000000..606580344eb63 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/tests/RegexMatchTimeoutExceptionTests.cs @@ -0,0 +1,66 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.IO; +using System.Runtime.Serialization.Formatters.Binary; +using Xunit; + +namespace System.Text.RegularExpressions.Tests +{ + public class RegexMatchTimeoutExceptionTests + { + [Fact] + public void Ctor() + { + RegexMatchTimeoutException e; + + e = new RegexMatchTimeoutException(); + Assert.Empty(e.Input); + Assert.Empty(e.Pattern); + Assert.Equal(TimeSpan.FromTicks(-1), e.MatchTimeout); + + const string Message = "some message"; + e = new RegexMatchTimeoutException(Message); + Assert.Equal(Message, e.Message); + Assert.Empty(e.Input); + Assert.Empty(e.Pattern); + Assert.Equal(TimeSpan.FromTicks(-1), e.MatchTimeout); + + var inner = new FormatException(); + e = new RegexMatchTimeoutException(Message, inner); + Assert.Equal(Message, e.Message); + Assert.Same(inner, e.InnerException); + Assert.Empty(e.Input); + Assert.Empty(e.Pattern); + Assert.Equal(TimeSpan.FromTicks(-1), e.MatchTimeout); + + const string Input = "abcdef"; + const string Pattern = "(?:abcdef)*"; + TimeSpan timeout = TimeSpan.FromSeconds(42); + e = new RegexMatchTimeoutException(Input, Pattern, timeout); + Assert.Equal(Input, e.Input); + Assert.Equal(Pattern, e.Pattern); + Assert.Equal(timeout, e.MatchTimeout); + } + + [Fact] + public void SerializationRoundtrip() + { + const string Input = "abcdef"; + const string Pattern = "(?:abcdef)*"; + TimeSpan timeout = TimeSpan.FromSeconds(42); + var e = new 
RegexMatchTimeoutException(Input, Pattern, timeout); + + var bf = new BinaryFormatter(); + var s = new MemoryStream(); + bf.Serialize(s, e); + s.Position = 0; + e = (RegexMatchTimeoutException)bf.Deserialize(s); + + Assert.Equal(Input, e.Input); + Assert.Equal(Pattern, e.Pattern); + Assert.Equal(timeout, e.MatchTimeout); + } + } +} diff --git a/src/libraries/System.Text.RegularExpressions/tests/RegexParserTests.cs b/src/libraries/System.Text.RegularExpressions/tests/RegexParserTests.cs index 61003d2a5d83c..b40e5496d7bbc 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/RegexParserTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/RegexParserTests.cs @@ -2,7 +2,9 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System.IO; using System.Reflection; +using System.Runtime.Serialization.Formatters.Binary; using Xunit; using Xunit.Sdk; @@ -15,8 +17,11 @@ public class RegexParserTests static RegexParserTests() { - s_parseExceptionType = typeof(Regex).Assembly.GetType("System.Text.RegularExpressions.RegexParseException", true); - s_parseErrorField = s_parseExceptionType.GetField("_error", BindingFlags.NonPublic | BindingFlags.Instance); + if (!PlatformDetection.IsFullFramework) + { + s_parseExceptionType = typeof(Regex).Assembly.GetType("System.Text.RegularExpressions.RegexParseException", true); + s_parseErrorField = s_parseExceptionType.GetField("_error", BindingFlags.NonPublic | BindingFlags.Instance); + } } [Theory] @@ -800,11 +805,27 @@ public void Parse(string pattern, RegexOptions options, object errorObj) [InlineData("a{0,2147483648}", RegexOptions.None, RegexParseError.CaptureGroupOutOfRange)] // Surrogate pair which is parsed as [char,char-char,char] as we operate on UTF-16 code units. 
[InlineData("[\uD82F\uDCA0-\uD82F\uDCA3]", RegexOptions.IgnoreCase, RegexParseError.ReversedCharRange)] + [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework)] public void Parse_NotNetFramework(string pattern, RegexOptions options, object error) { Parse(pattern, options, error); } + [Fact] + [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework)] + public void RegexParseException_Serializes() + { + ArgumentException e = Assert.ThrowsAny(() => new Regex("(abc|def")); + + var bf = new BinaryFormatter(); + var s = new MemoryStream(); + bf.Serialize(s, e); + s.Position = 0; + + ArgumentException e2 = (ArgumentException)bf.Deserialize(s); + Assert.Equal(e.Message, e2.Message); + } + private static void ParseSubTrees(string pattern, RegexOptions options) { // Trim the input from the right and make sure tree invariants hold @@ -858,11 +879,19 @@ private static void ParseSubTree(string pattern, RegexOptions options) /// The action to invoke. private static void Throws(RegexParseError error, Action action) { + // If no specific error is supplied, or we are running on full framework where RegexParseException + // is not available, we expect an ArgumentException. + if (PlatformDetection.IsFullFramework) + { + Assert.ThrowsAny(action); + return; + } + try { action(); } - catch (Exception e) + catch (ArgumentException e) { // We use reflection to check if the exception is an internal RegexParseException // and extract its error property and compare with the given one.
diff --git a/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Tests.csproj b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Tests.csproj index bf60979b1b5d9..e3f8c12581c94 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Tests.csproj +++ b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Tests.csproj @@ -10,19 +10,23 @@ + + - - - + + + - + + + System\Text\RegularExpressions\RegexParseError.cs