/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package test.text.harmony_regex

import kotlin.text.*
import kotlin.test.*

class PatternTest {

    /** Message-first overload; forwards to `kotlin.test.assertTrue(value, msg)`. */
    fun assertTrue(msg: String, value: Boolean) = assertTrue(value, msg)

    /** Message-first overload; forwards to `kotlin.test.assertFalse(value, msg)`. */
    fun assertFalse(msg: String, value: Boolean) = assertFalse(value, msg)

    /**
     * Renders the receiver for failure messages: letters and digits are kept, `\n`, `\r` and `\t`
     * are spelled out, and every other character becomes a `\uXXXX` escape.
     */
    private fun String.asEscapeSeq() = buildString {
        for (ch in this@asEscapeSeq) {
            when {
                ch.isLetterOrDigit() -> append(ch)
                ch == '\n' -> append("\\n")
                ch == '\r' -> append("\\r")
                ch == '\t' -> append("\\t")
                // Pad to four hex digits so that e.g. a space is printed as \u0020 rather than \u20.
                else -> append("\\u").append(ch.code.toString(16).padStart(4, '0'))
            }
        }
    }

    /** Asserts that [regex] matches the whole of [string]. */
    private fun assertMatch(regex: Regex, string: String) {
        assertTrue(regex.matches(string), "Regex `$regex` expected to match string `${string.asEscapeSeq()}`")
    }

    /** Asserts that [regex] does not match the whole of [string]. */
    private fun assertNoMatch(regex: Regex, string: String) {
        assertFalse(regex.matches(string), "Regex `$regex` expected to not match string `${string.asEscapeSeq()}`")
    }

    /** Asserts that the first match of [regex] in [string] covers exactly [expectedRange]. */
    private fun assertFind(regex: Regex, string: String, expectedRange: IntRange) {
        assertEquals(
            expectedRange,
            regex.find(string)?.range,
            "Wrong `find` result for regex `$regex` in string `${string.asEscapeSeq()}`"
        )
    }

    internal var testPatterns = arrayOf(
        "(a|b)*abb",
        "(1*2*3*4*)*567",
        "(a|b|c|d)*aab",
        "(1|2|3|4|5|6|7|8|9|0)(1|2|3|4|5|6|7|8|9|0)*",
        "(abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ)*",
        "(a|b)*(a|b)*A(a|b)*lice.*",
        "(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)(a|b|c|d|e|f|g|h|" +
            "i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)*(1|2|3|4|5|6|7|8|9|0)*|while|for|struct|if|do",
        "x(?c)y",
        "x(?cc)y",
        "x(?:c)y"
    )

    @Test
    fun testCommentsInPattern() {
        val p = Regex("ab# this is a comment\ncd", RegexOption.COMMENTS)
        assertTrue(p.matches("abcd"))
    }

    @Test
    fun testSplitCharSequenceint() {
        // Checks only the number of pieces produced by splitting `input` around `pattern`.
        fun assertSplitSize(expected: Int, pattern: String, input: String, limit: Int) {
            assertEquals(
                expected,
                Regex(pattern).split(input, limit).size,
                "Regex `$pattern` split of `$input` with limit $limit"
            )
        }

        // Splitting CharSequence which ends with pattern.
        // Harmony regress tests.
        assertEquals(3, ",,".split(",".toRegex(), 3).size)
        assertEquals(3, ",,".split(",".toRegex(), 4).size)
        assertEquals(5, Regex("o").split("boo:and:foo", 5).size)
        assertEquals(2, Regex("b").split("ab", 0).size)

        assertSplitSize(5, "x", "zxx:zzz:zxx", 10)
        assertSplitSize(3, "x", "zxx:zzz:zxx", 3)
        assertSplitSize(5, "x", "zxx:zzz:zxx", 0)

        // Other splitting.
        // Limit 0
        assertSplitSize(5, "b", "abccbadfebb", 0)
        assertSplitSize(1, "b", "", 0)
        assertSplitSize(2, "", "", 0)
        assertSplitSize(11, "", "abccbadfe", 0)

        // positive limit
        assertSplitSize(5, "b", "abccbadfebb", 12)
        assertSplitSize(1, "b", "", 6)
        assertSplitSize(2, "", "", 11)
        assertSplitSize(11, "", "abccbadfe", 15)

        assertSplitSize(5, "b", "abccbadfebb", 5)
        assertSplitSize(1, "b", "", 1)
        assertSplitSize(1, "", "", 1)
        assertSplitSize(11, "", "abccbadfe", 11)

        assertSplitSize(3, "b", "abccbadfebb", 3)
        assertSplitSize(5, "", "abccbadfe", 5)
    }

    @Test
    fun testFlags() {
        // (pattern, input, whether the pattern is expected to match the whole input)
        val cases = listOf(
            Triple("((?i)|b)a", "A", false),
            Triple("(?i)a|b", "A", true),
            Triple("(?i)a|b", "B", true),
            Triple("c|(?i)a|b", "B", true),
            Triple("(?i)a|(?s)b", "B", true),
            Triple("(?i)a|(?-i)b", "B", false),
            Triple("(?i)a|(?-i)c|b", "B", false),
            Triple("(?i)a|(?-i)c|(?i)b", "B", true),
            Triple("(?i)a|(?-i)b", "A", true),
            Triple("((?i))a", "A", false),
            Triple("|(?i)|a", "A", true),
            Triple("(?i)((?s)a.)", "A\n", true),
            Triple("(?i)((?-i)a)", "A", false),
            Triple("(?i)(?s:a.)", "A\n", true),
            Triple("(?i)fgh(?s:aa)", "fghAA", true),
            Triple("(?i)((?-i))a", "A", true),
            Triple("abc(?i)d", "ABCD", false),
            Triple("abc(?i)d", "abcD", true),
            Triple("a(?i)a(?-i)a(?i)a(?-i)a", "aAaAa", true),
            Triple("a(?i)a(?-i)a(?i)a(?-i)a", "aAAAa", false)
        )
        for ((pattern, input, shouldMatch) in cases) {
            assertEquals(
                shouldMatch,
                Regex(pattern).matches(input),
                "Regex `$pattern` against `${input.asEscapeSeq()}`"
            )
        }
    }

    /** Returns `true` when the receiver holds exactly the given [options] and nothing else. */
    fun Set<RegexOption>.containsOnly(vararg options: RegexOption): Boolean {
        val toCheck = options.toSet()
        return size == toCheck.size && containsAll(toCheck)
    }

    @Test
    fun testFlagsMethod() {
        // Asserts that compiling `pattern` yields exactly the `expected` option set.
        fun assertOptions(pattern: String, vararg expected: RegexOption) {
            val actual = Regex(pattern).options
            assertTrue(
                actual.containsOnly(*expected),
                "Regex `$pattern`: expected options ${expected.toList()}, actual $actual"
            )
        }

        // These two only have to compile.
        kotlin.text.Regex("sdf")
        Regex("(?-i)")

        assertOptions("(?idmsux)abc(?-i)vg(?-dmu)", RegexOption.DOT_MATCHES_ALL, RegexOption.COMMENTS)
        assertOptions("(?idmsux)abc|(?-i)vg|(?-dmu)", RegexOption.DOT_MATCHES_ALL, RegexOption.COMMENTS)
        assertOptions("(?is)a((?x)b.)", RegexOption.DOT_MATCHES_ALL, RegexOption.IGNORE_CASE)
        assertOptions("(?i)a((?-i))", RegexOption.IGNORE_CASE)
        assertOptions("((?i)a)")
        assertOptions("(?is)abc", RegexOption.IGNORE_CASE, RegexOption.DOT_MATCHES_ALL)
    }

    @Test
    fun testCompileStringint() {
        /*
         * These tests are needed to verify that appropriate exceptions are thrown
         */
        for (unbalanced in listOf("b)a", "bcde)a", "bbg())a", "cdb(?i))a")) {
            assertFailsWith<IllegalArgumentException>(
                "Expected a IllegalArgumentException when compiling pattern: " + unbalanced
            ) {
                Regex(unbalanced)
            }
        }
        // Assigned by the compile-only checks that complete this test.
        var pattern: String
/*
         * This pattern should compile (Originally it is a regression test for HARMONY-2127)
         */
        pattern = "x(?c)y"
        Regex(pattern)
        /*
         * This pattern doesn't match any string, but should be compiled anyway
         */
        pattern = "(b\\u0001)a"
        Regex(pattern)
    }

    @Test
    fun testQuantCompileNeg() {
        val patterns = arrayOf("5{,2}", "{5asd", "{hgdhg", "{5,hjkh", "{,5hdsh", "{5,3shdfkjh}")
        for (element in patterns) {
            // Name the offending pattern so a failure points at the right table entry.
            assertFailsWith<IllegalArgumentException>(
                "IllegalArgumentException was expected, but compilation succeeds: $element"
            ) {
                Regex(element)
            }
        }
    }

    @Test
    fun testQuantCompilePos() {
        // Both quantifier forms only have to compile.
        for (element in arrayOf("abc{2,}", "abc{5}")) {
            Regex(element)
        }
    }

    @Test
    fun testQuantComposition() {
        val result = assertNotNull(Regex("(a{1,3})aab").matchEntire("aaab"))
        val firstGroup = assertNotNull(result.groups[1])
        assertEquals(0, firstGroup.range.start)
        assertEquals("a", result.groupValues[1])
    }

    @Test
    fun testTimeZoneIssue() {
        val regex = Regex("GMT(\\+|\\-)(\\d+)(:(\\d+))?")
        val result = assertNotNull(regex.matchEntire("GMT-9:45"))
        assertEquals("-", result.groupValues[1])
        assertEquals("9", result.groupValues[2])
        assertEquals(":45", result.groupValues[3])
        assertEquals("45", result.groupValues[4])
    }

    @Test
    fun testCompileRanges() {
        // The three arrays are parallel: entry i of each input array belongs to pattern i.
        val correctTestPatterns = arrayOf(
            "[^]*abb]*", "[^a-d[^m-p]]*abb", "[a-d\\d]*abb", "[abc]*abb",
            "[a-e&&[de]]*abb", "[^abc]*abb", "[a-e&&[^de]]*abb", "[a-z&&[^m-p]]*abb",
            "[a-d[m-p]]*abb", "[a-zA-Z]*abb", "[+*?]*abb", "[^+*?]*abb"
        )
        val matchingInputs = arrayOf(
            "kkkk", "admpabb", "abcabcd124654abb", "abcabccbacababb",
            "dededededededeedabb", "gfdhfghgdfghabb", "accabacbcbaabb", "acbvfgtyabb",
            "adbcacdbmopabcoabb", "jhfkjhaSDFGHJkdfhHNJMjkhfabb", "+*??+*abb", "sdfghjkabb"
        )
        val nonMatchingInputs = arrayOf(
            "]", "admpkk", "abcabcd124k654abb",
            "abwcabccbacababb", "abababdeababdeabb", "abcabcacbacbabb", "acdcbecbaabb", "acbotyabb",
            "adbcaecdbmopabcoabb", "jhfkjhaSDFGHJk;dfhHNJMjkhfabb", "+*?a?+*abb", "sdf+ghjkabb"
        )

        for (i in correctTestPatterns.indices) {
            val pattern = correctTestPatterns[i]
            // Compile each pattern once and reuse it for both checks.
            val regex = Regex(pattern)
            assertTrue("pattern: $pattern input: ${matchingInputs[i]}", regex.matches(matchingInputs[i]))
            assertFalse("pattern: $pattern input: ${nonMatchingInputs[i]}", regex.matches(nonMatchingInputs[i]))
        }
    }

    @Test
    fun testRangesSpecialCases() {
        val rejected = arrayOf("[a-&&[b-c]]", "[a-\\w]", "[b-a]", "[]")
        for (element in rejected) {
            assertFailsWith<IllegalArgumentException>("IllegalArgumentException was expected: $element") {
                Regex(element)
            }
        }

        // Pattern paired with an input it has to match.
        val accepted = listOf(
            "[-]+" to "----",
            "[a-]+" to "a-a-a-a-aa--",
            "[\\w-a]+" to "123-2312--aaa-213",
            "[a-]]+" to "-]]]]]]]]]]]]]]]"
        )
        for ((pat, inp) in accepted) {
            assertTrue("pattern: $pat input: $inp", Regex(pat).matches(inp))
        }
    }

    @Test
    fun testZeroSymbols() {
        assertTrue(Regex("[\u0000]*abb").matches("\u0000\u0000\u0000\u0000\u0000\u0000abb"))
    }

    @Test
    fun testEscapes() {
        val regex = Regex("\\Q{]()*?")
        assertTrue(regex.matches("{]()*?"))
    }

    @Test
    fun testRegressions() {
        // Bug 181
        Regex("[\\t-\\r]")

        // HARMONY-4472
        Regex("a*.+")

        // Bug187
Regex("|(?idmsux-idmsux)|(?idmsux-idmsux)|[^|\\[-\\0274|\\,-\\\\[^|W\\}\\nq\\x65\\002\\xFE\\05\\06\\00\\x66\\x47i\\,\\xF2\\=\\06\\u0EA4\\x9B\\x3C\\f\\|\\{\\xE5\\05\\r\\u944A\\xCA\\e|\\x19\\04\\x07\\04\\u607B\\023\\0073\\x91Tr\\0150\\x83]]?(?idmsux-idmsux:\\p{Alpha}{7}?)||(?<=[^\\uEC47\\01\\02\\u3421\\a\\f\\a\\013q\\035w\\e])(?<=\\p{Punct}{0,}?)(?=^\\p{Lower})(?!\\b{8,14})(?[\\x3E-\\]])|(?idmsux-idmsux:\\p{Punct})|(?[|\\n\\042\\uB09F\\06\\u0F2B\\uC96D\\x89\\uC166\\xAA|\\04-\\][^|\\a\\|\\rx\\04\\uA770\\n\\02\\t\\052\\056\\0274\\|\\=\\07\\e|\\00-\\x1D&&[^\\005\\uB15B\\uCDAC\\n\\x74\\0103\\0147\\uD91B\\n\\062G\\u9B4B\\077\\}\\0324&&[^\\0302\\,\\0221\\04\\u6D16\\04xy\\uD193\\[\\061\\06\\045\\x0F|\\e\\xBB\\f\\u1B52\\023\\u3AD2\\033\\007\\022\\}\\x66\\uA63FJ-\\0304]]]]{0,0})||(?^+)|(?![^|\\|\\nJ\\t\\<\\04E\\\\\\t\\01\\\\\\02\\|\\=\\}\\xF3\\uBEC2\\032K\\014\\uCC5F\\072q\\|\\0153\\xD9\\0322\\uC6C8[^\\t\\0342\\x34\\x91\\06\\{\\xF1\\a\\u1710\\?\\xE7\\uC106\\02pF\\<&&[^|\\]\\064\\u381D\\u50CF\\eO&&[^|\\06\\x2F\\04\\045\\032\\u8536W\\0377\\0017|\\x06\\uE5FA\\05\\xD4\\020\\04c\\xFC\\02H\\x0A\\r]]]]+?)(?idmsux-idmsux)|(?[\\{-\\0207|\\06-\\0276\\p{XDigit}])(?idmsux-idmsux:[^|\\x52\\0012\\]u\\xAD\\0051f\\0142\\\\l\\|\\050\\05\\f\\t\\u7B91\\r\\u7763\\{|h\\0104\\a\\f\\0234\\u2D4F&&^\\P{InGreek}]))") // HARMONY-5858 Regex("\\u6211", RegexOption.LITERAL) } @Test fun testOrphanQuantifiers() { try { Regex("+++++") fail("IllegalArgumentException expected") } catch (pse: IllegalArgumentException) { } } @Test fun testOrphanQuantifiers2() { try { Regex("\\d+*") fail("IllegalArgumentException expected") } catch (pse: IllegalArgumentException) { } } @Test fun testBug197() { val vals = arrayOf(":", 2, arrayOf("boo", "and:foo"), ":", 5, arrayOf("boo", "and", "foo"), ":", 0, arrayOf("boo", "and", "foo"), ":", 3, arrayOf("boo", "and", "foo"), ":", 1, arrayOf("boo:and:foo"), "o", 5, arrayOf("b", "", ":and:f", "", ""), "o", 4, arrayOf("b", "", ":and:f", "o"), "o", 0, arrayOf("b", "", ":and:f", "", 
"")
        )
        // vals is a flat table of (pattern, limit, expected pieces) triples, so step by three over
        // the whole array; bounding the index by size / 3 would stop after the third triple.
        for (i in vals.indices step 3) {
            val pattern = vals[i].toString()
            val limit = vals[i + 1] as Int
            val expectedRes = (vals[i + 2] as Array<*>).toList()
            val res = Regex(pattern).split("boo:and:foo", limit)
            assertEquals(expectedRes, res, "Regex `$pattern` split with limit $limit")
        }
    }

    @Test
    fun testURIPatterns() {
        // Every pattern only has to compile.
        val uriPatterns = listOf(
            "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?", // URI
            "^(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?", // relative URI
            "^[a-zA-Z]{1}[\\w+-.]+$", // scheme
            "^[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}$", // IPv4
            "^[0-9a-fA-F\\:\\.]+(\\%\\w+)?$", // IPv6
            "^\\[[0-9a-fA-F\\:\\.]+(\\%\\w+)?\\]$", // bracketed IPv6
            "\\w+[\\w\\-\\.]*" // host name
        )
        for (pattern in uriPatterns) {
            Regex(pattern)
        }
    }

    @Test
    fun testFindBoundaryCases1() {
        val result = assertNotNull(Regex(".*\n").find("a\n"))
        assertEquals("a\n", result.value)
    }

    @Test
    fun testFindBoundaryCases2() {
        val result = assertNotNull(Regex(".*A").find("aAa"))
        assertEquals("aA", result.value)
    }

    @Test
    fun testFindBoundaryCases3() {
        val result = assertNotNull(Regex(".*A").find("a\naA\n"))
        assertEquals("aA", result.value)
    }

    @Test
    fun testFindBoundaryCases4() {
        val result = assertNotNull(Regex("A.*").find("A\n"))
        assertEquals("A", result.value)
    }

    @Test
    fun testFindBoundaryCases5() {
        val regex = Regex(".*A.*")
        var result = regex.find("\nA\naaa\nA\naaAaa\naaaA\n")
        val expected = arrayOf("A", "A", "aaAaa", "aaaA")
        var k = 0
        while (result != null) {
            assertEquals(expected[k], result.value)
            result = result.next()
            k++
        }
        // Without this check a run that finds fewer matches than expected would still pass.
        assertEquals(expected.size, k)
    }

    @Test
    fun testFindBoundaryCases6() {
        val regex = Regex(".*")
        var result = regex.find("\na\n")
        val expected = arrayOf("", "a", "", "")
        var k = 0
        while (result != null) {
assertEquals(expected[k], result.value)
            k++
            result = result.next()
        }
    }

    @Test
    fun testBackReferences() {
        // Group 4 is a back reference to group 2, so the closing tag must repeat the opening one.
        val tagged = Regex("(\\((\\w*):(.*):(\\2)\\))")
        var ordinal = 1
        generateSequence(tagged.find("(start1: word :start1)(start2: word :start2)")) { it.next() }
            .forEach { match ->
                assertEquals("start$ordinal", match.groupValues[2])
                assertEquals(" word ", match.groupValues[3])
                assertEquals("start$ordinal", match.groupValues[4])
                ordinal++
            }
        assertEquals(3, ordinal)

        val repeatedChar = Regex(".*(.)\\1")
        assertTrue(repeatedChar.matches("saa"))
    }

    @Test
    fun testNewLine() {
        val emptyLineThenBreak = Regex("(^$)*\n", RegexOption.MULTILINE)
        // Walk the chain of successive matches and count them.
        val found = generateSequence(emptyLineThenBreak.find("\r\n\n")) { it.next() }.count()
        assertEquals(2, found)
    }

    @Test
    fun testFindGreedy() {
        val greedy = Regex(".*aaa", RegexOption.DOT_MATCHES_ALL)
        val whole = assertNotNull(greedy.matchEntire("aaaa\naaa\naaaaaa"))
        assertEquals(14, whole.range.endInclusive)
    }

    @Test
    fun testSOLQuant() {
        val lineEnds = Regex("$*", RegexOption.MULTILINE)
        // Walk the chain of successive matches and count them.
        val found = generateSequence(lineEnds.find("\n\n")) { it.next() }.count()
        assertEquals(3, found)
    }

    @Test
    fun testIllegalEscape() {
        val compiled = try {
            Regex("\\y")
            true
        } catch (expected: IllegalArgumentException) {
            false
        }
        if (compiled) {
            fail("IllegalArgumentException expected")
        }
    }

    @Test
    fun testEmptyFamily() {
        // Only has to compile.
        Regex("\\p{Lower}")
    }

    @Test
    fun testNonCaptConstr() {
        // Flags
        var regex = Regex("(?i)b*(?-i)a*")
        assertTrue(regex.matches("bBbBaaaa"))
        assertFalse(regex.matches("bBbBAaAa"))

        // Non-capturing groups
        regex = Regex("(?i:b*)a*")
        assertTrue(regex.matches("bBbBaaaa"))
        assertFalse(regex.matches("bBbBAaAa"))

        // The pattern below has capturing groups 1 to 11; the (?:...) groups are not counted.
        regex = Regex("(?:-|(-?\\d+\\d\\d\\d))?(?:-|-(\\d\\d))?(?:-|-(\\d\\d))?(T)?(?:(\\d\\d):(\\d\\d):(\\d\\d)(\\.\\d+)?)?(?:(?:((?:\\+|\\-)\\d\\d):(\\d\\d))|(Z))?")
        val result = assertNotNull(regex.matchEntire("-1234-21-31T41:51:61.789+71:81"))
        assertEquals("-1234", result.groupValues[1])
        assertEquals("21", result.groupValues[2])
        assertEquals("31",
result.groupValues[3])
        assertEquals("T", result.groupValues[4])
        assertEquals("41", result.groupValues[5])
        assertEquals("51", result.groupValues[6])
        assertEquals("61", result.groupValues[7])
        assertEquals(".789", result.groupValues[8])
        assertEquals("+71", result.groupValues[9])
        assertEquals("81", result.groupValues[10])

        // positive lookahead
        regex = Regex(".*\\.(?=log$).*$")
        assertTrue(regex.matches("a.b.c.log"))
        assertFalse(regex.matches("a.b.c.log."))

        // negative lookahead
        regex = Regex(".*\\.(?!log$).*$")
        assertFalse(regex.matches("abc.log"))
        assertTrue(regex.matches("abc.logg"))

        // positive lookbehind
        regex = Regex(".*(?<=abc)\\.log$")
        assertFalse(regex.matches("cde.log"))
        assertTrue(regex.matches("abc.log"))

        // negative lookbehind
        // NOTE(review): this check and the first atomic-group pattern below were reconstructed.
        // The text between "(?<!" and "(?>" had been stripped, leaving `Regex(".*(?a*)abb")`.
        // Confirm against the upstream Harmony test.
        regex = Regex(".*(?<!abc)\\.log$")
        assertTrue(regex.matches("cde.log"))
        assertFalse(regex.matches("abc.log"))

        // atomic group
        regex = Regex("(?>a*)abb")
        assertFalse(regex.matches("aaabb"))

        regex = Regex("(?>a*)bb")
        assertTrue(regex.matches("aaabb"))

        regex = Regex("(?>a|aa)aabb")
        assertTrue(regex.matches("aaabb"))

        regex = Regex("(?>aa|a)aabb")
        assertFalse(regex.matches("aaabb"))

        // quantifiers over lookbehind
        regex = Regex(".*(?<=abc)*\\.log$")
        assertTrue(regex.matches("cde.log"))
        regex = Regex(".*(?<=abc)+\\.log$")
        assertFalse(regex.matches("cde.log"))
    }

    @Test
    fun testCompilePatternWithTerminatorMark() {
        // NUL characters inside the pattern must be treated as ordinary characters.
        val regex = Regex("a\u0000\u0000cd")
        assertTrue(regex.matches("a\u0000\u0000cd"))
    }

    @Test
    fun testAlternations() {
        // An empty alternative must be accepted wherever it appears in the alternation.
        var baseString = "|a|bc"
        var regex = Regex(baseString)
        assertTrue(regex.matches(""))

        baseString = "a||bc"
        regex = Regex(baseString)
        assertTrue(regex.matches(""))

        baseString = "a|bc|"
        regex = Regex(baseString)
        assertTrue(regex.matches(""))

        baseString = "a|b|"
        regex = Regex(baseString)
        assertTrue(regex.matches(""))

        baseString = "a(|b|cd)e"
        regex = Regex(baseString)
        assertTrue(regex.matches("ae"))

        baseString = "a(b||cd)e"
        regex = Regex(baseString)
        assertTrue(regex.matches("ae"))

        baseString = "a(b|cd|)e"
        regex = Regex(baseString)
        assertTrue(regex.matches("ae"))

        baseString = "a(b|c|)e"
        regex = Regex(baseString)
        assertTrue(regex.matches("ae"))
baseString = "a(|)e"
        regex = Regex(baseString)
        assertTrue(regex.matches("ae"))

        baseString = "|"
        regex = Regex(baseString)
        assertTrue(regex.matches(""))

        baseString = "a(?:|)e"
        regex = Regex(baseString)
        assertTrue(regex.matches("ae"))

        baseString = "a||||bc"
        regex = Regex(baseString)
        assertTrue(regex.matches(""))

        baseString = "(?i-is)|a"
        regex = Regex(baseString)
        assertTrue(regex.matches("a"))
    }

    @Test
    fun testMatchWithGroups() {
        var baseString = "jwkerhjwehrkwjehrkwjhrwkjehrjwkehrjkwhrkwehrkwhrkwrhwkhrwkjehr"
        var pattern = ".*(..).*\\1.*"
        assertTrue(Regex(pattern).matches(baseString))

        baseString = "saa"
        pattern = ".*(.)\\1"
        assertTrue(Regex(pattern).matches(baseString))
        assertTrue(Regex(pattern).containsMatchIn(baseString))
    }

    @Test
    fun testSplitEmptyCharSequence() {
        // Splitting an empty string yields a single (empty) piece.
        assertEquals(1, "".split(":".toRegex()).size)
    }

    @Test
    fun testSplitEndsWithPattern() {
        assertEquals(3, ",,".split(",".toRegex(), 3).size)
        assertEquals(3, ",,".split(",".toRegex(), 4).size)

        assertEquals(5, Regex("o").split("boo:and:foo", 5).size)
        assertEquals(2, Regex("b").split("ab", 0).size)
    }

    @Test
    fun testCaseInsensitiveFlag() {
        assertTrue(Regex("(?i-:AbC)").matches("ABC"))
    }

    @Test
    fun testEmptyGroups() {
        var regex = Regex("ab(?>)cda")
        assertTrue(regex.matches("abcda"))

        regex = Regex("ab()")
        assertTrue(regex.matches("ab"))

        regex = Regex("abc(?:)(..)")
        assertTrue(regex.matches("abcgf"))
    }

    @Test
    fun testCompileNonCaptGroup() {
        // Each construction only has to succeed. A compilation failure now propagates and fails
        // the test with its own message instead of being printed and replaced by a bare
        // assertTrue(false).
        Regex("(?:)", RegexOption.CANON_EQ)
        Regex("(?:)", setOf(RegexOption.CANON_EQ, RegexOption.DOT_MATCHES_ALL))
        Regex("(?:)", setOf(RegexOption.CANON_EQ, RegexOption.IGNORE_CASE))
        Regex("(?:)", setOf(RegexOption.CANON_EQ, RegexOption.COMMENTS, RegexOption.UNIX_LINES))
    }

    @Test
    fun testEmbeddedFlags() {
        var baseString = "(?i)((?s)a)"
        var testString = "A"
        var regex = Regex(baseString)
assertTrue(regex.matches(testString)) baseString = "(?x)(?i)(?s)(?d)a" testString = "A" regex = Regex(baseString) assertTrue(regex.matches(testString)) baseString = "(?x)(?i)(?s)(?d)a." testString = "a\n" regex = Regex(baseString) assertTrue(regex.matches(testString)) baseString = "abc(?x:(?i)(?s)(?d)a.)" testString = "abcA\n" regex = Regex(baseString) assertTrue(regex.matches(testString)) baseString = "abc((?x)d)(?i)(?s)a" testString = "abcdA" regex = Regex(baseString) assertTrue(regex.matches(testString)) } @Test fun testAltWithFlags() { Regex("|(?i-xi)|()") } @Test fun testRestoreFlagsAfterGroup() { val baseString = "abc((?x)d) a" val testString = "abcd a" val regex = Regex(baseString) assertTrue(regex.matches(testString)) } @Test fun testCanonEqFlag() { /* * for decompositions see * http://www.unicode.org/Public/4.0-Update/UnicodeData-4.0.0.txt * http://www.unicode.org/reports/tr15/#Decomposition */ var baseString: String var testString: String var regex: Regex baseString = "ab(a*)\\u0001" regex = Regex(baseString, RegexOption.CANON_EQ) baseString = "a(abcdf)d" regex = Regex(baseString, RegexOption.CANON_EQ) baseString = "aabcdfd" regex = Regex(baseString, RegexOption.CANON_EQ) // \u01E0 -> \u0226\u0304 ->\u0041\u0307\u0304 // \u00CC -> \u0049\u0300 baseString = "\u01E0\u00CCcdb(ac)" testString = "\u0226\u0304\u0049\u0300cdbac" regex = Regex(baseString, RegexOption.CANON_EQ) assertTrue(regex.matches(testString)) baseString = "\u01E0cdb(a\u00CCc)" testString = "\u0041\u0307\u0304cdba\u0049\u0300c" regex = Regex(baseString, RegexOption.CANON_EQ) assertTrue(regex.matches(testString)) baseString = "a\u00CC" testString = "a\u0049\u0300" regex = Regex(baseString, RegexOption.CANON_EQ) assertTrue(regex.matches(testString)) baseString = "\u0226\u0304cdb(ac\u0049\u0300)" testString = "\u01E0cdbac\u00CC" regex = Regex(baseString, RegexOption.CANON_EQ) assertTrue(regex.matches(testString)) baseString = "cdb(?:\u0041\u0307\u0304\u00CC)" testString = 
"cdb\u0226\u0304\u0049\u0300" regex = Regex(baseString, RegexOption.CANON_EQ) assertTrue(regex.matches(testString)) baseString = "\u01E0[a-c]\u0049\u0300cdb(ac)" testString = "\u01E0b\u00CCcdbac" regex = Regex(baseString, RegexOption.CANON_EQ) assertTrue(regex.matches(testString)) baseString = "\u01E0|\u00CCcdb(ac)" testString = "\u0041\u0307\u0304" regex = Regex(baseString, RegexOption.CANON_EQ) assertTrue(regex.matches(testString)) baseString = "\u00CC?cdb(ac)*(\u01E0)*[a-c]" testString = "cdb\u0041\u0307\u0304b" regex = Regex(baseString, RegexOption.CANON_EQ) assertTrue(regex.matches(testString)) baseString = "a\u0300" regex = Regex(baseString, RegexOption.CANON_EQ) assertTrue(regex.containsMatchIn("a\u00E0a")) baseString = "\u7B20\uF9F8abc" regex = Regex(baseString, RegexOption.CANON_EQ) assertTrue(regex.matches("\uF9F8\uF9F8abc")) // \u01F9 -> \u006E\u0300 // \u00C3 -> \u0041\u0303 baseString = "cdb(?:\u00C3\u006E\u0300)" testString = "cdb\u0041\u0303\u01F9" regex = Regex(baseString, RegexOption.CANON_EQ) assertTrue(regex.matches(testString)) // \u014C -> \u004F\u0304 // \u0163 -> \u0074\u0327 baseString = "cdb(?:\u0163\u004F\u0304)" testString = "cdb\u0074\u0327\u014C" regex = Regex(baseString, RegexOption.CANON_EQ) assertTrue(regex.matches(testString)) // \u00E1->a\u0301 // canonical ordering takes place \u0301\u0327 -> \u0327\u0301 baseString = "c\u0327\u0301" testString = "c\u0301\u0327" regex = Regex(baseString, RegexOption.CANON_EQ) assertTrue(regex.matches(testString)) /* * Hangul decompositions */ // \uD4DB->\u1111\u1171\u11B6 // \uD21E->\u1110\u116D\u11B5 // \uD264->\u1110\u1170 // not Hangul:\u0453->\u0433\u0301 baseString = "a\uD4DB\u1111\u1171\u11B6\uD264" regex = Regex(baseString, RegexOption.CANON_EQ) baseString = "\u0453c\uD4DB" regex = Regex(baseString, RegexOption.CANON_EQ) baseString = "a\u1110\u116D\u11B5b\uD21Ebc" regex = Regex(baseString, RegexOption.CANON_EQ) baseString = "\uD4DB\uD21E\u1110\u1170cdb(ac)" testString = 
"\u1111\u1171\u11B6\u1110\u116D\u11B5\uD264cdbac" regex = Regex(baseString, RegexOption.CANON_EQ) assertTrue(regex.matches(testString)) baseString = "\uD4DB\uD264cdb(a\uD21Ec)" testString = "\u1111\u1171\u11B6\u1110\u1170cdba\u1110\u116D\u11B5c" regex = Regex(baseString, RegexOption.CANON_EQ) assertTrue(regex.matches(testString)) baseString = "a\uD4DB" testString = "a\u1111\u1171\u11B6" regex = Regex(baseString, RegexOption.CANON_EQ) assertTrue(regex.matches(testString)) baseString = "a\uD21E" testString = "a\u1110\u116D\u11B5" regex = Regex(baseString, RegexOption.CANON_EQ) assertTrue(regex.matches(testString)) baseString = "\u1111\u1171\u11B6cdb(ac\u1110\u116D\u11B5)" testString = "\uD4DBcdbac\uD21E" regex = Regex(baseString, RegexOption.CANON_EQ) assertTrue(regex.matches(testString)) baseString = "cdb(?:\u1111\u1171\u11B6\uD21E)" testString = "cdb\uD4DB\u1110\u116D\u11B5" regex = Regex(baseString, RegexOption.CANON_EQ) assertTrue(regex.matches(testString)) baseString = "\uD4DB[a-c]\u1110\u116D\u11B5cdb(ac)" testString = "\uD4DBb\uD21Ecdbac" regex = Regex(baseString, RegexOption.CANON_EQ) assertTrue(regex.matches(testString)) baseString = "\uD4DB|\u00CCcdb(ac)" testString = "\u1111\u1171\u11B6" regex = Regex(baseString, RegexOption.CANON_EQ) assertTrue(regex.matches(testString)) baseString = "\uD4DB|\u00CCcdb(ac)" testString = "\u1111\u1171" regex = Regex(baseString, RegexOption.CANON_EQ) assertFalse(regex.matches(testString)) baseString = "\u00CC?cdb(ac)*(\uD4DB)*[a-c]" testString = "cdb\u1111\u1171\u11B6b" regex = Regex(baseString, RegexOption.CANON_EQ) assertTrue(regex.matches(testString)) baseString = "\uD4DB" regex = Regex(baseString, RegexOption.CANON_EQ) assertTrue(regex.containsMatchIn("a\u1111\u1171\u11B6a")) baseString = "\u1111" regex = Regex(baseString, RegexOption.CANON_EQ) assertFalse(regex.containsMatchIn("bcda\uD4DBr")) } @Test fun testIndexesCanonicalEq() { var baseString: String var testString: String var regex: Regex var result: MatchResult? 
baseString = "\uD4DB" testString = "bcda\u1111\u1171\u11B6awr" regex = Regex(baseString, RegexOption.CANON_EQ) result = regex.find(testString) assertNotNull(result) assertEquals(result!!.range.start, 4) assertEquals(result.range.endInclusive, 6) baseString = "\uD4DB\u1111\u1171\u11B6" testString = "bcda\u1111\u1171\u11B6\uD4DBawr" regex = Regex(baseString, RegexOption.CANON_EQ) result = regex.find(testString) // Use the same testString assertNotNull(result) assertEquals(result!!.range.start, 4) assertEquals(result.range.endInclusive, 7) baseString = "\uD4DB\uD21E\u1110\u1170" testString = "abcabc\u1111\u1171\u11B6\u1110\u116D\u11B5\uD264cdbac" regex = Regex(baseString, RegexOption.CANON_EQ) result = regex.find(testString) assertNotNull(result) assertEquals(result!!.range.start, 6) assertEquals(result.range.endInclusive, 12) } @Test fun testCanonEqFlagWithSupplementaryCharacters() { /* * \u1D1BF->\u1D1BB\u1D16F->\u1D1B9\u1D165\u1D16F in UTF32 * \uD834\uDDBF->\uD834\uDDBB\uD834\uDD6F * ->\uD834\uDDB9\uD834\uDD65\uD834\uDD6F in UTF16 */ var patString = "abc\uD834\uDDBFef" var testString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef" var regex = Regex(patString, RegexOption.CANON_EQ) assertTrue(regex.matches(testString)) testString = "abc\uD834\uDDBB\uD834\uDD6Fef" assertTrue(regex.matches(testString)) patString = "abc\uD834\uDDBB\uD834\uDD6Fef" testString = "abc\uD834\uDDBFef" regex = Regex(patString, RegexOption.CANON_EQ) assertTrue(regex.matches(testString)) testString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef" assertTrue(regex.matches(testString)) patString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef" testString = "abc\uD834\uDDBFef" regex = Regex(patString, RegexOption.CANON_EQ) assertTrue(regex.matches(testString)) testString = "abc\uD834\uDDBB\uD834\uDD6Fef" assertTrue(regex.matches(testString)) /* * testSupplementary characters with no decomposition */ patString = "a\uD9A0\uDE8Ebc\uD834\uDDBB\uD834\uDD6Fe\uDE8Ef" testString = 
"a\uD9A0\uDE8Ebc\uD834\uDDBFe\uDE8Ef" regex = Regex(patString, RegexOption.CANON_EQ) assertTrue(regex.matches(testString)) } @Test fun testRangesWithSurrogatesSupplementary() { var patString = "[abc\uD8D2]" var testString = "\uD8D2" var regex = Regex(patString) assertTrue(regex.matches(testString)) testString = "a" assertTrue(regex.matches(testString)) testString = "ef\uD8D2\uDD71gh" assertFalse(regex.containsMatchIn(testString)) testString = "ef\uD8D2gh" assertTrue(regex.containsMatchIn(testString)) patString = "[abc\uD8D3&&[c\uD8D3]]" testString = "c" regex = Regex(patString) assertTrue(regex.matches(testString)) testString = "a" assertFalse(regex.matches(testString)) testString = "ef\uD8D3\uDD71gh" assertFalse(regex.containsMatchIn(testString)) testString = "ef\uD8D3gh" assertTrue(regex.containsMatchIn(testString)) patString = "[abc\uD8D3\uDBEE\uDF0C&&[c\uD8D3\uDBEE\uDF0C]]" testString = "c" regex = Regex(patString) assertTrue(regex.matches(testString)) testString = "\uDBEE\uDF0C" assertTrue(regex.matches(testString)) testString = "ef\uD8D3\uDD71gh" assertFalse(regex.containsMatchIn(testString)) testString = "ef\uD8D3gh" assertTrue(regex.containsMatchIn(testString)) patString = "[abc\uDBFC]\uDDC2cd" testString = "\uDBFC\uDDC2cd" regex = Regex(patString) assertFalse(regex.matches(testString)) testString = "a\uDDC2cd" assertTrue(regex.matches(testString)) } @Test fun testSequencesWithSurrogatesSupplementary() { var patString = "abcd\uD8D3" var testString = "abcd\uD8D3\uDFFC" var regex = Regex(patString) assertFalse(regex.containsMatchIn(testString)) testString = "abcd\uD8D3abc" assertTrue(regex.containsMatchIn(testString)) patString = "ab\uDBEFcd" testString = "ab\uDBEFcd" regex = Regex(patString) assertTrue(regex.matches(testString)) patString = "\uDFFCabcd" testString = "\uD8D3\uDFFCabcd" regex = Regex(patString) assertFalse(regex.containsMatchIn(testString)) testString = "abc\uDFFCabcdecd" assertTrue(regex.containsMatchIn(testString)) patString = 
"\uD8D3\uDFFCabcd" testString = "abc\uD8D3\uD8D3\uDFFCabcd" regex = Regex(patString) assertTrue(regex.containsMatchIn(testString)) } @Test fun testPredefinedClassesWithSurrogatesSupplementary() { var patString = "[123\\D]" var testString = "a" var regex = Regex(patString) assertTrue(regex.containsMatchIn(testString)) testString = "5" assertFalse(regex.containsMatchIn(testString)) testString = "3" assertTrue(regex.containsMatchIn(testString)) // low surrogate testString = "\uDFC4" assertTrue(regex.containsMatchIn(testString)) // high surrogate testString = "\uDADA" assertTrue(regex.containsMatchIn(testString)) testString = "\uDADA\uDFC4" assertTrue(regex.containsMatchIn(testString)) testString = "5" assertFalse(regex.containsMatchIn(testString)) testString = "3" assertTrue(regex.containsMatchIn(testString)) // low surrogate testString = "\uDFC4" assertTrue(regex.containsMatchIn(testString)) // high surrogate testString = "\uDADA" assertTrue(regex.containsMatchIn(testString)) testString = "\uDADA\uDFC4" assertTrue(regex.containsMatchIn(testString)) // surrogate characters patString = "\\p{Cs}" testString = "\uD916\uDE27" regex = Regex(patString) /* * see http://www.unicode.org/reports/tr18/#Supplementary_Characters we * have to treat text as code points not code units. 
\\p{Cs} matches any * surrogate character but here testString is a one code point * consisting of two code units (two surrogate characters) so we find * nothing */ assertFalse(regex.containsMatchIn(testString)) // swap low and high surrogates testString = "\uDE27\uD916" assertTrue(regex.containsMatchIn(testString)) patString = "[\uD916\uDE271\uD91623&&[^\\p{Cs}]]" testString = "1" regex = Regex(patString) assertTrue(regex.containsMatchIn(testString)) testString = "\uD916" regex = Regex(patString) assertFalse(regex.containsMatchIn(testString)) testString = "\uD916\uDE27" regex = Regex(patString) assertTrue(regex.containsMatchIn(testString)) // \uD9A0\uDE8E=\u7828E // \u78281=\uD9A0\uDE81 patString = "[a-\uD9A0\uDE8E]" testString = "\uD9A0\uDE81" regex = Regex(patString) assertTrue(regex.matches(testString)) } @Test fun testDotConstructionWithSurrogatesSupplementary() { var patString = "." var testString = "\uD9A0\uDE81" var regex = Regex(patString) assertTrue(regex.matches(testString)) testString = "\uDE81" assertTrue(regex.matches(testString)) testString = "\uD9A0" assertTrue(regex.matches(testString)) testString = "\n" assertFalse(regex.matches(testString)) patString = ".*\uDE81" testString = "\uD9A0\uDE81\uD9A0\uDE81\uD9A0\uDE81" regex = Regex(patString) assertFalse(regex.matches(testString)) testString = "\uD9A0\uDE81\uD9A0\uDE81\uDE81" assertTrue(regex.matches(testString)) patString = ".*" testString = "\uD9A0\uDE81\n\uD9A0\uDE81\uD9A0\n\uDE81" regex = Regex(patString, RegexOption.DOT_MATCHES_ALL) assertTrue(regex.matches(testString)) } @Test fun testQuantifiersWithSurrogatesSupplementary() { val patString = "\uD9A0\uDE81*abc" var testString = "\uD9A0\uDE81\uD9A0\uDE81abc" val regex = Regex(patString) assertTrue(regex.matches(testString)) testString = "abc" assertTrue(regex.matches(testString)) } @Test fun testAlternationsWithSurrogatesSupplementary() { val patString = "\uDE81|\uD9A0\uDE81|\uD9A0" var testString = "\uD9A0" val regex = Regex(patString) 
assertTrue(regex.matches(testString)) testString = "\uDE81" assertTrue(regex.matches(testString)) testString = "\uD9A0\uDE81" assertTrue(regex.matches(testString)) testString = "\uDE81\uD9A0" assertFalse(regex.matches(testString)) } @Test fun testGroupsWithSurrogatesSupplementary() { // this pattern matches nothing var patString = "(\uD9A0)\uDE81" var testString = "\uD9A0\uDE81" var regex = Regex(patString) assertFalse(regex.matches(testString)) patString = "(\uD9A0)" testString = "\uD9A0\uDE81" regex = Regex(patString, RegexOption.DOT_MATCHES_ALL) assertFalse(regex.containsMatchIn(testString)) } @Test fun testFailedFindDotQuantifier() { val regex = Regex("For.+ \\(1\\)") val result = regex.find("This is good (1), For You") assertNull(result) } @Test fun testVerticalWhitespaceChar() { // From Java 8+ `Pattern` doc: // \v - A vertical whitespace character: [\n\x0B\f\r\x85\u2028\u2029] // \V - A non-vertical whitespace character: [^\v] val positiveRegex = Regex("\\v") val negativeRegex = Regex("\\V") val verticalWhitespaces = listOf("\n", "\u000B", "\u000C" /* aka \f */, "\r", "\u0085", "\u2028", "\u2029") val nonVerticalWhitespaces = listOf("1", "K", " ", "${Char.MIN_HIGH_SURROGATE}${Char.MIN_LOW_SURROGATE}") // Smoke tests verticalWhitespaces.forEach { assertMatch(positiveRegex, it) assertNoMatch(negativeRegex, it) assertFind(positiveRegex, "prefix$it", 6..6) assertFind(positiveRegex, "prefix${it}suffix", 6..6) assertFind(positiveRegex, "${it}suffix", 0..0) } nonVerticalWhitespaces.forEach { assertNoMatch(positiveRegex, it) assertMatch(negativeRegex, it) assertFind(negativeRegex, "\n\n$it", 2 until (2 + it.length)) assertFind(negativeRegex, "\n\n${it}\n\n", 2 until (2 + it.length)) assertFind(negativeRegex, "${it}\n\n", 0 until it.length) } // Test that \v and \V can be a part of a more complex regex. val neighbors = listOf( // regex expression to its match. "x" to "x", " " to " ", "\\n" to "\n", "\\\\" to "\\", "[abc]" to "a", "." 
to "x", "\\d" to "5", "\\D" to "x", "\\s" to " ", "\\S" to "x", "\\w" to "x", "\\W" to "|", "\\p{Alnum}" to "x", "\\p{Space}" to " ", "\\p{Blank}" to " ", "\\p{Sc}" to "$" ).flatMap { (expression, match) -> listOf( expression to match, "$expression+" to match.repeat(2), "$expression{2,4}" to match.repeat(3) ) } neighbors.forEach { (neighbor, match) -> // \v assertMatch(Regex("$neighbor\\v"), "$match\n") assertMatch(Regex("$neighbor\\v$neighbor"), "$match\n$match") assertMatch(Regex("\\v$neighbor"), "\n$match") assertMatch(Regex("$neighbor\\v+"), "$match\n\n") assertMatch(Regex("$neighbor\\v+$neighbor"), "$match\n\n$match") assertMatch(Regex("\\v+$neighbor"), "\n\n$match") assertMatch(Regex("$neighbor\\v{2,4}"), "$match\n\n\n") assertMatch(Regex("$neighbor\\v{2,4}$neighbor"), "$match\n\n\n$match") assertMatch(Regex("\\v{2,4}$neighbor"), "\n\n\n$match") assertMatch(Regex("$neighbor\\v*"), match) assertMatch(Regex("$neighbor\\v*$neighbor"), "$match$match") assertMatch(Regex("\\v*$neighbor"), match) // \V assertMatch(Regex("$neighbor\\V"), "$match ") assertMatch(Regex("$neighbor\\V$neighbor"), "$match $match") assertMatch(Regex("\\V$neighbor"), " $match") assertMatch(Regex("$neighbor\\V+"), "$match ") assertMatch(Regex("$neighbor\\V+$neighbor"), "$match $match") assertMatch(Regex("\\V+$neighbor"), " $match") assertMatch(Regex("$neighbor\\V{2,4}"), "$match ") assertMatch(Regex("$neighbor\\V{2,4}$neighbor"), "$match $match") assertMatch(Regex("\\V{2,4}$neighbor"), " $match") assertMatch(Regex("$neighbor\\V*"), match) assertMatch(Regex("$neighbor\\V*$neighbor"), "$match$match") assertMatch(Regex("\\V*$neighbor"), match) } // Backrefs and `or` expression assertMatch(Regex("(\\v)x\\1"), "\nx\n") assertMatch(Regex("\\v|x"), "\n") assertMatch(Regex("\\v|x"), "x") assertMatch(Regex("(\\V)\n\\1"), "x\nx") assertMatch(Regex("\\V|\n"), "x") assertMatch(Regex("\\V|\n"), "\n") // Boundaries: ^, $, \b, \B assertMatch(Regex("^\\vx"), "\nx") assertNoMatch(Regex("^\\vx"), "x\n") 
assertMatch(Regex("x\\v$"), "x\n") assertNoMatch(Regex("x\\v$"), "\nx") assertMatch(Regex("abc\\b\\v"), "abc\n") assertMatch(Regex("abc\\b\\v"), "abc\n") assertMatch(Regex("^\\V\n"), "x\n") assertNoMatch(Regex("^\\V\n"), "\nx") assertMatch(Regex("\n\\V$"), "\nx") assertNoMatch(Regex("\n\\V$"), "x\n") assertMatch(Regex("abc\\B\\V"), "abcd") assertMatch(Regex("abc\\B\\V"), "abcd") } @Test fun testHorizontalWhitespaceChar() { // From Java 8+ `Pattern` doc: // \h - A horizontal whitespace character: [ \t\xA0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000] // \H - A non-horizontal whitespace character: [^\h] val positiveRegex = Regex("\\h") val negativeRegex = Regex("\\H") val verticalWhitespaces = listOf(" ", "\t", "\u00A0", "\u1680", "\u180e", "\u202f", "\u205f", "\u3000") + ('\u2000'..'\u200a').map(Char::toString) val nonVerticalWhitespaces = listOf("1", "K", "\n", "${Char.MIN_HIGH_SURROGATE}${Char.MIN_LOW_SURROGATE}") // Smoke tests verticalWhitespaces.forEach { assertMatch(positiveRegex, it) assertNoMatch(negativeRegex, it) assertFind(positiveRegex, "prefix$it", 6..6) assertFind(positiveRegex, "prefix${it}suffix", 6..6) assertFind(positiveRegex, "${it}suffix", 0..0) } nonVerticalWhitespaces.forEach { assertNoMatch(positiveRegex, it) assertMatch(negativeRegex, it) assertFind(negativeRegex, " $it", 2 until (2 + it.length)) assertFind(negativeRegex, " $it ", 2 until (2 + it.length)) assertFind(negativeRegex, "$it ", 0 until it.length) } // Test that \h and \H can be a part of a more complex regex. val neighbors = listOf( // regex expression to its match. "x" to "x", " " to " ", "\\n" to "\n", "\\\\" to "\\", "[abc]" to "a", "." 
to "x", "\\d" to "5", "\\D" to "x", "\\s" to " ", "\\S" to "x", "\\w" to "x", "\\W" to "|", "\\p{Alnum}" to "x", "\\p{Space}" to " ", "\\p{Blank}" to " ", "\\p{Sc}" to "$" ).flatMap { (expression, match) -> listOf( expression to match, "$expression+" to match.repeat(2), "$expression{2,4}" to match.repeat(3) ) } neighbors.forEach { (neighbor, match) -> // \h assertMatch(Regex("$neighbor\\h"), "$match ") assertMatch(Regex("$neighbor\\h$neighbor"), "$match $match") assertMatch(Regex("\\h$neighbor"), " $match") assertMatch(Regex("$neighbor\\h+"), "$match ") assertMatch(Regex("$neighbor\\h+$neighbor"), "$match $match") assertMatch(Regex("\\h+$neighbor"), " $match") assertMatch(Regex("$neighbor\\h{2,4}"), "$match ") assertMatch(Regex("$neighbor\\h{2,4}$neighbor"), "$match $match") assertMatch(Regex("\\h{2,4}$neighbor"), " $match") assertMatch(Regex("$neighbor\\h*"), match) assertMatch(Regex("$neighbor\\h*$neighbor"), "$match$match") assertMatch(Regex("\\h*$neighbor"), match) // \H assertMatch(Regex("$neighbor\\H"), "$match\n") assertMatch(Regex("$neighbor\\H$neighbor"), "$match\n$match") assertMatch(Regex("\\H$neighbor"), "\n$match") assertMatch(Regex("$neighbor\\H+"), "$match\n\n") assertMatch(Regex("$neighbor\\H+$neighbor"), "$match\n\n$match") assertMatch(Regex("\\H+$neighbor"), "\n\n$match") assertMatch(Regex("$neighbor\\H{2,4}"), "$match\n\n\n") assertMatch(Regex("$neighbor\\H{2,4}$neighbor"), "$match\n\n\n$match") assertMatch(Regex("\\H{2,4}$neighbor"), "\n\n\n$match") assertMatch(Regex("$neighbor\\H*"), match) assertMatch(Regex("$neighbor\\H*$neighbor"), "$match$match") assertMatch(Regex("\\H*$neighbor"), match) } // Backrefs and `or` expression assertMatch(Regex("(\\h)x\\1"), " x ") assertMatch(Regex("\\h|x"), " ") assertMatch(Regex("\\h|x"), "x") assertMatch(Regex("(\\H) \\1"), "x x") assertMatch(Regex("\\H| "), "x") assertMatch(Regex("\\H| "), " ") // Boundaries: ^, $, \b, \B assertMatch(Regex("^\\hx"), " x") assertNoMatch(Regex("^\\hx"), "x ") 
assertMatch(Regex("x\\h$"), "x ") assertNoMatch(Regex("x\\h$"), " x") assertMatch(Regex("abc\\b\\h"), "abc ") assertMatch(Regex("abc\\b\\h"), "abc ") assertMatch(Regex("^\\H "), "x ") assertNoMatch(Regex("^\\H "), " x") assertMatch(Regex(" \\H$"), " x") assertNoMatch(Regex(" \\H$"), "x ") assertMatch(Regex("abc\\B\\H"), "abcd") assertMatch(Regex("abc\\B\\H"), "abcd") } @Test fun testUnicodeLinebreakChar() { // From Java 8+ `Pattern` doc: // \R - Any Unicode linebreak sequence, is equivalent to \u000D\u000A|[\u000A\u000B\u000C\u000D\u0085\u2028\u2029] val regex = Regex("\\R") val linebreaks = listOf("\u000D\u000A", "\u000A", "\u000B", "\u000C", "\u000D", "\u0085", "\u2028", "\u2029") val nonLinebreaks = listOf("1", "K", " ", "${Char.MIN_HIGH_SURROGATE}${Char.MIN_LOW_SURROGATE}") // Smoke tests linebreaks.forEach { assertMatch(regex, it) assertFind(regex, "prefix$it", 6 until (6 + it.length)) assertFind(regex, "prefix${it}suffix", 6 until (6 + it.length)) assertFind(regex, "${it}suffix", 0 until it.length) } nonLinebreaks.forEach { assertNoMatch(regex, it) } // Test that \r\n matches both \R and \R\R assertMatch(Regex("\\R\\R\\R"), "\r\r\n\n") assertMatch(Regex("\\R\\R\\R\\R"), "\r\r\n\n") // Test that \R can be a part of a more complex regex. val neighbors = listOf( // regex expression to its match. "x" to "x", " " to " ", "\\n" to "\n", "\\\\" to "\\", "[abc]" to "a", "." 
to "x", "\\d" to "5", "\\D" to "x", "\\s" to " ", "\\S" to "x", "\\w" to "x", "\\W" to "|", "\\p{Alnum}" to "x", "\\p{Space}" to " ", "\\p{Blank}" to " ", "\\p{Sc}" to "$" ).flatMap { (expression, match) -> listOf( expression to match, "$expression+" to match.repeat(2), "$expression{2,4}" to match.repeat(3) ) } neighbors.forEach { (neighbor, match) -> assertMatch(Regex("$neighbor\\R"), "$match\n") assertMatch(Regex("$neighbor\\R$neighbor"), "$match\n$match") assertMatch(Regex("\\R$neighbor"), "\n$match") assertMatch(Regex("$neighbor\\R+"), "$match\n\n") assertMatch(Regex("$neighbor\\R+$neighbor"), "$match\n\n$match") assertMatch(Regex("\\R+$neighbor"), "\n\n$match") assertMatch(Regex("$neighbor\\R{2,4}"), "$match\n\n\n") assertMatch(Regex("$neighbor\\R{2,4}$neighbor"), "$match\n\n\n$match") assertMatch(Regex("\\R{2,4}$neighbor"), "\n\n\n$match") assertMatch(Regex("$neighbor\\R*"), match) assertMatch(Regex("$neighbor\\R*$neighbor"), "$match$match") assertMatch(Regex("\\R*$neighbor"), match) } // Backrefs and `or` expression assertMatch(Regex("(\\R)x\\1"), "\nx\n") assertMatch(Regex("\\R|x"), "\n") assertMatch(Regex("\\R|x"), "x") // Boundaries: ^, $, \b, \B assertMatch(Regex("^\\Rx"), "\nx") assertNoMatch(Regex("^\\Rx"), "x\n") assertMatch(Regex("x\\R$"), "x\n") assertNoMatch(Regex("x\\R$"), "\nx") assertMatch(Regex("abc\\b\\R"), "abc\n") assertMatch(Regex("abc\\b\\R"), "abc\n") } }