[K/N] Regex matching zero length should split surrogate pairs #KT-57401

Merge-request: KT-MR-11110
Merged-by: Abduqodiri Qurbonzoda <abduqodiri.qurbonzoda@jetbrains.com>
This commit is contained in:
Abduqodiri Qurbonzoda
2023-07-26 14:19:25 +00:00
committed by Space Team
parent 7ecd7b8e61
commit df16fa3306
5 changed files with 12 additions and 40 deletions
@@ -24,7 +24,7 @@ public actual val supportsOctalLiteralInRegex: Boolean get() = true
public actual val supportsEscapeAnyCharInRegex: Boolean get() = true
public actual val regexSplitUnicodeCodePointHandling: Boolean get() = true
public actual val regexSplitUnicodeCodePointHandling: Boolean get() = false
public actual object BackReferenceHandling {
actual val captureLargestValidIndex: Boolean get() = true
@@ -33,12 +33,6 @@ internal class EmptySet(override var next: AbstractSet) : LeafSet() {
override fun find(startIndex: Int, testString: CharSequence, matchResult: MatchResultImpl): Int {
for (index in startIndex..testString.length) {
if (index < testString.length) {
if (testString[index].isLowSurrogate() &&
index > 0 && testString[index - 1].isHighSurrogate()) {
continue
}
}
if (next.matches(index, testString, matchResult) >= 0) {
return index
}
@@ -48,12 +42,6 @@ internal class EmptySet(override var next: AbstractSet) : LeafSet() {
override fun findBack(leftLimit: Int, rightLimit: Int, testString: CharSequence, matchResult: MatchResultImpl): Int {
for (index in rightLimit downTo leftLimit) {
if (index < testString.length) {
if (testString[index].isLowSurrogate() &&
index > 0 && testString[index - 1].isHighSurrogate()) {
continue
}
}
if (next.matches(index, testString, matchResult) >= 0) {
return index
}
@@ -135,22 +135,4 @@ class SplitTest {
assertEquals("d", s[4])
assertEquals("", s[5])
}
/**
 * Splits a string that contains one supplementary character (encoded as the
 * surrogate pair U+D869 U+DED6) with an empty regex, and asserts that the
 * pair comes back as a single element instead of being cut in two.
 */
@Test fun testSplitSupplementaryWithEmptyString() {
/*
* See http://www.unicode.org/reports/tr18/#Supplementary_Characters. We
* have to treat text as code points, not code units.
*/
val p = Regex("")
val s: List<String>
s = p.split("a\ud869\uded6b", 0)
// Expected pieces: leading empty string, "a", the intact surrogate pair,
// "b", and a trailing empty string.
assertEquals(5, s.size)
assertEquals("", s[0])
assertEquals("a", s[1])
assertEquals("\ud869\uded6", s[2])
assertEquals("b", s[3])
assertEquals("", s[4])
}
}
+10 -8
View File
@@ -529,17 +529,19 @@ class RegexTest {
@Test fun splitByEmptyMatch() {
val input = "test"
val emptyMatch = "".toRegex()
for (pattern in listOf("", "(?<=)")) {
val emptyMatch = pattern.toRegex()
testSplitEquals(listOf("", "t", "e", "s", "t", ""), input, emptyMatch)
testSplitEquals(listOf("", "t", "est"), input, emptyMatch, limit = 3)
testSplitEquals(listOf("", "t", "e", "s", "t", ""), input, emptyMatch)
testSplitEquals(listOf("", "t", "est"), input, emptyMatch, limit = 3)
testSplitEquals("".split(""), "", emptyMatch)
testSplitEquals("".split(""), "", emptyMatch)
testSplitEquals(
if (regexSplitUnicodeCodePointHandling) listOf("", "\uD83D\uDE04", "\uD801", "") else listOf("", "\uD83D", "\uDE04", "\uD801", ""),
"\uD83D\uDE04\uD801", emptyMatch
)
testSplitEquals(
if (regexSplitUnicodeCodePointHandling) listOf("", "\uD83D\uDE04", "\uD801", "") else listOf("", "\uD83D", "\uDE04", "\uD801", ""),
"\uD83D\uDE04\uD801", emptyMatch
)
}
val emptyMatchBeforeT = "(?=t)".toRegex()
+1 -1
View File
@@ -21,7 +21,7 @@ public actual val supportsOctalLiteralInRegex: Boolean get() = true
public actual val supportsEscapeAnyCharInRegex: Boolean get() = true
public actual val regexSplitUnicodeCodePointHandling: Boolean get() = true
public actual val regexSplitUnicodeCodePointHandling: Boolean get() = false
public actual object BackReferenceHandling {
actual val captureLargestValidIndex: Boolean get() = true