[K/N] Regex matching zero length should split surrogate pairs #KT-57401
Merge-request: KT-MR-11110 Merged-by: Abduqodiri Qurbonzoda <abduqodiri.qurbonzoda@jetbrains.com>
This commit is contained in:
Committed by: Space Team
Parent: 7ecd7b8e61
Commit: df16fa3306
@@ -24,7 +24,7 @@ public actual val supportsOctalLiteralInRegex: Boolean get() = true
|
||||
|
||||
public actual val supportsEscapeAnyCharInRegex: Boolean get() = true
|
||||
|
||||
public actual val regexSplitUnicodeCodePointHandling: Boolean get() = true
|
||||
public actual val regexSplitUnicodeCodePointHandling: Boolean get() = false
|
||||
|
||||
public actual object BackReferenceHandling {
|
||||
actual val captureLargestValidIndex: Boolean get() = true
|
||||
|
||||
@@ -33,12 +33,6 @@ internal class EmptySet(override var next: AbstractSet) : LeafSet() {
|
||||
|
||||
override fun find(startIndex: Int, testString: CharSequence, matchResult: MatchResultImpl): Int {
|
||||
for (index in startIndex..testString.length) {
|
||||
if (index < testString.length) {
|
||||
if (testString[index].isLowSurrogate() &&
|
||||
index > 0 && testString[index - 1].isHighSurrogate()) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
if (next.matches(index, testString, matchResult) >= 0) {
|
||||
return index
|
||||
}
|
||||
@@ -48,12 +42,6 @@ internal class EmptySet(override var next: AbstractSet) : LeafSet() {
|
||||
|
||||
override fun findBack(leftLimit: Int, rightLimit: Int, testString: CharSequence, matchResult: MatchResultImpl): Int {
|
||||
for (index in rightLimit downTo leftLimit) {
|
||||
if (index < testString.length) {
|
||||
if (testString[index].isLowSurrogate() &&
|
||||
index > 0 && testString[index - 1].isHighSurrogate()) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
if (next.matches(index, testString, matchResult) >= 0) {
|
||||
return index
|
||||
}
|
||||
|
||||
@@ -135,22 +135,4 @@ class SplitTest {
|
||||
assertEquals("d", s[4])
|
||||
assertEquals("", s[5])
|
||||
}
|
||||
|
||||
@Test fun testSplitSupplementaryWithEmptyString() {
|
||||
|
||||
/*
|
||||
* See http://www.unicode.org/reports/tr18/#Supplementary_Characters We
|
||||
* have to treat text as code points not code units.
|
||||
*/
|
||||
val p = Regex("")
|
||||
val s: List<String>
|
||||
s = p.split("a\ud869\uded6b", 0)
|
||||
assertEquals(5, s.size)
|
||||
assertEquals("", s[0])
|
||||
assertEquals("a", s[1])
|
||||
assertEquals("\ud869\uded6", s[2])
|
||||
assertEquals("b", s[3])
|
||||
assertEquals("", s[4])
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -529,17 +529,19 @@ class RegexTest {
|
||||
@Test fun splitByEmptyMatch() {
|
||||
val input = "test"
|
||||
|
||||
val emptyMatch = "".toRegex()
|
||||
for (pattern in listOf("", "(?<=)")) {
|
||||
val emptyMatch = pattern.toRegex()
|
||||
|
||||
testSplitEquals(listOf("", "t", "e", "s", "t", ""), input, emptyMatch)
|
||||
testSplitEquals(listOf("", "t", "est"), input, emptyMatch, limit = 3)
|
||||
testSplitEquals(listOf("", "t", "e", "s", "t", ""), input, emptyMatch)
|
||||
testSplitEquals(listOf("", "t", "est"), input, emptyMatch, limit = 3)
|
||||
|
||||
testSplitEquals("".split(""), "", emptyMatch)
|
||||
testSplitEquals("".split(""), "", emptyMatch)
|
||||
|
||||
testSplitEquals(
|
||||
if (regexSplitUnicodeCodePointHandling) listOf("", "\uD83D\uDE04", "\uD801", "") else listOf("", "\uD83D", "\uDE04", "\uD801", ""),
|
||||
"\uD83D\uDE04\uD801", emptyMatch
|
||||
)
|
||||
testSplitEquals(
|
||||
if (regexSplitUnicodeCodePointHandling) listOf("", "\uD83D\uDE04", "\uD801", "") else listOf("", "\uD83D", "\uDE04", "\uD801", ""),
|
||||
"\uD83D\uDE04\uD801", emptyMatch
|
||||
)
|
||||
}
|
||||
|
||||
val emptyMatchBeforeT = "(?=t)".toRegex()
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ public actual val supportsOctalLiteralInRegex: Boolean get() = true
|
||||
|
||||
public actual val supportsEscapeAnyCharInRegex: Boolean get() = true
|
||||
|
||||
public actual val regexSplitUnicodeCodePointHandling: Boolean get() = true
|
||||
public actual val regexSplitUnicodeCodePointHandling: Boolean get() = false
|
||||
|
||||
public actual object BackReferenceHandling {
|
||||
actual val captureLargestValidIndex: Boolean get() = true
|
||||
|
||||
Reference in New Issue
Block a user