Align JS and JVM behavior of Regex replace function #KT-28378

This commit is contained in:
Abduqodiri Qurbonzoda
2021-09-28 02:20:22 +03:00
committed by Space
parent 5326c875c0
commit dc2f5eab25
9 changed files with 258 additions and 19 deletions
@@ -28,4 +28,6 @@ actual fun testOnJs(action: () -> Unit) {}
public actual val isFloat32RangeEnforced: Boolean get() = true
public actual val supportsSuppressedExceptions: Boolean get() = true
public actual val supportsSuppressedExceptions: Boolean get() = true
public actual val supportsNamedCapturingGroup: Boolean get() = false
@@ -253,10 +253,23 @@ public actual class Regex internal constructor(internal val nativePattern: Patte
}
/**
* Replaces all occurrences of this regular expression in the specified [input] string with
* specified [replacement] expression.
* Replaces all occurrences of this regular expression in the specified [input] string with specified [replacement] expression.
*
* @param replacement A replacement expression that can include substitutions.
* The replacement string may contain references to the captured groups during a match. Occurrences of `$index`
* in the replacement string will be substituted with the subsequences corresponding to the captured groups with the specified index.
* The first digit after '$' is always treated as part of group reference. Subsequent digits are incorporated
* into `index` only if they would form a valid group reference. Only the digits '0'..'9' are considered as potential components
* of the group reference. Note that indexes of captured groups start from 1, and the group with index 0 is the whole match.
*
* The backslash character '\' can be used to include the succeeding character as a literal in the replacement string, e.g., `\$` or `\\`.
* [Regex.escapeReplacement] can be used if [replacement] has to be treated as a literal string.
*
* Note that named capturing groups are not supported in Kotlin/Native.
*
* @param input the char sequence to find matches of this regular expression in
* @param replacement the expression to replace found matches with
* @return the result of replacing each occurrence of this regular expression in [input] with the result of evaluating the [replacement] expression
* @throws RuntimeException if [replacement] expression is malformed, or capturing group with specified `name` or `index` does not exist
*/
actual fun replace(input: CharSequence, replacement: String): String {
    // Delegate to the lambda-based overload, expanding the replacement expression for every match.
    return replace(input) { found -> processReplacement(found, replacement) }
}
@@ -290,7 +303,21 @@ public actual class Regex internal constructor(internal val nativePattern: Patte
/**
* Replaces the first occurrence of this regular expression in the specified [input] string with specified [replacement] expression.
*
* @param replacement A replacement expression that can include substitutions.
* The replacement string may contain references to the captured groups during a match. Occurrences of `$index`
* in the replacement string will be substituted with the subsequences corresponding to the captured groups with the specified index.
* The first digit after '$' is always treated as part of group reference. Subsequent digits are incorporated
* into `index` only if they would form a valid group reference. Only the digits '0'..'9' are considered as potential components
* of the group reference. Note that indexes of captured groups start from 1, and the group with index 0 is the whole match.
*
* The backslash character '\' can be used to include the succeeding character as a literal in the replacement string, e.g., `\$` or `\\`.
* [Regex.escapeReplacement] can be used if [replacement] has to be treated as a literal string.
*
* Note that named capturing groups are not supported in Kotlin/Native.
*
* @param input the char sequence to find a match of this regular expression in
* @param replacement the expression to replace the found match with
* @return the result of replacing the first occurrence of this regular expression in [input] with the result of evaluating the [replacement] expression
* @throws RuntimeException if [replacement] expression is malformed, or capturing group with specified `name` or `index` does not exist
*/
actual fun replaceFirst(input: CharSequence, replacement: String): String {
val match = find(input) ?: return input.toString()
+3 -1
View File
@@ -16,4 +16,6 @@ public expect fun testOnJs(action: () -> Unit)
public expect val isFloat32RangeEnforced: Boolean
public expect val supportsSuppressedExceptions: Boolean
public expect val supportsSuppressedExceptions: Boolean
public expect val supportsNamedCapturingGroup: Boolean
+109 -7
View File
@@ -144,9 +144,29 @@ public actual class Regex actual constructor(pattern: String, options: Set<Regex
/**
* Replaces all occurrences of this regular expression in the specified [input] string with specified [replacement] expression.
*
* @param replacement A replacement expression that can include substitutions. See [String.prototype.replace](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace) function docs for details.
* The replacement string may contain references to the captured groups during a match. Occurrences of `$index`
* in the replacement string will be substituted with the subsequences corresponding to the captured groups with the specified index.
* The first digit after '$' is always treated as part of group reference. Subsequent digits are incorporated
* into `index` only if they would form a valid group reference. Only the digits '0'..'9' are considered as potential components
* of the group reference. Note that indexes of captured groups start from 1, and the group with index 0 is the whole match.
*
* The backslash character '\' can be used to include the succeeding character as a literal in the replacement string, e.g., `\$` or `\\`.
* [Regex.escapeReplacement] can be used if [replacement] has to be treated as a literal string.
*
* Note that referring to named capturing groups by name is currently not supported in Kotlin/JS.
* However, you can still refer to them by index.
*
* @param input the char sequence to find matches of this regular expression in
* @param replacement the expression to replace found matches with
* @return the result of replacing each occurrence of this regular expression in [input] with the result of evaluating the [replacement] expression
* @throws RuntimeException if [replacement] expression is malformed, or capturing group with specified `name` or `index` does not exist
*/
public actual fun replace(input: CharSequence, replacement: String): String = input.toString().nativeReplace(nativePattern, replacement)
public actual fun replace(input: CharSequence, replacement: String): String {
    // Only '\' and '$' are special in a replacement expression; without them the
    // replacement is plain text and can be handed to the native replace directly.
    val needsSubstitution = replacement.contains('\\') || replacement.contains('$')
    return if (needsSubstitution) {
        replace(input) { match -> substituteGroupRefs(match, replacement) }
    } else {
        input.toString().nativeReplace(nativePattern, replacement)
    }
}
/**
* Replaces all occurrences of this regular expression in the specified [input] string with the result of
@@ -178,11 +198,36 @@ public actual class Regex actual constructor(pattern: String, options: Set<Regex
/**
* Replaces the first occurrence of this regular expression in the specified [input] string with specified [replacement] expression.
*
* @param replacement A replacement expression that can include substitutions. See [String.prototype.replace](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace) function docs for details.
* The replacement string may contain references to the captured groups during a match. Occurrences of `$index`
* in the replacement string will be substituted with the subsequences corresponding to the captured groups with the specified index.
* The first digit after '$' is always treated as part of group reference. Subsequent digits are incorporated
* into `index` only if they would form a valid group reference. Only the digits '0'..'9' are considered as potential components
* of the group reference. Note that indexes of captured groups start from 1, and the group with index 0 is the whole match.
*
* The backslash character '\' can be used to include the succeeding character as a literal in the replacement string, e.g., `\$` or `\\`.
* [Regex.escapeReplacement] can be used if [replacement] has to be treated as a literal string.
*
* Note that referring to named capturing groups by name is currently not supported in Kotlin/JS.
* However, you can still refer to them by index.
*
* @param input the char sequence to find a match of this regular expression in
* @param replacement the expression to replace the found match with
* @return the result of replacing the first occurrence of this regular expression in [input] with the result of evaluating the [replacement] expression
* @throws RuntimeException if [replacement] expression is malformed, or capturing group with specified `name` or `index` does not exist
*/
public actual fun replaceFirst(input: CharSequence, replacement: String): String {
// NOTE(review): the next two lines look like the pre-change body kept by the diff rendering
// (no +/- markers are visible here) — confirm before treating them as live code.
val nonGlobalOptions = options.map { it.value }.joinToString(separator = "")
return input.toString().nativeReplace(RegExp(pattern, nonGlobalOptions), replacement)
// Fast path: a replacement without '\' and without '$' contains no escapes or group references,
// so it is passed to nativeReplace unchanged, using a RegExp built from non-global flags.
if (!replacement.contains('\\') && !replacement.contains('$')) {
val nonGlobalOptions = options.toFlags("u")
return input.toString().nativeReplace(RegExp(pattern, nonGlobalOptions), replacement)
}
// No match at all: the input is returned as a String, unmodified.
val match = find(input) ?: return input.toString()
// Reassemble: text before the match + expanded replacement + text after the match.
return buildString {
append(input.substring(0, match.range.first))
append(substituteGroupRefs(match, replacement))
append(input.substring(match.range.last + 1, input.length))
}
}
/**
@@ -266,10 +311,13 @@ public actual class Regex actual constructor(pattern: String, options: Set<Regex
* Returns a literal replacement expression for the specified [literal] string.
* No characters of that string will have special meaning when it is used as a replacement string in [Regex.replace] function.
*/
public actual fun escapeReplacement(literal: String): String = literal.nativeReplace(replacementEscape, "$$$$")
public actual fun escapeReplacement(literal: String): String = literal.nativeReplace(replacementEscape, "\\$&")
private val patternEscape = RegExp("""[\\^$*+?.()|[\]{}]""", "g")
private val replacementEscape = RegExp("""\$""", "g")
// Matches every character that is special in a replacement expression:
// '\' (escapes the next character) and '$' (starts a group reference).
// Both have to be matched so that escapeReplacement can prefix each of them with a backslash;
// matching only '$' would leave literal backslashes to be consumed as escapes by Regex.replace.
private val replacementEscape = RegExp("""[\\$]""", "g")
internal fun nativeEscapeReplacement(literal: String): String = literal.nativeReplace(nativeReplacementEscape, "$$$$")
private val nativeReplacementEscape = RegExp("""\$""", "g")
}
}
@@ -310,3 +358,57 @@ private fun RegExp.findNext(input: String, from: Int, nextPattern: RegExp): Matc
nextPattern.findNext(input, if (range.isEmpty()) range.start + 1 else range.endInclusive + 1, nextPattern)
}
}
/**
 * Expands the [replacement] expression for a single [match].
 *
 * A backslash copies the character that follows it literally. A `$` followed by digits is replaced
 * with the entry of [MatchResult.groupValues] at that index. Any other character is copied as-is.
 *
 * @throws IllegalArgumentException if a trailing `\` or `$` has nothing after it, if `$` is followed by `{`
 * (named references), or if `$` is followed by a non-digit character
 * @throws IndexOutOfBoundsException if the referenced group index is not present in [match]
 */
private fun substituteGroupRefs(match: MatchResult, replacement: String): String {
    val output = StringBuilder(replacement.length)
    var pos = 0
    while (pos < replacement.length) {
        when (val current = replacement[pos++]) {
            '\\' -> {
                require(pos != replacement.length) { "The Char to be escaped is missing" }
                // the escaped character is taken verbatim and skipped
                output.append(replacement[pos++])
            }
            '$' -> {
                require(pos != replacement.length) { "Capturing group index is missing" }
                val next = replacement[pos]
                require(next != '{') { "Named capturing group reference currently is not supported" }
                require(next in '0'..'9') { "Invalid capturing group reference" }

                val groups = match.groupValues
                val refEnd = replacement.readGroupIndex(pos, groups.size)
                val groupIndex = replacement.substring(pos, refEnd).toInt()
                if (groupIndex >= groups.size)
                    throw IndexOutOfBoundsException("Group with index $groupIndex does not exist")

                output.append(groups[groupIndex])
                pos = refEnd
            }
            else -> output.append(current)
        }
    }
    return output.toString()
}
/**
 * Finds where the group index that starts at [startIndex] ends.
 *
 * The digit at [startIndex] is always consumed. Each following digit '0'..'9' is consumed only while
 * the number read so far stays within `0 until groupCount`.
 *
 * @param startIndex position of the first digit of the group reference
 * @param groupCount number of groups, used as the exclusive upper bound for the index
 * @return the index just past the last consumed digit
 */
private fun String.readGroupIndex(startIndex: Int, groupCount: Int): Int {
    // the first digit belongs to the reference unconditionally
    var value = this[startIndex] - '0'
    var end = startIndex + 1
    // extend greedily while the longer number is still a valid group index
    while (end < length) {
        val digit = this[end]
        if (digit !in '0'..'9') break
        val candidate = value * 10 + (digit - '0')
        if (candidate !in 0 until groupCount) break
        value = candidate
        end++
    }
    return end
}
@@ -160,7 +160,7 @@ public actual fun CharSequence.repeat(n: Int): String {
*/
@Suppress("ACTUAL_FUNCTION_WITH_DEFAULT_ARGUMENTS")
public actual fun String.replace(oldValue: String, newValue: String, ignoreCase: Boolean = false): String =
nativeReplace(RegExp(Regex.escape(oldValue), if (ignoreCase) "gui" else "gu"), Regex.escapeReplacement(newValue))
nativeReplace(RegExp(Regex.escape(oldValue), if (ignoreCase) "gui" else "gu"), Regex.nativeEscapeReplacement(newValue))
/**
* Returns a new string with all occurrences of [oldChar] replaced with [newChar].
@@ -173,7 +173,7 @@ public actual fun String.replace(oldChar: Char, newChar: Char, ignoreCase: Boole
@Suppress("ACTUAL_FUNCTION_WITH_DEFAULT_ARGUMENTS")
public actual fun String.replaceFirst(oldValue: String, newValue: String, ignoreCase: Boolean = false): String =
nativeReplace(RegExp(Regex.escape(oldValue), if (ignoreCase) "ui" else "u"), Regex.escapeReplacement(newValue))
nativeReplace(RegExp(Regex.escape(oldValue), if (ignoreCase) "ui" else "u"), Regex.nativeEscapeReplacement(newValue))
@Suppress("ACTUAL_FUNCTION_WITH_DEFAULT_ARGUMENTS")
public actual fun String.replaceFirst(oldChar: Char, newChar: Char, ignoreCase: Boolean = false): String =
+4 -1
View File
@@ -25,4 +25,7 @@ public actual fun testOnJs(action: () -> Unit) = action()
// TODO: should be true at least in JS IR after implementing KT-24975
public actual val isFloat32RangeEnforced: Boolean = false
actual val supportsSuppressedExceptions: Boolean get() = true
actual val supportsSuppressedExceptions: Boolean get() = true
// TODO: implement named group reference in replacement expression
public actual val supportsNamedCapturingGroup: Boolean get() = false
@@ -158,7 +158,23 @@ internal constructor(private val nativePattern: Pattern) : Serializable {
/**
* Replaces all occurrences of this regular expression in the specified [input] string with specified [replacement] expression.
*
* @param replacement A replacement expression that can include substitutions. See [Matcher.appendReplacement] for details.
* The replacement string may contain references to the captured groups during a match. Occurrences of `${name}` or `$index`
* in the replacement string will be substituted with the subsequences corresponding to the captured groups with the specified name or index.
* In case of `$index` the first digit after '$' is always treated as part of group reference. Subsequent digits are incorporated
* into `index` only if they would form a valid group reference. Only the digits '0'..'9' are considered as potential components
* of the group reference. Note that indexes of captured groups start from 1, and the group with index 0 is the whole match.
* In case of `${name}` the `name` can consist of latin letters 'a'..'z' and 'A'..'Z', or digits '0'..'9'. The first character must be
* a letter.
*
* The backslash character '\' can be used to include the succeeding character as a literal in the replacement string, e.g., `\$` or `\\`.
* [Regex.escapeReplacement] can be used if [replacement] has to be treated as a literal string.
*
* Note that named capturing groups are supported in Java 7 or later.
*
* @param input the char sequence to find matches of this regular expression in
* @param replacement the expression to replace found matches with
* @return the result of replacing each occurrence of this regular expression in [input] with the result of evaluating the [replacement] expression
* @throws RuntimeException if [replacement] expression is malformed, or capturing group with specified `name` or `index` does not exist
*/
public actual fun replace(input: CharSequence, replacement: String): String {
    // The replacement expression is interpreted by the matcher's replaceAll.
    val matcher = nativePattern.matcher(input)
    return matcher.replaceAll(replacement)
}
@@ -191,7 +207,23 @@ internal constructor(private val nativePattern: Pattern) : Serializable {
/**
* Replaces the first occurrence of this regular expression in the specified [input] string with specified [replacement] expression.
*
* @param replacement A replacement expression that can include substitutions. See [Matcher.appendReplacement] for details.
* The replacement string may contain references to the captured groups during a match. Occurrences of `${name}` or `$index`
* in the replacement string will be substituted with the subsequences corresponding to the captured groups with the specified name or index.
* In case of `$index` the first digit after '$' is always treated as part of group reference. Subsequent digits are incorporated
* into `index` only if they would form a valid group reference. Only the digits '0'..'9' are considered as potential components
* of the group reference. Note that indexes of captured groups start from 1, and the group with index 0 is the whole match.
* In case of `${name}` the `name` can consist of latin letters 'a'..'z' and 'A'..'Z', or digits '0'..'9'. The first character must be
* a letter.
*
* The backslash character '\' can be used to include the succeeding character as a literal in the replacement string, e.g., `\$` or `\\`.
* [Regex.escapeReplacement] can be used if [replacement] has to be treated as a literal string.
*
* Note that named capturing groups are supported in Java 7 or later.
*
* @param input the char sequence to find a match of this regular expression in
* @param replacement the expression to replace the found match with
* @return the result of replacing the first occurrence of this regular expression in [input] with the result of evaluating the [replacement] expression
* @throws RuntimeException if [replacement] expression is malformed, or capturing group with specified `name` or `index` does not exist
*/
public actual fun replaceFirst(input: CharSequence, replacement: String): String {
    // The replacement expression is interpreted by the matcher's replaceFirst.
    val matcher = nativePattern.matcher(input)
    return matcher.replaceFirst(replacement)
}
+3 -1
View File
@@ -39,4 +39,6 @@ public fun <T> platformNull() = Collections.singletonList(null as T).first()
public actual val isFloat32RangeEnforced: Boolean = true
public actual val supportsSuppressedExceptions: Boolean get() = !isJava6
public actual val supportsSuppressedExceptions: Boolean get() = !isJava6
public actual val supportsNamedCapturingGroup: Boolean get() = !isJava6
+69
View File
@@ -7,6 +7,7 @@
package test.text
import test.supportsNamedCapturingGroup
import kotlin.test.*
class RegexTest {
@@ -238,10 +239,78 @@ class RegexTest {
@Test fun replace() {
    val source = "123-456"
    val digits = Regex("(\\d+)")

    // Substitution patterns understood by js String.prototype.replace() that must be rejected here:
    //   "$$" inserts a "$", "$&" inserts the matched substring,
    //   "$`" inserts the text preceding the match, "$'" inserts the text following the match
    for (jsOnly in listOf("$$", "$&", "\$`", "$'")) {
        assertFailsWith<IllegalArgumentException>(jsOnly) { digits.replace(source, jsOnly) }
    }

    // should be IAE, however jdk7 throws String IOOBE
    assertFailsWith<RuntimeException>("$") { digits.replace(source, "$") }
    // js String.prototype.replace() inserts the replacement literally if it refers to a non-existing capturing group
    assertFailsWith<IndexOutOfBoundsException>("$2") { digits.replace(source, "$2") }
    for (malformed in listOf("\$name", "\${name}", "$-")) {
        assertFailsWith<IllegalArgumentException>(malformed) { digits.replace(source, malformed) }
    }

    // an escaped "$" is inserted literally
    assertEquals("$-$", digits.replace(source, "\\$"))
    // "$0" is the matched substring, "$1" is the first captured group
    assertEquals("(123)-(456)", digits.replace(source, "($0)"))
    assertEquals("(123)-(456)", digits.replace(source, "($1)"))

    assertEquals("$&-$&", digits.replace(source, Regex.escapeReplacement("$&")))
    assertEquals("X-456", digits.replaceFirst(source, "X"))

    val nestedInput = "0123456789ABC"
    val nestedPattern = Regex("0(1(2(3(4(5(6(7(8(9(A(B(C))))))))))))")
    (0..12).forEach { groupIndex ->
        assertEquals(nestedInput.substring(groupIndex), nestedPattern.replace(nestedInput, "$$groupIndex"))
    }
    assertEquals(nestedInput.substring(1) + "3", nestedPattern.replace(nestedInput, "$13"))

    // KT-38000
    assertEquals("""\,""", ",".replace("([,])".toRegex(), """\\$1"""))

    // KT-28378
    val anything = Regex("(.+)")
    assertEquals("$ 2", "2".replace(anything, "\\$ $1"))
    assertEquals("$2", "2".replace(anything, "\\$$1"))
    assertFailsWith<IllegalArgumentException> { "2".replace(anything, "$ $1") }
}
@Test fun replaceWithNamedGroups() {
    if (!supportsNamedCapturingGroup) {
        // The Regex is created inside the lambda, so a failure while constructing it is accepted as well.
        assertFails {
            Regex("(?<first>\\d+)-(?<second>\\d+)").replace("123-456", "\${first}+\${second}")
        }
        return
    }

    val pattern = Regex("(?<first>\\d+)-(?<second>\\d+)")

    val single = "123-456"
    assertEquals("(123-456)", pattern.replace(single, "($0)"))
    assertEquals("123+456", pattern.replace(single, "$1+$2"))
    // take largest legal group number reference
    assertEquals("1230+456", pattern.replace(single, "$10+$2"))
    assertEquals("123+456", pattern.replace(single, "$01+$2"))
    // js refers to named capturing groups with "$<name>" syntax
    assertFailsWith<IllegalArgumentException>("\$<first>+\$<second>") { pattern.replace(single, "\$<first>+\$<second>") }
    assertEquals("123+456", pattern.replace(single, "\${first}+\${second}"))
    // missing trailing '}'
    assertFailsWith<IllegalArgumentException>("\${first+\${second}") { pattern.replace(single, "\${first+\${second}") }
    assertFailsWith<IllegalArgumentException>("\${first}+\${second") { pattern.replace(single, "\${first}+\${second") }

    val repeated = "123-456-789-012"
    assertEquals("123/456-789/012", pattern.replace(repeated, "$1/$2"))
    assertEquals("123/456-789/012", pattern.replace(repeated, "\${first}/\${second}"))
    assertEquals("123/456-789-012", pattern.replaceFirst(repeated, "\${first}/\${second}"))
}
@Test fun replaceEvaluator() {