Align JS and JVM behavior of Regex replace function #KT-28378
This commit is contained in:
committed by
Space
parent
5326c875c0
commit
dc2f5eab25
@@ -28,4 +28,6 @@ actual fun testOnJs(action: () -> Unit) {}
|
||||
|
||||
public actual val isFloat32RangeEnforced: Boolean get() = true
|
||||
|
||||
public actual val supportsSuppressedExceptions: Boolean get() = true
|
||||
public actual val supportsSuppressedExceptions: Boolean get() = true
|
||||
|
||||
public actual val supportsNamedCapturingGroup: Boolean get() = false
|
||||
@@ -253,10 +253,23 @@ public actual class Regex internal constructor(internal val nativePattern: Patte
|
||||
}
|
||||
|
||||
/**
|
||||
* Replaces all occurrences of this regular expression in the specified [input] string with
|
||||
* specified [replacement] expression.
|
||||
* Replaces all occurrences of this regular expression in the specified [input] string with specified [replacement] expression.
|
||||
*
|
||||
* @param replacement A replacement expression that can include substitutions.
|
||||
* The replacement string may contain references to the captured groups during a match. Occurrences of `$index`
|
||||
* in the replacement string will be substituted with the subsequences corresponding to the captured groups with the specified index.
|
||||
* The first digit after '$' is always treated as part of group reference. Subsequent digits are incorporated
|
||||
* into `index` only if they would form a valid group reference. Only the digits '0'..'9' are considered as potential components
|
||||
* of the group reference. Note that indexes of captured groups start from 1, and the group with index 0 is the whole match.
|
||||
*
|
||||
* Backslash character '\' can be used to include the succeeding character as a literal in the replacement string, e.g., `\$` or `\\`.
|
||||
* [Regex.escapeReplacement] can be used if [replacement] has to be treated as a literal string.
|
||||
*
|
||||
* Note that named capturing groups are not supported in Kotlin/Native.
|
||||
*
|
||||
* @param input the char sequence to find matches of this regular expression in
|
||||
* @param replacement the expression to replace found matches with
|
||||
* @return the result of replacing each occurrence of this regular expression in [input] with the result of evaluating the [replacement] expression
|
||||
* @throws RuntimeException if [replacement] expression is malformed, or capturing group with specified `name` or `index` does not exist
|
||||
*/
|
||||
actual fun replace(input: CharSequence, replacement: String): String
|
||||
= replace(input) { match -> processReplacement(match, replacement) }
|
||||
@@ -290,7 +303,21 @@ public actual class Regex internal constructor(internal val nativePattern: Patte
|
||||
/**
|
||||
* Replaces the first occurrence of this regular expression in the specified [input] string with specified [replacement] expression.
|
||||
*
|
||||
* @param replacement A replacement expression that can include substitutions.
|
||||
* The replacement string may contain references to the captured groups during a match. Occurrences of `$index`
|
||||
* in the replacement string will be substituted with the subsequences corresponding to the captured groups with the specified index.
|
||||
* The first digit after '$' is always treated as part of group reference. Subsequent digits are incorporated
|
||||
* into `index` only if they would form a valid group reference. Only the digits '0'..'9' are considered as potential components
|
||||
* of the group reference. Note that indexes of captured groups start from 1, and the group with index 0 is the whole match.
|
||||
*
|
||||
* Backslash character '\' can be used to include the succeeding character as a literal in the replacement string, e.g., `\$` or `\\`.
|
||||
* [Regex.escapeReplacement] can be used if [replacement] has to be treated as a literal string.
|
||||
*
|
||||
* Note that named capturing groups are not supported in Kotlin/Native.
|
||||
*
|
||||
* @param input the char sequence to find a match of this regular expression in
|
||||
* @param replacement the expression to replace the found match with
|
||||
* @return the result of replacing the first occurrence of this regular expression in [input] with the result of evaluating the [replacement] expression
|
||||
* @throws RuntimeException if [replacement] expression is malformed, or capturing group with specified `name` or `index` does not exist
|
||||
*/
|
||||
actual fun replaceFirst(input: CharSequence, replacement: String): String {
|
||||
val match = find(input) ?: return input.toString()
|
||||
|
||||
@@ -16,4 +16,6 @@ public expect fun testOnJs(action: () -> Unit)
|
||||
|
||||
public expect val isFloat32RangeEnforced: Boolean
|
||||
|
||||
public expect val supportsSuppressedExceptions: Boolean
|
||||
public expect val supportsSuppressedExceptions: Boolean
|
||||
|
||||
public expect val supportsNamedCapturingGroup: Boolean
|
||||
@@ -144,9 +144,29 @@ public actual class Regex actual constructor(pattern: String, options: Set<Regex
|
||||
/**
|
||||
* Replaces all occurrences of this regular expression in the specified [input] string with specified [replacement] expression.
|
||||
*
|
||||
* @param replacement A replacement expression that can include substitutions. See [String.prototype.replace](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace) function docs for details.
|
||||
* The replacement string may contain references to the captured groups during a match. Occurrences of `$index`
|
||||
* in the replacement string will be substituted with the subsequences corresponding to the captured groups with the specified index.
|
||||
* The first digit after '$' is always treated as part of group reference. Subsequent digits are incorporated
|
||||
* into `index` only if they would form a valid group reference. Only the digits '0'..'9' are considered as potential components
|
||||
* of the group reference. Note that indexes of captured groups start from 1, and the group with index 0 is the whole match.
|
||||
*
|
||||
* Backslash character '\' can be used to include the succeeding character as a literal in the replacement string, e.g., `\$` or `\\`.
|
||||
* [Regex.escapeReplacement] can be used if [replacement] has to be treated as a literal string.
|
||||
*
|
||||
* Note that referring to named capturing groups by name is currently not supported in Kotlin/JS.
|
||||
* However, you can still refer to them by index.
|
||||
*
|
||||
* @param input the char sequence to find matches of this regular expression in
|
||||
* @param replacement the expression to replace found matches with
|
||||
* @return the result of replacing each occurrence of this regular expression in [input] with the result of evaluating the [replacement] expression
|
||||
* @throws RuntimeException if [replacement] expression is malformed, or capturing group with specified `name` or `index` does not exist
|
||||
*/
|
||||
public actual fun replace(input: CharSequence, replacement: String): String = input.toString().nativeReplace(nativePattern, replacement)
|
||||
public actual fun replace(input: CharSequence, replacement: String): String {
    // Only '\' and '$' trigger per-match expansion; an expression containing neither
    // is handed to the native replace unchanged.
    val hasSpecialChars = replacement.contains('\\') || replacement.contains('$')
    return if (hasSpecialChars) {
        replace(input) { matchResult -> substituteGroupRefs(matchResult, replacement) }
    } else {
        input.toString().nativeReplace(nativePattern, replacement)
    }
}
|
||||
|
||||
/**
|
||||
* Replaces all occurrences of this regular expression in the specified [input] string with the result of
|
||||
@@ -178,11 +198,36 @@ public actual class Regex actual constructor(pattern: String, options: Set<Regex
|
||||
/**
|
||||
* Replaces the first occurrence of this regular expression in the specified [input] string with specified [replacement] expression.
|
||||
*
|
||||
* @param replacement A replacement expression that can include substitutions. See [String.prototype.replace](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace) function docs for details.
|
||||
* The replacement string may contain references to the captured groups during a match. Occurrences of `$index`
|
||||
* in the replacement string will be substituted with the subsequences corresponding to the captured groups with the specified index.
|
||||
* The first digit after '$' is always treated as part of group reference. Subsequent digits are incorporated
|
||||
* into `index` only if they would form a valid group reference. Only the digits '0'..'9' are considered as potential components
|
||||
* of the group reference. Note that indexes of captured groups start from 1, and the group with index 0 is the whole match.
|
||||
*
|
||||
* Backslash character '\' can be used to include the succeeding character as a literal in the replacement string, e.g., `\$` or `\\`.
|
||||
* [Regex.escapeReplacement] can be used if [replacement] has to be treated as a literal string.
|
||||
*
|
||||
* Note that referring to named capturing groups by name is currently not supported in Kotlin/JS.
|
||||
* However, you can still refer to them by index.
|
||||
*
|
||||
* @param input the char sequence to find a match of this regular expression in
|
||||
* @param replacement the expression to replace the found match with
|
||||
* @return the result of replacing the first occurrence of this regular expression in [input] with the result of evaluating the [replacement] expression
|
||||
* @throws RuntimeException if [replacement] expression is malformed, or capturing group with specified `name` or `index` does not exist
|
||||
*/
|
||||
public actual fun replaceFirst(input: CharSequence, replacement: String): String {
|
||||
val nonGlobalOptions = options.map { it.value }.joinToString(separator = "")
|
||||
return input.toString().nativeReplace(RegExp(pattern, nonGlobalOptions), replacement)
|
||||
if (!replacement.contains('\\') && !replacement.contains('$')) {
|
||||
val nonGlobalOptions = options.toFlags("u")
|
||||
return input.toString().nativeReplace(RegExp(pattern, nonGlobalOptions), replacement)
|
||||
}
|
||||
|
||||
val match = find(input) ?: return input.toString()
|
||||
|
||||
return buildString {
|
||||
append(input.substring(0, match.range.first))
|
||||
append(substituteGroupRefs(match, replacement))
|
||||
append(input.substring(match.range.last + 1, input.length))
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -266,10 +311,13 @@ public actual class Regex actual constructor(pattern: String, options: Set<Regex
|
||||
* Returns a literal replacement expression for the specified [literal] string.
|
||||
* No characters of that string will have special meaning when it is used as a replacement string in [Regex.replace] function.
|
||||
*/
|
||||
public actual fun escapeReplacement(literal: String): String = literal.nativeReplace(replacementEscape, "$$$$")
|
||||
public actual fun escapeReplacement(literal: String): String = literal.nativeReplace(replacementEscape, "\\$&")
|
||||
|
||||
private val patternEscape = RegExp("""[\\^$*+?.()|[\]{}]""", "g")
|
||||
private val replacementEscape = RegExp("""\$""", "g")
|
||||
// Matches every character that is special in a Kotlin replacement expression: the escape
// character '\' and the group-reference marker '$'. Both must be matched so that
// escapeReplacement can prefix each of them with a backslash; matching only '$' would leave
// a literal '\' to be interpreted as an escape when the expression is later expanded.
private val replacementEscape = RegExp("""[\\$]""", "g")
|
||||
|
||||
internal fun nativeEscapeReplacement(literal: String): String = literal.nativeReplace(nativeReplacementEscape, "$$$$")
|
||||
private val nativeReplacementEscape = RegExp("""\$""", "g")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -310,3 +358,57 @@ private fun RegExp.findNext(input: String, from: Int, nextPattern: RegExp): Matc
|
||||
nextPattern.findNext(input, if (range.isEmpty()) range.start + 1 else range.endInclusive + 1, nextPattern)
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Expands the [replacement] expression for a single [match].
 *
 * A backslash copies the character that follows it verbatim. A `$` followed by decimal digits is replaced
 * with the corresponding entry of the group values of [match]. Any other character is copied unchanged.
 *
 * @throws IllegalArgumentException if the expression ends right after `\` or `$`, if `$` is followed by `{`
 * (named group references are rejected here), or if `$` is followed by a character other than '0'..'9'.
 * @throws IndexOutOfBoundsException if the referenced index is not less than the number of group values.
 */
private fun substituteGroupRefs(match: MatchResult, replacement: String): String {
    val output = StringBuilder(replacement.length)
    var position = 0

    while (position < replacement.length) {
        val current = replacement[position++]
        when (current) {
            '\\' -> {
                if (position == replacement.length)
                    throw IllegalArgumentException("The Char to be escaped is missing")

                // The escaped character is taken literally, whatever it is.
                output.append(replacement[position++])
            }
            '$' -> {
                if (position == replacement.length)
                    throw IllegalArgumentException("Capturing group index is missing")

                val next = replacement[position]
                if (next == '{')
                    throw IllegalArgumentException("Named capturing group reference currently is not supported")
                if (next !in '0'..'9')
                    throw IllegalArgumentException("Invalid capturing group reference")

                val groups = match.groupValues
                val refEnd = replacement.readGroupIndex(position, groups.size)
                val groupNumber = replacement.substring(position, refEnd).toInt()

                // A single digit is always consumed, so it may still point past the last group.
                if (groupNumber >= groups.size)
                    throw IndexOutOfBoundsException("Group with index $groupNumber does not exist")

                output.append(groups[groupNumber])
                position = refEnd
            }
            else -> output.append(current)
        }
    }
    return output.toString()
}
|
||||
|
||||
/**
 * Finds the end of the numeric group reference that starts at [startIndex] in this string.
 *
 * The character at [startIndex] is always consumed and is read as a decimal digit without validation.
 * Each following character '0'..'9' is consumed only while the number accumulated so far
 * stays within `0 until groupCount`.
 *
 * @return the index just past the last consumed character.
 */
private fun String.readGroupIndex(startIndex: Int, groupCount: Int): Int {
    var accumulated = this[startIndex] - '0'
    var end = startIndex + 1

    while (end < length) {
        val next = this[end]
        if (next !in '0'..'9') break

        // Stop before a digit that would turn the reference into a non-existing group.
        val candidate = (accumulated * 10) + (next - '0')
        if (candidate !in 0 until groupCount) break

        accumulated = candidate
        end++
    }
    return end
}
|
||||
@@ -160,7 +160,7 @@ public actual fun CharSequence.repeat(n: Int): String {
|
||||
*/
|
||||
@Suppress("ACTUAL_FUNCTION_WITH_DEFAULT_ARGUMENTS")
|
||||
public actual fun String.replace(oldValue: String, newValue: String, ignoreCase: Boolean = false): String =
|
||||
nativeReplace(RegExp(Regex.escape(oldValue), if (ignoreCase) "gui" else "gu"), Regex.escapeReplacement(newValue))
|
||||
nativeReplace(RegExp(Regex.escape(oldValue), if (ignoreCase) "gui" else "gu"), Regex.nativeEscapeReplacement(newValue))
|
||||
|
||||
/**
|
||||
* Returns a new string with all occurrences of [oldChar] replaced with [newChar].
|
||||
@@ -173,7 +173,7 @@ public actual fun String.replace(oldChar: Char, newChar: Char, ignoreCase: Boole
|
||||
|
||||
@Suppress("ACTUAL_FUNCTION_WITH_DEFAULT_ARGUMENTS")
|
||||
public actual fun String.replaceFirst(oldValue: String, newValue: String, ignoreCase: Boolean = false): String =
|
||||
nativeReplace(RegExp(Regex.escape(oldValue), if (ignoreCase) "ui" else "u"), Regex.escapeReplacement(newValue))
|
||||
nativeReplace(RegExp(Regex.escape(oldValue), if (ignoreCase) "ui" else "u"), Regex.nativeEscapeReplacement(newValue))
|
||||
|
||||
@Suppress("ACTUAL_FUNCTION_WITH_DEFAULT_ARGUMENTS")
|
||||
public actual fun String.replaceFirst(oldChar: Char, newChar: Char, ignoreCase: Boolean = false): String =
|
||||
|
||||
@@ -25,4 +25,7 @@ public actual fun testOnJs(action: () -> Unit) = action()
|
||||
// TODO: should be true at least in JS IR after implementing KT-24975
|
||||
public actual val isFloat32RangeEnforced: Boolean = false
|
||||
|
||||
actual val supportsSuppressedExceptions: Boolean get() = true
|
||||
actual val supportsSuppressedExceptions: Boolean get() = true
|
||||
|
||||
// TODO: implement named group reference in replacement expression
|
||||
public actual val supportsNamedCapturingGroup: Boolean get() = false
|
||||
@@ -158,7 +158,23 @@ internal constructor(private val nativePattern: Pattern) : Serializable {
|
||||
/**
|
||||
* Replaces all occurrences of this regular expression in the specified [input] string with specified [replacement] expression.
|
||||
*
|
||||
* @param replacement A replacement expression that can include substitutions. See [Matcher.appendReplacement] for details.
|
||||
* The replacement string may contain references to the captured groups during a match. Occurrences of `${name}` or `$index`
|
||||
* in the replacement string will be substituted with the subsequences corresponding to the captured groups with the specified name or index.
|
||||
* In case of `$index` the first digit after '$' is always treated as part of group reference. Subsequent digits are incorporated
|
||||
* into `index` only if they would form a valid group reference. Only the digits '0'..'9' are considered as potential components
|
||||
* of the group reference. Note that indexes of captured groups start from 1, and the group with index 0 is the whole match.
|
||||
* In case of `${name}` the `name` can consist of latin letters 'a'..'z' and 'A'..'Z', or digits '0'..'9'. The first character must be
|
||||
* a letter.
|
||||
*
|
||||
* Backslash character '\' can be used to include the succeeding character as a literal in the replacement string, e.g., `\$` or `\\`.
|
||||
* [Regex.escapeReplacement] can be used if [replacement] has to be treated as a literal string.
|
||||
*
|
||||
* Note that named capturing groups are supported in Java 7 or later.
|
||||
*
|
||||
* @param input the char sequence to find matches of this regular expression in
|
||||
* @param replacement the expression to replace found matches with
|
||||
* @return the result of replacing each occurrence of this regular expression in [input] with the result of evaluating the [replacement] expression
|
||||
* @throws RuntimeException if [replacement] expression is malformed, or capturing group with specified `name` or `index` does not exist
|
||||
*/
|
||||
public actual fun replace(input: CharSequence, replacement: String): String = nativePattern.matcher(input).replaceAll(replacement)
|
||||
|
||||
@@ -191,7 +207,23 @@ internal constructor(private val nativePattern: Pattern) : Serializable {
|
||||
/**
|
||||
* Replaces the first occurrence of this regular expression in the specified [input] string with specified [replacement] expression.
|
||||
*
|
||||
* @param replacement A replacement expression that can include substitutions. See [Matcher.appendReplacement] for details.
|
||||
* The replacement string may contain references to the captured groups during a match. Occurrences of `${name}` or `$index`
|
||||
* in the replacement string will be substituted with the subsequences corresponding to the captured groups with the specified name or index.
|
||||
* In case of `$index` the first digit after '$' is always treated as part of group reference. Subsequent digits are incorporated
|
||||
* into `index` only if they would form a valid group reference. Only the digits '0'..'9' are considered as potential components
|
||||
* of the group reference. Note that indexes of captured groups start from 1, and the group with index 0 is the whole match.
|
||||
* In case of `${name}` the `name` can consist of latin letters 'a'..'z' and 'A'..'Z', or digits '0'..'9'. The first character must be
|
||||
* a letter.
|
||||
*
|
||||
* Backslash character '\' can be used to include the succeeding character as a literal in the replacement string, e.g., `\$` or `\\`.
|
||||
* [Regex.escapeReplacement] can be used if [replacement] has to be treated as a literal string.
|
||||
*
|
||||
* Note that named capturing groups are supported in Java 7 or later.
|
||||
*
|
||||
* @param input the char sequence to find a match of this regular expression in
|
||||
* @param replacement the expression to replace the found match with
|
||||
* @return the result of replacing the first occurrence of this regular expression in [input] with the result of evaluating the [replacement] expression
|
||||
* @throws RuntimeException if [replacement] expression is malformed, or capturing group with specified `name` or `index` does not exist
|
||||
*/
|
||||
public actual fun replaceFirst(input: CharSequence, replacement: String): String =
|
||||
nativePattern.matcher(input).replaceFirst(replacement)
|
||||
|
||||
@@ -39,4 +39,6 @@ public fun <T> platformNull() = Collections.singletonList(null as T).first()
|
||||
|
||||
public actual val isFloat32RangeEnforced: Boolean = true
|
||||
|
||||
public actual val supportsSuppressedExceptions: Boolean get() = !isJava6
|
||||
public actual val supportsSuppressedExceptions: Boolean get() = !isJava6
|
||||
|
||||
public actual val supportsNamedCapturingGroup: Boolean get() = !isJava6
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
package test.text
|
||||
|
||||
import test.supportsNamedCapturingGroup
|
||||
import kotlin.test.*
|
||||
|
||||
class RegexTest {
|
||||
@@ -238,10 +239,78 @@ class RegexTest {
|
||||
@Test fun replace() {
    val input = "123-456"
    val pattern = "(\\d+)".toRegex()

    // Asserts that the given replacement expression is rejected as malformed.
    fun assertRejected(replacement: String) {
        assertFailsWith<IllegalArgumentException>(replacement) { pattern.replace(input, replacement) }
    }

    // js String.prototype.replace() inserts a "$"
    assertRejected("$$")
    // js String.prototype.replace() inserts the matched substring
    assertRejected("$&")
    // js String.prototype.replace() inserts the portion of the string that precedes the matched substring
    assertRejected("\$`")
    // js String.prototype.replace() inserts the portion of the string that follows the matched substring
    assertRejected("$'")
    // js String.prototype.replace() inserts the replacement string as a literal if it refers to a non-existing capturing group
    assertFailsWith<RuntimeException>("$") { pattern.replace(input, "$") } // should be IAE, however jdk7 throws String IOOBE
    assertFailsWith<IndexOutOfBoundsException>("$2") { pattern.replace(input, "$2") }
    assertRejected("\$name")
    assertRejected("\${name}")
    assertRejected("$-")

    // inserts "$" literally
    assertEquals("$-$", pattern.replace(input, "\\$"))
    // inserts the matched substring
    assertEquals("(123)-(456)", pattern.replace(input, "($0)"))
    // inserts the first captured group
    assertEquals("(123)-(456)", pattern.replace(input, "($1)"))

    assertEquals("$&-$&", pattern.replace(input, Regex.escapeReplacement("$&")))
    assertEquals("X-456", pattern.replaceFirst(input, "X"))

    // Group N of the nested pattern captures the input starting at offset N.
    val longInput = "0123456789ABC"
    val longPattern = "0(1(2(3(4(5(6(7(8(9(A(B(C))))))))))))".toRegex()
    (0..12).forEach { groupIndex ->
        assertEquals(longInput.substring(groupIndex), longPattern.replace(longInput, "$$groupIndex"))
    }
    // There is no group 13, so "$13" is read as group 1 followed by a literal "3".
    assertEquals(longInput.substring(1) + "3", longPattern.replace(longInput, "$13"))

    // KT-38000
    assertEquals("""\,""", ",".replace("([,])".toRegex(), """\\$1"""))
    // KT-28378
    val wholeString = Regex("(.+)")
    assertEquals("$ 2", "2".replace(wholeString, "\\$ $1"))
    assertEquals("$2", "2".replace(wholeString, "\\$$1"))
    assertFailsWith<IllegalArgumentException> { "2".replace(wholeString, "$ $1") }
}
|
||||
|
||||
@Test fun replaceWithNamedGroups() {
    if (!supportsNamedCapturingGroup) {
        // The pattern is built inside the block, so a failure while constructing it also counts.
        assertFails {
            Regex("(?<first>\\d+)-(?<second>\\d+)").replace("123-456", "\${first}+\${second}")
        }
        return
    }

    val pattern = Regex("(?<first>\\d+)-(?<second>\\d+)")

    // A single match covering the whole input.
    run {
        val input = "123-456"

        assertEquals("(123-456)", pattern.replace(input, "($0)"))
        assertEquals("123+456", pattern.replace(input, "$1+$2"))
        // take largest legal group number reference
        assertEquals("1230+456", pattern.replace(input, "$10+$2"))
        assertEquals("123+456", pattern.replace(input, "$01+$2"))
        // js refers to named capturing groups with "$<name>" syntax
        assertFailsWith<IllegalArgumentException>("\$<first>+\$<second>") { pattern.replace(input, "\$<first>+\$<second>") }
        assertEquals("123+456", pattern.replace(input, "\${first}+\${second}"))

        // missing trailing '}'
        assertFailsWith<IllegalArgumentException>("\${first+\${second}") { pattern.replace(input, "\${first+\${second}") }
        assertFailsWith<IllegalArgumentException>("\${first}+\${second") { pattern.replace(input, "\${first}+\${second") }
    }

    // Two matches: replace rewrites both, replaceFirst only the first one.
    run {
        val input = "123-456-789-012"

        assertEquals("123/456-789/012", pattern.replace(input, "$1/$2"))
        assertEquals("123/456-789/012", pattern.replace(input, "\${first}/\${second}"))
        assertEquals("123/456-789-012", pattern.replaceFirst(input, "\${first}/\${second}"))
    }
}
|
||||
|
||||
@Test fun replaceEvaluator() {
|
||||
|
||||
Reference in New Issue
Block a user