diff --git a/kotlin-native/backend.native/tests/stdlib_external/utils.kt b/kotlin-native/backend.native/tests/stdlib_external/utils.kt index 6a5d6e51c6c..811106059fc 100644 --- a/kotlin-native/backend.native/tests/stdlib_external/utils.kt +++ b/kotlin-native/backend.native/tests/stdlib_external/utils.kt @@ -28,4 +28,6 @@ actual fun testOnJs(action: () -> Unit) {} public actual val isFloat32RangeEnforced: Boolean get() = true -public actual val supportsSuppressedExceptions: Boolean get() = true \ No newline at end of file +public actual val supportsSuppressedExceptions: Boolean get() = true + +public actual val supportsNamedCapturingGroup: Boolean get() = false \ No newline at end of file diff --git a/kotlin-native/runtime/src/main/kotlin/kotlin/text/Regex.kt b/kotlin-native/runtime/src/main/kotlin/kotlin/text/Regex.kt index 157726e4c01..2c1bf8fc554 100644 --- a/kotlin-native/runtime/src/main/kotlin/kotlin/text/Regex.kt +++ b/kotlin-native/runtime/src/main/kotlin/kotlin/text/Regex.kt @@ -253,10 +253,23 @@ public actual class Regex internal constructor(internal val nativePattern: Patte } /** - * Replaces all occurrences of this regular expression in the specified [input] string with - * specified [replacement] expression. + * Replaces all occurrences of this regular expression in the specified [input] string with specified [replacement] expression. * - * @param replacement A replacement expression that can include substitutions. + * The replacement string may contain references to the captured groups during a match. Occurrences of `$index` + * in the replacement string will be substituted with the subsequences corresponding to the captured groups with the specified index. + * The first digit after '$' is always treated as part of group reference. Subsequent digits are incorporated + * into `index` only if they would form a valid group reference. Only the digits '0'..'9' are considered as potential components + * of the group reference. 
Note that indexes of captured groups start from 1, and the group with index 0 is the whole match. + * + * Backslash character '\' can be used to include the succeeding character as a literal in the replacement string, e.g., `\$` or `\\`. + * [Regex.escapeReplacement] can be used if [replacement] has to be treated as a literal string. + * + * Note that named capturing groups are not supported in Kotlin/Native. + * + * @param input the char sequence to find matches of this regular expression in + * @param replacement the expression to replace found matches with + * @return the result of replacing each occurrence of this regular expression in [input] with the result of evaluating the [replacement] expression + * @throws RuntimeException if [replacement] expression is malformed, or capturing group with specified `name` or `index` does not exist */ actual fun replace(input: CharSequence, replacement: String): String = replace(input) { match -> processReplacement(match, replacement) } @@ -290,7 +303,21 @@ public actual class Regex internal constructor(internal val nativePattern: Patte /** * Replaces the first occurrence of this regular expression in the specified [input] string with specified [replacement] expression. * - * @param replacement A replacement expression that can include substitutions. + * The replacement string may contain references to the captured groups during a match. Occurrences of `$index` + * in the replacement string will be substituted with the subsequences corresponding to the captured groups with the specified index. + * The first digit after '$' is always treated as part of group reference. Subsequent digits are incorporated + * into `index` only if they would form a valid group reference. Only the digits '0'..'9' are considered as potential components + * of the group reference. Note that indexes of captured groups start from 1, and the group with index 0 is the whole match. 
+ * + * Backslash character '\' can be used to include the succeeding character as a literal in the replacement string, e.g., `\$` or `\\`. + * [Regex.escapeReplacement] can be used if [replacement] has to be treated as a literal string. + * + * Note that named capturing groups are not supported in Kotlin/Native. + * + * @param input the char sequence to find a match of this regular expression in + * @param replacement the expression to replace the found match with + * @return the result of replacing the first occurrence of this regular expression in [input] with the result of evaluating the [replacement] expression + * @throws RuntimeException if [replacement] expression is malformed, or capturing group with specified `name` or `index` does not exist */ actual fun replaceFirst(input: CharSequence, replacement: String): String { val match = find(input) ?: return input.toString() diff --git a/libraries/stdlib/common/test/testUtils.kt b/libraries/stdlib/common/test/testUtils.kt index 43e0e238bfd..1d8e4986145 100644 --- a/libraries/stdlib/common/test/testUtils.kt +++ b/libraries/stdlib/common/test/testUtils.kt @@ -16,4 +16,6 @@ public expect fun testOnJs(action: () -> Unit) public expect val isFloat32RangeEnforced: Boolean -public expect val supportsSuppressedExceptions: Boolean \ No newline at end of file +public expect val supportsSuppressedExceptions: Boolean + +public expect val supportsNamedCapturingGroup: Boolean \ No newline at end of file diff --git a/libraries/stdlib/js/src/kotlin/text/regex.kt b/libraries/stdlib/js/src/kotlin/text/regex.kt index 9ce583ef3e8..9c2cfd43865 100644 --- a/libraries/stdlib/js/src/kotlin/text/regex.kt +++ b/libraries/stdlib/js/src/kotlin/text/regex.kt @@ -144,9 +144,29 @@ public actual class Regex actual constructor(pattern: String, options: Set= match.groupValues.size) + throw IndexOutOfBoundsException("Group with index $groupIndex does not exist") + + result.append(match.groupValues[groupIndex]) + index = endIndex + } else { + 
result.append(char) + } + } + return result.toString() +} + +private fun String.readGroupIndex(startIndex: Int, groupCount: Int): Int { + // at least one digit after '$' is always captured + var index = startIndex + 1 + var groupIndex = this[startIndex] - '0' + + // capture the largest valid group index + while (index < length && this[index] in '0'..'9') { + val newGroupIndex = (groupIndex * 10) + (this[index] - '0') + if (newGroupIndex in 0 until groupCount) { + groupIndex = newGroupIndex + index++ + } else { + break + } + } + return index +} \ No newline at end of file diff --git a/libraries/stdlib/js/src/kotlin/text/stringsCode.kt b/libraries/stdlib/js/src/kotlin/text/stringsCode.kt index 4ee4535738e..00e345fd314 100644 --- a/libraries/stdlib/js/src/kotlin/text/stringsCode.kt +++ b/libraries/stdlib/js/src/kotlin/text/stringsCode.kt @@ -160,7 +160,7 @@ public actual fun CharSequence.repeat(n: Int): String { */ @Suppress("ACTUAL_FUNCTION_WITH_DEFAULT_ARGUMENTS") public actual fun String.replace(oldValue: String, newValue: String, ignoreCase: Boolean = false): String = - nativeReplace(RegExp(Regex.escape(oldValue), if (ignoreCase) "gui" else "gu"), Regex.escapeReplacement(newValue)) + nativeReplace(RegExp(Regex.escape(oldValue), if (ignoreCase) "gui" else "gu"), Regex.nativeEscapeReplacement(newValue)) /** * Returns a new string with all occurrences of [oldChar] replaced with [newChar]. 
@@ -173,7 +173,7 @@ public actual fun String.replace(oldChar: Char, newChar: Char, ignoreCase: Boole @Suppress("ACTUAL_FUNCTION_WITH_DEFAULT_ARGUMENTS") public actual fun String.replaceFirst(oldValue: String, newValue: String, ignoreCase: Boolean = false): String = - nativeReplace(RegExp(Regex.escape(oldValue), if (ignoreCase) "ui" else "u"), Regex.escapeReplacement(newValue)) + nativeReplace(RegExp(Regex.escape(oldValue), if (ignoreCase) "ui" else "u"), Regex.nativeEscapeReplacement(newValue)) @Suppress("ACTUAL_FUNCTION_WITH_DEFAULT_ARGUMENTS") public actual fun String.replaceFirst(oldChar: Char, newChar: Char, ignoreCase: Boolean = false): String = diff --git a/libraries/stdlib/js/test/core/testUtils.kt b/libraries/stdlib/js/test/core/testUtils.kt index a9a6f09c6d2..373d336dc45 100644 --- a/libraries/stdlib/js/test/core/testUtils.kt +++ b/libraries/stdlib/js/test/core/testUtils.kt @@ -25,4 +25,7 @@ public actual fun testOnJs(action: () -> Unit) = action() // TODO: should be true at least in JS IR after implementing KT-24975 public actual val isFloat32RangeEnforced: Boolean = false -actual val supportsSuppressedExceptions: Boolean get() = true \ No newline at end of file +actual val supportsSuppressedExceptions: Boolean get() = true + +// TODO: implement named group reference in replacement expression +public actual val supportsNamedCapturingGroup: Boolean get() = false \ No newline at end of file diff --git a/libraries/stdlib/jvm/src/kotlin/text/regex/Regex.kt b/libraries/stdlib/jvm/src/kotlin/text/regex/Regex.kt index c961d066c65..d25937a3c24 100644 --- a/libraries/stdlib/jvm/src/kotlin/text/regex/Regex.kt +++ b/libraries/stdlib/jvm/src/kotlin/text/regex/Regex.kt @@ -158,7 +158,23 @@ internal constructor(private val nativePattern: Pattern) : Serializable { /** * Replaces all occurrences of this regular expression in the specified [input] string with specified [replacement] expression. 
* - * @param replacement A replacement expression that can include substitutions. See [Matcher.appendReplacement] for details. + * The replacement string may contain references to the captured groups during a match. Occurrences of `${name}` or `$index` + * in the replacement string will be substituted with the subsequences corresponding to the captured groups with the specified name or index. + * In case of `$index` the first digit after '$' is always treated as part of group reference. Subsequent digits are incorporated + * into `index` only if they would form a valid group reference. Only the digits '0'..'9' are considered as potential components + * of the group reference. Note that indexes of captured groups start from 1, and the group with index 0 is the whole match. + * In case of `${name}` the `name` can consist of latin letters 'a'..'z' and 'A'..'Z', or digits '0'..'9'. The first character must be + * a letter. + * + * Backslash character '\' can be used to include the succeeding character as a literal in the replacement string, e.g., `\$` or `\\`. + * [Regex.escapeReplacement] can be used if [replacement] has to be treated as a literal string. + * + * Note that named capturing groups are supported in Java 7 or later. 
+ * + * @param input the char sequence to find matches of this regular expression in + * @param replacement the expression to replace found matches with + * @return the result of replacing each occurrence of this regular expression in [input] with the result of evaluating the [replacement] expression + * @throws RuntimeException if [replacement] expression is malformed, or capturing group with specified `name` or `index` does not exist */ public actual fun replace(input: CharSequence, replacement: String): String = nativePattern.matcher(input).replaceAll(replacement) @@ -191,7 +207,23 @@ internal constructor(private val nativePattern: Pattern) : Serializable { /** * Replaces the first occurrence of this regular expression in the specified [input] string with specified [replacement] expression. * - * @param replacement A replacement expression that can include substitutions. See [Matcher.appendReplacement] for details. + * The replacement string may contain references to the captured groups during a match. Occurrences of `${name}` or `$index` + * in the replacement string will be substituted with the subsequences corresponding to the captured groups with the specified name or index. + * In case of `$index` the first digit after '$' is always treated as part of group reference. Subsequent digits are incorporated + * into `index` only if they would form a valid group reference. Only the digits '0'..'9' are considered as potential components + * of the group reference. Note that indexes of captured groups start from 1, and the group with index 0 is the whole match. + * In case of `${name}` the `name` can consist of latin letters 'a'..'z' and 'A'..'Z', or digits '0'..'9'. The first character must be + * a letter. + * + * Backslash character '\' can be used to include the succeeding character as a literal in the replacement string, e.g., `\$` or `\\`. + * [Regex.escapeReplacement] can be used if [replacement] has to be treated as a literal string. 
+ * + * Note that named capturing groups are supported in Java 7 or later. + * + * @param input the char sequence to find a match of this regular expression in + * @param replacement the expression to replace the found match with + * @return the result of replacing the first occurrence of this regular expression in [input] with the result of evaluating the [replacement] expression + * @throws RuntimeException if [replacement] expression is malformed, or capturing group with specified `name` or `index` does not exist */ public actual fun replaceFirst(input: CharSequence, replacement: String): String = nativePattern.matcher(input).replaceFirst(replacement) diff --git a/libraries/stdlib/jvm/test/testUtilsJVM.kt b/libraries/stdlib/jvm/test/testUtilsJVM.kt index 8b5c875efa0..9b597a0f8be 100644 --- a/libraries/stdlib/jvm/test/testUtilsJVM.kt +++ b/libraries/stdlib/jvm/test/testUtilsJVM.kt @@ -39,4 +39,6 @@ public fun platformNull() = Collections.singletonList(null as T).first() public actual val isFloat32RangeEnforced: Boolean = true -public actual val supportsSuppressedExceptions: Boolean get() = !isJava6 \ No newline at end of file +public actual val supportsSuppressedExceptions: Boolean get() = !isJava6 + +public actual val supportsNamedCapturingGroup: Boolean get() = !isJava6 \ No newline at end of file diff --git a/libraries/stdlib/test/text/RegexTest.kt b/libraries/stdlib/test/text/RegexTest.kt index 66aacbbcd7e..2c055378960 100644 --- a/libraries/stdlib/test/text/RegexTest.kt +++ b/libraries/stdlib/test/text/RegexTest.kt @@ -7,6 +7,7 @@ package test.text +import test.supportsNamedCapturingGroup import kotlin.test.* class RegexTest { @@ -238,10 +239,78 @@ class RegexTest { @Test fun replace() { val input = "123-456" val pattern = "(\\d+)".toRegex() + + // js String.prototype.replace() inserts a "$" + assertFailsWith("$$") { pattern.replace(input, "$$") } + // js String.prototype.replace() inserts the matched substring + assertFailsWith("$&") { 
pattern.replace(input, "$&") } + // js String.prototype.replace() inserts the portion of the string that precedes the matched substring + assertFailsWith("\$`") { pattern.replace(input, "\$`") } + // js String.prototype.replace() inserts the portion of the string that follows the matched substring + assertFailsWith("$'") { pattern.replace(input, "$'") } + // js String.prototype.replace() inserts the replacement string as a literal if it refers to a non-existing capturing group + assertFailsWith("$") { pattern.replace(input, "$") } // should be IAE, however jdk7 throws String IOOBE + assertFailsWith("$2") { pattern.replace(input, "$2") } + assertFailsWith("\$name") { pattern.replace(input, "\$name") } + assertFailsWith("\${name}") { pattern.replace(input, "\${name}") } + assertFailsWith("$-") { pattern.replace(input, "$-") } + + // inserts "$" literally + assertEquals("$-$", pattern.replace(input, "\\$")) + // inserts the matched substring + assertEquals("(123)-(456)", pattern.replace(input, "($0)")) + // inserts the first captured group assertEquals("(123)-(456)", pattern.replace(input, "($1)")) assertEquals("$&-$&", pattern.replace(input, Regex.escapeReplacement("$&"))) assertEquals("X-456", pattern.replaceFirst(input, "X")) + + val longInput = "0123456789ABC" + val longPattern = "0(1(2(3(4(5(6(7(8(9(A(B(C))))))))))))".toRegex() + for (groupIndex in 0..12) { + assertEquals(longInput.substring(groupIndex), longPattern.replace(longInput, "$$groupIndex")) + } + assertEquals(longInput.substring(1) + "3", longPattern.replace(longInput, "$13")) + + // KT-38000 + assertEquals("""\,""", ",".replace("([,])".toRegex(), """\\$1""")) + // KT-28378 + assertEquals("$ 2", "2".replace(Regex("(.+)"), "\\$ $1")) + assertEquals("$2", "2".replace(Regex("(.+)"), "\\$$1")) + assertFailsWith { "2".replace(Regex("(.+)"), "$ $1") } + } + + @Test fun replaceWithNamedGroups() { + if (!supportsNamedCapturingGroup) { + assertFails { + val pattern = Regex("(?\\d+)-(?\\d+)") + 
pattern.replace("123-456", "\${first}+\${second}") + } + return + } + + val pattern = Regex("(?\\d+)-(?\\d+)") + + "123-456".let { input -> + assertEquals("(123-456)", pattern.replace(input, "($0)")) + assertEquals("123+456", pattern.replace(input, "$1+$2")) + // take largest legal group number reference + assertEquals("1230+456", pattern.replace(input, "$10+$2")) + assertEquals("123+456", pattern.replace(input, "$01+$2")) + // js refers to named capturing groups with "$" syntax + assertFailsWith("\$+\$") { pattern.replace(input, "\$+\$") } + assertEquals("123+456", pattern.replace(input, "\${first}+\${second}")) + + // missing trailing '}' + assertFailsWith("\${first+\${second}") { pattern.replace(input, "\${first+\${second}") } + assertFailsWith("\${first}+\${second") { pattern.replace(input, "\${first}+\${second") } + } + + "123-456-789-012".let { input -> + assertEquals("123/456-789/012", pattern.replace(input, "$1/$2")) + assertEquals("123/456-789/012", pattern.replace(input, "\${first}/\${second}")) + assertEquals("123/456-789-012", pattern.replaceFirst(input, "\${first}/\${second}")) + } } @Test fun replaceEvaluator() {