Files
kotlin-fork/libraries/stdlib/native-wasm/test/harmony_regex/FixedLengthQuantifierTest.kt
T
Abduqodiri Qurbonzoda fb31a29c39 [K/N] Fix stack overflow in regex when a quantifier is matched many times
Motivation:

Users often expect simple patterns, like `[a]+` or `[^a]+`, to work fast
and without any problems, even with long strings.
Char class from the first pattern matches only 'a' and gets wrapped into
LeafQuantifierSet, and works fine with long strings indeed.
Char class from the second pattern, however, matches any character
except 'a', including supplementary code points. So, the number of chars
it consumes is not known beforehand. There is no optimization for such
char classes, and if they are matched multiple times, the stack memory
gets exhausted.

Modification:

Introduce FixedLengthQuantifierSet node.
The node represents quantifier over constructs that consume a fixed
amount of characters for a given string and index. Such constructs don't
need backtracking to find a different match. Thus, it is possible for
the node to avoid recursion when matching multiple times.

Result:

Fixes KT-46211, KT-35508 and probably KT-39789. Reproducer for the
latter issue is no longer available, but error stacktrace resembles
those of the other issues.
2022-12-19 16:40:51 +00:00

174 lines
6.0 KiB
Kotlin

/*
* Copyright 2010-2022 JetBrains s.r.o. and Kotlin Programming Language contributors.
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
*/
package test.text.harmony_regex
import kotlin.text.*
import kotlin.test.*
/**
 * Regression tests for regex quantifiers that must be applied a very large number of times.
 *
 * Most tests match against [input], a string of [QUANTIFIER_MATCH_COUNT] `'a'` characters, or a
 * string built from it. The `ktNNNNN` tests are named after the issue-tracker tickets whose
 * patterns they reproduce.
 */
class FixedLengthQuantifierTest {

    /**
     * Asserts that [regex] matches the whole of [input] exactly when [expected] is `true`.
     *
     * @param regex the compiled pattern under test.
     * @param input the string the pattern is matched against.
     * @param inputDescription short human-readable stand-in for [input] used in the failure message,
     * so a failing assertion does not have to print the (very long) input itself.
     * @param expected whether a full match is expected; defaults to `true`.
     */
    private fun testMatches(regex: Regex, input: String, inputDescription: String, expected: Boolean = true) {
        val negation = if (expected) "" else "not "
        assertEquals(expected, regex.matches(input), "$regex should ${negation}match $inputDescription")
    }

    /**
     * Runs the shared quantifier scenario against [input]:
     * `+` and `*` over [atom], followed by bounded `{min,max}` quantifiers over [boundedAtom] whose
     * bounds sit around, exactly on, and just off the input length.
     *
     * @param atom the single-character construct quantified by `+` and `*`.
     * @param modifier quantifier mode suffix: empty for greedy, `"?"` for reluctant, `"+"` for possessive.
     * @param boundedAtom the construct quantified by the `{min,max}` forms; defaults to [atom].
     */
    private fun testQuantifiers(atom: String, modifier: String = "", boundedAtom: String = atom) {
        testMatches(Regex("$atom+$modifier"), input, inputDescription)
        testMatches(Regex("$atom*$modifier"), input, inputDescription)

        // (min, max, whether an input of QUANTIFIER_MATCH_COUNT chars is expected to match)
        val boundedCases = listOf(
            Triple(COMPOSITE_MIN, COMPOSITE_MAX, true),
            Triple(COMPOSITE_MIN, QUANTIFIER_MATCH_COUNT, true),
            // Upper bound one short of the input length: the full input cannot match.
            Triple(COMPOSITE_MIN, QUANTIFIER_MATCH_COUNT - 1, false),
            Triple(QUANTIFIER_MATCH_COUNT, COMPOSITE_MAX, true),
            // Lower bound one past the input length: the full input cannot match.
            Triple(QUANTIFIER_MATCH_COUNT + 1, COMPOSITE_MAX, false),
        )
        for ((min, max, expected) in boundedCases) {
            testMatches(Regex("${boundedAtom}{$min,$max}$modifier"), input, inputDescription, expected)
        }
    }

    /**
     * Greedy quantifiers over negated character classes.
     *
     * "Qualifier" in the name is a misspelling of "Quantifier"; the established test name is kept.
     */
    @Test
    fun fixedLengthQualifierGreedy() {
        testQuantifiers(atom = "[^\\s]", boundedAtom = "[^\\s\\d]")
    }

    /** Reluctant (`?`-suffixed) quantifiers over `.`. */
    @Test
    fun fixedLengthQualifierReluctant() {
        testQuantifiers(atom = ".", modifier = "?")
    }

    /**
     * Possessive (`+`-suffixed) quantifiers over `\p{Ll}`.
     *
     * "Possesive" in the name is a misspelling of "Possessive"; the established test name is kept.
     */
    @Test
    fun fixedLengthQualifierPossesive() {
        testQuantifiers(atom = "\\p{Ll}", modifier = "+")
    }

    /** Greedy quantifiers over the single literal character `a`. */
    @Test
    fun leafQuantifierGreedy() {
        testMatches(Regex("a+"), input, inputDescription)
        testMatches(Regex("a*"), input, inputDescription)
        testMatches(Regex("a{$COMPOSITE_MIN,$COMPOSITE_MAX}"), input, inputDescription)
    }

    /** URL-like pattern ending in `[^\s]*`, matched against `"http://"` followed by [input]. */
    @Test
    fun kt46211_space() {
        val regex = "(https?|ftp)://[^\\s/$.?#].[^\\s]*".toRegex(RegexOption.IGNORE_CASE)
        val link = "http://" + input
        testMatches(regex, link, "\"http://\" + $inputDescription")
    }

    /** Replacing every `[a]+` run in [input] with an empty string must leave nothing behind. */
    @Test
    fun kt46211() {
        val stripped = Regex("[a]+").replace(input, "")
        assertEquals("", stripped)
    }

    /** Two reluctant `.*?` segments inside a capturing group must capture the whole long string. */
    @Test
    fun kt53352() {
        val test = input + "b c"
        val regex = """(.*?b.*?c)""".toRegex()
        // Fail with a readable assertion message, rather than a bare NPE, if nothing is found.
        val match = assertNotNull(regex.find(test), "$regex should find a match in $inputDescription + \"b c\"")
        assertEquals(test, match.groupValues[1])
    }

    /**
     * Escape-sequence pattern with a reluctant `[\s\S]+?` followed by a lookahead, run over a
     * multi-line fixture. Exactly one match is expected, and its first group is the escape
     * sequence plus the line break that follows it.
     */
    @Test
    fun kt35508() {
        // The raw-string lines below are flush-left on purpose: any indentation would become part of the text.
        val eventLog = """=== EREIGNISLISTE ======
""" + "\u001b" + """Kn
BEGINN 28.06 13:25
EREIGNISSE 62
50 5 28.06 1325
3402 28.06 1325
3412 28.06 1325
63 3 28.06 1325
63 0 28.06 1325
EE06 28.06 1325
EE06 28.06 1322
EE07 28.06 1322
63 3 28.06 1322
EE06 28.06 1322
EE07 28.06 1322
63 3 28.06 1322
63 3 28.06 1322
63 3 28.06 1323
63 3 28.06 1500
50 4 28.06 1500
50 5 30.06 1226
3402 30.06 1226
3412 30.06 1226
50 4 30.06 1227
50 5 30.06 1228
3402 30.06 1228"""
        val regex = Regex("(\\x1b\\w[\\s\\S]{1,2})([\\s\\S]+?(?=\\x1b\\w[\\s\\S]{1,2}|\$))")
        val firstGroups = regex.findAll(eventLog).map { it.groupValues[1] }.toList()
        assertEquals("\u001BKn\n", firstGroups.single())
    }

    companion object {
        /** Length of [input], i.e. how many times a single-char quantifier must match to consume it. */
        private const val QUANTIFIER_MATCH_COUNT = 100_000

        /** Lower bound used by the `{min,max}` patterns; below [QUANTIFIER_MATCH_COUNT]. */
        private const val COMPOSITE_MIN = 50_000

        /** Upper bound used by the `{min,max}` patterns; above [QUANTIFIER_MATCH_COUNT]. */
        private const val COMPOSITE_MAX = 150_000

        /** The long input shared by the tests: `'a'` repeated [QUANTIFIER_MATCH_COUNT] times. */
        private val input = "a".repeat(QUANTIFIER_MATCH_COUNT)

        /** Compact description of [input] for assertion messages. */
        private val inputDescription = "\"a\".repeat($QUANTIFIER_MATCH_COUNT)"
    }
}