Commonize CharCategory and related functions #KT-39177 #KT-43216 #KT-39906 #KT-30652
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
@kotlin.SinceKotlin(version = "1.2")
|
||||
public val kotlin.String.Companion.CASE_INSENSITIVE_ORDER: kotlin.Comparator<kotlin.String> { get; }
|
||||
|
||||
public val kotlin.Char.category: kotlin.text.CharCategory { get; }
|
||||
|
||||
public val kotlin.CharSequence.indices: kotlin.ranges.IntRange { get; }
|
||||
|
||||
public val kotlin.CharSequence.lastIndex: kotlin.Int { get; }
|
||||
@@ -361,13 +363,25 @@ public inline fun kotlin.text.StringBuilder.insertRange(index: kotlin.Int, value
|
||||
|
||||
public fun kotlin.CharSequence.isBlank(): kotlin.Boolean
|
||||
|
||||
public fun kotlin.Char.isDefined(): kotlin.Boolean
|
||||
|
||||
public fun kotlin.Char.isDigit(): kotlin.Boolean
|
||||
|
||||
@kotlin.internal.InlineOnly
|
||||
public inline fun kotlin.CharSequence.isEmpty(): kotlin.Boolean
|
||||
|
||||
public fun kotlin.Char.isHighSurrogate(): kotlin.Boolean
|
||||
|
||||
public fun kotlin.Char.isISOControl(): kotlin.Boolean
|
||||
|
||||
public fun kotlin.Char.isLetter(): kotlin.Boolean
|
||||
|
||||
public fun kotlin.Char.isLetterOrDigit(): kotlin.Boolean
|
||||
|
||||
public fun kotlin.Char.isLowSurrogate(): kotlin.Boolean
|
||||
|
||||
public fun kotlin.Char.isLowerCase(): kotlin.Boolean
|
||||
|
||||
@kotlin.internal.InlineOnly
|
||||
public inline fun kotlin.CharSequence.isNotBlank(): kotlin.Boolean
|
||||
|
||||
@@ -382,6 +396,10 @@ public inline fun kotlin.CharSequence?.isNullOrEmpty(): kotlin.Boolean
|
||||
|
||||
public fun kotlin.Char.isSurrogate(): kotlin.Boolean
|
||||
|
||||
public fun kotlin.Char.isTitleCase(): kotlin.Boolean
|
||||
|
||||
public fun kotlin.Char.isUpperCase(): kotlin.Boolean
|
||||
|
||||
public fun kotlin.Char.isWhitespace(): kotlin.Boolean
|
||||
|
||||
public operator fun kotlin.CharSequence.iterator(): kotlin.collections.CharIterator
|
||||
@@ -1178,6 +1196,75 @@ public interface Appendable {
|
||||
public abstract fun append(value: kotlin.CharSequence?, startIndex: kotlin.Int, endIndex: kotlin.Int): kotlin.text.Appendable
|
||||
}
|
||||
|
||||
public final enum class CharCategory : kotlin.Enum<kotlin.text.CharCategory> {
|
||||
enum entry UNASSIGNED
|
||||
|
||||
enum entry UPPERCASE_LETTER
|
||||
|
||||
enum entry LOWERCASE_LETTER
|
||||
|
||||
enum entry TITLECASE_LETTER
|
||||
|
||||
enum entry MODIFIER_LETTER
|
||||
|
||||
enum entry OTHER_LETTER
|
||||
|
||||
enum entry NON_SPACING_MARK
|
||||
|
||||
enum entry ENCLOSING_MARK
|
||||
|
||||
enum entry COMBINING_SPACING_MARK
|
||||
|
||||
enum entry DECIMAL_DIGIT_NUMBER
|
||||
|
||||
enum entry LETTER_NUMBER
|
||||
|
||||
enum entry OTHER_NUMBER
|
||||
|
||||
enum entry SPACE_SEPARATOR
|
||||
|
||||
enum entry LINE_SEPARATOR
|
||||
|
||||
enum entry PARAGRAPH_SEPARATOR
|
||||
|
||||
enum entry CONTROL
|
||||
|
||||
enum entry FORMAT
|
||||
|
||||
enum entry PRIVATE_USE
|
||||
|
||||
enum entry SURROGATE
|
||||
|
||||
enum entry DASH_PUNCTUATION
|
||||
|
||||
enum entry START_PUNCTUATION
|
||||
|
||||
enum entry END_PUNCTUATION
|
||||
|
||||
enum entry CONNECTOR_PUNCTUATION
|
||||
|
||||
enum entry OTHER_PUNCTUATION
|
||||
|
||||
enum entry MATH_SYMBOL
|
||||
|
||||
enum entry CURRENCY_SYMBOL
|
||||
|
||||
enum entry MODIFIER_SYMBOL
|
||||
|
||||
enum entry OTHER_SYMBOL
|
||||
|
||||
enum entry INITIAL_QUOTE_PUNCTUATION
|
||||
|
||||
enum entry FINAL_QUOTE_PUNCTUATION
|
||||
|
||||
public final val code: kotlin.String { get; }
|
||||
|
||||
public final operator fun contains(char: kotlin.Char): kotlin.Boolean
|
||||
|
||||
public companion object of CharCategory {
|
||||
}
|
||||
}
|
||||
|
||||
@kotlin.SinceKotlin(version = "1.4")
|
||||
@kotlin.WasExperimental(markerClass = {kotlin.ExperimentalStdlibApi::class})
|
||||
public open class CharacterCodingException : kotlin.Exception {
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
@kotlin.SinceKotlin(version = "1.2")
|
||||
public val kotlin.String.Companion.CASE_INSENSITIVE_ORDER: kotlin.Comparator<kotlin.String> { get; }
|
||||
|
||||
public val kotlin.Char.category: kotlin.text.CharCategory { get; }
|
||||
|
||||
public val kotlin.CharSequence.indices: kotlin.ranges.IntRange { get; }
|
||||
|
||||
public val kotlin.CharSequence.lastIndex: kotlin.Int { get; }
|
||||
@@ -361,13 +363,25 @@ public inline fun kotlin.text.StringBuilder.insertRange(index: kotlin.Int, value
|
||||
|
||||
public fun kotlin.CharSequence.isBlank(): kotlin.Boolean
|
||||
|
||||
public fun kotlin.Char.isDefined(): kotlin.Boolean
|
||||
|
||||
public fun kotlin.Char.isDigit(): kotlin.Boolean
|
||||
|
||||
@kotlin.internal.InlineOnly
|
||||
public inline fun kotlin.CharSequence.isEmpty(): kotlin.Boolean
|
||||
|
||||
public fun kotlin.Char.isHighSurrogate(): kotlin.Boolean
|
||||
|
||||
public fun kotlin.Char.isISOControl(): kotlin.Boolean
|
||||
|
||||
public fun kotlin.Char.isLetter(): kotlin.Boolean
|
||||
|
||||
public fun kotlin.Char.isLetterOrDigit(): kotlin.Boolean
|
||||
|
||||
public fun kotlin.Char.isLowSurrogate(): kotlin.Boolean
|
||||
|
||||
public fun kotlin.Char.isLowerCase(): kotlin.Boolean
|
||||
|
||||
@kotlin.internal.InlineOnly
|
||||
public inline fun kotlin.CharSequence.isNotBlank(): kotlin.Boolean
|
||||
|
||||
@@ -382,6 +396,10 @@ public inline fun kotlin.CharSequence?.isNullOrEmpty(): kotlin.Boolean
|
||||
|
||||
public fun kotlin.Char.isSurrogate(): kotlin.Boolean
|
||||
|
||||
public fun kotlin.Char.isTitleCase(): kotlin.Boolean
|
||||
|
||||
public fun kotlin.Char.isUpperCase(): kotlin.Boolean
|
||||
|
||||
public fun kotlin.Char.isWhitespace(): kotlin.Boolean
|
||||
|
||||
public operator fun kotlin.CharSequence.iterator(): kotlin.collections.CharIterator
|
||||
@@ -1178,6 +1196,75 @@ public interface Appendable {
|
||||
public abstract fun append(value: kotlin.CharSequence?, startIndex: kotlin.Int, endIndex: kotlin.Int): kotlin.text.Appendable
|
||||
}
|
||||
|
||||
public final enum class CharCategory : kotlin.Enum<kotlin.text.CharCategory> {
|
||||
enum entry UNASSIGNED
|
||||
|
||||
enum entry UPPERCASE_LETTER
|
||||
|
||||
enum entry LOWERCASE_LETTER
|
||||
|
||||
enum entry TITLECASE_LETTER
|
||||
|
||||
enum entry MODIFIER_LETTER
|
||||
|
||||
enum entry OTHER_LETTER
|
||||
|
||||
enum entry NON_SPACING_MARK
|
||||
|
||||
enum entry ENCLOSING_MARK
|
||||
|
||||
enum entry COMBINING_SPACING_MARK
|
||||
|
||||
enum entry DECIMAL_DIGIT_NUMBER
|
||||
|
||||
enum entry LETTER_NUMBER
|
||||
|
||||
enum entry OTHER_NUMBER
|
||||
|
||||
enum entry SPACE_SEPARATOR
|
||||
|
||||
enum entry LINE_SEPARATOR
|
||||
|
||||
enum entry PARAGRAPH_SEPARATOR
|
||||
|
||||
enum entry CONTROL
|
||||
|
||||
enum entry FORMAT
|
||||
|
||||
enum entry PRIVATE_USE
|
||||
|
||||
enum entry SURROGATE
|
||||
|
||||
enum entry DASH_PUNCTUATION
|
||||
|
||||
enum entry START_PUNCTUATION
|
||||
|
||||
enum entry END_PUNCTUATION
|
||||
|
||||
enum entry CONNECTOR_PUNCTUATION
|
||||
|
||||
enum entry OTHER_PUNCTUATION
|
||||
|
||||
enum entry MATH_SYMBOL
|
||||
|
||||
enum entry CURRENCY_SYMBOL
|
||||
|
||||
enum entry MODIFIER_SYMBOL
|
||||
|
||||
enum entry OTHER_SYMBOL
|
||||
|
||||
enum entry INITIAL_QUOTE_PUNCTUATION
|
||||
|
||||
enum entry FINAL_QUOTE_PUNCTUATION
|
||||
|
||||
public final val code: kotlin.String { get; }
|
||||
|
||||
public final operator fun contains(char: kotlin.Char): kotlin.Boolean
|
||||
|
||||
public companion object of CharCategory {
|
||||
}
|
||||
}
|
||||
|
||||
@kotlin.SinceKotlin(version = "1.4")
|
||||
@kotlin.WasExperimental(markerClass = {kotlin.ExperimentalStdlibApi::class})
|
||||
public open class CharacterCodingException : kotlin.Exception {
|
||||
|
||||
@@ -63,7 +63,6 @@ expect enum class RegexOption {
|
||||
|
||||
// From char.kt
|
||||
|
||||
expect fun Char.isWhitespace(): Boolean
|
||||
expect fun Char.isHighSurrogate(): Boolean
|
||||
expect fun Char.isLowSurrogate(): Boolean
|
||||
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
/*
|
||||
* Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package kotlin.text
|
||||
|
||||
//
|
||||
// NOTE: THIS FILE IS AUTO-GENERATED by the GenerateUnicodeData.kt
|
||||
// See: https://github.com/JetBrains/kotlin/tree/master/libraries/stdlib
|
||||
//
|
||||
|
||||
// 1343 ranges in total
|
||||
/**
 * Lookup tables used to find the general category of a character.
 *
 * Both tables are stored as variable-length base64 text and decoded once, when the object is initialized:
 *   - [decodedRangeStart] holds the first char code of every range, in ascending order, starting at `0`;
 *   - [decodedRangeCategory] holds one packed category code per range, at the same index.
 */
private object Category {
    val decodedRangeStart: IntArray
    val decodedRangeCategory: IntArray

    init {
        // Reverse lookup: char code of a base64 digit -> its 6-bit value.
        val toBase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
        val fromBase64 = IntArray(128)
        for (i in toBase64.indices) {
            fromBase64[toBase64[i].toInt()] = i
        }

        // rangeStartDiff.length = 1482
        val rangeStartDiff = "gBCFEDCKCDCaDDaDBhBCEEDDDDDEDXBHYBH5BRwBGDCHDCIDFHDCHFDCDEIRTEE7BGHDDJlCBbSEMOFGERwDEDDDDECEFCRBJhBFDCYFFCCzBvBjBBFC3BOhDBmBDGpBDDCtBBJIbEECLGDFCLDCgBBKVKEDiDDHCFECECKCEODBebC5CLBOKhBJDDDDWEBHFCFCPBZDEL1BVBSLPBgBB2BDBDICFBHKCCKCPDBHEDWBHEDDDDEDEDIBDGDCKCCGDDDCGECCWBFMDDCDEDDCHDDHKDDBKDBHFCWBFGFDBDDFEDBPDDKCHBGDCHEDWBFGFDCEDEDBHDDGDCKCGJEGDBFDDFDDDDDMEFDBFDCGBOKDFDFDCGFCXBQDDDDDBEGEDFDDKHBHDDGFCXBKBFCEFCFCHCHECCKDNCCHFCoBEDECFDDDDHDCCKJBGDCSDYBJEHBFDDEBIGKDCMuBFHEBGBIBKCkBFBFBXEIFJDFDGCKCEgBBDPEDGKKGECIBkBEOBDFFLBkBBIBEFFEClBrBCEBEGDBKGGDDDDDCHDENDCFEKDDlBDDFrBCDpKBECGEECpBBEChBBECGEECPB5BBECjCCDJUDQKG2CCGDsTCRBaCDrCDDIHNBEDLSDCJSCMLFCCM0BDHGFLBFDDKGKGEFDDBKGjBB1BHFChBDFmCKfDDDDDDCGDCFDKeCFLsBEaGKBDiBXDDD1BDGDEIGJEKGKGHBGCMF/BEBvBCEDDFHEKHKJJDDeDDGDKsBFEDCIEkBIICCDFKDDKeGCJHrBCDIIDBNBHEBEFDBFsB/BNBiBlB6BBF1EIiDJIGCGCIIIIGCGCIIIIOCIIIIIIDFEDDBFEDDDDEBDIFDDFEDBLFGCEEICFBJCDEDCLDKBFBKCCGDDKDDNDgBQNEBDMPFFDEDEBFFHECEBEEDFBEDDQjBCEDEFFCCJHBeEEfsIIEUCHCxCBeZoBGlCZLV8BuCW3FBJB2BIvDB4HOesBFCfKQgIjEW/BEgBCiIwBVCGnBCgBBpDvBBuBEDBHEFGCCjDCGEDCFCFlBDDF4BHCOBXJHBHBHBHBHBHBHBHBgBCECGHGEDIFBKCEDMEtBaB5CM2GaMEDDCKCGFCJEDFDDDC2CDDDB6CDCFrBB+CDEKgBkBMQfBKeIBPgBKnBPgKguGgC9vUDVB3jBD3BJoBGCsIBDQKCUuBDDKCcCCmCKCGIXJCNC/BBHGKDECEVFBEMCEEBqBDDGDFDXDCEBDGEG0BEICyBQCICKGSGDEBKcICXLCLBdDDBvBDECCDNCKECFCJKFBpBFEDCJDBICCKCEQBGDDByBEDCEFBYDCLEDDCKGCGCGJHBHBrBBEJDEwCjBIDCKGk9KMXExBEggCgoGuLCqDmBHMFFCKBNBFBIsDQRrLCQgCC2BoBMCCQGEGQDCQDDDDFDGDECEEFBnEEBFEDCKCDCaDDaDBFCKBtBCfDGCGCFEDDDDCECKDC"
        // The encoded values are differences between consecutive range starts;
        // the first range implicitly starts at 0, hence one more start than there are differences.
        val diff = decodeVarLenBase64(rangeStartDiff, fromBase64, 1342)
        val start = IntArray(diff.size + 1)
        for (i in diff.indices) {
            start[i + 1] = start[i] + diff[i]
        }
        decodedRangeStart = start

        // rangeCategory.length = 2033
        // The literal has to stay on a single line: a line break inside it is not valid in a regular
        // string literal and would not be part of the 2033 encoded characters.
        val rangeCategory = "PsY44a41W54UYJYZYB14W7XC15WZPsYa84bl9Zw8b85Lr7C44brlerrYBZBCZCiBiBiBhCiiBhChiBhiCBhhChiCihBhChCChiBhChiClBCFhjCiBiBihDhiBhCCihBiBBhCCFCEbEbEb7EbGhCk7BixRkiCi4BRbh4BhRhCBRBCiiBBCiBChiZBCBCiBcGHhChCiBRBxxEYC40Rx8c6RGUm4GRFRFYRQZ44acG4wRYFEFGJYllGFlYGwcGmkEmcGFJFl8cYxwFGFGRFGFRJFGkkcYkxRm6aFGEGmmEmEGRYRFGxxYFRFRFRGQGIFmIFIGIooGFGFGYJ4EFmoIRFlxRlxRFRFxlRxlFllRxmFIGxxIoxRomFRIRxlFlmGRJFaL86F4mRxmGoRFRFRFRFllRxGIGRxmGxmGmxRxGRFlRRJmmFllGYRmmIRFllRlRFRFllRFxxGFIGmmRoxImxRFRllGmxRJ4aRFGxmIoRFlxRlxRFRFllRFxxGlImoGmmRxoIxoIGRmmIRxlFlmGRJ8FLRxmFFRFllRllRxxFlRlxRxlFRFRFRooGRIooRomRxFRIRJLc8aRmoIoGFllRlRFRFRlmGmoIooRGRGRxmGFRllGmxRJRYL8lGooYFllRlRFRFRFRmlIIxGooRGRIRlxFGRJxlFRGIFllRlRFlmGIGxIooRomF8xRxxFllILFGRJLcFxmIoRFRFRFxlRFRxxGxxIooGmmRRIRJxxIoYRFllGGRaFEGYJYRxlFRFRFlRFllGGlxRFxEGRJRFRFcY84c8mGcJL8G1WIFRFRGIGmmYFGRGRcGc88RYcYRFIGIGmmIomGFJYFooGmlFllGmmFIFIFGFmoIGIomFJIm8cBhRRxxBC4ECFRFRFlRFRFRFRFRFRFlRFRFRFRFRFRGYLRFcRBRCxxUF8YFMF1WRFYKFRFRFGRFGYRFGRFllRlRGRFmmIGIooGGY44E46FmxRJRLRY44U44GmmQRJRFEFRFGFlGRFRFxmGmoIooGmoIoxRxxIoGIGRxxcx4YJFRFRFRFRJLRcFmmIomRx4YFoGGmRomIGIGmxRJRJRYEYRGmmHRGIFmIGmIIooGFRJYcGcRmmIFomGmmIomGmlFJFmoGooGGIRYFIGIGRYJRFJFEYCRBRBYRGYGIGFGFllGomGFRCECECEGRGhCCiBCBCRBRCBCBCRBRCxBCBCRCDCDCDCiiRBj7CbCiiRBj7b7iCiiRxiCBRbCBbxxCiiRBj7bRMQUY9+V9+VYtOQMY9eY43X44Z1WY54XYMQRQrERLZ12ELZ12RERaRGHGHGR88B88BihBhiChhC8hcZBc8BB8CBCFi8cihBZBC8Z8CLKhCKr8cRZcZc88ZcZc85Z8ZcZc1WcZc1WcZcZcZcRcRLcLcZcZcZcZc1WLcZ1WZ1WZcZ1WZ1WZ1WZcZcZcRcRcBRCixBBCiBBihCCEBhCCchCGhCRY44LCiRRxxCFRkYRGFRFRFRFRFRFRFRFRFRGY9eY49eY44U49e49e1WYEYUY04VY48cRcRcRcRcRs4Y48ElK1Wc1W12U2cKGooUE88KqqEl4c8RFxxGm7bkkFUF4kEkFRFRFx8cLcFcRFcRLcLcLcLcLcFcFRFEFRcRFEYFEYFJFRhClmHnnYG4EhCEGFKGYRbEbhCCiBECiBhCk7bhClBihCiBBCBhCRhiBhhCCRhiFkkCFlGllGllGFooGmIcGRL88aRFYRIFIGRYJRGFYl4FGJFGYFGIRYFRGIFmoIGIGIYxEJRYFmEFJFRFGmoImoIGRFGFmIRJRYFEFcloGIFmlGmlFGFlmGFRllEYFomGo4YlkEoGRFRFRFRFRFRCbECk7bRCFooG4oGRJRFRFRFRTSFRFRCRCRlGFZFRFRlxFFbRF2VRFRFRF6cRGY41WRG40UX1W44V24Y44X33Y44R44U1WY50Z5R46YRFRFxxQY44a41W54UYJYZYB14W7XC15WZ12YYFEFEFRFRFRFlxRllRxxa65b86axcZcRQcR"
        decodedRangeCategory = decodeVarLenBase64(rangeCategory, fromBase64, 1343)
    }
}
|
||||
|
||||
/**
 * Extracts a category value from the packed [code] of a range for the character at offset [ch]
 * from the start of that range.
 *
 *   - A code below `0x20` is the category value itself.
 *   - A code below `0x400` packs two 5-bit values: the low one is used at even offsets, the high one at odd offsets.
 *   - Any other code packs three 5-bit values, selected by the offset modulo three (low bits for remainder `0`).
 */
private fun categoryValueFrom(code: Int, ch: Int): Int {
    if (code < 0x20) {
        return code
    }
    if (code < 0x400) {
        val oddOffset = (ch and 1) == 1
        return if (oddOffset) code shr 5 else code and 0x1f
    }
    return when (ch % 3) {
        2 -> code shr 10
        1 -> (code shr 5) and 0x1f
        else -> code and 0x1f
    }
}
|
||||
|
||||
/**
 * Returns the Unicode general category of this character as an Int.
 *
 * The range containing the character is found by binary search over `Category.decodedRangeStart`;
 * the packed code of that range is then resolved for the character's offset within the range.
 * A resolved value of `17` is reported as `CharCategory.UNASSIGNED.value`.
 */
internal fun Char.getCategoryValue(): Int {
    val charCode = this.toInt()

    val rangeIndex = binarySearchRange(Category.decodedRangeStart, charCode)
    val offsetInRange = charCode - Category.decodedRangeStart[rangeIndex]
    val resolved = categoryValueFrom(Category.decodedRangeCategory[rangeIndex], offsetInRange)

    return when (resolved) {
        17 -> CharCategory.UNASSIGNED.value
        else -> resolved
    }
}
|
||||
|
||||
/**
 * Decodes a sequence of variable-length integers written with base64 digits.
 *
 * Every character of [base64] is mapped through [fromBase64] (indexed by char code) to a 6-bit digit.
 * The low five bits of a digit are payload, appended least-significant group first; the `0x20` bit
 * marks that more digits of the same integer follow. A digit without that bit completes the integer.
 *
 * @param resultLength the size of the returned array; integers are stored from index `0` onwards.
 * @return the decoded integers. Positions that were not reached by the input stay `0`.
 */
internal fun decodeVarLenBase64(base64: String, fromBase64: IntArray, resultLength: Int): IntArray {
    val decoded = IntArray(resultLength)
    var written = 0
    var accumulator = 0
    var bitOffset = 0
    for (symbol in base64) {
        val digit = fromBase64[symbol.toInt()]
        accumulator = accumulator or ((digit and 0x1f) shl bitOffset)
        if (digit >= 0x20) {
            // Continuation digit: the next group lands five bits higher.
            bitOffset += 5
            continue
        }
        decoded[written] = accumulator
        written++
        accumulator = 0
        bitOffset = 0
    }
    return decoded
}
|
||||
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
* Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package kotlin.text
|
||||
|
||||
//
|
||||
// NOTE: THIS FILE IS AUTO-GENERATED by the GenerateUnicodeData.kt
|
||||
// See: https://github.com/JetBrains/kotlin/tree/master/libraries/stdlib
|
||||
//
|
||||
|
||||
// 37 ranges in total
|
||||
/**
 * Table of digit ranges used by `isDigitImpl`.
 */
private object Digit {
    /**
     * Char codes at which a digit range begins, in ascending order.
     * `isDigitImpl` treats every entry as the first of ten consecutive digit characters.
     */
    internal val rangeStart: IntArray = intArrayOf(
        0x0030, 0x0660, 0x06f0, 0x07c0, 0x0966, 0x09e6, 0x0a66, 0x0ae6, 0x0b66, 0x0be6,
        0x0c66, 0x0ce6, 0x0d66, 0x0de6, 0x0e50, 0x0ed0, 0x0f20, 0x1040, 0x1090, 0x17e0,
        0x1810, 0x1946, 0x19d0, 0x1a80, 0x1a90, 0x1b50, 0x1bb0, 0x1c40, 0x1c50, 0xa620,
        0xa8d0, 0xa900, 0xa9d0, 0xa9f0, 0xaa50, 0xabf0, 0xff10,
    )
}
|
||||
|
||||
/**
 * Finds, in the ascending [array] of range starts, the range that [needle] falls into.
 *
 * @return the index of [needle] itself when it is present, otherwise the index of the element
 * examined last, lowered by one if that element is greater than [needle]. The result is `-1`
 * when [needle] is smaller than the first element.
 */
internal fun binarySearchRange(array: IntArray, needle: Int): Int {
    var low = 0
    var high = array.size - 1
    var probe = -1
    var probed = 0
    while (low <= high) {
        probe = (low + high) / 2
        probed = array[probe]
        when {
            needle == probed -> return probe
            needle > probed -> low = probe + 1
            else -> high = probe - 1
        }
    }
    // No exact match: step back when the search stopped on an element above the needle.
    return if (needle < probed) probe - 1 else probe
}
|
||||
|
||||
/**
 * Returns `true` if this character is a digit.
 *
 * The closest range start in `Digit.rangeStart` that is not greater than this character's code is
 * located by binary search; the character is a digit when it lies within the ten codes that begin
 * at that start.
 */
internal fun Char.isDigitImpl(): Boolean {
    val ch = this.toInt()
    val index = binarySearchRange(Digit.rangeStart, ch)
    // binarySearchRange yields -1 when `ch` precedes the first range start (any code below 0x0030).
    // Such a character is not a digit, and -1 must not be used to index the table.
    // NOTE(review): callers may already filter these characters out - not visible from here.
    if (index < 0) {
        return false
    }
    val high = Digit.rangeStart[index] + 9
    return ch <= high
}
|
||||
@@ -0,0 +1,114 @@
|
||||
/*
|
||||
* Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package kotlin.text
|
||||
|
||||
//
|
||||
// NOTE: THIS FILE IS AUTO-GENERATED by the GenerateUnicodeData.kt
|
||||
// See: https://github.com/JetBrains/kotlin/tree/master/libraries/stdlib
|
||||
//
|
||||
|
||||
// 222 ranges in total
|
||||
/**
 * Lookup tables used by `getLetterType`.
 *
 * All three tables have one entry per letter range and are decoded from variable-length base64 text
 * when the object is initialized:
 *   - [decodedRangeStart] is the first char code of the range (ascending);
 *   - [decodedRangeLength] is the number of char codes the range spans;
 *   - [decodedRangeCategory] is the packed letter-type code of the range.
 */
private object Letter {
    val decodedRangeStart: IntArray
    val decodedRangeLength: IntArray
    val decodedRangeCategory: IntArray

    init {
        // Reverse lookup: char code of a base64 digit -> its 6-bit value.
        val alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
        val fromBase64 = IntArray(128)
        var digitValue = 0
        for (symbol in alphabet) {
            fromBase64[symbol.toInt()] = digitValue
            digitValue++
        }

        // rangeStartDiff.length = 356
        val rangeStartDiff = "hCgBpCQGYHZH5BRpBPPPPPPRMP5BPPlCPP6BkEPPPPcPXPzBvBrB3BOiDoBHwD+E3DauCnFmBmB2D6E1BlBTiBmBlBP5BhBiBrBvBjBqBnBPRtBiCmCtBlB0BmB5BiB7BmBgEmChBZgCoEoGVpBSfRhBPqKQ2BwBYoFgB4CJuTiEvBuCuDrF5DgEgFlJ1DgFmBQtBsBRGsB+BPiBlD1EIjDPRPPPQPPPPPGQSQS/DxENVNU+B9zCwBwBPPCkDPNnBPqDYY1R8B7FkFgTgwGgwUwmBgKwBuBScmEP/BPPPPPPrBP8B7F1B/ErBqC6B7BiBmBfQsBUwCw/KwqIwLwETPcPjQgJxFgBlBsD"
        // The encoded values are differences; the starts are their running sum
        // (the first difference is the first start itself).
        val deltas = decodeVarLenBase64(rangeStartDiff, fromBase64, 222)
        val starts = IntArray(deltas.size)
        var runningStart = 0
        for (i in deltas.indices) {
            runningStart += deltas[i]
            starts[i] = runningStart
        }
        decodedRangeStart = starts

        // rangeLength.length = 328
        val rangeLength = "aaMBXHYH5BRpBPPPPPPRMP5BPPlCPPzBDOOPPcPXPzBvBjB3BOhDmBBpB7DoDYxB+EiBP1DoExBkBQhBekBPmBgBhBctBiBMWOOXhCsBpBkBUV3Ba4BkB0DlCgBXgBtD4FSdBfPhBPpKP0BvBXjEQ2CGsT8DhBtCqDpFvD1D3E0IrD2EkBJrBDOBsB+BPiBlB1EIjDPPPPPPPPPPPGPPMNLsBNPNPKCvBvBPPCkDPBmBPhDXXgD4B6FzEgDguG9vUtkB9JcuBSckEP/BPPPPPPBPf4FrBjEhBpC3B5BKaWPrBOwCk/KsCuLqDHPbPxPsFtEaaqDL"
        decodedRangeLength = decodeVarLenBase64(rangeLength, fromBase64, 222)

        // rangeCategory.length = 959
        val rangeCategory = "GFjgggUHGGFFZZZmzpz5qB6s6020B60ptltB6smt2sB60mz22B1+vv+8BZZ5s2850BW5q1ymtB506smzBF3q1q1qB1q1q1+Bgii4wDTm74g3KiggxqM60q1q1Bq1o1q1BF1qlrqrBZ2q5wprBGFZWWZGHFsjiooLowgmOowjkwCkgoiIk7ligGogiioBkwkiYkzj2oNoi+sbkwj04DghhkQ8wgiYkgoioDsgnkwC4gikQ//v+85BkwvoIsgoyI4yguI0whiwEowri4CoghsJowgqYowgm4DkwgsY/nwnzPowhmYkg6wI8yggZswikwHgxgmIoxgqYkwgk4DkxgmIkgoioBsgssoBgzgyI8g9gL8g9kI0wgwJoxgkoC0wgioFkw/wI0w53iF4gioYowjmgBHGq1qkgwBF1q1q8qBHwghuIwghyKk0goQkwgoQk3goQHGFHkyg0pBgxj6IoinkxDswno7Ikwhz9Bo0gioB8z48Rwli0xN0mpjoX8w78pDwltoqKHFGGwwgsIHFH3q1q16BFHWFZ1q10q1B2qlwq1B1q10q1B2q1yq1B6q1gq1Biq1qhxBir1qp1Bqt1q1qB1g1q1+B//3q16B///q1qBH/qlqq9Bholqq9B1i00a1q10qD1op1HkwmigEigiy6Cptogq1Bixo1kDq7/j00B2qgoBWGFm1lz50B6s5q1+BGWhggzhwBFFhgk4//Bo2jigE8wguI8wguI8wgugUog1qoB4qjmIwwi2KgkYHHH4lBgiFWkgIWoghssMmz5smrBZ3q1y50B5sm7gzBtz1smzB5smz50BqzqtmzB5sgzqzBF2/9//5BowgoIwmnkzPkwgk4C8ys65BkgoqI0wgy6FghquZo2giY0ghiIsgh24B4ghsQ8QF/v1q1OFs0O8iCHHF1qggz/B8wg6Iznv+//B08QgohsjK0QGFk7hsQ4gB"
        decodedRangeCategory = decodeVarLenBase64(rangeCategory, fromBase64, 222)
    }
}
|
||||
|
||||
/**
 * Returns `true` if this character is a letter, i.e. its letter type is anything but `0`.
 */
internal fun Char.isLetterImpl(): Boolean = getLetterType() != 0
|
||||
|
||||
/**
 * Returns `true` if this character is a lower case letter, i.e. its letter type is `1`.
 */
internal fun Char.isLowerCaseImpl(): Boolean = getLetterType() == 1
|
||||
|
||||
/**
 * Returns `true` if this character is an upper case letter, i.e. its letter type is `2`.
 */
internal fun Char.isUpperCaseImpl(): Boolean = getLetterType() == 2
|
||||
|
||||
/**
 * Returns
 *   - `1` if the character is a lower case letter,
 *   - `2` if the character is an upper case letter,
 *   - `3` if the character is a letter but not a lower or upper case letter,
 *   - `0` otherwise.
 *
 * The character is located in the `Letter` range tables by binary search; the packed code of the
 * range it falls into is then decoded according to the layouts handled below.
 */
private fun Char.getLetterType(): Int {
    val ch = this.toInt()
    val index = binarySearchRange(Letter.decodedRangeStart, ch)
    // binarySearchRange yields -1 when `ch` precedes the first range start. Such a character belongs
    // to no letter range, and -1 must not be used to index the tables.
    // NOTE(review): callers may already filter these characters out - not visible from here.
    if (index < 0) {
        return 0
    }

    val rangeStart = Letter.decodedRangeStart[index]
    val rangeEnd = rangeStart + Letter.decodedRangeLength[index] - 1
    val code = Letter.decodedRangeCategory[index]

    // The character lies past the end of the closest preceding range.
    if (ch > rangeEnd) {
        return 0
    }

    val lastTwoBits = code and 0x3

    if (lastTwoBits == 0) { // gap pattern
        // Above the two low bits the code stores 7-bit run lengths, alternating between a run of
        // letters (type 3) and a run of non-letters; at most two such pairs are read.
        var shift = 2
        var threshold = rangeStart
        for (i in 0..1) {
            threshold += (code shr shift) and 0x7f
            if (threshold > ch) {
                return 3
            }
            shift += 7
            threshold += (code shr shift) and 0x7f
            if (threshold > ch) {
                return 0
            }
            shift += 7
        }
        return 3
    }

    // A code of at most 0x7 carries a single type for the whole range.
    if (code <= 0x7) {
        return lastTwoBits
    }

    val distance = (ch - rangeStart)
    // Codes up to 0x1F hold two 2-bit types selected by the parity of the offset;
    // larger codes hold one 2-bit type per offset within the range.
    val shift = if (code <= 0x1F) distance % 2 else distance
    return (code shr (2 * shift)) and 0x3
}
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package kotlin.text
|
||||
|
||||
//
|
||||
// NOTE: THIS FILE IS AUTO-GENERATED by the GenerateUnicodeData.kt
|
||||
// See: https://github.com/JetBrains/kotlin/tree/master/libraries/stdlib
|
||||
//
|
||||
|
||||
// 9 ranges in total
|
||||
/**
 * Returns `true` if this character is a whitespace.
 *
 * Codes up to `0x1000` are matched against the two low ranges and `0x00a0`;
 * higher codes are matched against the remaining whitespace characters.
 */
internal fun Char.isWhitespaceImpl(): Boolean {
    val code = this.toInt()
    if (code <= 0x1000) {
        return code in 0x0009..0x000d ||
            code in 0x001c..0x0020 ||
            code == 0x00a0
    }
    return when (code) {
        0x1680,
        in 0x2000..0x200a,
        0x2028,
        0x2029,
        0x202f,
        0x205f,
        0x3000 -> true
        else -> false
    }
}
|
||||
@@ -0,0 +1,84 @@
|
||||
/*
|
||||
* Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package kotlin.text
|
||||
|
||||
//
|
||||
// NOTE: THIS FILE IS AUTO-GENERATED by the GenerateUnicodeData.kt
|
||||
// See: https://github.com/JetBrains/kotlin/tree/master/libraries/stdlib
|
||||
//
|
||||
|
||||
// 1343 ranges in total
|
||||
/**
 * Lookup tables used to find the general category of a character.
 *
 * Both tables are stored as variable-length base64 text and decoded once, when the object is initialized:
 *   - [decodedRangeStart] holds the first char code of every range, in ascending order, starting at `0`;
 *   - [decodedRangeCategory] holds one packed category code per range, at the same index.
 */
private object Category {
    val decodedRangeStart: IntArray
    val decodedRangeCategory: IntArray

    init {
        // Reverse lookup: char code of a base64 digit -> its 6-bit value.
        val toBase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
        val fromBase64 = IntArray(128)
        for (i in toBase64.indices) {
            fromBase64[toBase64[i].toInt()] = i
        }

        // rangeStartDiff.length = 1482
        val rangeStartDiff = "gBCFEDCKCDCaDDaDBhBCEEDDDDDEDXBHYBH5BRwBGDCHDCIDFHDCHFDCDEIRTEE7BGHDDJlCBbSEMOFGERwDEDDDDECEFCRBJhBFDCYFFCCzBvBjBBFC3BOhDBmBDGpBDDCtBBJIbEECLGDFCLDCgBBKVKEDiDDHCFECECKCEODBebC5CLBOKhBJDDDDWEBHFCFCPBZDEL1BVBSLPBgBB2BDBDICFBHKCCKCPDBHEDWBHEDDDDEDEDIBDGDCKCCGDDDCGECCWBFMDDCDEDDCHDDHKDDBKDBHFCWBFGFDBDDFEDBPDDKCHBGDCHEDWBFGFDCEDEDBHDDGDCKCGJEGDBFDDFDDDDDMEFDBFDCGBOKDFDFDCGFCXBQDDDDDBEGEDFDDKHBHDDGFCXBKBFCEFCFCHCHECCKDNCCHFCoBEDECFDDDDHDCCKJBGDCSDYBJEHBFDDEBIGKDCMuBFHEBGBIBKCkBFBFBXEIFJDFDGCKCEgBBDPEDGKKGECIBkBEOBDFFLBkBBIBEFFEClBrBCEBEGDBKGGDDDDDCHDENDCFEKDDlBDDFrBCDpKBECGEECpBBEChBBECGEECPB5BBECjCCDJUDQKG2CCGDsTCRBaCDrCDDIHNBEDLSDCJSCMLFCCM0BDHGFLBFDDKGKGEFDDBKGjBB1BHFChBDFmCKfDDDDDDCGDCFDKeCFLsBEaGKBDiBXDDD1BDGDEIGJEKGKGHBGCMF/BEBvBCEDDFHEKHKJJDDeDDGDKsBFEDCIEkBIICCDFKDDKeGCJHrBCDIIDBNBHEBEFDBFsB/BNBiBlB6BBF1EIiDJIGCGCIIIIGCGCIIIIOCIIIIIIDFEDDBFEDDDDEBDIFDDFEDBLFGCEEICFBJCDEDCLDKBFBKCCGDDKDDNDgBQNEBDMPFFDEDEBFFHECEBEEDFBEDDQjBCEDEFFCCJHBeEEfsIIEUCHCxCBeZoBGlCZLV8BuCW3FBJB2BIvDB4HOesBFCfKQgIjEW/BEgBCiIwBVCGnBCgBBpDvBBuBEDBHEFGCCjDCGEDCFCFlBDDF4BHCOBXJHBHBHBHBHBHBHBHBgBCECGHGEDIFBKCEDMEtBaB5CM2GaMEDDCKCGFCJEDFDDDC2CDDDB6CDCFrBB+CDEKgBkBMQfBKeIBPgBKnBPgKguGgC9vUDVB3jBD3BJoBGCsIBDQKCUuBDDKCcCCmCKCGIXJCNC/BBHGKDECEVFBEMCEEBqBDDGDFDXDCEBDGEG0BEICyBQCICKGSGDEBKcICXLCLBdDDBvBDECCDNCKECFCJKFBpBFEDCJDBICCKCEQBGDDByBEDCEFBYDCLEDDCKGCGCGJHBHBrBBEJDEwCjBIDCKGk9KMXExBEggCgoGuLCqDmBHMFFCKBNBFBIsDQRrLCQgCC2BoBMCCQGEGQDCQDDDDFDGDECEEFBnEEBFEDCKCDCaDDaDBFCKBtBCfDGCGCFEDDDDCECKDC"
        // The encoded values are differences between consecutive range starts;
        // the first range implicitly starts at 0, hence one more start than there are differences.
        val diff = decodeVarLenBase64(rangeStartDiff, fromBase64, 1342)
        val start = IntArray(diff.size + 1)
        for (i in diff.indices) {
            start[i + 1] = start[i] + diff[i]
        }
        decodedRangeStart = start

        // rangeCategory.length = 2033
        // The literal has to stay on a single line: a line break inside it is not valid in a regular
        // string literal and would not be part of the 2033 encoded characters.
        val rangeCategory = "PsY44a41W54UYJYZYB14W7XC15WZPsYa84bl9Zw8b85Lr7C44brlerrYBZBCZCiBiBiBhCiiBhChiBhiCBhhChiCihBhChCChiBhChiClBCFhjCiBiBihDhiBhCCihBiBBhCCFCEbEbEb7EbGhCk7BixRkiCi4BRbh4BhRhCBRBCiiBBCiBChiZBCBCiBcGHhChCiBRBxxEYC40Rx8c6RGUm4GRFRFYRQZ44acG4wRYFEFGJYllGFlYGwcGmkEmcGFJFl8cYxwFGFGRFGFRJFGkkcYkxRm6aFGEGmmEmEGRYRFGxxYFRFRFRGQGIFmIFIGIooGFGFGYJ4EFmoIRFlxRlxRFRFxlRxlFllRxmFIGxxIoxRomFRIRxlFlmGRJFaL86F4mRxmGoRFRFRFRFllRxGIGRxmGxmGmxRxGRFlRRJmmFllGYRmmIRFllRlRFRFllRFxxGFIGmmRoxImxRFRllGmxRJ4aRFGxmIoRFlxRlxRFRFllRFxxGlImoGmmRxoIxoIGRmmIRxlFlmGRJ8FLRxmFFRFllRllRxxFlRlxRxlFRFRFRooGRIooRomRxFRIRJLc8aRmoIoGFllRlRFRFRlmGmoIooRGRGRxmGFRllGmxRJRYL8lGooYFllRlRFRFRFRmlIIxGooRGRIRlxFGRJxlFRGIFllRlRFlmGIGxIooRomF8xRxxFllILFGRJLcFxmIoRFRFRFxlRFRxxGxxIooGmmRRIRJxxIoYRFllGGRaFEGYJYRxlFRFRFlRFllGGlxRFxEGRJRFRFcY84c8mGcJL8G1WIFRFRGIGmmYFGRGRcGc88RYcYRFIGIGmmIomGFJYFooGmlFllGmmFIFIFGFmoIGIomFJIm8cBhRRxxBC4ECFRFRFlRFRFRFRFRFRFlRFRFRFRFRFRGYLRFcRBRCxxUF8YFMF1WRFYKFRFRFGRFGYRFGRFllRlRGRFmmIGIooGGY44E46FmxRJRLRY44U44GmmQRJRFEFRFGFlGRFRFxmGmoIooGmoIoxRxxIoGIGRxxcx4YJFRFRFRFRJLRcFmmIomRx4YFoGGmRomIGIGmxRJRJRYEYRGmmHRGIFmIGmIIooGFRJYcGcRmmIFomGmmIomGmlFJFmoGooGGIRYFIGIGRYJRFJFEYCRBRBYRGYGIGFGFllGomGFRCECECEGRGhCCiBCBCRBRCBCBCRBRCxBCBCRCDCDCDCiiRBj7CbCiiRBj7b7iCiiRxiCBRbCBbxxCiiRBj7bRMQUY9+V9+VYtOQMY9eY43X44Z1WY54XYMQRQrERLZ12ELZ12RERaRGHGHGR88B88BihBhiChhC8hcZBc8BB8CBCFi8cihBZBC8Z8CLKhCKr8cRZcZc88ZcZc85Z8ZcZc1WcZc1WcZcZcZcRcRLcLcZcZcZcZc1WLcZ1WZ1WZcZ1WZ1WZ1WZcZcZcRcRcBRCixBBCiBBihCCEBhCCchCGhCRY44LCiRRxxCFRkYRGFRFRFRFRFRFRFRFRFRGY9eY49eY44U49e49e1WYEYUY04VY48cRcRcRcRcRs4Y48ElK1Wc1W12U2cKGooUE88KqqEl4c8RFxxGm7bkkFUF4kEkFRFRFx8cLcFcRFcRLcLcLcLcLcFcFRFEFRcRFEYFEYFJFRhClmHnnYG4EhCEGFKGYRbEbhCCiBECiBhCk7bhClBihCiBBCBhCRhiBhhCCRhiFkkCFlGllGllGFooGmIcGRL88aRFYRIFIGRYJRGFYl4FGJFGYFGIRYFRGIFmoIGIGIYxEJRYFmEFJFRFGmoImoIGRFGFmIRJRYFEFcloGIFmlGmlFGFlmGFRllEYFomGo4YlkEoGRFRFRFRFRFRCbECk7bRCFooG4oGRJRFRFRFRTSFRFRCRCRlGFZFRFRlxFFbRF2VRFRFRF6cRGY41WRG40UX1W44V24Y44X33Y44R44U1WY50Z5R46YRFRFxxQY44a41W54UYJYZYB14W7XC15WZ12YYFEFEFRFRFRFlxRllRxxa65b86axcZcRQcR"
        decodedRangeCategory = decodeVarLenBase64(rangeCategory, fromBase64, 1343)
    }
}
|
||||
|
||||
/**
 * Extracts a category value from the packed [code] of a range for the character at offset [ch]
 * from the start of that range.
 *
 *   - A code below `0x20` is the category value itself.
 *   - A code below `0x400` packs two 5-bit values: the low one is used at even offsets, the high one at odd offsets.
 *   - Any other code packs three 5-bit values, selected by the offset modulo three (low bits for remainder `0`).
 */
private fun categoryValueFrom(code: Int, ch: Int): Int {
    if (code < 0x20) {
        return code
    }
    if (code < 0x400) {
        val oddOffset = (ch and 1) == 1
        return if (oddOffset) code shr 5 else code and 0x1f
    }
    return when (ch % 3) {
        2 -> code shr 10
        1 -> (code shr 5) and 0x1f
        else -> code and 0x1f
    }
}
|
||||
|
||||
/**
 * Returns the Unicode general category of this character as an Int.
 *
 * The range containing the character is found by binary search over `Category.decodedRangeStart`;
 * the packed code of that range is then resolved for the character's offset within the range.
 * A resolved value of `17` is reported as `CharCategory.UNASSIGNED.value`.
 */
internal fun Char.getCategoryValue(): Int {
    val charCode = this.toInt()

    val rangeIndex = binarySearchRange(Category.decodedRangeStart, charCode)
    val offsetInRange = charCode - Category.decodedRangeStart[rangeIndex]
    val resolved = categoryValueFrom(Category.decodedRangeCategory[rangeIndex], offsetInRange)

    return when (resolved) {
        17 -> CharCategory.UNASSIGNED.value
        else -> resolved
    }
}
|
||||
|
||||
/**
 * Decodes a sequence of variable-length integers written with base64 digits.
 *
 * Every character of [base64] is mapped through [fromBase64] (indexed by char code) to a 6-bit digit.
 * The low five bits of a digit are payload, appended least-significant group first; the `0x20` bit
 * marks that more digits of the same integer follow. A digit without that bit completes the integer.
 *
 * @param resultLength the size of the returned array; integers are stored from index `0` onwards.
 * @return the decoded integers. Positions that were not reached by the input stay `0`.
 */
internal fun decodeVarLenBase64(base64: String, fromBase64: IntArray, resultLength: Int): IntArray {
    val decoded = IntArray(resultLength)
    var written = 0
    var accumulator = 0
    var bitOffset = 0
    for (symbol in base64) {
        val digit = fromBase64[symbol.toInt()]
        accumulator = accumulator or ((digit and 0x1f) shl bitOffset)
        if (digit >= 0x20) {
            // Continuation digit: the next group lands five bits higher.
            bitOffset += 5
            continue
        }
        decoded[written] = accumulator
        written++
        accumulator = 0
        bitOffset = 0
    }
    return decoded
}
|
||||
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
* Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package kotlin.text
|
||||
|
||||
//
|
||||
// NOTE: THIS FILE IS AUTO-GENERATED by the GenerateUnicodeData.kt
|
||||
// See: https://github.com/JetBrains/kotlin/tree/master/libraries/stdlib
|
||||
//
|
||||
|
||||
// 37 ranges in total
|
||||
/**
 * Table of digit ranges used by `isDigitImpl`.
 */
private object Digit {
    /**
     * Char codes at which a digit range begins, in ascending order.
     * `isDigitImpl` treats every entry as the first of ten consecutive digit characters.
     */
    internal val rangeStart: IntArray = intArrayOf(
        0x0030, 0x0660, 0x06f0, 0x07c0, 0x0966, 0x09e6, 0x0a66, 0x0ae6, 0x0b66, 0x0be6,
        0x0c66, 0x0ce6, 0x0d66, 0x0de6, 0x0e50, 0x0ed0, 0x0f20, 0x1040, 0x1090, 0x17e0,
        0x1810, 0x1946, 0x19d0, 0x1a80, 0x1a90, 0x1b50, 0x1bb0, 0x1c40, 0x1c50, 0xa620,
        0xa8d0, 0xa900, 0xa9d0, 0xa9f0, 0xaa50, 0xabf0, 0xff10,
    )
}
|
||||
|
||||
/**
 * Finds, in the ascending [array] of range starts, the range that [needle] falls into.
 *
 * @return the index of [needle] itself when it is present, otherwise the index of the element
 * examined last, lowered by one if that element is greater than [needle]. The result is `-1`
 * when [needle] is smaller than the first element.
 */
internal fun binarySearchRange(array: IntArray, needle: Int): Int {
    var low = 0
    var high = array.size - 1
    var probe = -1
    var probed = 0
    while (low <= high) {
        probe = (low + high) / 2
        probed = array[probe]
        when {
            needle == probed -> return probe
            needle > probed -> low = probe + 1
            else -> high = probe - 1
        }
    }
    // No exact match: step back when the search stopped on an element above the needle.
    return if (needle < probed) probe - 1 else probe
}
|
||||
|
||||
/**
 * Returns `true` if this character is a digit.
 *
 * The closest range start in `Digit.rangeStart` that is not greater than this character's code is
 * located by binary search; the character is a digit when it lies within the ten codes that begin
 * at that start.
 */
internal fun Char.isDigitImpl(): Boolean {
    val ch = this.toInt()
    val index = binarySearchRange(Digit.rangeStart, ch)
    // binarySearchRange yields -1 when `ch` precedes the first range start (any code below 0x0030).
    // Such a character is not a digit, and -1 must not be used to index the table.
    // NOTE(review): callers may already filter these characters out - not visible from here.
    if (index < 0) {
        return false
    }
    val high = Digit.rangeStart[index] + 9
    return ch <= high
}
|
||||
@@ -0,0 +1,114 @@
|
||||
/*
|
||||
* Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package kotlin.text
|
||||
|
||||
//
|
||||
// NOTE: THIS FILE IS AUTO-GENERATED by the GenerateUnicodeData.kt
|
||||
// See: https://github.com/JetBrains/kotlin/tree/master/libraries/stdlib
|
||||
//
|
||||
|
||||
// 222 ranges in total
|
||||
/**
 * Lookup tables used by `getLetterType`.
 *
 * All three tables have one entry per letter range and are decoded from variable-length base64 text
 * when the object is initialized:
 *   - [decodedRangeStart] is the first char code of the range (ascending);
 *   - [decodedRangeLength] is the number of char codes the range spans;
 *   - [decodedRangeCategory] is the packed letter-type code of the range.
 */
private object Letter {
    val decodedRangeStart: IntArray
    val decodedRangeLength: IntArray
    val decodedRangeCategory: IntArray

    init {
        // Reverse lookup: char code of a base64 digit -> its 6-bit value.
        val alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
        val fromBase64 = IntArray(128)
        var digitValue = 0
        for (symbol in alphabet) {
            fromBase64[symbol.toInt()] = digitValue
            digitValue++
        }

        // rangeStartDiff.length = 356
        val rangeStartDiff = "hCgBpCQGYHZH5BRpBPPPPPPRMP5BPPlCPP6BkEPPPPcPXPzBvBrB3BOiDoBHwD+E3DauCnFmBmB2D6E1BlBTiBmBlBP5BhBiBrBvBjBqBnBPRtBiCmCtBlB0BmB5BiB7BmBgEmChBZgCoEoGVpBSfRhBPqKQ2BwBYoFgB4CJuTiEvBuCuDrF5DgEgFlJ1DgFmBQtBsBRGsB+BPiBlD1EIjDPRPPPQPPPPPGQSQS/DxENVNU+B9zCwBwBPPCkDPNnBPqDYY1R8B7FkFgTgwGgwUwmBgKwBuBScmEP/BPPPPPPrBP8B7F1B/ErBqC6B7BiBmBfQsBUwCw/KwqIwLwETPcPjQgJxFgBlBsD"
        // The encoded values are differences; the starts are their running sum
        // (the first difference is the first start itself).
        val deltas = decodeVarLenBase64(rangeStartDiff, fromBase64, 222)
        val starts = IntArray(deltas.size)
        var runningStart = 0
        for (i in deltas.indices) {
            runningStart += deltas[i]
            starts[i] = runningStart
        }
        decodedRangeStart = starts

        // rangeLength.length = 328
        val rangeLength = "aaMBXHYH5BRpBPPPPPPRMP5BPPlCPPzBDOOPPcPXPzBvBjB3BOhDmBBpB7DoDYxB+EiBP1DoExBkBQhBekBPmBgBhBctBiBMWOOXhCsBpBkBUV3Ba4BkB0DlCgBXgBtD4FSdBfPhBPpKP0BvBXjEQ2CGsT8DhBtCqDpFvD1D3E0IrD2EkBJrBDOBsB+BPiBlB1EIjDPPPPPPPPPPPGPPMNLsBNPNPKCvBvBPPCkDPBmBPhDXXgD4B6FzEgDguG9vUtkB9JcuBSckEP/BPPPPPPBPf4FrBjEhBpC3B5BKaWPrBOwCk/KsCuLqDHPbPxPsFtEaaqDL"
        decodedRangeLength = decodeVarLenBase64(rangeLength, fromBase64, 222)

        // rangeCategory.length = 959
        val rangeCategory = "GFjgggUHGGFFZZZmzpz5qB6s6020B60ptltB6smt2sB60mz22B1+vv+8BZZ5s2850BW5q1ymtB506smzBF3q1q1qB1q1q1+Bgii4wDTm74g3KiggxqM60q1q1Bq1o1q1BF1qlrqrBZ2q5wprBGFZWWZGHFsjiooLowgmOowjkwCkgoiIk7ligGogiioBkwkiYkzj2oNoi+sbkwj04DghhkQ8wgiYkgoioDsgnkwC4gikQ//v+85BkwvoIsgoyI4yguI0whiwEowri4CoghsJowgqYowgm4DkwgsY/nwnzPowhmYkg6wI8yggZswikwHgxgmIoxgqYkwgk4DkxgmIkgoioBsgssoBgzgyI8g9gL8g9kI0wgwJoxgkoC0wgioFkw/wI0w53iF4gioYowjmgBHGq1qkgwBF1q1q8qBHwghuIwghyKk0goQkwgoQk3goQHGFHkyg0pBgxj6IoinkxDswno7Ikwhz9Bo0gioB8z48Rwli0xN0mpjoX8w78pDwltoqKHFGGwwgsIHFH3q1q16BFHWFZ1q10q1B2qlwq1B1q10q1B2q1yq1B6q1gq1Biq1qhxBir1qp1Bqt1q1qB1g1q1+B//3q16B///q1qBH/qlqq9Bholqq9B1i00a1q10qD1op1HkwmigEigiy6Cptogq1Bixo1kDq7/j00B2qgoBWGFm1lz50B6s5q1+BGWhggzhwBFFhgk4//Bo2jigE8wguI8wguI8wgugUog1qoB4qjmIwwi2KgkYHHH4lBgiFWkgIWoghssMmz5smrBZ3q1y50B5sm7gzBtz1smzB5smz50BqzqtmzB5sgzqzBF2/9//5BowgoIwmnkzPkwgk4C8ys65BkgoqI0wgy6FghquZo2giY0ghiIsgh24B4ghsQ8QF/v1q1OFs0O8iCHHF1qggz/B8wg6Iznv+//B08QgohsjK0QGFk7hsQ4gB"
        decodedRangeCategory = decodeVarLenBase64(rangeCategory, fromBase64, 222)
    }
}
|
||||
|
||||
/**
 * Returns `true` if this character is a letter, i.e. [getLetterType] reports any non-zero type for it.
 */
internal fun Char.isLetterImpl(): Boolean = getLetterType() != 0
|
||||
|
||||
/**
 * Returns `true` if this character is a lower case letter, i.e. [getLetterType] reports type `1` for it.
 */
internal fun Char.isLowerCaseImpl(): Boolean = getLetterType() == 1
|
||||
|
||||
/**
 * Returns `true` if this character is an upper case letter, i.e. [getLetterType] reports type `2` for it.
 */
internal fun Char.isUpperCaseImpl(): Boolean = getLetterType() == 2
|
||||
|
||||
/**
 * Classifies this character using the [Letter] tables.
 *
 * The result is
 * - `0` when the character is not a letter,
 * - `1` when it is a lower case letter,
 * - `2` when it is an upper case letter,
 * - `3` when it is a letter that is neither lower nor upper case.
 */
private fun Char.getLetterType(): Int {
    val codeUnit = this.toInt()
    val rangeIndex = binarySearchRange(Letter.decodedRangeStart, codeUnit)

    val firstInRange = Letter.decodedRangeStart[rangeIndex]
    val lastInRange = firstInRange + Letter.decodedRangeLength[rangeIndex] - 1
    val pattern = Letter.decodedRangeCategory[rangeIndex]

    // The character falls into the hole after the located range.
    if (codeUnit > lastInRange) return 0

    val lowBits = pattern and 0x3

    if (lowBits == 0) {
        // Gap pattern: above the two low bits the value carries 7-bit run lengths that
        // alternate between a run of type 3 and a run of type 0.
        var boundary = firstInRange
        var bitOffset = 2
        repeat(2) {
            boundary += (pattern shr bitOffset) and 0x7f
            if (boundary > codeUnit) return 3
            bitOffset += 7
            boundary += (pattern shr bitOffset) and 0x7f
            if (boundary > codeUnit) return 0
            bitOffset += 7
        }
        return 3
    }

    // A value that fits into three bits gives one type to the whole range.
    if (pattern <= 0x7) return lowBits

    // Otherwise the value is a sequence of 2-bit types: two slots used alternately
    // when it fits into five bits, one slot per position in the range when it does not.
    val distance = codeUnit - firstInRange
    val slot = if (pattern <= 0x1F) distance % 2 else distance
    return (pattern shr (2 * slot)) and 0x3
}
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package kotlin.text
|
||||
|
||||
//
|
||||
// NOTE: THIS FILE IS AUTO-GENERATED by the GenerateUnicodeData.kt
|
||||
// See: https://github.com/JetBrains/kotlin/tree/master/libraries/stdlib
|
||||
//
|
||||
|
||||
// 9 ranges totally
|
||||
/**
 * Returns `true` if this character is a whitespace.
 *
 * The check is a fixed list of code unit values and ranges.
 */
internal fun Char.isWhitespaceImpl(): Boolean =
    when (this.toInt()) {
        in 0x0009..0x000d,
        in 0x001c..0x0020,
        0x00a0,
        -> true

        // Everything below lies above 0x1000.
        0x1680,
        in 0x2000..0x200a,
        0x2028,
        0x2029,
        0x202f,
        0x205f,
        0x3000,
        -> true

        else -> false
    }
|
||||
@@ -0,0 +1,172 @@
|
||||
/*
|
||||
* Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package kotlin.text
|
||||
|
||||
/**
 * Represents the character general category in the Unicode specification.
 *
 * Each entry carries an internal integer [value] and the two-letter [code] of the category.
 */
public actual enum class CharCategory(internal val value: Int, public actual val code: String) {
    /** The "Cn" general category of the Unicode specification. */
    UNASSIGNED(0, "Cn"),

    /** The "Lu" general category of the Unicode specification. */
    UPPERCASE_LETTER(1, "Lu"),

    /** The "Ll" general category of the Unicode specification. */
    LOWERCASE_LETTER(2, "Ll"),

    /** The "Lt" general category of the Unicode specification. */
    TITLECASE_LETTER(3, "Lt"),

    /** The "Lm" general category of the Unicode specification. */
    MODIFIER_LETTER(4, "Lm"),

    /** The "Lo" general category of the Unicode specification. */
    OTHER_LETTER(5, "Lo"),

    /** The "Mn" general category of the Unicode specification. */
    NON_SPACING_MARK(6, "Mn"),

    /** The "Me" general category of the Unicode specification. */
    ENCLOSING_MARK(7, "Me"),

    /** The "Mc" general category of the Unicode specification. */
    COMBINING_SPACING_MARK(8, "Mc"),

    /** The "Nd" general category of the Unicode specification. */
    DECIMAL_DIGIT_NUMBER(9, "Nd"),

    /** The "Nl" general category of the Unicode specification. */
    LETTER_NUMBER(10, "Nl"),

    /** The "No" general category of the Unicode specification. */
    OTHER_NUMBER(11, "No"),

    /** The "Zs" general category of the Unicode specification. */
    SPACE_SEPARATOR(12, "Zs"),

    /** The "Zl" general category of the Unicode specification. */
    LINE_SEPARATOR(13, "Zl"),

    /** The "Zp" general category of the Unicode specification. */
    PARAGRAPH_SEPARATOR(14, "Zp"),

    /** The "Cc" general category of the Unicode specification. */
    CONTROL(15, "Cc"),

    /** The "Cf" general category of the Unicode specification. */
    FORMAT(16, "Cf"),

    /** The "Co" general category of the Unicode specification. */
    PRIVATE_USE(18, "Co"),

    /** The "Cs" general category of the Unicode specification. */
    SURROGATE(19, "Cs"),

    /** The "Pd" general category of the Unicode specification. */
    DASH_PUNCTUATION(20, "Pd"),

    /** The "Ps" general category of the Unicode specification. */
    START_PUNCTUATION(21, "Ps"),

    /** The "Pe" general category of the Unicode specification. */
    END_PUNCTUATION(22, "Pe"),

    /** The "Pc" general category of the Unicode specification. */
    CONNECTOR_PUNCTUATION(23, "Pc"),

    /** The "Po" general category of the Unicode specification. */
    OTHER_PUNCTUATION(24, "Po"),

    /** The "Sm" general category of the Unicode specification. */
    MATH_SYMBOL(25, "Sm"),

    /** The "Sc" general category of the Unicode specification. */
    CURRENCY_SYMBOL(26, "Sc"),

    /** The "Sk" general category of the Unicode specification. */
    MODIFIER_SYMBOL(27, "Sk"),

    /** The "So" general category of the Unicode specification. */
    OTHER_SYMBOL(28, "So"),

    /** The "Pi" general category of the Unicode specification. */
    INITIAL_QUOTE_PUNCTUATION(29, "Pi"),

    /** The "Pf" general category of the Unicode specification. */
    FINAL_QUOTE_PUNCTUATION(30, "Pf");

    /**
     * Checks whether the given [char] belongs to this category,
     * i.e. whether its category value equals this entry's [value].
     */
    public actual operator fun contains(char: Char): Boolean {
        return this.value == char.getCategoryValue()
    }

    companion object {
        /**
         * Maps an integer category [category] to the entry holding that value.
         *
         * @throws IllegalArgumentException if no entry has the given value.
         */
        internal fun valueOf(category: Int): CharCategory {
            // No entry uses the value 17, so every entry from PRIVATE_USE on
            // sits one position below its value.
            val position = when (category) {
                in 0..16 -> category
                in 18..30 -> category - 1
                else -> throw IllegalArgumentException("Category #$category is not defined.")
            }
            return values()[position]
        }
    }
}
|
||||
@@ -5,9 +5,6 @@
|
||||
|
||||
package kotlin.text
|
||||
|
||||
// actually \s is enough to match all whitespace, but \xA0 added because of different regexp behavior of Rhino used in Selenium tests
|
||||
public actual fun Char.isWhitespace(): Boolean = toString().matches("[\\s\\xA0]")
|
||||
|
||||
/**
|
||||
* Converts this character to lower case using Unicode mapping rules of the invariant locale.
|
||||
*/
|
||||
@@ -91,3 +88,142 @@ public actual fun Char.isHighSurrogate(): Boolean = this in Char.MIN_HIGH_SURROG
|
||||
* Returns `true` if this character is a Unicode low-surrogate code unit (also known as trailing-surrogate code unit).
|
||||
*/
|
||||
public actual fun Char.isLowSurrogate(): Boolean = this in Char.MIN_LOW_SURROGATE..Char.MAX_LOW_SURROGATE
|
||||
|
||||
/**
 * The Unicode general category this character belongs to.
 */
public actual val Char.category: CharCategory
    get() {
        val categoryValue = getCategoryValue()
        return CharCategory.valueOf(categoryValue)
    }
|
||||
|
||||
/**
 * Returns `true` if this character (Unicode code point) is defined in Unicode.
 *
 * A character counts as defined when its [category] is anything but [CharCategory.UNASSIGNED].
 * Characters below `'\u0080'` are reported as defined without a category lookup.
 */
public actual fun Char.isDefined(): Boolean =
    this < '\u0080' || getCategoryValue() != CharCategory.UNASSIGNED.value
|
||||
|
||||
/**
 * Returns `true` if this character is a letter.
 *
 * A letter is a character whose [category] is [CharCategory.UPPERCASE_LETTER],
 * [CharCategory.LOWERCASE_LETTER], [CharCategory.TITLECASE_LETTER], [CharCategory.MODIFIER_LETTER],
 * or [CharCategory.OTHER_LETTER].
 *
 * @sample samples.text.Chars.isLetter
 */
public actual fun Char.isLetter(): Boolean =
    when {
        // Characters below '\u0080' are decided without the table lookup.
        this in 'a'..'z' || this in 'A'..'Z' -> true
        this < '\u0080' -> false
        else -> isLetterImpl()
    }
|
||||
|
||||
/**
 * Returns `true` if this character is either a letter or a digit.
 *
 * @see isLetter
 * @see isDigit
 *
 * @sample samples.text.Chars.isLetterOrDigit
 */
public actual fun Char.isLetterOrDigit(): Boolean {
    val isAsciiLetterOrDigit = this in 'a'..'z' || this in 'A'..'Z' || this in '0'..'9'
    return when {
        isAsciiLetterOrDigit -> true
        // Nothing else below '\u0080' qualifies.
        this < '\u0080' -> false
        else -> isDigitImpl() || isLetterImpl()
    }
}
|
||||
|
||||
/**
 * Returns `true` if this character is a digit.
 *
 * A digit is a character whose [category] is [CharCategory.DECIMAL_DIGIT_NUMBER].
 *
 * @sample samples.text.Chars.isDigit
 */
public actual fun Char.isDigit(): Boolean =
    when {
        this in '0'..'9' -> true
        // No other character below '\u0080' is treated as a digit.
        this < '\u0080' -> false
        else -> isDigitImpl()
    }
|
||||
|
||||
/**
 * Returns `true` if this character is an upper case letter.
 *
 * An upper case letter is a character whose [category] is [CharCategory.UPPERCASE_LETTER].
 *
 * @sample samples.text.Chars.isUpperCase
 */
public actual fun Char.isUpperCase(): Boolean =
    when {
        this in 'A'..'Z' -> true
        // No other character below '\u0080' is treated as upper case.
        this < '\u0080' -> false
        else -> isUpperCaseImpl()
    }
|
||||
|
||||
/**
 * Returns `true` if this character is a lower case letter.
 *
 * A lower case letter is a character whose [category] is [CharCategory.LOWERCASE_LETTER].
 *
 * @sample samples.text.Chars.isLowerCase
 */
public actual fun Char.isLowerCase(): Boolean =
    when {
        this in 'a'..'z' -> true
        // No other character below '\u0080' is treated as lower case.
        this < '\u0080' -> false
        else -> isLowerCaseImpl()
    }
|
||||
|
||||
/**
 * Returns `true` if this character is a title case letter.
 *
 * A title case letter is a character whose [category] is [CharCategory.TITLECASE_LETTER].
 * Characters below `'\u0080'` are rejected without a category lookup.
 *
 * @sample samples.text.Chars.isTitleCase
 */
public actual fun Char.isTitleCase(): Boolean =
    this >= '\u0080' && getCategoryValue() == CharCategory.TITLECASE_LETTER.value
|
||||
|
||||
/**
 * Returns `true` if this character is an ISO control character.
 *
 * An ISO control character is a character whose [category] is [CharCategory.CONTROL];
 * the check accepts the ranges `'\u0000'..'\u001F'` and `'\u007F'..'\u009F'`.
 *
 * @sample samples.text.Chars.isISOControl
 */
public actual fun Char.isISOControl(): Boolean =
    when (this) {
        in '\u0000'..'\u001F', in '\u007F'..'\u009F' -> true
        else -> false
    }
|
||||
|
||||
/**
 * Determines whether a character is whitespace according to the Unicode standard.
 *
 * @return `true` if the character is whitespace.
 *
 * @sample samples.text.Chars.isWhitespace
 */
public actual fun Char.isWhitespace(): Boolean {
    return isWhitespaceImpl()
}
|
||||
@@ -0,0 +1,177 @@
|
||||
/*
|
||||
* Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package kotlin.text
|
||||
|
||||
/**
 * Represents the character general category in the Unicode specification.
 *
 * Each entry carries the integer [value] that is compared against `Character.getType`
 * and the two-letter [code] of the category.
 */
public actual enum class CharCategory(public val value: Int, public actual val code: String) {
    /** The "Cn" general category of the Unicode specification. */
    UNASSIGNED(0, "Cn"),

    /** The "Lu" general category of the Unicode specification. */
    UPPERCASE_LETTER(1, "Lu"),

    /** The "Ll" general category of the Unicode specification. */
    LOWERCASE_LETTER(2, "Ll"),

    /** The "Lt" general category of the Unicode specification. */
    TITLECASE_LETTER(3, "Lt"),

    /** The "Lm" general category of the Unicode specification. */
    MODIFIER_LETTER(4, "Lm"),

    /** The "Lo" general category of the Unicode specification. */
    OTHER_LETTER(5, "Lo"),

    /** The "Mn" general category of the Unicode specification. */
    NON_SPACING_MARK(6, "Mn"),

    /** The "Me" general category of the Unicode specification. */
    ENCLOSING_MARK(7, "Me"),

    /** The "Mc" general category of the Unicode specification. */
    COMBINING_SPACING_MARK(8, "Mc"),

    /** The "Nd" general category of the Unicode specification. */
    DECIMAL_DIGIT_NUMBER(9, "Nd"),

    /** The "Nl" general category of the Unicode specification. */
    LETTER_NUMBER(10, "Nl"),

    /** The "No" general category of the Unicode specification. */
    OTHER_NUMBER(11, "No"),

    /** The "Zs" general category of the Unicode specification. */
    SPACE_SEPARATOR(12, "Zs"),

    /** The "Zl" general category of the Unicode specification. */
    LINE_SEPARATOR(13, "Zl"),

    /** The "Zp" general category of the Unicode specification. */
    PARAGRAPH_SEPARATOR(14, "Zp"),

    /** The "Cc" general category of the Unicode specification. */
    CONTROL(15, "Cc"),

    /** The "Cf" general category of the Unicode specification. */
    FORMAT(16, "Cf"),

    /** The "Co" general category of the Unicode specification. */
    PRIVATE_USE(18, "Co"),

    /** The "Cs" general category of the Unicode specification. */
    SURROGATE(19, "Cs"),

    /** The "Pd" general category of the Unicode specification. */
    DASH_PUNCTUATION(20, "Pd"),

    /** The "Ps" general category of the Unicode specification. */
    START_PUNCTUATION(21, "Ps"),

    /** The "Pe" general category of the Unicode specification. */
    END_PUNCTUATION(22, "Pe"),

    /** The "Pc" general category of the Unicode specification. */
    CONNECTOR_PUNCTUATION(23, "Pc"),

    /** The "Po" general category of the Unicode specification. */
    OTHER_PUNCTUATION(24, "Po"),

    /** The "Sm" general category of the Unicode specification. */
    MATH_SYMBOL(25, "Sm"),

    /** The "Sc" general category of the Unicode specification. */
    CURRENCY_SYMBOL(26, "Sc"),

    /** The "Sk" general category of the Unicode specification. */
    MODIFIER_SYMBOL(27, "Sk"),

    /** The "So" general category of the Unicode specification. */
    OTHER_SYMBOL(28, "So"),

    /** The "Pi" general category of the Unicode specification. */
    INITIAL_QUOTE_PUNCTUATION(29, "Pi"),

    /** The "Pf" general category of the Unicode specification. */
    FINAL_QUOTE_PUNCTUATION(30, "Pf");

    /**
     * Checks whether the given [char] belongs to this category,
     * i.e. whether `Character.getType(char)` equals this entry's [value].
     */
    public actual operator fun contains(char: Char): Boolean {
        val javaType = Character.getType(char)
        return javaType == this.value
    }

    companion object {
        /**
         * Returns the [CharCategory] corresponding to the specified [category] that represents a Java general category constant.
         *
         * @throws IllegalArgumentException if the [category] does not represent a Java general category constant.
         */
        public fun valueOf(category: Int): CharCategory {
            // No entry uses the value 17, so every entry from PRIVATE_USE on
            // sits one position below its value.
            val position = when (category) {
                in 0..16 -> category
                in 18..30 -> category - 1
                else -> throw IllegalArgumentException("Category #$category is not defined.")
            }
            return values()[position]
        }
    }
}
|
||||
@@ -10,32 +10,51 @@ package kotlin.text
|
||||
|
||||
import java.util.Locale
|
||||
|
||||
/**
 * The Unicode general category this character belongs to, as reported by `Character.getType`.
 */
public actual val Char.category: CharCategory
    get() {
        val javaType = Character.getType(this)
        return CharCategory.valueOf(javaType)
    }
|
||||
|
||||
/**
|
||||
* Returns `true` if this character (Unicode code point) is defined in Unicode.
|
||||
*
|
||||
* A character is considered to be defined in Unicode if its [category] is not [CharCategory.UNASSIGNED].
|
||||
*/
|
||||
@kotlin.internal.InlineOnly
|
||||
public inline fun Char.isDefined(): Boolean = Character.isDefined(this)
|
||||
public actual inline fun Char.isDefined(): Boolean = Character.isDefined(this)
|
||||
|
||||
/**
|
||||
* Returns `true` if this character is a letter.
|
||||
*
|
||||
* A character is considered to be a letter if its [category] is [CharCategory.UPPERCASE_LETTER],
|
||||
* [CharCategory.LOWERCASE_LETTER], [CharCategory.TITLECASE_LETTER], [CharCategory.MODIFIER_LETTER], or [CharCategory.OTHER_LETTER].
|
||||
*
|
||||
* @sample samples.text.Chars.isLetter
|
||||
*/
|
||||
@kotlin.internal.InlineOnly
|
||||
public inline fun Char.isLetter(): Boolean = Character.isLetter(this)
|
||||
public actual inline fun Char.isLetter(): Boolean = Character.isLetter(this)
|
||||
|
||||
/**
|
||||
* Returns `true` if this character is a letter or digit.
|
||||
*
|
||||
* @see isLetter
|
||||
* @see isDigit
|
||||
*
|
||||
* @sample samples.text.Chars.isLetterOrDigit
|
||||
*/
|
||||
@kotlin.internal.InlineOnly
|
||||
public inline fun Char.isLetterOrDigit(): Boolean = Character.isLetterOrDigit(this)
|
||||
public actual inline fun Char.isLetterOrDigit(): Boolean = Character.isLetterOrDigit(this)
|
||||
|
||||
/**
|
||||
* Returns `true` if this character (Unicode code point) is a digit.
|
||||
* Returns `true` if this character is a digit.
|
||||
*
|
||||
* A character is considered to be a digit if its [category] is [CharCategory.DECIMAL_DIGIT_NUMBER].
|
||||
*
|
||||
* @sample samples.text.Chars.isDigit
|
||||
*/
|
||||
@kotlin.internal.InlineOnly
|
||||
public inline fun Char.isDigit(): Boolean = Character.isDigit(this)
|
||||
public actual inline fun Char.isDigit(): Boolean = Character.isDigit(this)
|
||||
|
||||
|
||||
/**
|
||||
@@ -47,10 +66,13 @@ public inline fun Char.isIdentifierIgnorable(): Boolean = Character.isIdentifier
|
||||
|
||||
/**
|
||||
* Returns `true` if this character is an ISO control character.
|
||||
*
|
||||
* A character is considered to be an ISO control character if its [category] is [CharCategory.CONTROL].
|
||||
*
|
||||
* @sample samples.text.Chars.isISOControl
|
||||
*/
|
||||
@kotlin.internal.InlineOnly
|
||||
public inline fun Char.isISOControl(): Boolean = Character.isISOControl(this)
|
||||
public actual inline fun Char.isISOControl(): Boolean = Character.isISOControl(this)
|
||||
|
||||
/**
|
||||
* Returns `true` if this character (Unicode code point) may be part of a Java identifier as other than the first character.
|
||||
@@ -69,6 +91,7 @@ public inline fun Char.isJavaIdentifierStart(): Boolean = Character.isJavaIdenti
|
||||
/**
|
||||
* Determines whether a character is whitespace according to the Unicode standard.
|
||||
* Returns `true` if the character is whitespace.
|
||||
*
|
||||
* @sample samples.text.Chars.isWhitespace
|
||||
*/
|
||||
public actual fun Char.isWhitespace(): Boolean = Character.isWhitespace(this) || Character.isSpaceChar(this)
|
||||
@@ -78,14 +101,14 @@ public actual fun Char.isWhitespace(): Boolean = Character.isWhitespace(this) ||
|
||||
* @sample samples.text.Chars.isUpperCase
|
||||
*/
|
||||
@kotlin.internal.InlineOnly
|
||||
public inline fun Char.isUpperCase(): Boolean = Character.isUpperCase(this)
|
||||
public actual inline fun Char.isUpperCase(): Boolean = Character.isUpperCase(this)
|
||||
|
||||
/**
|
||||
* Returns `true` if this character is lower case.
|
||||
* @sample samples.text.Chars.isLowerCase
|
||||
*/
|
||||
@kotlin.internal.InlineOnly
|
||||
public inline fun Char.isLowerCase(): Boolean = Character.isLowerCase(this)
|
||||
public actual inline fun Char.isLowerCase(): Boolean = Character.isLowerCase(this)
|
||||
|
||||
/**
|
||||
* Converts this character to lower case using Unicode mapping rules of the invariant locale.
|
||||
@@ -192,7 +215,7 @@ public fun Char.lowercase(locale: Locale): String = toString().lowercase(locale)
|
||||
* @sample samples.text.Chars.isTitleCase
|
||||
*/
|
||||
@kotlin.internal.InlineOnly
|
||||
public inline fun Char.isTitleCase(): Boolean = Character.isTitleCase(this)
|
||||
public actual inline fun Char.isTitleCase(): Boolean = Character.isTitleCase(this)
|
||||
|
||||
/**
|
||||
* Converts this character to title case using Unicode mapping rules of the invariant locale.
|
||||
@@ -260,11 +283,6 @@ public fun Char.titlecase(locale: Locale): String {
|
||||
return titlecaseChar().toString()
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a value indicating a character's general category.
|
||||
*/
|
||||
public val Char.category: CharCategory get() = CharCategory.valueOf(Character.getType(this))
|
||||
|
||||
/**
|
||||
* Returns the Unicode directionality property for the given character.
|
||||
*/
|
||||
|
||||
@@ -227,3 +227,88 @@ public fun Char.equals(other: Char, ignoreCase: Boolean = false): Boolean {
|
||||
* Returns `true` if this character is a Unicode surrogate code unit.
|
||||
*/
|
||||
public fun Char.isSurrogate(): Boolean = this in Char.MIN_SURROGATE..Char.MAX_SURROGATE
|
||||
|
||||
/**
|
||||
* Returns the Unicode general category of this character.
|
||||
*/
|
||||
public expect val Char.category: CharCategory
|
||||
|
||||
/**
|
||||
* Returns `true` if this character (Unicode code point) is defined in Unicode.
|
||||
*
|
||||
* A character is considered to be defined in Unicode if its [category] is not [CharCategory.UNASSIGNED].
|
||||
*/
|
||||
public expect fun Char.isDefined(): Boolean
|
||||
|
||||
/**
|
||||
* Returns `true` if this character is a letter.
|
||||
*
|
||||
* A character is considered to be a letter if its [category] is [CharCategory.UPPERCASE_LETTER],
|
||||
* [CharCategory.LOWERCASE_LETTER], [CharCategory.TITLECASE_LETTER], [CharCategory.MODIFIER_LETTER], or [CharCategory.OTHER_LETTER].
|
||||
*
|
||||
* @sample samples.text.Chars.isLetter
|
||||
*/
|
||||
public expect fun Char.isLetter(): Boolean
|
||||
|
||||
/**
|
||||
* Returns `true` if this character is a letter or digit.
|
||||
*
|
||||
* @see isLetter
|
||||
* @see isDigit
|
||||
*
|
||||
* @sample samples.text.Chars.isLetterOrDigit
|
||||
*/
|
||||
public expect fun Char.isLetterOrDigit(): Boolean
|
||||
|
||||
/**
|
||||
* Returns `true` if this character is a digit.
|
||||
*
|
||||
* A character is considered to be a digit if its [category] is [CharCategory.DECIMAL_DIGIT_NUMBER].
|
||||
*
|
||||
* @sample samples.text.Chars.isDigit
|
||||
*/
|
||||
public expect fun Char.isDigit(): Boolean
|
||||
|
||||
/**
|
||||
* Returns `true` if this character is an upper case letter.
|
||||
*
|
||||
* A character is considered to be an upper case letter if its [category] is [CharCategory.UPPERCASE_LETTER].
|
||||
*
|
||||
* @sample samples.text.Chars.isUpperCase
|
||||
*/
|
||||
public expect fun Char.isUpperCase(): Boolean
|
||||
|
||||
/**
|
||||
* Returns `true` if this character is a lower case letter.
|
||||
*
|
||||
* A character is considered to be a lower case letter if its [category] is [CharCategory.LOWERCASE_LETTER].
|
||||
*
|
||||
* @sample samples.text.Chars.isLowerCase
|
||||
*/
|
||||
public expect fun Char.isLowerCase(): Boolean
|
||||
|
||||
/**
|
||||
* Returns `true` if this character is a title case letter.
|
||||
*
|
||||
* A character is considered to be a title case letter if its [category] is [CharCategory.TITLECASE_LETTER].
|
||||
*
|
||||
* @sample samples.text.Chars.isTitleCase
|
||||
*/
|
||||
public expect fun Char.isTitleCase(): Boolean
|
||||
|
||||
/**
|
||||
* Returns `true` if this character is an ISO control character.
|
||||
*
|
||||
* A character is considered to be an ISO control character if its [category] is [CharCategory.CONTROL].
|
||||
*
|
||||
* @sample samples.text.Chars.isISOControl
|
||||
*/
|
||||
public expect fun Char.isISOControl(): Boolean
|
||||
|
||||
/**
|
||||
* Determines whether a character is whitespace according to the Unicode standard.
|
||||
* Returns `true` if the character is whitespace.
|
||||
*
|
||||
* @sample samples.text.Chars.isWhitespace
|
||||
*/
|
||||
public expect fun Char.isWhitespace(): Boolean
|
||||
|
||||
+38
-40
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright 2010-2018 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
@@ -8,166 +8,164 @@ package kotlin.text
|
||||
/**
|
||||
* Represents the character general category in the Unicode specification.
|
||||
*/
|
||||
public enum class CharCategory(public val value: Int, public val code: String) {
|
||||
public expect enum class CharCategory {
|
||||
/**
|
||||
* General category "Cn" in the Unicode specification.
|
||||
*/
|
||||
UNASSIGNED(Character.UNASSIGNED.toInt(), "Cn"),
|
||||
UNASSIGNED,
|
||||
|
||||
/**
|
||||
* General category "Lu" in the Unicode specification.
|
||||
*/
|
||||
UPPERCASE_LETTER(Character.UPPERCASE_LETTER.toInt(), "Lu"),
|
||||
UPPERCASE_LETTER,
|
||||
|
||||
/**
|
||||
* General category "Ll" in the Unicode specification.
|
||||
*/
|
||||
LOWERCASE_LETTER(Character.LOWERCASE_LETTER.toInt(), "Ll"),
|
||||
LOWERCASE_LETTER,
|
||||
|
||||
/**
|
||||
* General category "Lt" in the Unicode specification.
|
||||
*/
|
||||
TITLECASE_LETTER(Character.TITLECASE_LETTER.toInt(), "Lt"),
|
||||
TITLECASE_LETTER,
|
||||
|
||||
/**
|
||||
* General category "Lm" in the Unicode specification.
|
||||
*/
|
||||
MODIFIER_LETTER(Character.MODIFIER_LETTER.toInt(), "Lm"),
|
||||
MODIFIER_LETTER,
|
||||
|
||||
/**
|
||||
* General category "Lo" in the Unicode specification.
|
||||
*/
|
||||
OTHER_LETTER(Character.OTHER_LETTER.toInt(), "Lo"),
|
||||
OTHER_LETTER,
|
||||
|
||||
/**
|
||||
* General category "Mn" in the Unicode specification.
|
||||
*/
|
||||
NON_SPACING_MARK(Character.NON_SPACING_MARK.toInt(), "Mn"),
|
||||
NON_SPACING_MARK,
|
||||
|
||||
/**
|
||||
* General category "Me" in the Unicode specification.
|
||||
*/
|
||||
ENCLOSING_MARK(Character.ENCLOSING_MARK.toInt(), "Me"),
|
||||
ENCLOSING_MARK,
|
||||
|
||||
/**
|
||||
* General category "Mc" in the Unicode specification.
|
||||
*/
|
||||
COMBINING_SPACING_MARK(Character.COMBINING_SPACING_MARK.toInt(), "Mc"),
|
||||
COMBINING_SPACING_MARK,
|
||||
|
||||
/**
|
||||
* General category "Nd" in the Unicode specification.
|
||||
*/
|
||||
DECIMAL_DIGIT_NUMBER(Character.DECIMAL_DIGIT_NUMBER.toInt(), "Nd"),
|
||||
DECIMAL_DIGIT_NUMBER,
|
||||
|
||||
/**
|
||||
* General category "Nl" in the Unicode specification.
|
||||
*/
|
||||
LETTER_NUMBER(Character.LETTER_NUMBER.toInt(), "Nl"),
|
||||
LETTER_NUMBER,
|
||||
|
||||
/**
|
||||
* General category "No" in the Unicode specification.
|
||||
*/
|
||||
OTHER_NUMBER(Character.OTHER_NUMBER.toInt(), "No"),
|
||||
OTHER_NUMBER,
|
||||
|
||||
/**
|
||||
* General category "Zs" in the Unicode specification.
|
||||
*/
|
||||
SPACE_SEPARATOR(Character.SPACE_SEPARATOR.toInt(), "Zs"),
|
||||
SPACE_SEPARATOR,
|
||||
|
||||
/**
|
||||
* General category "Zl" in the Unicode specification.
|
||||
*/
|
||||
LINE_SEPARATOR(Character.LINE_SEPARATOR.toInt(), "Zl"),
|
||||
LINE_SEPARATOR,
|
||||
|
||||
/**
|
||||
* General category "Zp" in the Unicode specification.
|
||||
*/
|
||||
PARAGRAPH_SEPARATOR(Character.PARAGRAPH_SEPARATOR.toInt(), "Zp"),
|
||||
PARAGRAPH_SEPARATOR,
|
||||
|
||||
/**
|
||||
* General category "Cc" in the Unicode specification.
|
||||
*/
|
||||
CONTROL(Character.CONTROL.toInt(), "Cc"),
|
||||
CONTROL,
|
||||
|
||||
/**
|
||||
* General category "Cf" in the Unicode specification.
|
||||
*/
|
||||
FORMAT(Character.FORMAT.toInt(), "Cf"),
|
||||
FORMAT,
|
||||
|
||||
/**
|
||||
* General category "Co" in the Unicode specification.
|
||||
*/
|
||||
PRIVATE_USE(Character.PRIVATE_USE.toInt(), "Co"),
|
||||
PRIVATE_USE,
|
||||
|
||||
/**
|
||||
* General category "Cs" in the Unicode specification.
|
||||
*/
|
||||
SURROGATE(Character.SURROGATE.toInt(), "Cs"),
|
||||
SURROGATE,
|
||||
|
||||
/**
|
||||
* General category "Pd" in the Unicode specification.
|
||||
*/
|
||||
DASH_PUNCTUATION(Character.DASH_PUNCTUATION.toInt(), "Pd"),
|
||||
DASH_PUNCTUATION,
|
||||
|
||||
/**
|
||||
* General category "Ps" in the Unicode specification.
|
||||
*/
|
||||
START_PUNCTUATION(Character.START_PUNCTUATION.toInt(), "Ps"),
|
||||
START_PUNCTUATION,
|
||||
|
||||
/**
|
||||
* General category "Pe" in the Unicode specification.
|
||||
*/
|
||||
END_PUNCTUATION(Character.END_PUNCTUATION.toInt(), "Pe"),
|
||||
END_PUNCTUATION,
|
||||
|
||||
/**
|
||||
* General category "Pc" in the Unicode specification.
|
||||
*/
|
||||
CONNECTOR_PUNCTUATION(Character.CONNECTOR_PUNCTUATION.toInt(), "Pc"),
|
||||
CONNECTOR_PUNCTUATION,
|
||||
|
||||
/**
|
||||
* General category "Po" in the Unicode specification.
|
||||
*/
|
||||
OTHER_PUNCTUATION(Character.OTHER_PUNCTUATION.toInt(), "Po"),
|
||||
OTHER_PUNCTUATION,
|
||||
|
||||
/**
|
||||
* General category "Sm" in the Unicode specification.
|
||||
*/
|
||||
MATH_SYMBOL(Character.MATH_SYMBOL.toInt(), "Sm"),
|
||||
MATH_SYMBOL,
|
||||
|
||||
/**
|
||||
* General category "Sc" in the Unicode specification.
|
||||
*/
|
||||
CURRENCY_SYMBOL(Character.CURRENCY_SYMBOL.toInt(), "Sc"),
|
||||
CURRENCY_SYMBOL,
|
||||
|
||||
/**
|
||||
* General category "Sk" in the Unicode specification.
|
||||
*/
|
||||
MODIFIER_SYMBOL(Character.MODIFIER_SYMBOL.toInt(), "Sk"),
|
||||
MODIFIER_SYMBOL,
|
||||
|
||||
/**
|
||||
* General category "So" in the Unicode specification.
|
||||
*/
|
||||
OTHER_SYMBOL(Character.OTHER_SYMBOL.toInt(), "So"),
|
||||
OTHER_SYMBOL,
|
||||
|
||||
/**
|
||||
* General category "Pi" in the Unicode specification.
|
||||
*/
|
||||
INITIAL_QUOTE_PUNCTUATION(Character.INITIAL_QUOTE_PUNCTUATION.toInt(), "Pi"),
|
||||
INITIAL_QUOTE_PUNCTUATION,
|
||||
|
||||
/**
|
||||
* General category "Pf" in the Unicode specification.
|
||||
*/
|
||||
FINAL_QUOTE_PUNCTUATION(Character.FINAL_QUOTE_PUNCTUATION.toInt(), "Pf");
|
||||
FINAL_QUOTE_PUNCTUATION;
|
||||
|
||||
/**
|
||||
* Two-letter code of this general category in the Unicode specification.
|
||||
*/
|
||||
public val code: String
|
||||
|
||||
/**
|
||||
* Returns `true` if [char] character belongs to this category.
|
||||
*/
|
||||
public operator fun contains(char: Char): Boolean = Character.getType(char) == this.value
|
||||
|
||||
|
||||
public companion object {
|
||||
private val categoryMap by lazy { CharCategory.values().associateBy { it.value } }
|
||||
|
||||
public fun valueOf(category: Int): CharCategory = categoryMap[category] ?: throw IllegalArgumentException("Category #$category is not defined.")
|
||||
}
|
||||
public operator fun contains(char: Char): Boolean
|
||||
}
|
||||
@@ -7,6 +7,8 @@ package test.text
|
||||
|
||||
import kotlin.test.Test
|
||||
import kotlin.test.assertEquals
|
||||
import kotlin.test.assertFalse
|
||||
import kotlin.test.assertTrue
|
||||
import kotlin.test.assertFails
|
||||
|
||||
class CharTest {
|
||||
@@ -146,4 +148,157 @@ class CharTest {
|
||||
testFails(100, radix = 36)
|
||||
testFails(100, radix = 110)
|
||||
}
|
||||
|
||||
/**
 * One sample character for each of the 30 Unicode general categories,
 * mapped to the two-letter code that `Char.category.code` is expected to report for it.
 */
private fun charToCategory(): Map<Char, String> {
    val samples = listOf(
        '\u0378' to "Cn",
        '\u0041' to "Lu", // A
        '\u0061' to "Ll", // a
        '\u01C5' to "Lt", // Dž
        '\u02B0' to "Lm", // ʰ
        '\u01BB' to "Lo", // ƻ
        '\u0300' to "Mn",
        '\u0489' to "Me",
        '\u0903' to "Mc",
        '\u0030' to "Nd", // 0
        '\u2160' to "Nl", // Ⅰ
        '\u00B2' to "No", // ²
        '\u0020' to "Zs", // space
        '\u2028' to "Zl",
        '\u2029' to "Zp",
        '\u0018' to "Cc",
        '\u00AD' to "Cf",
        '\uE000' to "Co",
        '\uD800' to "Cs",
        '\u002D' to "Pd", // -
        '\u0028' to "Ps", // (
        '\u0029' to "Pe", // )
        '\u005F' to "Pc", // _
        '\u0021' to "Po", // !
        '\u002B' to "Sm", // +
        '\u0024' to "Sc", // $
        '\u005E' to "Sk", // ^
        '\u00A9' to "So", // ©
        '\u00AB' to "Pi", // «
        '\u00BB' to "Pf", // »
    )
    return samples.toMap()
}
|
||||
|
||||
/** Every sample from [charToCategory] must report the expected two-letter category code. */
@Test
fun charCategory() {
    charToCategory().forEach { (sample, expectedCode) ->
        val hexCode = sample.toInt().toString(radix = 16)
        assertEquals(expectedCode, sample.category.code, "char code: $hexCode")
    }
}
|
||||
|
||||
/** U+0378 is expected to be undefined and to fall into category "Cn". */
@Test
fun charCategoryUnassigned() {
    val sample = '\u0378'
    assertFalse(sample.isDefined())
    assertEquals(CharCategory.UNASSIGNED, sample.category)
    assertEquals("Cn", CharCategory.UNASSIGNED.code)
}
|
||||
|
||||
/** 'A' is expected to be an upper case letter of category "Lu". */
@Test
fun charCategoryUppercaseLetter() {
    val capitalA = '\u0041' // A
    assertTrue(capitalA.isLetterOrDigit())
    assertTrue(capitalA.isLetter())
    assertTrue(capitalA.isUpperCase())
    assertEquals(CharCategory.UPPERCASE_LETTER, capitalA.category)
    assertEquals("Lu", CharCategory.UPPERCASE_LETTER.code)
}
|
||||
|
||||
/** 'a' is expected to be a lower case letter of category "Ll". */
@Test
fun charCategoryLowercaseLetter() {
    val smallA = '\u0061' // a
    assertTrue(smallA.isLetterOrDigit())
    assertTrue(smallA.isLetter())
    assertTrue(smallA.isLowerCase())
    assertEquals(CharCategory.LOWERCASE_LETTER, smallA.category)
    assertEquals("Ll", CharCategory.LOWERCASE_LETTER.code)
}
|
||||
|
||||
/** U+01C5 is expected to be a title case letter of category "Lt". */
@Test
fun charCategoryTitlecaseLetter() {
    val capitalDz = '\u01C5' // Dž
    assertTrue(capitalDz.isLetterOrDigit())
    assertTrue(capitalDz.isLetter())
    assertTrue(capitalDz.isTitleCase())
    assertEquals(CharCategory.TITLECASE_LETTER, capitalDz.category)
    assertEquals("Lt", CharCategory.TITLECASE_LETTER.code)
}
|
||||
|
||||
/** U+02B0 is expected to be a letter of category "Lm". */
@Test
fun charCategoryModifierLetter() {
    val modifierSmallH = '\u02B0' // ʰ
    assertTrue(modifierSmallH.isLetterOrDigit())
    assertTrue(modifierSmallH.isLetter())
    assertEquals(CharCategory.MODIFIER_LETTER, modifierSmallH.category)
    assertEquals("Lm", CharCategory.MODIFIER_LETTER.code)
}
|
||||
|
||||
/** U+01BB is expected to be a letter of category "Lo". */
@Test
fun charCategoryOtherLetter() {
    val strokedTwo = '\u01BB' // ƻ
    assertTrue(strokedTwo.isLetterOrDigit())
    assertTrue(strokedTwo.isLetter())
    assertEquals(CharCategory.OTHER_LETTER, strokedTwo.category)
    assertEquals("Lo", CharCategory.OTHER_LETTER.code)
}
|
||||
|
||||
/** '0' is expected to be a digit of category "Nd". */
@Test
fun charCategoryDecimalDigitNumber() {
    val zero = '\u0030' // 0
    assertTrue(zero.isLetterOrDigit())
    assertTrue(zero.isDigit())
    assertEquals(CharCategory.DECIMAL_DIGIT_NUMBER, zero.category)
    assertEquals("Nd", CharCategory.DECIMAL_DIGIT_NUMBER.code)
}
|
||||
|
||||
/** U+2160 is expected to be in category "Nl" without counting as a digit. */
@Test
fun charCategoryLetterNumber() {
    val romanOne = '\u2160' // Ⅰ
    assertFalse(romanOne.isDigit())
    assertEquals(CharCategory.LETTER_NUMBER, romanOne.category)
    assertEquals("Nl", CharCategory.LETTER_NUMBER.code)
}
|
||||
|
||||
/** U+00B2 is expected to be in category "No" without counting as a digit. */
@Test
fun charCategoryOtherNumber() {
    val superscript2 = '\u00B2' // ²
    assertFalse(superscript2.isDigit())
    assertEquals(CharCategory.OTHER_NUMBER, superscript2.category)
    assertEquals("No", CharCategory.OTHER_NUMBER.code)
}
|
||||
|
||||
/** The space character is expected to be whitespace of category "Zs". */
@Test
fun charCategorySpaceSeparator() {
    val space = '\u0020'
    assertTrue(space.isWhitespace())
    assertEquals(CharCategory.SPACE_SEPARATOR, space.category)
    assertEquals("Zs", CharCategory.SPACE_SEPARATOR.code)
}
|
||||
|
||||
/** U+2028 is expected to be whitespace of category "Zl". */
@Test
fun charCategoryLineSeparator() {
    val sample = '\u2028'
    assertTrue(sample.isWhitespace())
    assertEquals(CharCategory.LINE_SEPARATOR, sample.category)
    assertEquals("Zl", CharCategory.LINE_SEPARATOR.code)
}
|
||||
|
||||
/** U+2029 is expected to be whitespace of category "Zp". */
@Test
fun charCategoryParagraphSeparator() {
    val sample = '\u2029'
    assertTrue(sample.isWhitespace())
    assertEquals(CharCategory.PARAGRAPH_SEPARATOR, sample.category)
    assertEquals("Zp", CharCategory.PARAGRAPH_SEPARATOR.code)
}
|
||||
|
||||
/** U+0018 is expected to be an ISO control character of category "Cc". */
@Test
fun charCategoryControl() {
    val cancel = '\u0018'
    assertTrue(cancel.isISOControl())
    assertEquals(CharCategory.CONTROL, cancel.category)
    assertEquals("Cc", CharCategory.CONTROL.code)
}
|
||||
}
|
||||
|
||||
@@ -63,7 +63,6 @@ actual enum class RegexOption {
|
||||
|
||||
// From char.kt
|
||||
|
||||
actual fun Char.isWhitespace(): Boolean = TODO("Wasm stdlib: Text")
|
||||
actual fun Char.isHighSurrogate(): Boolean = TODO("Wasm stdlib: Text")
|
||||
actual fun Char.isLowSurrogate(): Boolean = TODO("Wasm stdlib: Text")
|
||||
|
||||
@@ -131,6 +130,92 @@ public actual fun Char.uppercaseChar(): Char = TODO("Wasm stdlib: Text")
|
||||
@ExperimentalStdlibApi
|
||||
public actual fun Char.uppercase(): String = TODO("Wasm stdlib: Text")
|
||||
|
||||
|
||||
// None of the declarations below is implemented for Wasm yet: every body is a TODO() placeholder.

/**
 * The Unicode general category this character belongs to.
 */
public actual val Char.category: CharCategory get() = TODO("Wasm stdlib: Text")

/**
 * Checks whether this character (Unicode code point) is defined in Unicode.
 *
 * A character counts as defined when its [category] is anything other than [CharCategory.UNASSIGNED].
 */
public actual fun Char.isDefined(): Boolean = TODO("Wasm stdlib: Text")

/**
 * Checks whether this character is a letter.
 *
 * A character counts as a letter when its [category] is one of [CharCategory.UPPERCASE_LETTER],
 * [CharCategory.LOWERCASE_LETTER], [CharCategory.TITLECASE_LETTER], [CharCategory.MODIFIER_LETTER]
 * or [CharCategory.OTHER_LETTER].
 *
 * @sample samples.text.Chars.isLetter
 */
public actual fun Char.isLetter(): Boolean = TODO("Wasm stdlib: Text")

/**
 * Checks whether this character is a letter or a digit.
 *
 * @see isLetter
 * @see isDigit
 *
 * @sample samples.text.Chars.isLetterOrDigit
 */
public actual fun Char.isLetterOrDigit(): Boolean = TODO("Wasm stdlib: Text")

/**
 * Checks whether this character is a digit.
 *
 * A character counts as a digit when its [category] is [CharCategory.DECIMAL_DIGIT_NUMBER].
 *
 * @sample samples.text.Chars.isDigit
 */
public actual fun Char.isDigit(): Boolean = TODO("Wasm stdlib: Text")

/**
 * Checks whether this character is an upper case letter.
 *
 * A character counts as an upper case letter when its [category] is [CharCategory.UPPERCASE_LETTER].
 *
 * @sample samples.text.Chars.isUpperCase
 */
public actual fun Char.isUpperCase(): Boolean = TODO("Wasm stdlib: Text")

/**
 * Checks whether this character is a lower case letter.
 *
 * A character counts as a lower case letter when its [category] is [CharCategory.LOWERCASE_LETTER].
 *
 * @sample samples.text.Chars.isLowerCase
 */
public actual fun Char.isLowerCase(): Boolean = TODO("Wasm stdlib: Text")

/**
 * Checks whether this character is a title case letter.
 *
 * A character counts as a title case letter when its [category] is [CharCategory.TITLECASE_LETTER].
 *
 * @sample samples.text.Chars.isTitleCase
 */
public actual fun Char.isTitleCase(): Boolean = TODO("Wasm stdlib: Text")

/**
 * Checks whether this character is an ISO control character.
 *
 * A character counts as an ISO control character when its [category] is [CharCategory.CONTROL].
 *
 * @sample samples.text.Chars.isISOControl
 */
public actual fun Char.isISOControl(): Boolean = TODO("Wasm stdlib: Text")

/**
 * Checks whether this character is whitespace according to the Unicode standard.
 * Returns `true` for a whitespace character.
 *
 * @sample samples.text.Chars.isWhitespace
 */
public actual fun Char.isWhitespace(): Boolean = TODO("Wasm stdlib: Text")
|
||||
|
||||
// From string.kt
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,171 @@
|
||||
/*
|
||||
* Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package kotlin.text
|
||||
|
||||
/**
 * The general category of a character, as defined by the Unicode specification.
 *
 * Each entry is documented with the two-letter code of the category it stands for.
 */
public actual enum class CharCategory {
    /** Unicode general category "Cn". */
    UNASSIGNED,

    /** Unicode general category "Lu". */
    UPPERCASE_LETTER,

    /** Unicode general category "Ll". */
    LOWERCASE_LETTER,

    /** Unicode general category "Lt". */
    TITLECASE_LETTER,

    /** Unicode general category "Lm". */
    MODIFIER_LETTER,

    /** Unicode general category "Lo". */
    OTHER_LETTER,

    /** Unicode general category "Mn". */
    NON_SPACING_MARK,

    /** Unicode general category "Me". */
    ENCLOSING_MARK,

    /** Unicode general category "Mc". */
    COMBINING_SPACING_MARK,

    /** Unicode general category "Nd". */
    DECIMAL_DIGIT_NUMBER,

    /** Unicode general category "Nl". */
    LETTER_NUMBER,

    /** Unicode general category "No". */
    OTHER_NUMBER,

    /** Unicode general category "Zs". */
    SPACE_SEPARATOR,

    /** Unicode general category "Zl". */
    LINE_SEPARATOR,

    /** Unicode general category "Zp". */
    PARAGRAPH_SEPARATOR,

    /** Unicode general category "Cc". */
    CONTROL,

    /** Unicode general category "Cf". */
    FORMAT,

    /** Unicode general category "Co". */
    PRIVATE_USE,

    /** Unicode general category "Cs". */
    SURROGATE,

    /** Unicode general category "Pd". */
    DASH_PUNCTUATION,

    /** Unicode general category "Ps". */
    START_PUNCTUATION,

    /** Unicode general category "Pe". */
    END_PUNCTUATION,

    /** Unicode general category "Pc". */
    CONNECTOR_PUNCTUATION,

    /** Unicode general category "Po". */
    OTHER_PUNCTUATION,

    /** Unicode general category "Sm". */
    MATH_SYMBOL,

    /** Unicode general category "Sc". */
    CURRENCY_SYMBOL,

    /** Unicode general category "Sk". */
    MODIFIER_SYMBOL,

    /** Unicode general category "So". */
    OTHER_SYMBOL,

    /** Unicode general category "Pi". */
    INITIAL_QUOTE_PUNCTUATION,

    /** Unicode general category "Pf". */
    FINAL_QUOTE_PUNCTUATION;

    /**
     * The two-letter code of this general category in the Unicode specification.
     *
     * Not implemented for Wasm yet: the getter is a TODO() placeholder.
     */
    public actual val code: String get() = TODO("Wasm stdlib: Text")

    /**
     * Checks whether the character [char] belongs to this category.
     *
     * Not implemented for Wasm yet: the body is a TODO() placeholder.
     */
    public actual operator fun contains(char: Char): Boolean = TODO("Wasm stdlib: Text")
}
|
||||
@@ -34,4 +34,11 @@ task run(type: JavaExec) {
|
||||
classpath sourceSets.main.runtimeClasspath
|
||||
args = ["${rootDir}"]
|
||||
systemProperty 'line.separator', '\n'
|
||||
}
|
||||
|
||||
// Runs generators.unicode.GenerateUnicodeDataKt on the main runtime classpath,
// passing the root directory as its only argument.
task generateUnicodeData(type: JavaExec) {
    group('application')
    main('generators.unicode.GenerateUnicodeDataKt')
    classpath(sourceSets.main.runtimeClasspath)
    args = ["$rootDir"]
}
|
||||
@@ -0,0 +1,99 @@
|
||||
/*
|
||||
* Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package generators.unicode
|
||||
|
||||
import generators.unicode.ranges.CharCategoryTestGenerator
|
||||
import generators.unicode.ranges.RangesGenerator
|
||||
import templates.COPYRIGHT_NOTICE
|
||||
import templates.KotlinTarget
|
||||
import templates.readCopyrightNoticeFromProfile
|
||||
import java.io.File
|
||||
import java.net.URL
|
||||
import kotlin.system.exitProcess
|
||||
|
||||
|
||||
// The latest public version of the Unicode Character Database files is listed at https://www.unicode.org/versions/latest/.
private const val unicodeDataUrl = "https://www.unicode.org/Public/13.0.0/ucd/UnicodeData.txt"

/**
 * Generates sources derived from UnicodeData.txt.
 *
 * The program can be run in two ways:
 * 1. With one argument, the root directory of the project: sources are generated for js and js-ir.
 *    _CharCategoryTest.kt and its supporting files are generated as well, to test the generated sources.
 *    The generated test is meant to be run after updating the Unicode version and should not be merged to master.
 * 2. With two arguments, the name of the target and the directory to generate its sources in.
 *    No tests are generated.
 *
 * With any other number of arguments the usage is printed and the process exits with status 1
 * before anything is downloaded.
 */
fun main(args: Array<String>) {
    // Validate the command line first, so that a usage error does not cost a download.
    if (args.size !in 1..2) {
        println(
            """Parameters:
    <kotlin-base-dir> - generates UnicodeData.txt sources for js and js-ir targets using paths derived from specified base path
    <target> <target-dir> - generates UnicodeData.txt sources for the specified target in the specified target directory
"""
        )
        exitProcess(1)
    }

    val unicodeDataLines = URL(unicodeDataUrl).openStream().reader().readLines()

    val generators = mutableListOf<UnicodeDataGenerator>()

    // Registers the four range generators (categories, digits, letters, whitespace) that write into [generatedDir].
    fun addRangesGenerators(generatedDir: File, target: KotlinTarget) {
        val categoryRangesGenerator = RangesGenerator.forCharCategory(generatedDir.resolve("_CharCategories.kt"), target)
        val digitRangesGenerator = RangesGenerator.forDigit(generatedDir.resolve("_DigitChars.kt"), target)
        val letterRangesGenerator = RangesGenerator.forLetter(generatedDir.resolve("_LetterChars.kt"), target)
        val whitespaceRangesGenerator = RangesGenerator.forWhitespace(generatedDir.resolve("_WhitespaceChars.kt"))
        generators.add(categoryRangesGenerator)
        generators.add(digitRangesGenerator)
        generators.add(letterRangesGenerator)
        generators.add(whitespaceRangesGenerator)
    }

    if (args.size == 1) {
        val baseDir = File(args.first())

        val categoryTestFile = baseDir.resolve("libraries/stdlib/js/test/text/unicodeData/_CharCategoryTest.kt")
        val categoryTestGenerator = CharCategoryTestGenerator(categoryTestFile)
        generators.add(categoryTestGenerator)

        val jsGeneratedDir = baseDir.resolve("libraries/stdlib/js/src/generated/")
        addRangesGenerators(jsGeneratedDir, KotlinTarget.JS)

        val jsIrGeneratedDir = baseDir.resolve("libraries/stdlib/js-ir/src/generated/")
        addRangesGenerators(jsIrGeneratedDir, KotlinTarget.JS_IR)

        // For debugging: keep a local copy of the downloaded file so its content can be inspected.
        val unicodeDataFile = baseDir.resolve("libraries/tools/kotlin-stdlib-gen/src/generators/unicode/UnicodeData.txt")
        unicodeDataFile.writeText(unicodeDataLines.joinToString(separator = "\n"))
    } else {
        val (targetName, targetDir) = args

        val target = KotlinTarget.values.singleOrNull { it.name.equals(targetName, ignoreCase = true) }
            ?: error("Invalid target: $targetName")

        addRangesGenerators(File(targetDir), target)
    }

    COPYRIGHT_NOTICE =
        readCopyrightNoticeFromProfile { Thread.currentThread().contextClassLoader.getResourceAsStream("apache.xml").reader() }

    for (line in unicodeDataLines) {
        // A blank line has no `;`-separated fields to read.
        if (line.isBlank()) continue

        val parts = line.split(";")
        // Only code points written with at most four hex digits are passed on.
        if (parts[0].length <= 4) {
            generators.forEach { it.appendChar(parts[0], parts[1], parts[2]) }
        }
    }
    generators.forEach { it.close() }
}
|
||||
|
||||
/**
 * A consumer of UnicodeData.txt entries.
 *
 * `main` passes each entry whose first field has at most four characters to [appendChar]
 * and calls [close] once after the last entry.
 */
internal interface UnicodeDataGenerator {
    /**
     * Accepts one UnicodeData.txt entry.
     *
     * @param char the first `;`-separated field of the entry.
     * @param name the second field of the entry.
     * @param categoryCode the third field of the entry.
     */
    fun appendChar(char: String, name: String, categoryCode: String)

    /** Called once, after all entries have been appended. */
    fun close()
}
|
||||
+173
@@ -0,0 +1,173 @@
|
||||
/*
|
||||
* Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package generators.unicode.ranges
|
||||
|
||||
import generators.unicode.UnicodeDataGenerator
|
||||
import generators.unicode.ranges.writers.writeHeader
|
||||
import java.io.File
|
||||
import java.io.FileWriter
|
||||
|
||||
/**
 * Writes `_CharCategoryTest.kt` to [outputFile] and the data files it needs next to it.
 *
 * Every character passed to [appendChar] becomes a `CharProperties(...)` entry in one of the
 * `_UnicodeData<N>.kt` chunk files, each holding at most [CHUNK_SIZE] entries. [close] then writes
 * `_UnicodeDataFlatten.kt` (all chunks flattened into `unicodeData`), `_CharProperties.kt`
 * and the test class itself.
 */
internal class CharCategoryTestGenerator(private val outputFile: File) : UnicodeDataGenerator {
    // Index of the chunk file the next character goes into.
    private var arrayIndex = 0

    // Number of entries already written to the current chunk.
    private var arraySize = 0

    // Writer of the chunk file currently being filled; null while no chunk is open.
    private var writer: FileWriter? = null

    init {
        outputFile.parentFile.mkdirs()
    }

    override fun appendChar(char: String, name: String, categoryCode: String) {
        if (arraySize == 0) {
            // The previous chunk (if any) is full: terminate it and start the next file.
            finishCurrentChunk()
            generateUnicodeDataHeader(arrayIndex)
        }

        // In UnicodeData.txt a name ending in ", First>" marks the first code point of a range.
        val isStart = name.endsWith(", First>")

        writer?.appendLine(" CharProperties(char = '\\u$char', isStartOfARange = $isStart, categoryCode = \"$categoryCode\"),")

        arraySize++
        if (arraySize == CHUNK_SIZE) {
            arraySize = 0
            arrayIndex++
        }
    }

    override fun close() {
        finishCurrentChunk()

        // When the last chunk was filled completely (or nothing was appended at all), arrayIndex already
        // points past the last chunk that exists, so it must not be referenced from the flatten file.
        val chunkCount = if (arraySize == 0) arrayIndex else arrayIndex + 1

        generateFlattenUnicodeData(chunkCount)
        generateCharProperties()
        generateCharCategoryTest()
    }

    // Closes the array literal of the chunk being written, if there is one, and releases its writer.
    private fun finishCurrentChunk() {
        writer?.use { it.appendLine(")") }
        writer = null
    }

    // Writes the file that concatenates the first [chunkCount] chunk arrays into `unicodeData`.
    private fun generateFlattenUnicodeData(chunkCount: Int) {
        writeFile(outputFile.resolveSibling("_UnicodeDataFlatten.kt")) { out ->
            out.appendLine("internal val unicodeData = arrayOf<Array<CharProperties>>(")
            for (index in 0 until chunkCount) {
                out.appendLine(" unicodeData$index,")
            }
            out.appendLine(").flatten()")
        }
    }

    // Writes the declaration of the `CharProperties` data class used by the chunk files.
    private fun generateCharProperties() {
        writeFile(outputFile.resolveSibling("_CharProperties.kt")) { out ->
            out.appendLine("data class CharProperties(val char: Char, val isStartOfARange: Boolean, val categoryCode: String)")
        }
    }

    // Writes the test class that compares `unicodeData` with the stdlib's answers for every Char.
    private fun generateCharCategoryTest() {
        writeFile(outputFile) { out ->
            out.appendLine(
                """
                import kotlin.test.*

                class CharCategoryTest {
                    @Test
                    fun category() {
                        val charProperties = hashMapOf<Char, CharProperties>()

                        for (properties in unicodeData) {
                            charProperties[properties.char] = properties
                        }

                        var properties: CharProperties? = null

                        for (char in Char.MIN_VALUE..Char.MAX_VALUE) {
                            if (charProperties.containsKey(char)) {
                                properties = charProperties.getValue(char)
                            } else if (properties?.isStartOfARange != true) {
                                properties = null
                            }

                            val charCode = char.toInt().toString(radix = 16).padStart(length = 4, padChar = '0')
                            val expectedCategoryCode = properties?.categoryCode ?: CharCategory.UNASSIGNED.code

                            fun <T> test(expected: T, actual: T, name: String) {
                                assertEquals(expected, actual, "Char:[${"$"}char] with code:[${"$"}charCode] in Unicode has ${"$"}name = ${"$"}expected, but in Kotlin ${"$"}name = ${"$"}actual")
                            }

                            test(expectedCategoryCode, char.category.code, "category")

                            val expectedIsDigit = isDigit(expectedCategoryCode)
                            test(expectedIsDigit, char.isDigit(), "isDigit()")

                            val expectedIsLetter = isLetter(expectedCategoryCode)
                            test(expectedIsLetter, char.isLetter(), "isLetter()")

                            val expectedIsLetterOrDigit = expectedIsLetter || expectedIsDigit
                            test(expectedIsLetterOrDigit, char.isLetterOrDigit(), "isLetterOrDigit()")

                            val expectedIsLowerCase = isLowerCase(expectedCategoryCode)
                            test(expectedIsLowerCase, char.isLowerCase(), "isLowerCase()")

                            val expectedIsUpperCase = isUpperCase(expectedCategoryCode)
                            test(expectedIsUpperCase, char.isUpperCase(), "isUpperCase()")

                            val expectedIsWhitespace = isWhitespace(char, expectedCategoryCode)
                            test(expectedIsWhitespace, char.isWhitespace(), "isWhitespace()")
                        }
                    }

                    private fun isDigit(categoryCode: String): Boolean {
                        return categoryCode == CharCategory.DECIMAL_DIGIT_NUMBER.code
                    }

                    private fun isLetter(categoryCode: String): Boolean {
                        return categoryCode in listOf(
                            CharCategory.UPPERCASE_LETTER,
                            CharCategory.LOWERCASE_LETTER,
                            CharCategory.TITLECASE_LETTER,
                            CharCategory.MODIFIER_LETTER,
                            CharCategory.OTHER_LETTER
                        ).map { it.code }
                    }

                    private fun isLowerCase(categoryCode: String): Boolean {
                        return categoryCode == CharCategory.LOWERCASE_LETTER.code
                    }

                    private fun isUpperCase(categoryCode: String): Boolean {
                        return categoryCode == CharCategory.UPPERCASE_LETTER.code
                    }

                    private fun isWhitespace(char: Char, categoryCode: String): Boolean {
                        return categoryCode in listOf(
                            CharCategory.SPACE_SEPARATOR.code,
                            CharCategory.LINE_SEPARATOR.code,
                            CharCategory.PARAGRAPH_SEPARATOR.code
                        ) || char in '\u0009'..'\u000D' || char in '\u001C'..'\u001F'
                    }
                }
                """.trimIndent()
            )
        }
    }

    // Opens chunk file number [arrayIndex] and writes everything up to the opening of its array literal.
    // The writer stays open in [writer] so that appendChar can add the entries.
    private fun generateUnicodeDataHeader(arrayIndex: Int) {
        val file = outputFile.resolveSibling("_UnicodeData$arrayIndex.kt")
        writer = FileWriter(file).also { out ->
            out.writeFileHeader(file)
            out.appendLine("internal val unicodeData$arrayIndex = arrayOf<CharProperties>(")
        }
    }

    // Writes a complete file: the common header, then [content]; the writer is closed even if [content] throws.
    private fun writeFile(file: File, content: (FileWriter) -> Unit) {
        FileWriter(file).use { out ->
            out.writeFileHeader(file)
            content(out)
        }
    }

    // The header shared by all generated test files, followed by an empty line.
    private fun FileWriter.writeFileHeader(file: File) {
        writeHeader(file, "test.text.unicodeData")
        appendLine()
    }

    private companion object {
        // Maximum number of entries per `_UnicodeData<N>.kt` chunk file.
        const val CHUNK_SIZE = 2048
    }
}
|
||||
@@ -0,0 +1,72 @@
|
||||
/*
|
||||
* Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package generators.unicode.ranges
|
||||
|
||||
import generators.requireExistingDir
|
||||
import generators.unicode.UnicodeDataGenerator
|
||||
import generators.unicode.ranges.builders.*
|
||||
import generators.unicode.ranges.writers.*
|
||||
import templates.KotlinTarget
|
||||
import templates.Platform
|
||||
import java.io.File
|
||||
import java.io.FileWriter
|
||||
|
||||
/**
 * A [UnicodeDataGenerator] that feeds every appended character to [rangesBuilder] and,
 * on [close], writes the built ranges to [outputFile] through [rangesWriter].
 *
 * Instances are obtained from the factory functions of the companion object.
 */
internal class RangesGenerator private constructor(
    private val outputFile: File,
    private val rangesBuilder: RangesBuilder,
    private val rangesWriter: RangesWriter,
) : UnicodeDataGenerator {

    init {
        // NOTE(review): presumably fails when the output directory is missing — confirm in requireExistingDir.
        outputFile.parentFile.requireExistingDir()
    }

    override fun appendChar(char: String, name: String, categoryCode: String) =
        rangesBuilder.append(char, name, categoryCode)

    /** Builds the ranges and writes the output file: header, empty line, range count comment, ranges. */
    override fun close() {
        val (starts, ends, categories) = rangesBuilder.build()

        FileWriter(outputFile).use { out ->
            out.writeHeader(outputFile, "kotlin.text")
            out.appendLine()
            out.appendLine("// ${starts.size} ranges totally")

            rangesWriter.write(starts, ends, categories, out)
        }
    }

    companion object {
        /** Generator of the category ranges; the JS platform gets the `VarLenBase64` writer. */
        fun forCharCategory(outputFile: File, target: KotlinTarget): RangesGenerator {
            val builder = CharCategoryRangesBuilder()
            val strategy = RangesWritingStrategy.of(target, "Category")
            val writer: RangesWriter =
                if (target.platform == Platform.JS) VarLenBase64CategoryRangesWriter(strategy) else CategoryRangesWriter(strategy)
            return RangesGenerator(outputFile, builder, writer)
        }

        /** Generator of the letter ranges; the JS platform gets the `VarLenBase64` writer. */
        fun forLetter(outputFile: File, target: KotlinTarget): RangesGenerator {
            val builder = LetterRangesBuilder()
            val strategy = RangesWritingStrategy.of(target, "Letter")
            val writer: RangesWriter =
                if (target.platform == Platform.JS) VarLenBase64LetterRangesWriter(strategy) else LetterRangesWriter(strategy)
            return RangesGenerator(outputFile, builder, writer)
        }

        /** Generator of the digit ranges. */
        fun forDigit(outputFile: File, target: KotlinTarget): RangesGenerator =
            RangesGenerator(outputFile, DigitRangesBuilder(), DigitRangesWriter(RangesWritingStrategy.of(target, "Digit")))

        /** Generator of the whitespace ranges; it does not depend on the target. */
        fun forWhitespace(outputFile: File): RangesGenerator =
            RangesGenerator(outputFile, WhitespaceRangesBuilder(), WhitespaceRangesWriter())
    }
}
|
||||
+56
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package generators.unicode.ranges
|
||||
|
||||
import templates.KotlinTarget
|
||||
import templates.Platform
|
||||
import java.io.FileWriter
|
||||
|
||||
/**
 * Target-specific details of how generated ranges are written.
 *
 * [of] picks [JsRangesWritingStrategy] for the JS platform and [NativeRangesWritingStrategy] for every other one.
 */
internal sealed class RangesWritingStrategy {
    abstract val indentation: String
    abstract val rangesAnnotation: String
    abstract val rangesVisibilityModifier: String

    /** Hook invoked with the output [writer] before the ranges are written. */
    abstract fun beforeWritingRanges(writer: FileWriter)

    /** Hook invoked with the output [writer] after the ranges are written. */
    abstract fun afterWritingRanges(writer: FileWriter)

    /** The expression by which generated code refers to the range declaration called [name]. */
    abstract fun rangeRef(name: String): String

    companion object {
        /** Selects the strategy for [target]; [wrapperName] is only used by the JS strategy. */
        fun of(target: KotlinTarget, wrapperName: String): RangesWritingStrategy =
            if (target.platform == Platform.JS) JsRangesWritingStrategy(wrapperName) else NativeRangesWritingStrategy
    }
}
|
||||
|
||||
/**
 * Strategy for non-JS targets: no indentation, ranges annotated with `@SharedImmutable` and declared `private`,
 * nothing written before or after them, and ranges referenced by their plain name.
 */
internal object NativeRangesWritingStrategy : RangesWritingStrategy() {
    override val indentation: String = ""
    override val rangesAnnotation: String = "@SharedImmutable\n"
    override val rangesVisibilityModifier: String = "private"

    override fun beforeWritingRanges(writer: FileWriter) = Unit

    override fun afterWritingRanges(writer: FileWriter) = Unit

    override fun rangeRef(name: String): String = name
}
|
||||
|
||||
/**
 * Strategy for the JS platform: the ranges are wrapped into `private object <wrapperName> { ... }`,
 * indented by four spaces, declared `internal` without an annotation,
 * and referenced as `<wrapperName>.<name>`.
 *
 * See KT-42461, KT-40482.
 */
internal class JsRangesWritingStrategy(
    private val wrapperName: String
) : RangesWritingStrategy() {
    override val indentation: String = " ".repeat(4)
    override val rangesAnnotation: String = ""
    override val rangesVisibilityModifier: String = "internal"

    // Opens the wrapper object.
    override fun beforeWritingRanges(writer: FileWriter) {
        writer.appendLine("private object $wrapperName {")
    }

    // Closes the wrapper object.
    override fun afterWritingRanges(writer: FileWriter) {
        writer.appendLine("}")
    }

    override fun rangeRef(name: String): String = "$wrapperName.$name"
}
|
||||
+47
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
* Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package generators.unicode.ranges.builders
|
||||
|
||||
import generators.unicode.ranges.patterns.PeriodicRangePattern
|
||||
import generators.unicode.ranges.patterns.RangePattern
|
||||
|
||||
/**
 * Ranges builder for general categories: the category id is the category code itself and no category is skipped.
 */
internal class CharCategoryRangesBuilder : RangesBuilder() {

    override fun categoryId(categoryCode: String): String = categoryCode

    override fun shouldSkip(categoryId: String): Boolean = false

    override val makeOnePeriodCategory: (Array<String>) -> Int
        get() = ::periodPatternCategory

    /**
     * Tries to re-create [lastRange] as a periodic pattern with a longer period that also covers [charCode]:
     * a period of 1 is retried with 2 and then 3, a period of 2 is retried with 3, any other period gives `null`.
     *
     * [lastRange] is required to be a [PeriodicRangePattern].
     */
    override fun evolveLastRange(lastRange: RangePattern, charCode: Int, categoryId: String): RangePattern? {
        require(lastRange is PeriodicRangePattern)
        val periodic: PeriodicRangePattern = lastRange

        fun withPeriod(length: Int): RangePattern? =
            PeriodicRangePattern.from(periodic, charCode, categoryId, sequenceLength = length, isPeriodic = true, unassignedCategoryId, ::periodPatternCategory)

        return when (periodic.sequenceLength) {
            1 -> withPeriod(2) ?: withPeriod(3)
            2 -> withPeriod(3)
            else -> null
        }
    }
}
|
||||
|
||||
// 17 and 31 category values are not reserved. Use 17 to replace UNASSIGNED value (0) to be able to encode range pattern categories.
internal const val UNASSIGNED_CATEGORY_VALUE_REPLACEMENT = 17

// Maps every general category code to its Int value, with the value 0 substituted by the replacement above.
private val categoryCodeToValue: Map<String, Int> = CharCategory.values().associate { category ->
    val value = if (category.value == 0) UNASSIGNED_CATEGORY_VALUE_REPLACEMENT else category.value
    category.code to value
}

/**
 * Packs the values of the [categoryIds] into a single Int, 5 bits per category.
 *
 * The value of the category at index `i` occupies bits `5 * i` until `5 * i + 5`.
 *
 * @param categoryIds general category codes, in the order of the chars they belong to
 * @return the packed pattern; 0 for an empty array
 * @throws IllegalStateException if a category id is not a known general category code
 */
private fun periodPatternCategory(categoryIds: Array<String>): Int {
    // Each category value is <= 30, thus 5 bits is enough to represent it.
    return categoryIds.foldIndexed(0) { index, pattern, categoryId ->
        val value = categoryCodeToValue[categoryId] ?: error("Unknown category code: $categoryId")
        pattern or (value shl (5 * index))
    }
}
|
||||
+25
@@ -0,0 +1,25 @@
|
||||
/*
|
||||
* Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package generators.unicode.ranges.builders
|
||||
|
||||
import generators.unicode.ranges.patterns.RangePattern
|
||||
|
||||
/**
 * Ranges builder that keeps only the chars whose category code is the code of [CharCategory.DECIMAL_DIGIT_NUMBER].
 *
 * Ranges are never transformed, and the Int representation of every range is 0.
 */
internal class DigitRangesBuilder : RangesBuilder() {
    /** The category code is used as the category id without any transformation. */
    override fun categoryId(categoryCode: String): String = categoryCode

    /** Everything but decimal digits is skipped. */
    override fun shouldSkip(categoryId: String): Boolean {
        val digitCode = CharCategory.DECIMAL_DIGIT_NUMBER.code
        return categoryId != digitCode
    }

    override val makeOnePeriodCategory: (Array<String>) -> Int
        get() = { _ -> 0 }

    /** A range is never evolved: a char that doesn't fit always starts a new range. */
    override fun evolveLastRange(lastRange: RangePattern, charCode: Int, categoryId: String): RangePattern? = null
}
|
||||
+89
@@ -0,0 +1,89 @@
|
||||
/*
|
||||
* Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package generators.unicode.ranges.builders
|
||||
|
||||
import generators.unicode.ranges.patterns.PeriodicRangePattern
|
||||
import generators.unicode.ranges.patterns.RangePattern
|
||||
import generators.unicode.ranges.patterns.GapRangePattern
|
||||
|
||||
/**
 * Ranges builder for letters.
 *
 * Lowercase and uppercase letters keep their category code as the category id, the remaining letter
 * categories share the `"OL"` id, and everything else gets the `"NL"` id and is skipped.
 */
internal class LetterRangesBuilder : RangesBuilder() {

    override fun categoryId(categoryCode: String): String {
        if (categoryCode == CharCategory.LOWERCASE_LETTER.code || categoryCode == CharCategory.UPPERCASE_LETTER.code) {
            return categoryCode
        }
        // "OL" - other letter, "NL" - not a letter
        return if (categoryCode in letterCategoryCodes) "OL" else "NL"
    }

    override fun shouldSkip(categoryId: String): Boolean = categoryId == "NL"

    override val makeOnePeriodCategory: (Array<String>) -> Int
        get() = ::periodPatternCategory

    /**
     * Tries to rebuild [lastRange] together with the [charCode] as another kind of range.
     *
     * - a periodic range with period 1 is retried as a periodic range with period 2, then as a gap range,
     *   then as a non-periodic sequence of at most 15 chars;
     * - a periodic range with period 2 is retried as a gap range, then as a non-periodic sequence;
     * - a gap range is retried as a non-periodic sequence;
     * - any other periodic range can't be evolved.
     */
    override fun evolveLastRange(lastRange: RangePattern, charCode: Int, categoryId: String): RangePattern? {
        val unassigned = unassignedCategoryId

        fun periodic(length: Int, isPeriodic: Boolean): RangePattern? =
            PeriodicRangePattern.from(lastRange, charCode, categoryId, length, isPeriodic, unassigned, ::periodPatternCategory)

        fun withGaps(): RangePattern? =
            GapRangePattern.from(lastRange, charCode, categoryId, unassigned, ::gapPatternCategory)

        if (lastRange is GapRangePattern) {
            return periodic(length = 15, isPeriodic = false)
        }
        if (lastRange !is PeriodicRangePattern) {
            error("Unreachable")
        }

        return when (lastRange.sequenceLength) {
            1 -> periodic(length = 2, isPeriodic = true) ?: withGaps() ?: periodic(length = 15, isPeriodic = false)
            2 -> withGaps() ?: periodic(length = 15, isPeriodic = false)
            else -> null
        }
    }
}
|
||||
|
||||
// Codes of the five letter categories: uppercase, lowercase, titlecase, modifier and other letters.
private val letterCategoryCodes = listOf(
    CharCategory.UPPERCASE_LETTER,
    CharCategory.LOWERCASE_LETTER,
    CharCategory.TITLECASE_LETTER,
    CharCategory.MODIFIER_LETTER,
    CharCategory.OTHER_LETTER
).map { it.code }
|
||||
|
||||
/**
 * Returns the 2-bit mask that stands for the [categoryId]:
 * `01` for lowercase letters, `10` for uppercase letters, `11` for `"OL"`, and `00` for `"NL"` and the empty id.
 *
 * Fails with [IllegalStateException] for any other id.
 */
private fun bitmask(categoryId: String): Int {
    if (categoryId == CharCategory.LOWERCASE_LETTER.code) return 0b01
    if (categoryId == CharCategory.UPPERCASE_LETTER.code) return 0b10
    return when (categoryId) {
        "OL" -> 0b11
        "NL", "" -> 0b00
        else -> error("Unknown categoryID: $categoryId")
    }
}
|
||||
|
||||
/**
 * Packs the 2-bit masks of the [categoryIds] into a single Int.
 *
 * The mask of the category at index `i` occupies bits `2 * i` and `2 * i + 1`,
 * and one more bit is set right above the mask of the last category.
 * The two lowest bits of the result are required to be non-zero.
 */
private fun periodPatternCategory(categoryIds: Array<String>): Int {
    val masks = categoryIds.foldIndexed(0) { position, packed, id ->
        packed or (bitmask(id) shl (2 * position))
    }
    val topBit = 1 shl (2 * categoryIds.size)
    val encoded = masks or topBit
    check(encoded and 0x3 != 0)
    return encoded
}
|
||||
|
||||
/**
 * Packs the [gaps] of a range that begins at [start] into a single Int.
 *
 * Starting from bit 2, each gap contributes the number of chars between the previous gap (or the range start)
 * and the gap, followed by the length of the gap. The two lowest bits of the result are required to be zero.
 */
private fun gapPatternCategory(start: Int, end: Int, gaps: List<GapRangePattern.Companion.Gap>): Int {
    var encoded = 0
    var bitOffset = 2
    // The first char after the previous gap, or the range start while no gap has been processed.
    var charsFrom = start

    for (gap in gaps) {
        val charsBeforeGap = gap.start - charsFrom
        encoded += charsBeforeGap shl bitOffset
        bitOffset += GapRangePattern.CHARS_BITS
        encoded += gap.length shl bitOffset
        bitOffset += GapRangePattern.GAP_BITS
        charsFrom = gap.start + gap.length
    }

    check(encoded and 0x3 == 0)
    return encoded
}
|
||||
+192
@@ -0,0 +1,192 @@
|
||||
/*
|
||||
* Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package generators.unicode.ranges.builders
|
||||
|
||||
import generators.unicode.ranges.patterns.PeriodicRangePattern
|
||||
import generators.unicode.ranges.patterns.RangePattern
|
||||
import generators.unicode.ranges.patterns.rangeLength
|
||||
|
||||
/**
 * The base class of character ranges builders.
 *
 * Lines of the UnicodeData.txt file are fed in through [append], and [build] returns the collected ranges.
 * Subclasses decide how category codes map to category ids, which category ids are skipped,
 * and how the last range is transformed when a char doesn't fit into it.
 */
internal abstract class RangesBuilder {
    private val ranges = mutableListOf<RangePattern>()
    private var lastAppendedCharCode = -1

    /**
     * Appends a line from the UnicodeData.txt file.
     *
     * @param char the char code as a hexadecimal string
     * @param name the char name; a name ending with `", First>"` or `", Last>"` marks a bound of a range of chars
     * @param categoryCode the Unicode general category code of the char
     */
    fun append(char: String, name: String, categoryCode: String) {
        val charCode = char.toInt(radix = 16)
        val categoryId = categoryId(categoryCode)

        when {
            name.endsWith(", First>") -> rangeFirst(charCode, categoryId)
            name.endsWith(", Last>") -> rangeLast(charCode, categoryId)
            else -> append(charCode, categoryId)
        }

        lastAppendedCharCode = charCode
    }

    /**
     * Optimizes the number of ranges and returns them.
     *
     * Chars after the last appended one, up to `0xffff`, are appended as unassigned first.
     *
     * Returns a [Triple] containing lists of range starts, ends and categories in that particular order.
     */
    fun build(): Triple<List<Int>, List<Int>, List<Int>> {
        for (code in lastAppendedCharCode + 1..0xffff) {
            appendSingleChar(code, unassignedCategoryId)
        }

        // Walk the ranges from the end, moving the last char of the previous range
        // to the start of the current one for as long as the current range accepts it.
        var index = ranges.lastIndex
        while (index > 0) {
            val previous = ranges[index - 1]
            val previousEnd = previous.rangeEnd()
            val previousEndCategory = previous.categoryIdOf(previousEnd)
            val current = ranges[index]
            if (current.prepend(previousEnd, previousEndCategory)) {
                val newPrevious = removeLast(previous)
                if (newPrevious != null) {
                    ranges[index - 1] = newPrevious
                } else {
                    // The previous range got empty; the current range is now located at `index - 1`.
                    ranges.removeAt(index - 1)
                    index--
                }
            } else {
                index--
            }
        }

        return Triple(ranges.map { it.rangeStart() }, ranges.map { it.rangeEnd() }, ranges.map { it.category() })
    }

    /**
     * Appends the [charCode] as the start of a range of chars with the specified [categoryId].
     */
    private fun rangeFirst(charCode: Int, categoryId: String) {
        append(charCode, categoryId)
    }

    /**
     * Appends the [charCode] as the end of a range of chars with the specified [categoryId].
     * Chars between last appended char and the [charCode] are considered to have the specified [categoryId].
     */
    private fun rangeLast(charCode: Int, categoryId: String) {
        if (!shouldSkip(categoryId)) {
            val lastRange = ranges.last()
            check(lastRange.rangeEnd() == lastAppendedCharCode) {
                "The range ending at 0x${charCode.toString(16)} must directly follow its first char 0x${lastAppendedCharCode.toString(16)}"
            }
            check(lastRange.categoryIdOf(lastAppendedCharCode) == categoryId) {
                "The first and the last chars of the range ending at 0x${charCode.toString(16)} must have the same category id: $categoryId"
            }
        }

        for (code in lastAppendedCharCode + 1..charCode) {
            appendSingleChar(code, categoryId)
        }
    }

    /**
     * Appends the [charCode] with the specified [categoryId].
     * Chars between last appended char and the [charCode] are considered to be unassigned.
     */
    private fun append(charCode: Int, categoryId: String) {
        for (code in lastAppendedCharCode + 1 until charCode) {
            appendSingleChar(code, unassignedCategoryId)
        }
        appendSingleChar(charCode, categoryId)
    }

    /**
     * Appends the [charCode] with the specified [categoryId] to the last range, or a new range containing the [charCode] is created.
     * The last range can be transformed to another range type to accommodate the [charCode].
     * Nothing is appended if the [categoryId] is skipped by this builder.
     */
    private fun appendSingleChar(charCode: Int, categoryId: String) {
        if (shouldSkip(categoryId)) return

        if (ranges.isEmpty()) {
            ranges.add(createRange(charCode, categoryId))
            return
        }

        val lastRange = ranges.last()
        if (lastRange.append(charCode, categoryId)) return

        val newLastRange = evolveLastRange(lastRange, charCode, categoryId)
        if (newLastRange != null) {
            ranges[ranges.lastIndex] = newLastRange
        } else {
            ranges.add(createRange(charCode, categoryId))
        }
    }

    /**
     * Category id used for unassigned chars.
     */
    protected val unassignedCategoryId: String
        get() = categoryId(CharCategory.UNASSIGNED.code)

    /**
     * Creates the simplest range containing the single [charCode].
     */
    private fun createRange(charCode: Int, categoryId: String): RangePattern {
        return PeriodicRangePattern.from(charCode, categoryId, sequenceLength = 1, isPeriodic = true, unassignedCategoryId, makeOnePeriodCategory)
    }

    /**
     * Removes the last char in the specified [range].
     * Returns the simplest pattern that accommodated the remaining chars in the [range],
     * or `null` if the [range] contained a single char.
     */
    private fun removeLast(range: RangePattern): RangePattern? {
        if (range.rangeLength() == 1) {
            return null
        }

        // Rebuild the range from scratch, replaying every char but the last one.
        val rangeStart = range.rangeStart()
        var result = createRange(rangeStart, range.categoryIdOf(rangeStart))
        for (code in rangeStart + 1 until range.rangeEnd()) {
            val categoryId = range.categoryIdOf(code)
            if (shouldSkip(categoryId)) continue

            if (!result.append(code, categoryId)) {
                // The chars were already accommodated by a single range, so the rebuilt range is expected to evolve.
                result = checkNotNull(evolveLastRange(result, code, categoryId)) {
                    "Can't rebuild $range without its last char: 0x${code.toString(16)} doesn't fit into $result"
                }
            }
        }
        return result
    }

    /**
     * The id to use for the [categoryCode] - the Unicode general category code.
     */
    protected abstract fun categoryId(categoryCode: String): String

    /**
     * Returns true if this range builder skips chars with the specified [categoryId].
     */
    protected abstract fun shouldSkip(categoryId: String): Boolean

    /**
     * The function to use to transform periodic ranges with period equal to 1 to an Int representation.
     */
    protected abstract val makeOnePeriodCategory: (Array<String>) -> Int

    /**
     * Appends the [charCode] with the specified [categoryId] to the [lastRange] and returns the resulting range,
     * or returns `null` if [charCode] can't be appended to the [lastRange].
     * The [lastRange] can be transformed to another range type to accommodate the [charCode].
     */
    protected abstract fun evolveLastRange(
        lastRange: RangePattern,
        charCode: Int,
        categoryId: String
    ): RangePattern?
}
|
||||
+43
@@ -0,0 +1,43 @@
|
||||
/*
|
||||
* Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package generators.unicode.ranges.builders
|
||||
|
||||
import generators.unicode.ranges.patterns.RangePattern
|
||||
|
||||
/**
 * Ranges builder for whitespace chars.
 *
 * Chars of the separator categories listed in `whitespaceCategories` get the `WS` id, everything else is skipped.
 * The builder starts with the ranges 0009..000D and 001C..001F already appended as whitespace.
 */
internal class WhitespaceRangesBuilder : RangesBuilder() {

    init {
        // Cc CONTROL spaces
        val controlSpaceRanges = listOf("0009" to "000D", "001C" to "001F")
        for ((first, last) in controlSpaceRanges) {
            append(first, "<Space, First>", WS)
            append(last, "<Space, Last>", WS)
        }
    }

    override fun categoryId(categoryCode: String): String = when (categoryCode) {
        WS -> WS
        in whitespaceCategories -> WS
        else -> NOT_WS
    }

    override fun shouldSkip(categoryId: String): Boolean = categoryId == NOT_WS

    override val makeOnePeriodCategory: (Array<String>) -> Int
        get() = { _ -> 0 }

    /** A range is never evolved: a char that doesn't fit always starts a new range. */
    override fun evolveLastRange(lastRange: RangePattern, charCode: Int, categoryId: String): RangePattern? = null
}
|
||||
|
||||
// Category id given to whitespace chars.
private const val WS = "WS"
|
||||
// Category id given to every char that is not a whitespace; chars with this id are skipped.
private const val NOT_WS = "NOT_WS"
|
||||
|
||||
// Codes of the separator categories whose chars are whitespaces: space, line and paragraph separators.
private val whitespaceCategories = listOf(
    CharCategory.SPACE_SEPARATOR,
    CharCategory.LINE_SEPARATOR,
    CharCategory.PARAGRAPH_SEPARATOR
).map { it.code }
|
||||
+138
@@ -0,0 +1,138 @@
|
||||
/*
|
||||
* Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package generators.unicode.ranges.patterns
|
||||
|
||||
import generators.unicode.ranges.writers.hex
|
||||
|
||||
/**
 * A range of consecutive chars that starts with a letter and ends with a letter, and contains multiple ranges
 * of consecutive not-letter chars (gaps).
 *
 * All letter chars in this range have the same category id.
 *
 * @param charCode the start of this range
 * @param categoryId the category id of the char with the specified [charCode]; only `"OL"` is accepted
 * @param unassignedCategoryId the categoryId of the unassigned chars.
 *          Chars that are not appended or prepended are considered to be unassigned
 * @param makeCategory the function used to transform this range to an Int representation that is returned from the [category] function.
 */
internal class GapRangePattern private constructor(
    charCode: Int,
    private val categoryId: String,
    private val unassignedCategoryId: String,
    private val makeCategory: (start: Int, end: Int, gaps: List<Gap>) -> Int
) : RangePattern {
    private val start: Int = charCode
    private var end: Int = charCode
    private val gaps = mutableListOf<Gap>()

    init {
        require(categoryId == "OL") { "A gap range can only consist of \"OL\" chars, but was: $categoryId" }
    }

    /**
     * Appends the [charCode] to this range.
     *
     * An unassigned char is accepted without moving the range end: it becomes a part of a gap
     * only when a letter is appended after it. A char with any other foreign category id is rejected.
     * A letter that doesn't directly follow the range end opens a new gap, which is rejected if it
     * can't be encoded within the available bits.
     */
    override fun append(charCode: Int, categoryId: String): Boolean {
        require(charCode > end) { "Char code ${charCode.hex()} must go after the end of $this" }

        if (categoryId == unassignedCategoryId) {
            return true
        }

        if (categoryId != this.categoryId) {
            return false
        }

        // lll_gap_lll_X_l
        if (end == charCode - 1) {
            // _X_ is empty -> append the letter
            end = charCode
            return true
        }

        val newGap = Gap(start = end + 1, length = charCode - end - 1)
        val charsBeforeNewGap = newGap.start - if (gaps.isEmpty()) start else gaps.last().let { it.start + it.length }
        // Number of bits required to encode all gaps including the new one.
        val bits = (gaps.size + 1) * (CHARS_BITS + GAP_BITS)

        if (isValid(charsBeforeNewGap, newGap.length) && bits <= TOTAL_BITS) {
            gaps.add(newGap)
            end = charCode
            return true
        }

        return false
    }

    /**
     * Prepending is not supported by gap ranges: always returns false.
     */
    override fun prepend(charCode: Int, categoryId: String): Boolean {
        // `require` rather than `assert`: the precondition is checked regardless of JVM flags,
        // the same way as in `append`.
        require(charCode < start) { "Char code ${charCode.hex()} must go before the start of $this" }
        return false
    }

    override fun rangeStart(): Int {
        return start
    }

    override fun rangeEnd(): Int {
        return end
    }

    override fun category(): Int {
        return makeCategory(start, end, gaps)
    }

    /**
     * Returns [unassignedCategoryId] for a char located in one of the gaps, and the letter category id otherwise.
     */
    override fun categoryIdOf(charCode: Int): String {
        require(charCode in start..end) { "Char code ${charCode.hex()} is not in $this" }
        // Gaps are stored in ascending order, thus the first gap ending after the char decides.
        for (gap in gaps) {
            if (charCode < gap.start) {
                return categoryId
            }
            if (charCode < gap.start + gap.length) {
                return unassignedCategoryId
            }
        }
        return categoryId
    }

    override fun toString(): String {
        return "GapPattern{" +
                "start=" + start.hex() +
                ", end=" + end.hex() +
                ", length=" + rangeLength() +
                ", gaps=" + gaps +
                ", categoryId=" + categoryId +
                "}"
    }

    companion object {
        /** Number of bits used to encode the number of chars preceding a gap. */
        internal const val CHARS_BITS = 7

        /** Number of bits used to encode the length of a gap. */
        internal const val GAP_BITS = 7

        /** Number of bits available to encode all gaps of a range. */
        private const val TOTAL_BITS = 29

        /** A run of [length] consecutive not-letter chars that begins at [start]. */
        internal data class Gap(val start: Int, val length: Int)

        /**
         * Creates a gap range containing the chars of the [range] followed by the [charCode] with the [categoryId].
         *
         * Returns `null` if the [range] doesn't start with an `"OL"` char, if the [categoryId] is not `"OL"`,
         * or if the chars can't be accommodated within a gap range.
         */
        fun from(
            range: RangePattern,
            charCode: Int,
            categoryId: String,
            unassignedCategoryId: String,
            makeCategory: (start: Int, end: Int, gaps: List<Gap>) -> Int
        ): RangePattern? {
            val start = range.rangeStart()
            val startCategoryId = range.categoryIdOf(start)

            check(startCategoryId != unassignedCategoryId) { "A range can't start with an unassigned char: $range" }

            if (startCategoryId != categoryId || categoryId != "OL") return null

            val gapRange = GapRangePattern(start, startCategoryId, unassignedCategoryId, makeCategory)
            if (gapRange.append(start + 1, range.rangeEnd(), range::categoryIdOf, charCode, categoryId)) {
                return gapRange
            }
            return null
        }

        /** Returns true if both values fit into the number of bits reserved for them. */
        private fun isValid(charsBeforeGap: Int, gapLength: Int): Boolean {
            return charsBeforeGap < (1 shl CHARS_BITS) && gapLength < (1 shl GAP_BITS)
        }
    }
}
|
||||
+195
@@ -0,0 +1,195 @@
|
||||
/*
|
||||
* Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package generators.unicode.ranges.patterns
|
||||
|
||||
import generators.unicode.ranges.writers.hex
|
||||
|
||||
/**
 * A range of consecutive chars whose categories either repeat with a fixed period,
 * e.g., [Lu, Ll, Lu, Ll, ...], or form a sequence of limited length.
 *
 * @param charCode the first char of this range
 * @param categoryId the category id of the char with the specified [charCode]
 * @param sequenceLength the maximum length of this range when it is not periodic.
 *          When [isPeriodic] is true the range may grow longer, provided that for every
 *          `charCode >= start + sequenceLength` categoryIdOf(charCode) is equal to categoryIdOf(charCode - sequenceLength)
 * @param isPeriodic true if this range is a periodic range with period [sequenceLength]
 * @param unassignedCategoryId the category id of the unassigned chars.
 *          Chars located between the range and an appended or prepended char are considered to be unassigned
 * @param makeCategory the function that converts this range to the Int representation returned from [category].
 *          It receives an array of `minOf(sequenceLength, rangeLength())` category ids.
 */
internal class PeriodicRangePattern private constructor(
    charCode: Int,
    categoryId: String,
    val sequenceLength: Int,
    isPeriodic: Boolean,
    unassignedCategoryId: String,
    private val makeCategory: (Array<String>) -> Int
) : RangePattern {
    private var start: Int = charCode
    private var end: Int = charCode
    private val bag: Bag = Bag(sequenceLength, isPeriodic, unassignedCategoryId)

    init {
        bag.fill(charCode, categoryId)
    }

    override fun append(charCode: Int, categoryId: String): Boolean {
        require(charCode > end)
        // Chars between the current end and the new char are filled in as unassigned.
        val accepted = bag.fill(end + 1, charCode - 1, { bag.unassignedCategoryId }, charCode, categoryId)
        if (accepted) {
            end = charCode
        }
        return accepted
    }

    override fun prepend(charCode: Int, categoryId: String): Boolean {
        require(charCode < start)
        // Chars between the new char and the current start are filled in as unassigned.
        val accepted = bag.fill(charCode + 1, start - 1, { bag.unassignedCategoryId }, charCode, categoryId)
        if (accepted) {
            start = charCode
        }
        return accepted
    }

    override fun rangeStart(): Int = start

    override fun rangeEnd(): Int = end

    override fun category(): Int = makeCategory(orderedCategoryIds())

    /** Category ids of the leading chars of this range, at most [sequenceLength] of them. */
    private fun orderedCategoryIds(): Array<String> {
        val count = minOf(sequenceLength, rangeLength())
        return Array(count) { offset -> categoryIdOf(start + offset) }
    }

    override fun categoryIdOf(charCode: Int): String {
        require(charCode in start..end) { "Char code ${charCode.hex()} is not in $this" }
        val categoryId = bag.categoryIdOf(charCode)
        check(categoryId != null)
        return categoryId
    }

    override fun toString(): String =
        "PeriodicRangePattern{start=${start.hex()}, end=${end.hex()}, length=${rangeLength()}" +
                ", orderedCategoryIds=${orderedCategoryIds().contentToString()}, bag=$bag}"

    companion object {
        /**
         * Creates a range with the specified parameters that contains the chars of the [range]
         * followed by the [charCode] with the [categoryId], or returns `null` if they don't fit together.
         */
        fun from(
            range: RangePattern,
            charCode: Int,
            categoryId: String,
            sequenceLength: Int,
            isPeriodic: Boolean,
            unassignedCategoryId: String,
            makeCategory: (Array<String>) -> Int
        ): PeriodicRangePattern? {
            require(charCode > range.rangeEnd())

            val first = range.rangeStart()
            val candidate = from(first, range.categoryIdOf(first), sequenceLength, isPeriodic, unassignedCategoryId, makeCategory)
            val fits = candidate.append(first + 1, range.rangeEnd(), range::categoryIdOf, charCode, categoryId)
            return if (fits) candidate else null
        }

        /**
         * Creates a range containing the single [charCode].
         */
        fun from(
            charCode: Int,
            categoryId: String,
            sequenceLength: Int,
            isPeriodic: Boolean,
            unassignedCategoryId: String,
            makeCategory: (Array<String>) -> Int
        ): PeriodicRangePattern = PeriodicRangePattern(charCode, categoryId, sequenceLength, isPeriodic, unassignedCategoryId, makeCategory)
    }
}
|
||||
|
||||
/**
 * A set of chars together with their categories.
 *
 * The category id of a char is stored in the slot `charCode % sequenceLength` of [categoryIds].
 * A slot that is already taken accepts a char only when [isPeriodic] is true and the category ids are equal.
 */
private class Bag(
    private val sequenceLength: Int,
    private val isPeriodic: Boolean,
    val unassignedCategoryId: String
) {
    private val categoryIds = arrayOfNulls<String>(sequenceLength)

    /** Returns the category id stored in the slot of the [charCode], or `null` if the slot is empty. */
    fun categoryIdOf(charCode: Int): String? = categoryIds[charCode % sequenceLength]

    /**
     * Adds the chars in `rangeStart..rangeEnd`, whose categories are provided by [categoryIdOf],
     * and then the [charCode] with the [categoryId].
     *
     * Returns true if all of them were placed. Otherwise returns false and leaves this bag unchanged.
     *
     * The [charCode] must go immediately after the [rangeEnd] or before the [rangeStart].
     */
    fun fill(rangeStart: Int, rangeEnd: Int, categoryIdOf: (Int) -> String, charCode: Int, categoryId: String): Boolean {
        require(charCode == rangeStart - 1 || charCode == rangeEnd + 1)

        // Work on a copy so that a failed attempt doesn't alter the stored categories.
        val candidate = categoryIds.copyOf()
        val rangeFits = (rangeStart..rangeEnd).all { code -> candidate.place(code, categoryIdOf(code)) }
        if (!rangeFits || !candidate.place(charCode, categoryId)) return false

        candidate.copyInto(categoryIds)
        return true
    }

    /**
     * Returns true if the [charCode] with the [categoryId] was successfully placed in [categoryIds].
     */
    fun fill(charCode: Int, categoryId: String): Boolean = categoryIds.place(charCode, categoryId)

    /**
     * Puts the [categoryId] in the slot `charCode % sequenceLength` of this array and returns true,
     * or returns false if the slot can't accept it.
     */
    private fun Array<String?>.place(charCode: Int, categoryId: String): Boolean {
        val slot = charCode % sequenceLength
        val occupant = this[slot]
        val slotIsFree = occupant == null
        val repeatsPeriod = isPeriodic && occupant == categoryId
        if (!slotIsFree && !repeatsPeriod) return false

        this[slot] = categoryId
        return true
    }

    override fun toString(): String =
        "Bag{sequenceLength=$sequenceLength, isPeriodic=$isPeriodic, unassignedCategoryId=$unassignedCategoryId" +
                ", categoryIds=${categoryIds.contentToString()}}"
}
|
||||
+61
@@ -0,0 +1,61 @@
|
||||
/*
|
||||
* Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package generators.unicode.ranges.patterns
|
||||
|
||||
/**
 * A range of consecutive chars that fit within a particular pattern.
 */
internal interface RangePattern {
    /**
     * Tries to extend this range pattern with the [charCode] placed after its end.
     *
     * @return true if the [charCode] with the specified [categoryId] was accommodated within this pattern,
     *   false otherwise.
     */
    fun append(charCode: Int, categoryId: String): Boolean

    /**
     * Tries to extend this range pattern with the [charCode] placed before its start.
     *
     * @return true if the [charCode] with the specified [categoryId] was accommodated within this pattern,
     *   false otherwise.
     */
    fun prepend(charCode: Int, categoryId: String): Boolean

    /**
     * Returns the char code of the first char in this range.
     */
    fun rangeStart(): Int

    /**
     * Returns the char code of the last char in this range.
     */
    fun rangeEnd(): Int

    /**
     * Returns an integer value that carries information about the category of each char in this range.
     */
    fun category(): Int

    /**
     * Returns the category id of the char with the specified [charCode].
     *
     * Throws an exception if the [charCode] is not in `rangeStart()..rangeEnd()`.
     */
    fun categoryIdOf(charCode: Int): String
}
|
||||
|
||||
/** Returns the number of chars in this range, counting both the first and the last char. */
internal fun RangePattern.rangeLength(): Int {
    val last = rangeEnd()
    val first = rangeStart()
    return last - first + 1
}
|
||||
|
||||
|
||||
/**
 * Appends to this range pattern the chars in `rangeStart..rangeEnd`, whose categories are provided by [categoryIdOf],
 * and then the [charCode] with the [categoryId].
 *
 * Stops at the first char that can't be appended and returns false; chars appended before it stay in the pattern.
 * Returns true if every char was appended.
 */
internal fun RangePattern.append(rangeStart: Int, rangeEnd: Int, categoryIdOf: (Int) -> String, charCode: Int, categoryId: String): Boolean {
    val rangeAccepted = (rangeStart..rangeEnd).all { code -> append(code, categoryIdOf(code)) }
    return rangeAccepted && append(charCode, categoryId)
}
|
||||
+172
@@ -0,0 +1,172 @@
|
||||
/*
|
||||
* Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package generators.unicode.ranges.writers
|
||||
|
||||
import generators.unicode.ranges.RangesWritingStrategy
|
||||
import generators.unicode.ranges.builders.UNASSIGNED_CATEGORY_VALUE_REPLACEMENT
|
||||
import java.io.FileWriter
|
||||
|
||||
internal open class CategoryRangesWriter(protected val strategy: RangesWritingStrategy) : RangesWriter {
|
||||
|
||||
override fun write(rangeStart: List<Int>, rangeEnd: List<Int>, rangeCategory: List<Int>, writer: FileWriter) {
|
||||
beforeWritingRanges(writer)
|
||||
|
||||
writeRangeStart(rangeStart, writer)
|
||||
writeRangeCategory(rangeCategory, writer)
|
||||
writeInit(rangeStart, rangeEnd, rangeCategory, writer)
|
||||
|
||||
afterWritingRanges(writer)
|
||||
}
|
||||
|
||||
protected open fun beforeWritingRanges(writer: FileWriter) {
|
||||
strategy.beforeWritingRanges(writer)
|
||||
}
|
||||
|
||||
protected open fun afterWritingRanges(writer: FileWriter) {
|
||||
strategy.afterWritingRanges(writer)
|
||||
|
||||
writer.appendLine()
|
||||
writer.appendLine(categoryValueFrom())
|
||||
writer.appendLine()
|
||||
writer.appendLine(getCategoryValue())
|
||||
}
|
||||
|
||||
protected open fun writeRangeStart(elements: List<Int>, writer: FileWriter) {
|
||||
writer.writeIntArray("rangeStart", elements, strategy)
|
||||
writer.appendLine()
|
||||
}
|
||||
|
||||
protected open fun writeRangeCategory(elements: List<Int>, writer: FileWriter) {
|
||||
writer.writeIntArray("rangeCategory", elements, strategy)
|
||||
writer.appendLine()
|
||||
}
|
||||
|
||||
protected open fun writeInit(rangeStart: List<Int>, rangeEnd: List<Int>, rangeCategory: List<Int>, writer: FileWriter) {}
|
||||
|
||||
private fun categoryValueFrom(): String = """
|
||||
private fun categoryValueFrom(code: Int, ch: Int): Int {
|
||||
return when {
|
||||
code < 0x20 -> code
|
||||
code < 0x400 -> if ((ch and 1) == 1) code shr 5 else code and 0x1f
|
||||
else ->
|
||||
when (ch % 3) {
|
||||
2 -> code shr 10
|
||||
1 -> (code shr 5) and 0x1f
|
||||
else -> code and 0x1f
|
||||
}
|
||||
}
|
||||
}
|
||||
""".trimIndent()
|
||||
|
||||
private fun getCategoryValue(): String = """
|
||||
/**
|
||||
* Returns the Unicode general category of this character as an Int.
|
||||
*/
|
||||
internal fun Char.getCategoryValue(): Int {
|
||||
val ch = this.toInt()
|
||||
|
||||
val index = ${indexOf("ch")}
|
||||
val start = ${startAt("index")}
|
||||
val code = ${categoryAt("index")}
|
||||
val value = categoryValueFrom(code, ch - start)
|
||||
|
||||
return if (value == $UNASSIGNED_CATEGORY_VALUE_REPLACEMENT) CharCategory.UNASSIGNED.value else value
|
||||
}
|
||||
""".trimIndent()
|
||||
|
||||
protected open fun indexOf(charCode: String): String {
|
||||
return "binarySearchRange(${strategy.rangeRef("rangeStart")}, $charCode)"
|
||||
}
|
||||
|
||||
protected open fun startAt(index: String): String {
|
||||
return "${strategy.rangeRef("rangeStart")}[$index]"
|
||||
}
|
||||
|
||||
/**
 * Builds the generated expression that reads the `rangeCategory` element at [index].
 */
protected open fun categoryAt(index: String): String {
    val rangeCategoryRef = strategy.rangeRef("rangeCategory")
    return "$rangeCategoryRef[$index]"
}
|
||||
}
|
||||
|
||||
/**
 * A [CategoryRangesWriter] that does not emit plain int arrays. Instead it emits the range data as
 * variable-length Base64 strings together with an `init` block that decodes them into
 * `decodedRangeStart` and `decodedRangeCategory`.
 */
internal class VarLenBase64CategoryRangesWriter(strategy: RangesWritingStrategy) : CategoryRangesWriter(strategy) {

    /**
     * Emits whatever the base writer emits, then an empty line and the source of the
     * `decodeVarLenBase64` helper used by the generated `init` block.
     */
    override fun afterWritingRanges(writer: FileWriter) {
        super.afterWritingRanges(writer)
        with(writer) {
            appendLine()
            appendLine(decodeVarLenBase64())
        }
    }

    /**
     * Emits the `decodedRangeStart` / `decodedRangeCategory` declarations and the `init` block
     * that fills them.
     *
     * Range starts are stored as differences between consecutive starts; the generated code
     * rebuilds them as a running sum. [rangeEnd] is not used here.
     *
     * NOTE(review): the rebuilt array begins at 0 (the default `IntArray` value), so this
     * presumably relies on the first range starting at 0 — confirm against the data source.
     */
    override fun writeInit(rangeStart: List<Int>, rangeEnd: List<Int>, rangeCategory: List<Int>, writer: FileWriter) {
        val startDeltas = rangeStart.zipWithNext { previous, next -> next - previous }
        val encodedStartDeltas = startDeltas.toVarLenBase64()
        val encodedCategories = rangeCategory.toVarLenBase64()

        val initSource = """
            val decodedRangeStart: IntArray
            val decodedRangeCategory: IntArray

            init {
                val toBase64 = "$TO_BASE64"
                val fromBase64 = IntArray(128)
                for (i in toBase64.indices) {
                    fromBase64[toBase64[i].toInt()] = i
                }

                // rangeStartDiff.length = ${encodedStartDeltas.length}
                val rangeStartDiff = "$encodedStartDeltas"
                val diff = decodeVarLenBase64(rangeStartDiff, fromBase64, ${startDeltas.size})
                val start = IntArray(diff.size + 1)
                for (i in diff.indices) {
                    start[i + 1] = start[i] + diff[i]
                }
                decodedRangeStart = start

                // rangeCategory.length = ${encodedCategories.length}
                val rangeCategory = "$encodedCategories"
                decodedRangeCategory = decodeVarLenBase64(rangeCategory, fromBase64, ${rangeCategory.size})
            }
            """

        writer.appendLine(initSource.replaceIndent(strategy.indentation))
    }

    /** Writes nothing: the range starts are emitted by [writeInit] in encoded form. */
    override fun writeRangeStart(elements: List<Int>, writer: FileWriter) {
        // Intentionally empty.
    }

    /** Writes nothing: the range categories are emitted by [writeInit] in encoded form. */
    override fun writeRangeCategory(elements: List<Int>, writer: FileWriter) {
        // Intentionally empty.
    }

    /**
     * Returns the source text of the generated `decodeVarLenBase64` function.
     *
     * The emitted decoder reads one Base64 digit at a time: the low five bits are payload
     * (least significant group first) and a digit value below `0x20` ends the current number.
     */
    private fun decodeVarLenBase64(): String {
        val template = """
            internal fun decodeVarLenBase64(base64: String, fromBase64: IntArray, resultLength: Int): IntArray {
                val result = IntArray(resultLength)
                var index = 0
                var int = 0
                var shift = 0
                for (char in base64) {
                    val sixBit = fromBase64[char.toInt()]
                    int = int or ((sixBit and 0x1f) shl shift)
                    if (sixBit < 0x20) {
                        result[index++] = int
                        int = 0
                        shift = 0
                    } else {
                        shift += 5
                    }
                }
                return result
            }
            """
        return template.trimIndent()
    }

    /** Same lookup as the base class, but against the `decodedRangeStart` array. */
    override fun indexOf(charCode: String): String =
        "binarySearchRange(${strategy.rangeRef("decodedRangeStart")}, $charCode)"

    /** Reads the element at [index] from the `decodedRangeStart` array. */
    override fun startAt(index: String): String =
        "${strategy.rangeRef("decodedRangeStart")}[$index]"

    /** Reads the element at [index] from the `decodedRangeCategory` array. */
    override fun categoryAt(index: String): String =
        "${strategy.rangeRef("decodedRangeCategory")}[$index]"
}
|
||||
+60
@@ -0,0 +1,60 @@
|
||||
/*
|
||||
* Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package generators.unicode.ranges.writers
|
||||
|
||||
import generators.unicode.ranges.RangesWritingStrategy
|
||||
import java.io.FileWriter
|
||||
|
||||
/**
 * Writes the generated digit lookup: the `rangeStart` array, the shared `binarySearchRange`
 * helper and `Char.isDigitImpl()`.
 */
internal class DigitRangesWriter(private val strategy: RangesWritingStrategy) : RangesWriter {

    /**
     * Emits the digit ranges and the functions that query them.
     *
     * Only range starts are written; [rangeCategory] is not used.
     *
     * @throws IllegalStateException if some range does not span exactly ten code points.
     */
    override fun write(rangeStart: List<Int>, rangeEnd: List<Int>, rangeCategory: List<Int>, writer: FileWriter) {
        // Each digit range holds exactly 10 code points, i.e. last - first == 9,
        // so the range ends never need to be generated.
        check(rangeStart.indices.none { rangeEnd[it] - rangeStart[it] != 9 })

        strategy.beforeWritingRanges(writer)
        writer.writeIntArray("rangeStart", rangeStart, strategy)
        strategy.afterWritingRanges(writer)

        with(writer) {
            appendLine()
            appendLine(binarySearchRange())
            appendLine()
            appendLine(isDigitImpl())
        }
    }

    /**
     * Returns the source text of the generated `binarySearchRange` function.
     *
     * The emitted function returns the index of `needle` when it is present, otherwise the
     * index of the closest smaller element; that is `-1` when `needle` is below `array[0]`.
     */
    private fun binarySearchRange(): String {
        val template = """
            internal fun binarySearchRange(array: IntArray, needle: Int): Int {
                var bottom = 0
                var top = array.size - 1
                var middle = -1
                var value = 0
                while (bottom <= top) {
                    middle = (bottom + top) / 2
                    value = array[middle]
                    if (needle > value)
                        bottom = middle + 1
                    else if (needle == value)
                        return middle
                    else
                        top = middle - 1
                }
                return middle - (if (needle < value) 1 else 0)
            }
            """
        return template.trimIndent()
    }

    /**
     * Returns the source text of the generated `Char.isDigitImpl()` function.
     *
     * NOTE(review): for a char below the first range start the generated search yields `-1`
     * and the array access would fail — presumably callers filter such chars first; confirm.
     */
    private fun isDigitImpl(): String {
        val startsRef = strategy.rangeRef("rangeStart")
        val template = """
            /**
             * Returns `true` if this character is a digit.
             */
            internal fun Char.isDigitImpl(): Boolean {
                val ch = this.toInt()
                val index = binarySearchRange($startsRef, ch)
                val high = $startsRef[index] + 9
                return ch <= high
            }
            """
        return template.trimIndent()
    }
}
|
||||
+76
@@ -0,0 +1,76 @@
|
||||
/*
|
||||
* Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package generators.unicode.ranges.writers
|
||||
|
||||
import generators.unicode.ranges.RangesWritingStrategy
|
||||
import templates.COPYRIGHT_NOTICE
|
||||
import templates.autoGeneratedWarning
|
||||
import java.io.File
|
||||
import java.io.FileWriter
|
||||
|
||||
/**
 * Writes the common file header: the copyright notice, the `package` declaration for [pkg],
 * an empty line and the auto-generated warning that names `GenerateUnicodeData.kt`.
 *
 * Also prints a progress line mentioning [file] to standard output.
 */
internal fun FileWriter.writeHeader(file: File, pkg: String) {
    println("Generating file: $file")

    appendLine(COPYRIGHT_NOTICE)
    // The package line is followed by one empty line.
    append("package $pkg\n\n")
    appendLine(autoGeneratedWarning("GenerateUnicodeData.kt"))
}
|
||||
|
||||
/**
 * Writes `<visibility> val <name> = intArrayOf(...)` with [elements] rendered through [hex],
 * at most 20 values per line.
 *
 * The annotation text, the visibility modifier and the indentation all come from [strategy].
 * Every value, including the last one, is followed by `", "`.
 */
internal fun FileWriter.writeIntArray(
    name: String,
    elements: List<Int>,
    strategy: RangesWritingStrategy
) {
    append(strategy.rangesAnnotation)
    append("${strategy.indentation}${strategy.rangesVisibilityModifier} val $name = intArrayOf(")

    // One output line per group of 20 values.
    for (row in elements.chunked(20)) {
        appendLine()
        append(strategy.indentation + " ")
        row.forEach { value -> append(value.hex() + ", ") }
    }

    appendLine()
    append(strategy.indentation + ")")
    appendLine()
}
|
||||
|
||||
/**
 * Base64 alphabet: the character at index `i` is the digit that stands for the 6-bit value `i`.
 *
 * Used by [toVarLenBase64] for encoding, and embedded verbatim into the generated `init` blocks,
 * which build the reverse lookup table from it.
 */
internal const val TO_BASE64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
|
||||
|
||||
/**
 * Encodes every number of this list as one or more Base64 digits and concatenates the digits.
 *
 * Each number is first split into 6-bit groups (see `to6Bits`), and each group is mapped to its
 * character in [TO_BASE64].
 *
 * @throws IllegalArgumentException if the list contains a negative number.
 */
internal fun List<Int>.toVarLenBase64(): String = buildString {
    for (number in this@toVarLenBase64) {
        for (sixBits in number.to6Bits()) {
            append(TO_BASE64[sixBits])
        }
    }
}
|
||||
|
||||
/**
 * Splits this non-negative number into 6-bit groups, least significant group first.
 *
 * The low five bits of a group carry payload; bit `0x20` is set on every group except the last
 * one and signals that more groups follow. Zero is encoded as the single group `0`.
 *
 * @throws IllegalArgumentException if this number is negative.
 */
private fun Int.to6Bits(): List<Int> {
    require(this >= 0)

    val groups = mutableListOf<Int>()
    var remaining = this
    do {
        val payload = remaining and 0x1f
        remaining = remaining shr 5
        // Flag the group as "continued" while higher bits are still pending.
        groups += if (remaining != 0) payload or 0x20 else payload
    } while (remaining != 0)

    return groups
}
|
||||
|
||||
/**
 * Formats this number as a lowercase hexadecimal literal with at least four digits,
 * e.g. `0x0030`. Negative numbers are rendered as `-0x…` with the sign before the prefix.
 */
internal fun Int.hex(): String {
    val digits = toString(radix = 16)
    return when {
        digits.startsWith('-') -> "-0x${digits.drop(1).padStart(4, '0')}"
        else -> "0x${digits.padStart(4, '0')}"
    }
}
|
||||
+210
@@ -0,0 +1,210 @@
|
||||
/*
|
||||
* Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package generators.unicode.ranges.writers
|
||||
|
||||
import generators.unicode.ranges.RangesWritingStrategy
|
||||
import generators.unicode.ranges.patterns.GapRangePattern
|
||||
import java.io.FileWriter
|
||||
|
||||
/**
 * Writes the generated letter lookup: the `rangeStart`, `rangeLength` and `rangeCategory`
 * arrays plus the `isLetterImpl` / `isLowerCaseImpl` / `isUpperCaseImpl` functions.
 *
 * Each emission step is an `open` member so that subclasses can change how the data is stored
 * and how the generated code reads it.
 */
internal open class LetterRangesWriter(protected val strategy: RangesWritingStrategy) : RangesWriter {

    /**
     * Emits, in order: the strategy prologue, range starts, range lengths, range categories,
     * the optional initialization code, and finally the epilogue with the lookup functions.
     *
     * Lengths are computed as `rangeEnd[i] - rangeStart[i] + 1`.
     */
    override fun write(rangeStart: List<Int>, rangeEnd: List<Int>, rangeCategory: List<Int>, writer: FileWriter) {
        beforeWritingRanges(writer)

        writeRangeStart(rangeStart, writer)
        val lengths = rangeEnd.mapIndexed { index, last -> last - rangeStart[index] + 1 }
        writeRangeLength(lengths, writer)
        writeRangeCategory(rangeCategory, writer)
        writeInit(rangeStart, rangeEnd, rangeCategory, writer)

        afterWritingRanges(writer)
    }

    /** Delegates to the strategy's `beforeWritingRanges`. */
    protected open fun beforeWritingRanges(writer: FileWriter) {
        strategy.beforeWritingRanges(writer)
    }

    /** Delegates to the strategy's `afterWritingRanges`, then emits the lookup functions. */
    protected open fun afterWritingRanges(writer: FileWriter) {
        strategy.afterWritingRanges(writer)
        with(writer) {
            appendLine()
            appendLine(getLetterType())
        }
    }

    /** Emits the `rangeStart` int array followed by an empty line. */
    protected open fun writeRangeStart(elements: List<Int>, writer: FileWriter) {
        with(writer) {
            writeIntArray("rangeStart", elements, strategy)
            appendLine()
        }
    }

    /** Emits the `rangeLength` int array followed by an empty line. */
    protected open fun writeRangeLength(elements: List<Int>, writer: FileWriter) {
        with(writer) {
            writeIntArray("rangeLength", elements, strategy)
            appendLine()
        }
    }

    /** Emits the `rangeCategory` int array followed by an empty line. */
    protected open fun writeRangeCategory(elements: List<Int>, writer: FileWriter) {
        with(writer) {
            writeIntArray("rangeCategory", elements, strategy)
            appendLine()
        }
    }

    /** Hook for subclasses that emit initialization code; the base implementation writes nothing. */
    protected open fun writeInit(
        rangeStart: List<Int>,
        rangeEnd: List<Int>,
        rangeCategory: List<Int>,
        writer: FileWriter
    ) {
        // Intentionally empty.
    }

    /**
     * Returns the source text of the generated `isLetterImpl`, `isLowerCaseImpl`,
     * `isUpperCaseImpl` and `getLetterType` functions.
     *
     * The array accesses come from [indexOf], [startAt], [lengthAt] and [categoryAt]; the bit
     * widths used to decode a gap pattern come from [GapRangePattern].
     */
    private fun getLetterType(): String {
        val indexExpression = indexOf("ch")
        val startExpression = startAt("index")
        val lengthExpression = lengthAt("index")
        val codeExpression = categoryAt("index")
        val charsMaskHex = ((1 shl GapRangePattern.CHARS_BITS) - 1).toString(16)
        val gapMaskHex = ((1 shl GapRangePattern.GAP_BITS) - 1).toString(16)

        return """
            /**
             * Returns `true` if this character is a letter.
             */
            internal fun Char.isLetterImpl(): Boolean {
                return getLetterType() != 0
            }

            /**
             * Returns `true` if this character is a lower case letter.
             */
            internal fun Char.isLowerCaseImpl(): Boolean {
                return getLetterType() == 1
            }

            /**
             * Returns `true` if this character is an upper case letter.
             */
            internal fun Char.isUpperCaseImpl(): Boolean {
                return getLetterType() == 2
            }

            /**
             * Returns
             * - `1` if the character is a lower case letter,
             * - `2` if the character is an upper case letter,
             * - `3` if the character is a letter but not a lower or upper case letter,
             * - `0` otherwise.
             */
            private fun Char.getLetterType(): Int {
                val ch = this.toInt()
                val index = $indexExpression

                val rangeStart = $startExpression
                val rangeEnd = rangeStart + $lengthExpression - 1
                val code = $codeExpression

                if (ch > rangeEnd) {
                    return 0
                }

                val lastTwoBits = code and 0x3

                if (lastTwoBits == 0) { // gap pattern
                    var shift = 2
                    var threshold = rangeStart
                    for (i in 0..1) {
                        threshold += (code shr shift) and 0x$charsMaskHex
                        if (threshold > ch) {
                            return 3
                        }
                        shift += ${GapRangePattern.CHARS_BITS}
                        threshold += (code shr shift) and 0x$gapMaskHex
                        if (threshold > ch) {
                            return 0
                        }
                        shift += ${GapRangePattern.GAP_BITS}
                    }
                    return 3
                }

                if (code <= 0x7) {
                    return lastTwoBits
                }

                val distance = (ch - rangeStart)
                val shift = if (code <= 0x1F) distance % 2 else distance
                return (code shr (2 * shift)) and 0x3
            }
            """.trimIndent()
    }

    /** Builds the generated `binarySearchRange` call that looks up [charCode] in `rangeStart`. */
    protected open fun indexOf(charCode: String): String =
        "binarySearchRange(${strategy.rangeRef("rangeStart")}, $charCode)"

    /** Builds the generated expression reading `rangeStart` at [index]. */
    protected open fun startAt(index: String): String =
        "${strategy.rangeRef("rangeStart")}[$index]"

    /** Builds the generated expression reading `rangeLength` at [index]. */
    protected open fun lengthAt(index: String): String =
        "${strategy.rangeRef("rangeLength")}[$index]"

    /** Builds the generated expression reading `rangeCategory` at [index]. */
    protected open fun categoryAt(index: String): String =
        "${strategy.rangeRef("rangeCategory")}[$index]"
}
|
||||
|
||||
/**
 * A [LetterRangesWriter] that does not emit plain int arrays. Instead it emits the range data as
 * variable-length Base64 strings together with an `init` block that decodes them into
 * `decodedRangeStart`, `decodedRangeLength` and `decodedRangeCategory`.
 *
 * The generated `init` block calls `decodeVarLenBase64`, which this class does not emit itself.
 */
internal class VarLenBase64LetterRangesWriter(strategy: RangesWritingStrategy) : LetterRangesWriter(strategy) {

    /** Emits whatever the base writer emits, followed by one more empty line. */
    override fun afterWritingRanges(writer: FileWriter) {
        super.afterWritingRanges(writer)
        writer.run { appendLine() }
    }

    /**
     * Emits the three `decoded…` declarations and the `init` block that fills them.
     *
     * Range starts are stored as differences: the first entry is the first start itself, and
     * every later entry is the distance from the previous start. Lengths are
     * `rangeEnd[i] - rangeStart[i] + 1`.
     */
    override fun writeInit(rangeStart: List<Int>, rangeEnd: List<Int>, rangeCategory: List<Int>, writer: FileWriter) {
        val startDeltas = listOfNotNull(rangeStart.firstOrNull()) +
                rangeStart.zipWithNext { previous, next -> next - previous }
        val lengths = rangeEnd.mapIndexed { index, last -> last - rangeStart[index] + 1 }

        val encodedStartDeltas = startDeltas.toVarLenBase64()
        val encodedLengths = lengths.toVarLenBase64()
        val encodedCategories = rangeCategory.toVarLenBase64()

        val initSource = """
            val decodedRangeStart: IntArray
            val decodedRangeLength: IntArray
            val decodedRangeCategory: IntArray

            init {
                val toBase64 = "$TO_BASE64"
                val fromBase64 = IntArray(128)
                for (i in toBase64.indices) {
                    fromBase64[toBase64[i].toInt()] = i
                }

                // rangeStartDiff.length = ${encodedStartDeltas.length}
                val rangeStartDiff = "$encodedStartDeltas"
                val diff = decodeVarLenBase64(rangeStartDiff, fromBase64, ${startDeltas.size})
                val start = IntArray(diff.size)
                for (i in diff.indices) {
                    if (i == 0) start[i] = diff[i]
                    else start[i] = start[i - 1] + diff[i]
                }
                decodedRangeStart = start

                // rangeLength.length = ${encodedLengths.length}
                val rangeLength = "$encodedLengths"
                decodedRangeLength = decodeVarLenBase64(rangeLength, fromBase64, ${lengths.size})

                // rangeCategory.length = ${encodedCategories.length}
                val rangeCategory = "$encodedCategories"
                decodedRangeCategory = decodeVarLenBase64(rangeCategory, fromBase64, ${rangeCategory.size})
            }
            """

        writer.appendLine(initSource.replaceIndent(strategy.indentation))
    }

    /** Writes nothing: the range starts are emitted by [writeInit] in encoded form. */
    override fun writeRangeStart(elements: List<Int>, writer: FileWriter) {
        // Intentionally empty.
    }

    /** Writes nothing: the range lengths are emitted by [writeInit] in encoded form. */
    override fun writeRangeLength(elements: List<Int>, writer: FileWriter) {
        // Intentionally empty.
    }

    /** Writes nothing: the range categories are emitted by [writeInit] in encoded form. */
    override fun writeRangeCategory(elements: List<Int>, writer: FileWriter) {
        // Intentionally empty.
    }

    /** Same lookup as the base class, but against the `decodedRangeStart` array. */
    override fun indexOf(charCode: String): String =
        "binarySearchRange(${strategy.rangeRef("decodedRangeStart")}, $charCode)"

    /** Reads the element at [index] from the `decodedRangeStart` array. */
    override fun startAt(index: String): String =
        "${strategy.rangeRef("decodedRangeStart")}[$index]"

    /** Reads the element at [index] from the `decodedRangeLength` array. */
    override fun lengthAt(index: String): String =
        "${strategy.rangeRef("decodedRangeLength")}[$index]"

    /** Reads the element at [index] from the `decodedRangeCategory` array. */
    override fun categoryAt(index: String): String =
        "${strategy.rangeRef("decodedRangeCategory")}[$index]"
}
|
||||
+12
@@ -0,0 +1,12 @@
|
||||
/*
|
||||
* Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package generators.unicode.ranges.writers
|
||||
|
||||
import java.io.FileWriter
|
||||
|
||||
/**
 * Writes generated source code for a set of character ranges.
 */
interface RangesWriter {
    /**
     * Writes the code generated from the given ranges to [writer].
     *
     * The three lists are indexed in parallel, one entry per range.
     *
     * @param rangeStart the first char code of each range.
     * @param rangeEnd the last char code of each range (inclusive).
     * @param rangeCategory a per-range code; its meaning depends on the implementation.
     * @param writer the destination file writer.
     */
    fun write(
        rangeStart: List<Int>,
        rangeEnd: List<Int>,
        rangeCategory: List<Int>,
        writer: FileWriter
    )
}
|
||||
+62
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
* Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.
|
||||
* Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
|
||||
*/
|
||||
|
||||
package generators.unicode.ranges.writers
|
||||
|
||||
import java.io.FileWriter
|
||||
|
||||
/**
 * Writes the generated `Char.isWhitespaceImpl()` function as a chain of explicit range checks.
 * No lookup arrays are emitted.
 */
internal class WhitespaceRangesWriter : RangesWriter {

    /**
     * Emits `Char.isWhitespaceImpl()` built from [rangeStart] and [rangeEnd].
     * [rangeCategory] is not used.
     */
    override fun write(rangeStart: List<Int>, rangeEnd: List<Int>, rangeCategory: List<Int>, writer: FileWriter) {
        writer.appendLine(isWhitespaceImpl(rangeStart, rangeEnd))
    }

    /** Returns the source text of the generated `Char.isWhitespaceImpl()` function. */
    private fun isWhitespaceImpl(rangeStart: List<Int>, rangeEnd: List<Int>): String {
        val checks = rangeChecks(rangeStart, rangeEnd, "ch")
        return """
            /**
             * Returns `true` if this character is a whitespace.
             */
            internal fun Char.isWhitespaceImpl(): Boolean {
                val ch = this.toInt()
                return $checks
            }
        """.trimIndent()
    }

    /**
     * Builds the boolean expression that tests the variable named [ch] against every range.
     *
     * - A single-element range becomes `ch == X`.
     * - A two-element range becomes two `==` checks.
     * - Any longer range becomes `ch in X..Y`.
     *
     * Starting with the first single-element range above `0x1000`, the remaining checks are
     * wrapped in `ch > 0x1000 && ( ... )`. The closing parenthesis is emitted only when that
     * group was actually opened, so the result is always balanced. With no ranges at all the
     * expression is `false`.
     */
    private fun rangeChecks(rangeStart: List<Int>, rangeEnd: List<Int>, ch: String): String {
        if (rangeStart.isEmpty()) {
            return "false"
        }

        val tab = " "
        var tabCount = 5
        var groupOpened = false
        val builder = StringBuilder()

        for (i in rangeStart.indices) {
            if (i != 0) {
                builder.append(tab.repeat(tabCount)).append("|| ")
            }

            val start = rangeStart[i]
            val end = rangeEnd[i]
            when (start) {
                end -> {
                    if (start > 0x1000 && !groupOpened) {
                        builder.appendLine("$ch > 0x1000 && (")
                        groupOpened = true
                        tabCount = 6
                        builder.append(tab.repeat(tabCount))
                    }
                    builder.appendLine("$ch == ${start.hex()}")
                }
                end - 1 -> {
                    builder.appendLine("$ch == ${start.hex()}")
                    builder.append(tab.repeat(tabCount)).append("|| ")
                    builder.appendLine("$ch == ${end.hex()}")
                }
                else -> {
                    builder.appendLine("$ch in ${start.hex()}..${end.hex()}")
                }
            }
        }

        return if (groupOpened) {
            // Close the `ch > 0x1000 && (` group opened above.
            builder.append(tab.repeat(5)).append(")").toString()
        } else {
            // Nothing to close; drop the trailing line break left by the last check.
            builder.toString().trimEnd()
        }
    }
}
|
||||
@@ -11,8 +11,8 @@ import java.io.FileWriter
|
||||
import java.io.Reader
|
||||
import javax.xml.xpath.XPathFactory
|
||||
|
||||
val COMMON_AUTOGENERATED_WARNING: String = """//
|
||||
// NOTE: THIS FILE IS AUTO-GENERATED by the GenerateStandardLib.kt
|
||||
fun autoGeneratedWarning(generator: String): String = """//
|
||||
// NOTE: THIS FILE IS AUTO-GENERATED by the $generator
|
||||
// See: https://github.com/JetBrains/kotlin/tree/master/libraries/stdlib
|
||||
//"""
|
||||
|
||||
@@ -92,7 +92,7 @@ fun List<MemberBuilder>.writeTo(file: File, targetedSource: TargetedSourceFile)
|
||||
}
|
||||
|
||||
writer.append("package ${sourceFile.packageName ?: "kotlin"}\n\n")
|
||||
writer.append("${COMMON_AUTOGENERATED_WARNING}\n\n")
|
||||
writer.append("${autoGeneratedWarning("GenerateStandardLib.kt")}\n\n")
|
||||
if (target.platform == Platform.JS) {
|
||||
writer.appendln("import kotlin.js.*")
|
||||
if (sourceFile == SourceFile.Arrays) {
|
||||
|
||||
Reference in New Issue
Block a user