diff --git a/libraries/stdlib/wasm/builtins/kotlin/Char.kt b/libraries/stdlib/wasm/builtins/kotlin/Char.kt index 8ed41b24c5f..3238aff24fc 100644 --- a/libraries/stdlib/wasm/builtins/kotlin/Char.kt +++ b/libraries/stdlib/wasm/builtins/kotlin/Char.kt @@ -141,6 +141,21 @@ public class Char private constructor(public val value: Char) : Comparable */ public const val MAX_SURROGATE: Char = MAX_LOW_SURROGATE + /** + * The minimum value of a supplementary code point, `U+10000`. + */ + public const val MIN_SUPPLEMENTARY_CODE_POINT: Int = 0x10000 + + /** + * The minimum radix available for conversion to and from strings. + */ + public const val MIN_RADIX: Int = 2 + + /** + * The maximum radix available for conversion to and from strings. + */ + public const val MAX_RADIX: Int = 36 + /** * The number of bytes used to represent a Char in a binary form. */ diff --git a/libraries/stdlib/wasm/src/generated/_CharCategories.kt b/libraries/stdlib/wasm/src/generated/_CharCategories.kt new file mode 100644 index 00000000000..7138fc74a64 --- /dev/null +++ b/libraries/stdlib/wasm/src/generated/_CharCategories.kt @@ -0,0 +1,182 @@ +/* + * Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors. + * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file. 
+ */ + +package kotlin.text + +// +// NOTE: THIS FILE IS AUTO-GENERATED by the GenerateUnicodeData.kt +// See: https://github.com/JetBrains/kotlin/tree/master/libraries/stdlib +// + +// 1343 ranges totally +private val rangeStart = intArrayOf( + 0x0000, 0x0020, 0x0022, 0x0027, 0x002b, 0x002e, 0x0030, 0x003a, 0x003c, 0x003f, 0x0041, 0x005b, 0x005e, 0x0061, 0x007b, 0x007e, 0x007f, 0x00a0, 0x00a2, 0x00a6, + 0x00aa, 0x00ad, 0x00b0, 0x00b3, 0x00b6, 0x00b9, 0x00bd, 0x00c0, 0x00d7, 0x00d8, 0x00df, 0x00f7, 0x00f8, 0x00ff, 0x0138, 0x0149, 0x0179, 0x017f, 0x0182, 0x0184, + 0x018b, 0x018e, 0x0190, 0x0198, 0x019b, 0x01a0, 0x01a7, 0x01aa, 0x01ac, 0x01b3, 0x01b8, 0x01bb, 0x01bd, 0x01c0, 0x01c4, 0x01cc, 0x01dd, 0x01f0, 0x01f4, 0x01f8, + 0x0233, 0x0239, 0x0240, 0x0243, 0x0246, 0x024f, 0x0294, 0x0295, 0x02b0, 0x02c2, 0x02c6, 0x02d2, 0x02e0, 0x02e5, 0x02eb, 0x02ef, 0x0300, 0x0370, 0x0374, 0x0377, + 0x037a, 0x037d, 0x0380, 0x0384, 0x0386, 0x038a, 0x038f, 0x0391, 0x03a2, 0x03a3, 0x03ac, 0x03cd, 0x03d2, 0x03d5, 0x03d7, 0x03ef, 0x03f4, 0x03f9, 0x03fb, 0x03fd, + 0x0430, 0x045f, 0x0482, 0x0483, 0x0488, 0x048a, 0x04c1, 0x04cf, 0x0530, 0x0531, 0x0557, 0x055a, 0x0560, 0x0589, 0x058c, 0x058f, 0x0591, 0x05be, 0x05bf, 0x05c8, + 0x05d0, 0x05eb, 0x05ef, 0x05f3, 0x05f5, 0x0600, 0x0606, 0x0609, 0x060e, 0x0610, 0x061b, 0x061e, 0x0620, 0x0640, 0x0641, 0x064b, 0x0660, 0x066a, 0x066e, 0x0671, + 0x06d3, 0x06d6, 0x06dd, 0x06df, 0x06e4, 0x06e8, 0x06ea, 0x06ee, 0x06f0, 0x06fa, 0x06fc, 0x0700, 0x070e, 0x0711, 0x0712, 0x0730, 0x074b, 0x074d, 0x07a6, 0x07b1, + 0x07b2, 0x07c0, 0x07ca, 0x07eb, 0x07f4, 0x07f7, 0x07fa, 0x07fd, 0x0800, 0x0816, 0x081a, 0x081b, 0x0822, 0x0827, 0x0829, 0x082e, 0x0830, 0x083f, 0x0840, 0x0859, + 0x085c, 0x0860, 0x086b, 0x08a0, 0x08b5, 0x08b6, 0x08c8, 0x08d3, 0x08e2, 0x08e3, 0x0903, 0x0904, 0x093a, 0x093d, 0x093e, 0x0941, 0x0949, 0x094b, 0x0950, 0x0951, + 0x0958, 0x0962, 0x0964, 0x0966, 0x0970, 0x0972, 0x0981, 0x0984, 0x0985, 0x098c, 0x0990, 0x0993, 0x09a9, 0x09aa, 0x09b1, 0x09b5, 
0x09b8, 0x09bb, 0x09be, 0x09c1, + 0x09c5, 0x09c8, 0x09cc, 0x09cf, 0x09d7, 0x09d8, 0x09db, 0x09e1, 0x09e4, 0x09e6, 0x09f0, 0x09f2, 0x09f4, 0x09fa, 0x09fd, 0x0a00, 0x0a03, 0x0a05, 0x0a0b, 0x0a0f, + 0x0a11, 0x0a13, 0x0a29, 0x0a2a, 0x0a2f, 0x0a3b, 0x0a3e, 0x0a41, 0x0a43, 0x0a46, 0x0a4a, 0x0a4d, 0x0a50, 0x0a52, 0x0a59, 0x0a5c, 0x0a5f, 0x0a66, 0x0a70, 0x0a73, + 0x0a76, 0x0a77, 0x0a81, 0x0a84, 0x0a85, 0x0a8c, 0x0a91, 0x0a93, 0x0aa9, 0x0aaa, 0x0aaf, 0x0ab5, 0x0aba, 0x0abd, 0x0abe, 0x0ac1, 0x0ac4, 0x0ac9, 0x0acd, 0x0ad0, + 0x0ad1, 0x0ae0, 0x0ae3, 0x0ae6, 0x0af0, 0x0af2, 0x0af9, 0x0afa, 0x0b00, 0x0b03, 0x0b05, 0x0b0c, 0x0b10, 0x0b13, 0x0b29, 0x0b2a, 0x0b2f, 0x0b35, 0x0b3a, 0x0b3d, + 0x0b3f, 0x0b43, 0x0b46, 0x0b4a, 0x0b4d, 0x0b4e, 0x0b55, 0x0b58, 0x0b5b, 0x0b61, 0x0b64, 0x0b66, 0x0b70, 0x0b72, 0x0b78, 0x0b81, 0x0b85, 0x0b8b, 0x0b8e, 0x0b8f, + 0x0b94, 0x0b97, 0x0b9a, 0x0b9f, 0x0ba2, 0x0ba5, 0x0ba8, 0x0bab, 0x0bae, 0x0bba, 0x0bbe, 0x0bc3, 0x0bc6, 0x0bc7, 0x0bcc, 0x0bcf, 0x0bd1, 0x0bd7, 0x0bd8, 0x0be6, + 0x0bf0, 0x0bf3, 0x0bf8, 0x0bfb, 0x0c00, 0x0c03, 0x0c05, 0x0c0b, 0x0c10, 0x0c12, 0x0c29, 0x0c2a, 0x0c3a, 0x0c3d, 0x0c40, 0x0c43, 0x0c46, 0x0c49, 0x0c4a, 0x0c4e, + 0x0c54, 0x0c58, 0x0c5b, 0x0c60, 0x0c63, 0x0c66, 0x0c70, 0x0c77, 0x0c78, 0x0c7f, 0x0c82, 0x0c85, 0x0c8b, 0x0c90, 0x0c92, 0x0ca9, 0x0caa, 0x0cb4, 0x0cb5, 0x0cba, + 0x0cbc, 0x0cc0, 0x0cc5, 0x0cc7, 0x0ccc, 0x0cce, 0x0cd5, 0x0cd7, 0x0cde, 0x0ce2, 0x0ce4, 0x0ce6, 0x0cf0, 0x0cf3, 0x0d00, 0x0d02, 0x0d04, 0x0d0b, 0x0d10, 0x0d12, + 0x0d3a, 0x0d3e, 0x0d41, 0x0d45, 0x0d47, 0x0d4c, 0x0d4f, 0x0d52, 0x0d55, 0x0d58, 0x0d5f, 0x0d62, 0x0d64, 0x0d66, 0x0d70, 0x0d79, 0x0d7a, 0x0d80, 0x0d83, 0x0d85, + 0x0d97, 0x0d9a, 0x0db2, 0x0db3, 0x0dbc, 0x0dc0, 0x0dc7, 0x0dc8, 0x0dcd, 0x0dd0, 0x0dd3, 0x0dd7, 0x0dd8, 0x0de0, 0x0de6, 0x0df0, 0x0df3, 0x0df5, 0x0e01, 0x0e2f, + 0x0e34, 0x0e3b, 0x0e3f, 0x0e40, 0x0e46, 0x0e47, 0x0e4f, 0x0e50, 0x0e5a, 0x0e5c, 0x0e80, 0x0e85, 0x0e86, 0x0e8b, 0x0e8c, 0x0ea3, 0x0ea7, 0x0eaf, 0x0eb4, 0x0ebd, + 0x0ec0, 0x0ec5, 
0x0ec8, 0x0ece, 0x0ed0, 0x0eda, 0x0edc, 0x0ee0, 0x0f00, 0x0f01, 0x0f04, 0x0f13, 0x0f17, 0x0f1a, 0x0f20, 0x0f2a, 0x0f34, 0x0f3a, 0x0f3e, 0x0f40, + 0x0f48, 0x0f49, 0x0f6d, 0x0f71, 0x0f7f, 0x0f80, 0x0f83, 0x0f88, 0x0f8d, 0x0f98, 0x0f99, 0x0fbd, 0x0fbe, 0x0fc6, 0x0fc7, 0x0fcb, 0x0fd0, 0x0fd5, 0x0fd9, 0x0fdb, + 0x1000, 0x102b, 0x102d, 0x1031, 0x1032, 0x1036, 0x103c, 0x103f, 0x1040, 0x104a, 0x1050, 0x1056, 0x1059, 0x105c, 0x105f, 0x1062, 0x1065, 0x1067, 0x106e, 0x1071, + 0x1075, 0x1082, 0x1085, 0x1087, 0x108c, 0x1090, 0x109a, 0x109d, 0x10a0, 0x10c5, 0x10c8, 0x10cb, 0x10d0, 0x10fb, 0x10fd, 0x1100, 0x1249, 0x124a, 0x124e, 0x1250, + 0x1256, 0x125a, 0x125e, 0x1260, 0x1289, 0x128a, 0x128e, 0x1290, 0x12b1, 0x12b2, 0x12b6, 0x12b8, 0x12be, 0x12c2, 0x12c6, 0x12c8, 0x12d7, 0x12d8, 0x1311, 0x1312, + 0x1316, 0x1318, 0x135b, 0x135d, 0x1360, 0x1369, 0x137d, 0x1380, 0x1390, 0x139a, 0x13a0, 0x13f6, 0x13f8, 0x13fe, 0x1401, 0x166d, 0x166f, 0x1680, 0x1681, 0x169b, + 0x169d, 0x16a0, 0x16eb, 0x16ee, 0x16f1, 0x16f9, 0x1700, 0x170d, 0x170e, 0x1712, 0x1715, 0x1720, 0x1732, 0x1735, 0x1737, 0x1740, 0x1752, 0x1754, 0x1760, 0x176b, + 0x1770, 0x1772, 0x1774, 0x1780, 0x17b4, 0x17b7, 0x17be, 0x17c4, 0x17c9, 0x17d4, 0x17d5, 0x17da, 0x17dd, 0x17e0, 0x17ea, 0x17f0, 0x17fa, 0x1800, 0x1804, 0x1809, + 0x180c, 0x180f, 0x1810, 0x181a, 0x1820, 0x1843, 0x1844, 0x1879, 0x1880, 0x1885, 0x1887, 0x18a8, 0x18ab, 0x18b0, 0x18f6, 0x1900, 0x191f, 0x1922, 0x1925, 0x1928, + 0x192b, 0x192e, 0x1931, 0x1933, 0x1939, 0x193c, 0x193e, 0x1943, 0x1946, 0x1950, 0x196e, 0x1970, 0x1975, 0x1980, 0x19ac, 0x19b0, 0x19ca, 0x19d0, 0x19da, 0x19db, + 0x19de, 0x1a00, 0x1a17, 0x1a1a, 0x1a1d, 0x1a20, 0x1a55, 0x1a58, 0x1a5e, 0x1a61, 0x1a65, 0x1a6d, 0x1a73, 0x1a7c, 0x1a80, 0x1a8a, 0x1a90, 0x1a9a, 0x1aa0, 0x1aa7, + 0x1aa8, 0x1aae, 0x1ab0, 0x1abc, 0x1ac1, 0x1b00, 0x1b04, 0x1b05, 0x1b34, 0x1b36, 0x1b3a, 0x1b3d, 0x1b40, 0x1b45, 0x1b4c, 0x1b50, 0x1b5a, 0x1b61, 0x1b6b, 0x1b74, + 0x1b7d, 0x1b80, 0x1b83, 0x1ba1, 0x1ba4, 0x1ba7, 0x1bad, 0x1bb0, 0x1bba, 
0x1be6, 0x1beb, 0x1bef, 0x1bf2, 0x1bf4, 0x1bfc, 0x1c00, 0x1c24, 0x1c2c, 0x1c34, 0x1c36, + 0x1c38, 0x1c3b, 0x1c40, 0x1c4a, 0x1c4d, 0x1c50, 0x1c5a, 0x1c78, 0x1c7e, 0x1c80, 0x1c89, 0x1c90, 0x1cbb, 0x1cbd, 0x1cc0, 0x1cc8, 0x1cd0, 0x1cd3, 0x1cd4, 0x1ce1, + 0x1ce2, 0x1ce9, 0x1ced, 0x1cee, 0x1cf2, 0x1cf7, 0x1cfa, 0x1cfb, 0x1d00, 0x1d2c, 0x1d6b, 0x1d78, 0x1d79, 0x1d9b, 0x1dc0, 0x1dfa, 0x1dfb, 0x1e00, 0x1e95, 0x1e9d, + 0x1eff, 0x1f08, 0x1f10, 0x1f16, 0x1f18, 0x1f1e, 0x1f20, 0x1f28, 0x1f30, 0x1f38, 0x1f40, 0x1f46, 0x1f48, 0x1f4e, 0x1f50, 0x1f58, 0x1f60, 0x1f68, 0x1f70, 0x1f7e, + 0x1f80, 0x1f88, 0x1f90, 0x1f98, 0x1fa0, 0x1fa8, 0x1fb0, 0x1fb3, 0x1fb8, 0x1fbc, 0x1fbf, 0x1fc2, 0x1fc3, 0x1fc8, 0x1fcc, 0x1fcf, 0x1fd2, 0x1fd5, 0x1fd8, 0x1fdc, + 0x1fdd, 0x1fe0, 0x1fe8, 0x1fed, 0x1ff0, 0x1ff3, 0x1ff8, 0x1ffc, 0x1fff, 0x2000, 0x200b, 0x2010, 0x2016, 0x2018, 0x201c, 0x2020, 0x2028, 0x202a, 0x202f, 0x2030, + 0x2039, 0x203b, 0x203e, 0x2042, 0x2045, 0x2047, 0x2052, 0x2055, 0x205f, 0x2060, 0x2065, 0x2066, 0x2070, 0x2072, 0x2074, 0x207a, 0x207d, 0x2080, 0x208a, 0x208d, + 0x2090, 0x209d, 0x20a0, 0x20c0, 0x20d0, 0x20dd, 0x20e1, 0x20e2, 0x20e5, 0x20f1, 0x2100, 0x2105, 0x210a, 0x210d, 0x2111, 0x2114, 0x2118, 0x2119, 0x211e, 0x2123, + 0x212a, 0x212e, 0x2130, 0x2134, 0x2135, 0x2139, 0x213d, 0x2140, 0x2145, 0x2146, 0x214a, 0x214d, 0x2150, 0x2160, 0x2183, 0x2185, 0x2189, 0x218c, 0x2190, 0x2195, + 0x219a, 0x219c, 0x219e, 0x21a7, 0x21ae, 0x21af, 0x21cd, 0x21d1, 0x21d5, 0x21f4, 0x2300, 0x2308, 0x230c, 0x2320, 0x2322, 0x2329, 0x232b, 0x237c, 0x237d, 0x239b, + 0x23b4, 0x23dc, 0x23e2, 0x2427, 0x2440, 0x244b, 0x2460, 0x249c, 0x24ea, 0x2500, 0x25b7, 0x25b8, 0x25c1, 0x25c2, 0x25f8, 0x2600, 0x266f, 0x2670, 0x2768, 0x2776, + 0x2794, 0x27c0, 0x27c5, 0x27c7, 0x27e6, 0x27f0, 0x2800, 0x2900, 0x2983, 0x2999, 0x29d8, 0x29dc, 0x29fc, 0x29fe, 0x2b00, 0x2b30, 0x2b45, 0x2b47, 0x2b4d, 0x2b74, + 0x2b76, 0x2b96, 0x2b97, 0x2c00, 0x2c2f, 0x2c30, 0x2c5e, 0x2c62, 0x2c65, 0x2c66, 0x2c6d, 0x2c71, 0x2c76, 0x2c7c, 0x2c7e, 0x2c80, 
0x2ce3, 0x2ce5, 0x2ceb, 0x2cef, + 0x2cf2, 0x2cf4, 0x2cf9, 0x2cfb, 0x2d00, 0x2d25, 0x2d28, 0x2d2b, 0x2d30, 0x2d68, 0x2d6f, 0x2d71, 0x2d7f, 0x2d80, 0x2d97, 0x2da0, 0x2da7, 0x2da8, 0x2daf, 0x2db0, + 0x2db7, 0x2db8, 0x2dbf, 0x2dc0, 0x2dc7, 0x2dc8, 0x2dcf, 0x2dd0, 0x2dd7, 0x2dd8, 0x2ddf, 0x2de0, 0x2e00, 0x2e02, 0x2e06, 0x2e08, 0x2e0e, 0x2e15, 0x2e1b, 0x2e1f, + 0x2e22, 0x2e2a, 0x2e2f, 0x2e30, 0x2e3a, 0x2e3c, 0x2e40, 0x2e43, 0x2e4f, 0x2e53, 0x2e80, 0x2e9a, 0x2e9b, 0x2ef4, 0x2f00, 0x2fd6, 0x2ff0, 0x2ffc, 0x3000, 0x3003, + 0x3006, 0x3008, 0x3012, 0x3014, 0x301a, 0x301f, 0x3021, 0x302a, 0x302e, 0x3031, 0x3036, 0x3039, 0x303c, 0x303f, 0x3041, 0x3097, 0x309a, 0x309d, 0x30a0, 0x30a1, + 0x30fb, 0x30fe, 0x3100, 0x3105, 0x3130, 0x3131, 0x318f, 0x3192, 0x3196, 0x31a0, 0x31c0, 0x31e4, 0x31f0, 0x3200, 0x321f, 0x3220, 0x322a, 0x3248, 0x3250, 0x3251, + 0x3260, 0x3280, 0x328a, 0x32b1, 0x32c0, 0x3400, 0x4dc0, 0x4e00, 0x9ffd, 0xa000, 0xa015, 0xa016, 0xa48d, 0xa490, 0xa4c7, 0xa4d0, 0xa4f8, 0xa4fe, 0xa500, 0xa60c, + 0xa60d, 0xa610, 0xa620, 0xa62a, 0xa62c, 0xa640, 0xa66e, 0xa671, 0xa674, 0xa67e, 0xa680, 0xa69c, 0xa69e, 0xa6a0, 0xa6e6, 0xa6f0, 0xa6f2, 0xa6f8, 0xa700, 0xa717, + 0xa720, 0xa722, 0xa72f, 0xa731, 0xa770, 0xa771, 0xa778, 0xa77e, 0xa788, 0xa78b, 0xa78f, 0xa791, 0xa795, 0xa7aa, 0xa7af, 0xa7b0, 0xa7b4, 0xa7c0, 0xa7c2, 0xa7c6, + 0xa7ca, 0xa7cb, 0xa7f5, 0xa7f8, 0xa7fb, 0xa801, 0xa804, 0xa809, 0xa80c, 0xa823, 0xa826, 0xa828, 0xa82c, 0xa82d, 0xa830, 0xa836, 0xa83a, 0xa840, 0xa874, 0xa878, + 0xa880, 0xa882, 0xa8b4, 0xa8c4, 0xa8c6, 0xa8ce, 0xa8d0, 0xa8da, 0xa8e0, 0xa8f2, 0xa8f8, 0xa8fb, 0xa8ff, 0xa900, 0xa90a, 0xa926, 0xa92e, 0xa930, 0xa947, 0xa952, + 0xa954, 0xa95f, 0xa960, 0xa97d, 0xa980, 0xa983, 0xa984, 0xa9b3, 0xa9b6, 0xa9ba, 0xa9bc, 0xa9be, 0xa9c1, 0xa9ce, 0xa9d0, 0xa9da, 0xa9de, 0xa9e0, 0xa9e5, 0xa9e7, + 0xa9f0, 0xa9fa, 0xa9ff, 0xaa00, 0xaa29, 0xaa2e, 0xaa32, 0xaa35, 0xaa37, 0xaa40, 0xaa43, 0xaa44, 0xaa4c, 0xaa4e, 0xaa50, 0xaa5a, 0xaa5c, 0xaa60, 0xaa70, 0xaa71, + 0xaa77, 0xaa7a, 
0xaa7d, 0xaa7e, 0xaab0, 0xaab4, 0xaab7, 0xaab9, 0xaabd, 0xaac2, 0xaac3, 0xaadb, 0xaade, 0xaae0, 0xaaeb, 0xaaef, 0xaaf2, 0xaaf5, 0xaaf7, 0xab01, + 0xab07, 0xab09, 0xab0f, 0xab11, 0xab17, 0xab20, 0xab27, 0xab28, 0xab2f, 0xab30, 0xab5b, 0xab5c, 0xab60, 0xab69, 0xab6c, 0xab70, 0xabc0, 0xabe3, 0xabeb, 0xabee, + 0xabf0, 0xabfa, 0xac00, 0xd7a4, 0xd7b0, 0xd7c7, 0xd7cb, 0xd7fc, 0xd800, 0xe000, 0xf900, 0xfa6e, 0xfa70, 0xfada, 0xfb00, 0xfb07, 0xfb13, 0xfb18, 0xfb1d, 0xfb1f, + 0xfb29, 0xfb2a, 0xfb37, 0xfb38, 0xfb3d, 0xfb3e, 0xfb46, 0xfbb2, 0xfbc2, 0xfbd3, 0xfd3e, 0xfd40, 0xfd50, 0xfd90, 0xfd92, 0xfdc8, 0xfdf0, 0xfdfc, 0xfdfe, 0xfe00, + 0xfe10, 0xfe16, 0xfe1a, 0xfe20, 0xfe30, 0xfe33, 0xfe35, 0xfe45, 0xfe48, 0xfe4b, 0xfe4e, 0xfe51, 0xfe56, 0xfe59, 0xfe5f, 0xfe62, 0xfe66, 0xfe68, 0xfe6c, 0xfe70, + 0xfe75, 0xfe76, 0xfefd, 0xff01, 0xff02, 0xff07, 0xff0b, 0xff0e, 0xff10, 0xff1a, 0xff1c, 0xff1f, 0xff21, 0xff3b, 0xff3e, 0xff41, 0xff5b, 0xff5e, 0xff5f, 0xff64, + 0xff66, 0xff70, 0xff71, 0xff9e, 0xffa0, 0xffbf, 0xffc2, 0xffc8, 0xffca, 0xffd0, 0xffd2, 0xffd7, 0xffdb, 0xffde, 0xffe1, 0xffe4, 0xffe7, 0xffe9, 0xffed, 0xffef, + 0xfff9, 0xfffc, 0xfffe, +) + +private val rangeCategory = intArrayOf( + 0x000f, 0x030c, 0x6b18, 0x5ab8, 0x5319, 0x0018, 0x0009, 0x0018, 0x0019, 0x0018, 0x0001, 0x5b15, 0x02fb, 0x0002, 0x5b35, 0x0019, 0x000f, 0x030c, 0x001a, 0x6f1c, + 0x67a5, 0x6f90, 0x2f3c, 0x0b6b, 0x6f18, 0x78ab, 0x616b, 0x0001, 0x0019, 0x0001, 0x0002, 0x0019, 0x0002, 0x0022, 0x0022, 0x0022, 0x0041, 0x0442, 0x0041, 0x0441, + 0x0841, 0x0001, 0x0821, 0x0841, 0x0422, 0x0041, 0x0041, 0x0002, 0x0441, 0x0041, 0x0841, 0x0025, 0x0002, 0x0005, 0x0861, 0x0022, 0x0022, 0x0c22, 0x0441, 0x0041, + 0x0002, 0x0422, 0x0022, 0x0001, 0x0041, 0x0002, 0x0005, 0x0002, 0x0004, 0x001b, 0x0004, 0x001b, 0x0004, 0x001b, 0x009b, 0x001b, 0x0006, 0x0041, 0x0764, 0x4622, + 0x0844, 0x0702, 0x0011, 0x001b, 0x0701, 0x0221, 0x0041, 0x0001, 0x0011, 0x0001, 0x0002, 0x0442, 0x0001, 0x0002, 0x0022, 0x0002, 0x6441, 0x0001, 0x0002, 0x0001, + 
0x0002, 0x0022, 0x001c, 0x0006, 0x0007, 0x0041, 0x0041, 0x0022, 0x0011, 0x0001, 0x1231, 0x0018, 0x0002, 0x4698, 0x7391, 0x023a, 0x0006, 0x0014, 0x1b06, 0x0011, + 0x0005, 0x0011, 0x0005, 0x0018, 0x0011, 0x0010, 0x0019, 0x6b18, 0x001c, 0x0006, 0x4618, 0x0018, 0x0005, 0x0004, 0x0005, 0x0006, 0x0009, 0x0018, 0x18a5, 0x0005, + 0x0305, 0x0006, 0x0390, 0x0006, 0x1086, 0x0386, 0x0006, 0x0005, 0x0009, 0x0005, 0x7385, 0x0018, 0x1611, 0x0006, 0x0005, 0x0006, 0x0011, 0x0005, 0x0006, 0x0005, + 0x0011, 0x0009, 0x0005, 0x0006, 0x7084, 0x0018, 0x4624, 0x6b46, 0x0005, 0x0006, 0x0004, 0x0006, 0x10c6, 0x0086, 0x0006, 0x0011, 0x0018, 0x0011, 0x0005, 0x0006, + 0x6231, 0x0005, 0x0011, 0x0005, 0x0011, 0x0005, 0x0011, 0x0006, 0x0010, 0x0006, 0x0008, 0x0005, 0x0106, 0x0005, 0x0008, 0x0006, 0x0008, 0x1908, 0x0005, 0x0006, + 0x0005, 0x0006, 0x0018, 0x0009, 0x0098, 0x0005, 0x2106, 0x0011, 0x0005, 0x4625, 0x4625, 0x0005, 0x0011, 0x0005, 0x44b1, 0x14b1, 0x44a5, 0x14d1, 0x0008, 0x0006, + 0x2231, 0x4628, 0x14c8, 0x0011, 0x0008, 0x0011, 0x14b1, 0x18c5, 0x0011, 0x0009, 0x0005, 0x001a, 0x000b, 0x175c, 0x44d8, 0x18d1, 0x0228, 0x0005, 0x0011, 0x0005, + 0x0011, 0x0005, 0x0011, 0x0005, 0x44a5, 0x00d1, 0x0008, 0x0006, 0x0011, 0x18d1, 0x18d1, 0x4626, 0x00d1, 0x0011, 0x0005, 0x0225, 0x0011, 0x0009, 0x14c6, 0x18a5, + 0x0018, 0x0011, 0x20c6, 0x0011, 0x0005, 0x44a5, 0x0225, 0x0005, 0x0011, 0x0005, 0x44a5, 0x0005, 0x1a31, 0x0005, 0x0008, 0x0006, 0x44c6, 0x2228, 0x4626, 0x0005, + 0x0011, 0x18a5, 0x4626, 0x0009, 0x0358, 0x0011, 0x0005, 0x0006, 0x20d1, 0x0228, 0x0005, 0x4625, 0x4625, 0x0005, 0x0011, 0x0005, 0x44a5, 0x0005, 0x1a31, 0x0105, + 0x1906, 0x44c6, 0x2111, 0x2111, 0x0006, 0x0011, 0x20c6, 0x0011, 0x14b1, 0x18c5, 0x0011, 0x0009, 0x00bc, 0x000b, 0x0011, 0x14d1, 0x0005, 0x0011, 0x0005, 0x44a5, + 0x44a5, 0x1631, 0x0225, 0x4625, 0x14b1, 0x0011, 0x0005, 0x0011, 0x0005, 0x0011, 0x1908, 0x0011, 0x0008, 0x4508, 0x44c8, 0x00b1, 0x0011, 0x0008, 0x0011, 0x0009, + 0x000b, 0x001c, 0x035c, 0x0011, 0x2106, 0x00c8, 0x0005, 
0x44a5, 0x0225, 0x0005, 0x0011, 0x0005, 0x0011, 0x18c5, 0x2106, 0x4508, 0x0006, 0x0011, 0x0006, 0x0011, + 0x18d1, 0x0005, 0x0011, 0x18a5, 0x4626, 0x0009, 0x0011, 0x0018, 0x000b, 0x18bc, 0x6108, 0x0005, 0x44a5, 0x0225, 0x0005, 0x0011, 0x0005, 0x0011, 0x0005, 0x0011, + 0x20a6, 0x0008, 0x00d1, 0x4508, 0x0006, 0x0011, 0x0008, 0x0011, 0x1625, 0x0006, 0x0011, 0x0009, 0x14b1, 0x0011, 0x0006, 0x0008, 0x0005, 0x44a5, 0x0225, 0x0005, + 0x18c5, 0x0008, 0x0006, 0x0111, 0x4508, 0x14c8, 0x463c, 0x1631, 0x20a5, 0x000b, 0x0005, 0x0006, 0x0011, 0x0009, 0x000b, 0x001c, 0x0005, 0x20d1, 0x0228, 0x0005, + 0x0011, 0x0005, 0x0011, 0x0005, 0x44b1, 0x0005, 0x0011, 0x1a31, 0x2231, 0x1908, 0x44c6, 0x0011, 0x0008, 0x0011, 0x0009, 0x2231, 0x0308, 0x0011, 0x0005, 0x18a5, + 0x0006, 0x0011, 0x001a, 0x0005, 0x0004, 0x0006, 0x0018, 0x0009, 0x0018, 0x0011, 0x14b1, 0x0011, 0x0005, 0x0011, 0x0005, 0x0225, 0x0005, 0x18a5, 0x0006, 0x4625, + 0x0005, 0x0091, 0x0006, 0x0011, 0x0009, 0x0011, 0x0005, 0x0011, 0x0005, 0x001c, 0x0018, 0x731c, 0x18dc, 0x001c, 0x0009, 0x000b, 0x00dc, 0x02d5, 0x0008, 0x0005, + 0x0011, 0x0005, 0x0011, 0x0006, 0x0008, 0x0006, 0x60c6, 0x0005, 0x0006, 0x0011, 0x0006, 0x0011, 0x001c, 0x0006, 0x001c, 0x479c, 0x0018, 0x001c, 0x0018, 0x0011, + 0x0005, 0x0008, 0x0006, 0x0008, 0x0006, 0x20c6, 0x18c8, 0x0005, 0x0009, 0x0018, 0x0005, 0x1908, 0x14a6, 0x18a5, 0x14c6, 0x0008, 0x0005, 0x0008, 0x0005, 0x0006, + 0x0005, 0x2106, 0x0006, 0x0008, 0x14c8, 0x0009, 0x0008, 0x7386, 0x0001, 0x0221, 0x0011, 0x0631, 0x0002, 0x0098, 0x0002, 0x0005, 0x0011, 0x0005, 0x0011, 0x0005, + 0x0225, 0x0005, 0x0011, 0x0005, 0x0011, 0x0005, 0x0011, 0x0005, 0x0011, 0x0005, 0x0011, 0x0005, 0x0225, 0x0005, 0x0011, 0x0005, 0x0011, 0x0005, 0x0011, 0x0005, + 0x0011, 0x0005, 0x0011, 0x0006, 0x0018, 0x000b, 0x0011, 0x0005, 0x001c, 0x0011, 0x0001, 0x0011, 0x0002, 0x5231, 0x0005, 0x031c, 0x0005, 0x000c, 0x0005, 0x02d5, + 0x0011, 0x0005, 0x0018, 0x000a, 0x0005, 0x0011, 0x0005, 0x0011, 0x0005, 0x0006, 0x0011, 0x0005, 0x0006, 0x0018, 
0x0011, 0x0005, 0x0006, 0x0011, 0x0005, 0x44a5, + 0x0225, 0x0006, 0x0011, 0x0005, 0x20c6, 0x0006, 0x0008, 0x1908, 0x0006, 0x0018, 0x1318, 0x1758, 0x4626, 0x0009, 0x0011, 0x000b, 0x0011, 0x0018, 0x5318, 0x1b18, + 0x40c6, 0x0011, 0x0009, 0x0011, 0x0005, 0x0004, 0x0005, 0x0011, 0x0005, 0x0006, 0x0005, 0x00c5, 0x0011, 0x0005, 0x0011, 0x0005, 0x18d1, 0x2106, 0x1908, 0x2106, + 0x4628, 0x2231, 0x00c8, 0x0008, 0x0006, 0x0011, 0x7231, 0x6311, 0x0009, 0x0005, 0x0011, 0x0005, 0x0011, 0x0005, 0x0011, 0x0005, 0x0011, 0x0009, 0x000b, 0x0011, + 0x001c, 0x0005, 0x20c6, 0x44c8, 0x6311, 0x0005, 0x00c8, 0x0006, 0x0226, 0x20c8, 0x0006, 0x0008, 0x0006, 0x4626, 0x0009, 0x0011, 0x0009, 0x0011, 0x0018, 0x0004, + 0x0018, 0x0011, 0x0006, 0x1cc6, 0x0011, 0x0006, 0x0008, 0x0005, 0x0106, 0x0006, 0x0106, 0x0008, 0x1908, 0x0005, 0x0011, 0x0009, 0x0018, 0x001c, 0x0006, 0x001c, + 0x0011, 0x20c6, 0x0005, 0x18c8, 0x20c6, 0x18c8, 0x14a6, 0x0009, 0x0005, 0x1906, 0x1908, 0x0006, 0x0008, 0x0011, 0x0018, 0x0005, 0x0008, 0x0006, 0x0008, 0x0006, + 0x0011, 0x0018, 0x0009, 0x0011, 0x0005, 0x0009, 0x0005, 0x0004, 0x0018, 0x0002, 0x0011, 0x0001, 0x0011, 0x0001, 0x0018, 0x0011, 0x0006, 0x0018, 0x0006, 0x0008, + 0x0006, 0x0005, 0x0006, 0x0005, 0x18a5, 0x18c8, 0x0005, 0x0011, 0x0002, 0x0004, 0x0002, 0x0004, 0x0002, 0x0004, 0x0006, 0x0011, 0x0006, 0x0041, 0x0002, 0x0022, + 0x0002, 0x0001, 0x0002, 0x0011, 0x0001, 0x0011, 0x0002, 0x0001, 0x0002, 0x0001, 0x0002, 0x0011, 0x0001, 0x0011, 0x0002, 0x0031, 0x0002, 0x0001, 0x0002, 0x0011, + 0x0002, 0x0003, 0x0002, 0x0003, 0x0002, 0x0003, 0x0002, 0x4442, 0x0001, 0x0b63, 0x001b, 0x0002, 0x4442, 0x0001, 0x6f63, 0x085b, 0x4442, 0x0851, 0x0001, 0x0011, + 0x001b, 0x0002, 0x0001, 0x001b, 0x0a31, 0x4442, 0x0001, 0x6f63, 0x0011, 0x000c, 0x0010, 0x0014, 0x0018, 0x57dd, 0x57dd, 0x0018, 0x01cd, 0x0010, 0x000c, 0x0018, + 0x03dd, 0x0018, 0x5ef8, 0x6718, 0x02d5, 0x0018, 0x5f19, 0x0018, 0x000c, 0x0010, 0x0011, 0x0010, 0x008b, 0x0011, 0x000b, 0x0019, 0x12d5, 0x000b, 0x0019, 0x46d5, + 
0x0004, 0x0011, 0x001a, 0x0011, 0x0006, 0x0007, 0x0006, 0x0007, 0x0006, 0x0011, 0x079c, 0x079c, 0x0422, 0x0841, 0x0821, 0x703c, 0x0019, 0x0001, 0x001c, 0x003c, + 0x0001, 0x005c, 0x0001, 0x0002, 0x0005, 0x7382, 0x0422, 0x0019, 0x0001, 0x0002, 0x033c, 0x005c, 0x000b, 0x000a, 0x0041, 0x000a, 0x738b, 0x0011, 0x0019, 0x001c, + 0x0019, 0x001c, 0x679c, 0x001c, 0x0019, 0x001c, 0x673c, 0x033c, 0x001c, 0x0019, 0x001c, 0x02d5, 0x001c, 0x0019, 0x001c, 0x02d5, 0x001c, 0x0019, 0x001c, 0x0019, + 0x001c, 0x0019, 0x001c, 0x0011, 0x001c, 0x0011, 0x000b, 0x001c, 0x000b, 0x001c, 0x0019, 0x001c, 0x0019, 0x001c, 0x0019, 0x001c, 0x0019, 0x001c, 0x02d5, 0x000b, + 0x001c, 0x0019, 0x02d5, 0x0019, 0x02d5, 0x0019, 0x001c, 0x0019, 0x02d5, 0x0019, 0x02d5, 0x0019, 0x02d5, 0x0019, 0x001c, 0x0019, 0x001c, 0x0019, 0x001c, 0x0011, + 0x001c, 0x0011, 0x001c, 0x0001, 0x0011, 0x0002, 0x0622, 0x0001, 0x0002, 0x0022, 0x0001, 0x0822, 0x0002, 0x0004, 0x0001, 0x0041, 0x0002, 0x001c, 0x0041, 0x0006, + 0x0041, 0x0011, 0x0018, 0x2f18, 0x0002, 0x0222, 0x0011, 0x0a31, 0x0005, 0x0011, 0x0304, 0x0011, 0x0006, 0x0005, 0x0011, 0x0005, 0x0011, 0x0005, 0x0011, 0x0005, + 0x0011, 0x0005, 0x0011, 0x0005, 0x0011, 0x0005, 0x0011, 0x0005, 0x0011, 0x0005, 0x0011, 0x0006, 0x0018, 0x03dd, 0x0018, 0x7bb8, 0x0018, 0x5318, 0x7bb8, 0x7bb8, + 0x02d5, 0x0018, 0x0004, 0x0018, 0x0014, 0x0018, 0x5714, 0x0018, 0x7398, 0x0011, 0x001c, 0x0011, 0x001c, 0x0011, 0x001c, 0x0011, 0x001c, 0x0011, 0x630c, 0x1398, + 0x0145, 0x02d5, 0x001c, 0x02d5, 0x52d5, 0x0396, 0x000a, 0x0006, 0x5108, 0x0004, 0x2b9c, 0x114a, 0x7305, 0x023c, 0x0005, 0x1a31, 0x6f66, 0x1484, 0x0014, 0x0005, + 0x1098, 0x00a4, 0x0011, 0x0005, 0x0011, 0x0005, 0x7391, 0x000b, 0x001c, 0x0005, 0x001c, 0x0011, 0x0005, 0x001c, 0x0011, 0x000b, 0x001c, 0x000b, 0x001c, 0x000b, + 0x001c, 0x000b, 0x001c, 0x000b, 0x001c, 0x0005, 0x001c, 0x0005, 0x0011, 0x0005, 0x0004, 0x0005, 0x0011, 0x001c, 0x0011, 0x0005, 0x0004, 0x0018, 0x0005, 0x0004, + 0x0018, 0x0005, 0x0009, 0x0005, 0x0011, 0x0041, 0x1cc5, 
0x60e7, 0x0006, 0x0098, 0x0041, 0x0004, 0x0006, 0x0005, 0x000a, 0x0006, 0x0018, 0x0011, 0x001b, 0x0004, + 0x001b, 0x0041, 0x0002, 0x0022, 0x0004, 0x0002, 0x0022, 0x0041, 0x6f64, 0x0041, 0x0025, 0x0822, 0x0022, 0x0001, 0x0002, 0x0001, 0x0041, 0x0011, 0x0441, 0x0821, + 0x0002, 0x0011, 0x1441, 0x0884, 0x0005, 0x00c5, 0x18a5, 0x18a5, 0x0005, 0x1908, 0x0106, 0x001c, 0x0006, 0x0011, 0x000b, 0x6b9c, 0x0011, 0x0005, 0x0018, 0x0011, + 0x0008, 0x0005, 0x0008, 0x0006, 0x0011, 0x0018, 0x0009, 0x0011, 0x0006, 0x0005, 0x0018, 0x1705, 0x0006, 0x0009, 0x0005, 0x0006, 0x0018, 0x0005, 0x0006, 0x0008, + 0x0011, 0x0018, 0x0005, 0x0011, 0x0006, 0x0008, 0x0005, 0x2106, 0x0006, 0x0008, 0x0006, 0x0008, 0x0018, 0x0091, 0x0009, 0x0011, 0x0018, 0x0005, 0x0086, 0x0005, + 0x0009, 0x0005, 0x0011, 0x0005, 0x0006, 0x2106, 0x2106, 0x0006, 0x0011, 0x0005, 0x0006, 0x0005, 0x0106, 0x0011, 0x0009, 0x0011, 0x0018, 0x0005, 0x0004, 0x0005, + 0x001c, 0x1905, 0x0008, 0x0005, 0x18a6, 0x14a6, 0x0006, 0x0005, 0x18c5, 0x0005, 0x0011, 0x10a5, 0x0018, 0x0005, 0x18c8, 0x6308, 0x1085, 0x00c8, 0x0011, 0x0005, + 0x0011, 0x0005, 0x0011, 0x0005, 0x0011, 0x0005, 0x0011, 0x0005, 0x0011, 0x0002, 0x001b, 0x0004, 0x0002, 0x6f64, 0x0011, 0x0002, 0x0005, 0x1908, 0x1918, 0x0011, + 0x0009, 0x0011, 0x0005, 0x0011, 0x0005, 0x0011, 0x0005, 0x0011, 0x0013, 0x0012, 0x0005, 0x0011, 0x0005, 0x0011, 0x0002, 0x0011, 0x0002, 0x0011, 0x00c5, 0x0005, + 0x0019, 0x0005, 0x0011, 0x0005, 0x0011, 0x1625, 0x0005, 0x001b, 0x0011, 0x0005, 0x02b6, 0x0011, 0x0005, 0x0011, 0x0005, 0x0011, 0x0005, 0x039a, 0x0011, 0x0006, + 0x0018, 0x5ab8, 0x0011, 0x0006, 0x5298, 0x0017, 0x02d5, 0x5718, 0x6316, 0x5f18, 0x62f7, 0x4718, 0x5318, 0x02d5, 0x0018, 0x6699, 0x0239, 0x6358, 0x0011, 0x0005, + 0x0011, 0x0005, 0x4231, 0x0018, 0x6b18, 0x5ab8, 0x5319, 0x0018, 0x0009, 0x0018, 0x0019, 0x0018, 0x0001, 0x5b15, 0x02fb, 0x0002, 0x5b35, 0x0019, 0x62d5, 0x0018, + 0x0005, 0x0004, 0x0005, 0x0004, 0x0005, 0x0011, 0x0005, 0x0011, 0x0005, 0x0011, 0x0005, 0x4625, 0x44a5, 0x6a31, 
/**
 * Extracts one category value from the packed [code] of a range.
 *
 * [ch] is the offset of the character from the start of its range. Depending on its magnitude,
 * [code] holds one, two or three 5-bit category values that repeat periodically across the range.
 */
private fun categoryValueFrom(code: Int, ch: Int): Int {
    // One value: the whole range shares it.
    if (code < 0x20) return code

    // Two values alternating with period 2 (even offset -> low bits, odd offset -> high bits).
    if (code < 0x400) {
        val isOddOffset = (ch and 1) == 1
        return if (isOddOffset) code shr 5 else code and 0x1f
    }

    // Three values repeating with period 3, stored from the lowest bits upwards.
    return when (ch % 3) {
        2 -> code shr 10
        1 -> (code shr 5) and 0x1f
        else -> code and 0x1f
    }
}

/**
 * Returns the Unicode general category of this character as an Int.
 */
internal fun Char.getCategoryValue(): Int {
    val codeUnit = this.code
    val rangeIndex = binarySearchRange(rangeStart, codeUnit)
    val offsetInRange = codeUnit - rangeStart[rangeIndex]

    // The table value 17 is reported as CharCategory.UNASSIGNED.
    return when (val category = categoryValueFrom(rangeCategory[rangeIndex], offsetInRange)) {
        17 -> CharCategory.UNASSIGNED.value
        else -> category
    }
}
/**
 * Returns the index of the largest element in [array] smaller or equal to the specified [needle],
 * or -1 if [needle] is smaller than the smallest element in [array] (or [array] is empty).
 *
 * The search assumes that [array] is sorted in ascending order.
 */
internal fun binarySearchRange(array: IntArray, needle: Int): Int {
    var bottom = 0
    var top = array.size - 1
    while (bottom <= top) {
        val middle = (bottom + top) / 2
        val value = array[middle]
        when {
            needle > value -> bottom = middle + 1
            needle == value -> return middle
            else -> top = middle - 1
        }
    }
    // The loop exits with top == bottom - 1: everything up to `top` is smaller than `needle`
    // and everything from `bottom` on is greater, so `top` is the answer (-1 when none is smaller).
    return top
}

/**
 * Returns an integer from 0..9 indicating the digit this character represents,
 * or -1 if this character is not a digit.
 */
internal fun Char.digitToIntImpl(): Int {
    val ch = this.code
    val index = binarySearchRange(rangeStart, ch)
    // A character below the first digit range yields -1; indexing with it would go out of bounds.
    if (index < 0) return -1
    val diff = ch - rangeStart[index]
    // Each range start is the zero digit; only the ten code units from it are digits.
    return if (diff < 10) diff else -1
}

/**
 * Returns `true` if this character is a digit.
 */
internal fun Char.isDigitImpl(): Boolean = digitToIntImpl() >= 0
+ */ + +package kotlin.text + +// +// NOTE: THIS FILE IS AUTO-GENERATED by the GenerateUnicodeData.kt +// See: https://github.com/JetBrains/kotlin/tree/master/libraries/stdlib +// + +// 222 ranges totally +private val rangeStart = intArrayOf( + 0x0041, 0x0061, 0x00aa, 0x00ba, 0x00c0, 0x00d8, 0x00df, 0x00f8, 0x00ff, 0x0138, 0x0149, 0x0172, 0x0181, 0x0190, 0x019f, 0x01ae, 0x01bd, 0x01cc, 0x01dd, 0x01e9, + 0x01f8, 0x0231, 0x0240, 0x024f, 0x0294, 0x02a3, 0x02b2, 0x02ec, 0x0370, 0x037f, 0x038e, 0x039d, 0x03ac, 0x03c8, 0x03d7, 0x03ee, 0x03fd, 0x0430, 0x045f, 0x048a, + 0x04c1, 0x04cf, 0x0531, 0x0559, 0x0560, 0x05d0, 0x066e, 0x06e5, 0x06ff, 0x074d, 0x07f4, 0x081a, 0x0840, 0x08b6, 0x0950, 0x0985, 0x09aa, 0x09bd, 0x09df, 0x0a05, + 0x0a2a, 0x0a39, 0x0a72, 0x0a93, 0x0ab5, 0x0ae0, 0x0b0f, 0x0b32, 0x0b5c, 0x0b83, 0x0b92, 0x0ba3, 0x0bd0, 0x0c12, 0x0c58, 0x0c85, 0x0caa, 0x0cde, 0x0d04, 0x0d3d, + 0x0d5f, 0x0d9a, 0x0dc0, 0x0e40, 0x0e86, 0x0ea7, 0x0ec0, 0x0f00, 0x0f88, 0x1050, 0x1065, 0x108e, 0x10a0, 0x10bf, 0x10d0, 0x10f1, 0x1100, 0x124a, 0x125a, 0x1290, + 0x12c0, 0x12d8, 0x1380, 0x13a0, 0x13f8, 0x1401, 0x166f, 0x16f1, 0x1720, 0x176e, 0x17dc, 0x1887, 0x1900, 0x1980, 0x1a20, 0x1b45, 0x1bba, 0x1c5a, 0x1c80, 0x1c90, + 0x1cbd, 0x1ce9, 0x1cfa, 0x1d00, 0x1d2c, 0x1d6a, 0x1d79, 0x1d9b, 0x1e00, 0x1e95, 0x1e9d, 0x1f00, 0x1f0f, 0x1f20, 0x1f2f, 0x1f3e, 0x1f4d, 0x1f5d, 0x1f6c, 0x1f7b, + 0x1f8a, 0x1f99, 0x1fa8, 0x1fae, 0x1fbe, 0x1fd0, 0x1fe0, 0x1ff2, 0x2071, 0x2102, 0x210f, 0x2124, 0x2131, 0x2145, 0x2183, 0x2c00, 0x2c30, 0x2c60, 0x2c6f, 0x2c7e, + 0x2c80, 0x2ce4, 0x2cf3, 0x2d00, 0x2d27, 0x2d36, 0x2da0, 0x2db8, 0x2dd0, 0x3005, 0x3041, 0x30fc, 0x31a0, 0x3400, 0x4e00, 0xa000, 0xa4d0, 0xa610, 0xa640, 0xa66e, + 0xa680, 0xa69c, 0xa722, 0xa731, 0xa770, 0xa77f, 0xa78e, 0xa79d, 0xa7ac, 0xa7bb, 0xa7ca, 0xa7f5, 0xa804, 0xa840, 0xa8fb, 0xa930, 0xa9cf, 0xa9fa, 0xaa44, 0xaa7e, + 0xaab9, 0xaadb, 0xab01, 0xab20, 0xab30, 0xab5c, 0xab70, 0xabc0, 0xd7b0, 0xf900, 0xfa70, 0xfb00, 0xfb13, 0xfb22, 0xfb3e, 0xfb4d, 
0xfd50, 0xfe70, 0xff21, 0xff41, + 0xff66, 0xffd2, +) + +private val rangeLength = intArrayOf( + 0x001a, 0x001a, 0x000c, 0x0001, 0x0017, 0x0007, 0x0018, 0x0007, 0x0039, 0x0011, 0x0029, 0x000f, 0x000f, 0x000f, 0x000f, 0x000f, 0x000f, 0x0011, 0x000c, 0x000f, + 0x0039, 0x000f, 0x000f, 0x0045, 0x000f, 0x000f, 0x0033, 0x0003, 0x000e, 0x000e, 0x000f, 0x000f, 0x001c, 0x000f, 0x0017, 0x000f, 0x0033, 0x002f, 0x0023, 0x0037, + 0x000e, 0x0061, 0x0026, 0x0001, 0x0029, 0x007b, 0x0068, 0x0018, 0x0031, 0x009e, 0x0022, 0x000f, 0x0075, 0x0088, 0x0031, 0x0024, 0x0010, 0x0021, 0x001e, 0x0024, + 0x000f, 0x0026, 0x0020, 0x0021, 0x001c, 0x002d, 0x0022, 0x000c, 0x0016, 0x000e, 0x000e, 0x0017, 0x0041, 0x002c, 0x0029, 0x0024, 0x0014, 0x0015, 0x0037, 0x001a, + 0x0038, 0x0024, 0x0074, 0x0045, 0x0020, 0x0017, 0x0020, 0x006d, 0x00b8, 0x0012, 0x001d, 0x0001, 0x001f, 0x000f, 0x0021, 0x000f, 0x0149, 0x000f, 0x0034, 0x002f, + 0x0017, 0x0083, 0x0010, 0x0056, 0x0006, 0x026c, 0x007c, 0x0021, 0x004d, 0x006a, 0x00a9, 0x006f, 0x0075, 0x0097, 0x0114, 0x006b, 0x0096, 0x0024, 0x0009, 0x002b, + 0x0003, 0x000e, 0x0001, 0x002c, 0x003e, 0x000f, 0x0022, 0x0025, 0x0095, 0x0008, 0x0063, 0x000f, 0x000f, 0x000f, 0x000f, 0x000f, 0x000f, 0x000f, 0x000f, 0x000f, + 0x000f, 0x000f, 0x0006, 0x000f, 0x000f, 0x000c, 0x000d, 0x000b, 0x002c, 0x000d, 0x000f, 0x000d, 0x000f, 0x000a, 0x0002, 0x002f, 0x002f, 0x000f, 0x000f, 0x0002, + 0x0064, 0x000f, 0x0001, 0x0026, 0x000f, 0x0061, 0x0017, 0x0017, 0x0060, 0x0038, 0x00ba, 0x0093, 0x0060, 0x19c0, 0x51fd, 0x048d, 0x013d, 0x001c, 0x002e, 0x0012, + 0x001c, 0x0084, 0x000f, 0x003f, 0x000f, 0x000f, 0x000f, 0x000f, 0x000f, 0x000f, 0x0001, 0x000f, 0x001f, 0x00b8, 0x002b, 0x0083, 0x0021, 0x0049, 0x0037, 0x0039, + 0x000a, 0x001a, 0x0016, 0x000f, 0x002b, 0x000e, 0x0050, 0x2be4, 0x004c, 0x016e, 0x006a, 0x0007, 0x000f, 0x001b, 0x000f, 0x01f1, 0x00ac, 0x008d, 0x001a, 0x001a, + 0x006a, 0x000b, +) + +private val rangeCategory = intArrayOf( + 0x0006, 0x0005, 0x1400003, 0x0007, 0x0006, 0x0006, 
0x0005, 0x0005, 0x0019, 0x0019, 0x0019, 0x5599a666, 0x696a699a, 0x5a56a69a, 0x5966999a, 0x6d699a9a, 0x79e7bfd5, 0x0019, 0x0019, 0x699e5999, + 0x0016, 0x5a695559, 0x66666a99, 0x0005, 0x55555557, 0x7d555555, 0x70c0840, 0x0013, 0x15706366, 0x18a88002, 0x6aaaaa9a, 0x6aaaa2aa, 0x0005, 0x56a59555, 0x0019, 0x56986556, 0x0006, 0x0005, 0x0019, 0x0016, + 0x0016, 0x0019, 0x0006, 0x0007, 0x0005, 0x1684086c, 0xe30208, 0x5020e08, 0x812004, 0xc011764, 0x2810808, 0x1811204, 0x1a8b0e64, 0x1b67848, 0x78a0e04, 0x1020420, 0x181021c, 0x6812004, 0x5021c0c, 0x1020818, + 0x73cf3fff, 0x843e04, 0x89200c, 0x870258, 0x9010614, 0x5812e08, 0x960408, 0x1850208, 0x7830208, 0x1860204, 0x1f33c0ff, 0x1830608, 0x886804, 0x190025c, 0xf020a0c, 0x830220, 0x1850228, 0x7820204, 0x830224, 0x2812004, + 0x286300c, 0x890260, 0xb0741c, 0x82741c, 0x980214, 0x4820228, 0xa810214, 0x887e04, 0xa2be614, 0x1840818, 0x2030e08, 0x0007, 0x0006, 0x60022aaa, 0x0005, 0x55c55555, 0x0007, 0x870410, 0xa90410, 0x1040284, + 0x1040204, 0x10402e4, 0x0007, 0x0006, 0x0005, 0x0007, 0x29a0244, 0x8d0e20, 0x7121c48, 0x11b41e0c, 0x3d98604, 0x2810288, 0x11e627c, 0x1b1a08b0, 0x2e81a4d4, 0x69e6e1c, 0x14a434b0, 0x0007, 0x0005, 0x0006, + 0x0006, 0x860210, 0x0007, 0x0005, 0x0007, 0x75555557, 0x0005, 0x0007, 0x0016, 0x0005, 0x0019, 0x6aaa5555, 0x6aa81556, 0x6aaa5555, 0x6aa95556, 0x6aa0555a, 0x62155542, 0x6a955562, 0x555555aa, 0x7d555415, + 0x75555fff, 0x55557fff, 0x0007, 0x7aa5155f, 0x7aa51501, 0x1aa5055, 0x6aa5555, 0x7aa515, 0x8011a04, 0x5a90802, 0x6aa021a9, 0x64aa222, 0x6941ff6a, 0x140156, 0x0016, 0x0006, 0x0005, 0x699996a6, 0x7d55659a, 0x0006, + 0x0016, 0x60198001, 0x0005, 0x0005, 0x7ffc1001, 0x8010ec8, 0x87021c, 0x87021c, 0x2807021c, 0x2855408, 0x830d58, 0xab0a10, 0x6080, 0x0007, 0x0007, 0x0007, 0x04b8, 0x1440, 0x0016, 0x2004, + 0x0016, 0x18c60408, 0x56666666, 0x0019, 0x69995557, 0x660d9999, 0x6666566d, 0x69999999, 0x6666aa6a, 0x66a98199, 0x0005, 0x73fff7f6, 0x840208, 0x1f321cd0, 0x5820204, 0x39d325c, 0x852004, 0xba90214, 0x1972820, 
0x18102c8, + 0x810414, 0x38b040c, 0x1060418, 0x021c, 0x0005, 0x1d5555ff, 0x0005, 0x3a8c, 0x085c, 0x0007, 0x0007, 0x0005, 0x7f300155, 0x8d021c, 0x7fff3cf3, 0x4394, 0x14360500, 0x0214, 0x0006, 0x0005, + 0x1060764, 0x0418, +) + + +/** + * Returns `true` if this character is a letter. + */ +internal fun Char.isLetterImpl(): Boolean { + return getLetterType() != 0 +} + +/** + * Returns `true` if this character is a lower case letter, or it has contributory property `Other_Lowercase`. + */ +internal fun Char.isLowerCaseImpl(): Boolean { + return getLetterType() == 1 || code.isOtherLowercase() +} + +/** + * Returns `true` if this character is an upper case letter, or it has contributory property `Other_Uppercase`. + */ +internal fun Char.isUpperCaseImpl(): Boolean { + return getLetterType() == 2 || code.isOtherUppercase() +} + +/** + * Returns + * - `1` if the character is a lower case letter, + * - `2` if the character is an upper case letter, + * - `3` if the character is a letter but not a lower or upper case letter, + * - `0` otherwise. 
+ */
+private fun Char.getLetterType(): Int {
+    val ch = this.code
+    val index = binarySearchRange(rangeStart, ch)
+    // Other callers of binarySearchRange in this patch (isOtherLowercase, isCased, isCaseIgnorable)
+    // treat a negative result as "no range found"; do the same here instead of indexing the tables with it.
+    if (index < 0) {
+        return 0
+    }
+
+    // Distinct local names: the originals shadowed the file-level `rangeStart` array and `Char.code`.
+    val start = rangeStart[index]
+    val endInclusive = start + rangeLength[index] - 1
+    if (ch > endInclusive) {
+        return 0
+    }
+
+    val pattern = rangeCategory[index]
+    val lastTwoBits = pattern and 0x3
+
+    if (lastTwoBits == 0) {
+        // Gap pattern: above the two tag bits sit 7-bit run lengths that alternate between
+        // a run of letters (type 3) and a run of non-letters (type 0); two such pairs are read.
+        var shift = 2
+        var threshold = start
+        repeat(2) {
+            threshold += (pattern shr shift) and 0x7f
+            if (threshold > ch) {
+                return 3
+            }
+            shift += 7
+            threshold += (pattern shr shift) and 0x7f
+            if (threshold > ch) {
+                return 0
+            }
+            shift += 7
+        }
+        // Past the second gap, the rest of the range is letters.
+        return 3
+    }
+
+    // A pattern no larger than 0x7 assigns its two low bits to every character of the range.
+    if (pattern <= 0x7) {
+        return lastTwoBits
+    }
+
+    // Otherwise the pattern is a sequence of 2-bit types: patterns up to 0x1F hold two types that
+    // alternate along the range, larger patterns hold one type per position in the range.
+    val distance = ch - start
+    val slot = if (pattern <= 0x1F) distance % 2 else distance
+    return (pattern shr (2 * slot)) and 0x3
+}
diff --git a/libraries/stdlib/wasm/src/generated/_LowercaseMappings.kt b/libraries/stdlib/wasm/src/generated/_LowercaseMappings.kt
new file mode 100644
index 00000000000..5e88a6e4c06
--- /dev/null
+++ b/libraries/stdlib/wasm/src/generated/_LowercaseMappings.kt
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors.
+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
+ */ + +package kotlin.text + +// +// NOTE: THIS FILE IS AUTO-GENERATED by the GenerateUnicodeData.kt +// See: https://github.com/JetBrains/kotlin/tree/master/libraries/stdlib +// + +// 174 ranges totally +private val rangeStart = intArrayOf( + 0x0041, 0x00c0, 0x00d8, 0x0100, 0x0130, 0x0132, 0x0139, 0x014a, 0x0178, 0x0179, 0x0181, 0x0182, 0x0186, 0x0187, 0x0189, 0x018b, 0x018e, 0x018f, 0x0190, 0x0191, + 0x0193, 0x0194, 0x0196, 0x0197, 0x0198, 0x019c, 0x019d, 0x019f, 0x01a0, 0x01a6, 0x01a7, 0x01a9, 0x01ac, 0x01ae, 0x01af, 0x01b1, 0x01b3, 0x01b7, 0x01b8, 0x01c4, + 0x01c5, 0x01c7, 0x01c8, 0x01ca, 0x01cb, 0x01de, 0x01f1, 0x01f2, 0x01f6, 0x01f7, 0x01f8, 0x0220, 0x0222, 0x023a, 0x023b, 0x023d, 0x023e, 0x0241, 0x0243, 0x0244, + 0x0245, 0x0246, 0x0370, 0x0376, 0x037f, 0x0386, 0x0388, 0x038c, 0x038e, 0x0391, 0x03a3, 0x03cf, 0x03d8, 0x03f4, 0x03f7, 0x03f9, 0x03fa, 0x03fd, 0x0400, 0x0410, + 0x0460, 0x048a, 0x04c0, 0x04c1, 0x04d0, 0x0531, 0x10a0, 0x10c7, 0x13a0, 0x13f0, 0x1c90, 0x1cbd, 0x1e00, 0x1e9e, 0x1ea0, 0x1f08, 0x1f18, 0x1f28, 0x1f38, 0x1f48, + 0x1f59, 0x1f68, 0x1f88, 0x1f98, 0x1fa8, 0x1fb8, 0x1fba, 0x1fbc, 0x1fc8, 0x1fcc, 0x1fd8, 0x1fda, 0x1fe8, 0x1fea, 0x1fec, 0x1ff8, 0x1ffa, 0x1ffc, 0x2126, 0x212a, + 0x212b, 0x2132, 0x2160, 0x2183, 0x24b6, 0x2c00, 0x2c60, 0x2c62, 0x2c63, 0x2c64, 0x2c67, 0x2c6d, 0x2c6e, 0x2c6f, 0x2c70, 0x2c72, 0x2c7e, 0x2c80, 0x2ceb, 0x2cf2, + 0xa640, 0xa680, 0xa722, 0xa732, 0xa779, 0xa77d, 0xa77e, 0xa78b, 0xa78d, 0xa790, 0xa796, 0xa7aa, 0xa7ab, 0xa7ac, 0xa7ad, 0xa7ae, 0xa7b0, 0xa7b1, 0xa7b2, 0xa7b3, + 0xa7b4, 0xa7c2, 0xa7c4, 0xa7c5, 0xa7c6, 0xa7c7, 0xa7f5, 0xff21, 0x10400, 0x104b0, 0x10c80, 0x118a0, 0x16e40, 0x1e900, +) + +private val rangeLength = intArrayOf( + 0x2011a, 0x20117, 0x20107, 0x122f, -0xc6eff, 0x1205, 0x120f, 0x122d, -0x78eff, 0x1205, 0xd2101, 0x1203, 0xce101, 0x1101, 0xcd102, 0x1101, 0x4f101, 0xca101, 0xcb101, 0x1101, + 0xcd101, 0xcf101, 0xd3101, 0xd1101, 0x1101, 0xd3101, 0xd5101, 0xd6101, 0x1205, 0xda101, 0x1101, 0xda101, 0x1101, 
0xda101, 0x1101, 0xd9102, 0x1203, 0xdb101, 0x1405, 0x2101, + 0x1101, 0x2101, 0x1101, 0x2101, 0x1211, 0x1211, 0x2101, 0x1203, -0x60eff, -0x37eff, 0x1227, -0x81eff, 0x1211, 0x2a2b101, 0x1101, -0xa2eff, 0x2a28101, 0x1101, -0xc2eff, 0x45101, + 0x47101, 0x1209, 0x1203, 0x1101, 0x74101, 0x26101, 0x25103, 0x40101, 0x3f102, 0x20111, 0x20109, 0x8101, 0x1217, -0x3beff, 0x1101, -0x6eff, 0x1101, -0x81efd, 0x50110, 0x20120, + 0x1221, 0x1235, 0xf101, 0x120d, 0x125f, 0x30126, 0x1c60126, 0x1c60607, 0x97d0150, 0x8106, -0xbbfed5, -0xbbfefd, 0x1295, -0x1dbeeff, 0x125f, -0x7ef8, -0x7efa, -0x7ef8, -0x7ef8, -0x7efa, + -0x7df9, -0x7ef8, -0x7ef8, -0x7ef8, -0x7ef8, -0x7efe, -0x49efe, -0x8eff, -0x55efc, -0x8eff, -0x7efe, -0x63efe, -0x7efe, -0x6fefe, -0x6eff, -0x7fefe, -0x7defe, -0x8eff, -0x1d5ceff, -0x20beeff, + -0x2045eff, 0x1c101, 0x10110, 0x1101, 0x1a11a, 0x3012f, 0x1101, -0x29f6eff, -0xee5eff, -0x29e6eff, 0x1205, -0x2a1beff, -0x29fceff, -0x2a1eeff, -0x2a1deff, 0x1304, -0x2a3eefe, 0x1263, 0x1203, 0x1101, + 0x122d, 0x121b, 0x120d, 0x123d, 0x1203, -0x8a03eff, 0x1209, 0x1101, -0xa527eff, 0x1203, 0x1213, -0xa543eff, -0xa54eeff, -0xa54aeff, -0xa540eff, -0xa543eff, -0xa511eff, -0xa529eff, -0xa514eff, 0x3a0101, + 0x120b, 0x1101, -0x2feff, -0xa542eff, -0x8a37eff, 0x1203, 0x1101, 0x2011a, 0x28128, 0x28124, 0x40133, 0x20120, 0x20120, 0x22122, +) + +internal fun Int.lowercaseCodePoint(): Int { + if (this in 0x41..0x5a) { + return this + 32 + } + if (this < 0x80) { + return this + } + val index = binarySearchRange(rangeStart, this) + return equalDistanceMapping(this, rangeStart[index], rangeLength[index]) +} + +internal fun Char.lowercaseCharImpl(): Char { + return code.lowercaseCodePoint().toChar() +} diff --git a/libraries/stdlib/wasm/src/generated/_OneToManyLowercaseMappings.kt b/libraries/stdlib/wasm/src/generated/_OneToManyLowercaseMappings.kt new file mode 100644 index 00000000000..334d1245de2 --- /dev/null +++ b/libraries/stdlib/wasm/src/generated/_OneToManyLowercaseMappings.kt @@ -0,0 +1,19 
@@
+/*
+ * Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors.
+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
+ */
+
+package kotlin.text
+
+//
+// NOTE: THIS FILE IS AUTO-GENERATED by the GenerateUnicodeData.kt
+// See: https://github.com/JetBrains/kotlin/tree/master/libraries/stdlib
+//
+
+// 1 mappings totally
+/**
+ * Lowercases this character into a string.
+ *
+ * U+0130 is the only character that expands to two chars (U+0069 U+0307);
+ * every other character yields the one-char result of [lowercaseCharImpl].
+ */
+internal fun Char.lowercaseImpl(): String =
+    when (this) {
+        '\u0130' -> "\u0069\u0307"
+        else -> lowercaseCharImpl().toString()
+    }
diff --git a/libraries/stdlib/wasm/src/generated/_OneToManyUppercaseMappings.kt b/libraries/stdlib/wasm/src/generated/_OneToManyUppercaseMappings.kt
new file mode 100644
index 00000000000..2c2e70792f2
--- /dev/null
+++ b/libraries/stdlib/wasm/src/generated/_OneToManyUppercaseMappings.kt
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors.
+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
+ */ + +package kotlin.text + +// +// NOTE: THIS FILE IS AUTO-GENERATED by the GenerateUnicodeData.kt +// See: https://github.com/JetBrains/kotlin/tree/master/libraries/stdlib +// + +// 102 mappings totally +private val keys = intArrayOf( + 0x00df, 0x0149, 0x01f0, 0x0390, 0x03b0, 0x0587, 0x1e96, 0x1e97, 0x1e98, 0x1e99, 0x1e9a, 0x1f50, 0x1f52, 0x1f54, 0x1f56, 0x1f80, 0x1f81, 0x1f82, 0x1f83, 0x1f84, + 0x1f85, 0x1f86, 0x1f87, 0x1f88, 0x1f89, 0x1f8a, 0x1f8b, 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f, 0x1f90, 0x1f91, 0x1f92, 0x1f93, 0x1f94, 0x1f95, 0x1f96, 0x1f97, 0x1f98, + 0x1f99, 0x1f9a, 0x1f9b, 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f, 0x1fa0, 0x1fa1, 0x1fa2, 0x1fa3, 0x1fa4, 0x1fa5, 0x1fa6, 0x1fa7, 0x1fa8, 0x1fa9, 0x1faa, 0x1fab, 0x1fac, + 0x1fad, 0x1fae, 0x1faf, 0x1fb2, 0x1fb3, 0x1fb4, 0x1fb6, 0x1fb7, 0x1fbc, 0x1fc2, 0x1fc3, 0x1fc4, 0x1fc6, 0x1fc7, 0x1fcc, 0x1fd2, 0x1fd3, 0x1fd6, 0x1fd7, 0x1fe2, + 0x1fe3, 0x1fe4, 0x1fe6, 0x1fe7, 0x1ff2, 0x1ff3, 0x1ff4, 0x1ff6, 0x1ff7, 0x1ffc, 0xfb00, 0xfb01, 0xfb02, 0xfb03, 0xfb04, 0xfb05, 0xfb06, 0xfb13, 0xfb14, 0xfb15, + 0xfb16, 0xfb17, +) +private val values = arrayOf( + "\u0053\u0053", "\u02BC\u004E", "\u004A\u030C", "\u0399\u0308\u0301", "\u03A5\u0308\u0301", "\u0535\u0552", "\u0048\u0331", "\u0054\u0308", "\u0057\u030A", "\u0059\u030A", "\u0041\u02BE", "\u03A5\u0313", "\u03A5\u0313\u0300", "\u03A5\u0313\u0301", "\u03A5\u0313\u0342", "\u1F08\u0399", "\u1F09\u0399", "\u1F0A\u0399", "\u1F0B\u0399", "\u1F0C\u0399", + "\u1F0D\u0399", "\u1F0E\u0399", "\u1F0F\u0399", "\u1F08\u0399", "\u1F09\u0399", "\u1F0A\u0399", "\u1F0B\u0399", "\u1F0C\u0399", "\u1F0D\u0399", "\u1F0E\u0399", "\u1F0F\u0399", "\u1F28\u0399", "\u1F29\u0399", "\u1F2A\u0399", "\u1F2B\u0399", "\u1F2C\u0399", "\u1F2D\u0399", "\u1F2E\u0399", "\u1F2F\u0399", "\u1F28\u0399", + "\u1F29\u0399", "\u1F2A\u0399", "\u1F2B\u0399", "\u1F2C\u0399", "\u1F2D\u0399", "\u1F2E\u0399", "\u1F2F\u0399", "\u1F68\u0399", "\u1F69\u0399", "\u1F6A\u0399", "\u1F6B\u0399", "\u1F6C\u0399", "\u1F6D\u0399", 
"\u1F6E\u0399", "\u1F6F\u0399", "\u1F68\u0399", "\u1F69\u0399", "\u1F6A\u0399", "\u1F6B\u0399", "\u1F6C\u0399", + "\u1F6D\u0399", "\u1F6E\u0399", "\u1F6F\u0399", "\u1FBA\u0399", "\u0391\u0399", "\u0386\u0399", "\u0391\u0342", "\u0391\u0342\u0399", "\u0391\u0399", "\u1FCA\u0399", "\u0397\u0399", "\u0389\u0399", "\u0397\u0342", "\u0397\u0342\u0399", "\u0397\u0399", "\u0399\u0308\u0300", "\u0399\u0308\u0301", "\u0399\u0342", "\u0399\u0308\u0342", "\u03A5\u0308\u0300", + "\u03A5\u0308\u0301", "\u03A1\u0313", "\u03A5\u0342", "\u03A5\u0308\u0342", "\u1FFA\u0399", "\u03A9\u0399", "\u038F\u0399", "\u03A9\u0342", "\u03A9\u0342\u0399", "\u03A9\u0399", "\u0046\u0046", "\u0046\u0049", "\u0046\u004C", "\u0046\u0046\u0049", "\u0046\u0046\u004C", "\u0053\u0054", "\u0053\u0054", "\u0544\u0546", "\u0544\u0535", "\u0544\u053B", + "\u054E\u0546", "\u0544\u053D", +) + +internal fun Char.oneToManyUppercase(): String? { + if (this < '\u00df') { + return null + } + + val code = this.code + val index = binarySearchRange(keys, code) + if (keys[index] == code) { + return values[index] + } + return null +} + +internal fun Char.uppercaseImpl(): String { + return oneToManyUppercase() ?: uppercaseCharImpl().toString() +} diff --git a/libraries/stdlib/wasm/src/generated/_OtherLowercaseChars.kt b/libraries/stdlib/wasm/src/generated/_OtherLowercaseChars.kt new file mode 100644 index 00000000000..13bfe14ac98 --- /dev/null +++ b/libraries/stdlib/wasm/src/generated/_OtherLowercaseChars.kt @@ -0,0 +1,23 @@ +/* + * Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors. + * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file. 
+ */
+
+package kotlin.text
+
+//
+// NOTE: THIS FILE IS AUTO-GENERATED by the GenerateUnicodeData.kt
+// See: https://github.com/JetBrains/kotlin/tree/master/libraries/stdlib
+//
+
+private val otherLowerStart = intArrayOf(
+    0x00aa, 0x00ba, 0x02b0, 0x02c0, 0x02e0, 0x0345, 0x037a, 0x1d2c, 0x1d78, 0x1d9b, 0x2071, 0x207f, 0x2090, 0x2170, 0x24d0, 0x2c7c, 0xa69c, 0xa770, 0xa7f8, 0xab5c,
+)
+private val otherLowerLength = intArrayOf(
+    1, 1, 9, 2, 5, 1, 1, 63, 1, 37, 1, 1, 13, 16, 26, 2, 2, 1, 2, 4,
+)
+
+/**
+ * Returns `true` if this code point lies inside one of the ranges described by
+ * [otherLowerStart] (first code point) and [otherLowerLength] (number of code points).
+ */
+internal fun Int.isOtherLowercase(): Boolean {
+    val rangeIndex = binarySearchRange(otherLowerStart, this)
+    if (rangeIndex < 0) {
+        return false
+    }
+    val endExclusive = otherLowerStart[rangeIndex] + otherLowerLength[rangeIndex]
+    return this < endExclusive
+}
diff --git a/libraries/stdlib/wasm/src/generated/_OtherUppercaseChars.kt b/libraries/stdlib/wasm/src/generated/_OtherUppercaseChars.kt
new file mode 100644
index 00000000000..d26518cb207
--- /dev/null
+++ b/libraries/stdlib/wasm/src/generated/_OtherUppercaseChars.kt
@@ -0,0 +1,16 @@
+/*
+ * Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors.
+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
+ */
+
+package kotlin.text
+
+//
+// NOTE: THIS FILE IS AUTO-GENERATED by the GenerateUnicodeData.kt
+// See: https://github.com/JetBrains/kotlin/tree/master/libraries/stdlib
+//
+
+/**
+ * Returns `true` if this code point lies in U+2160..U+216F or in U+24B6..U+24CF.
+ */
+internal fun Int.isOtherUppercase(): Boolean = when (this) {
+    in 0x2160..0x216f, in 0x24b6..0x24cf -> true
+    else -> false
+}
diff --git a/libraries/stdlib/wasm/src/generated/_StringLowercase.kt b/libraries/stdlib/wasm/src/generated/_StringLowercase.kt
new file mode 100644
index 00000000000..3a5a82f611a
--- /dev/null
+++ b/libraries/stdlib/wasm/src/generated/_StringLowercase.kt
@@ -0,0 +1,156 @@
+/*
+ * Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors.
+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
+ */ + +package kotlin.text + +// +// NOTE: THIS FILE IS AUTO-GENERATED by the GenerateUnicodeData.kt +// See: https://github.com/JetBrains/kotlin/tree/master/libraries/stdlib +// + +private val casedStart = intArrayOf( + 0x10400, 0x104b0, 0x104d8, 0x10c80, 0x10cc0, 0x118a0, 0x16e40, 0x1d400, 0x1d456, 0x1d49e, 0x1d4a2, 0x1d4a5, 0x1d4a9, 0x1d4ae, 0x1d4bb, 0x1d4bd, 0x1d4c5, 0x1d507, 0x1d50d, 0x1d516, + 0x1d51e, 0x1d53b, 0x1d540, 0x1d546, 0x1d54a, 0x1d552, 0x1d6a8, 0x1d6c2, 0x1d6dc, 0x1d6fc, 0x1d716, 0x1d736, 0x1d750, 0x1d770, 0x1d78a, 0x1d7aa, 0x1d7c4, 0x1e900, +) +private val casedEnd = intArrayOf( + 0x1044f, 0x104d3, 0x104fb, 0x10cb2, 0x10cf2, 0x118df, 0x16e7f, 0x1d454, 0x1d49c, 0x1d49f, 0x1d4a2, 0x1d4a6, 0x1d4ac, 0x1d4b9, 0x1d4bb, 0x1d4c3, 0x1d505, 0x1d50a, 0x1d514, 0x1d51c, + 0x1d539, 0x1d53e, 0x1d544, 0x1d546, 0x1d550, 0x1d6a5, 0x1d6c0, 0x1d6da, 0x1d6fa, 0x1d714, 0x1d734, 0x1d74e, 0x1d76e, 0x1d788, 0x1d7a8, 0x1d7c2, 0x1d7cb, 0x1e943, +) + +// Lu + Ll + Lt + Other_Lowercase + Other_Uppercase (PropList.txt of Unicode Character Database files) +// Declared internal for testing +internal fun Int.isCased(): Boolean { + if (this <= Char.MAX_VALUE.code) { + when (toChar().getCategoryValue()) { + CharCategory.UPPERCASE_LETTER.value, + CharCategory.LOWERCASE_LETTER.value, + CharCategory.TITLECASE_LETTER.value -> return true + } + } + if (isOtherUppercase() || isOtherLowercase()) { + return true + } + val index = binarySearchRange(casedStart, this) + return index >= 0 && this <= casedEnd[index] +} + +private val caseIgnorableStart = intArrayOf( + 0x0027, 0x002e, 0x003a, 0x00b7, 0x0387, 0x055f, 0x05f4, 0x2018, 0x2019, 0x2024, 0x2027, 0xfe13, 0xfe52, 0xfe55, 0xff07, 0xff0e, 0xff1a, 0x101fd, 0x102e0, 0x10376, + 0x10a01, 0x10a05, 0x10a0c, 0x10a38, 0x10a3f, 0x10ae5, 0x10d24, 0x10eab, 0x10f46, 0x11001, 0x11038, 0x1107f, 0x110b3, 0x110b9, 0x110bd, 0x110cd, 0x11100, 0x11127, 0x1112d, 0x11173, + 0x11180, 0x111b6, 0x111c9, 0x111cf, 0x1122f, 0x11234, 0x11236, 0x1123e, 0x112df, 
0x112e3, 0x11300, 0x1133b, 0x11340, 0x11366, 0x11370, 0x11438, 0x11442, 0x11446, 0x1145e, 0x114b3, + 0x114ba, 0x114bf, 0x114c2, 0x115b2, 0x115bc, 0x115bf, 0x115dc, 0x11633, 0x1163d, 0x1163f, 0x116ab, 0x116ad, 0x116b0, 0x116b7, 0x1171d, 0x11722, 0x11727, 0x1182f, 0x11839, 0x1193b, + 0x1193e, 0x11943, 0x119d4, 0x119da, 0x119e0, 0x11a01, 0x11a33, 0x11a3b, 0x11a47, 0x11a51, 0x11a59, 0x11a8a, 0x11a98, 0x11c30, 0x11c38, 0x11c3f, 0x11c92, 0x11caa, 0x11cb2, 0x11cb5, + 0x11d31, 0x11d3a, 0x11d3c, 0x11d3f, 0x11d47, 0x11d90, 0x11d95, 0x11d97, 0x11ef3, 0x13430, 0x16af0, 0x16b30, 0x16b40, 0x16f4f, 0x16f8f, 0x16fe0, 0x16fe3, 0x1bc9d, 0x1bca0, 0x1d167, + 0x1d173, 0x1d185, 0x1d1aa, 0x1d242, 0x1da00, 0x1da3b, 0x1da75, 0x1da84, 0x1da9b, 0x1daa1, 0x1e000, 0x1e008, 0x1e01b, 0x1e023, 0x1e026, 0x1e130, 0x1e2ec, 0x1e8d0, 0x1e944, 0x1f3fb, + 0xe0001, 0xe0020, 0xe0100, +) +private val caseIgnorableEnd = intArrayOf( + 0x0027, 0x002e, 0x003a, 0x00b7, 0x0387, 0x055f, 0x05f4, 0x2018, 0x2019, 0x2024, 0x2027, 0xfe13, 0xfe52, 0xfe55, 0xff07, 0xff0e, 0xff1a, 0x101fd, 0x102e0, 0x1037a, + 0x10a03, 0x10a06, 0x10a0f, 0x10a3a, 0x10a3f, 0x10ae6, 0x10d27, 0x10eac, 0x10f50, 0x11001, 0x11046, 0x11081, 0x110b6, 0x110ba, 0x110bd, 0x110cd, 0x11102, 0x1112b, 0x11134, 0x11173, + 0x11181, 0x111be, 0x111cc, 0x111cf, 0x11231, 0x11234, 0x11237, 0x1123e, 0x112df, 0x112ea, 0x11301, 0x1133c, 0x11340, 0x1136c, 0x11374, 0x1143f, 0x11444, 0x11446, 0x1145e, 0x114b8, + 0x114ba, 0x114c0, 0x114c3, 0x115b5, 0x115bd, 0x115c0, 0x115dd, 0x1163a, 0x1163d, 0x11640, 0x116ab, 0x116ad, 0x116b5, 0x116b7, 0x1171f, 0x11725, 0x1172b, 0x11837, 0x1183a, 0x1193c, + 0x1193e, 0x11943, 0x119d7, 0x119db, 0x119e0, 0x11a0a, 0x11a38, 0x11a3e, 0x11a47, 0x11a56, 0x11a5b, 0x11a96, 0x11a99, 0x11c36, 0x11c3d, 0x11c3f, 0x11ca7, 0x11cb0, 0x11cb3, 0x11cb6, + 0x11d36, 0x11d3a, 0x11d3d, 0x11d45, 0x11d47, 0x11d91, 0x11d95, 0x11d97, 0x11ef4, 0x13438, 0x16af4, 0x16b36, 0x16b43, 0x16f4f, 0x16f9f, 0x16fe1, 0x16fe4, 0x1bc9e, 0x1bca3, 0x1d169, + 0x1d182, 0x1d18b, 
0x1d1ad, 0x1d244, 0x1da36, 0x1da6c, 0x1da75, 0x1da84, 0x1da9f, 0x1daaf, 0x1e006, 0x1e018, 0x1e021, 0x1e024, 0x1e02a, 0x1e13d, 0x1e2ef, 0x1e8d6, 0x1e94b, 0x1f3ff, + 0xe0001, 0xe007f, 0xe01ef, +) + +// Mn + Me + Cf + Lm + Sk + Word_Break=MidLetter + Word_Break=MidNumLet + Word_Break=Single_Quote (WordBreakProperty.txt of Unicode Character Database files) +// Declared internal for testing +internal fun Int.isCaseIgnorable(): Boolean { + if (this <= Char.MAX_VALUE.code) { + when (toChar().getCategoryValue()) { + CharCategory.NON_SPACING_MARK.value, + CharCategory.ENCLOSING_MARK.value, + CharCategory.FORMAT.value, + CharCategory.MODIFIER_LETTER.value, + CharCategory.MODIFIER_SYMBOL.value -> return true + } + } + val index = binarySearchRange(caseIgnorableStart, this) + return index >= 0 && this <= caseIgnorableEnd[index] +} + +private fun String.codePointBefore(index: Int): Int { + val low = this[index] + if (low.isLowSurrogate() && index - 1 >= 0) { + val high = this[index - 1] + if (high.isHighSurrogate()) { + return Char.toCodePoint(high, low) + } + } + return low.code +} + +// \p{cased} (\p{case-ignorable})* Sigma !( (\p{case-ignorable})* \p{cased} ) +// The regular-expression operator * is "possessive", consuming as many characters as possible, with no backup. +// This is significant in the case of Final_Sigma, because the sets of case-ignorable and cased characters are not disjoint. 
+private fun String.isFinalSigmaAt(index: Int): Boolean {
+    // Only a capital sigma that is not the first char can be in final position.
+    if (this[index] != '\u03A3' || index <= 0) {
+        return false
+    }
+
+    // Walk left over case-ignorable code points; the first other code point must be cased.
+    var before = index - 1
+    var current = 0
+    while (before >= 0) {
+        current = codePointBefore(before)
+        if (!current.isCaseIgnorable()) {
+            break
+        }
+        before -= current.charCount()
+    }
+    if (before < 0 || !current.isCased()) {
+        return false
+    }
+
+    // Walk right over case-ignorable code points; sigma is final when the string ends there
+    // or the first other code point is not cased.
+    var after = index + 1
+    while (after < length) {
+        current = codePointAt(after)
+        if (!current.isCaseIgnorable()) {
+            break
+        }
+        after += current.charCount()
+    }
+    return after >= length || !current.isCased()
+}
+
+/**
+ * Returns this string converted to lower case.
+ *
+ * The receiver itself is returned when no code point changes. Otherwise the unchanged prefix is copied
+ * and the rest is mapped code point by code point, with two special cases: U+0130 becomes U+0069 U+0307,
+ * and a sigma accepted by [isFinalSigmaAt] becomes U+03C2.
+ */
+internal fun String.lowercaseImpl(): String {
+    // U+0130 and U+03A3 both have a one-to-one lowercase mapping, so this scan stops at them as well.
+    var firstChanged = 0
+    while (firstChanged < length) {
+        val codePoint = codePointAt(firstChanged)
+        if (codePoint.lowercaseCodePoint() != codePoint) {
+            break
+        }
+        firstChanged += codePoint.charCount()
+    }
+    if (firstChanged == length) {
+        return this
+    }
+
+    val result = StringBuilder(length)
+    result.appendRange(this, 0, firstChanged)
+
+    var position = firstChanged
+    while (position < length) {
+        when {
+            this[position] == '\u0130' -> {
+                result.append("\u0069\u0307")
+                position++
+            }
+            isFinalSigmaAt(position) -> {
+                result.append('\u03C2')
+                position++
+            }
+            else -> {
+                val codePoint = codePointAt(position)
+                result.appendCodePoint(codePoint.lowercaseCodePoint())
+                position += codePoint.charCount()
+            }
+        }
+    }
+
+    return result.toString()
+}
diff --git a/libraries/stdlib/wasm/src/generated/_StringUppercase.kt b/libraries/stdlib/wasm/src/generated/_StringUppercase.kt
new file mode 100644
index 00000000000..cd1f2b04424
--- /dev/null
+++ b/libraries/stdlib/wasm/src/generated/_StringUppercase.kt
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors.
+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
+ */
+
+package kotlin.text
+
+//
+// NOTE: THIS FILE IS AUTO-GENERATED by the GenerateUnicodeData.kt
+// See: https://github.com/JetBrains/kotlin/tree/master/libraries/stdlib
+//
+
+/**
+ * Returns the code point that starts at [index]: a high surrogate immediately followed by a
+ * low surrogate is combined into one code point, anything else yields the code of the char at [index].
+ */
+internal fun String.codePointAt(index: Int): Int {
+    val lead = this[index]
+    val nextIndex = index + 1
+    if (!lead.isHighSurrogate() || nextIndex >= this.length) {
+        return lead.code
+    }
+    val trail = this[nextIndex]
+    return if (trail.isLowSurrogate()) Char.toCodePoint(lead, trail) else lead.code
+}
+
+/**
+ * Returns how many chars this code point occupies in a string: 2 from
+ * [Char.MIN_SUPPLEMENTARY_CODE_POINT] upwards, 1 otherwise.
+ */
+internal fun Int.charCount(): Int = if (this < Char.MIN_SUPPLEMENTARY_CODE_POINT) 1 else 2
+
+/**
+ * Appends [codePoint] as a single char when it is below [Char.MIN_SUPPLEMENTARY_CODE_POINT],
+ * otherwise as a high surrogate followed by a low surrogate.
+ */
+internal fun StringBuilder.appendCodePoint(codePoint: Int) {
+    if (codePoint >= Char.MIN_SUPPLEMENTARY_CODE_POINT) {
+        val offset = codePoint - 0x10000
+        append(Char.MIN_HIGH_SURROGATE + (offset shr 10))
+        append(Char.MIN_LOW_SURROGATE + (codePoint and 0x3ff))
+    } else {
+        append(codePoint.toChar())
+    }
+}
+
+/**
+ * Returns this string converted to upper case.
+ *
+ * The receiver itself is returned when nothing changes. Otherwise the unchanged prefix is copied and the
+ * rest is converted: a char with a one-to-many mapping is replaced by that mapping, every other code point
+ * by its one-to-one uppercase code point.
+ */
+internal fun String.uppercaseImpl(): String {
+    var firstChanged = 0
+    while (firstChanged < length) {
+        val codePoint = codePointAt(firstChanged)
+        val changes = this[firstChanged].oneToManyUppercase() != null || codePoint.uppercaseCodePoint() != codePoint
+        if (changes) {
+            break
+        }
+        firstChanged += codePoint.charCount()
+    }
+    if (firstChanged == length) {
+        return this
+    }
+
+    val result = StringBuilder(length)
+    result.appendRange(this, 0, firstChanged)
+
+    var position = firstChanged
+    while (position < length) {
+        val expansion = this[position].oneToManyUppercase()
+        if (expansion == null) {
+            val codePoint = codePointAt(position)
+            result.appendCodePoint(codePoint.uppercaseCodePoint())
+            position += codePoint.charCount()
+        } else {
+            result.append(expansion)
+            position++
+        }
+    }
+
+    return result.toString()
+}
diff --git a/libraries/stdlib/wasm/src/generated/_TitlecaseMappings.kt b/libraries/stdlib/wasm/src/generated/_TitlecaseMappings.kt
new file mode 100644
index 00000000000..281a620ec0e
---
/dev/null
+++ b/libraries/stdlib/wasm/src/generated/_TitlecaseMappings.kt
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors.
+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
+ */
+
+package kotlin.text
+
+//
+// NOTE: THIS FILE IS AUTO-GENERATED by the GenerateUnicodeData.kt
+// See: https://github.com/JetBrains/kotlin/tree/master/libraries/stdlib
+//
+
+// 4 ranges totally
+/**
+ * Returns the title case form of this character.
+ */
+internal fun Char.titlecaseCharImpl(): Char {
+    val codeUnit = this.code
+    return when (codeUnit) {
+        // These letters come in triples whose middle (title case) member has a code divisible by 3,
+        // so rounding (code + 1) down to a multiple of 3 lands on it, e.g. U+01C4..U+01C6 -> U+01C5.
+        in 0x01c4..0x01cc, in 0x01f1..0x01f3 -> (3 * ((codeUnit + 1) / 3)).toChar()
+        // Lower case letters that are their own title case form.
+        in 0x10d0..0x10fa, in 0x10fd..0x10ff -> this
+        else -> uppercaseChar()
+    }
+}
\ No newline at end of file
diff --git a/libraries/stdlib/wasm/src/generated/_UppercaseMappings.kt b/libraries/stdlib/wasm/src/generated/_UppercaseMappings.kt
new file mode 100644
index 00000000000..a4413ba973d
--- /dev/null
+++ b/libraries/stdlib/wasm/src/generated/_UppercaseMappings.kt
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors.
+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
+ */ + +package kotlin.text + +// +// NOTE: THIS FILE IS AUTO-GENERATED by the GenerateUnicodeData.kt +// See: https://github.com/JetBrains/kotlin/tree/master/libraries/stdlib +// + +// 189 ranges totally +private val rangeStart = intArrayOf( + 0x0061, 0x00b5, 0x00e0, 0x00f8, 0x00ff, 0x0101, 0x0131, 0x0133, 0x013a, 0x014b, 0x017a, 0x017f, 0x0180, 0x0183, 0x0188, 0x0192, 0x0195, 0x0199, 0x019a, 0x019e, + 0x01a1, 0x01a8, 0x01b0, 0x01b6, 0x01bd, 0x01bf, 0x01c5, 0x01c6, 0x01c8, 0x01c9, 0x01cb, 0x01cc, 0x01ce, 0x01dd, 0x01df, 0x01f2, 0x01f3, 0x01f5, 0x01fb, 0x0223, + 0x023c, 0x023f, 0x0242, 0x0249, 0x0250, 0x0251, 0x0252, 0x0253, 0x0254, 0x0256, 0x0259, 0x025b, 0x025c, 0x0260, 0x0261, 0x0263, 0x0265, 0x0266, 0x0268, 0x0269, + 0x026a, 0x026b, 0x026c, 0x026f, 0x0271, 0x0272, 0x0275, 0x027d, 0x0280, 0x0282, 0x0283, 0x0287, 0x0288, 0x0289, 0x028a, 0x028c, 0x0292, 0x029d, 0x029e, 0x0345, + 0x0371, 0x0377, 0x037b, 0x03ac, 0x03ad, 0x03b1, 0x03c2, 0x03c3, 0x03cc, 0x03cd, 0x03d0, 0x03d1, 0x03d5, 0x03d6, 0x03d7, 0x03d9, 0x03f0, 0x03f1, 0x03f2, 0x03f3, + 0x03f5, 0x03f8, 0x0430, 0x0450, 0x0461, 0x048b, 0x04c2, 0x04cf, 0x04d1, 0x0561, 0x10d0, 0x10fd, 0x13f8, 0x1c80, 0x1c81, 0x1c82, 0x1c83, 0x1c85, 0x1c86, 0x1c87, + 0x1c88, 0x1d79, 0x1d7d, 0x1d8e, 0x1e01, 0x1e9b, 0x1ea1, 0x1f00, 0x1f10, 0x1f20, 0x1f30, 0x1f40, 0x1f51, 0x1f60, 0x1f70, 0x1f72, 0x1f76, 0x1f78, 0x1f7a, 0x1f7c, + 0x1f80, 0x1f90, 0x1fa0, 0x1fb0, 0x1fb3, 0x1fbe, 0x1fc3, 0x1fd0, 0x1fe0, 0x1fe5, 0x1ff3, 0x214e, 0x2170, 0x2184, 0x24d0, 0x2c30, 0x2c61, 0x2c65, 0x2c66, 0x2c68, + 0x2c73, 0x2c81, 0x2cec, 0x2cf3, 0x2d00, 0x2d27, 0xa641, 0xa681, 0xa723, 0xa733, 0xa77a, 0xa77f, 0xa78c, 0xa793, 0xa794, 0xa797, 0xa7b5, 0xa7c3, 0xa7ca, 0xa7f6, + 0xab53, 0xab70, 0xff41, 0x10428, 0x104d8, 0x10cc0, 0x118c0, 0x16e60, 0x1e922, +) + +private val rangeLength = intArrayOf( + -0x1fee6, 0x2e7101, -0x1fee9, -0x1fef9, 0x79101, -0x0dd1, -0xe7eff, -0x0dfb, -0x0df1, -0x0dd3, -0x0dfb, -0x12beff, 0xc3101, -0x0dfd, -0x0bfb, -0x0eff, 0x61101, -0x0eff, 
0xa3101, 0x82101, + -0x0dfb, -0x0afa, -0x0bfb, -0x0cfc, -0x0eff, 0x38101, -0x0eff, -0x1eff, -0x0eff, -0x1eff, -0x0eff, -0x1eff, -0x0df1, -0x4eeff, -0x0def, -0x0eff, -0x1eff, -0x0bfb, -0x0ddb, -0x0def, + -0x0eff, 0x2a3f102, -0x0afa, -0x0df9, 0x2a1f101, 0x2a1c101, 0x2a1e101, -0xd1eff, -0xcdeff, -0xccefe, -0xc9eff, -0xcaeff, 0xa54f101, -0xcceff, 0xa54b101, -0xceeff, 0xa528101, 0xa544101, -0xd0eff, -0xd2eff, + 0xa544101, 0x29f7101, 0xa541101, -0xd2eff, 0x29fd101, -0xd4eff, -0xd5eff, 0x29e7101, -0xd9eff, 0xa543101, -0xd9eff, 0xa52a101, -0xd9eff, -0x44eff, -0xd8efe, -0x46eff, -0xdaeff, 0xa515101, 0xa512101, 0x54101, + -0x0dfd, -0x0eff, 0x82103, -0x25eff, -0x24efd, -0x1feef, -0x1eeff, -0x1fef7, -0x3feff, -0x3eefe, -0x3deff, -0x38eff, -0x2eeff, -0x35eff, -0x7eff, -0x0de9, -0x55eff, -0x4feff, 0x7101, -0x73eff, + -0x5feff, -0x0cfc, -0x1fee0, -0x4fef0, -0x0ddf, -0x0dcb, -0x0df3, -0xeeff, -0x0da1, -0x2feda, 0xbc012b, 0xbc0103, -0x7efa, -0x186deff, -0x186ceff, -0x1863eff, -0x1861efe, -0x1862eff, -0x185beff, -0x1824eff, + 0x89c2101, 0x8a04101, 0xee6101, 0x8a38101, -0x0d6b, -0x3aeff, -0x0da1, 0x8108, 0x8106, 0x8108, 0x8108, 0x8106, 0x8207, 0x8108, 0x4a102, 0x56104, 0x64102, 0x80102, 0x70102, 0x7e102, + 0x8108, 0x8108, 0x8108, 0x8102, 0x9101, -0x1c24eff, 0x9101, 0x8102, 0x8102, 0x7101, 0x9101, -0x1beff, -0xfef0, -0x0eff, -0x19ee6, -0x2fed1, -0x0eff, -0x2a2aeff, -0x2a27eff, -0x0dfb, + -0x0cfc, -0x0d9d, -0x0dfd, -0x0eff, -0x1c5feda, -0x1c5f9f9, -0x0dd3, -0x0de5, -0x0df3, -0x0dc3, -0x0dfd, -0x0df7, -0x0afa, -0x0eff, 0x30101, -0x0ded, -0x0df5, -0x0afa, -0x0eff, -0x0eff, + -0x39feff, -0x97cfeb0, -0x1fee6, -0x27ed8, -0x27edc, -0x3fecd, -0x1fee0, -0x1fee0, -0x21ede, +) + +internal fun equalDistanceMapping(code: Int, start: Int, pattern: Int): Int { + val diff = code - start + + val length = pattern and 0xff + if (diff >= length) { + return code + } + + val distance = (pattern shr 8) and 0xf + if (diff % distance != 0) { + return code + } + + val mapping = pattern shr 12 + return code + 
mapping +} + +internal fun Int.uppercaseCodePoint(): Int { + if (this in 0x61..0x7a) { + return this - 32 + } + if (this < 0x80) { + return this + } + val index = binarySearchRange(rangeStart, this) + return equalDistanceMapping(this, rangeStart[index], rangeLength[index]) +} + +internal fun Char.uppercaseCharImpl(): Char { + return code.uppercaseCodePoint().toChar() +} diff --git a/libraries/stdlib/wasm/src/generated/_WhitespaceChars.kt b/libraries/stdlib/wasm/src/generated/_WhitespaceChars.kt new file mode 100644 index 00000000000..5ab0a1420d0 --- /dev/null +++ b/libraries/stdlib/wasm/src/generated/_WhitespaceChars.kt @@ -0,0 +1,31 @@ +/* + * Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors. + * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file. + */ + +package kotlin.text + +// +// NOTE: THIS FILE IS AUTO-GENERATED by the GenerateUnicodeData.kt +// See: https://github.com/JetBrains/kotlin/tree/master/libraries/stdlib +// + +// 9 ranges totally +/** + * Returns `true` if this character is a whitespace. 
+ */
+internal fun Char.isWhitespaceImpl(): Boolean {
+    val ch = this.code
+    return ch in 0x0009..0x000d
+        || ch in 0x001c..0x0020
+        || ch == 0x00a0
+        || ch > 0x1000 && ( // every code point tested inside the parentheses is above U+1000
+            ch == 0x1680
+                || ch in 0x2000..0x200a
+                || ch == 0x2028
+                || ch == 0x2029
+                || ch == 0x202f
+                || ch == 0x205f
+                || ch == 0x3000
+        )
+}
diff --git a/libraries/stdlib/wasm/src/kotlin/CharCode.kt b/libraries/stdlib/wasm/src/kotlin/CharCode.kt
index 2e22d5f6322..bc173e24a8e 100644
--- a/libraries/stdlib/wasm/src/kotlin/CharCode.kt
+++ b/libraries/stdlib/wasm/src/kotlin/CharCode.kt
@@ -11,4 +11,8 @@ package kotlin
  * @sample samples.text.Chars.charFromCode
  */
 @SinceKotlin("1.5")
-public actual fun Char(code: UShort): Char = TODO("Wasm stdlib: CharCode")
+@WasExperimental(ExperimentalStdlibApi::class)
+@kotlin.internal.InlineOnly
+public actual inline fun Char(code: UShort): Char {
+    return code.toInt().toChar() // UShort -> Int -> Char
+}
\ No newline at end of file
diff --git a/libraries/stdlib/wasm/src/kotlin/Text.kt b/libraries/stdlib/wasm/src/kotlin/Text.kt
index 3a5948e4be8..da36ae33dcf 100644
--- a/libraries/stdlib/wasm/src/kotlin/Text.kt
+++ b/libraries/stdlib/wasm/src/kotlin/Text.kt
@@ -75,13 +75,22 @@ actual enum class RegexOption {
 
 // From char.kt
 
-actual fun Char.isHighSurrogate(): Boolean = TODO("Wasm stdlib: Text")
-actual fun Char.isLowSurrogate(): Boolean = TODO("Wasm stdlib: Text")
+/**
+ * Returns `true` if this character is a Unicode high-surrogate code unit (also known as leading-surrogate code unit).
+ */
+public actual fun Char.isHighSurrogate(): Boolean = this in Char.MIN_HIGH_SURROGATE..Char.MAX_HIGH_SURROGATE
+
+/**
+ * Returns `true` if this character is a Unicode low-surrogate code unit (also known as trailing-surrogate code unit).
+ */
+public actual fun Char.isLowSurrogate(): Boolean = this in Char.MIN_LOW_SURROGATE..Char.MAX_LOW_SURROGATE
 
 /**
  * Converts this character to lower case using Unicode mapping rules of the invariant locale.
*/
-public actual fun Char.toLowerCase(): Char = TODO("Wasm stdlib: Text")
+@Deprecated("Use lowercaseChar() instead.", ReplaceWith("lowercaseChar()"))
+@DeprecatedSinceKotlin(warningSince = "1.5")
+public actual fun Char.toLowerCase(): Char = lowercaseCharImpl() // same implementation as lowercaseChar()
 
 /**
  * Converts this character to lower case using Unicode mapping rules of the invariant locale.
@@ -93,7 +102,8 @@ public actual fun Char.toLowerCase(): Char = TODO("Wasm stdlib: Text")
  * @sample samples.text.Chars.lowercase
  */
 @SinceKotlin("1.5")
-public actual fun Char.lowercaseChar(): Char = TODO("Wasm stdlib: Text")
+@WasExperimental(ExperimentalStdlibApi::class)
+public actual fun Char.lowercaseChar(): Char = lowercaseCharImpl() // one-to-one mapping, see generated/_LowercaseMappings.kt
 
 /**
  * Converts this character to lower case using Unicode mapping rules of the invariant locale.
@@ -106,12 +116,15 @@ public actual fun Char.lowercaseChar(): Char = TODO("Wasm stdlib: Text")
  * @sample samples.text.Chars.lowercase
  */
 @SinceKotlin("1.5")
-public actual fun Char.lowercase(): String = TODO("Wasm stdlib: Text")
+@WasExperimental(ExperimentalStdlibApi::class)
+public actual fun Char.lowercase(): String = lowercaseImpl() // Char overload in generated/_OneToManyLowercaseMappings.kt; the result may hold two chars
 
 /**
  * Converts this character to upper case using Unicode mapping rules of the invariant locale.
  */
-public actual fun Char.toUpperCase(): Char = TODO("Wasm stdlib: Text")
+@Deprecated("Use uppercaseChar() instead.", ReplaceWith("uppercaseChar()"))
+@DeprecatedSinceKotlin(warningSince = "1.5")
+public actual fun Char.toUpperCase(): Char = uppercaseCharImpl() // same implementation as uppercaseChar()
 
 /**
  * Converts this character to upper case using Unicode mapping rules of the invariant locale.
@@ -123,7 +136,8 @@ public actual fun Char.toUpperCase(): Char = TODO("Wasm stdlib: Text") * @sample samples.text.Chars.uppercase */ @SinceKotlin("1.5") -public actual fun Char.uppercaseChar(): Char = TODO("Wasm stdlib: Text") +@WasExperimental(ExperimentalStdlibApi::class) +public actual fun Char.uppercaseChar(): Char = uppercaseCharImpl() /** * Converts this character to upper case using Unicode mapping rules of the invariant locale. @@ -136,7 +150,8 @@ public actual fun Char.uppercaseChar(): Char = TODO("Wasm stdlib: Text") * @sample samples.text.Chars.uppercase */ @SinceKotlin("1.5") -public actual fun Char.uppercase(): String = TODO("Wasm stdlib: Text") +@WasExperimental(ExperimentalStdlibApi::class) +public actual fun Char.uppercase(): String = uppercaseImpl() /** * Converts this character to title case using Unicode mapping rules of the invariant locale. @@ -148,14 +163,15 @@ public actual fun Char.uppercase(): String = TODO("Wasm stdlib: Text") * @sample samples.text.Chars.titlecase */ @SinceKotlin("1.5") -public actual fun Char.titlecaseChar(): Char = TODO("Wasm stdlib: Text") +public actual fun Char.titlecaseChar(): Char = titlecaseCharImpl() /** * Returns the Unicode general category of this character. */ @SinceKotlin("1.5") -public actual val Char.category: CharCategory get() = TODO("Wasm stdlib: Text") +public actual val Char.category: CharCategory + get() = CharCategory.valueOf(getCategoryValue()) /** * Returns `true` if this character (Unicode code point) is defined in Unicode. @@ -163,7 +179,12 @@ public actual val Char.category: CharCategory get() = TODO("Wasm stdlib: Text") * A character is considered to be defined in Unicode if its [category] is not [CharCategory.UNASSIGNED]. 
*/ @SinceKotlin("1.5") -public actual fun Char.isDefined(): Boolean = TODO("Wasm stdlib: Text") +public actual fun Char.isDefined(): Boolean { + if (this < '\u0080') { + return true + } + return getCategoryValue() != CharCategory.UNASSIGNED.value +} /** * Returns `true` if this character is a letter. @@ -174,7 +195,15 @@ public actual fun Char.isDefined(): Boolean = TODO("Wasm stdlib: Text") * @sample samples.text.Chars.isLetter */ @SinceKotlin("1.5") -public actual fun Char.isLetter(): Boolean = TODO("Wasm stdlib: Text") +public actual fun Char.isLetter(): Boolean { + if (this in 'a'..'z' || this in 'A'..'Z') { + return true + } + if (this < '\u0080') { + return false + } + return isLetterImpl() +} /** * Returns `true` if this character is a letter or digit. @@ -185,7 +214,16 @@ public actual fun Char.isLetter(): Boolean = TODO("Wasm stdlib: Text") * @sample samples.text.Chars.isLetterOrDigit */ @SinceKotlin("1.5") -public actual fun Char.isLetterOrDigit(): Boolean = TODO("Wasm stdlib: Text") +public actual fun Char.isLetterOrDigit(): Boolean { + if (this in 'a'..'z' || this in 'A'..'Z' || this in '0'..'9') { + return true + } + if (this < '\u0080') { + return false + } + + return isDigit() || isLetter() +} /** * Returns `true` if this character is a digit. @@ -195,7 +233,15 @@ public actual fun Char.isLetterOrDigit(): Boolean = TODO("Wasm stdlib: Text") * @sample samples.text.Chars.isDigit */ @SinceKotlin("1.5") -public actual fun Char.isDigit(): Boolean = TODO("Wasm stdlib: Text") +public actual fun Char.isDigit(): Boolean { + if (this in '0'..'9') { + return true + } + if (this < '\u0080') { + return false + } + return isDigitImpl() +} /** * Returns `true` if this character is an upper case letter. 
@@ -205,7 +251,15 @@ public actual fun Char.isDigit(): Boolean = TODO("Wasm stdlib: Text") * @sample samples.text.Chars.isUpperCase */ @SinceKotlin("1.5") -public actual fun Char.isUpperCase(): Boolean = TODO("Wasm stdlib: Text") +public actual fun Char.isUpperCase(): Boolean { + if (this in 'A'..'Z') { + return true + } + if (this < '\u0080') { + return false + } + return isUpperCaseImpl() +} /** * Returns `true` if this character is a lower case letter. @@ -215,7 +269,15 @@ public actual fun Char.isUpperCase(): Boolean = TODO("Wasm stdlib: Text") * @sample samples.text.Chars.isLowerCase */ @SinceKotlin("1.5") -public actual fun Char.isLowerCase(): Boolean = TODO("Wasm stdlib: Text") +public actual fun Char.isLowerCase(): Boolean { + if (this in 'a'..'z') { + return true + } + if (this < '\u0080') { + return false + } + return isLowerCaseImpl() +} /** * Returns `true` if this character is a title case letter. @@ -225,7 +287,12 @@ public actual fun Char.isLowerCase(): Boolean = TODO("Wasm stdlib: Text") * @sample samples.text.Chars.isTitleCase */ @SinceKotlin("1.5") -public actual fun Char.isTitleCase(): Boolean = TODO("Wasm stdlib: Text") +public actual fun Char.isTitleCase(): Boolean { + if (this < '\u0080') { + return false + } + return getCategoryValue() == CharCategory.TITLECASE_LETTER.value +} /** * Returns `true` if this character is an ISO control character. @@ -235,7 +302,9 @@ public actual fun Char.isTitleCase(): Boolean = TODO("Wasm stdlib: Text") * @sample samples.text.Chars.isISOControl */ @SinceKotlin("1.5") -public actual fun Char.isISOControl(): Boolean = TODO("Wasm stdlib: Text") +public actual fun Char.isISOControl(): Boolean { + return this <= '\u001F' || this in '\u007F'..'\u009F' +} /** * Determines whether a character is whitespace according to the Unicode standard. 
@@ -243,7 +312,7 @@ public actual fun Char.isISOControl(): Boolean = TODO("Wasm stdlib: Text") * * @sample samples.text.Chars.isWhitespace */ -public actual fun Char.isWhitespace(): Boolean = TODO("Wasm stdlib: Text") +public actual fun Char.isWhitespace(): Boolean = isWhitespaceImpl() // From string.kt @@ -284,9 +353,15 @@ public actual fun String(chars: CharArray, offset: Int, length: Int): String { /** * Concatenates characters in this [CharArray] into a String. */ -@SinceKotlin("1.3") -@ExperimentalStdlibApi -public actual fun CharArray.concatToString(): String = TODO("Wasm stdlib: Text") +@SinceKotlin("1.4") +@WasExperimental(ExperimentalStdlibApi::class) +public actual fun CharArray.concatToString(): String { + var result = "" + for (char in this) { + result += char + } + return result +} /** * Concatenates characters in this [CharArray] or its subrange into a String. @@ -297,16 +372,26 @@ public actual fun CharArray.concatToString(): String = TODO("Wasm stdlib: Text") * @throws IndexOutOfBoundsException if [startIndex] is less than zero or [endIndex] is greater than the size of this array. * @throws IllegalArgumentException if [startIndex] is greater than [endIndex]. */ -@SinceKotlin("1.3") -@ExperimentalStdlibApi -public actual fun CharArray.concatToString(startIndex: Int, endIndex: Int): String = TODO("Wasm stdlib: Text") +@SinceKotlin("1.4") +@WasExperimental(ExperimentalStdlibApi::class) +@Suppress("ACTUAL_FUNCTION_WITH_DEFAULT_ARGUMENTS") +public actual fun CharArray.concatToString(startIndex: Int = 0, endIndex: Int = this.size): String { + AbstractList.checkBoundsIndexes(startIndex, endIndex, this.size) + var result = "" + for (index in startIndex until endIndex) { + result += this[index] + } + return result +} /** * Returns a [CharArray] containing characters of this string. 
*/ -@SinceKotlin("1.3") -@ExperimentalStdlibApi -public actual fun String.toCharArray(): CharArray = this.chars.copyOf() +@SinceKotlin("1.4") +@WasExperimental(ExperimentalStdlibApi::class) +public actual fun String.toCharArray(): CharArray { + return CharArray(length) { get(it) } +} /** * Returns a [CharArray] containing characters of this string or its substring. @@ -317,18 +402,24 @@ public actual fun String.toCharArray(): CharArray = this.chars.copyOf() * @throws IndexOutOfBoundsException if [startIndex] is less than zero or [endIndex] is greater than the length of this string. * @throws IllegalArgumentException if [startIndex] is greater than [endIndex]. */ -@SinceKotlin("1.3") -@ExperimentalStdlibApi -public actual fun String.toCharArray(startIndex: Int, endIndex: Int): CharArray = TODO("Wasm stdlib: Text") +@SinceKotlin("1.4") +@WasExperimental(ExperimentalStdlibApi::class) +@Suppress("ACTUAL_FUNCTION_WITH_DEFAULT_ARGUMENTS") +public actual fun String.toCharArray(startIndex: Int = 0, endIndex: Int = this.length): CharArray { + AbstractList.checkBoundsIndexes(startIndex, endIndex, length) + return CharArray(endIndex - startIndex) { get(startIndex + it) } +} /** * Decodes a string from the bytes in UTF-8 encoding in this array. * * Malformed byte sequences are replaced by the replacement char `\uFFFD`. */ -@SinceKotlin("1.3") -@ExperimentalStdlibApi -public actual fun ByteArray.decodeToString(): String = TODO("Wasm stdlib: Text") +@SinceKotlin("1.4") +@WasExperimental(ExperimentalStdlibApi::class) +public actual fun ByteArray.decodeToString(): String { + return decodeUtf8(this, 0, size, false) +} /** * Decodes a string from the bytes in UTF-8 encoding in this array or its subrange. @@ -341,22 +432,28 @@ public actual fun ByteArray.decodeToString(): String = TODO("Wasm stdlib: Text") * @throws IllegalArgumentException if [startIndex] is greater than [endIndex]. 
* @throws CharacterCodingException if the byte array contains malformed UTF-8 byte sequence and [throwOnInvalidSequence] is true. */ -@SinceKotlin("1.3") -@ExperimentalStdlibApi +@SinceKotlin("1.4") +@WasExperimental(ExperimentalStdlibApi::class) +@Suppress("ACTUAL_FUNCTION_WITH_DEFAULT_ARGUMENTS") public actual fun ByteArray.decodeToString( - startIndex: Int, - endIndex: Int, - throwOnInvalidSequence: Boolean -): String = TODO("Wasm stdlib: Text") + startIndex: Int = 0, + endIndex: Int = this.size, + throwOnInvalidSequence: Boolean = false +): String { + AbstractList.checkBoundsIndexes(startIndex, endIndex, this.size) + return decodeUtf8(this, startIndex, endIndex, throwOnInvalidSequence) +} /** * Encodes this string to an array of bytes in UTF-8 encoding. * * Any malformed char sequence is replaced by the replacement byte sequence. */ -@SinceKotlin("1.3") -@ExperimentalStdlibApi -public actual fun String.encodeToByteArray(): ByteArray = TODO("Wasm stdlib: Text") +@SinceKotlin("1.4") +@WasExperimental(ExperimentalStdlibApi::class) +public actual fun String.encodeToByteArray(): ByteArray { + return encodeUtf8(this, 0, length, false) +} /** * Encodes this string or its substring to an array of bytes in UTF-8 encoding. @@ -369,26 +466,39 @@ public actual fun String.encodeToByteArray(): ByteArray = TODO("Wasm stdlib: Tex * @throws IllegalArgumentException if [startIndex] is greater than [endIndex]. * @throws CharacterCodingException if this string contains malformed char sequence and [throwOnInvalidSequence] is true. 
*/ -@SinceKotlin("1.3") -@ExperimentalStdlibApi +@SinceKotlin("1.4") +@WasExperimental(ExperimentalStdlibApi::class) +@Suppress("ACTUAL_FUNCTION_WITH_DEFAULT_ARGUMENTS") public actual fun String.encodeToByteArray( - startIndex: Int, - endIndex: Int, - throwOnInvalidSequence: Boolean -): ByteArray = TODO("Wasm stdlib: Text") + startIndex: Int = 0, + endIndex: Int = this.length, + throwOnInvalidSequence: Boolean = false +): ByteArray { + AbstractList.checkBoundsIndexes(startIndex, endIndex, length) + return encodeUtf8(this, startIndex, endIndex, throwOnInvalidSequence) +} +/** + * Returns a substring of this string that starts at the specified [startIndex] and continues to the end of the string. + */ +public actual fun String.substring(startIndex: Int): String = + subSequence(startIndex, this.length) as String -internal actual fun String.nativeIndexOf(str: String, fromIndex: Int): Int = TODO("Wasm stdlib: Text") -internal actual fun String.nativeLastIndexOf(str: String, fromIndex: Int): Int = TODO("Wasm stdlib: Text") - - -public actual fun String.substring(startIndex: Int): String = TODO("Wasm stdlib: Text") -public actual fun String.substring(startIndex: Int, endIndex: Int): String = TODO("Wasm stdlib: Text") +/** + * Returns the substring of this string starting at the [startIndex] and ending right before the [endIndex]. + * + * @param startIndex the start index (inclusive). + * @param endIndex the end index (exclusive). + */ +public actual fun String.substring(startIndex: Int, endIndex: Int): String = + subSequence(startIndex, endIndex) as String /** * Returns a copy of this string converted to upper case using the rules of the default locale. 
*/ -public actual fun String.toUpperCase(): String = TODO("Wasm stdlib: Text") +@Deprecated("Use uppercase() instead.", ReplaceWith("uppercase()")) +@DeprecatedSinceKotlin(warningSince = "1.5") +public actual fun String.toUpperCase(): String = uppercase() /** * Returns a copy of this string converted to upper case using Unicode mapping rules of the invariant locale. @@ -399,12 +509,14 @@ public actual fun String.toUpperCase(): String = TODO("Wasm stdlib: Text") * @sample samples.text.Strings.uppercase */ @SinceKotlin("1.5") -public actual fun String.uppercase(): String = TODO("Wasm stdlib: Text") +public actual fun String.uppercase(): String = uppercaseImpl() /** * Returns a copy of this string converted to lower case using the rules of the default locale. */ -public actual fun String.toLowerCase(): String = TODO("Wasm stdlib: Text") +@Deprecated("Use lowercase() instead.", ReplaceWith("lowercase()")) +@DeprecatedSinceKotlin(warningSince = "1.5") +public actual fun String.toLowerCase(): String = lowercase() /** * Returns a copy of this string converted to lower case using Unicode mapping rules of the invariant locale. @@ -415,34 +527,120 @@ public actual fun String.toLowerCase(): String = TODO("Wasm stdlib: Text") * @sample samples.text.Strings.lowercase */ @SinceKotlin("1.5") -public actual fun String.lowercase(): String = TODO("Wasm stdlib: Text") +public actual fun String.lowercase(): String = lowercaseImpl() -public actual fun String.capitalize(): String = TODO("Wasm stdlib: Text") -public actual fun String.decapitalize(): String = TODO("Wasm stdlib: Text") -public actual fun CharSequence.repeat(n: Int): String = TODO("Wasm stdlib: Text") +/** + * Returns a copy of this string having its first letter titlecased using the rules of the default locale, + * or the original string if it's empty or already starts with a title case letter. + * + * The title case of a character is usually the same as its upper case with several exceptions. 
+ * The particular list of characters with the special title case form depends on the underlying platform. + * + * @sample samples.text.Strings.capitalize + */ +@Deprecated("Use replaceFirstChar instead.", ReplaceWith("replaceFirstChar { if (it.isLowerCase()) it.titlecase() else it.toString() }")) +@DeprecatedSinceKotlin(warningSince = "1.5") +public actual fun String.capitalize(): String = replaceFirstChar(Char::uppercaseChar) +/** + * Returns a copy of this string having its first letter lowercased using the rules of the default locale, + * or the original string if it's empty or already starts with a lower case letter. + * + * @sample samples.text.Strings.decapitalize + */ +@Deprecated("Use replaceFirstChar instead.", ReplaceWith("replaceFirstChar { it.lowercase() }")) +@DeprecatedSinceKotlin(warningSince = "1.5") +public actual fun String.decapitalize(): String = replaceFirstChar(Char::lowercaseChar) + +/** + * Returns a string containing this char sequence repeated [n] times. + * @throws [IllegalArgumentException] when n < 0. + * @sample samples.text.Strings.repeat + */ +public actual fun CharSequence.repeat(n: Int): String { + require(n >= 0) { "Count 'n' must be non-negative, but was $n." } + return when (n) { + 0 -> "" + 1 -> this.toString() + else -> { + var result = "" + if (!isEmpty()) { + var s = this.toString() + var count = n + while (true) { + if ((count and 1) == 1) { + result += s + } + count = count ushr 1 + if (count == 0) { + break + } + s += s + } + } + return result + } + } +} /** * Returns a new string with all occurrences of [oldChar] replaced with [newChar]. 
*/ -actual fun String.replace(oldChar: Char, newChar: Char, ignoreCase: Boolean): String = TODO("Wasm stdlib: Text") +@Suppress("ACTUAL_FUNCTION_WITH_DEFAULT_ARGUMENTS") +public actual fun String.replace(oldChar: Char, newChar: Char, ignoreCase: Boolean = false): String { + return buildString(length) { + this@replace.forEach { c -> + append(if (c.equals(oldChar, ignoreCase)) newChar else c) + } + } +} /** * Returns a new string obtained by replacing all occurrences of the [oldValue] substring in this string * with the specified [newValue] string. */ -actual fun String.replace(oldValue: String, newValue: String, ignoreCase: Boolean): String = TODO("Wasm stdlib: Text") +@Suppress("ACTUAL_FUNCTION_WITH_DEFAULT_ARGUMENTS") +public actual fun String.replace(oldValue: String, newValue: String, ignoreCase: Boolean = false): String { + run { + var occurrenceIndex: Int = indexOf(oldValue, 0, ignoreCase) + // FAST PATH: no match + if (occurrenceIndex < 0) return this + + val oldValueLength = oldValue.length + val searchStep = oldValueLength.coerceAtLeast(1) + val newLengthHint = length - oldValueLength + newValue.length + if (newLengthHint < 0) throw OutOfMemoryError() + val stringBuilder = StringBuilder(newLengthHint) + + var i = 0 + do { + stringBuilder.append(this, i, occurrenceIndex).append(newValue) + i = occurrenceIndex + oldValueLength + if (occurrenceIndex >= length) break + occurrenceIndex = indexOf(oldValue, occurrenceIndex + searchStep, ignoreCase) + } while (occurrenceIndex > 0) + return stringBuilder.append(this, i, length).toString() + } +} /** * Returns a new string with the first occurrence of [oldChar] replaced with [newChar]. 
*/ -actual fun String.replaceFirst(oldChar: Char, newChar: Char, ignoreCase: Boolean): String = TODO("Wasm stdlib: Text") +@Suppress("ACTUAL_FUNCTION_WITH_DEFAULT_ARGUMENTS") +public actual fun String.replaceFirst(oldChar: Char, newChar: Char, ignoreCase: Boolean = false): String { + val index = indexOf(oldChar, ignoreCase = ignoreCase) + return if (index < 0) this else this.replaceRange(index, index + 1, newChar.toString()) +} /** * Returns a new string obtained by replacing the first occurrence of the [oldValue] substring in this string * with the specified [newValue] string. */ -actual fun String.replaceFirst(oldValue: String, newValue: String, ignoreCase: Boolean): String = TODO("Wasm stdlib: Text") +@Suppress("ACTUAL_FUNCTION_WITH_DEFAULT_ARGUMENTS") +public actual fun String.replaceFirst(oldValue: String, newValue: String, ignoreCase: Boolean = false): String { + val index = indexOf(oldValue, ignoreCase = ignoreCase) + return if (index < 0) this else this.replaceRange(index, index + oldValue.length, newValue) +} /** * Returns `true` if this string is equal to [other], optionally ignoring character case. @@ -452,7 +650,24 @@ actual fun String.replaceFirst(oldValue: String, newValue: String, ignoreCase: B * * @param ignoreCase `true` to ignore character case when comparing strings. By default `false`. 
*/ -actual fun String?.equals(other: String?, ignoreCase: Boolean): Boolean = TODO("Wasm stdlib: Text") +@Suppress("ACTUAL_FUNCTION_WITH_DEFAULT_ARGUMENTS") +public actual fun String?.equals(other: String?, ignoreCase: Boolean = false): Boolean { + if (this == null) return other == null + if (other == null) return false + if (!ignoreCase) return this == other + + if (this.length != other.length) return false + + for (index in 0 until this.length) { + val thisChar = this[index] + val otherChar = other[index] + if (!thisChar.equals(otherChar, ignoreCase)) { + return false + } + } + + return true +} /** * Compares two strings lexicographically, optionally ignoring case differences. @@ -460,7 +675,36 @@ actual fun String?.equals(other: String?, ignoreCase: Boolean): Boolean = TODO(" * If [ignoreCase] is true, the result of `Char.uppercaseChar().lowercaseChar()` on each character is compared. */ @SinceKotlin("1.2") -actual fun String.compareTo(other: String, ignoreCase: Boolean): Int = TODO("Wasm stdlib: Text") +@Suppress("ACTUAL_FUNCTION_WITH_DEFAULT_ARGUMENTS") +public actual fun String.compareTo(other: String, ignoreCase: Boolean = false): Int { + if (ignoreCase) { + val n1 = this.length + val n2 = other.length + val min = minOf(n1, n2) + if (min == 0) return n1 - n2 + for (index in 0 until min) { + var thisChar = this[index] + var otherChar = other[index] + + if (thisChar != otherChar) { + thisChar = thisChar.uppercaseChar() + otherChar = otherChar.uppercaseChar() + + if (thisChar != otherChar) { + thisChar = thisChar.lowercaseChar() + otherChar = otherChar.lowercaseChar() + + if (thisChar != otherChar) { + return thisChar.compareTo(otherChar) + } + } + } + } + return n1 - n2 + } else { + return compareTo(other) + } +} /** * Returns `true` if the contents of this char sequence are equal to the contents of the specified [other], @@ -469,7 +713,7 @@ actual fun String.compareTo(other: String, ignoreCase: Boolean): Int = TODO("Was * @sample 
samples.text.Strings.contentEquals */ @SinceKotlin("1.5") -public actual infix fun CharSequence?.contentEquals(other: CharSequence?): Boolean = TODO("Wasm stdlib: Text") +public actual infix fun CharSequence?.contentEquals(other: CharSequence?): Boolean = contentEqualsImpl(other) /** * Returns `true` if the contents of this char sequence are equal to the contents of the specified [other], optionally ignoring case difference. @@ -479,19 +723,43 @@ public actual infix fun CharSequence?.contentEquals(other: CharSequence?): Boole * @sample samples.text.Strings.contentEquals */ @SinceKotlin("1.5") -public actual fun CharSequence?.contentEquals(other: CharSequence?, ignoreCase: Boolean): Boolean = TODO("Wasm stdlib: Text") +public actual fun CharSequence?.contentEquals(other: CharSequence?, ignoreCase: Boolean): Boolean { + return if (ignoreCase) + this.contentEqualsIgnoreCaseImpl(other) + else + this.contentEqualsImpl(other) +} +/** + * Returns `true` if this string starts with the specified prefix. + */ +@Suppress("ACTUAL_FUNCTION_WITH_DEFAULT_ARGUMENTS") +public actual fun String.startsWith(prefix: String, ignoreCase: Boolean = false): Boolean = + regionMatches(0, prefix, 0, prefix.length, ignoreCase) -public actual fun String.startsWith(prefix: String, ignoreCase: Boolean): Boolean = TODO("Wasm stdlib: Text") -public actual fun String.startsWith(prefix: String, startIndex: Int, ignoreCase: Boolean): Boolean = TODO("Wasm stdlib: Text") -public actual fun String.endsWith(suffix: String, ignoreCase: Boolean): Boolean = TODO("Wasm stdlib: Text") +/** + * Returns `true` if a substring of this string starting at the specified offset [startIndex] starts with the specified prefix. 
+ */ +@Suppress("ACTUAL_FUNCTION_WITH_DEFAULT_ARGUMENTS") +public actual fun String.startsWith(prefix: String, startIndex: Int, ignoreCase: Boolean = false): Boolean = + regionMatches(startIndex, prefix, 0, prefix.length, ignoreCase) + +/** + * Returns `true` if this string ends with the specified suffix. + */ +@Suppress("ACTUAL_FUNCTION_WITH_DEFAULT_ARGUMENTS") +public actual fun String.endsWith(suffix: String, ignoreCase: Boolean = false): Boolean = + regionMatches(length - suffix.length, suffix, 0, suffix.length, ignoreCase) // From stringsCode.kt -internal actual fun String.nativeIndexOf(ch: Char, fromIndex: Int): Int = TODO("Wasm stdlib: Text") -internal actual fun String.nativeLastIndexOf(ch: Char, fromIndex: Int): Int = TODO("Wasm stdlib: Text") +/** + * Returns `true` if this string is empty or consists solely of whitespace characters. + * + * @sample samples.text.Strings.stringIsBlank + */ +public actual fun CharSequence.isBlank(): Boolean = length == 0 || indices.all { this[it].isWhitespace() } -actual fun CharSequence.isBlank(): Boolean = TODO("Wasm stdlib: Text") /** * Returns `true` if the specified range in this char sequence is equal to the specified range in another char sequence. * @param thisOffset the start offset in this char sequence of the substring to compare. @@ -505,8 +773,19 @@ actual fun CharSequence.regionMatches( otherOffset: Int, length: Int, ignoreCase: Boolean -): Boolean = TODO("Wasm stdlib: Text") +): Boolean { + if ((otherOffset < 0) || (thisOffset < 0) || (thisOffset > this.length - length) || (otherOffset > other.length - length)) { + return false + } + for (index in 0 until length) { + if (!this[thisOffset + index].equals(other[otherOffset + index], ignoreCase)) + return false + } + return true +} + +private val STRING_CASE_INSENSITIVE_ORDER = Comparator<String> { a, b -> a.compareTo(b, ignoreCase = true) } /** * A Comparator that orders strings ignoring character case.
@@ -515,93 +794,105 @@ actual fun CharSequence.regionMatches( * and will result in an unsatisfactory ordering for certain locales. */ @SinceKotlin("1.2") -public actual val String.Companion.CASE_INSENSITIVE_ORDER: Comparator<String> get() = TODO("Wasm stdlib: Text") +public actual val String.Companion.CASE_INSENSITIVE_ORDER: Comparator<String> + get() = STRING_CASE_INSENSITIVE_ORDER -actual fun String.toBoolean(): Boolean = TODO("Wasm stdlib: Text") +/** + * Returns `true` if the content of this string is equal to the word "true", ignoring case, and `false` otherwise. + */ +@Deprecated("Use Kotlin compiler 1.4 to avoid deprecation warning.") +@DeprecatedSinceKotlin(hiddenSince = "1.4") +@kotlin.internal.InlineOnly +actual fun String.toBoolean(): Boolean = this.toBoolean() /** * Returns `true` if the contents of this string is equal to the word "true", ignoring case, and `false` otherwise. * * There are also strict versions of the function available on non-nullable String, [toBooleanStrict] and [toBooleanStrictOrNull]. */ -actual fun String?.toBoolean(): Boolean = TODO("Wasm stdlib: Text") +actual fun String?.toBoolean(): Boolean = this != null && this.lowercase() == "true" /** * Parses the string as a signed [Byte] number and returns the result. * @throws NumberFormatException if the string is not a valid representation of a number. */ -actual fun String.toByte(): Byte = TODO("Wasm stdlib: Text") +actual fun String.toByte(): Byte = toByteOrNull() ?: numberFormatError(this) /** * Parses the string as a signed [Byte] number and returns the result. * @throws NumberFormatException if the string is not a valid representation of a number. * @throws IllegalArgumentException when [radix] is not a valid radix for string to number conversion. */ -actual fun String.toByte(radix: Int): Byte = TODO("Wasm stdlib: Text") - +public actual fun String.toByte(radix: Int): Byte = toByteOrNull(radix) ?: numberFormatError(this) /** * Parses the string as a [Short] number and returns the result.
* @throws NumberFormatException if the string is not a valid representation of a number. */ -actual fun String.toShort(): Short = TODO("Wasm stdlib: Text") +public actual fun String.toShort(): Short = toShortOrNull() ?: numberFormatError(this) /** * Parses the string as a [Short] number and returns the result. * @throws NumberFormatException if the string is not a valid representation of a number. * @throws IllegalArgumentException when [radix] is not a valid radix for string to number conversion. */ -actual fun String.toShort(radix: Int): Short = TODO("Wasm stdlib: Text") +public actual fun String.toShort(radix: Int): Short = toShortOrNull(radix) ?: numberFormatError(this) /** * Parses the string as an [Int] number and returns the result. * @throws NumberFormatException if the string is not a valid representation of a number. */ -actual fun String.toInt(): Int = TODO("Wasm stdlib: Text") +public actual fun String.toInt(): Int = toIntOrNull() ?: numberFormatError(this) /** * Parses the string as an [Int] number and returns the result. * @throws NumberFormatException if the string is not a valid representation of a number. * @throws IllegalArgumentException when [radix] is not a valid radix for string to number conversion. */ -actual fun String.toInt(radix: Int): Int = TODO("Wasm stdlib: Text") +public actual fun String.toInt(radix: Int): Int = toIntOrNull(radix) ?: numberFormatError(this) /** * Parses the string as a [Long] number and returns the result. * @throws NumberFormatException if the string is not a valid representation of a number. */ -actual fun String.toLong(): Long = TODO("Wasm stdlib: Text") +public actual fun String.toLong(): Long = toLongOrNull() ?: numberFormatError(this) /** * Parses the string as a [Long] number and returns the result. * @throws NumberFormatException if the string is not a valid representation of a number. * @throws IllegalArgumentException when [radix] is not a valid radix for string to number conversion. 
*/ -actual fun String.toLong(radix: Int): Long = TODO("Wasm stdlib: Text") +public actual fun String.toLong(radix: Int): Long = toLongOrNull(radix) ?: numberFormatError(this) /** * Parses the string as a [Double] number and returns the result. * @throws NumberFormatException if the string is not a valid representation of a number. */ -actual fun String.toDouble(): Double = TODO("Wasm stdlib: Text") +public actual fun String.toDouble(): Double = TODO("Wasm stdlib: Text") /** * Parses the string as a [Float] number and returns the result. * @throws NumberFormatException if the string is not a valid representation of a number. */ -actual fun String.toFloat(): Float = TODO("Wasm stdlib: Text") - -/** - * Parses the string as a [Double] number and returns the result - * or `null` if the string is not a valid representation of a number. - */ -actual fun String.toDoubleOrNull(): Double? = TODO("Wasm stdlib: Text") +public actual fun String.toFloat(): Float = toDouble().toFloat() /** * Parses the string as a [Float] number and returns the result * or `null` if the string is not a valid representation of a number. */ -actual fun String.toFloatOrNull(): Float? = TODO("Wasm stdlib: Text") +public actual fun String.toFloatOrNull(): Float? = toDoubleOrNull()?.toFloat() + +/** + * Parses the string as a [Double] number and returns the result + * or `null` if the string is not a valid representation of a number. + */ +public actual fun String.toDoubleOrNull(): Double? { + try { + return toDouble() + } catch (e: NumberFormatException) { + return null + } +} /** * Returns a string representation of this [Byte] value in the specified [radix]. @@ -609,7 +900,7 @@ actual fun String.toFloatOrNull(): Float? = TODO("Wasm stdlib: Text") * @throws IllegalArgumentException when [radix] is not a valid radix for number to string conversion.
*/ @SinceKotlin("1.2") -actual fun Byte.toString(radix: Int): String = TODO("Wasm stdlib: Text") +public actual fun Byte.toString(radix: Int): String = this.toInt().toString(radix) /** * Returns a string representation of this [Short] value in the specified [radix]. @@ -617,7 +908,7 @@ actual fun Byte.toString(radix: Int): String = TODO("Wasm stdlib: Text") * @throws IllegalArgumentException when [radix] is not a valid radix for number to string conversion. */ @SinceKotlin("1.2") -actual fun Short.toString(radix: Int): String = TODO("Wasm stdlib: Text") +public actual fun Short.toString(radix: Int): String = this.toInt().toString(radix) /** * Returns a string representation of this [Int] value in the specified [radix]. @@ -636,6 +927,19 @@ actual fun Int.toString(radix: Int): String = TODO("Wasm stdlib: Text") actual fun Long.toString(radix: Int): String = TODO("Wasm stdlib: Text") @PublishedApi -internal actual fun checkRadix(radix: Int): Int = TODO("Wasm stdlib: Text") +internal actual fun checkRadix(radix: Int): Int { + if (radix !in Char.MIN_RADIX..Char.MAX_RADIX) { + throw IllegalArgumentException("radix $radix was not in valid range ${Char.MIN_RADIX..Char.MAX_RADIX}") + } + return radix +} -internal actual fun digitOf(char: Char, radix: Int): Int = TODO("Wasm stdlib: Text") +internal actual fun digitOf(char: Char, radix: Int): Int = when { + char >= '0' && char <= '9' -> char - '0' + char >= 'A' && char <= 'Z' -> char - 'A' + 10 + char >= 'a' && char <= 'z' -> char - 'a' + 10 + char < '\u0080' -> -1 + char >= '\uFF21' && char <= '\uFF3A' -> char - '\uFF21' + 10 // full-width latin capital letter + char >= '\uFF41' && char <= '\uFF5A' -> char - '\uFF41' + 10 // full-width latin small letter + else -> char.digitToIntImpl() +}.let { if (it >= radix) -1 else it } diff --git a/libraries/stdlib/wasm/src/kotlin/text/CharCategory.kt b/libraries/stdlib/wasm/src/kotlin/text/CharCategoryWasm.kt similarity index 64% rename from 
libraries/stdlib/wasm/src/kotlin/text/CharCategory.kt
rename to libraries/stdlib/wasm/src/kotlin/text/CharCategoryWasm.kt
index d2240867d16..e512278b2c5 100644
--- a/libraries/stdlib/wasm/src/kotlin/text/CharCategory.kt
+++ b/libraries/stdlib/wasm/src/kotlin/text/CharCategoryWasm.kt
@@ -8,165 +8,169 @@ package kotlin.text
 /**
  * Represents the character general category in the Unicode specification.
  */
-@SinceKotlin("1.5")
-public actual enum class CharCategory {
+public actual enum class CharCategory(public val value: Int, public actual val code: String) { // value: numeric id compared in contains(); code: two-letter Unicode category code
     /**
      * General category "Cn" in the Unicode specification.
      */
-    UNASSIGNED,
+    UNASSIGNED(0, "Cn"),

     /**
      * General category "Lu" in the Unicode specification.
      */
-    UPPERCASE_LETTER,
+    UPPERCASE_LETTER(1, "Lu"),

     /**
      * General category "Ll" in the Unicode specification.
      */
-    LOWERCASE_LETTER,
+    LOWERCASE_LETTER(2, "Ll"),

     /**
      * General category "Lt" in the Unicode specification.
      */
-    TITLECASE_LETTER,
+    TITLECASE_LETTER(3, "Lt"),

     /**
      * General category "Lm" in the Unicode specification.
      */
-    MODIFIER_LETTER,
+    MODIFIER_LETTER(4, "Lm"),

     /**
      * General category "Lo" in the Unicode specification.
      */
-    OTHER_LETTER,
+    OTHER_LETTER(5, "Lo"),

     /**
      * General category "Mn" in the Unicode specification.
      */
-    NON_SPACING_MARK,
+    NON_SPACING_MARK(6, "Mn"),

     /**
      * General category "Me" in the Unicode specification.
      */
-    ENCLOSING_MARK,
+    ENCLOSING_MARK(7, "Me"),

     /**
      * General category "Mc" in the Unicode specification.
      */
-    COMBINING_SPACING_MARK,
+    COMBINING_SPACING_MARK(8, "Mc"),

     /**
      * General category "Nd" in the Unicode specification.
      */
-    DECIMAL_DIGIT_NUMBER,
+    DECIMAL_DIGIT_NUMBER(9, "Nd"),

     /**
      * General category "Nl" in the Unicode specification.
      */
-    LETTER_NUMBER,
+    LETTER_NUMBER(10, "Nl"),

     /**
      * General category "No" in the Unicode specification.
      */
-    OTHER_NUMBER,
+    OTHER_NUMBER(11, "No"),

     /**
      * General category "Zs" in the Unicode specification.
      */
-    SPACE_SEPARATOR,
+    SPACE_SEPARATOR(12, "Zs"),

     /**
      * General category "Zl" in the Unicode specification.
      */
-    LINE_SEPARATOR,
+    LINE_SEPARATOR(13, "Zl"),

     /**
      * General category "Zp" in the Unicode specification.
      */
-    PARAGRAPH_SEPARATOR,
+    PARAGRAPH_SEPARATOR(14, "Zp"),

     /**
      * General category "Cc" in the Unicode specification.
      */
-    CONTROL,
+    CONTROL(15, "Cc"),

     /**
      * General category "Cf" in the Unicode specification.
      */
-    FORMAT,
+    FORMAT(16, "Cf"),

     /**
      * General category "Co" in the Unicode specification.
      */
-    PRIVATE_USE,
+    PRIVATE_USE(18, "Co"), // value 17 is not assigned to any entry; valueOf() accounts for the gap

     /**
      * General category "Cs" in the Unicode specification.
      */
-    SURROGATE,
+    SURROGATE(19, "Cs"),

     /**
      * General category "Pd" in the Unicode specification.
      */
-    DASH_PUNCTUATION,
+    DASH_PUNCTUATION(20, "Pd"),

     /**
      * General category "Ps" in the Unicode specification.
      */
-    START_PUNCTUATION,
+    START_PUNCTUATION(21, "Ps"),

     /**
      * General category "Pe" in the Unicode specification.
      */
-    END_PUNCTUATION,
+    END_PUNCTUATION(22, "Pe"),

     /**
      * General category "Pc" in the Unicode specification.
      */
-    CONNECTOR_PUNCTUATION,
+    CONNECTOR_PUNCTUATION(23, "Pc"),

     /**
      * General category "Po" in the Unicode specification.
      */
-    OTHER_PUNCTUATION,
+    OTHER_PUNCTUATION(24, "Po"),

     /**
      * General category "Sm" in the Unicode specification.
      */
-    MATH_SYMBOL,
+    MATH_SYMBOL(25, "Sm"),

     /**
      * General category "Sc" in the Unicode specification.
      */
-    CURRENCY_SYMBOL,
+    CURRENCY_SYMBOL(26, "Sc"),

     /**
      * General category "Sk" in the Unicode specification.
      */
-    MODIFIER_SYMBOL,
+    MODIFIER_SYMBOL(27, "Sk"),

     /**
      * General category "So" in the Unicode specification.
      */
-    OTHER_SYMBOL,
+    OTHER_SYMBOL(28, "So"),

     /**
      * General category "Pi" in the Unicode specification.
      */
-    INITIAL_QUOTE_PUNCTUATION,
+    INITIAL_QUOTE_PUNCTUATION(29, "Pi"),

     /**
      * General category "Pf" in the Unicode specification.
      */
-    FINAL_QUOTE_PUNCTUATION;
-
-    /**
-     * Two-letter code of this general category in the Unicode specification.
-     */
-    public actual val code: String get() = TODO("Wasm stdlib: Text")
+    FINAL_QUOTE_PUNCTUATION(30, "Pf");

     /**
      * Returns `true` if [char] character belongs to this category.
      */
-    public actual operator fun contains(char: Char): Boolean = TODO("Wasm stdlib: Text")
+    public actual operator fun contains(char: Char): Boolean = char.getCategoryValue() == this.value // membership is a comparison of numeric category values
+
+    public companion object {
+        public fun valueOf(category: Int): CharCategory = // maps a numeric category value back to its entry
+            when (category) {
+                in 0..16 -> values()[category] // up to FORMAT the value equals the ordinal
+                in 18..30 -> values()[category - 1] // 17 is skipped, so ordinal = value - 1
+                else -> throw IllegalArgumentException("Category #$category is not defined.")
+            }
+
+    }
 }
diff --git a/libraries/stdlib/wasm/src/kotlin/text/CharWasm.kt b/libraries/stdlib/wasm/src/kotlin/text/CharWasm.kt
new file mode 100644
index 00000000000..06922997794
--- /dev/null
+++ b/libraries/stdlib/wasm/src/kotlin/text/CharWasm.kt
@@ -0,0 +1,9 @@
+/*
+ * Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors.
+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
+ */
+
+package kotlin.text
+
+internal fun Char.Companion.toCodePoint(high: Char, low: Char): Int =
+    (((high - MIN_HIGH_SURROGATE) shl 10) or (low - MIN_LOW_SURROGATE)) + 0x10000 // no validation here: assumes a well-formed high/low pair — TODO confirm callers check
diff --git a/libraries/stdlib/wasm/src/kotlin/text/CharacterCodingException.kt b/libraries/stdlib/wasm/src/kotlin/text/CharacterCodingException.kt
deleted file mode 100644
index 9e3a8aae00c..00000000000
--- a/libraries/stdlib/wasm/src/kotlin/text/CharacterCodingException.kt
+++ /dev/null
@@ -1,13 +0,0 @@
-/*
- * Copyright 2010-2020 JetBrains s.r.o. and Kotlin Programming Language contributors.
- * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
- */
-
-package kotlin.text
-
-/**
- * The exception thrown when a character encoding or decoding error occurs.
- */
-@SinceKotlin("1.3")
-@ExperimentalStdlibApi
-public actual open class CharacterCodingException actual constructor() : Exception()
\ No newline at end of file
diff --git a/libraries/stdlib/wasm/src/kotlin/text/CharacterCodingExceptionWasm.kt b/libraries/stdlib/wasm/src/kotlin/text/CharacterCodingExceptionWasm.kt
new file mode 100644
index 00000000000..6087af83aa6
--- /dev/null
+++ b/libraries/stdlib/wasm/src/kotlin/text/CharacterCodingExceptionWasm.kt
@@ -0,0 +1,15 @@
+/*
+ * Copyright 2010-2019 JetBrains s.r.o. and Kotlin Programming Language contributors.
+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
+ */
+
+package kotlin.text
+
+/**
+ * The exception thrown when a character encoding or decoding error occurs.
+ */
+@SinceKotlin("1.4")
+@WasExperimental(ExperimentalStdlibApi::class)
+public actual open class CharacterCodingException(message: String?) : Exception(message) { // the message-taking primary constructor is not marked `actual`
+    actual constructor() : this(null) // the `actual` no-arg constructor delegates with a null message
+}
\ No newline at end of file
diff --git a/libraries/stdlib/wasm/src/kotlin/text/StringBuilder.kt b/libraries/stdlib/wasm/src/kotlin/text/StringBuilderWasm.kt
similarity index 100%
rename from libraries/stdlib/wasm/src/kotlin/text/StringBuilder.kt
rename to libraries/stdlib/wasm/src/kotlin/text/StringBuilderWasm.kt
diff --git a/libraries/stdlib/wasm/src/kotlin/text/StringsWasm.kt b/libraries/stdlib/wasm/src/kotlin/text/StringsWasm.kt
new file mode 100644
index 00000000000..887460a9ca4
--- /dev/null
+++ b/libraries/stdlib/wasm/src/kotlin/text/StringsWasm.kt
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2010-2021 JetBrains s.r.o. and Kotlin Programming Language contributors.
+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
+ */
+
+package kotlin.text
+
+/**
+ * Returns the index within this string of the first occurrence of the specified character, starting from the specified offset.
+ */
+internal actual fun String.nativeIndexOf(ch: Char, fromIndex: Int): Int {
+    for (index in fromIndex.coerceAtLeast(0)..this.lastIndex) { // a negative fromIndex is treated as 0
+        if (ch == get(index)) return index
+    }
+    return -1
+}
+
+/**
+ * Returns the index within this string of the last occurrence of the specified character, searching backward starting at the specified offset.
+ */
+internal actual fun String.nativeLastIndexOf(ch: Char, fromIndex: Int): Int {
+    for (index in fromIndex.coerceAtMost(this.lastIndex) downTo 0) { // a fromIndex past the end is clamped to lastIndex
+        if (ch == get(index)) return index
+    }
+    return -1
+}
+
+/**
+ * Returns the index within this string of the first occurrence of the specified substring, starting from the specified offset.
+ */
+internal actual fun String.nativeIndexOf(str: String, fromIndex: Int): Int {
+    for (index in fromIndex.coerceAtLeast(0)..this.length) { // index == length is included; NOTE(review): presumably so an empty str can match at the end — depends on regionMatchesImpl
+        if (str.regionMatchesImpl(0, this, index, str.length, false)) {
+            return index
+        }
+    }
+    return -1
+}
+
+/**
+ * Returns the index within this string of the last occurrence of the specified substring, searching backward starting at the specified offset.
+ */
+internal actual fun String.nativeLastIndexOf(str: String, fromIndex: Int): Int {
+    for (index in fromIndex.coerceAtMost(this.lastIndex) downTo 0) { // a fromIndex past the end is clamped to lastIndex
+        if (str.regionMatchesImpl(0, this, index, str.length, false)) {
+            return index
+        }
+    }
+    return -1
+}
\ No newline at end of file
diff --git a/libraries/stdlib/wasm/src/kotlin/text/utf8Encoding.kt b/libraries/stdlib/wasm/src/kotlin/text/utf8Encoding.kt
new file mode 100644
index 00000000000..b389955a3e0
--- /dev/null
+++ b/libraries/stdlib/wasm/src/kotlin/text/utf8Encoding.kt
@@ -0,0 +1,271 @@
+/*
+ * Copyright 2010-2019 JetBrains s.r.o. and Kotlin Programming Language contributors.
+ * Use of this source code is governed by the Apache 2.0 license that can be found in the license/LICENSE.txt file.
+ */
+
+package kotlin.text
+
+/** Returns the negative [size] if [throwOnMalformed] is false, throws [CharacterCodingException] otherwise. 
*/
+private fun malformed(size: Int, index: Int, throwOnMalformed: Boolean): Int {
+    if (throwOnMalformed) throw CharacterCodingException("Malformed sequence starting at ${index - 1}") // callers in this file pass the index just past the leading unit
+    return -size
+}
+
+/**
+ * Returns code point corresponding to UTF-16 surrogate pair,
+ * where the first of the pair is the [high] and the second is in the [string] at the [index].
+ * Returns zero if the pair is malformed and [throwOnMalformed] is false.
+ *
+ * @throws CharacterCodingException if the pair is malformed and [throwOnMalformed] is true.
+ */
+private fun codePointFromSurrogate(string: String, high: Int, index: Int, endIndex: Int, throwOnMalformed: Boolean): Int {
+    if (high !in 0xD800..0xDBFF || index >= endIndex) { // not a high surrogate, or nothing follows it
+        return malformed(0, index, throwOnMalformed)
+    }
+    val low = string[index].code
+    if (low !in 0xDC00..0xDFFF) { // the following char is not a low surrogate
+        return malformed(0, index, throwOnMalformed)
+    }
+    return 0x10000 + ((high and 0x3FF) shl 10) or (low and 0x3FF) // `+` binds tighter than infix `or`: (0x10000 + (high10 shl 10)) or low10
+}
+
+/**
+ * Returns code point corresponding to UTF-8 sequence of two bytes,
+ * where the first byte of the sequence is the [byte1] and the second byte is in the [bytes] array at the [index].
+ * Returns zero if the sequence is malformed and [throwOnMalformed] is false.
+ *
+ * @throws CharacterCodingException if the sequence of two bytes is malformed and [throwOnMalformed] is true.
+ */
+private fun codePointFrom2(bytes: ByteArray, byte1: Int, index: Int, endIndex: Int, throwOnMalformed: Boolean): Int {
+    if (byte1 and 0x1E == 0 || index >= endIndex) { // lead byte 0xC0/0xC1 (payload bits 1..4 all zero), or no second byte available
+        return malformed(0, index, throwOnMalformed)
+    }
+    val byte2 = bytes[index].toInt()
+    if (byte2 and 0xC0 != 0x80) { // not a 10xxxxxx continuation byte
+        return malformed(0, index, throwOnMalformed)
+    }
+    return (byte1 shl 6) xor byte2 xor 0xF80 // 0xF80 cancels the sign-extension and UTF-8 marker bits of both bytes
+}
+
+/**
+ * Returns code point corresponding to UTF-8 sequence of three bytes,
+ * where the first byte of the sequence is the [byte1] and the others are in the [bytes] array starting from the [index].
+ * Returns a non-positive value indicating number of bytes from [bytes] included in malformed sequence
+ * if the sequence is malformed and [throwOnMalformed] is false.
+ *
+ * @throws CharacterCodingException if the sequence of three bytes is malformed and [throwOnMalformed] is true.
+ */
+private fun codePointFrom3(bytes: ByteArray, byte1: Int, index: Int, endIndex: Int, throwOnMalformed: Boolean): Int {
+    if (index >= endIndex) { // no second byte available
+        return malformed(0, index, throwOnMalformed)
+    }
+
+    val byte2 = bytes[index].toInt()
+    if (byte1 and 0xF == 0) { // lead byte 0xE0: second byte must be 0xA0..0xBF
+        if (byte2 and 0xE0 != 0xA0) {
+            // Non-shortest form
+            return malformed(0, index, throwOnMalformed)
+        }
+    } else if (byte1 and 0xF == 0xD) { // lead byte 0xED: second byte must be 0x80..0x9F
+        if (byte2 and 0xE0 != 0x80) {
+            // Surrogate code point
+            return malformed(0, index, throwOnMalformed)
+        }
+    } else if (byte2 and 0xC0 != 0x80) { // any other lead: second byte must be a 10xxxxxx continuation byte
+        return malformed(0, index, throwOnMalformed)
+    }
+
+    if (index + 1 == endIndex) { // byte2 was accepted but the third byte is missing
+        return malformed(1, index, throwOnMalformed)
+    }
+    val byte3 = bytes[index + 1].toInt()
+    if (byte3 and 0xC0 != 0x80) { // byte2 was accepted, so one byte from [bytes] belongs to the malformed sequence
+        return malformed(1, index, throwOnMalformed)
+    }
+
+    return (byte1 shl 12) xor (byte2 shl 6) xor byte3 xor -0x1E080 // the constant cancels the sign-extension and UTF-8 marker bits of the three bytes
+}
+
+/**
+ * Returns code point corresponding to UTF-8 sequence of four bytes,
+ * where the first byte of the sequence is the [byte1] and the others are in the [bytes] array starting from the [index].
+ * Returns a non-positive value indicating number of bytes from [bytes] included in malformed sequence
+ * if the sequence is malformed and [throwOnMalformed] is false.
+ *
+ * @throws CharacterCodingException if the sequence of four bytes is malformed and [throwOnMalformed] is true.
+ */
+private fun codePointFrom4(bytes: ByteArray, byte1: Int, index: Int, endIndex: Int, throwOnMalformed: Boolean): Int {
+    if (index >= endIndex) {
+        return malformed(0, index, throwOnMalformed) // no second byte: report it instead of falling through to bytes[index]
+    }
+
+    val byte2 = bytes[index].toInt()
+    if (byte1 and 0xF == 0x0) { // lead byte 0xF0: second byte must be 0x90..0xBF
+        if (byte2 and 0xF0 <= 0x80 || byte2 and 0xC0 != 0x80) {
+            // Non-shortest form (0x80..0x8F) or not a continuation byte at all
+            return malformed(0, index, throwOnMalformed)
+        }
+    } else if (byte1 and 0xF == 0x4) { // lead byte 0xF4: second byte must be 0x80..0x8F
+        if (byte2 and 0xF0 != 0x80) {
+            // Out of Unicode code points domain (larger than U+10FFFF)
+            return malformed(0, index, throwOnMalformed)
+        }
+    } else if (byte1 and 0xF > 0x4) { // lead bytes above 0xF4 never start a valid sequence
+        return malformed(0, index, throwOnMalformed)
+    } else if (byte2 and 0xC0 != 0x80) { // lead bytes 0xF1..0xF3: second byte must be a 10xxxxxx continuation byte
+        return malformed(0, index, throwOnMalformed)
+    }
+
+    if (index + 1 == endIndex) { // byte2 was accepted but the third byte is missing
+        return malformed(1, index, throwOnMalformed)
+    }
+    val byte3 = bytes[index + 1].toInt()
+    if (byte3 and 0xC0 != 0x80) {
+        return malformed(1, index, throwOnMalformed)
+    }
+
+    if (index + 2 == endIndex) { // byte2 and byte3 were accepted but the fourth byte is missing
+        return malformed(2, index, throwOnMalformed)
+    }
+    val byte4 = bytes[index + 2].toInt()
+    if (byte4 and 0xC0 != 0x80) {
+        return malformed(2, index, throwOnMalformed)
+    }
+    return (byte1 shl 18) xor (byte2 shl 12) xor (byte3 shl 6) xor byte4 xor 0x381F80 // the constant cancels the sign-extension and UTF-8 marker bits of the four bytes
+}
+
+/**
+ * Maximum number of bytes needed to encode a single char.
+ *
+ * Code points in `0..0x7F` are encoded in a single byte.
+ * Code points in `0x80..0x7FF` are encoded in two bytes.
+ * Code points in `0x800..0xD7FF` or in `0xE000..0xFFFF` are encoded in three bytes.
+ * Surrogate code points in `0xD800..0xDFFF` are not Unicode scalar values, therefore aren't encoded.
+ * Code points in `0x10000..0x10FFFF` are represented by a pair of surrogate `Char`s and are encoded in four bytes.
+ */
+private const val MAX_BYTES_PER_CHAR = 3
+
+/**
+ * The byte sequence a malformed UTF-16 char sequence is replaced by.
+ */
+private val REPLACEMENT_BYTE_SEQUENCE: ByteArray = byteArrayOf(0xEF.toByte(), 0xBF.toByte(), 0xBD.toByte())
+
+/**
+ * Encodes the [string] using UTF-8 and returns the resulting [ByteArray].
+ *
+ * @param string the string to encode.
+ * @param startIndex the start offset (inclusive) of the substring to encode.
+ * @param endIndex the end offset (exclusive) of the substring to encode.
+ * @param throwOnMalformed whether to throw on malformed char sequence or replace by the [REPLACEMENT_BYTE_SEQUENCE].
+ *
+ * @throws CharacterCodingException if the char sequence is malformed and [throwOnMalformed] is true.
+ */
+internal fun encodeUtf8(string: String, startIndex: Int, endIndex: Int, throwOnMalformed: Boolean): ByteArray {
+    require(startIndex >= 0 && endIndex <= string.length && startIndex <= endIndex)
+
+    val bytes = ByteArray((endIndex - startIndex) * MAX_BYTES_PER_CHAR) // worst-case size; trimmed to the used length before returning
+    var byteIndex = 0
+    var charIndex = startIndex
+
+    while (charIndex < endIndex) {
+        val code = string[charIndex++].code // charIndex now points at the char after the one being encoded
+        when {
+            code < 0x80 ->
+                bytes[byteIndex++] = code.toByte()
+            code < 0x800 -> {
+                bytes[byteIndex++] = ((code shr 6) or 0xC0).toByte()
+                bytes[byteIndex++] = ((code and 0x3F) or 0x80).toByte()
+            }
+            code < 0xD800 || code >= 0xE000 -> {
+                bytes[byteIndex++] = ((code shr 12) or 0xE0).toByte()
+                bytes[byteIndex++] = (((code shr 6) and 0x3F) or 0x80).toByte()
+                bytes[byteIndex++] = ((code and 0x3F) or 0x80).toByte()
+            }
+            else -> { // Surrogate char value
+                val codePoint = codePointFromSurrogate(string, code, charIndex, endIndex, throwOnMalformed)
+                if (codePoint <= 0) { // malformed pair: emit the replacement sequence for this char only
+                    bytes[byteIndex++] = REPLACEMENT_BYTE_SEQUENCE[0]
+                    bytes[byteIndex++] = REPLACEMENT_BYTE_SEQUENCE[1]
+                    bytes[byteIndex++] = REPLACEMENT_BYTE_SEQUENCE[2]
+                } else {
+                    bytes[byteIndex++] = ((codePoint shr 18) or 0xF0).toByte()
+                    bytes[byteIndex++] = (((codePoint shr 12) and 0x3F) or 0x80).toByte()
+                    bytes[byteIndex++] = (((codePoint shr 6) and 0x3F) or 0x80).toByte()
+                    bytes[byteIndex++] = ((codePoint and 0x3F) or 0x80).toByte()
+                    charIndex++ // skip the low surrogate that was consumed as part of the pair
+                }
+            }
+        }
+    }
+
+    return if (bytes.size == byteIndex) bytes else bytes.copyOf(byteIndex)
+}
+
+/**
+ * The character a malformed UTF-8 byte sequence is replaced by.
+ */
+private const val REPLACEMENT_CHAR = '\uFFFD'
+
+/**
+ * Decodes the UTF-8 [bytes] array and returns the resulting [String].
+ *
+ * @param bytes the byte array to decode.
+ * @param startIndex the start offset (inclusive) of the array to be decoded.
+ * @param endIndex the end offset (exclusive) of the array to be decoded.
+ * @param throwOnMalformed whether to throw on malformed byte sequence or replace by the [REPLACEMENT_CHAR].
+ *
+ * @throws CharacterCodingException if the array is malformed UTF-8 byte sequence and [throwOnMalformed] is true.
+ */
+internal fun decodeUtf8(bytes: ByteArray, startIndex: Int, endIndex: Int, throwOnMalformed: Boolean): String {
+    require(startIndex >= 0 && endIndex <= bytes.size && startIndex <= endIndex)
+
+    var byteIndex = startIndex
+    val stringBuilder = StringBuilder()
+
+    while (byteIndex < endIndex) {
+        val byte = bytes[byteIndex++].toInt() // sign-extended: bytes 0x80..0xFF become negative Ints
+        when {
+            byte >= 0 ->
+                stringBuilder.append(byte.toChar())
+            byte shr 5 == -2 -> { // 110xxxxx lead byte: two-byte sequence
+                val code = codePointFrom2(bytes, byte, byteIndex, endIndex, throwOnMalformed)
+                if (code <= 0) {
+                    stringBuilder.append(REPLACEMENT_CHAR)
+                    byteIndex += -code // skip the following bytes reported as part of the malformed sequence
+                } else {
+                    stringBuilder.append(code.toChar())
+                    byteIndex += 1
+                }
+            }
+            byte shr 4 == -2 -> { // 1110xxxx lead byte: three-byte sequence
+                val code = codePointFrom3(bytes, byte, byteIndex, endIndex, throwOnMalformed)
+                if (code <= 0) {
+                    stringBuilder.append(REPLACEMENT_CHAR)
+                    byteIndex += -code // skip the following bytes reported as part of the malformed sequence
+                } else {
+                    stringBuilder.append(code.toChar())
+                    byteIndex += 2
+                }
+            }
+            byte shr 3 == -2 -> { // 11110xxx lead byte: four-byte sequence
+                val code = codePointFrom4(bytes, byte, byteIndex, endIndex, throwOnMalformed)
+                if (code <= 0) {
+                    stringBuilder.append(REPLACEMENT_CHAR)
+                    byteIndex += -code // skip the following bytes reported as part of the malformed sequence
+                } else {
+                    val high = (code - 0x10000) shr 10 or 0xD800 // infix calls chain left to right: ((code - 0x10000) shr 10) or 0xD800
+                    val low = (code and 0x3FF) or 0xDC00
+                    stringBuilder.append(high.toChar())
+                    stringBuilder.append(low.toChar())
+                    byteIndex += 3
+                }
+            }
+            else -> {
+                malformed(0, byteIndex, throwOnMalformed) // result ignored: called only so that it throws when throwOnMalformed is true
+                stringBuilder.append(REPLACEMENT_CHAR)
+            }
+        }
+    }
+
+    return stringBuilder.toString()
+}
\ No newline at end of file
diff --git a/libraries/stdlib/wasm/test/stringEncoding.kt b/libraries/stdlib/wasm/test/stringEncoding.kt
index ab5a16b37bd..5065df1ca36 100644
--- a/libraries/stdlib/wasm/test/stringEncoding.kt
+++ b/libraries/stdlib/wasm/test/stringEncoding.kt
@@ -5,7 +5,6 @@

 package test.text

-// TODO: Fix this once we implement kotlin.text
-internal actual val surrogateCodePointDecoding: String = "�"
+internal actual val surrogateCodePointDecoding: String = "���"

-internal actual val surrogateCharEncoding: ByteArray = byteArrayOf(0x3F)
+internal actual val surrogateCharEncoding: ByteArray = byteArrayOf(0xEF.toByte(), 0xBF.toByte(), 0xBD.toByte())
\ No newline at end of file
diff --git a/libraries/tools/kotlin-stdlib-gen/src/generators/unicode/GenerateUnicodeData.kt b/libraries/tools/kotlin-stdlib-gen/src/generators/unicode/GenerateUnicodeData.kt
index 2af9f68ce87..6edefa097de 100644
--- a/libraries/tools/kotlin-stdlib-gen/src/generators/unicode/GenerateUnicodeData.kt
+++ b/libraries/tools/kotlin-stdlib-gen/src/generators/unicode/GenerateUnicodeData.kt
@@ -100,10 +100,10 @@ fun main(args: Array) {
         oneToManyMappingsGenerators.add(lowercase)
     }

-    val categoryTestGenerator: CharCategoryTestGenerator
+    val stringUppercaseGenerators = mutableListOf()
+    val stringLowercaseGenerators = mutableListOf()

-    val stringUppercaseGenerator: StringUppercaseGenerator
-    val stringLowercaseGenerator: StringLowercaseGenerator
+    val categoryTestGenerator: CharCategoryTestGenerator

     val stringCasingTestGenerator: StringCasingTestGenerator

@@ -115,7 +115,9 @@ fun main(args: Array) {
             categoryTestGenerator = CharCategoryTestGenerator(categoryTestFile)

             val commonGeneratedDir = baseDir.resolve("libraries/stdlib/common/src/generated")
- 
oneToManyMappingsGenerators.add(OneToManyMappingsGenerator.forTitlecase(commonGeneratedDir.resolve("_OneToManyTitlecaseMappings.kt"), bmpUnicodeDataLines)) + oneToManyMappingsGenerators.add( + OneToManyMappingsGenerator.forTitlecase(commonGeneratedDir.resolve("_OneToManyTitlecaseMappings.kt"), bmpUnicodeDataLines) + ) val jsGeneratedDir = baseDir.resolve("libraries/stdlib/js/src/generated/") addRangesGenerators(jsGeneratedDir, KotlinTarget.JS) @@ -129,8 +131,19 @@ fun main(args: Array) { addRangesGenerators(nativeGeneratedDir, KotlinTarget.Native) addOneToOneMappingsGenerators(nativeGeneratedDir, KotlinTarget.Native) addOneToManyMappingsGenerators(nativeGeneratedDir, KotlinTarget.Native) - stringUppercaseGenerator = StringUppercaseGenerator(nativeGeneratedDir.resolve("_StringUppercase.kt"), unicodeDataLines) - stringLowercaseGenerator = StringLowercaseGenerator(nativeGeneratedDir.resolve("_StringLowercase.kt"), unicodeDataLines) + stringUppercaseGenerators.add(StringUppercaseGenerator(nativeGeneratedDir.resolve("_StringUppercase.kt"), unicodeDataLines)) + stringLowercaseGenerators.add( + StringLowercaseGenerator(nativeGeneratedDir.resolve("_StringLowercase.kt"), unicodeDataLines, KotlinTarget.Native) + ) + + val wasmGeneratedDir = baseDir.resolve("libraries/stdlib/wasm/src/generated/") + addRangesGenerators(wasmGeneratedDir, KotlinTarget.WASM) + addOneToOneMappingsGenerators(wasmGeneratedDir, KotlinTarget.WASM) + addOneToManyMappingsGenerators(wasmGeneratedDir, KotlinTarget.WASM) + stringUppercaseGenerators.add(StringUppercaseGenerator(wasmGeneratedDir.resolve("_StringUppercase.kt"), unicodeDataLines)) + stringLowercaseGenerators.add( + StringLowercaseGenerator(wasmGeneratedDir.resolve("_StringLowercase.kt"), unicodeDataLines, KotlinTarget.WASM) + ) val nativeTestDir = baseDir.resolve("kotlin-native/backend.native/tests/stdlib_external/text") stringCasingTestGenerator = StringCasingTestGenerator(nativeTestDir) @@ -186,15 +199,17 @@ fun main(args: Array) { 
it.generate() } - stringUppercaseGenerator.let { + stringUppercaseGenerators.forEach { specialCasingLines.forEach { line -> it.appendSpecialCasingLine(line) } it.generate() } - stringLowercaseGenerator.let { + + stringLowercaseGenerators.forEach { specialCasingLines.forEach { line -> it.appendSpecialCasingLine(line) } wordBreakPropertyLines.forEach { line -> it.appendWordBreakPropertyLine(line) } it.generate() } + stringCasingTestGenerator.let { derivedCorePropertiesLines.forEach { line -> it.appendDerivedCorePropertiesLine(line) } it.generate() diff --git a/libraries/tools/kotlin-stdlib-gen/src/generators/unicode/mappings/string/StringLowercaseGenerator.kt b/libraries/tools/kotlin-stdlib-gen/src/generators/unicode/mappings/string/StringLowercaseGenerator.kt index 853c7670d40..e4e590e4e0f 100644 --- a/libraries/tools/kotlin-stdlib-gen/src/generators/unicode/mappings/string/StringLowercaseGenerator.kt +++ b/libraries/tools/kotlin-stdlib-gen/src/generators/unicode/mappings/string/StringLowercaseGenerator.kt @@ -18,6 +18,7 @@ import java.io.FileWriter internal class StringLowercaseGenerator( private val outputFile: File, unicodeDataLines: List, + private val target: KotlinTarget, ) : StringCasingGenerator(unicodeDataLines) { private val casedRanges = mutableListOf() @@ -60,7 +61,7 @@ internal class StringLowercaseGenerator( casedRanges.sortBy { it.first } caseIgnorableRanges.sortBy { it.first } - val strategy = RangesWritingStrategy.of(KotlinTarget.Native) + val strategy = RangesWritingStrategy.of(target) FileWriter(outputFile).use { writer -> writer.writeHeader(outputFile, "kotlin.text") diff --git a/libraries/tools/kotlin-stdlib-gen/src/generators/unicode/ranges/RangesWritingStrategy.kt b/libraries/tools/kotlin-stdlib-gen/src/generators/unicode/ranges/RangesWritingStrategy.kt index 5ab0f0f8f73..181b3969097 100644 --- a/libraries/tools/kotlin-stdlib-gen/src/generators/unicode/ranges/RangesWritingStrategy.kt +++ 
b/libraries/tools/kotlin-stdlib-gen/src/generators/unicode/ranges/RangesWritingStrategy.kt @@ -5,6 +5,7 @@ package generators.unicode.ranges +import templates.Backend import templates.KotlinTarget import templates.Platform import java.io.FileWriter @@ -21,15 +22,15 @@ internal sealed class RangesWritingStrategy { fun of(target: KotlinTarget, wrapperName: String? = null): RangesWritingStrategy { return when (target.platform) { Platform.JS -> JsRangesWritingStrategy(wrapperName!!) - else -> NativeRangesWritingStrategy + else -> NativeRangesWritingStrategy(useNativeRangesAnnotation = target.backend != Backend.Wasm) } } } } -internal object NativeRangesWritingStrategy : RangesWritingStrategy() { +internal class NativeRangesWritingStrategy(private val useNativeRangesAnnotation: Boolean) : RangesWritingStrategy() { override val indentation: String get() = "" - override val rangesAnnotation: String get() = "@SharedImmutable\n" + override val rangesAnnotation: String get() = if (useNativeRangesAnnotation) "@SharedImmutable\n" else "" override val rangesVisibilityModifier: String get() = "private" override fun beforeWritingRanges(writer: FileWriter) {} override fun afterWritingRanges(writer: FileWriter) {}