/* Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package test.text.harmony_regex import kotlin.text.* import kotlin.test.* /** * Regex tests: matching, searching with find() and next(), group extraction, replacement, escape sequences, character classes, Unicode blocks, quoting and splitting. Several test methods below contain only TODO notes. */ class PatternTest2 { /** Message-first overloads of assertTrue and assertFalse; each forwards to the two-argument call with the arguments swapped (value first, message second). */ fun assertTrue(msg: String, value: Boolean) = assertTrue(value, msg) fun assertFalse(msg: String, value: Boolean) = assertFalse(value, msg) /** * Tests simple pattern compilation and matching methods: matches(), the in operator and find(). */ @Test fun testSimpleMatch() { val regex = Regex("foo.*") var testString = "foo123" assertTrue(regex.matches(testString)) assertTrue(regex in testString) assertTrue(regex.find(testString) != null) testString = "fox" assertFalse(regex.matches(testString)) assertFalse(regex in testString) assertFalse(regex.find(testString) != null) assertTrue(Regex("foo.*").matches("foo123")) assertFalse(Regex("foo.*").matches("fox")) assertFalse(Regex("bar").matches("foobar")) assertTrue(Regex("").matches("")) } /** Checks the start and end offsets of successive matches of "foo" obtained through find() and next(), including find() with an explicit start index. Fails the test if IllegalArgumentException is thrown. */ @Test fun testCursors() { val regex: Regex var result: MatchResult? 
try { regex = Regex("foo") result = regex.find("foobar") assertNotNull(result) assertEquals(0, result!!.range.start) assertEquals(3, result.range.endInclusive + 1) assertNull(result.next()) result = regex.find("barfoobar") assertNotNull(result) assertEquals(3, result!!.range.start) assertEquals(6, result.range.endInclusive + 1) assertNull(result.next()) result = regex.find("barfoo") assertNotNull(result) assertEquals(3, result!!.range.start) assertEquals(6, result.range.endInclusive + 1) assertNull(result.next()) result = regex.find("foobarfoobarfoo") assertNotNull(result) assertEquals(0, result!!.range.start) assertEquals(3, result.range.endInclusive + 1) result = result.next() assertNotNull(result) assertEquals(6, result!!.range.start) assertEquals(9, result.range.endInclusive + 1) result = result.next() assertNotNull(result) assertEquals(12, result!!.range.start) assertEquals(15, result.range.endInclusive + 1) assertNull(result.next()) result = regex.find("foobarfoobarfoo", 0) assertNotNull(result) assertEquals(0, result!!.range.start) assertEquals(3, result.range.endInclusive + 1) result = regex.find("foobarfoobarfoo", 4) assertNotNull(result) assertEquals(6, result!!.range.start) assertEquals(9, result.range.endInclusive + 1) } catch (e: IllegalArgumentException) { println(e.message) fail() } } /** Checks the overall range, the per-group ranges and the group values of three successive matches of a two-group pattern, and that no fourth match exists. */ @Test fun testGroups() { val regex: Regex var result: MatchResult? 
regex = Regex("(p[0-9]*)#?(q[0-9]*)") result = regex.find("p1#q3p2q42p5p71p63#q888") assertNotNull(result) assertEquals(0, result!!.range.start) assertEquals(5, result.range.endInclusive + 1) assertEquals(3, result.groups.size) assertEquals(0, result.groups[0]!!.range.start) assertEquals(5, result.groups[0]!!.range.endInclusive + 1) assertEquals(0, result.groups[1]!!.range.start) assertEquals(2, result.groups[1]!!.range.endInclusive + 1) assertEquals(3, result.groups[2]!!.range.start) assertEquals(5, result.groups[2]!!.range.endInclusive + 1) assertEquals("p1#q3", result.value) assertEquals("p1#q3", result.groupValues[0]) assertEquals("p1", result.groupValues[1]) assertEquals("q3", result.groupValues[2]) result = result.next() assertNotNull(result) assertEquals(5, result!!.range.start) assertEquals(10, result.range.endInclusive + 1) assertEquals(3, result.groups.size) assertEquals(10, result.groups[0]!!.range.endInclusive + 1) assertEquals(5, result.groups[1]!!.range.start) assertEquals(7, result.groups[1]!!.range.endInclusive + 1) assertEquals(7, result.groups[2]!!.range.start) assertEquals(10, result.groups[2]!!.range.endInclusive + 1) assertEquals("p2q42", result.value) assertEquals("p2q42", result.groupValues[0]) assertEquals("p2", result.groupValues[1]) assertEquals("q42", result.groupValues[2]) result = result.next() assertNotNull(result) assertEquals(15, result!!.range.start) assertEquals(23, result.range.endInclusive + 1) assertEquals(3, result.groups.size) assertEquals(15, result.groups[0]!!.range.start) assertEquals(23, result.groups[0]!!.range.endInclusive + 1) assertEquals(15, result.groups[1]!!.range.start) assertEquals(18, result.groups[1]!!.range.endInclusive + 1) assertEquals(19, result.groups[2]!!.range.start) assertEquals(23, result.groups[2]!!.range.endInclusive + 1) assertEquals("p63#q888", result.value) assertEquals("p63#q888", result.groupValues[0]) assertEquals("p63", result.groupValues[1]) assertEquals("q888", result.groupValues[2]) 
assertNull(result.next()) } /** Checks replace() and replaceFirst() with plain replacements and with numbered group references in the replacement string. */ @Test fun testReplace() { var regex: Regex // Note: examples from book, // Hitchens, Ron, 2002, "Java NIO", O'Reilly, page 171 regex = Regex("a*b") var testString = "aabfooaabfooabfoob" assertTrue(regex.replace(testString, "-") == "-foo-foo-foo-") assertTrue(regex.replaceFirst(testString, "-") == "-fooaabfooabfoob") regex = Regex("([bB])yte") testString = "Byte for byte" assertTrue(regex.replaceFirst(testString, "$1ite") == "Bite for byte") assertTrue(regex.replace(testString, "$1ite") == "Bite for bite") regex = Regex("\\d\\d\\d\\d([- ])") testString = "card #1234-5678-1234" assertTrue(regex.replaceFirst(testString, "xxxx$1") == "card #xxxx-5678-1234") assertTrue(regex.replace(testString, "xxxx$1") == "card #xxxx-xxxx-1234") regex = Regex("(up|left)( *)(right|down)") testString = "left right, up down" assertTrue(regex.replaceFirst(testString, "$3$2$1") == "right left, up down") assertTrue(regex.replace(testString, "$3$2$1") == "right left, down up") regex = Regex("([CcPp][hl]e[ea]se)") testString = "I want cheese. Please." assertTrue(regex.replaceFirst(testString, " $1 ") == "I want cheese . Please.") assertTrue(regex.replace(testString, " $1 ") == "I want cheese . Please .") } /** Checks escape sequences inside patterns (escaped backslash, whitespace and control escapes, unicode, hex, octal and control-letter escapes) and expects IllegalArgumentException for malformed ones. */ @Test fun testEscapes() { var regex: Regex var result: MatchResult? 
// Test \\ sequence regex = Regex("([a-z]+)\\\\([a-z]+);") result = regex.find("fred\\ginger;abbott\\costello;jekell\\hyde;") assertNotNull(result) assertEquals("fred", result!!.groupValues[1]) assertEquals("ginger", result.groupValues[2]) result = result.next() assertNotNull(result) assertEquals("abbott", result!!.groupValues[1]) assertEquals("costello", result.groupValues[2]) result = result.next() assertNotNull(result) assertEquals("jekell", result!!.groupValues[1]) assertEquals("hyde", result.groupValues[2]) assertNull(result.next()) // Test \n, \t, \r, \f, \e, \a sequences regex = Regex("([a-z]+)[\\n\\t\\r\\f\\e\\a]+([a-z]+)") result = regex.find("aa\nbb;cc\u0009\rdd;ee\u000C\u001Bff;gg\n\u0007hh") assertNotNull(result) assertEquals("aa", result!!.groupValues[1]) assertEquals("bb", result.groupValues[2]) result = result.next() assertNotNull(result) assertEquals("cc", result!!.groupValues[1]) assertEquals("dd", result.groupValues[2]) result = result.next() assertNotNull(result) assertEquals("ee", result!!.groupValues[1]) assertEquals("ff", result.groupValues[2]) result = result.next() assertNotNull(result) assertEquals("gg", result!!.groupValues[1]) assertEquals("hh", result.groupValues[2]) assertNull(result.next()) // Test \\u and \\x sequences regex = Regex("([0-9]+)[\\u0020:\\x21];") result = regex.find("11:;22 ;33-;44!;") assertNotNull(result) assertEquals("11", result!!.groupValues[1]) result = result.next() assertNotNull(result) assertEquals("22", result!!.groupValues[1]) result = result.next() assertNotNull(result) assertEquals("44", result!!.groupValues[1]) assertNull(result.next()) // Test invalid unicode sequences // TODO: Double check it. 
try { regex = Regex("\\u") fail("IllegalArgumentException expected") } catch (e: IllegalArgumentException) { } try { regex = Regex("\\u;") fail("IllegalArgumentException expected") } catch (e: IllegalArgumentException) { } try { regex = Regex("\\u002") fail("IllegalArgumentException expected") } catch (e: IllegalArgumentException) { } try { regex = Regex("\\u002;") fail("IllegalArgumentException expected") } catch (e: IllegalArgumentException) { } // Test invalid hex sequences try { regex = Regex("\\x") fail("IllegalArgumentException expected") } catch (e: IllegalArgumentException) { } try { regex = Regex("\\x;") fail("IllegalArgumentException expected") } catch (e: IllegalArgumentException) { } try { regex = Regex("\\xa") fail("IllegalArgumentException expected") } catch (e: IllegalArgumentException) { } try { regex = Regex("\\xa;") fail("IllegalArgumentException expected") } catch (e: IllegalArgumentException) { } // Test \0 (octal) sequences (1, 2 and 3 digit) regex = Regex("([0-9]+)[\\07\\040\\0160];") result = regex.find("11\u0007;22:;33 ;44p;") assertNotNull(result) assertEquals("11", result!!.groupValues[1]) result = result.next() assertNotNull(result) assertEquals("33", result!!.groupValues[1]) result = result.next() assertNotNull(result) assertEquals("44", result!!.groupValues[1]) assertNull(result.next()) // Test invalid octal sequences try { regex = Regex("\\08") fail("IllegalArgumentException expected") } catch (e: IllegalArgumentException) { } try { regex = Regex("\\0") fail("IllegalArgumentException expected") } catch (e: IllegalArgumentException) { } try { regex = Regex("\\0;") fail("IllegalArgumentException expected") } catch (e: IllegalArgumentException) { } // Test \c (control character) sequence regex = Regex("([0-9]+)[\\cA\\cB\\cC\\cD];") result = regex.find("11\u0001;22:;33\u0002;44p;55\u0003;66\u0004;") assertNotNull(result) assertEquals("11", result!!.groupValues[1]) result = result.next() assertNotNull(result) assertEquals("33", 
result!!.groupValues[1]) result = result.next() assertNotNull(result) assertEquals("55", result!!.groupValues[1]) result = result.next() assertNotNull(result) assertEquals("66", result!!.groupValues[1]) assertNull(result.next()) // More thorough control escape test // Ensure that each escape matches exactly the corresponding // character // code and no others (well, from 0-255 at least) for (i in 0..25) { regex = Regex("\\c${'A' + i}") var match_char = -1 for (j in 0..255) { if (regex.matches("${j.toChar()}")) { assertEquals(-1, match_char) match_char = j } } assertTrue(match_char == i + 1) } // Test invalid control escapes try { regex = Regex("\\c") fail("IllegalArgumentException expected") } catch (e: IllegalArgumentException) { } } /** Checks bracketed character classes plus the non-whitespace, word, Lower, Upper and ASCII classes, and expects IllegalArgumentException for malformed property syntax. NOTE(review): several string literals and comment openers in this test look truncated, as if angle-bracketed text had been stripped - confirm against the upstream file. */ @Test fun testCharacterClasses() { var regex: Regex // Test one character range regex = Regex("[p].*[l]") assertTrue(regex.matches("paul")) assertTrue(regex.matches("pool")) assertFalse(regex.matches("pong")) assertTrue(regex.matches("pl")) // Test two character range regex = Regex("[pm].*[lp]") assertTrue(regex.matches("prop")) assertTrue(regex.matches("mall")) assertFalse(regex.matches("pong")) assertTrue(regex.matches("pill")) // Test range including [ and ] regex = Regex("[<\\[].*[\\]>]") assertTrue(regex.matches("")) assertTrue(regex.matches("[bar]")) assertFalse(regex.matches("{foobar]")) assertTrue(regex.matches("") assertTrue(regex.matches("")) assertFalse(regex.matches("")) val result = regex.find("xyz zzz") assertNotNull(result) assertNotNull(result!!.next()) assertNull(result.next()!!.next()) // Test \S (not whitespace) // TODO: We've removed \f from string since kotlin doesn't recognize this escape in a string. 
regex = Regex("<[a-z] \\S[0-9][\\S\n]+[^\\S]221>") assertTrue(regex.matches("")) assertTrue(regex.matches("")) assertFalse(regex.matches("")) assertTrue(regex.matches("")) regex = Regex("<[a-z] \\S[0-9][\\S\n]+[^\\S]221[\\S&&[^abc]]>") assertTrue(regex.matches("")) assertTrue(regex.matches("")) assertFalse(regex.matches("")) assertFalse(regex.matches("")) assertFalse(regex.matches("")) assertTrue(regex.matches("")) // Test \w (ascii word) regex = Regex("<\\w+\\s[0-9]+;[^\\w]\\w+/[\\w$]+;") assertTrue(regex.matches(""); * m = p.matcher(""); assertTrue(m.matches()); m = p.matcher(""); * assertTrue(m.matches()); m = p.matcher(""); * assertFalse(m.matches()); */ regex = Regex("\\p{Lower}+") assertTrue(regex.matches("abcdefghijklmnopqrstuvwxyz")) // Invalid uses of \p{Lower} try { regex = Regex("\\p") fail("IllegalArgumentException expected") } catch (e: IllegalArgumentException) { } try { regex = Regex("\\p;") fail("IllegalArgumentException expected") } catch (e: IllegalArgumentException) { } try { regex = Regex("\\p{") fail("IllegalArgumentException expected") } catch (e: IllegalArgumentException) { } try { regex = Regex("\\p{;") fail("IllegalArgumentException expected") } catch (e: IllegalArgumentException) { } try { regex = Regex("\\p{Lower") fail("IllegalArgumentException expected") } catch (e: IllegalArgumentException) { } try { regex = Regex("\\p{Lower;") fail("IllegalArgumentException expected") } catch (e: IllegalArgumentException) { } // Test \p{Upper} /* * FIXME: Requires complex range processing p = Regex("<\\p{Upper}\\d\\P{Upper}:[\\p{Upper}z]\\s[^\\P{Upper}]>"); * m = p.matcher(""); assertTrue(m.matches()); m = p.matcher(""); * assertTrue(m.matches()); m = p.matcher(""); * assertFalse(m.matches()); */ regex = Regex("\\p{Upper}+") assertTrue(regex.matches("ABCDEFGHIJKLMNOPQRSTUVWXYZ")) // Invalid uses of \p{Upper} try { regex = Regex("\\p{Upper") fail("IllegalArgumentException expected") } catch (e: IllegalArgumentException) { } try { regex = 
Regex("\\p{Upper;") fail("IllegalArgumentException expected") } catch (e: IllegalArgumentException) { } // Test \p{ASCII} /* * FIXME: Requires complex range processing p = Regex("<\\p{ASCII}\\d\\P{ASCII}:[\\p{ASCII}\u1234]\\s[^\\P{ASCII}]>"); * m = p.matcher(""); assertTrue(m.matches()); m = * p.matcher(""); assertTrue(m.matches()); m = * p.matcher("<\u00846#:E E>"); assertFalse(m.matches()) */ regex = Regex("\\p{ASCII}") for (i in 0 until 0x80) { assertTrue(regex.matches("${i.toChar()}")) } for (i in 0x80..0xff) { assertFalse(regex.matches("${i.toChar()}")) } // Invalid uses of \p{ASCII} try { regex = Regex("\\p{ASCII") fail("IllegalArgumentException expected") } catch (e: IllegalArgumentException) { } try { regex = Regex("\\p{ASCII;") fail("IllegalArgumentException expected") } catch (e: IllegalArgumentException) { } // Test \p{Alpha} // TODO // Test \p{Digit} // TODO // Test \p{XDigit} // TODO // Test \p{Alnum} // TODO // Test \p{Punct} // TODO // Test \p{Graph} // TODO // Test \p{Print} // TODO // Test \p{Blank} // TODO // Test \p{Space} // TODO // Test \p{Cntrl} // TODO } /** Placeholder: the body holds only TODO notes listing the Unicode categories still to be covered. */ @Test fun testUnicodeCategories() { // Test Unicode categories using \p and \P // One letter codes: L, M, N, P, S, Z, C // Two letter codes: Lu, Nd, Sc, Sm, ... // See java.lang.Character and Unicode standard for complete list // TODO // Test \p{L} // TODO // Test \p{N} // TODO // ... 
etc // Test two letter codes: // From unicode.org: // Lu // Ll // Lt // Lm // Lo // Mn // Mc // Me // Nd // Nl // No // Pc // Pd // Ps // Pe // Pi // Pf // Po // Sm // Sc // Sk // So // Zs // Zl // Zp // Cc // Cf // Cs // Co // Cn } /** For each UBlocks entry, checks that the In-prefixed block class matches every code point in low..high but not low - 1 or high + 1, and that the negated class behaves the opposite way. */ @Test fun testUnicodeBlocks() { var regex: Regex // Test Unicode blocks using \p and \P for (block in UBlocks) { regex = Regex("\\p{In" + block.name + "}") if (block.low > 0) { assertFalse(regex.matches((block.low - 1).toChar().toString())) } for (i in block.low..block.high) { assertTrue(regex.matches(i.toChar().toString())) } if (block.high < 0xFFFF) { assertFalse(regex.matches((block.high + 1).toChar().toString())) } regex = Regex("\\P{In" + block.name + "}") if (block.low > 0) { assertTrue(regex.matches((block.low - 1).toChar().toString())) } for (i in block.low..block.high) { assertFalse("assert: Regex: $regex, match to: ${i.toChar()} ($i)", regex.matches(i.toChar().toString())) } if (block.high < 0xFFFF) { assertTrue(regex.matches((block.high + 1).toChar().toString())) } } } /** Placeholder: the body holds only TODO notes for capturing-group tests. */ @Test fun testCapturingGroups() { // Test simple capturing groups // TODO // Test grouping without capture (?:...) // TODO // Test combination of grouping and capture // TODO // Test \ sequence with capturing and non-capturing groups // TODO // Test \ with out of range // TODO } /** Placeholder: the body holds only TODO notes for quantifier tests. */ @Test fun testRepeats() { // Test ? // TODO // Test * // TODO // Test + // TODO // Test {}, including 0, 1 and more // TODO // Test {,}, including 0, 1 and more // TODO // Test {,}, with n1 < n2, n1 = n2 and n1 > n2 (illegal?) // TODO } /** Mostly TODO notes for anchors, lookaround and flags; the executable statements check quoted sections inside patterns and character classes. NOTE(review): regex is assigned below without a declaration visible in this method, so part of the original text may be missing here - confirm against the upstream file. */ @Test fun testAnchors() { // Test ^, default and MULTILINE // TODO // Test $, default and MULTILINE // TODO // Test \b (word boundary) // TODO // Test \B (not a word boundary) // TODO // Test \A (beginning of string) // TODO // Test \Z (end of string) // TODO // Test \z (end of string) // TODO // Test \G // TODO // Test positive lookahead using (?=...) // TODO // Test negative lookahead using (?!...) // TODO // Test positive lookbehind using (?<=...) 
// TODO // Test negative lookbehind using (?...) // TODO // Test (?onflags-offflags) // Valid flags are i,m,d,s,u,x // TODO // Test (?onflags-offflags:...) // TODO // Test \Q, \E regex = Regex("[a-z]+;\\Q[a-z]+;\\Q(foo.*);\\E[0-9]+") assertTrue(regex.matches("abc;[a-z]+;\\Q(foo.*);411")) assertFalse(regex.matches("abc;def;foo42;555")) assertFalse(regex.matches("abc;\\Qdef;\\Qfoo99;\\E123")) regex = Regex("[a-z]+;(foo[0-9]-\\Q(...)\\E);[0-9]+") val result = regex.matchEntire("abc;foo5-(...);123") assertNotNull(result) assertEquals("foo5-(...)", result!!.groupValues[1]) assertFalse(regex.matches("abc;foo9-(xxx);789")) regex = Regex("[a-z]+;(bar[0-9]-[a-z\\Q$-\\E]+);[0-9]+") assertTrue(regex.matches("abc;bar0-def$-;123")) regex = Regex("[a-z]+;(bar[0-9]-[a-z\\Q-$\\E]+);[0-9]+") assertTrue(regex.matches("abc;bar0-def$-;123")) regex = Regex("[a-z]+;(bar[0-9]-[a-z\\Q[0-9]\\E]+);[0-9]+") assertTrue(regex.matches("abc;bar0-def[99]-]0x[;123")); regex = Regex("[a-z]+;(bar[0-9]-[a-z\\[0\\-9\\]]+);[0-9]+") assertTrue(regex.matches("abc;bar0-def[99]-]0x[;123")) // Test # // TODO } /** Checks that a character class containing hex escapes matches the charset name "iso-8859-1". */ @Test fun testCompile1() { val regex = Regex("[0-9A-Za-z][0-9A-Za-z\\x2e\\x3a\\x2d\\x5f]*") val name = "iso-8859-1" assertTrue(regex.matches(name)) } /** Checks that a fully quoted pattern is found inside a multi-line string via the in operator. */ @Test fun testCompile2() { val findString = "\\Qimport\\E" val regex = Regex(findString) assertTrue(regex in "import a.A;\n\n import b.B;\nclass C {}") } /** Checks end-of-line and start-of-line anchors with and without RegexOption.MULTILINE on inputs with different line terminators and on the empty string. */ @Test fun testCompile3() { var regex: Regex var result: MatchResult? 
regex = Regex("a$") result = regex.find("a\n") assertNotNull(result) assertEquals("a", result!!.value) assertNull(result.next()) regex = Regex("(a$)") result = regex.find("a\n") assertNotNull(result) assertEquals("a", result!!.value) assertEquals("a", result.groupValues[1]) assertNull(result.next()) regex = Regex("^.*$", RegexOption.MULTILINE) result = regex.find("a\n") assertNotNull(result) assertEquals("a", result!!.value) assertNull(result.next()) result = regex.find("a\nb\n") assertNotNull(result) assertEquals("a", result!!.value) result = result.next() assertNotNull(result) assertEquals("b", result!!.value) assertNull(result.next()) result = regex.find("a\nb") assertNotNull(result) assertEquals("a", result!!.value) result = result.next() assertNotNull(result) assertEquals("b", result!!.value) assertNull(result.next()) result = regex.find("\naa\r\nbb\rcc\n\n") assertNotNull(result) assertTrue(result!!.value == "") result = result.next() assertNotNull(result) assertEquals("aa", result!!.value) result = result.next() assertNotNull(result) assertEquals("bb", result!!.value) result = result.next() assertNotNull(result) assertEquals("cc", result!!.value) result = result.next() assertNotNull(result) assertTrue(result!!.value == "") assertNull(result.next()) result = regex.find("a") assertNotNull(result) assertEquals("a", result!!.value) assertNull(result.next()) result = regex.find("") assertNull(result) regex = Regex("^.*$") result = regex.find("") assertNotNull(result) assertTrue(result!!.value == "") assertNull(result.next()) } /** Searches a StringBuilder, then replaces its contents and checks that a fresh search over the changed text finds nothing. */ @Test fun testCompile4() { val findString = "\\Qpublic\\E" val text = StringBuilder(" public class Class {\n" + " public class Class {") val regex = Regex(findString) val result = regex.find(text) assertNotNull(result) assertEquals(4, result!!.range.start) // modify text text.setLength(0) text.append("Text have been changed.") assertNull(regex.find(text)) } /** Checks split() with limit 0 on "12" using a leading-digit pattern: two parts are expected, an empty string and "2". */ @Test fun testCompile5() { val p = Regex("^[0-9]") val s = p.split("12", 0) 
assertEquals("", s[0]) assertEquals("2", s[1]) assertEquals(2, s.size) } /** One Unicode block: the inclusive code point bounds low..high and the block name that testUnicodeBlocks appends to the In prefix. */ private class UBInfo(var low: Int, var high: Int, var name: String) // A table representing the unicode categories // private static UBInfo[] UCategories = { // Lu // Ll // Lt // Lm // Lo // Mn // Mc // Me // Nd // Nl // No // Pc // Pd // Ps // Pe // Pi // Pf // Po // Sm // Sc // Sk // So // Zs // Zl // Zp // Cc // Cf // Cs // Co // Cn // }; // A table representing the unicode character blocks private val UBlocks = arrayOf( /* 0000; 007F; Basic Latin */ UBInfo(0x0000, 0x007F, "BasicLatin"), // Character.UnicodeBlock.BASIC_LATIN /* 0080; 00FF; Latin-1 Supplement */ UBInfo(0x0080, 0x00FF, "Latin-1Supplement"), // Character.UnicodeBlock.LATIN_1_SUPPLEMENT /* 0100; 017F; Latin Extended-A */ UBInfo(0x0100, 0x017F, "LatinExtended-A"), // Character.UnicodeBlock.LATIN_EXTENDED_A /* 0180; 024F; Latin Extended-B */ // new UBInfo (0x0180,0x024F,"InLatinExtended-B"), // // Character.UnicodeBlock.LATIN_EXTENDED_B /* 0250; 02AF; IPA Extensions */ UBInfo(0x0250, 0x02AF, "IPAExtensions"), // Character.UnicodeBlock.IPA_EXTENSIONS /* 02B0; 02FF; Spacing Modifier Letters */ UBInfo(0x02B0, 0x02FF, "SpacingModifierLetters"), // Character.UnicodeBlock.SPACING_MODIFIER_LETTERS /* 0300; 036F; Combining Diacritical Marks */ UBInfo(0x0300, 0x036F, "CombiningDiacriticalMarks"), // Character.UnicodeBlock.COMBINING_DIACRITICAL_MARKS /* 0370; 03FF; Greek */ UBInfo(0x0370, 0x03FF, "Greek"), // Character.UnicodeBlock.GREEK /* 0400; 04FF; Cyrillic */ UBInfo(0x0400, 0x04FF, "Cyrillic"), // Character.UnicodeBlock.CYRILLIC /* 0530; 058F; Armenian */ UBInfo(0x0530, 0x058F, "Armenian"), // Character.UnicodeBlock.ARMENIAN /* 0590; 05FF; Hebrew */ UBInfo(0x0590, 0x05FF, "Hebrew"), // Character.UnicodeBlock.HEBREW /* 0600; 06FF; Arabic */ UBInfo(0x0600, 0x06FF, "Arabic"), // Character.UnicodeBlock.ARABIC /* 0700; 074F; Syriac */ UBInfo(0x0700, 0x074F, "Syriac"), // Character.UnicodeBlock.SYRIAC /* 0780; 07BF; Thaana */ UBInfo(0x0780, 
0x07BF, "Thaana"), // Character.UnicodeBlock.THAANA /* 0900; 097F; Devanagari */ UBInfo(0x0900, 0x097F, "Devanagari"), // Character.UnicodeBlock.DEVANAGARI /* 0980; 09FF; Bengali */ UBInfo(0x0980, 0x09FF, "Bengali"), // Character.UnicodeBlock.BENGALI /* 0A00; 0A7F; Gurmukhi */ UBInfo(0x0A00, 0x0A7F, "Gurmukhi"), // Character.UnicodeBlock.GURMUKHI /* 0A80; 0AFF; Gujarati */ UBInfo(0x0A80, 0x0AFF, "Gujarati"), // Character.UnicodeBlock.GUJARATI /* 0B00; 0B7F; Oriya */ UBInfo(0x0B00, 0x0B7F, "Oriya"), // Character.UnicodeBlock.ORIYA /* 0B80; 0BFF; Tamil */ UBInfo(0x0B80, 0x0BFF, "Tamil"), // Character.UnicodeBlock.TAMIL /* 0C00; 0C7F; Telugu */ UBInfo(0x0C00, 0x0C7F, "Telugu"), // Character.UnicodeBlock.TELUGU /* 0C80; 0CFF; Kannada */ UBInfo(0x0C80, 0x0CFF, "Kannada"), // Character.UnicodeBlock.KANNADA /* 0D00; 0D7F; Malayalam */ UBInfo(0x0D00, 0x0D7F, "Malayalam"), // Character.UnicodeBlock.MALAYALAM /* 0D80; 0DFF; Sinhala */ UBInfo(0x0D80, 0x0DFF, "Sinhala"), // Character.UnicodeBlock.SINHALA /* 0E00; 0E7F; Thai */ UBInfo(0x0E00, 0x0E7F, "Thai"), // Character.UnicodeBlock.THAI /* 0E80; 0EFF; Lao */ UBInfo(0x0E80, 0x0EFF, "Lao"), // Character.UnicodeBlock.LAO /* 0F00; 0FFF; Tibetan */ UBInfo(0x0F00, 0x0FFF, "Tibetan"), // Character.UnicodeBlock.TIBETAN /* 1000; 109F; Myanmar */ UBInfo(0x1000, 0x109F, "Myanmar"), // Character.UnicodeBlock.MYANMAR /* 10A0; 10FF; Georgian */ UBInfo(0x10A0, 0x10FF, "Georgian"), // Character.UnicodeBlock.GEORGIAN /* 1100; 11FF; Hangul Jamo */ UBInfo(0x1100, 0x11FF, "HangulJamo"), // Character.UnicodeBlock.HANGUL_JAMO /* 1200; 137F; Ethiopic */ UBInfo(0x1200, 0x137F, "Ethiopic"), // Character.UnicodeBlock.ETHIOPIC /* 13A0; 13FF; Cherokee */ UBInfo(0x13A0, 0x13FF, "Cherokee"), // Character.UnicodeBlock.CHEROKEE /* 1400; 167F; Unified Canadian Aboriginal Syllabics */ UBInfo(0x1400, 0x167F, "UnifiedCanadianAboriginalSyllabics"), // Character.UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS /* 1680; 169F; Ogham */ UBInfo(0x1680, 0x169F, 
"Ogham"), // Character.UnicodeBlock.OGHAM /* 16A0; 16FF; Runic */ UBInfo(0x16A0, 0x16FF, "Runic"), // Character.UnicodeBlock.RUNIC /* 1780; 17FF; Khmer */ UBInfo(0x1780, 0x17FF, "Khmer"), // Character.UnicodeBlock.KHMER /* 1800; 18AF; Mongolian */ UBInfo(0x1800, 0x18AF, "Mongolian"), // Character.UnicodeBlock.MONGOLIAN /* 1E00; 1EFF; Latin Extended Additional */ UBInfo(0x1E00, 0x1EFF, "LatinExtendedAdditional"), // Character.UnicodeBlock.LATIN_EXTENDED_ADDITIONAL /* 1F00; 1FFF; Greek Extended */ UBInfo(0x1F00, 0x1FFF, "GreekExtended"), // Character.UnicodeBlock.GREEK_EXTENDED /* 2000; 206F; General Punctuation */ UBInfo(0x2000, 0x206F, "GeneralPunctuation"), // Character.UnicodeBlock.GENERAL_PUNCTUATION /* 2070; 209F; Superscripts and Subscripts */ UBInfo(0x2070, 0x209F, "SuperscriptsandSubscripts"), // Character.UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS /* 20A0; 20CF; Currency Symbols */ UBInfo(0x20A0, 0x20CF, "CurrencySymbols"), // Character.UnicodeBlock.CURRENCY_SYMBOLS /* 20D0; 20FF; Combining Marks for Symbols */ UBInfo(0x20D0, 0x20FF, "CombiningMarksforSymbols"), // Character.UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS /* 2100; 214F; Letterlike Symbols */ UBInfo(0x2100, 0x214F, "LetterlikeSymbols"), // Character.UnicodeBlock.LETTERLIKE_SYMBOLS /* 2150; 218F; Number Forms */ UBInfo(0x2150, 0x218F, "NumberForms"), // Character.UnicodeBlock.NUMBER_FORMS /* 2190; 21FF; Arrows */ UBInfo(0x2190, 0x21FF, "Arrows"), // Character.UnicodeBlock.ARROWS /* 2200; 22FF; Mathematical Operators */ UBInfo(0x2200, 0x22FF, "MathematicalOperators"), // Character.UnicodeBlock.MATHEMATICAL_OPERATORS /* 2300; 23FF; Miscellaneous Technical */ UBInfo(0x2300, 0x23FF, "MiscellaneousTechnical"), // Character.UnicodeBlock.MISCELLANEOUS_TECHNICAL /* 2400; 243F; Control Pictures */ UBInfo(0x2400, 0x243F, "ControlPictures"), // Character.UnicodeBlock.CONTROL_PICTURES /* 2440; 245F; Optical Character Recognition */ UBInfo(0x2440, 0x245F, "OpticalCharacterRecognition"), // 
Character.UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION /* 2460; 24FF; Enclosed Alphanumerics */ UBInfo(0x2460, 0x24FF, "EnclosedAlphanumerics"), // Character.UnicodeBlock.ENCLOSED_ALPHANUMERICS /* 2500; 257F; Box Drawing */ UBInfo(0x2500, 0x257F, "BoxDrawing"), // Character.UnicodeBlock.BOX_DRAWING /* 2580; 259F; Block Elements */ UBInfo(0x2580, 0x259F, "BlockElements"), // Character.UnicodeBlock.BLOCK_ELEMENTS /* 25A0; 25FF; Geometric Shapes */ UBInfo(0x25A0, 0x25FF, "GeometricShapes"), // Character.UnicodeBlock.GEOMETRIC_SHAPES /* 2600; 26FF; Miscellaneous Symbols */ UBInfo(0x2600, 0x26FF, "MiscellaneousSymbols"), // Character.UnicodeBlock.MISCELLANEOUS_SYMBOLS /* 2700; 27BF; Dingbats */ UBInfo(0x2700, 0x27BF, "Dingbats"), // Character.UnicodeBlock.DINGBATS /* 2800; 28FF; Braille Patterns */ UBInfo(0x2800, 0x28FF, "BraillePatterns"), // Character.UnicodeBlock.BRAILLE_PATTERNS /* 2E80; 2EFF; CJK Radicals Supplement */ UBInfo(0x2E80, 0x2EFF, "CJKRadicalsSupplement"), // Character.UnicodeBlock.CJK_RADICALS_SUPPLEMENT /* 2F00; 2FDF; Kangxi Radicals */ UBInfo(0x2F00, 0x2FDF, "KangxiRadicals"), // Character.UnicodeBlock.KANGXI_RADICALS /* 2FF0; 2FFF; Ideographic Description Characters */ UBInfo(0x2FF0, 0x2FFF, "IdeographicDescriptionCharacters"), // Character.UnicodeBlock.IDEOGRAPHIC_DESCRIPTION_CHARACTERS /* 3000; 303F; CJK Symbols and Punctuation */ UBInfo(0x3000, 0x303F, "CJKSymbolsandPunctuation"), // Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION /* 3040; 309F; Hiragana */ UBInfo(0x3040, 0x309F, "Hiragana"), // Character.UnicodeBlock.HIRAGANA /* 30A0; 30FF; Katakana */ UBInfo(0x30A0, 0x30FF, "Katakana"), // Character.UnicodeBlock.KATAKANA /* 3100; 312F; Bopomofo */ UBInfo(0x3100, 0x312F, "Bopomofo"), // Character.UnicodeBlock.BOPOMOFO /* 3130; 318F; Hangul Compatibility Jamo */ UBInfo(0x3130, 0x318F, "HangulCompatibilityJamo"), // Character.UnicodeBlock.HANGUL_COMPATIBILITY_JAMO /* 3190; 319F; Kanbun */ UBInfo(0x3190, 0x319F, "Kanbun"), // 
Character.UnicodeBlock.KANBUN /* 31A0; 31BF; Bopomofo Extended */ UBInfo(0x31A0, 0x31BF, "BopomofoExtended"), // Character.UnicodeBlock.BOPOMOFO_EXTENDED /* 3200; 32FF; Enclosed CJK Letters and Months */ UBInfo(0x3200, 0x32FF, "EnclosedCJKLettersandMonths"), // Character.UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS /* 3300; 33FF; CJK Compatibility */ UBInfo(0x3300, 0x33FF, "CJKCompatibility"), // Character.UnicodeBlock.CJK_COMPATIBILITY /* 3400; 4DB5; CJK Unified Ideographs Extension A */ UBInfo(0x3400, 0x4DB5, "CJKUnifiedIdeographsExtensionA"), // Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A /* 4E00; 9FFF; CJK Unified Ideographs */ UBInfo(0x4E00, 0x9FFF, "CJKUnifiedIdeographs"), // Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS /* A000; A48F; Yi Syllables */ UBInfo(0xA000, 0xA48F, "YiSyllables"), // Character.UnicodeBlock.YI_SYLLABLES /* A490; A4CF; Yi Radicals */ UBInfo(0xA490, 0xA4CF, "YiRadicals"), // Character.UnicodeBlock.YI_RADICALS /* AC00; D7A3; Hangul Syllables */ UBInfo(0xAC00, 0xD7A3, "HangulSyllables"), // Character.UnicodeBlock.HANGUL_SYLLABLES /* D800; DB7F; High Surrogates */ /* DB80; DBFF; High Private Use Surrogates */ /* DC00; DFFF; Low Surrogates */ /* E000; F8FF; Private Use */ /* F900; FAFF; CJK Compatibility Ideographs */ UBInfo(0xF900, 0xFAFF, "CJKCompatibilityIdeographs"), // Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS /* FB00; FB4F; Alphabetic Presentation Forms */ UBInfo(0xFB00, 0xFB4F, "AlphabeticPresentationForms"), // Character.UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS /* FB50; FDFF; Arabic Presentation Forms-A */ UBInfo(0xFB50, 0xFDFF, "ArabicPresentationForms-A"), // Character.UnicodeBlock.ARABIC_PRESENTATION_FORMS_A /* FE20; FE2F; Combining Half Marks */ UBInfo(0xFE20, 0xFE2F, "CombiningHalfMarks"), // Character.UnicodeBlock.COMBINING_HALF_MARKS /* FE30; FE4F; CJK Compatibility Forms */ UBInfo(0xFE30, 0xFE4F, "CJKCompatibilityForms"), // Character.UnicodeBlock.CJK_COMPATIBILITY_FORMS /* FE50; FE6F; Small Form 
Variants */ UBInfo(0xFE50, 0xFE6F, "SmallFormVariants"), // Character.UnicodeBlock.SMALL_FORM_VARIANTS /* FE70; FEFE; Arabic Presentation Forms-B */ // new UBInfo (0xFE70,0xFEFE,"InArabicPresentationForms-B"), // // Character.UnicodeBlock.ARABIC_PRESENTATION_FORMS_B /* FEFF; FEFF; Specials */ UBInfo(0xFEFF, 0xFEFF, "Specials"), // Character.UnicodeBlock.SPECIALS /* FF00; FFEF; Halfwidth and Fullwidth Forms */ UBInfo(0xFF00, 0xFFEF, "HalfwidthandFullwidthForms"), // Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS /* FFF0; FFFD; Specials */ UBInfo(0xFFF0, 0xFFFD, "Specials") // Character.UnicodeBlock.SPECIALS ) }