Minor, simplify utfEncoding, add option to force using 8to7 encoding

2016-01-11 18:38:19 +03:00
parent fe15984a59
commit 5585c7da7f
2 changed files with 12 additions and 14 deletions
@@ -21,8 +21,10 @@ import org.jetbrains.annotations.NotNull;
 import java.util.ArrayList;
 import java.util.List;

+import static org.jetbrains.kotlin.serialization.jvm.UtfEncodingKt.MAX_UTF8_INFO_LENGTH;
+
 public class BitEncoding {
-    private static boolean NEW = true;
+    private static final boolean FORCE_8TO7_ENCODING = "true".equals(System.getProperty("kotlin.jvm.serialization.use8to7"));

    private BitEncoding() {
    }
@@ -36,9 +38,9 @@ public class BitEncoding {
     */
    @NotNull
    public static String[] encodeBytes(@NotNull byte[] data) {
-        if (NEW) {
-            List<String> strings = UtfEncodingKt.bytesToStrings(data);
-            return strings.toArray(new String[strings.size()]);
+        // TODO: try both encodings here and choose the best one (with the smallest size)
+        if (!FORCE_8TO7_ENCODING) {
+            return UtfEncodingKt.bytesToStrings(data);
        }
        byte[] bytes = encode8to7(data);
        // Since 0x0 byte is encoded as two bytes in the Modified UTF-8 (0xc0 0x80) and zero is rather common to byte arrays, we increment
@@ -111,9 +113,6 @@ public class BitEncoding {
        }
    }

-    // The maximum possible length of the byte array in the CONSTANT_Utf8_info structure in the bytecode, as per JVMS7 4.4.7
-    private static final int MAX_UTF8_INFO_LENGTH = 65535;
-
    /**
     * Converts a big byte array into the array of strings, where each string, when written to the constant pool table in bytecode, produces
     * a byte array of not more than MAX_UTF8_INFO_LENGTH. Each byte, except those which are 0x0, occupies exactly one byte in the constant
@@ -163,7 +162,7 @@ public class BitEncoding {
     */
    @NotNull
    public static byte[] decodeBytes(@NotNull String[] data) {
-        if (NEW) {
+        if (!FORCE_8TO7_ENCODING) {
            return UtfEncodingKt.stringsToBytes(data);
        }
        byte[] bytes = combineStringArrayIntoBytes(data);
@@ -19,18 +19,17 @@ package org.jetbrains.kotlin.serialization.jvm
 import java.util.*

 // The maximum possible length of the byte array in the CONSTANT_Utf8_info structure in the bytecode, as per JVMS7 4.4.7
-private val MAX_UTF8_INFO_LENGTH = 65535
+const val MAX_UTF8_INFO_LENGTH = 65535

 // Leading bytes of two-byte sequences are prefixed with 110 in UTF-8
 private val LEADING_BYTE_MASK = 0b11000000
 // Continuation bytes are prefixed with 10 in UTF-8
 private val CONTINUATION_BYTE_MASK = 0b10000000

-private val TWO_HIGHER_BITS_MASK = 0b11000000
 private val TWO_LOWER_BITS_MASK = 0b00000011
 private val SIX_LOWER_BITS_MASK = 0b00111111

-fun bytesToStrings(bytes: ByteArray): List<String> {
+fun bytesToStrings(bytes: ByteArray): Array<String> {
    val result = ArrayList<String>(1)
    val buffer = StringBuilder()
    var bytesInBuffer = 0
@@ -42,7 +41,7 @@ fun bytesToStrings(bytes: ByteArray): List<String> {
        }
        else {
            val int = b.toInt() and 0xFF
-            val leadingByte = LEADING_BYTE_MASK or ((int and TWO_HIGHER_BITS_MASK) shr 6)
+            val leadingByte = LEADING_BYTE_MASK or (int shr 6)
            val continuationByte = CONTINUATION_BYTE_MASK or (int and SIX_LOWER_BITS_MASK)
            val encodedByte = (leadingByte shl 8) or continuationByte

@@ -68,7 +67,7 @@ fun bytesToStrings(bytes: ByteArray): List<String> {
        result.add(buffer.toString())
    }

-    return result
+    return result.toTypedArray()
 }

 fun stringsToBytes(strings: Array<String>): ByteArray {
@@ -95,4 +94,4 @@ fun stringsToBytes(strings: Array<String>): ByteArray {
    }

    return result
-}
+}