diff --git a/core/descriptor.loader.java/src/org/jetbrains/kotlin/serialization/jvm/BitEncoding.java b/core/descriptor.loader.java/src/org/jetbrains/kotlin/serialization/jvm/BitEncoding.java index d1e545bf744..1d9eaf5a386 100644 --- a/core/descriptor.loader.java/src/org/jetbrains/kotlin/serialization/jvm/BitEncoding.java +++ b/core/descriptor.loader.java/src/org/jetbrains/kotlin/serialization/jvm/BitEncoding.java @@ -21,8 +21,10 @@ import org.jetbrains.annotations.NotNull; import java.util.ArrayList; import java.util.List; +import static org.jetbrains.kotlin.serialization.jvm.UtfEncodingKt.MAX_UTF8_INFO_LENGTH; + public class BitEncoding { - private static boolean NEW = true; + private static final boolean FORCE_8TO7_ENCODING = "true".equals(System.getProperty("kotlin.jvm.serialization.use8to7")); private BitEncoding() { } @@ -36,9 +38,9 @@ public class BitEncoding { */ @NotNull public static String[] encodeBytes(@NotNull byte[] data) { - if (NEW) { - List strings = UtfEncodingKt.bytesToStrings(data); - return strings.toArray(new String[strings.size()]); + // TODO: try both encodings here and choose the best one (with the smallest size) + if (!FORCE_8TO7_ENCODING) { + return UtfEncodingKt.bytesToStrings(data); } byte[] bytes = encode8to7(data); // Since 0x0 byte is encoded as two bytes in the Modified UTF-8 (0xc0 0x80) and zero is rather common to byte arrays, we increment @@ -111,9 +113,6 @@ public class BitEncoding { } } - // The maximum possible length of the byte array in the CONSTANT_Utf8_info structure in the bytecode, as per JVMS7 4.4.7 - private static final int MAX_UTF8_INFO_LENGTH = 65535; - /** * Converts a big byte array into the array of strings, where each string, when written to the constant pool table in bytecode, produces * a byte array of not more than MAX_UTF8_INFO_LENGTH. Each byte, except those which are 0x0, occupies exactly one byte in the constant @@ -163,7 +162,7 @@ public class BitEncoding { */ @NotNull public static byte[] decodeBytes(@NotNull String[] data) { - if (NEW) { + if (!FORCE_8TO7_ENCODING) { return UtfEncodingKt.stringsToBytes(data); } byte[] bytes = combineStringArrayIntoBytes(data); diff --git a/core/descriptor.loader.java/src/org/jetbrains/kotlin/serialization/jvm/utfEncoding.kt b/core/descriptor.loader.java/src/org/jetbrains/kotlin/serialization/jvm/utfEncoding.kt index b18abc98e9c..7a368fba2b0 100644 --- a/core/descriptor.loader.java/src/org/jetbrains/kotlin/serialization/jvm/utfEncoding.kt +++ b/core/descriptor.loader.java/src/org/jetbrains/kotlin/serialization/jvm/utfEncoding.kt @@ -19,18 +19,17 @@ package org.jetbrains.kotlin.serialization.jvm import java.util.* // The maximum possible length of the byte array in the CONSTANT_Utf8_info structure in the bytecode, as per JVMS7 4.4.7 -private val MAX_UTF8_INFO_LENGTH = 65535 +const val MAX_UTF8_INFO_LENGTH = 65535 // Leading bytes are prefixed with 110 in UTF-8 private val LEADING_BYTE_MASK = 0b11000000 // Continuation bytes are prefixed with 10 in UTF-8 private val CONTINUATION_BYTE_MASK = 0b10000000 -private val TWO_HIGHER_BITS_MASK = 0b11000000 private val TWO_LOWER_BITS_MASK = 0b00000011 private val SIX_LOWER_BITS_MASK = 0b00111111 -fun bytesToStrings(bytes: ByteArray): List { +fun bytesToStrings(bytes: ByteArray): Array { val result = ArrayList(1) val buffer = StringBuilder() var bytesInBuffer = 0 @@ -42,7 +41,7 @@ fun bytesToStrings(bytes: ByteArray): List { } else { val int = b.toInt() and 0xFF - val leadingByte = LEADING_BYTE_MASK or ((int and TWO_HIGHER_BITS_MASK) shr 6) + val leadingByte = LEADING_BYTE_MASK or (int shr 6) val continuationByte = CONTINUATION_BYTE_MASK or (int and SIX_LOWER_BITS_MASK) val encodedByte = (leadingByte shl 8) or continuationByte @@ -68,7 +67,7 @@ fun bytesToStrings(bytes: ByteArray): List { result.add(buffer.toString()) } - return result + return result.toTypedArray() } fun stringsToBytes(strings: Array): ByteArray { @@ -95,4 +94,4 @@ fun stringsToBytes(strings: Array): ByteArray { } return result -} \ No newline at end of file +}