Minor, simplify utfEncoding, add option to force using 8to7 encoding
This commit is contained in:
+7
-8
@@ -21,8 +21,10 @@ import org.jetbrains.annotations.NotNull;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import static org.jetbrains.kotlin.serialization.jvm.UtfEncodingKt.MAX_UTF8_INFO_LENGTH;
|
||||
|
||||
public class BitEncoding {
|
||||
private static boolean NEW = true;
|
||||
private static final boolean FORCE_8TO7_ENCODING = "true".equals(System.getProperty("kotlin.jvm.serialization.use8to7"));
|
||||
|
||||
private BitEncoding() {
|
||||
}
|
||||
@@ -36,9 +38,9 @@ public class BitEncoding {
|
||||
*/
|
||||
@NotNull
|
||||
public static String[] encodeBytes(@NotNull byte[] data) {
|
||||
if (NEW) {
|
||||
List<String> strings = UtfEncodingKt.bytesToStrings(data);
|
||||
return strings.toArray(new String[strings.size()]);
|
||||
// TODO: try both encodings here and choose the best one (with the smallest size)
|
||||
if (!FORCE_8TO7_ENCODING) {
|
||||
return UtfEncodingKt.bytesToStrings(data);
|
||||
}
|
||||
byte[] bytes = encode8to7(data);
|
||||
// Since the 0x0 byte is encoded as two bytes in Modified UTF-8 (0xc0 0x80) and zero is rather common in byte arrays, we increment
|
||||
@@ -111,9 +113,6 @@ public class BitEncoding {
|
||||
}
|
||||
}
|
||||
|
||||
// The maximum possible length of the byte array in the CONSTANT_Utf8_info structure in the bytecode, as per JVMS7 4.4.7
|
||||
private static final int MAX_UTF8_INFO_LENGTH = 65535;
|
||||
|
||||
/**
|
||||
* Converts a big byte array into the array of strings, where each string, when written to the constant pool table in bytecode, produces
|
||||
* a byte array of not more than MAX_UTF8_INFO_LENGTH. Each byte, except those which are 0x0, occupies exactly one byte in the constant
|
||||
@@ -163,7 +162,7 @@ public class BitEncoding {
|
||||
*/
|
||||
@NotNull
|
||||
public static byte[] decodeBytes(@NotNull String[] data) {
|
||||
if (NEW) {
|
||||
if (!FORCE_8TO7_ENCODING) {
|
||||
return UtfEncodingKt.stringsToBytes(data);
|
||||
}
|
||||
byte[] bytes = combineStringArrayIntoBytes(data);
|
||||
|
||||
+5
-6
@@ -19,18 +19,17 @@ package org.jetbrains.kotlin.serialization.jvm
|
||||
import java.util.*
|
||||
|
||||
// The maximum possible length of the byte array in the CONSTANT_Utf8_info structure in the bytecode, as per JVMS7 4.4.7
|
||||
private val MAX_UTF8_INFO_LENGTH = 65535
|
||||
const val MAX_UTF8_INFO_LENGTH = 65535
|
||||
|
||||
// Leading bytes of two-byte sequences are prefixed with 110 in UTF-8
|
||||
private val LEADING_BYTE_MASK = 0b11000000
|
||||
// Continuation bytes are prefixed with 10 in UTF-8
|
||||
private val CONTINUATION_BYTE_MASK = 0b10000000
|
||||
|
||||
private val TWO_HIGHER_BITS_MASK = 0b11000000
|
||||
private val TWO_LOWER_BITS_MASK = 0b00000011
|
||||
private val SIX_LOWER_BITS_MASK = 0b00111111
|
||||
|
||||
fun bytesToStrings(bytes: ByteArray): List<String> {
|
||||
fun bytesToStrings(bytes: ByteArray): Array<String> {
|
||||
val result = ArrayList<String>(1)
|
||||
val buffer = StringBuilder()
|
||||
var bytesInBuffer = 0
|
||||
@@ -42,7 +41,7 @@ fun bytesToStrings(bytes: ByteArray): List<String> {
|
||||
}
|
||||
else {
|
||||
val int = b.toInt() and 0xFF
|
||||
val leadingByte = LEADING_BYTE_MASK or ((int and TWO_HIGHER_BITS_MASK) shr 6)
|
||||
val leadingByte = LEADING_BYTE_MASK or (int shr 6)
|
||||
val continuationByte = CONTINUATION_BYTE_MASK or (int and SIX_LOWER_BITS_MASK)
|
||||
val encodedByte = (leadingByte shl 8) or continuationByte
|
||||
|
||||
@@ -68,7 +67,7 @@ fun bytesToStrings(bytes: ByteArray): List<String> {
|
||||
result.add(buffer.toString())
|
||||
}
|
||||
|
||||
return result
|
||||
return result.toTypedArray()
|
||||
}
|
||||
|
||||
fun stringsToBytes(strings: Array<String>): ByteArray {
|
||||
@@ -95,4 +94,4 @@ fun stringsToBytes(strings: Array<String>): ByteArray {
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user