diff options
author | Zoltan Szabadka <szabadka@google.com> | 2014-01-08 12:28:28 +0100 |
---|---|---|
committer | Zoltan Szabadka <szabadka@google.com> | 2014-01-08 12:28:28 +0100 |
commit | 4c8c7fd31c6a9e03c6531b8ddc34fd071f6c9348 (patch) | |
tree | 2c0e6096ed820a5b699eb6b66321504b1edb6b14 | |
parent | efbc1a896593be75066ba8769915f19a6c1d7485 (diff) | |
download | src-4c8c7fd31c6a9e03c6531b8ddc34fd071f6c9348.tar.gz |
Brotli format change: small improvement to the encoding of Huffman codes
Combine the HSKIP and the simple/complex Huffman code type bits.
-rw-r--r-- | brotli/brotlispec.txt | 20 | ||||
-rw-r--r-- | brotli/dec/decode.c | 13 | ||||
-rw-r--r-- | brotli/enc/encode.cc | 22 | ||||
-rw-r--r-- | brotli/enc/literal_cost.cc | 2 |
4 files changed, 32 insertions, 25 deletions
diff --git a/brotli/brotlispec.txt b/brotli/brotlispec.txt index 765abfb..23de497 100644 --- a/brotli/brotlispec.txt +++ b/brotli/brotlispec.txt @@ -412,16 +412,16 @@ Abstract 3.4. Simple Huffman codes - The first bit of the compressed representation of each Huffman + The first two bits of the compressed representation of each Huffman code distinguishes between simple and complex Huffman codes. If - the first bit is 1, then a simple, otherwise a complex Huffman - code follows. + this value is 1, then a simple Huffman code follows. Otherwise + the value indicates the number of leading zeros. A simple Huffman code can have only up to four symbols with non- zero code length. The format of the simple Huffman code is as follows: - 1 bit: 1, indicating a simple Huffman code + 2 bits: value of 1 indicates a simple Huffman code 2 bits: NSYM - 1, where NSYM = # of symbols with non-zero code length @@ -507,8 +507,10 @@ Abstract We can now define the format of the complex Huffman code as follows: - 1 bit: 0, indicating a complex Huffman code - 1 bit : HSKIP, if 1, skip over first two code length codes + 2 bits: HSKIP, values of 0, 2 or 3 represent the respective + number of leading zeros. (Value of 1 indicates the + Simple Huffman code.) + Code lengths for symbols in the code length alphabet given just above, in the order: 1, 2, 3, 4, 0, 17, 5, 6, 16, 7, @@ -519,9 +521,9 @@ Abstract the static Huffman code above. A code length of 0 means the corresponding code length symbol is not used. - If HSKIP is 1, code lengths of code length symbols 1 and - 2 are implicit zeros and are not present in the code - lengths sequence above. If there are at least two non- + If HSKIP is 2 or 3, a respective number of leading code + lengths are implicit zeros and are not present in the + code lengths sequence above. If there are at least two non- zero code lengths, any trailing zero code lengths are omitted, i.e. the last code length in the sequence must be non-zero. In this case the sum of (32 >> code length) diff --git a/brotli/dec/decode.c b/brotli/dec/decode.c index 4dce7bb..df2e1f9 100644 --- a/brotli/dec/decode.c +++ b/brotli/dec/decode.c @@ -234,7 +234,7 @@ static int ReadHuffmanCode(int alphabet_size, HuffmanTree* tree, BrotliBitReader* br) { int ok = 1; - int simple_code; + int simple_code_or_skip; uint8_t* code_lengths = NULL; code_lengths = @@ -247,9 +247,12 @@ static int ReadHuffmanCode(int alphabet_size, printf("[ReadHuffmanCode] Unexpected end of input.\n"); return 0; } - simple_code = BrotliReadBits(br, 1); - BROTLI_LOG_UINT(simple_code); - if (simple_code) { /* Read symbols, codes & code lengths directly. */ + /* simple_code_or_skip is used as follows: + 1 for simple code; + 0 for no skipping, 2 skips 2 code lengths, 3 skips 3 code lengths */ + simple_code_or_skip = BrotliReadBits(br, 2); + BROTLI_LOG_UINT(simple_code_or_skip); + if (simple_code_or_skip == 1) { /* Read symbols, codes & code lengths directly. */ int i; int max_bits_counter = alphabet_size - 1; int max_bits = 0; @@ -286,7 +289,7 @@ static int ReadHuffmanCode(int alphabet_size, int i; uint8_t code_length_code_lengths[CODE_LENGTH_CODES] = { 0 }; int space = 32; - for (i = BrotliReadBits(br, 1) * 2; + for (i = simple_code_or_skip; i < CODE_LENGTH_CODES && space > 0; ++i) { int code_len_idx = kCodeLengthCodeOrder[i]; int v = BrotliReadBits(br, 2); diff --git a/brotli/enc/encode.cc b/brotli/enc/encode.cc index 8e497fa..b492421 100644 --- a/brotli/enc/encode.cc +++ b/brotli/enc/encode.cc @@ -136,12 +136,16 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask( if (num_codes == 1) { codes_to_store = kCodeLengthCodes; } - const int skip_two_first = - code_length_bitdepth[kStorageOrder[0]] == 0 && - code_length_bitdepth[kStorageOrder[1]] == 0; - WriteBits(1, skip_two_first, storage_ix, storage); - - for (int i = skip_two_first * 2; i < codes_to_store; ++i) { + int skip_some = 0; // skips none. + if (code_length_bitdepth[kStorageOrder[0]] == 0 && + code_length_bitdepth[kStorageOrder[1]] == 0) { + skip_some = 2; // skips two. + if (code_length_bitdepth[kStorageOrder[2]] == 0) { + skip_some = 3; // skips three. + } + } + WriteBits(2, skip_some, storage_ix, storage); + for (int i = skip_some; i < codes_to_store; ++i) { uint8_t len[] = { 2, 4, 3, 2, 2, 4 }; uint8_t bits[] = { 0, 5, 1, 3, 2, 13 }; int v = code_length_bitdepth[kStorageOrder[i]]; @@ -182,7 +186,7 @@ void StoreHuffmanCode(const EntropyCode<kSize>& code, int alphabet_size, } if (code.count_ == 0) { // emit minimal tree for empty cases // bits: small tree marker: 1, count-1: 0, max_bits-sized encoding for 0 - WriteBits(3 + max_bits, 0x01, storage_ix, storage); + WriteBits(4 + max_bits, 0x1, storage_ix, storage); return; } if (code.count_ <= 4) { @@ -202,7 +206,7 @@ void StoreHuffmanCode(const EntropyCode<kSize>& code, int alphabet_size, } } // Small tree marker to encode 1-4 symbols. - WriteBits(1, 1, storage_ix, storage); + WriteBits(2, 1, storage_ix, storage); WriteBits(2, code.count_ - 1, storage_ix, storage); for (int i = 0; i < code.count_; ++i) { WriteBits(max_bits, symbols[i], storage_ix, storage); @@ -219,8 +223,6 @@ void StoreHuffmanCode(const EntropyCode<kSize>& code, int alphabet_size, } return; } - WriteBits(1, 0, storage_ix, storage); - uint8_t huffman_tree[kSize]; uint8_t huffman_tree_extra_bits[kSize]; int huffman_tree_size = 0; diff --git a/brotli/enc/literal_cost.cc b/brotli/enc/literal_cost.cc index 2a388d7..bf05a98 100644 --- a/brotli/enc/literal_cost.cc +++ b/brotli/enc/literal_cost.cc @@ -51,7 +51,7 @@ void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask, histo = 1; } cost[masked_pos] = log2(static_cast<double>(in_window) / histo); - cost[masked_pos] += 0.03; + cost[masked_pos] += 0.029; if (cost[masked_pos] < 1.0) { cost[masked_pos] *= 0.5; cost[masked_pos] += 0.5; |