img-stream-enc/src/output.zig
2023-12-12 16:02:12 +05:00

405 lines
12 KiB
Zig

const std = @import("std");
const util = @import("util.zig");
// Growable sequence of RLE units; parse_block appends to it and
// Scan.dump_scan reads it back when writing the scan.
const RLE_Seq = std.ArrayList(RLE_Unit);
// One unit of the run-length encoded coefficient stream.
const RLE_Unit = struct {
// DC units: the bit size of `value`. AC units: zero-run length in the top
// 4 bits, bit size of `value` in the low 4 bits. 0xf0 and 0x00 are appended
// by parse_block with a value of 0.
symbol: u8,
// the DC difference or AC coefficient that `symbol` describes
value: i16,
};
// Lookup table from an RLE symbol byte to the Huffman code assigned to it
// (filled by gen_codes).
const Huffman = std.AutoHashMap(u8, HuffCode);
// A single Huffman code word.
const HuffCode = struct {
// the code bits; RLEWriter.write_value passes them to the bit writer
// together with n_bits
value: u16,
// length of the code in bits (gen_codes assigns lengths 1..16)
n_bits: u5,
};
// BITS / HUFFVAL description of a Huffman table as produced by gen_huffman
// and consumed by gen_codes and dump_huffman.
const HuffmanMeta = struct {
// bits[n] is the number of codes of length n; index 0 is unused and only
// entries 1..16 are read by gen_codes and written by dump_huffman
bits: [33]u8,
// number of valid leading entries in `huffval`
total_n: usize,
// symbols listed in order of increasing code size
huffval: [256]u8,
};
// Writes RLE units to the output file as Huffman codes followed by their
// extra value bits, through a buffered bit writer.
const RLEWriter = struct {
    bw: util.BufferedBitWriter,
    const Self = @This();

    // Wraps `f` in a buffered bit writer.
    fn init(f: std.fs.File) Self {
        return Self{
            .bw = util.BufferedBitWriter.init(f),
        };
    }

    // Writes one block from `units`, starting at `idx_ptr.*`: the first unit
    // is coded with `dc_huff`, every following unit with `ac_huff`, up to and
    // including the 0x00 unit that parse_block appends after every block.
    // On return (also on error) `idx_ptr.*` holds the index reached so far;
    // on success that is the first unit of the next block.
    //
    // The 0x00 unit is always consumed, but it is only written to the stream
    // when the block has fewer than 63 AC coefficients accounted for. When the
    // last coefficient of a block is non-zero the JPEG bitstream carries no
    // EOB code (ITU-T T.81 F.1.2.2.1); emitting one anyway would make a
    // decoder read it as the DC code of the next block.
    fn write_until_eob(self: *Self, units: []RLE_Unit, idx_ptr: *usize, dc_huff: *Huffman, ac_huff: *Huffman) !void {
        var i = idx_ptr.*;
        defer idx_ptr.* = i;

        try self.write_value(dc_huff.get(units[i].symbol).?, units[i]);
        i += 1;

        // number of AC coefficients (zero runs included) covered so far
        var ac_covered: usize = 0;
        while (true) : (i += 1) {
            const unit = units[i];
            if (unit.symbol == 0x00) {
                if (ac_covered < 63) {
                    try self.write_value(ac_huff.get(unit.symbol).?, unit);
                }
                i += 1;
                break;
            }
            try self.write_value(ac_huff.get(unit.symbol).?, unit);
            // the top nibble is the zero run preceding this coefficient; a
            // 0xf0 unit therefore accounts for 16 zero coefficients
            ac_covered += @as(usize, unit.symbol >> 4) + 1;
        }
    }

    // Writes the Huffman code for a unit, followed by the low `size` bits of
    // its value when the size (low nibble of the symbol) is at least 1.
    fn write_value(self: *Self, huffcode: HuffCode, unit: RLE_Unit) !void {
        try self.bw.write_bits(huffcode.value, huffcode.n_bits);
        // negative values must be written as val - 1 with the same number of bits as orig
        const unit_val: u16 = if (unit.value >= 0) @bitCast(unit.value) else @bitCast(unit.value - 1);
        const value_size = unit.symbol & 0x0f;
        if (value_size != 0) {
            // 0 values only need the huffcode, no extra bits
            const mask = try std.math.powi(u16, 2, value_size) - 1;
            try self.bw.write_bits(unit_val & mask, @truncate(value_size));
        }
    }

    // Flushes the underlying bit writer at the end of the scan.
    inline fn flush(self: *Self) !void {
        try self.bw.flush_end();
    }
};
// State for encoding one image: the per-component RLE sequences, the symbol
// frequency tables and the Huffman tables derived from them.
//
// Table order in `freqs` and `huffs`: 0 and 1 are the DC and AC tables used
// for the Y blocks, 2 and 3 the DC and AC tables shared by the U and V blocks.
const Scan = struct {
    // owns the `freqs` slices
    arena: std.heap.ArenaAllocator,
    // backs the arena, the RLE lists and the Huffman maps
    root_alloc: std.mem.Allocator,
    // previous DC value per component (Y, U, V), used for DC differencing
    dc_diffs: [3]i16,
    rles: [3]RLE_Seq,
    freqs: [4][]u32,
    huffs: [4]Huffman,
    const Self = @This();

    // Creates an empty scan. Call deinit to release everything it owns.
    fn init(alloc_root: std.mem.Allocator) !Self {
        var arena = std.heap.ArenaAllocator.init(alloc_root);
        errdefer arena.deinit();
        const alloc = arena.allocator();

        // Allocate before `arena` is copied into the result: the copy stored
        // in the struct must already know about these buffers, otherwise
        // deinit would free nothing and the tables would leak.
        const freqs = [4][]u32{
            try alloc.alloc(u32, 13),
            try alloc.alloc(u32, 257),
            try alloc.alloc(u32, 13),
            try alloc.alloc(u32, 257),
        };

        return Self{
            .root_alloc = alloc_root,
            .arena = arena,
            .dc_diffs = [3]i16{ 0, 0, 0 },
            .rles = [3]RLE_Seq{
                RLE_Seq.init(alloc_root),
                RLE_Seq.init(alloc_root),
                RLE_Seq.init(alloc_root),
            },
            .freqs = freqs,
            .huffs = [4]Huffman{
                Huffman.init(alloc_root),
                Huffman.init(alloc_root),
                Huffman.init(alloc_root),
                Huffman.init(alloc_root),
            },
        };
    }

    // Releases the frequency tables, the RLE sequences and the Huffman maps.
    fn deinit(self: *Self) void {
        self.arena.deinit();
        for (0..3) |i| {
            self.rles[i].deinit();
        }
        for (0..4) |i| {
            self.huffs[i].deinit();
        }
    }

    // operates on full image, generating an RLE sequence and frequency values,
    // then gens the corresponding huffman tables and writes them to `f`
    fn do_rle_freq_pass(self: *Self, buff: *const util.Buffers, f: std.fs.File) !void {
        const h = buff.Y_quant.len;
        const w = buff.Y_quant[0].len;
        for (0..4) |i| {
            @memset(self.freqs[i], 0);
        }
        for (0..h) |x| {
            for (0..w) |y| {
                // four Y blocks per position, then one U and one V block
                for (0..4) |Y_sub_i| {
                    try parse_block(&buff.Y_quant[x][y][Y_sub_i], &self.dc_diffs[0], &self.rles[0], self.freqs[0], self.freqs[1]);
                }
                try parse_block(&buff.U_quant[x][y], &self.dc_diffs[1], &self.rles[1], self.freqs[2], self.freqs[3]);
                try parse_block(&buff.V_quant[x][y], &self.dc_diffs[2], &self.rles[2], self.freqs[2], self.freqs[3]);
            }
        }
        try self.generate_huffmans(f);
    }

    // Builds the four Huffman tables from the collected frequencies and
    // writes each table definition to `f`. gen_huffman modifies the frequency
    // slice it is given, so the counts are consumed by this call.
    fn generate_huffmans(self: *Self, f: std.fs.File) !void {
        for (0..4) |i| {
            const huff_meta = gen_huffman(self.freqs[i]);
            try gen_codes(&self.huffs[i], huff_meta);
            try dump_huffman(f, huff_meta, i);
        }
    }

    // dumps scan, order of Y macroblock (blockx4), U block, V block,
    // need per block since first must be interpreted as dc
    // 0x00 dc does not signal next block 0x00 ac does
    fn dump_scan(self: *Self, f: std.fs.File) !void {
        // start-of-scan header, mostly hardcoded values; writeAll so that a
        // short write cannot silently drop part of it
        try f.writeAll(&[_]u8{ 0xff, 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x11, 0x03, 0x11, 0x00, 0x3f, 0x00 });
        var bw = RLEWriter.init(f);
        var idxs = [3]usize{ 0, 0, 0 };
        while (idxs[0] < self.rles[0].items.len and idxs[1] < self.rles[1].items.len and idxs[2] < self.rles[2].items.len) {
            for (0..4) |_| {
                try bw.write_until_eob(self.rles[0].items, &idxs[0], &self.huffs[0], &self.huffs[1]);
            }
            try bw.write_until_eob(self.rles[1].items, &idxs[1], &self.huffs[2], &self.huffs[3]);
            try bw.write_until_eob(self.rles[2].items, &idxs[2], &self.huffs[2], &self.huffs[3]);
        }
        try bw.flush();
        // end-of-image marker
        try f.writeAll(&[2]u8{ 0xff, 0xd9 });
    }
};
// helper func needed for generating huffman code
// Returns the indices of the two smallest non-zero values in `freqs` as
// { index of smallest, index of second smallest }, or null when fewer than
// two entries are non-zero. On equal values the lower index is kept.
inline fn get_idx_min2(freqs: []u32) ?struct { usize, usize } {
    var lowest: u32 = undefined;
    var lowest_idx: usize = undefined;
    var second: u32 = undefined;
    var second_idx: usize = undefined;
    var found: u2 = 0; // how many of the two slots are filled (0, 1 or 2)

    for (freqs, 0..) |freq, i| {
        if (freq == 0) continue;
        if (found == 0) {
            lowest = freq;
            lowest_idx = i;
            found = 1;
        } else if (freq < lowest) {
            // a new minimum demotes the previous one to second place; without
            // this step the second slot could keep a larger, stale value
            second = lowest;
            second_idx = lowest_idx;
            lowest = freq;
            lowest_idx = i;
            found = 2;
        } else if (found == 1 or freq < second) {
            second = freq;
            second_idx = i;
            found = 2;
        }
    }

    return if (found == 2) .{
        lowest_idx,
        second_idx,
    } else null;
}
// generates code mapping from BITS and HUFFVAL
// see https://www.w3.org/Graphics/JPEG/itu-t81.pdf annex c
//
// For every symbol in `huffman_meta.huffval` that is covered by
// `huffman_meta.bits[1..17]`, stores its code length and code value in
// `huff`. An empty table leaves `huff` untouched. Errors come from
// `huff.put` only.
inline fn gen_codes(huff: *Huffman, huffman_meta: HuffmanMeta) !void {
    // code length of the k-th symbol of HUFFVAL, lengths in increasing order
    var huffsize = [_]u5{0} ** 256;
    var total: usize = 0;
    for (1..17) |len| {
        const count: usize = huffman_meta.bits[len];
        for (0..count) |_| {
            huffsize[total] = @truncate(len);
            total += 1;
        }
    }

    // Codes of equal length are consecutive integers; when the length grows
    // the running code is shifted left once per extra bit. The loops are
    // bounded by `total`, so no zero sentinel after the last entry is needed
    // (a full 256-entry table has no room for one) and an empty table is
    // simply skipped.
    var code: usize = 0;
    var si: u5 = huffsize[0];
    for (0..total) |k| {
        while (si < huffsize[k]) : (si += 1) {
            code <<= 1;
        }
        try huff.put(huffman_meta.huffval[k], HuffCode{
            .n_bits = huffsize[k],
            .value = @truncate(code),
        });
        code += 1;
    }
}
// generate BITS and HUFFVAL
// see https://www.w3.org/Graphics/JPEG/itu-t81.pdf annex K
//
// `freqs` holds one count per symbol plus one extra slot at the end. The
// slice is modified: counts are merged and zeroed while the code sizes are
// derived, so it cannot be reused afterwards.
// Returns the number of codes per length (bits), the symbols ordered by code
// size (huffval) and how many symbols were listed (total_n).
inline fn gen_huffman(freqs: []u32) HuffmanMeta {
// the extra last slot always takes part with a count of 1; it is skipped when
// huffval is filled below and one code is removed from bits for it
// (presumably the reserved code point of Annex K — TODO confirm)
freqs[freqs.len - 1] = 1;
var codesizes = [_]u8{0} ** 257;
var others = [_]u9{0x1ff} ** 257; // 0x1ff since -1 not available and i8 wouldn't fit anyway
// repeatedly merge the two entries picked by get_idx_min2 until fewer than
// two non-zero counts remain
while (get_idx_min2(freqs)) |tmp| {
var v1 = tmp.@"0";
var v2 = tmp.@"1";
freqs[v1] += freqs[v2];
freqs[v2] = 0;
// every symbol on v1's chain gets one bit longer
codesizes[v1] += 1;
while (others[v1] != 0x1ff) : (codesizes[v1] += 1) {
v1 = others[v1];
}
// append v2's chain to the end of v1's chain, then lengthen it as well
others[v1] = @truncate(v2);
codesizes[v2] += 1;
while (others[v2] != 0x1ff) : (codesizes[v2] += 1) {
v2 = others[v2];
}
}
// count how many symbols ended up with each code size
var bits = [_]u8{0} ** 33;
for (0..freqs.len) |i| {
if (codesizes[i] != 0) {
bits[codesizes[i]] += 1;
}
}
// move codes longer than 16 bits to shorter lengths, working down from 32:
// a pair at length i is replaced by one code at i - 1, and one code at the
// nearest shorter non-empty length j becomes two codes at j + 1
var i: usize = 32;
while (i > 16) {
if (bits[i] > 0) {
var j = i - 2;
while (bits[j] <= 0) : (j -= 1) {}
bits[i] -= 2;
bits[i - 1] += 1;
bits[j + 1] += 2;
bits[j] -= 1;
} else {
i -= 1;
}
}
// drop one code from the longest remaining length (the extra last slot)
while (bits[i] == 0) : (i -= 1) {}
bits[i] -= 1;
// list the real symbols (last slot excluded) by increasing code size
var k: usize = 0;
var huffval = [_]u8{0} ** 256;
for (1..33) |i_| {
for (0..freqs.len - 1) |j| {
if (codesizes[j] == i_) {
huffval[k] = @truncate(j);
k += 1;
}
}
}
return .{
.bits = bits,
.huffval = huffval,
.total_n = k,
};
}
// Returns the size field for `n`: 0 for 0, otherwise the number of bits
// needed for the magnitude of `n`, capped at 12.
// Kept as a plain comparison ladder; the original author's note says this
// form measured slightly faster than the switch and bit-shift variants tried.
inline fn get_size(n: i16) u8 {
    // widen first so that the magnitude of -32768 is representable
    const wide: i32 = n;
    const magnitude = if (wide < 0) -wide else wide;

    if (magnitude == 0) return 0;
    if (magnitude < 2) return 1;
    if (magnitude < 4) return 2;
    if (magnitude < 8) return 3;
    if (magnitude < 16) return 4;
    if (magnitude < 32) return 5;
    if (magnitude < 64) return 6;
    if (magnitude < 128) return 7;
    if (magnitude < 256) return 8;
    if (magnitude < 512) return 9;
    if (magnitude < 1024) return 10;
    if (magnitude < 2048) return 11;
    return 12;
}
// Run-length encodes one quantized block into `rle` and counts the symbols
// it produces.
// The DC entry is stored as its difference from `dc_diff.*`, which is then
// updated to this block's DC value; its symbol is just the size.
// Each non-zero AC entry becomes a symbol with the preceding zero run in the
// top 4 bits and the size in the low 4 bits; every full 16 zeros of a run is
// emitted as a separate 0xf0 unit first. Trailing zeros emit nothing.
// A 0x00 unit is appended at the end of every block.
fn parse_block(block: *util.BlockQuantized, dc_diff: *i16, rle: *RLE_Seq, dc_freqs: []u32, ac_freqs: []u32) !void {
    const delta = block[0] - dc_diff.*;
    const dc_symbol = get_size(delta);
    dc_freqs[dc_symbol] += 1;
    try rle.append(.{ .symbol = dc_symbol, .value = delta });
    dc_diff.* = block[0];

    var zero_run: u8 = 0;
    for (1..64) |pos| {
        const coeff = block[pos];
        if (coeff != 0) {
            // a run nibble can only express 0..15, longer runs are split
            while (zero_run >= 16) {
                ac_freqs[0xf0] += 1;
                try rle.append(.{ .symbol = 0xf0, .value = 0 });
                zero_run -= 16;
            }
            const ac_symbol = (zero_run << 4) + get_size(coeff);
            ac_freqs[ac_symbol] += 1;
            try rle.append(.{ .symbol = ac_symbol, .value = coeff });
            zero_run = 0;
        } else {
            zero_run += 1;
        }
    }

    ac_freqs[0x00] += 1;
    try rle.append(.{ .symbol = 0x00, .value = 0 });
}
// Writes the start of the file to `f`: the 0xff 0xd8 magic marker, one
// quantization-table segment holding both tables (id 0 from buff.Q_Lum,
// id 1 from buff.Q_Chrom), and the frame header with the image dimensions.
// Uses writeAll so that a short write is retried instead of silently
// truncating the header.
fn write_headers(f: std.fs.File, buff: *const util.Buffers) !void {
    // NOTE(review): `w` comes from the outer dimension of buff.Y and is the
    // first of the two sizes written after the precision byte; the frame
    // header defines that first field as the number of lines (height), so
    // confirm the axis order of util.Buffers matches the names used here.
    // Only the low 16 bits of each size are written.
    const w = buff.Y.len * 16;
    const h = buff.Y[0].len * 16;
    // write out magic marker 0xff 0xd8 and both quant tables,
    var out_buff = [6]u8{ 0xff, 0xd8, 0xff, 0xdb, 0x00, 0x84 } ++ [1]u8{0x00} ++ [_]u8{0x00} ** 64 ++ [1]u8{0x01} ++ [_]u8{0x00} ** 64;
    for (0..64) |i| {
        out_buff[7 + i] = @intFromFloat(buff.Q_Lum[i]);
        // the second table starts 65 bytes later: 64 values plus its id byte
        out_buff[7 + i + 65] = @intFromFloat(buff.Q_Chrom[i]);
    }
    try f.writeAll(&out_buff);
    // write out SOF block, everything except w and h are largely irrelevant
    const sof_buff = [_]u8{
        0xff, 0xc0, 0x00, 0x11, 0x08, @truncate(w >> 8), @truncate(w & 0x00ff), @truncate(h >> 8), @truncate(h & 0x00ff), 0x03,
        0x01, 0x22, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01,
    };
    try f.writeAll(&sof_buff);
}
// Writes one Huffman table definition segment to `f`: marker 0xff 0xc4, the
// segment length, the class/id byte, the 16 code-length counts bits[1..17]
// and the first `total_n` symbols of huffval.
// `tree_i` selects the class/id byte: odd indices set the high nibble to 1,
// and `tree_i / 2` is the table id in the low nibble.
// Uses writeAll so that a short write is retried instead of silently
// truncating the table.
fn dump_huffman(f: std.fs.File, huff_meta: HuffmanMeta, tree_i: usize) !void {
    // length field covers itself (2), the class/id byte (1), the 16 counts
    // and the symbols
    const table_len = 3 + 16 + huff_meta.total_n;
    const header = [5]u8{ 0xff, 0xc4, @truncate(table_len >> 8), @truncate(table_len & 0x00ff), @truncate(((tree_i % 2) << 4) | (tree_i / 2)) };
    try f.writeAll(&header);
    try f.writeAll(huff_meta.bits[1..17]);
    try f.writeAll(huff_meta.huffval[0..huff_meta.total_n]);
}
// Encodes `buff` and writes the result to "out.jpg" in the current working
// directory. `alloc` backs all temporary encoder state, which is released
// before returning.
pub fn generate_jpg(buff: util.Buffers, alloc: std.mem.Allocator) !void {
    var scan = try Scan.init(alloc);
    defer scan.deinit();

    const out_file = try std.fs.cwd().createFile("out.jpg", .{});
    defer out_file.close();

    try write_headers(out_file, &buff);

    // Two passes over the data: the first run-length encodes the image and
    // emits the Huffman tables built from it, the second writes the scan
    // using those tables.
    try scan.do_rle_freq_pass(&buff, out_file);
    try scan.dump_scan(out_file);
}