const std = @import("std");
const util = @import("util.zig");

/// Growable list of run-length units; one list is kept per colour component.
const RLE_Seq = std.ArrayList(RLE_Unit);

/// One unit of the intermediate run-length sequence.
/// `symbol` is the byte that gets Huffman coded; its low nibble is the number
/// of extra bits taken from `value` and written right after the code.
const RLE_Unit = struct {
    symbol: u8,
    value: i16,
};

/// Maps a symbol byte to the Huffman code assigned to it.
const Huffman = std.AutoHashMap(u8, HuffCode);

/// A Huffman code: the code word in the low `n_bits` bits of `value`.
const HuffCode = struct {
    value: u16,
    n_bits: u5,
};

/// BITS / HUFFVAL description of a Huffman table.
/// `bits[n]` is the number of codes of length `n`; the first `total_n`
/// entries of `huffval` are the symbols in order of increasing code length.
const HuffmanMeta = struct {
    bits: [33]u8,
    total_n: usize,
    huffval: [256]u8,
};

/// Writes Huffman-coded run-length units to a file through a bit writer.
const RLEWriter = struct {
    bw: util.BufferedBitWriter,

    const Self = @This();

    fn init(f: std.fs.File) Self {
        return Self{
            .bw = util.BufferedBitWriter.init(f),
        };
    }

    /// Writes one block from `units`, starting at `idx_ptr.*`.
    /// The first unit is coded with `dc_huff`, every following unit with
    /// `ac_huff`, up to and including the 0x00 unit that terminates the block
    /// in the sequence. On return (also on error) `idx_ptr.*` holds the index
    /// reached so far.
    /// Panics if a symbol has no entry in the corresponding table.
    fn write_until_eob(self: *Self, units: []RLE_Unit, idx_ptr: *usize, dc_huff: *Huffman, ac_huff: *Huffman) !void {
        var i = idx_ptr.*;
        defer idx_ptr.* = i;

        try self.write_value(dc_huff.get(units[i].symbol).?, units[i]);
        i += 1;

        // Index of the next AC coefficient that the units written so far
        // have not yet covered (coefficient 0 is the DC term).
        var next_coeff: usize = 1;
        while (true) : (i += 1) {
            const unit = units[i];
            if (unit.symbol == 0x00) {
                // The sequence always carries a 0x00 terminator, but the EOB
                // code may only go into the bit stream when the block ends in
                // zeros. If coefficient 63 was coded, a decoder stops on its
                // own and would read an extra EOB as the next block's DC code.
                if (next_coeff < 64) {
                    try self.write_value(ac_huff.get(unit.symbol).?, unit);
                }
                i += 1;
                break;
            }
            try self.write_value(ac_huff.get(unit.symbol).?, unit);
            // High nibble = zeros skipped, plus the coefficient itself
            // (0xf0 therefore advances by 16).
            next_coeff += @as(usize, unit.symbol >> 4) + 1;
        }
    }

    // must write out the huffcode and extra bits if size >= 1
    fn write_value(self: *Self, huffcode: HuffCode, unit: RLE_Unit) !void {
        try self.bw.write_bits(huffcode.value, huffcode.n_bits);

        const value_size = unit.symbol & 0x0f;
        // 0 values only need the huffcode, no extra bits
        if (value_size == 0) return;

        // negative values must be written as val - 1 with the same number of bits as orig
        const unit_val: u16 = if (unit.value >= 0) @bitCast(unit.value) else @bitCast(unit.value - 1);
        // value_size is at most 15 here, so the shift cannot overflow a u16.
        const mask: u16 = (@as(u16, 1) << @as(u4, @truncate(value_size))) - 1;
        try self.bw.write_bits(unit_val & mask, @truncate(value_size));
    }

    inline fn flush(self: *Self) !void {
        try self.bw.flush_end();
    }
};

/// State for encoding one scan: per-component DC predictors and run-length
/// sequences, plus four frequency tables and four Huffman tables.
/// Table index 0/1 is used for Y (DC/AC), index 2/3 for U and V (DC/AC).
const Scan = struct {
    arena: std.heap.ArenaAllocator,
    root_alloc: std.mem.Allocator,
    dc_diffs: [3]i16,
    rles: [3]RLE_Seq,
    freqs: [4][]u32,
    huffs: [4]Huffman,

    const Self = @This();

    /// Creates an empty scan. The frequency tables live in an arena backed by
    /// `alloc_root`; the lists and maps allocate from `alloc_root` directly.
    /// Release everything with `deinit`.
    fn init(alloc_root: std.mem.Allocator) !Self {
        var arena = std.heap.ArenaAllocator.init(alloc_root);
        errdefer arena.deinit();
        const alloc = arena.allocator();

        // Allocate before the arena is copied into the result: the copy must
        // carry the arena's buffer list, otherwise deinit() frees nothing.
        const freqs = [4][]u32{
            try alloc.alloc(u32, 13),
            try alloc.alloc(u32, 257),
            try alloc.alloc(u32, 13),
            try alloc.alloc(u32, 257),
        };

        return Self{
            .root_alloc = alloc_root,
            .arena = arena,
            .dc_diffs = [3]i16{ 0, 0, 0 },
            .rles = [3]RLE_Seq{
                RLE_Seq.init(alloc_root),
                RLE_Seq.init(alloc_root),
                RLE_Seq.init(alloc_root),
            },
            .freqs = freqs,
            .huffs = [4]Huffman{
                Huffman.init(alloc_root),
                Huffman.init(alloc_root),
                Huffman.init(alloc_root),
                Huffman.init(alloc_root),
            },
        };
    }

    /// Frees the run-length lists, the Huffman maps and the arena.
    fn deinit(self: *Self) void {
        for (0..3) |i| {
            self.rles[i].deinit();
        }
        for (0..4) |i| {
            self.huffs[i].deinit();
        }
        self.arena.deinit();
    }

    // operates on full image, generating an RLE sequence and frequency values,
    // then gens corresponding hufftable
    fn do_rle_freq_pass(self: *Self, buff: *const util.Buffers, f: std.fs.File) !void {
        const h = buff.Y_quant.len;
        const w = buff.Y_quant[0].len;

        for (0..4) |i| {
            @memset(self.freqs[i], 0);
        }

        for (0..h) |x| {
            for (0..w) |y| {
                // Four Y blocks per position, then one U and one V block.
                for (0..4) |Y_sub_i| {
                    try parse_block(&buff.Y_quant[x][y][Y_sub_i], &self.dc_diffs[0], &self.rles[0], self.freqs[0], self.freqs[1]);
                }
                try parse_block(&buff.U_quant[x][y], &self.dc_diffs[1], &self.rles[1], self.freqs[2], self.freqs[3]);
                try parse_block(&buff.V_quant[x][y], &self.dc_diffs[2], &self.rles[2], self.freqs[2], self.freqs[3]);
            }
        }

        try self.generate_huffmans(f);
    }

    /// Builds all four Huffman tables from the collected frequencies, fills
    /// the code maps and writes each table to `f`.
    fn generate_huffmans(self: *Self, f: std.fs.File) !void {
        for (0..4) |i| {
            const huff_meta = gen_huffman(self.freqs[i]);
            try gen_codes(&self.huffs[i], huff_meta);
            try dump_huffman(f, huff_meta, i);
        }
    }

    // dumps scan, order of Y macroblock (blockx4), U block, V block,
    // need per block since first must be interpreted as dc
    // 0x00 dc does not signal next block 0x00 ac does
    fn dump_scan(self: *Self, f: std.fs.File) !void {
        // mostly hardcoded values
        // writeAll so that a short write cannot silently truncate the header
        try f.writeAll(&[_]u8{ 0xff, 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x11, 0x03, 0x11, 0x00, 0x3f, 0x00 });

        var bw = RLEWriter.init(f);
        var idxs = [3]usize{ 0, 0, 0 };
        while (idxs[0] < self.rles[0].items.len and
            idxs[1] < self.rles[1].items.len and
            idxs[2] < self.rles[2].items.len)
        {
            for (0..4) |_| {
                try bw.write_until_eob(self.rles[0].items, &idxs[0], &self.huffs[0], &self.huffs[1]);
            }
            try bw.write_until_eob(self.rles[1].items, &idxs[1], &self.huffs[2], &self.huffs[3]);
            try bw.write_until_eob(self.rles[2].items, &idxs[2], &self.huffs[2], &self.huffs[3]);
        }
        try bw.flush();

        try f.writeAll(&[2]u8{ 0xff, 0xd9 });
    }
};

// helper func needed for generating huffman code
// gets idxs of least 2 non-zero values in slice.
// Returns .{ idx_of_smallest, idx_of_second_smallest }, or null when fewer
// than two entries are non-zero. On equal values the earlier index wins.
inline fn get_idx_min2(freqs: []u32) ?struct { usize, usize } {
    var lowest: ?usize = null;
    var second: ?usize = null;

    for (freqs, 0..) |f, i| {
        if (f == 0) continue;
        if (lowest == null or f < freqs[lowest.?]) {
            // The previous minimum becomes the runner-up.
            second = lowest;
            lowest = i;
        } else if (second == null or f < freqs[second.?]) {
            second = i;
        }
    }

    const first_idx = lowest orelse return null;
    const second_idx = second orelse return null;
    return .{ first_idx, second_idx };
}

// generates code mapping from BITS and HUFFVAL
// see https://www.w3.org/Graphics/JPEG/itu-t81.pdf annex c
inline fn gen_codes(huff: *Huffman, huffman_meta: HuffmanMeta) !void {
    // One spare slot for the terminating 0 even when all 256 symbols are used.
    var huffsize = [_]u5{0} ** 257;
    var huffcode = [_]u16{0} ** 257;

    // List the code length of every code, shortest first.
    var k: usize = 0;
    for (1..17) |i| {
        var j: usize = 1;
        while (j <= huffman_meta.bits[i]) : (j += 1) {
            huffsize[k] = @truncate(i);
            k += 1;
        }
    }
    huffsize[k] = 0;
    const total_k = k;
    // An empty table has no codes to assign.
    if (total_k == 0) return;

    // Assign consecutive code words, shifting left whenever the length grows.
    k = 0;
    var code: usize = 0;
    var si = huffsize[0];
    while (true) {
        huffcode[k] = @truncate(code);
        code += 1;
        k += 1;
        if (huffsize[k] == si) continue;
        if (huffsize[k] == 0) break;
        code <<= 1;
        si += 1;
        while (huffsize[k] != si) {
            code <<= 1;
            si += 1;
        }
    }

    for (0..total_k) |x| {
        try huff.put(huffman_meta.huffval[x], HuffCode{
            .n_bits = huffsize[x],
            .value = huffcode[x],
        });
    }
}

// generate BITS and HUFFVAL
// see https://www.w3.org/Graphics/JPEG/itu-t81.pdf annex K
// Note: `freqs` is modified in place; its last entry is used as the reserved
// symbol and never appears in the returned HUFFVAL.
inline fn gen_huffman(freqs: []u32) HuffmanMeta {
    freqs[freqs.len - 1] = 1;

    var codesizes = [_]u8{0} ** 257;
    var others = [_]u9{0x1ff} ** 257; // 0x1ff since -1 not available and i8 wouldn't fit anyway

    // Repeatedly merge the two least frequent entries; every symbol chained
    // to either of them gets one bit longer.
    while (get_idx_min2(freqs)) |tmp| {
        var v1 = tmp.@"0";
        var v2 = tmp.@"1";
        freqs[v1] += freqs[v2];
        freqs[v2] = 0;

        codesizes[v1] += 1;
        while (others[v1] != 0x1ff) : (codesizes[v1] += 1) {
            v1 = others[v1];
        }
        others[v1] = @truncate(v2);

        codesizes[v2] += 1;
        while (others[v2] != 0x1ff) : (codesizes[v2] += 1) {
            v2 = others[v2];
        }
    }

    // Count how many codes exist of each length.
    var bits = [_]u8{0} ** 33;
    for (0..freqs.len) |sym| {
        if (codesizes[sym] != 0) {
            bits[codesizes[sym]] += 1;
        }
    }

    // Move codes longer than 16 bits to shorter lengths.
    var i: usize = 32;
    while (i > 16) {
        if (bits[i] > 0) {
            var j = i - 2;
            while (bits[j] <= 0) : (j -= 1) {}
            bits[i] -= 2;
            bits[i - 1] += 1;
            bits[j + 1] += 2;
            bits[j] -= 1;
        } else {
            i -= 1;
        }
    }
    // Drop one code of the longest remaining length (the reserved entry).
    // Guarded so that a table without any code does not underflow the index.
    while (i > 0 and bits[i] == 0) : (i -= 1) {}
    if (i > 0) {
        bits[i] -= 1;
    }

    // Symbols ordered by code size; the reserved last entry is skipped.
    var k: usize = 0;
    var huffval = [_]u8{0} ** 256;
    for (1..33) |size| {
        for (0..freqs.len - 1) |j| {
            if (codesizes[j] == size) {
                huffval[k] = @truncate(j);
                k += 1;
            }
        }
    }

    return .{
        .bits = bits,
        .huffval = huffval,
        .total_n = k,
    };
}

// returns size field for given value tested switch and bitshifts, this was
// somehow the fastest, slightly better than bit shifting
inline fn get_size(n: i16) u8 {
    if (n == 0) {
        return 0;
    } else if (n < 2 and n > -2) {
        return 1;
    } else if (n < 4 and n > -4) {
        return 2;
    } else if (n < 8 and n > -8) {
        return 3;
    } else if (n < 16 and n > -16) {
        return 4;
    } else if (n < 32 and n > -32) {
        return 5;
    } else if (n < 64 and n > -64) {
        return 6;
    } else if (n < 128 and n > -128) {
        return 7;
    } else if (n < 256 and n > -256) {
        return 8;
    } else if (n < 512 and n > -512) {
        return 9;
    } else if (n < 1024 and n > -1024) {
        return 10;
    } else if (n < 2048 and n > -2048) {
        return 11;
    } else {
        return 12;
    }
}

// for each block, process the dc by differencing it and perform RLE.
// dc symbols are just the size, ac symbols are the rle in top 4 bits, size in next
// append 0x00 at end of each block.
fn parse_block(block: *util.BlockQuantized, dc_diff: *i16, rle: *RLE_Seq, dc_freqs: []u32, ac_freqs: []u32) !void {
    // DC term: code the difference to the previous block's DC value, which
    // the caller keeps in dc_diff; the symbol is just the size of that diff.
    const diff = block[0] - dc_diff.*;
    const dc_symbol = get_size(diff);
    dc_freqs[dc_symbol] += 1;
    try rle.append(RLE_Unit{
        .symbol = dc_symbol,
        .value = diff,
    });
    dc_diff.* = block[0];

    // AC terms: count zeros until a non-zero coefficient is found.
    var curr_rlen: u8 = 0;
    for (1..64) |i| {
        if (block[i] == 0) {
            curr_rlen += 1;
            continue;
        }

        // Only 4 bits are available for the run, so every full run of 16
        // zeros is emitted as its own 0xf0 unit.
        while (curr_rlen > 15) : (curr_rlen -= 16) {
            ac_freqs[0xf0] += 1;
            try rle.append(RLE_Unit{
                .symbol = 0xf0,
                .value = 0,
            });
        }

        const ac_symbol = (curr_rlen << 4) + get_size(block[i]);
        ac_freqs[ac_symbol] += 1;
        try rle.append(RLE_Unit{
            .symbol = ac_symbol,
            .value = block[i],
        });
        curr_rlen = 0;
    }

    // Trailing zeros are not emitted; a 0x00 unit always closes the block in
    // the sequence, and the consumer uses it to find the block boundary.
    ac_freqs[0x00] += 1;
    try rle.append(RLE_Unit{
        .symbol = 0x00,
        .value = 0,
    });
}

/// Writes the start-of-image marker, both quantisation tables and the
/// start-of-frame segment to `f`.
fn write_headers(f: std.fs.File, buff: *const util.Buffers) !void {
    // NOTE(review): these go out in the order (w, h) right after the
    // precision byte, while SOF0 stores the number of lines before the
    // samples per line. That is only right if buff.Y's outer dimension is
    // rows; confirm against util.Buffers.
    const w = buff.Y.len * 16;
    const h = buff.Y[0].len * 16;

    // write out magic marker 0xff 0xd8 and both quant tables,
    var out_buff = [6]u8{ 0xff, 0xd8, 0xff, 0xdb, 0x00, 0x84 } ++
        [1]u8{0x00} ++ [_]u8{0x00} ** 64 ++
        [1]u8{0x01} ++ [_]u8{0x00} ** 64;
    for (0..64) |i| {
        out_buff[7 + i] = @intFromFloat(buff.Q_Lum[i]);
        // The second table starts 65 bytes later (64 entries + its id byte).
        out_buff[7 + i + 65] = @intFromFloat(buff.Q_Chrom[i]);
    }
    // writeAll instead of write: a short write must not silently truncate
    // the header.
    try f.writeAll(&out_buff);

    // write out SOF block, everything except w and h are largely irrelevant
    const sof_buff = [_]u8{
        0xff,                   0xc0,
        0x00,                   0x11,
        0x08,
        @truncate(w >> 8),
        @truncate(w & 0x00ff),
        @truncate(h >> 8),
        @truncate(h & 0x00ff),
        0x03,
        0x01,                   0x22,
        0x00,
        0x02,                   0x11,
        0x01,
        0x03,                   0x11,
        0x01,
    };
    try f.writeAll(&sof_buff);
}

/// Writes one Huffman table segment (marker 0xff 0xc4) to `f`.
/// `tree_i` selects the table header byte: `tree_i % 2` goes into the high
/// nibble and `tree_i / 2` into the low nibble.
fn dump_huffman(f: std.fs.File, huff_meta: HuffmanMeta, tree_i: usize) !void {
    // write out huffman table bits and huffval representation
    // Segment length: 2 length bytes + 1 header byte + 16 counts + symbols.
    const table_len = 3 + 16 + huff_meta.total_n;
    const out_buff = [5]u8{
        0xff,
        0xc4,
        @truncate(table_len >> 8),
        @truncate(table_len & 0x00ff),
        @truncate(((tree_i % 2) << 4) | (tree_i / 2)),
    };
    try f.writeAll(&out_buff);
    try f.writeAll(huff_meta.bits[1..17]);
    try f.writeAll(huff_meta.huffval[0..huff_meta.total_n]);
}

/// Encodes the quantised image in `buff` and writes it to "out.jpg" in the
/// current working directory. `alloc` backs all temporary encoder state,
/// which is released before returning.
pub fn generate_jpg(buff: util.Buffers, alloc: std.mem.Allocator) !void {
    var scan_data = try Scan.init(alloc);
    defer scan_data.deinit();

    const f = try std.fs.cwd().createFile("out.jpg", .{});
    defer f.close();

    try write_headers(f, &buff);

    // requires 2 passes, one for RLE and huffcode generation
    // second to actually write out data
    try scan_data.do_rle_freq_pass(&buff, f);
    try scan_data.dump_scan(f);
}