405 lines
12 KiB
Zig
405 lines
12 KiB
Zig
const std = @import("std");
|
|
|
|
const util = @import("util.zig");
|
|
|
|
/// Growable sequence of run-length units produced for one colour component.
const RLE_Seq = std.ArrayList(RLE_Unit);

/// One entry of the run-length encoded coefficient stream.
const RLE_Unit = struct {
    /// Huffman symbol. For a DC entry this is the bit size of `value`; for an
    /// AC entry the high nibble is the preceding zero run and the low nibble
    /// is the bit size of `value`. 0x00 in AC position ends the block.
    symbol: u8,
    /// DC difference or AC coefficient the symbol describes (0 when the
    /// symbol carries no extra bits).
    value: i16,
};
|
|
|
|
/// Lookup table from a symbol byte to the Huffman code assigned to it.
const Huffman = std.AutoHashMap(u8, HuffCode);

/// A single Huffman code word.
const HuffCode = struct {
    /// Code bits, right-aligned.
    value: u16,
    /// Number of significant bits in `value`.
    n_bits: u5,
};
|
|
/// BITS / HUFFVAL description of a Huffman table, as written into a DHT
/// segment and consumed by `gen_codes`.
const HuffmanMeta = struct {
    /// `bits[n]` is the number of codes that are `n` bits long (index 0 unused).
    bits: [33]u8,
    /// Number of valid entries at the front of `huffval`.
    total_n: usize,
    /// Symbols ordered by increasing code length.
    huffval: [256]u8,
};
|
|
|
|
/// Writes run-length units to a file as Huffman codes followed by their
/// extra value bits, using a buffered bit writer.
const RLEWriter = struct {
    bw: util.BufferedBitWriter,

    const Self = @This();

    /// Creates a writer that emits its bits to `f`.
    fn init(f: std.fs.File) Self {
        return Self{
            .bw = util.BufferedBitWriter.init(f),
        };
    }

    /// Writes one block: the DC unit at `units[idx_ptr.*]` followed by AC
    /// units up to and including the 0x00 end-of-block unit. On return (also
    /// on error) `idx_ptr.*` holds the index reached.
    ///
    /// The 0x00 unit terminates every block in `units`, but the end-of-block
    /// code is only written to the stream when fewer than 63 AC coefficients
    /// were covered. A block whose last coefficient is non-zero is complete
    /// without it, and a decoder would read a trailing end-of-block code as
    /// the start of the next block.
    ///
    /// Panics (safe builds) if a symbol has no entry in the given table.
    fn write_until_eob(self: *Self, units: []RLE_Unit, idx_ptr: *usize, dc_huff: *Huffman, ac_huff: *Huffman) !void {
        var i = idx_ptr.*;
        defer idx_ptr.* = i;

        try self.write_value(dc_huff.get(units[i].symbol).?, units[i]);
        i += 1;

        // Number of AC coefficients (positions 1..63) accounted for so far.
        var covered: usize = 0;
        while (true) : (i += 1) {
            const unit = units[i];
            if (unit.symbol == 0x00) {
                if (covered < 63) {
                    try self.write_value(ac_huff.get(unit.symbol).?, unit);
                }
                i += 1;
                break;
            }
            try self.write_value(ac_huff.get(unit.symbol).?, unit);
            // High nibble is the zero run; +1 for the coefficient itself
            // (0xf0 has run 15 and therefore covers 16 zeros).
            covered += (unit.symbol >> 4) + 1;
        }
    }

    /// Writes `huffcode`, then the low `size` bits of the unit's value, where
    /// `size` is the low nibble of the unit's symbol. Nothing follows the
    /// code when `size` is 0.
    fn write_value(self: *Self, huffcode: HuffCode, unit: RLE_Unit) !void {
        try self.bw.write_bits(huffcode.value, huffcode.n_bits);

        const value_size = unit.symbol & 0x0f;
        if (value_size == 0) return;

        // Negative values are written as (value - 1) in the same bit width.
        const unit_val: u16 = if (unit.value >= 0) @bitCast(unit.value) else @bitCast(unit.value - 1);
        // value_size is at most 15, so the shift cannot overflow a u16.
        const mask: u16 = (@as(u16, 1) << @as(u4, @intCast(value_size))) - 1;
        try self.bw.write_bits(unit_val & mask, @truncate(value_size));
    }

    /// Flushes the underlying bit writer at the end of the stream.
    inline fn flush(self: *Self) !void {
        try self.bw.flush_end();
    }
};
|
|
|
|
/// Per-image encoding state: run-length sequences for the three components,
/// symbol frequency tables and the Huffman tables derived from them.
///
/// Table index convention for `freqs` / `huffs`, as used by
/// `do_rle_freq_pass` and `dump_scan`: 0 and 1 are the DC and AC tables used
/// for Y, 2 and 3 are the DC and AC tables shared by U and V.
const Scan = struct {
    /// Owns the four slices in `freqs`.
    arena: std.heap.ArenaAllocator,
    root_alloc: std.mem.Allocator,
    /// Previous DC value per component, used for DC differencing.
    dc_diffs: [3]i16,
    rles: [3]RLE_Seq,
    /// Symbol counts; the last slot of each table is the reserved entry that
    /// `gen_huffman` sets to 1.
    freqs: [4][]u32,
    huffs: [4]Huffman,

    const Self = @This();

    /// Allocates the frequency tables and creates empty sequences and maps.
    /// Returns an error if an allocation fails; nothing is leaked in that case.
    fn init(alloc_root: std.mem.Allocator) !Self {
        var arena = std.heap.ArenaAllocator.init(alloc_root);
        errdefer arena.deinit();
        const alloc = arena.allocator();

        // Allocate through the local arena first; the arena is copied into
        // the struct only afterwards so that the stored copy tracks these
        // buffers and `deinit` releases them.
        const freqs = [4][]u32{
            try alloc.alloc(u32, 13),
            try alloc.alloc(u32, 257),
            try alloc.alloc(u32, 13),
            try alloc.alloc(u32, 257),
        };

        return Self{
            .root_alloc = alloc_root,
            .arena = arena,
            .dc_diffs = [3]i16{ 0, 0, 0 },
            .rles = [3]RLE_Seq{
                RLE_Seq.init(alloc_root),
                RLE_Seq.init(alloc_root),
                RLE_Seq.init(alloc_root),
            },
            .freqs = freqs,
            .huffs = [4]Huffman{
                Huffman.init(alloc_root),
                Huffman.init(alloc_root),
                Huffman.init(alloc_root),
                Huffman.init(alloc_root),
            },
        };
    }

    /// Releases the frequency tables, the run-length sequences and the
    /// Huffman maps.
    fn deinit(self: *Self) void {
        self.arena.deinit();
        for (0..3) |i| {
            self.rles[i].deinit();
        }
        for (0..4) |i| {
            self.huffs[i].deinit();
        }
    }

    /// First pass over the whole image: builds the run-length sequence and
    /// symbol frequencies for every block, then generates the Huffman tables
    /// and writes them to `f`.
    fn do_rle_freq_pass(self: *Self, buff: *const util.Buffers, f: std.fs.File) !void {
        const h = buff.Y_quant.len;
        const w = buff.Y_quant[0].len;
        for (self.freqs) |table| {
            @memset(table, 0);
        }
        for (0..h) |x| {
            for (0..w) |y| {
                // Four Y blocks per position, then one U and one V block.
                for (0..4) |Y_sub_i| {
                    try parse_block(&buff.Y_quant[x][y][Y_sub_i], &self.dc_diffs[0], &self.rles[0], self.freqs[0], self.freqs[1]);
                }
                try parse_block(&buff.U_quant[x][y], &self.dc_diffs[1], &self.rles[1], self.freqs[2], self.freqs[3]);
                try parse_block(&buff.V_quant[x][y], &self.dc_diffs[2], &self.rles[2], self.freqs[2], self.freqs[3]);
            }
        }
        try self.generate_huffmans(f);
    }

    /// Builds each of the four Huffman tables from its frequency table,
    /// fills the matching code map and writes the table to `f`.
    /// `gen_huffman` modifies the frequency table it is given.
    fn generate_huffmans(self: *Self, f: std.fs.File) !void {
        for (0..4) |i| {
            const huff_meta = gen_huffman(self.freqs[i]);
            try gen_codes(&self.huffs[i], huff_meta);
            try dump_huffman(f, huff_meta, i);
        }
    }

    /// Second pass: writes the scan header, then the entropy-coded data in
    /// the order four Y blocks, one U block, one V block, and finally the
    /// 0xff 0xd9 end marker. Blocks are written one at a time because the
    /// first unit of each block must be coded with the DC table.
    fn dump_scan(self: *Self, f: std.fs.File) !void {
        // Scan header; mostly hardcoded values.
        try f.writeAll(&[_]u8{ 0xff, 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x11, 0x03, 0x11, 0x00, 0x3f, 0x00 });

        var bw = RLEWriter.init(f);
        var idxs = [3]usize{ 0, 0, 0 };
        while (idxs[0] < self.rles[0].items.len and idxs[1] < self.rles[1].items.len and idxs[2] < self.rles[2].items.len) {
            for (0..4) |_| {
                try bw.write_until_eob(self.rles[0].items, &idxs[0], &self.huffs[0], &self.huffs[1]);
            }
            try bw.write_until_eob(self.rles[1].items, &idxs[1], &self.huffs[2], &self.huffs[3]);
            try bw.write_until_eob(self.rles[2].items, &idxs[2], &self.huffs[2], &self.huffs[3]);
        }
        try bw.flush();
        try f.writeAll(&[2]u8{ 0xff, 0xd9 });
    }
};
|
|
|
|
/// Helper for Huffman code generation.
///
/// Returns the indices of the smallest and the second-smallest non-zero
/// entries of `freqs`, in that order, or null when fewer than two entries
/// are non-zero. Among equal values the one at the lower index ranks first.
inline fn get_idx_min2(freqs: []u32) ?struct { usize, usize } {
    var lowest: ?usize = null;
    var second: ?usize = null;
    for (freqs, 0..) |freq, idx| {
        if (freq == 0) continue;
        if (lowest == null or freq < freqs[lowest.?]) {
            // The previous minimum becomes the runner-up rather than being
            // dropped.
            second = lowest;
            lowest = idx;
        } else if (second == null or freq < freqs[second.?]) {
            second = idx;
        }
    }
    const first_idx = lowest orelse return null;
    const second_idx = second orelse return null;
    return .{ first_idx, second_idx };
}
|
|
|
|
/// Generates the symbol -> code mapping from BITS and HUFFVAL and stores it
/// in `huff`.
/// See https://www.w3.org/Graphics/JPEG/itu-t81.pdf annex C.
///
/// Only code lengths 1..16 of `huffman_meta.bits` are used. Returns an error
/// if inserting into `huff` fails.
inline fn gen_codes(huff: *Huffman, huffman_meta: HuffmanMeta) !void {
    var sizes = [_]u5{0} ** 256;
    var codes = [_]u16{0} ** 256;

    // Size table: one entry per code, in order of increasing length,
    // terminated by a 0 entry.
    var count: usize = 0;
    for (1..17) |len| {
        var emitted: usize = 0;
        while (emitted < huffman_meta.bits[len]) : (emitted += 1) {
            sizes[count] = @truncate(len);
            count += 1;
        }
    }
    sizes[count] = 0;

    // Code table: consecutive codes within one length; moving to a longer
    // length shifts the running code left once per extra bit.
    var pos: usize = 0;
    var next_code: usize = 0;
    var cur_len = sizes[0];
    while (true) {
        codes[pos] = @truncate(next_code);
        next_code += 1;
        pos += 1;

        const upcoming = sizes[pos];
        if (upcoming == cur_len) continue;
        if (upcoming == 0) break;

        while (true) {
            next_code <<= 1;
            cur_len += 1;
            if (upcoming == cur_len) break;
        }
    }

    for (huffman_meta.huffval[0..count], sizes[0..count], codes[0..count]) |symbol, n_bits, value| {
        try huff.put(symbol, HuffCode{
            .value = value,
            .n_bits = n_bits,
        });
    }
}
|
|
|
|
/// Generates BITS and HUFFVAL from symbol frequencies.
/// See https://www.w3.org/Graphics/JPEG/itu-t81.pdf annex K.
///
/// `freqs` is modified: its last slot is used as the reserved entry and the
/// merge loop folds counts together, zeroing the merged entries. Code
/// lengths above 16 are shortened to at most 16 bits.
inline fn gen_huffman(freqs: []u32) HuffmanMeta {
    const no_link: u9 = 0x1ff; // sentinel: -1 is not representable in u9
    const reserved = freqs.len - 1;
    freqs[reserved] = 1;

    var code_len = [_]u8{0} ** 257;
    var chain = [_]u9{no_link} ** 257;

    // Repeatedly merge the two least frequent entries; every symbol on
    // either chain gets one bit longer.
    while (get_idx_min2(freqs)) |pair| {
        var a = pair.@"0";
        var b = pair.@"1";
        freqs[a] += freqs[b];
        freqs[b] = 0;

        code_len[a] += 1;
        while (chain[a] != no_link) {
            a = chain[a];
            code_len[a] += 1;
        }
        // Append b's chain to the end of a's chain.
        chain[a] = @truncate(b);

        code_len[b] += 1;
        while (chain[b] != no_link) {
            b = chain[b];
            code_len[b] += 1;
        }
    }

    // Count how many codes exist for each length.
    var bits = [_]u8{0} ** 33;
    for (0..freqs.len) |sym| {
        const len = code_len[sym];
        if (len != 0) {
            bits[len] += 1;
        }
    }

    // Move codes longer than 16 bits up the tree, one pair at a time.
    var len: usize = 32;
    while (len > 16) {
        if (bits[len] == 0) {
            len -= 1;
            continue;
        }
        var shorter = len - 2;
        while (bits[shorter] == 0) {
            shorter -= 1;
        }
        bits[len] -= 2;
        bits[len - 1] += 1;
        bits[shorter + 1] += 2;
        bits[shorter] -= 1;
    }
    // Drop one code of the longest remaining length (the reserved entry).
    while (bits[len] == 0) {
        len -= 1;
    }
    bits[len] -= 1;

    // List symbols by increasing code length, skipping the reserved slot.
    var huffval = [_]u8{0} ** 256;
    var n: usize = 0;
    for (1..33) |target| {
        for (0..reserved) |sym| {
            if (code_len[sym] == target) {
                huffval[n] = @truncate(sym);
                n += 1;
            }
        }
    }

    return HuffmanMeta{
        .bits = bits,
        .total_n = n,
        .huffval = huffval,
    };
}
|
|
|
|
/// Returns the size field for `n`: the number of bits needed to represent
/// its magnitude, capped at 12 (0 for n == 0).
/// Kept as a plain comparison ladder; the original author notes this form
/// measured faster than a switch or bit shifting.
inline fn get_size(n: i16) u8 {
    // Widen before negating so that -32768 does not overflow.
    const magnitude: u16 = if (n < 0) @intCast(-@as(i32, n)) else @intCast(n);
    if (magnitude == 0) return 0;
    if (magnitude < 2) return 1;
    if (magnitude < 4) return 2;
    if (magnitude < 8) return 3;
    if (magnitude < 16) return 4;
    if (magnitude < 32) return 5;
    if (magnitude < 64) return 6;
    if (magnitude < 128) return 7;
    if (magnitude < 256) return 8;
    if (magnitude < 512) return 9;
    if (magnitude < 1024) return 10;
    if (magnitude < 2048) return 11;
    return 12;
}
|
|
|
|
/// Run-length encodes one block into `rle` and counts its symbols.
///
/// The DC coefficient is stored as the difference to the previous DC value
/// in `dc_diff`, which is then updated; its symbol is just the size. Each
/// non-zero AC coefficient gets a symbol with the preceding zero run in the
/// top 4 bits and the size in the low 4 bits, with a 0xf0 unit for every
/// full run of 16 zeros before it. A 0x00 unit is appended at the end of
/// every block. Returns an error if appending to `rle` fails.
fn parse_block(block: *util.BlockQuantized, dc_diff: *i16, rle: *RLE_Seq, dc_freqs: []u32, ac_freqs: []u32) !void {
    // DC coefficient.
    const dc_delta = block[0] - dc_diff.*;
    const dc_symbol = get_size(dc_delta);
    dc_freqs[dc_symbol] += 1;
    try rle.append(RLE_Unit{ .symbol = dc_symbol, .value = dc_delta });
    dc_diff.* = block[0];

    // AC coefficients.
    var zero_run: u8 = 0;
    var k: usize = 1;
    while (k < 64) : (k += 1) {
        const coef = block[k];
        if (coef != 0) {
            // Runs longer than 15 are split off as 0xf0 units.
            while (zero_run >= 16) : (zero_run -= 16) {
                ac_freqs[0xf0] += 1;
                try rle.append(RLE_Unit{ .symbol = 0xf0, .value = 0 });
            }
            const ac_symbol = (zero_run << 4) + get_size(coef);
            ac_freqs[ac_symbol] += 1;
            try rle.append(RLE_Unit{ .symbol = ac_symbol, .value = coef });
            zero_run = 0;
        } else {
            zero_run += 1;
        }
    }

    // Block terminator; trailing zeros are not encoded individually.
    ac_freqs[0x00] += 1;
    try rle.append(RLE_Unit{ .symbol = 0x00, .value = 0 });
}
|
|
|
|
/// Writes the start-of-image marker, a 0xff 0xdb segment holding both
/// quantisation tables (id 0 from `buff.Q_Lum`, id 1 from `buff.Q_Chrom`)
/// and the 0xff 0xc0 frame header to `f`.
///
/// Returns an error if writing fails; every segment is written in full.
fn write_headers(f: std.fs.File, buff: *const util.Buffers) !void {
    // NOTE(review): the frame header stores the number of lines first and
    // the samples per line second. The value written first here comes from
    // buff.Y.len; confirm that the outer dimension of buff.Y is rows.
    const w = buff.Y.len * 16;
    const h = buff.Y[0].len * 16;

    // Magic marker 0xff 0xd8, then the table segment: 2 length bytes and two
    // tables of 1 id byte + 64 entries each (0x84 = 132 bytes).
    var out_buff = [6]u8{ 0xff, 0xd8, 0xff, 0xdb, 0x00, 0x84 } ++ [1]u8{0x00} ++ [_]u8{0x00} ** 64 ++ [1]u8{0x01} ++ [_]u8{0x00} ** 64;
    for (0..64) |i| {
        out_buff[7 + i] = @intFromFloat(buff.Q_Lum[i]);
        out_buff[7 + i + 65] = @intFromFloat(buff.Q_Chrom[i]);
    }
    try f.writeAll(&out_buff);

    // Frame header; everything except the two dimensions is hardcoded.
    const sof_buff = [_]u8{
        0xff, 0xc0, 0x00, 0x11, 0x08, @truncate(w >> 8), @truncate(w & 0x00ff), @truncate(h >> 8), @truncate(h & 0x00ff), 0x03,
        0x01, 0x22, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01,
    };
    try f.writeAll(&sof_buff);
}
|
|
|
|
/// Writes one Huffman table segment (marker 0xff 0xc4) to `f`: the segment
/// length, the class/id byte, the 16 code-length counts `bits[1..17]` and
/// the first `total_n` entries of `huffval`.
///
/// `tree_i` selects the class/id byte: odd indices set the high nibble to 1,
/// and `tree_i / 2` is the low nibble. Returns an error if writing fails;
/// every part is written in full.
fn dump_huffman(f: std.fs.File, huff_meta: HuffmanMeta, tree_i: usize) !void {
    // 2 length bytes + 1 class/id byte + 16 counts + the symbols.
    const table_len = 3 + 16 + huff_meta.total_n;
    const header = [5]u8{ 0xff, 0xc4, @truncate(table_len >> 8), @truncate(table_len & 0x00ff), @truncate(((tree_i % 2) << 4) | (tree_i / 2)) };
    try f.writeAll(&header);
    try f.writeAll(huff_meta.bits[1..17]);
    try f.writeAll(huff_meta.huffval[0..huff_meta.total_n]);
}
|
|
|
|
/// Encodes `buff` and writes the result to "out.jpg" in the current working
/// directory. `alloc` backs all temporary encoder state, which is released
/// before returning.
///
/// Returns an error if the file cannot be created, a write fails, or an
/// allocation fails.
pub fn generate_jpg(buff: util.Buffers, alloc: std.mem.Allocator) !void {
    var scan = try Scan.init(alloc);
    defer scan.deinit();

    const out_file = try std.fs.cwd().createFile("out.jpg", .{});
    defer out_file.close();

    try write_headers(out_file, &buff);

    // Two passes: the first builds the run-length data and Huffman tables
    // (and writes the tables), the second writes the coded image data.
    try scan.do_rle_freq_pass(&buff, out_file);
    try scan.dump_scan(out_file);
}
|