Align frame header and extend benchmark driver

Align the framed header section so decode can avoid hidden alignment repair on the hot path, and teach protocol_bench.rs to build and run the standalone tracing binaries directly. The updated benchmark shows lower encode, decode, forward, local call, and hook data costs than the previous baseline.
This commit is contained in:
Michael Mikovsky
2026-04-25 13:28:20 -06:00
parent de194ac319
commit 412960203c
2 changed files with 122 additions and 2 deletions
+119
View File
@@ -1,4 +1,6 @@
use std::hint::black_box; use std::hint::black_box;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::time::Instant; use std::time::Instant;
use unshell::protocol::tree::{ use unshell::protocol::tree::{
@@ -8,8 +10,14 @@ use unshell::protocol::{CallMessage, PacketHeader, PacketType, decode_frame, enc
const SAMPLES: usize = 500; const SAMPLES: usize = 500;
const ITERS: usize = 1_000; const ITERS: usize = 1_000;
const TOOL_ITERS: usize = 10_000;
fn main() { fn main() {
if std::env::args().nth(1).as_deref() == Some("tools") {
run_external_tools();
return;
}
println!("protocol benchmark"); println!("protocol benchmark");
println!("samples: {SAMPLES}"); println!("samples: {SAMPLES}");
println!("iterations/sample: {ITERS}"); println!("iterations/sample: {ITERS}");
@@ -33,6 +41,10 @@ fn main() {
bench.name, bench.mean_ns, bench.stddev_ns, bench.samples bench.name, bench.mean_ns, bench.stddev_ns, bench.samples
); );
} }
println!();
println!("Run `cargo run --example protocol_bench -- tools` to build and execute");
println!("the standalone operation binaries under strace, perf, and heaptrack.");
} }
struct BenchResult { struct BenchResult {
@@ -284,3 +296,110 @@ fn summarize(name: &'static str, samples: &[f64]) -> BenchResult {
fn path(parts: &[&str]) -> Vec<String> { fn path(parts: &[&str]) -> Vec<String> {
parts.iter().map(|part| String::from(*part)).collect() parts.iter().map(|part| String::from(*part)).collect()
} }
/// Builds every example and then profiles each standalone protocol operation binary.
///
/// Each operation binary is executed directly, under `strace`, under `perf stat`,
/// and under `heaptrack`, always with `TOOL_ITERS` as its single argument.
/// Heaptrack recordings are written to `heaptrack-cli/` inside the crate root.
///
/// # Panics
///
/// Panics if the `heaptrack-cli` directory cannot be created.
fn run_external_tools() {
    let root = Path::new(env!("CARGO_MANIFEST_DIR"));
    build_examples(root);

    // The build step runs `cargo` from `root` and inherits this process's
    // environment, so honor `CARGO_TARGET_DIR` when it is set instead of assuming
    // the default `target/` directory. `Path::join` keeps an absolute override
    // as-is and resolves a relative one against `root`.
    let examples_dir = std::env::var_os("CARGO_TARGET_DIR")
        .map_or_else(|| root.join("target"), |dir| root.join(dir))
        .join("debug")
        .join("examples");

    // (report name, example binary name)
    let ops = [
        ("encode_call", "protocol_op_encode_call"),
        ("decode_call", "protocol_op_decode_call"),
        ("forward_call_receive", "protocol_op_forward_call_receive"),
        ("local_call_receive", "protocol_op_local_call_receive"),
        ("hook_data_receive", "protocol_op_hook_data_receive"),
    ];

    let heap_dir = root.join("heaptrack-cli");
    std::fs::create_dir_all(&heap_dir).expect("heaptrack-cli directory should be creatable");

    for (name, binary) in ops {
        let binary_path = examples_dir.join(binary);
        println!();
        println!("=== {name} ===");
        run_binary(&binary_path, TOOL_ITERS, "direct run");
        run_strace(&binary_path, TOOL_ITERS);
        run_perf(&binary_path, TOOL_ITERS);
        run_heaptrack(root, &heap_dir, name, &binary_path, TOOL_ITERS);
    }
}
/// Compiles all example targets by running `cargo build --examples` from `root`.
fn build_examples(root: &Path) {
    let mut cargo = Command::new("cargo");
    cargo.args(["build", "--examples"]).current_dir(root);
    run_command("cargo build --examples", &mut cargo);
}
/// Executes `binary` with the iteration count as its only argument, reported under `label`.
fn run_binary(binary: &Path, iterations: usize, label: &str) {
    let mut direct = Command::new(binary);
    direct.arg(iterations.to_string());
    run_command(label, &mut direct);
}
/// Runs `binary` under `strace -qq -c`, tracing only the memory-management syscalls
/// named in the filter below.
fn run_strace(binary: &Path, iterations: usize) {
    // Passed to `-e`; restricts the trace to these syscalls.
    const MEMORY_SYSCALLS: &str = "trace=brk,mmap,mremap,munmap,mprotect,madvise";

    let mut strace = Command::new("strace");
    strace
        .args(["-qq", "-c", "-e", MEMORY_SYSCALLS])
        .arg(binary)
        .arg(iterations.to_string());
    run_command("strace -c memory syscalls", &mut strace);
}
/// Runs `binary` under `perf stat`, requesting the event list given below.
fn run_perf(binary: &Path, iterations: usize) {
    // Comma-separated event list handed to `perf stat -e`.
    const EVENTS: &str =
        "task-clock,cycles,instructions,branches,branch-misses,cache-references,cache-misses";

    let mut perf = Command::new("perf");
    perf.args(["stat", "-e", EVENTS])
        .arg(binary)
        .arg(iterations.to_string());
    run_command("perf stat", &mut perf);
}
fn run_heaptrack(root: &Path, heap_dir: &Path, name: &str, binary: &Path, iterations: usize) {
let prefix = heap_dir.join(format!("{name}.zst"));
run_command(
"heaptrack --record-only",
Command::new("heaptrack")
.arg("--record-only")
.arg("-o")
.arg(&prefix)
.arg(binary)
.arg(iterations.to_string())
.current_dir(root),
);
let recorded = PathBuf::from(format!("{}.zst", prefix.display()));
run_command(
"heaptrack_print summary",
Command::new("heaptrack_print")
.arg("-f")
.arg(recorded)
.arg("-n")
.arg("4")
.arg("-s")
.arg("2")
.current_dir(root),
);
}
/// Runs `command` to completion, echoes its captured output, and aborts on failure.
///
/// A `--- {label} ---` banner is printed first. Captured stdout is printed next,
/// followed by captured stderr; both are written to this process's stdout and
/// empty streams are skipped.
///
/// # Panics
///
/// Panics if the command cannot be launched or exits with a non-success status.
fn run_command(label: &str, command: &mut Command) {
    println!("--- {label} ---");

    let finished = match command.output() {
        Ok(finished) => finished,
        Err(error) => panic!("{label} failed to launch: {error}"),
    };

    for stream in [&finished.stdout, &finished.stderr] {
        if !stream.is_empty() {
            print!("{}", String::from_utf8_lossy(stream));
        }
    }

    assert!(
        finished.status.success(),
        "{label} failed with status {}",
        finished.status
    );
}
+3 -2
View File
@@ -89,13 +89,14 @@ where
let header_len = u32::try_from(header_bytes.len()).map_err(|_| FrameError::LengthOverflow)?; let header_len = u32::try_from(header_bytes.len()).map_err(|_| FrameError::LengthOverflow)?;
let payload_len = u32::try_from(payload_bytes.len()).map_err(|_| FrameError::LengthOverflow)?; let payload_len = u32::try_from(payload_bytes.len()).map_err(|_| FrameError::LengthOverflow)?;
let header_start = 8usize; let header_start = align_up(8usize, SECTION_ALIGN);
let payload_start = align_up(header_start + header_bytes.len(), SECTION_ALIGN); let payload_start = align_up(header_start + header_bytes.len(), SECTION_ALIGN);
let total_len = payload_start + payload_bytes.len(); let total_len = payload_start + payload_bytes.len();
let mut frame = FrameBytes::with_capacity(total_len); let mut frame = FrameBytes::with_capacity(total_len);
frame.extend_from_slice(&header_len.to_be_bytes()); frame.extend_from_slice(&header_len.to_be_bytes());
frame.extend_from_slice(&payload_len.to_be_bytes()); frame.extend_from_slice(&payload_len.to_be_bytes());
append_padding(&mut frame, header_start - 8usize);
frame.extend_from_slice(&header_bytes); frame.extend_from_slice(&header_bytes);
append_padding( append_padding(
&mut frame, &mut frame,
@@ -113,7 +114,7 @@ pub fn decode_frame(bytes: &[u8]) -> Result<ParsedFrame<'_>, FrameError> {
let header_len = read_u32(bytes, 0)? as usize; let header_len = read_u32(bytes, 0)? as usize;
let payload_len = read_u32(bytes, 4)? as usize; let payload_len = read_u32(bytes, 4)? as usize;
let header_start = 8usize; let header_start = align_up(8usize, SECTION_ALIGN);
let header_end = header_start + header_len; let header_end = header_start + header_len;
if header_end > bytes.len() { if header_end > bytes.len() {
return Err(FrameError::Truncated); return Err(FrameError::Truncated);