Add SWU extraction

This commit is contained in:
Michael Mikovsky
2026-05-03 19:14:02 -06:00
parent ace2294748
commit 309d01bc30
2 changed files with 284 additions and 0 deletions
+153
View File
@@ -0,0 +1,153 @@
#!/usr/bin/env bash
# Unpack an .swu archive into a directory using the embedded Python program.
#
# Usage: <this-script> input.swu output-dir
set -euo pipefail

# Exactly two positional arguments are required; otherwise print usage to
# stderr and exit with status 1.
if (( $# != 2 )); then
    printf 'Usage: %s input.swu output-dir\n' "$0" >&2
    exit 1
fi

input="$1"
outdir="$2"

# The Python source is read from stdin ("-"); the two paths are passed through
# as its command-line arguments.
python3 - "$input" "$outdir" <<'PY'
import json
import os
import pathlib
import stat
import sys
# Size in bytes of one cpio entry header as parsed below: a 6-character magic
# string followed by thirteen 8-character hexadecimal fields.
HEADER_LEN = 6 + 13 * 8

# File name of the JSON manifest written into the output directory.
MANIFEST = ".swu-manifest.json"
def align4(value):
    """Round *value* up to the nearest multiple of four."""
    # -value % 4 is the distance from value to the next 4-byte boundary
    # (zero when value is already aligned).
    return value + (-value % 4)
def fail(message):
    """Write *message* to stderr and terminate with exit status 1."""
    sys.stderr.write(f"{message}\n")
    raise SystemExit(1)
def safe_path(root, name):
    """Translate an archive member name into a resolved path under *root*.

    A name beginning with "/" is rejected, as is any name whose resolved
    location is neither *root* itself nor somewhere beneath it. Rejections
    are reported through fail(), which terminates the program.

    Returns the resolved pathlib path for an accepted name.
    """
    if name[:1] == "/":
        fail(f"absolute archive path is not supported: {name}")
    base = root.resolve()
    candidate = root.joinpath(name).resolve()
    # Accept the root itself or anything that has the root as an ancestor.
    if base not in (candidate, *candidate.parents):
        fail(f"archive path escapes output directory: {name}")
    return candidate
# ---------------------------------------------------------------------------
# Script body.
#
# Validates the two command-line arguments, walks the cpio archive entry by
# entry, recreates directories and regular files under the output directory,
# and finally writes a JSON manifest describing every entry. Any problem is
# reported through fail(), which prints to stderr and exits with status 1.
# ---------------------------------------------------------------------------
if len(sys.argv) != 3:
    fail("Usage: extract-swu.sh input.swu output-dir")

input_path = pathlib.Path(sys.argv[1])
out_root = pathlib.Path(sys.argv[2])

if not input_path.is_file():
    fail(f"input does not exist: {input_path}")
if out_root.exists():
    # iterdir() on a non-directory would raise; report it cleanly instead.
    if not out_root.is_dir():
        fail(f"output path is not a directory: {out_root}")
    if any(out_root.iterdir()):
        fail(f"output directory is not empty: {out_root}")
out_root.mkdir(parents=True, exist_ok=True)

data = input_path.read_bytes()
offset = 0
entries = []
trailer = None
# (path, mode, mtime) for every directory entry. Their permissions and
# timestamps are applied only after everything has been written: writing a
# file into a directory updates that directory's mtime, and a directory mode
# without write permission would block extraction of its children.
deferred_dirs = []
# Only plain hex digits are valid in header fields. int(x, 16) on its own
# also accepts signs, whitespace, underscores and "0x", which would allow
# negative sizes that move `offset` backwards.
hex_digits = frozenset(b"0123456789abcdefABCDEF")

while True:
    if offset + HEADER_LEN > len(data):
        if offset == len(data):
            # Data ended exactly on an entry boundary without a trailer.
            fail("archive has no TRAILER!!! entry")
        fail("truncated cpio header")
    header_offset = offset
    header = data[offset:offset + HEADER_LEN]
    offset += HEADER_LEN

    # Check the magic before touching the numeric fields so that a file in a
    # different format gets this message rather than a parse error.
    magic = header[:6].decode("ascii", errors="replace")
    if magic != "070702":
        fail(f"unsupported cpio magic at offset {header_offset}: {magic!r}")

    raw_fields = header[6:]
    if not hex_digits.issuperset(raw_fields):
        fail(f"malformed cpio header at offset {header_offset}")
    # Thirteen 8-character hexadecimal fields follow the magic. `check` is
    # parsed to keep the unpacking aligned but is not used below.
    (
        ino,
        mode,
        uid,
        gid,
        nlink,
        mtime,
        filesize,
        devmajor,
        devminor,
        rdevmajor,
        rdevminor,
        namesize,
        check,
    ) = (int(raw_fields[i:i + 8], 16) for i in range(0, len(raw_fields), 8))

    # Entry name: `namesize` bytes including the terminating NUL.
    if offset + namesize > len(data):
        fail("truncated cpio filename")
    raw_name = data[offset:offset + namesize]
    offset += namesize
    if not raw_name.endswith(b"\0"):
        fail("cpio filename is missing NUL terminator")
    try:
        name = raw_name[:-1].decode("utf-8")
    except UnicodeDecodeError:
        fail(f"cpio filename is not valid UTF-8: {raw_name[:-1]!r}")

    # Payload starts at the next 4-byte boundary and is itself padded to one.
    offset = align4(offset)
    if offset + filesize > len(data):
        fail(f"truncated cpio payload for {name}")
    payload = data[offset:offset + filesize]
    offset += filesize
    offset = align4(offset)

    item = {
        "name": name,
        "ino": ino,
        "mode": mode,
        "uid": uid,
        "gid": gid,
        "nlink": nlink,
        "mtime": mtime,
        "devmajor": devmajor,
        "devminor": devminor,
        "rdevmajor": rdevmajor,
        "rdevminor": rdevminor,
    }

    # The trailer entry terminates the archive and is never extracted.
    if name == "TRAILER!!!":
        trailer = item
        break

    entries.append(item)
    path = safe_path(out_root, name)
    file_type = stat.S_IFMT(mode)

    if file_type == stat.S_IFDIR:
        path.mkdir(parents=True, exist_ok=True)
        deferred_dirs.append((path, mode, mtime))
        continue
    if file_type != stat.S_IFREG:
        fail(f"unsupported cpio entry type for {name}: mode {mode:o}")

    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_bytes(payload)
    os.chmod(path, stat.S_IMODE(mode))
    os.utime(path, (mtime, mtime), follow_symlinks=False)

# Everything after the trailer entry must be zero padding.
final_padding = len(data) - offset
if data[offset:] != b"\0" * final_padding:
    fail("archive has non-zero bytes after TRAILER!!!")

manifest = {
    "format": "svr4-crc-cpio",
    "block_size": 512,
    "final_padding": final_padding,
    "entries": entries,
    "trailer": trailer,
}
# Written before directory metadata is applied, because creating this file
# modifies the output directory itself.
(out_root / MANIFEST).write_text(json.dumps(manifest, indent=2) + "\n")

# Deepest directories first, so a restrictive mode on a parent cannot block
# access to a child that still needs its own metadata applied.
for dir_path, dir_mode, dir_mtime in sorted(
    deferred_dirs, key=lambda entry: len(entry[0].parts), reverse=True
):
    os.chmod(dir_path, stat.S_IMODE(dir_mode))
    os.utime(dir_path, (dir_mtime, dir_mtime), follow_symlinks=False)

print(f"extracted {len(entries)} entries to {out_root}")
PY