From 309d01bc30a3178c4384477dfe3018ede82e46f2 Mon Sep 17 00:00:00 2001
From: Michael Mikovsky <77305074+Astatin3@users.noreply.github.com>
Date: Sun, 3 May 2026 19:14:02 -0600
Subject: [PATCH] Add SWU extraction

---
 sig/scripts/build-swu.sh   | 157 +++++++++++++++++++++++++++++++
 sig/scripts/extract-swu.sh | 188 +++++++++++++++++++++++++++++++++++++
 2 files changed, 345 insertions(+)
 create mode 100755 sig/scripts/build-swu.sh
 create mode 100755 sig/scripts/extract-swu.sh

diff --git a/sig/scripts/build-swu.sh b/sig/scripts/build-swu.sh
new file mode 100755
index 0000000..01c1220
--- /dev/null
+++ b/sig/scripts/build-swu.sh
@@ -0,0 +1,157 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+if [[ $# -ne 2 ]]; then
+  printf 'Usage: %s input-dir output.swu\n' "$0" >&2
+  exit 1
+fi
+
+indir=$1
+output=$2
+
+python3 - "$indir" "$output" <<'PY'
+"""Rebuild a CRC-format cpio (.swu) archive from an extracted directory.
+
+Entry order and header fields come from the manifest written by
+extract-swu.sh; file contents are read from the input directory.
+"""
+import json
+import hashlib
+import pathlib
+import stat
+import sys
+
+
+HEADER_LEN = 110  # 6-byte magic followed by 13 fields of 8 hex digits
+MANIFEST = ".swu-manifest.json"
+
+
+def align4_len(value):
+    """Return the number of pad bytes needed to reach a multiple of 4."""
+    return (-value) % 4
+
+
+def fail(message):
+    """Print message to stderr and exit with status 1."""
+    print(message, file=sys.stderr)
+    sys.exit(1)
+
+
+def safe_path(root, name):
+    """Resolve name under root; fail if it is absolute or escapes root."""
+    if name.startswith("/"):
+        fail(f"absolute archive path is not supported: {name}")
+    path = (root / name).resolve()
+    root_resolved = root.resolve()
+    if path != root_resolved and root_resolved not in path.parents:
+        fail(f"archive path escapes input directory: {name}")
+    return path
+
+
+def checksum(payload):
+    """Return the byte sum of payload, truncated to 32 bits."""
+    return sum(payload) & 0xFFFFFFFF
+
+
+def header_for(item, payload):
+    """Return the 110-byte ASCII header for one manifest entry."""
+    name_bytes = item["name"].encode("utf-8") + b"\0"
+    values = [
+        item["ino"],
+        item["mode"],
+        item["uid"],
+        item["gid"],
+        item["nlink"],
+        item["mtime"],
+        len(payload),
+        item["devmajor"],
+        item["devminor"],
+        item["rdevmajor"],
+        item["rdevminor"],
+        len(name_bytes),
+        checksum(payload),
+    ]
+    # Masking keeps every field at exactly 8 hex digits.
+    fields = (f"{value & 0xFFFFFFFF:08X}".encode("ascii") for value in values)
+    return b"070702" + b"".join(fields)
+
+
+def append_entry(out, item, payload):
+    """Append header, NUL-terminated name and payload, padding each to 4."""
+    if len(payload) > 0xFFFFFFFF:
+        # The size field is 8 hex digits; masking it would corrupt the archive.
+        fail(f"file too large for cpio header: {item['name']}")
+    name_bytes = item["name"].encode("utf-8") + b"\0"
+    out.extend(header_for(item, payload))
+    if len(out) % 4 != HEADER_LEN % 4:  # the header must have started aligned
+        fail("internal cpio alignment error before filename")
+    out.extend(name_bytes)
+    out.extend(b"\0" * align4_len(len(out)))
+    out.extend(payload)
+    out.extend(b"\0" * align4_len(len(out)))
+
+
+if len(sys.argv) != 3:
+    fail("Usage: build-swu.sh input-dir output.swu")
+
+root = pathlib.Path(sys.argv[1])
+output = pathlib.Path(sys.argv[2])
+manifest_path = root / MANIFEST
+
+if not root.is_dir():
+    fail(f"input directory does not exist: {root}")
+if not manifest_path.is_file():
+    fail(f"missing manifest: {manifest_path}")
+
+try:
+    manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
+except ValueError as err:
+    fail(f"invalid manifest {manifest_path}: {err}")
+if not isinstance(manifest, dict) or manifest.get("format") != "svr4-crc-cpio":
+    fail("manifest was not created from a CRC SWU archive")
+entries = manifest.get("entries")
+trailer = manifest.get("trailer")
+if not isinstance(entries, list) or not isinstance(trailer, dict):
+    fail("manifest is missing its entries list or trailer entry")
+if trailer.get("name") != "TRAILER!!!":
+    fail("manifest trailer entry is invalid")
+
+# Rewrite cpio_item_md5 from the current contents of the regular files that
+# are listed before it in the manifest.
+# NOTE(review): md5sum(1) separates digest and name with two spaces; confirm
+# the single-space separator below matches what the updater expects.
+if any(item["name"] == "cpio_item_md5" for item in entries):
+    lines = []
+    for item in entries:
+        name = item["name"]
+        if name == "cpio_item_md5":
+            break
+        path = safe_path(root, name)
+        if stat.S_IFMT(item["mode"]) == stat.S_IFREG:
+            lines.append(f"{hashlib.md5(path.read_bytes()).hexdigest()} {name}\n")
+    md5_path = safe_path(root, "cpio_item_md5")
+    md5_path.write_text("".join(lines), encoding="utf-8")
+
+out = bytearray()
+for item in entries:
+    path = safe_path(root, item["name"])
+    file_type = stat.S_IFMT(item["mode"])
+    if file_type == stat.S_IFDIR:
+        if not path.is_dir():
+            fail(f"missing directory entry: {item['name']}")
+        payload = b""
+    elif file_type == stat.S_IFREG:
+        if not path.is_file():
+            fail(f"missing file entry: {item['name']}")
+        payload = path.read_bytes()
+    else:
+        fail(f"unsupported cpio entry type for {item['name']}: mode {item['mode']:o}")
+    append_entry(out, item, payload)
+
+append_entry(out, trailer, b"")
+out.extend(b"\0" * int(manifest.get("final_padding", 0)))
+
+output.parent.mkdir(parents=True, exist_ok=True)
+output.write_bytes(out)
+print(f"wrote {output}")
+PY
diff --git a/sig/scripts/extract-swu.sh b/sig/scripts/extract-swu.sh
new file mode 100755
index 0000000..e9d6197
--- /dev/null
+++ b/sig/scripts/extract-swu.sh
@@ -0,0 +1,188 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+if [[ $# -ne 2 ]]; then
+  printf 'Usage: %s input.swu output-dir\n' "$0" >&2
+  exit 1
+fi
+
+input=$1
+outdir=$2
+
+python3 - "$input" "$outdir" <<'PY'
+"""Unpack a CRC-format cpio (.swu) archive into an empty directory.
+
+Header metadata for every entry is saved to a JSON manifest in the output
+directory so that build-swu.sh can reassemble the archive.
+"""
+import json
+import os
+import pathlib
+import stat
+import sys
+
+
+HEADER_LEN = 110  # 6-byte magic followed by 13 fields of 8 hex digits
+MANIFEST = ".swu-manifest.json"
+
+
+def align4(value):
+    """Round value up to the next multiple of 4."""
+    return (value + 3) & ~3
+
+
+def fail(message):
+    """Print message to stderr and exit with status 1."""
+    print(message, file=sys.stderr)
+    sys.exit(1)
+
+
+def safe_path(root, name):
+    """Resolve name under root; fail if it is absolute or escapes root."""
+    if name.startswith("/"):
+        fail(f"absolute archive path is not supported: {name}")
+    path = (root / name).resolve()
+    root_resolved = root.resolve()
+    if path != root_resolved and root_resolved not in path.parents:
+        fail(f"archive path escapes output directory: {name}")
+    return path
+
+
+if len(sys.argv) != 3:
+    fail("Usage: extract-swu.sh input.swu output-dir")
+
+input_path = pathlib.Path(sys.argv[1])
+out_root = pathlib.Path(sys.argv[2])
+
+if not input_path.is_file():
+    fail(f"input does not exist: {input_path}")
+
+if out_root.exists() and not out_root.is_dir():
+    fail(f"output path is not a directory: {out_root}")
+if out_root.exists() and any(out_root.iterdir()):
+    fail(f"output directory is not empty: {out_root}")
+out_root.mkdir(parents=True, exist_ok=True)
+manifest_path = safe_path(out_root, MANIFEST)
+
+data = input_path.read_bytes()
+offset = 0
+entries = []
+# Directory modes and mtimes are applied after extraction: a read-only mode
+# would block writing children, and creating children resets the mtime.
+pending_dirs = []
+
+while True:
+    if offset + HEADER_LEN > len(data):
+        fail("truncated cpio header")
+
+    header_offset = offset
+    header = data[offset:offset + HEADER_LEN]
+    offset += HEADER_LEN
+    # Validate the magic before parsing fields so that non-cpio input gets
+    # this message rather than a decode traceback.
+    magic = header[:6].decode("ascii", errors="replace")
+    if magic != "070702":
+        fail(f"unsupported cpio magic at offset {header_offset}: {magic!r}")
+
+    try:
+        values = [
+            int(header[i:i + 8].decode("ascii"), 16)
+            for i in range(6, HEADER_LEN, 8)
+        ]
+    except ValueError:
+        fail(f"malformed cpio header at offset {header_offset}")
+    (
+        ino,
+        mode,
+        uid,
+        gid,
+        nlink,
+        mtime,
+        filesize,
+        devmajor,
+        devminor,
+        rdevmajor,
+        rdevminor,
+        namesize,
+        check,
+    ) = values
+
+    if offset + namesize > len(data):
+        fail("truncated cpio filename")
+    raw_name = data[offset:offset + namesize]
+    offset += namesize
+    if not raw_name.endswith(b"\0"):
+        fail("cpio filename is missing NUL terminator")
+    try:
+        name = raw_name[:-1].decode("utf-8")
+    except UnicodeDecodeError:
+        fail(f"cpio filename is not valid UTF-8 at offset {header_offset}")
+    offset = align4(offset)
+
+    if offset + filesize > len(data):
+        fail(f"truncated cpio payload for {name}")
+    payload = data[offset:offset + filesize]
+    offset += filesize
+    offset = align4(offset)
+    if (sum(payload) & 0xFFFFFFFF) != check:
+        # build-swu.sh recomputes this field, so a rebuilt archive will differ.
+        print(
+            f"warning: stored checksum does not match payload of {name}",
+            file=sys.stderr,
+        )
+
+    item = {
+        "name": name,
+        "ino": ino,
+        "mode": mode,
+        "uid": uid,
+        "gid": gid,
+        "nlink": nlink,
+        "mtime": mtime,
+        "devmajor": devmajor,
+        "devminor": devminor,
+        "rdevmajor": rdevmajor,
+        "rdevminor": rdevminor,
+    }
+
+    if name == "TRAILER!!!":
+        trailer = item
+        break
+
+    entries.append(item)
+    path = safe_path(out_root, name)
+    if path == manifest_path:
+        fail(f"archive entry collides with the manifest file: {name}")
+    file_type = stat.S_IFMT(mode)
+    if file_type == stat.S_IFDIR:
+        path.mkdir(parents=True, exist_ok=True)
+        pending_dirs.append((path, mode, mtime))
+    elif file_type == stat.S_IFREG:
+        path.parent.mkdir(parents=True, exist_ok=True)
+        path.write_bytes(payload)
+        os.chmod(path, stat.S_IMODE(mode))
+        os.utime(path, (mtime, mtime), follow_symlinks=False)
+    else:
+        fail(f"unsupported cpio entry type for {name}: mode {mode:o}")
+
+final_padding = len(data) - offset
+if data[offset:] != b"\0" * final_padding:
+    fail("archive has non-zero bytes after TRAILER!!!")
+
+manifest = {
+    "format": "svr4-crc-cpio",
+    "block_size": 512,
+    "final_padding": final_padding,
+    "entries": entries,
+    "trailer": trailer,
+}
+manifest_path.write_text(json.dumps(manifest, indent=2) + "\n", encoding="utf-8")
+
+# Walk backwards so that, when parents precede their children in the archive,
+# each child directory is finalized before its parent.
+for path, mode, mtime in reversed(pending_dirs):
+    os.chmod(path, stat.S_IMODE(mode))
+    os.utime(path, (mtime, mtime), follow_symlinks=False)
+
+print(f"extracted {len(entries)} entries to {out_root}")
+PY