Add SWU extraction

This commit is contained in:
Michael Mikovsky
2026-05-03 19:14:02 -06:00
parent ace2294748
commit 309d01bc30
2 changed files with 284 additions and 0 deletions
+131
View File
@@ -0,0 +1,131 @@
#!/usr/bin/env bash
# Pack a directory that contains a .swu-manifest.json into an SWU cpio archive.
# The actual work is done by the Python program passed to python3 on stdin.
set -euo pipefail

# Exactly two positional arguments are required: the input directory and the
# archive file to write.
if (( $# != 2 )); then
    printf 'Usage: %s input-dir output.swu\n' "$0" >&2
    exit 1
fi

indir="$1"
output="$2"

# The heredoc delimiter is quoted, so the shell performs no expansion inside
# the Python source; both paths are handed over as argv instead.
python3 - "$indir" "$output" <<'PY'
import json
import hashlib
import pathlib
import stat
import sys
# Size in bytes of one cpio header as emitted by header_for(): a 6-byte magic
# followed by 13 fields of 8 hex digits each (6 + 13 * 8 = 110).
HEADER_LEN = 110
# File name of the JSON manifest that is read from the root of the input
# directory and drives the rebuild.
MANIFEST = ".swu-manifest.json"
def align4_len(value):
    """Return how many pad bytes are needed to bring ``value`` to a multiple of 4."""
    remainder = value % 4
    return (4 - remainder) % 4
def fail(message):
    """Write ``message`` plus a newline to stderr and exit with status 1."""
    sys.stderr.write(f"{message}\n")
    raise SystemExit(1)
def safe_path(root, name):
    """Resolve ``name`` beneath ``root`` and reject anything that lands outside.

    Names starting with "/" are refused outright. Otherwise the joined path is
    resolved and must be either ``root`` itself or a descendant of it; any
    other result aborts via fail(). Returns the resolved path.
    """
    if name.startswith("/"):
        fail(f"absolute archive path is not supported: {name}")
    base = root.resolve()
    candidate = (root / name).resolve()
    inside = candidate == base or base in candidate.parents
    if not inside:
        fail(f"archive path escapes input directory: {name}")
    return candidate
def checksum(payload):
    """Return the sum of all bytes in ``payload`` reduced modulo 2**32."""
    total = sum(payload)
    return total % (1 << 32)
def header_for(item, payload):
    """Build the 110-byte "070702" cpio header for one archive entry.

    Args:
        item: Manifest entry dict providing "name", "ino", "mode", "uid",
            "gid", "nlink", "mtime", "devmajor", "devminor", "rdevmajor"
            and "rdevminor".
        payload: The entry's file data (empty for directories and the
            trailer); its length and byte-sum checksum go into the header.

    Returns:
        The header as ASCII bytes: the magic followed by 13 fields, each
        rendered as 8 upper-case hex digits.

    Every field is only 32 bits wide. A value that does not fit (for example
    a payload of 4 GiB or more) aborts via fail() rather than being silently
    truncated into a corrupt header.
    """
    name_bytes = item["name"].encode("utf-8") + b"\0"
    fields = (
        ("ino", item["ino"]),
        ("mode", item["mode"]),
        ("uid", item["uid"]),
        ("gid", item["gid"]),
        ("nlink", item["nlink"]),
        ("mtime", item["mtime"]),
        ("filesize", len(payload)),
        ("devmajor", item["devmajor"]),
        ("devminor", item["devminor"]),
        ("rdevmajor", item["rdevmajor"]),
        ("rdevminor", item["rdevminor"]),
        # The stored name length includes the terminating NUL.
        ("namesize", len(name_bytes)),
        ("check", checksum(payload)),
    )
    for label, value in fields:
        if not 0 <= value <= 0xFFFFFFFF:
            fail(
                f"cpio header field {label} out of range for "
                f"{item['name']}: {value}"
            )
    return b"070702" + b"".join(
        f"{value:08X}".encode("ascii") for _, value in fields
    )
def append_entry(out, item, payload):
    """Append one cpio record to the ``out`` bytearray in place.

    The record is the header, the NUL-terminated UTF-8 name padded to a
    4-byte boundary, then the payload padded to a 4-byte boundary.
    """
    encoded_name = item["name"].encode("utf-8") + b"\0"
    out += header_for(item, payload)
    # Records start 4-byte aligned and the header is 110 bytes long, so the
    # running length must be 2 mod 4 here; anything else is an internal bug.
    if len(out) % 4 != 2:
        fail("internal cpio alignment error before filename")
    out += encoded_name
    out += bytes(align4_len(len(out)))
    out += payload
    out += bytes(align4_len(len(out)))
# Main program: rebuild the archive described by the manifest found in the
# input directory and write it to the requested output file.
if len(sys.argv) != 3:
    fail("Usage: build-swu.sh input-dir output.swu")

root = pathlib.Path(sys.argv[1])
output = pathlib.Path(sys.argv[2])
manifest_path = root / MANIFEST

if not root.is_dir():
    fail(f"input directory does not exist: {root}")
if not manifest_path.is_file():
    fail(f"missing manifest: {manifest_path}")

# Report unreadable or malformed JSON through fail() instead of a traceback.
try:
    manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
except (OSError, ValueError) as err:
    fail(f"cannot read manifest {manifest_path}: {err}")

if not isinstance(manifest, dict) or manifest.get("format") != "svr4-crc-cpio":
    fail("manifest was not created from a CRC SWU archive")

# Validate the overall manifest shape before touching anything on disk, so a
# broken manifest cannot leave a half-updated input directory behind.
entries = manifest.get("entries")
trailer = manifest.get("trailer")
if not isinstance(entries, list):
    fail("manifest has no entries list")
if not isinstance(trailer, dict) or trailer.get("name") != "TRAILER!!!":
    fail("manifest trailer entry is invalid")

# If the archive carries a cpio_item_md5 member, regenerate it from the
# current contents of every regular file listed before it in the manifest.
if any(item["name"] == "cpio_item_md5" for item in entries):
    lines = []
    for item in entries:
        name = item["name"]
        if name == "cpio_item_md5":
            break
        path = safe_path(root, name)
        if stat.S_IFMT(item["mode"]) == stat.S_IFREG:
            if not path.is_file():
                fail(f"missing file entry: {name}")
            digest = hashlib.md5(path.read_bytes()).hexdigest()
            lines.append(f"{digest} {name}\n")
    # Write UTF-8 bytes explicitly: archive names are encoded as UTF-8 in the
    # cpio headers, so the list must not depend on the locale encoding or on
    # platform newline translation.
    safe_path(root, "cpio_item_md5").write_bytes("".join(lines).encode("utf-8"))

out = bytearray()
for item in entries:
    path = safe_path(root, item["name"])
    file_type = stat.S_IFMT(item["mode"])
    if file_type == stat.S_IFDIR:
        if not path.is_dir():
            fail(f"missing directory entry: {item['name']}")
        payload = b""
    elif file_type == stat.S_IFREG:
        if not path.is_file():
            fail(f"missing file entry: {item['name']}")
        payload = path.read_bytes()
    else:
        fail(f"unsupported cpio entry type for {item['name']}: mode {item['mode']:o}")
    append_entry(out, item, payload)

append_entry(out, trailer, b"")
# Re-create the zero padding recorded for the space after the trailer.
out.extend(b"\0" * int(manifest.get("final_padding", 0)))

output.parent.mkdir(parents=True, exist_ok=True)
output.write_bytes(out)
print(f"wrote {output}")
PY
+153
View File
@@ -0,0 +1,153 @@
#!/usr/bin/env bash
# Unpack an SWU cpio archive into a directory and record a .swu-manifest.json
# describing its entries. The work is done by the Python program on stdin.
set -euo pipefail

# Exactly two positional arguments are required: the archive to read and the
# directory to extract into.
if (( $# != 2 )); then
    printf 'Usage: %s input.swu output-dir\n' "$0" >&2
    exit 1
fi

input="$1"
outdir="$2"

# The heredoc delimiter is quoted, so the shell performs no expansion inside
# the Python source; both paths are handed over as argv instead.
python3 - "$input" "$outdir" <<'PY'
import json
import os
import pathlib
import stat
import sys
# Size in bytes of one cpio header as parsed below: a 6-byte magic followed
# by 13 fields of 8 hex digits each (6 + 13 * 8 = 110).
HEADER_LEN = 110
# File name of the JSON manifest written into the root of the output
# directory once extraction has finished.
MANIFEST = ".swu-manifest.json"
def align4(value):
    """Round ``value`` up to the nearest multiple of four."""
    padding = -value % 4
    return value + padding
def fail(message):
    """Write ``message`` plus a newline to stderr and exit with status 1."""
    sys.stderr.write(f"{message}\n")
    raise SystemExit(1)
def safe_path(root, name):
    """Resolve ``name`` beneath ``root`` and reject anything that lands outside.

    Names starting with "/" are refused outright. Otherwise the joined path is
    resolved and must be either ``root`` itself or a descendant of it; any
    other result aborts via fail(). Returns the resolved path.
    """
    if name.startswith("/"):
        fail(f"absolute archive path is not supported: {name}")
    base = root.resolve()
    candidate = (root / name).resolve()
    inside = candidate == base or base in candidate.parents
    if not inside:
        fail(f"archive path escapes output directory: {name}")
    return candidate
# Main program: walk the cpio records in the input file, materialise every
# directory and regular file below the output directory, and finally write a
# JSON manifest holding the header metadata of each entry.
if len(sys.argv) != 3:
    fail("Usage: extract-swu.sh input.swu output-dir")

input_path = pathlib.Path(sys.argv[1])
out_root = pathlib.Path(sys.argv[2])

if not input_path.is_file():
    fail(f"input does not exist: {input_path}")
if out_root.exists():
    # iterdir() would raise on a non-directory, so check that first.
    if not out_root.is_dir():
        fail(f"output path is not a directory: {out_root}")
    if any(out_root.iterdir()):
        fail(f"output directory is not empty: {out_root}")
out_root.mkdir(parents=True, exist_ok=True)

data = input_path.read_bytes()
offset = 0
entries = []
trailer = None
# Directory mode and mtime are applied only after everything has been
# written: a read-only directory mode would otherwise block extraction of
# its children, and writing children would overwrite the restored mtime.
deferred_dirs = []

while True:
    if offset + HEADER_LEN > len(data):
        fail("truncated cpio header")
    header_offset = offset
    header = data[offset:offset + HEADER_LEN]
    offset += HEADER_LEN

    # Check the magic on the raw bytes before decoding anything, so that
    # non-cpio input is reported cleanly instead of raising a decode error.
    if header[:6] != b"070702":
        magic = header[:6].decode("ascii", errors="backslashreplace")
        fail(f"unsupported cpio magic at offset {header_offset}: {magic!r}")

    try:
        values = [
            int(header[i:i + 8].decode("ascii"), 16)
            for i in range(6, HEADER_LEN, 8)
        ]
    except ValueError:  # also covers UnicodeDecodeError
        fail(f"malformed cpio header at offset {header_offset}")

    (
        ino,
        mode,
        uid,
        gid,
        nlink,
        mtime,
        filesize,
        devmajor,
        devminor,
        rdevmajor,
        rdevminor,
        namesize,
        check,
    ) = values

    if offset + namesize > len(data):
        fail("truncated cpio filename")
    raw_name = data[offset:offset + namesize]
    offset += namesize
    if not raw_name.endswith(b"\0"):
        fail("cpio filename is missing NUL terminator")
    try:
        name = raw_name[:-1].decode("utf-8")
    except UnicodeDecodeError:
        fail(f"cpio filename at offset {header_offset} is not valid UTF-8")
    # Name and payload are each padded to a 4-byte boundary.
    offset = align4(offset)

    if offset + filesize > len(data):
        fail(f"truncated cpio payload for {name}")
    payload = data[offset:offset + filesize]
    offset += filesize
    offset = align4(offset)

    item = {
        "name": name,
        "ino": ino,
        "mode": mode,
        "uid": uid,
        "gid": gid,
        "nlink": nlink,
        "mtime": mtime,
        "devmajor": devmajor,
        "devminor": devminor,
        "rdevmajor": rdevmajor,
        "rdevminor": rdevminor,
    }

    # The trailer record ends the archive; the loop exits only here or
    # through fail(), so the trailer is always set once the loop is left.
    if name == "TRAILER!!!":
        trailer = item
        break

    entries.append(item)
    path = safe_path(out_root, name)
    file_type = stat.S_IFMT(mode)
    if file_type == stat.S_IFDIR:
        path.mkdir(parents=True, exist_ok=True)
        deferred_dirs.append((path, mode, mtime))
    elif file_type == stat.S_IFREG:
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_bytes(payload)
        os.chmod(path, stat.S_IMODE(mode))
        os.utime(path, (mtime, mtime), follow_symlinks=False)
    else:
        fail(f"unsupported cpio entry type for {name}: mode {mode:o}")

# Whatever follows the trailer must be zero padding; its length is recorded
# in the manifest.
final_padding = len(data) - offset
if data[offset:] != b"\0" * final_padding:
    fail("archive has non-zero bytes after TRAILER!!!")

manifest = {
    "format": "svr4-crc-cpio",
    "block_size": 512,
    "final_padding": final_padding,
    "entries": entries,
    "trailer": trailer,
}
(out_root / MANIFEST).write_text(json.dumps(manifest, indent=2) + "\n")

# Restore directory metadata last, deepest paths first, so restricting a
# parent never prevents updating one of its children.
for dir_path, dir_mode, dir_mtime in sorted(
    deferred_dirs, key=lambda entry: len(entry[0].parts), reverse=True
):
    os.chmod(dir_path, stat.S_IMODE(dir_mode))
    os.utime(dir_path, (dir_mtime, dir_mtime), follow_symlinks=False)

print(f"extracted {len(entries)} entries to {out_root}")
PY